#' @title Direct Semi-Parametric Regression Model for Continuous-Scale ROC Curves
#'
#' @description you can use this function to fit an direct semi-parametric regression
#'     model for continuous-scale ROC curves based on indicator variables. This
#'     function will print and return the parameters of the model and the AUC of
#'     the corresponding ROC curve, and will make an image of the ROC curve and
#'     its confidence band.
#'
#' @param vars a vector of characters representing the covariates in the model.
#' @param eta_vars a vector of characters representing the covariates interacting
#'     with functions of p in the model.
#' @param data a data frame consisting of data from subjects without condition.
#' @param data_d a data frame consisting of data from subjects with condition.
#' @param newdata_d a data frame for generating ROC curve.
#' @param response a character representing the name of the column in both data
#'     and data_d that represents the test results of subjects.
#' @param link link function in direct regression model. "probit", "logit" or "cauchit".
#'     Default "probit".
#' @param eta function interacting with eta_vars. "probit", "logit" or "cauchit".
#'     Default "probit".
#' @param p_lower the lower bound of fprs that users are interested in. Default 0.0.
#' @param p_upper the upper bound of fprs that users are interested in. Default 1.0.
#' @param level the confidence level. Default 0.95.
#' @param n_boot the number of bootstrap samples to draw. Default 1000.
#'
#' @import quantreg
#' @import nleqslv
#' @import stats
#' @import graphics
#'
#' @return a list of the parameters of the model and the AUC of the corresponding ROC curve.
#'
#' @examples
#' \donttest{
#' data("data_continous")
#' data <- data_continous[data_continous[["d"]] == 0, c("t", "x1", "x2")]
#' data_d <- data_continous[data_continous[["d"]] == 1, c("t", "x1", "x2")]
#' cdspROC(vars = c("x1", "x2"),
#'         eta_vars = c(),
#'         data = data,
#'         data_d = data_d,
#'         newdata_d = data.frame(x1 = c(0), x2 = c(0)),
#'         response = "t")
#' }
#'
#' @export

cdspROC <- function(vars, eta_vars, data, data_d, newdata_d, response, link = "probit", eta = "probit",
                    p_lower = 0.0, p_upper = 1.0, level = 0.95, n_boot = 1000){

  # Generate fpr_set required for parameter estimation
  fpr_set <- seq(1, (nrow(data) - 1)) / nrow(data)
  fpr_set <- fpr_set[(fpr_set >= p_lower) & (fpr_set <= p_upper)]

  # Calculate the value of the ROC curve
  cal_ROC <- function(p, beta, gamma, p_jump, h_jump){
    W_new <- sum(beta * newdata_d[vars])
    W_eta_new <- sum(gamma * newdata_d[eta_vars])

    vec_eta_new <- make.link(eta)$linkfun(p)
    h <- numeric(length(p))
    num <- rowSums(p >= tcrossprod(rep(1, length(p)), p_jump))
    h[num == 0] = -Inf
    h[num > 0] = h_jump[num]
    return(make.link(link)$linkinv(W_new + W_eta_new * vec_eta_new + h))
  }

  # Calculate AUC
  cal_AUC <- function(beta, gamma, p_jump, h_jump){
    p_cal <- c(p_lower, p_jump[(p_jump >= p_lower) & (p_jump <= p_upper)], p_upper)
    AUC <- 0
    for (i in 1: (length(p_cal) - 1)){
      AUC <- AUC + integrate(cal_ROC, p_cal[i], p_cal[i+1], beta, gamma, p_jump, h_jump)$value
    }
    return(AUC)
  }

  # Use the auxiliary function cdspROC_ to get parameter estimates
  par_list <- cdspROC_(vars, eta_vars, data, data_d, response, link, eta, fpr_set)
  beta <- par_list$beta
  gamma <- par_list$gamma
  h <- par_list$h
  p_jump <- h$p_jump
  h_jump <- h$h_jump

  if (length(beta) != 0){
    names(beta) <- vars
  }
  if (length(gamma) != 0){
    names(gamma) <- paste(eta_vars, eta, sep = ":")
  }

  # Generate fpr_plot for mapping, calculate se_plot and AUC
  fpr_plot <- seq(0.001, 0.999, 0.001)
  fpr_plot <- fpr_plot[(fpr_plot >= p_lower) & (fpr_plot <= p_upper)]
  se_plot <- cal_ROC(fpr_plot, beta, gamma, p_jump, h_jump)
  AUC <- cal_AUC(beta, gamma, p_jump, h_jump)

  # Use bootstrap to calculate standard deviation of parameters, ROC curves and AUCs
  beta_boot_mat <- matrix(nrow = length(beta), ncol = 0)
  gamma_boot_mat <- matrix(nrow = length(gamma), ncol = 0)
  h_boot_mat <- matrix(nrow = length(h), ncol = 0)

  se_boot_mat <- matrix(nrow = length(se_plot), ncol = 0)
  AUC_boot <- c()

  for (iter in 1: n_boot){
    stop <- FALSE
    # Prevents errors from being reported due to too many duplicate samples
    while (!stop){
      index <- sample(rownames(data), nrow(data), replace = TRUE)
      index_d <- sample(rownames(data_d), nrow(data_d), replace = TRUE)
      result <- tryCatch({
        par_list_boot <- cdspROC_(vars, eta_vars, data[index, ], data_d[index_d, ], response,
                                  link, eta, fpr_set)
        1
      }, warning = function(w){
        2
      }, error = function(e){
        3
      }, finally = {})

      if (result == 1){
        stop = TRUE
      }
    }

    beta_boot <- par_list_boot$beta
    gamma_boot <- par_list_boot$gamma
    h_boot <- par_list_boot$h
    h_jump_boot <- h_boot$h_jump
    p_jump_boot <- h_boot$p_jump

    beta_boot_mat <- cbind(beta_boot_mat, beta_boot)
    gamma_boot_mat <- cbind(gamma_boot_mat, gamma_boot)
    h_boot_mat <- cbind(h_boot_mat, h_boot)
    se_boot_mat <- cbind(se_boot_mat, cal_ROC(fpr_plot, beta_boot, gamma_boot, p_jump_boot, h_jump_boot))
    AUC_boot <- c(AUC_boot, cal_AUC(beta_boot, gamma_boot, p_jump_boot, h_jump_boot))
  }

  # Print the estimates of the parameters along with the standard deviations
  if (length(beta) != 0){
    beta_std <- sqrt(apply(beta_boot_mat, MARGIN = 1, var))
    beta_info <- cbind(beta, beta_std)
    colnames(beta_info) <- c("Estimate", "std. Error")
    cat("Coefficients: \n")
    print(beta_info, digits = 4)
    cat("\n")
  }
  if (length(gamma) != 0){
    gamma_std <- sqrt(apply(gamma_boot_mat, MARGIN = 1, var))
    gamma_info <- cbind(gamma, gamma_std)
    colnames(gamma_info) <- c("Estimate", "std. Error")
    cat("Coefficients: \n")
    print(gamma_info, digits = 4)
    cat("\n")
  }

  # Print the estimate of AUC along with the standard deviation
  AUC_std <- sqrt(var(AUC_boot))
  AUC_info <- cbind(AUC, AUC_std)
  colnames(AUC_info) <- c("Estimate", "std. Error")
  rownames(AUC_info) <- c("AUC")
  cat("AUC: \n")
  print(AUC_info, digits = 4)
  cat("\n")

  # plot
  se_std <- sqrt(apply(se_boot_mat, MARGIN = 1, var))
  plot(se_plot ~ fpr_plot, type = "l", main = "ROC Curve Using Semi-parametric Direct Regression Model",
       xlim = c(p_lower, p_upper), ylim = c(0.0, 1.0), xlab = "FPR", ylab = "TPR")
  lines(se_plot - qnorm((1 + level) / 2) * se_std ~ fpr_plot, lty = 2)
  lines(se_plot + qnorm((1 + level) / 2) * se_std ~ fpr_plot, lty = 2)
  legend("bottomright", legend = c("ROC Curve", "CI"), lty = c(1, 2))

  list_return <- list(beta = beta, h = h, AUC = AUC)
  return(list_return)
}


# Auxiliary function, used for parameter and ROC curve estimation
cdspROC_ <- function(vars, eta_vars, data, data_d, response, link, eta, fpr_set){

  # Calculate the distribution function and density function corresponding to link
  func <- function(x, type){
    if (link == "logit"){
      expx = exp(x)
    }
    if (type == "p"){
      return(switch(link, "logit" = expx / (1 + expx),
                    "probit" = pnorm(x),
                    "cauchit" = 0.5 + 1 / pi * atan(x)))
    }
    if (type == "d"){
      return(switch(link, "logit" = expx / (1 + expx)^2,
                    "probit" = dnorm(x),
                    "cauchit" = 1 / (pi * (1 + x^2))))
    }
    if (type == "dd"){
      return(switch(link, "logit" = expx * (1 - expx) / (1 + expx)^3,
                    "probit" = -x * dnorm(x),
                    "cauchit" = -2 * x / (pi * (1 + x^2)^2)))
    }
  }

  # Generate the matrices needed to estimate the parameters
  mat <- as.matrix(data_d[vars])
  mat_eta <- as.matrix(data_d[eta_vars])
  n <- nrow(data)
  n_d <- nrow(data_d)

  a <- length(vars)
  b <- length(eta_vars)

  # Formula for quantile regression
  terms = intersect(colnames(data), vars)
  if (length(terms) == 0){
    rq_formula <- as.formula(paste(response, "1", sep = " ~ "))
  }
  else {
    rq_formula <- reformulate(terms, response = response)
  }

  # Use quantile regression to obtain placement values and indicator variable U_ip
  # U_ip is an n_d * n_p matrix, where U_ip = 1 if and only if T_i <= p
  U_ip = matrix(nrow = n_d, ncol = 0)
  for (fpr in fpr_set){
    model <- rq(rq_formula, 1 - 1e-6 - fpr, data)
    U_ip <- cbind(U_ip, as.numeric(data_d[[response]] >= predict(model, data_d)))
  }

  if (ncol(U_ip) <= 1){
    weight <- 1
  }
  else {
    cum <- colSums(U_ip)
    weight <- cum - c(0, cum[1: (ncol(U_ip) - 1)])
  }

  fpr_set <- fpr_set[weight > 0]
  U_ip <- U_ip[1: n_d, weight > 0]
  n_p <- length(fpr_set)

  # Generate the vector corresponding to eta
  vec_eta <- make.link(eta)$linkfun(fpr_set)

  # score function
  score <- function(x){
    y <- numeric(a + b + n_p)

    W <- tcrossprod(rep(1, n_d), x[(a + b + 1): (a + b + n_p)])
    if (a != 0){
      W <- W + as.vector(mat %*% x[1: a])
    }
    if (b != 0){
      m <- tcrossprod(mat_eta %*% x[(a + 1): (a + b)], vec_eta)
      W <- W + tcrossprod(mat_eta %*% x[(a + 1): (a + b)], vec_eta)
    }
    tmp <- U_ip - func(W, "p")

    if (a != 0){
      y[1: a] <- crossprod(mat, rowSums(tmp))
    }
    if (b != 0){
      y[(a + 1): (a + b)] <- crossprod(mat_eta, tmp) %*% vec_eta
    }
    y[(a + b + 1): (a + b + n_p)] <- colSums(tmp)

    return(y)
  }

  # Jacobian Matrix
  jac <- function(x){
    I <- matrix(0, nrow = (a + b + n_p), ncol = (a + b + n_p))

    W <- tcrossprod(rep(1, n_d), x[(a + b + 1): (a + b + n_p)])
    if (a != 0){
      W <- W + as.vector(mat %*% x[1: a])
    }
    if (b != 0){
      W <- W + tcrossprod(mat_eta %*% x[(a + 1): (a + b)], vec_eta)
    }
    tmp <- - func(W, "d")

    if (a != 0){
      I[1: a, 1: a] <- crossprod(mat, diag(rowSums(tmp))) %*% mat
      if (b != 0){
        I[(a + 1): (a + b), 1: a] <-
          crossprod(mat_eta, diag(as.numeric(tmp %*% vec_eta))) %*% mat
        I[1: a, (a + 1): (a + b)] <- t(I[(a + 1): (a + b), 1: a])
      }
      I[(a + b + 1): (a + b + n_p), 1: a] <- crossprod(tmp, mat)
      I[1: a, (a + b + 1): (a + b + n_p)] <- t(I[(a + b + 1): (a + b + n_p), 1: a])
    }
    if (b != 0){
      I[(a + 1): (a + b), (a + 1): (a + b)] <-
        crossprod(mat_eta, diag(as.numeric(tmp %*% (vec_eta^2))))%*% mat_eta
      I[(a + b + 1): (a + b + n_p), (a + 1): (a + b)] <-
        crossprod(tmp, mat_eta) * vec_eta
      I[(a + 1): (a + b), (a + b + 1): (a + b + n_p)] <-
        t(I[(a + b + 1): (a + b + n_p), (a + 1): (a + b)])
    }

    diag(I[(a + b + 1): (a + b + n_p), (a + b + 1): (a + b + n_p)]) <- colSums(tmp)
    return(I)
  }

  x0 <- numeric(a + b + n_p)
  result <- nleqslv(x = x0, fn = score, jac = jac, method = "Newton")
  list_return <- list(beta = c(), gamma = c(), h = c())

  if (a != 0){
    list_return$beta <- result$x[1: a]
  }
  if (b != 0){
    list_return$gamma <- result$x[(a + 1): (a + b)]
  }
  list_return$h <- list(p_jump = fpr_set, h_jump = result$x[(a + b + 1): (a + b + n_p)])

  return(list_return)
}
