#' @title Direct Semi-Parametric Regression Model for Continuous-Scale ROC Curves
#'
#' @description you can use this function to fit an direct semi-parametric regression
#'     model for continuous-scale ROC curves based on placement values. This
#'     function will print and return the parameters of the model and the AUC of
#'     the corresponding ROC curve, and will make an image of the ROC curve and
#'     its confidence band.
#'
#' @param vars a vector of characters representing the covariates in the model.
#' @param data a data frame consisting of data from subjects without condition.
#' @param data_d a data frame consisting of data from subjects with condition.
#' @param newdata_d a data frame for generating ROC curve.
#' @param response a character representing the name of the column in both data
#'     and data_d that represents the test results of subjects.
#' @param link link function in direct regression model. "probit", "logit" or "cauchit".
#'     Default "probit".
#' @param level the confidence level. Default 0.95.
#' @param n_boot the number of bootstrap samples to draw. Default 1000.
#'
#' @import nleqslv
#' @import stats
#' @import graphics
#'
#' @return a list of the parameters of the model and the AUC of the corresponding ROC curve.
#'
#' @examples
#' \donttest{
#' data("data_continous")
#' data <- data_continous[data_continous[["d"]] == 0, c("t", "x1", "x2")]
#' data_d <- data_continous[data_continous[["d"]] == 1, c("t", "x1", "x2")]
#' cdspROC2(vars = c("x1", "x2"),
#'          data = data,
#'          data_d = data_d,
#'          newdata_d = data.frame(x1 = c(0), x2 = c(0)),
#'          response = "t")
#' }
#'
#' @export

cdspROC2 <- function(vars, data, data_d, newdata_d, response, link = "probit",
                     level = 0.95, n_boot = 1000){

  f_link <- make.link(link)

  # Calculate the value of the ROC curve
  cal_ROC <- function(p, beta, p_jump, h_jump){
    W <- sum(beta * newdata_d[vars])
    h <- numeric(length(p))
    num <- rowSums(p >= tcrossprod(rep(1, length(p)), p_jump))
    h[num == 0] = -Inf
    h[num > 0] = h_jump[num]
    return(f_link$linkinv(W + h))
  }

  # Calculate AUC
  cal_AUC <- function(beta, p_jump, h_jump){
    W <- sum(beta * newdata_d[vars])
    if (length(p_jump) == 1){
      return(1 - p_jump[1])
    }
    else{
      dp <- c(p_jump[2: length(p_jump)], 1) - p_jump
      return(sum(f_link$linkinv(W + h_jump) * dp))
    }
  }

  # Use auxiliary function cdspROC2_ to get parameter estimates
  par_list <- cdspROC2_(vars, data, data_d, response, link)
  beta <- par_list$beta
  h <- par_list$h
  p_jump <- h$p_jump
  h_jump <- h$h_jump

  if (length(beta) != 0){
    names(beta) <- vars
  }

  # Generate fpr_plot for mapping, calculate se_plot & AUC
  fpr_plot <- seq(0.001, 0.999, 0.001)
  se_plot <- cal_ROC(fpr_plot, beta, p_jump, h_jump)
  AUC <- cal_AUC(beta, p_jump, h_jump)

  # Use bootstrap to calculate parameters, standard deviation of ROC curves and AUCs
  beta_boot_mat <- matrix(nrow = length(beta), ncol = 0)
  se_boot_mat <- matrix(nrow = length(se_plot), ncol = 0)
  AUC_boot <- c()

  for (iter in 1: n_boot){
    stop <- FALSE
    # Prevent errors from being reported due to too many duplicate samples
    while (!stop){
      index <- sample(rownames(data), nrow(data), replace = TRUE)
      index_d <- sample(rownames(data_d), nrow(data_d), replace = TRUE)

      result <- tryCatch({
        par_list_boot <- cdspROC2_(vars, data[index, ], data_d[index_d, ], response, link)
        1
      }, warning = function(w){
        2
      }, error = function(e){
        3
      }, finally = {})

      if (result == 1){
        stop = TRUE
      }
    }

    beta_boot <- par_list_boot$beta
    h_boot <- par_list_boot$h
    h_jump_boot <- h_boot$h_jump
    p_jump_boot <- h_boot$p_jump

    beta_boot_mat <- cbind(beta_boot_mat, beta_boot)
    se_boot_mat <- cbind(se_boot_mat, cal_ROC(fpr_plot, beta_boot, p_jump_boot, h_jump_boot))
    AUC_boot <- c(AUC_boot, cal_AUC(beta_boot, p_jump_boot, h_jump_boot))
  }

  # Print the estimates of the parameters along with the standard deviation
  if (length(beta) != 0){
    beta_std <- sqrt(apply(beta_boot_mat, MARGIN = 1, var))
    beta_info <- cbind(beta, beta_std)
    colnames(beta_info) <- c("Estimate", "std. Error")
    cat("Coefficients: \n")
    print(beta_info, digits = 4)
    cat("\n")
  }

  # Print the estimate of AUC along with the standard deviation
  AUC_std <- sqrt(var(AUC_boot))
  AUC_info <- cbind(AUC, AUC_std)
  colnames(AUC_info) <- c("Estimate", "std. Error")
  rownames(AUC_info) <- c("AUC")
  cat("AUC: \n")
  print(AUC_info, digits = 4)
  cat("\n")

  # plot
  se_std <- sqrt(apply(se_boot_mat, MARGIN = 1, var))
  plot(se_plot ~ fpr_plot, type = "l", main = "ROC Curve Using Semi-parametric Direct Regression Model",
       xlim = c(0.0, 1.0), ylim = c(0.0, 1.0), xlab = "FPR", ylab = "TPR")
  lines(se_plot - qnorm((1 + level) / 2) * se_std ~ fpr_plot, lty = 2)
  lines(se_plot + qnorm((1 + level) / 2) * se_std ~ fpr_plot, lty = 2)
  legend("bottomright", legend = c("ROC Curve", "CI"), lty = c(1, 2))

  list_return <- list(beta = beta, h = h, AUC = AUC)
  return(list_return)
}


# Auxiliary function, used for parameter and ROC curve estimation
cdspROC2_ <- function(vars, data, data_d, response, link){

  # Distribution function and density function of the sum of independent identities of the logistic distribution
  sum_logit <- function(x, type){
    y <- x
    t <- x[x != 0]
    expt = exp(t)
    if (type == "p"){
      y[x == 0] <- 0.5
      y[x != 0] <- expt * (expt - t - 1) / (expt - 1)^2
    }
    if (type == "d"){
      y[x == 0] <- 1/6
      y[x != 0] <- expt * ((t - 2) * expt + t + 2) / (expt - 1)^3
    }
    if (type == "dd"){
      y[x == 0] <- 0
      y[x != 0] <- ((3 - t) * expt^2 - 4 * t * expt - t - 3) * expt / (expt - 1)^4
    }
    return(y)
  }

  # Calculate the distribution function and density function corresponding to "link"
  func <- function(x, type){
    if (link == "logit"){
      expx = exp(x)
    }
    if (type == "p"){
      return(switch(link, "logit" = expx / (1 + expx),
                    "probit" = pnorm(x),
                    "cauchit" = 0.5 + 1 / pi * atan(x)))
    }
    if (type == "d"){
      return(switch(link, "logit" = expx / (1 + expx)^2,
                    "probit" = dnorm(x),
                    "cauchit" = 1 / (pi * (1 + x^2))))
    }
    if (type == "dd"){
      return(switch(link, "logit" = expx * (1 - expx) / (1 + expx)^3,
                    "probit" = -x * dnorm(x),
                    "cauchit" = -2 * x / (pi * (1 + x^2)^2)))
    }
  }

  # Calculate the distribution function and density function of the sum of the independent identities of the links.
  # Here, since x is an (inverse) symmetric matrix, only the lower part is computed.
  sum_func <- function(x, type){
    y = x
    if (type == "p"){
      y[upper.tri(x, diag = TRUE)] = 0.5
      y[lower.tri(x)] = switch(link, "logit" = sum_logit(x[lower.tri(x)], "p"),
                               "probit" = func(x[lower.tri(x)] / sqrt(2), "p"),
                               "cauchit" = func(x[lower.tri(x)] / 2, "p"))
    }
    if (type == "d"){
      y[upper.tri(x, diag = TRUE)] = 0
      y[lower.tri(x)] = switch(link, "logit" = sum_logit(x[lower.tri(x)], "d"),
                               "probit" = func(x[lower.tri(x)] / sqrt(2), "d") / sqrt(2),
                               "cauchit" = func(x[lower.tri(x)] / 2, "d") / 2)
    }
    if (type == "dd"){
      y[upper.tri(x, diag = TRUE)] = 0
      y[lower.tri(x)] = switch(link, "logit" = sum_logit(x[lower.tri(x)], "dd"),
                               "probit" = func(x[lower.tri(x)] / sqrt(2), "dd") / 2,
                               "cauchit" = func(x[lower.tri(x)] / 2, "dd") / 4)
    }
    return(y)
  }

  n <- nrow(data)
  n_d <- nrow(data_d)
  b <- length(vars)

  # Using the semi-parametric position model to obtain placement values
  terms = intersect(colnames(data), vars)
  if (length(terms) == 0){
    location <- as.formula(paste(response, "1", sep = " ~ "))
  }
  else {
    location <- reformulate(terms, response = response)
  }

  model <- lm(location, data)
  res <- model[["residuals"]]
  res_d <- data_d[[response]] - predict(model, data_d)
  plv <- rowSums(tcrossprod(rep(1, n_d), res) >= res_d) / n

  # Use placement value to estimate beta
  mat <- as.matrix(data_d[vars])
  U_ij <- (res_d >= tcrossprod(rep(1, n_d), res_d))

  if (b != 0){
    # score function
    score <- function(x){
      W <- as.vector(mat %*% x)
      W_ij <- W - tcrossprod(rep(1, n_d), W)

      tmp <- sum_func(W_ij, "d") / (sum_func(W_ij, "p") - 1 + U_ij)
      tmp <- t(tmp) - tmp

      y <- crossprod(rep(1, n_d), tmp %*% mat)
      return(y)
    }

    # Jacobian matrix
    jac <- function(x){
      W <- as.vector(mat %*% x)
      W_ij <- W - tcrossprod(rep(1, n_d), W)

      tmp <- sum_func(W_ij, "p")
      tmp <- (sum_func(W_ij, "dd") / (tmp - 1 + U_ij)) -
        (sum_func(W_ij, "d") / (tmp - 1 + U_ij))^2
      tmp <- tmp + t(tmp)

      jac <- crossprod(mat, diag(colSums(tmp)) %*% mat) -
        crossprod(mat, tmp %*% mat)
      return(jac)
    }

    x0 <- rep(0, b)
    result <- nleqslv(x0, fn = score, jac = jac, method = "Newton")
    beta <- result$x
    W_hat <- as.vector(mat %*% beta)
  }
  else {
    beta <- c()
    W_hat <- numeric(n_d)
  }

  # Using the estimate of beta to estimate h
  p <- sort(unique(plv))
  n_p <- length(p)

  if (n_p == 1){
    h <- c(Inf)
  }
  else {
    h <- c(numeric(n_p - 1), Inf)

    for (i in 1: (n_p - 1)){
      U_ip <- (plv <= p[i])
      fn <- function(x){
        return(sum(func(x + W_hat, "d") /
                     (func(x + W_hat, "p") - 1 + U_ip)))
      }

      # select an appropriate initial value
      h_upper <- 4.0 - max(W_hat[U_ip == 0])
      h_lower <- -4.0 - min(W_hat[U_ip == 1])
      h0 <- (h_upper + h_lower) / 2

      h[i] <- nleqslv(h0, fn)$x
    }
  }

  return(list(beta = beta, h = list(p_jump = p, h_jump = h)))
}
