## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center", fig.width = 7, fig.height = 6, cache = TRUE ) ## ----eval=FALSE--------------------------------------------------------------- # library(survival) # library(catalytic) # # set.seed(1) # # # Compute the Partial Likelihood for the Cox Proportional Hazards Model # get_cox_partial_likelihood <- function(X, # time, # status, # coefs, # entry_points) { # # Identify the risk set indices for Cox proportional hazards model # get_cox_risk_set_idx <- function(time_of_interest, # entry_vector, # time_vector, # status_vector) { # time_vector <- as.vector(time_vector) # entry_vector <- as.vector(entry_vector) # status_vector <- as.vector(status_vector) # # # Find indices where subjects are at risk at the given time of interest # return(which((time_of_interest >= entry_vector) & ( # (time_vector == time_of_interest & status_vector == 1) | ( # time_vector + 1e-08 > time_of_interest)))) # } # # X <- as.matrix(X) # time <- as.vector(time) # status <- as.vector(status) # # pl <- 0 # # # Calculate partial likelihood for each censored observation # for (i in which(status == 1)) { # risk_set_idx <- get_cox_risk_set_idx( # time_of_interest = time[i], # entry_vector = entry_points, # time_vector = time, # status_vector = status # ) # # # Compute the linear predictors for the risk set # exp_risk_lp <- exp(X[risk_set_idx, , drop = FALSE] %*% coefs) # # # Update partial likelihood # pl <- pl + sum(X[i, , drop = FALSE] * coefs) - log(sum(exp_risk_lp)) # } # # return(pl) # } # # # Load pbc dataset for Cox proportional hazards model # data("pbc") # # pbc <- pbc[stats::complete.cases(pbc), 2:20] # Remove NA values and `id` column # # pbc$trt <- ifelse(pbc$trt == 1, 1, 0) # Convert trt to binary # pbc$sex <- ifelse(pbc$sex == "f", 1, 0) # Convert sex to binary # pbc$edema2 <- ifelse(pbc$edema == 0.5, 1, 0) # Convert edema2 to binary # pbc$edema <- ifelse(pbc$edema == 1, 1, 0) # Convert edema to binary # pbc$time <- pbc$time / 365 # Convert time to years # pbc$status <- (pbc$status == 2) * 1 # Convert status to binary # # # Identify columns to be standardized # not_standarlized_index <- c(1, 2, 3, 5, 6, 7, 8, 9, 20) # # # Standardize the columns # pbc[, -not_standarlized_index] <- scale(pbc[, -not_standarlized_index]) # # # Seperate observation data into train and test data # train_n <- (ncol(pbc) - 2) * 5 # train_idx <- sample(1:nrow(pbc), train_n) # train_data <- pbc[train_idx, ] # test_data <- pbc[-train_idx, ] # # test_x <- test_data[, -which(names(test_data) %in% c("time", "status"))] # test_time <- test_data$time # test_status <- test_data$status # test_entry_points <- rep(0, nrow(test_x)) # # dim(train_data) ## ----eval=FALSE--------------------------------------------------------------- # # Calculate MPLE (Marginal Partial Likelihood Estimate) prediction score with 0 coefficients # MPLE_0 <- get_cox_partial_likelihood( # X = test_x, # time = test_time, # status = test_status, # coefs = rep(0, (ncol(test_x))), # entry_points = test_entry_points # ) # # # Fit a COX regression model # cox_model <- survival::coxph( # formula = survival::Surv(time, status) ~ ., # data = train_data # ) # # # Calculate MPLE (Marginal Partial Likelihood Estimate) prediction score of estimated coefficients minus 0 coefficients # cat( # "MLE COX Model - Predition Score:", # get_cox_partial_likelihood( # X = test_x, # time = test_time, # status = test_status, # coefs = coef(cox_model), # entry_points = test_entry_points # ) - MPLE_0 # ) ## ----eval=FALSE--------------------------------------------------------------- # cat_init <- cat_cox_initialization( # formula = survival::Surv(time, status) ~ 1, # data = train_data, # syn_size = 100, # Default: 4 times the number of predictors # hazard_constant = 1, # Default: calculated from observed data # entry_points = rep(0, nrow(train_data)), # Default: Null, which is vector of zeros # x_degree = NULL, # Default: NULL, which means degrees of all predictors are 1 # resample_only = FALSE, # Default: FALSE # na_replace = mean # Default: stats::na.omit # ) # # cat_init ## ----eval=FALSE--------------------------------------------------------------- # names(cat_init) ## ----eval=FALSE--------------------------------------------------------------- # # The number of observations (rows) in the original dataset (`obs_data`) # cat_init$obs_size # # # The information detailing the process of resampling synthetic data # cat_init$syn_x_resample_inform ## ----eval=FALSE--------------------------------------------------------------- # cat_cox_model <- cat_cox( # formula = survival::Surv(time, status) ~ ., # Same as `~ .` # cat_init = cat_init, # tau = 10, # Default: number of predictors # method = "WME", # Default: CRE # init_coefficients = NULL, # Default: all zeros # tol = 0.01, # Default: 1e-5 # max_iter = 5 # Default: 25 # ) # # cat_cox_model ## ----eval=FALSE--------------------------------------------------------------- # cat( # "Catalytic `cat_cox` - Predition Score:", # get_cox_partial_likelihood( # X = test_x, # time = test_time, # status = test_status, # coefs = coef(cat_cox_model), # entry_points = test_entry_points # ) - MPLE_0 # ) ## ----eval=FALSE--------------------------------------------------------------- # names(cat_cox_model) ## ----eval=FALSE--------------------------------------------------------------- # # The formula used for modeling # cat_cox_model$formula # # # The fitted model object obtained from `survival::coxph` with `tau` # cat_cox_model$coefficients ## ----eval=FALSE--------------------------------------------------------------- # cat_cox_tune_model <- cat_cox_tune( # formula = survival::Surv(time, status) ~ ., # Same as `~ .` # cat_init = cat_init, # method = "CRE", # Default: "CRE" # tau_seq = seq(1, 5, 0.5), # Default is a numeric sequence around the number of predictors # cross_validation_fold_num = 3, # Default: 5 # tol = 1 # Additional arguments passed to the `cat_cox` function # ) # # cat_cox_tune_model ## ----eval=FALSE--------------------------------------------------------------- # plot(cat_cox_tune_model, text_pos = 2, legend_pos = "bottomright") ## ----eval=FALSE--------------------------------------------------------------- # cat( # "Catalytic `cat_cox_tune` - Predition Score:", # get_cox_partial_likelihood( # X = test_x, # time = test_time, # status = test_status, # coefs = coef(cat_cox_tune_model), # entry_points = test_entry_points # ) - MPLE_0 # ) ## ----eval=FALSE--------------------------------------------------------------- # names(cat_cox_tune_model) ## ----eval=FALSE--------------------------------------------------------------- # # The estimated coefficients from the fitted model # cat_cox_tune_model$coefficients # # # Selected optimal tau value from `tau_seq` that maximize the likelihood # cat_cox_tune_model$tau ## ----eval = FALSE------------------------------------------------------------- # cat_cox_bayes_model <- cat_cox_bayes( # formula = survival::Surv(time, status) ~ ., # Same as `~ . # cat_init = cat_init, # tau = 1, # Default: NULL # chains = 1, # Default: 4 # iter = 100, # Default: 2000 # warmup = 50, # Default: 1000 # hazard_beta = 2 # Default: 2 # ) # # cat_cox_bayes_model ## ----eval=FALSE--------------------------------------------------------------- # traceplot(cat_cox_bayes_model) ## ----eval=FALSE--------------------------------------------------------------- # traceplot(cat_cox_bayes_model, inc_warmup = TRUE) ## ----eval = FALSE------------------------------------------------------------- # cat( # "Catalytic `cat_cox_bayes` - Predition Score:", # get_cox_partial_likelihood( # X = test_x, # time = test_time, # status = test_status, # coefs = coef(cat_cox_bayes_model), # entry_points = test_entry_points # ) - MPLE_0 # ) ## ----eval = FALSE------------------------------------------------------------- # names(cat_cox_bayes_model) ## ----eval = FALSE------------------------------------------------------------- # # The number of Markov chains used during MCMC sampling in `rstan`. # cat_cox_bayes_model$chain # # # The mean estimated coefficients from the Bayesian model # cat_cox_bayes_model$coefficients ## ----eval = FALSE------------------------------------------------------------- # cat_cox_bayes_joint_model <- cat_cox_bayes_joint( # formula = survival::Surv(time, status) ~ ., # Same as `~ . # cat_init = cat_init, # chains = 1, # Default: 4 # iter = 100, # Default: 2000 # warmup = 50, # Default: 1000 # tau_alpha = 2, # Default: 2 # tau_gamma = 1, # Default: 1 # hazard_beta = 2 # Default: 2 # ) # # cat_cox_bayes_joint_model ## ----eval = FALSE------------------------------------------------------------- # traceplot(cat_cox_bayes_joint_model) ## ----eval = FALSE------------------------------------------------------------- # traceplot(cat_cox_bayes_joint_model, inc_warmup = TRUE) ## ----eval = FALSE------------------------------------------------------------- # cat( # "Catalytic `cat_cox_bayes_joint` - Predition Score:", # get_cox_partial_likelihood( # X = test_x, # time = test_time, # status = test_status, # coefs = coef(cat_cox_bayes_joint_model), # entry_point = test_entry_points # ) - MPLE_0 # ) ## ----eval = FALSE------------------------------------------------------------- # names(cat_cox_bayes_joint_model) ## ----eval = FALSE------------------------------------------------------------- # # The estimated tau parameter from the MCMC sampling `rstan::sampling`, # cat_cox_bayes_joint_model$tau # # # The mean estimated coefficients from the Bayesian model # cat_cox_bayes_joint_model$coefficients