Skip to content

Commit 32fa30d

Browse files
committed
Some restructuring so that the simulations can be run without access to the raw data
1 parent 32106f5 commit 32fa30d

8 files changed

+449
-437
lines changed

01-setup.R

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# R Packages -------------------------------------------------------------------
2+
library("data.table")
3+
library("doParallel")
4+
library("doRNG") # Reproducible parallel foreach loops
5+
library("dplyr")
6+
library("foreach")
7+
library("glmnet")
8+
library("ggplot2")
9+
library("ggpubr")
10+
library("impute") # From bioconductor
11+
library("knitr")
12+
library("parallel")
13+
library("purrr")
14+
library("rngtools") # Required by dorng
15+
library("rsample")
16+
library("survival")
17+
library("tidyr") # (>= 1.0.0) for pivot_longer() and pivot_wider()
18+
# library("xfun") # We will use xfun::cache_rds() below, but not attach the package here
19+
library("xtable")
20+
21+
# Custom R functions -----------------------------------------------------------
22+
source("R/plot_patient_followup.R")
23+
source("R/impute_genes.R")
24+
source("R/make_xy.R")
25+
source("R/calibrate_sim.R")
26+
source("R/run_sim.R")
27+
source("R/adjust_surv.R")
28+
source("R/tidycoef.R")
29+
source("R/concordance.R")
30+
source("R/calibrate.R")
31+
32+
# Settings ---------------------------------------------------------------------
33+
set.seed(77)
34+
theme_set(theme_bw())
35+
center_title <- function() theme(plot.title = element_text(hjust = 0.5))
36+
N_SIMS <- 200
37+
38+
# Caching
39+
RERUN_CACHE <- TRUE # Set to TRUE to rerun all cached results
40+
if (!dir.exists("cache")){
41+
dir.create("cache")
42+
}
43+
44+
# Parallel
45+
PARALLEL <- TRUE
46+
if (PARALLEL) {
47+
cl <- parallel::makeCluster(20, setup_strategy = "sequential", outfile = "simout")
48+
registerDoParallel(cl)
49+
}
50+
51+
# Model formula ----------------------------------------------------------------
52+
# This formula is used for both the simulations and the real-world data application
53+
f_small <- formula(
54+
~ PracticeType + index_date_year + Race + age_at_dx + RE_TP53 +
55+
CN_TP53 + SV_TP53 + SV_KRAS + SV_EGFR
56+
)
57+
vars_small <- c("Practice type: community", "Index year", "African American",
58+
"Other race", "White", "Age", "TP53 RE",
59+
"TP53 CN", "TP53 SV", "KRAS SV", "EGFR SV")
60+
61+
# Followup plot ----------------------------------------------------------------
62+
p_followup <- plot_patient_followup()
63+
ggsave("figs/followup.pdf", p_followup, height = 5, width = 7)

02-simulation.R

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Calibrate simulation ---------------------------------------------------------
2+
if (file.exists("sim_settings.rds")) { # Interested readers can run this
3+
sim_settings <- readRDS("sim_settings.rds")
4+
} else { # This setting is run for the paper
5+
data <- readRDS("data.rds")
6+
sim_settings <- calibrate_sim(f_small, data = data, store_x = TRUE)
7+
}
8+
9+
# Save cumulative hazard plot
10+
ggsave("figs/sim_calibration_cumhaz.pdf",
11+
sim_settings$cumhaz_plot,
12+
height = 5, width = 10)
13+
14+
# Example simulated data -------------------------------------------------------
15+
# No predictors (intercept only model)
16+
params <- set_params(sim_settings = sim_settings, dist = "weibullPH")
17+
simdata <- sim_survdata(params = params, n_pats = 10000)
18+
simdata_summary_int <- summarize_simdata(simdata, km_rwd = sim_settings$os_km$rc,
19+
save = TRUE, name = "int")
20+
21+
# Now include predictors (and hence create informative censoring when
22+
# using the Kaplan-Meier estimator). The design matrix X and parameters will
23+
# be used in the simulations that follow
24+
x_sim <- sim_x(n_pats = 5000, sim_settings, p_bin = 0)
25+
params <- set_params(x_sim, sim_settings, dist = "weibullPH")
26+
simdata <- sim_survdata(x_sim, params)
27+
simdata_summary_p11 <- summarize_simdata(simdata, km_rwd = sim_settings$os_km$rc,
28+
save = TRUE, name = "p11")
29+
30+
# Run simulation for unpenalized Cox model -------------------------------------
31+
sim_coxph_p21 <- xfun::cache_rds({
32+
run_sim(n_sims = N_SIMS, x = x_sim, params = params, method = "coxph")
33+
}, file = "sim_coxph_p21.rds", rerun = RERUN_CACHE)
34+
sim_coxph_21_summary <- summarize_sim(sim_coxph_p21, save = TRUE,
35+
model_name = "coxph_p21")
36+
rm(sim_coxph_p21)
37+
38+
# Run simulation for lasso model with lambda = 0 and small p -------------------
39+
# run_sim1(simdata, method = "coxnet", lambda = 0) # For debugging
40+
sim_coxlasso_lambda0_p21 <- xfun::cache_rds({
41+
run_sim(n_sims = N_SIMS, x = x_sim, params = params,
42+
method = "coxnet", lambda = c(1, 0))
43+
}, file = "sim_coxlasso_lambda0_p21.rds", rerun = RERUN_CACHE)
44+
coxlasso_lambda0_p21_summary <- summarize_sim(
45+
sim_coxlasso_lambda0_p21, save = TRUE,
46+
model_name = "coxlasso_lambda0_p21"
47+
)
48+
rm(sim_coxlasso_lambda0_p21)
49+
50+
# Run simulation for lasso model with small p ----------------------------------
51+
sim_coxlasso_p21 <- xfun::cache_rds({
52+
run_sim(n_sims = N_SIMS, x = x_sim, params = params,
53+
method = "coxnet")
54+
}, file = "sim_coxlasso_p21.rds", rerun = RERUN_CACHE)
55+
sim_coxlasso_p21_summary <- summarize_sim(sim_coxlasso_p21, save = TRUE,
56+
model_name = "coxlasso_p21")
57+
rm(sim_coxlasso_p21)
58+
59+
# Run simulation for ridge model with small p ----------------------------------
60+
sim_coxridge_p21 <- xfun::cache_rds({
61+
run_sim(n_sims = N_SIMS, x = x_sim, params = params,
62+
method = "coxnet", alpha = 0)
63+
}, file = "sim_coxridge_p21.rds", rerun = RERUN_CACHE)
64+
sim_coxridge_p21_summary <- summarize_sim(sim_coxridge_p21, save = TRUE,
65+
model_name = "coxridge_p21")
66+
rm(sim_coxridge_p21)
67+
68+
# Run simulation for lasso model with large p ----------------------------------
69+
x_sim <- sim_x(n_pats = 5000, sim_settings, p_bin = 1000)
70+
params <- set_params(x_sim, sim_settings, dist = "weibullPH")
71+
72+
sim_coxlasso_p1011 <- xfun::cache_rds({
73+
run_sim(n_sims = N_SIMS, x = x_sim, params = params,
74+
method = "coxnet")
75+
}, file = "sim_coxlasso_p1011.rds", rerun = RERUN_CACHE)
76+
sim_coxlasso_p1011_summary <- summarize_sim(sim_coxlasso_p1011, save = TRUE,
77+
model_name = "coxlasso_p1011")
78+
rm(sim_coxlasso_p1011)
79+
80+
# Calibration plot comparing small and large p simulation ----------------------
81+
sim_coxlasso_calplot <- ggarrange(
82+
sim_coxlasso_p21_summary$calibration_plots$complete +
83+
ggtitle("(A) Small p") + center_title(),
84+
sim_coxlasso_p1011_summary$calibration_plots$complete +
85+
ggtitle("(B) Large p") + center_title(),
86+
nrow = 2, common.legend = TRUE, legend = "bottom"
87+
)
88+
ggsave("figs/sim_coxlasso_calibration_complete.pdf", sim_coxlasso_calplot,
89+
width = 7, height = 9)

0 commit comments

Comments
 (0)