-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsurvivalKM.R
103 lines (79 loc) · 3.32 KB
/
survivalKM.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# script to generate survival (KM) plots using gene expression data
library(reshape2)
library(data.table)
library(survival)
library(survminer)
library(tidyverse)
library(dplyr)
# function call = returns KM plot
survivalKM(exprData = expression_data, metadata = associated_metadata, endpoint = 'overall survival', Risk = 'All', gene = 'ALK')
# function definition
survivalKM <- function(exprData, metadata, gene, endpoint, Risk) {
if(endpoint == "overall survival") {
time <- 'nti_surv_overall'
status <- 'nti_event_overall_num'
} else {
time <- 'nti_surv_progrfree'
status <- 'nti_event_progrfree_num'
}
# Step1: Prepare data ----------------------------------
# condition for specific RISK group
if (Risk != 'All') {
metadata <- metadata[metadata$RISK == Risk,]
exprData <- exprData[, which(colnames(exprData) %in% rownames(metadata))]
} else {
metadata <- metadata
exprData <- exprData
}
# subset metadata
metadata <- metadata[,c("RISK",time,status)]
# subset expr data
exprData <- exprData[gene,]
# Step2: Normalizing expression data ----------------------------------
# z-score > 0 = high expression
# z-score < 0 = low expression
exprData <- data.frame("gene.Z" = apply(X = exprData, MARGIN = 1, FUN = scale))
# adding expr data to metadata
metadata <- cbind(metadata, exprData)
metadata <- metadata %>%
gather(key = 'gene', value = 'zScore', -c("RISK",time,status))
metadata$gene <- gsub('gene.Z.','',metadata$gene)
metadata$gene <- as.factor(metadata$gene)
# assigning exprStatus
# 1 = high
# 0 = low
# 2 = mid expression
# ignore mid expressions
metadata$exprStatus <- ifelse(metadata$zScore > 0, 1,
ifelse(metadata$zScore < 0, 0, 2))
# remove the ones with mid expressions
metadata <- metadata[metadata$exprStatus != 2,]
# Step3: Fitting survival curves ----------------------------------
group = "exprStatus"
fit <- survfit(Surv(get(time), get(status)) ~ exprStatus, data = metadata)
# Step4: Comparing survival times between groups ----------------------------------
# conduct between-group significance tests using a log-rank test
# to be conducted between high-low expressing groups
diff <- survdiff(formula = Surv(get(time), get(status)) ~ exprStatus,data = metadata)
pval <- pchisq(diff$chisq, length(diff$n)-1, lower.tail = FALSE)
adjpval <- p.adjust(pval, method = "fdr", n = (fit$n[1] + fit$n[2]))
plotTitle <- paste0(paste(gene, collapse = " | "), " P-val(Adj) :", format(pval, scientific=T, digits=3), "(", format(adjpval, scientific=T, digits=3), ")")
# Step5: Generate plot ----------------------------------
# change strata names
low <- paste0("Low : n = ", fit$n[1])
high <- paste0("High : n = ", fit$n[2])
names(fit$strata) <- c(low,high)
plot <- ggsurvplot(fit,
data = metadata,
pval = TRUE,
pval.size = 5,
conf.int = TRUE,
risk.table = TRUE, # Add risk table
risk.table.col = "strata", # Change risk table color by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_bw(),
risk.table.fontsize = 5,
palette = c("#E7B800", "#2E9FDF"),
title = plotTitle) + xlab('Survival Time')
return(plot)
} # function ends here