library(nanostringr)
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(ggplot2)
library(ggpubr)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(cluster)
library(readxl)
library(MCPcounter)
## Loading required package: curl
## Using libcurl 8.7.1 with LibreSSL/3.3.6
rcc_dir <- list.dirs("~/Dog/Nanostring/", recursive = FALSE)
raw_expr <- lapply(rcc_dir, function(x) read_rcc(path = x)$raw)
sfs <- list()
sfs2 <- list()
i = 1
norm_expr <- lapply(raw_expr, function(x) {
#positive control normalization
gm <- exp(colMeans(log(x[x$Code.Class == "Positive",4:15]), na.rm = T))
am <- mean(gm, na.rm = T)
sf <- sapply(gm, function(y) am/y)
sfs[[i]] <<- sf
x1 <- as.matrix(x[,4:15])
x1 <- sapply(1:ncol(x1), function(i) sf[i] * as.numeric(x1[,i]))
#House keeping gene normalization
gm <- exp(colMeans(log(x[x$Code.Class == "Housekeeping", 4:15]), na.rm = T))
am <- mean(gm, na.rm = T)
sf <- sapply(gm, function(y) am/y)
sfs2[[i]] <<- sf
x2 <- sapply(1:ncol(x1),function(i) sf[i]*as.numeric(x1[,i]))
rownames(x2) <- x$Name
colnames(x2) <- colnames(x)[4:ncol(x)]
x2 <- x2[x$Code.Class == "Endogenous",]
x2 <- x2[,!is.na(sf) & !is.infinite(sf)]
i <<- i+1
return(x2)
})
combined_expr <- Reduce(cbind, norm_expr)
rownames(combined_expr) <- rownames(norm_expr[[1]])
samp_names <- sapply(colnames(combined_expr), function(x) {
splitstr = strsplit(x,split = "_")[[1]]
return(splitstr[3])
})
samp_names <- sapply(samp_names, trimws)
colnames(combined_expr) <- samp_names
combined_expr <- combined_expr[,!grepl("0814 RNAlater tumor", samp_names)]
nanostring_annotations <- read_excel("~/Dog/Nanostring/CS032282 and CS9318963 Nanostring 07-29-2022.xlsx")
nanostring_annotations <- as.data.frame(nanostring_annotations)
rownames(nanostring_annotations) <- nanostring_annotations$`Nanostring Sample Name`
common <- intersect(rownames(nanostring_annotations), colnames(combined_expr))
CE <- combined_expr[,common]
NaAn <- nanostring_annotations[common,]
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:curl':
##
## parse_date
Canine_RNA_SEQ_TPM_All_data <- read_csv("~/Downloads/Canine_RNA_SEQ_TPM_All_data.csv")
## Rows: 37952 Columns: 199
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gene_id
## dbl (198): 100_0514_tumor_rsem.genes.results, 101_0518_tumor_rsem.genes.resu...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Canine_RNA_SEQ_TPM_All_data <- as.data.frame(Canine_RNA_SEQ_TPM_All_data)
rownames(Canine_RNA_SEQ_TPM_All_data) <- Canine_RNA_SEQ_TPM_All_data$gene_id
Canine_RNA_SEQ_TPM_All_data <- Canine_RNA_SEQ_TPM_All_data[,-1]
sample_type <- c("Met","Primary")[as.numeric(grepl("(Tumor)|(tumor)", colnames(Canine_RNA_SEQ_TPM_All_data)))+1]
colnames(Canine_RNA_SEQ_TPM_All_data) <- sapply(colnames(Canine_RNA_SEQ_TPM_All_data), function(x) as.character(as.numeric(strsplit(x,split = '_')[[1]][2])))
rownames(Canine_RNA_SEQ_TPM_All_data) <- sapply(rownames(Canine_RNA_SEQ_TPM_All_data), function(x) gsub("_3","",x))
DOG2_RNAseq_Sample_list <- read_excel("~/Downloads/DOG2 RNAseq Sample list.xlsx")
DOG2_RNAseq_Sample_list$`Sample ID`[7] <- "18_0620_Met_Skin"
DOG2_RNAseq_Sample_list$`Sample ID`[8] <- "20_0621_Met_Unspecified"
sites_RNASeq <- rep("Primary", ncol(Canine_RNA_SEQ_TPM_All_data))
sites_RNASeq[sample_type == "Met"] <- sapply(DOG2_RNAseq_Sample_list$`Sample ID`[1:8], function(x) toupper(strsplit(x,split = "_")[[1]][4]))
helper functions to measure expression of distinct pathway signatures: (eg chromosomal instability, cGAS-STING, type1 interferon, etc)
library(msigdbr)
msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)
CIN_signature <- function(expr){
genes <- c("CCNB2","PRC1","CDC45L","KIF20A","UBE2C","CDC2","MAD2L1","H2AFZ","TRIP13","ESPL1","TTK","FEN1","MCM7","CNAP1","MELK","TPX2","FOXM1","TOP2A","MCM2","RAD51AP1","RFC4","PCNA","RNASEH2A","TGIF2","CCT5")
mat <- as.data.frame(expr)
mat <- mat[genes,]
mat <- apply(as.matrix(mat),1,scale)
scores <- rowMeans(mat, na.rm = T)
return(scores)
}
CYT_signature <- function(expr){
genes <- c("GZMA","PRF1")
mat <- as.data.frame(expr)
mat <- log2(mat[genes,] +1)
scores <- colMeans(mat, na.rm = T)
return(scores)
}
type1_interferon_signature <- function(expr) {
genes <- msigdbr_list$HALLMARK_INTERFERON_ALPHA_RESPONSE
mat <- as.data.frame(expr)
mat <- mat[genes,]
mat <- apply(as.matrix(mat),1,scale)
scores <- rowMeans(mat, na.rm = T)
return(scores)
}
cGAS_STING_signature <- function(expr){
genes = c("CGAS","STING1","TBK1","IKBKE","IRF3")
mat <- as.data.frame(expr)
mat <- mat[genes,]
mat <- apply(as.matrix(mat),1,scale)
scores <- rowMeans(mat, na.rm = T)
return(scores)
}
library(RColorBrewer)
set.seed(2425)
canin_exp <- Canine_RNA_SEQ_TPM_All_data
cellprop_rnaseq<- t(MCPcounter.estimate(expression = log2(canin_exp+1), featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_rnaseq <- apply(cellprop_rnaseq, 2, scale)
rownames(cellprop_rnaseq) <- colnames(canin_exp)
#k means clustering
km.res <- kmeans(cellprop_rnaseq, centers = 3, nstart = 500, iter.max = 500)
cls <- km.res$cluster
imm_clus <- rep("ID", nrow(cellprop_rnaseq))
imm_clus[cls == 1] <- "IE-ECM"
imm_clus[cls == 2] <- "IE"
ii1 <- imm_clus
imm_clus <- factor(imm_clus, levels = c("IE","IE-ECM","ID"))
#Plot results
ha = rowAnnotation(
sample = sample_type,
preservation = rep("Frozen", length(sample_type)),
col = list("sample" = c("Primary" = "yellow", "Met" = "darkgreen"),
"preservation" = c("Frozen" = "#db6d00", "FFPE" = "#924900")
)
)
n <- 45
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
cols <- sample(col_vector, n, replace = FALSE)
cols <- sample(cols, length(unique(sites_RNASeq)), replace = F)
names(cols) <- unique(sites_RNASeq)
cols["Primary"] <- "white"
ha2 = rowAnnotation(
site = sites_RNASeq,
col = list("site"=cols)
)
Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha, right_annotation = ha2)
p1 <- Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
#pdf('~/Dog/TME_manuscript1.pdf')
#Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
#dev.off()
names(imm_clus) <- rownames(cellprop_rnaseq)
names(ii1) <- rownames(cellprop_rnaseq)
cyt_scores <- CYT_signature(canin_exp)
names(cyt_scores) <- rownames(cellprop_rnaseq)
cell_props_dat <- as.data.frame(cellprop_rnaseq)
cell_props_dat$imm_clus <- imm_clus
Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha, right_annotation = ha2)
#pdf('~/Dog/MCP_marker_genes_expression.pdf',width = 18,height = 9)
#Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha, right_annotation = ha2)
#dev.off()
p1 <- ggplot(data = data.frame(subtype = imm_clus, CIN_score = CIN_signature(log2(canin_exp+1))), aes(x = subtype, y=CIN_score)) + geom_boxplot() + theme_bw() + theme(text = element_text(size = 15), axis.text.x = element_text(size = 15)) + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif")
#+ stat_compare_means(method = "anova", label = "p.format")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/CIN_subtypes.pdf", width = 7, height = 6)
p2 <- ggplot(data = data.frame(subtype = imm_clus, CGAS_STING_score = cGAS_STING_signature(canin_exp)), aes(x = subtype, y=CGAS_STING_score)) + geom_boxplot() + theme_bw() + theme(text = element_text(size = 15), axis.text.x = element_text(size = 15)) + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif")
print(p2)
p2 <- ggplot(data = data.frame(subtype = imm_clus, IN_score = type1_interferon_signature(log2(canin_exp+1))), aes(x = subtype,y=IN_score)) + geom_boxplot() +theme_bw() + stat_compare_means(method = "anova")
print(p2)
#ggsave(plot = p2, filename = "~/Dog/IN_caines.pdf", width = 4, height = 4)
p2 <- ggplot(data = data.frame(subtype = imm_clus, CYT_score = cyt_scores), aes(x = subtype,y=CYT_score)) + geom_boxplot() +theme_bw() + stat_compare_means(method = "anova") + ylab("Cytotoxicity scores")
print(p2)
#ggsave(plot = p2, filename = "~/Dog/CYT_caines.pdf", width = 4, height = 4)
Assessing reproducibility of TME clusters via MCP counter analysis using canine single cell RNA seq cell type signatures (Ammons et al, Communications Biology, 2024)
library(ConsensusClusterPlus)
set.seed(2425)
Ammons_signatures <- read_excel("~/Dog/Ammons_signatures.xlsx")
genes <- c()
cell_types <- c()
cell_subtypes <- c()
for(i in 1:nrow(Ammons_signatures)){
genes <- c(genes, strsplit(Ammons_signatures$Markers[i],split = ", ")[[1]])
cell_types <- c(cell_types, rep(Ammons_signatures$`Cell type`[i], length(strsplit(Ammons_signatures$Markers[i],split = ", ")[[1]])))
cell_subtypes <- c(cell_subtypes, rep(Ammons_signatures$`Cell type detailed`[i], length(strsplit(Ammons_signatures$Markers[i],split = ", ")[[1]])))
}
markers_ammons <- data.frame(HUGO_symbols = genes, Cell_population = cell_subtypes)
colnames(markers_ammons) <- c("HUGO symbols","Cell population")
cellprop_rnaseq2<- t(MCPcounter.estimate(expression = log2(canin_exp+1), featuresType = "HUGO_symbols", genes = markers_ammons))
cellprop_rnaseq2 <- apply(cellprop_rnaseq2, 2, scale)
rownames(cellprop_rnaseq2) <- colnames(canin_exp)
res = ConsensusClusterPlus(d = t(cellprop_rnaseq2), distance = "euclidean", clusterAlg = "km", pItem = 0.8, pFeature = 1, seed = 2425,maxK = 10, reps = 100,plot = "pdf",title = "ConsensusAmmonsRNASeq")
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
ha = rowAnnotation(
sample = sample_type,
col = list("sample" = c("Primary" = "yellow", "Met" = "darkgreen")
)
)
imm_clus_ammons <- rep("ID", nrow(cellprop_rnaseq2))
imm_clus_ammons[as.character(res[[3]]$consensusClass) == "3"] <- "IE"
imm_clus_ammons[as.character(res[[3]]$consensusClass) == "2"] <- "IE-ECM"
names(imm_clus_ammons) <- rownames(cellprop_rnaseq2)
Heatmap(cellprop_rnaseq2, row_split = factor(imm_clus_ammons, levels = c("IE","IE-ECM","ID")), cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(3, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
rownames(cellprop_rnaseq2) <- NULL
pdf('~/Dog/TME_manuscript_ammons.pdf', width = "13",height = "11")
## Warning: 'mode(width)' and 'mode(height)' differ between new and previous
## ==> NOT changing 'width' & 'height'
Heatmap(cellprop_rnaseq2, row_split = factor(imm_clus_ammons, levels = c("IE","IE-ECM","ID")), cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
dev.off()
## quartz_off_screen
## 2
chisq.test(table(factor(imm_clus,levels = c("IE","IE-ECM","ID")), factor(imm_clus_ammons, levels = c("IE","IE-ECM","ID"))))
## Warning in chisq.test(table(factor(imm_clus, levels = c("IE", "IE-ECM", :
## Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table(factor(imm_clus, levels = c("IE", "IE-ECM", "ID")), factor(imm_clus_ammons, levels = c("IE", "IE-ECM", "ID")))
## X-squared = 105.4, df = 4, p-value < 2.2e-16
library(readr)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
trichrome_quant_updated <- read_csv("~/Dog/trichrome_MLH_quant_updated4.csv")
## Rows: 113 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Image, Object ID, Object type, Classification, Parent, ROI, ROI ann...
## dbl (9): Centroid X µm, Centroid Y µm, trichrome_threholder_hires: Tumor %, ...
## lgl (1): Name
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
osteoid_quant_updated <- read_csv("~/Dog/osteoid_quant_MLH.csv")
## Rows: 113 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Image Tag, Algorithm Name, Algorithm Name.1, Analysis Region, Analy...
## dbl (4): QC Slide V2: % Artifact Area, Tumor: % Osteoid, Tumor: % Osteoid.1,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Filtering out poor quality cases post manual review
osteoid_quant_updated <- osteoid_quant_updated[!(osteoid_quant_updated$`Image Tag` %in% c("2023-09-16-03-34-18.ndpi",
"2023-09-20-13-12-21.ndpi",
"2023-09-23-17-29-02.ndpi",
"2023-09-26-23-32-10.ndpi",
"2023-09-27-12-06-34.ndpi",
"2023-09-27-17-26-47.ndpi")),]
tcell_quant_updated <- read_csv("~/Dog/tcell_quant_MLH.csv")
## Rows: 112 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Image, Name, Class, Parent, ROI, ROI annotations completed
## dbl (10): Centroid X µm, Centroid Y µm, Num Detections, Num Negative, Num Po...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Filtering out poor quality cases post manual review
tcell_quant_updated <- tcell_quant_updated[!(grepl('bad section', tcell_quant_updated$`ROI annotations completed`)),]
macrophage_quant_updated = read_csv("~/Dog/macrophage_quant_MLH.csv")
## Rows: 111 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Image, Name, Class, Parent, ROI, ROI annotations completed
## dbl (10): Centroid X µm, Centroid Y µm, Num Detections, Num Negative, Num Po...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Filtering out poor quality cases post manual review
macrophage_quant_updated <- macrophage_quant_updated[!(grepl('bad section', macrophage_quant_updated$`ROI annotations completed`)) & !(macrophage_quant_updated$Image == "2023-09-22-11-22-33.ndpi"),]
mac_density = data.frame(id = macrophage_quant_updated$`Subject Code`, density = macrophage_quant_updated$`Positive %`)
mac_density = mac_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))
tcell_density = data.frame(id = tcell_quant_updated$`Subject Code`, density = tcell_quant_updated$`Positive %`)
tcell_density = tcell_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))
tri_density = data.frame(id = trichrome_quant_updated$`Subject Code`, area = trichrome_quant_updated$`trichrome_threholder_hires: Collagen %`)
tri_density = tri_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))
osteoid_density = data.frame(id = osteoid_quant_updated$`Subject Code`, area = osteoid_quant_updated$`Tumor: % Osteoid`)
osteoid_density = osteoid_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))
tcell_density$subtype = ii1[as.character(tcell_density$id)]
tcell_density <- tcell_density[!is.na(tcell_density$subtype),]
tcell_density$subtype <- factor(tcell_density$subtype, levels = c("ID","IE-ECM","IE"))
mac_density$subtype = ii1[as.character(mac_density$id)]
mac_density <- mac_density[!is.na(mac_density$subtype),]
mac_density$subtype <- factor(mac_density$subtype, levels = c("ID","IE-ECM","IE"))
tri_density$subtype = ii1[as.character(tri_density$id)]
tri_density <- tri_density[!is.na(tri_density$subtype),]
tri_density$subtype <- factor(tri_density$subtype, levels = c("ID","IE-ECM","IE"))
osteoid_density$subtype = ii1[as.character(osteoid_density$id)]
osteoid_density <- osteoid_density[!is.na(osteoid_density$subtype),]
osteoid_density$subtype <- factor(osteoid_density$subtype, levels = c("ID","IE-ECM","IE"))
tcell_density$exp <- cellprop_rnaseq[as.character(tcell_density$id),"T cells"]
mac_density$exp <- cellprop_rnaseq[as.character(mac_density$id),"Monocytic lineage"]
tri_density$exp <- cellprop_rnaseq[as.character(tri_density$id),"Fibroblasts"]
osteoid_density$exp <- cellprop_rnaseq[as.character(osteoid_density$id),"Fibroblasts"]
p1 <- ggplot(data = tcell_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Positive") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")
p2 <- ggplot(data = mac_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Positive") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")
p3 <- ggplot(data = tri_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Collagen") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")
p33 <- ggplot(data = osteoid_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Osteoid") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")
p6 <- ggplot(data = tri_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
label = paste0('r = ', round(cor(tri_density$area, tri_density$exp, method = c("pearson")), 4)),
color = 'red') + ylab("%Collagen") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")
p5 <- ggplot(data = mac_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
label = paste0('r = ', round(cor(mac_density$density, mac_density$exp, method = c("pearson")), 4)),
color = 'red') + ylab("%Positive") + xlab("MCP counter score (Monocytic lineage)") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")
p4 <- ggplot(data = tcell_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
label = paste0('r = ', round(cor(tcell_density$density, tcell_density$exp, method = c("pearson")), 4)),
color = 'red') + ylab("%Positive") + xlab("MCP counter score (T cells)") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")
p66 <- ggplot(data = osteoid_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
label = paste0('r = ', round(cor(osteoid_density$area, osteoid_density$exp, method = c("pearson")), 4)),
color = 'red') + ylab("%Osteoid") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")
fig1 <- ggarrange(p1,p2,p3,p33,p4,p5,p6,p66, nrow = 2, ncol = 4, labels = c("A","B","C","D","E","F","G","H"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
print(fig1)
########################################################################################################################
mac_density = data.frame(id = macrophage_quant_updated$`Subject Code`, density = macrophage_quant_updated$`Num Positive per mm^2`)
mac_density = mac_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))
tcell_density = data.frame(id = tcell_quant_updated$`Subject Code`, density = tcell_quant_updated$`Num Positive per mm^2`)
tcell_density = tcell_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))
tri_density = data.frame(id = trichrome_quant_updated$`Subject Code`, area = trichrome_quant_updated$`trichrome_threholder_hires: Collagen %`)
tri_density = tri_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))
osteoid_density = data.frame(id = osteoid_quant_updated$`Subject Code`, area = osteoid_quant_updated$`Tumor: % Osteoid`)
osteoid_density = osteoid_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))
tcell_density$subtype = ii1[as.character(tcell_density$id)]
tcell_density <- tcell_density[!is.na(tcell_density$subtype),]
tcell_density$subtype <- factor(tcell_density$subtype, levels = c("ID","IE-ECM","IE"))
mac_density$subtype = ii1[as.character(mac_density$id)]
mac_density <- mac_density[!is.na(mac_density$subtype),]
mac_density$subtype <- factor(mac_density$subtype, levels = c("ID","IE-ECM","IE"))
tri_density$subtype = ii1[as.character(tri_density$id)]
tri_density <- tri_density[!is.na(tri_density$subtype),]
tri_density$subtype <- factor(tri_density$subtype, levels = c("ID","IE-ECM","IE"))
osteoid_density$subtype = ii1[as.character(osteoid_density$id)]
osteoid_density <- osteoid_density[!is.na(osteoid_density$subtype),]
osteoid_density$subtype <- factor(osteoid_density$subtype, levels = c("ID","IE-ECM","IE"))
tcell_density$exp <- cellprop_rnaseq[as.character(tcell_density$id),"T cells"]
mac_density$exp <- cellprop_rnaseq[as.character(mac_density$id),"Monocytic lineage"]
tri_density$exp <- cellprop_rnaseq[as.character(tri_density$id),"Fibroblasts"]
osteoid_density$exp <- cellprop_rnaseq[as.character(osteoid_density$id),"Fibroblasts"]
p1 <- ggplot(data = tcell_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")
p2 <- ggplot(data = mac_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Suntype") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")
p3 <- ggplot(data = tri_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Collagen") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")
p33 <- ggplot(data = osteoid_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Osteoid") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")
p6 <- ggplot(data = tri_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 10,
label = paste0('Spearman rho = ', round(cor(tri_density$area, tri_density$exp, method = c("spearman")), 4)),
color = 'red') + ylab("%Collagen") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")
p5 <- ggplot(data = mac_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
label = paste0('Spearman rho = ', round(cor(mac_density$density, mac_density$exp, method = c("spearman")), 4)),
color = 'red') + ylab("Density") + xlab("MCP counter score (Monocytic lineage)") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")
p4 <- ggplot(data = tcell_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
label = paste0('Spearman rho = ', round(cor(tcell_density$density, tcell_density$exp, method = c("spearman")), 4)),
color = 'red') + ylab("Density") + xlab("MCP counter score (T cells)") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")
p66 <- ggplot(data = osteoid_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
label = paste0('Spearman rho = ', round(cor(osteoid_density$area, osteoid_density$exp, method = c("spearman")), 4)),
color = 'red') + ylab("%Osteoid") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")
fig2 <- ggarrange(p1,p2,p3,p33,p4,p5,p6,p66, nrow = 2, ncol = 4, labels = c("A","B","C","D","E","F","G","H"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
print(fig2)
p1 <- ggplot(data = tcell_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)") + scale_y_continuous(limits = c(0,4500))
p2 <- ggplot(data = mac_density[mac_density$density < 6000,], aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)") + scale_y_continuous(limits = c(0,4500))
p3 <- ggplot(data = tri_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Collagen") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")
p33 <- ggplot(data = osteoid_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Osteoid") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")
fig3 <- ggarrange(p1,p2,p3,p33, nrow = 2, ncol = 2, labels = c("A","B","C","D"))
print(fig3)
#ggsave(plot = fig3, filename = "~/Dog/Final_IHC_validation_figure_panels.pdf", width = 6, height = 6)
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
out <- t(log2(canin_exp[intersect(c("PDCD1","CD274","CTLA4","TIGIT","LAG3","HAVCR2"), rownames(canin_exp)),]+1))
out <- data.frame(apply(out,2,scale), subtype = imm_clus)
dd_out <- melt(out, id.vars = "subtype")
colnames(dd_out) <- c("subtype","ICI_target","expression")
dd_out$ICI_target <- as.character(dd_out$ICI_target)
dd_out$subtype <- factor(dd_out$subtype, levels = c("IE","IE-ECM","ID"))
dd_out$ICI_target[dd_out$ICI_target == "PDCD1"] = "PD1"
dd_out$ICI_target[dd_out$ICI_target == "CD274"] = "PDL1"
#plot results
p1 <- ggboxplot(dd_out, x = "subtype", y="expression", facet.by = "ICI_target") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova")+ xlab("") + ylab("expression (normalized)")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/canine_immune_checkpoints.pdf")
library(xCell)
out <- t(xCellAnalysis(canin_exp))
## [1] "Num. of genes: 9787"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Setting parallel calculations through a MulticoreParam back-end
## with workers=4 and tasks=100.
## Estimating ssGSEA scores for 489 gene sets.
## [1] "Calculating ranks..."
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## [1] "Calculating absolute values from ranks..."
## | | | 0% | |= | 1% | |= | 2% | |== | 3% | |=== | 4% | |==== | 5% | |==== | 6% | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |================ | 22% | |================ | 23% | |================= | 24% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |===================== | 29% | |===================== | 30% | |====================== | 31% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |=========================== | 38% | |============================ | 39% | |============================ | 40% | |============================= | 41% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |================================ | 45% | |================================= | 46% | |================================= | 47% | |================================== | 48% | |=================================== | 49% | |=================================== | 51% | |==================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
res = ConsensusClusterPlus(d = t(apply(out,2,scale)), distance = "euclidean", clusterAlg = "km", pItem = 0.8, pFeature = 1, seed = 2425,maxK = 10, reps = 100, plot = "pdf",title = "ConsensusXCellRNASeq")
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
ha = rowAnnotation(
sample = sample_type,
col = list("sample" = c("Primary" = "yellow", "Met" = "darkgreen")
)
)
#plot results
Heatmap(apply(out,2,scale), row_split = as.character(res[[3]]$consensusClass), cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
## The automatically generated colors map from the minus and plus 99^th of
## the absolute values in the matrix. There are outliers in the matrix
## whose patterns might be hidden by this color mapping. You can manually
## set the color to `col` argument.
##
## Use `suppressMessages()` to turn off this message.
#pdf('~/Dog/TME_manuscript3.pdf', width = "13",height = "11")
#Heatmap(apply(out,2,scale), row_split = imm_clus, cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
#dev.off()
set.seed(24524)
library(RColorBrewer)
cellprop_nano <- t(MCPcounter.estimate(expression = log2(cbind(CE[,NaAn$`Sample Type` %in% c("Primary Tumor")],CE[,NaAn$`Sample Type` %in% c("Met")])+1), featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
#expression of immune checkpoint genes in nanostring
chkpt_nano <- cbind(CE[,NaAn$`Sample Type` %in% c("Primary Tumor")],CE[,NaAn$`Sample Type` %in% c("Met")])
chkpt_nano <- t(chkpt_nano[intersect(rownames(chkpt_nano), c("PDCD1","CD274","CTLA4","LAG3","HAVCR2","TIGIT")),])
nanostring_annotations <- rbind(NaAn[NaAn$`Sample Type` %in% c("Primary Tumor"),], NaAn[NaAn$`Sample Type` %in% c("Met"),])
sample_preservation = c("Frozen","FFPE")[as.numeric(grepl("FFPE",nanostring_annotations$`Name, detailed`))+1]
#perform sample preservation-specific normalization to mitigate batch effects across different sample preservation techniques
cellprop_nano_ffpe <- apply(cellprop_nano[sample_preservation == "FFPE",],2,scale)
chkpt_nano_ffpe <- apply(chkpt_nano[sample_preservation == "FFPE",],2,scale)
km.res <- kmeans(cellprop_nano_ffpe, centers = 3, nstart = 100, iter.max = 500)
imm_clus_nano_ffpe <- rep("ID", nrow(cellprop_nano_ffpe))
imm_clus_nano_ffpe[km.res$cluster == 2] <- "IE"
imm_clus_nano_ffpe[km.res$cluster == 1] <- "IE-ECM"
Heatmap(cellprop_nano_ffpe, row_split = imm_clus_nano_ffpe, cluster_column_slices = F, cluster_columns = F)
cellprop_nano_frozen <- apply(cellprop_nano[sample_preservation == "Frozen",],2,scale)
chkpt_nano_frozen <- apply(chkpt_nano[sample_preservation == "Frozen",],2,scale)
km.res <- kmeans(cellprop_nano_frozen, centers = 3, nstart = 100, iter.max = 500)
imm_clus_nano_frozen <- rep("ID", nrow(cellprop_nano_frozen))
imm_clus_nano_frozen[km.res$cluster == 3] <- "IE"
imm_clus_nano_frozen[km.res$cluster == 2] <- "IE-ECM"
Heatmap(cellprop_nano_frozen, row_split = imm_clus_nano_frozen, cluster_column_slices = F, cluster_columns = F)
#merge FFPe and frozen sample results
cellprop_nano <- rbind(cellprop_nano_ffpe, cellprop_nano_frozen)
chkpt_nano <- rbind(chkpt_nano_ffpe, chkpt_nano_frozen)
nanostring_annotations <- rbind(nanostring_annotations[sample_preservation == "FFPE",],nanostring_annotations[sample_preservation == "Frozen",])
imm_clus_nano = c(imm_clus_nano_ffpe, imm_clus_nano_frozen)
sample_preservation <- c(sample_preservation[sample_preservation == "FFPE"],sample_preservation[sample_preservation == "Frozen"])
#Primary vs metastatic tumor sample status
sstatus = nanostring_annotations$`Sample Type`
ha = rowAnnotation(
sample = sstatus,
col = list("sample" = c("Primary Tumor" = "yellow", "Met" = "green")
)
)
#metastatic site information
ss <- toupper(nanostring_annotations$Site)
ss_split <- rep("OTHER",length(ss))
ss_split[ss == "LUNG"] <- "LUNG"
ss_split[ss == "KIDNEY"] <- "KIDNEY"
ss_split[ss == "LIVER"] <- "LIVER"
ss_split[ss %in% c("BONE","RIB","L4","T5","T6","T11/T13","SPINE")] <- "SKELETAL"
ss_split[ss == "PRIMARY"] <- "PRIMARY"
n <- 70
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
cols <- sample(col_vector, n, replace = F)
cols <- sample(cols, length(unique(ss)), replace = F)
names(cols) <- unique(ss)
#plot results
ha = rowAnnotation(
sample = sstatus,
preservation = sample_preservation,
col = list("sample" = c("Primary Tumor" = "yellow", "Met" = "darkgreen"),
"preservation" = c("Frozen" = "#db6d00", "FFPE" = "#924900")
)
)
ha2 = rowAnnotation(
subtype = imm_clus_nano,
col = list("subtype" = c("IE" = "#882255", "IE-ECM" = "#CC6677", "ID" = "#88CCEE"))
)
names(imm_clus_nano) <- as.character(as.numeric(nanostring_annotations$`Subject Code`))
ii <- imm_clus_nano
imm_clus_nano <- factor(imm_clus_nano, levels = c("IE","IE-ECM", "ID"))
rownames(cellprop_nano) <- names(ii)
Heatmap(cellprop_nano, row_split = ss_split, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), left_annotation = ha, right_annotation = ha2)
#pdf("~/Dog/metTME_landscape.pdf", height = 14, width = 8)
#Heatmap(cellprop_nano, row_split = ss_split, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(3, #"mm"), left_annotation = ha, right_annotation = ha2)
#dev.off()
#rownames(cellprop_nano) <- samp_old
#Plotting distribution of immune checkpoint gene expression across TME clusters (Nanostring)
Heatmap(chkpt_nano, row_split = ss_split, cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), left_annotation = ha, right_annotation = ha2)
out <- data.frame(chkpt_nano, subtype = imm_clus_nano)
dd_out <- melt(out, id.vars = "subtype")
colnames(dd_out) <- c("subtype","ICI_target","scaled_expression")
dd_out$ICI_target <- as.character(dd_out$ICI_target)
dd_out$subtype <- factor(dd_out$subtype, levels = c("IE","IE-ECM","ID"))
dd_out$ICI_target[dd_out$ICI_target == "PDCD1"] = "PD1"
dd_out$ICI_target[dd_out$ICI_target == "CD274"] = "PDL1"
p1 <- ggboxplot(dd_out, x = "subtype", y="scaled_expression", facet.by = "ICI_target") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("expression (normalized)")
#stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/canine_immune_checkpoints_nano.pdf")
library(tibble)
set.seed(317796929)
#All metastatic samples (Nanostring + RNASeq)
a1 <- rbind(cellprop_nano[sstatus == "Met",], cellprop_rnaseq[sample_type == "Met",])
source_a1 <- c(rep("Nanostring", nrow(cellprop_nano[sstatus == "Met",])), rep("RNASeq",nrow(cellprop_rnaseq[sample_type == "Met",])))
prep_a1 <- c(sample_preservation[sstatus == "Met"], rep("Frozen",nrow(cellprop_rnaseq[sample_type == "Met",])))
imm_clus_a1 <- c(ii[sstatus == "Met"], ii1[sample_type == "Met"])
sites_a1 <- c(ss[sstatus == "Met"], sites_RNASeq[sample_type == "Met"])
#All primary samples (Nanostring + RNASeq)
a2 <- rbind(cellprop_rnaseq[sample_type == "Primary",], cellprop_nano[sstatus == "Primary Tumor",])
source_a2 <- c(rep("RNASeq", nrow(cellprop_rnaseq[sample_type == "Primary",])), rep("Nanostring",nrow(cellprop_nano[sstatus == "Primary Tumor",])))
prep_a2 <- c(rep("Frozen",nrow(cellprop_rnaseq[sample_type == "Primary",])), sample_preservation[sstatus == "Primary Tumor"])
imm_clus_a2 <- c(ii1[sample_type == "Primary"], ii[sstatus == "Primary Tumor"])
sites_a2 <- rep("Primary", length(imm_clus_a2))
#match tumor samples from same patient
common <- intersect(rownames(a1), rownames(a2))
case_list <- list()
for(pt in common){
xx <- a1[rownames(a1) == pt,]
if(is.null(dim(xx))){
xx <- t(xx)
}
d1 <- data.frame(xx)
d1$sample_type = "Metastatic"
d1$source = source_a1[rownames(a1) == pt]
d1$prep = prep_a1[rownames(a1) == pt]
d1$site = sites_a1[rownames(a1) == pt]
d1$case = pt
d1$immune_subtype = imm_clus_a1[rownames(a1) == pt]
xx <- a2[rownames(a2) == pt,]
if(is.null(dim(xx))){
xx <- t(xx)
}
d2 <- data.frame(xx)
d2$sample_type = "Primary"
d2$source = source_a2[rownames(a2) == pt]
d2$prep = prep_a2[rownames(a2) == pt]
d2$site = sites_a2[rownames(a2) == pt]
d2$case = pt
d2$immune_subtype = imm_clus_a2[rownames(a2) == pt]
case_list[[pt]] <- rbind(d2,d1)
}
matched_cases <- Reduce(rbind, case_list)
matched_cases$sample_type <- factor(matched_cases$sample_type, levels = c("Primary", "Metastatic"))
ss1_split <- rep("OTHER",nrow(matched_cases))
ss1_split[matched_cases$site == "LUNG"] <- "LUNG"
ss1_split[matched_cases$site == "KIDNEY"] <- "KIDNEY"
ss1_split[matched_cases$site == "LIVER"] <- "LIVER"
ss1_split[matched_cases$site %in% c("BONE","RIB","L4","T5","T6","T11/T13","SPINE")] <- "SKELETAL"
ss1_split[matched_cases$site == "Primary"] <- "PRIMARY"
#plot results
ha = columnAnnotation(
site = ss1_split,
preservation = matched_cases$prep,
platform = matched_cases$source,
subtype = matched_cases$immune_subtype,
col = list("site" = c("OTHER" = "#009E73",
"PRIMARY" = "#F0E442",
"LUNG"="#DF65B0",
"KIDNEY" = "#984EA3",
"SKELETAL" = "#FEE5D9",
"LIVER" = "#FB6A4A"),
"preservation" = c("Frozen" = "#db6d00",
"FFPE" = "#924900"),
"platform" = c("Nanostring" = "#525252",
"RNASeq" = "#F2F0F7"),
"subtype" = c("ID" = "#88CCEE",
"IE" = "#882255",
"IE-ECM" = "#CC6677")
)
)
matched_cases$site2 <- ss1_split
met_mat <- as.matrix(matched_cases[,1:10])
rownames(met_mat) <- matched_cases$case
Heatmap(t(met_mat), column_split = matched_cases$sample_type, cluster_column_slices = F, column_gap = unit(5, "mm"), cluster_rows = F, cluster_columns = T, top_annotation = ha, column_names_gp = grid::gpar(fontsize = 8))
sampIds1 <- rownames(a1)
idx <- sapply(sampIds1, function(x) x %in% rownames(a2))
a1 = a1[idx,]
imm_clus_a1 = imm_clus_a1[idx]
ss1 <- sites_a1[idx]
ha = rowAnnotation(
sample = rep("Primary Tumor", nrow(a2[sapply(rownames(a2), function(x) x %in% sampIds1[idx]),])),
col = list("Primary Tumor" = "yellow", "Met "= "darkgreen")
)
#pdf("~/Dog/Met_evolve_primary.pdf", width = 5, height = 10)
#Heatmap(a2[sapply(rownames(a2), function(x) x %in% rownames(a1)),], row_split = imm_clus_a2[sapply(rownames(a2), function(x) x %in% rownames(a1))], cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), cluster_rows = F, left_annotation = ha)
#dev.off()
n <- length(unique(ss1))
#qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
#col_vector = grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)]
cols <- sample(col_vector, n, replace = F)
ss1_split <- rep("OTHER",length(ss1))
ss1_split[ss1 == "LUNG"] <- "LUNG"
ss1_split[ss1 == "KIDNEY"] <- "KIDNEY"
ss1_split[ss1 == "LIVER"] <- "LIVER"
ss1_split[ss1 %in% c("BONE","RIB","L4","T5","T6","T11/T13","SPINE")] <- "SKELETAL"
ss1_split[ss1 == "PRIMARY"] <- "PRIMARY"
names(cols) <- unique(ss1)
#samp <- sapply(rownames(a1), function(x) as.character(as.numeric(strsplit(x,"_")[[1]][2])))
#rownames(a1) <- samp
#names(imm_clus_a1) <- rownames(a1)
ha = rowAnnotation(
met_subtype = imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM"],
col = list("met_subtype" = c("IE" = "#882255", "IE-ECM" = "#CC6677", "ID" = "#88CCEE")
)
)
ha2 = rowAnnotation(
sample = rep("Met", length(imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM"])),
col = list("Primary Tumor"="yellow","Met"="darkgreen")
)
#pdf("~/Dog/Met_evolve_IE-ECM.pdf", width = 5, height = 10)
#Heatmap(a1[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM",], row_split = ss1_split[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM"], cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(3, "mm"), right_annotation = ha,left_annotation = ha2)
#dev.off()
ha = rowAnnotation(
met_subtype = imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "ID"],
col = list("met_subtype" = c("IE" = "#882255", "IE-ECM" = "#CC6677", "ID" = "#88CCEE")
)
)
ha2 = rowAnnotation(
sample = rep("Met", length(imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "ID"])),
col = list("Primary Tumor"="yellow","Met"="darkgreen")
)
#pdf("~/Dog/Met_evolve_ID.pdf", width = 5, height = 10)
#Heatmap(a1[imm_clus_a2[names(imm_clus_a1)] == "ID",], row_split = ss1_split[imm_clus_a2[names(imm_clus_a1)] == "ID"], cluster_columns =T, cluster_row_slices = FALSE, row_gap = unit(3, "mm"), right_annotation = ha, left_annotation = ha2)
#dev.off()
#contingency table of matched tumor samples
mat_subtype <- matrix(0,nrow = 3, ncol = 3)
rownames(mat_subtype) <- c("IE","IE-ECM","ID")
colnames(mat_subtype) <- c("IE","IE-ECM","ID")
edge_list <- list()
idx <- 1
for(id in unique(matched_cases$case)){
case_id <- which(matched_cases$case == id)
is_prim <- matched_cases$sample_type[case_id] == "Primary"
is_met <- matched_cases$sample_type[case_id] == "Metastatic"
for(i in case_id[is_prim]){
for(j in case_id[is_met]){
for(subtype_i in c("IE","IE-ECM","ID")){
for(subtype_j in c("IE","IE-ECM","ID")){
if(matched_cases$immune_subtype[i] == subtype_i & matched_cases$immune_subtype[j] == subtype_j){
mat_subtype[subtype_i,subtype_j] = mat_subtype[subtype_i, subtype_j] + 1
edge_list[[idx]] <- c(i,j)
idx <- idx + 1
}
}
}
}
}
}
print(mat_subtype)
## IE IE-ECM ID
## IE 1 0 0
## IE-ECM 7 37 18
## ID 12 29 17
chisq.test(mat_subtype, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: mat_subtype
## X-squared = 7.2091, df = NA, p-value = 0.1059
#generate sankey diagram
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:ComplexHeatmap':
##
## add_heatmap
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#reticulate::import("sys")
matched_cases$site[matched_cases$site == "SPINAL CORD"] = "EXTRADURAL"
matched_cases$site[matched_cases$site == "LIP"] = "ORAL"
prim_sample <- sapply(edge_list, function(x) paste("primary:",x[1], sep = ""))
met_sample <- sapply(edge_list, function(x) paste(matched_cases$site[x[2]],x[2],sep = ":"))
node_lab <- c(unique(sapply(edge_list, function(x) matched_cases$case[x[1]])), unique(prim_sample), unique(met_sample))
subtype_col <- c("IE" = "#882255", "IE-ECM" = "#CC6677", "ID" = "#88CCEE")
node_col <- c(rep("grey", length(unique(sapply(edge_list, function(x) matched_cases$case[x[1]])))),
subtype_col[sapply(unique(prim_sample), function(x) matched_cases$immune_subtype[as.numeric(strsplit(x,split = ":")[[1]][2])])],
subtype_col[sapply(unique(met_sample), function(x) matched_cases$immune_subtype[as.numeric(strsplit(x,split = ":")[[1]][2])])])
names(node_col) <- NULL
elist1 <- t(sapply(edge_list, function(x) {
a <- matched_cases$case[x[1]]
b <- paste("primary:",x[1], sep = "")
return(c(which(node_lab == a), which(node_lab == b)))
}))
elist2 <- t(sapply(edge_list, function(x) {
a <- paste("primary:",x[1], sep = "")
b <- paste(matched_cases$site[x[2]],x[2],sep = ":")
return(c(which(node_lab == a), which(node_lab == b)))
}))
elist <- rbind(elist1,elist2)
fig <- plot_ly(
type = "sankey",
domain = list(
x = c(0,1),
y = c(0,1)
),
orientation = "h",
valueformat = ".0f",
valuesuffix = "TWh",
node = list(
label = sapply(node_lab, function(x) strsplit(x,split = ":")[[1]][1]),
color = node_col,
pad = 15,
thickness = 15,
line = list(
color = "black",
width = 0.5
)
),
link = list(
source = elist[,1]-1,
target = elist[,2]-1,
value = rep(1,nrow(elist))
)
)
fig <- fig %>% layout(
title = " ",
font = list(
size = 9
),
xaxis = list(showgrid = F, zeroline = F),
yaxis = list(showgrid = F, zeroline = F)
)
print(fig)
#save_image(fig, file = "/Users/sushantpa/Dog/met_evolve_kaleido.pdf",width = 410, height = 720)
library(survival)
library(survminer)
#Scripts to read in the canine OS clinical metadata
#clinical metadata for all cases that received Rapamycin (mTOR inhibitor) in addition to standard of care therapy
Slides2Outcomes_Rapa_all <- read_csv("~/Dog/Slides2Outcomes_Rapa_all_new.csv")
## New names:
## Rows: 295 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): Study, Site, Name, breed, gender, Tumor Location, PH vs NPH, ALP, ... dbl
## (9): ...1, Patient ID, age, weight, PH, DFI, DFI_censor, Survival (days...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#clinical metadata for all cases that received standard of care therapy
Slides2Outcomes_SOC_all <- read_csv("~/Dog/Slides2Outcomes_SOC_all_new.csv")
## New names:
## Rows: 305 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): Study, Site, Name, breed, gender, Tumor Location, PH vs NPH, ALP, ... dbl
## (9): ...1, Patient ID, age, weight, PH, DFI, DFI_censor, Survival (days...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
dat1 <- subset(Slides2Outcomes_Rapa_all,select = c("slide","Patient ID","Tumor Location","Site","age","weight","breed","gender","PH","ALP","Group","DFI","DFI_censor","Survival (days from sx)","Surv_censor"))
dat1$treatment = rep("Rapamycin", nrow(dat1))
dat2 <- subset(Slides2Outcomes_SOC_all,select = c("slide","Patient ID","Tumor Location","Site","age","weight","breed","gender","PH","ALP","Group","DFI","DFI_censor","Survival (days from sx)","Surv_censor"))
dat2$treatment = rep("SOC", nrow(dat2))
clindat_all <- rbind(dat1,dat2)
clindat_all <- as.data.frame(clindat_all)
clindat_all <- clindat_all[!duplicated(clindat_all$slide),]
rownames(clindat_all) <- substr(clindat_all$slide,1,19)
clindat_all <- clindat_all[,-1]
colnames(clindat_all)[c(11,12,13,14)] <- c("DFS_time","DFS_status","OS_time","OS_status")
clindat_all$ALP[clindat_all$ALP == "elevated"] <- "Elevated"
clindat_all$gender[clindat_all$`Patient ID` == "307"] <- "Spayed Female"
clindat_all$gender[grepl("Castrated",clindat_all$gender)] <- "Castrated Male"
clindat_all$gender[grepl("Spayed",clindat_all$gender)] <- "Spayed Female"
clindat_all$gender[grepl("Male Phenotype",clindat_all$gender)] <- "Intact Male"
clindat_all$gender[grepl("Female Phenotype",clindat_all$gender)] <- "Intact Female"
clindat_all <- clindat_all[!duplicated(clindat_all$`Patient ID`),]
cc1 <- clindat_all
rownames(cc1) <- as.character(clindat_all$`Patient ID`)
common <- intersect(names(imm_clus)[sample_type == "Primary"], rownames(cc1))
cc1 <- cc1[common,]
cc1$primary_immune_subtype <- as.character(imm_clus[sample_type == "Primary"][common])
#cc1$integrated = cc1$group
#cc1$integrated[cc1$group == "{cluster 1, 2}"] = cc1$immune_subtype[cc1$group == "{cluster 1, 2}"]
ccc1 <-cc1
#ccc1 <- ccc1[names(met_subtype_lung),]
#ccc1$met_subtype_lung <- met_subtype_lung
surv <- survfit(Surv(OS_time, OS_status) ~ primary_immune_subtype, data = ccc1)
diff <- survdiff(Surv(OS_time, OS_status) ~ primary_immune_subtype, data = ccc1)
p1 <- ggsurvplot(surv, data = ccc1,
legend.title = "",
legend = "right",
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
palette = c("#88CCEE","#882255","#CC6677"),
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days from sx)")
print(p1)
#ggsave(filename = "~/Dog/canine_OS_survfigure.pdf", plot=p1$plot, width = 9, height = 6)
surv <- survfit(Surv(DFS_time, DFS_status) ~ primary_immune_subtype, data = ccc1)
diff <- survdiff(Surv(DFS_time, DFS_status) ~ primary_immune_subtype, data = ccc1)
p1 <- ggsurvplot(surv, data = ccc1,
legend.title = "",
legend = "right",
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
palette = c("#88CCEE","#882255","#CC6677"),
ggtheme = theme(text = element_text(size = 15)) + theme_bw() # Change ggplot2 theme
) + xlab("Time (days from sx)")
print(p1)
#ggsave(filename = "~/Dog/canine_DFS_survfigure.pdf", plot=p1$plot, width = 9, height = 6)
cph_dat <- data.frame(time = cc1$DFS_time,
subtype = cc1$primary_immune_subtype,
cytotoxicity_score = as.numeric(cyt_scores[common]),
status = cc1$DFS_status,
location = c("NPH","PH")[as.numeric(ccc1$PH)+1],
ALP = cc1$ALP,
age = scale(as.numeric(cc1$age)),
gender = cc1$gender,
weight = scale(cc1$weight),
treatment = c("SOC","Sirolimus+SOC")[as.numeric(cc1$treatment == "Rapamycin")+1])
cph_dat$gender[grepl("female", ignore.case = T, cph_dat$gender)] = "F"
cph_dat$gender[grepl("male", ignore.case = T, cph_dat$gender)] = "M"
cph_dat$ALP[cph_dat$ALP != "Elevated"] = "Normal"
#cph_dat$isFemale = as.numeric(cph_dat$isFemale == "F")
cph_dat$ALP <- relevel(as.factor(cph_dat$ALP), ref = "Normal")
cph_dat$treatment <- relevel(as.factor(cph_dat$treatment), ref = "SOC")
cph_dat$location <- relevel(as.factor(cph_dat$location), ref = "NPH")
cph_dat$gender <- relevel(as.factor(cph_dat$gender), ref = "M")
fit <- coxph(Surv(time, status) ~ location + ALP + age + gender + weight + treatment + subtype, data = cph_dat)
p1 <- ggforest(fit, data = cph_dat,fontsize = 0.7, noDigits = 2) + theme(text = element_text(size = 15))
#ggsave(plot = p1, filename = "~/Dog/ggforest_DFS.pdf", width = 6, height = 8)
cph_dat <- data.frame(time = cc1$OS_time,
subtype = cc1$primary_immune_subtype,
cytotoxicity_score = as.numeric(cyt_scores[common]),
status = cc1$OS_status,
location = c("NPH","PH")[as.numeric(ccc1$PH)+1],
ALP = cc1$ALP,
age = scale(as.numeric(cc1$age)),
gender = cc1$gender,
weight = scale(cc1$weight),
treatment = c("SOC","Sirolimus+SOC")[as.numeric(cc1$treatment == "Rapamycin")+1])
cph_dat$gender[grepl("female", ignore.case = T, cph_dat$gender)] = "F"
cph_dat$gender[grepl("male", ignore.case = T, cph_dat$gender)] = "M"
cph_dat$ALP[cph_dat$ALP != "Elevated"] = "Normal"
#cph_dat$isFemale = as.numeric(cph_dat$isFemale == "F")
cph_dat$ALP <- relevel(as.factor(cph_dat$ALP), ref = "Normal")
cph_dat$treatment <- relevel(as.factor(cph_dat$treatment), ref = "SOC")
cph_dat$location <- relevel(as.factor(cph_dat$location), ref = "NPH")
cph_dat$gender <- relevel(as.factor(cph_dat$gender), ref = "M")
fit <- coxph(Surv(time, status) ~ location + ALP + age + gender + weight + treatment + subtype, data = cph_dat)
p2 <-ggforest(fit, data = cph_dat,fontsize = 0.7, noDigits = 2) + theme(text = element_text(size = 15))
#ggsave(plot = p2, filename = "~/Dog/ggforest_OS.pdf", width = 6, height = 8)
fig <- ggarrange(p2,p1, ncol = 2, nrow = 1, labels = c("",""))
print(fig)
#ggsave(plot = fig, filename = "~/Dog/ggforest_cyt_figures.pdf",width = 12, height = 8)
#Plot TME characteristics of metastatic samples in long term vs short term survivors
library(reshape)
cc1 <- clindat_all
rownames(cc1) <- as.character(clindat_all$`Patient ID`)
met_dd1 <- data.frame(surv_group = c("shorter survival","longer survival")[as.numeric(cc1[matched_cases$case[matched_cases$sample_type == "Metastatic"], "OS_time"] > median(cc1$OS_time[cc1$OS_status==1]))+1], os_status = as.numeric(cc1[matched_cases$case[matched_cases$sample_type == "Metastatic"], "OS_status"]), os_time = as.numeric(cc1[matched_cases$case[matched_cases$sample_type == "Metastatic"], "OS_time"]), met_subtype = matched_cases$immune_subtype[matched_cases$sample_type == "Metastatic"], sites = matched_cases$site2[matched_cases$sample_type == "Metastatic"], matched_cases[matched_cases$sample_type == "Metastatic",1:10])
met_dd1 <- met_dd1[!is.na(met_dd1$surv_group),]
ha <- columnAnnotation(
group = met_dd1$surv_group,
preservation = matched_cases$prep[matched_cases$sample_type == "Metastatic"],
platform = matched_cases$source[matched_cases$sample_type == "Metastatic"],
subtype = matched_cases$immune_subtype[matched_cases$sample_type == "Metastatic"],
col = list(
"preservation" = c("Frozen" = "#db6d00",
"FFPE" = "#924900"),
"platform" = c("Nanostring" = "#525252",
"RNASeq" = "#F2F0F7"),
"subtype" = c("ID" = "#88CCEE",
"IE" = "#882255",
"IE-ECM" = "#CC6677"),
group = c("shorter survival"="#AE017E",
"longer survival"="#FDAE61")
)
)
mat <- as.matrix(t(met_dd1[,6:ncol(met_dd1)]))
colnames(mat) <- matched_cases$case[matched_cases$sample_type == "Metastatic"]
colnames(mat) <- gsub("X","",colnames(mat))
colnames(mat) <- gsub("(\\.1)|(\\.2)|(\\.3)|(\\.4)|(\\.5)|(\\.6)","",colnames(mat))
Heatmap(mat, column_split = met_dd1$sites, bottom_annotation = ha, cluster_rows = F, cluster_column_slices = F,column_gap = unit(1, "mm"))
#pdf("~/Dog/met_sites_survival_association.pdf", width = 16, height = 4)
#Heatmap(mat, column_split = met_dd1$sites, bottom_annotation = ha, cluster_rows = F, cluster_column_slices = F,column_gap = unit(1, "mm"))
#dev.off()
#Heatmap(as.matrix(met_dd1[,6:ncol(met_dd1)]), row_split = met_dd1$surv_group, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), right_annotation = ha2)
met_dd2 <- melt(met_dd1[,c(1,6:ncol(met_dd1))])
## Using surv_group as id variables
p1 <- ggplot(data = met_dd2, aes(x = variable, y = value, fill = surv_group)) + geom_boxplot() + theme_bw() + theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90)) + stat_compare_means(aes(group = surv_group), label = "p.signif", method = "t.test") + ylim(c(-4,4))
print(p1)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
#ggsave(filename = "~/Dog/met_survival_association.pdf",plot = p1, width = 12, height = 7)
Perform Differential expression + gene set enrichment analysis to characterize enriched hallmark pathways in each TME cluster
library(edgeR)
## Loading required package: limma
library(matrixStats)
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
library(fgsea)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ matrixStats::count() masks dplyr::count()
## ✖ tidyr::expand() masks reshape::expand()
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ readr::parse_date() masks curl::parse_date()
## ✖ plotly::rename() masks reshape::rename(), dplyr::rename()
## ✖ lubridate::stamp() masks reshape::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(msigdbr)
library(clusterProfiler)
##
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## clusterProfiler v4.6.2 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
##
## The following object is masked from 'package:purrr':
##
## simplify
##
## The following object is masked from 'package:reshape':
##
## rename
##
## The following object is masked from 'package:stats':
##
## filter
# Read raw counts
DOG2_Raw_RNA_SEQ_DATA <- read_csv("~/Dog/DOG2_Raw_RNA_SEQ_DATA.csv")
## Rows: 37952 Columns: 187
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gene_id
## dbl (186): 102, 105, 106, 201, 202, 203, 204, 206, 207, 209, 210, 211, 212, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genes <- DOG2_Raw_RNA_SEQ_DATA$gene_id
genes <- sapply(genes, function(x) strsplit(x, split = "_")[[1]][1])
DOG2_Raw_RNA_SEQ_DATA$gene_id <- NULL
groups = ii1[colnames(DOG2_Raw_RNA_SEQ_DATA)]
groups = c("rest","IE")[as.numeric(groups == "IE")+1]
counts.DGEList <- DGEList(counts = DOG2_Raw_RNA_SEQ_DATA, genes = genes, group = as.factor(groups))
counts.keep <- filterByExpr(y = counts.DGEList)
counts.DGEList <- counts.DGEList[counts.keep, , keep.lib.sizes = FALSE]
counts.DGEList <- calcNormFactors(counts.DGEList)
counts.DGEList <- estimateDisp(counts.DGEList,design = model.matrix(~groups))
rest_IE.DGEExact <- exactTest(counts.DGEList, pair = c("rest","IE"))
msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)
ranks <- rest_IE.DGEExact$table$logFC
names(ranks) <- counts.DGEList$genes$genes
ranks <- sort(ranks, decreasing = T)
gsea_res <- GSEA(geneList = ranks, TERM2GENE = subset(msigdbr_df, select = c("gs_name","gene_symbol")),pvalueCutoff = 1)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.12% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
fgseaResTidy_IE <- gsea_res %>%
as_tibble() %>%
arrange(desc(NES))
p1 <- ggplot(fgseaResTidy_IE, aes(reorder(ID, NES), NES)) +
geom_col(aes(fill=p.adjust < 0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",title="Hallmark pathways NES from GSEA") +
theme_minimal()
print(p1)
groups = ii1[colnames(DOG2_Raw_RNA_SEQ_DATA)]
groups = c("rest","IE-ECM")[as.numeric(groups == "IE-ECM")+1]
counts.DGEList <- DGEList(counts = DOG2_Raw_RNA_SEQ_DATA, genes = genes, group = as.factor(groups))
counts.keep <- filterByExpr(y = counts.DGEList)
counts.DGEList <- counts.DGEList[counts.keep, , keep.lib.sizes = FALSE]
counts.DGEList <- calcNormFactors(counts.DGEList)
counts.DGEList <- estimateDisp(counts.DGEList,design = model.matrix(~groups))
rest_IEECM.DGEExact <- exactTest(counts.DGEList, pair = c("rest","IE-ECM"))
msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)
ranks <- rest_IEECM.DGEExact$table$logFC
names(ranks) <- counts.DGEList$genes$genes
ranks <- sort(ranks, decreasing = T)
gsea_res <- GSEA(geneList = ranks, TERM2GENE = subset(msigdbr_df, select = c("gs_name","gene_symbol")), pvalueCutoff = 1, nPermSimple=100000)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
fgseaResTidy_IEECM <- gsea_res %>%
as_tibble() %>%
arrange(desc(NES))
p1 <- ggplot(fgseaResTidy_IEECM, aes(reorder(ID, NES), NES)) +
geom_col(aes(fill=p.adjust < 0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA") +
theme_minimal()
print(p1)
#ggsave(plot = p1, filename = "~/Dog/IEECM_enrichmentplot.pdf")
groups = ii1[colnames(DOG2_Raw_RNA_SEQ_DATA)]
groups = c("rest","ID")[as.numeric(groups == "ID")+1]
counts.DGEList <- DGEList(counts = DOG2_Raw_RNA_SEQ_DATA, genes = genes, group = as.factor(groups))
counts.keep <- filterByExpr(y = counts.DGEList)
counts.DGEList <- counts.DGEList[counts.keep, , keep.lib.sizes = FALSE]
counts.DGEList <- calcNormFactors(counts.DGEList)
counts.DGEList <- estimateDisp(counts.DGEList,design = model.matrix(~groups))
rest_ID.DGEExact <- exactTest(counts.DGEList, pair = c("rest","ID"))
msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)
ranks <- rest_ID.DGEExact$table$logFC
names(ranks) <- counts.DGEList$genes$genes
ranks <- sort(ranks, decreasing = T)
gsea_res <- GSEA(geneList = ranks, TERM2GENE = subset(msigdbr_df, select = c("gs_name","gene_symbol")),pvalueCutoff = 1, nPermSimple = 100000)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
fgseaResTidy_ID <- gsea_res %>%
as_tibble() %>%
arrange(desc(NES))
p1 <- ggplot(fgseaResTidy_ID, aes(reorder(ID, NES), NES)) +
geom_col((aes(fill=p.adjust < 0.05))) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title="Hallmark pathways NES from GSEA") +
theme_minimal()
print(p1)
#ggsave(plot = p1, filename = "~/Dog/ID_enrichmentplot.pdf")
results
NES_IE <- fgseaResTidy_IE$NES
names(NES_IE) <- fgseaResTidy_IE$ID
NES_IEECM <- fgseaResTidy_IEECM$NES
names(NES_IEECM) <- fgseaResTidy_IEECM$ID
NES_ID <- fgseaResTidy_ID$NES
names(NES_ID) <- fgseaResTidy_ID$NES
NES_ID <- fgseaResTidy_ID$NES
names(NES_ID) <- fgseaResTidy_ID$ID
NES_mat <- cbind(NES_IE, cbind(NES_IEECM[names(NES_IE)], NES_ID[names(NES_IE)]))
rownames(NES_mat) <- names(NES_IE)
lpval_IE <- -log10(fgseaResTidy_IE$p.adjust)
names(lpval_IE) <- fgseaResTidy_IE$ID
lpval_IEECM <- -log10(fgseaResTidy_IEECM$p.adjust)
names(lpval_IEECM) <- fgseaResTidy_IEECM$ID
lpval_ID <- -log10(fgseaResTidy_ID$p.adjust)
names(lpval_ID) <- fgseaResTidy_ID$ID
pval_mat <- cbind(lpval_IE, cbind(lpval_IEECM[names(lpval_IE)],lpval_ID[names(lpval_IE)]))
rownames(pval_mat) <- names(lpval_IE)
colnames(NES_mat) <- c("IE","IE-ECM","ID")
NES_mat <- NES_mat[names(NES_IEECM),]
pval_mat <- pval_mat[names(NES_IEECM),]
col_fun = circlize::colorRamp2(c(-1, 0, 1), c("navyblue", "darkmagenta", "yellow"))
layer_fun = function(j, i, x, y, w, h, fill){
grid.rect(x = x, y = y, width = w, height = h,
gp = gpar(col = NA, fill = NA))
grid.circle(x=x,y=y,r= pindex(pval_mat, i, j)/10 * unit(2, "mm"),
gp = gpar(fill = col_fun(pindex(NES_mat, i, j)), col = NA))}
lgd_list = list(
Legend( labels = c(0,2.5,5,7.5,10), title = "-log10(adjusted P-value)",
graphics = list(
function(x, y, w, h) grid.circle(x = x, y = y, r = 0 * unit(2, "mm"),
gp = gpar(fill = "black")),
function(x, y, w, h) grid.circle(x = x, y = y, r = 0.25 * unit(2, "mm"),
gp = gpar(fill = "black")),
function(x, y, w, h) grid.circle(x = x, y = y, r = 0.5 * unit(2, "mm"),
gp = gpar(fill = "black")),
function(x, y, w, h) grid.circle(x = x, y = y, r = 0.75 * unit(2, "mm"),
gp = gpar(fill = "black")),
function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"),
gp = gpar(fill = "black")))
))
set.seed(123)
hp<- Heatmap(NES_mat,
heatmap_legend_param=list(title="NES"),
col=col_fun,
rect_gp = gpar(type = "none"),
layer_fun = layer_fun,
row_names_gp = gpar(fontsize = 7.5),
cluster_rows = F,
cluster_columns = F)
draw( hp, annotation_legend_list = lgd_list)
library("org.Hs.eg.db")
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following objects are masked from 'package:lubridate':
##
## second, second<-
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:plotly':
##
## rename
## The following objects are masked from 'package:reshape':
##
## expand, rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following object is masked from 'package:lubridate':
##
## %within%
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:plotly':
##
## slice
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
TARGET_OS_star_counts <- read_delim("~/Downloads/TARGET-OS.star_counts.tsv",
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
## Rows: 60487 Columns: 89
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): Ensembl_ID
## dbl (88): TARGET-40-PASUUH-01A, TARGET-40-PAUTWB-01A, TARGET-40-PAKUZU-01A, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genes <- TARGET_OS_star_counts$Ensembl_ID
genes <- sapply(genes, function(x) strsplit(x, split = "\\.")[[1]][1])
symbols <- mapIds(org.Hs.eg.db, keys = genes, keytype = "ENSEMBL", column="SYMBOL")
## 'select()' returned 1:many mapping between keys and columns
#mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
#G_list <- getBM(filters= "ensembl_gene_id", attributes= c("ensembl_gene_id","hgnc_symbol"),values=genes,mart= mart)
TARGET_OS_star_counts <- as.data.frame(TARGET_OS_star_counts)
TARGET_OS_star_counts <- TARGET_OS_star_counts[!is.na(symbols),]
expr_human <- aggregate.data.frame(TARGET_OS_star_counts[,2:ncol(TARGET_OS_star_counts)], by = list(symbols[!is.na(symbols)]), FUN = mean, na.rm=T)
rownames(expr_human) <- expr_human$Group.1
expr_human <- expr_human[,-1]
TARGET_OS_clinical <- read_delim("~/Downloads/TARGET-OS.clinical.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 527 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (23): sample_id, TARGET USI, Gender, Race, Ethnicity, First Event, Time ...
## dbl (8): Age at Diagnosis in Days, Overall Survival Time in Days, Year of D...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
TARGET_OS_clinical <- as.data.frame(TARGET_OS_clinical)
TARGET_OS_clinical <- TARGET_OS_clinical[!duplicated(TARGET_OS_clinical$sample_id),]
rownames(TARGET_OS_clinical) <- TARGET_OS_clinical$sample_id
common <- intersect(rownames(TARGET_OS_clinical), colnames(expr_human))
ee <- expr_human[,common]
human_clin <- TARGET_OS_clinical[common,]
set.seed(2425)
#ccommon = intersect(rownames(ee), intersect(rownames(expr_human2),rownames(expr_human3)))
cellprop_human <- t(MCPcounter.estimate(expression = ee, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human <- apply(cellprop_human, 2, scale)
#rownames(cellprop_primary) <- colnames(Canine_RNA_SEQ_TPM_All_data)
km.res <- kmeans(cellprop_human, centers = 3, nstart = 100, iter.max = 500)
imm_clus_target <- rep("ID", nrow(cellprop_human))
imm_clus_target[km.res$cluster == 3] <- "IE"
imm_clus_target[km.res$cluster == 1] <- "IE-ECM"
#imm_clus <- factor(imm_clus, levels = c("IE","IE fibrotic", "ID"))
Heatmap(cellprop_human, row_split = imm_clus_target, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))
names(imm_clus_target) <- colnames(ee)
human_clin <- human_clin[names(imm_clus_target),]
human_clin$immune_subtype <- imm_clus_target
hc <- data.frame(OS_time = as.numeric(human_clin$`Overall Survival Time in Days`), OS_status = as.numeric(human_clin$`Vital Status` == "Dead"),
DFS_time = as.numeric(human_clin$`Time to First Event in Days`), DFS_status = as.numeric(human_clin$`First Event` == "Relapse"),
immune_subtype = human_clin$immune_subtype,
met_at_daig = toupper(human_clin$`Disease at diagnosis`),
age = human_clin$`Age at Diagnosis in Days`/365,
gender = c("M","F")[as.numeric(human_clin$Gender == "Female")+1])
hc$necrosis <- NA
hc$necrosis[!is.na(human_clin$`Percent necrosis`)] <- c("0-90","91-100")[as.numeric(as.numeric(human_clin$`Percent necrosis`[!is.na(human_clin$`Percent necrosis`)]) > 90)+1]
## Warning: NAs introduced by coercion
hc$necrosis[!is.na(human_clin$`Histologic response`)] <-human_clin$`Histologic response`[!is.na(human_clin$`Histologic response`)]
hc$necrosis[hc$necrosis %in% c("91-100","Stage 3/4","98","95") & !is.na(hc$necrosis)] <- ">90%"
hc$necrosis[!(hc$necrosis %in% c(">90%")) & !is.na(hc$necrosis)] <- "<=90%"
hc$met_at_daig[hc$met_at_daig == "METASTATIC (CONFIRMED)"] = "METASTATIC"
hc$met_at_daig[hc$met_at_daig == "NON-METASTATIC (CONFIRMED)"] = "NON-METASTATIC"
surv <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc)
diff <- survdiff(Surv(OS_time, OS_status) ~ immune_subtype, data = hc)
p1 <- ggsurvplot(surv, data = hc,
legend.title = "",
legend = "right",
#legend.labs = c("ID","IE","IE fibrotic"),
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
surv <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc)
diff <- survdiff(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc)
#
p1 <- ggsurvplot(surv, data = hc,
legend.title = "",
legend = "right",
#legend.labs = c("ID","IE","IE fibrotic"),
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
expr_human2 <- read_csv("~/Dog/GSE21257_expr.csv")
## New names:
## Rows: 30549 Columns: 54
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (53): GSM530667, GSM530899, GSM531283, GSM531284, GSM531285,
## GSM531286, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
expr_human2 <- as.data.frame(expr_human2)
expr_human2 <- aggregate.data.frame(expr_human2[,2:ncol(expr_human2)], by = list(as.character(expr_human2[,1])), FUN = mean, na.rm = T)
rownames(expr_human2) <- expr_human2[,1]
expr_human2 <- expr_human2[,-1]
expr_human2 <- as.matrix(expr_human2)
set.seed(2425)
cellprop_human2 <- t(MCPcounter.estimate(expression = expr_human2, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human2 <- apply(cellprop_human2, 2, scale)
km.res <- kmeans(cellprop_human2, centers = 3, nstart = 100, iter.max = 500)
imm_clus2 <- rep("ID", nrow(cellprop_human2))
imm_clus2[km.res$cluster == 1] <- "IE"
imm_clus2[km.res$cluster == 3] <- "IE-ECM"
Heatmap(cellprop_human2, row_split = imm_clus2, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))
names(imm_clus2) <- colnames(expr_human2)
human_clin2 <- read_delim("~/Dog/GSE21257_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## New names:
## Rows: 53 Columns: 49
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (40): ...1, title, geo_accession, status, submission_date, last_update_d... dbl
## (8): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,... lgl
## (1): ...45
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## • `` -> `...45`
human_clin2 <- as.data.frame(human_clin2)
rownames(human_clin2) <- human_clin2$...1
human_clin2 <- human_clin2[,-1]
human_clin2 <- human_clin2[names(imm_clus2),]
human_clin2$`age:ch1` <- sapply(human_clin2$`age:ch1`, function(x) as.numeric(strsplit(x, split = " ")[[1]][1])/12)
human_clin2$immune_subtype <- as.character(imm_clus2)
hc2 <- data.frame(OS_time = as.numeric(human_clin2$OS_time*30), OS_status = as.numeric(human_clin2$OS_status),
DFS_time = as.numeric(human_clin2$MFS_time*30), DFS_status = as.numeric(human_clin2$met_status),
immune_subtype = human_clin2$immune_subtype,
met_at_daig = human_clin2$`group:ch1`,
age = human_clin2$`age:ch1`,
gender = human_clin2$`gender:ch1`)
hc2$met_at_daig = c("NON-METASTATIC","METASTATIC")[as.numeric(hc2$met_at_daig == "Metastases present at diagnosis")+1]
hc2 <- hc2[!is.na(hc2$OS_status) & !is.na(hc2$DFS_status),]
hc2$necrosis <- NA
surv <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc2)
diff <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc2)
p1 <- ggsurvplot(surv, data = hc2,
legend.title = "",
legend = "right",
legend.labs = c("ID","IE","IE-ECM"),
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
surv <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc2)
diff <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc2)
p1 <- ggsurvplot(surv, data = hc2,
legend.title = "",
legend = "right",
#legend.labs = c("ID","IE","IE fibrotic"),
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
expr_human3 <- read_csv("~/Dog/GSE39055_expr.csv")
## New names:
## Rows: 29377 Columns: 38
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (37): GSM954790, GSM954791, GSM954792, GSM954793, GSM954794,
## GSM954795, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
expr_human3 <- as.data.frame(expr_human3)
expr_human3 <- aggregate.data.frame(expr_human3[,2:ncol(expr_human3)], by = list(as.character(expr_human3[,1])), FUN = mean, na.rm = T)
rownames(expr_human3) <- expr_human3[,1]
expr_human3 <- expr_human3[,-1]
#expr_human3 <- as.matrix(log2(expr_human3+1))
set.seed(2425)
cellprop_human3 <- t(MCPcounter.estimate(expression = expr_human3, featuresType = "HUGO_symbols",probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human3 <- apply(cellprop_human3, 2, scale)
km.res <- kmeans(cellprop_human3, centers = 3, nstart = 100, iter.max = 500)
imm_clus3 <- rep("ID", nrow(cellprop_human3))
imm_clus3[km.res$cluster == 1] <- "IE"
imm_clus3[km.res$cluster == 2] <- "IE-ECM"
#imm_clus3 <- factor(imm_clus3, levels = c("IE", "IE fibrotic", "ID"))
Heatmap(cellprop_human3, row_split = imm_clus3, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))
names(imm_clus3) <- colnames(expr_human3)
human_clin3 <- read_delim("~/Dog/GSE39055_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 37 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (45): title, Metastatic_at_Diag, geo_accession, status, submission_date,...
## dbl (6): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
human_clin3 <- as.data.frame(human_clin3)
rownames(human_clin3) <- human_clin3$geo_accession
human_clin3 <- human_clin3[names(imm_clus3),]
human_clin3$immune_subtype <- as.character(imm_clus3)
human_clin3$`age:ch1` <- sapply(human_clin3$`age:ch1`, function(x) as.numeric(strsplit(x, split = " ")[[1]][1]))
#human_clin3[human_clin3$`time until first recurrence or latest follow-up (months):ch1` > 0,]
hc3 <- data.frame(OS_time = NA, OS_status = NA,
DFS_time = as.numeric(human_clin3$`time until first recurrence or latest follow-up (months):ch1`*30), DFS_status = as.numeric(human_clin3$`recurrence:ch1` == "Y"),
immune_subtype = human_clin3$immune_subtype, met_at_daig = human_clin3$Metastatic_at_Diag,
age = human_clin3$`age:ch1`,
gender = c("M","F")[as.numeric(human_clin3$`gender:ch1` == "female")+1])
hc3$necrosis <- c("<=90%",">90%")[as.numeric(human_clin3$necrosis =="Y")+1]
surv <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc3)
diff <- survdiff(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc3)
p1 <- ggsurvplot(surv, data = hc3,
legend.title = "",
legend = "right",
#legend.labs = c("ID","IE","IE fibrotic"),
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
expr_human5 <- read_csv("~/Dog/GSE16091_expr.csv")
## Rows: 22283 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gene
## dbl (34): GSM402718, GSM402719, GSM402720, GSM402721, GSM402722, GSM402723, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human5 <- as.data.frame(expr_human5)
expr_human5 <- aggregate.data.frame(expr_human5[,2:ncol(expr_human5)], by = list(as.character(expr_human5[,1])), FUN = mean, na.rm = T)
rownames(expr_human5) <- expr_human5[,1]
expr_human5 <- expr_human5[,-1]
set.seed(2425)
cellprop_human5 <- t(MCPcounter.estimate(expression = expr_human5, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human5 <- apply(cellprop_human5, 2, scale)
#rownames(cellprop_primary) <- colnames(Canine_RNA_SEQ_TPM_All_data)
km.res <- kmeans(cellprop_human5, centers = 3, nstart = 100, iter.max = 500)
imm_clus5 <- rep("ID", nrow(cellprop_human5))
imm_clus5[km.res$cluster == 3] <- "IE"
imm_clus5[km.res$cluster == 2] <- "IE-ECM"
#imm_clus3 <- factor(imm_clus3, levels = c("IE", "IE fibrotic", "ID"))
Heatmap(cellprop_human5, row_split = imm_clus5, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))
names(imm_clus5) <- colnames(expr_human5)
human_clin5 <- read_delim("~/Dog/GSE16091_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 34 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (30): title, geo_accession, status, submission_date, last_update_date, t...
## dbl (5): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
human_clin5 <- as.data.frame(human_clin5)
rownames(human_clin5) <- human_clin5$geo_accession
human_clin5 <- human_clin5[names(imm_clus5),]
human_clin5$immune_subtype <- as.character(imm_clus5)
#human_clin3[human_clin3$`time until first recurrence or latest follow-up (months):ch1` > 0,]
hc5 <- data.frame(OS_time = as.numeric(human_clin5$`days-followup:ch1`), OS_status = as.numeric(human_clin5$`alive-or-dead:ch1` == "D"),
DFS_time = NA, DFS_status = NA,
immune_subtype = human_clin5$immune_subtype, met_at_daig = NA,
age = NA,
gender = NA,
necrosis = NA)
surv <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc5)
diff <- survdiff(Surv(OS_time, OS_status) ~ immune_subtype, data = hc5)
p1 <- ggsurvplot(surv, data = hc5,
legend.title = "",
legend = "right",
#legend.labs = c("ID","IE","IE fibrotic"),
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
expr_human6 <- read_csv("~/Dog/GSE30699_expr.csv")
## Rows: 48701 Columns: 108
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gene
## dbl (107): GSM761349, GSM761350, GSM761351, GSM761352, GSM761353, GSM761354,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
human_clin6 <- read_delim("~/Dog/GSE30699_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 107 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (34): title, geo_accession, status, submission_date, last_update_date, t...
## dbl (3): channel_count, taxid_ch1, data_row_count
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human6 <- as.data.frame(expr_human6)
expr_human6 <- aggregate.data.frame(expr_human6[,2:ncol(expr_human6)], by = list(as.character(expr_human6[,1])), FUN = mean, na.rm = T)
rownames(expr_human6) <- expr_human6[,1]
expr_human6 <- expr_human6[,-1]
expr_human6 <- expr_human6[, grepl("biopsy",human_clin6$source_name_ch1)]
set.seed(2425)
cellprop_human6 <- t(MCPcounter.estimate(expression = expr_human6, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human6 <- apply(cellprop_human6, 2, scale)
km.res <- kmeans(cellprop_human6, centers = 3, nstart = 100, iter.max = 500)
imm_clus6 <- rep("ID", nrow(cellprop_human6))
imm_clus6[km.res$cluster == 3] <- "IE"
imm_clus6[km.res$cluster == 1] <- "IE-ECM"
Heatmap(cellprop_human6, row_split = imm_clus6, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))
names(imm_clus6) <- colnames(expr_human6)
hc6 <- data.frame(OS_time = NA, OS_status = NA,
DFS_time = NA, DFS_status = NA,
immune_subtype = imm_clus6, met_at_daig = NA,
age = NA,
gender = NA,
necrosis = NA)
expr_human7 <- read_csv("~/Dog/GSE32981_expr.csv")
## Rows: 12513 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Group.1
## dbl (23): GSM816942, GSM816943, GSM816944, GSM816945, GSM816946, GSM816947, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human7 <- expr_human7[!is.na(expr_human7$Group.1),]
expr_human7 <- as.data.frame(expr_human7)
rownames(expr_human7) <- expr_human7$Group.1
expr_human7 <- expr_human7[,-1]
GSE32981_pheno <- read_delim("~/Dog/GSE32981_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 23 Columns: 42
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (36): title, geo_accession, status, submission_date, last_update_date, t...
## dbl (6): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
set.seed(2425)
cellprop_human7 <- t(MCPcounter.estimate(expression = expr_human7, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human7 <- apply(cellprop_human7, 2, scale)
km.res <- kmeans(cellprop_human7, centers = 3, nstart = 100, iter.max = 500)
imm_clus7 <- rep("ID", nrow(cellprop_human7))
imm_clus7[km.res$cluster == 1] <- "IE"
imm_clus7[km.res$cluster == 3] <- "IE-ECM"
Heatmap(cellprop_human7, row_split = imm_clus7, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))
names(imm_clus7) <- colnames(expr_human7)
sample_type_7 <- c("Primary","Met")[as.numeric(GSE32981_pheno$`origin:ch1` == "Met")+1]
treatment_stat7 <- c("naive","treated")[as.numeric(GSE32981_pheno$Treated == "Yes")+1]
hc7 <- data.frame(OS_time = NA, OS_status = NA,
DFS_time = as.numeric(GSE32981_pheno$MFS)*30, DFS_status = as.numeric(GSE32981_pheno$`developed metastases:ch1` == "Yes"),
immune_subtype = imm_clus7, met_at_daig = as.numeric(GSE32981_pheno$MFS) == 0 & !is.na(as.numeric(GSE32981_pheno$MFS)),
age = as.numeric(GSE32981_pheno$`age:ch1`),
gender = c("M","F")[as.numeric(GSE32981_pheno$`gender:ch1` == "Female")+1],
necrosis = GSE32981_pheno$necrosis)
hc7$met_at_daig <- c("NON-METASTATIC","METASTATIC")[as.numeric(hc7$met_at_daig & !is.na(hc7$met_at_daig)) +1]
expr_human4 <- read_csv("~/Dog/GSE33383_expr.csv")
## New names:
## Rows: 30549 Columns: 100
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (99): GSM717846, GSM717847, GSM717848, GSM717849, GSM717850,
## GSM717851, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
expr_human4 <- as.data.frame(expr_human4)
expr_human4 <- aggregate.data.frame(expr_human4[,2:ncol(expr_human4)], by = list(as.character(expr_human4[,1])), FUN = mean, na.rm = T)
rownames(expr_human4) <- expr_human4[,1]
expr_human4 <- expr_human4[,-1]
GSE33383_pheno <- read_delim("~/Dog/GSE33383_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 99 Columns: 50
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (46): title, geo_accession, status, submission_date, last_update_date, t...
## dbl (3): channel_count, taxid_ch1, data_row_count
## lgl (1): description.1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human4 <- expr_human4[,grepl("pre-chemotherapy biopsy", GSE33383_pheno$source_name_ch1)]
GSE33383_pheno <-GSE33383_pheno[grepl("pre-chemotherapy biopsy", GSE33383_pheno$source_name_ch1),]
set.seed(2425)
cellprop_human4 <- t(MCPcounter.estimate(expression = expr_human4, featuresType = "HUGO_symbols"))
cellprop_human4 <- apply(cellprop_human4, 2, scale)
#rownames(cellprop_primary) <- colnames(Canine_RNA_SEQ_TPM_All_data)
km.res <- kmeans(cellprop_human4, centers = 3, nstart = 100, iter.max = 500)
imm_clus4 <- rep("ID", nrow(cellprop_human4))
imm_clus4[km.res$cluster == 1] <- "IE"
imm_clus4[km.res$cluster == 3] <- "IE-ECM"
imm_clus4 <- factor(imm_clus4, levels = c("IE", "IE-ECM", "ID"))
ha = rowAnnotation(
met_within_5yrs = GSE33383_pheno$`metastasis within 5yrs:ch1`,
col = list("met_within_5yrs" = c("no" = "green", "yes" = "yellow")
)
)
names(imm_clus4) <- colnames(expr_human4)
hc44 <- data.frame(OS_time = NA, OS_status = NA,
DFS_time = NA, DFS_status = NA,
immune_subtype = imm_clus4, met_at_daig = rep("NON-METASTATIC", length(imm_clus4)),
age = sapply(GSE33383_pheno$`age:ch1`, function(x) as.numeric(strsplit(x," ")[[1]][1])/12),
gender = c("M","F")[as.numeric(GSE33383_pheno$`gender:ch1` == "F")+1],
necrosis = NA)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
ct <- colnames(cellprop_human4)
bulk_exp_scOS <- read_csv("~/Dog/GSE152048_merged_expr.csv")
## New names:
## Rows: 32864 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (11): BC2, BC3, BC5, BC6, BC10, BC11, BC16, BC17, BC20, BC21,
## BC22
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
bulk_exp_scOS <- as.data.frame(bulk_exp_scOS)
rownames(bulk_exp_scOS) <- bulk_exp_scOS$...1
bulk_exp_scOS <- bulk_exp_scOS[,-1]
cellprop_human_scOS <- t(MCPcounter.estimate(expression = bulk_exp_scOS, featuresType = "HUGO_symbols"))
cellprop_human_scOS <- apply(cellprop_human_scOS, 2, scale)
newcolnames <- sapply(colnames(cellprop_human_scOS), function(x) gsub(" ","_",x))
names(newcolnames) <- NULL
colnames(cellprop_human_scOS) <- newcolnames
sc_sample_type <- c("Primary","Primary","Primary","Primary","Met","Recurrent","Primary","Primary","Met","Recurrent","Primary")
samples
library(caret)
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:clusterProfiler':
##
## dotplot
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
## The following object is masked from 'package:survival':
##
## cluster
library(e1071)
library(naivebayes)
## naivebayes 0.9.7 loaded
set.seed(268548)
df <- cbind(data.frame(rbind(cellprop_rnaseq[sample_type == "Primary",], cellprop_nano[sstatus == "Primary Tumor",])), imm_clus = c(imm_clus[sample_type=="Primary"],imm_clus_nano[sstatus=="Primary Tumor"]))
df$imm_clus <- as.factor(df$imm_clus)
colnames(df) <- gsub("\\.","_",colnames(df))
df <- df[,colnames(df)[c(1,3:ncol(df))]]
ind <- c(sample(which(df$imm_clus == "IE"),size = 50,replace = T), sample(which(df$imm_clus == "IE-ECM"),size = 50,replace = T), sample(which(df$imm_clus == "ID"),size = 50,replace = T))
#apply data augmentation
cp1 <- t(apply(df[ind,1:(ncol(df))-1],1,function(x) x + rnorm((ncol(df))-1, mean = 0, sd = 0.01)))
df1 <- cbind(data.frame(cp1), df$imm_clus[ind])
colnames(df1) <- colnames(df)
control <- trainControl(method = "cv", number=10)
nb <- train(imm_clus ~ ., data = df1, "naive_bayes", trainControl = control, tuneGrid = data.frame(usekernel = FALSE, laplace = 0, adjust=0))
cellprop_allhuman <- rbind(cellprop_human[,ct],rbind(cellprop_human2[,ct],rbind(cellprop_human3[,ct],rbind(cellprop_human4[,ct], rbind(cellprop_human5[,ct], rbind(cellprop_human6[,ct], cellprop_human7[,ct]))))))
set.seed(2425)
km.res <- kmeans(cellprop_allhuman, centers = 3, nstart = 100, iter.max = 500)
imm_clus_all <- rep("ID", nrow(cellprop_allhuman))
imm_clus_all[km.res$cluster == 1] <- "IE"
imm_clus_all[km.res$cluster == 3] <- "IE-ECM"
imm_clus_all <- factor(imm_clus_all, levels = c("IE", "IE-ECM", "ID"))
human_test <- cbind(data.frame(cellprop_allhuman), imm_clus = imm_clus_all)
colnames(human_test) <- gsub("\\.","_", colnames(human_test))
preds <- predict(nb, human_test)
confusionMatrix(preds, human_test$imm_clus)
## Confusion Matrix and Statistics
##
## Reference
## Prediction IE IE-ECM ID
## IE 42 0 0
## IE-ECM 50 103 11
## ID 0 91 97
##
## Overall Statistics
##
## Accuracy : 0.6142
## 95% CI : (0.5642, 0.6625)
## No Information Rate : 0.4924
## P-Value [Acc > NIR] : 7.796e-07
##
## Kappa : 0.3966
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: IE Class: IE-ECM Class: ID
## Sensitivity 0.4565 0.5309 0.8981
## Specificity 1.0000 0.6950 0.6818
## Pos Pred Value 1.0000 0.6280 0.5160
## Neg Pred Value 0.8580 0.6043 0.9466
## Prevalence 0.2335 0.4924 0.2741
## Detection Rate 0.1066 0.2614 0.2462
## Detection Prevalence 0.1066 0.4162 0.4772
## Balanced Accuracy 0.7283 0.6130 0.7900
sc_merged_subtypes <- predict(nb, cellprop_human_scOS[, colnames(human_test)[1:9]])
rownames(cellprop_human_scOS) <- colnames(bulk_exp_scOS)
Heatmap(cellprop_human_scOS, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")))
names(sc_merged_subtypes) <- colnames(bulk_exp_scOS)
ds <- c(rep("TARGET", length(imm_clus_target)), rep("GSE21257", length(imm_clus2)), rep("GSE39055", length(imm_clus3)), rep("GSE33383",length(imm_clus4)), rep("GSE16091",length(imm_clus5)), rep("GSE30699",length(imm_clus6)), rep("GSE32981", length(imm_clus7)))
sample_type_bulk <- c(rep("Primary", length(imm_clus_target) + length(imm_clus2) + length(imm_clus3) + length(imm_clus4) + length(imm_clus5) + length(imm_clus6)), sample_type_7)
sample_type_all <- c(sample_type_bulk,sc_sample_type)
treatment_bulk <- c(rep("naive", length(imm_clus_target) + length(imm_clus2) + length(imm_clus3) + length(imm_clus4) + length(imm_clus5) + length(imm_clus6)), c("naive","post-chemo")[as.numeric(treatment_stat7 == "post-chemo")+1])
treatment <- c(treatment_bulk, rep("post-chemo",length(sc_merged_subtypes)))
platform = c(c("microarray","RNASeq")[as.numeric(ds == "TARGET")+1],rep("10x(scRNASeq)", length(sc_merged_subtypes)))
ds <- c(ds, rep("GSE152048", length(sc_merged_subtypes)))
ha = rowAnnotation(
dataset = ds,
platform = platform,
sample = sample_type_all,
treatment_status = treatment,
col = list("dataset" = c("TARGET" = "#AF32CD", "GSE21257" = "cyan","GSE39055" = "#A35711", "GSE33383" = "magenta", "GSE16091" = "#4500B2","GSE30699" = "#BCFF96", "GSE152048" = "#FB4232","GSE32981" = "#6775CD"),
"platform" = c("microarray" = "#929292",
"RNASeq" = "#E2E0E7",
"10x(scRNASeq)" = "#3B3BFF"),
"sample" = c("Primary" = "yellow", "Met" = "darkgreen", "Recurrent" = "darkorange"),
"treatment_status" = c("naive" = "#EDAB32","post-chemo" = "#B30721"))
)
Heatmap(rbind(cellprop_allhuman,cellprop_human_scOS[,c(1,3:10)]), row_split = c(preds,sc_merged_subtypes), cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), right_annotation = ha)
#chromosomal instability signature
names(imm_clus_all) <- c(names(imm_clus_target),names(imm_clus2), names(imm_clus3), names(imm_clus4))
CIN_1 <- CIN_signature(ee)
CIN_2 <- CIN_signature(expr_human2)
CIN_3 <- CIN_signature(expr_human3)
CIN_4 <- CIN_signature(expr_human4)
CIN_5 <- CIN_signature(expr_human5)
CIN_6 <- CIN_signature(expr_human6)
CIN_7 <- CIN_signature(expr_human7)
CIN_8 <- CIN_signature(bulk_exp_scOS)
CIN_all <- c(CIN_1,CIN_2,CIN_3,CIN_4, CIN_5, CIN_6, CIN_7, CIN_8)
names(CIN_all) <- c(names(imm_clus_all), names(sc_merged_subtypes))
p1 <- ggplot(data = data.frame(subtype = c(preds, sc_merged_subtypes), CIN_score = CIN_all), aes(x = subtype,y=CIN_score)) + geom_boxplot() +theme_bw() + stat_compare_means(method = "anova")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/CIN_human.pdf", width = 4, height = 4)
#type 1 interferon signature
type1_interferon_signature <- function(expr) {
genes <- c("IFNA1", "IFNA2", "IFNA4", "IFNA5", "IFNA6", "IFNA7", "IFNA8", "IFNA10", "IFNA13", "IFNA14", "IFNA16", "IFNA17", "IFNA21", "IFNW1", "IFNE", "IFNK","IFNB1")
mat <- as.data.frame(expr)
mat <- mat[genes,]
mat <- apply(as.matrix(mat),1,scale)
scores <- rowMeans(mat, na.rm = T)
return(scores)
}
IN_1 <- type1_interferon_signature(ee)
IN_2 <- type1_interferon_signature(expr_human2)
IN_3 <- type1_interferon_signature(expr_human3)
IN_4 <- type1_interferon_signature(expr_human4)
IN_5 <- type1_interferon_signature(expr_human5)
IN_6 <- type1_interferon_signature(expr_human6)
IN_7 <- type1_interferon_signature(expr_human7)
IN_8 <- type1_interferon_signature(bulk_exp_scOS)
IN_all <- c(IN_1,IN_2,IN_3,IN_4, IN_5, IN_6, IN_7, IN_8)
names(IN_all) <- c(names(imm_clus_all), names(sc_merged_subtypes))
p1 <- ggplot(data = data.frame(subtype = c(preds, sc_merged_subtypes), IN_score = IN_all), aes(x = subtype,y=IN_score)) + geom_boxplot() +theme_bw() + stat_compare_means( method = "anova")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/IN_human.pdf", width = 4, height = 4)
#Cytotoxic t cell signature (Rooney et al)
CYT_signature_human <- function(expr){
genes <- c("GZMA","PRF1")
mat <- as.data.frame(expr)
mat <- (mat[genes,])
scores <- sqrt(mat[1,]*mat[2,])
return(scores)
}
CYT <- list()
CYT[[1]] <- CYT_signature_human(ee)
CYT[[2]] <- CYT_signature_human(expr_human2)
CYT[[3]] <- CYT_signature_human(expr_human3)
CYT[[4]] <- CYT_signature_human(expr_human4)
CYT[[5]] <- CYT_signature_human(expr_human5)
CYT[[6]] <- CYT_signature_human(expr_human6)
CYT[[7]] <- CYT_signature_human(expr_human7)
CYT[[8]] <- CYT_signature_human(bulk_exp_scOS)
CYT_all <- c()
for(i in 1: length(CYT)){
cyt <- as.numeric(CYT[[i]])
names(cyt) <- colnames(CYT[[i]])
CYT_all <- c(CYT_all,cyt)
}
p1 <- ggplot(data = data.frame(subtype = c(preds, sc_merged_subtypes), CYT_score = CYT_all), aes(x = subtype,y=CYT_score)) + geom_boxplot() +theme_bw() + stat_compare_means( method = "anova")
print(p1) + ylab("Cytotoxicity Score")
#ggsave(plot = p1, filename = "~/Dog/CYT_human.pdf", width = 4, height = 4)
#expression of checkpoints
chkpoint_exp <- function(expr){
out <- t(log2(data.frame(expr)[c("PDCD1","CD274","CTLA4","TIGIT","LAG3","HAVCR2"),]+1))
colnames(out) <- c("PDCD1","CD274","CTLA4","TIGIT","LAG3","HAVCR2")
return(data.frame(apply(out,2,scale)))
}
CP <- list()
CP[[1]] <- chkpoint_exp(ee)
CP[[2]] <- chkpoint_exp(expr_human2)
CP[[3]] <- chkpoint_exp(expr_human3)
CP[[4]] <- chkpoint_exp(expr_human4)
CP[[5]] <- chkpoint_exp(expr_human5)
CP[[6]] <- chkpoint_exp(expr_human6)
CP[[7]] <- chkpoint_exp(expr_human7)
CP[[8]] <- chkpoint_exp(bulk_exp_scOS)
CP_all <- Reduce(rbind, CP)
out <- data.frame(CP_all, subtype= c(preds, sc_merged_subtypes))
out <- out[!is.na(rowSums(out[,1:6])),]
dd_out <- melt(out, id.vars = "subtype")
colnames(dd_out) <- c("subtype","ICI_target","scaled_expression")
dd_out$ICI_target <- as.character(dd_out$ICI_target)
dd_out$subtype <- factor(dd_out$subtype, levels = c("IE","IE-ECM","ID"))
dd_out$ICI_target[dd_out$ICI_target == "PDCD1"] = "PD1"
dd_out$ICI_target[dd_out$ICI_target == "CD274"] = "PDL1"
dd_out$ICI_target[dd_out$ICI_target == "HAVCR2"] = "TIM3"
#p1 <- ggplot(dd_out, aes(x = subtype, y = log2TPM)) +
# facet_wrap(~ICI_target, scales = "free_y") + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif") + theme_bw() + theme(text = element_text(size = 15)) + xlab("ICI target") + ylab("expression (log2(TPM)")
p1 <- ggboxplot(dd_out, x = "subtype", y="scaled_expression", facet.by = "ICI_target") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("normalized expression")
print(p1)
#ggsave(plot = p1,filename = "~/Dog/human_immune_checkpoints.pdf", width = 6, height = 8)
Evaluate progonostic power of TME subtype predictons using matched progression free survival outcome data
hc4 <- rbind(rbind(hc,hc2),rbind(hc3,rbind(hc44,rbind(hc5,rbind(hc6,hc7)))))
hc4$immune_subtype_1 <- imm_clus_all
hc4$ml_class <- preds
hc4$cyt <- CYT_all[names(imm_clus_all)]
hc4 <- hc4[sample_type_bulk == "Primary",]
surv <- survfit(Surv(OS_time, OS_status) ~ ml_class, data = hc4[!is.na(hc4$OS_time),])
diff <- survdiff(Surv(OS_time, OS_status) ~ ml_class, data = hc4[!is.na(hc4$OS_time),])
p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$OS_time),],
legend.title = "",
legend = "right",
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
xlim = c(0,4500),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
palette = c("#882255","#CC6677","#88CCEE"),
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
#print(p1)
#ggsave(filename = "~/Dog/Human_OS_pred_primary.pdf", plot = p1$plot, width = 7, height = 4)
surv <- survfit(Surv(DFS_time, DFS_status) ~ ml_class, data = hc4[!is.na(hc4$DFS_time),])
diff <- survdiff(Surv(DFS_time, DFS_status) ~ ml_class, data = hc4[!is.na(hc4$DFS_time),])
p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$DFS_time),],
legend.title = "",
legend = "right",
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
xlim = c(0,4500),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
palette = c("#882255","#CC6677","#88CCEE"),
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
#ggsave(filename = "~/Dog/Human_DFS_primary.pdf", plot = p1$plot, width = 7, height = 4)
#ggsave(p1$plot, filename = "~/Dog/aim3_human_DFS.pdf")
#plot proprtion of patients developing mets in 5 years for each TME subtype (GSE33383)
pp <- as.character(preds)
names(pp)<- names(imm_clus_all)
dd_m <- as.matrix(table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)], GSE33383_pheno$`metastasis within 5yrs:ch1`[!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)]))
dd_m <- (dd_m/rowSums(dd_m))*100
dd_m <- melt(dd_m)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
colnames(dd_m) <- c("subtype","mets_within_5yrs","percentage")
dd_m$subtype <- factor(dd_m$subtype,levels = c("IE","IE-ECM","ID"))
p1 <- ggplot(as.data.frame(dd_m), aes(fill = mets_within_5yrs,
y = percentage, x = subtype))+
geom_bar(position = "fill", stat = "identity")+
ggtitle("Metastasis rates")+
theme(plot.title = element_text(hjust = 0.5)) + theme_bw()
print(p1)
#ggsave(filename = "~/Dog/Human_5yr_mets.pdf", plot = p1, width = 5, height = 4)
chisq.test(table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)], GSE33383_pheno$`metastasis within 5yrs:ch1`[!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)]))
## Warning in
## chisq.test(table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within
## 5yrs:ch1`)], : Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)], GSE33383_pheno$`metastasis within 5yrs:ch1`[!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)])
## X-squared = 7.7036, df = 2, p-value = 0.02124
hc4$cyt_score <- c("low","high")[as.numeric(hc4$cyt > median(hc4$cyt, na.rm = TRUE))+1]
surv <- survfit(Surv(OS_time, OS_status) ~ cyt_score, data = hc4[!is.na(hc4$OS_time),])
diff <- survdiff(Surv(OS_time, OS_status) ~ cyt_score, data = hc4[!is.na(hc4$OS_time),])
p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$OS_time),],
legend.title = "",
legend = "right",
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
xlim = c(0,4500),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
palette = c("#882255","#88CCEE","#CC6677"),
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
#print(p1)
#ggsave(filename = "~/Dog/Human_OS_pred_primary.pdf", plot = p1$plot, width = 7, height = 4)
surv <- survfit(Surv(DFS_time, DFS_status) ~ cyt_score, data = hc4[!is.na(hc4$DFS_time),])
diff <- survdiff(Surv(DFS_time, DFS_status) ~ cyt_score, data = hc4[!is.na(hc4$DFS_time),])
p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$DFS_time),],
legend.title = "",
legend = "right",
conf.int = F,
pval = TRUE,
risk.table = TRUE,
tables.height = 0.2,
tables.theme = theme_cleantable(),
risk.table.y.text = FALSE,
pval.coord = c(0, 0.03),
xlim = c(0,4500),
# Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
# or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
palette = c("#882255","#88CCEE", "#CC6677"),
ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)
cph_dat <- data.frame(time = hc4$DFS_time,
subtype = hc4$ml_class,
cyt_score = hc4$cyt,
status = hc4$DFS_status,
stage = hc4$met_at_daig,
gender = hc4$gender,
age = scale(hc4$age, center = T, scale = T), necrosis = hc4$necrosis)
cph_dat <- cph_dat[!is.na(cph_dat$necrosis) & !is.na(hc4$DFS_time),]
cph_dat$subtype <- relevel(as.factor(cph_dat$subtype), ref = "ID")
gender <- relevel(as.factor(cph_dat$gender), ref = "M")
cph_dat$stage[cph_dat$stage == "NON-METASTATIC"] <- "LOCALIZED"
cph_dat$stage <- relevel(as.factor(cph_dat$stage), ref = "LOCALIZED")
cph_dat$predictor <- rep(3, nrow(cph_dat))
cph_dat$predictor[cph_dat$subtype == "IE-ECM"] <- 2
cph_dat$predictor[cph_dat$subtype == "IE"] <- 1
fit <- coxph(Surv(time, status) ~ age + gender + stage + necrosis + subtype, data = cph_dat)
p2 <-ggforest(fit, data = cph_dat,fontsize = 0.7, noDigits = 2) + theme(text = element_text(size = 25))
print(p2)
#ggsave(plot = p2, filename = "~/Dog/human_cph_results_cyt.pdf", width = 5, height = 6)
dd_rnaseq <- data.frame(cellprop_rnaseq[,c(1:10)], subtype = imm_clus)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable",) + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means( method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/caninernaseq_mcpcounter_boxplots.pdf", width = 14, height = 9)
dd_rnaseq <- data.frame(cellprop_nano[,c(1:10)], subtype = imm_clus_nano)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/caninenano_mcpcounter_boxplots.pdf", width = 14, height = 9)
dd_rnaseq <- data.frame(rbind(cellprop_rnaseq[,c(1,3:10)],cellprop_nano[,c(1,3:10)]), subtype = c(imm_clus,imm_clus_nano))
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/allcanine_mcpcounter_boxplots.pdf", width = 11, height = 9)
dd_rnaseq <- data.frame(rbind(cellprop_allhuman), subtype = preds)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)
#ggsave(plot = p1, filename = "~/Dog/allhuman_mcpcounter_boxplots.pdf", width = 11, height = 9)
read single cell celltype fractions from Zhou et al, Nat Comm 2020 and compare against predicted TME subtypes
human_OS_sc_celltypes <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S5a", skip = 2)
## New names:
## • `` -> `...1`
human_OS_sc_celltypes$...1 <- sapply(human_OS_sc_celltypes$...1, function(x) strsplit(x,split = "\\.")[[1]][2])
human_OS_sc_celltypes <- human_OS_sc_celltypes[!is.na(human_OS_sc_celltypes$...1),]
names(human_OS_sc_celltypes$...1) <- NULL
human_OS_sc_celltypes <- as.data.frame(human_OS_sc_celltypes)
rownames(human_OS_sc_celltypes) <- human_OS_sc_celltypes$...1
human_OS_sc_celltypes <- human_OS_sc_celltypes[,-1]
human_OS_sc_celltypes <- as.data.frame(t(human_OS_sc_celltypes))
scOS_cancercell <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S6b", skip = 2)
## New names:
## • `` -> `...1`
scOS_cancercell <- as.data.frame(scOS_cancercell)
rownames(scOS_cancercell) <- scOS_cancercell$...1
scOS_cancercell <- scOS_cancercell[,-1]
scOS_cancercell <- as.data.frame(scOS_cancercell[rownames(human_OS_sc_celltypes),])
scOS_chondro <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S6g", skip = 1)
scOS_chondro <- as.data.frame(scOS_chondro)
rownames(scOS_chondro) <- scOS_chondro$Patient
scOS_chondro <- scOS_chondro[,-1]
scOS_chondro <- scOS_chondro[,-5]
scOS_chondro <- as.data.frame(scOS_chondro[rownames(human_OS_sc_celltypes),])
scOS_osteoclast <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S11c", skip = 1)
scOS_osteoclast <- as.data.frame(scOS_osteoclast)
rownames(scOS_osteoclast) <- scOS_osteoclast$`Cell type`
scOS_osteoclast <- scOS_osteoclast[,-1]
scOS_osteoclast <- t(scOS_osteoclast)
scOS_osteoclast <- as.data.frame(scOS_osteoclast[rownames(human_OS_sc_celltypes),])
scOS_MSC <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S14a", skip = 1)
scOS_MSC <- as.data.frame(scOS_MSC)
rownames(scOS_MSC) <- scOS_MSC$`Cell type`
scOS_MSC<- scOS_MSC[,-1]
scOS_MSC <- t(scOS_MSC)
scOS_MSC <- as.data.frame(scOS_MSC[rownames(human_OS_sc_celltypes),])
scOS_CAF <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S14d", skip = 1)
scOS_CAF <- as.data.frame(scOS_CAF)
rownames(scOS_CAF) <- scOS_CAF$`Cell type`
scOS_CAF <- scOS_CAF[,-1]
scOS_CAF <- t(scOS_CAF)
scOS_CAF <- as.data.frame(scOS_CAF[rownames(human_OS_sc_celltypes),])
scOS_Myeloid <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S15b", skip = 1)
scOS_Myeloid <- as.data.frame(scOS_Myeloid)
rownames(scOS_Myeloid) <- scOS_Myeloid$`Cell type`
scOS_Myeloid <- scOS_Myeloid[,-1]
scOS_Myeloid <- t(scOS_Myeloid)
scOS_Myeloid <- as.data.frame(scOS_Myeloid[rownames(human_OS_sc_celltypes),])
scOS_TIL <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S16b", skip = 1)
scOS_TIL <- as.data.frame(scOS_TIL)
rownames(scOS_TIL) <- scOS_TIL$Patient
scOS_TIL <- scOS_TIL[,-1]
scOS_TIL <- as.data.frame(scOS_TIL[rownames(human_OS_sc_celltypes),])
total <- rowSums(human_OS_sc_celltypes)
scOS_cancercell$Osteoblastic_1 <- scOS_cancercell$Osteoblastic_1/total
scOS_cancercell$Osteoblastic_2 <- scOS_cancercell$Osteoblastic_2/total
scOS_cancercell$Osteoblastic_3 <- scOS_cancercell$Osteoblastic_3/total
scOS_cancercell$Osteoblastic_4 <- scOS_cancercell$Osteoblastic_4/total
scOS_cancercell$Osteoblastic_5 <- scOS_cancercell$Osteoblastic_5/total
scOS_cancercell$Osteoblastic_6 <- scOS_cancercell$Osteoblastic_6/total
scOS_cancercell$`Chondroblastic OS` <- scOS_cancercell$`Chondroblastic OS`/total
scOS_chondro$Chondro_Proli <- scOS_chondro$Chondro_Proli/total
scOS_chondro$Chondro_hyper_1 <- scOS_chondro$Chondro_hyper_1/total
scOS_chondro$Chondro_hyper_2 <- scOS_chondro$Chondro_hyper_2/total
scOS_chondro$Chondro_trans_diff <- scOS_chondro$Chondro_trans_diff/total
scOS_osteoclast$OC_progenitor <- scOS_osteoclast$OC_progenitor/total
scOS_osteoclast$OC_immature <- scOS_osteoclast$OC_immature/total
scOS_osteoclast$OC_mature <- scOS_osteoclast$OC_mature/total
scOS_CAF$`COL14A1+ fibroblast` <- scOS_CAF$`COL14A1+ fibroblast` / total
scOS_CAF$`Smooth muscle cell` <- scOS_CAF$`Smooth muscle cell` / total
scOS_CAF$Myofibroblast <- scOS_CAF$Myofibroblast / total
scOS_MSC$`NT5E+MSC` <- scOS_MSC$`NT5E+MSC` / total
scOS_MSC$`WISP2+MSC` <- scOS_MSC$`WISP2+MSC` / total
scOS_MSC$`CLEC11A+MSC` <- scOS_MSC$`CLEC11A+MSC` / total
scOS_Myeloid$`CD14+_monocytes` <- scOS_Myeloid$`CD14+_monocytes` / total
scOS_Myeloid$`CXCL8+_monocytes` <- scOS_Myeloid$`CXCL8+_monocytes` / total
scOS_Myeloid$M2_TAM <- scOS_Myeloid$M2_TAM / total
scOS_Myeloid$M1_macrophage <- scOS_Myeloid$M1_macrophage / total
scOS_Myeloid$`FABP4+_macrophage` <- scOS_Myeloid$`FABP4+_macrophage` / total
scOS_Myeloid$Neutrophil <- scOS_Myeloid$Neutrophil / total
scOS_Myeloid$`CD14+/CD163+_DC` <- scOS_Myeloid$`CD14+/CD163+_DC` / total
scOS_Myeloid$`CD1C+_DC` <- scOS_Myeloid$`CD1C+_DC` / total
scOS_Myeloid$`CD141+/CLEC9A+_DC` <- scOS_Myeloid$`CD141+/CLEC9A+_DC` / total
scOS_TIL$`CD4+T` <- scOS_TIL$`CD4+T` / total
scOS_TIL$`CD4-/CD8-` <- scOS_TIL$`CD4-/CD8-` / total
scOS_TIL$`T-reg` <- scOS_TIL$`T-reg` / total
scOS_TIL$`CXCL13+T` <- scOS_TIL$`CXCL13+T` / total
scOS_TIL$`CD8+T` <- scOS_TIL$`CD8+T` / total
scOS_TIL$NKT <- scOS_TIL$NKT / total
scOS_TIL$NK <- scOS_TIL$NK / total
scOS_TIL$B_cell <- scOS_TIL$B_cell / total
scOS_TIL$`Proliferating T` <- scOS_TIL$`Proliferating T` / total
scOS_detailed <- list()
scOS_detailed[[1]] <- scOS_cancercell
scOS_detailed[[2]] <- scOS_chondro
scOS_detailed[[3]] <- scOS_osteoclast
scOS_detailed[[4]] <- scOS_TIL
scOS_detailed[[5]] <- scOS_Myeloid
scOS_detailed[[6]] <- scOS_CAF
scOS_detailed[[7]] <- data.frame(Pericytes = human_OS_sc_celltypes$Pericytes) / total
scOS_detailed[[8]] <- scOS_MSC
scOS_detailed[[9]] <- data.frame(Myoblast = human_OS_sc_celltypes$Myoblast) / total
scOS_detailed[[10]] <- data.frame(Endothelial = human_OS_sc_celltypes$Endothelial) / total
scOS_detailed_all <- cbind(scOS_detailed[[1]], scOS_detailed[[2]],
scOS_detailed[[3]], scOS_detailed[[4]],
scOS_detailed[[5]], scOS_detailed[[6]],
scOS_detailed[[7]], scOS_detailed[[8]],
scOS_detailed[[9]], scOS_detailed[[10]])
#plot results
ha = rowAnnotation(
sample = sc_sample_type,
subtype = sc_merged_subtypes,
col = list("sample" = c("Primary" = "yellow", "Met" = "darkgreen", "Recurrent" = "orange"),
"subtype" = c("IE" = "#882255", "IE-ECM" = "#CC6677", "ID" = "#88CCEE")
)
)
aa <- apply(scOS_detailed_all[names(sc_merged_subtypes),],2,scale)
rownames(aa) <- names(sc_merged_subtypes)
p1 <- Heatmap(aa, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, cluster_rows = F, row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")), right_annotation = ha)
print(p1)
#pdf("~/Dog/scOS_heatmap_extended.pdf", width = 12, height = 7)
#print(p1)
#dev.off()
aa <- human_OS_sc_celltypes[names(sc_merged_subtypes),]
aa <- human_OS_sc_celltypes[names(sc_merged_subtypes),]/rowSums(human_OS_sc_celltypes[names(sc_merged_subtypes),])
aa <- apply(aa,2,scale)
rownames(aa) <- names(sc_merged_subtypes)
Heatmap(aa, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, cluster_rows = F,row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")), right_annotation = ha)
#pdf("~/Dog/scOS_heatmap.pdf")
#Heatmap(aa, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, cluster_rows = F,row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")), right_annotation = ha)
#dev.off()
dd_rnaseq <- data.frame(scOS_detailed_all[names(sc_merged_subtypes),], subtype = sc_merged_subtypes)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif") + xlab("") + ylab("proportions")
print(p1)
## Warning in wilcox.test.default(0, c(0, 0, 0.000821900486817981, 0,
## 6.94251596778673e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.00011887779362815, 0.00690483338336836, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0, 0, 0.0832858607724735, 0), paired =
## FALSE): cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0, 0, 0, 0, 0, 0), paired = FALSE): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(c(0, 0, 0.0832858607724735, 0), c(0, 0, 0, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0.000356633380884451, 0, 0.191409365386806,
## : cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0, 0.00137589433131536,
## 0.000632231143706139, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.000356633380884451, 0, 0.191409365386806, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(0.152477763659466, c(0.0306704707560628, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.0306704707560628, 0.0054037826478535, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(0.00381194409148666, c(0.00855920114122682, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.00855920114122682, 0, 0, 0.003397341211226:
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0.000713266761768902, 0, 0, 0), paired =
## FALSE): cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.000713266761768902, 0, 0, 0),
## c(0.000755144421370587, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(0.0986022871664549, c(0, 0, 0, 0, 0,
## 0.00034965034965035: cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.00546837850689491, 0, 0.000455736584254301,
## : cannot compute exact p-value with ties
## Warning in wilcox.test.default(0.000762388818297332, c(0.00011887779362815, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.00011887779362815, 0, 0.000227868292127151,
## : cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(1.41319293865906e-06, 0.000684958606424497,
## : cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0, 0, 0, 0, 0, 0), paired = FALSE): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(c(1.41319293865906e-06, 0.000684958606424497, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0.000188786105342647, 0,
## 0.000189669343111842, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.000356633380884451, 0.000300210147102972, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0, 0.000300210147102972, 0.00478523413467016,
## : cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0, 0.032422695887121, 0,
## 0.000295420974889217: cannot compute exact p-value with ties
## Warning in wilcox.test.default(0, c(0, 0, 0.000126446228741228, 0, 0, 0), :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0, 0.032422695887121, 0, 0.000295420974889217:
## cannot compute exact p-value with ties