Skip to content

Latest commit

 

History

History
3332 lines (2668 loc) · 158 KB

comparative_deconv_analysis.md

File metadata and controls

3332 lines (2668 loc) · 158 KB
library(nanostringr)
library(survival)
library(survminer)
## Loading required package: ggplot2

## Loading required package: ggpubr

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma
library(ggplot2)
library(ggpubr)
library(ComplexHeatmap)
## Loading required package: grid

## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(cluster)
library(readxl)
library(MCPcounter)
## Loading required package: curl

## Using libcurl 8.7.1 with LibreSSL/3.3.6

Read in COTC021/22 bulk nanostring data

rcc_dir <- list.dirs("~/Dog/Nanostring/", recursive = FALSE)
raw_expr <- lapply(rcc_dir, function(x) read_rcc(path = x)$raw)
sfs <- list()
sfs2 <- list()
i = 1
norm_expr <- lapply(raw_expr, function(x) {
  
  #positive control normalization
  gm <- exp(colMeans(log(x[x$Code.Class == "Positive",4:15]), na.rm = T))
  am <- mean(gm, na.rm = T)
  sf <- sapply(gm, function(y) am/y)
  sfs[[i]] <<- sf
  x1 <- as.matrix(x[,4:15])
  x1 <- sapply(1:ncol(x1), function(i) sf[i] * as.numeric(x1[,i]))
  
  #House keeping gene normalization
  gm <- exp(colMeans(log(x[x$Code.Class == "Housekeeping", 4:15]), na.rm = T))
  am <- mean(gm, na.rm = T)
  sf <- sapply(gm, function(y) am/y)
  sfs2[[i]] <<- sf
  x2 <- sapply(1:ncol(x1),function(i) sf[i]*as.numeric(x1[,i]))
  rownames(x2) <- x$Name
  colnames(x2) <- colnames(x)[4:ncol(x)]
  x2 <- x2[x$Code.Class == "Endogenous",]
  x2 <- x2[,!is.na(sf) & !is.infinite(sf)]
  i <<- i+1
  return(x2)
})

combined_expr <- Reduce(cbind, norm_expr)
rownames(combined_expr) <- rownames(norm_expr[[1]])
samp_names <- sapply(colnames(combined_expr), function(x) {
splitstr = strsplit(x,split = "_")[[1]]
return(splitstr[3])
})
samp_names <- sapply(samp_names, trimws)
colnames(combined_expr) <- samp_names
combined_expr <- combined_expr[,!grepl("0814 RNAlater tumor", samp_names)]
nanostring_annotations <- read_excel("~/Dog/Nanostring/CS032282 and CS9318963 Nanostring 07-29-2022.xlsx")
nanostring_annotations <- as.data.frame(nanostring_annotations)
rownames(nanostring_annotations) <- nanostring_annotations$`Nanostring Sample Name`
common <- intersect(rownames(nanostring_annotations), colnames(combined_expr))
CE <- combined_expr[,common]
NaAn <- nanostring_annotations[common,]

Read COTC021/22 bulk RNA Seq data

library(readr)
## 
## Attaching package: 'readr'

## The following object is masked from 'package:curl':
## 
##     parse_date
Canine_RNA_SEQ_TPM_All_data <- read_csv("~/Downloads/Canine_RNA_SEQ_TPM_All_data.csv")
## Rows: 37952 Columns: 199

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): gene_id
## dbl (198): 100_0514_tumor_rsem.genes.results, 101_0518_tumor_rsem.genes.resu...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Canine_RNA_SEQ_TPM_All_data <- as.data.frame(Canine_RNA_SEQ_TPM_All_data)
rownames(Canine_RNA_SEQ_TPM_All_data) <- Canine_RNA_SEQ_TPM_All_data$gene_id
Canine_RNA_SEQ_TPM_All_data <- Canine_RNA_SEQ_TPM_All_data[,-1]
sample_type <- c("Met","Primary")[as.numeric(grepl("(Tumor)|(tumor)", colnames(Canine_RNA_SEQ_TPM_All_data)))+1]
colnames(Canine_RNA_SEQ_TPM_All_data) <- sapply(colnames(Canine_RNA_SEQ_TPM_All_data), function(x) as.character(as.numeric(strsplit(x,split = '_')[[1]][2])))
rownames(Canine_RNA_SEQ_TPM_All_data) <- sapply(rownames(Canine_RNA_SEQ_TPM_All_data), function(x) gsub("_3","",x))

DOG2_RNAseq_Sample_list <- read_excel("~/Downloads/DOG2 RNAseq Sample list.xlsx")
DOG2_RNAseq_Sample_list$`Sample ID`[7] <- "18_0620_Met_Skin"
DOG2_RNAseq_Sample_list$`Sample ID`[8] <- "20_0621_Met_Unspecified"
sites_RNASeq <- rep("Primary", ncol(Canine_RNA_SEQ_TPM_All_data))
sites_RNASeq[sample_type == "Met"] <- sapply(DOG2_RNAseq_Sample_list$`Sample ID`[1:8], function(x) toupper(strsplit(x,split = "_")[[1]][4]))

helper functions to measure expression of distinct pathway signatures: (eg chromosomal instability, cGAS-STING, type1 interferon, etc)

library(msigdbr)
msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)
CIN_signature <- function(expr){
    genes <- c("CCNB2","PRC1","CDC45L","KIF20A","UBE2C","CDC2","MAD2L1","H2AFZ","TRIP13","ESPL1","TTK","FEN1","MCM7","CNAP1","MELK","TPX2","FOXM1","TOP2A","MCM2","RAD51AP1","RFC4","PCNA","RNASEH2A","TGIF2","CCT5")
    mat <- as.data.frame(expr)
    mat <- mat[genes,]
    mat <- apply(as.matrix(mat),1,scale)
    scores <- rowMeans(mat, na.rm = T)
    return(scores)
}

CYT_signature <- function(expr){
    genes <- c("GZMA","PRF1")
    mat <- as.data.frame(expr)
    mat <- log2(mat[genes,] +1)
    scores <- colMeans(mat, na.rm = T)
    return(scores)
}

type1_interferon_signature <- function(expr) {
  genes <- msigdbr_list$HALLMARK_INTERFERON_ALPHA_RESPONSE
    mat <- as.data.frame(expr)
    mat <- mat[genes,]
    mat <- apply(as.matrix(mat),1,scale)
    scores <- rowMeans(mat, na.rm = T)
    return(scores)
}

cGAS_STING_signature <- function(expr){
  genes = c("CGAS","STING1","TBK1","IKBKE","IRF3")
  mat <- as.data.frame(expr)
  mat <- mat[genes,]
  mat <- apply(as.matrix(mat),1,scale)
  scores <- rowMeans(mat, na.rm = T)
  return(scores)
}

run MCP counter + clustering analysis on bulk rnaseq data

library(RColorBrewer)
set.seed(2425)
canin_exp <- Canine_RNA_SEQ_TPM_All_data



cellprop_rnaseq<- t(MCPcounter.estimate(expression = log2(canin_exp+1), featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_rnaseq <- apply(cellprop_rnaseq, 2, scale)
rownames(cellprop_rnaseq) <- colnames(canin_exp)

#k means clustering
km.res <- kmeans(cellprop_rnaseq, centers = 3, nstart = 500, iter.max = 500)
cls <- km.res$cluster


imm_clus <- rep("ID", nrow(cellprop_rnaseq))
imm_clus[cls == 1] <- "IE-ECM"
imm_clus[cls == 2] <- "IE"
ii1 <- imm_clus
imm_clus <- factor(imm_clus, levels = c("IE","IE-ECM","ID"))


#Plot results
ha = rowAnnotation(
    sample = sample_type,
    preservation = rep("Frozen", length(sample_type)),
    col = list("sample" = c("Primary" = "yellow", "Met" =  "darkgreen"),
               "preservation" = c("Frozen" = "#db6d00", "FFPE" = "#924900")
    )
)

n <- 45
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
cols <- sample(col_vector, n, replace = FALSE)
cols <- sample(cols, length(unique(sites_RNASeq)), replace = F)
names(cols) <- unique(sites_RNASeq)
cols["Primary"] <- "white"
ha2 = rowAnnotation(
    site = sites_RNASeq,
    col = list("site"=cols)
)
Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha, right_annotation = ha2)

p1 <- Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)


#pdf('~/Dog/TME_manuscript1.pdf')
#Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
#dev.off()

names(imm_clus) <- rownames(cellprop_rnaseq)
names(ii1) <- rownames(cellprop_rnaseq)

cyt_scores <- CYT_signature(canin_exp)
names(cyt_scores) <- rownames(cellprop_rnaseq)


cell_props_dat <- as.data.frame(cellprop_rnaseq)
cell_props_dat$imm_clus <- imm_clus
Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha, right_annotation = ha2)

#pdf('~/Dog/MCP_marker_genes_expression.pdf',width = 18,height = 9)
#Heatmap(cellprop_rnaseq, row_split = imm_clus, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38","#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha, right_annotation = ha2)
#dev.off()

p1 <- ggplot(data = data.frame(subtype = imm_clus, CIN_score = CIN_signature(log2(canin_exp+1))), aes(x = subtype, y=CIN_score)) + geom_boxplot() + theme_bw() + theme(text = element_text(size = 15), axis.text.x = element_text(size = 15)) + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif")

#+ stat_compare_means(method = "anova", label = "p.format")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/CIN_subtypes.pdf", width = 7, height = 6)
p2 <- ggplot(data = data.frame(subtype = imm_clus, CGAS_STING_score = cGAS_STING_signature(canin_exp)), aes(x = subtype, y=CGAS_STING_score)) + geom_boxplot() + theme_bw() + theme(text = element_text(size = 15), axis.text.x = element_text(size = 15)) + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif")
print(p2)

p2 <- ggplot(data = data.frame(subtype = imm_clus, IN_score = type1_interferon_signature(log2(canin_exp+1))), aes(x = subtype,y=IN_score)) + geom_boxplot() +theme_bw() + stat_compare_means(method = "anova")
print(p2)

#ggsave(plot = p2, filename = "~/Dog/IN_caines.pdf", width = 4, height = 4)


p2 <- ggplot(data = data.frame(subtype = imm_clus, CYT_score = cyt_scores), aes(x = subtype,y=CYT_score)) + geom_boxplot() +theme_bw() + stat_compare_means(method = "anova") + ylab("Cytotoxicity scores")
print(p2)

#ggsave(plot = p2, filename = "~/Dog/CYT_caines.pdf", width = 4, height = 4)

Assessing reproducibility of TME clusters via MCP counter analysis using canine single cell RNA seq cell type signatures (Ammons et al, Communications Biology, 2024)

library(ConsensusClusterPlus)
set.seed(2425)
Ammons_signatures <- read_excel("~/Dog/Ammons_signatures.xlsx")
genes <- c()
cell_types <- c()
cell_subtypes <- c()
for(i in 1:nrow(Ammons_signatures)){
  genes <- c(genes, strsplit(Ammons_signatures$Markers[i],split = ", ")[[1]])
  cell_types <- c(cell_types, rep(Ammons_signatures$`Cell type`[i], length(strsplit(Ammons_signatures$Markers[i],split = ", ")[[1]])))
  cell_subtypes <- c(cell_subtypes, rep(Ammons_signatures$`Cell type detailed`[i], length(strsplit(Ammons_signatures$Markers[i],split = ", ")[[1]])))
}

markers_ammons <- data.frame(HUGO_symbols = genes, Cell_population = cell_subtypes)
colnames(markers_ammons) <- c("HUGO symbols","Cell population")

cellprop_rnaseq2<- t(MCPcounter.estimate(expression = log2(canin_exp+1), featuresType = "HUGO_symbols", genes = markers_ammons))
cellprop_rnaseq2 <- apply(cellprop_rnaseq2, 2, scale)
rownames(cellprop_rnaseq2) <- colnames(canin_exp)

res = ConsensusClusterPlus(d = t(cellprop_rnaseq2), distance = "euclidean", clusterAlg = "km", pItem = 0.8, pFeature = 1, seed = 2425,maxK = 10, reps = 100,plot = "pdf",title = "ConsensusAmmonsRNASeq")
## end fraction

## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
ha = rowAnnotation(
    sample = sample_type,
    col = list("sample" = c("Primary" = "yellow", "Met" =  "darkgreen")
    )
)

imm_clus_ammons <- rep("ID", nrow(cellprop_rnaseq2))
imm_clus_ammons[as.character(res[[3]]$consensusClass) == "3"] <- "IE"
imm_clus_ammons[as.character(res[[3]]$consensusClass) == "2"] <- "IE-ECM"
names(imm_clus_ammons) <- rownames(cellprop_rnaseq2)
Heatmap(cellprop_rnaseq2, row_split = factor(imm_clus_ammons, levels = c("IE","IE-ECM","ID")), cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(3, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)

rownames(cellprop_rnaseq2) <- NULL
pdf('~/Dog/TME_manuscript_ammons.pdf', width = "13",height = "11")
## Warning: 'mode(width)' and 'mode(height)' differ between new and previous
##   ==> NOT changing 'width' & 'height'
Heatmap(cellprop_rnaseq2, row_split = factor(imm_clus_ammons, levels = c("IE","IE-ECM","ID")), cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
dev.off()
## quartz_off_screen 
##                 2
chisq.test(table(factor(imm_clus,levels = c("IE","IE-ECM","ID")), factor(imm_clus_ammons, levels = c("IE","IE-ECM","ID"))))
## Warning in chisq.test(table(factor(imm_clus, levels = c("IE", "IE-ECM", :
## Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(factor(imm_clus, levels = c("IE", "IE-ECM", "ID")), factor(imm_clus_ammons,     levels = c("IE", "IE-ECM", "ID")))
## X-squared = 105.4, df = 4, p-value < 2.2e-16

Validation of TME subtypes through quantitative imaging analysis usig matched H&E and IHC slides

library(readr)
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
trichrome_quant_updated <- read_csv("~/Dog/trichrome_MLH_quant_updated4.csv")
## Rows: 113 Columns: 17

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Image, Object ID, Object type, Classification, Parent, ROI, ROI ann...
## dbl (9): Centroid X µm, Centroid Y µm, trichrome_threholder_hires: Tumor %, ...
## lgl (1): Name
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
osteoid_quant_updated <- read_csv("~/Dog/osteoid_quant_MLH.csv")
## Rows: 113 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Image Tag, Algorithm Name, Algorithm Name.1, Analysis Region, Analy...
## dbl (4): QC Slide V2: % Artifact Area, Tumor: % Osteoid, Tumor: % Osteoid.1,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Filtering out poor quality cases post manual review
osteoid_quant_updated <- osteoid_quant_updated[!(osteoid_quant_updated$`Image Tag` %in% c("2023-09-16-03-34-18.ndpi",
                                                                                          "2023-09-20-13-12-21.ndpi",
                                                                                          "2023-09-23-17-29-02.ndpi",
                                                                                          "2023-09-26-23-32-10.ndpi",
                                                                                          "2023-09-27-12-06-34.ndpi",
                                                                                          "2023-09-27-17-26-47.ndpi")),]

tcell_quant_updated <- read_csv("~/Dog/tcell_quant_MLH.csv")
## Rows: 112 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): Image, Name, Class, Parent, ROI, ROI annotations completed
## dbl (10): Centroid X µm, Centroid Y µm, Num Detections, Num Negative, Num Po...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Filtering out poor quality cases post manual review
tcell_quant_updated <- tcell_quant_updated[!(grepl('bad section', tcell_quant_updated$`ROI annotations completed`)),]
macrophage_quant_updated = read_csv("~/Dog/macrophage_quant_MLH.csv")
## Rows: 111 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): Image, Name, Class, Parent, ROI, ROI annotations completed
## dbl (10): Centroid X µm, Centroid Y µm, Num Detections, Num Negative, Num Po...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Filtering out poor quality cases post manual review
macrophage_quant_updated <- macrophage_quant_updated[!(grepl('bad section', macrophage_quant_updated$`ROI annotations completed`)) & !(macrophage_quant_updated$Image == "2023-09-22-11-22-33.ndpi"),]


mac_density = data.frame(id = macrophage_quant_updated$`Subject Code`, density = macrophage_quant_updated$`Positive %`)
mac_density = mac_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))

tcell_density = data.frame(id = tcell_quant_updated$`Subject Code`, density = tcell_quant_updated$`Positive %`)
tcell_density = tcell_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))

tri_density = data.frame(id = trichrome_quant_updated$`Subject Code`, area = trichrome_quant_updated$`trichrome_threholder_hires: Collagen %`)
tri_density = tri_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))

osteoid_density = data.frame(id = osteoid_quant_updated$`Subject Code`, area = osteoid_quant_updated$`Tumor: % Osteoid`)
osteoid_density = osteoid_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))


tcell_density$subtype = ii1[as.character(tcell_density$id)]
tcell_density <- tcell_density[!is.na(tcell_density$subtype),]
tcell_density$subtype <- factor(tcell_density$subtype, levels = c("ID","IE-ECM","IE"))
mac_density$subtype = ii1[as.character(mac_density$id)]
mac_density <- mac_density[!is.na(mac_density$subtype),]
mac_density$subtype <- factor(mac_density$subtype, levels = c("ID","IE-ECM","IE"))
tri_density$subtype = ii1[as.character(tri_density$id)]
tri_density <- tri_density[!is.na(tri_density$subtype),]
tri_density$subtype <- factor(tri_density$subtype, levels = c("ID","IE-ECM","IE"))
osteoid_density$subtype = ii1[as.character(osteoid_density$id)]
osteoid_density <- osteoid_density[!is.na(osteoid_density$subtype),]
osteoid_density$subtype <- factor(osteoid_density$subtype, levels = c("ID","IE-ECM","IE"))

tcell_density$exp <- cellprop_rnaseq[as.character(tcell_density$id),"T cells"]
mac_density$exp <- cellprop_rnaseq[as.character(mac_density$id),"Monocytic lineage"]
tri_density$exp <- cellprop_rnaseq[as.character(tri_density$id),"Fibroblasts"]
osteoid_density$exp <- cellprop_rnaseq[as.character(osteoid_density$id),"Fibroblasts"]

p1 <- ggplot(data = tcell_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Positive") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")


p2 <- ggplot(data = mac_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Positive") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")


p3 <- ggplot(data = tri_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Collagen") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")

p33 <- ggplot(data = osteoid_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.format") + theme_minimal() + ylab("%Osteoid") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")

p6 <- ggplot(data = tri_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
            label = paste0('r = ', round(cor(tri_density$area, tri_density$exp, method = c("pearson")), 4)),
            color = 'red') + ylab("%Collagen") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")

p5 <- ggplot(data = mac_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
            label = paste0('r = ', round(cor(mac_density$density, mac_density$exp, method = c("pearson")), 4)),
            color = 'red') + ylab("%Positive") + xlab("MCP counter score (Monocytic lineage)") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")

p4 <- ggplot(data = tcell_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
            label = paste0('r = ', round(cor(tcell_density$density, tcell_density$exp, method = c("pearson")), 4)),
            color = 'red') + ylab("%Positive") + xlab("MCP counter score (T cells)") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")

p66 <- ggplot(data = osteoid_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
            label = paste0('r = ', round(cor(osteoid_density$area, osteoid_density$exp, method = c("pearson")), 4)),
            color = 'red') + ylab("%Osteoid") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")

fig1 <- ggarrange(p1,p2,p3,p33,p4,p5,p6,p66, nrow = 2, ncol = 4, labels = c("A","B","C","D","E","F","G","H"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
print(fig1)

########################################################################################################################

mac_density = data.frame(id = macrophage_quant_updated$`Subject Code`, density = macrophage_quant_updated$`Num Positive per mm^2`)
mac_density = mac_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))

tcell_density = data.frame(id = tcell_quant_updated$`Subject Code`, density = tcell_quant_updated$`Num Positive per mm^2`)
tcell_density = tcell_density %>% group_by(id) %>% summarize(density = mean(density, na.rm=TRUE))

tri_density = data.frame(id = trichrome_quant_updated$`Subject Code`, area = trichrome_quant_updated$`trichrome_threholder_hires: Collagen %`)
tri_density = tri_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))

osteoid_density = data.frame(id = osteoid_quant_updated$`Subject Code`, area = osteoid_quant_updated$`Tumor: % Osteoid`)
osteoid_density = osteoid_density %>% group_by(id) %>% summarize(area = mean(area, na.rm=TRUE))



tcell_density$subtype = ii1[as.character(tcell_density$id)]
tcell_density <- tcell_density[!is.na(tcell_density$subtype),]
tcell_density$subtype <- factor(tcell_density$subtype, levels = c("ID","IE-ECM","IE"))
mac_density$subtype = ii1[as.character(mac_density$id)]
mac_density <- mac_density[!is.na(mac_density$subtype),]
mac_density$subtype <- factor(mac_density$subtype, levels = c("ID","IE-ECM","IE"))
tri_density$subtype = ii1[as.character(tri_density$id)]
tri_density <- tri_density[!is.na(tri_density$subtype),]
tri_density$subtype <- factor(tri_density$subtype, levels = c("ID","IE-ECM","IE"))
osteoid_density$subtype = ii1[as.character(osteoid_density$id)]
osteoid_density <- osteoid_density[!is.na(osteoid_density$subtype),]
osteoid_density$subtype <- factor(osteoid_density$subtype, levels = c("ID","IE-ECM","IE"))

tcell_density$exp <- cellprop_rnaseq[as.character(tcell_density$id),"T cells"]
mac_density$exp <- cellprop_rnaseq[as.character(mac_density$id),"Monocytic lineage"]
tri_density$exp <- cellprop_rnaseq[as.character(tri_density$id),"Fibroblasts"]
osteoid_density$exp <- cellprop_rnaseq[as.character(osteoid_density$id),"Fibroblasts"]


p1 <- ggplot(data = tcell_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")


p2 <- ggplot(data = mac_density, aes(x=subtype, y = density)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Suntype") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")


p3 <- ggplot(data = tri_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Collagen") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")

p33 <- ggplot(data = osteoid_density, aes(x=subtype, y = area)) + geom_boxplot() + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Osteoid") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")

p6 <- ggplot(data = tri_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 10,
            label = paste0('Spearman rho = ', round(cor(tri_density$area, tri_density$exp, method = c("spearman")), 4)),
            color = 'red') + ylab("%Collagen") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")

p5 <- ggplot(data = mac_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
            label = paste0('Spearman rho = ', round(cor(mac_density$density, mac_density$exp, method = c("spearman")), 4)),
            color = 'red') + ylab("Density") + xlab("MCP counter score (Monocytic lineage)") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)")

p4 <- ggplot(data = tcell_density, aes(x = exp, y= density)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
            label = paste0('Spearman rho = ', round(cor(tcell_density$density, tcell_density$exp, method = c("spearman")), 4)),
            color = 'red') + ylab("Density") + xlab("MCP counter score (T cells)") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)")

p66 <- ggplot(data = osteoid_density, aes(x = exp, y= area)) + geom_point()+ geom_smooth(method = "lm", se = FALSE) + theme_minimal() + geom_text(x = 1, y = 1,
            label = paste0('Spearman rho = ', round(cor(osteoid_density$area, osteoid_density$exp, method = c("spearman")), 4)),
            color = 'red') + ylab("%Osteoid") + xlab("MCP counter score (Stroma)") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")

fig2 <- ggarrange(p1,p2,p3,p33,p4,p5,p6,p66, nrow = 2, ncol = 4, labels = c("A","B","C","D","E","F","G","H"))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
print(fig2)

p1 <- ggplot(data = tcell_density, aes(x=subtype, y = density)) + geom_boxplot()  + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD3 IHC (T cells)") + scale_y_continuous(limits = c(0,4500))


p2 <- ggplot(data = mac_density[mac_density$density < 6000,], aes(x=subtype, y = density)) + geom_boxplot()  + stat_compare_means(method = "anova") + theme_minimal() + ylab("Density") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "CD204 IHC (Macrophage)") + scale_y_continuous(limits = c(0,4500))


p3 <- ggplot(data = tri_density, aes(x=subtype, y = area)) + geom_boxplot()  + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Collagen") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Trichrome (Collagen)")

p33 <- ggplot(data = osteoid_density, aes(x=subtype, y = area)) +  geom_boxplot()  + stat_compare_means(method = "anova") + theme_minimal() + ylab("%Osteoid") + xlab("Subtype") + theme(text = element_text(size = 15)) + labs(title = "Osteoid")

fig3 <- ggarrange(p1,p2,p3,p33, nrow = 2, ncol = 2, labels = c("A","B","C","D"))
print(fig3)

#ggsave(plot = fig3, filename = "~/Dog/Final_IHC_validation_figure_panels.pdf", width = 6, height = 6)

Plotting distribution of immune checkpoint gene expression across TME clusters

library(reshape)
## 
## Attaching package: 'reshape'

## The following object is masked from 'package:dplyr':
## 
##     rename
out <- t(log2(canin_exp[intersect(c("PDCD1","CD274","CTLA4","TIGIT","LAG3","HAVCR2"), rownames(canin_exp)),]+1))
out <- data.frame(apply(out,2,scale), subtype = imm_clus)

dd_out <- melt(out, id.vars = "subtype")
colnames(dd_out) <- c("subtype","ICI_target","expression")
dd_out$ICI_target <- as.character(dd_out$ICI_target)
dd_out$subtype <- factor(dd_out$subtype, levels = c("IE","IE-ECM","ID"))
dd_out$ICI_target[dd_out$ICI_target == "PDCD1"] = "PD1"
dd_out$ICI_target[dd_out$ICI_target == "CD274"] = "PDL1"

#plot results
p1 <- ggboxplot(dd_out, x = "subtype", y="expression", facet.by = "ICI_target") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova")+ xlab("") + ylab("expression (normalized)")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/canine_immune_checkpoints.pdf")

Assessing reproducibility of TME clusters via xCell analysis

library(xCell)
out <- t(xCellAnalysis(canin_exp))
## [1] "Num. of genes: 9787"

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## Setting parallel calculations through a MulticoreParam back-end
## with workers=4 and tasks=100.
## Estimating ssGSEA scores for 489 gene sets.
## [1] "Calculating ranks..."

## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.

## [1] "Calculating absolute values from ranks..."
##   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   5%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  11%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  17%  |                                                                              |=============                                                         |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  21%  |                                                                              |================                                                      |  22%  |                                                                              |================                                                      |  23%  |                                                                              |=================                                                     |  24%  |                                                                              |==================                                                    |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |====================                                                  |  28%  |                                                                              |=====================                                                 |  29%  |                                                                              |=====================                                                 |  30%  |                                                                              |======================                                                |  31%  |                                                                              |=======================                                               |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  34%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  38%  |                                                                              |============================                                          |  39%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  41%  |                                                                              |==============================                                        |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  44%  |                                                                              |================================                                      |  45%  |                                                                              |=================================                                     |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  48%  |                                                                              |===================================                                   |  49%  |                                                                              |===================================                                   |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  53%  |                                                                              |=====================================                                 |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  57%  |                                                                              |========================================                              |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |==========================================                            |  61%  |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |===============================================                       |  68%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |=================================================                     |  71%  |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  74%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  77%  |                                                                              |======================================================                |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  80%  |                                                                              |=========================================================             |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |===========================================================           |  84%  |                                                                              |===========================================================           |  85%  |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                                                                              |===============================================================       |  90%  |                                                                              |================================================================      |  91%  |                                                                              |================================================================      |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%  |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  97%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================| 100%
res = ConsensusClusterPlus(d = t(apply(out,2,scale)), distance = "euclidean", clusterAlg = "km", pItem = 0.8, pFeature = 1, seed = 2425,maxK = 10, reps = 100, plot = "pdf",title = "ConsensusXCellRNASeq")
## end fraction

## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
ha = rowAnnotation(
    sample = sample_type,
    col = list("sample" = c("Primary" = "yellow", "Met" =  "darkgreen")
    )
)

#plot results
Heatmap(apply(out,2,scale), row_split = as.character(res[[3]]$consensusClass), cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
## The automatically generated colors map from the minus and plus 99^th of
## the absolute values in the matrix. There are outliers in the matrix
## whose patterns might be hidden by this color mapping. You can manually
## set the color to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.

#pdf('~/Dog/TME_manuscript3.pdf', width = "13",height = "11")
#Heatmap(apply(out,2,scale), row_split = imm_clus, cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), row_names_gp = gpar(cex = 0.5), left_annotation = ha)
#dev.off()

run MCP counter + clustering analysis on bulk nanostring data

set.seed(24524)
library(RColorBrewer)
cellprop_nano <- t(MCPcounter.estimate(expression = log2(cbind(CE[,NaAn$`Sample Type` %in% c("Primary Tumor")],CE[,NaAn$`Sample Type` %in% c("Met")])+1), featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))

#expression of immune checkpoint genes in nanostring
chkpt_nano <- cbind(CE[,NaAn$`Sample Type` %in% c("Primary Tumor")],CE[,NaAn$`Sample Type` %in% c("Met")])
chkpt_nano <- t(chkpt_nano[intersect(rownames(chkpt_nano), c("PDCD1","CD274","CTLA4","LAG3","HAVCR2","TIGIT")),])


nanostring_annotations <- rbind(NaAn[NaAn$`Sample Type` %in% c("Primary Tumor"),], NaAn[NaAn$`Sample Type` %in% c("Met"),])

sample_preservation = c("Frozen","FFPE")[as.numeric(grepl("FFPE",nanostring_annotations$`Name, detailed`))+1]

#perform sample preservation-specific normalization to mitigate batch effects across different sample preservation techniques
cellprop_nano_ffpe <- apply(cellprop_nano[sample_preservation == "FFPE",],2,scale)
chkpt_nano_ffpe <- apply(chkpt_nano[sample_preservation == "FFPE",],2,scale)
km.res <- kmeans(cellprop_nano_ffpe, centers = 3, nstart = 100, iter.max = 500)

imm_clus_nano_ffpe <- rep("ID", nrow(cellprop_nano_ffpe))
imm_clus_nano_ffpe[km.res$cluster == 2] <- "IE"
imm_clus_nano_ffpe[km.res$cluster == 1] <- "IE-ECM"
Heatmap(cellprop_nano_ffpe, row_split = imm_clus_nano_ffpe, cluster_column_slices = F, cluster_columns = F)

cellprop_nano_frozen <- apply(cellprop_nano[sample_preservation == "Frozen",],2,scale)
chkpt_nano_frozen <- apply(chkpt_nano[sample_preservation == "Frozen",],2,scale)

km.res <- kmeans(cellprop_nano_frozen, centers = 3, nstart = 100, iter.max = 500)

imm_clus_nano_frozen <- rep("ID", nrow(cellprop_nano_frozen))
imm_clus_nano_frozen[km.res$cluster == 3] <- "IE"
imm_clus_nano_frozen[km.res$cluster == 2] <- "IE-ECM"
Heatmap(cellprop_nano_frozen, row_split = imm_clus_nano_frozen, cluster_column_slices = F, cluster_columns = F)

#merge FFPe and frozen sample results
cellprop_nano <- rbind(cellprop_nano_ffpe, cellprop_nano_frozen)
chkpt_nano <- rbind(chkpt_nano_ffpe, chkpt_nano_frozen)
nanostring_annotations <- rbind(nanostring_annotations[sample_preservation == "FFPE",],nanostring_annotations[sample_preservation == "Frozen",])
imm_clus_nano = c(imm_clus_nano_ffpe, imm_clus_nano_frozen)
sample_preservation <- c(sample_preservation[sample_preservation == "FFPE"],sample_preservation[sample_preservation == "Frozen"])


#Primary vs metastatic tumor sample status
sstatus = nanostring_annotations$`Sample Type`
ha = rowAnnotation(
    sample = sstatus,
    col = list("sample" = c("Primary Tumor" = "yellow", "Met" =  "green")
    )
)

#metastatic site information
ss <- toupper(nanostring_annotations$Site)
ss_split <- rep("OTHER",length(ss))
ss_split[ss == "LUNG"] <- "LUNG"
ss_split[ss == "KIDNEY"] <- "KIDNEY"
ss_split[ss == "LIVER"] <- "LIVER"
ss_split[ss %in% c("BONE","RIB","L4","T5","T6","T11/T13","SPINE")] <- "SKELETAL"
ss_split[ss == "PRIMARY"] <- "PRIMARY"
n <- 70
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
cols <- sample(col_vector, n, replace = F)
cols <- sample(cols, length(unique(ss)), replace = F)
names(cols) <- unique(ss)



#plot results
ha = rowAnnotation(
    sample = sstatus,
    preservation = sample_preservation,
    col = list("sample" = c("Primary Tumor" = "yellow", "Met" =  "darkgreen"),
               "preservation" = c("Frozen" = "#db6d00", "FFPE" = "#924900")
    )
)

ha2 = rowAnnotation(
    subtype = imm_clus_nano,
    col = list("subtype" = c("IE" = "#882255", "IE-ECM" =  "#CC6677", "ID" = "#88CCEE"))
)



names(imm_clus_nano) <- as.character(as.numeric(nanostring_annotations$`Subject Code`))
ii <- imm_clus_nano
imm_clus_nano <- factor(imm_clus_nano, levels = c("IE","IE-ECM", "ID"))
rownames(cellprop_nano) <- names(ii)
Heatmap(cellprop_nano, row_split = ss_split, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), left_annotation = ha, right_annotation = ha2)

#pdf("~/Dog/metTME_landscape.pdf", height = 14, width = 8)
#Heatmap(cellprop_nano, row_split = ss_split, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(3, #"mm"), left_annotation = ha, right_annotation = ha2)
#dev.off()
#rownames(cellprop_nano) <- samp_old

#Plotting distribution of immune checkpoint gene expression across TME clusters (Nanostring)

Heatmap(chkpt_nano, row_split = ss_split, cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), left_annotation = ha, right_annotation = ha2)

out <- data.frame(chkpt_nano, subtype = imm_clus_nano)
dd_out <- melt(out, id.vars = "subtype")
colnames(dd_out) <- c("subtype","ICI_target","scaled_expression")
dd_out$ICI_target <- as.character(dd_out$ICI_target)
dd_out$subtype <- factor(dd_out$subtype, levels = c("IE","IE-ECM","ID"))
dd_out$ICI_target[dd_out$ICI_target == "PDCD1"] = "PD1"
dd_out$ICI_target[dd_out$ICI_target == "CD274"] = "PDL1"

p1 <- ggboxplot(dd_out, x = "subtype", y="scaled_expression", facet.by = "ICI_target") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("expression (normalized)")
#stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/canine_immune_checkpoints_nano.pdf")

Joint analysis of primary and metastatic tumor samples from dogs enolled in COTC021/22

library(tibble)
set.seed(317796929)

#All metastatic samples (Nanostring + RNASeq)
a1 <- rbind(cellprop_nano[sstatus == "Met",], cellprop_rnaseq[sample_type == "Met",])
source_a1 <- c(rep("Nanostring", nrow(cellprop_nano[sstatus == "Met",])), rep("RNASeq",nrow(cellprop_rnaseq[sample_type == "Met",])))
prep_a1 <- c(sample_preservation[sstatus == "Met"], rep("Frozen",nrow(cellprop_rnaseq[sample_type == "Met",])))
imm_clus_a1 <- c(ii[sstatus == "Met"], ii1[sample_type == "Met"])
sites_a1 <- c(ss[sstatus == "Met"], sites_RNASeq[sample_type == "Met"])

#All primary samples (Nanostring + RNASeq)
a2 <- rbind(cellprop_rnaseq[sample_type == "Primary",], cellprop_nano[sstatus == "Primary Tumor",])
source_a2 <- c(rep("RNASeq", nrow(cellprop_rnaseq[sample_type == "Primary",])), rep("Nanostring",nrow(cellprop_nano[sstatus == "Primary Tumor",])))
prep_a2 <- c(rep("Frozen",nrow(cellprop_rnaseq[sample_type == "Primary",])), sample_preservation[sstatus == "Primary Tumor"])
imm_clus_a2 <- c(ii1[sample_type == "Primary"], ii[sstatus == "Primary Tumor"])
sites_a2 <- rep("Primary", length(imm_clus_a2))

#match tumor samples from same patient
common <- intersect(rownames(a1), rownames(a2))
case_list <- list()
for(pt in common){
    xx <- a1[rownames(a1) == pt,]
    if(is.null(dim(xx))){
        xx <- t(xx)
    }
    d1 <- data.frame(xx)
    d1$sample_type = "Metastatic"
    d1$source = source_a1[rownames(a1) == pt]
    d1$prep = prep_a1[rownames(a1) == pt]
    d1$site = sites_a1[rownames(a1) == pt]
    d1$case = pt
    d1$immune_subtype = imm_clus_a1[rownames(a1) == pt]
    
    xx <- a2[rownames(a2) == pt,]
    if(is.null(dim(xx))){
        xx <- t(xx)
    }
    d2 <- data.frame(xx)
    d2$sample_type = "Primary"
    d2$source = source_a2[rownames(a2) == pt]
    d2$prep = prep_a2[rownames(a2) == pt]
    d2$site = sites_a2[rownames(a2) == pt]
    d2$case = pt
    d2$immune_subtype = imm_clus_a2[rownames(a2) == pt]
    
    case_list[[pt]] <- rbind(d2,d1)
}
matched_cases <- Reduce(rbind, case_list)

matched_cases$sample_type <- factor(matched_cases$sample_type, levels = c("Primary", "Metastatic"))
ss1_split <- rep("OTHER",nrow(matched_cases))
ss1_split[matched_cases$site == "LUNG"] <- "LUNG"
ss1_split[matched_cases$site == "KIDNEY"] <- "KIDNEY"
ss1_split[matched_cases$site == "LIVER"] <- "LIVER"
ss1_split[matched_cases$site %in% c("BONE","RIB","L4","T5","T6","T11/T13","SPINE")] <- "SKELETAL"
ss1_split[matched_cases$site == "Primary"] <- "PRIMARY"


#plot results
ha = columnAnnotation(
    site = ss1_split,
    preservation = matched_cases$prep,
    platform = matched_cases$source,
    subtype = matched_cases$immune_subtype,
    col = list("site" = c("OTHER" = "#009E73", 
                          "PRIMARY" =  "#F0E442",
                          "LUNG"="#DF65B0", 
                          "KIDNEY" = "#984EA3",
                          "SKELETAL" = "#FEE5D9",
                          "LIVER" = "#FB6A4A"),
               "preservation" = c("Frozen" = "#db6d00", 
                                  "FFPE" = "#924900"),
               "platform" = c("Nanostring" = "#525252",
                              "RNASeq" = "#F2F0F7"),
               "subtype" = c("ID" = "#88CCEE",
                             "IE" = "#882255",
                             "IE-ECM" = "#CC6677")
    )
)

matched_cases$site2 <- ss1_split

met_mat <- as.matrix(matched_cases[,1:10])
rownames(met_mat) <- matched_cases$case

Heatmap(t(met_mat), column_split =  matched_cases$sample_type, cluster_column_slices = F, column_gap = unit(5, "mm"),  cluster_rows = F, cluster_columns = T, top_annotation = ha, column_names_gp = grid::gpar(fontsize = 8))

sampIds1 <- rownames(a1)
idx <- sapply(sampIds1, function(x) x %in% rownames(a2))
a1 = a1[idx,]
imm_clus_a1 = imm_clus_a1[idx]
ss1 <- sites_a1[idx]

ha = rowAnnotation(
  sample = rep("Primary Tumor", nrow(a2[sapply(rownames(a2), function(x) x %in% sampIds1[idx]),])),
  col = list("Primary Tumor" = "yellow", "Met "= "darkgreen")
)

#pdf("~/Dog/Met_evolve_primary.pdf", width = 5, height = 10)
#Heatmap(a2[sapply(rownames(a2), function(x) x %in% rownames(a1)),], row_split = imm_clus_a2[sapply(rownames(a2), function(x) x %in% rownames(a1))], cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), cluster_rows = F, left_annotation = ha)
#dev.off()



n <- length(unique(ss1))
#qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
#col_vector = grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)]
cols <- sample(col_vector, n, replace = F)
ss1_split <- rep("OTHER",length(ss1))
ss1_split[ss1 == "LUNG"] <- "LUNG"
ss1_split[ss1 == "KIDNEY"] <- "KIDNEY"
ss1_split[ss1 == "LIVER"] <- "LIVER"
ss1_split[ss1 %in% c("BONE","RIB","L4","T5","T6","T11/T13","SPINE")] <- "SKELETAL"
ss1_split[ss1 == "PRIMARY"] <- "PRIMARY"
names(cols) <- unique(ss1)



#samp <- sapply(rownames(a1), function(x) as.character(as.numeric(strsplit(x,"_")[[1]][2])))
#rownames(a1) <- samp
#names(imm_clus_a1) <- rownames(a1)
ha = rowAnnotation(
    met_subtype = imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM"],
    col = list("met_subtype" = c("IE" = "#882255", "IE-ECM" =  "#CC6677", "ID" = "#88CCEE")
          )
)

ha2 = rowAnnotation(
  sample = rep("Met", length(imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM"])),
  col = list("Primary Tumor"="yellow","Met"="darkgreen")
)

#pdf("~/Dog/Met_evolve_IE-ECM.pdf", width = 5, height = 10)
#Heatmap(a1[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM",], row_split = ss1_split[imm_clus_a2[names(imm_clus_a1)] == "IE-ECM"], cluster_columns = T, cluster_row_slices = FALSE, row_gap = unit(3, "mm"), right_annotation = ha,left_annotation = ha2)
#dev.off()

ha = rowAnnotation(
    met_subtype = imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "ID"],
    col = list("met_subtype" = c("IE" = "#882255", "IE-ECM" =  "#CC6677", "ID" = "#88CCEE")
          )
)
ha2 = rowAnnotation(
  sample = rep("Met", length(imm_clus_a1[imm_clus_a2[names(imm_clus_a1)] == "ID"])),
  col = list("Primary Tumor"="yellow","Met"="darkgreen")
)
#pdf("~/Dog/Met_evolve_ID.pdf", width = 5, height = 10)
#Heatmap(a1[imm_clus_a2[names(imm_clus_a1)] == "ID",], row_split = ss1_split[imm_clus_a2[names(imm_clus_a1)] == "ID"], cluster_columns =T, cluster_row_slices = FALSE, row_gap = unit(3, "mm"), right_annotation = ha, left_annotation = ha2)
#dev.off()



#contingency table of matched tumor samples
mat_subtype <- matrix(0,nrow = 3, ncol = 3)
rownames(mat_subtype) <- c("IE","IE-ECM","ID")
colnames(mat_subtype) <- c("IE","IE-ECM","ID")
edge_list <- list()
idx <- 1
for(id in unique(matched_cases$case)){
    case_id <- which(matched_cases$case == id)
    is_prim <- matched_cases$sample_type[case_id] == "Primary"
    is_met <- matched_cases$sample_type[case_id] == "Metastatic"
    for(i in case_id[is_prim]){
        for(j in case_id[is_met]){
            for(subtype_i in c("IE","IE-ECM","ID")){
                for(subtype_j in c("IE","IE-ECM","ID")){
                    if(matched_cases$immune_subtype[i] == subtype_i & matched_cases$immune_subtype[j] == subtype_j){
                        mat_subtype[subtype_i,subtype_j] = mat_subtype[subtype_i, subtype_j] + 1
                        edge_list[[idx]] <- c(i,j)
                        idx <- idx + 1
                    }
                }
            }
        }
    }
}

print(mat_subtype)
##        IE IE-ECM ID
## IE      1      0  0
## IE-ECM  7     37 18
## ID     12     29 17
chisq.test(mat_subtype, simulate.p.value = TRUE)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  mat_subtype
## X-squared = 7.2091, df = NA, p-value = 0.1059
#generate sankey diagram
library(plotly)
## 
## Attaching package: 'plotly'

## The following object is masked from 'package:reshape':
## 
##     rename

## The following object is masked from 'package:ComplexHeatmap':
## 
##     add_heatmap

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout
#reticulate::import("sys")
matched_cases$site[matched_cases$site == "SPINAL CORD"] = "EXTRADURAL"
matched_cases$site[matched_cases$site == "LIP"] = "ORAL"

prim_sample <- sapply(edge_list, function(x) paste("primary:",x[1], sep = ""))
met_sample <- sapply(edge_list, function(x) paste(matched_cases$site[x[2]],x[2],sep = ":"))

node_lab <- c(unique(sapply(edge_list, function(x) matched_cases$case[x[1]])), unique(prim_sample), unique(met_sample))

subtype_col <- c("IE" = "#882255", "IE-ECM" =  "#CC6677", "ID" = "#88CCEE")
node_col <- c(rep("grey", length(unique(sapply(edge_list, function(x) matched_cases$case[x[1]])))),
               subtype_col[sapply(unique(prim_sample), function(x) matched_cases$immune_subtype[as.numeric(strsplit(x,split = ":")[[1]][2])])],
               subtype_col[sapply(unique(met_sample), function(x) matched_cases$immune_subtype[as.numeric(strsplit(x,split = ":")[[1]][2])])])

names(node_col) <- NULL

elist1 <- t(sapply(edge_list, function(x) {
  a <- matched_cases$case[x[1]]
  b <- paste("primary:",x[1], sep = "")
  return(c(which(node_lab == a), which(node_lab == b)))
}))

elist2 <- t(sapply(edge_list, function(x) {
  a <- paste("primary:",x[1], sep = "")
  b <- paste(matched_cases$site[x[2]],x[2],sep = ":")
  return(c(which(node_lab == a), which(node_lab == b)))
}))

elist <- rbind(elist1,elist2)
                      

fig <- plot_ly(
    type = "sankey",
    domain = list(
      x =  c(0,1),
      y =  c(0,1)
    ),
    orientation = "h",
    valueformat = ".0f",
    valuesuffix = "TWh",

    node = list(
      label = sapply(node_lab, function(x) strsplit(x,split = ":")[[1]][1]),
      color = node_col,
      pad = 15,
      thickness = 15,
      line = list(
        color = "black",
        width = 0.5
      )
    ),

    link = list(
      source = elist[,1]-1,
      target = elist[,2]-1,
      value = rep(1,nrow(elist))
    )
  ) 

fig <- fig %>% layout(
    title = " ",
    font = list(
      size = 9
    ),
    xaxis = list(showgrid = F, zeroline = F),
    yaxis = list(showgrid = F, zeroline = F)
)

print(fig)
#save_image(fig, file = "/Users/sushantpa/Dog/met_evolve_kaleido.pdf",width = 410, height = 720)

Read clinical outcome data

library(survival)
library(survminer)
#Scripts to read in the canine OS clinical metadata
#clinical metadata for all cases that received Rapamycin (mTOR inhibitor) in addition to standard of care therapy
Slides2Outcomes_Rapa_all <- read_csv("~/Dog/Slides2Outcomes_Rapa_all_new.csv")
## New names:
## Rows: 295 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): Study, Site, Name, breed, gender, Tumor Location, PH vs NPH, ALP, ... dbl
## (9): ...1, Patient ID, age, weight, PH, DFI, DFI_censor, Survival (days...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#clinical metadata for all cases that received standard of care therapy
Slides2Outcomes_SOC_all <- read_csv("~/Dog/Slides2Outcomes_SOC_all_new.csv")
## New names:
## Rows: 305 Columns: 24
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (15): Study, Site, Name, breed, gender, Tumor Location, PH vs NPH, ALP, ... dbl
## (9): ...1, Patient ID, age, weight, PH, DFI, DFI_censor, Survival (days...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
dat1 <- subset(Slides2Outcomes_Rapa_all,select = c("slide","Patient ID","Tumor Location","Site","age","weight","breed","gender","PH","ALP","Group","DFI","DFI_censor","Survival (days from sx)","Surv_censor"))
dat1$treatment = rep("Rapamycin", nrow(dat1))

dat2 <- subset(Slides2Outcomes_SOC_all,select = c("slide","Patient ID","Tumor Location","Site","age","weight","breed","gender","PH","ALP","Group","DFI","DFI_censor","Survival (days from sx)","Surv_censor"))
dat2$treatment = rep("SOC", nrow(dat2))

clindat_all <- rbind(dat1,dat2)
clindat_all <- as.data.frame(clindat_all)
clindat_all <- clindat_all[!duplicated(clindat_all$slide),]
rownames(clindat_all) <- substr(clindat_all$slide,1,19)
clindat_all <- clindat_all[,-1]
colnames(clindat_all)[c(11,12,13,14)] <- c("DFS_time","DFS_status","OS_time","OS_status")
clindat_all$ALP[clindat_all$ALP == "elevated"] <- "Elevated"
clindat_all$gender[clindat_all$`Patient ID` == "307"] <- "Spayed Female"
clindat_all$gender[grepl("Castrated",clindat_all$gender)] <- "Castrated Male"
clindat_all$gender[grepl("Spayed",clindat_all$gender)] <- "Spayed Female"
clindat_all$gender[grepl("Male Phenotype",clindat_all$gender)] <- "Intact Male"
clindat_all$gender[grepl("Female Phenotype",clindat_all$gender)] <- "Intact Female"
clindat_all <- clindat_all[!duplicated(clindat_all$`Patient ID`),]
cc1 <- clindat_all
rownames(cc1) <- as.character(clindat_all$`Patient ID`)
common <- intersect(names(imm_clus)[sample_type == "Primary"], rownames(cc1))
cc1 <- cc1[common,]
cc1$primary_immune_subtype <- as.character(imm_clus[sample_type == "Primary"][common])

#cc1$integrated = cc1$group
#cc1$integrated[cc1$group == "{cluster 1, 2}"] = cc1$immune_subtype[cc1$group == "{cluster 1, 2}"]
ccc1 <-cc1
#ccc1 <- ccc1[names(met_subtype_lung),]
#ccc1$met_subtype_lung <- met_subtype_lung
surv <- survfit(Surv(OS_time, OS_status) ~ primary_immune_subtype, data = ccc1)
diff <- survdiff(Surv(OS_time, OS_status) ~ primary_immune_subtype, data = ccc1)

p1 <- ggsurvplot(surv, data = ccc1,
                  legend.title = "",
                 legend = "right",
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                 palette = c("#88CCEE","#882255","#CC6677"),
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days from sx)")
print(p1)

#ggsave(filename = "~/Dog/canine_OS_survfigure.pdf", plot=p1$plot, width = 9, height = 6)

surv <- survfit(Surv(DFS_time, DFS_status) ~ primary_immune_subtype, data = ccc1)
diff <- survdiff(Surv(DFS_time, DFS_status) ~ primary_immune_subtype, data = ccc1)

p1 <- ggsurvplot(surv, data = ccc1,
                  legend.title = "",
                 legend = "right",
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                 palette = c("#88CCEE","#882255","#CC6677"),
                  ggtheme = theme(text = element_text(size = 15)) + theme_bw() # Change ggplot2 theme
) + xlab("Time (days from sx)")
print(p1)

#ggsave(filename = "~/Dog/canine_DFS_survfigure.pdf", plot=p1$plot, width = 9, height = 6)

Multivaruate Cox proportional hazards analysis to account for confounding clinical factors

cph_dat <- data.frame(time = cc1$DFS_time,
subtype = cc1$primary_immune_subtype,
cytotoxicity_score = as.numeric(cyt_scores[common]),
status = cc1$DFS_status,
location = c("NPH","PH")[as.numeric(ccc1$PH)+1],
ALP = cc1$ALP,
age = scale(as.numeric(cc1$age)),
gender = cc1$gender,
weight = scale(cc1$weight),
treatment = c("SOC","Sirolimus+SOC")[as.numeric(cc1$treatment == "Rapamycin")+1])
cph_dat$gender[grepl("female", ignore.case = T, cph_dat$gender)] = "F"
cph_dat$gender[grepl("male", ignore.case = T, cph_dat$gender)] = "M"
cph_dat$ALP[cph_dat$ALP != "Elevated"] = "Normal"
#cph_dat$isFemale = as.numeric(cph_dat$isFemale == "F")
cph_dat$ALP <- relevel(as.factor(cph_dat$ALP), ref = "Normal")
cph_dat$treatment <- relevel(as.factor(cph_dat$treatment), ref = "SOC")
cph_dat$location <- relevel(as.factor(cph_dat$location), ref = "NPH")
cph_dat$gender <- relevel(as.factor(cph_dat$gender), ref = "M")
fit <- coxph(Surv(time, status) ~ location + ALP + age + gender + weight + treatment +  subtype, data = cph_dat)
p1 <- ggforest(fit, data = cph_dat,fontsize = 0.7, noDigits = 2) + theme(text = element_text(size = 15))
#ggsave(plot = p1, filename = "~/Dog/ggforest_DFS.pdf", width = 6, height = 8)


cph_dat <- data.frame(time = cc1$OS_time,
subtype = cc1$primary_immune_subtype,
cytotoxicity_score = as.numeric(cyt_scores[common]),
status = cc1$OS_status,
location = c("NPH","PH")[as.numeric(ccc1$PH)+1],
ALP = cc1$ALP,
age = scale(as.numeric(cc1$age)),
gender = cc1$gender,
weight = scale(cc1$weight),
treatment = c("SOC","Sirolimus+SOC")[as.numeric(cc1$treatment == "Rapamycin")+1])
cph_dat$gender[grepl("female", ignore.case = T, cph_dat$gender)] = "F"
cph_dat$gender[grepl("male", ignore.case = T, cph_dat$gender)] = "M"
cph_dat$ALP[cph_dat$ALP != "Elevated"] = "Normal"
#cph_dat$isFemale = as.numeric(cph_dat$isFemale == "F")
cph_dat$ALP <- relevel(as.factor(cph_dat$ALP), ref = "Normal")
cph_dat$treatment <- relevel(as.factor(cph_dat$treatment), ref = "SOC")
cph_dat$location <- relevel(as.factor(cph_dat$location), ref = "NPH")
cph_dat$gender <- relevel(as.factor(cph_dat$gender), ref = "M")
fit <- coxph(Surv(time, status) ~ location + ALP + age + gender + weight + treatment +  subtype, data = cph_dat)
p2 <-ggforest(fit, data = cph_dat,fontsize = 0.7, noDigits = 2) + theme(text = element_text(size = 15))

#ggsave(plot = p2, filename = "~/Dog/ggforest_OS.pdf", width = 6, height = 8)

fig <- ggarrange(p2,p1, ncol = 2, nrow = 1, labels = c("",""))
print(fig)

#ggsave(plot = fig, filename = "~/Dog/ggforest_cyt_figures.pdf",width = 12, height = 8)

#Plot TME characteristics of metastatic samples in long term vs short term survivors

library(reshape)
cc1 <- clindat_all
rownames(cc1) <- as.character(clindat_all$`Patient ID`)

met_dd1 <- data.frame(surv_group = c("shorter survival","longer survival")[as.numeric(cc1[matched_cases$case[matched_cases$sample_type == "Metastatic"], "OS_time"] > median(cc1$OS_time[cc1$OS_status==1]))+1], os_status = as.numeric(cc1[matched_cases$case[matched_cases$sample_type == "Metastatic"], "OS_status"]), os_time = as.numeric(cc1[matched_cases$case[matched_cases$sample_type == "Metastatic"], "OS_time"]), met_subtype = matched_cases$immune_subtype[matched_cases$sample_type == "Metastatic"], sites = matched_cases$site2[matched_cases$sample_type == "Metastatic"], matched_cases[matched_cases$sample_type == "Metastatic",1:10])
met_dd1 <- met_dd1[!is.na(met_dd1$surv_group),]

ha <- columnAnnotation(
  group = met_dd1$surv_group,
  preservation = matched_cases$prep[matched_cases$sample_type == "Metastatic"],
  platform = matched_cases$source[matched_cases$sample_type == "Metastatic"],
  subtype = matched_cases$immune_subtype[matched_cases$sample_type == "Metastatic"],
  col = list(
               "preservation" = c("Frozen" = "#db6d00", 
                                  "FFPE" = "#924900"),
               "platform" = c("Nanostring" = "#525252",
                              "RNASeq" = "#F2F0F7"),
               "subtype" = c("ID" = "#88CCEE",
                             "IE" = "#882255",
                             "IE-ECM" = "#CC6677"),
             group = c("shorter survival"="#AE017E",
                       "longer survival"="#FDAE61")
    )
)
mat <- as.matrix(t(met_dd1[,6:ncol(met_dd1)]))
colnames(mat) <- matched_cases$case[matched_cases$sample_type == "Metastatic"]
colnames(mat) <- gsub("X","",colnames(mat))
colnames(mat) <- gsub("(\\.1)|(\\.2)|(\\.3)|(\\.4)|(\\.5)|(\\.6)","",colnames(mat))
Heatmap(mat, column_split = met_dd1$sites, bottom_annotation = ha, cluster_rows = F, cluster_column_slices = F,column_gap = unit(1, "mm"))

#pdf("~/Dog/met_sites_survival_association.pdf", width = 16, height = 4)
#Heatmap(mat, column_split = met_dd1$sites, bottom_annotation = ha, cluster_rows = F, cluster_column_slices = F,column_gap = unit(1, "mm"))
#dev.off()

#Heatmap(as.matrix(met_dd1[,6:ncol(met_dd1)]), row_split = met_dd1$surv_group, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), right_annotation = ha2)
met_dd2 <- melt(met_dd1[,c(1,6:ncol(met_dd1))])
## Using surv_group as id variables
p1 <- ggplot(data = met_dd2, aes(x = variable, y = value, fill = surv_group)) + geom_boxplot() + theme_bw() + theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90)) + stat_compare_means(aes(group = surv_group), label = "p.signif", method = "t.test") + ylim(c(-4,4))
print(p1)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

#ggsave(filename = "~/Dog/met_survival_association.pdf",plot = p1, width = 12, height = 7)

Perform Differential expression + gene set enrichment analysis to characterize enriched hallmark pathways in each TME cluster

library(edgeR)
## Loading required package: limma
library(matrixStats)
## 
## Attaching package: 'matrixStats'

## The following object is masked from 'package:dplyr':
## 
##     count
library(fgsea)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ matrixStats::count() masks dplyr::count()
## ✖ tidyr::expand()      masks reshape::expand()
## ✖ plotly::filter()     masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ readr::parse_date()  masks curl::parse_date()
## ✖ plotly::rename()     masks reshape::rename(), dplyr::rename()
## ✖ lubridate::stamp()   masks reshape::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(msigdbr)
library(clusterProfiler)
## 
## Registered S3 methods overwritten by 'treeio':
##   method              from    
##   MRCA.phylo          tidytree
##   MRCA.treedata       tidytree
##   Nnode.treedata      tidytree
##   Ntip.treedata       tidytree
##   ancestor.phylo      tidytree
##   ancestor.treedata   tidytree
##   child.phylo         tidytree
##   child.treedata      tidytree
##   full_join.phylo     tidytree
##   full_join.treedata  tidytree
##   groupClade.phylo    tidytree
##   groupClade.treedata tidytree
##   groupOTU.phylo      tidytree
##   groupOTU.treedata   tidytree
##   is.rooted.treedata  tidytree
##   nodeid.phylo        tidytree
##   nodeid.treedata     tidytree
##   nodelab.phylo       tidytree
##   nodelab.treedata    tidytree
##   offspring.phylo     tidytree
##   offspring.treedata  tidytree
##   parent.phylo        tidytree
##   parent.treedata     tidytree
##   root.treedata       tidytree
##   rootnode.phylo      tidytree
##   sibling.phylo       tidytree
## clusterProfiler v4.6.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
## 
## Attaching package: 'clusterProfiler'
## 
## The following object is masked from 'package:purrr':
## 
##     simplify
## 
## The following object is masked from 'package:reshape':
## 
##     rename
## 
## The following object is masked from 'package:stats':
## 
##     filter
# Read raw counts
DOG2_Raw_RNA_SEQ_DATA <- read_csv("~/Dog/DOG2_Raw_RNA_SEQ_DATA.csv")
## Rows: 37952 Columns: 187
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): gene_id
## dbl (186): 102, 105, 106, 201, 202, 203, 204, 206, 207, 209, 210, 211, 212, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

IE

genes <- DOG2_Raw_RNA_SEQ_DATA$gene_id
genes <- sapply(genes, function(x) strsplit(x, split = "_")[[1]][1])
DOG2_Raw_RNA_SEQ_DATA$gene_id <- NULL
groups = ii1[colnames(DOG2_Raw_RNA_SEQ_DATA)]
groups = c("rest","IE")[as.numeric(groups == "IE")+1]
counts.DGEList <- DGEList(counts = DOG2_Raw_RNA_SEQ_DATA, genes = genes, group = as.factor(groups))
counts.keep <- filterByExpr(y = counts.DGEList)
counts.DGEList <- counts.DGEList[counts.keep, , keep.lib.sizes = FALSE]
counts.DGEList <- calcNormFactors(counts.DGEList)
counts.DGEList <- estimateDisp(counts.DGEList,design = model.matrix(~groups))
rest_IE.DGEExact <- exactTest(counts.DGEList, pair = c("rest","IE"))
msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)
ranks <- rest_IE.DGEExact$table$logFC
names(ranks) <- counts.DGEList$genes$genes
ranks <- sort(ranks, decreasing = T)
gsea_res <- GSEA(geneList = ranks, TERM2GENE = subset(msigdbr_df, select = c("gs_name","gene_symbol")),pvalueCutoff = 1)
## preparing geneSet collections...
## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.12% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.

## leading edge analysis...
## done...
fgseaResTidy_IE <- gsea_res %>%
as_tibble() %>%
arrange(desc(NES))
p1 <- ggplot(fgseaResTidy_IE, aes(reorder(ID, NES), NES)) +
geom_col(aes(fill=p.adjust < 0.05)) +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",title="Hallmark pathways NES from GSEA") + 
  theme_minimal()
print(p1)

IE-ECM

groups = ii1[colnames(DOG2_Raw_RNA_SEQ_DATA)]
groups = c("rest","IE-ECM")[as.numeric(groups == "IE-ECM")+1]
counts.DGEList <- DGEList(counts = DOG2_Raw_RNA_SEQ_DATA, genes = genes, group = as.factor(groups))
counts.keep <- filterByExpr(y = counts.DGEList)
counts.DGEList <- counts.DGEList[counts.keep, , keep.lib.sizes = FALSE]
counts.DGEList <- calcNormFactors(counts.DGEList)
counts.DGEList <- estimateDisp(counts.DGEList,design = model.matrix(~groups))
rest_IEECM.DGEExact <- exactTest(counts.DGEList, pair = c("rest","IE-ECM"))
msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)


ranks <- rest_IEECM.DGEExact$table$logFC
names(ranks) <- counts.DGEList$genes$genes
ranks <- sort(ranks, decreasing = T)

gsea_res <- GSEA(geneList = ranks, TERM2GENE = subset(msigdbr_df, select = c("gs_name","gene_symbol")), pvalueCutoff = 1, nPermSimple=100000)
## preparing geneSet collections...

## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.

## leading edge analysis...

## done...
fgseaResTidy_IEECM <- gsea_res %>%
  as_tibble() %>%
  arrange(desc(NES))


p1 <- ggplot(fgseaResTidy_IEECM, aes(reorder(ID, NES), NES)) +
  geom_col(aes(fill=p.adjust < 0.05)) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title="Hallmark pathways NES from GSEA") + 
  theme_minimal()
print(p1)

#ggsave(plot = p1, filename = "~/Dog/IEECM_enrichmentplot.pdf")

ID

groups = ii1[colnames(DOG2_Raw_RNA_SEQ_DATA)]
groups = c("rest","ID")[as.numeric(groups == "ID")+1]
counts.DGEList <- DGEList(counts = DOG2_Raw_RNA_SEQ_DATA, genes = genes, group = as.factor(groups))
counts.keep <- filterByExpr(y = counts.DGEList)
counts.DGEList <- counts.DGEList[counts.keep, , keep.lib.sizes = FALSE]
counts.DGEList <- calcNormFactors(counts.DGEList)
counts.DGEList <- estimateDisp(counts.DGEList,design = model.matrix(~groups))
rest_ID.DGEExact <- exactTest(counts.DGEList, pair = c("rest","ID"))


msigdbr_df <- msigdbr(species = "Homo sapiens", category = "H")
msigdbr_list = split(x = msigdbr_df$gene_symbol, f = msigdbr_df$gs_name)


ranks <- rest_ID.DGEExact$table$logFC
names(ranks) <- counts.DGEList$genes$genes
ranks <- sort(ranks, decreasing = T)

gsea_res <- GSEA(geneList = ranks, TERM2GENE = subset(msigdbr_df, select = c("gs_name","gene_symbol")),pvalueCutoff = 1, nPermSimple = 100000)
## preparing geneSet collections...

## GSEA analysis...

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.

## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.

## leading edge analysis...

## done...
fgseaResTidy_ID <- gsea_res %>%
  as_tibble() %>%
  arrange(desc(NES))



p1 <- ggplot(fgseaResTidy_ID, aes(reorder(ID, NES), NES)) +
  geom_col((aes(fill=p.adjust < 0.05))) +
  coord_flip() +
  labs(x="Pathway", y="Normalized Enrichment Score",
       title="Hallmark pathways NES from GSEA") + 
  theme_minimal()
print(p1)

#ggsave(plot = p1, filename = "~/Dog/ID_enrichmentplot.pdf")

Generate combined dotplot depicting differential expression + gsea

results

NES_IE <- fgseaResTidy_IE$NES
names(NES_IE) <- fgseaResTidy_IE$ID
NES_IEECM <- fgseaResTidy_IEECM$NES
names(NES_IEECM) <- fgseaResTidy_IEECM$ID
NES_ID <- fgseaResTidy_ID$NES
names(NES_ID) <- fgseaResTidy_ID$NES
NES_ID <- fgseaResTidy_ID$NES
names(NES_ID) <- fgseaResTidy_ID$ID
NES_mat <- cbind(NES_IE, cbind(NES_IEECM[names(NES_IE)], NES_ID[names(NES_IE)]))
rownames(NES_mat) <- names(NES_IE)
lpval_IE <- -log10(fgseaResTidy_IE$p.adjust)
names(lpval_IE) <- fgseaResTidy_IE$ID
lpval_IEECM <- -log10(fgseaResTidy_IEECM$p.adjust)
names(lpval_IEECM) <- fgseaResTidy_IEECM$ID
lpval_ID <- -log10(fgseaResTidy_ID$p.adjust)
names(lpval_ID) <- fgseaResTidy_ID$ID
pval_mat <- cbind(lpval_IE, cbind(lpval_IEECM[names(lpval_IE)],lpval_ID[names(lpval_IE)]))
rownames(pval_mat) <- names(lpval_IE)
colnames(NES_mat) <- c("IE","IE-ECM","ID")

NES_mat <- NES_mat[names(NES_IEECM),]
pval_mat <- pval_mat[names(NES_IEECM),]

col_fun = circlize::colorRamp2(c(-1, 0, 1), c("navyblue", "darkmagenta", "yellow"))

layer_fun = function(j, i, x, y, w, h, fill){
          grid.rect(x = x, y = y, width = w, height = h, 
                    gp = gpar(col = NA, fill = NA))
          grid.circle(x=x,y=y,r= pindex(pval_mat, i, j)/10 * unit(2, "mm"),
                      gp = gpar(fill = col_fun(pindex(NES_mat, i, j)), col = NA))}


lgd_list = list(
    Legend( labels = c(0,2.5,5,7.5,10), title = "-log10(adjusted P-value)",
            graphics = list(
              function(x, y, w, h) grid.circle(x = x, y = y, r = 0 * unit(2, "mm"),
                                               gp = gpar(fill = "black")),
              function(x, y, w, h) grid.circle(x = x, y = y, r = 0.25 * unit(2, "mm"),
                                               gp = gpar(fill = "black")),
              function(x, y, w, h) grid.circle(x = x, y = y, r = 0.5 * unit(2, "mm"),
                                               gp = gpar(fill = "black")),
              function(x, y, w, h) grid.circle(x = x, y = y, r = 0.75 * unit(2, "mm"),
                                               gp = gpar(fill = "black")),
              function(x, y, w, h) grid.circle(x = x, y = y, r = 1 * unit(2, "mm"),
                                               gp = gpar(fill = "black")))
            ))

set.seed(123)    
hp<- Heatmap(NES_mat,
        heatmap_legend_param=list(title="NES"),
        col=col_fun,
        rect_gp = gpar(type = "none"),
        layer_fun = layer_fun,
        row_names_gp = gpar(fontsize = 7.5),
        cluster_rows = F,
        cluster_columns = F)

draw( hp, annotation_legend_list = lgd_list)

Read human data

TARGET

library("org.Hs.eg.db") 
## Loading required package: AnnotationDbi

## Loading required package: stats4

## Loading required package: BiocGenerics

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union

## The following object is masked from 'package:limma':
## 
##     plotMA

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## 
## Attaching package: 'Biobase'

## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians

## Loading required package: IRanges

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:clusterProfiler':
## 
##     rename

## The following objects are masked from 'package:lubridate':
## 
##     second, second<-

## The following object is masked from 'package:tidyr':
## 
##     expand

## The following object is masked from 'package:plotly':
## 
##     rename

## The following objects are masked from 'package:reshape':
## 
##     expand, rename

## The following objects are masked from 'package:dplyr':
## 
##     first, rename

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:clusterProfiler':
## 
##     slice

## The following object is masked from 'package:lubridate':
## 
##     %within%

## The following object is masked from 'package:purrr':
## 
##     reduce

## The following object is masked from 'package:plotly':
## 
##     slice

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## 
## Attaching package: 'AnnotationDbi'

## The following object is masked from 'package:clusterProfiler':
## 
##     select

## The following object is masked from 'package:plotly':
## 
##     select

## The following object is masked from 'package:dplyr':
## 
##     select

## 
TARGET_OS_star_counts <- read_delim("~/Downloads/TARGET-OS.star_counts.tsv", 
delim = "\t", escape_double = FALSE, 
trim_ws = TRUE)
## Rows: 60487 Columns: 89

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (1): Ensembl_ID
## dbl (88): TARGET-40-PASUUH-01A, TARGET-40-PAUTWB-01A, TARGET-40-PAKUZU-01A, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
genes <- TARGET_OS_star_counts$Ensembl_ID
genes <- sapply(genes, function(x) strsplit(x, split = "\\.")[[1]][1])
symbols <- mapIds(org.Hs.eg.db, keys = genes, keytype = "ENSEMBL", column="SYMBOL")
## 'select()' returned 1:many mapping between keys and columns
#mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
#G_list <- getBM(filters= "ensembl_gene_id", attributes= c("ensembl_gene_id","hgnc_symbol"),values=genes,mart= mart)
TARGET_OS_star_counts <- as.data.frame(TARGET_OS_star_counts)
TARGET_OS_star_counts <- TARGET_OS_star_counts[!is.na(symbols),]
expr_human <- aggregate.data.frame(TARGET_OS_star_counts[,2:ncol(TARGET_OS_star_counts)], by = list(symbols[!is.na(symbols)]), FUN = mean, na.rm=T)
rownames(expr_human) <- expr_human$Group.1
expr_human <- expr_human[,-1]

TARGET_OS_clinical <- read_delim("~/Downloads/TARGET-OS.clinical.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 527 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (23): sample_id, TARGET USI, Gender, Race, Ethnicity, First Event, Time ...
## dbl  (8): Age at Diagnosis in Days, Overall Survival Time in Days, Year of D...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
TARGET_OS_clinical <- as.data.frame(TARGET_OS_clinical)
TARGET_OS_clinical <- TARGET_OS_clinical[!duplicated(TARGET_OS_clinical$sample_id),]
rownames(TARGET_OS_clinical) <- TARGET_OS_clinical$sample_id

common <- intersect(rownames(TARGET_OS_clinical), colnames(expr_human))
ee <- expr_human[,common]
human_clin <- TARGET_OS_clinical[common,]


set.seed(2425)
#ccommon = intersect(rownames(ee), intersect(rownames(expr_human2),rownames(expr_human3)))
cellprop_human <- t(MCPcounter.estimate(expression = ee, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human <- apply(cellprop_human, 2, scale)
#rownames(cellprop_primary) <- colnames(Canine_RNA_SEQ_TPM_All_data)
km.res <- kmeans(cellprop_human, centers = 3, nstart = 100, iter.max = 500)
imm_clus_target <- rep("ID", nrow(cellprop_human))
imm_clus_target[km.res$cluster == 3] <- "IE"
imm_clus_target[km.res$cluster == 1] <- "IE-ECM"
#imm_clus <- factor(imm_clus, levels = c("IE","IE fibrotic", "ID"))
Heatmap(cellprop_human, row_split = imm_clus_target, cluster_columns = F, cluster_row_slices = FALSE, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))

names(imm_clus_target) <- colnames(ee)
human_clin <- human_clin[names(imm_clus_target),]
human_clin$immune_subtype <- imm_clus_target
hc <- data.frame(OS_time = as.numeric(human_clin$`Overall Survival Time in Days`), OS_status = as.numeric(human_clin$`Vital Status` == "Dead"),
                 DFS_time = as.numeric(human_clin$`Time to First Event in Days`), DFS_status = as.numeric(human_clin$`First Event` == "Relapse"),
                 immune_subtype = human_clin$immune_subtype,
                 met_at_daig = toupper(human_clin$`Disease at diagnosis`),
                 age = human_clin$`Age at Diagnosis in Days`/365,
                 gender = c("M","F")[as.numeric(human_clin$Gender == "Female")+1])
hc$necrosis <- NA
hc$necrosis[!is.na(human_clin$`Percent necrosis`)] <- c("0-90","91-100")[as.numeric(as.numeric(human_clin$`Percent necrosis`[!is.na(human_clin$`Percent necrosis`)]) > 90)+1]
## Warning: NAs introduced by coercion
hc$necrosis[!is.na(human_clin$`Histologic response`)] <-human_clin$`Histologic response`[!is.na(human_clin$`Histologic response`)]
hc$necrosis[hc$necrosis %in% c("91-100","Stage 3/4","98","95") & !is.na(hc$necrosis)] <- ">90%"
hc$necrosis[!(hc$necrosis %in% c(">90%")) & !is.na(hc$necrosis)] <- "<=90%"

hc$met_at_daig[hc$met_at_daig == "METASTATIC (CONFIRMED)"] = "METASTATIC"
hc$met_at_daig[hc$met_at_daig == "NON-METASTATIC (CONFIRMED)"] = "NON-METASTATIC"

surv <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc)
diff <- survdiff(Surv(OS_time, OS_status) ~ immune_subtype, data = hc)

p1 <- ggsurvplot(surv, data = hc,
                  
                  legend.title = "",
                 legend = "right",
                 #legend.labs = c("ID","IE","IE fibrotic"),
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)

 surv <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc)
 diff <- survdiff(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc)
# 
 p1 <- ggsurvplot(surv, data = hc,
                   
                   legend.title = "",
                  legend = "right",
                  #legend.labs = c("ID","IE","IE fibrotic"),
                   conf.int = F,
                   pval = TRUE,
                   risk.table = TRUE,
                   tables.height = 0.2,
                   tables.theme = theme_cleantable(),
                   risk.table.y.text = FALSE,
                  pval.coord = c(0, 0.03),
                   # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                   # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                   ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
 ) + xlab("Time (days)")
 print(p1)

GSE21257

expr_human2 <- read_csv("~/Dog/GSE21257_expr.csv")
## New names:
## Rows: 30549 Columns: 54
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (53): GSM530667, GSM530899, GSM531283, GSM531284, GSM531285,
## GSM531286, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
expr_human2 <- as.data.frame(expr_human2)
expr_human2 <- aggregate.data.frame(expr_human2[,2:ncol(expr_human2)], by = list(as.character(expr_human2[,1])), FUN = mean, na.rm = T)
rownames(expr_human2) <- expr_human2[,1]
expr_human2 <- expr_human2[,-1]
expr_human2 <- as.matrix(expr_human2)

set.seed(2425)
cellprop_human2 <- t(MCPcounter.estimate(expression = expr_human2, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human2 <- apply(cellprop_human2, 2, scale)
km.res <- kmeans(cellprop_human2, centers = 3, nstart = 100, iter.max = 500)
imm_clus2 <- rep("ID", nrow(cellprop_human2))
imm_clus2[km.res$cluster == 1] <- "IE"
imm_clus2[km.res$cluster == 3] <- "IE-ECM"
Heatmap(cellprop_human2, row_split = imm_clus2, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))

names(imm_clus2) <- colnames(expr_human2)
human_clin2 <- read_delim("~/Dog/GSE21257_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## New names:
## Rows: 53 Columns: 49
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "\t" chr
## (40): ...1, title, geo_accession, status, submission_date, last_update_d... dbl
## (8): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,... lgl
## (1): ...45
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## • `` -> `...45`
human_clin2 <- as.data.frame(human_clin2)
rownames(human_clin2) <- human_clin2$...1
human_clin2 <- human_clin2[,-1]
human_clin2 <- human_clin2[names(imm_clus2),]
human_clin2$`age:ch1` <- sapply(human_clin2$`age:ch1`, function(x) as.numeric(strsplit(x, split = " ")[[1]][1])/12)
human_clin2$immune_subtype <- as.character(imm_clus2)
hc2 <- data.frame(OS_time = as.numeric(human_clin2$OS_time*30), OS_status = as.numeric(human_clin2$OS_status),
                 DFS_time = as.numeric(human_clin2$MFS_time*30), DFS_status = as.numeric(human_clin2$met_status),
                 immune_subtype = human_clin2$immune_subtype,
                 met_at_daig = human_clin2$`group:ch1`,
                 age = human_clin2$`age:ch1`,
                 gender = human_clin2$`gender:ch1`)
hc2$met_at_daig = c("NON-METASTATIC","METASTATIC")[as.numeric(hc2$met_at_daig == "Metastases present at diagnosis")+1]
hc2 <- hc2[!is.na(hc2$OS_status) & !is.na(hc2$DFS_status),]
hc2$necrosis <- NA
surv <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc2)
diff <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc2)

p1 <- ggsurvplot(surv, data = hc2,
                  
                  legend.title = "",
                 legend = "right",
                 legend.labs = c("ID","IE","IE-ECM"),
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)

surv <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc2)
diff <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc2)

p1 <- ggsurvplot(surv, data = hc2,
                  legend.title = "",
                 legend = "right",
                 #legend.labs = c("ID","IE","IE fibrotic"),
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)

GSE39055

expr_human3 <- read_csv("~/Dog/GSE39055_expr.csv")
## New names:
## Rows: 29377 Columns: 38
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (37): GSM954790, GSM954791, GSM954792, GSM954793, GSM954794,
## GSM954795, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
expr_human3 <- as.data.frame(expr_human3)
expr_human3 <- aggregate.data.frame(expr_human3[,2:ncol(expr_human3)], by = list(as.character(expr_human3[,1])), FUN = mean, na.rm = T)
rownames(expr_human3) <- expr_human3[,1]
expr_human3 <- expr_human3[,-1]
#expr_human3 <- as.matrix(log2(expr_human3+1))

set.seed(2425)
cellprop_human3 <- t(MCPcounter.estimate(expression = expr_human3, featuresType = "HUGO_symbols",probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human3 <- apply(cellprop_human3, 2, scale)
km.res <- kmeans(cellprop_human3, centers = 3, nstart = 100, iter.max = 500)
imm_clus3 <- rep("ID", nrow(cellprop_human3))
imm_clus3[km.res$cluster == 1] <- "IE"
imm_clus3[km.res$cluster == 2] <- "IE-ECM"
#imm_clus3 <- factor(imm_clus3, levels = c("IE", "IE fibrotic", "ID"))
Heatmap(cellprop_human3, row_split = imm_clus3, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))

names(imm_clus3) <- colnames(expr_human3)

human_clin3 <- read_delim("~/Dog/GSE39055_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 37 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (45): title, Metastatic_at_Diag, geo_accession, status, submission_date,...
## dbl  (6): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
human_clin3 <- as.data.frame(human_clin3)
rownames(human_clin3) <- human_clin3$geo_accession

human_clin3 <- human_clin3[names(imm_clus3),]
human_clin3$immune_subtype <- as.character(imm_clus3)
human_clin3$`age:ch1` <- sapply(human_clin3$`age:ch1`, function(x) as.numeric(strsplit(x, split = " ")[[1]][1]))
#human_clin3[human_clin3$`time until first recurrence or latest follow-up (months):ch1` > 0,]
hc3 <- data.frame(OS_time = NA, OS_status = NA,
                 DFS_time = as.numeric(human_clin3$`time until first recurrence or latest follow-up (months):ch1`*30), DFS_status = as.numeric(human_clin3$`recurrence:ch1` == "Y"),
                 immune_subtype = human_clin3$immune_subtype, met_at_daig = human_clin3$Metastatic_at_Diag,
                 age = human_clin3$`age:ch1`,
                 gender = c("M","F")[as.numeric(human_clin3$`gender:ch1` == "female")+1])

hc3$necrosis <- c("<=90%",">90%")[as.numeric(human_clin3$necrosis =="Y")+1]

surv <- survfit(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc3)
diff <- survdiff(Surv(DFS_time, DFS_status) ~ immune_subtype, data = hc3)

p1 <- ggsurvplot(surv, data = hc3,
                  legend.title = "",
                 legend = "right",
                 #legend.labs = c("ID","IE","IE fibrotic"),
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)

GSE16091

expr_human5 <- read_csv("~/Dog/GSE16091_expr.csv")
## Rows: 22283 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): gene
## dbl (34): GSM402718, GSM402719, GSM402720, GSM402721, GSM402722, GSM402723, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human5 <- as.data.frame(expr_human5)
expr_human5 <- aggregate.data.frame(expr_human5[,2:ncol(expr_human5)], by = list(as.character(expr_human5[,1])), FUN = mean, na.rm = T)
rownames(expr_human5) <- expr_human5[,1]
expr_human5 <- expr_human5[,-1]

set.seed(2425)
cellprop_human5 <- t(MCPcounter.estimate(expression = expr_human5, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human5 <- apply(cellprop_human5, 2, scale)
#rownames(cellprop_primary) <- colnames(Canine_RNA_SEQ_TPM_All_data)
km.res <- kmeans(cellprop_human5, centers = 3, nstart = 100, iter.max = 500)
imm_clus5 <- rep("ID", nrow(cellprop_human5))
imm_clus5[km.res$cluster == 3] <- "IE"
imm_clus5[km.res$cluster == 2] <- "IE-ECM"
#imm_clus3 <- factor(imm_clus3, levels = c("IE", "IE fibrotic", "ID"))
Heatmap(cellprop_human5, row_split = imm_clus5, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))

names(imm_clus5) <- colnames(expr_human5)

human_clin5 <- read_delim("~/Dog/GSE16091_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 34 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (30): title, geo_accession, status, submission_date, last_update_date, t...
## dbl  (5): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
human_clin5 <- as.data.frame(human_clin5)
rownames(human_clin5) <- human_clin5$geo_accession

human_clin5 <- human_clin5[names(imm_clus5),]
human_clin5$immune_subtype <- as.character(imm_clus5)
#human_clin3[human_clin3$`time until first recurrence or latest follow-up (months):ch1` > 0,]
hc5 <- data.frame(OS_time = as.numeric(human_clin5$`days-followup:ch1`), OS_status = as.numeric(human_clin5$`alive-or-dead:ch1` == "D"),
                 DFS_time = NA, DFS_status = NA,
                 immune_subtype = human_clin5$immune_subtype, met_at_daig = NA,
                 age = NA,
                 gender = NA,
                 necrosis = NA)

surv <- survfit(Surv(OS_time, OS_status) ~ immune_subtype, data = hc5)
diff <- survdiff(Surv(OS_time, OS_status) ~ immune_subtype, data = hc5)

p1 <- ggsurvplot(surv, data = hc5,
                  legend.title = "",
                 legend = "right",
                 #legend.labs = c("ID","IE","IE fibrotic"),
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)

GSE30699

expr_human6 <- read_csv("~/Dog/GSE30699_expr.csv")
## Rows: 48701 Columns: 108
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): gene
## dbl (107): GSM761349, GSM761350, GSM761351, GSM761352, GSM761353, GSM761354,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
human_clin6 <- read_delim("~/Dog/GSE30699_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 107 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (34): title, geo_accession, status, submission_date, last_update_date, t...
## dbl  (3): channel_count, taxid_ch1, data_row_count
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human6 <- as.data.frame(expr_human6)
expr_human6 <- aggregate.data.frame(expr_human6[,2:ncol(expr_human6)], by = list(as.character(expr_human6[,1])), FUN = mean, na.rm = T)
rownames(expr_human6) <- expr_human6[,1]
expr_human6 <- expr_human6[,-1]
expr_human6 <- expr_human6[, grepl("biopsy",human_clin6$source_name_ch1)]

set.seed(2425)
cellprop_human6 <- t(MCPcounter.estimate(expression = expr_human6, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human6 <- apply(cellprop_human6, 2, scale)
km.res <- kmeans(cellprop_human6, centers = 3, nstart = 100, iter.max = 500)
imm_clus6 <- rep("ID", nrow(cellprop_human6))
imm_clus6[km.res$cluster == 3] <- "IE"
imm_clus6[km.res$cluster == 1] <- "IE-ECM"
Heatmap(cellprop_human6, row_split = imm_clus6, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))

names(imm_clus6) <- colnames(expr_human6)

hc6 <- data.frame(OS_time = NA, OS_status = NA,
                 DFS_time = NA, DFS_status = NA,
                 immune_subtype = imm_clus6, met_at_daig = NA,
                 age = NA,
                 gender =  NA,
                 necrosis = NA)

GSE32981

expr_human7 <- read_csv("~/Dog/GSE32981_expr.csv")
## Rows: 12513 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Group.1
## dbl (23): GSM816942, GSM816943, GSM816944, GSM816945, GSM816946, GSM816947, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human7 <- expr_human7[!is.na(expr_human7$Group.1),]
expr_human7 <- as.data.frame(expr_human7)
rownames(expr_human7) <- expr_human7$Group.1
expr_human7 <- expr_human7[,-1]
GSE32981_pheno <- read_delim("~/Dog/GSE32981_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 23 Columns: 42
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (36): title, geo_accession, status, submission_date, last_update_date, t...
## dbl  (6): channel_count, taxid_ch1, contact_zip/postal_code, data_row_count,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
set.seed(2425)
cellprop_human7 <- t(MCPcounter.estimate(expression = expr_human7, featuresType = "HUGO_symbols", probesets = read.table("~/Dog/MCP_counter_markers.csv")))
cellprop_human7 <- apply(cellprop_human7, 2, scale)
km.res <- kmeans(cellprop_human7, centers = 3, nstart = 100, iter.max = 500)
imm_clus7 <- rep("ID", nrow(cellprop_human7))
imm_clus7[km.res$cluster == 1] <- "IE"
imm_clus7[km.res$cluster == 3] <- "IE-ECM"
Heatmap(cellprop_human7, row_split = imm_clus7, cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")))

names(imm_clus7) <- colnames(expr_human7)
sample_type_7 <- c("Primary","Met")[as.numeric(GSE32981_pheno$`origin:ch1` == "Met")+1]
treatment_stat7 <- c("naive","treated")[as.numeric(GSE32981_pheno$Treated == "Yes")+1]
hc7 <- data.frame(OS_time = NA, OS_status = NA,
                 DFS_time = as.numeric(GSE32981_pheno$MFS)*30, DFS_status = as.numeric(GSE32981_pheno$`developed metastases:ch1` == "Yes"),
                 immune_subtype = imm_clus7, met_at_daig = as.numeric(GSE32981_pheno$MFS) == 0 & !is.na(as.numeric(GSE32981_pheno$MFS)),
                 age = as.numeric(GSE32981_pheno$`age:ch1`),
                 gender =  c("M","F")[as.numeric(GSE32981_pheno$`gender:ch1` == "Female")+1],
                 necrosis = GSE32981_pheno$necrosis)
hc7$met_at_daig <- c("NON-METASTATIC","METASTATIC")[as.numeric(hc7$met_at_daig & !is.na(hc7$met_at_daig)) +1]

GSE33383

expr_human4 <- read_csv("~/Dog/GSE33383_expr.csv")
## New names:
## Rows: 30549 Columns: 100
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (99): GSM717846, GSM717847, GSM717848, GSM717849, GSM717850,
## GSM717851, ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
expr_human4 <- as.data.frame(expr_human4)
expr_human4 <- aggregate.data.frame(expr_human4[,2:ncol(expr_human4)], by = list(as.character(expr_human4[,1])), FUN = mean, na.rm = T)
rownames(expr_human4) <- expr_human4[,1]
expr_human4 <- expr_human4[,-1]
GSE33383_pheno <- read_delim("~/Dog/GSE33383_pheno.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 99 Columns: 50
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (46): title, geo_accession, status, submission_date, last_update_date, t...
## dbl  (3): channel_count, taxid_ch1, data_row_count
## lgl  (1): description.1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
expr_human4 <- expr_human4[,grepl("pre-chemotherapy biopsy", GSE33383_pheno$source_name_ch1)]
GSE33383_pheno <-GSE33383_pheno[grepl("pre-chemotherapy biopsy", GSE33383_pheno$source_name_ch1),]

set.seed(2425)
cellprop_human4 <- t(MCPcounter.estimate(expression = expr_human4, featuresType = "HUGO_symbols"))
cellprop_human4 <- apply(cellprop_human4, 2, scale)
#rownames(cellprop_primary) <- colnames(Canine_RNA_SEQ_TPM_All_data)
km.res <- kmeans(cellprop_human4, centers = 3, nstart = 100, iter.max = 500)
imm_clus4 <- rep("ID", nrow(cellprop_human4))
imm_clus4[km.res$cluster == 1] <- "IE"
imm_clus4[km.res$cluster == 3] <- "IE-ECM"
imm_clus4 <- factor(imm_clus4, levels = c("IE", "IE-ECM", "ID"))
ha = rowAnnotation(
    met_within_5yrs = GSE33383_pheno$`metastasis within 5yrs:ch1`,
    col = list("met_within_5yrs" = c("no" = "green", "yes" =  "yellow")
    )
)

names(imm_clus4) <- colnames(expr_human4)

hc44 <- data.frame(OS_time = NA, OS_status = NA,
                 DFS_time = NA, DFS_status = NA,
                 immune_subtype = imm_clus4, met_at_daig = rep("NON-METASTATIC", length(imm_clus4)),
                 age = sapply(GSE33383_pheno$`age:ch1`, function(x) as.numeric(strsplit(x," ")[[1]][1])/12),
                 gender =  c("M","F")[as.numeric(GSE33383_pheno$`gender:ch1` == "F")+1],
                 necrosis = NA)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
ct <- colnames(cellprop_human4)

GSE152048

bulk_exp_scOS <- read_csv("~/Dog/GSE152048_merged_expr.csv")
## New names:
## Rows: 32864 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (11): BC2, BC3, BC5, BC6, BC10, BC11, BC16, BC17, BC20, BC21,
## BC22
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
bulk_exp_scOS <- as.data.frame(bulk_exp_scOS)
rownames(bulk_exp_scOS) <- bulk_exp_scOS$...1
bulk_exp_scOS <- bulk_exp_scOS[,-1]
cellprop_human_scOS <- t(MCPcounter.estimate(expression = bulk_exp_scOS, featuresType = "HUGO_symbols"))
cellprop_human_scOS <- apply(cellprop_human_scOS, 2, scale)
newcolnames <- sapply(colnames(cellprop_human_scOS), function(x) gsub(" ","_",x))
names(newcolnames) <- NULL
colnames(cellprop_human_scOS) <- newcolnames
sc_sample_type <- c("Primary","Primary","Primary","Primary","Met","Recurrent","Primary","Primary","Met","Recurrent","Primary")

Build a TME subtype classifier to classify TME of new (human) tumor

samples

library(caret)
## Loading required package: lattice

## 
## Attaching package: 'lattice'

## The following object is masked from 'package:clusterProfiler':
## 
##     dotplot

## 
## Attaching package: 'caret'

## The following object is masked from 'package:purrr':
## 
##     lift

## The following object is masked from 'package:survival':
## 
##     cluster
library(e1071)
library(naivebayes)
## naivebayes 0.9.7 loaded
set.seed(268548)

df <- cbind(data.frame(rbind(cellprop_rnaseq[sample_type == "Primary",], cellprop_nano[sstatus == "Primary Tumor",])), imm_clus = c(imm_clus[sample_type=="Primary"],imm_clus_nano[sstatus=="Primary Tumor"]))
df$imm_clus <- as.factor(df$imm_clus)
colnames(df) <- gsub("\\.","_",colnames(df))
df <- df[,colnames(df)[c(1,3:ncol(df))]]
ind <- c(sample(which(df$imm_clus == "IE"),size = 50,replace = T), sample(which(df$imm_clus == "IE-ECM"),size = 50,replace = T), sample(which(df$imm_clus == "ID"),size = 50,replace = T))

#apply data augmentation
cp1 <- t(apply(df[ind,1:(ncol(df))-1],1,function(x) x + rnorm((ncol(df))-1, mean = 0, sd = 0.01)))
df1 <- cbind(data.frame(cp1), df$imm_clus[ind])
colnames(df1) <- colnames(df)
control <- trainControl(method = "cv", number=10)
nb <- train(imm_clus ~ ., data = df1, "naive_bayes", trainControl = control, tuneGrid = data.frame(usekernel = FALSE, laplace = 0, adjust=0))

Apply trained canine model to human datasets

cellprop_allhuman <- rbind(cellprop_human[,ct],rbind(cellprop_human2[,ct],rbind(cellprop_human3[,ct],rbind(cellprop_human4[,ct], rbind(cellprop_human5[,ct], rbind(cellprop_human6[,ct], cellprop_human7[,ct]))))))

set.seed(2425)
km.res <- kmeans(cellprop_allhuman, centers = 3, nstart = 100, iter.max = 500)
imm_clus_all <- rep("ID", nrow(cellprop_allhuman))
imm_clus_all[km.res$cluster == 1] <- "IE"
imm_clus_all[km.res$cluster == 3] <- "IE-ECM"
imm_clus_all <- factor(imm_clus_all, levels = c("IE", "IE-ECM", "ID"))

human_test <- cbind(data.frame(cellprop_allhuman), imm_clus = imm_clus_all)
colnames(human_test) <- gsub("\\.","_", colnames(human_test))
preds <- predict(nb, human_test)
confusionMatrix(preds, human_test$imm_clus)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  IE IE-ECM  ID
##     IE      42      0   0
##     IE-ECM  50    103  11
##     ID       0     91  97
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6142          
##                  95% CI : (0.5642, 0.6625)
##     No Information Rate : 0.4924          
##     P-Value [Acc > NIR] : 7.796e-07       
##                                           
##                   Kappa : 0.3966          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: IE Class: IE-ECM Class: ID
## Sensitivity             0.4565        0.5309    0.8981
## Specificity             1.0000        0.6950    0.6818
## Pos Pred Value          1.0000        0.6280    0.5160
## Neg Pred Value          0.8580        0.6043    0.9466
## Prevalence              0.2335        0.4924    0.2741
## Detection Rate          0.1066        0.2614    0.2462
## Detection Prevalence    0.1066        0.4162    0.4772
## Balanced Accuracy       0.7283        0.6130    0.7900
sc_merged_subtypes <- predict(nb, cellprop_human_scOS[, colnames(human_test)[1:9]])
rownames(cellprop_human_scOS) <- colnames(bulk_exp_scOS)
Heatmap(cellprop_human_scOS, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")))

names(sc_merged_subtypes) <- colnames(bulk_exp_scOS)

Merge all human datasets

ds <- c(rep("TARGET", length(imm_clus_target)), rep("GSE21257", length(imm_clus2)), rep("GSE39055", length(imm_clus3)), rep("GSE33383",length(imm_clus4)), rep("GSE16091",length(imm_clus5)), rep("GSE30699",length(imm_clus6)), rep("GSE32981", length(imm_clus7)))

sample_type_bulk <- c(rep("Primary", length(imm_clus_target) + length(imm_clus2) + length(imm_clus3) + length(imm_clus4) + length(imm_clus5) + length(imm_clus6)), sample_type_7)
sample_type_all <- c(sample_type_bulk,sc_sample_type)
treatment_bulk <- c(rep("naive", length(imm_clus_target) + length(imm_clus2) + length(imm_clus3) + length(imm_clus4) + length(imm_clus5) + length(imm_clus6)), c("naive","post-chemo")[as.numeric(treatment_stat7 == "post-chemo")+1])
treatment <- c(treatment_bulk, rep("post-chemo",length(sc_merged_subtypes)))
platform = c(c("microarray","RNASeq")[as.numeric(ds == "TARGET")+1],rep("10x(scRNASeq)", length(sc_merged_subtypes)))
ds <- c(ds, rep("GSE152048", length(sc_merged_subtypes)))

ha = rowAnnotation(
    dataset = ds,
    platform = platform,
    sample = sample_type_all,
    treatment_status = treatment,
    col = list("dataset" = c("TARGET" = "#AF32CD", "GSE21257" =  "cyan","GSE39055" = "#A35711", "GSE33383" = "magenta", "GSE16091" = "#4500B2","GSE30699" = "#BCFF96", "GSE152048" = "#FB4232","GSE32981" = "#6775CD"),
               "platform" = c("microarray" = "#929292",
                              "RNASeq" = "#E2E0E7",
                              "10x(scRNASeq)" = "#3B3BFF"),
               "sample" = c("Primary" = "yellow", "Met" = "darkgreen", "Recurrent" = "darkorange"),
               "treatment_status" = c("naive" = "#EDAB32","post-chemo" = "#B30721"))
    
)
Heatmap(rbind(cellprop_allhuman,cellprop_human_scOS[,c(1,3:10)]), row_split = c(preds,sc_merged_subtypes), cluster_columns = F, cluster_row_slices = F, row_gap = unit(5, "mm"), row_title_gp = gpar(col = c("#00BA38", "#619CFF","#F8766D")), right_annotation = ha)

#chromosomal instability signature
names(imm_clus_all) <- c(names(imm_clus_target),names(imm_clus2), names(imm_clus3), names(imm_clus4))
CIN_1 <- CIN_signature(ee)
CIN_2 <- CIN_signature(expr_human2)
CIN_3 <- CIN_signature(expr_human3)
CIN_4 <- CIN_signature(expr_human4)
CIN_5 <- CIN_signature(expr_human5)
CIN_6 <- CIN_signature(expr_human6)
CIN_7 <- CIN_signature(expr_human7)
CIN_8 <- CIN_signature(bulk_exp_scOS)
CIN_all <- c(CIN_1,CIN_2,CIN_3,CIN_4, CIN_5, CIN_6, CIN_7, CIN_8)
names(CIN_all) <- c(names(imm_clus_all), names(sc_merged_subtypes))
p1 <- ggplot(data = data.frame(subtype = c(preds, sc_merged_subtypes), CIN_score = CIN_all), aes(x = subtype,y=CIN_score)) + geom_boxplot() +theme_bw() + stat_compare_means(method = "anova")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/CIN_human.pdf", width = 4, height = 4)


#type 1 interferon signature
type1_interferon_signature <- function(expr) {
  genes <- c("IFNA1", "IFNA2", "IFNA4", "IFNA5", "IFNA6", "IFNA7", "IFNA8", "IFNA10", "IFNA13", "IFNA14", "IFNA16", "IFNA17", "IFNA21", "IFNW1", "IFNE", "IFNK","IFNB1")
    mat <- as.data.frame(expr)
    mat <- mat[genes,]
    mat <- apply(as.matrix(mat),1,scale)
    scores <- rowMeans(mat, na.rm = T)
    return(scores)
}

IN_1 <- type1_interferon_signature(ee)
IN_2 <- type1_interferon_signature(expr_human2)
IN_3 <- type1_interferon_signature(expr_human3)
IN_4 <- type1_interferon_signature(expr_human4)
IN_5 <- type1_interferon_signature(expr_human5)
IN_6 <- type1_interferon_signature(expr_human6)
IN_7 <- type1_interferon_signature(expr_human7)
IN_8 <- type1_interferon_signature(bulk_exp_scOS)
IN_all <- c(IN_1,IN_2,IN_3,IN_4, IN_5, IN_6, IN_7, IN_8)
names(IN_all) <- c(names(imm_clus_all), names(sc_merged_subtypes))
p1 <- ggplot(data = data.frame(subtype = c(preds, sc_merged_subtypes), IN_score = IN_all), aes(x = subtype,y=IN_score)) + geom_boxplot() +theme_bw() + stat_compare_means( method = "anova")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/IN_human.pdf", width = 4, height = 4)


#Cytotoxic t cell signature (Rooney et al)
CYT_signature_human <- function(expr){
    genes <- c("GZMA","PRF1")
    mat <- as.data.frame(expr)
    mat <- (mat[genes,])
    scores <- sqrt(mat[1,]*mat[2,])
    return(scores)
}
CYT <- list()
CYT[[1]] <- CYT_signature_human(ee)
CYT[[2]] <- CYT_signature_human(expr_human2)
CYT[[3]] <- CYT_signature_human(expr_human3)
CYT[[4]] <- CYT_signature_human(expr_human4)
CYT[[5]] <- CYT_signature_human(expr_human5)
CYT[[6]] <- CYT_signature_human(expr_human6)
CYT[[7]] <- CYT_signature_human(expr_human7)
CYT[[8]] <- CYT_signature_human(bulk_exp_scOS)
CYT_all <- c()
for(i in 1: length(CYT)){
  cyt <-  as.numeric(CYT[[i]])
  names(cyt) <- colnames(CYT[[i]])
  CYT_all <- c(CYT_all,cyt)
}

p1 <- ggplot(data = data.frame(subtype = c(preds, sc_merged_subtypes), CYT_score = CYT_all), aes(x = subtype,y=CYT_score)) + geom_boxplot() +theme_bw() + stat_compare_means( method = "anova")
print(p1) + ylab("Cytotoxicity Score")

#ggsave(plot = p1, filename = "~/Dog/CYT_human.pdf", width = 4, height = 4)


#expression of checkpoints
chkpoint_exp <- function(expr){
  out <- t(log2(data.frame(expr)[c("PDCD1","CD274","CTLA4","TIGIT","LAG3","HAVCR2"),]+1))
  colnames(out) <- c("PDCD1","CD274","CTLA4","TIGIT","LAG3","HAVCR2")
  return(data.frame(apply(out,2,scale)))
}

CP <- list()
CP[[1]] <- chkpoint_exp(ee)
CP[[2]] <- chkpoint_exp(expr_human2)
CP[[3]] <- chkpoint_exp(expr_human3)
CP[[4]] <- chkpoint_exp(expr_human4)
CP[[5]] <- chkpoint_exp(expr_human5)
CP[[6]] <- chkpoint_exp(expr_human6)
CP[[7]] <- chkpoint_exp(expr_human7)
CP[[8]] <- chkpoint_exp(bulk_exp_scOS)
CP_all <- Reduce(rbind, CP)

out <- data.frame(CP_all, subtype= c(preds, sc_merged_subtypes))
out <- out[!is.na(rowSums(out[,1:6])),]
dd_out <- melt(out, id.vars = "subtype")
colnames(dd_out) <- c("subtype","ICI_target","scaled_expression")
dd_out$ICI_target <- as.character(dd_out$ICI_target)
dd_out$subtype <- factor(dd_out$subtype, levels = c("IE","IE-ECM","ID"))
dd_out$ICI_target[dd_out$ICI_target == "PDCD1"] = "PD1"
dd_out$ICI_target[dd_out$ICI_target == "CD274"] = "PDL1"
dd_out$ICI_target[dd_out$ICI_target == "HAVCR2"] = "TIM3"
#p1 <- ggplot(dd_out, aes(x = subtype, y = log2TPM)) +
#  facet_wrap(~ICI_target, scales = "free_y") + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif") + theme_bw() + theme(text = element_text(size = 15)) + xlab("ICI target") + ylab("expression (log2(TPM)")

p1 <- ggboxplot(dd_out, x = "subtype", y="scaled_expression", facet.by = "ICI_target") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("normalized expression")
print(p1)

#ggsave(plot = p1,filename = "~/Dog/human_immune_checkpoints.pdf", width = 6, height = 8)

Evaluate progonostic power of TME subtype predictons using matched progression free survival outcome data

hc4 <- rbind(rbind(hc,hc2),rbind(hc3,rbind(hc44,rbind(hc5,rbind(hc6,hc7)))))
hc4$immune_subtype_1 <- imm_clus_all
hc4$ml_class <- preds
hc4$cyt <- CYT_all[names(imm_clus_all)]
hc4 <- hc4[sample_type_bulk == "Primary",]


surv <- survfit(Surv(OS_time, OS_status) ~ ml_class, data = hc4[!is.na(hc4$OS_time),])
diff <- survdiff(Surv(OS_time, OS_status) ~ ml_class, data = hc4[!is.na(hc4$OS_time),])

p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$OS_time),],
                  
                  legend.title = "",
                 legend = "right",
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                 xlim = c(0,4500),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                 palette = c("#882255","#CC6677","#88CCEE"),
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
#print(p1)
#ggsave(filename = "~/Dog/Human_OS_pred_primary.pdf", plot = p1$plot, width = 7, height = 4)


surv <- survfit(Surv(DFS_time, DFS_status) ~ ml_class, data = hc4[!is.na(hc4$DFS_time),])
diff <- survdiff(Surv(DFS_time, DFS_status) ~ ml_class, data = hc4[!is.na(hc4$DFS_time),])

p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$DFS_time),],

                  legend.title = "",
                 legend = "right",
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                 xlim = c(0,4500),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                 palette = c("#882255","#CC6677","#88CCEE"),
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)

#ggsave(filename = "~/Dog/Human_DFS_primary.pdf", plot = p1$plot, width = 7, height = 4)
#ggsave(p1$plot, filename = "~/Dog/aim3_human_DFS.pdf")

#plot proprtion of patients developing mets in 5 years for each TME subtype (GSE33383)

pp <- as.character(preds)
names(pp)<- names(imm_clus_all)
dd_m <- as.matrix(table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)], GSE33383_pheno$`metastasis within 5yrs:ch1`[!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)]))
dd_m <- (dd_m/rowSums(dd_m))*100
dd_m <- melt(dd_m)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
colnames(dd_m) <- c("subtype","mets_within_5yrs","percentage")
dd_m$subtype <- factor(dd_m$subtype,levels = c("IE","IE-ECM","ID"))
p1 <- ggplot(as.data.frame(dd_m), aes(fill = mets_within_5yrs,
                      y = percentage, x = subtype))+
geom_bar(position = "fill", stat = "identity")+
ggtitle("Metastasis rates")+
theme(plot.title = element_text(hjust = 0.5)) + theme_bw()

print(p1)

#ggsave(filename = "~/Dog/Human_5yr_mets.pdf", plot = p1, width = 5, height = 4)


chisq.test(table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)], GSE33383_pheno$`metastasis within 5yrs:ch1`[!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)]))
## Warning in
## chisq.test(table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within
## 5yrs:ch1`)], : Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  table(pp[names(imm_clus4)][!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)],     GSE33383_pheno$`metastasis within 5yrs:ch1`[!is.na(GSE33383_pheno$`metastasis within 5yrs:ch1`)])
## X-squared = 7.7036, df = 2, p-value = 0.02124

Evaluate prognostic power of cytolytic sores

hc4$cyt_score <- c("low","high")[as.numeric(hc4$cyt > median(hc4$cyt, na.rm = TRUE))+1]
surv <- survfit(Surv(OS_time, OS_status) ~ cyt_score, data = hc4[!is.na(hc4$OS_time),])
diff <- survdiff(Surv(OS_time, OS_status) ~ cyt_score, data = hc4[!is.na(hc4$OS_time),])

p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$OS_time),],
                  
                  legend.title = "",
                 legend = "right",
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                 xlim = c(0,4500),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                 palette = c("#882255","#88CCEE","#CC6677"),
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
#print(p1)
#ggsave(filename = "~/Dog/Human_OS_pred_primary.pdf", plot = p1$plot, width = 7, height = 4)


surv <- survfit(Surv(DFS_time, DFS_status) ~ cyt_score, data = hc4[!is.na(hc4$DFS_time),])
diff <- survdiff(Surv(DFS_time, DFS_status) ~ cyt_score, data = hc4[!is.na(hc4$DFS_time),])

p1 <- ggsurvplot(surv, data = hc4[!is.na(hc4$DFS_time),],

                  legend.title = "",
                 legend = "right",
                  conf.int = F,
                  pval = TRUE,
                  risk.table = TRUE,
                  tables.height = 0.2,
                  tables.theme = theme_cleantable(),
                  risk.table.y.text = FALSE,
                 pval.coord = c(0, 0.03),
                 xlim = c(0,4500),
                  # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                  # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                 palette = c("#882255","#88CCEE", "#CC6677"),
                  ggtheme = theme(text = element_text(size = 15)) +theme_bw() # Change ggplot2 theme
) + xlab("Time (days)")
print(p1)

Run multivariate cox proportional hazards analysis to account for confounding effects

cph_dat <- data.frame(time = hc4$DFS_time,
subtype = hc4$ml_class,
cyt_score = hc4$cyt,
status = hc4$DFS_status,
stage = hc4$met_at_daig,
gender = hc4$gender,
age = scale(hc4$age, center = T, scale = T), necrosis = hc4$necrosis)
cph_dat <- cph_dat[!is.na(cph_dat$necrosis) & !is.na(hc4$DFS_time),]
cph_dat$subtype <- relevel(as.factor(cph_dat$subtype), ref = "ID")
gender <- relevel(as.factor(cph_dat$gender), ref = "M")
cph_dat$stage[cph_dat$stage == "NON-METASTATIC"] <- "LOCALIZED"
cph_dat$stage <- relevel(as.factor(cph_dat$stage), ref = "LOCALIZED")
cph_dat$predictor <- rep(3, nrow(cph_dat))
cph_dat$predictor[cph_dat$subtype == "IE-ECM"] <- 2
cph_dat$predictor[cph_dat$subtype == "IE"] <- 1

fit <- coxph(Surv(time, status) ~ age + gender + stage + necrosis + subtype, data = cph_dat)
p2 <-ggforest(fit, data = cph_dat,fontsize = 0.7, noDigits = 2) + theme(text = element_text(size = 25))
print(p2)

#ggsave(plot = p2, filename = "~/Dog/human_cph_results_cyt.pdf", width = 5, height = 6)

plot distribution of mcp counter scores for both canine and human samples

dd_rnaseq <- data.frame(cellprop_rnaseq[,c(1:10)], subtype = imm_clus)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable",) + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means( method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/caninernaseq_mcpcounter_boxplots.pdf", width = 14, height = 9)

dd_rnaseq <- data.frame(cellprop_nano[,c(1:10)], subtype = imm_clus_nano)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/caninenano_mcpcounter_boxplots.pdf", width = 14, height = 9)

dd_rnaseq <- data.frame(rbind(cellprop_rnaseq[,c(1,3:10)],cellprop_nano[,c(1,3:10)]), subtype = c(imm_clus,imm_clus_nano))
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/allcanine_mcpcounter_boxplots.pdf", width = 11, height = 9)
dd_rnaseq <- data.frame(rbind(cellprop_allhuman), subtype = preds)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(method = "anova") + xlab("") + ylab("MCP counter score (normalized)")
print(p1)

#ggsave(plot = p1, filename = "~/Dog/allhuman_mcpcounter_boxplots.pdf", width = 11, height = 9)

read single cell celltype fractions from Zhou et al, Nat Comm 2020 and compare against predicted TME subtypes

human_OS_sc_celltypes <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S5a", skip = 2)
## New names:
## • `` -> `...1`
human_OS_sc_celltypes$...1 <- sapply(human_OS_sc_celltypes$...1, function(x) strsplit(x,split = "\\.")[[1]][2])
human_OS_sc_celltypes <- human_OS_sc_celltypes[!is.na(human_OS_sc_celltypes$...1),]
names(human_OS_sc_celltypes$...1) <- NULL
human_OS_sc_celltypes <- as.data.frame(human_OS_sc_celltypes)
rownames(human_OS_sc_celltypes) <- human_OS_sc_celltypes$...1
human_OS_sc_celltypes <- human_OS_sc_celltypes[,-1]
human_OS_sc_celltypes <- as.data.frame(t(human_OS_sc_celltypes))

scOS_cancercell <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S6b", skip = 2)
## New names:
## • `` -> `...1`
scOS_cancercell <- as.data.frame(scOS_cancercell)
rownames(scOS_cancercell) <- scOS_cancercell$...1
scOS_cancercell <- scOS_cancercell[,-1]
scOS_cancercell <- as.data.frame(scOS_cancercell[rownames(human_OS_sc_celltypes),])


scOS_chondro <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S6g", skip = 1)
scOS_chondro <- as.data.frame(scOS_chondro)
rownames(scOS_chondro) <- scOS_chondro$Patient
scOS_chondro <- scOS_chondro[,-1]
scOS_chondro <- scOS_chondro[,-5]
scOS_chondro <- as.data.frame(scOS_chondro[rownames(human_OS_sc_celltypes),])


scOS_osteoclast <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S11c", skip = 1)
scOS_osteoclast <- as.data.frame(scOS_osteoclast)
rownames(scOS_osteoclast) <- scOS_osteoclast$`Cell type`
scOS_osteoclast <- scOS_osteoclast[,-1]
scOS_osteoclast <- t(scOS_osteoclast)
scOS_osteoclast <- as.data.frame(scOS_osteoclast[rownames(human_OS_sc_celltypes),])

scOS_MSC <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S14a", skip = 1)
scOS_MSC <- as.data.frame(scOS_MSC)
rownames(scOS_MSC) <- scOS_MSC$`Cell type`
scOS_MSC<- scOS_MSC[,-1]
scOS_MSC <- t(scOS_MSC)
scOS_MSC <- as.data.frame(scOS_MSC[rownames(human_OS_sc_celltypes),])

scOS_CAF <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S14d", skip = 1)
scOS_CAF <- as.data.frame(scOS_CAF)
rownames(scOS_CAF) <- scOS_CAF$`Cell type`
scOS_CAF <- scOS_CAF[,-1]
scOS_CAF <- t(scOS_CAF)
scOS_CAF <- as.data.frame(scOS_CAF[rownames(human_OS_sc_celltypes),])

scOS_Myeloid <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S15b", skip = 1)
scOS_Myeloid <- as.data.frame(scOS_Myeloid)
rownames(scOS_Myeloid) <- scOS_Myeloid$`Cell type`
scOS_Myeloid <- scOS_Myeloid[,-1]
scOS_Myeloid <- t(scOS_Myeloid)
scOS_Myeloid <- as.data.frame(scOS_Myeloid[rownames(human_OS_sc_celltypes),])


scOS_TIL <- read_excel("~/Dog/sourcedat_human_OS_singlecell.xlsx", sheet = "Figure S16b", skip = 1)
scOS_TIL <- as.data.frame(scOS_TIL)
rownames(scOS_TIL) <- scOS_TIL$Patient
scOS_TIL <- scOS_TIL[,-1]
scOS_TIL <- as.data.frame(scOS_TIL[rownames(human_OS_sc_celltypes),])


total <- rowSums(human_OS_sc_celltypes)
scOS_cancercell$Osteoblastic_1 <- scOS_cancercell$Osteoblastic_1/total
scOS_cancercell$Osteoblastic_2 <- scOS_cancercell$Osteoblastic_2/total
scOS_cancercell$Osteoblastic_3 <- scOS_cancercell$Osteoblastic_3/total
scOS_cancercell$Osteoblastic_4 <- scOS_cancercell$Osteoblastic_4/total
scOS_cancercell$Osteoblastic_5 <- scOS_cancercell$Osteoblastic_5/total
scOS_cancercell$Osteoblastic_6 <- scOS_cancercell$Osteoblastic_6/total

scOS_cancercell$`Chondroblastic OS` <- scOS_cancercell$`Chondroblastic OS`/total
scOS_chondro$Chondro_Proli <- scOS_chondro$Chondro_Proli/total
scOS_chondro$Chondro_hyper_1 <- scOS_chondro$Chondro_hyper_1/total
scOS_chondro$Chondro_hyper_2 <- scOS_chondro$Chondro_hyper_2/total
scOS_chondro$Chondro_trans_diff <- scOS_chondro$Chondro_trans_diff/total

scOS_osteoclast$OC_progenitor <- scOS_osteoclast$OC_progenitor/total
scOS_osteoclast$OC_immature <- scOS_osteoclast$OC_immature/total
scOS_osteoclast$OC_mature <- scOS_osteoclast$OC_mature/total

scOS_CAF$`COL14A1+ fibroblast` <- scOS_CAF$`COL14A1+ fibroblast` / total
scOS_CAF$`Smooth muscle cell` <- scOS_CAF$`Smooth muscle cell` / total
scOS_CAF$Myofibroblast <- scOS_CAF$Myofibroblast / total

scOS_MSC$`NT5E+MSC` <- scOS_MSC$`NT5E+MSC` / total
scOS_MSC$`WISP2+MSC` <- scOS_MSC$`WISP2+MSC` / total
scOS_MSC$`CLEC11A+MSC` <- scOS_MSC$`CLEC11A+MSC` / total

scOS_Myeloid$`CD14+_monocytes` <- scOS_Myeloid$`CD14+_monocytes` / total
scOS_Myeloid$`CXCL8+_monocytes` <- scOS_Myeloid$`CXCL8+_monocytes` / total
scOS_Myeloid$M2_TAM <- scOS_Myeloid$M2_TAM / total
scOS_Myeloid$M1_macrophage <- scOS_Myeloid$M1_macrophage / total
scOS_Myeloid$`FABP4+_macrophage` <- scOS_Myeloid$`FABP4+_macrophage` / total
scOS_Myeloid$Neutrophil <- scOS_Myeloid$Neutrophil / total
scOS_Myeloid$`CD14+/CD163+_DC` <- scOS_Myeloid$`CD14+/CD163+_DC` / total
scOS_Myeloid$`CD1C+_DC` <- scOS_Myeloid$`CD1C+_DC` / total
scOS_Myeloid$`CD141+/CLEC9A+_DC` <- scOS_Myeloid$`CD141+/CLEC9A+_DC` / total

scOS_TIL$`CD4+T` <- scOS_TIL$`CD4+T` / total
scOS_TIL$`CD4-/CD8-` <- scOS_TIL$`CD4-/CD8-` / total
scOS_TIL$`T-reg` <- scOS_TIL$`T-reg` / total
scOS_TIL$`CXCL13+T` <- scOS_TIL$`CXCL13+T` / total
scOS_TIL$`CD8+T` <- scOS_TIL$`CD8+T` / total
scOS_TIL$NKT <- scOS_TIL$NKT / total
scOS_TIL$NK <- scOS_TIL$NK / total
scOS_TIL$B_cell <- scOS_TIL$B_cell / total
scOS_TIL$`Proliferating T` <- scOS_TIL$`Proliferating T` / total


scOS_detailed <- list()
scOS_detailed[[1]] <- scOS_cancercell
scOS_detailed[[2]] <- scOS_chondro
scOS_detailed[[3]] <- scOS_osteoclast
scOS_detailed[[4]] <- scOS_TIL
scOS_detailed[[5]] <- scOS_Myeloid
scOS_detailed[[6]] <- scOS_CAF
scOS_detailed[[7]] <- data.frame(Pericytes = human_OS_sc_celltypes$Pericytes) / total
scOS_detailed[[8]] <- scOS_MSC
scOS_detailed[[9]] <- data.frame(Myoblast = human_OS_sc_celltypes$Myoblast) / total
scOS_detailed[[10]] <- data.frame(Endothelial = human_OS_sc_celltypes$Endothelial) / total

scOS_detailed_all <- cbind(scOS_detailed[[1]], scOS_detailed[[2]],
                           scOS_detailed[[3]], scOS_detailed[[4]],
                           scOS_detailed[[5]], scOS_detailed[[6]],
                           scOS_detailed[[7]], scOS_detailed[[8]],
                           scOS_detailed[[9]], scOS_detailed[[10]])

#plot results
ha = rowAnnotation(
    sample = sc_sample_type,
    subtype = sc_merged_subtypes,
    col = list("sample" = c("Primary" = "yellow", "Met" =  "darkgreen", "Recurrent" = "orange"),
               "subtype" = c("IE" = "#882255", "IE-ECM" =  "#CC6677", "ID" = "#88CCEE")
    )
)

aa <- apply(scOS_detailed_all[names(sc_merged_subtypes),],2,scale)
rownames(aa) <- names(sc_merged_subtypes)

p1 <- Heatmap(aa, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, cluster_rows = F, row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")), right_annotation = ha)
print(p1)

#pdf("~/Dog/scOS_heatmap_extended.pdf", width = 12, height = 7)
#print(p1)
#dev.off()



aa <- human_OS_sc_celltypes[names(sc_merged_subtypes),]
aa <- human_OS_sc_celltypes[names(sc_merged_subtypes),]/rowSums(human_OS_sc_celltypes[names(sc_merged_subtypes),])
aa <- apply(aa,2,scale)
rownames(aa) <- names(sc_merged_subtypes)
Heatmap(aa, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, cluster_rows = F,row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")), right_annotation = ha)

#pdf("~/Dog/scOS_heatmap.pdf")
#Heatmap(aa, row_split = sc_merged_subtypes, cluster_columns = F, cluster_row_slices = F, cluster_rows = F,row_gap = unit(1, "mm"), row_title_gp = gpar(col = c("#F8766D","#00BA38", "#619CFF")), right_annotation = ha)
#dev.off()

dd_rnaseq <- data.frame(scOS_detailed_all[names(sc_merged_subtypes),], subtype = sc_merged_subtypes)
dd_rnaseq <- melt(dd_rnaseq, id.vars = "subtype")
p1 <- ggboxplot(dd_rnaseq, x = "subtype", y="value", facet.by = "variable") + theme_bw() + theme(text = element_text(size = 15)) + stat_compare_means(comparisons = list(c("IE","IE-ECM"),c("IE-ECM","ID"),c("IE","ID")), method = "wilcox", label = "p.signif") + xlab("") + ylab("proportions")
print(p1)
## Warning in wilcox.test.default(0, c(0, 0, 0.000821900486817981, 0,
## 6.94251596778673e-05, : cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.00011887779362815, 0.00690483338336836, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0, 0, 0.0832858607724735, 0), paired =
## FALSE): cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0, 0, 0, 0, 0, 0), paired = FALSE): cannot
## compute exact p-value with ties

## Warning in wilcox.test.default(c(0, 0, 0.0832858607724735, 0), c(0, 0, 0, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0.000356633380884451, 0, 0.191409365386806,
## : cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0, 0.00137589433131536,
## 0.000632231143706139, : cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.000356633380884451, 0, 0.191409365386806, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(0.152477763659466, c(0.0306704707560628, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.0306704707560628, 0.0054037826478535, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(0.00381194409148666, c(0.00855920114122682, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.00855920114122682, 0, 0, 0.003397341211226:
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0.000713266761768902, 0, 0, 0), paired =
## FALSE): cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.000713266761768902, 0, 0, 0),
## c(0.000755144421370587, : cannot compute exact p-value with ties

## Warning in wilcox.test.default(0.0986022871664549, c(0, 0, 0, 0, 0,
## 0.00034965034965035: cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.00546837850689491, 0, 0.000455736584254301,
## : cannot compute exact p-value with ties

## Warning in wilcox.test.default(0.000762388818297332, c(0.00011887779362815, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.00011887779362815, 0, 0.000227868292127151,
## : cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(1.41319293865906e-06, 0.000684958606424497,
## : cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0, 0, 0, 0, 0, 0), paired = FALSE): cannot
## compute exact p-value with ties

## Warning in wilcox.test.default(c(1.41319293865906e-06, 0.000684958606424497, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0.000188786105342647, 0,
## 0.000189669343111842, : cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0.000356633380884451, 0.000300210147102972, :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0, 0.000300210147102972, 0.00478523413467016,
## : cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0, 0.032422695887121, 0,
## 0.000295420974889217: cannot compute exact p-value with ties

## Warning in wilcox.test.default(0, c(0, 0, 0.000126446228741228, 0, 0, 0), :
## cannot compute exact p-value with ties

## Warning in wilcox.test.default(c(0, 0.032422695887121, 0, 0.000295420974889217:
## cannot compute exact p-value with ties