@@ -26,32 +26,32 @@ mergeSamples<-function(){
26
26
distinct()
27
27
28
28
# ##########################
29
- # # CCLE Cell Line data
29
+ # # Broad Sanger data
30
30
# # We have many more cancer types here, so we try to map what we have to CPTAC names and then adjust the rest
31
31
# #
32
32
# ##########################
33
- depmap <- readr :: read_csv(' depmap_samples .csv' )| >
33
+ broad_sanger <- readr :: read_csv(' broad_sanger_samples .csv' )| >
34
34
dplyr :: mutate(`Cell line cancer type` = cancer_type )| >
35
35
mutate(sampleSource = ' CCLE' )
36
36
37
- allec <- grep(' Endometrial' ,depmap $ `Cell line cancer type` )
38
- depmap $ `Cell line cancer type` [allec ]<- ' Uterine corpus endometrial carcinoma'
37
+ allec <- grep(' Endometrial' ,broad_sanger $ `Cell line cancer type` )
38
+ broad_sanger $ `Cell line cancer type` [allec ]<- ' Uterine corpus endometrial carcinoma'
39
39
40
- depmap <- depmap | >
40
+ broad_sanger <- broad_sanger | >
41
41
left_join(cmaps )
42
42
43
43
# #first we collect the names of the cancers that are NOT in CPTAC
44
- other_cans <- which(is.na(depmap $ `CPTAC Cancer type` ))
45
- depmap $ `CPTAC Cancer type` [other_cans ]<- depmap $ `Cell line cancer type` [other_cans ]
46
- depmap <- depmap | >
44
+ other_cans <- which(is.na(broad_sanger $ `CPTAC Cancer type` ))
45
+ broad_sanger $ `CPTAC Cancer type` [other_cans ]<- broad_sanger $ `Cell line cancer type` [other_cans ]
46
+ broad_sanger <- broad_sanger | >
47
47
dplyr :: select(improve_sample_id ,`CPTAC Cancer type` ,model_type ,species ,sampleSource )| >
48
48
distinct()
49
49
50
50
# then we rename the NA values to 'Other' if we want
51
- # other_cans<-which(is.na(depmap $`CPTAC Cancer type`))
52
- # depmap $`CPTAC Cancer type`[other_cans]<-'Other'
51
+ # other_cans<-which(is.na(broad_sanger $`CPTAC Cancer type`))
52
+ # broad_sanger $`CPTAC Cancer type`[other_cans]<-'Other'
53
53
# or just remove them
54
- depmap <- depmap | >
54
+ broad_sanger <- broad_sanger | >
55
55
subset(! is.na(`CPTAC Cancer type` ))
56
56
57
57
# ##########################
@@ -61,10 +61,8 @@ mergeSamples<-function(){
61
61
hcmi <- readr :: read_csv(' hcmi_samples.csv' )| >
62
62
dplyr :: rename(id_source = ' other_id_source' )| >
63
63
mutate(species = ' human' )| >
64
- subset(model_type %in% c(' 3D Organoid ' ,' Solid Tissue ' ,' Adherent Cell Line ' ))| >
64
+ subset(model_type %in% c(' organoid ' ,' tumor ' , ' cell line ' ,' Patient derived xenograft ' ))| >
65
65
dplyr :: mutate(`HCMI Cancer type` = cancer_type ,`HCMI Common name` = common_name )| >
66
- dplyr :: mutate(model_type = stringr :: str_replace_all(model_type ,' Solid Tissue' ,' Tumor' ))| >
67
- dplyr :: mutate(model_type = stringr :: str_replace_all(model_type ,' Adherent Cell Line' ,' cell line' ))| >
68
66
left_join(cmaps )| >
69
67
mutate(sampleSource = ' HCMI' )| >
70
68
dplyr :: select(improve_sample_id ,`CPTAC Cancer type` ,model_type ,species ,sampleSource )| >
@@ -80,18 +78,23 @@ mergeSamples<-function(){
80
78
baml <- readr :: read_csv(" beataml_samples.csv" )| >
81
79
mutate(cancer_type = ' Acute myeloid leukemia' )| >
82
80
mutate(species = ' Human' )| >
83
- mutate(model_type = ' Tumor ' )| >
81
+ mutate(model_type = ' tumor ' )| >
84
82
mutate(sampleSource = ' BeatAML' )| >
85
83
dplyr :: select(improve_sample_id ,species ,cancer_type ,sampleSource ,model_type )| >
86
84
distinct()
87
85
# ##########################
88
- # # TCGA SAMPLE DATA
86
+ # # MPNST SAMPLE DATA
89
87
# # TBD
90
88
# ##########################
91
-
89
+ mpnst <- readr :: read_csv(" mpnst_samples.csv" )| >
90
+ mutate(cancer_type = ' Neurofibromatosis' )| >
91
+ mutate(species = ' Human' )| >
92
+ mutate(sampleSource = ' MPNST' )| >
93
+ dplyr :: select(improve_sample_id ,species ,cancer_type ,sampleSource ,model_type )| >
94
+ distinct()
92
95
93
96
# #now we join them into a single table, with cancer type
94
- fulldat <<- rbind(cptac ,depmap ,hcmi )| >
97
+ fulldat <<- rbind(cptac ,broad_sanger ,hcmi )| >
95
98
dplyr :: rename(cancer_type = `CPTAC Cancer type` )| >
96
99
subset()
97
100
@@ -100,6 +103,11 @@ mergeSamples<-function(){
100
103
distinct()| >
101
104
rbind(baml )
102
105
106
+ fulldat <- fulldat | >
107
+ dplyr :: select(improve_sample_id ,species ,cancer_type ,sampleSource ,model_type )| >
108
+ distinct()| >
109
+ rbind(mpnst )
110
+
103
111
models <- fulldat | >
104
112
group_by(cancer_type )| >
105
113
summarize(num_models = n_distinct(model_type ))| >
@@ -126,67 +134,76 @@ stats<-fulldat|>
126
134
subset(model_type != ' Not Reported' )| >
127
135
subset(numSamps > 1 )
128
136
129
- color_palette <- brewer.pal(n = 3 , name = " Set2" )
137
+ color_palette <- brewer.pal(n = 4 , name = " Set2" )
130
138
131
139
# Assign colors to the model types
132
- names(color_palette ) <- c(" Tumor " , " cell line" , " 3D Organoid " )
133
- background_color <- " #E0F2F1 "
140
+ names(color_palette ) <- c(" tumor " , " cell line" , " organoid " , ' Patient derived xenograft ' )
141
+
134
142
fig0 <- ggplot(stats ,aes(x = cancer_type ,y = numSamps ,fill = model_type ))+
135
143
geom_bar(stat = ' identity' ,position = ' dodge' )+
136
144
theme(axis.text.x = element_text(angle = 45 , hjust = 1 ),
137
- plot.background = element_rect(fill = background_color , color = background_color ),
138
- legend.background = element_rect(fill = background_color , color = background_color ))+
145
+ plot.background = element_rect(fill = background_color , color = background_color ),
146
+ legend.background = element_rect(fill = background_color , color = background_color ))+
139
147
scale_y_log10()+ scale_fill_manual(values = color_palette )+
140
148
ggtitle(' Samples by tumor type' )
141
149
142
150
print(fig0 )
143
- ggsave(' Fig0_Overview.png' ,fig0 ,height = 9 ,width = 12 , bg = background_color )
151
+ ggsave(' Fig0_Overview.png' ,fig0 ,height = 8 ,width = 10 )
144
152
145
153
# Subset data for each type
146
154
data_type1 <- subset(stats , sampleSource == ' HCMI' )
147
155
data_type2 <- subset(stats , sampleSource == ' BeatAML' )
148
156
data_type3 <- subset(stats , sampleSource == ' CPTAC' )
149
157
data_type4 <- subset(stats , sampleSource == ' CCLE' )
158
+ data_type5 <- subset(stats , sampleSource == ' MPNST' )
150
159
151
- # Create separate plots for each type
160
+ # Create separate plots for each type, with colorblind-friendly colors
152
161
background_color <- " #E0F2F1"
153
-
154
162
fig1 <- ggplot(data_type1 , aes(x = cancer_type , y = numSamps , fill = model_type )) +
155
163
geom_bar(stat = ' identity' , position = ' dodge' ) +
156
164
scale_fill_manual(values = color_palette ) +
157
165
theme(axis.text.x = element_text(angle = 45 , hjust = 1 ),
158
- plot.background = element_rect(fill = background_color , color = background_color ),
159
- legend.background = element_rect(fill = background_color , color = background_color )) +
166
+ plot.background = element_rect(fill = background_color , color = background_color ),
167
+ legend.background = element_rect(fill = background_color , color = background_color )) +
160
168
ggtitle(' Cancer and Tissue Types - HCMI' )
161
169
162
- fig2 <- ggplot(data_type2 , aes(x = cancer_type , y = numSamps , fill = model_type ),
163
- legend.background = element_rect(fill = background_color , color = background_color )) +
170
+ fig2 <- ggplot(data_type2 , aes(x = cancer_type , y = numSamps , fill = model_type )) +
164
171
geom_bar(stat = ' identity' , position = ' dodge' ) +
165
172
scale_fill_manual(values = color_palette ) +
166
173
theme(axis.text.x = element_text(angle = 45 , hjust = 1 ),
167
- plot.background = element_rect(fill = background_color , color = background_color ),
168
- legend.background = element_rect(fill = background_color , color = background_color )) +
174
+ plot.background = element_rect(fill = background_color , color = background_color ),
175
+ legend.background = element_rect(fill = background_color , color = background_color )) +
169
176
ggtitle(' Cancer and Tissue Types - BeatAML' )
170
177
171
178
fig3 <- ggplot(data_type3 , aes(x = cancer_type , y = numSamps , fill = model_type )) +
172
179
geom_bar(stat = ' identity' , position = ' dodge' ) +
173
180
scale_fill_manual(values = color_palette ) +
174
181
theme(axis.text.x = element_text(angle = 45 , hjust = 1 ),
175
- plot.background = element_rect(fill = background_color , color = background_color ),
176
- legend.background = element_rect(fill = background_color , color = background_color )) +
182
+ plot.background = element_rect(fill = background_color , color = background_color ),
183
+ legend.background = element_rect(fill = background_color , color = background_color ))+
177
184
ggtitle(' Cancer and Tissue Types - CPTAC' )
178
185
179
186
fig4 <- ggplot(data_type4 , aes(x = cancer_type , y = numSamps , fill = model_type )) +
180
187
geom_bar(stat = ' identity' , position = ' dodge' ) +
181
188
scale_fill_manual(values = color_palette ) +
182
189
theme(axis.text.x = element_text(angle = 45 , hjust = 1 ),
183
- plot.background = element_rect(fill = background_color , color = background_color ),
184
- legend.background = element_rect(fill = background_color , color = background_color )) +
185
- ggtitle(' Cancer and Tissue Types - CCLE' )
186
-
187
- ggsave(' Fig1_HCMI.png' , fig1 , height = 9 , width = 12 , bg = background_color )
188
- ggsave(' Fig2_BeatAML.png' , fig2 , height = 9 , width = 12 , bg = background_color )
189
- ggsave(' Fig3_CPTAC.png' , fig3 , height = 9 , width = 12 , bg = background_color )
190
- ggsave(' Fig4_CCLE.png' , fig4 , height = 9 , width = 12 , bg = background_color )
190
+ plot.background = element_rect(fill = background_color , color = background_color ),
191
+ legend.background = element_rect(fill = background_color , color = background_color )) +
192
+ scale_y_log10() +
193
+ ggtitle(' Cancer and Tissue Types - Broad Sanger' )
194
+
195
+ fig5 <- ggplot(data_type5 , aes(x = cancer_type , y = numSamps , fill = model_type )) +
196
+ geom_bar(stat = ' identity' , position = ' dodge' ) +
197
+ scale_fill_manual(values = color_palette ) +
198
+ theme(axis.text.x = element_text(angle = 45 , hjust = 1 ),
199
+ plot.background = element_rect(fill = background_color , color = background_color ),
200
+ legend.background = element_rect(fill = background_color , color = background_color )) +
201
+ ggtitle(' Cancer and Tissue Types - MPNST' )
202
+
203
+ ggsave(' Fig1_HCMI.png' , fig1 , height = 8 , width = 10 )
204
+ ggsave(' Fig2_BeatAML.png' , fig2 , height = 8 , width = 10 )
205
+ ggsave(' Fig3_CPTAC.png' , fig3 , height = 8 , width = 10 )
206
+ ggsave(' Fig4_Broad_Sanger.png' , fig4 , height = 8 , width = 10 )
207
+ ggsave(' Fig5_MPNST.png' , fig5 , height = 8 , width = 10 )
191
208
192
209
0 commit comments