@@ -44,11 +44,13 @@ manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|>
44
44
# #they each get their own sample identifier
45
45
# Build the PDX table: from `manifest`, keep common_name plus the columns picked
# by starts_with(...), join to `pdx_samps` (no `by` is given, so the join key is
# whichever columns the two tables share), then keep improve_sample_id and one
# renamed column per data type.
# In this diff view the "-" line is the previous final select and the "+" line
# is its replacement, which additionally exposes a Proteomics column.
pdx_data <- manifest | > dplyr :: select(common_name ,starts_with(" PDX" ))| >
46
46
left_join(pdx_samps )| >
47
- dplyr :: select(improve_sample_id ,RNASeq = ' PDX_RNASeq' ,Mutations = ' PDX_Somatic_Mutations' ,CopyNumber = ' PDX_CNV' )
47
+ dplyr :: select(improve_sample_id ,RNASeq = ' PDX_RNASeq' ,Mutations = ' PDX_Somatic_Mutations' ,CopyNumber = ' PDX_CNV' , Proteomics = ' PDX_Proteomics ' )
48
48
49
49
# Build the tumor table the same way as pdx_data: keep common_name plus the
# columns picked by starts_with(...), join to `tumor_samps` (implicit join key),
# then keep improve_sample_id and one renamed column per data type.
# In this diff view the "-" line is the previous final select; the "+" lines
# replace it and append a placeholder Proteomics column, so tumor_data carries
# the same Proteomics column that pdx_data has before the two are row-bound.
tumor_data <- manifest | > dplyr :: select(common_name ,starts_with(" Tumor" ))| >
50
50
left_join(tumor_samps )| >
51
- dplyr :: select(improve_sample_id ,RNASeq = ' Tumor_RNASeq' ,Mutations = ' Tumor_Somatic_Mutations' ,CopyNumber = ' Tumor_CNV' )
51
+ dplyr :: select(improve_sample_id ,RNASeq = ' Tumor_RNASeq' ,Mutations = ' Tumor_Somatic_Mutations' ,CopyNumber = ' Tumor_CNV' )| >
52
+ mutate(Proteomics = ' ' ) # #we don't have tumor proteomics from these samples
53
+ # print(tumor_data)
52
54
53
55
54
56
# Stack the PDX and tumor sample tables, then drop duplicated rows.
combined <- distinct(rbind(pdx_data, tumor_data))
@@ -58,6 +60,33 @@ genes_df <- fread(genefile)
58
60
59
61
60
62
63
+
64
## Proteomics is processed first.
## For every proteomics file id listed in combined$Proteomics: download the file
## from Synapse, map its gene symbols to entrez ids through genes_df, keep the
## non-zero log ratios, and tag the rows with sample id, source and study.
proteomics_ids <- unique(combined$Proteomics)
# Skip NA and blank placeholders (rows without a proteomics file), whatever
# whitespace the placeholder happens to contain.
proteomics_ids <- proteomics_ids[
  !is.na(proteomics_ids) & nzchar(trimws(proteomics_ids))
]

proteomics <- do.call(rbind, lapply(proteomics_ids, function(x) {
  # Rows of `combined` that reference this file. Named `sample_rows` rather
  # than `sample` so base::sample() is not shadowed.
  # NOTE(review): only the first improve_sample_id is attached below, exactly
  # as before; confirm a file id is never shared by several samples.
  sample_rows <- subset(combined, Proteomics == x)

  fread(synGet(x)$path) |>
    dplyr::rename(gene_symbol = 'Gene') |>
    # Join explicitly on the column that was just renamed to match genes_df,
    # instead of relying on whichever column names happen to coincide.
    left_join(genes_df, by = 'gene_symbol') |>
    dplyr::select(entrez_id, proteomics = 'logRatio') |>
    distinct() |>
    subset(!is.na(entrez_id)) |> # drop symbols with no entrez mapping
    subset(proteomics != 0) |>   # drop zero log ratios (NA rows are dropped too)
    # Scalars recycle to every row, and safely to zero rows.
    dplyr::mutate(
      improve_sample_id = sample_rows$improve_sample_id[1],
      source = 'NF Data Portal',
      study = 'MPNST PDX MT'
    ) |>
    distinct()
}))

fwrite(proteomics, '/tmp/mpnst_proteomics.csv.gz')
88
+
89
+
61
90
# ### FIRST WE GET RNASeq Data
62
91
63
92
rnaseq <- do.call(' rbind' ,lapply(setdiff(combined $ RNASeq ,NA ),function (x ){
@@ -146,4 +175,3 @@ cnv<-do.call(rbind,lapply(setdiff(combined$CopyNumber,NA),function(x){
146
175
# Write the assembled copy-number table (`cnv`) to the .csv.gz path given below.
fwrite(cnv ,' /tmp/mpnst_copy_number.csv.gz' )
147
176
148
177
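## NOTE: proteomics is now retrieved earlier in this script (written to /tmp/mpnst_proteomics.csv.gz); the former "get proteomics" TODO is resolved.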
149
-
0 commit comments