@@ -149,26 +149,44 @@ cnv<-do.call(rbind,lapply(setdiff(combined$CopyNumber,NA),function(x){
149
149
sample <- subset(combined ,CopyNumber == x )
150
150
print(sample $ improve_sample_id )
151
151
res <- fread(synGet(x2 )$ path )
152
- long_df <- res [, strsplit(as.character(gene ), " ," ), by = .(chromosome , start , end , depth , log2 )]
153
- filtered_df <- long_df | >
154
- subset(is.finite(log2 ))| >
155
- filter(V1 %in% genes_df $ gene ) # get only protein coding genes and remove empty gene symbols
156
- filtered_df <- filtered_df [, .(gene_symbol = V1 ,
157
- improve_sample_id = sample $ improve_sample_id [1 ],
158
- copy_number = 2 ^ log2 ,
159
- source = " NF Data Portal" ,
160
- study = " MPNST PDX MT" )]
161
- res <- filtered_df | > # #deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp
162
- dplyr :: mutate(copy_call = ifelse(copy_number < 0.5210507 ,' deep del' ,
163
- ifelse(copy_number < 0.7311832 ,' het loss' ,
164
- ifelse(copy_number < 1.214125 ,' diploid' ,
165
- ifelse(copy_number < 1.422233 ,' gain' ,' amp' )))))| >
166
- left_join(genes_df )| >
167
- dplyr :: select(entrez_id ,improve_sample_id ,copy_number ,copy_call ,study ,source )| >
168
- subset(! is.na(entrez_id ))| >
169
- distinct()
170
- res | > group_by(copy_call )| > summarize(n_distinct(entrez_id ))
171
- return (distinct(res ))
152
+
153
+ long_df <- res | >
154
+ tidyr :: separate_rows(gene ,sep = ' ,' )| >
155
+ dplyr :: rename(gene_symbol = ' gene' )| >
156
+ dplyr :: left_join(genes_df )| >
157
+ subset(! is.na(entrez_id ))| >
158
+ dplyr :: select(entrez_id ,log2 )| >
159
+ dplyr :: distinct()| >
160
+ dplyr :: mutate(copy_number = 2 ^ log2 )
161
+
162
+ res <- long_df | > # #deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp
163
+ dplyr :: mutate(copy_call = ifelse(copy_number < 0.5210507 ,' deep del' ,
164
+ ifelse(copy_number < 0.7311832 ,' het loss' ,
165
+ ifelse(copy_number < 1.214125 ,' diploid' ,
166
+ ifelse(copy_number < 1.422233 ,' gain' ,' amp' )))))| >
167
+ mutate(study = ' MPNST PDX MT' ,source = ' NF Data Portal' ,improve_sample_id = sample $ improve_sample_id [1 ])| >
168
+ dplyr :: distinct()
169
+
170
+ # long_df <- res[, strsplit(as.character(gene), ","), by = .(chromosome, start, end, depth, log2)]
171
+ # filtered_df <- long_df |>
172
+ # subset(is.finite(log2))|>
173
+ # filter(V1 %in% genes_df$gene) # get only protein coding genes and remove empty gene symbols
174
+ # filtered_df <- filtered_df[, .(gene_symbol = V1,
175
+ # improve_sample_id = sample$improve_sample_id[1],
176
+ # copy_number = 2^log2,
177
+ # source = "NF Data Portal",
178
+ # study = "MPNST PDX MT")]
179
+ # res<-filtered_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp
180
+ # dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del',
181
+ # ifelse(copy_number<0.7311832,'het loss',
182
+ # ifelse(copy_number<1.214125,'diploid',
183
+ # ifelse(copy_number<1.422233,'gain','amp')))))|>
184
+ # left_join(genes_df)|>
185
+ # dplyr::select(entrez_id,improve_sample_id,copy_number,copy_call,study,source)|>
186
+ # subset(!is.na(entrez_id))|>
187
+ # distinct()
188
+ # res|>group_by(copy_call)|>summarize(n_distinct(entrez_id))
189
+ return (res )
172
190
# }
173
191
}))
174
192
0 commit comments