@@ -31,7 +31,7 @@ variant_schema =list(`3'UTR`=c("3'UTR",'THREE_PRIME_UTR','3prime_UTR_variant','3
31
31
IGR = c(' IGR' ,' nc_variant' ),
32
32
In_Frame_Del = c(' IN_FRAME_DEL' ,' In_Frame_Del' ,' inframe' ),
33
33
In_Frame_Ins = c(' IN_FRAME_INS' ,' In_Frame_Ins' ),
34
- Intron = c(' INTRON' ,' Intron' ,' intronic' ),
34
+ Intron = c(' INTRON' ,' Intron' ,' intronic' , ' intron ' ),
35
35
Missense_Mutation = c(' Missense_Mutation' ,' MISSENSE' ,' missense' ),
36
36
Nonsense_Mutation = c(' Nonsense_Mutation' ,' NONSENSE' ,' nonsense' ),
37
37
Nonstop_Mutation = c(' Nonstop_Mutation' ,' NONSTOP' ),
@@ -160,8 +160,17 @@ sanger_files<-function(fi,value){
160
160
left_join(smap )| >
161
161
mutate(study = ' Sanger' )| >
162
162
dplyr :: select(- c(other_id ,gene_symbol ))| >
163
- left_join(as.data.frame(sanger_vtab ))| >
164
- dplyr :: select(- effect )| >
163
+ left_join(as.data.frame(sanger_vtab ))
164
+
165
+ # #now many variants are missing???
166
+ missing <- res | >
167
+ select(effect ,variant_classification )| >
168
+ distinct()| >
169
+ subset(is.na(variant_classification ))
170
+ print(missing )
171
+
172
+ # ##TODO double check to see if any variants are missing
173
+ res <- res | > dplyr :: select(- effect )| >
165
174
subset(! is.na(improve_sample_id ))| >
166
175
distinct()
167
176
@@ -387,7 +396,16 @@ depmap_files<-function(fi,value){
387
396
388
397
res <- exp_file | >
389
398
mutate(entrez_id = as.numeric(EntrezGeneID ))| >
390
- left_join(as.data.frame(depmap_vtab ))| >
399
+ left_join(as.data.frame(depmap_vtab ))
400
+
401
+ # #now many variants are missing???
402
+ missing <- res | >
403
+ select(VariantInfo ,variant_classification )| >
404
+ distinct()| >
405
+ subset(is.na(variant_classification ))
406
+ print(missing )
407
+
408
+ res <- res | >
391
409
dplyr :: select(- c(EntrezGeneID ,VariantInfo ))| >
392
410
distinct()| >
393
411
subset(! is.na(entrez_id )) # #removes those with unknown entrez
@@ -538,8 +556,8 @@ main<-function(){
538
556
539
557
lapply(alltypes ,function (dt ){
540
558
print(dt )
541
- temps <- sanger_files(sanger_filenames [[dt ]],dt )
542
- tempd <- depmap_files(depmap_filenames [[dt ]],dt )
559
+ temps <- sanger_files(sanger_filenames [[dt ]],dt )| > tidyr :: drop_na()
560
+ tempd <- depmap_files(depmap_filenames [[dt ]],dt )| > tidyr :: drop_na()
543
561
readr :: write_csv(rbind(tempd ,temps ),file = paste0(' /tmp/broad_sanger_' ,dt ,' .csv.gz' ))
544
562
rm(tempd )
545
563
rm(temps )
0 commit comments