Skip to content

Commit 1e1541f

Browse files
authored
Merge pull request #152 from PNNL-CompBio/docker-cleanup
Docker cleanup to reduce image size and standardize builds
2 parents 2007152 + 8c436c7 commit 1e1541f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+355506
-3034
lines changed

.dockerignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
local/
2+
tests/
3+
coderdata/
4+
dataSummary/
5+
docs/
6+
candle_bmd/
7+
schema/

build/beatAML/GetBeatAML.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def map_and_combine(df, data_type, entrez_map_file, improve_map_file, map_file=N
454454
mapped_df['source'] = 'synapse'
455455
mapped_df['study'] = 'BeatAML'
456456

457-
final_dataframe = mapped_df
457+
final_dataframe = mapped_df.dropna()#pd.dropna(mapped_df,0)
458458
return final_dataframe
459459

460460

build/beatAML/build_drugs.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
python GetBeatAML.py --token $SYNAPSE_AUTH_TOKEN --drugs --drugFile $1

build/beatAML/build_exp.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
python GetBeatAML.py --exp --token $SYNAPSE_AUTH_TOKEN --curSamples $1 --drugFile $2

build/beatAML/build_omics.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
python GetBeatAML.py --token $SYNAPSE_AUTH_TOKEN --omics --curSamples $2 --genes $1

build/beatAML/build_samples.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
python GetBeatAML.py --token $SYNAPSE_AUTH_TOKEN --samples --prevSamples $1

build/broad_sanger/02-broadSangerOmics.R

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Sys.setenv(VROOM_CONNECTION_SIZE=100000000)
1010

1111
##### DEPMAP FILES
1212

13-
depmap_filenames=list( copy_number='https://figshare.com/ndownloader/files/40448840',
13+
depmap_filenames=list(copy_number='https://figshare.com/ndownloader/files/40448840',
1414
transcriptomics='https://figshare.com/ndownloader/files/40449128',
1515
mutations='https://figshare.com/ndownloader/files/40449638')
1616
##### SANGER FILES
@@ -87,11 +87,12 @@ sanger_files<-function(fi,value){
8787
dplyr::select(improve_sample_id,other_id)|>
8888
distinct()
8989

90-
gmap<-genes|>
91-
subset(gene_symbol%in%exp_file$symbol)|>
90+
gvals <- intersect(exp_file$symbol,genes$gene_symbol)
91+
gmap<-genes|>
92+
subset(gene_symbol%in%gvals)|>
9293
distinct()
9394

94-
print('wide to long')
95+
print('wide to long')
9596

9697
res<-exp_file|>
9798
dplyr::select(other_id='model_id',gene_symbol='symbol',gatk_mean_log2_copy_ratio,source,data_type,cn_category)|>
@@ -289,11 +290,12 @@ sanger_files<-function(fi,value){
289290
missed<-full|>subset(is.na(improve_sample_id))|>
290291
dplyr::select(improve_sample_id,other_id)|>
291292
distinct()
292-
print(paste('missing',nrow(missed),'identifiers'))
293+
print(paste('missing',nrow(missed),' sample identifiers'))
293294
print(missed)
294295

295296
full<-full|>
296-
subset(!is.na(improve_sample_id))|>
297+
subset(!is.na(improve_sample_id))|>
298+
subset(!is.na(entrez_id))|>
297299
dplyr::select(-other_id)
298300

299301
#write_csv(full,file=gzfile(fname))
@@ -340,6 +342,7 @@ depmap_files<-function(fi,value){
340342
dplyr::select(-entrez_id)|>
341343
left_join(genes)|>
342344
dplyr::select(other_id,entrez_id,copy_number)|>
345+
subset(!is.na(entrez_id))|>
343346
distinct()
344347
##these are messing things up
345348
# res$entrez_id<-stringr::str_replace(res$entrez_id,'\\)','')
@@ -370,7 +373,8 @@ depmap_files<-function(fi,value){
370373
res<-res|>
371374
tidyr::separate(gene_region,into=c('gene_symbol','num','start','end'),sep='_')|>
372375
dplyr::left_join(genes)|>
373-
dplyr::distinct()
376+
dplyr::distinct()|>
377+
subset(!is.na(entrez_id))
374378

375379
colnames(res)[1]<-'other_id'
376380
vars=c('methylation','start','end')
@@ -423,6 +427,7 @@ depmap_files<-function(fi,value){
423427
dplyr::select(-entrez_par)|>
424428
left_join(genes)|>
425429
dplyr::select(other_id,entrez_id,transcriptomics)|>
430+
subset(!is.na(entrez_id))|>
426431
distinct()
427432

428433
#mutate(entrez_id=stringr::str_replace_all(entrez_par,'\\)|\\(',''))|>
@@ -489,6 +494,7 @@ depmap_files<-function(fi,value){
489494
print(missed)
490495

491496
full<-full|>dplyr::select(c('entrez_id','improve_sample_id',vars))|>
497+
subset(entrez_id%in%genes$entrez_id)|>
492498
subset(!is.na(improve_sample_id))|>
493499
dplyr::distinct()|>
494500
dplyr::mutate(source='Broad',study='DepMap')

build/broad_sanger/03-createDrugFile.R

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,8 @@ library('tidyr')
55
#this is a helper file that loads the data
66
source_python("pubchem_retrieval.py")
77

8-
9-
#if(!require('PharmacoGx')){
10-
# BiocManager::install("PharmacoGx",force=TRUE)
118
library('PharmacoGx')
12-
#}
9+
1310

1411
all.dsets<-PharmacoGx::availablePSets()
1512

build/broad_sanger/04a-drugResponseData.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
#this is a helper file that loads the data
44
#source("mapDrugsToPubchem.R")
55

6-
#if(!require('PharmacoGx')){
7-
# BiocManager::install("PharmacoGx",force=TRUE)
6+
87
library('PharmacoGx')
98
library(readr)
109
library(dplyr)

build/broad_sanger/broad_sanger_samples.csv

Lines changed: 42150 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)