9
9
from concurrent .futures import ThreadPoolExecutor
10
10
import shutil
11
11
import gzip
12
- import argparse
13
12
from glob import glob
14
13
from packaging import version
15
14
@@ -24,6 +23,7 @@ def main():
24
23
parser .add_argument ('--figshare' , action = 'store_true' , help = "Flag to trigger Figshare upload" )
25
24
parser .add_argument ('--pypi' , action = 'store_true' , help = "Flag to trigger PyPI upload" )
26
25
parser .add_argument ('--all' ,dest = 'all' ,default = False ,action = 'store_true' )
26
+ parser .add_argument ('--high_mem' ,dest = 'high_mem' ,default = False ,action = 'store_true' )
27
27
parser .add_argument ('--dataset' ,dest = 'datasets' ,default = 'broad_sanger,hcmi,beataml,mpnst,cptac' ,help = 'Datasets to process. Defaults to all available, but if there are synapse issues, please remove beataml and mpnst' )
28
28
parser .add_argument ('--version' , type = str , required = False , help = 'Version number for the package and data upload title.' )
29
29
args = parser .parse_args ()
@@ -128,38 +128,39 @@ def process_samples(executor, datasets):
128
128
last_sample_future = executor .submit (run_docker_cmd , [di , 'sh' , 'build_samples.sh' , sf ], f'{ da } samples' )
129
129
sf = f'/tmp/{ da } _samples.csv'
130
130
131
def process_omics(executor, datasets, high_mem):
    '''
    Build the omics files for every dataset via docker.

    Parameters
    ----------
    executor : concurrent.futures.Executor
        Pool used to run the docker build commands.
    datasets : iterable of str
        Dataset names to process (e.g. 'broad_sanger', 'hcmi').
    high_mem : bool
        When True, submit every dataset's build at once so they run
        concurrently; when False, run the builds strictly one at a time
        to limit peak memory use.
    '''
    last_omics_future = None
    for da in datasets:
        # broad_sanger omics builds live in a dedicated docker image.
        di = 'broad_sanger_omics' if da == 'broad_sanger' else da
        # The docker invocation is identical in both modes; build it once
        # so the two scheduling branches cannot drift apart.
        cmd = [di, 'sh', 'build_omics.sh', '/tmp/genes.csv', f'/tmp/{da}_samples.csv']
        label = f'{da} omics'
        if high_mem:
            # Run all at once. NOTE(review): these futures are fire-and-forget;
            # any exception only surfaces at executor shutdown — confirm that
            # is acceptable for the pipeline.
            executor.submit(run_docker_cmd, cmd, label)
        else:
            # Run one at a time: wait for the previous build to finish (and
            # re-raise any exception it hit) before starting the next.
            if last_omics_future:
                last_omics_future.result()
            last_omics_future = executor.submit(run_docker_cmd, cmd, label)
147
def process_experiments(executor, datasets, high_mem):
    '''
    Build the experiment files for every dataset via docker.

    Datasets without experiment data ('cptac', 'hcmi') are skipped, as are
    datasets whose output file already exists under local/.

    Parameters
    ----------
    executor : concurrent.futures.Executor
        Pool used to run the docker build commands.
    datasets : iterable of str
        Dataset names to process.
    high_mem : bool
        When True, submit every dataset's build at once so they run
        concurrently; when False, run the builds strictly one at a time
        to limit peak memory use.
    '''
    last_experiments_future = None
    for da in datasets:
        # cptac and hcmi have no experiments to build.
        if da in ['cptac', 'hcmi']:
            continue
        # broad_sanger experiment builds live in a dedicated docker image.
        di = 'broad_sanger_exp' if da == 'broad_sanger' else da
        # Skip datasets already built on a previous run.
        if os.path.exists(f'local/{da}_experiments.tsv'):
            continue
        # The docker invocation is identical in both modes; build it once
        # so the two scheduling branches cannot drift apart.
        cmd = [di, 'sh', 'build_exp.sh', f'/tmp/{da}_samples.csv', f'/tmp/{da}_drugs.tsv']
        label = f'{da} experiments'
        if high_mem:
            # Run all at once. NOTE(review): these futures are fire-and-forget;
            # any exception only surfaces at executor shutdown — confirm that
            # is acceptable for the pipeline.
            executor.submit(run_docker_cmd, cmd, label)
        else:
            # Run one at a time: wait for the previous build to finish (and
            # re-raise any exception it hit) before starting the next.
            if last_experiments_future:
                last_experiments_future.result()
            last_experiments_future = executor.submit(run_docker_cmd, cmd, label)
163
164
164
165
def process_genes (executor ):
165
166
if not os .path .exists ('/tmp/genes.csv' ):
@@ -250,20 +251,9 @@ def compress_file(file_path):
250
251
251
252
with ThreadPoolExecutor () as executor :
252
253
if args .omics or args .all :
253
- omics_thread = executor .submit (process_omics , executor , datasets )
254
- if args .exp or args .all :
255
- exp_thread = executor .submit (process_experiments , executor , datasets )
256
-
257
- if args .omics or args .all :
258
- omics_thread .result ()
259
- if args .exp or args .all :
260
- exp_thread .result ()
261
-
262
- with ThreadPoolExecutor () as executor :
263
- if args .omics or args .all :
264
- omics_thread = executor .submit (process_omics , executor , datasets )
254
+ omics_thread = executor .submit (process_omics , executor , datasets , args .high_mem )
265
255
if args .exp or args .all :
266
- exp_thread = executor .submit (process_experiments , executor , datasets )
256
+ exp_thread = executor .submit (process_experiments , executor , datasets , args . high_mem )
267
257
268
258
if args .omics or args .all :
269
259
omics_thread .result ()
0 commit comments