11
11
import gzip
12
12
from glob import glob
13
13
import sys
14
+ import requests
14
15
15
16
def main ():
16
17
parser = argparse .ArgumentParser (
@@ -39,7 +40,7 @@ def main():
39
40
parser .add_argument ('--figshare' , action = 'store_true' , help = "Upload all local data to Figshare. FIGSHARE_TOKEN must be set in local environment." )
40
41
parser .add_argument ('--all' ,dest = 'all' ,default = False ,action = 'store_true' , help = "Run all data build commands. This includes docker, samples, omics, drugs, exp arguments. This does not run the validate or figshare commands" )
41
42
parser .add_argument ('--high_mem' ,dest = 'high_mem' ,default = False ,action = 'store_true' ,help = "If you have 32 or more CPUs, this option is recommended. It will run many code portions in parallel. If you don't have enough memory, this will cause a run failure." )
42
- parser .add_argument ('--dataset' ,dest = 'datasets' ,default = 'broad_sanger,hcmi,beataml,cptac,mpnst,mpnstpdx' ,help = 'Datasets to process. Defaults to all available.' )
43
+ parser .add_argument ('--dataset' ,dest = 'datasets' ,default = 'broad_sanger,hcmi,beataml,cptac,mpnst,mpnstpdx,pancpdo ' ,help = 'Datasets to process. Defaults to all available.' )
43
44
parser .add_argument ('--version' , type = str , required = False , help = 'Version number for the Figshare upload title (e.g., "0.1.29"). This is required for Figshare upload. This must be a higher version than previously published versions.' )
44
45
parser .add_argument ('--github-username' , type = str , required = False , help = 'GitHub username for the repository.' )
45
46
parser .add_argument ('--github-email' , type = str , required = False , help = 'GitHub email for the repository.' )
@@ -119,6 +120,7 @@ def process_docker(datasets):
119
120
'beataml' : ['beataml' ],
120
121
'mpnst' : ['mpnst' ],
121
122
'mpnstpdx' : ['mpnstpdx' ],
123
+ 'pancpdo' : ['pancpdo' ],
122
124
'cptac' : ['cptac' ],
123
125
'genes' : ['genes' ],
124
126
'upload' : ['upload' ]
@@ -130,7 +132,7 @@ def process_docker(datasets):
130
132
datasets_to_build .extend (dataset_map .get (dataset , []))
131
133
132
134
# Build the docker-compose command, adding specific datasets
133
- compose_command = ['docker- compose' , '-f' , compose_file , 'build' , '--parallel' ] + datasets_to_build
135
+ compose_command = ['docker' , ' compose' , '-f' , compose_file , 'build' , '--parallel' ] + datasets_to_build
134
136
135
137
log_file_path = 'local/docker.log'
136
138
env = os .environ .copy ()
@@ -265,9 +267,11 @@ def run_docker_upload_cmd(cmd_arr, all_files_dir, name, version):
265
267
docker_run = ['docker' , 'run' , '--rm' , '-v' , f"{ env ['PWD' ]} /local/{ all_files_dir } :/tmp" , '-e' , f"VERSION={ version } " ]
266
268
267
269
# Add Appropriate Environment Variables
270
+ if name == "validate" :
271
+ docker_run .extend (['upload' ])
268
272
if 'FIGSHARE_TOKEN' in env and name == 'Figshare' :
269
273
docker_run .extend (['-e' , f"FIGSHARE_TOKEN={ env ['FIGSHARE_TOKEN' ]} " , 'upload' ])
270
- if name == "validate " :
274
+ if name == "Map_Drugs" or name == "Map_Samples " :
271
275
docker_run .extend (['upload' ])
272
276
if 'GITHUB_TOKEN' in env and name == "GitHub" :
273
277
docker_run .extend (['-e' , f"GITHUB_TOKEN={ env ['GITHUB_TOKEN' ]} " , 'upload' ])
@@ -299,6 +303,18 @@ def compress_file(file_path):
299
303
with gzip .open (compressed_file_path , 'wb' ) as f_out :
300
304
shutil .copyfileobj (f_in , f_out )
301
305
os .remove (file_path )
306
+
307
def get_latest_commit_hash(owner, repo, branch='main', *, timeout=30):
    """
    Return the SHA of the latest commit on the specified branch.

    Queries the GitHub REST API endpoint
    ``/repos/{owner}/{repo}/commits/{branch}`` and reads the ``sha`` field
    of the JSON response.

    Args:
        owner: GitHub account or organization that owns the repository.
        repo: Repository name.
        branch: Branch (or other ref) to look up. Defaults to ``'main'``.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block the build forever.

    Returns:
        The value of the ``sha`` field in the response (the full commit hash).

    Raises:
        requests.exceptions.HTTPError: If GitHub answers with a 4xx/5xx
            status (raised by ``raise_for_status``).
        requests.exceptions.Timeout: If no response arrives within
            ``timeout`` seconds.
        KeyError: If the response JSON has no ``sha`` field.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/commits/{branch}"
    # NOTE(review): the request is unauthenticated, so it is presumably
    # subject to GitHub's anonymous rate limit - confirm this is acceptable.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # The commit data is in JSON format; the 'sha' field is the full commit hash.
    commit_data = response.json()
    return commit_data['sha']
302
318
303
319
######
304
320
### Pre-Build Environment Token Check
@@ -387,17 +403,17 @@ def compress_file(file_path):
387
403
######
388
404
### Begin Upload and/or validation
389
405
#####
390
-
391
- if args .figshare or args .validate :
406
+ if args . figshare or args . validate or github_token :
407
+ # if args.figshare or args.validate:
392
408
# FigShare File Prefixes:
409
+
393
410
prefixes = ['beataml' , 'hcmi' , 'cptac' , 'mpnst' , 'genes' , 'drugs' ]
394
411
broad_sanger_datasets = ["ccle" ,"ctrpv2" ,"fimm" ,"gdscv1" ,"gdscv2" ,"gcsi" ,"prism" ,"nci60" ]
395
412
if "broad_sanger" in datasets :
396
413
prefixes .extend (broad_sanger_datasets )
397
414
datasets .extend (broad_sanger_datasets )
398
415
datasets .remove ("broad_sanger" )
399
416
400
-
401
417
figshare_token = os .getenv ('FIGSHARE_TOKEN' )
402
418
403
419
all_files_dir = 'local/all_files_dir'
@@ -421,6 +437,13 @@ def compress_file(file_path):
421
437
for file in glob (os .path .join (all_files_dir , '*.gz' )):
422
438
decompress_file (file )
423
439
440
+ ### These should be done before schema checking.
441
+ sample_mapping_command = ['python3' , 'scripts/map_improve_sample_ids.py' , '--local_dir' , "/tmp" , '--version' , args .version ]
442
+ run_docker_upload_cmd (sample_mapping_command , 'all_files_dir' , 'Map_Samples' , args .version )
443
+
444
+ drug_mapping_command = ['python3' , 'scripts/map_improve_drug_ids.py' , '--local_dir' , "/tmp" , '--version' , args .version ]
445
+ run_docker_upload_cmd (drug_mapping_command , 'all_files_dir' , 'Map_Drugs' , args .version )
446
+
424
447
# Run schema checker - This will always run if uploading data.
425
448
schema_check_command = ['python3' , 'scripts/check_schema.py' , '--datasets' ] + datasets
426
449
run_docker_upload_cmd (schema_check_command , 'all_files_dir' , 'validate' , args .version )
@@ -437,28 +460,47 @@ def compress_file(file_path):
437
460
438
461
print ("File compression and decompression adjustments are complete." )
439
462
440
- # Upload to Figshare using Docker
463
+ ## # Upload to Figshare using Docker
441
464
if args .figshare and args .version and figshare_token :
442
- figshare_command = ['python3' , 'scripts/push_to_figshare.py' , '--directory' , "/tmp" , '--title' , f"CODERData{ args .version } " , '--token' , os .getenv ('FIGSHARE_TOKEN' ), '--project_id' , '189342' , '--publish' ]
465
+ figshare_command = ['python3' , 'scripts/push_to_figshare.py' , '--directory' , "/tmp" , '--title' , f"CODERData{ args .version } " , '--token' , os .getenv ('FIGSHARE_TOKEN' ), '--project_id' , '189342' , '--version' , args . version , '-- publish' ]
443
466
run_docker_upload_cmd (figshare_command , 'all_files_dir' , 'Figshare' , args .version )
444
467
468
+ ### Push changes to GitHub using Docker
469
+ # if args.version and args.figshare and figshare_token and github_token and args.github_username and args.github_email:
445
470
446
- # Push changes to GitHub using Docker
447
- if args .version and args .figshare and figshare_token and github_token and args .github_username and args .github_email :
448
- git_command = [
449
- 'bash' , '-c' , (
450
- f'git config --global user.name "{ args .github_username } " '
451
- f'&& git config --global user.email "{ args .github_email } " '
452
- f'&& cp /tmp/figshare_latest.yml /usr/src/app/coderdata/docs/_data/figshare_latest.yml '
453
- f'&& git add docs/_data/figshare_latest.yml '
454
- f'&& git commit -m "Data Built and Uploaded. New Tag: { args .version } " '
455
- f'&& git tag { args .version } '
456
- f'&& git push https://{ args .github_username } :{ github_token } @github.com/PNNL-CompBio/coderdata.git main '
457
- f'&& git push https://{ args .github_username } :{ github_token } @github.com/PNNL-CompBio/coderdata.git --tags'
458
- )
459
- ]
460
- run_docker_upload_cmd (git_command , 'all_files_dir' , 'GitHub' , args .version )
471
+ # You can only upload to Github after Figshare upload is completed - otherwise figshare_latest.yml and dataset.yml won't be available.
472
+ if args .version and github_token and args .github_username and args .github_email :
473
+
474
+ git_command = [
475
+ 'bash' , '-c' , (
476
+ f'git config --global user.name "{ args .github_username } " '
477
+ f'&& git config --global user.email "{ args .github_email } " '
478
+
479
+ # Checkout a new branch
480
+ f'&& git checkout -b testing-auto-build-pr-{ args .version } '
481
+
482
+ # Copy and add the necessary files
483
+ f'&& cp /tmp/improve_sample_mapping.json.gz /usr/src/app/coderdata/build/improve_sample_mapping.json.gz '
484
+ f'&& cp /tmp/improve_drug_mapping.json.gz /usr/src/app/coderdata/build/improve_drug_mapping.json.gz '
485
+ f'&& gunzip /usr/src/app/coderdata/build/*.gz '
486
+ f'&& git add -f build/improve_sample_mapping.json build/improve_drug_mapping.json '
487
+ f'&& cp /tmp/figshare_latest.yml /usr/src/app/coderdata/docs/_data/figshare_latest.yml '
488
+ f'&& cp /tmp/dataset.yml /usr/src/app/coderdata/coderdata/dataset.yml '
489
+ f'&& git add -f docs/_data/figshare_latest.yml coderdata/dataset.yml'
490
+
491
+ # Tag and push
492
+ f'&& git commit -m "Data Built and Uploaded. New Tag: { args .version } " '
493
+ f'&& git tag { args .version } '
494
+ f'&& git push https://{ args .github_username } :{ github_token } @github.com/PNNL-CompBio/coderdata.git testing-auto-build-pr-{ args .version } '
495
+
496
+ # Create a PR using GitHub CLI
497
+ f'&& gh pr create --title "Testing Auto PR instead of auto Merge { args .version } " '
498
+ f'--body "This PR was automatically generated by the build process." '
499
+ f'--base main --head testing-auto-build-pr-{ args .version } '
500
+ )
501
+ ]
502
+
503
+ run_docker_upload_cmd (git_command , 'all_files_dir' , 'GitHub' , args .version )
461
504
462
-
463
505
if __name__ == '__main__' :
464
506
main ()
0 commit comments