1
- from os import listdir , makedirs
2
- from os .path import isfile
1
+ from os import listdir , makedirs , walk
2
+ from os .path import isfile , join , basename , dirname , abspath
3
3
from shutil import copyfile
4
4
from sequence_processing_pipeline .NuQCJob import NuQCJob
5
5
from sequence_processing_pipeline .FastQCJob import FastQCJob
6
6
from sequence_processing_pipeline .GenPrepFileJob import GenPrepFileJob
7
- from os .path import join
8
7
import pandas as pd
9
8
from json import dumps
10
9
from collections import defaultdict
11
- from os .path import basename , dirname
12
10
13
11
14
12
ASSAY_NAME_NONE = "Assay"
@@ -253,7 +251,7 @@ def generate_prep_file(self):
253
251
seqpro_path ,
254
252
config ['modules_to_load' ],
255
253
self .master_qiita_job_id ,
256
- join ( self .pipeline . output_path , 'ConvertJob' ) ,
254
+ self .reports_path ,
257
255
is_amplicon = True )
258
256
259
257
if 'GenPrepFileJob' not in self .skip_steps :
@@ -417,11 +415,6 @@ def generate_reports(self):
417
415
def generate_prep_file (self ):
418
416
config = self .pipeline .get_software_configuration ('seqpro' )
419
417
420
- if 'ConvertJob' in self .raw_fastq_files_path :
421
- reports_dir = join (self .pipeline .output_path , 'ConvertJob' )
422
- elif 'TRIntegrateJob' in self .raw_fastq_files_path :
423
- reports_dir = join (self .pipeline .output_path , 'SeqCountsJob' )
424
-
425
418
job = GenPrepFileJob (self .pipeline .run_dir ,
426
419
self .raw_fastq_files_path ,
427
420
join (self .pipeline .output_path , 'NuQCJob' ),
@@ -430,7 +423,7 @@ def generate_prep_file(self):
430
423
config ['seqpro_path' ],
431
424
config ['modules_to_load' ],
432
425
self .master_qiita_job_id ,
433
- reports_dir )
426
+ self . reports_path )
434
427
435
428
if 'GenPrepFileJob' not in self .skip_steps :
436
429
job .run (callback = self .job_callback )
@@ -505,6 +498,103 @@ class Metagenomic(MetaOmic):
505
498
METAGENOMIC_TYPE = 'Metagenomic'
506
499
assay_type = ASSAY_NAME_METAGENOMIC
507
500
501
def execute_pipeline(self):
    '''
    Executes steps of pipeline in proper sequence.

    After the processing steps have run, the prep-info files written
    under <output_path>/GenPrepFileJob/PrepFiles are collected and the
    post-processing steps are performed.

    Attributes set by this method:
        has_replicates: True if a directory named '1' is found anywhere
            under the PrepFiles directory, False otherwise.
        prep_file_paths: dict mapping a qiita-id (parsed from the
            prep-info file name) to a list of absolute .tsv paths.
        sifs: the value returned by self.generate_sifs().

    :return: None
    '''
    self.pre_check()

    self.generate_special_map()

    self.update_status("Converting data", 1, 9)

    self.convert_raw_to_fastq()

    self.integrate_results()

    self.generate_sequence_counts()

    self.update_status("Performing quality control", 2, 9)
    self.quality_control()

    self.update_status("Generating reports", 3, 9)
    self.generate_reports()

    self.update_status("Generating preps", 4, 9)
    self.generate_prep_file()

    # moved final component of genprepfilejob outside of object.
    # obtain the paths to the prep-files generated by GenPrepFileJob
    # w/out having to recover full state.
    tmp = join(self.pipeline.output_path, 'GenPrepFileJob', 'PrepFiles')

    self.has_replicates = False

    prep_paths = []
    self.prep_file_paths = {}

    for root, dirs, files in walk(tmp):
        for _file in files:
            # only prep-info files (.tsv) are of interest. filter first
            # so that a stray file (e.g. one with no '.' in its name)
            # can neither break the name parsing below nor register an
            # empty entry in self.prep_file_paths.
            if not _file.endswith('.tsv'):
                continue

            # breakup the prep-info-file into segments
            # (run-id, project_qid, other) and cleave
            # the qiita-id from the project_name.
            qid = _file.split('.')[1].split('_')[-1]

            _path = abspath(join(root, _file))
            prep_paths.append(_path)
            self.prep_file_paths.setdefault(qid, []).append(_path)

        if '1' in dirs:
            # if PrepFiles contains the '1' directory, then it's a
            # given that this sample-sheet contains replicates.
            self.has_replicates = True

    # currently imported from Assay although it is a base method. it
    # could be imported into Workflows potentially, since it is a post-
    # processing step. All pairings of assay and instrument type need to
    # generate prep-info files in the same format.
    self.overwrite_prep_files(prep_paths)

    # for now, simply re-run any line below as if it was a new job, even
    # for a restart. functionality is idempotent, except for the
    # registration of new preps in Qiita. These will simply be removed
    # manually.

    # post-processing steps are by default associated with the Workflow
    # class, since they deal with fastq files and Qiita, and don't depend
    # on assay or instrument type.
    self.update_status("Generating sample information", 5, 9)
    self.sifs = self.generate_sifs()

    # post-processing step.
    self.update_status("Registering blanks in Qiita", 6, 9)
    if self.update:
        self.update_blanks_in_qiita()

    self.update_status("Loading preps into Qiita", 7, 9)
    if self.update:
        self.update_prep_templates()

    # before we load preps into Qiita we need to copy the fastq
    # files n times for n preps and correct the file-paths each
    # prep is pointing to.
    # NOTE(review): unlike the two steps above, this call is not guarded
    # by self.update - confirm that is intended.
    self.load_preps_into_qiita()

    self.fsr.generate_report()

    self.update_status("Generating packaging commands", 8, 9)
    self.generate_commands()

    self.update_status("Packaging results", 9, 9)
    if self.update:
        self.execute_commands()
597
+
508
598
509
599
class Metatranscriptomic (MetaOmic ):
510
600
METATRANSCRIPTOMIC_TYPE = 'Metatranscriptomic'
0 commit comments