Skip to content

Commit a5e9f04

Browse files
charles-cowart, qiita_t, antgonza
authored
production hotfixes (#98)
* Production hotfixes
* production hotfixes
* hotfix
* Fix for generating failed_samples.html reporting. (#99)
* mv execute_pipeline to Metagenomic
* rm execute_pipeline from TellSeqMetagenomicWorkflow

---------

Co-authored-by: qiita_t <[email protected]>
Co-authored-by: Antonio Gonzalez <[email protected]>
1 parent ad33a70 commit a5e9f04

7 files changed

+128
-362
lines changed

qp_klp/Assays.py

Lines changed: 101 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
1-
from os import listdir, makedirs
2-
from os.path import isfile
1+
from os import listdir, makedirs, walk
2+
from os.path import isfile, join, basename, dirname, abspath
33
from shutil import copyfile
44
from sequence_processing_pipeline.NuQCJob import NuQCJob
55
from sequence_processing_pipeline.FastQCJob import FastQCJob
66
from sequence_processing_pipeline.GenPrepFileJob import GenPrepFileJob
7-
from os.path import join
87
import pandas as pd
98
from json import dumps
109
from collections import defaultdict
11-
from os.path import basename, dirname
1210

1311

1412
ASSAY_NAME_NONE = "Assay"
@@ -253,7 +251,7 @@ def generate_prep_file(self):
253251
seqpro_path,
254252
config['modules_to_load'],
255253
self.master_qiita_job_id,
256-
join(self.pipeline.output_path, 'ConvertJob'),
254+
self.reports_path,
257255
is_amplicon=True)
258256

259257
if 'GenPrepFileJob' not in self.skip_steps:
@@ -417,11 +415,6 @@ def generate_reports(self):
417415
def generate_prep_file(self):
418416
config = self.pipeline.get_software_configuration('seqpro')
419417

420-
if 'ConvertJob' in self.raw_fastq_files_path:
421-
reports_dir = join(self.pipeline.output_path, 'ConvertJob')
422-
elif 'TRIntegrateJob' in self.raw_fastq_files_path:
423-
reports_dir = join(self.pipeline.output_path, 'SeqCountsJob')
424-
425418
job = GenPrepFileJob(self.pipeline.run_dir,
426419
self.raw_fastq_files_path,
427420
join(self.pipeline.output_path, 'NuQCJob'),
@@ -430,7 +423,7 @@ def generate_prep_file(self):
430423
config['seqpro_path'],
431424
config['modules_to_load'],
432425
self.master_qiita_job_id,
433-
reports_dir)
426+
self.reports_path)
434427

435428
if 'GenPrepFileJob' not in self.skip_steps:
436429
job.run(callback=self.job_callback)
@@ -505,6 +498,103 @@ class Metagenomic(MetaOmic):
505498
METAGENOMIC_TYPE = 'Metagenomic'
506499
assay_type = ASSAY_NAME_METAGENOMIC
507500

501+
def execute_pipeline(self):
502+
'''
503+
Executes steps of pipeline in proper sequence.
504+
:return: None
505+
'''
506+
self.pre_check()
507+
508+
self.generate_special_map()
509+
510+
self.update_status("Converting data", 1, 9)
511+
512+
self.convert_raw_to_fastq()
513+
514+
self.integrate_results()
515+
516+
self.generate_sequence_counts()
517+
518+
self.update_status("Performing quality control", 2, 9)
519+
self.quality_control()
520+
521+
self.update_status("Generating reports", 3, 9)
522+
self.generate_reports()
523+
524+
self.update_status("Generating preps", 4, 9)
525+
self.generate_prep_file()
526+
527+
# moved final component of genprepfilejob outside of object.
528+
# obtain the paths to the prep-files generated by GenPrepFileJob
529+
# w/out having to recover full state.
530+
tmp = join(self.pipeline.output_path, 'GenPrepFileJob', 'PrepFiles')
531+
532+
self.has_replicates = False
533+
534+
prep_paths = []
535+
self.prep_file_paths = {}
536+
537+
for root, dirs, files in walk(tmp):
538+
for _file in files:
539+
# breakup the prep-info-file into segments
540+
# (run-id, project_qid, other) and cleave
541+
# the qiita-id from the project_name.
542+
qid = _file.split('.')[1].split('_')[-1]
543+
544+
if qid not in self.prep_file_paths:
545+
self.prep_file_paths[qid] = []
546+
547+
_path = abspath(join(root, _file))
548+
if _path.endswith('.tsv'):
549+
prep_paths.append(_path)
550+
self.prep_file_paths[qid].append(_path)
551+
552+
for _dir in dirs:
553+
if _dir == '1':
554+
# if PrepFiles contains the '1' directory, then it's a
555+
# given that this sample-sheet contains replicates.
556+
self.has_replicates = True
557+
558+
# currently imported from Assay although it is a base method. it
559+
# could be imported into Workflows potentially, since it is a post-
560+
# processing step. All pairings of assay and instrument type need to
561+
# generate prep-info files in the same format.
562+
self.overwrite_prep_files(prep_paths)
563+
564+
# for now, simply re-run any line below as if it was a new job, even
565+
# for a restart. functionality is idempotent, except for the
566+
# registration of new preps in Qiita. These will simply be removed
567+
# manually.
568+
569+
# post-processing steps are by default associated with the Workflow
570+
# class, since they deal with fastq files and Qiita, and don't depend
571+
# on assay or instrument type.
572+
self.update_status("Generating sample information", 5, 9)
573+
self.sifs = self.generate_sifs()
574+
575+
# post-processing step.
576+
self.update_status("Registering blanks in Qiita", 6, 9)
577+
if self.update:
578+
self.update_blanks_in_qiita()
579+
580+
self.update_status("Loading preps into Qiita", 7, 9)
581+
if self.update:
582+
self.update_prep_templates()
583+
584+
# before we load preps into Qiita we need to copy the fastq
585+
# files n times for n preps and correct the file-paths each
586+
# prep is pointing to.
587+
self.load_preps_into_qiita()
588+
589+
self.fsr.generate_report()
590+
591+
self.update_status("Generating packaging commands", 8, 9)
592+
self.generate_commands()
593+
594+
self.update_status("Packaging results", 9, 9)
595+
if self.update:
596+
self.execute_commands()
597+
508598

509599
class Metatranscriptomic(MetaOmic):
510600
METATRANSCRIPTOMIC_TYPE = 'Metatranscriptomic'

qp_klp/Protocol.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ def get_config(command):
7777
if 'ConvertJob' not in self.skip_steps:
7878
job.run(callback=self.job_callback)
7979

80+
# if successful, set self.reports_path
81+
self.reports_path = join(self.pipeline.output_path,
82+
'ConvertJob',
83+
'Reports',
84+
'Demultiplex_Stats.csv')
85+
# TODO: Include alternative path when using bcl2fastq instead of
86+
# bcl-convert.
87+
8088
# audit the results to determine which samples failed to convert
8189
# properly. Append these to the failed-samples report and also
8290
# return the list directly to the caller.
@@ -157,6 +165,11 @@ def generate_sequence_counts(self):
157165
if 'SeqCountsJob' not in self.skip_steps:
158166
job.run(callback=self.job_callback)
159167

168+
# if successful, set self.reports_path
169+
self.reports_path = join(self.pipeline.output_path,
170+
'SeqCountsJob',
171+
'SeqCounts.csv')
172+
160173
# Do not add an entry to fsr because w/respect to counting, either
161174
# all jobs are going to fail or none are going to fail. It's not
162175
# likely that we're going to fail to count sequences for only some

qp_klp/StandardAmpliconWorkflow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def __init__(self, **kwargs):
3535
# NB: Amplicon workflows don't have failed samples records because
3636
# the fastq files are not demultiplexed.
3737

38-
self.master_qiita_job_id = None
38+
self.master_qiita_job_id = self.kwargs['job_id']
3939

4040
self.lane_number = self.kwargs['lane_number']
4141
self.is_restart = bool(self.kwargs['is_restart'])

qp_klp/StandardMetagenomicWorkflow.py

Lines changed: 2 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from .Protocol import Illumina
2-
from os.path import join, abspath, exists
3-
from os import walk
2+
from os.path import join, exists
43
from shutil import rmtree
54
from sequence_processing_pipeline.Pipeline import Pipeline
65
from .Assays import Metagenomic
@@ -35,7 +34,7 @@ def __init__(self, **kwargs):
3534
self.fsr = FailedSamplesRecord(self.kwargs['output_dir'],
3635
self.pipeline.sample_sheet.samples)
3736

38-
self.master_qiita_job_id = None
37+
self.master_qiita_job_id = self.kwargs['job_id']
3938

4039
self.lane_number = self.kwargs['lane_number']
4140
self.is_restart = bool(self.kwargs['is_restart'])
@@ -67,119 +66,3 @@ def determine_steps_to_skip(self):
6766
else:
6867
# work stopped before this job could be completed.
6968
rmtree(join(out_dir, directory))
70-
71-
def execute_pipeline(self):
72-
'''
73-
Executes steps of pipeline in proper sequence.
74-
:return: None
75-
'''
76-
if not self.is_restart:
77-
self.pre_check()
78-
79-
# this is performed even in the event of a restart.
80-
self.generate_special_map()
81-
82-
# even if a job is being skipped, it's being skipped because it was
83-
# determined that it already completed successfully. Hence,
84-
# increment the status because we are still iterating through them.
85-
86-
self.update_status("Converting data", 1, 9)
87-
if "ConvertJob" not in self.skip_steps:
88-
# converting raw data to fastq depends heavily on the instrument
89-
# used to generate the run_directory. Hence this method is
90-
# supplied by the instrument mixin.
91-
# NB: convert_raw_to_fastq() now generates fsr on its own.
92-
self.convert_raw_to_fastq()
93-
94-
self.update_status("Performing quality control", 2, 9)
95-
if "NuQCJob" not in self.skip_steps:
96-
# quality_control generates its own fsr now
97-
self.quality_control(self.pipeline)
98-
99-
self.update_status("Generating reports", 3, 9)
100-
if "FastQCJob" not in self.skip_steps:
101-
# reports are currently implemented by the assay mixin. This is
102-
# only because metagenomic runs currently require a failed-samples
103-
# report to be generated. This is not done for amplicon runs since
104-
# demultiplexing occurs downstream of SPP.
105-
results = self.generate_reports()
106-
self.fsr_write(results, 'FastQCJob')
107-
108-
self.update_status("Generating preps", 4, 9)
109-
if "GenPrepFileJob" not in self.skip_steps:
110-
# preps are currently associated with array mixin, but only
111-
# because there are currently some slight differences in how
112-
# FastQCJob gets instantiated(). This could get moved into a
113-
# shared method, but probably still in Assay.
114-
self.generate_prep_file()
115-
116-
# moved final component of genprepfilejob outside of object.
117-
# obtain the paths to the prep-files generated by GenPrepFileJob
118-
# w/out having to recover full state.
119-
tmp = join(self.pipeline.output_path, 'GenPrepFileJob', 'PrepFiles')
120-
121-
self.has_replicates = False
122-
123-
prep_paths = []
124-
self.prep_file_paths = {}
125-
126-
for root, dirs, files in walk(tmp):
127-
for _file in files:
128-
# breakup the prep-info-file into segments
129-
# (run-id, project_qid, other) and cleave
130-
# the qiita-id from the project_name.
131-
qid = _file.split('.')[1].split('_')[-1]
132-
133-
if qid not in self.prep_file_paths:
134-
self.prep_file_paths[qid] = []
135-
136-
_path = abspath(join(root, _file))
137-
if _path.endswith('.tsv'):
138-
prep_paths.append(_path)
139-
self.prep_file_paths[qid].append(_path)
140-
141-
for _dir in dirs:
142-
if _dir == '1':
143-
# if PrepFiles contains the '1' directory, then it's a
144-
# given that this sample-sheet contains replicates.
145-
self.has_replicates = True
146-
147-
# currently imported from Assay although it is a base method. it
148-
# could be imported into Workflows potentially, since it is a post-
149-
# processing step. All pairings of assay and instrument type need to
150-
# generate prep-info files in the same format.
151-
self.overwrite_prep_files(prep_paths)
152-
153-
# for now, simply re-run any line below as if it was a new job, even
154-
# for a restart. functionality is idempotent, except for the
155-
# registration of new preps in Qiita. These will simply be removed
156-
# manually.
157-
158-
# post-processing steps are by default associated with the Workflow
159-
# class, since they deal with fastq files and Qiita, and don't depend
160-
# on assay or instrument type.
161-
self.update_status("Generating sample information", 5, 9)
162-
self.sifs = self.generate_sifs()
163-
164-
# post-processing step.
165-
self.update_status("Registering blanks in Qiita", 6, 9)
166-
if self.update:
167-
self.update_blanks_in_qiita()
168-
169-
self.update_status("Loading preps into Qiita", 7, 9)
170-
if self.update:
171-
self.update_prep_templates()
172-
173-
# before we load preps into Qiita we need to copy the fastq
174-
# files n times for n preps and correct the file-paths each
175-
# prep is pointing to.
176-
self.load_preps_into_qiita()
177-
178-
self.fsr.generate_report()
179-
180-
self.update_status("Generating packaging commands", 8, 9)
181-
self.generate_commands()
182-
183-
self.update_status("Packaging results", 9, 9)
184-
if self.update:
185-
self.execute_commands()

0 commit comments

Comments
 (0)