12
12
from qp_klp .Metagenomic import Metagenomic
13
13
from qp_klp .Step import Step
14
14
from os import makedirs
15
- from os .path import join
15
+ from os .path import join , split , exists
16
16
from sequence_processing_pipeline .Pipeline import Pipeline
17
17
from sequence_processing_pipeline .PipelineError import PipelineError
18
+ from sequence_processing_pipeline .ConvertJob import ConvertJob
19
+ from metapool import load_sample_sheet
18
20
19
21
20
22
CONFIG_FP = environ ["QP_KLP_CONFIG_FP" ]
@@ -70,11 +72,46 @@ def sequence_processing_pipeline(qclient, job_id, parameters, out_dir):
70
72
bool, list, str
71
73
The results of the job
72
74
"""
73
- # available fields for parameters are:
74
- # run_identifier, sample_sheet, content_type, filename, lane_number
75
- run_identifier = parameters .pop ('run_identifier' )
76
- user_input_file = parameters .pop ('sample_sheet' )
77
- lane_number = parameters .pop ('lane_number' )
75
+ # Assume that for a job to be considered a restart, there must be work
76
+ # performed worth re-starting for. Since the working directory for each
77
+ # step is created only if the previous steps were successful, testing
78
+ # for their presence ensures that the n-1 prior steps exist and were
79
+ # successful.
80
+
81
+ # at minimum, ConvertJob needs to have been successful.
82
+ is_restart = True if exists (join (out_dir , 'NuQCJob' )) else False
83
+
84
+ if is_restart :
85
+ # Assume ConvertJob directory exists and parse the job-script found
86
+ # there. If this is a restart, we won't be given the run-identifier,
87
+ # the lane number, or the sample-sheet as input parameters.
88
+ some_path = join (out_dir , 'ConvertJob' , 'ConvertJob.sh' )
89
+ result = ConvertJob .parse_job_script (some_path )
90
+ run_identifier = split (result ['out_directory' ])[- 1 ]
91
+ user_input_file = result ['sample_sheet_path' ]
92
+ sheet = load_sample_sheet (user_input_file )
93
+ # on Amplicon runs, lane_number is always 1, and this will be
94
+ # properly reflected in the dummy sample-sheet as well.
95
+ lane_number = sheet .get_lane_number ()
96
+
97
+ # check if sample-sheet is a dummy-sample-sheet. If this is an
98
+ # Amplicon run, then Assay type will be 'TruSeq HT' and Chemistry
99
+ # will be 'Amplicon'. For now, raise ValueError on restarting an
100
+ # Amplicon run so we don't have to search for the pre-prep file.
101
+ if sheet .Header ['Assay' ] == 'TruSeq HT' and \
102
+ sheet .Header ['Chemistry' ] == 'Amplicon' :
103
+ raise ValueError ("Restarting Amplicon jobs currently unsupported" )
104
+
105
+ # add a note for the wetlab that this job was restarted.
106
+ with open (join (out_dir , 'notes.txt' ), 'w' ) as f :
107
+ f .write ("This job was restarted.\n "
108
+ "failed_samples.html may contain incorrect data.\n " )
109
+ else :
110
+ # available fields for parameters are:
111
+ # run_identifier, sample_sheet, content_type, filename, lane_number
112
+ run_identifier = parameters .pop ('run_identifier' )
113
+ user_input_file = parameters .pop ('sample_sheet' )
114
+ lane_number = parameters .pop ('lane_number' )
78
115
79
116
if {'body' , 'content_type' , 'filename' } != set (user_input_file ):
80
117
return False , None , ("This doesn't appear to be a valid sample sheet "
@@ -86,19 +123,14 @@ def sequence_processing_pipeline(qclient, job_id, parameters, out_dir):
86
123
# replace any whitespace in the filename with underscores
87
124
uif_path = out_path (user_input_file ['filename' ].replace (' ' , '_' ))
88
125
89
- # save raw data to file
90
- with open (uif_path , 'w' ) as f :
91
- f .write (user_input_file ['body' ])
126
+ if is_restart :
127
+ pass
128
+ else :
129
+ # save raw data to file
130
+ with open (uif_path , 'w' ) as f :
131
+ f .write (user_input_file ['body' ])
92
132
93
133
if Pipeline .is_sample_sheet (uif_path ):
94
- # if file follows basic sample-sheet format, then it is most likely
95
- # a sample-sheet, even if it's an invalid one.
96
-
97
- # a valid sample-sheet is going to have one and only one occurrence of
98
- # 'Assay,Metagenomic' or 'Assay,Metatranscriptomic'. Anything else is
99
- # an error.
100
-
101
- # works best from file
102
134
with open (uif_path , 'r' ) as f :
103
135
assay = [x for x in f .readlines () if 'Assay' in x ]
104
136
@@ -141,6 +173,21 @@ def sequence_processing_pipeline(qclient, job_id, parameters, out_dir):
141
173
status_line = StatusUpdate (qclient , job_id , msgs )
142
174
status_line .update_current_message ()
143
175
176
+ skip_steps = []
177
+ if is_restart :
178
+ # figure out what actually needs to be skipped if restarting:
179
+ if exists (join (out_dir , 'NuQCJob' )):
180
+ skip_steps .append ('ConvertJob' )
181
+
182
+ if exists (join (out_dir , 'FastQCJob' )):
183
+ skip_steps .append ('NuQCJob' )
184
+
185
+ if exists (join (out_dir , 'GenPrepFileJob' )):
186
+ skip_steps .append ('FastQCJob' )
187
+
188
+ if exists (join (out_dir , 'cmds.log' )):
189
+ skip_steps .append ('GenPrepFileJob' )
190
+
144
191
try :
145
192
pipeline = Step .generate_pipeline (pipeline_type ,
146
193
uif_path ,
@@ -157,10 +204,12 @@ def sequence_processing_pipeline(qclient, job_id, parameters, out_dir):
157
204
try :
158
205
if pipeline .pipeline_type in Step .META_TYPES :
159
206
step = Metagenomic (
160
- pipeline , job_id , status_line , lane_number )
207
+ pipeline , job_id , status_line , lane_number ,
208
+ is_restart = is_restart )
161
209
else :
162
210
step = Amplicon (
163
- pipeline , job_id , status_line , lane_number )
211
+ pipeline , job_id , status_line , lane_number ,
212
+ is_restart = is_restart )
164
213
165
214
status_line .update_current_message ()
166
215
@@ -170,7 +219,8 @@ def sequence_processing_pipeline(qclient, job_id, parameters, out_dir):
170
219
# files into uploads directory. Useful for testing.
171
220
step .execute_pipeline (qclient ,
172
221
status_line .update_current_message ,
173
- update = True )
222
+ update = True ,
223
+ skip_steps = skip_steps )
174
224
175
225
status_line .update_current_message ()
176
226
0 commit comments