@@ -48,6 +48,7 @@ def run_and_poll(*args, **kwargs):
48
48
:param str log_path: the location to store the log in (if not output path).
49
49
:param resume: whether to resume an existing execution.
50
50
:param function runner: a function to run the pipeline command.
51
+ :param io: an optional custom io object to handle file operations.
51
52
:param str version: the nextflow version to use.
52
53
:param list configs: any config files to be applied.
53
54
:param dict params: the parameters to pass.
@@ -66,16 +67,17 @@ def run_and_poll(*args, **kwargs):
66
67
67
68
def _run (
68
69
pipeline_path , resume = False , poll = False , run_path = None , output_path = None ,
69
- log_path = None ,runner = None ,
70
+ log_path = None , runner = None , io = None ,
70
71
version = None , configs = None , params = None , profiles = None , timezone = None ,
71
72
report = None , timeline = None , dag = None , trace = None , sleep = 1
72
73
):
73
- if not run_path : run_path = os .path .abspath ("." )
74
+ if not run_path and not io : run_path = os .path .abspath ("." )
75
+ if not run_path and io : run_path = io .abspath ("." )
74
76
if not output_path : output_path = run_path
75
77
if not log_path : log_path = output_path
76
78
nextflow_command = make_nextflow_command (
77
79
run_path , output_path , log_path , pipeline_path , resume , version , configs ,
78
- params , profiles , timezone , report , timeline , dag , trace
80
+ params , profiles , timezone , report , timeline , dag , trace , io
79
81
)
80
82
start = datetime .now ()
81
83
if runner :
@@ -86,11 +88,11 @@ def _run(
86
88
nextflow_command , universal_newlines = True , shell = True
87
89
)
88
90
execution , log_start = None , 0
89
- if resume : wait_for_log_creation (log_path , start )
91
+ if resume : wait_for_log_creation (log_path , start , io )
90
92
while True :
91
93
time .sleep (sleep )
92
94
execution , diff = get_execution (
93
- output_path , log_path , nextflow_command , execution , log_start , timezone
95
+ output_path , log_path , nextflow_command , execution , log_start , timezone , io
94
96
)
95
97
log_start += diff
96
98
if execution and poll : yield execution
@@ -100,7 +102,7 @@ def _run(
100
102
break
101
103
102
104
103
- def make_nextflow_command (run_path , output_path , log_path , pipeline_path , resume ,version , configs , params , profiles , timezone , report , timeline , dag , trace ):
105
+ def make_nextflow_command (run_path , output_path , log_path , pipeline_path , resume ,version , configs , params , profiles , timezone , report , timeline , dag , trace , io ):
104
106
"""Generates the `nextflow run` command.
105
107
106
108
:param str run_path: the location to run the pipeline in.
@@ -117,6 +119,7 @@ def make_nextflow_command(run_path, output_path, log_path, pipeline_path, resume
117
119
:param str timeline: the filename to use for the timeline report.
118
120
:param str dag: the filename to use for the DAG report.
119
121
:param str trace: the filename to use for the trace report.
122
+ :param io: an optional custom io object to handle file operations.
120
123
:rtype: ``str``"""
121
124
122
125
env = make_nextflow_command_env_string (version , timezone , output_path , run_path )
@@ -132,7 +135,8 @@ def make_nextflow_command(run_path, output_path, log_path, pipeline_path, resume
132
135
profiles = make_nextflow_command_profiles_string (profiles )
133
136
reports = make_reports_string (output_path , report , timeline , dag , trace )
134
137
command = f"{ env } { nf } { log } { configs } run { pipeline_path } { resume } { params } { profiles } { reports } "
135
- if run_path != os .path .abspath ("." ): command = f"cd { run_path } ; { command } "
138
+ abspath = io .abspath if io else os .path .abspath
139
+ if run_path != abspath ("." ): command = f"cd { run_path } ; { command } "
136
140
prefix = (str (output_path ) + os .path .sep ) if output_path != run_path else ""
137
141
command = command .rstrip () + f" >{ prefix } "
138
142
command += f"stdout.txt 2>{ prefix } "
@@ -239,19 +243,20 @@ def make_reports_string(output_path, report, timeline, dag, trace):
239
243
return " " .join (params )
240
244
241
245
242
def wait_for_log_creation(output_path, start, io=None):
    """Waits for a log file for this execution to be created.

    Checks the creation time of the `.nextflow.log` file inside the given
    directory every 0.1 seconds, and returns as soon as that time is later
    than the supplied start time. There is no timeout.

    :param str output_path: the directory in which `.nextflow.log` is looked for.
    :param datetime start: the start time.
    :param io: an optional custom io object to handle file operations."""

    log_file = os.path.join(output_path, ".nextflow.log")
    while True:
        # Other call sites invoke get_file_creation_time(path, timezone, io),
        # so io must occupy the third slot - passing it second would bind it
        # as the timezone. None is what callers pass when no timezone is set.
        created = get_file_creation_time(log_file, None, io)
        if created and created > start:
            break
        time.sleep(0.1)
252
257
253
258
254
- def get_execution (execution_path , log_path , nextflow_command , execution = None , log_start = 0 , timezone = None ):
259
+ def get_execution (execution_path , log_path , nextflow_command , execution = None , log_start = 0 , timezone = None , io = None ):
255
260
"""Creates an execution object from a location. If you are polling, you can
256
261
pass in the previous execution to update it with new information.
257
262
@@ -260,33 +265,36 @@ def get_execution(execution_path, log_path, nextflow_command, execution=None, lo
260
265
:param str nextflow_command: the command used to run the pipeline.
261
266
:param nextflow.models.Execution execution: the existing execution, if any.
262
267
:param int log_start: the number of characters already read from the log.
268
+ :param str timezone: the timezone to use for the log.
269
+ :param io: an optional custom io object to handle file operations.
263
270
:rtype: ``nextflow.models.Execution``"""
264
271
265
- log = get_file_text (os .path .join (log_path , ".nextflow.log" ))
272
+ log = get_file_text (os .path .join (log_path , ".nextflow.log" ), io )
266
273
if not log : return None , 0
267
274
log = log [log_start :]
268
- execution = make_or_update_execution (log , execution_path , nextflow_command , execution )
269
- process_executions , changed = get_initial_process_executions (log , execution )
275
+ execution = make_or_update_execution (log , execution_path , nextflow_command , execution , io )
276
+ process_executions , changed = get_initial_process_executions (log , execution , io )
270
277
no_path = [k for k , v in process_executions .items () if not v .path ]
271
- process_ids_to_paths = get_process_ids_to_paths (no_path , execution_path )
278
+ process_ids_to_paths = get_process_ids_to_paths (no_path , execution_path , io )
272
279
for process_id , path in process_ids_to_paths .items ():
273
280
process_executions [process_id ].path = path
274
281
for process_execution in process_executions .values ():
275
282
if not process_execution .finished or not process_execution .started or \
276
283
process_execution .identifier in changed :
277
- update_process_execution_from_path (process_execution , execution_path , timezone )
284
+ update_process_execution_from_path (process_execution , execution_path , timezone , io )
278
285
execution .process_executions = list (process_executions .values ())
279
286
return execution , len (log )
280
287
281
288
282
- def make_or_update_execution (log , execution_path , nextflow_command , execution ):
289
+ def make_or_update_execution (log , execution_path , nextflow_command , execution , io ):
283
290
"""Creates an Execution object from a log file, or updates an existing one
284
291
from a previous poll.
285
292
286
293
:param str log: a section of the log file.
287
294
:param str execution_path: the location of the execution.
288
295
:param str nextflow_command: the command used to run the pipeline.
289
296
:param nextflow.models.Execution execution: the existing execution.
297
+ :param io: an optional custom io object to handle file operations.
290
298
:rtype: ``nextflow.models.Execution``"""
291
299
292
300
if not execution :
@@ -303,13 +311,13 @@ def make_or_update_execution(log, execution_path, nextflow_command, execution):
303
311
if not execution .finished : execution .finished = get_finished_from_log (log )
304
312
if not execution .session_uuid : execution .session_uuid = get_session_uuid_from_log (log )
305
313
execution .log += log
306
- execution .stdout = get_file_text (os .path .join (execution_path , "stdout.txt" ))
307
- execution .stderr = get_file_text (os .path .join (execution_path , "stderr.txt" ))
308
- execution .return_code = get_file_text (os .path .join (execution_path , "rc.txt" )).rstrip ()
314
+ execution .stdout = get_file_text (os .path .join (execution_path , "stdout.txt" ), io )
315
+ execution .stderr = get_file_text (os .path .join (execution_path , "stderr.txt" ), io )
316
+ execution .return_code = get_file_text (os .path .join (execution_path , "rc.txt" ), io ).rstrip ()
309
317
return execution
310
318
311
319
312
- def get_initial_process_executions (log , execution ):
320
+ def get_initial_process_executions (log , execution , io ):
313
321
"""Parses a section of a log file and looks for new process executions not
314
322
currently in the list, or uncompleted ones which can now be completed. Some
315
323
attributes are not yet filled in.
@@ -318,6 +326,7 @@ def get_initial_process_executions(log, execution):
318
326
319
327
:param str log: a section of the log file.
320
328
:param nextflow.models.Execution execution: the containing execution.
329
+ :param io: an optional custom io object to handle file operations.
321
330
:rtype: ``tuple``"""
322
331
323
332
lines = log .splitlines ()
@@ -326,7 +335,7 @@ def get_initial_process_executions(log, execution):
326
335
for line in lines :
327
336
if "Submitted process" in line or "Cached process" in line :
328
337
is_cached = "Cached process" in line
329
- proc_ex = create_process_execution_from_line (line , is_cached )
338
+ proc_ex = create_process_execution_from_line (line , is_cached , io )
330
339
if not proc_ex : continue
331
340
proc_ex .execution = execution
332
341
process_executions [proc_ex .identifier ] = proc_ex
@@ -338,12 +347,13 @@ def get_initial_process_executions(log, execution):
338
347
return process_executions , just_updated
339
348
340
349
341
- def create_process_execution_from_line (line , cached = False ):
350
+ def create_process_execution_from_line (line , cached = False , io = None ):
342
351
"""Creates a process execution from a line of the log file in which its
343
352
submission (or previous caching) is reported.
344
353
345
354
:param str line: a line from the log file.
346
355
:param bool cached: whether the process is cached.
356
+ :param io: an optional custom io object to handle file operations.
347
357
:rtype: ``nextflow.models.ProcessExecution``"""
348
358
349
359
if cached :
@@ -357,7 +367,7 @@ def create_process_execution_from_line(line, cached=False):
357
367
path = "" , stdout = "" , stderr = "" , bash = "" , started = None , finished = None ,
358
368
return_code = "0" if cached else "" ,
359
369
status = "COMPLETED" if cached else "-" ,
360
- cached = cached
370
+ cached = cached , io = io
361
371
)
362
372
363
373
@@ -380,20 +390,22 @@ def update_process_execution_from_line(process_executions, line):
380
390
return identifier
381
391
382
392
383
- def update_process_execution_from_path (process_execution , execution_path , timezone = None ):
393
+ def update_process_execution_from_path (process_execution , execution_path , timezone = None , io = None ):
384
394
"""Some attributes of a process execution need to be obtained from files on
385
395
disk. This function updates the process execution with these values.
386
396
387
397
:param nextflow.models.ProcessExecution process_execution: the process execution.
388
- :param str execution_path: the location of the containing execution."""
398
+ :param str execution_path: the location of the containing execution.
399
+ :param str timezone: the timezone to use for the log.
400
+ :param io: an optional custom io object to handle file operations."""
389
401
390
402
if not process_execution .path : return
391
403
full_path = os .path .join (execution_path , "work" , process_execution .path )
392
- process_execution .stdout = get_file_text (os .path .join (full_path , ".command.out" ))
393
- process_execution .stderr = get_file_text (os .path .join (full_path , ".command.err" ))
404
+ process_execution .stdout = get_file_text (os .path .join (full_path , ".command.out" ), io )
405
+ process_execution .stderr = get_file_text (os .path .join (full_path , ".command.err" ), io )
394
406
if not process_execution .started and not process_execution .cached :
395
- process_execution .started = get_file_creation_time (os .path .join (full_path , ".command.begin" ), timezone )
407
+ process_execution .started = get_file_creation_time (os .path .join (full_path , ".command.begin" ), timezone , io )
396
408
if not process_execution .bash :
397
- process_execution .bash = get_file_text (os .path .join (full_path , ".command.sh" ))
409
+ process_execution .bash = get_file_text (os .path .join (full_path , ".command.sh" ), io )
398
410
if process_execution .execution .finished and not process_execution .return_code :
399
411
process_execution .return_code = process_execution .execution .return_code
0 commit comments