@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """Single and multi-threaded executors."""
 import datetime
+import functools
 import logging
 import math
 import os
@@ -34,6 +35,8 @@
 from .utils import CWLObjectType, JobsType
 from .workflow import Workflow
 from .workflow_job import WorkflowJob, WorkflowJobStep
+from .task_queue import TaskQueue
+
 
 TMPDIR_LOCK = Lock()
 
@@ -277,7 +280,6 @@ class MultithreadedJobExecutor(JobExecutor):
     def __init__(self) -> None:
         """Initialize."""
         super(MultithreadedJobExecutor, self).__init__()
-        self.threads = set()  # type: Set[threading.Thread]
         self.exceptions = []  # type: List[WorkflowException]
         self.pending_jobs = []  # type: List[JobsType]
         self.pending_jobs_lock = threading.Lock()
@@ -339,7 +341,6 @@ def _runner(self, job, runtime_context, TMPDIR_LOCK):
         finally:
             if runtime_context.workflow_eval_lock:
                 with runtime_context.workflow_eval_lock:
-                    self.threads.remove(threading.current_thread())
                     if isinstance(job, JobBase):
                         ram = job.builder.resources["ram"]
                         if not isinstance(ram, str):
@@ -362,6 +363,10 @@ def run_job(
         with self.pending_jobs_lock:
             n = 0
             while (n + 1) <= len(self.pending_jobs):
+                # Simple greedy resource allocation strategy. Go
+                # through pending jobs in the order they were
+                # generated and add them to the queue only if there
+                # are resources available.
                 job = self.pending_jobs[n]
                 if isinstance(job, JobBase):
                     ram = job.builder.resources["ram"]
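
The comment added above spells out the scheduling policy this loop implements: pending jobs are scanned in submission order, and a job is dispatched only when its declared ram and cores fit within the executor's remaining capacity; a job that does not fit stays in the pending list for the next pass. Below is a minimal stand-alone sketch of that first-fit gating; pick_runnable and the (name, ram, cores) tuples are hypothetical illustrations, not part of this codebase.

from typing import List, Tuple

def pick_runnable(pending,          # type: List[Tuple[str, int, int]]
                  allocated_ram,    # type: int
                  allocated_cores,  # type: int
                  max_ram,          # type: int
                  max_cores,        # type: int
                  ):
    # type: (...) -> List[str]
    """Hypothetical first-fit pass over (name, ram, cores) job tuples."""
    started = []
    for name, ram, cores in list(pending):
        # A job that does not fit right now is skipped, not dropped;
        # it is reconsidered on the next call.
        if allocated_ram + ram > max_ram or allocated_cores + cores > max_cores:
            continue
        allocated_ram += ram
        allocated_cores += cores
        started.append(name)
        pending.remove((name, ram, cores))
    return started

For example, pick_runnable([("a", 6000, 2), ("b", 4000, 2), ("c", 1000, 1)], 0, 0, 8000, 4) starts "a" and "c" but leaves "b" pending, the same effect the n += 1 / continue path in the hunk below produces.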
@@ -403,26 +408,24 @@ def run_job(
                         n += 1
                         continue
 
-                thread = threading.Thread(
-                    target=self._runner, args=(job, runtime_context, TMPDIR_LOCK)
-                )
-                thread.daemon = True
-                self.threads.add(thread)
                 if isinstance(job, JobBase):
                     ram = job.builder.resources["ram"]
                     if not isinstance(ram, str):
                         self.allocated_ram += ram
                     cores = job.builder.resources["cores"]
                     if not isinstance(cores, str):
                         self.allocated_cores += cores
-                thread.start()
+                self.taskqueue.add(
+                    functools.partial(self._runner, job, runtime_context, TMPDIR_LOCK),
+                    runtime_context.workflow_eval_lock,
+                )
                 self.pending_jobs.remove(job)
 
     def wait_for_next_completion(self, runtime_context):
         # type: (RuntimeContext) -> None
         """Wait for jobs to finish."""
         if runtime_context.workflow_eval_lock is not None:
-            runtime_context.workflow_eval_lock.wait()
+            runtime_context.workflow_eval_lock.wait(timeout=3)
         if self.exceptions:
             raise self.exceptions[0]
 
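
The only behavioural change to wait_for_next_completion is the bounded wait: workflow_eval_lock, which this diff acquires, releases, and waits on like a condition variable, is now waited on for at most three seconds per call. Both call sites in run_jobs re-check taskqueue.in_flight after every wakeup, and this method re-checks self.exceptions, so the timeout guards against a missed notification hanging the scheduler: the loop simply wakes up, finds nothing to do, and waits again. A tiny stand-alone illustration of the standard-library semantics relied on here; none of it is code from this project.

import threading

cond = threading.Condition()
with cond:
    # Nothing ever notifies this condition, so wait() would block forever;
    # wait(timeout=3) gives up after roughly three seconds and returns False.
    notified = cond.wait(timeout=3)
print(notified)  # -> False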
@@ -434,36 +437,46 @@ def run_jobs(
         runtime_context: RuntimeContext,
     ) -> None:
 
-        jobiter = process.job(job_order_object, self.output_callback, runtime_context)
+        self.taskqueue = TaskQueue(
+            threading.Lock(), psutil.cpu_count()
+        )  # type: TaskQueue
+        try:
 
-        if runtime_context.workflow_eval_lock is None:
-            raise WorkflowException(
-                "runtimeContext.workflow_eval_lock must not be None"
+            jobiter = process.job(
+                job_order_object, self.output_callback, runtime_context
             )
 
-        runtime_context.workflow_eval_lock.acquire()
-        for job in jobiter:
-            if job is not None:
-                if isinstance(job, JobBase):
-                    job.builder = runtime_context.builder or job.builder
-                    if job.outdir is not None:
-                        self.output_dirs.add(job.outdir)
+            if runtime_context.workflow_eval_lock is None:
+                raise WorkflowException(
+                    "runtimeContext.workflow_eval_lock must not be None"
+                )
 
-                self.run_job(job, runtime_context)
+            runtime_context.workflow_eval_lock.acquire()
+            for job in jobiter:
+                if job is not None:
+                    if isinstance(job, JobBase):
+                        job.builder = runtime_context.builder or job.builder
+                        if job.outdir is not None:
+                            self.output_dirs.add(job.outdir)
 
-            if job is None:
-                if self.threads:
-                    self.wait_for_next_completion(runtime_context)
-                else:
-                    logger.error("Workflow cannot make any more progress.")
-                    break
+                    self.run_job(job, runtime_context)
+
+                if job is None:
+                    if self.taskqueue.in_flight > 0:
+                        self.wait_for_next_completion(runtime_context)
+                    else:
+                        logger.error("Workflow cannot make any more progress.")
+                        break
 
-        self.run_job(None, runtime_context)
-        while self.threads:
-            self.wait_for_next_completion(runtime_context)
             self.run_job(None, runtime_context)
+            while self.taskqueue.in_flight > 0:
+                self.wait_for_next_completion(runtime_context)
+                self.run_job(None, runtime_context)
 
-        runtime_context.workflow_eval_lock.release()
+            runtime_context.workflow_eval_lock.release()
+        finally:
+            self.taskqueue.drain()
+            self.taskqueue.join()
 
 
 class NoopJobExecutor(JobExecutor):
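
Taken together, the diff replaces the hand-rolled self.threads set with a TaskQueue owned by run_jobs: runnable jobs are handed over with taskqueue.add(...), outstanding work is tracked through taskqueue.in_flight, and drain()/join() in the finally block shut the workers down even when the workflow fails. The sketch below is a toy stand-in for the interface those calls imply; it is not the implementation in the project's task_queue module, and the meaning of the second argument to add() (release the caller's lock while enqueueing so worker threads are not blocked on it) is an assumption.

import queue
import threading
from typing import Callable, List, Optional


class MiniTaskQueue:
    """Toy stand-in for the TaskQueue interface used in this diff."""

    def __init__(self, lock, thread_count):
        # type: (threading.Lock, int) -> None
        self.lock = lock            # guards the in_flight counter
        self.in_flight = 0          # tasks submitted but not yet finished
        self.tasks = queue.Queue()  # type: queue.Queue
        self.threads = []           # type: List[threading.Thread]
        for _ in range(thread_count):
            thread = threading.Thread(target=self._worker, daemon=True)
            thread.start()
            self.threads.append(thread)

    def _worker(self):
        # type: () -> None
        while True:
            task = self.tasks.get()
            if task is None:        # sentinel queued by drain(): stop this worker
                return
            try:
                task()
            finally:
                with self.lock:
                    self.in_flight -= 1

    def add(self, task, unlock=None):
        # type: (Callable[[], None], Optional[threading.Condition]) -> None
        with self.lock:
            self.in_flight += 1
        if unlock is not None:
            unlock.release()        # assumption: let workers take the lock meanwhile
        try:
            self.tasks.put(task)
        finally:
            if unlock is not None:
                unlock.acquire()

    def drain(self):
        # type: () -> None
        # One sentinel per worker; each worker exits after consuming one.
        for _ in self.threads:
            self.tasks.put(None)

    def join(self):
        # type: () -> None
        for thread in self.threads:
            thread.join()

Under that reading, run_jobs builds the queue once, run_job hands each runnable job to add(), both wait loops poll in_flight, and the finally block guarantees drain() and join() run even if the workflow raises, which matches the shape of the hunks above.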