Skip to content

Commit

Permalink
Replace reissued jobs message with better progress indicators (#3044)
Browse files Browse the repository at this point in the history
* Only talk about over-long jobs if there actually are any

* Add a periodic overall status message

This can take over the role of the reissued over-long jobs message as a way of
marking time passing when nothing much is happening. Plus, it's more informative.

* Start doing exactly what Apt does for its progress bars

* Use Enlighten instead and try a running bar

* Set up a sensible progress bar and an off switch

I've removed the option to configure the scheduling status frequency for now, on
the theory that nobody actually wants it.
  • Loading branch information
adamnovak authored Apr 29, 2020
1 parent b523cc4 commit 8b45f1a
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 58 deletions.
3 changes: 3 additions & 0 deletions docs/running/cliOptions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,9 @@ the logging module:
--debugWorker Experimental no forking mode for local debugging.
Specifically, workers are not forked and stderr/stdout
are not redirected to the log. (default=False)
--disableProgress Disables the progress bar shown when standard error is
a terminal.


Restart Option
--------------
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def runSetup():
dateutil = 'python-dateutil'
addict = 'addict<=2.2.0'
pathlib2 = 'pathlib2==2.3.2'
enlighten = 'enlighten>=1.5.1, <2'

core_reqs = [
dill,
Expand All @@ -55,7 +56,8 @@ def runSetup():
psutil,
addict,
pathlib2,
pytz]
pytz,
enlighten]

aws_reqs = [
boto,
Expand Down
11 changes: 7 additions & 4 deletions src/toil/batchSystems/abstractBatchSystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
class BatchJobExitReason(enum.Enum):
FINISHED = 1 # Successfully finished.
FAILED = 2 # Job finished, but failed.
LOST = 3 # Preemptable failure.
LOST = 3 # Preemptable failure (job's executing host went away).
KILLED = 4 # Job killed before finishing.
ERROR = 5 # Internal error.

Expand Down Expand Up @@ -130,7 +130,8 @@ def issueBatchJob(self, jobNode):
def killBatchJobs(self, jobIDs):
"""
Kills the given job IDs. After returning, the killed jobs will not
appear in the results of getRunningBatchJobIDs.
appear in the results of getRunningBatchJobIDs. The killed job will not
be returned from getUpdatedBatchJob.
:param jobIDs: list of IDs of jobs to kill
:type jobIDs: list[int]
Expand Down Expand Up @@ -169,15 +170,17 @@ def getUpdatedBatchJob(self, maxWait):
Returns information about job that has updated its status (i.e. ceased
running, either successfully or with an error). Each such job will be
returned exactly once.
Does not return info for jobs killed by killBatchJobs, although they
may cause None to be returned earlier than maxWait.
:param float maxWait: the number of seconds to block, waiting for a result
:rtype: UpdatedBatchJobInfo or None
:return: If a result is available, returns UpdatedBatchJobInfo.
Otherwise it returns None. wallTime is the number of seconds (a strictly
positive float) in wall-clock time the job ran for, or None if this
batch system does not support tracking wall time. Returns None for jobs
that were killed.
batch system does not support tracking wall time.
"""
raise NotImplementedError()

Expand Down
5 changes: 5 additions & 0 deletions src/toil/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ def __init__(self):
self.useAsync = True
self.forceDockerAppliance = False
self.runCwlInternalJobsOnWorkers = False
self.statusWait = 3600
self.disableProgress = False

# Debug options
self.debugWorker = False
Expand Down Expand Up @@ -279,6 +281,7 @@ def parseIntList(s):
setOption("writeLogsGzip")
setOption("writeLogsFromAllJobs")
setOption("runCwlInternalJobsOnWorkers")
setOption("disableProgress")

assert not (self.writeLogs and self.writeLogsGzip), \
"Cannot use both --writeLogs and --writeLogsGzip at the same time."
Expand Down Expand Up @@ -603,6 +606,8 @@ def _addOptions(addGroupFn, config):
default=False,
help='Disables sanity checking the existence of the docker image specified by '
'TOIL_APPLIANCE_SELF, which Toil uses to provision mesos for autoscaling.')
addOptionFn('--disableProgress', dest='disableProgress', action='store_true', default=False,
help="Disables the progress bar shown when standard error is a terminal.")
#
# Debug options
#
Expand Down
Loading

0 comments on commit 8b45f1a

Please sign in to comment.