Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support CWL 1.2.1 #4682

Merged
merged 27 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
0fe1551
cwl: use the latest commit from the proposed CWL v1.2.1 branch
mr-c Aug 16, 2023
9654d5f
Merge branch 'cwl_test_cwl_1.2.1' of github.com:DataBiosphere/toil in…
stxue1 Sep 26, 2023
9bbf27a
Double default CWL conformance test timeout
stxue1 Sep 26, 2023
562490f
Merge branch 'master' of github.com:DataBiosphere/toil into issues/45…
stxue1 Nov 1, 2023
44563f6
Support abs path for directory outputs
stxue1 Nov 2, 2023
3f45b76
Merge branch 'master' of github.com:DataBiosphere/toil into issues/45…
stxue1 Nov 2, 2023
fb713c2
Better comment for why local paths are permitted
stxue1 Nov 3, 2023
1e27125
Pass remaining CWL 1.2.1 tests
stxue1 Nov 3, 2023
8b36387
Update cwl 1.2.1 conformance tests to latest commit
stxue1 Nov 3, 2023
e9cbcf4
mypy types
stxue1 Nov 4, 2023
be6b994
Change expected output of cwl tests to match cwltool
stxue1 Nov 9, 2023
ff3b8e6
Fix rest of hardcoded cwl tests
stxue1 Nov 10, 2023
a433670
Merge branch 'master' of github.com:DataBiosphere/toil into issues/45…
stxue1 Nov 10, 2023
6ae4546
Fix nested test values
stxue1 Nov 11, 2023
bb219b2
Remove format function in favor of adding staging step, add relax-pat…
stxue1 Nov 14, 2023
8357c15
Merge branch 'master' into issues/4570-support-cwl-121
mr-c Nov 15, 2023
f84b94a
Merge branch 'master' of github.com:DataBiosphere/toil into issues/45…
stxue1 Nov 15, 2023
ef1864d
Merge branch 'issues/4570-support-cwl-121' of github.com:DataBiospher…
stxue1 Nov 15, 2023
868ad90
Add --bypass-file-store
stxue1 Nov 15, 2023
e18b003
undo --bypass-file-store
stxue1 Nov 16, 2023
1296f28
Merge branch 'master' into issues/4570-support-cwl-121
stxue1 Nov 16, 2023
4c714da
run make format
stxue1 Nov 16, 2023
f561514
Merge branch 'master' of github.com:DataBiosphere/toil into issues/45…
stxue1 Nov 16, 2023
419d28d
Merge branch 'issues/4570-support-cwl-121' of github.com:DataBiospher…
stxue1 Nov 16, 2023
b6c695b
Merge branch 'master' into issues/4570-support-cwl-121
stxue1 Nov 30, 2023
4560213
Merge branch 'master' of github.com:DataBiosphere/toil into issues/45…
stxue1 Dec 4, 2023
cfa3f63
Fix CWL glob test output
stxue1 Dec 4, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions src/toil/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
Union,
cast,
overload)
from urllib.parse import urlparse
from urllib.parse import urlparse, unquote, quote

import requests
from configargparse import ArgParser, YAMLConfigFileParser
Expand Down Expand Up @@ -2167,10 +2167,8 @@ def normalize_uri(uri: str, check_existence: bool = False) -> str:
:param check_existence: If set, raise FileNotFoundError if a URI points to
a local file that does not exist.
"""
if urlparse(uri).scheme == "file":
uri = urlparse(
uri
).path # this should strip off the local file scheme; it will be added back
if urlparse(uri).scheme == 'file':
uri = unquote(urlparse(uri).path) # this should strip off the local file scheme; it will be added back

# account for the scheme-less case, which should be coerced to a local absolute path
if urlparse(uri).scheme == "":
Expand All @@ -2179,9 +2177,8 @@ def normalize_uri(uri: str, check_existence: bool = False) -> str:
raise FileNotFoundError(
f'Could not find local file "{abs_path}" when importing "{uri}".\n'
f'Make sure paths are relative to "{os.getcwd()}" or use absolute paths.\n'
f"If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.)."
)
return f"file://{abs_path}"
f'If this is not a local file, please include the scheme (s3:/, gs:/, ftp://, etc.).')
return f'file://{quote(abs_path)}'
return uri

def _setBatchSystemEnvVars(self) -> None:
Expand Down
25 changes: 20 additions & 5 deletions src/toil/cwl/cwltoil.py
stxue1 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
TypeVar,
Union,
cast,
Sequence,
)
from urllib.parse import quote, unquote, urlparse, urlsplit

Expand Down Expand Up @@ -689,6 +690,8 @@ def __init__(
streaming on, and returns a file: URI to where the file or
directory has been downloaded to. Meant to be a partially-bound
version of toil_get_file().
:param referenced_files: List of CWL File and Directory objects, which can have their locations set as both
virtualized and absolute local paths
"""
self.get_file = get_file
self.stage_listing = stage_listing
Expand All @@ -710,8 +713,9 @@ def visit(
This is called on each File or Directory CWL object. The Files and
Directories all have "location" fields. For the Files, these are from
upload_file(), and for the Directories, these are from
upload_directory(), with their children being assigned
locations based on listing the Directories using ToilFsAccess.
upload_directory() or cwltool internally. With upload_directory(), they and their children will be assigned
locations based on listing the Directories using ToilFsAccess. With cwltool, locations will be set as absolute
paths.

:param obj: The CWL File or Directory to process

Expand Down Expand Up @@ -842,6 +846,14 @@ def visit(
# We can't really make the directory. Maybe we are
# exporting from the leader and it doesn't matter.
resolved = location
elif location.startswith("/"):
# Test if path is an absolute local path
# Does not check if the path is relative
# While Toil encodes paths into a URL with ToilPathMapper,
# something called internally in cwltool may return an absolute path
# ex: if cwltool calls itself internally in command_line_tool.py,
# it collects outputs with collect_output, and revmap_file will use its own internal pathmapper
resolved = location
else:
raise RuntimeError("Unsupported location: " + location)

Expand Down Expand Up @@ -918,7 +930,6 @@ def visit(
)
else:
deref = ab

if deref.startswith("file:"):
deref = schema_salad.ref_resolver.uri_file_path(deref)
if urlsplit(deref).scheme in ["http", "https"]:
Expand Down Expand Up @@ -2075,7 +2086,7 @@ def _realpath(
"WritableDirectory",
]:
os.makedirs(p.target)
if not os.path.exists(p.target) and p.type in ["File", "WritableFile"]:
if p.type in ["File", "WritableFile"]:
if p.resolved.startswith("toilfile:"):
# We can actually export this
os.makedirs(os.path.dirname(p.target), exist_ok=True)
Expand All @@ -2088,7 +2099,7 @@ def _realpath(
os.makedirs(os.path.dirname(p.target), exist_ok=True)
shutil.copyfile(p.resolved, p.target)
# TODO: can a toildir: "file" get here?
if not os.path.exists(p.target) and p.type in [
if p.type in [
"CreateFile",
"CreateWritableFile",
]:
Expand All @@ -2111,6 +2122,7 @@ def _check_adjust(f: CWLObjectType) -> CWLObjectType:
# Make the location point to the place we put this thing on the
# local filesystem.
f["location"] = schema_salad.ref_resolver.file_uri(mapped_location.target)
f["path"] = mapped_location.target

if "contents" in f:
del f["contents"]
Expand Down Expand Up @@ -3214,6 +3226,8 @@ def determine_load_listing(
class NoAvailableJobStoreException(Exception):
"""Indicates that no job store name is available."""

pass


def generate_default_job_store(
batch_system_name: Optional[str],
Expand Down Expand Up @@ -3307,6 +3321,7 @@ def add_cwl_options(parser: argparse.ArgumentParser) -> None:
'For example: "%(prog)s workflow.cwl --file1 file". '
"If an input has the same name as a Toil option, pass '--' before it.",
)

parser.add_argument("--not-strict", action="store_true")
parser.add_argument(
"--enable-dev",
Expand Down
13 changes: 7 additions & 6 deletions src/toil/jobStores/fileJobStore.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,13 +317,14 @@ def _import_file(self, otherCls, uri, shared_file_name=None, hardlink=False, sym
# symlink argument says whether the caller can take symlinks or not
# ex: if false, it implies the workflow cannot work with symlinks and thus will hardlink imports
# default is true since symlinking everything is ideal
uri_path = unquote(uri.path)
if issubclass(otherCls, FileJobStore):
if os.path.isdir(uri.path):
if os.path.isdir(uri_path):
# Don't allow directories (unless someone is racing us)
raise IsADirectoryError(f"URI {uri} points to a directory but a file was expected")
if shared_file_name is None:
executable = os.stat(uri.path).st_mode & stat.S_IXUSR != 0
absPath = self._get_unique_file_path(uri.path) # use this to get a valid path to write to in job store
executable = os.stat(uri_path).st_mode & stat.S_IXUSR != 0
absPath = self._get_unique_file_path(uri_path) # use this to get a valid path to write to in job store
with self.optional_hard_copy(hardlink):
self._copy_or_link(uri, absPath, symlink=symlink)
# TODO: os.stat(absPath).st_size consistently gives values lower than
Expand Down Expand Up @@ -819,7 +820,7 @@ def _get_file_path_from_id(self, jobStoreFileID):
"""

# We just make the file IDs paths under the job store overall.
absPath = os.path.join(self.jobStoreDir, jobStoreFileID)
absPath = os.path.join(self.jobStoreDir, unquote(jobStoreFileID))

# Don't validate here, we are called by the validation logic

Expand All @@ -832,14 +833,14 @@ def _get_file_id_from_path(self, absPath):
:rtype : string, string is the file ID.
"""

return absPath[len(self.jobStoreDir)+1:]
return quote(absPath[len(self.jobStoreDir)+1:])

def _check_job_store_file_id(self, jobStoreFileID):
"""
:raise NoSuchFileException: if the file with ID jobStoreFileID does
not exist or is not a file
"""
if not self.file_exists(jobStoreFileID):
if not self.file_exists(unquote(jobStoreFileID)):
raise NoSuchFileException(jobStoreFileID)

def _get_arbitrary_jobs_dir_for_name(self, jobNameSlug):
Expand Down
38 changes: 28 additions & 10 deletions src/toil/test/cwl/cwlTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
from toil.test.provisioners.clusterTest import AbstractClusterTest

log = logging.getLogger(__name__)
CONFORMANCE_TEST_TIMEOUT = 5000
CONFORMANCE_TEST_TIMEOUT = 10000


def run_conformance_tests(
Expand Down Expand Up @@ -134,10 +134,10 @@ def run_conformance_tests(
"--logDebug",
"--statusWait=10",
"--retryCount=2",
"--relax-path-checks",
f"--caching={caching}"
]

args_passed_directly_to_runner.append(f"--caching={caching}")

if extra_args:
args_passed_directly_to_runner += extra_args

Expand Down Expand Up @@ -385,6 +385,7 @@ def test_run_colon_output(self):
)

def test_glob_dir_bypass_file_store(self):
self.maxDiff = 1000
try:
# We need to output to the current directory to make sure that
# works.
Expand Down Expand Up @@ -609,10 +610,12 @@ def test_preemptible_expression(self):

@staticmethod
def _expected_seqtk_output(outDir):
loc = "file://" + os.path.join(outDir, "out")
path = os.path.join(outDir, "out")
loc = "file://" + path
return {
"output1": {
"location": loc,
"path": path,
"checksum": "sha1$322e001e5a99f19abdce9f02ad0f02a17b5066c2",
"basename": "out",
"class": "File",
Expand All @@ -622,10 +625,12 @@ def _expected_seqtk_output(outDir):

@staticmethod
def _expected_revsort_output(outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"path": path,
"basename": "output.txt",
"size": 1111,
"class": "File",
Expand All @@ -635,10 +640,12 @@ def _expected_revsort_output(outDir):

@staticmethod
def _expected_revsort_nochecksum_output(outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"path": path,
"basename": "output.txt",
"size": 1111,
"class": "File",
Expand All @@ -647,24 +654,29 @@ def _expected_revsort_nochecksum_output(outDir):

@staticmethod
def _expected_download_output(outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"basename": "output.txt",
"size": 0,
"class": "File",
"checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
"path": path
}
}

@staticmethod
def _expected_glob_dir_output(out_dir):
dir_loc = "file://" + os.path.join(out_dir, "shouldmake")
dir_path = os.path.join(out_dir, "shouldmake")
dir_loc = "file://" + dir_path
file_path = os.path.join(dir_path, "test.txt")
file_loc = os.path.join(dir_loc, "test.txt")
return {
"shouldmake": {
"location": dir_loc,
"path": dir_path,
"basename": "shouldmake",
"nameroot": "shouldmake",
"nameext": "",
Expand All @@ -673,6 +685,7 @@ def _expected_glob_dir_output(out_dir):
{
"class": "File",
"location": file_loc,
"path": file_path,
"basename": "test.txt",
"checksum": "sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709",
"size": 0,
Expand All @@ -695,10 +708,12 @@ def _expected_load_contents_output(cls, out_dir):

@staticmethod
def _expected_colon_output(outDir):
path = os.path.join(outDir, "A:Gln2Cys_result")
loc = "file://" + os.path.join(outDir, "A%3AGln2Cys_result")
return {
"result": {
"location": loc,
"path": path,
"basename": "A:Gln2Cys_result",
"class": "Directory",
"listing": [
Expand All @@ -710,16 +725,19 @@ def _expected_colon_output(outDir):
"size": 1111,
"nameroot": "whale",
"nameext": ".txt",
"path": f"{path}/whale.txt"
}
],
}
}

def _expected_streaming_output(self, outDir):
loc = "file://" + os.path.join(outDir, "output.txt")
path = os.path.join(outDir, "output.txt")
loc = "file://" + path
return {
"output": {
"location": loc,
"path": path,
"basename": "output.txt",
"size": 24,
"class": "File",
Expand Down Expand Up @@ -926,7 +944,7 @@ def setUpClass(cls):
cls.test_yaml = os.path.join(cls.cwlSpec, "conformance_tests.yaml")
# TODO: Use a commit zip in case someone decides to rewrite master's history?
url = "https://github.com/common-workflow-language/cwl-v1.2.git"
commit = "8c3fd9d9f0209a51c5efacb1c7bc02a1164688d6"
commit = "0d538a0dbc5518f3c6083ce4571926f65cb84f76"
p = subprocess.Popen(
f"git clone {url} {cls.cwlSpec} && cd {cls.cwlSpec} && git checkout {commit}",
shell=True,
Expand Down