Skip to content

Commit 5495aa5

Browse files
authored
Rework how identifiers are rewritten in packing to handle $imports. (#277)
* Rework how identifiers are rewritten in packing to handle $imports.
* Don't prepend filename to ids in primary file.
* Only rewrite file:// ids.
* Packing also collects schemas.
1 parent 4a3bba3 commit 5495aa5

File tree

6 files changed

+82
-30
lines changed

6 files changed

+82
-30
lines changed

cwltool/load_tool.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def validate_document(document_loader, # type: Loader
123123
}
124124

125125
if not isinstance(workflowobj, dict):
126-
raise ValueError("workflowjobj must be a dict")
126+
raise ValueError("workflowjobj must be a dict, got '%s': %s" % (type(workflowobj), workflowobj))
127127

128128
jobobj = None
129129
if "cwl:tool" in workflowobj:

cwltool/pack.py

+75-29
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import copy
2-
import json
2+
import urlparse
33

44
from schema_salad.ref_resolver import Loader
55

@@ -19,68 +19,114 @@ def flatten_deps(d, files): # type: (Any, Set[Text]) -> None
1919
if "listing" in d:
2020
flatten_deps(d["listing"], files)
2121

22-
def find_run(d, runs): # type: (Any, Set[Text]) -> None
22+
def find_run(d, loadref, runs): # type: (Any, Callable[[Text, Text], Union[Dict, List, Text]], Set[Text]) -> None
2323
if isinstance(d, list):
2424
for s in d:
25-
find_run(s, runs)
25+
find_run(s, loadref, runs)
2626
elif isinstance(d, dict):
2727
if "run" in d and isinstance(d["run"], (str, unicode)):
28-
runs.add(d["run"])
28+
if d["run"] not in runs:
29+
runs.add(d["run"])
30+
find_run(loadref(None, d["run"]), loadref, runs)
2931
for s in d.values():
30-
find_run(s, runs)
32+
find_run(s, loadref, runs)
33+
34+
def find_ids(d, ids): # type: (Any, Set[Text]) -> None
35+
if isinstance(d, list):
36+
for s in d:
37+
find_ids(s, ids)
38+
elif isinstance(d, dict):
39+
for i in ("id", "name"):
40+
if i in d and isinstance(d[i], (str, unicode)):
41+
ids.add(d[i])
42+
for s in d.values():
43+
find_ids(s, ids)
3144

3245
def replace_refs(d, rewrite, stem, newstem):
3346
# type: (Any, Dict[Text, Text], Text, Text) -> None
3447
if isinstance(d, list):
3548
for s,v in enumerate(d):
36-
if isinstance(v, (str, unicode)) and v.startswith(stem):
37-
d[s] = newstem + v[len(stem):]
49+
if isinstance(v, (str, unicode)):
50+
if v in rewrite:
51+
d[s] = rewrite[v]
52+
elif v.startswith(stem):
53+
d[s] = newstem + v[len(stem):]
3854
else:
3955
replace_refs(v, rewrite, stem, newstem)
4056
elif isinstance(d, dict):
41-
if "run" in d and isinstance(d["run"], (str, unicode)):
42-
d["run"] = rewrite[d["run"]]
4357
for s,v in d.items():
44-
if isinstance(v, (str, unicode)) and v.startswith(stem):
45-
d[s] = newstem + v[len(stem):]
58+
if isinstance(v, (str, unicode)):
59+
if v in rewrite:
60+
d[s] = rewrite[v]
61+
elif v.startswith(stem):
62+
d[s] = newstem + v[len(stem):]
4663
replace_refs(v, rewrite, stem, newstem)
4764

4865
def pack(document_loader, processobj, uri, metadata):
4966
# type: (Loader, Union[Dict[Text, Any], List[Dict[Text, Any]]], Text, Dict[Text, Text]) -> Dict[Text, Any]
5067
def loadref(b, u):
5168
# type: (Text, Text) -> Union[Dict, List, Text]
5269
return document_loader.resolve_ref(u, base_url=b)[0]
53-
deps = scandeps(uri, processobj, set(("run",)), set(), loadref)
5470

55-
fdeps = set((uri,))
56-
flatten_deps(deps, fdeps)
71+
runs = set((uri,))
72+
find_run(processobj, loadref, runs)
5773

58-
runs = set() # type: Set[Text]
59-
for f in fdeps:
60-
find_run(document_loader.idx[f], runs)
74+
ids = set() # type: Set[Text]
75+
for f in runs:
76+
find_ids(document_loader.resolve_ref(f)[0], ids)
6177

6278
names = set() # type: Set[Text]
63-
rewrite = {}
64-
if isinstance(processobj, list):
65-
for p in processobj:
66-
rewrite[p["id"]] = "#" + uniquename(shortname(p["id"]), names)
67-
else:
68-
rewrite[uri] = "#main"
79+
rewrite = {} # type: Dict[Text, Text]
6980

70-
for r in sorted(runs):
71-
rewrite[r] = "#" + uniquename(shortname(r), names)
81+
mainpath, _ = urlparse.urldefrag(uri)
82+
83+
def rewrite_id(r, mainuri):
84+
# type: (Text, Text) -> None
85+
if r == mainuri:
86+
rewrite[r] = "#main"
87+
elif r.startswith(mainuri) and r[len(mainuri)] in ("#", "/"):
88+
pass
89+
else:
90+
path, frag = urlparse.urldefrag(r)
91+
if path == mainpath:
92+
rewrite[r] = "#" + uniquename(frag, names)
93+
else:
94+
if path not in rewrite:
95+
rewrite[path] = "#" + uniquename(shortname(path), names)
96+
97+
sortedids = sorted(ids)
98+
99+
for r in sortedids:
100+
if r.startswith("file://"):
101+
rewrite_id(r, uri)
72102

73103
packed = {"$graph": [], "cwlVersion": metadata["cwlVersion"]
74104
} # type: Dict[Text, Any]
75105

76-
for r in sorted(rewrite.keys()):
106+
schemas = set() # type: Set[Text]
107+
for r in sorted(runs):
108+
dcr, metadata = document_loader.resolve_ref(r)
109+
if not isinstance(dcr, dict):
110+
continue
111+
for doc in (dcr, metadata):
112+
if "$schemas" in doc:
113+
for s in doc["$schemas"]:
114+
schemas.add(s)
115+
if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
116+
continue
117+
dc = cast(Dict[Text, Any], copy.deepcopy(dcr))
77118
v = rewrite[r]
78-
dc = cast(Dict[Text, Any], copy.deepcopy(document_loader.idx[r]))
79119
dc["id"] = v
80-
for n in ("name", "cwlVersion"):
120+
for n in ("name", "cwlVersion", "$namespaces", "$schemas"):
81121
if n in dc:
82122
del dc[n]
83-
replace_refs(dc, rewrite, r+"/" if "#" in r else r+"#", v+"/")
84123
packed["$graph"].append(dc)
85124

125+
if schemas:
126+
packed["$schemas"] = list(schemas)
127+
128+
for r in rewrite:
129+
v = rewrite[r]
130+
replace_refs(packed, rewrite, r+"/" if "#" in r else r+"#", v+"/")
131+
86132
return packed

tests/test_pack.py

+3
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,8 @@ def test_pack(self):
2222
expect_packed = json.load(f)
2323
adjustFileObjs(packed, partial(makeRelative, os.path.abspath("tests/wf")))
2424
adjustDirObjs(packed, partial(makeRelative, os.path.abspath("tests/wf")))
25+
self.assertIn("$schemas", packed)
26+
del packed["$schemas"]
27+
del expect_packed["$schemas"]
2528

2629
self.assertEqual(expect_packed, packed)

tests/wf/empty.ttl

Whitespace-only changes.

tests/wf/expect_packed.cwl

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"cwlVersion": "v1.0",
3+
"$schemas": ["file:///home/peter/work/cwltool/tests/wf/empty.ttl"],
34
"$graph": [
45
{
56
"inputs": [

tests/wf/revtool.cwl

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
class: CommandLineTool
55
cwlVersion: v1.0
66
doc: "Reverse each line using the `rev` command"
7+
$schemas:
8+
- empty.ttl
79

810
# The "inputs" array defines the structure of the input object that describes
911
# the inputs to the underlying program. Here, there is one input field

0 commit comments

Comments
 (0)