Skip to content

Commit 83851db

Browse files
authored
Merge pull request #893 from common-workflow-language/cwlprov-add_output
add output object to RO
2 parents 659c15c + c228908 commit 83851db

File tree

4 files changed

+41
-22
lines changed

4 files changed

+41
-22
lines changed

cwltool/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,8 @@ def my_represent_none(self, data): # pylint: disable=unused-argument
712712
logger=_logger)
713713

714714
if out is not None:
715+
if runtimeContext.research_obj:
716+
runtimeContext.research_obj.create_job(out, None, True)
715717
def loc_to_path(obj):
716718
for field in ("path", "nameext", "nameroot", "dirname"):
717719
if field in obj:

cwltool/provenance.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import copy
55
import datetime
66
import hashlib
7-
import io
87
import logging
98
import os
109
import os.path
@@ -15,10 +14,10 @@
1514
import uuid
1615
from collections import OrderedDict
1716
from getpass import getuser
18-
from io import open
17+
from io import BytesIO, FileIO, TextIOWrapper, open
1918
from socket import getfqdn
20-
from typing import (IO, Any, Callable, Dict, List, MutableMapping, Optional,
21-
Set, Tuple, Union, cast)
19+
from typing import (IO, Any, Callable, Dict, List, Generator, MutableMapping,
20+
Optional, Set, Tuple, Union, cast)
2221

2322
import prov.model as provM
2423
import six
@@ -73,7 +72,7 @@ class PermissionError(OSError): # pylint: disable=redefined-builtin
7372
# 2. Bump minor number if adding resources or PROV statements
7473
# 3. Bump patch number for non-breaking non-adding changes,
7574
# e.g. fixing broken relative paths
76-
CWLPROV_VERSION = "https://w3id.org/cwl/prov/0.4.0"
75+
CWLPROV_VERSION = "https://w3id.org/cwl/prov/0.5.0"
7776

7877
# Research Object folders
7978
METADATA = "metadata"
@@ -149,7 +148,7 @@ def _whoami():
149148
return (username, fullname)
150149

151150

152-
class WritableBagFile(io.FileIO):
151+
class WritableBagFile(FileIO):
153152
"""Writes files in research object."""
154153

155154
def __init__(self, research_object, rel_path):
@@ -209,7 +208,7 @@ def readable(self):
209208

210209
def truncate(self, size=None):
211210
# type: (Optional[int]) -> int
212-
# FIXME: This breaks contract io.IOBase,
211+
# FIXME: This breaks contract IOBase,
213212
# as it means we would have to recalculate the hash
214213
if size is not None:
215214
raise IOError("WritableBagFile can't truncate")
@@ -458,7 +457,7 @@ def evaluate(self,
458457
self.prospective_prov(job)
459458
customised_job = copy_job_order(job, job_order_object)
460459
self.used_artefacts(customised_job, self.workflow_run_uri)
461-
research_obj.create_job(job, customised_job)
460+
research_obj.create_job(customised_job, job)
462461
# self.used_artefacts(inputs, self.workflow_run_uri)
463462
name = ""
464463
if hasattr(job, "name"):
@@ -680,7 +679,7 @@ def declare_directory(self, value): # type: (MutableMapping) -> ProvEntity
680679
def declare_string(self, value):
681680
# type: (Union[Text, str]) -> Tuple[ProvEntity,Text]
682681
"""Save as string in UTF-8."""
683-
byte_s = io.BytesIO(str(value).encode(ENCODING))
682+
byte_s = BytesIO(str(value).encode(ENCODING))
684683
data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN)
685684
checksum = posixpath.basename(data_file)
686685
# FIXME: Don't naively assume add_data_file uses hash in filename!
@@ -716,7 +715,7 @@ def declare_artefact(self, value):
716715

717716
if isinstance(value, bytes):
718717
# If we got here then we must be in Python 3
719-
byte_s = io.BytesIO(value)
718+
byte_s = BytesIO(value)
720719
data_file = self.research_object.add_data_file(byte_s)
721720
# FIXME: Don't naively assume add_data_file uses hash in filename!
722721
data_id = "data:%s" % posixpath.split(data_file)[1]
@@ -1051,13 +1050,13 @@ def write_bag_file(self, path, encoding=ENCODING):
10511050
# type: (Text, Optional[str]) -> IO
10521051
"""Write the bag file into our research object."""
10531052
# For some reason below throws BlockingIOError
1054-
#fp = io.BufferedWriter(WritableBagFile(self, path))
1053+
#fp = BufferedWriter(WritableBagFile(self, path))
10551054
bag_file = cast(IO, WritableBagFile(self, path))
10561055
if encoding:
10571056
# encoding: match Tag-File-Character-Encoding: UTF-8
10581057
# newline: ensure LF also on Windows
10591058
return cast(IO,
1060-
io.TextIOWrapper(bag_file, encoding=encoding, newline="\n"))
1059+
TextIOWrapper(bag_file, encoding=encoding, newline="\n"))
10611060
return bag_file
10621061

10631062
def add_tagfile(self, path, when=None):
@@ -1490,16 +1489,22 @@ def _add_to_bagit(self, rel_path, **checksums):
14901489
self.add_to_manifest(rel_path, checksums)
14911490

14921491
def create_job(self,
1493-
wf_job,
1494-
builder_job # type: Dict
1492+
builder_job, # type: Dict[Text, Any]
1493+
wf_job=None, # type: Callable[[Dict[Text, Text], Callable[[Any, Any], Any], RuntimeContext], Generator[Any, None, None]]
1494+
is_output=False
14951495
): # type: (...) -> Dict
14961496
#TODO customise the file
14971497
"""Generate the new job object with RO specific relative paths."""
14981498
copied = copy.deepcopy(builder_job)
1499-
relativised_input_objecttemp = {} # type: Dict[Any, Any]
1499+
relativised_input_objecttemp = {} # type: Dict[Text, Any]
15001500
self._relativise_files(copied)
1501-
rel_path = posixpath.join(_posix_path(WORKFLOW), "primary-job.json")
1502-
j = json_dumps(copied, indent=4, ensure_ascii=False)
1501+
def jdefault(o):
1502+
return dict(o)
1503+
if is_output:
1504+
rel_path = posixpath.join(_posix_path(WORKFLOW), "primary-output.json")
1505+
else:
1506+
rel_path = posixpath.join(_posix_path(WORKFLOW), "primary-job.json")
1507+
j = json_dumps(copied, indent=4, ensure_ascii=False, default=jdefault)
15031508
with self.write_bag_file(rel_path) as file_path:
15041509
file_path.write(j + u"\n")
15051510
_logger.debug(u"[provenance] Generated customised job file: %s",
@@ -1520,7 +1525,7 @@ def create_job(self,
15201525
return self.relativised_input_object
15211526

15221527
def _relativise_files(self, structure):
1523-
# type: (Any, Dict) -> None
1528+
# type: (Any, Dict[Any, Any]) -> None
15241529
"""Save any file objects into the RO and update the local paths."""
15251530
# Base case - we found a File we need to update
15261531
_logger.debug(u"[provenance] Relativising: %s", structure)

cwltool/workflow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,7 @@ def job(self,
568568
runtimeContext.research_obj.make_fs_access = runtimeContext.make_fs_access
569569
if runtimeContext.toplevel:
570570
# Record primary-job.json
571-
runtimeContext.research_obj.create_job(self.job, builder.job)
571+
runtimeContext.research_obj.create_job(builder.job, self.job)
572572

573573
job = WorkflowJob(self, runtimeContext)
574574
yield job

tests/test_provenance.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
import arcp
1414
import pytest
1515
from rdflib import Graph, Literal, Namespace, URIRef
16-
from rdflib.namespace import DC, DCTERMS, FOAF, RDF, RDFS, SKOS, XSD
17-
from six import StringIO
16+
from rdflib.namespace import DC, DCTERMS, RDF
1817
from six.moves import urllib
1918

2019
import bagit
@@ -82,6 +81,7 @@ def test_hello_single_tool(self):
8281
def test_revsort_workflow(self):
8382
self.cwltool(get_data('tests/wf/revsort.cwl'),
8483
get_data('tests/wf/revsort-job.json'))
84+
self.check_output_object()
8585
self.check_provenance()
8686

8787
def test_nested_workflow(self):
@@ -182,6 +182,17 @@ def test_directory_workflow(self):
182182
self.assertTrue(os.path.isfile(p),
183183
"Could not find %s as %s" % (l, p))
184184

185+
def check_output_object(self):
186+
output_obj = os.path.join(self.folder, "workflow", "primary-output.json")
187+
compare_checksum = "sha1$b9214658cc453331b62c2282b772a5c063dbd284"
188+
compare_location = "../data/b9/b9214658cc453331b62c2282b772a5c063dbd284"
189+
with open(output_obj) as fp:
190+
out_json = json.load(fp)
191+
f1 = out_json["output"]
192+
self.assertEqual(f1["checksum"], compare_checksum)
193+
self.assertEqual(f1["location"], compare_location)
194+
195+
185196
def check_secondary_files(self):
186197
foo_data = os.path.join(self.folder, "data",
187198
# checksum as returned from:
@@ -316,7 +327,7 @@ def check_ro(self, nested=False):
316327
f = "metadata/provenance/primary.cwlprov.%s" % ext
317328
self.assertTrue(f in paths, "provenance file missing " + f)
318329

319-
for f in ["workflow/primary-job.json", "workflow/packed.cwl"]:
330+
for f in ["workflow/primary-job.json", "workflow/packed.cwl", "workflow/primary-output.json"]:
320331
self.assertTrue(f in paths, "workflow file missing " + f)
321332
# Can't test snapshot/ files directly as their name varies
322333

@@ -325,6 +336,7 @@ def check_ro(self, nested=False):
325336

326337
packed = urllib.parse.urljoin(arcp_root, "/workflow/packed.cwl")
327338
primary_job = urllib.parse.urljoin(arcp_root, "/workflow/primary-job.json")
339+
output_object = urllib.parse.urljoin(arcp_root, "/workflow/primary-output.json")
328340
primary_prov_nt = urllib.parse.urljoin(arcp_root, "/metadata/provenance/primary.cwlprov.nt")
329341
uuid = arcp.parse_arcp(arcp_root).uuid
330342

0 commit comments

Comments (0)