Allow packing Operation records (#1467)

jmfernandez · mr-c · web-flow · commit ed9dd4c3472e · 2021-06-28T16:32:08.000Z
* cwltool now packs `Operation` definitions. CWL 1.2 introduced [Operation](https://www.commonwl.org/v1.2/Workflow.html#Operation) to model abstract workflow steps, which are needed when CWL is used to describe a workflow that might not exist or might be implemented by means other than CWL. Because `cwltool --pack` was ignoring records of class `Operation`, packed workflows were incorrect (they did not validate with `cwltool --validate`). The issue is described in #1466. This commit fixes the problem that arises when packing workflows that contain `Operation` records. Co-authored-by: Michael R. Crusoe <michael.crusoe@gmail.com>
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -7,6 +7,7 @@ include tests/*
 include tests/tmp1/tmp2/tmp3/.gitkeep
 include tests/tmp4/alpha/*
 include tests/wf/*
+include tests/wf/operation/*
 include tests/override/*
 include tests/checker_wf/*
 include tests/subgraph/*
diff --git a/cwltool/pack.py b/cwltool/pack.py
@@ -251,7 +251,12 @@ def rewrite_id(r: str, mainuri: str) -> None:
         if "$schemas" in metadata:
             for s in metadata["$schemas"]:
                 schemas.add(s)
-        if dcr.get("class") not in ("Workflow", "CommandLineTool", "ExpressionTool"):
+        if dcr.get("class") not in (
+            "Workflow",
+            "CommandLineTool",
+            "ExpressionTool",
+            "Operation",
+        ):
             continue
         dc = cast(Dict[str, Any], copy.deepcopy(dcr))
         v = rewrite[r]
diff --git a/tests/test_pack.py b/tests/test_pack.py
@@ -21,57 +21,41 @@
 from .util import get_data, needs_docker
 
 
-def test_pack() -> None:
-    loadingContext, workflowobj, uri = fetch_document(get_data("tests/wf/revsort.cwl"))
-    yaml = YAML(typ="safe", pure=True)
-
-    with open(get_data("tests/wf/expect_packed.cwl")) as packed_file:
-        expect_packed = yaml.load(packed_file)
-
-    packed = cwltool.pack.pack(loadingContext, uri)
-    adjustFileObjs(
-        packed, partial(make_relative, os.path.abspath(get_data("tests/wf")))
-    )
-    adjustDirObjs(packed, partial(make_relative, os.path.abspath(get_data("tests/wf"))))
-
-    assert "$schemas" in packed
-    packed_schemas = packed["$schemas"]
-    assert isinstance(packed_schemas, Sized)
-    assert len(packed_schemas) == len(expect_packed["$schemas"])
-    del packed["$schemas"]
-    del expect_packed["$schemas"]
-
-    assert packed == expect_packed
-
-
-def test_pack_input_named_name() -> None:
-    loadingContext, workflowobj, uri = fetch_document(
-        get_data("tests/wf/trick_revsort.cwl")
-    )
+@pytest.mark.parametrize(
+    "unpacked,expected",
+    [
+        ("tests/wf/revsort.cwl", "tests/wf/expect_packed.cwl"),
+        (
+            "tests/wf/operation/operation-single.cwl",
+            "tests/wf/operation/expect_operation-single_packed.cwl",
+        ),
+        ("tests/wf/trick_revsort.cwl", "tests/wf/expect_trick_packed.cwl"),
+    ],
+)
+def test_packing(unpacked: str, expected: str) -> None:
+    """Compare expected versus actual output of --pack for various workflows."""
+    loadingContext, workflowobj, uri = fetch_document(get_data(unpacked))
     loadingContext.do_update = False
     loadingContext, uri = resolve_and_validate_document(
         loadingContext, workflowobj, uri
     )
-    loader = loadingContext.loader
-    assert loader
-    loader.resolve_ref(uri)[0]
-
-    yaml = YAML()
-    with open(get_data("tests/wf/expect_trick_packed.cwl")) as packed_file:
-        expect_packed = yaml.load(packed_file)
 
     packed = cwltool.pack.pack(loadingContext, uri)
-    adjustFileObjs(
-        packed, partial(make_relative, os.path.abspath(get_data("tests/wf")))
-    )
-    adjustDirObjs(packed, partial(make_relative, os.path.abspath(get_data("tests/wf"))))
+    context_dir = os.path.abspath(os.path.dirname(get_data(unpacked)))
+    adjustFileObjs(packed, partial(make_relative, context_dir))
+    adjustDirObjs(packed, partial(make_relative, context_dir))
 
-    assert "$schemas" in packed
-    packed_schemas = packed["$schemas"]
-    assert isinstance(packed_schemas, Sized)
-    assert len(packed_schemas) == len(expect_packed["$schemas"])
-    del packed["$schemas"]
-    del expect_packed["$schemas"]
+    yaml = YAML(typ="safe", pure=True)
+    with open(get_data(expected)) as packed_file:
+        expect_packed = yaml.load(packed_file)
+
+    if "$schemas" in expect_packed:
+        assert "$schemas" in packed
+        packed_schemas = packed["$schemas"]
+        assert isinstance(packed_schemas, Sized)
+        assert len(packed_schemas) == len(expect_packed["$schemas"])
+        del packed["$schemas"]
+        del expect_packed["$schemas"]
 
     assert packed == expect_packed
 
diff --git a/tests/wf/operation/abstract-cosifer.cwl b/tests/wf/operation/abstract-cosifer.cwl
@@ -0,0 +1,25 @@
+class: Operation
+cwlVersion: v1.2
+
+requirements:
+  DockerRequirement:
+    dockerPull: 'tsenit/cosifer:b4d5af45d2fc54b6bff2a9153a8e9054e560302e'
+
+inputs:
+  data_matrix:
+    type: File
+  separator:
+    type: string?
+    doc: The separator used in the data_matrix file
+  index_col:
+    type: int?
+  gmt_filepath:
+    type: File?
+  outdir:
+    type: string?
+  samples_on_rows:
+    type: boolean?
+
+outputs:
+  resdir:
+    type: Directory
diff --git a/tests/wf/operation/expect_operation-single_packed.cwl b/tests/wf/operation/expect_operation-single_packed.cwl
@@ -0,0 +1,154 @@
+{
+    "$graph": [
+        {
+            "class": "Operation",
+            "requirements": [
+                {
+                    "dockerPull": "tsenit/cosifer:b4d5af45d2fc54b6bff2a9153a8e9054e560302e",
+                    "class": "DockerRequirement"
+                }
+            ],
+            "inputs": [
+                {
+                    "type": "File",
+                    "id": "#abstract-cosifer.cwl/data_matrix"
+                },
+                {
+                    "type": [
+                        "null",
+                        "File"
+                    ],
+                    "id": "#abstract-cosifer.cwl/gmt_filepath"
+                },
+                {
+                    "type": [
+                        "null",
+                        "int"
+                    ],
+                    "id": "#abstract-cosifer.cwl/index_col"
+                },
+                {
+                    "type": [
+                        "null",
+                        "string"
+                    ],
+                    "id": "#abstract-cosifer.cwl/outdir"
+                },
+                {
+                    "type": [
+                        "null",
+                        "boolean"
+                    ],
+                    "id": "#abstract-cosifer.cwl/samples_on_rows"
+                },
+                {
+                    "type": [
+                        "null",
+                        "string"
+                    ],
+                    "doc": "The separator used in the data_matrix file",
+                    "id": "#abstract-cosifer.cwl/separator"
+                }
+            ],
+            "outputs": [
+                {
+                    "type": "Directory",
+                    "id": "#abstract-cosifer.cwl/resdir"
+                }
+            ],
+            "id": "#abstract-cosifer.cwl"
+        },
+        {
+            "class": "Workflow",
+            "id": "#main",
+            "label": "abstract-cosifer-workflow",
+            "inputs": [
+                {
+                    "type": "File",
+                    "doc": "Gene expression data matrix",
+                    "id": "#data_matrix"
+                },
+                {
+                    "type": [
+                        "null",
+                        "File"
+                    ],
+                    "doc": "Optional GMT file to perform inference on multiple gene sets",
+                    "id": "#gmt_filepath"
+                },
+                {
+                    "type": [
+                        "null",
+                        "int"
+                    ],
+                    "doc": "Column index in the data. Defaults to None, a.k.a., no index",
+                    "id": "#index_col"
+                },
+                {
+                    "type": "string",
+                    "doc": "Path to the output directory",
+                    "id": "#outdir"
+                },
+                {
+                    "type": [
+                        "null",
+                        "boolean"
+                    ],
+                    "doc": "Flag that indicates that data contain the samples on rows. Defaults to False.",
+                    "id": "#samples_on_rows"
+                },
+                {
+                    "type": [
+                        "null",
+                        "string"
+                    ],
+                    "doc": "Separator for the data. Defaults to .",
+                    "id": "#separator"
+                }
+            ],
+            "outputs": [
+                {
+                    "type": "Directory",
+                    "outputSource": "#/abstract_cosifer/resdir",
+                    "id": "#resdir"
+                }
+            ],
+            "steps": [
+                {
+                    "run": "#abstract-cosifer.cwl",
+                    "in": [
+                        {
+                            "source": "#data_matrix",
+                            "id": "#abstract_cosifer/data_matrix"
+                        },
+                        {
+                            "source": "#gmt_filepath",
+                            "id": "#abstract_cosifer/gmt_filepath"
+                        },
+                        {
+                            "source": "#index_col",
+                            "id": "#abstract_cosifer/index_col"
+                        },
+                        {
+                            "source": "#outdir",
+                            "id": "#abstract_cosifer/outdir"
+                        },
+                        {
+                            "source": "#samples_on_rows",
+                            "id": "#abstract_cosifer/samples_on_rows"
+                        },
+                        {
+                            "source": "#separator",
+                            "id": "#abstract_cosifer/separator"
+                        }
+                    ],
+                    "out": [
+                        "#/abstract_cosifer/resdir"
+                    ],
+                    "id": "#abstract_cosifer"
+                }
+            ]
+        }
+    ],
+    "cwlVersion": "v1.2"
+}
diff --git a/tests/wf/operation/operation-single.cwl b/tests/wf/operation/operation-single.cwl
@@ -0,0 +1,27 @@
+class: Workflow
+cwlVersion: v1.2
+id: abstract_cosifer_workflow
+label: abstract-cosifer-workflow
+
+inputs:
+  data_matrix: {type: File, doc: "Gene expression data matrix"}
+  gmt_filepath: {type: "File?", doc: "Optional GMT file to perform inference on multiple gene sets"}
+  index_col: {type: "int?", doc: "Column index in the data. Defaults to None, a.k.a., no index"}
+  outdir: {type: string, doc: "Path to the output directory"}
+  separator: {type: "string?", doc: "Separator for the data. Defaults to ."}
+  samples_on_rows: {type: "boolean?", doc: "Flag that indicates that data contain the samples on rows. Defaults to False."}
+
+outputs:
+  resdir: {type: Directory, outputSource: abstract_cosifer/resdir}
+
+steps:
+  abstract_cosifer:
+    run: abstract-cosifer.cwl
+    in:
+      data_matrix: data_matrix
+      separator: separator
+      index_col: index_col
+      gmt_filepath: gmt_filepath
+      outdir: outdir
+      samples_on_rows: samples_on_rows
+    out: [resdir]