PMCC-BioinformaticsCore · GraceAHall · Sep 10, 2023 · Sep 10, 2023 · Sep 10, 2023 · Sep 12, 2023
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -21,5 +21,5 @@ jobs:
         pip install -r requirements/tests.txt
     - name: Test with pytest
       run: |
-        PYTHONPATH=. pytest janis_core/tests/test_*.py
+        PYTHONPATH=. pytest janis_core/tests/test_*.py -m "not release"
     - uses: codecov/codecov-action@v2
diff --git a/.gitignore b/.gitignore
@@ -83,6 +83,8 @@ docs/
 test.py
 myscript.sh
 translated/
+translated/*
+translated/*/*
 involucro
 
 TODO.txt
@@ -96,4 +98,4 @@ temp.txt
 translated/
 myscript.sh
 notes.txt
-presenter_notes.txt
+presenter_notes.txt
diff --git a/janis_core/__meta__.py b/janis_core/__meta__.py
@@ -1,4 +1,4 @@
-__version__ = "v0.13.0"
+__version__ = "v0.13.1"
 
 GITHUB_URL = "https://github.com/PMCC-BioinformaticsCore/janis"
 DOCS_URL = "https://janis.readthedocs.io/en/latest/"
diff --git a/janis_core/cli.py b/janis_core/cli.py
@@ -0,0 +1,82 @@
+
+
+import argparse
+import sys 
+
+from janis_core.ingestion import SupportedIngestion 
+from janis_core.translation_deps.supportedtranslations import SupportedTranslation
+from janis_core.ingestion import ingest
+from janis_core.translations import translate
+
+
+def main() -> None:
+    """CLI entry point: parse sys.argv (minus the program name), normalise the result, and run the translation."""
+    namespace = parse_args(sys.argv[1:])
+    do_translate(interpret_args(namespace))
+
+def do_translate(args: dict[str, str]) -> None:
+    """Ingest the input file, then translate the ingested model.
+
+    args['infile'] and args['from'] are handed to ingest(); the 'to', 'mode',
+    'outdir' and 'as_workflow' entries are forwarded to translate(). Whatever
+    translate() returns is passed straight back to the caller.
+    """
+    internal_model = ingest(args['infile'], args['from'])
+    translate_kwargs = {
+        'dest_fmt': args['to'],
+        'mode': args['mode'],
+        'export_path': args['outdir'],
+        'as_workflow': args['as_workflow'],
+    }
+    return translate(internal_model, **translate_kwargs)
+
+def interpret_args(args: argparse.Namespace) -> dict[str, str]:
+    """Convert the parsed namespace into the plain dict consumed by do_translate().
+
+    Only the recognised options are copied, and 'output_dir' is renamed to
+    'outdir'. Values are whatever argparse produced, so despite the str
+    annotation 'as_workflow' is a bool and options without a default may be None.
+
+    Args:
+        args: namespace returned by parse_args().
+
+    Returns:
+        dict keyed by 'from', 'to', 'mode', 'infile', 'outdir' and 'as_workflow'.
+        A key is absent when the namespace has no matching attribute.
+    """
+    # namespace attribute name -> key used in the returned dict
+    key_map = {
+        'from': 'from',
+        'to': 'to',
+        'mode': 'mode',
+        'infile': 'infile',
+        'output_dir': 'outdir',
+        'as_workflow': 'as_workflow',
+    }
+    # 'from' is a Python keyword, so `args.from` is a syntax error. vars() is the
+    # documented way to view a Namespace as a dict (no private argparse API needed).
+    parsed = vars(args)
+    return {key_map[name]: value for name, value in parsed.items() if name in key_map}
+
+def parse_args(sysargs: list[str]) -> argparse.Namespace:
+    """Build the command-line parser and parse `sysargs`.
+
+    Args:
+        sysargs: argument strings, without the program name.
+
+    Returns:
+        Namespace with the attributes 'infile', 'from', 'to', 'output_dir'
+        (default "translated"), 'as_workflow' (default False) and 'mode'
+        (default "extended"). 'from' and 'to' are None when not supplied.
+    """
+    parser = argparse.ArgumentParser(
+        description='Translate a janis workflow to CWL, WDL or Nextflow',
+        # keep the explicit line breaks of the --mode help; the default
+        # formatter would re-wrap them into a single paragraph
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+
+    parser.add_argument(
+        "infile",
+        help="Path to input file",
+    )
+    parser.add_argument(
+        "--from",
+        help="Language of infile. Will be autodetected if not supplied",
+        choices=SupportedIngestion.all(),
+        type=str
+    )
+    parser.add_argument(
+        "--to",
+        help="Language to translate to.",
+        choices=SupportedTranslation.all(),
+        type=str
+    )
+    parser.add_argument(
+        "-o",
+        "--output-dir",
+        help="Output directory to write output to (default: translated).",
+        type=str,
+        default="translated"
+    )
+    parser.add_argument(
+        "--as-workflow",
+        action="store_true",
+        help="For tool translation: wraps output tool in workflow.",
+    )
+    parser.add_argument(
+        "--mode",
+        # the advertised default must match `default=` below
+        help=(
+            "Translate mode (default: extended). Controls extent of tool translation\n"
+            "- skeleton: ignores inputs which aren't used in workflow. no CLI command generation.\n"
+            "- regular: ignores inputs which aren't used in workflow.\n"
+            "- extended: full translation of all inputs & CLI command"
+        ),
+        type=str,
+        choices=["skeleton", "regular", "extended"],
+        default="extended"
+    )
+
+    return parser.parse_args(sysargs)
+
diff --git a/janis_core/graph/steptaginput.py b/janis_core/graph/steptaginput.py
@@ -2,12 +2,14 @@
 
 from janis_core.types import get_instantiated_type
 from janis_core.operators import Selector
+from janis_core.operators import Operator
 from janis_core.graph.node import Node, NodeType
 from janis_core.tool.tool import TInput
 from janis_core.utils import first_value
 from janis_core.utils.logger import Logger
 from janis_core import settings
-from janis_core.messages import log_warning
+from janis_core.messages import log_message
+from janis_core.messages import ErrorCategory
 from uuid import uuid4
 
 
@@ -45,7 +47,7 @@ def validate_tags(self):
             if self.ftag not in self.finish.inputs():
                 if settings.graph.ALLOW_UNKNOWN_SOURCE:
                     msg = "Could not connect this input to its data source"
-                    log_warning(self.uuid, msg)
+                    log_message(self.uuid, msg, ErrorCategory.PLUMBING)
                 else:
                     raise Exception(
                         f"Could not find the tag '{self.ftag}' in the outputs of '{self.finish.id()}': {list(self.finish.inputs().keys())}"
@@ -69,7 +71,7 @@ def check_types(self):
             if not stype.is_array():
                 if settings.graph.ALLOW_NON_ARRAY_SCATTER_INPUT:
                     msg = f"This task is supposed to run in parallel across this input ({ftoolin.id()}), but the data source is not an array."
-                    log_warning(self.uuid, msg)
+                    log_message(self.uuid, msg, ErrorCategory.PLUMBING)
                 else:
                     raise Exception(
                         f"Scatter was required for '{operator} → '{self.finish.id()}.{self.ftag}' but "
@@ -89,7 +91,7 @@ def check_types(self):
         if not self.compatible_types:
             if settings.graph.ALLOW_INCOMPATIBLE_TYPES:
                 msg = f"The data source for this input is a {stype.id()}, but the input is a {ftoolin.intype.id()}"
-                log_warning(self.uuid, msg)
+                log_message(self.uuid, msg, ErrorCategory.DATATYPES)
             else:
                 s = str(self.source)
                 f = full_dot(self.finish, self.ftag)
@@ -114,6 +116,7 @@ def __init__(self, finish: Node, finish_tag: str):
         self.ftag: Optional[str] = finish_tag
         self.multiple_inputs = False
         self.source_map: list[Edge] = []
+        self.operator: Optional[Operator] = None
 
     def add_source(self, operator: Selector, should_scatter: Optional[bool]=None) -> Edge:
         """
@@ -126,6 +129,8 @@ def add_source(self, operator: Selector, should_scatter: Optional[bool]=None) ->
         stype = get_instantiated_type(operator.returntype())
 
         if self.ftag:
+            if self.ftag not in self.finish.inputs():
+                raise RuntimeError(f'no step exists with tag: {self.ftag}')
             tinput = self.finish.inputs()[self.ftag]
         else:
             tinput = first_value(self.finish.inputs())        
@@ -145,7 +150,7 @@ def add_source(self, operator: Selector, should_scatter: Optional[bool]=None) ->
             if not stype.is_array():
                 if settings.graph.ALLOW_NON_ARRAY_SCATTER_INPUT:
                     msg = f"This task is supposed to run in parallel across this input ({tinput.id()}), but the data source is not an array."
-                    log_warning(self.uuid, msg)
+                    log_message(self.uuid, msg, ErrorCategory.PLUMBING)
                 else:
                     raise Exception(
                         f"Scatter was required for '{operator} → '{self.finish.id()}.{self.ftag}' but "
@@ -159,7 +164,7 @@ def add_source(self, operator: Selector, should_scatter: Optional[bool]=None) ->
             if not ftype.is_array():
                 if settings.graph.ALLOW_INCORRECT_NUMBER_OF_SOURCES:
                     msg = "This input has multiple data sources, but should only have one (it is not an array)."
-                    log_warning(self.uuid, msg)
+                    log_message(self.uuid, msg, ErrorCategory.PLUMBING)
                 else:
                     raise Exception(
                         f"Adding multiple inputs to '{self.finish.id()}' "

diff --git a/janis_core/ingestion/common/__init__.py b/janis_core/ingestion/common/__init__.py
@@ -0,0 +1,14 @@
+
+
+from .fileio import safe_init_file
+from .fileio import safe_init_folder
+
+
+from .graph import add_step_edges_to_graph
+from .graph import get_janis_wf_sources
+
+from .identifiers import get_id_entity
+from .identifiers import get_id_filename
+from .identifiers import get_id_path
+from .identifiers import get_cwl_reference
+from .identifiers import remove_output_name_from_output_source
diff --git a/...ingestion/galaxy/fileio/initialisation.py → janis_core/ingestion/common/fileio.py b/...ingestion/galaxy/fileio/initialisation.py → janis_core/ingestion/common/fileio.py
@@ -1,6 +1,8 @@
 
-import os
 
+import os 
+import shutil
+PERMISSIONS=0o777
 
 def safe_init_file(path: str, override: bool=False, contents: str='') -> None:
     dirname = os.path.dirname(path)
@@ -9,6 +11,7 @@ def safe_init_file(path: str, override: bool=False, contents: str='') -> None:
         fp.write(contents)
 
 def safe_init_folder(path: str, override: bool=False) -> None:
-    if not os.path.isdir(path):
-        os.makedirs(path)
-
+    """Make sure `path` exists as a directory.
+
+    When `override` is true and `path` is already a directory, it is removed
+    together with its contents before being recreated. Without `override`, an
+    existing directory is left untouched (exist_ok=True).
+    """
+    if override:
+        if os.path.isdir(path):
+            shutil.rmtree(path)
+    # PERMISSIONS is the requested mode; os.makedirs masks it with the process umask
+    os.makedirs(path, PERMISSIONS, exist_ok=True)
diff --git a/janis_core/ingestion/cwl/graph.py → janis_core/ingestion/common/graph.py b/janis_core/ingestion/cwl/graph.py → janis_core/ingestion/common/graph.py
@@ -1,6 +1,7 @@
 
 
 import copy
+from typing import Any
 
 from janis_core.utils.errors import UnsupportedError
 
@@ -14,58 +15,20 @@
     InputQualityType,
 )
 
+from janis_core import settings 
 from .identifiers import get_id_entity
 from .identifiers import get_id_path
 
 
-def get_janis_wf_sources(wf: Workflow, sources: str | list[str]) -> list[InputNode | StepOutputSelector]:
-    """
-    each source is a workflow input, step output, or complex expression.
-    input nodes will all be parsed into janis wf at this stage.
-    we can check if the source is an input on the janis wf, then if not, must be a step output.
-    """
-    out: list[InputNode | StepOutputSelector] = []
-
-    if isinstance(sources, str):
-        sources = [sources]
-
-    for src in sources:
-        # get the wfinp / step output identifier
-        identifier = get_id_entity(src)
-
-        # is complex expression?
-        if identifier.startswith("$("):
-            raise UnsupportedError(
-                f"This script can't parse expressions in the step input {step_input}"
-            )
-
-        # is step output?
-        # if referencing step output, that step will have already been parsed into the janis wf
-        if get_id_path(src) and get_id_path(src) in wf.step_nodes:
-            stp_id = get_id_path(src)
-            out_id = get_id_entity(src)
-            stp = wf.step_nodes[stp_id]
-            selector = stp.get_item(out_id)
-            out.append(selector)
-
-        # is wf input?
-        elif identifier in wf.input_nodes:
-            resolved_src = wf[identifier]
-            out.append(resolved_src) 
-
-        else:
-            raise NotImplementedError
-
-    return out
-
-
-def add_step_edges_to_graph(jstep: StepNode, wf: Workflow) -> None:
-    connections = jstep.tool.connections
+def add_step_edges_to_graph(jstep: StepNode, inputs_dict: dict[str, Any], wf: Workflow) -> None:
+    jstep.tool.connections = inputs_dict
     tinputs = jstep.tool.inputs_map()
+    if jstep.sources:
+        raise RuntimeError("Step already has sources??")
     jstep.sources = {}
 
     added_edges = []
-    for (k, v) in connections.items():
+    for (k, v) in inputs_dict.items():
 
         # static values provided when creating step.
         # janis wants to create a workflow input for these.
@@ -102,4 +65,57 @@ def add_step_edges_to_graph(jstep: StepNode, wf: Workflow) -> None:
     for e in added_edges:
         si = e.finish.sources[e.ftag] if e.ftag else first_value(e.finish.sources)
         wf.has_multiple_inputs = wf.has_multiple_inputs or si.multiple_inputs
-
+
+def get_janis_wf_sources(wf: Workflow, sources: str | list[str]) -> list[InputNode | StepOutputSelector]:
+    """
+    Resolve source reference(s) to nodes / selectors of the janis workflow.
+    A source is a workflow input, a step output, or a complex expression.
+    The work is delegated to the resolver matching settings.ingest.SOURCE
+    ('cwl' or 'wdl'); any other value raises NotImplementedError.
+    """
+    source_lang = settings.ingest.SOURCE
+    if source_lang == 'cwl':
+        return _get_janis_wf_sources_cwl(wf, sources)
+    if source_lang == 'wdl':
+        return _get_janis_wf_sources_wdl(wf, sources)
+    raise NotImplementedError
+
+
+
+def _get_janis_wf_sources_wdl(wf: Workflow, sources: str | list[str]) -> list[InputNode | StepOutputSelector]:
+    """WDL source resolution is not implemented: always raises NotImplementedError."""
+    raise NotImplementedError
+
+def _get_janis_wf_sources_cwl(wf: Workflow, sources: str | list[str]) -> list[InputNode | StepOutputSelector]:
+    """
+    Resolve CWL source reference(s) against the janis workflow `wf`.
+
+    A single string is treated as a one-element list. Each source is tried
+    first as a step output (its id path names a step in wf.step_nodes), then
+    as a workflow input (its id entity is in wf.input_nodes).
+
+    Raises:
+        UnsupportedError: the source is a "$(" expression.
+        NotImplementedError: the source is neither a known step output nor a
+            known workflow input.
+    """
+    out: list[InputNode | StepOutputSelector] = []
+
+    if isinstance(sources, str):
+        sources = [sources]
+
+    for src in sources:
+        # the wf input / step output identifier
+        identifier = get_id_entity(src)
+
+        # complex expressions are not supported
+        if identifier.startswith("$("):
+            # name the offending source; an undefined name here would raise
+            # NameError and hide the UnsupportedError
+            raise UnsupportedError(
+                f"This script can't parse expressions in the step input {src}"
+            )
+
+        # step output? a referenced step has already been parsed into the janis wf
+        stp_id = get_id_path(src)
+        if stp_id and stp_id in wf.step_nodes:
+            stp = wf.step_nodes[stp_id]
+            out.append(stp.get_item(identifier))
+
+        # workflow input?
+        elif identifier in wf.input_nodes:
+            out.append(wf[identifier])
+
+        else:
+            raise NotImplementedError(
+                f"Could not resolve source '{src}' to a step output or workflow input"
+            )
+
+    return out
+