Reduce pipeline processes #298

Open
wants to merge 68 commits into base: maint_0.4

Changes from all commits (68 commits)
5d54c40
add package tools and clean up requirements
christian-monch Jul 29, 2022
6568591
update release-pypi goal in Makefile
christian-monch Jul 29, 2022
15b1168
use dataclasses for annex-status info
christian-monch Jul 29, 2022
e362521
add --file-info parameter
christian-monch Jul 30, 2022
b389caf
clean up traverser and use --file-info
christian-monch Aug 1, 2022
2e505e4
more cleanup
christian-monch Aug 3, 2022
492804b
distinguish between GitRepo and AnnexRepo
christian-monch Aug 3, 2022
1e6760a
fix dataset path returned by traverser
christian-monch Aug 3, 2022
6585ff3
handle id-less datasets
christian-monch Aug 3, 2022
af9b45a
update required versions
christian-monch Oct 21, 2022
f07616e
fix some typos and pep-8 warnings
christian-monch Nov 1, 2022
e9e8353
add probe pipeline elements
christian-monch Nov 1, 2022
5aa694d
improve probe elements
christian-monch Nov 2, 2022
b68c1f1
add an invocation count to processor probe
christian-monch Nov 2, 2022
0acee9d
backport a fix for traversal root path handling
christian-monch Nov 2, 2022
e40c26d
send stop pipeline-data to consumer
christian-monch Nov 3, 2022
6fe8e74
remove an unused import
christian-monch Dec 8, 2022
e581244
fix a bug in relative item path determination
christian-monch Jan 12, 2023
254ba86
add a test for pipeline extract adapter
christian-monch Jan 16, 2023
7f928d0
add nose to requirements
christian-monch Jan 24, 2023
ff73673
fix parameter names
christian-monch Jan 25, 2023
a601e85
require dataset IDs, rebase on maint_0.4
christian-monch Jan 26, 2023
d2cb104
fix dataset reporting
christian-monch Jan 26, 2023
8dee26d
update requirements-files
christian-monch Feb 25, 2023
909bba5
[TEMP] fix traversal code
christian-monch Feb 27, 2023
90609b3
copy code from datalad to prevent pulling in nose
christian-monch Feb 27, 2023
46ffe7c
fix use of `to_json()` instead of `to_dict()`
christian-monch Feb 27, 2023
8a29f5b
finalize rebase on maint
christian-monch Mar 21, 2023
da6a7c6
implement a faster dataset traverser
christian-monch Mar 23, 2023
66d9901
add a known flag
christian-monch Mar 23, 2023
2dbc0a9
fix git ls-file interpretation
christian-monch Mar 24, 2023
95d38fe
cache dataset version
christian-monch Mar 24, 2023
de02bc5
yield a relative dataset path and simplify code
christian-monch Mar 25, 2023
ff45963
[TEMP]
christian-monch Mar 26, 2023
940e4a0
[TEMP2]
christian-monch Mar 26, 2023
0ff6598
use annex find to add annex info to FileInfo
christian-monch Mar 27, 2023
46bd69e
[temp3] add a stdin-based adding consumer
christian-monch Mar 29, 2023
55c6ad3
[temp4] use --json-lines
christian-monch Mar 30, 2023
0eb804f
provide proper JSON lines to stdin-adder
christian-monch Apr 3, 2023
b2b91b6
log metadata sent to stdin-adder on debug level
christian-monch Apr 3, 2023
1ec78ad
add size information for non-annex files
christian-monch Apr 5, 2023
a5f03e6
fix a bug where file-size was returned as string
christian-monch Apr 17, 2023
20349b5
[temp] consolidate listing code
christian-monch Apr 24, 2023
43409cc
handle directory attributes properly
christian-monch Apr 26, 2023
f3916a9
add `key` to status dict returned by ls_struct
christian-monch Apr 26, 2023
7e2b5ab
adapt conduct test to new traverser
christian-monch Apr 26, 2023
ee6f9c5
add a demo of a possible dataset iterator
christian-monch Jun 8, 2023
f994320
add a first example with parallel execution
christian-monch Jun 9, 2023
75b8088
use xargs instead of subprocess
christian-monch Jun 14, 2023
3dcfd91
working parallel example
christian-monch Jun 14, 2023
7e4b62b
add an example for adding from parallel
christian-monch Jun 14, 2023
db341c7
use ls-tree options supported by older git
christian-monch Jun 14, 2023
dfb0b53
fix a faulty format spec in git-annex-find
christian-monch Jun 14, 2023
388cfa2
compare ls_struct for empty or None
christian-monch Jun 16, 2023
cce26a2
improve git output parsing
christian-monch Jun 16, 2023
5e1246c
improve extract adaptor
christian-monch Jun 16, 2023
820d719
add DatasetInfo creation from file-info parameter
christian-monch Jun 19, 2023
b7dc8dd
add individual adaptors for file and dataset
christian-monch Jun 19, 2023
499da79
add a metadata iterator
christian-monch Jun 22, 2023
018515c
add a trivial traversal pipeline
christian-monch Jul 4, 2023
b587f9b
add location info to traverser result
christian-monch Jul 4, 2023
50f42bf
add root dataset info to output
christian-monch Jul 13, 2023
c2ffeeb
add an adaptor to emit files for tabby-file TSVs
christian-monch Jul 28, 2023
cf75265
fix creation of AnnexedFileInfo in get_path_info
christian-monch Aug 15, 2023
a34b05a
improve report of unrecognized extractor classes
christian-monch Aug 15, 2023
48eacde
remove unused code
christian-monch Aug 15, 2023
b80f94c
improve reporting of unrecognized extractor class
christian-monch Aug 15, 2023
c76dfeb
ignore classes from datalad-deprecated
christian-monch Aug 15, 2023
7 changes: 4 additions & 3 deletions Makefile
@@ -43,6 +43,7 @@ code-analysis:
release-pypi: clean
# better safe than sorry
test ! -e dist
python setup.py sdist
python setup.py bdist_wheel --universal
twine upload dist/*
python3 -m pip install --upgrade build
python3 -m pip install --upgrade twine
python3 -m build
python3 -m twine upload dist/*
6 changes: 2 additions & 4 deletions datalad_metalad/__init__.py
Expand Up @@ -2,6 +2,8 @@
import os
import hashlib

from ._version import get_versions


__docformat__ = 'restructuredtext'

@@ -105,9 +107,5 @@ def get_agent_id(name, email):
).encode('utf-8')).hexdigest()


from datalad import setup_package
from datalad import teardown_package

from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
4 changes: 2 additions & 2 deletions datalad_metalad/add.py
@@ -672,7 +672,7 @@ def _get_top_nodes(realm: Path,
# with an un-versioned path. In both cases the internal dataset-tree
# path is "". If set, the un-versioned path is stored in the prefix
# path element in the version list (which confusingly is also called
# "path".
# "path").
assert ap.dataset_path in (top_level_dataset_tree_path, None)

# We leave the creation of the respective nodes to auto_create
@@ -738,7 +738,7 @@ def get_tvl_uuid_mrr_metadata_file_tree(
Read tree version list, uuid set, metadata root record, dataset-level
metadata, and filetree from the metadata store, for the given root
dataset id, root dataset version, dataset id, dataset version, dataset path,
and unversioned path.
and un-versioned path.

This function caches results in order to avoid costly persist operations.

10 changes: 5 additions & 5 deletions datalad_metalad/aggregate.py
@@ -43,7 +43,7 @@
sds-path = path of sds-pd-version in rds-version
add metadata_root_record to uuid-set(rds).sds-pd-version, sds-path
else:
Error("Cannot find path of sds-uuid@sds-pd-version in any rds@version)
Error("Cannot find path of sds-uuid@sds-pd-version in any rds@version")
Error("What can you do? Not much besides re-aggregating")
Error("What can we do? Add a structure that allows for 'detached' metadata")

@@ -128,7 +128,7 @@ class Aggregate(Interface):
i.e. the directory that contains the ".datalad"-entry, to the top-level
directory of the respective sub-dataset.

Aggregate works on existing metadata, it will not extract meta data from
Aggregate works on existing metadata, it will not extract metadata from
data file. To create metadata, use the meta-extract command.

As a result of the aggregation, the metadata of all specified sub-datasets
@@ -305,7 +305,7 @@ def copy_uuid_set(destination_metadata_store: str,

# If the destination does not contain a version list for the
# source UUID, we add a copy of the source version list with
# a the specified path prefix
# the specified path prefix
if uuid not in destination_uuid_set.uuids():

lgr.debug(
@@ -349,7 +349,7 @@ def copy_uuid_set(destination_metadata_store: str,
element=element.deepcopy(
new_destination=destination_metadata_store))

# Unget the versioned element
# un-get the versioned element
lgr.debug(
f"persisting copied metadata element for pd version "
f"{pd_version} of UUID: {uuid}")
@@ -368,7 +368,7 @@ def copy_uuid_set(destination_metadata_store: str,
primary_data_version=pd_version,
prefix_path=old_path)

# Unget the version list in the destination, that should persist it
# un-get the version list in the destination, that should persist it
lgr.debug(f"persisting copied version list for UUID: {uuid}")
destination_uuid_set.unget_version_list(uuid)

19 changes: 13 additions & 6 deletions datalad_metalad/conduct.py
@@ -101,15 +101,15 @@ class Conduct(Interface):
- A list of processors. A processor reads data,
either from the previous processor or the provider and performs
computations on the data and return a result that is processed by
the next processor. The computation may have side-effect,
the next processor. The computation may have side effects,
e.g. store metadata.

The provider is usually executed in the current processes' main
thread. Processors are usually executed in concurrent processes,
i.e. workers. The maximum number of workers is given by the
parameter `max_workers`.

Which provider and which processors are used is defined in an
Which provider and which processors are used is defined in a
"configuration", which is given as JSON-serialized dictionary.
"""

@@ -303,7 +303,7 @@ def process_parallel(executor,
status="ok",
path=str(path),
logger=lgr,
pipeline_data=pipeline_data.to_json())
pipeline_data=pipeline_data.to_dict())
continue

lgr.debug(f"Starting new instance of {processor_specs[0]} on {pipeline_data}")
@@ -343,7 +343,7 @@ def process_parallel(executor,
status="ok",
path=str(path),
logger=lgr,
pipeline_data=pipeline_data.to_json())
pipeline_data=pipeline_data.to_dict())
else:
lgr.debug(
f"Starting processor[{next_index}]"
@@ -396,7 +396,7 @@ def process_parallel(executor,
status="ok",
path=str(path),
logger=lgr,
pipeline_data=pipeline_data.to_json())
pipeline_data=pipeline_data.to_dict())
else:
lgr.debug(
f"Handing pipeline data {pipeline_data} to"
@@ -418,6 +418,10 @@ def process_parallel(executor,
status="error",
logger=lgr,
message=traceback.format_exc())

if consumer_instance:
consumer_instance.consume(PipelineData(state=PipelineDataState.STOP))

return


@@ -435,6 +439,9 @@ def process_sequential(provider_instance: Provider,
evaluated_constructor_args=evaluated_constructor_args,
consumer_instance=consumer_instance)

if consumer_instance:
consumer_instance.consume(PipelineData(state=PipelineDataState.STOP))


def process_downstream(pipeline_data: PipelineData,
processor_specs: list[dict],
@@ -493,7 +500,7 @@ def process_downstream(pipeline_data: PipelineData,
status="ok",
path=str(path),
logger=lgr,
pipeline_data=pipeline_data.to_json())
pipeline_data=pipeline_data.to_dict())

lgr.debug(
f"Pipeline finished, returning datalad result {datalad_result}")
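The conduct.py additions above make both process_parallel and process_sequential hand a PipelineData object in the STOP state to the consumer once all results have been processed, so the consumer knows that no further data will arrive. The following is a generic sketch of that stop-sentinel pattern using stand-in names (STOP, queue, consumer); it is not the PipelineData or consumer implementation from this repository.

import queue
import threading

# Generic sketch of the stop-sentinel pattern used above: the producer
# signals end-of-stream with a dedicated STOP object so the consumer can
# finish cleanly. Names are stand-ins, not datalad-metalad classes.
STOP = object()

def consumer(q: queue.Queue) -> None:
    while True:
        item = q.get()
        if item is STOP:   # corresponds to PipelineDataState.STOP above
            print("consumer: stop sentinel received, shutting down")
            break
        print(f"consumer: consuming {item!r}")

q = queue.Queue()
worker = threading.Thread(target=consumer, args=(q,))
worker.start()

for result in ("result-1", "result-2"):
    q.put(result)
q.put(STOP)                # tell the consumer there is nothing more to come
worker.join()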
10 changes: 9 additions & 1 deletion datalad_metalad/exceptions.py
@@ -1,9 +1,17 @@
from typing import List, Optional

from datalad.support.exceptions import InsufficientArgumentsError
from datalad.support.exceptions import (
InsufficientArgumentsError,
NoDatasetArgumentFound,
)
from datalad.utils import ensure_unicode


class NoDatasetIdFound(NoDatasetArgumentFound):
"""Raised whenever a dataset ID cannot be found in a dataset."""
pass


class MetadataKeyException(RuntimeError):
def __init__(self,
message: str = "",
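The exceptions.py hunk above introduces NoDatasetIdFound, a subclass of NoDatasetArgumentFound, so that datasets without a dataset ID (see the "handle id-less datasets" and "require dataset IDs" commits in the list above) can be reported distinctly. Below is a minimal sketch of how calling code might use such an exception; the read_dataset_id helper and the local stand-in base class are assumptions for illustration only, not code from this repository.

from pathlib import Path

# Minimal sketch of how callers might use the new exception.
# `NoDatasetArgumentFound` is stood in locally and `read_dataset_id`
# is an illustrative helper, not code from datalad-metalad.
class NoDatasetArgumentFound(ValueError):
    """Stand-in for datalad.support.exceptions.NoDatasetArgumentFound."""

class NoDatasetIdFound(NoDatasetArgumentFound):
    """Raised whenever a dataset ID cannot be found in a dataset."""

def read_dataset_id(dataset_path: Path) -> str:
    # Pretend this dataset has a .datalad/config without a datalad.dataset.id
    raise NoDatasetIdFound(f"no dataset ID found in {dataset_path}")

try:
    dataset_id = read_dataset_id(Path("/tmp/example-dataset"))
except NoDatasetIdFound as error:
    # Callers can skip id-less datasets instead of aborting the whole run.
    print(f"skipping dataset: {error}")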