Skip to content

Commit

Permalink
fix dataset reporting
Browse files Browse the repository at this point in the history
This commit fixes the reporting of datasets in
traversal.

There is still something to do for datasets, i.e.
reporting "state", "gitshasum", and "prev_gitshasum"
  • Loading branch information
christian-monch committed Jan 26, 2023
1 parent f812156 commit 113321d
Showing 1 changed file with 47 additions and 21 deletions.
68 changes: 47 additions & 21 deletions datalad_metalad/pipeline/provider/datasettraverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,29 +358,55 @@ def _traverse_single_dataset(self,
"""

if not self.is_installed(dataset):
lgr.debug(f"ignoring un-installed dataset at {dataset.path}")
lgr.warning(f"ignoring un-installed dataset at {dataset.path}")
return

if isinstance(dataset.repo, AnnexRepo):
status = get_annexstatus
else:
status = GitRepo.status
for path_str, element_info in status(dataset.repo).items():
if element_info["type"] in self.item_set:
element_path = Path(path_str)
if self.is_excluded(element_path):
lgr.debug(f"Ignoring excluded path {element_path}")
continue
traverse_result = self._generate_result(
dataset=dataset,
dataset_path=str(dataset.pathobj),
element_path=element_path,
element_info=element_info
)
yield PipelineData((
("path", element_path),
("dataset-traversal-record", [traverse_result])
))
if "dataset" in self.item_set:
if self._already_visited(dataset, Path("")):
return
element_path = resolve_path("", dataset)
traverse_result = self._generate_result(
dataset=dataset,
dataset_path=str(dataset.pathobj),
element_path=element_path,
element_info={
"type": "dataset",
"state": "",
"gitshasum": "",
"prev_gitshasum": ""
}
)
yield PipelineData((
("path", element_path),
("dataset-traversal-record", [traverse_result])
))

if "file" in self.item_set:
if isinstance(dataset.repo, AnnexRepo):
status = get_annexstatus
else:
status = GitRepo.status

for path_str, element_info in status(dataset.repo).items():
if element_info["type"] == "file":
element_path = Path(path_str)
if self.is_excluded(element_path):
lgr.debug(f"Ignoring excluded path {element_path}")
continue
traverse_result = self._generate_result(
dataset=dataset,
dataset_path=str(dataset.pathobj),
element_path=element_path,
element_info=element_info
)
yield PipelineData((
("path", element_path),
("dataset-traversal-record", [traverse_result])
))

if self.traverse_sub_datasets:
self._traverse_subdatasets(dataset)


def _traverse_subdatasets(self,
root_dataset: Dataset
Expand Down

0 comments on commit 113321d

Please sign in to comment.