Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce {cont_dspath} to contain what {img_dspath} was and fix {img_dspath} #201

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions changelog.d/pr-201.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
### 🚀 Enhancements and New Features

- WiP: introduce cont_dspath to contain what img_dspath was and fix img_dspath. [PR #201](https://github.com/datalad/datalad-container/pull/201) (by [@yarikoptic](https://github.com/yarikoptic))
4 changes: 3 additions & 1 deletion datalad_container/containers_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ class ContainersAdd(Interface):
this container, e.g. "singularity exec {img} {cmd}". Where '{img}'
is a placeholder for the path to the container image and '{cmd}' is
replaced with the desired command. Additional placeholders:
'{cont_dspath}' is relative path to the dataset containing container
specification in its config,
'{img_dspath}' is relative path to the dataset containing the image,
'{img_dirpath}' is the directory containing the '{img}'.
""",
Expand All @@ -150,7 +152,7 @@ class ContainersAdd(Interface):
args=("--extra-input",),
doc="""Additional file the container invocation depends on (e.g.
overlays used in --call-fmt). Can be specified multiple times.
Similar to --call-fmt, the placeholders {img_dspath} and
Similar to --call-fmt, the placeholders {cont_dspath}, {img_dspath} and
{img_dirpath} are available. Will be stored in the dataset config and
later added alongside the container image to the `extra_inputs`
field in the run-record and thus automatically be fetched when
Expand Down
37 changes: 21 additions & 16 deletions datalad_container/containers_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datalad.interface.base import build_doc
from datalad.support.param import Parameter
from datalad.distribution.dataset import datasetmethod
from datalad.distribution.dataset import require_dataset
from datalad.distribution.dataset import require_dataset, get_dataset_root
from datalad.interface.base import eval_results
from datalad.utils import ensure_iter

Expand Down Expand Up @@ -85,14 +85,23 @@ def __call__(cmd, container_name=None, dataset=None,
yield res
assert container, "bug: container should always be defined here"

image_path = op.relpath(container["path"], pwd)
# container record would contain path to the (sub)dataset containing
# it. If not - take current dataset, as it must be coming from it
image_dspath = op.relpath(container.get('parentds', ds.path), pwd)
cont_dspath = op.relpath(container.get('parentds', ds.path), pwd)

image_path = container["path"]
# container definition might point to an image in some nested dataset.
# it might be useful to be distinguish between the two in such cases
image_dspath = op.relpath(get_dataset_root(image_path), pwd)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_dataset_root is likely not enough here! We must first ensure that there is a subdataset there, so pretty much perform the same trill as datalad get does to install all subdatasets leading to that path. Will need to check for that later and add a dedicated test for that to have hope to get this PR "proper".

image_path = op.relpath(image_path, pwd)
# sure we could check whether the container image is present,
# but it might live in a subdataset that isn't even installed yet
# let's leave all this business to `get` that is called by `run`
common_kwargs = dict(
cont_dspath=cont_dspath,
img_dspath=image_dspath,
img_dirpath=op.dirname(image_path) or ".",
)

cmd = normalize_command(cmd)
# expand the command with container execution
Expand All @@ -110,13 +119,13 @@ def __call__(cmd, container_name=None, dataset=None,
raise ValueError(
'cmdexe {!r} is in an old, unsupported format. '
'Convert it to a plain string.'.format(callspec))

cmd_kwargs = dict(
img=image_path,
cmd=cmd,
**common_kwargs,
)
try:
cmd_kwargs = dict(
img=image_path,
cmd=cmd,
img_dspath=image_dspath,
img_dirpath=op.dirname(image_path) or ".",
)
cmd = callspec.format(**cmd_kwargs)
except KeyError as exc:
yield get_status_dict(
Expand All @@ -134,13 +143,9 @@ def __call__(cmd, container_name=None, dataset=None,
cmd = container['path'] + ' ' + cmd

extra_inputs = []
for extra_input in ensure_iter(container.get("extra-input",[]), set):
for extra_input in ensure_iter(container.get("extra-input", []), set):
try:
xi_kwargs = dict(
img_dspath=image_dspath,
img_dirpath=op.dirname(image_path) or ".",
)
extra_inputs.append(extra_input.format(**xi_kwargs))
extra_inputs.append(extra_input.format(**common_kwargs))
except KeyError as exc:
yield get_status_dict(
'run',
Expand All @@ -150,7 +155,7 @@ def __call__(cmd, container_name=None, dataset=None,
'Unrecognized extra_input placeholder: %s. '
'See containers-add for information on known ones: %s',
exc,
", ".join(xi_kwargs)))
", ".join(common_kwargs)))
return

lgr.debug("extra_inputs = %r", extra_inputs)
Expand Down
34 changes: 23 additions & 11 deletions datalad_container/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def test_custom_call_fmt(path=None, local_file=None):
url=get_local_file_url(op.join(local_file, 'some_container.img')),
image='righthere',
call_fmt='echo image={img} cmd={cmd} img_dspath={img_dspath} '
'cont_dspath={cont_dspath} '
# and environment variable being set/propagated by default
'name=$DATALAD_CONTAINER_NAME'
)
Expand All @@ -175,21 +176,34 @@ def test_custom_call_fmt(path=None, local_file=None):
out = WitlessRunner(cwd=subds.path).run(
['datalad', 'containers-run', '-n', 'mycontainer', 'XXX'],
protocol=StdOutCapture)
assert_in('image=righthere cmd=XXX img_dspath=. name=mycontainer',
assert_in('image=righthere cmd=XXX img_dspath=. cont_dspath=. name=mycontainer',
out['stdout'])

out = WitlessRunner(cwd=ds.path).run(
['datalad', 'containers-run', '-n', 'sub/mycontainer', 'XXX'],
protocol=StdOutCapture)
assert_in('image=sub/righthere cmd=XXX img_dspath=sub', out['stdout'])
assert_in('image=sub/righthere cmd=XXX img_dspath=sub cont_dspath=sub', out['stdout'])

# Test within subdirectory of the super-dataset
subdir = op.join(ds.path, 'subdir')
os.mkdir(subdir)
out = WitlessRunner(cwd=subdir).run(
['datalad', 'containers-run', '-n', 'sub/mycontainer', 'XXX'],
protocol=StdOutCapture)
assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub', out['stdout'])
assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub cont_dspath=../sub',
out['stdout'])

# Add to super a container definition pointing to the image within sub
ds.containers_add(
'sub-mycontainer',
# we point into subdataset
image='sub/righthere',
call_fmt=subds.config.get('datalad.containers.mycontainer.cmdexec')
)
out = WitlessRunner(cwd=ds.path).run(
['datalad', 'containers-run', '-n', 'sub-mycontainer', 'XXX'],
protocol=StdOutCapture)
assert_in('image=sub/righthere cmd=XXX img_dspath=sub cont_dspath=.', out['stdout'])


@with_tree(
Expand Down Expand Up @@ -223,14 +237,12 @@ def test_extra_inputs(path=None):
)
commit_msg = ds.repo.call_git(["show", "--format=%B"])
cmd, runinfo = get_run_info(ds, commit_msg)
assert set(
[
"sub/containers/container.img",
"overlay1.img",
"sub/containers/../overlays/overlay2.img",
"sub/overlays/overlay3.img",
]
) == set(runinfo.get("extra_inputs", set()))
assert {
"sub/containers/container.img",
"overlay1.img",
"sub/containers/../overlays/overlay2.img",
"sub/overlays/overlay3.img",
} == set(runinfo.get("extra_inputs", set()))


@skip_if_no_network
Expand Down