From f58c00847dae3f585a3da2f1ce4bd796b2eea058 Mon Sep 17 00:00:00 2001
From: Michael Hanke
Date: Wed, 31 May 2023 13:41:42 +0200
Subject: [PATCH 01/21] Cleanup code

---
 bin/getmeta_studyvisit | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/bin/getmeta_studyvisit b/bin/getmeta_studyvisit
index 80b8e96..8517c6e 100755
--- a/bin/getmeta_studyvisit
+++ b/bin/getmeta_studyvisit
@@ -17,11 +17,6 @@ from datalad.utils import md5sum
 
 lgr = logging.getLogger('inm-icf-utilities')
 
-# this points to the top of the ICF data store.
-# internally it will be amended with the missing components
-# for study and visit deposit locations
-icfstore_baseurl = 'https://data.inm-icf.de'
-
 # which DICOM tags to extract from DICOM files and store as
 # git-annex metadata (e.g., to enable metadata-driven views
 # of visit datasets)
@@ -58,7 +53,7 @@ def main(store_dir: str,
     if not tar_path.exists():
         raise ValueError(f'no tarball at {tar_path}')
 
-    runshit(
+    describe_tarball(
         # source visit tarball
         tar_path.resolve(),
         # source visit tarball URL, relative to store
@@ -70,7 +65,7 @@ def main(store_dir: str,
     )
 
 
-def runshit(tarpath, tarurl, metapath_dataset, metapath_file):
+def describe_tarball(tarpath, tarurl, metapath_dataset, metapath_file):
     # construct and dump dataset metadata
     tar_meta = {
         'size': tarpath.stat().st_size,

From 4f42ddae7807a847b72853bff759c3fd24fa0725 Mon Sep 17 00:00:00 2001
From: Adina Wagner
Date: Wed, 31 May 2023 13:52:37 +0200
Subject: [PATCH 02/21] Add less package in singularity image
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise, users can't display help texts:

```
❱ singularity exec icf.sif datalad --help
2 ! /bin/sh: 1: less: not found
```
---
 singularity/icf.def | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/singularity/icf.def b/singularity/icf.def
index b109a6e..16db2aa 100644
--- a/singularity/icf.def
+++ b/singularity/icf.def
@@ -13,7 +13,7 @@ From: debian:bookworm-slim
     # install all non-datalad deps from Debian proper
     apt-get update -qq
     apt-get -y install eatmydata
-    eatmydata apt-get -y install --no-install-recommends git python3-pip python3-dicom python3-tqdm
+    eatmydata apt-get -y install --no-install-recommends git python3-pip python3-dicom python3-tqdm less
    # fresh git-annex via the datalad-installer
    python3 -m pip install --break-system-packages datalad-installer
    datalad-installer -E /tmp/dlinstaller_env.sh --sudo ok git-annex -m snapshot

From 5845c49a58835fbbe7bb3e49d5287e6b8b1d76e8 Mon Sep 17 00:00:00 2001
From: Michael Hanke
Date: Wed, 31 May 2023 14:00:01 +0200
Subject: [PATCH 03/21] `getmeta_studyvisit` -> `deposit_visit_metadata`

Document more comprehensively and remove the ICF label from the help.
---
 ...meta_studyvisit => deposit_visit_metadata} | 26 +++++++++++++++++--
 tests/test_datalad_workflows/test_pipeline.py |  2 +-
 2 files changed, 25 insertions(+), 3 deletions(-)
 rename bin/{getmeta_studyvisit => deposit_visit_metadata} (78%)

diff --git a/bin/getmeta_studyvisit b/bin/deposit_visit_metadata
similarity index 78%
rename from bin/getmeta_studyvisit
rename to bin/deposit_visit_metadata
index 8517c6e..6371b26 100755
--- a/bin/getmeta_studyvisit
+++ b/bin/deposit_visit_metadata
@@ -1,6 +1,25 @@
 #!/usr/bin/env python3
 """
+This command locates the DICOM tarball for a particular visit in a study (given
+by their respective identifiers) in the data store, and extracts a minimal set
+of metadata tags for each DICOM image, and for the TAR archive as a whole. These
+metadata are then deposited in two files, in JSON format, in the study
+directory:
+- `{visit_id}_metadata_tarball.json`
+
+  JSON object with basic properties of the archive, such as 'size' and
+  'md5'.
+
+- `{visit_id}_metadata_dicoms.json`
+
+  JSON array with essential properties for each DICOM image file, such as
+  'path' (relative path inside the TAR archive), 'md5' (MD5 checksum of
+  the DICOM file), 'size' (in bytes), and select standard DICOM tags,
+  such as "SeriesDescription", "SeriesNumber", "Modality",
+  "MRAcquisitionType", "ProtocolName", "PulseSequenceName". The latter
+  enable a rough, technical characterization of the images in the TAR
+  archive.
 """
 import logging
 import os
@@ -110,10 +129,13 @@ def describe_tarball(tarpath, tarurl, metapath_dataset, metapath_file):
 
 if __name__ == '__main__':
     import argparse
-    p = argparse.ArgumentParser(description=__doc__)
+    p = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
     p.add_argument(
         "-o", "--store-dir", metavar='PATH', default=os.getcwd(),
-        help="Root directory of the ICF data store. "
+        help="Root directory of the data store. "
         "Visit data will be read from it, and extracted metadata will be "
         "deposited into it."
     )
diff --git a/tests/test_datalad_workflows/test_pipeline.py b/tests/test_datalad_workflows/test_pipeline.py
index ec750d3..21475af 100644
--- a/tests/test_datalad_workflows/test_pipeline.py
+++ b/tests/test_datalad_workflows/test_pipeline.py
@@ -50,7 +50,7 @@ def process_visits(studies_dir: Path,
         for visit in visits:
             # run metadata generation script
             run_script(
-                'getmeta_studyvisit',
+                'deposit_visit_metadata',
                 studies_dir,
                 study, visit
             )

From 02483b2333495ecc4bf11abec6b507d6fb9fc250 Mon Sep 17 00:00:00 2001
From: Adina Wagner
Date: Wed, 31 May 2023 14:53:44 +0200
Subject: [PATCH 04/21] Add minimal developer docs

---
 docs/source/developer.rst | 57 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/docs/source/developer.rst b/docs/source/developer.rst
index b04eaf4..1517517 100644
--- a/docs/source/developer.rst
+++ b/docs/source/developer.rst
@@ -1,2 +1,59 @@
 Developer docs
 ==============
+
+Contributions in the form of bug reports or code are warmly welcomed.
+You can find the source code as well as an issue tracker on `GitHub`_.
+
+Development environment
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Clone the source repository::
+
+    git clone git@github.com:psychoinformatics-de/inm-icf-utilities.git
+    cd inm-icf-utilities
+
+Install development software requirements in a virtual environment::
+
+    # create and enter a new virtual environment (optional)
+    $ virtualenv --python=python3 ~/env/icf
+    $ . ~/env/icf/bin/activate
+    $ pip install -r requirements-devel.txt
+    $ pip install -r docs/requirements.txt
+
+Running tests
+^^^^^^^^^^^^^
+
+This package uses pytest for integration testing, and a CI test suite runs in
+the source repository on GitHub automatically.
+As the tooling is meant to run only on specific systems, executing tests
+locally requires additional setup steps that likely make it infeasible unless
+your system is one of the ICF servers.
+
+The tests require local DICOM data (not distributed alongside this software)
+that match the layout, naming scheme, and permission set that the ICF uses.
+In addition, they require the icf-utils Singularity image (see :ref:`singularity`),
+which needs to be installed as ``icf-utils`` in ``/usr/bin``.
+Code that achieves this setup can be found in `.appveyor.yml
+<https://github.com/psychoinformatics-de/inm-icf-utilities/blob/main/.appveyor.yml>`_.
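+One way to provide ``/usr/bin/icf-utils`` is a small wrapper script around
+``singularity run``. A minimal sketch is shown below; it is an illustration
+only, not the verbatim ``.appveyor.yml`` code, and the image location and
+data store bind path in it are assumptions that need to match the local
+setup::
+
+    #!/bin/sh
+    # hypothetical /usr/bin/icf-utils: expose the bundled utilities.
+    # Bind-mount the (assumed) data store location into the container and
+    # forward the utility name plus all further arguments to the image.
+    exec singularity run --bind /data/icf-store /usr/local/lib/icf.sif "$@"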
+
+If this is in place, set the environment variable ``INM_ICF_TEST_STUDIES``
+to point to the directory with DICOM data, and execute integration tests with::
+
+  pytest -s -v .
+
+.. _singularity:
+
+Bundled utilities
+^^^^^^^^^^^^^^^^^
+
+The INM-ICF utilities are distributed as a bundle in the form of a
+Singularity software container. This container is updated regularly
+and can be downloaded from `ci.appveyor.com/api/projects/mih/inm-icf-utilities/artifacts/icf.sif
+<https://ci.appveyor.com/api/projects/mih/inm-icf-utilities/artifacts/icf.sif>`_.
+When testing changes to the INM-ICF utilities, the Singularity image needs to be
+rebuilt with the changes included.
+Its recipe can be found under ``singularity/icf.def``.
+The image can be rebuilt using the Appveyor-based CI test suite, but an update is
+not triggered automatically with a code change.
+It instead requires that the `build cache is wiped `_.
+
+
+.. _GitHub: https://github.com/psychoinformatics-de/inm-icf-utilities
\ No newline at end of file

From e494cb03e68b5aae166f994ea89e48a885796594 Mon Sep 17 00:00:00 2001
From: Michael Hanke
Date: Wed, 31 May 2023 14:52:38 +0200
Subject: [PATCH 05/21] `dataladify_studyvisit_from_meta` ->
 `deposit_visit_dataset`

Document more comprehensively and remove the ICF label from the help.
However, the base URL of the store continues to default to
https://data.inm-icf.de
---
 ...yvisit_from_meta => deposit_visit_dataset} | 91 ++++++++++++-------
 tests/test_datalad_workflows/test_pipeline.py |  2 +-
 2 files changed, 60 insertions(+), 33 deletions(-)
 rename bin/{dataladify_studyvisit_from_meta => deposit_visit_dataset} (73%)

diff --git a/bin/dataladify_studyvisit_from_meta b/bin/deposit_visit_dataset
similarity index 73%
rename from bin/dataladify_studyvisit_from_meta
rename to bin/deposit_visit_dataset
index 6249169..a70ba38 100755
--- a/bin/dataladify_studyvisit_from_meta
+++ b/bin/deposit_visit_dataset
@@ -1,20 +1,27 @@
 #!/usr/bin/env python3
 """
-
+This command reads the metadata deposit from `deposit_visit_metadata` for a
+visit in a study (given by their respective identifiers) from the data store,
+and generates a DataLad dataset from it. This DataLad dataset provides
+versioned access to the visit's DICOM data, up to single-image granularity.
+Moreover, all DICOM files are annotated with basic DICOM tags that enable
+on-demand dataset views for particular applications (e.g., DICOMs sorted
+by image series and protocol name). The DataLad dataset is deposited in
+two files in the study directory:
+
+- `{visit_id}_XDLRA--refs`
+- `{visit_id}_XDLRA--repo-export`
+
+where the former enables `datalad/git clone` operations, and the latter
+represents the actual dataset as a compressed archive.
 """
 import json
 import os
 from pathlib import Path
-import sys
 import tempfile
 
 import datalad.api as dl
 
-# this points to the top of the ICF data store.
-# internally it will be amended with the missing components
-# for study and visit deposit locations
-icfstore_baseurl = 'https://data.inm-icf.de'
-
 # which DICOM tags to extract from DICOM files and store as
 # git-annex metadata (e.g., to enable metadata-driven views
 # of visit datasets)
@@ -28,9 +35,12 @@ dicom_metadata_keys = [
 ]
 
 
-def main(store_dir: str,
-         study_id: str,
-         visit_id: str):
+def main(
+    store_dir: str,
+    store_url: str,
+    study_id: str,
+    visit_id: str,
+):
     store_base_dir = Path(store_dir)
     # where to deposit the final datalad dataset
     repo_base_path = store_base_dir / study_id / f'{visit_id}_'
@@ -48,20 +58,27 @@ def main(store_dir: str,
         f'{visit_id}_metadata_dicoms.json'
 
     with tempfile.TemporaryDirectory(prefix='dataladify_visit_') as wdir:
-        runshit(
+        deposit_dataset(
             # workdir
             wdir,
             # path to deposited dataset metadata
             dataset_metadata_path.absolute(),
             # path to deposited file metadata
             file_metadata_path.absolute(),
+            # base URL of the store to complete access URLs
+            store_url,
             # path to deposit the repo at
             repo_base_path.absolute(),
         )
 
 
-def runshit(wdir, metapath_dataset, metapath_file, repobasepath):
-
+def deposit_dataset(
+    wdir: Path,
+    metapath_dataset: Path,
+    metapath_files: Path,
+    store_url: str,
+    repobasepath: Path,
+):
     # read tar metadata dict
     tar_metadata = read_json_file(metapath_dataset)
     expected_keys = ('size', 'md5', 'dspath', 'storepath')
@@ -88,7 +105,7 @@ def runshit(wdir, metapath_dataset, metapath_file, repobasepath):
     uncurl_uuid = repo.call_annex_records(['info', 'uncurl'])[0]['uuid']
     assert uncurl_uuid
     # register the URL of the tarball
-    tar_metadata['url'] = f"{icfstore_baseurl}/{tar_metadata['storepath']}"
+    tar_metadata['url'] = f"{store_url}/{tar_metadata['storepath']}"
     res = ds.addurls(
         [tar_metadata],
         '{url}',
@@ -98,9 +115,11 @@ def runshit(wdir, metapath_dataset, metapath_file, repobasepath):
     # fish out annex key of tarball.
     # we could also construct that, but let's not duplicate the setup above
     tarpath = Path(tar_metadata.get('dspath'))
-    tarkey = [r.get('annexkey') for r in res
-              if r.get('action') == 'fromkey'
-              and r.get('path', '').endswith(tarpath.name)]
+    tarkey = [
+        r.get('annexkey') for r in res
+        if r.get('action') == 'fromkey'
+        and r.get('path', '').endswith(tarpath.name)
+    ]
     assert len(tarkey) == 1
     tarkey = tarkey[0]
     assert tarkey
@@ -123,7 +142,7 @@ def runshit(wdir, metapath_dataset, metapath_file, repobasepath):
     assert archivist_uuid
 
     # load dicom metadata
-    dicoms = read_json_file(metapath_file)
+    dicoms = read_json_file(metapath_files)
     # add to dataset
     dicom_recs = ds.addurls(
         dicoms,
@@ -146,7 +165,10 @@ def runshit(wdir, metapath_dataset, metapath_file, repobasepath):
             repo.call_annex(['setpresentkey', dicomkey, archivist_uuid, '1'])
 
     repo.call_git([
-        'remote', 'add', 'icfstore',
+        'remote', 'add',
+        # the remote name is arbitrary, it will not end up in the resulting
+        # deposit
+        'store',
         # this is a little twisted:
        # the first line is an f-string, because we need to get the base URL
        # pointing to the study directory into the remote URL
@@ -163,7 +185,7 @@ def runshit(wdir, metapath_dataset, metapath_file, repobasepath):
     # to be able to actually push everything
     repo.call_annex(['whereis', '--key', dicomkeys[0]])
     ds.push(
-        to='icfstore',
+        to='store',
         # under no circumstances do we want to push annexed content.
         # and there also should be none
         data='nothing',
     )
 
 
 def read_json_file(file_path):
     """
     Load content from catalog metadata file for current node
     """
-    try:
-        with open(file_path) as f:
-            return json.load(f)
-    except OSError as err:
-        raise("OS error: {0}".format(err))
-    except:
-        raise("Unexpected error:", sys.exc_info()[0])
+    with open(file_path) as f:
+        return json.load(f)
 
 
 if __name__ == '__main__':
     import argparse
-    p = argparse.ArgumentParser(description=__doc__)
+    p = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    p.add_argument(
+        '--id', nargs=2, metavar=('STUDY-ID', 'VISIT-ID'), required=True,
+        help="study and visit identifiers, used to "
+        "locate the visit archive in the storage organization. "
+    )
     p.add_argument(
         "-o", "--store-dir", metavar='PATH', default=os.getcwd(),
-        help="Root directory of the ICF data store. "
+        help="root directory of the data store. "
         "Visit data will be read from it, and the DataLad dataset will be "
         "deposited into it."
     )
     p.add_argument(
-        '--id', nargs=2, metavar=('STUDY-ID', 'VISIT-ID'), required=True,
-        help="The study and visit identifiers, used to "
-        "locate the visit archive in the storage organization. "
+        '--store-url', metavar='URL', default='https://data.inm-icf.de',
+        help="base URL of the DICOM data store. This URL is used to "
+        "register TAR archive download URLs in the generated DataLad "
+        "dataset."
     )
     args = p.parse_args()
     main(store_dir=args.store_dir,
+         store_url=args.store_url,
          study_id=args.id[0],
          visit_id=args.id[1],
          )
diff --git a/tests/test_datalad_workflows/test_pipeline.py b/tests/test_datalad_workflows/test_pipeline.py
index 21475af..3fa230d 100644
--- a/tests/test_datalad_workflows/test_pipeline.py
+++ b/tests/test_datalad_workflows/test_pipeline.py
@@ -56,7 +56,7 @@ def process_visits(studies_dir: Path,
             )
             # run dataladification script
             run_script(
-                'dataladify_studyvisit_from_meta',
+                'deposit_visit_dataset',
                 studies_dir,
                 study, visit
             )

From 1bcdfd2e469776d425201094e6dc672d6f29e750 Mon Sep 17 00:00:00 2001
From: Michael Hanke
Date: Wed, 31 May 2023 15:05:36 +0200
Subject: [PATCH 06/21] Use the actual store URL for generating the dataset

... not a disconnected default
---
 tests/test_datalad_workflows/test_pipeline.py | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tests/test_datalad_workflows/test_pipeline.py b/tests/test_datalad_workflows/test_pipeline.py
index 3fa230d..9397ead 100644
--- a/tests/test_datalad_workflows/test_pipeline.py
+++ b/tests/test_datalad_workflows/test_pipeline.py
@@ -28,6 +28,7 @@ def run_script(name: str,
                working_directory: str | Path,
                study_id: str,
                visit_id: str,
+               *args
                ):
     script_path = Path(*(Path(__file__).parts[:-3] + ('bin',))) / name
 
@@ -37,14 +38,16 @@ def run_script(name: str,
             str(script_path),
             '--id',
             study_id,
-            visit_id
+            visit_id,
+            *args
         ]
     )
 
 
 def process_visits(studies_dir: Path,
-                   studies: list[str],
-                   visits: list[str]
+                   studies: list[str],
+                   visits: list[str],
+                   baseurl: str,
                    ):
     for study in studies:
         for visit in visits:
@@ -58,7 +61,8 @@ def process_visits(studies_dir: Path,
             run_script(
                 'deposit_visit_dataset',
                 studies_dir,
-                study, visit
+                study, visit,
+                '--store-url', baseurl,
             )
             # run catalogification script
             run_script(
@@ -140,6 +144,7 @@ def test_pipeline(tmp_path: Path,
         Path(test_studies_dir),
         test_study_names,
         existing_visits,
+        data_webserver,
     )
 
     # 1. Test metadata generation
@@ -166,12 +171,9 @@ def test_pipeline(tmp_path: Path,
             dataaccess_credential,
             credman,
         )
-        # TODO reenable once the server setup is actually compatible
-        # TODO swap the order of gets, or actually drop the tar get
-        # completely. Pulling individual files will do all that internally
-        # Try to get the tar file and the DICOMs
-        #dataset.get(f'icf/{visit}_dicom.tar')
-        #dataset.get(f'{study}_{visit}')
+        # pull all individual DICOM files, this will internally
+        # access/download the archive at the store
+        dataset.get(f'{study}_{visit}')
 
     # 3. Test catalog generation
     # - assert that study catalogs have been created using webcatalog method

From 2d9f284b7d24646bf7b6fe2dfdff61a1475fffa7 Mon Sep 17 00:00:00 2001
From: Adina Wagner
Date: Wed, 31 May 2023 15:23:30 +0200
Subject: [PATCH 07/21] Fix note about building singularity image via CI

---
 docs/source/developer.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/developer.rst b/docs/source/developer.rst
index 1517517..f85f6bc 100644
--- a/docs/source/developer.rst
+++ b/docs/source/developer.rst
@@ -51,9 +51,9 @@ and can be downloaded from `ci.appveyor.com/api/projects/mih/inm-icf-utilities/a
 When testing changes to the INM-ICF utilities, the Singularity image needs to be
 rebuilt with the changes included.
 Its recipe can be found under ``singularity/icf.def``.
-The image can be rebuilt using the Appveyor-based CI test suite, but an update is
-not triggered automatically with a code change.
-It instead requires that the `build cache is wiped `_.
+The image can be rebuilt automatically using the Appveyor-based CI test suite.
+If only software dependencies change, an update is **not** triggered automatically
+but requires that the `build cache is wiped `_.
 
 .. _GitHub: https://github.com/psychoinformatics-de/inm-icf-utilities
\ No newline at end of file

From fddb5c4a06b20e32897b127c6f75bf033529d662 Mon Sep 17 00:00:00 2001
From: Michael Hanke
Date: Wed, 31 May 2023 15:24:04 +0200
Subject: [PATCH 08/21] Post acknowledgements

---
 README.md | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 51df1c1..64508ba 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,14 @@
-# inm-icf-utilities
+# Utilities for managing the INM-ICF DICOM data store at Research Center Jülich
 
 [![Documentation Status](https://readthedocs.org/projects/inm-icf-utilities/badge/?version=latest)](https://inm-icf-utilities.readthedocs.io/en/latest/?badge=latest)
-
 [![Build status](https://ci.appveyor.com/api/projects/status/jaife669slqyru52/branch/main?svg=true)](https://ci.appveyor.com/project/mih/inm-icf-utilities/branch/main)
+
+
+## Acknowledgements
+
+This software was developed with support from the German Federal Ministry of
+Education and Research (BMBF 01GQ1905), the US National Science Foundation (NSF
+1912266), the Helmholtz research center Jülich (RDM challenge 2022), and the
+Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) under grant
+SFB 1451 ([431549029](https://gepris.dfg.de/gepris/projekt/431549029), INF
+project).

From 81e4024a6df79e8fbc7087122fea6aeb2c64e820 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szczepanik?=
Date: Wed, 31 May 2023 15:30:30 +0200
Subject: [PATCH 09/21] docs: remove indices, tables & search page

These standard generated sections were not showing any content, and it
doesn't seem that we are planning to use them.
---
 docs/source/index.rst | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 86c308d..0edd5a0 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -18,10 +18,3 @@ individuals.
    user/index
    personnel
    developer
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`

From 69fa8d3d99c9c53fdd8e5f912c496c668998cbc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szczepanik?=
Date: Wed, 31 May 2023 15:40:30 +0200
Subject: [PATCH 10/21] docs: add acknowledgements to index page

The rubric directive creates a paragraph heading that is not used to
create a table of contents node. Makes for a smaller heading and less
vertical space, compared to a regular heading.

Suggested-by: Michael Hanke
---
 docs/source/index.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 0edd5a0..2f74f83 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -18,3 +18,13 @@ individuals.
    user/index
    personnel
    developer
+
+
+.. rubric:: Acknowledgements
+
+This software was developed with support from the German Federal
+Ministry of Education and Research (BMBF 01GQ1905), the US National
+Science Foundation (NSF 1912266), the Helmholtz research center Jülich
+(RDM challenge 2022), and the Deutsche Forschungsgemeinschaft (DFG,
+German Research Foundation) under grant SFB 1451 (`431549029
+<https://gepris.dfg.de/gepris/projekt/431549029>`_, INF project).

From d597e4a366d8aa7246f9aa45ba1cb21bdd0ba3e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szczepanik?=
Date: Wed, 31 May 2023 13:45:48 +0200
Subject: [PATCH 11/21] docs: intro & subsection headers for personnel guide

---
 docs/source/personnel.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/docs/source/personnel.rst b/docs/source/personnel.rst
index f37a38c..745a48f 100644
--- a/docs/source/personnel.rst
+++ b/docs/source/personnel.rst
@@ -1,2 +1,19 @@
 ICF Personnel guide
 ===================
+
+The INM-ICF Utilities GitHub repository provides a set of executable
+Python scripts which automate generation of deposits in the ICF
+archive. To simplify deployment, these scripts and all their
+dependencies are packaged as a Singularity container.
+
+Archive generation
+------------------
+
+Containerized execution
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Archive generation
+^^^^^^^^^^^^^^^^^^
+
+DataLad dataset generation
+^^^^^^^^^^^^^^^^^^^^^^^^^^

From c1ff8683d171efd39a208e7c4e31b0f0715e451b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szczepanik?=
Date: Wed, 31 May 2023 14:32:13 +0200
Subject: [PATCH 12/21] docs: add an overview of icf scripts

Scripts are described with a one-sentence explanation, and a copy-paste
of the usage message from -h.
---
 docs/source/personnel.rst | 52 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/docs/source/personnel.rst b/docs/source/personnel.rst
index 745a48f..c5183ef 100644
--- a/docs/source/personnel.rst
+++ b/docs/source/personnel.rst
@@ -12,8 +12,60 @@ Archive generation
 Containerized execution
 ^^^^^^^^^^^^^^^^^^^^^^^
 
+With the Singularity image, ``icf.sif``, all scripts are made directly
+available, either through ``singularity run``:
+
+.. code-block:: console
+
+   $ singularity run icf.sif make_studyvisit_archive ...
+
+or, after making the image file executable:
+
+.. code-block:: console
+
+   $ ./icf.sif make_studyvisit_archive ...
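+
+Making the image file executable is a one-time step; a minimal sketch,
+assuming the image sits in the current working directory:
+
+.. code-block:: console
+
+   $ chmod +x icf.sif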
+
 Archive generation
 ^^^^^^^^^^^^^^^^^^
 
+A TAR archive containing files from a single study visit can be
+generated and deposited in the study directory with
+``make_studyvisit_archive``.
+
+.. code-block:: console
+
+   $ ./icf.sif make_studyvisit_archive --help
+   usage: make_studyvisit_archive [-h] [-o PATH] --id STUDY-ID VISIT-ID
+
 DataLad dataset generation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The DataLad dataset can be generated and placed alongside the tarballs
+without affecting them. Placement in the study folder guarantees the
+same access permissions (authenticated https). This provides users
+with DataLad-based access and related additional features. The
+datasets are generated based on file metadata -- the TAR archive
+remains the only data source -- so storage overhead is minimal.
+
+All scripts have the same set of arguments.
+
+Required metadata can be prepared with ``getmeta_studyvisit``:
+
+.. code-block:: console
+
+   $ ./icf.sif getmeta_studyvisit -h
+   usage: getmeta_studyvisit [-h] [-o PATH] --id STUDY-ID VISIT-ID
+
+A dataset can be created with ``dataladify_studyvisit_from_meta``:
+
+.. code-block:: console
+
+   $ ./icf.sif dataladify_studyvisit_from_meta -h
+   usage: dataladify_studyvisit_from_meta [-h] [-o PATH] --id STUDY-ID VISIT-ID
+
+A DataLad catalog can be created or updated with ``catalogify_studyvisit_from_meta``:
+
+.. code-block:: console
+
+   $ ./icf.sif catalogify_studyvisit_from_meta --help
+   usage: catalogify_studyvisit_from_meta [-h] [-o PATH] --id STUDY-ID VISIT-ID

From 2a4477e7235027c42f7fd45c02d9135680f36755 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szczepanik?=
Date: Wed, 31 May 2023 15:46:34 +0200
Subject: [PATCH 13/21] docs: retitle icf personnel guide to administrator
 docs

---
 docs/source/personnel.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/personnel.rst b/docs/source/personnel.rst
index c5183ef..3814da4 100644
--- a/docs/source/personnel.rst
+++ b/docs/source/personnel.rst
@@ -1,5 +1,5 @@
-ICF Personnel guide
-===================
+Administrator docs
+==================

From 29954a159455854df4b621ac07ddd4b9203869cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szczepanik?=
Date: Wed, 31 May 2023 15:49:13 +0200
Subject: [PATCH 14/21] docs: rename icf personnel guide to administrator docs

---
 docs/source/{personnel.rst => admin.rst} | 0
 docs/source/index.rst                    | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename docs/source/{personnel.rst => admin.rst} (100%)

diff --git a/docs/source/personnel.rst b/docs/source/admin.rst
similarity index 100%
rename from docs/source/personnel.rst
rename to docs/source/admin.rst
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 86c308d..5984745 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -16,7 +16,7 @@ individuals.
    :caption: Contents:
 
    user/index
-   personnel
+   admin
    developer
 
 Indices and tables

From ba7edefae82901a7012bfb816f89fc2eb6e65736 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Szczepanik?=
Date: Wed, 31 May 2023 16:02:11 +0200
Subject: [PATCH 15/21] docs: explain "installing" singularity image

This explains how to create a script that defines a bind point and does
the singularity run call, and adds it under /usr/bin - based on how it's
done in the CI setup.
Suggested-by: Adina Wagner --- docs/source/admin.rst | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/docs/source/admin.rst b/docs/source/admin.rst index 3814da4..eeb6dc5 100644 --- a/docs/source/admin.rst +++ b/docs/source/admin.rst @@ -17,13 +17,29 @@ available, either through ``singularity run``: .. code-block:: console - $ singularity run icf.sif make_studyvisit_archive ... + $ singularity run icf.sif