Skip to content

Commit a6b6e1d

Browse files
authored
Merge pull request #290 from PNNL-CompBio/247-update-cli-to-enable-data-query-and-download
247 update cli to enable data query and download
2 parents 5482f44 + 5659516 commit a6b6e1d

File tree

3 files changed

+187
-17
lines changed

3 files changed

+187
-17
lines changed

coderdata/cli.py

Lines changed: 185 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,197 @@
1+
"""
2+
Command Line Interface to retrieve coderdata datasets.
3+
"""
4+
15
import argparse
2-
from .download.downloader import download
6+
from os import PathLike
7+
from pathlib import Path
8+
from typing import Union
9+
import sys
10+
11+
from .download.downloader import download as download_datasets
12+
from .utils import version
13+
from .utils import list_datasets
314

415
def main():
5-
parser = argparse.ArgumentParser(prog='coderdata')
6-
subparsers = parser.add_subparsers(dest='command')
16+
"""
17+
Main method containing the argument parsing and execution of
18+
individual subroutines.
19+
"""
20+
parser = argparse.ArgumentParser(prog='coderdata', add_help=True)
21+
parser.set_defaults(func=info)
722

8-
# Subcommand 'download'
9-
parser_download = subparsers.add_parser('download', help='Download datasets')
10-
parser_download.add_argument('--prefix', type=str, default=None,
11-
help='Prefix of the dataset to download (e.g., "hcmi"), "all", or leave empty for all files.')
23+
# subparser for the 'download' subroutine
24+
subparsers = parser.add_subparsers(
25+
title="commands",
26+
dest='command'
27+
)
28+
parser_download = subparsers.add_parser(
29+
'download',
30+
help='subroutine to download datasets. See "coderdata download -h" '
31+
'for more options.'
32+
)
33+
parser_download.add_argument(
34+
'-n', '--name',
35+
dest='DATASET_NAME',
36+
type=str,
37+
default='all',
38+
help='name of the dataset to download (e.g., "beataml"). '
39+
'Alternatively, "all" will download the full repository of '
40+
'coderdata datasets. See "coderdata --list" for a complete list '
41+
'of available datasets. Defaults to "all"'
42+
)
43+
parser_download.add_argument(
44+
'-p', '--local_path',
45+
dest="LOCAL_PATH",
46+
type=check_folder,
47+
default=Path.cwd(),
48+
help='defines the folder the datasets should be stored in. Defaults '
49+
'to the current working directory if omitted.'
50+
)
51+
parser_download.add_argument(
52+
'-o', '--overwrite',
53+
dest="OVERWRITE",
54+
default=False,
55+
action='store_true',
56+
help='allow dataset files to be overwritten if they already exist.'
57+
)
1258
parser_download.set_defaults(func=download)
59+
60+
# argument group that contains flags for additional information
61+
grp = parser.add_mutually_exclusive_group()
62+
grp.add_argument(
63+
'-l', '--list',
64+
dest="LIST",
65+
action='store_true',
66+
help="prints list of available datasets and exits program."
67+
)
68+
grp.add_argument(
69+
'-v', '--version',
70+
dest="VERSION",
71+
action='store_true',
72+
help='prints the versions of the coderdata API and dataset and exits '
73+
'the program'
74+
)
1375

76+
# checks if 'coderdata' was executed without additional arguments
77+
# and if so prints help message and exits
78+
if len(sys.argv) == 1:
79+
parser.print_help(sys.stderr)
80+
sys.exit(0)
81+
82+
# parse arguments and execute defined functions by `set_default()`
83+
# according to which subcommands / arguments were passed on the
84+
# command line
1485
args = parser.parse_args()
15-
if hasattr(args, 'func'):
16-
# Check if 'prefix' argument is present and pass it directly to the function
17-
if 'prefix' in args:
18-
args.func(args.prefix)
19-
else:
20-
args.func()
86+
args.func(args)
87+
88+
89+
def info(args):
90+
"""
91+
Helper function that takes the parsed command line arguments and
92+
prints either version information or information on the available
93+
datasets depending on the arguments in ``args``.
94+
95+
Parameters
96+
----------
97+
args : Namespace
98+
A Namespace object that contains commandline arguments parsed by
99+
``ArgumentParser.parse_args()``.
100+
"""
101+
102+
# retrieve the dataset information stored in dataset.yml via
103+
# coderdata.utils.list_datasets() and print the information to
104+
# sys.stdout
105+
if args.LIST:
106+
print(
107+
'\n'
108+
'Available datasets\n'
109+
'------------------\n'
110+
)
111+
list_datasets()
112+
print(
113+
'\n'
114+
'------------------\n\n'
115+
'To download individual datasets run "coderdata download --name '
116+
'DATASET_NAME" where "DATASET_NAME" is for example "beataml".'
117+
)
118+
119+
# retrieve version number information stored in dataset.yml via
120+
# coderdata.utils.version() and print the information to sys.stdout
121+
elif args.VERSION:
122+
version_numbers = version()
123+
print(
124+
*(
125+
f"package version: {version_numbers['package']}",
126+
f"dataset version: {version_numbers['dataset']}"
127+
),
128+
sep='\n',
129+
file=sys.stdout,
130+
)
131+
132+
133+
def download(args):
134+
"""
135+
Wrapper function to download datasets via ``coderdata.download()``.
136+
Function passes commandline arguments to the internal download
137+
function.
138+
139+
Parameters
140+
----------
141+
args : Namespace
142+
A Namespace object that contains commandline arguments parsed by
143+
``ArgumentParser.parse_args()``.
144+
"""
145+
download_datasets(
146+
name=args.DATASET_NAME,
147+
local_path=args.LOCAL_PATH,
148+
exist_ok=args.OVERWRITE,
149+
)
150+
151+
152+
def check_folder(path: Union[str, PathLike, Path]) -> Path:
153+
"""
154+
Helper function to check if a defined folder exists.
155+
156+
Returns
157+
-------
158+
Path
159+
Cleaned path object with the absolute path to the folder passed
160+
to the function.
161+
162+
Raises
163+
------
164+
TypeError
165+
If passed path argument is not of the requested type.
166+
OSError
167+
If the passed path argument does not link to a valid existing
168+
folder.
169+
"""
170+
171+
if not isinstance(path, (str, PathLike, Path)):
172+
raise TypeError(
173+
f"'path' must be of type str, PathLike or Path. Supplied argument "
174+
f"is of type {type(path)}."
175+
)
176+
if not isinstance(path, Path):
177+
abs_path = Path(path).absolute()
21178
else:
22-
parser.print_help()
179+
abs_path = path.absolute()
180+
181+
if not abs_path.is_dir():
182+
raise OSError(
183+
f"The defined folder path '{path}' does not exist or is not a "
184+
f"folder."
185+
)
186+
187+
return abs_path
188+
23189

190+
# Routine to execute the main function.
24191
if __name__ == '__main__':
25-
main()
192+
try:
193+
main()
194+
except KeyboardInterrupt:
195+
pass
26196

27197

coderdata/dataset.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
figshare: "https://api.figshare.com/v2/articles/26409316"
2-
version: "v0.1.4"
2+
version: "0.1.4"
33
datasets:
44
beataml:
55
description: "Beat acute myeloid leukemia (BeatAML) focuses on acute myeloid leukemia tumor data. Data includes drug response, proteomics, and transcriptomics datasets."

coderdata/download/downloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import yaml
1111

1212
def download(
13-
name: str=None,
13+
name: str='all',
1414
local_path: PathLike=Path.cwd(),
1515
exist_ok: bool=False
1616
):

0 commit comments

Comments
 (0)