|
| 1 | +""" |
| 2 | +Command Line Interface to retrieve coderdata datasets. |
| 3 | +""" |
| 4 | + |
1 | 5 | import argparse
|
2 |
| -from .download.downloader import download |
| 6 | +from os import PathLike |
| 7 | +from pathlib import Path |
| 8 | +from typing import Union |
| 9 | +import sys |
| 10 | + |
| 11 | +from .download.downloader import download as download_datasets |
| 12 | +from .utils import version |
| 13 | +from .utils import list_datasets |
3 | 14 |
|
def _folder_argument(value):
    """
    Adapter that lets ``check_folder`` be used as an argparse ``type``.

    argparse only converts ``ArgumentTypeError``, ``TypeError`` and
    ``ValueError`` raised by a ``type`` callable into a formatted usage
    error. ``check_folder`` raises ``OSError`` for a missing folder,
    which would otherwise reach the user as a raw traceback.

    Parameters
    ----------
    value : str
        The raw command line value passed to ``-p`` / ``--local_path``.

    Returns
    -------
    Path
        The value returned by ``check_folder`` for ``value``.

    Raises
    ------
    argparse.ArgumentTypeError
        If ``check_folder`` raises ``OSError`` for ``value``.
    """
    try:
        return check_folder(value)
    except OSError as err:
        raise argparse.ArgumentTypeError(str(err)) from err


def main():
    """
    Main method containing the argument parsing and execution of
    individual subroutines.

    Builds the ``coderdata`` argument parser, parses ``sys.argv`` and
    calls the handler stored under ``func`` in the parsed namespace
    (``info`` by default, ``download`` for the ``download``
    subcommand). If the program is invoked without any argument the
    help message is printed and the program exits with status 0.
    """
    parser = argparse.ArgumentParser(prog='coderdata', add_help=True)
    # handler used when no subcommand is given (e.g. --list / --version)
    parser.set_defaults(func=info)

    # subparser for the 'download' subroutine
    subparsers = parser.add_subparsers(
        title="commands",
        dest='command'
    )
    parser_download = subparsers.add_parser(
        'download',
        help='subroutine to download datasets. See "coderdata download -h" '
             'for more options.'
    )
    parser_download.add_argument(
        '-n', '--name',
        dest='DATASET_NAME',
        type=str,
        default='all',
        help='name of the dataset to download (e.g., "beataml"). '
             'Alternatively, "all" will download the full repository of '
             'coderdata datasets. See "coderdata --list" for a complete list '
             'of available datasets. Defaults to "all"'
    )
    parser_download.add_argument(
        '-p', '--local_path',
        dest="LOCAL_PATH",
        # wrapped so that a missing folder is reported as a regular
        # argparse usage error instead of an uncaught OSError
        type=_folder_argument,
        default=Path.cwd(),
        help='defines the folder the datasets should be stored in. Defaults '
             'to the current working directory if omitted.'
    )
    parser_download.add_argument(
        '-o', '--overwrite',
        dest="OVERWRITE",
        default=False,
        action='store_true',
        help='allow dataset files to be overwritten if they already exist.'
    )
    # overrides the parser level default (``info``) for this subcommand
    parser_download.set_defaults(func=download)

    # argument group that contains flags for additional information;
    # mutually exclusive, so only one of them can be passed at a time
    grp = parser.add_mutually_exclusive_group()
    grp.add_argument(
        '-l', '--list',
        dest="LIST",
        action='store_true',
        help="prints list of available datasets and exits program."
    )
    grp.add_argument(
        '-v', '--version',
        dest="VERSION",
        action='store_true',
        help='prints the versions of the coderdata API and dataset and exits '
             'the program'
    )

    # checks if 'coderdata' was executed without additional arguments
    # and if so prints help message and exits
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(0)

    # parse arguments and execute the function defined via
    # `set_defaults()` according to which subcommands / arguments were
    # passed on the command line
    args = parser.parse_args()
    args.func(args)
| 87 | + |
| 88 | + |
def info(args):
    """
    Print either the list of available datasets or the version
    information, depending on the flags set in ``args``.

    ``args.LIST`` takes precedence over ``args.VERSION``. If neither
    flag is set nothing is printed.

    Parameters
    ----------
    args : Namespace
        A Namespace object that contains commandline arguments parsed by
        ``ArgumentParser.parse_args()``. The attributes ``LIST`` and
        ``VERSION`` are read.
    """

    # dataset overview: header, the output of
    # coderdata.utils.list_datasets(), then a usage hint
    if args.LIST:
        header = (
            '\n'
            'Available datasets\n'
            '------------------\n'
        )
        footer = (
            '\n'
            '------------------\n\n'
            'To download individual datasets run "coderdata download --name '
            'DATASET_NAME" where "DATASET_NAME" is for example "beataml".'
        )
        print(header)
        list_datasets()
        print(footer)
        return

    if not args.VERSION:
        return

    # version numbers as returned by coderdata.utils.version(), one
    # line per entry, written to sys.stdout
    numbers = version()
    lines = [
        f"package version: {numbers['package']}",
        f"dataset version: {numbers['dataset']}",
    ]
    print('\n'.join(lines), file=sys.stdout)
| 131 | + |
| 132 | + |
def download(args):
    """
    Command line wrapper around the internal dataset download
    function (imported in this module as ``download_datasets``).

    The relevant command line arguments are collected from ``args``
    and forwarded as keyword arguments.

    Parameters
    ----------
    args : Namespace
        A Namespace object that contains commandline arguments parsed by
        ``ArgumentParser.parse_args()``. The attributes
        ``DATASET_NAME``, ``LOCAL_PATH`` and ``OVERWRITE`` are read.
    """
    # map the parsed command line attributes onto the keyword names
    # expected by the download function
    options = {
        'name': args.DATASET_NAME,
        'local_path': args.LOCAL_PATH,
        'exist_ok': args.OVERWRITE,
    }
    download_datasets(**options)
| 150 | + |
| 151 | + |
def check_folder(path: Union[str, PathLike, Path]) -> Path:
    """
    Helper function to check if a defined folder exists.

    Parameters
    ----------
    path : str, PathLike or Path
        Path to the folder that should be checked. Relative paths are
        interpreted relative to the current working directory.

    Returns
    -------
    Path
        Cleaned path object with the absolute path to the folder passed
        to the function.

    Raises
    ------
    TypeError
        If passed path argument is not of the requested type.
    OSError
        If the passed path argument does not link to a valid existing
        folder.
    """

    # ``Path`` objects already satisfy ``PathLike``, so checking for
    # ``str`` and ``PathLike`` covers all accepted types
    if not isinstance(path, (str, PathLike)):
        raise TypeError(
            f"'path' must be of type str, PathLike or Path. Supplied argument "
            f"is of type {type(path)}."
        )

    # ``Path()`` accepts strings, PathLike and Path objects alike, so no
    # separate branch per input type is needed
    abs_path = Path(path).absolute()

    if not abs_path.is_dir():
        raise OSError(
            f"The defined folder path '{path}' does not exist or is not a "
            f"folder."
        )

    return abs_path
| 188 | + |
23 | 189 |
|
# Routine to execute the main function when the module is run as a
# script.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly (no traceback) on Ctrl-C, but with status
        # 130 (128 + SIGINT) so that a calling shell or script can tell
        # the run was aborted instead of seeing a successful exit.
        sys.exit(130)
26 | 196 |
|
27 | 197 |
|
0 commit comments