Add a pre-filter for wildcard patterns with tests, and include more examples in the help message

yucongalicechen · yucongalicechen · commit f0ffd0f7ee0b · 2024-05-15T12:54:51.000-04:00
diff --git a/src/diffpy/labpdfproc/labpdfprocapp.py b/src/diffpy/labpdfproc/labpdfprocapp.py
@@ -30,9 +30,13 @@ def get_args(override_cli_inputs=None):
         "everything in the folder ./data), 'data/file_list.txt' (load"
         " the list of files contained in the text-file called "
         "file_list.txt that can be found in the folder ./data). "
-        "Wildcard character (*) is accepted. Examples include './*chi'"
-        " (load all files with .chi extension) and 'data/test*' (load "
-        "all files starting with 'test' in the folder ./data). ",
+        "\nWildcard character (*) is accepted. Examples include './*.chi'"
+        " (load all files with .chi extension), 'data/*.chi' (load all "
+        "files in the 'data' directory with .chi extension), 'file*.chi' (load all "
+        "files starting with 'file' and ending with .chi extension), 'test*' "
+        "(load all files and directories starting with 'test'), 'test*/*.chi' "
+        "(load all files with .chi extension under directories starting "
+        "with 'test'). ",
     )
     p.add_argument(
         "-a",
diff --git a/src/diffpy/labpdfproc/tests/test_tools.py b/src/diffpy/labpdfproc/tests/test_tools.py
@@ -7,6 +7,7 @@
 from diffpy.labpdfproc.labpdfprocapp import get_args
 from diffpy.labpdfproc.tools import (
     expand_list_file,
+    expand_wildcard_file,
     known_sources,
     load_user_metadata,
     set_input_lists,
@@ -54,25 +55,36 @@
         ["input_dir/file_list_example2.txt"],
         ["input_dir/good_data.chi", "good_data.xy", "input_dir/good_data.txt"],
     ),
-    (  # wildcard pattern, same directory
+    (  # wildcard pattern, matching files with .chi extension in the same directory
         ["./*.chi"],
         ["good_data.chi"],
     ),
-    (  # wildcard pattern, input directory
+    (  # wildcard pattern, matching files with .chi extension in the input directory
         ["input_dir/*.chi"],
         ["input_dir/good_data.chi"],
     ),
-    (  # mixture of valid wildcard patterns
-        ["good_data*", "./*.pkl", "unreadable*.txt", "input_dir/*.chi"],
+    (  # wildcard pattern, matching files starting with good_data
+        ["good_data*"],
+        ["good_data.chi", "good_data.xy", "good_data.txt"],
+    ),
+    (  # wildcard pattern, matching files or directories starting with input
+        ["input*"],
         [
-            "good_data.chi",
-            "good_data.xy",
-            "good_data.txt",
-            "unreadable_file.txt",
-            "binary.pkl",
             "input_dir/good_data.chi",
+            "input_dir/good_data.xy",
+            "input_dir/good_data.txt",
+            "input_dir/unreadable_file.txt",
+            "input_dir/binary.pkl",
         ],
     ),
+    (  # wildcard pattern, matching files or directories starting with unreadable and ending with .txt extension
+        ["unreadable*.txt"],
+        ["unreadable_file.txt"],
+    ),
+    (  # wildcard pattern, matching directories starting with input and all files under with .chi extension
+        ["input*/*.chi"],
+        ["input_dir/good_data.chi"],
+    ),
 ]
 
 
@@ -84,6 +96,7 @@ def test_set_input_lists(inputs, expected, user_filesystem):
 
     cli_inputs = ["2.5"] + inputs
     actual_args = get_args(cli_inputs)
+    actual_args = expand_wildcard_file(actual_args)
     actual_args = expand_list_file(actual_args)
     actual_args = set_input_lists(actual_args)
     assert sorted(actual_args.input_paths) == sorted(expected_paths)
@@ -108,6 +121,16 @@ def test_set_input_lists(inputs, expected, user_filesystem):
         ["input_dir/file_list.txt"],
         "Cannot find missing_file.txt. Please specify valid input file(s) or directories.",
     ),
+    (  # valid wildcard pattern, but does not match any files or directories
+        ["non_existing_dir*"],
+        "Invalid wildcard input non_existing_dir*. "
+        "Please ensure the wildcard pattern matches at least one file or directory.",
+    ),
+    (  # invalid wildcard pattern
+        ["invalid_dir**"],
+        "Invalid wildcard input invalid_dir**. "
+        "Please ensure the wildcard pattern matches at least one file or directory.",
+    ),
 ]
 
 
@@ -117,8 +140,9 @@ def test_set_input_files_bad(inputs, msg, user_filesystem):
     os.chdir(base_dir)
     cli_inputs = ["2.5"] + inputs
     actual_args = get_args(cli_inputs)
-    actual_args = expand_list_file(actual_args)
     with pytest.raises(FileNotFoundError, match=msg[0]):
+        actual_args = expand_wildcard_file(actual_args)
+        actual_args = expand_list_file(actual_args)
         actual_args = set_input_lists(actual_args)
 
 
diff --git a/src/diffpy/labpdfproc/tools.py b/src/diffpy/labpdfproc/tools.py
@@ -1,3 +1,4 @@
+import glob
 from pathlib import Path
 
 WAVELENGTHS = {"Mo": 0.71, "Ag": 0.59, "Cu": 1.54}
@@ -28,6 +29,41 @@ def set_output_directory(args):
     return output_dir
 
 
+def expand_wildcard_file(args):
+    """
+    Expands each input containing "*" into the files it matches and removes the wildcard input itself.
+
+    Parameters
+    ----------
+    args : argparse.Namespace
+        the arguments from the parser; args.input is modified in place
+
+    Returns
+    -------
+    the arguments with the wildcard inputs expanded; a matched directory contributes the files directly
+    inside it, skipping names containing "file_list". FileNotFoundError is raised if nothing matches.
+    """
+    wildcard_inputs = [input_name for input_name in args.input if "*" in input_name]
+    for wildcard_input in wildcard_inputs:
+        if not glob.glob(wildcard_input):  # nothing matched: report the pattern to the user
+            raise FileNotFoundError(
+                f"Invalid wildcard input {wildcard_input}. "
+                f"Please ensure the wildcard pattern matches at least one file or directory."
+            )
+        input_files = Path(".").glob(wildcard_input)  # NOTE(review): may reject absolute patterns - verify
+        for input_file in input_files:
+            if input_file.is_file():
+                args.input.append(str(input_file))
+            elif input_file.is_dir():  # a matched directory contributes its direct files only
+                files = input_file.glob("*")
+                inputs = [str(file) for file in files if file.is_file() and "file_list" not in file.name]
+                args.input.extend(inputs)
+            else:  # matched path is neither a file nor a directory
+                raise FileNotFoundError(f"Invalid wildcard input {wildcard_input}.")
+        args.input.remove(wildcard_input)  # drop the pattern itself once it has been expanded
+    return args
+
+
 def expand_list_file(args):
     """
     Expands the list of inputs by adding files from file lists and removing the file list.
@@ -86,16 +122,7 @@ def set_input_lists(args):
                     f"Cannot find {input_name}. Please specify valid input file(s) or directories."
                 )
         else:
-            if "*" in input_name:
-                input_parent_directory = input_path.parents[0]
-                input_pattern = input_path.relative_to(input_parent_directory)
-                input_files = Path(input_parent_directory).glob(str(input_pattern))
-                input_files = [
-                    file.resolve() for file in input_files if file.is_file() and "file_list" not in file.name
-                ]
-                input_paths.extend(input_files)
-            else:
-                raise FileNotFoundError(f"Cannot find {input_name}")
+            raise FileNotFoundError(f"Cannot find {input_name}.")
     setattr(args, "input_paths", list(set(input_paths)))
     return args