From 6dc27b7ca298171d48d3b4d90411b4fbad620770 Mon Sep 17 00:00:00 2001 From: Son Pham-Ba Date: Mon, 6 Nov 2023 17:20:36 +0100 Subject: [PATCH] add tests for io/find ges_disc --- gpm_api/io/ges_disc.py | 19 +++++----- gpm_api/tests/test_io/test_find.py | 56 +++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/gpm_api/io/ges_disc.py b/gpm_api/io/ges_disc.py index e17f8d9a..bfc159ed 100644 --- a/gpm_api/io/ges_disc.py +++ b/gpm_api/io/ges_disc.py @@ -5,7 +5,6 @@ @author: ghiggi """ import datetime -import os import re import subprocess @@ -51,7 +50,7 @@ def _get_gesc_disc_list_path(url): list_content = [s for s in list_content if s != ""] if len(list_content) == 0: raise ValueError(f"The GES DISC {url} directory is empty.") - list_path = [os.path.join(url, s) for s in list_content] + list_path = [f"{url}/{s}" for s in list_content] return list_path @@ -72,11 +71,11 @@ def _get_gesc_disc_list_path(url): def _get_ges_disc_server(product): # TRMM if is_trmm_product(product): - ges_disc_base_url = "https://disc2.gesdisc.eosdis.nasa.gov/data/" + ges_disc_base_url = "https://disc2.gesdisc.eosdis.nasa.gov/data" # GPM else: - ges_disc_base_url = "https://gpm1.gesdisc.eosdis.nasa.gov/data" + # ges_disc_base_url = "https://gpm1.gesdisc.eosdis.nasa.gov/data" ges_disc_base_url = "https://gpm2.gesdisc.eosdis.nasa.gov/data" return ges_disc_base_url @@ -114,9 +113,11 @@ def _get_ges_disc_product_directory_tree(product, date, version): # Specify the directory tree # --> TODO: currently specified only for L1 and L2 - directory_tree = os.path.join( - folder_name, - datetime.datetime.strftime(date, "%Y/%j"), + directory_tree = "/".join( + [ + folder_name, + datetime.datetime.strftime(date, "%Y/%j"), + ] ) return directory_tree @@ -148,7 +149,7 @@ def get_ges_disc_product_directory(product, date, version): product=product, date=date, version=version ) # Define product directory where data are listed - url_product_dir = os.path.join(url_server, dir_structure) + url_product_dir = f"{url_server}/{dir_structure}" return url_product_dir @@ -248,5 +249,5 @@ def define_gesdisc_filepath(product, product_type, date, version, filename): # Retrieve product directory url url_product_dir = get_ges_disc_product_directory(product=product, date=date, version=version) # Define GES DISC filepath - fpath = os.path.join(url_product_dir, filename) + fpath = f"{url_product_dir}/{filename}" return fpath diff --git a/gpm_api/tests/test_io/test_find.py b/gpm_api/tests/test_io/test_find.py index 7bfa6976..6f109259 100644 --- a/gpm_api/tests/test_io/test_find.py +++ b/gpm_api/tests/test_io/test_find.py @@ -1,6 +1,6 @@ from datetime import datetime import os -from typing import Dict +from typing import Dict, List from pytest_mock.plugin import MockerFixture @@ -12,7 +12,7 @@ def test_get_local_daily_filepaths( mock_configuration: Dict[str, str], mocker: MockerFixture, product_info: Dict[str, dict], -): +) -> None: """Test _get_all_daily_filepaths for "local" storage""" storage = "local" @@ -82,7 +82,7 @@ def test_get_local_daily_filepaths( def test_get_pps_daily_filepaths( mocker: MockerFixture, product_info: Dict[str, dict], -): +) -> None: """Test _get_all_daily_filepaths for "pps" storage""" stoarge = "pps" @@ -94,7 +94,7 @@ def test_get_pps_daily_filepaths( "file2.HDF5", ] - def mock_get_pps_file_list(url_product_dir): + def mock_get_pps_file_list(url_product_dir: str) -> List[str]: # Remove the base URL, assuming they have the followgin format: # RS: https://arthurhouhttps.pps.eosdis.nasa.gov/text/... # NRT: https://jsimpsonhttps.pps.eosdis.nasa.gov/text/... @@ -187,3 +187,51 @@ def mock_get_pps_file_list(url_product_dir): ) expected_filepaths = [f"{base_url}{filename}" for filename in mock_filenames] assert returned_filepaths == expected_filepaths + + +def test_get_gesdisc_daily_filepaths( + mocker: MockerFixture, + product_info: Dict[str, dict], +) -> None: + """Test _get_all_daily_filepaths for "ges_disc" storage""" + + stoarge = "ges_disc" + date = datetime(2020, 12, 31) + version = 7 + + # Mock gpm_api.io.ges_disc._get_gesc_disc_list_path, which uses wget to get a list of files + mock_filenames = [ + "file1.HDF5", + "file2.HDF5", + ] + + def mock_get_gesc_disc_list_path(url: str) -> List[str]: + return [f"{url}/{filename}" for filename in mock_filenames] + + mocker.patch( + "gpm_api.io.ges_disc._get_gesc_disc_list_path", side_effect=mock_get_gesc_disc_list_path + ) + + for product, info in product_info.items(): + version = info["available_versions"][-1] + ges_disc_dir = info["ges_disc_dir"] + if ges_disc_dir is None: + continue + + returned_filepaths = find._get_all_daily_filepaths( + storage=stoarge, + date=date, + product=product, + product_type=None, + version=version, + verbose=True, + ) + + if "TRMM" in ges_disc_dir: + subdomain = "disc2" + else: + subdomain = "gpm2" + + base_url = f"https://{subdomain}.gesdisc.eosdis.nasa.gov/data/{ges_disc_dir}.0{version}/{date.strftime('%Y/%j')}" + expected_filepaths = [f"{base_url}/{filename}" for filename in mock_filenames] + assert returned_filepaths == expected_filepaths