Skip to content

Commit

Permalink
add tests for io/find ges_disc
Browse files Browse the repository at this point in the history
  • Loading branch information
sphamba committed Nov 6, 2023
1 parent 2386c9d commit 6dc27b7
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 13 deletions.
19 changes: 10 additions & 9 deletions gpm_api/io/ges_disc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
@author: ghiggi
"""
import datetime
import os
import re
import subprocess

Expand Down Expand Up @@ -51,7 +50,7 @@ def _get_gesc_disc_list_path(url):
list_content = [s for s in list_content if s != ""]
if len(list_content) == 0:
raise ValueError(f"The GES DISC {url} directory is empty.")
list_path = [os.path.join(url, s) for s in list_content]
list_path = [f"{url}/{s}" for s in list_content]
return list_path


Expand All @@ -72,11 +71,11 @@ def _get_gesc_disc_list_path(url):
def _get_ges_disc_server(product):
    """Return the base URL of the GES DISC server hosting ``product``.

    The choice of server depends only on whether ``is_trmm_product(product)``
    is true. The returned URL has no trailing slash.
    """
    # TRMM
    if is_trmm_product(product):
        return "https://disc2.gesdisc.eosdis.nasa.gov/data"

    # GPM (the gpm1.gesdisc.eosdis.nasa.gov host was used here previously)
    return "https://gpm2.gesdisc.eosdis.nasa.gov/data"

Expand Down Expand Up @@ -114,9 +113,11 @@ def _get_ges_disc_product_directory_tree(product, date, version):

# Specify the directory tree
# --> TODO: currently specified only for L1 and L2
directory_tree = os.path.join(
folder_name,
datetime.datetime.strftime(date, "%Y/%j"),
directory_tree = "/".join(
[
folder_name,
datetime.datetime.strftime(date, "%Y/%j"),
]
)
return directory_tree

Expand Down Expand Up @@ -148,7 +149,7 @@ def get_ges_disc_product_directory(product, date, version):
product=product, date=date, version=version
)
# Define product directory where data are listed
url_product_dir = os.path.join(url_server, dir_structure)
url_product_dir = f"{url_server}/{dir_structure}"
return url_product_dir


Expand Down Expand Up @@ -248,5 +249,5 @@ def define_gesdisc_filepath(product, product_type, date, version, filename):
# Retrieve product directory url
url_product_dir = get_ges_disc_product_directory(product=product, date=date, version=version)
# Define GES DISC filepath
fpath = os.path.join(url_product_dir, filename)
fpath = f"{url_product_dir}/{filename}"
return fpath
56 changes: 52 additions & 4 deletions gpm_api/tests/test_io/test_find.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime
import os
from typing import Dict
from typing import Dict, List

from pytest_mock.plugin import MockerFixture

Expand All @@ -12,7 +12,7 @@ def test_get_local_daily_filepaths(
mock_configuration: Dict[str, str],
mocker: MockerFixture,
product_info: Dict[str, dict],
):
) -> None:
"""Test _get_all_daily_filepaths for "local" storage"""

storage = "local"
Expand Down Expand Up @@ -82,7 +82,7 @@ def test_get_local_daily_filepaths(
def test_get_pps_daily_filepaths(
mocker: MockerFixture,
product_info: Dict[str, dict],
):
) -> None:
"""Test _get_all_daily_filepaths for "pps" storage"""

stoarge = "pps"
Expand All @@ -94,7 +94,7 @@ def test_get_pps_daily_filepaths(
"file2.HDF5",
]

def mock_get_pps_file_list(url_product_dir):
def mock_get_pps_file_list(url_product_dir: str) -> List[str]:
# Remove the base URL, assuming they have the following format:
# RS: https://arthurhouhttps.pps.eosdis.nasa.gov/text/...
# NRT: https://jsimpsonhttps.pps.eosdis.nasa.gov/text/...
Expand Down Expand Up @@ -187,3 +187,51 @@ def mock_get_pps_file_list(url_product_dir):
)
expected_filepaths = [f"{base_url}{filename}" for filename in mock_filenames]
assert returned_filepaths == expected_filepaths


def test_get_gesdisc_daily_filepaths(
    mocker: MockerFixture,
    product_info: Dict[str, dict],
) -> None:
    """Test _get_all_daily_filepaths for "ges_disc" storage.

    The directory listing helper is replaced by a stub returning two fixed
    filenames under the requested URL. For every product that declares a
    ``ges_disc_dir``, the returned file paths must match the URLs built here
    from the expected server, product directory, version and date.
    """

    storage = "ges_disc"
    date = datetime(2020, 12, 31)
    # Year/day-of-year part of the URL; identical for every product
    date_path = date.strftime("%Y/%j")

    # Mock gpm_api.io.ges_disc._get_gesc_disc_list_path, which uses wget to get a list of files
    mock_filenames = [
        "file1.HDF5",
        "file2.HDF5",
    ]

    def mock_get_gesc_disc_list_path(url: str) -> List[str]:
        return [f"{url}/{filename}" for filename in mock_filenames]

    mocker.patch(
        "gpm_api.io.ges_disc._get_gesc_disc_list_path",
        side_effect=mock_get_gesc_disc_list_path,
    )

    for product, info in product_info.items():
        # Test against the last listed version of each product
        version = info["available_versions"][-1]
        ges_disc_dir = info["ges_disc_dir"]
        if ges_disc_dir is None:
            # No GES DISC directory declared for this product
            continue

        returned_filepaths = find._get_all_daily_filepaths(
            storage=storage,
            date=date,
            product=product,
            product_type=None,
            version=version,
            verbose=True,
        )

        subdomain = "disc2" if "TRMM" in ges_disc_dir else "gpm2"

        # NOTE(review): the ".0{version}" suffix presumes single-digit versions - confirm
        base_url = (
            f"https://{subdomain}.gesdisc.eosdis.nasa.gov/data/"
            f"{ges_disc_dir}.0{version}/{date_path}"
        )
        expected_filepaths = [f"{base_url}/{filename}" for filename in mock_filenames]
        assert returned_filepaths == expected_filepaths

0 comments on commit 6dc27b7

Please sign in to comment.