Skip to content

Commit

Permalink
apply pr suggestions for utils tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sphamba committed Dec 7, 2023
1 parent 9b7db1e commit 479a0aa
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 69 deletions.
2 changes: 2 additions & 0 deletions gpm_api/tests/test_utils/test_slices.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,11 @@ def test_list_slices_difference() -> None:

# Base cases
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(1, 2)]) == [slice(3, 6)]
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(1, 3)]) == [slice(3, 6)]
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(1, 4)]) == [slice(4, 6)]
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(1, 8)]) == []
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(5, 8)]) == [slice(3, 5)]
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(6, 8)]) == [slice(3, 6)]
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(7, 8)]) == [slice(3, 6)]
assert gpm_slices.list_slices_difference([slice(3, 6)], [slice(4, 5)]) == [
slice(3, 4),
Expand Down
118 changes: 86 additions & 32 deletions gpm_api/tests/test_utils/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
import pytest

from gpm_api.utils import time as gpm_time
from utils import convert_hours_array_to_datetime_array, get_time_range
from utils import create_fake_datetime_array_from_hours_list, get_time_range


_ = float("nan")
N = float("nan")


class TestSubsetByTime:
"""Test subset_by_time"""

time = get_time_range(0, 24)
datetime_type_wrappers = [lambda x: x, str, np.datetime64]

@pytest.fixture
def data_array(self) -> xr.DataArray:
Expand All @@ -24,33 +25,66 @@ def test_no_subset(self, data_array: xr.DataArray) -> None:
returned_da = gpm_time.subset_by_time(data_array, start_time=None, end_time=None)
xr.testing.assert_equal(data_array["time"], returned_da["time"])

def test_subset_by_start_time(self, data_array: xr.DataArray) -> None:
start_time = datetime.datetime(2020, 12, 31, 12, 0, 0)
@pytest.mark.parametrize("type_wrapper", datetime_type_wrappers)
def test_subset_by_start_time(
self,
data_array: xr.DataArray,
type_wrapper,
) -> None:
start_time = type_wrapper(datetime.datetime(2020, 12, 31, 12, 0, 0))
returned_da = gpm_time.subset_by_time(data_array, start_time=start_time, end_time=None)
assert returned_da["time"].values[0] == np.datetime64(start_time)
assert returned_da["time"].values[-1] == np.datetime64(self.time[-1])
assert len(returned_da) == len(returned_da["time"])

Check warning on line 38 in gpm_api/tests/test_utils/test_time.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

❌ New issue: Code Duplication

The module contains 3 functions with similar structure: TestSubsetByTime.test_subset_by_end_time, TestSubsetByTime.test_subset_by_start_and_end_time, TestSubsetByTime.test_subset_by_start_time. Avoid duplicated, aka copy-pasted, code inside the module. More duplication lowers the code health.

def test_subset_by_end_time(self, data_array: xr.DataArray) -> None:
end_time = datetime.datetime(2020, 12, 31, 12, 0, 0)
@pytest.mark.parametrize("type_wrapper", datetime_type_wrappers)
def test_subset_by_end_time(
self,
data_array: xr.DataArray,
type_wrapper,
) -> None:
end_time = type_wrapper(datetime.datetime(2020, 12, 31, 12, 0, 0))
returned_da = gpm_time.subset_by_time(data_array, start_time=None, end_time=end_time)
assert returned_da["time"].values[0] == np.datetime64(self.time[0])
assert returned_da["time"].values[-1] == np.datetime64(end_time)
assert len(returned_da) == len(returned_da["time"])

def test_subset_by_start_and_end_time(self, data_array: xr.DataArray) -> None:
start_time = datetime.datetime(2020, 12, 31, 6, 0, 0)
end_time = datetime.datetime(2020, 12, 31, 18, 0, 0)
@pytest.mark.parametrize("type_wrapper", datetime_type_wrappers)
def test_subset_by_start_and_end_time(
self,
data_array: xr.DataArray,
type_wrapper,
) -> None:
start_time = type_wrapper(datetime.datetime(2020, 12, 31, 6, 0, 0))
end_time = type_wrapper(datetime.datetime(2020, 12, 31, 18, 0, 0))
returned_da = gpm_time.subset_by_time(data_array, start_time=start_time, end_time=end_time)
assert returned_da["time"].values[0] == np.datetime64(start_time)
assert returned_da["time"].values[-1] == np.datetime64(end_time)
assert len(returned_da) == len(returned_da["time"])

@pytest.mark.parametrize("type_wrapper", datetime_type_wrappers)
def test_dataset(
self,
type_wrapper,
) -> None:
"""Test dataset with "time" as variable"""
ds = xr.Dataset(
{
"time": xr.DataArray(self.time, coords={"along_track": np.arange(len(self.time))}),
}
)
start_time = type_wrapper(datetime.datetime(2020, 12, 31, 6, 0, 0))
end_time = type_wrapper(datetime.datetime(2020, 12, 31, 18, 0, 0))
returned_ds = gpm_time.subset_by_time(ds, start_time=start_time, end_time=end_time)
assert returned_ds["time"].values[0] == np.datetime64(start_time)
assert returned_ds["time"].values[-1] == np.datetime64(end_time)

def test_no_dimension(self):
da = xr.DataArray(42) # Scalar value -> no dimension
ds = xr.Dataset({"time": da})
returned_ds = gpm_time.subset_by_time(ds, start_time=None, end_time=None)
assert returned_ds == {}

with pytest.raises(ValueError):
gpm_time.subset_by_time(ds, start_time=None, end_time=None)

def test_wrong_time_dimension(self):
lat = np.arange(5)
Expand All @@ -63,8 +97,13 @@ def test_wrong_time_dimension(self):
with pytest.raises(ValueError):
gpm_time.subset_by_time(ds, start_time=None, end_time=None)

def test_empty_subsets(self, data_array: xr.DataArray) -> None:
start_time = datetime.datetime(2021, 1, 1, 0, 0, 0)
@pytest.mark.parametrize("type_wrapper", datetime_type_wrappers)
def test_empty_subsets(
self,
data_array: xr.DataArray,
type_wrapper,
) -> None:
start_time = type_wrapper(datetime.datetime(2021, 1, 1, 0, 0, 0))
with pytest.raises(ValueError):
gpm_time.subset_by_time(data_array, start_time=start_time, end_time=None)

Expand Down Expand Up @@ -114,24 +153,32 @@ def test_interpolate_nat():
kwargs = {"method": "linear", "limit": 5, "limit_direction": None, "limit_area": "inside"}

# Test with no NaNs
time = convert_hours_array_to_datetime_array(np.arange(0, 10))
time = create_fake_datetime_array_from_hours_list(np.arange(0, 10))
returned_time = gpm_time.interpolate_nat(time, **kwargs)
np.testing.assert_equal(time, returned_time)

# Test arrays too small to interpolate
for hour_list in ([], [N], [1, N]):
time = create_fake_datetime_array_from_hours_list(hour_list)
returned_time = gpm_time.interpolate_nat(time, **kwargs)
np.testing.assert_equal(time, returned_time)

# Test with outside NaNs (not extrapolated)
time = convert_hours_array_to_datetime_array([_, 1, 2, 3, _])
time = create_fake_datetime_array_from_hours_list([N, 1, 2, 3, N])
returned_time = gpm_time.interpolate_nat(time, **kwargs)
np.testing.assert_equal(time, returned_time)

# Test linear interpolation
time = convert_hours_array_to_datetime_array([_, 1, 2, _, _, _, 6, 7, _])
expected_time = convert_hours_array_to_datetime_array([_, 1, 2, 3, 4, 5, 6, 7, _])
time = create_fake_datetime_array_from_hours_list([N, 1, 2, N, N, N, 6, 7, N])
expected_time = create_fake_datetime_array_from_hours_list([N, 1, 2, 3, 4, 5, 6, 7, N])
returned_time = gpm_time.interpolate_nat(time, **kwargs)
np.testing.assert_equal(expected_time, returned_time)

# Test with gap too large: not all values are filled
time = convert_hours_array_to_datetime_array([_, 1, 2, _, _, _, _, _, _, _, 10, 11, _])
expected_time = convert_hours_array_to_datetime_array([_, 1, 2, 3, 4, 5, 6, 7, _, _, 10, 11, _])
time = create_fake_datetime_array_from_hours_list([N, 1, 2, N, N, N, N, N, N, N, 10, 11, N])
expected_time = create_fake_datetime_array_from_hours_list(
[N, 1, 2, 3, 4, 5, 6, 7, N, N, 10, 11, N]
)
returned_time = gpm_time.interpolate_nat(time, **kwargs)
np.testing.assert_equal(expected_time, returned_time)

Expand All @@ -140,41 +187,48 @@ def test_infill_timesteps():
"""Test infill_timesteps"""

# Test with no NaNs
time = convert_hours_array_to_datetime_array(np.arange(0, 10))
time = create_fake_datetime_array_from_hours_list(np.arange(0, 10))
returned_time = gpm_time.infill_timesteps(time, limit=5)
np.testing.assert_equal(time, returned_time)

# Test arrays too small to interpolate
time = convert_hours_array_to_datetime_array([_])
with pytest.raises(ValueError):
gpm_time.infill_timesteps(time, limit=5)
for hour_list in ([], [1], [1, 2]):
time = create_fake_datetime_array_from_hours_list(hour_list)
returned_time = gpm_time.infill_timesteps(time, limit=5)
np.testing.assert_equal(time, returned_time)

time = convert_hours_array_to_datetime_array([1, _])
with pytest.raises(ValueError):
gpm_time.infill_timesteps(time, limit=5)
for hour_list in ([N], [1, N]):
time = create_fake_datetime_array_from_hours_list(hour_list)
with pytest.raises(ValueError):
gpm_time.infill_timesteps(time, limit=5)

# Test interpolation
time = convert_hours_array_to_datetime_array([1, 2, _, _, _, 6, 7])
expected_time = convert_hours_array_to_datetime_array([1, 2, 3, 4, 5, 6, 7])
time = create_fake_datetime_array_from_hours_list([1, 2, N, N, N, 6, 7])
expected_time = create_fake_datetime_array_from_hours_list([1, 2, 3, 4, 5, 6, 7])
returned_time = gpm_time.infill_timesteps(time, limit=5)
np.testing.assert_equal(expected_time, returned_time)

# Test with gap too large: raise error
time = convert_hours_array_to_datetime_array([1, 2, _, _, _, 6, 7])
time = create_fake_datetime_array_from_hours_list([1, 2, N, N, N, 6, 7])
with pytest.raises(ValueError):
gpm_time.infill_timesteps(time, limit=2)

# Test with outside NaNs: raise error
time = convert_hours_array_to_datetime_array([_, 1, 2, 3, _])
time = create_fake_datetime_array_from_hours_list([N, 1, 2, 3, N])
with pytest.raises(ValueError):
gpm_time.infill_timesteps(time, limit=5)

# Test all NaNs: raise error
time = create_fake_datetime_array_from_hours_list([N, N, N, N])
with pytest.raises(ValueError):
gpm_time.infill_timesteps(time, limit=5)


class TestEnsureTimeValidity:
"""Test ensure_time_validity"""

time = convert_hours_array_to_datetime_array([1, 2, _, _, _, 6, 7])
expected_time = convert_hours_array_to_datetime_array([1, 2, 3, 4, 5, 6, 7])
time = create_fake_datetime_array_from_hours_list([1, 2, N, N, N, 6, 7])
expected_time = create_fake_datetime_array_from_hours_list([1, 2, 3, 4, 5, 6, 7])

def test_with_time_in_dims(self) -> None:
da = xr.DataArray(np.random.rand(len(self.time)), coords={"time": self.time})
Expand Down
34 changes: 17 additions & 17 deletions gpm_api/tests/test_utils/test_utils_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pytest_mock import MockerFixture

from gpm_api.utils import checks
from utils import convert_hours_array_to_datetime_array
from utils import create_fake_datetime_array_from_hours_list


# Utility functions ###########################################################
Expand Down Expand Up @@ -85,7 +85,7 @@ def test_grid(
self,
set_is_grid_to_true: None,
) -> None:
time = convert_hours_array_to_datetime_array([0, 1, 2, 7, 8, 9])
time = create_fake_datetime_array_from_hours_list([0, 1, 2, 7, 8, 9])
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(0, 3), slice(3, 6)]
returned_slices = checks.get_slices_contiguous_granules(ds)
Expand Down Expand Up @@ -123,7 +123,7 @@ def test_unknown(
mocker.patch("gpm_api.utils.checks.is_grid", return_value=False)
mocker.patch("gpm_api.utils.checks.is_orbit", return_value=False)

time = convert_hours_array_to_datetime_array([0, 1, 2, 7, 8, 9])
time = create_fake_datetime_array_from_hours_list([0, 1, 2, 7, 8, 9])
ds = create_dataset_with_coordinate("time", time)
with pytest.raises(ValueError):
checks.get_slices_contiguous_granules(ds)
Expand Down Expand Up @@ -196,35 +196,35 @@ class TestGetSlicesRegularTime:

def test_tolerance_provided(self) -> None:
# Test regular time
time = convert_hours_array_to_datetime_array(np.arange(0, 10))
time = create_fake_datetime_array_from_hours_list(np.arange(0, 10))
tolerance = time[1] - time[0]
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(0, 10)]
returned_slices = checks.get_slices_regular_time(ds, tolerance=tolerance)
assert returned_slices == expected_slices

# Test irregular time
time = convert_hours_array_to_datetime_array([0, 1, 2, 7, 8, 9])
time = create_fake_datetime_array_from_hours_list([0, 1, 2, 7, 8, 9])
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(0, 3), slice(3, 6)]
returned_slices = checks.get_slices_regular_time(ds, tolerance=tolerance)
assert returned_slices == expected_slices

# Test 0 or 1 timesteps
time = convert_hours_array_to_datetime_array([])
time = create_fake_datetime_array_from_hours_list([])
ds = create_dataset_with_coordinate("time", time)
expected_slices = []
returned_slices = checks.get_slices_regular_time(ds, tolerance=tolerance)
assert returned_slices == expected_slices

time = convert_hours_array_to_datetime_array([0])
time = create_fake_datetime_array_from_hours_list([0])
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(0, 1)]
returned_slices = checks.get_slices_regular_time(ds, tolerance=tolerance)
assert returned_slices == expected_slices

# Only keep large enough slices
time = convert_hours_array_to_datetime_array([0, 1, 2, 7, 8])
time = create_fake_datetime_array_from_hours_list([0, 1, 2, 7, 8])
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(0, 3)]
returned_slices = checks.get_slices_regular_time(ds, tolerance=tolerance, min_size=3)
Expand All @@ -235,7 +235,7 @@ def test_grid(
set_is_grid_to_true: None,
) -> None:
# Tolerance not provided: inferred from first two values
time = convert_hours_array_to_datetime_array([1, 2, 3, 7, 8, 9])
time = create_fake_datetime_array_from_hours_list([1, 2, 3, 7, 8, 9])
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(0, 3), slice(3, 6)]
returned_slices = checks.get_slices_regular_time(ds, tolerance=None)
Expand All @@ -258,28 +258,28 @@ class TestGetSlicesNonRegularTime:

def test_tolerance_provided(self) -> None:
# Test regular time
time = convert_hours_array_to_datetime_array(np.arange(0, 10))
time = create_fake_datetime_array_from_hours_list(np.arange(0, 10))
tolerance = time[1] - time[0]
ds = create_dataset_with_coordinate("time", time)
expected_slices = []
returned_slices = checks.get_slices_non_regular_time(ds, tolerance=tolerance)

# Test irregular time
# 0 1 2 3 4 5 6 7 8
time = convert_hours_array_to_datetime_array([0, 1, 2, 4, 5, 6, 10, 11, 12])
time = create_fake_datetime_array_from_hours_list([0, 1, 2, 4, 5, 6, 10, 11, 12])
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(2, 4), slice(5, 7)] # All slices have length 2
returned_slices = checks.get_slices_non_regular_time(ds, tolerance=tolerance)
assert returned_slices == expected_slices

# Test 0 or 1 timesteps
time = convert_hours_array_to_datetime_array([])
time = create_fake_datetime_array_from_hours_list([])
ds = create_dataset_with_coordinate("time", time)
expected_slices = []
returned_slices = checks.get_slices_non_regular_time(ds, tolerance=tolerance)
assert returned_slices == expected_slices

time = convert_hours_array_to_datetime_array([0])
time = create_fake_datetime_array_from_hours_list([0])
ds = create_dataset_with_coordinate("time", time)
expected_slices = []
returned_slices = checks.get_slices_non_regular_time(ds, tolerance=tolerance)
Expand All @@ -291,7 +291,7 @@ def test_grid(
) -> None:
# Tolerance not provided: inferred from first two values
# 0 1 2 3 4 5 6 7 8
time = convert_hours_array_to_datetime_array([0, 1, 2, 4, 5, 6, 10, 11, 12])
time = create_fake_datetime_array_from_hours_list([0, 1, 2, 4, 5, 6, 10, 11, 12])
ds = create_dataset_with_coordinate("time", time)
expected_slices = [slice(2, 4), slice(5, 7)] # All slices have length 2
returned_slices = checks.get_slices_non_regular_time(ds, tolerance=None)
Expand All @@ -318,12 +318,12 @@ def test_grid(
set_is_grid_to_true: None,
) -> None:
# Test regular time
time = convert_hours_array_to_datetime_array(np.arange(0, 10))
time = create_fake_datetime_array_from_hours_list(np.arange(0, 10))
ds = create_dataset_with_coordinate("time", time)
checks.check_regular_time(ds)

# Test irregular time
time = convert_hours_array_to_datetime_array([0, 1, 2, 7, 8, 9])
time = create_fake_datetime_array_from_hours_list([0, 1, 2, 7, 8, 9])
ds = create_dataset_with_coordinate("time", time)
with pytest.raises(ValueError):
checks.check_regular_time(ds)
Expand Down Expand Up @@ -629,7 +629,7 @@ def test_get_slices_wobbling_swath(self) -> None:
"""Test get_slices_wobbling_swath"""

returned_slices = checks.get_slices_wobbling_swath(self.ds, threshold=self.threshold)
expected_slices = [slice(7, 9)] # TODO: check that this is the expected behavior
expected_slices = [slice(7, 9)]
assert returned_slices == expected_slices


Expand Down
4 changes: 2 additions & 2 deletions gpm_api/tests/test_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Union


def convert_hours_array_to_datetime_array(hours: Union[list, np.ndarray]) -> np.ndarray:
def create_fake_datetime_array_from_hours_list(hours: Union[list, np.ndarray]) -> np.ndarray:
"""Convert list of integers and NaNs into a np.datetime64 array"""

datetimes = []
Expand All @@ -21,4 +21,4 @@ def convert_hours_array_to_datetime_array(hours: Union[list, np.ndarray]) -> np.


def get_time_range(start_hour: int, end_hour: int) -> np.ndarray:
return convert_hours_array_to_datetime_array(np.arange(start_hour, end_hour))
return create_fake_datetime_array_from_hours_list(np.arange(start_hour, end_hour))
Loading

0 comments on commit 479a0aa

Please sign in to comment.