Skip to content

Commit de7dc90

Browse files
committed
Update test confirming minted run_timestamp
1 parent e006b1c commit de7dc90

File tree

1 file changed

+27
-8
lines changed

1 file changed

+27
-8
lines changed

tests/test_dataset.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pytest
99
from pyarrow import fs
1010

11+
from tests.utils import generate_sample_records
1112
from timdex_dataset_api.dataset import (
1213
DatasetNotLoadedError,
1314
TIMDEXDataset,
@@ -466,13 +467,31 @@ def test_dataset_current_records_index_filtering_accurate_records_yielded(
466467

467468

468469
@pytest.mark.freeze_time("2025-05-22 01:23:45.567890")
469-
def test_dataset_write_includes_minted_run_timestamp(
470-
dataset_with_same_day_runs,
471-
):
470+
def test_dataset_write_includes_minted_run_timestamp(tmp_path):
471+
# create dataset
472+
location = str(tmp_path / "one_run_at_frozen_time")
473+
os.mkdir(location)
474+
timdex_dataset = TIMDEXDataset(location)
475+
476+
run_id = "abc123"
477+
478+
# perform a single ETL run that should pick up the frozen time for run_timestamp
479+
records = generate_sample_records(
480+
10,
481+
timdex_record_id_prefix="alma",
482+
source="alma",
483+
run_date="2025-05-22",
484+
run_type="full",
485+
action="index",
486+
run_id=run_id,
487+
)
488+
timdex_dataset.write(records)
489+
timdex_dataset.load()
490+
472491
# assert TIMDEXDataset.write() applies current time as run_timestamp
473-
row_dict = next(dataset_with_same_day_runs.read_dicts_iter())
474-
assert "run_timestamp" in row_dict
475-
assert row_dict["run_timestamp"] == datetime(
492+
run_row_dict = next(timdex_dataset.read_dicts_iter())
493+
assert "run_timestamp" in run_row_dict
494+
assert run_row_dict["run_timestamp"] == datetime(
476495
2025,
477496
5,
478497
22,
@@ -483,8 +502,8 @@ def test_dataset_write_includes_minted_run_timestamp(
483502
tzinfo=UTC,
484503
)
485504

486-
# assert same time is used for entire batch
487-
df = dataset_with_same_day_runs.read_dataframe()
505+
# assert the same run_timestamp is applied to all rows in the run
506+
df = timdex_dataset.read_dataframe(run_id=run_id)
488507
assert len(list(df.run_timestamp.unique())) == 1
489508

490509

0 commit comments

Comments
 (0)