Skip to content

Commit a479435

Browse files
authored
Merge pull request #149 from MITLibraries/TIMX-509-explicit-run-timestamp
TIMX 509 - explicit run timestamp
2 parents 0a20234 + 029506e commit a479435

File tree

12 files changed

+570
-576
lines changed

12 files changed

+570
-576
lines changed

Pipfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ pytest = "*"
2525
ruff = "*"
2626
setuptools = "*"
2727
pip-audit = "*"
28-
pytest-freezegun = "*"
2928

3029
[requires]
3130
python_version = "3.12"

Pipfile.lock

Lines changed: 474 additions & 512 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ ignore = [
9393
"PLR0912",
9494
"PLR0913",
9595
"PLR0915",
96-
"S320",
9796
"S321",
9897
"S608",
9998
"TRY003"

tests/conftest.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -182,16 +182,16 @@ def dataset_with_same_day_runs(tmp_path) -> TIMDEXDataset:
182182
# be represented.
183183
run_params.extend(
184184
[
185-
(100, "alma", "2025-01-01", "full", "index", "run-1"),
186-
(75, "alma", "2025-01-01", "full", "index", "run-2"),
187-
(10, "alma", "2025-01-01", "daily", "index", "run-3"),
188-
(20, "alma", "2025-01-02", "daily", "index", "run-4"),
189-
(5, "alma", "2025-01-02", "daily", "delete", "run-5"),
185+
(100, "alma", "2025-01-01", "full", "index", "run-1", "2025-01-01T01:00:00"),
186+
(75, "alma", "2025-01-01", "full", "index", "run-2", "2025-01-01T02:00:00"),
187+
(10, "alma", "2025-01-01", "daily", "index", "run-3", "2025-01-01T03:00:00"),
188+
(20, "alma", "2025-01-02", "daily", "index", "run-4", "2025-01-02T01:00:00"),
189+
(5, "alma", "2025-01-02", "daily", "delete", "run-5", "2025-01-02T02:00:00"),
190190
]
191191
)
192192

193193
for params in run_params:
194-
num_records, source, run_date, run_type, action, run_id = params
194+
num_records, source, run_date, run_type, action, run_id, run_timestamp = params
195195
records = generate_sample_records(
196196
num_records,
197197
timdex_record_id_prefix=source,
@@ -200,6 +200,7 @@ def dataset_with_same_day_runs(tmp_path) -> TIMDEXDataset:
200200
run_type=run_type,
201201
action=action,
202202
run_id=run_id,
203+
run_timestamp=run_timestamp,
203204
)
204205
timdex_dataset.write(records)
205206

tests/test_dataset.py

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
# ruff: noqa: D205, D209, S105, S106, SLF001, PD901, PLR2004
1+
# ruff: noqa: D205, D209, SLF001, PLR2004
22

33
import os
4-
from datetime import UTC, date, datetime
4+
from datetime import date
55
from unittest.mock import MagicMock, patch
66

77
import pyarrow as pa
88
import pytest
99
from pyarrow import fs
1010

11-
from tests.utils import generate_sample_records
1211
from timdex_dataset_api.dataset import (
1312
DatasetNotLoadedError,
1413
TIMDEXDataset,
@@ -466,47 +465,6 @@ def test_dataset_current_records_index_filtering_accurate_records_yielded(
466465
]
467466

468467

469-
@pytest.mark.freeze_time("2025-05-22 01:23:45.567890")
470-
def test_dataset_write_includes_minted_run_timestamp(tmp_path):
471-
# create dataset
472-
location = str(tmp_path / "one_run_at_frozen_time")
473-
os.mkdir(location)
474-
timdex_dataset = TIMDEXDataset(location)
475-
476-
run_id = "abc123"
477-
478-
# perform a single ETL run that should pick up the frozen time for run_timestamp
479-
records = generate_sample_records(
480-
10,
481-
timdex_record_id_prefix="alma",
482-
source="alma",
483-
run_date="2025-05-22",
484-
run_type="full",
485-
action="index",
486-
run_id=run_id,
487-
)
488-
timdex_dataset.write(records)
489-
timdex_dataset.load()
490-
491-
# assert TIMDEXDataset.write() applies current time as run_timestamp
492-
run_row_dict = next(timdex_dataset.read_dicts_iter())
493-
assert "run_timestamp" in run_row_dict
494-
assert run_row_dict["run_timestamp"] == datetime(
495-
2025,
496-
5,
497-
22,
498-
1,
499-
23,
500-
45,
501-
567890,
502-
tzinfo=UTC,
503-
)
504-
505-
# assert the same run_timestamp is applied to all rows in the run
506-
df = timdex_dataset.read_dataframe(run_id=run_id)
507-
assert len(list(df.run_timestamp.unique())) == 1
508-
509-
510468
def test_dataset_load_current_records_gets_correct_same_day_full_run(
511469
dataset_with_same_day_runs,
512470
):

tests/test_read.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# ruff: noqa: PLR2004, PD901
1+
# ruff: noqa: PLR2004
22

33
import pandas as pd
44
import pyarrow as pa

tests/test_records.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import re
2-
from datetime import date
2+
from datetime import UTC, date, datetime
33

44
import pytest
55

@@ -71,7 +71,70 @@ def test_dataset_record_serialization():
7171
"action": "index",
7272
"run_id": "abc123",
7373
"run_record_offset": 0,
74+
"run_timestamp": datetime(2024, 12, 1, 0, 0, tzinfo=UTC),
7475
"year": "2024",
7576
"month": "12",
7677
"day": "01",
7778
}
79+
80+
81+
@pytest.mark.parametrize(
82+
("run_timestamp_input", "expected_run_timestamp", "expected_exception"),
83+
[
84+
(
85+
None,
86+
None,
87+
TypeError, # expecting string, not None
88+
),
89+
(
90+
date(2025, 1, 1),
91+
None,
92+
TypeError, # expecting string, not date object
93+
),
94+
(
95+
"2024-12-01T10:00:00Z",
96+
datetime(2024, 12, 1, 10, 0, tzinfo=UTC),
97+
None,
98+
),
99+
(
100+
"2024-12-01T23:59:59.999999+00:00",
101+
datetime(2024, 12, 1, 23, 59, 59, 999999, tzinfo=UTC),
102+
None,
103+
),
104+
],
105+
)
106+
def test_dataset_record_run_timestamp_parsing(
107+
run_timestamp_input, expected_run_timestamp, expected_exception
108+
):
109+
values = {
110+
"timdex_record_id": "alma:123",
111+
"source_record": b"<record><title>Hello World.</title></record>",
112+
"transformed_record": b"""{"title":["Hello World."]}""",
113+
"source": "libguides",
114+
"run_date": "2024-12-01",
115+
"run_type": "full",
116+
"action": "index",
117+
"run_id": "abc123",
118+
"run_record_offset": 0,
119+
"run_timestamp": run_timestamp_input,
120+
}
121+
if not expected_exception:
122+
dataset_record = DatasetRecord(**values)
123+
assert dataset_record.to_dict() == {
124+
"timdex_record_id": "alma:123",
125+
"source_record": b"<record><title>Hello World.</title></record>",
126+
"transformed_record": b"""{"title":["Hello World."]}""",
127+
"source": "libguides",
128+
"run_date": date(2024, 12, 1),
129+
"run_type": "full",
130+
"action": "index",
131+
"run_id": "abc123",
132+
"run_record_offset": 0,
133+
"run_timestamp": expected_run_timestamp,
134+
"year": "2024",
135+
"month": "12",
136+
"day": "01",
137+
}
138+
else:
139+
with pytest.raises(expected_exception):
140+
DatasetRecord(**values)

tests/test_write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# ruff: noqa: S105, S106, SLF001, PLR2004, PD901, D209, D205
1+
# ruff: noqa: PLR2004, D209, D205
22
import math
33
import os
44
from unittest.mock import patch

tests/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def generate_sample_records(
1717
run_type: str | None = "daily",
1818
action: str | None = "index",
1919
run_id: str | None = None,
20+
run_timestamp: str | None = None,
2021
) -> Iterator[DatasetRecord]:
2122
"""Generate sample DatasetRecords."""
2223
if not run_id:
@@ -33,6 +34,7 @@ def generate_sample_records(
3334
action=action,
3435
run_id=run_id,
3536
run_record_offset=x,
37+
run_timestamp=run_timestamp or run_date,
3638
)
3739

3840

timdex_dataset_api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from timdex_dataset_api.dataset import TIMDEXDataset
44
from timdex_dataset_api.record import DatasetRecord
55

6-
__version__ = "2.0.0"
6+
__version__ = "2.1.0"
77

88
__all__ = [
99
"DatasetRecord",

0 commit comments

Comments
 (0)