Skip to content

Commit 1dc01c9

Browse files
fix(floorist): RHICOMPL-2984 Bugfix for empty prefix floorplan
- Fix a bug for when floorplan file does not provide prefix - Add helper function for generating directory names - Add helper function for validating Floorplan files - Add unit test coverage
1 parent ae2b896 commit 1dc01c9

File tree

7 files changed

+124
-10
lines changed

7 files changed

+124
-10
lines changed

Diff for: Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,6 @@ CMD python ./app.py
3939

4040
FROM base as test
4141

42-
ADD tests/test_* tests/floorplan_* tests/requirements.txt ./tests/
42+
ADD tests/test_* tests/unit/ tests/floorplan_* tests/requirements.txt ./tests/
4343

44-
RUN pip install --no-cache-dir -r tests/requirements.txt
44+
RUN pip install --no-cache-dir .[test] -r tests/requirements.txt

Diff for: setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ python_requires = >=3.6, <4
1616
[options.extras_require]
1717
test =
1818
pytest
19+
pytest-mock

Diff for: src/floorist/floorist.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
import pandas as pd
1111
import yaml
1212

13+
from floorist.helpers import generate_name, validate_floorplan_entry
14+
15+
1316
def _configure_loglevel():
1417

1518
LOGLEVEL = environ.get('LOGLEVEL', 'INFO').upper()
@@ -43,15 +46,17 @@ def main():
4346
dump_count += 1
4447

4548
try:
46-
logging.debug(f"Dumping #{dump_count}: {row['query']} to {row['prefix']}")
4749

48-
cursor = pd.read_sql(row['query'], conn, chunksize=row.get('chunksize', 1000))
50+
query = row['query']
51+
prefix = row['prefix']
52+
chunksize = row.get('chunksize', 1000)
53+
54+
logging.debug(f"Dumping #{dump_count}: {query} to {prefix}")
55+
56+
validate_floorplan_entry(query, prefix)
4957

50-
target = '/'.join([
51-
f"s3://{config.bucket_name}",
52-
row['prefix'],
53-
date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
54-
])
58+
cursor = pd.read_sql(query, conn, chunksize=chunksize)
59+
target = generate_name(config.bucket_name, prefix)
5560

5661
uuids = {}
5762

@@ -72,7 +77,7 @@ def main():
7277
mode='append'
7378
)
7479

75-
logging.debug(f"Dumped #{dumped_count}: {row['query']} to {row['prefix']}")
80+
logging.debug(f"Dumped #{dumped_count}: {query} to {prefix}")
7681

7782
dumped_count += 1
7883
except Exception as ex:

Diff for: src/floorist/helpers.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from datetime import date
2+
3+
4+
def generate_name(bucket_name, prefix=None):
5+
6+
file_name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
7+
parts = ["s3:/", bucket_name, file_name]
8+
if prefix:
9+
parts.insert(2, prefix)
10+
11+
return '/'.join(parts)
12+
13+
def validate_floorplan_entry(query, prefix):
14+
if not query:
15+
raise ValueError("Query cannot be empty!")
16+
elif not prefix:
17+
raise ValueError("Prefix cannot be empty!")
18+
else:
19+
return True

Diff for: tests/test_floorist.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import os
2+
import re
3+
14
import awswrangler as wr
25
import boto3
36
import pytest
@@ -10,6 +13,8 @@
1013
from sqlalchemy.exc import OperationalError
1114
from tempfile import NamedTemporaryFile
1215

16+
from floorist.helpers import generate_name
17+
1318

1419
class TestFloorist:
1520
@pytest.fixture(autouse=True)
@@ -192,3 +197,13 @@ def test_floorplan_valid(self, caplog, session):
192197
assert len(wr.s3.list_objects(f"{prefix}/valid/", boto3_session=session)) == 1
193198
df = wr.s3.read_parquet(f"{prefix}/valid/", boto3_session=session)
194199
assert len(df), 3
200+
201+
def test_target_files_have_expected_names(self, session):
202+
203+
bucket = f"s3://{env['AWS_BUCKET']}"
204+
env['FLOORPLAN_FILE'] = "tests/floorplan_valid.yaml"
205+
filename = generate_name(env['AWS_BUCKET'], "valid")
206+
main()
207+
existing_objects = wr.s3.list_objects(bucket, boto3_session=session)
208+
assert len(existing_objects) == 1
209+
assert re.match(rf"{filename}/[0-z]*\.gz.parquet", existing_objects[0])

Diff for: tests/unit/test_core.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import uuid
2+
3+
from floorist.floorist import main
4+
from floorist.config import Config
5+
from pandas import DataFrame
6+
7+
8+
def test_floorplan_without_prefix_raises_exception_keeps_reading_other_floorplans(mocker):
9+
10+
mocker.patch('floorist.floorist.open')
11+
mocker.patch('floorist.floorist.logging')
12+
13+
config_mock = mocker.patch('floorist.floorist.get_config')
14+
config_mock.return_value = Config(bucket_name='foo')
15+
16+
awswrangler_mock = mocker.patch('floorist.floorist.wr')
17+
18+
connection_engine_mock = mocker.patch('floorist.floorist.create_engine')
19+
connection_mock = connection_engine_mock().connect().execution_options()
20+
21+
exit_mock = mocker.patch('floorist.floorist.exit')
22+
23+
safe_load_mock = mocker.patch('floorist.floorist.yaml.safe_load')
24+
safe_load_mock.return_value = [{'query': "a query", 'prefix': None}, {'query': 'another-query', 'prefix': 'a prefix'}]
25+
26+
pandas_mock = mocker.patch('floorist.floorist.pd')
27+
data_stub = DataFrame({
28+
'ID': [uuid.uuid4(), uuid.uuid4(), uuid.uuid4()],
29+
'columnA': ["foo", "bar", "baz"]
30+
})
31+
pandas_mock.read_sql.return_value = [data_stub]
32+
33+
main()
34+
35+
pandas_mock.read_sql.assert_called_once_with("another-query", connection_mock, chunksize=1000)
36+
data_stub.equals(awswrangler_mock.s3.to_parquet.call_args[0])
37+
exit_mock.assert_called_once_with(1)

Diff for: tests/unit/test_helpers.py

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from floorist.helpers import generate_name
2+
from floorist.helpers import validate_floorplan_entry
3+
from datetime import date
4+
5+
import pytest
6+
7+
8+
def test_name_without_prefix():
9+
bucket_name = "my_bucket"
10+
actual_name = generate_name("my_bucket")
11+
name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
12+
expected_name = f"s3://{bucket_name}/{name}"
13+
14+
assert actual_name == expected_name
15+
16+
17+
def test_name_with_prefix():
18+
bucket_name = "my_bucket"
19+
prefix = "some-prefix"
20+
actual_name = generate_name(bucket_name, prefix)
21+
name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
22+
expected_name = f"s3://{bucket_name}/{prefix}/{name}"
23+
24+
assert actual_name == expected_name
25+
26+
@pytest.mark.parametrize("query,prefix", [(None, "prefix"), (None, None), ("query", None)])
27+
def test_validate_floorplan_entry_captures_invalid_data(query,prefix):
28+
with pytest.raises(ValueError) as excinfo:
29+
validate_floorplan_entry(query,prefix)
30+
31+
if (not prefix and not query) or not query:
32+
assert "Query cannot be empty!" in str(excinfo.value)
33+
elif not prefix:
34+
assert "Prefix cannot be empty" in str(excinfo.value)
35+
36+
def test_validate_floorplan_entry_checks_valid_data():
37+
assert validate_floorplan_entry("query", "prefix")

0 commit comments

Comments
 (0)