Skip to content

Commit 4cd5c78

Browse files
fix(floorist): RHICOMPL-2984 Bugfix for empty prefix floorplan
- Fix a bug for when floorplan file does not provide prefix - Add helper function for generating directory names - Add unit test coverage
1 parent d522496 commit 4cd5c78

8 files changed

+84
-8
lines changed

Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,6 @@ CMD python ./app.py
3939

4040
FROM base as test
4141

42-
ADD tests/test_* tests/floorplan_* tests/requirements.txt ./tests/
42+
ADD tests/test_* tests/unit/ tests/floorplan_* tests/requirements.txt ./tests/
4343

44-
RUN pip install --no-cache-dir -r tests/requirements.txt
44+
RUN pip install --no-cache-dir .[test] -r tests/requirements.txt

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ python_requires = >=3.6, <4
1616
[options.extras_require]
1717
test =
1818
pytest
19+
pytest-mock

src/floorist/floorist.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
import pandas as pd
1010
import yaml
1111

12+
from floorist.helpers import generate_name
13+
14+
1215
def _configure_loglevel():
1316

1417
LOGLEVEL = environ.get('LOGLEVEL', 'INFO').upper()
@@ -45,12 +48,7 @@ def main():
4548
logging.debug(f"Dumping #{dump_count}: {row['query']} to {row['prefix']}")
4649

4750
cursor = pd.read_sql(row['query'], conn, chunksize=row.get('chunksize', 1000))
48-
49-
target = '/'.join([
50-
f"s3://{config.bucket_name}",
51-
row['prefix'],
52-
date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
53-
])
51+
target = generate_name(config.bucket_name, row['prefix'])
5452

5553
for data in cursor:
5654
wr.s3.to_parquet(data, target,

src/floorist/helpers.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from datetime import date
2+
3+
4+
def generate_name(bucket_name, prefix=None):
5+
6+
file_name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
7+
parts = ["s3:/", bucket_name, file_name]
8+
if prefix:
9+
parts.insert(2, prefix)
10+
11+
return '/'.join(parts)
+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
- query: SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (num,letter);
2+
prefix: some-prefix

tests/test_floorist.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import os
2+
import re
3+
14
import awswrangler as wr
25
import boto3
36
import pytest
@@ -9,6 +12,8 @@
912
from sqlalchemy.exc import OperationalError
1013
from tempfile import NamedTemporaryFile
1114

15+
from floorist.helpers import generate_name
16+
1217

1318
class TestFloorist:
1419
@pytest.fixture(autouse=True)
@@ -177,3 +182,13 @@ def test_floorplan_with_one_failing_dump(self, caplog, session):
177182
assert 'ProgrammingError' in caplog.text
178183
assert 'Dumped 1 from total of 2'
179184
assert wr.s3.list_directories(prefix, boto3_session=session) == [f"{prefix}/numbers/"]
185+
186+
@pytest.mark.parametrize("template,prefix", [["tests/floorplan_valid.yaml", None], ["tests/floorplan_valid_with_prefix.yaml", "some-prefix"]])
187+
def test_target_files_have_expected_names(self, template, prefix, session):
188+
bucket = f"s3://{env['AWS_BUCKET']}"
189+
env['FLOORPLAN_FILE'] = template
190+
filename = generate_name(env['AWS_BUCKET'], prefix)
191+
main()
192+
existing_objects = wr.s3.list_objects(bucket, boto3_session=session)
193+
assert len(existing_objects) == 1
194+
assert re.match(rf"{filename}/[0-z]*\.gz.parquet", existing_objects[0])

tests/unit/test_core.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from floorist.floorist import main
2+
from floorist.config import Config
3+
4+
5+
def test_floorplan_without_prefix_is_handled_correctly(mocker):
6+
config_mock = mocker.patch('floorist.floorist.get_config')
7+
config_mock.return_value = Config(bucket_name='foo')
8+
awswrangler_mock = mocker.patch('floorist.floorist.wr')
9+
mocker.patch('floorist.floorist.create_engine')
10+
mocker.patch('floorist.floorist.open')
11+
mocker.patch('floorist.floorist.logging')
12+
safe_load_mock = mocker.patch('floorist.floorist.yaml.safe_load')
13+
safe_load_mock.return_value = [{'query': "a query", 'prefix': None}]
14+
pandas_mock = mocker.patch('floorist.floorist.pd')
15+
pandas_mock.read_sql.return_value = ["some-data"]
16+
helper_mock = mocker.patch("floorist.floorist.generate_name")
17+
helper_mock.return_value = "some-name"
18+
main()
19+
helper_mock.assert_called_with("foo", None)
20+
expected_kwargs = {
21+
"index" : False,
22+
"compression" : 'gzip',
23+
"dataset" : True,
24+
"mode" : 'append'
25+
}
26+
awswrangler_mock.s3.to_parquet.assert_called_with("some-data", "some-name", **expected_kwargs)
27+
28+

tests/unit/test_helpers.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from floorist.helpers import generate_name
2+
from datetime import date
3+
4+
5+
def test_name_without_prefix():
6+
bucket_name = "my_bucket"
7+
actual_name = generate_name("my_bucket")
8+
name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
9+
expected_name = f"s3://{bucket_name}/{name}"
10+
11+
assert actual_name == expected_name
12+
13+
14+
def test_name_with_prefix():
15+
bucket_name = "my_bucket"
16+
prefix = "some-prefix"
17+
actual_name = generate_name(bucket_name, prefix)
18+
name = date.today().strftime('year_created=%Y/month_created=%-m/day_created=%-d')
19+
expected_name = f"s3://{bucket_name}/{prefix}/{name}"
20+
21+
assert actual_name == expected_name

0 commit comments

Comments
 (0)