Skip to content

Commit 7a2be60

Browse files
committed
Created data preprocessor class stub
1 parent 3a652f0 commit 7a2be60

File tree

4 files changed

+22
-3
lines changed

4 files changed

+22
-3
lines changed

data_preprocessor.py

+2
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
class DataPreprocessor(object):
2+
pass

tests/test_data_loader.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -19,14 +19,16 @@
1919
"native_country",
2020
"income"]
2121

22+
TEST_DATA_PATH = "data/data_example_for_tests.csv"
23+
2224

2325
@pytest.fixture
2426
def data_loader():
2527
return DataLoader()
2628

2729

28-
def test_data_loader_loads_data(data_loader):
29-
df = data_loader.load_relative(path="data/data_example_for_tests.csv", columns=COLUMN_NAMES)
30+
def test_data_loader_loads_data_frame(data_loader):
31+
df = data_loader.load_relative(path=TEST_DATA_PATH, columns=COLUMN_NAMES)
3032
assert isinstance(df, DataFrame)
3133
assert df.columns == COLUMN_NAMES
3234
# Check some values from the first row

tests/test_data_preprocessor.py

+15
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
"""The preprocessor is expected to clean and encode the data,
2+
making it ready for modelling"""
3+
4+
import pytest
5+
6+
from data_preprocessor import DataPreprocessor
7+
8+
9+
@pytest.fixture
10+
def preprocessor():
11+
return DataPreprocessor()
12+
13+
14+
def test_data_preprocessor_x(preprocessor):
15+
pass

todo_list.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
- [x] start and configure Spark
2-
- [ ] load_relative data from the files
2+
- [x] load data from the files
33
- [ ] prepare data for classification models
44
- [ ] string encode the factor columns
55
- [ ] one-hot encode the numerically encoded factor columns

0 commit comments

Comments
 (0)