
Commit 176de26

Committed Jul 29, 2020
Models container, trained and evaluated several models
1 parent bc3fd03 commit 176de26

File tree

3 files changed: +26 -11 lines changed


models_container.py

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ def _wrap_models(self):
     @property
     def _all_models_dict(self):
         return {name: obj for name, obj in self.__dict__.items()
-                if self.model_path in str(obj.__class__)}
+                if self.model_path in str(obj.__class__) or isinstance(obj, Model)}


     def _get_models_of_kind(self, kind):
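
For context, the one-line change above broadens `_all_models_dict`: an attribute now counts as a model either when its class path contains `self.model_path` or when it is already an instance of the repo's `Model` wrapper. A minimal, self-contained sketch of the behavior (the `Model` stand-in, the `FakeSparkModel` class, and the `model_path` value are illustrative assumptions, not the repo's actual definitions):

```python
class Model:
    """Illustrative stand-in for the repo's Model wrapper class."""


class FakeSparkModel:
    """A class whose path does not contain the matched substring."""


class Container:
    model_path = "pyspark.ml"  # assumed: substring matched against each attribute's class path

    def __init__(self):
        self.raw = FakeSparkModel()  # skipped: class path lacks "pyspark.ml" and it is not a Model
        self.wrapped = Model()       # picked up by the new isinstance(obj, Model) branch

    @property
    def _all_models_dict(self):
        # Collect attributes that look like models, by class path or by wrapper type.
        return {name: obj for name, obj in self.__dict__.items()
                if self.model_path in str(obj.__class__) or isinstance(obj, Model)}


print(Container()._all_models_dict)  # {'wrapped': <__main__.Model object at 0x...>}
```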

tests/test_model_evaluator.py

Lines changed: 23 additions & 8 deletions
@@ -4,6 +4,7 @@
 import pytest
 from pyspark.mllib.evaluation import BinaryClassificationMetrics

+from data_preprocessor import DataPreprocessor
 from model_evaluator import ModelEvaluator
 from models_container import ModelsContainer, ModelKinds

@@ -28,17 +29,30 @@ def test_model_evaluator_with_linear_regression_and_full_train_data(logistic_mod


 def test_several_classification_models_fitting(preprocessor_train_data):
-    preprocessor_train_data.prepare_to_model(target_col='income', to_strip=' .')
-    evaluator = ModelEvaluator(metrics_class=BinaryClassificationMetrics)
+    df = preprocessor_train_data.train_df.sample(0.1)
+    preprocessor = DataPreprocessor(train_df=df, test_df=df)
+    preprocessor.prepare_to_model(target_col='income', to_strip=' .')
+
     models = ModelsContainer()
-    models.fit(preprocessor_train_data.train_encoded_df, kind=ModelKinds.CLASSIFICATION)
-    evaluator.compare({"train": preprocessor_train_data.train_encoded_df}, models=models.fitted_models)
-    print('kk')
+    models.fit(preprocessor.train_encoded_df, kind=ModelKinds.CLASSIFICATION)
+    expected_results = [
+        {"model": models.logistic_class.fitted_model,
+         "metrics": {"areaUnderROC": 0.770414, "areaUnderPR": 0.646093}, },
+        {"model": models.random_forest_class.fitted_model,
+         "metrics": {"areaUnderROC": 0.674751, "areaUnderPR": 0.664931}, },
+        {"model": models.gbt_class.fitted_model,
+         "metrics": {"areaUnderROC": 0.811643, "areaUnderPR": 0.746147}, },
+        {"model": models.svm_class.fitted_model,
+         "metrics": {"areaUnderROC": 0.750627, "areaUnderPR": 0.645328}, },
+        {"model": models.naive_bayes_class.fitted_model,
+         "metrics": {"areaUnderROC": 0.615000, "areaUnderPR": 0.504709}, },
+    ]
+    for result in expected_results:
+        _check_evaluation(preprocessor=preprocessor, model=result["model"], metrics=result["metrics"])


 def _check_evaluation(preprocessor, model, metrics: Dict[str, float]):
-    metrics_class = BinaryClassificationMetrics
-    evaluator = ModelEvaluator(metrics_class=metrics_class)
+    evaluator = ModelEvaluator(metrics_class=BinaryClassificationMetrics)
     # The purpose of this parameter is to prove names can be arbitrary in the compare method
     dataframes_sets = [['train', 'test'], ['train1', 'test1']]
     for dataframes in dataframes_sets:
@@ -51,4 +65,5 @@ def _check_evaluation(preprocessor, model, metrics: Dict[str, float]):
         for metric in metrics:
             assert metric in comparison
             for dataframe in dataframes:
-                assert comparison[metric][evaluator.index_key(dataframe, model)] == pytest.approx(metrics[metric])
+                assert comparison[metric][evaluator.index_key(dataframe, model)] == pytest.approx(metrics[metric],
+                                                                                                  abs=0.035)
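
Two details of this test change are worth noting. The test now fits on a 10% sample (`train_df.sample(0.1)`) instead of the full training data, so metric values vary from run to run; accordingly, the assertion moves from `pytest.approx`'s default relative tolerance (1e-6) to an absolute tolerance of 0.035. A quick, runnable illustration of how that tolerance behaves (the `expected` value is taken from the logistic regression entry above):

```python
import pytest

expected = 0.770414  # e.g. the expected logistic regression areaUnderROC

# Passes: |0.78 - 0.770414| = 0.009586, which is within abs=0.035
assert 0.78 == pytest.approx(expected, abs=0.035)

# Outside the tolerance: |0.90 - 0.770414| > 0.035, so the comparison is False
assert not (0.90 == pytest.approx(expected, abs=0.035))
```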

todo_list.md

Lines changed: 2 additions & 2 deletions
@@ -14,10 +14,10 @@
 - [x] prepare the data frame by applying all transformations
       (cleaning, encoding, etc)
 - [x] obtain evaluation metrics for a single model
-- [ ] **fit and compare several classification models without tuning**
+- [x] fit and compare several classification models without tuning
   - [x] create an object container for the models
   - [x] initialize the models with default hyperparameters
-  - [ ] fit and compare the results with the evaluator
+  - [x] fit and compare the results with the evaluator
 - [ ] fit and compare several classification models with tuning and crossvalidation
   - [ ] be able to pass a list of hyperparameters values for each hyperparameter
   - [ ] tune and obtain the best hyperparam set per model
