-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathMakefile_windows
46 lines (37 loc) · 2.27 KB
/
Makefile_windows
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Makefile to run pipeline for Rock Predictor
# Path to the unfitted model (joblib file) that the final training step
# receives as its first argument.
selected_model := models/unfitted/randomforest.joblib
# Oversampling strategy handed to the final training step.
# The comment is kept on its own line on purpose: Make preserves the whitespace
# between a value and a trailing `#` comment, which would turn the value into
# "SMOTE " (with a trailing space).
oversampling := SMOTE
#### MODEL TRAINING
# `train` and `model_evals` are command-style aliases, not files. Declaring them
# phony keeps them working even if a file or directory with the same name ever
# appears next to this Makefile.
.PHONY : train model_evals

# To train the selected model, use `make train`.
# Builds the fitted model together with its evaluation report.
train : models/fitted/final_model.joblib doc/final_eval.txt

# To update the evaluation reports for all models, use `make model_evals`.
model_evals : doc/eval_models.csv doc/eval_models.txt
# The rules below define one target group per pipeline step.

# Step 1: split the data into train and test sets.
# The rule lists both CSVs as outputs of one process_data.py run (argument
# `for_train`); they are rebuilt when anything matched by data/input_train/* or
# data/input_mapping/* is newer.
# NOTE(review): a plain multi-target rule is treated as one independent rule per
# target, so `make -j` could launch the script twice - confirm whether parallel
# builds need to be supported.
data/pipeline/train.csv \
data/pipeline/test.csv : \
    data/input_train/* \
    data/input_mapping/*
	python rock_predictor/process_data.py for_train
# Step 2: create the feature files for both the train and the test set.
# Re-runs create_features.py (argument `for_train`) whenever either split CSV
# from step 1 is newer than the feature files.
data/pipeline/train_features.csv \
data/pipeline/test_features.csv : \
    data/pipeline/train.csv \
    data/pipeline/test.csv
	python rock_predictor/create_features.py for_train
# Optional step (reached through `make model_evals`, not through `make train`):
# evaluate all models and write performance reports + confusion matrices to files.
# cv_models.py is given the train features, the explosive-by-rock-class mapping
# and the CSV report path; its console output is copied to doc/eval_models.txt
# through `tee`. `python -u` requests unbuffered output.
# NOTE(review): the exit status of a pipeline is normally that of its last
# command (`tee`), so a failure inside cv_models.py may not stop make - confirm.
doc/eval_models.csv \
doc/eval_models.txt : \
    data/pipeline/train_features.csv \
    data/input_mapping/explosive_by_rock_class.csv \
    models/unfitted/*
	python -u rock_predictor/cv_models.py data/pipeline/train_features.csv data/input_mapping/explosive_by_rock_class.csv doc/eval_models.csv | tee doc/eval_models.txt
# 3) Train on the entire dataset for the selected model and save the final model
#    as a joblib file.
# train_final_model.py receives, in order: the unfitted model path, the train
# features, the test features and the oversampling strategy. Its standard output
# is redirected into doc/final_eval.txt; the fitted model file is presumably
# written by the script itself (verify against train_final_model.py).
# $(selected_model) is listed as a prerequisite because the recipe reads it:
# switching to, or regenerating, the unfitted model must trigger a retrain.
models/fitted/final_model.joblib doc/final_eval.txt : $(selected_model) data/pipeline/train_features.csv data/pipeline/test_features.csv
	python rock_predictor/train_final_model.py $(selected_model) data/pipeline/train_features.csv data/pipeline/test_features.csv $(oversampling) > doc/final_eval.txt
#### PREDICTION USING SELECTED MODEL
## To make predictions on new data, use `make predict`
# `predict` names a sequence of commands rather than an output file, so it is
# declared phony: the steps run on every invocation, even if a file called
# `predict` exists in this directory.
.PHONY : predict
predict : data/input_predict/* data/input_mapping/*
# 1) Process input data for new predictions
	python rock_predictor/process_data.py for_predict
# 2) Create features for new predictions
	python rock_predictor/create_features.py for_predict
# 3) Make predictions using a saved model
	python rock_predictor/predict.py models/fitted/final_model.joblib
# Uses train & test features for web app visualization, but does not regenerate
# them: this target has no prerequisite on the training pipeline outputs.
# Clean up files
# Removes everything under data\pipeline and doc using cmd.exe built-ins
# (`del`, `rmdir`), so this target requires make to run recipes through cmd.exe.
# `clean` is a command, not a file, so it is declared phony.
.PHONY : clean
clean :
	del /s /q data\pipeline\*
# `del /s` removes files only, so the cross-validation directory is removed
# separately. The `if exist` guard keeps a missing directory (e.g. on a fresh
# checkout) from raising an error that could abort the recipe before doc\ is
# cleaned.
	if exist data\pipeline\cv_cross_val_pred rmdir /s /q data\pipeline\cv_cross_val_pred
	del /s /q doc\*