-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel-ensemble-lookback-predict.py
106 lines (93 loc) · 3.62 KB
/
model-ensemble-lookback-predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# %%
# Ensemble Forecasting of RNN Models Trained for Lookbacks Ranging from 1 to 6
# Ehsan Moradi, Ph.D. Candidate
# %%
# Import required libraries
import pandas as pd
import numpy as np
import pickle
from scipy.stats.mstats import trimmed_mean, winsorize
from sklearn.ensemble import (
GradientBoostingRegressor,
AdaBoostRegressor,
RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.utils import check_random_state
# %%
# Load settings of best models
def load_best_ensemble_settings(sheet):
    """Read one sheet of the ensemble lookback ranking workbook.

    Args:
        sheet: Worksheet selector, forwarded to ``pd.read_excel`` as
            ``sheet_name``.

    Returns:
        Whatever ``pd.read_excel`` yields for that selector, with the first
        row of the sheet used as the column header.
    """
    # The workbook location is fixed relative to the working directory.
    workbook_path = "".join(
        (
            "../../Google Drive/Academia/PhD Thesis/Charts, Tables, Forms, Flowcharts, Spreadsheets, Figures/",
            "Paper III - Ensemble Lookback Ranking.xlsx",
        )
    )
    return pd.read_excel(workbook_path, sheet_name=sheet, header=0)
# %%
# Load data from Excel to a pandas dataframe
def load_data_from_Excel(sensor, vehicle):
    """Load the RNN results workbook of one vehicle into a DataFrame.

    The file read is ``<vehicle> - RNN - 05.xlsx`` inside
    ``Field Experiments/<sensor>/<vehicle>/Processed/RNN/``.

    Args:
        sensor: Name of the sensor folder (string).
        vehicle: Name of the vehicle folder; also the workbook name prefix.

    Returns:
        DataFrame holding the contents of ``Sheet1``, first row as header.
    """
    path_pieces = (
        "../../Google Drive/Academia/PhD Thesis/Field Experiments/",
        sensor,
        "/",
        vehicle,
        "/Processed/RNN/",
    )
    workbook_path = "".join(path_pieces) + "{0} - RNN - 05.xlsx".format(vehicle)
    return pd.read_excel(workbook_path, sheet_name="Sheet1", header=0)
# %%
# Save back the predictions to Excel
def save_data_to_Excel(df, sensor, vehicle):
    """Write a DataFrame of predictions to the vehicle's ENSEMBLE workbook.

    The target is ``<vehicle> - ENSEMBLE - 06.xlsx`` inside
    ``Field Experiments/<sensor>/<vehicle>/Processed/ENSEMBLE/``; the file is
    opened in write mode, so an existing workbook is replaced.

    Args:
        df: DataFrame to export (header row written, index column omitted).
        sensor: Name of the sensor folder (string).
        vehicle: Name of the vehicle folder; also the workbook name prefix.

    Returns:
        None. A confirmation line is printed once the workbook is closed.
    """
    path_pieces = (
        "../../Google Drive/Academia/PhD Thesis/Field Experiments/",
        sensor,
        "/",
        vehicle,
        "/Processed/ENSEMBLE/",
    )
    workbook_path = "".join(path_pieces) + "{0} - ENSEMBLE - 06.xlsx".format(vehicle)
    with pd.ExcelWriter(workbook_path, engine="openpyxl", mode="w") as workbook:
        df.to_excel(workbook, header=True, index=None)
    print("{0} -> Output is successfully saved to Excel.".format(vehicle))
# %%
# Save fitted model to .sav file
def save_to_sav(model, vehicle, dependent, estimator):
    """Pickle a fitted model into the ensemble-lookback models folder.

    Args:
        model: Object to serialise with ``pickle``.
        vehicle: First component of the file name.
        dependent: Second component of the file name.
        estimator: Third component of the file name; it is rendered through
            ``str.format``, so a non-string value contributes its string form.
    """
    file_name = "{0} - {1} - {2}.sav".format(vehicle, dependent, estimator)
    target_path = (
        "../../Google Drive/Academia/PhD Thesis/Modeling Outputs/ENSEMBLE_LOOKBACK_MODELS/"
        + file_name
    )
    with open(target_path, "wb") as sink:
        pickle.dump(model, sink)
# %%
# General settings
# Turn off pandas' chained-assignment check for the whole session.
pd.set_option("mode.chained_assignment", None)
# %%
# Batch execution for the best ensemble models dedicated to each vehicle-dependent pair
best_ensemble_settings = load_best_ensemble_settings("Best Ensemble Settings")
old_vehicle = ""
# A single RandomState seeded with 0 is shared by every split below, so each
# split consumes from the same stream rather than restarting at seed 0.
rng = check_random_state(0)
# NOTE(review): the save-on-change logic below assumes rows of the settings
# sheet are grouped by VEHICLE; an interleaved order would reload a vehicle
# and overwrite its earlier output - confirm against the workbook.
for index, ensemble_setting in best_ensemble_settings.iterrows():
    vehicle = ensemble_setting["VEHICLE"]
    dependent = ensemble_setting["DEPENDENT"]
    # Model inputs are the six per-lookback prediction columns
    # <dependent>_PRED_L1 ... <dependent>_PRED_L6.
    features = [
        "{0}_PRED_L{1}".format(dependent, lookback) for lookback in range(1, 7)
    ]
    sensor = "Veepeak" if dependent == "FCR_LH" else "3DATX parSYNC Plus"
    if vehicle != old_vehicle:
        # A new vehicle starts: persist the previous vehicle's accumulated
        # predictions (if any) before its DataFrame is replaced.
        # NOTE(review): the data are loaded from the sensor folder of the
        # vehicle's first row but saved under the sensor of its last row
        # (old_sensor); these differ when a vehicle has both FCR_LH and other
        # dependents - confirm this is intended.
        if old_vehicle != "":
            save_data_to_Excel(df, old_sensor, old_vehicle)
        df = load_data_from_Excel(sensor, vehicle)
    # SECURITY NOTE: the ESTIMATOR cell is executed as Python source. This is
    # only acceptable while the settings workbook is fully trusted; anyone able
    # to edit that spreadsheet can run arbitrary code here.
    estimator = eval(ensemble_setting["ESTIMATOR"])
    print(vehicle, dependent, estimator)
    # Rows with a missing value in any column are removed permanently, so they
    # are also absent from the workbook that is eventually saved.
    df.dropna(inplace=True)
    X, y = df[features], df[dependent]
    # Only the 70 % training part is used for fitting; the held-out part is
    # discarded here.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=rng)
    ensemble = estimator.fit(X_train, y_train)
    save_to_sav(ensemble, vehicle, dependent, estimator)
    # Predictions for all remaining rows are stored in a column named after the
    # dependent variable and the estimator's string form.
    df["{0}_PRED_{1}".format(dependent, estimator)] = ensemble.predict(X)
    old_vehicle, old_sensor = vehicle, sensor
# The loop only saves a vehicle when the next one begins, so the last vehicle
# processed must be written out explicitly; skipped when the sheet had no rows.
if old_vehicle != "":
    save_data_to_Excel(df, old_sensor, old_vehicle)
# %%