Skip to content

Commit d0f84da

Browse files
committed
Implemented and tested bootstrap idea.. may need to fix issue of outlier predictions.
1 parent 91eb89c commit d0f84da

20 files changed

+129
-102
lines changed

RNN_experiments/bootstrap/__init__.py

Whitespace-only changes.

RNN_experiments/bootstrap/gp_bootstrap.py

+5-28
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@
77
"""
88

99
from sklearn.gaussian_process import GaussianProcessRegressor
10-
from sklearn.gaussian_process.kernels import ExpSineSquared
1110
import numpy as np
12-
import matplotlib.pylab as plt
1311

1412

1513
def bootstrap_data(X, Y, kernel, samples=50):
@@ -28,40 +26,19 @@ def bootstrap_data(X, Y, kernel, samples=50):
2826
m.fit(X, Y)
2927

3028
boot_samples = []
31-
# num_records = len(X)
29+
num_records = len(X)
3230

3331
for si in range(samples):
3432
# - Start of the sequence is a normal distribution centered on X[0]
3533
# and with standard deviation equal to 5% the value of X[0].
3634
# - Same logic applies to the end of the sequence.
3735
# - The number of steps in the sequence is equal to the number
3836
# of records in X +/- 10% of that number.
39-
xx = np.atleast_2d(np.linspace(np.random.normal(loc=X[0], scale=.05 * X[0]),
40-
np.random.normal(loc=X[-1], scale=.05 * X[-1]),
41-
len(X)*2)).T
42-
#num_records + np.random.randint(low=-int(.1 * num_records),
43-
# high=int(.1 * num_records)))).T
37+
xx = np.atleast_2d(np.linspace(np.random.normal(loc=X[0], scale=.0001 * X[0]),
38+
np.random.normal(loc=X[-1], scale=.0001 * X[-1]),
39+
num_records + np.random.randint(low=-int(.1 * num_records),
40+
high=int(.1 * num_records)))).T
4441
yy = m.predict(xx)
45-
#print yy
4642
boot_samples.append((xx, yy))
4743

4844
return boot_samples
49-
50-
51-
def test_bootstrap():
52-
53-
def f(x):
54-
return 2. * x * np.sin(x)
55-
56-
X_ = np.atleast_2d(np.linspace(1., 50., 20)).T
57-
Y_ = f(X_).ravel()
58-
59-
boots = bootstrap_data(X_, Y_, ExpSineSquared(), samples=15)
60-
61-
for bt in boots:
62-
plt.plot(bt[0], bt[1])
63-
64-
plt.show()
65-
66-
67-
test_bootstrap()
47.9 KB
Loading
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
22.7 KB
Binary file not shown.
+124-74
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
# src : https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/
21
import pandas as pd
3-
from sklearn.metrics import mean_squared_error
42
from sklearn.preprocessing import MinMaxScaler
53
from keras.models import Sequential
64
from keras.layers import Dense
75
from keras.layers import LSTM
8-
from math import sqrt
9-
from matplotlib import pyplot
10-
import numpy
6+
from keras.models import load_model, save_model
7+
from matplotlib import pyplot as plt
8+
import numpy as np
9+
import glob
10+
from joblib import Parallel, delayed
11+
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared
12+
from RNN_experiments.bootstrap.gp_bootstrap import bootstrap_data
1113

1214

1315
# frame a sequence as a supervised learning problem
@@ -19,6 +21,7 @@ def timeseries_to_supervised(data, lag=1):
1921
df.fillna(0, inplace=True)
2022
return df
2123

24+
2225
# scale train and test data to [-1, 1]
2326
def scale(train, test):
2427
# fit scaler
@@ -36,7 +39,7 @@ def scale(train, test):
3639
# inverse scaling for a forecasted value
3740
def invert_scale(scaler, X, value):
3841
new_row = [x for x in X] + [value]
39-
array = numpy.array(new_row)
42+
array = np.array(new_row)
4043
array = array.reshape(1, len(array))
4144
inverted = scaler.inverse_transform(array)
4245
return inverted[0, -1]
@@ -61,71 +64,118 @@ def fit_lstm(train, batch_size, nb_epoch, neurons):
6164
def forecast_lstm(model, batch_size, X):
6265
X = X.reshape(1, 1, len(X))
6366
yhat = model.predict(X, batch_size=batch_size)
64-
return yhat[0,0]
65-
66-
67-
# load dataset
68-
ex_dataset = pd.read_csv('../../data/mauna-loa-atmospheric-co2.csv',
69-
header=None)
70-
ex_dataset.columns = ['CO2Concentration', 'Time']
71-
72-
print ex_dataset['CO2Concentration'].diff()
73-
74-
# transform data to be stationary
75-
raw_values = series.values
76-
diff_values = difference(raw_values, 1)
77-
# transform data to be supervised learning
78-
supervised = timeseries_to_supervised(diff_values, 1)
79-
supervised_values = supervised.values
80-
81-
# split data into train and test-sets
82-
train, test = supervised_values[0:-228], supervised_values[-228:]
83-
# transform the scale of the data
84-
# scaler, train_scaled, test_scaled = scale(train, test)
85-
86-
# fit the model
87-
# lstm_model = fit_lstm(train_scaled, 1, 1000, 4)
88-
lstm_model = fit_lstm(train, 1, 1000, 4)
89-
90-
# forecast the entire training dataset to build up state for forecasting
91-
train_reshaped = train[:, 0].reshape(len(train), 1, 1)
92-
lstm_model.predict(train_reshaped, batch_size=1)
93-
94-
# walk-forward validation on the test data
95-
predictions = list()
96-
prev = None
97-
prev_history = [raw_values[len(test)+1]] # initial
98-
for i in range(len(test)):
99-
# make one-step forecast
100-
if prev is None:
101-
prev = test[i, 0:-1]
102-
print('Start---------------------')
103-
print('prev=', prev)
104-
yhat = forecast_lstm(lstm_model, 1, prev)
105-
prev = yhat
106-
print('forcast=', yhat)
107-
# reshape
108-
prev = numpy.array([prev])
109-
# invert scaling
110-
# yhat = invert_scale(scaler, X, yhat)
111-
# print('yhat after inverse scale: ', yhat)
112-
# invert differencing
113-
# yhat = inverse_difference(prev_history, yhat, i)
114-
yhat = yhat + prev_history[i]
115-
print('value added=', prev_history[i])
116-
print('prediction=', yhat)
117-
prev_history.append(yhat)
118-
# store forecast
119-
predictions.append(yhat)
120-
expected = raw_values[len(train) + i +1]
121-
print('End---------------')
122-
print('Month=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))
123-
124-
# report performance
125-
rmse = sqrt(mean_squared_error(raw_values[-228:], predictions))
126-
print('Test RMSE: %.3f' % rmse)
127-
# line plot of observed vs predicted
128-
pyplot.plot(raw_values[-228:])
129-
pyplot.plot(predictions)
130-
pyplot.savefig('test.eps')
131-
# pyplot.show()
67+
return yhat[0, 0]
68+
69+
70+
def train_model(ith_dataset, idx):
71+
diff_values = ith_dataset['CO2Concentration'].diff()
72+
73+
# transform data to be supervised learning
74+
supervised = timeseries_to_supervised(diff_values, 1)
75+
76+
lstm_model = fit_lstm(supervised.values, 1, 50, 4)
77+
78+
save_model(lstm_model, "./models/" + str(idx) + ".kmodel")
79+
80+
81+
def make_model_predictions(model, train, all_y, init_val):
82+
83+
# forecast the entire training dataset to build up state for forecasting
84+
train_reshaped = train[:, 0].reshape(len(train), 1, 1)
85+
86+
model.predict(train_reshaped, batch_size=1)
87+
88+
# walk-forward validation on the test data
89+
predictions = []
90+
prev = None
91+
prev_history = [init_val] # initial
92+
for i in range(len(all_y)):
93+
# make one-step forecast
94+
if prev is None:
95+
prev = all_y[i, 0:-1]
96+
97+
yhat = forecast_lstm(model, 1, prev)
98+
prev = yhat
99+
# reshape
100+
prev = np.array([prev])
101+
# invert scaling
102+
# yhat = invert_scale(scaler, X, yhat)
103+
# print('yhat after inverse scale: ', yhat)
104+
# invert differencing
105+
# yhat = inverse_difference(prev_history, yhat, i)
106+
yhat = yhat + prev_history[i]
107+
prev_history.append(yhat)
108+
# store forecast
109+
predictions.append(yhat)
110+
111+
return predictions
112+
113+
114+
def main(retrain=True):
115+
116+
# load dataset
117+
ex_dataset = pd.read_csv('../../data/mauna-loa-atmospheric-co2.csv',
118+
header=None)
119+
ex_dataset.columns = ['CO2Concentration', 'Time']
120+
121+
train_data = ex_dataset.loc[ex_dataset.Time <= 1980, ['CO2Concentration', 'Time']]
122+
123+
if retrain:
124+
bootstrapped_dataset = bootstrap_data(train_data['Time'].reshape(-1, 1),
125+
train_data['CO2Concentration'].reshape(-1, 1),
126+
34.4**2 * RBF(length_scale=41.8) +
127+
3.27**2 * RBF(length_scale=180) * ExpSineSquared(length_scale=1.44,
128+
periodicity=1) +
129+
0.446**2 * RationalQuadratic(alpha=17.7, length_scale=0.957) +
130+
0.197**2 * RBF(length_scale=0.138) + WhiteKernel(noise_level=0.0336),
131+
samples=10)
132+
# Need to run this in parallel:
133+
134+
# t_pool = ThreadPool(20)
135+
Parallel(n_jobs=10)(delayed(train_model)(pd.DataFrame({'Time': np.ravel(dat[0]),
136+
'CO2Concentration': np.ravel(dat[1])}),
137+
idx)
138+
for idx, dat in enumerate(bootstrapped_dataset))
139+
140+
"""for x, y in bootstrapped_dataset:
141+
temp_df = pd.DataFrame({'Time': np.ravel(x),
142+
'CO2Concentration': np.ravel(y)})
143+
rnn_models.append(train_model(temp_df))"""
144+
145+
rnn_models = []
146+
for mod_path in glob.glob("./models/*.kmodel"):
147+
rnn_models.append(load_model(mod_path))
148+
149+
diff_values = ex_dataset['CO2Concentration'].diff()
150+
# transform data to be supervised learning
151+
supervised = timeseries_to_supervised(diff_values, 1)
152+
supervised_values = supervised.values
153+
154+
# split data into train and test-sets
155+
train, test = supervised_values[0:-228], supervised_values[-228:]
156+
157+
preds = []
158+
for mod in rnn_models:
159+
preds.append(make_model_predictions(mod, train, test, ex_dataset['CO2Concentration'][len(test)+1]))
160+
161+
# line plot of observed vs predicted
162+
rnn_means = np.array([])
163+
rnn_conf = np.array([])
164+
165+
for k in range(len(preds[0])):
166+
step_vals = [el[k] for el in preds]
167+
rnn_means = np.append(rnn_means, np.mean(step_vals))
168+
rnn_conf = np.append(rnn_conf, np.std(step_vals))
169+
170+
plt.plot(ex_dataset['CO2Concentration'][-228:])
171+
plt.plot(rnn_means)
172+
plt.fill_between(list(range(len(rnn_means))),
173+
rnn_means - rnn_conf,
174+
rnn_means + rnn_conf,
175+
color="gray", alpha=0.2)
176+
177+
plt.show()
178+
179+
180+
if __name__ == "__main__":
181+
main(False)

__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)