1
- # src : https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/
2
1
import pandas as pd
3
- from sklearn .metrics import mean_squared_error
4
2
from sklearn .preprocessing import MinMaxScaler
5
3
from keras .models import Sequential
6
4
from keras .layers import Dense
7
5
from keras .layers import LSTM
8
- from math import sqrt
9
- from matplotlib import pyplot
10
- import numpy
6
+ from keras .models import load_model , save_model
7
+ from matplotlib import pyplot as plt
8
+ import numpy as np
9
+ import glob
10
+ from joblib import Parallel , delayed
11
+ from sklearn .gaussian_process .kernels import RBF , WhiteKernel , RationalQuadratic , ExpSineSquared
12
+ from RNN_experiments .bootstrap .gp_bootstrap import bootstrap_data
11
13
12
14
13
15
# frame a sequence as a supervised learning problem
@@ -19,6 +21,7 @@ def timeseries_to_supervised(data, lag=1):
19
21
df .fillna (0 , inplace = True )
20
22
return df
21
23
24
+
22
25
# scale train and test data to [-1, 1]
23
26
def scale (train , test ):
24
27
# fit scaler
@@ -36,7 +39,7 @@ def scale(train, test):
36
39
# inverse scaling for a forecasted value
37
40
def invert_scale (scaler , X , value ):
38
41
new_row = [x for x in X ] + [value ]
39
- array = numpy .array (new_row )
42
+ array = np .array (new_row )
40
43
array = array .reshape (1 , len (array ))
41
44
inverted = scaler .inverse_transform (array )
42
45
return inverted [0 , - 1 ]
@@ -61,71 +64,118 @@ def fit_lstm(train, batch_size, nb_epoch, neurons):
61
64
def forecast_lstm (model , batch_size , X ):
62
65
X = X .reshape (1 , 1 , len (X ))
63
66
yhat = model .predict (X , batch_size = batch_size )
64
- return yhat [0 ,0 ]
65
-
66
-
67
- # load dataset
68
- ex_dataset = pd .read_csv ('../../data/mauna-loa-atmospheric-co2.csv' ,
69
- header = None )
70
- ex_dataset .columns = ['CO2Concentration' , 'Time' ]
71
-
72
- print ex_dataset ['CO2Concentration' ].diff ()
73
-
74
- # transform data to be stationary
75
- raw_values = series .values
76
- diff_values = difference (raw_values , 1 )
77
- # transform data to be supervised learning
78
- supervised = timeseries_to_supervised (diff_values , 1 )
79
- supervised_values = supervised .values
80
-
81
- # split data into train and test-sets
82
- train , test = supervised_values [0 :- 228 ], supervised_values [- 228 :]
83
- # transform the scale of the data
84
- # scaler, train_scaled, test_scaled = scale(train, test)
85
-
86
- # fit the model
87
- # lstm_model = fit_lstm(train_scaled, 1, 1000, 4)
88
- lstm_model = fit_lstm (train , 1 , 1000 , 4 )
89
-
90
- # forecast the entire training dataset to build up state for forecasting
91
- train_reshaped = train [:, 0 ].reshape (len (train ), 1 , 1 )
92
- lstm_model .predict (train_reshaped , batch_size = 1 )
93
-
94
- # walk-forward validation on the test data
95
- predictions = list ()
96
- prev = None
97
- prev_history = [raw_values [len (test )+ 1 ]] # initial
98
- for i in range (len (test )):
99
- # make one-step forecast
100
- if prev is None :
101
- prev = test [i , 0 :- 1 ]
102
- print ('Start---------------------' )
103
- print ('prev=' , prev )
104
- yhat = forecast_lstm (lstm_model , 1 , prev )
105
- prev = yhat
106
- print ('forcast=' , yhat )
107
- # reshape
108
- prev = numpy .array ([prev ])
109
- # invert scaling
110
- # yhat = invert_scale(scaler, X, yhat)
111
- # print('yhat after inverse scale: ', yhat)
112
- # invert differencing
113
- # yhat = inverse_difference(prev_history, yhat, i)
114
- yhat = yhat + prev_history [i ]
115
- print ('value added=' , prev_history [i ])
116
- print ('prediction=' , yhat )
117
- prev_history .append (yhat )
118
- # store forecast
119
- predictions .append (yhat )
120
- expected = raw_values [len (train ) + i + 1 ]
121
- print ('End---------------' )
122
- print ('Month=%d, Predicted=%f, Expected=%f' % (i + 1 , yhat , expected ))
123
-
124
- # report performance
125
- rmse = sqrt (mean_squared_error (raw_values [- 228 :], predictions ))
126
- print ('Test RMSE: %.3f' % rmse )
127
- # line plot of observed vs predicted
128
- pyplot .plot (raw_values [- 228 :])
129
- pyplot .plot (predictions )
130
- pyplot .savefig ('test.eps' )
131
- # pyplot.show()
67
+ return yhat [0 , 0 ]
68
+
69
+
70
+ def train_model (ith_dataset , idx ):
71
+ diff_values = ith_dataset ['CO2Concentration' ].diff ()
72
+
73
+ # transform data to be supervised learning
74
+ supervised = timeseries_to_supervised (diff_values , 1 )
75
+
76
+ lstm_model = fit_lstm (supervised .values , 1 , 50 , 4 )
77
+
78
+ save_model (lstm_model , "./models/" + str (idx ) + ".kmodel" )
79
+
80
+
81
+ def make_model_predictions (model , train , all_y , init_val ):
82
+
83
+ # forecast the entire training dataset to build up state for forecasting
84
+ train_reshaped = train [:, 0 ].reshape (len (train ), 1 , 1 )
85
+
86
+ model .predict (train_reshaped , batch_size = 1 )
87
+
88
+ # walk-forward validation on the test data
89
+ predictions = []
90
+ prev = None
91
+ prev_history = [init_val ] # initial
92
+ for i in range (len (all_y )):
93
+ # make one-step forecast
94
+ if prev is None :
95
+ prev = all_y [i , 0 :- 1 ]
96
+
97
+ yhat = forecast_lstm (model , 1 , prev )
98
+ prev = yhat
99
+ # reshape
100
+ prev = np .array ([prev ])
101
+ # invert scaling
102
+ # yhat = invert_scale(scaler, X, yhat)
103
+ # print('yhat after inverse scale: ', yhat)
104
+ # invert differencing
105
+ # yhat = inverse_difference(prev_history, yhat, i)
106
+ yhat = yhat + prev_history [i ]
107
+ prev_history .append (yhat )
108
+ # store forecast
109
+ predictions .append (yhat )
110
+
111
+ return predictions
112
+
113
+
114
+ def main (retrain = True ):
115
+
116
+ # load dataset
117
+ ex_dataset = pd .read_csv ('../../data/mauna-loa-atmospheric-co2.csv' ,
118
+ header = None )
119
+ ex_dataset .columns = ['CO2Concentration' , 'Time' ]
120
+
121
+ train_data = ex_dataset .loc [ex_dataset .Time <= 1980 , ['CO2Concentration' , 'Time' ]]
122
+
123
+ if retrain :
124
+ bootstrapped_dataset = bootstrap_data (train_data ['Time' ].reshape (- 1 , 1 ),
125
+ train_data ['CO2Concentration' ].reshape (- 1 , 1 ),
126
+ 34.4 ** 2 * RBF (length_scale = 41.8 ) +
127
+ 3.27 ** 2 * RBF (length_scale = 180 ) * ExpSineSquared (length_scale = 1.44 ,
128
+ periodicity = 1 ) +
129
+ 0.446 ** 2 * RationalQuadratic (alpha = 17.7 , length_scale = 0.957 ) +
130
+ 0.197 ** 2 * RBF (length_scale = 0.138 ) + WhiteKernel (noise_level = 0.0336 ),
131
+ samples = 10 )
132
+ # Need to run this in parallel:
133
+
134
+ # t_pool = ThreadPool(20)
135
+ Parallel (n_jobs = 10 )(delayed (train_model )(pd .DataFrame ({'Time' : np .ravel (dat [0 ]),
136
+ 'CO2Concentration' : np .ravel (dat [1 ])}),
137
+ idx )
138
+ for idx , dat in enumerate (bootstrapped_dataset ))
139
+
140
+ """for x, y in bootstrapped_dataset:
141
+ temp_df = pd.DataFrame({'Time': np.ravel(x),
142
+ 'CO2Concentration': np.ravel(y)})
143
+ rnn_models.append(train_model(temp_df))"""
144
+
145
+ rnn_models = []
146
+ for mod_path in glob .glob ("./models/*.kmodel" ):
147
+ rnn_models .append (load_model (mod_path ))
148
+
149
+ diff_values = ex_dataset ['CO2Concentration' ].diff ()
150
+ # transform data to be supervised learning
151
+ supervised = timeseries_to_supervised (diff_values , 1 )
152
+ supervised_values = supervised .values
153
+
154
+ # split data into train and test-sets
155
+ train , test = supervised_values [0 :- 228 ], supervised_values [- 228 :]
156
+
157
+ preds = []
158
+ for mod in rnn_models :
159
+ preds .append (make_model_predictions (mod , train , test , ex_dataset ['CO2Concentration' ][len (test )+ 1 ]))
160
+
161
+ # line plot of observed vs predicted
162
+ rnn_means = np .array ([])
163
+ rnn_conf = np .array ([])
164
+
165
+ for k in range (len (preds [0 ])):
166
+ step_vals = [el [k ] for el in preds ]
167
+ rnn_means = np .append (rnn_means , np .mean (step_vals ))
168
+ rnn_conf = np .append (rnn_conf , np .std (step_vals ))
169
+
170
+ plt .plot (ex_dataset ['CO2Concentration' ][- 228 :])
171
+ plt .plot (rnn_means )
172
+ plt .fill_between (list (range (len (rnn_means ))),
173
+ rnn_means - rnn_conf ,
174
+ rnn_means + rnn_conf ,
175
+ color = "gray" , alpha = 0.2 )
176
+
177
+ plt .show ()
178
+
179
+
180
+ if __name__ == "__main__" :
181
+ main (False )
0 commit comments