-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathARIMA_Template.py
178 lines (149 loc) · 5.42 KB
/
ARIMA_Template.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import pandas as pd
import numpy as np
import statsmodels.tsa.api as smt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller, arma_order_select_ic
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
sns.set_theme()
# Helper functions, in case they are needed for the data
def Diff(x, periods=1):
    """Return the percentage change of ``x`` relative to its lagged values.

    Args:
        x: Object that exposes ``shift`` and supports element-wise
            arithmetic, e.g. a pandas Series or DataFrame.
        periods: Number of rows to lag by. Defaults to 1, i.e. the change
            with respect to the previous observation.

    Returns:
        ``100 * (x / x.shift(periods)) - 100``, the same kind of object as
        ``x``. For pandas objects the leading entries that have no lagged
        counterpart are NaN.
    """
    x_diff = 100 * (x / x.shift(periods))
    return x_diff - 100
def rmse(predicted, sample):
    """Return the root mean squared error between predictions and observations.

    Both inputs must support element-wise subtraction and the result must
    expose ``.mean()`` (e.g. numpy arrays or pandas Series).

    Args:
        predicted (array): Predicted values from the model.
        sample (array): Observed values the predictions are compared against.

    Returns:
        float: Root mean squared error.
    """
    # Local import kept so the helper can be copied around on its own.
    from math import sqrt

    return sqrt(((predicted - sample) ** 2).mean())
def adf_test(timeseries):
    '''
    Run the Augmented Dickey-Fuller unit root test on a univariate series.

    Calls ``adfuller`` with ``autolag="AIC"`` and returns a pandas Series
    holding the first four elements of its result under readable labels,
    followed by one "Critical Value (<key>)" entry for each item of the
    fifth element of the result.
    '''
    labels = [
        "Test Statistic",
        "p-value",
        "#Lags Used",
        "Number of Observations Used",
    ]
    raw_result = adfuller(timeseries, autolag="AIC")
    summary = pd.Series(raw_result[0:4], index=labels)
    # Append each critical value as its own labelled entry.
    for level, critical_value in raw_result[4].items():
        summary["Critical Value (%s)" % level] = critical_value
    return summary
def get_order_sbic(max_range, y_train):
    '''
    Grid-search ARIMA orders and return the one(s) with the lowest SBIC.

    Every (p, d, q) combination with each term in ``range(0, max_range)`` is
    fitted on ``y_train``. A combination whose fit raises is reported with
    ``print`` and skipped.

    Written as a manual fallback for when the statsmodels order selection
    was not working; not needed while that helper works.

    Usage:
        order = get_order_sbic(5, y)[0][0]  # Get the order

    Returns a tuple ``(orders, sbic_values)`` of numpy arrays containing
    every row that attains the minimum SBIC.
    '''
    import itertools

    grid = range(0, max_range)
    tried_orders = []
    scores = []
    for candidate in itertools.product(grid, grid, grid):
        try:
            fitted = ARIMA(y_train, order=candidate).fit()
            scores.append(fitted.bic)
            tried_orders.append(candidate)
        except Exception as err:
            # Best effort: report the failing combination and move on.
            print(err)
            continue

    table = pd.DataFrame(data={'Params': tried_orders, 'SBIC': scores})
    best_rows = table[table['SBIC'] == table['SBIC'].min()]
    return best_rows['Params'].values, best_rows['SBIC'].values
def get_order_aic(max_range, y_train):
    '''
    Grid-search ARIMA orders and return the one(s) with the lowest AIC.

    Each (p, d, q) triple drawn from ``range(0, max_range)`` is fitted on
    ``y_train``; triples whose fit raises are printed and skipped.

    Written as a manual fallback for when the statsmodels order selection
    was not working.

    Usage:
        order = get_order_aic(5, y)[0][0]

    Returns a tuple ``(orders, aic_values)`` of numpy arrays containing
    every row that attains the minimum AIC.
    '''
    import itertools

    lags = range(0, max_range)
    records = {'Params': [], 'AIC': []}
    for candidate in itertools.product(lags, repeat=3):
        try:
            score = ARIMA(y_train, order=candidate).fit().aic
        except Exception as exc:
            # Best effort: report the failing combination and move on.
            print(exc)
            continue
        records['AIC'].append(score)
        records['Params'].append(candidate)

    frame = pd.DataFrame(data=records)
    lowest = frame['AIC'].min()
    winners = frame[frame['AIC'] == lowest]
    return winners['Params'].values, winners['AIC'].values
# Data storage in case we want to scale the model or keep track of values over the program run
values = {}

# Template settings: adjust these to your own dataset.
COLUMN = 'Column Selected'  # name of the column holding the series to model
TRAIN_SIZE = 35  # number of leading observations used to fit the model

# Read your data here. The expression below is only the example source from
# the template; replace the path, index column and transformation as needed.
data = pd.read_excel('****.xlsx', index_col='Date').pct_change() * 100
data = data.dropna()
print(data.tail())
data.info()  # info() prints by itself; wrapping it in print() adds a stray "None"
print(f'Total Data: {len(data)}')
series = data[COLUMN]
y = np.array(series)

# Augmented Dickey-Fuller test for stationarity (run once, reuse the result)
test = adf_test(series)
print("Results of Dickey-Fuller Test:")
print(test)
if test['p-value'] <= 0.05:
    print('Series is stationary')
else:
    print('Series is not stationary. Should we differenciate it?')

# To add here PACF and ACF

# Slice your data here to train and test the model (hardcoded; develop this
# section as needed). Only the selected column is used because ARIMA models
# a single series.
y_train = series.iloc[:TRAIN_SIZE]
y_test = series.iloc[TRAIN_SIZE:]
print('Data splitted into:\n')
print('train', len(y_train))
print('test', len(y_test))

# Pick the ARMA order by minimum AIC; the differencing term is fixed at 0.
res = arma_order_select_ic(y_train, ic=['aic', 'bic'])
order = (res.aic_min_order[0], 0, res.aic_min_order[1])
print(order)

# Implement ARMA
model_arima = ARIMA(y_train, order=order)
model_arima_fit = model_arima.fit()
print(model_arima_fit.summary())

# Check conditions of your model
arma_process = smt.ArmaProcess.from_estimation(model_arima_fit)
print(arma_process.isinvertible)
print(arma_process.isstationary)

# Change the length or dates for your forecasting. The result is trimmed to
# len(y) for the error metric below, and its final element is reported as
# the forecast value.
predicted = model_arima_fit.predict(0, len(y), dynamic=False)
# Plain array view so positional access does not depend on the index type.
predicted_values = np.asarray(predicted)
last_prediction = predicted_values[-1]

# Essential error metrics
stats1 = np.sqrt(mean_squared_error(y, predicted_values[:len(y)]))
print('root mean squared error2: {}'.format(stats1))
print('sum-of-squared residuals: {}'.format(model_arima_fit.sse))

# Plotting predefined, customise it
plt.title(f'Your value forecast MoM ARIMA approach for *** last value predicted = {last_prediction:.2f}% ')
plt.plot(series, color='orange', alpha=0.7, label='Sample')
plt.plot(series, 'o')
plt.plot(predicted, '--', color='blue', alpha=0.3, label='Model')
plt.plot(predicted, 'ro')
plt.ylabel('Y label title here')
plt.axvline(y_train.index[-1], alpha=0.3)  # marks the end of the training window
plt.xlabel(f'Order by Minimizing AIC {order} and RSME = {stats1:.2f}. Augmented Dickey-Fuller for unit root test p-value < 0.05.')  # change if necessary
plt.xticks(rotation=45)
plt.legend()
# Lay out and save BEFORE show(): once show() returns the figure is usually
# gone, so saving afterwards writes an empty image.
plt.tight_layout()
plt.savefig('model.png', format='png')
plt.show()

values[COLUMN] = last_prediction  # Note the length for the selected value