-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharima.py
137 lines (97 loc) · 4.05 KB
/
arima.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import yfinance as yf
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import warnings
import itertools
from sklearn.metrics import mean_squared_error
from datetime import datetime
from statsmodels.tsa.stattools import adfuller
warnings.filterwarnings("ignore") # To ignore warnings from ARIMA
# Define parameter range
# Candidate AR (p) and MA (q) orders for the grid search: 0, 1 and 2.
# These must be iterables: itertools.product() raises TypeError on bare ints.
# d shares the same range object; build_arima_model() picks the differencing
# order itself and only iterates over the (p, q) pairs below.
p = d = q = range(0, 3)
param_combinations = list(itertools.product(p, q))
def preprocess_data(data, start_date, end_date):
    """Extract closing prices for a date window and split them 80/20.

    Args:
        data: DataFrame with at least a 'Date' and a 'Close' column. The
            'Date' column is converted to datetime in place, so the caller's
            frame is modified.
        start_date: Inclusive lower bound of the window.
        end_date: Inclusive upper bound of the window.

    Returns:
        A tuple ``(close_prices, train_data, test_data)`` where
        ``train_data`` is the first ``int(len(close_prices) * 0.8)`` rows of
        ``close_prices`` and ``test_data`` is the remainder.
    """
    # Normalise 'Date' to datetime so it can be compared against the bounds.
    data['Date'] = pd.to_datetime(data['Date'])

    # The window is applied only when BOTH bounds are truthy; otherwise the
    # whole frame is used.
    window = data
    if start_date and end_date:
        in_range = (data['Date'] >= start_date) & (data['Date'] <= end_date)
        window = data.loc[in_range]

    # Only the 'Close' column is analysed.
    close_prices = window['Close']

    # Positional 80/20 split: the leading part trains, the tail tests.
    cutoff = int(len(close_prices) * 0.8)
    return close_prices, close_prices[:cutoff], close_prices[cutoff:]
# Function to apply manual differencing
def manual_differencing(series, order=1):
    """Return `series` differenced `order` times, with NaN rows dropped.

    Differencing of order d means applying the first difference d times.
    ``series.diff(periods=d)`` is a different operation: it subtracts the
    value d steps back (a lag-d difference), so it must not be used to find
    the ``d`` of an ARIMA(p, d, q) model.

    Args:
        series: pandas Series to difference.
        order: Number of times to apply the first difference. 0 returns the
            series with NaNs dropped.

    Returns:
        The differenced pandas Series without the leading NaN values.

    Raises:
        ValueError: If `order` is negative.
    """
    if order < 0:
        raise ValueError(f"Differencing order must be non-negative, got {order}")
    diff_series = series
    for _ in range(order):
        # Each pass is a lag-1 difference; repeating it raises the order.
        diff_series = diff_series.diff()
    return diff_series.dropna()
# Function to determine the best differencing order
def find_best_differencing(series, max_order=3):
    """Return the smallest differencing order that makes `series` stationary.

    Orders 1..max_order are tried in turn. Each differenced series is tested
    with the Augmented Dickey-Fuller test, and the first order whose p-value
    is below 0.05 is returned.

    Args:
        series: Series to test; passed to manual_differencing().
        max_order: Highest differencing order to try.

    Returns:
        int: The first order that passes the test, or `max_order` if none
        does. The result is always a plain int so that callers can place it
        directly into an ARIMA ``(p, d, q)`` order tuple.
    """
    for order in range(1, max_order + 1):
        diff_series = manual_differencing(series, order=order)
        # adfuller() returns a tuple whose second element is the p-value.
        p_value = adfuller(diff_series)[1]
        # If the p-value is less than 0.05, the series is stationary
        if p_value < 0.05:
            print(f"Stationarity achieved with differencing order: {order}")
            return order
    # If no order results in stationarity, fall back to max_order. Only the
    # order is returned (not a tuple), to match the success path above.
    print(f"Stationarity not achieved up to differencing order: {max_order}. Returning maximum differencing.")
    return max_order
def build_arima_model(train_data, test_data):
    """Grid-search ARIMA orders on `train_data` and return the lowest-AIC fit.

    The differencing order d comes from find_best_differencing(). Every
    (p, q) pair in `param_combinations` is fitted as ARIMA(p, d, q). Each
    time a fit improves on the best AIC so far, walk-forward validation is
    run for that order against `test_data`.

    Args:
        train_data: Series the candidate models are fitted on.
        test_data: Held-out observations used for walk-forward validation.

    Returns:
        The fitted results object of the model with the lowest AIC.

    Raises:
        RuntimeError: If none of the candidate orders could be fitted.
    """
    # Find the best differencing order for the training data
    best_d = find_best_differencing(train_data)
    best_aic = float('inf')
    best_order = None
    best_model = None
    for p_order, q_order in param_combinations:
        # Use the best differencing order (d) found
        model_order = (p_order, best_d, q_order)
        try:
            model_fit = ARIMA(train_data, order=model_order).fit()
        except Exception as e:
            # Individual orders may fail to fit; report it and try the next
            # one instead of silently hiding the failure.
            print(f'Skipping order {model_order}: {e}')
            continue
        aic = model_fit.aic
        if aic < best_aic:
            best_aic = aic
            best_order = model_order
            best_model = model_fit
            # Apply walk-forward validation
            print(f'Performing walk-forward validation for order: {model_order}')
            try:
                walk_forward_validation(train_data, test_data, model_order)
            except Exception as e:
                # Validation is diagnostic only: keep the selected model,
                # but make the failure visible.
                print(f'Walk-forward validation failed for order {model_order}: {e}')
    if best_model is None:
        # Without this check the summary() call below would fail with an
        # unhelpful AttributeError on None.
        raise RuntimeError('No ARIMA model could be fitted for any candidate order.')
    print(f'Best Order: {best_order}, AIC: {best_aic}')
    print(best_model.summary())
    return best_model
def calculate_residuals(model_fit):
    """Return the model residuals as a one-column DataFrame named 'Residuals'.

    Args:
        model_fit: Fitted model results object exposing a one-dimensional
            `resid` attribute (pandas Series or array-like).

    Returns:
        DataFrame with a single 'Residuals' column. If `resid` is a Series,
        its index is preserved.
    """
    # Get residuals from the ARIMA model
    residuals = model_fit.resid
    # Convert via Series.to_frame() rather than DataFrame(..., columns=[...]).
    # The DataFrame constructor selects columns by name when given a named
    # Series, so a Series named anything other than 'Residuals' would lose
    # its data.
    residuals_df = pd.Series(residuals).to_frame(name='Residuals')
    return residuals_df
def walk_forward_validation(train_data, test_data, order):
    """Run one-step-ahead walk-forward validation and print the test RMSE.

    For each test observation, an ARIMA model of the given order is refitted
    on all values seen so far and used to forecast one step ahead. The true
    observation is then appended to the history. Each prediction and the
    final RMSE are printed.

    Args:
        train_data: Iterable of initial training values.
        test_data: Iterable of held-out values, in time order.
        order: ARIMA ``(p, d, q)`` order tuple.

    Returns:
        None.
    """
    history = list(train_data)
    # Iterate over the values, not test_data[t]. A Series slice such as
    # close_prices[train_size:] keeps its original index labels, so a
    # 0-based lookup would hit the wrong label or raise KeyError.
    observations = list(test_data)
    if not observations:
        # An RMSE over zero forecasts is undefined.
        print('No test observations; skipping walk-forward validation.')
        return
    predictions = []
    # Walk-forward validation
    for obs in observations:
        model_fit = ARIMA(history, order=order).fit()
        # forecast() defaults to a single step; take that one value.
        yhat = model_fit.forecast()[0]
        predictions.append(yhat)
        history.append(obs)
        print('predicted=%f, expected=%f' % (yhat, obs))
    # Evaluate forecasts. np.sqrt is used because no bare sqrt is imported
    # in this file.
    rmse = np.sqrt(mean_squared_error(observations, predictions))
    print('Test RMSE: %.3f' % rmse)
def make_predictions(model_fit, test_data):
    """Forecast as many steps ahead as there are test observations.

    Args:
        model_fit: Fitted model exposing ``forecast(steps=...)``.
        test_data: Sized collection; only its length is used.

    Returns:
        Whatever ``model_fit.forecast`` returns for ``len(test_data)`` steps.
    """
    horizon = len(test_data)
    return model_fit.forecast(steps=horizon)