-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathread_datasets.py
69 lines (58 loc) · 3.5 KB
/
read_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import pandas as pd
from fredapi import Fred
import streamlit as st
import requests
import os
import csv
# @st.cache
class Datasets:
    """Load the pre-computed time-series CSV files into attributes.

    Every file is read with ``pandas.read_csv`` using a path relative to
    the current working directory; the first column becomes the index and
    is parsed as dates. Single-column files are exposed as ``Series``,
    multi-column files stay ``DataFrame`` objects.

    Raises:
        FileNotFoundError: if any of the CSV files is missing.
        ValueError: if an index does not conform to the month-start
            ('MS') frequency assigned to it.
    """

    def __init__(self):
        # train data from 1992-01-01 to 2021-04-01
        self.train = self._load('datasets/train.csv')

        # test data from 2021-05-01 to 2023-04-01
        # NOTE(review): this is the only file read from 'artifacts/' rather
        # than 'datasets/' -- confirm the location is intentional.
        self.test = self._load('artifacts/test.csv')

        # the entire data (train and test) from 1992-01-01 to 2023-04-01
        self.entire_data = self._load('datasets/entire_data.csv')

        # trend data that was decomposed with CMA (Centered Moving Average)
        # NOTE(review): unlike the other series, no frequency is assigned to
        # this index -- confirm that is intentional.
        self.cma = self._load('datasets/cma.csv', monthly=False)

        # seasonal indices series that includes the seasonal indices for
        # each month from 1992-01-01 to 2023-04-01
        self.seasonal_indices_series = self._load('datasets/seasonal_indices_series.csv')

        # residuals (what wasn't captured by trend and seasonality)
        self.residuals = self._load('datasets/residuals.csv')

        # seasonal indices dataframe that includes seasonal value, change in
        # seasonal value, and the percentage change in seasonal value.
        # The dataframe is used to plot the seasonal indices.
        self.seasonal_indices_df = self._load('datasets/seasonal_indices.csv')

        # forecast generated during model development
        # from 2021-05-01 to 2023-04-01
        self.hw_forecast_dev = self._load('datasets/hw_forecast_dev.csv')
        # Columns are selected by position: 0 -> mean, 2 -> lower bound,
        # 3 -> upper bound (column 1 is not used here).
        self.hw_forecast_dev_mean = self.hw_forecast_dev.iloc[:, 0]
        self.hw_forecast_dev_lower = self.hw_forecast_dev.iloc[:, 2]
        self.hw_forecast_dev_upper = self.hw_forecast_dev.iloc[:, 3]

        # saved forecast for the next 24 months after 2023-04-01
        # (2023-05-01 to 2025-04-01); same column layout as above
        self.hw_forecast = self._load('datasets/hw_forecast.csv')
        self.hw_forecast_mean = self.hw_forecast.iloc[:, 0]
        self.hw_forecast_lower = self.hw_forecast.iloc[:, 2]
        self.hw_forecast_upper = self.hw_forecast.iloc[:, 3]

    @staticmethod
    def _load(path, monthly=True):
        """Read one CSV file with a parsed date index.

        Args:
            path: location of the CSV file.
            monthly: when true, mark the index as month-start ('MS')
                frequency; pandas validates the index against it.

        Returns:
            A ``Series`` when the file has a single data column, otherwise
            a ``DataFrame``.
        """
        data = pd.read_csv(path, index_col=0, parse_dates=True)
        # Squeeze only the column axis: a one-column file becomes a Series,
        # while a multi-column table stays a DataFrame even if it holds a
        # single row (a bare squeeze() would collapse that row instead and
        # break the positional column selection done by the caller).
        data = data.squeeze('columns')
        if monthly:
            data.index.freq = 'MS'
        return data

    # function to return the relevant time series
    def get_datasets(self):
        """Return every loaded dataset as one tuple.

        Returns:
            A 14-tuple in this order: train, test, entire_data, cma,
            seasonal_indices_series, residuals, seasonal_indices_df,
            hw_forecast_dev_mean, hw_forecast_dev_lower,
            hw_forecast_dev_upper, hw_forecast_mean, hw_forecast_lower,
            hw_forecast_upper, hw_forecast.
        """
        return (
            self.train,
            self.test,
            self.entire_data,
            self.cma,
            self.seasonal_indices_series,
            self.residuals,
            self.seasonal_indices_df,
            self.hw_forecast_dev_mean,
            self.hw_forecast_dev_lower,
            self.hw_forecast_dev_upper,
            self.hw_forecast_mean,
            self.hw_forecast_lower,
            self.hw_forecast_upper,
            self.hw_forecast,
        )