-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
163 lines (121 loc) · 4.47 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import datetime as dt
import pandas as pd
import pandas_datareader as pdr
from pathlib import Path
from _config import *
# ==================================================================================================
# global variables
# ==================================================================================================
''' moved to _config.py
DEBUG = True
'''
# ==================================================================================================
# core functions
# ==================================================================================================
def initialize_df(column_names, set_index=False, index_column=''):
    """initialize_df() creates an empty dataframe with the requested columns.

    Parameters
    ----------
    column_names : list-like (required)
        Column labels for the new, empty dataframe.
    set_index : bool (optional)
        When True, `index_column` is moved out of the columns and becomes
        the dataframe's index.
    index_column : str (optional)
        Name of the column to use as the index. Must be non-blank when
        `set_index` is True.

    Returns
    -------
    pandas.DataFrame
        An empty dataframe (zero rows) with the requested columns.

    Raises
    ------
    ValueError
        If `set_index` is True but `index_column` is blank.
    """
    # The previous `assert(<non-empty string>)` was always truthy and so never
    # fired; raise explicitly so the check also survives `python -O`.
    if set_index and index_column == '':
        raise ValueError(
            'initialize_df() -> index_column is blank, '
            'please provide the correct column name'
        )

    df = pd.DataFrame(columns=column_names)
    if set_index:
        df.set_index(index_column, inplace=True)
    return df
# --------------------------------------------------------------------------------------------------
def download_stock_data(
        ticker='SPY',
        start_date=dt.datetime(2000, 1, 1),
        end_date=None):
    """download_stock_data() downloads price data for one ticker from Yahoo.

    The download is delegated to `pandas_datareader.get_data_yahoo`, called
    with `ret_index=True`. Nothing is written to disk here.

    Parameters
    ----------
    ticker : str (optional)
        Symbol to download. Defaults to 'SPY'.
    start_date : datetime.datetime (optional)
        First date of the requested range. Defaults to 2000-01-01.
    end_date : datetime.datetime or None (optional)
        Last date of the requested range. When None (the default) the
        current time is used, evaluated at call time.

    Returns
    -------
    The object returned by `pandas_datareader.get_data_yahoo` for the ticker
    (presumably a pandas.DataFrame indexed by date -- confirm against the
    installed pandas_datareader version).
    """
    # A `dt.datetime.now()` default in the signature is evaluated only once,
    # at import time, which makes the end date stale in long-running
    # processes. Resolve it on every call instead.
    if end_date is None:
        end_date = dt.datetime.now()

    stock = pdr.get_data_yahoo(ticker, start_date, end_date, ret_index=True)

    if DEBUG:
        print(f'download_data -- ticker: {ticker}')
        print(stock.head(20))

    return stock
# --------------------------------------------------------------------------------------------------
def save_data(df, dir='data', filename='file.csv'):
    """save_data() writes a dataframe to `<dir>/<filename>` as CSV.

    Parameters
    ----------
    df : pandas.DataFrame (required)
        The dataframe to write; its `to_csv` method is used.
    dir : str (optional)
        Directory to write into. Created (including parents) if it does not
        exist yet. Defaults to 'data'.
        NOTE: the name shadows the `dir` builtin, but it is kept because
        callers may pass it by keyword.
    filename : str (optional)
        Name of the CSV file inside `dir`. Defaults to 'file.csv'.

    Returns
    -------
    None
    """
    # Join with pathlib rather than '+' so a blank `dir` resolves to the
    # current directory instead of the filesystem root ('/' + filename).
    path = Path(dir) / filename

    # to_csv does not create missing directories; make sure the target exists
    # so a first run on a fresh checkout does not fail.
    path.parent.mkdir(parents=True, exist_ok=True)

    df.to_csv(path)

    if DEBUG:
        print(f'save_data ------ saving {path}')

    return None
# --------------------------------------------------------------------------------------------------
def load_data(ticker, set_index=False, index_column=''):
    """load_data() loads a ticker's CSV from `data/`, downloading it if missing.

    If `data/<ticker>.csv` does not exist, the data is fetched with
    `download_stock_data(ticker)` and written with `save_data(...)` before
    being read back from disk.

    Parameters
    ----------
    ticker : str (required)
        Symbol whose data should be loaded; also the CSV file's base name.
    set_index : bool (optional)
        When True, `index_column` is moved out of the columns and becomes
        the dataframe's index.
    index_column : str (optional)
        Name of the column to use as the index. Must be non-blank when
        `set_index` is True.

    Returns
    -------
    pandas.DataFrame
        The contents of `data/<ticker>.csv`.

    Raises
    ------
    ValueError
        If `set_index` is True but `index_column` is blank. This is checked
        before any download or disk access.
    """
    # The previous `assert(<non-empty string>)` was always truthy and so never
    # fired; raise explicitly, and do it before any network or disk work.
    if set_index and index_column == '':
        raise ValueError(
            'load_data() -> index_column is blank, '
            'please provide the correct column name'
        )

    # generate a path for the ticker data we want to load
    path = Path('data') / (ticker + '.csv')

    # check for the existence of that file
    if not path.is_file():
        if DEBUG:
            print(f'load_data ------ path {path} doesn\'t exist.')
            print(f'load_data ------ initiating download for [{ticker}] now...')

        # since data for that file doesn't exist, let's download it
        stock_data_to_save = download_stock_data(ticker)

        # and then save it out to disk so we have it for next time
        save_data(stock_data_to_save, filename=ticker + '.csv')

    # NOTE(review): `infer_datetime_format` is deprecated in recent pandas
    # releases; it is kept as-is because the project's pandas version is not
    # visible from here.
    data_df = pd.read_csv(
        path,
        # index_col='Date',
        parse_dates=True,
        infer_datetime_format=True
    )

    if set_index:
        data_df.set_index(index_column, inplace=True)

    if DEBUG:
        print('---- symbol_df ----')
        print(data_df.head())
        print(data_df.tail())

    return data_df