-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
182 lines (155 loc) · 6.89 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import streamlit as st
# The page configuration is applied right after importing Streamlit and before
# any other import runs.
# NOTE(review): presumably because st.set_page_config has to be the first
# Streamlit command executed on a page -- confirm before reordering these lines.
st.set_page_config(layout="wide", page_title="SOP Bot", page_icon=":gear:")
from streamlit_extras.metric_cards import style_metric_cards
from streamlit_extras.add_vertical_space import add_vertical_space
from streamlit_extras.dataframe_explorer import dataframe_explorer
from streamlit_extras.altex import sparkline_chart
from streamlit_extras.altex import hist_chart
import pandas as pd
# Profiling imports are disabled; the code that used them further down in this
# file is commented out as well.
#from ydata_profiling import ProfileReport
#from streamlit_pandas_profiling import st_profile_report
# Project-local modules: dataset loading/processing and the DNN wrapper.
from utils.parse import Dataset
from tfx_algo import DNNModel
# Depth handed to Dataset.aggregate_data and Dataset.potential_match_preprocessing.
# NOTE(review): presumably the number of prior matches folded into each
# aggregate -- confirm in utils/parse.py.
AGGREGATION_DEPTH = 10

# Columns excluded from both the training records and single-match inputs.
COLUMNS_TO_DROP = [
    "fouls",
    "yellowcards",
    "redcards",
    "goalkeepersaves",
    "offsides",
    "longballs",
]

# --- Page header -----------------------------------------------------------
st.title("Welcome to SOP Bot :gear:")
st.caption("By Brothers Alejandro Alonso (AAWorks) and Andres Alonso (AXAStudio)")
# Adjacent literals are concatenated by the parser into one intro string.
st.info(
    "SOP Bot is a sports outcome prediction bot with the goal of accurately predicting the outcome of upcoming soccer matches. "
    "SOP Bot utilizes a live soccer API (FootAPI), an extensive amount of data processing (see utils/parse.py), and a Tensorflow-Keras deep neural network (tfx_algo.py). "
    "Currently supports teams from the English Premier League and the Spanish La Liga. "
    "Cross-matchups are supported, although precision sees a ~7% increase in error."
)

# Shared dataset handle used by every tab below.
dataset = Dataset("laligapremier")
@st.cache_data
def preprocessing(_dataset):
    """Run the dataset pipeline and return every intermediate stage.

    The result is memoised by ``st.cache_data``. The parameter name starts
    with an underscore, which is Streamlit's convention for arguments that
    should be left out of the cache key.

    Args:
        _dataset: The ``Dataset`` instance to process.

    Returns:
        A 4-tuple ``(raw_preview, aggregated, normalized, training_records)``:
        the output of ``peek()``, of ``aggregate_data(AGGREGATION_DEPTH)``,
        of ``normalize_aggregate(...)`` and of ``dnn_preprocessing(...)``
        (with ``COLUMNS_TO_DROP`` removed and ties excluded), in that order.
    """
    raw_preview = _dataset.peek()
    aggregated = _dataset.aggregate_data(AGGREGATION_DEPTH)
    normalized = _dataset.normalize_aggregate(aggregated)
    training_records = _dataset.dnn_preprocessing(
        normalized,
        columns_to_drop=COLUMNS_TO_DROP,
        include_ties=False,
    )
    return raw_preview, aggregated, normalized, training_records


# Unpack the pipeline stages for use by the tabs further down.
raw, agg, norm, records = preprocessing(dataset)
@st.cache_resource
def train_model():
    """Build and train the DNN on the preprocessed records.

    Reads the module-level ``records`` rather than taking an argument, and is
    wrapped in ``st.cache_resource`` so the trained model object is reused.

    Returns:
        The ``DNNModel`` instance after ``build()`` and ``train()`` have run.
    """
    dnn = DNNModel(records)
    dnn.build()
    dnn.train()
    return dnn


# Trained model shared by the prediction and analytics tabs.
model = train_model()
# Profile-report generation is switched off (its imports at the top of the file
# are commented out too). Kept here so it can be restored later.
# @st.cache_resource
# def generate_profile_report():
#     return ProfileReport(records, minimal=True)
# pr = generate_profile_report()

# One label per tab, in display order; unpacked below in the same order.
tab_labels = [
    "Get Prediction :brain:",
    "Model Analytics :spider_web:",
    "Processed Dataset Profile Report :mag:",
    "View Base Dataset :page_facing_up:",
]
pred, tfkeras, preprocessed_dataset_profile, view_datasets = st.tabs(tab_labels)
with pred:
    # Both dropdowns offer the same list of teams exposed by the dataset.
    team_options = dataset.team_names
    st.info("Predict a Match Outcome")
    home_col, away_col = st.columns(2)
    home_team = home_col.selectbox("Home Team", team_options)
    away_team = away_col.selectbox("Away Team", team_options)

    # The button stays disabled while the same team is picked on both sides.
    same_team_selected = bool(home_team == away_team)
    submitted = st.button(
        "Generate Prediction",
        disabled=same_team_selected,
        use_container_width=True,
    )

    if submitted:
        match_features = dataset.potential_match_preprocessing(
            agg, home_team, away_team, AGGREGATION_DEPTH, COLUMNS_TO_DROP
        )
        # The "result" column is removed before the frame is given to the model.
        match_features.drop("result", axis=1, inplace=True)
        probability, prediction = model.pretty_prediction(
            match_features, home_team, away_team
        )

        # Probabilities strictly between 0.45 and 0.55 are shown as a warning
        # instead of a success message.
        is_toss_up = 0.45 < probability < 0.55
        if is_toss_up:
            st.warning(prediction)
        else:
            st.success(prediction)
# Every percentage delta on this tab is reported relative to this value.
# NOTE(review): presumably the coin-flip baseline of the two-outcome model
# (ties are excluded during preprocessing) -- confirm.
_BASELINE_PCT = 50


def _history_frame(history, key):
    """Build the per-epoch frame that backs one metric card.

    Args:
        history: Training history indexed by metric name; each entry holds
            one value per epoch.
        key: Metric name to extract, e.g. ``"val_loss"``.

    Returns:
        A DataFrame with the metric values under ``key`` and a zero-based
        ``epoch`` column.
    """
    frame = pd.DataFrame()
    frame[key] = history[key]
    frame["epoch"] = list(range(frame.shape[0]))
    return frame


def _percent_card_values(score):
    """Return ``(value, delta)`` display strings for a score shown as a percentage.

    ``score`` is multiplied by 100 and rounded to two decimals; the delta is
    that percentage minus ``_BASELINE_PCT``, also rounded to two decimals.
    """
    pct = round(score * 100, 2)
    return f"{pct}%", f"{round(pct - _BASELINE_PCT, 2)}%"


with tfkeras:
    st.info("Tensorflow/Keras Deep Neural Network Model Summary & Evaluation Metrics")
    st.code(model.summary(), language="python")
    history = model.train_analytics()
    # NOTE(review): the return value is never read on this page; the call is
    # kept in case it has side effects inside the model wrapper.
    trainstat = model.evaluate_train_on_confidence()
    style_metric_cards()
    st.divider()

    # (history key, card label, shown as percentage?) in left-to-right order.
    # Loss cards show the lowest value seen; accuracy cards show the highest.
    history_cards = (
        ("val_loss", "Test Loss", False),
        ("val_accuracy", "Test Accuracy", True),
        ("loss", "Training Loss", False),
        ("accuracy", "Training Accuracy", True),
    )
    for column, (key, label, as_percent) in zip(st.columns(4), history_cards):
        with column:
            frame = _history_frame(history, key)
            if as_percent:
                value, delta = _percent_card_values(frame[key].max())
                st.metric(label, value, delta=delta)
            else:
                # Fixed-point formatting instead of slicing str(float), which
                # mis-renders values printed in scientific notation.
                st.metric(label, f"{frame[key].min():.4f}")
            sparkline_chart(
                data=frame,
                x="epoch",
                y=f"{key}:Q",
                height=80,
                autoscale_y=True,
            )

    st.divider()

    # Best (maximum) precision/recall over all epochs, one card per metric.
    summary_cards = (
        ("val_precision", "Test Precision"),
        ("val_recall", "Test Recall"),
        ("precision", "Train Precision"),
        ("recall", "Train Recall"),
    )
    best_scores = [max(history[key]) for key, _ in summary_cards]
    for column, (_, label), best in zip(st.columns(4), summary_cards, best_scores):
        value, delta = _percent_card_values(best)
        column.metric(label=label, value=value, delta=delta)

    st.divider()

    # Test-set outputs scaled by 100 and converted to integers for the histogram.
    predictions = model.get_test_predictions() * 100
    prediction_df = pd.DataFrame()
    prediction_df["probability"] = predictions.astype(int).tolist()
    hist_chart(
        data=prediction_df,
        x="probability",
        title="Distribution of Outputted Test Set Probabilities",
    )
with preprocessed_dataset_profile:
    # This tab only shows notices for now; the report itself is disabled.
    profile_notices = (
        "Profile Report on the Utilized Dataset (Post Processing)",
        "Currently unable to display due to recent updates to Streamlit's package handling. Waiting on fix from devs.",
    )
    for notice in profile_notices:
        st.info(notice)
    # Disabled rendering of the profile report, kept for later restoration:
    # if pr:
    #     st_profile_report(pr)
with view_datasets:
    st.info("Base Dataset - Pulled Directly from FootAPI")
    # Wrap the raw preview in a DataFrame, pass it through the interactive
    # dataframe_explorer filter, and show the filtered result at full width.
    raw_frame = pd.DataFrame(raw)
    st.dataframe(dataframe_explorer(raw_frame), use_container_width=True)