Skip to content

Commit

Permalink
add new features
Browse files Browse the repository at this point in the history
  • Loading branch information
chacetrev10 committed Apr 10, 2021
1 parent 20bb9ba commit f00232f
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 57 deletions.
29 changes: 26 additions & 3 deletions feat_calc.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import pandas as pd
import numpy as np

from datetime import timedelta,datetime
def avg_ppg(df):
    """Return the points scored per 100 estimated possessions.

    Possessions are estimated from the box-score totals of *df* as
    ``FGA - OREB + TOV + 0.4 * FTA``.

    Args:
        df: DataFrame with ``PTS``, ``FGA``, ``OREB``, ``TOV`` and ``FTA``
            columns whose values can be converted to float.

    Returns:
        Total points divided by total estimated possessions, times 100.

    Raises:
        ZeroDivisionError: if the estimated possession count is zero
            (for example when *df* has no rows).
    """
    # One vectorised pass over the five columns; skipna=False keeps the
    # NaN-propagating behaviour of the built-in sum().
    totals = df[['PTS', 'FGA', 'OREB', 'TOV', 'FTA']].astype(float).sum(skipna=False)
    pts = round(totals['PTS'], 4)
    poss = totals['FGA'] - totals['OREB'] + totals['TOV'] + 0.4 * totals['FTA']
    if poss == 0:
        # numpy floats would silently yield NaN/inf here; fail loudly instead.
        raise ZeroDivisionError('no possessions to compute points per 100 possessions')
    return float((pts / poss) * 100)

def avg_fg_pct(df):
    """Return the mean of the ``FG_PCT`` column, rounded to 4 decimals.

    The column values are converted to float before averaging. The mean is
    taken over the number of rows in *df*, so an empty frame raises
    ``ZeroDivisionError``.
    """
    pct_values = df['FG_PCT'].astype(float)
    game_count = len(df)
    return round(sum(pct_values) / game_count, 4)
Expand All @@ -11,4 +13,25 @@ def avg_ft_pct(df):
return round(sum(df['FT_PCT'].astype(float)) / len(df), 4)

def avg_rbpg(df):
    """Return the mean of the ``REB`` column, rounded to 4 decimals.

    Values are converted to float (like the other averaging helpers) so
    that numeric strings such as ``'44.0'`` are accepted.

    Args:
        df: DataFrame with a ``REB`` column, one row per game.

    Returns:
        Rebounds per row of *df*, rounded to 4 decimal places.

    Raises:
        ZeroDivisionError: if *df* has no rows.
    """
    rebounds = df['REB'].astype(float)
    return round(sum(rebounds) / len(df), 4)


def team_form(df, window_days=10):
    """Return the win ratio of a team over a recent window of days.

    The first row of *df* supplies both the team (``TEAM_ID``) and the
    reference date (``GAME_DATE``). Games of that team dated within
    ``window_days`` days up to and including the reference date are
    counted.

    Args:
        df: DataFrame with ``TEAM_ID``, ``GAME_DATE`` (``YYYY-MM-DD``
            strings) and ``WL`` columns.
        window_days: Length of the look-back window in days.

    Returns:
        Fraction of games in the window whose ``WL`` value is ``'W'``.

    Raises:
        IndexError: if *df* has no rows.
        ZeroDivisionError: if no game falls inside the window.
    """
    # NOTE(review): the reference game is simply row 0; callers presumably
    # need to pass the frame ordered so that row 0 is the game of interest.
    reference = df.iloc[0]
    end_date = reference['GAME_DATE']
    window_start = datetime.strptime(end_date, '%Y-%m-%d') - timedelta(days=window_days)
    start_date = window_start.strftime('%Y-%m-%d')

    # ISO-formatted date strings order the same way as the dates themselves.
    in_window = (
        (df['GAME_DATE'] <= end_date)
        & (df['GAME_DATE'] >= start_date)
        & (df['TEAM_ID'] == reference['TEAM_ID'])
    )
    recent_games = df.loc[in_window]
    wins = int((recent_games['WL'] == 'W').sum())
    return wins / len(recent_games)

def back_to_back(df):
    """Return True if *df* has a game dated the day before its first row.

    Args:
        df: DataFrame with a ``GAME_DATE`` column of ``YYYY-MM-DD`` strings.
            The first row supplies the reference date.

    Returns:
        ``True`` when any row's ``GAME_DATE`` equals the day before the
        reference date, ``False`` otherwise.

    Raises:
        IndexError: if *df* has no rows.
    """
    game_day = datetime.strptime(df.iloc[0]['GAME_DATE'], '%Y-%m-%d')
    previous_day = (game_day - timedelta(days=1)).strftime('%Y-%m-%d')
    return bool((df['GAME_DATE'] == previous_day).any())

103 changes: 49 additions & 54 deletions prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,16 @@
https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GettingStarted.Python.04.html
'''
#pylint: disable=E1101

import boto3
from boto3.dynamodb.conditions import Key
import pandas as pd
import numpy as np
from feat_calc import *
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

TABLE_NAME='nba'

def query_games(year):
#DON'T COMMIT WITH AWS KEYS!!!!
dynamo_conn = boto3.resource('dynamodb', region_name='us-east-2', aws_access_key_id='', aws_secret_access_key='')
Expand All @@ -24,7 +22,6 @@ def query_games(year):
# 'ProjectionExpression': "#yr, title, info.rating",
# 'ExpressionAttributeNames': {"#yr": "year"}
}

done = False
start_key = None
while not done:
Expand All @@ -34,68 +31,64 @@ def query_games(year):
#display_movies(response.get('Items', []))
start_key = response.get('LastEvaluatedKey', None)
done = start_key is None

game_data = pd.DataFrame(response['Items'])
game_data['IS_HOME'] = np.where(game_data['MATCHUP'].str.contains('@'), False, True)
return game_data
#return pd.DataFrame(response['Items'])

def extract_features_train(df, matchup, date):
    """Build the labelled feature dictionary for one played game.

    The game's rows are located by date and by either spelling of the
    matchup string, then split into a home and an away row using
    ``IS_HOME``. Features are computed from each team's rows dated strictly
    before *date*.

    Args:
        df: DataFrame of game rows with ``GAME_DATE``, ``MATCHUP``,
            ``IS_HOME``, ``TEAM_NAME`` and ``WL`` columns, plus the columns
            read by the feature helpers.
        matchup: Matchup string of one of the game's rows; its first and
            last three characters are used as the team abbreviations.
        date: ``GAME_DATE`` value of the game.

    Returns:
        Dict of feature name to value, plus ``HOME_WIN`` (1 if the home row
        has ``WL == 'W'``, else 0).

    Raises:
        IndexError: if no home or away row is found for the game.
    """
    # Build the mirrored spelling of the matchup so both rows are matched.
    if '@' in matchup:
        matchup_v2 = matchup[-3:] + ' vs. ' + matchup[:3]
    else:
        matchup_v2 = matchup[-3:] + ' @ ' + matchup[:3]

    same_game = (df['GAME_DATE'] == date) & df['MATCHUP'].isin([matchup, matchup_v2])
    game = df.loc[same_game]
    is_home = game['IS_HOME'].astype(bool)
    home = game.loc[is_home]
    away = game.loc[~is_home]

    home_str = home['TEAM_NAME'].iloc[0]
    away_str = away['TEAM_NAME'].iloc[0]

    # Only games strictly before this one may contribute to its features.
    earlier = df['GAME_DATE'] < date
    home_past = df.loc[earlier & (df['TEAM_NAME'] == home_str)]
    away_past = df.loc[earlier & (df['TEAM_NAME'] == away_str)]

    label = 1 if home['WL'].iloc[0] == 'W' else 0

    # Free-throw percentage is intentionally left out of the feature set.
    # Key order defines the training column order; keep it in step with
    # extract_features_predict.
    feat_dict = {
        'PPG_HOME': avg_ppg(home_past),  # Points per 100 possessions
        'PPG_AWAY': avg_ppg(away_past),
        'FG_PCT_HOME': avg_fg_pct(home_past),  # Field goal percentage
        'FG_PCT_AWAY': avg_fg_pct(away_past),
        'RBPG_HOME': avg_rbpg(home_past),  # Rebounds per game
        'RBPG_AWAY': avg_rbpg(away_past),
        'FORM_HOME': team_form(home_past),  # Team's recent performances
        'FORM_AWAY': team_form(away_past),
        'HOME_WIN': label,
    }
    return feat_dict

def extract_features_predict(df, home, away):
    """Build the unlabelled feature vector for a prospective matchup.

    Args:
        df: DataFrame of game rows with a ``TEAM_ABBREVIATION`` column plus
            the columns read by the feature helpers.
        home: Abbreviation of the home team.
        away: Abbreviation of the away team.

    Returns:
        List of feature values, alternating home and away for each feature,
        in the same order as the feature keys of extract_features_train.
    """
    home_past = df.loc[df['TEAM_ABBREVIATION'] == home]
    away_past = df.loc[df['TEAM_ABBREVIATION'] == away]

    # Free-throw percentage is intentionally left out of the feature set.
    feat_list = [
        avg_ppg(home_past),  # Points per 100 possessions
        avg_ppg(away_past),
        avg_fg_pct(home_past),  # Field goal percentage
        avg_fg_pct(away_past),
        avg_rbpg(home_past),  # Rebounds per game
        avg_rbpg(away_past),
        team_form(home_past),  # Team form
        team_form(away_past),
    ]
    return feat_list

def train_model(X, y):
    """Fit and return a linear-kernel support vector classifier.

    Args:
        X: Training feature matrix.
        y: Training labels.

    Returns:
        The fitted ``svm.SVC`` instance.
    """
    # probability=True makes predict_proba available on the fitted model.
    clf = svm.SVC(kernel='linear', gamma='scale', probability=True)
    clf.fit(X, y)
    return clf

def test_model(clf, X, y_true):
Expand All @@ -119,50 +112,52 @@ def predict_winner(df, home, away, clf_trained):
games21 = query_games('2021')
#Combine into one dataframe
games = games20.append(games21)

# games = games.sort_values(by='GAME_DATE')
#Loop through every row of 2021 games and extract relevant features
used_ids = []
feat_dicts = []
for i, row in games.iterrows():
if row['GAME_DATE'][:4] == '2021':
if row['GAME_ID'] not in used_ids:
#print(row['MATCHUP'])
new_feats = extract_features_train(games, row['MATCHUP'], row['GAME_DATE'])
feat_dicts.append(new_feats)
used_ids.append(row['GAME_ID'])

try:
new_feats = extract_features_train(games, row['MATCHUP'], row['GAME_DATE'])
feat_dicts.append(new_feats)
used_ids.append(row['GAME_ID'])
except Exception as e:
print(e)
#Convert list of dictionaries to dataframe
feat_df = pd.DataFrame(feat_dicts)

#Separate the labels from the features
labels = feat_df['HOME_WIN']
feats = feat_df.loc[:, feat_df.columns != 'HOME_WIN']

#Split into training and testing
X_train, X_test, y_train, y_test = train_test_split(feats, labels, test_size=0.20, random_state=50)

#Train the model on training set
model = train_model(X_train, y_train)
#Test on testing set to determine performance
# test_acc = test_model(model, X_test, y_test)
# print(test_acc)

home_exists = False
while not home_exists:
home_team = input("Enter home team: ")
if len(games.loc[games['TEAM_ABBREVIATION'] == home_team]) == 0:
print("Home team does not exist. Try again.")
else:
home_exists = True

away_exists = False
while not away_exists:
away_team = input("Enter away team: ")
if len(games.loc[games['TEAM_ABBREVIATION'] == away_team]) == 0:
print("Away team does not exist. Try again.")
else:
away_exists = True

winner, proba = predict_winner(games, home_team, away_team, model)

print("Prediction: {} will win with {}% probability.".format(winner, proba*100))
avg = 0
for i in range(20):
X_train, X_test, y_train, y_test = train_test_split(feats, labels, test_size=0.20,random_state = i)
#Train the model on training set
model = train_model(X_train, y_train)
#Test on testing set to determine performance
test_acc = test_model(model, X_test, y_test)
print(test_acc)
avg += test_acc
print('Avg =',avg/20)
con = 1
while(con == 1):
home_exists = False
while not home_exists:
home_team = input("Enter home team: ")
if len(games.loc[games['TEAM_ABBREVIATION'] == home_team]) == 0:
print("Home team does not exist. Try again.")
else:
home_exists = True
away_exists = False
while not away_exists:
away_team = input("Enter away team: ")
if len(games.loc[games['TEAM_ABBREVIATION'] == away_team]) == 0:
print("Away team does not exist. Try again.")
else:
away_exists = True
winner, proba = predict_winner(games, home_team, away_team, model)
print("Prediction: {} will win with {}% probability.".format(winner, proba*100))
# con = input('Press 1 to continue, or 0 to exit: ')

0 comments on commit f00232f

Please sign in to comment.