import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
# Base step size of an Elo update; elo_update multiplies every rating change by it.
K = 20.
# Elo points added to the home team's rating before a game is evaluated.
HOME_ADVANTAGE = 100.

# Regular-season game results, one row per game.
compact = pd.read_csv("./Documents/R Scripts/March Madness Submission Data/RegularSeasonCompactResults.csv")
# Keep seasons from 2002 onwards. The explicit .copy() makes the filtered frame
# independent of the one that was read, so adding columns below does not
# trigger pandas' chained-assignment (SettingWithCopy) warning.
compact = compact.loc[compact['Season'] >= 2002].copy()

# Every team that appears as a winner or a loser starts at a rating of 1500.
team_ids = set(compact.WTeamID).union(set(compact.LTeamID))
elo_dict = dict.fromkeys(team_ids, 1500)

# Winning margin in points.
compact['spread'] = compact.WScore - compact.LScore
# Post-game ratings of the winner and the loser; filled in game by game later.
compact['w_elo'] = None
compact['l_elo'] = None
def elo_pred(elo1, elo2):
    """Return the probability that the side rated ``elo1`` beats the side rated ``elo2``.

    This is a logistic curve in the rating difference on a base-10,
    400-point scale: equal ratings give 0.5, and a 400-point edge gives
    10/11.

    Parameters
    ----------
    elo1, elo2 : float
        Ratings of the two sides.

    Returns
    -------
    float
        Win probability for the first side, strictly between 0 and 1.
    """
    return 1. / (10. ** (-(elo1 - elo2) / 400.) + 1.)
def expected_margin(elo_diff):
    """Return the linear margin term ``7.5 + 0.006 * elo_diff``.

    Parameters
    ----------
    elo_diff : float
        Rating difference (first side minus second side).

    Returns
    -------
    float
        The value of the linear term. It is zero at ``elo_diff == -1250``
        and negative below that.
    """
    return 7.5 + 0.006 * elo_diff
def elo_update(w_elo, l_elo, margin):
    """Compute the pre-game win probability and the rating change for one game.

    Parameters
    ----------
    w_elo : float
        Winner's rating going into the game (including any adjustment the
        caller has already added).
    l_elo : float
        Loser's rating going into the game (same convention).
    margin : float
        Winner's score minus loser's score.

    Returns
    -------
    tuple of (float, float)
        ``(pred, update)`` where ``pred`` is the probability assigned to the
        winner by ``elo_pred`` and ``update`` is the number of rating points
        to move from the loser to the winner.
    """
    elo_diff = w_elo - l_elo
    pred = elo_pred(w_elo, l_elo)
    # Margin-of-victory multiplier: grows with the winning margin and shrinks
    # as the winner's rating edge grows.
    # NOTE: the denominator is zero when elo_diff == -1250 and negative below it.
    mult = ((margin + 3.) ** 0.8) / expected_margin(elo_diff)
    # The less expected the win (small pred), the larger the update.
    update = K * mult * (1 - pred)
    return pred, update
# Pre-game win probability assigned to the actual winner of each game, in row order.
preds = []
# Iterate over the frame's own index labels instead of a hard-coded starting
# label, so the loop keeps working when the input file or the season filter
# changes. Ratings in elo_dict carry over from game to game and are never reset.
for i in compact.index:
    # Get key data from current row
    w = compact.at[i, 'WTeamID']
    l = compact.at[i, 'LTeamID']
    margin = compact.at[i, 'spread']
    wloc = compact.at[i, 'WLoc']

    # Does either team get a home-court advantage?
    # "H" means the winner was at home, "A" means the winner was away (so the
    # loser gets the bonus); any other value gives no bonus to either side.
    w_ad, l_ad = 0., 0.
    if wloc == "H":
        w_ad += HOME_ADVANTAGE
    elif wloc == "A":
        l_ad += HOME_ADVANTAGE

    # Get elo updates as a result of the game. The home bonus only affects the
    # prediction and update size; it is not stored in the ratings themselves.
    pred, update = elo_update(elo_dict[w] + w_ad,
                              elo_dict[l] + l_ad,
                              margin)
    elo_dict[w] += update
    elo_dict[l] -= update
    preds.append(pred)

    # Stores new elos in the games dataframe
    compact.loc[i, 'w_elo'] = elo_dict[w]
    compact.loc[i, 'l_elo'] = elo_dict[l]
# Since the primary purpose of the Elo rating is to predict the March Madness tournament results before the tournament begins, we keep each team's final regular-season Elo for every season.
def final_elo_per_season(df, team_id):
    """Return one team's rating after its last game of each season.

    Parameters
    ----------
    df : pandas.DataFrame
        Game table with columns ``Season``, ``DayNum``, ``WTeamID``,
        ``LTeamID``, ``w_elo`` and ``l_elo``. It is not modified.
    team_id : int
        Team to extract.

    Returns
    -------
    pandas.DataFrame
        Columns ``team_id``, ``season`` and ``season_elo``, with one row per
        season in which the team has at least one game. Empty if the team
        never appears in ``df``.
    """
    # Filter first so only this team's games are sorted; sort_values already
    # returns a new frame, so the caller's frame is never mutated and no
    # full-table copy is needed.
    games = df.loc[(df.WTeamID == team_id) | (df.LTeamID == team_id), :]
    last_games = (
        games.sort_values(['Season', 'DayNum'])
        .drop_duplicates(['Season'], keep='last')
    )
    # Every remaining row has the team as winner or loser: take w_elo where it
    # won, otherwise l_elo.
    season_elo = last_games.w_elo.where(last_games.WTeamID == team_id,
                                        last_games.l_elo)
    return pd.DataFrame({
        'team_id': team_id,
        'season': last_games.Season,
        'season_elo': season_elo,
    })
# One end-of-season rating table per team, in the iteration order of team_ids.
df_list = [
    final_elo_per_season(compact, team)
    for team in team_ids
]
# Stack the per-team tables into a single long table.
season_elos = pd.concat(df_list)
# Save the result; index=None is passed through to to_csv unchanged.
season_elos.to_csv("elos_season.csv", index=None)