import os
from time import sleep

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
# First season served by the per-season endpoint; earlier seasons are requested
# through the "leagueHistory" endpoint instead.
_FIRST_PER_SEASON_ENDPOINT_YEAR = 2018
_ESPN_FFL_API_ROOT = 'https://lm-api-reads.fantasy.espn.com/apis/v3/games/ffl'
_ESPN_LEAGUE_VIEWS = 'view=mDraftDetail&view=mSettings&view=mTeam&view=modular&view=mNav'


def _uses_league_history(season):
    """Return True when ``season`` must be requested via the leagueHistory endpoint."""
    return season < _FIRST_PER_SEASON_ENDPOINT_YEAR


def _build_league_url(league_id, season):
    """Return the league URL (with all requested views) for one season."""
    if _uses_league_history(season):
        return f'{_ESPN_FFL_API_ROOT}/leagueHistory/{league_id}?{_ESPN_LEAGUE_VIEWS}&seasonId={season}'
    return f'{_ESPN_FFL_API_ROOT}/seasons/{season}/segments/0/leagues/{league_id}?{_ESPN_LEAGUE_VIEWS}'


def _extract_first_round_rows(league_data, season):
    """Return one dict per first-round draft pick, joined with that team's results."""
    # Index teams and members by id once instead of rescanning both lists per pick.
    teams_by_id = {team['id']: team for team in league_data['teams']}
    members_by_id = {member['id']: member for member in league_data['members']}

    rows = []
    for pick in league_data['draftDetail']['picks']:
        if pick['roundId'] != 1:
            continue
        row = {
            'roundId': pick['roundId'],
            'roundPickNumber': pick['roundPickNumber'],
            'teamId': pick['teamId'],
        }
        team = teams_by_id.get(pick['teamId'])
        if team is not None:
            overall = team['record']['overall']
            transactions = team['transactionCounter']
            # row['team_name'] = team['name']
            # row['team_abbrev'] = team['abbrev']
            row['rankCalculatedFinal'] = team['rankCalculatedFinal']
            # NOTE: despite the column name, this value is read from 'gamesBack'.
            row['regular_sn_overall_rank'] = overall['gamesBack']
            row['wins'] = overall['wins']
            row['losses'] = overall['losses']
            row['win_percentage'] = overall['percentage']
            # row['owners'] = team['owners']
            row['acquisitions'] = transactions['acquisitions']
            row['trades'] = transactions['trades']
            # Use the first listed owner that appears in the league's member list;
            # teams with no matching member get no 'firstName' (NaN in the frame).
            for owner_id in team.get('owners', []):
                member = members_by_id.get(owner_id)
                if member is not None:
                    row['firstName'] = member.get('firstName', 'NO_FIRST_NAME')
                    break
        row['year'] = season
        rows.append(row)
    return rows


def fetch_espn_league_data(
    league_id,
    USER_AGENT,
    SWID_COOKIE,
    ESPN_S2_COOKIES,
    start_season=2015,
    end_season=2023,
    request_delay=5,
    timeout=30,
):
    """
    Fetch first-round draft picks and team results from an ESPN fantasy football league.

    One HTTP request is made per season. A season whose request fails (network
    error, timeout, non-200 status, or an empty league-history payload) is
    reported with print() and skipped; the remaining seasons are still fetched.

    Parameters:
    - league_id (str): The unique identifier for the ESPN league.
    - USER_AGENT (str): The user agent string for the HTTP request header.
    - SWID_COOKIE (str): The SWID cookie for ESPN authentication.
    - ESPN_S2_COOKIES (str): The ESPN S2 cookie for authentication.
    - start_season (int, optional): The starting season year. Defaults to 2015.
    - end_season (int, optional): The ending season year (inclusive). Defaults to 2023.
    - request_delay (float, optional): Seconds to wait between consecutive requests.
      Defaults to 5.
    - timeout (float, optional): Seconds to wait for each HTTP response before the
      season is skipped. Defaults to 30.

    Returns:
    - df (pd.DataFrame): One row per first-round pick per season, with columns
      roundId, roundPickNumber, teamId, rankCalculatedFinal, regular_sn_overall_rank,
      wins, losses, win_percentage, acquisitions, trades, firstName and year.
      The frame is empty when no season could be fetched.

    Note: If needed, uncomment team_name, team_abbrev, and owners for unique identifiers or data validation.
    """
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': USER_AGENT,
    }
    espn_cookies = {
        "swid": SWID_COOKIE,
        "espn_s2": ESPN_S2_COOKIES,
    }

    all_data = []
    for request_index, season in enumerate(range(start_season, end_season + 1)):
        # Throttle only between requests: no wait before the first or after the last.
        if request_index and request_delay:
            sleep(request_delay)

        url = _build_league_url(league_id, season)
        try:
            # Without a timeout a stalled connection would block the whole run.
            r = requests.get(url, headers=headers, cookies=espn_cookies, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Request failed for year {season}: {exc}. Skipping...")
            continue
        if r.status_code != 200:
            print(f"HTTP error for year {season}: {r.status_code} {r.reason}. Skipping...")
            continue

        espn_raw_data = r.json()
        if _uses_league_history(season):
            # The leagueHistory endpoint wraps the season payload in a list.
            if not espn_raw_data:
                print(f"No league history returned for year {season}. Skipping...")
                continue
            espn_raw_data = espn_raw_data[0]

        all_data.extend(_extract_first_round_rows(espn_raw_data, season))

    df = pd.DataFrame(all_data)
    return df
# Usage:
# Each setting may be supplied through an environment variable so that
# credentials do not have to live in the source file. The literals are kept
# only as fallbacks, so a run without those variables behaves as before.
# NOTE(review): the SWID / espn_s2 fallbacks look like real session credentials
# committed to the file -- rotate them and remove the literals once the
# environment variables are in use.
league_id = os.environ.get('ESPN_LEAGUE_ID', '459034')
SWID_COOKIE = os.environ.get('ESPN_SWID', 'FB0D810B-1463-40C1-BF4B-66AEDAAFC086')
ESPN_S2_COOKIES = os.environ.get('ESPN_S2', 'AEBPR9B%2Br6I9CvOwG0LZiOowZZfKFDUxlnFBrrNP2z6PdUTI%2B%2FBZfQYv7MiVTfLJ7mBQGlksfBO6fAopwl7rej%2ByYOiM1Ge5qSTv3UZB%2BAEdoTFRQ7222hDyn5IJFohGlnsT%2FCna56N1EsmsGtSoidyGjWHVqH5wVtnJZkVdQHVx0Xwq1LAUWI0iQ3KKpJzrTYA5Q1ZPY2ADJi%2FYdq3z0zfz%2BjvKq%2BQgLBGOYe41lasy8eR157IDdcbF1WF0j1g9yBHSUz3a4T4NCptJiv4Rik6Q7WUgQ9DOEHOsnRV7cQEWPA%3D%3D')
USER_AGENT = os.environ.get('ESPN_USER_AGENT', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36')
# Fetch every season in the function's default range and show the combined frame.
df = fetch_espn_league_data(league_id, USER_AGENT, SWID_COOKIE, ESPN_S2_COOKIES)
print(df)
roundId roundPickNumber teamId rankCalculatedFinal \ 0 1 1 6 8 1 1 2 5 6 2 1 3 9 5 3 1 4 7 1 4 1 5 3 4 .. ... ... ... ... 91 1 8 13 0 92 1 9 10 0 93 1 10 6 0 94 1 11 3 0 95 1 12 7 0 regular_sn_overall_rank wins losses win_percentage acquisitions \ 0 0.0 8 5 0.615385 5 1 0.0 5 8 0.384615 11 2 0.0 12 1 0.923077 11 3 0.0 6 7 0.461538 42 4 0.0 5 8 0.384615 17 .. ... ... ... ... ... 91 1.0 0 1 0.000000 2 92 0.0 1 0 1.000000 4 93 1.0 0 1 0.000000 1 94 1.0 0 1 0.000000 1 95 0.0 1 0 1.000000 0 trades firstName year 0 1 vincent 2015 1 0 Michael 2015 2 0 robby 2015 3 2 Weston 2015 4 0 Chad 2015 .. ... ... ... 91 0 william 2023 92 0 brady 2023 93 0 Ryan 2023 94 0 Chad 2023 95 0 Weston 2023 [96 rows x 12 columns]
# Preview the first rows of the fetched data (displayed as a table when this
# line is the last expression of a notebook cell).
df.head()
roundId | roundPickNumber | teamId | rankCalculatedFinal | regular_sn_overall_rank | wins | losses | win_percentage | acquisitions | trades | firstName | year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 6 | 8 | 0.0 | 8 | 5 | 0.615385 | 5 | 1 | vincent | 2015 |
1 | 1 | 2 | 5 | 6 | 0.0 | 5 | 8 | 0.384615 | 11 | 0 | Michael | 2015 |
2 | 1 | 3 | 9 | 5 | 0.0 | 12 | 1 | 0.923077 | 11 | 0 | robby | 2015 |
3 | 1 | 4 | 7 | 1 | 0.0 | 6 | 7 | 0.461538 | 42 | 2 | Weston | 2015 |
4 | 1 | 5 | 3 | 4 | 0.0 | 5 | 8 | 0.384615 | 17 | 0 | Chad | 2015 |
# Normalize 'firstName': trim surrounding whitespace, then convert to proper case.
# Title-casing alone leaves names that differ only by stray whitespace as
# separate values, which splits one owner into two groups in every
# value_counts()/groupby('firstName') below (presumably the cause of the
# duplicate "Weston" entries in the captured output).
df['firstName'] = df['firstName'].str.strip().str.title()
# #Post Data Collection: Filter of Years to Analyze if needed
# # Assuming 'year' is the column that represents years in df
# start_year = 2015
# end_year = 2023
# # Filter the dataframe by the range of years
# df = df[(df['year'] >= start_year) & (df['year'] <= end_year)]
# Number of first-round picks (i.e. seasons) recorded for each owner name.
count_firstName = df['firstName'].value_counts()
print(count_firstName)
Rhett 9 Chad 9 Brady 9 Casey 9 David 9 Chase 8 Ryan 7 Scott 7 Patrick 6 Weston 5 Weston 4 Dan 3 William 3 Michael 3 Robby 2 Vincent 2 Jackson 1 Name: firstName, dtype: int64
# df.to_csv('First_Round.csv', index=False)
# Build an owner x pick-slot table: how many times each 'firstName' drafted
# from each 'roundPickNumber' (0 where an owner never had that slot).
pick_counts_by_owner = df.groupby('firstName')['roundPickNumber'].value_counts()
frequency = pick_counts_by_owner.unstack().fillna(0).astype(int)
# Target image size in pixels and the DPI used to convert it to inches
# (default 535.5 x 510, optimized for the portfolio website).
desired_width, desired_height = 535.5, 510
dpi = 72
width_in_inches, height_in_inches = desired_width / dpi, desired_height / dpi
plt.figure(figsize=(width_in_inches, height_in_inches))
heatmap_axes = sns.heatmap(frequency, annot=True, cmap="YlGnBu", fmt='g', linewidths=.5)
heatmap_axes.set_title("Frequency of RoundPickNumber by FirstName")
# Write the image to disk first, then display the figure.
plt.savefig('heatmap.png', dpi=dpi, bbox_inches='tight')
plt.show()
# Mean first-round pick slot per owner, rounded to two decimals and sorted
# ascending (lowest average slot first).
mean_pick_by_owner = df.groupby('firstName')['roundPickNumber'].mean()
average_pick = mean_pick_by_owner.round(2).sort_values()
average_pick
firstName Robby 2.50 Vincent 3.50 Chase 4.50 Michael 5.00 Rhett 5.11 Casey 5.22 Weston 5.40 Chad 5.67 Scott 5.86 Ryan 6.14 Brady 6.22 Dan 6.33 Patrick 6.50 William 7.00 David 7.33 Weston 8.75 Jackson 10.00 Name: roundPickNumber, dtype: float64
# Order the owners on the x-axis by their median 'roundPickNumber', lowest first.
median_pick_by_owner = df.groupby('firstName')['roundPickNumber'].median()
order = median_pick_by_owner.sort_values(ascending=True).index.tolist()
plt.figure(figsize=(12, 8))
boxplot_axes = sns.boxplot(x='firstName', y='roundPickNumber', data=df, order=order)
boxplot_axes.set_title("Boxplot of RoundPickNumber by FirstName")
# Optional: tilt the x-labels so that they do not overlap.
plt.xticks(rotation=45)
plt.show()
# Champions
# Step 1: keep only the rows whose final rank is 1.
filtered_df = df[df['rankCalculatedFinal'] == 1]
# Step 2: count pick slots for 2021 and 2022 separately. Their labels get an
# asterisk, marking the seasons in which the league had turned into a 12-team league.
in_asterisk_years = filtered_df['year'].isin([2021, 2022])
filtered_years_df = filtered_df[in_asterisk_years]
year_counts = filtered_years_df['roundPickNumber'].value_counts().rename(lambda slot: f"{slot}*")
# Step 3: count pick slots for every other season (plain labels).
other_years_df = filtered_df[~in_asterisk_years]
other_counts = other_years_df['roundPickNumber'].value_counts()
# Stack both sets of counts into one Series and order it from most to least frequent.
final_counts = pd.concat([year_counts, other_counts])
sorted_counts = final_counts.sort_values(ascending=False)
sorted_counts
4 2 10* 1 3* 1 5 1 8 1 9 1 10 1 Name: roundPickNumber, dtype: int64
# Top three finishers (get money back)
# Step 1: keep only the rows whose final rank is 1, 2 or 3.
# The lower bound matters: teams of the season still in progress carry a
# rankCalculatedFinal of 0, so a plain "<= 3" would count every one of them
# as a top-three finisher.
filtered_df = df[df['rankCalculatedFinal'].between(1, 3)]
# Step 2: count pick slots for 2021 and 2022 separately. Their labels get an
# asterisk, marking the seasons in which the league had turned into a 12-team league.
in_asterisk_years = filtered_df['year'].isin([2021, 2022])
filtered_years_df = filtered_df[in_asterisk_years]
year_counts = filtered_years_df['roundPickNumber'].value_counts()
year_counts = year_counts.rename(lambda x: f"{x}*")
# Step 3: count pick slots for every other season (plain labels).
other_years_df = filtered_df[~in_asterisk_years]
other_counts = other_years_df['roundPickNumber'].value_counts()
# Merge the results using pd.concat
final_counts = pd.concat([year_counts, other_counts])
# Sort values from largest to lowest
sorted_counts = final_counts.sort_values(ascending=False)
sorted_counts
4 5 5 4 10 3 2 3 9 3 3 3 1 2 8 2 7 2 10* 2 1* 1 12* 1 3* 1 8* 1 6 1 11 1 12 1 Name: roundPickNumber, dtype: int64
# Data validation check:
# count the rows whose rankCalculatedFinal equals 1 (one champion is expected
# per completed season).
is_champion_row = df['rankCalculatedFinal'] == 1
count = len(df[is_champion_row])
print(f"The number of times rankCalculatedFinal (Champions) is equal to 1 is: {count}")
The number of times rankCalculatedFinal (Champions) is equal to 1 is: 8
# Drop the 2023 season from df (note: this rebinds df for everything that follows).
df = df[df['year'] != 2023]
# Reference points shared by the four masks below: the mean win percentage and
# the median final rank over all remaining team-seasons.
league_mean_win_pct = df['win_percentage'].mean()
league_median_final_rank = df['rankCalculatedFinal'].median()
# Underperformers: win percentage above the mean, yet a final rank worse
# (numerically larger) than the median.
high_win_pct_mask = df['win_percentage'] > league_mean_win_pct
low_playoff_rank_mask = df['rankCalculatedFinal'] > league_median_final_rank
underperformers_in_playoffs = df[high_win_pct_mask & low_playoff_rank_mask]
# Overperformers: win percentage below the mean, yet a final rank better
# (numerically smaller) than the median.
low_win_pct_mask = df['win_percentage'] < league_mean_win_pct
high_playoff_rank_mask = df['rankCalculatedFinal'] < league_median_final_rank
overperformers_in_playoffs = df[low_win_pct_mask & high_playoff_rank_mask]
# Both reports show the same four columns.
playoff_report_columns = ['year', 'firstName', 'win_percentage', 'rankCalculatedFinal']
print("Underperformers in Playoffs:")
print(underperformers_in_playoffs[playoff_report_columns])
print("\nOverperformers in Playoffs:")
print(overperformers_in_playoffs[playoff_report_columns])
Underperformers in Playoffs: year firstName win_percentage rankCalculatedFinal 0 2015 Vincent 0.615385 8 6 2015 Casey 0.538462 7 19 2016 Michael 0.692308 7 25 2017 Brady 0.692308 7 29 2017 Chase 0.692308 8 64 2021 Chase 0.571429 7 72 2022 Chase 0.500000 8 82 2022 Patrick 0.714286 7 Overperformers in Playoffs: year firstName win_percentage rankCalculatedFinal 3 2015 Weston 0.461538 1 4 2015 Chad 0.384615 4 8 2015 Rhett 0.384615 2 11 2016 Robby 0.461538 2 21 2017 Casey 0.384615 2 22 2017 Michael 0.461538 4 27 2017 Weston 0.384615 1 30 2018 Chase 0.461538 5 36 2018 Brady 0.461538 4 54 2020 Ryan 0.461538 3
# Work on a copy of the data without the 2023 season.
df_filtered = df[df['year'] != 2023]
# Per-owner summary: mean win percentage and mean final rank, total wins and losses.
owner_metrics = {
    'win_percentage': 'mean',
    'rankCalculatedFinal': 'mean',
    'wins': 'sum',
    'losses': 'sum',
}
grouped_data = df_filtered.groupby('firstName').agg(owner_metrics).reset_index()
# Reference points shared by the four masks below: the mean of the owners'
# win percentages and the median of the owners' average final ranks.
owner_mean_win_pct = grouped_data['win_percentage'].mean()
owner_median_final_rank = grouped_data['rankCalculatedFinal'].median()
# Underperformers: win percentage above the mean, yet an average final rank
# worse (numerically larger) than the median.
high_win_pct_mask = grouped_data['win_percentage'] > owner_mean_win_pct
low_playoff_rank_mask = grouped_data['rankCalculatedFinal'] > owner_median_final_rank
underperformers = grouped_data[high_win_pct_mask & low_playoff_rank_mask]
# Overperformers: win percentage below the mean, yet an average final rank
# better (numerically smaller) than the median.
low_win_pct_mask = grouped_data['win_percentage'] < owner_mean_win_pct
high_playoff_rank_mask = grouped_data['rankCalculatedFinal'] < owner_median_final_rank
overperformers = grouped_data[low_win_pct_mask & high_playoff_rank_mask]
# Both reports show the same five columns.
owner_report_columns = ['firstName', 'win_percentage', 'rankCalculatedFinal', 'wins', 'losses']
print("Underperformers based on FirstName:")
print(underperformers[owner_report_columns])
print("\nOverperformers based on FirstName:")
print(overperformers[owner_report_columns])
Underperformers based on FirstName: firstName win_percentage rankCalculatedFinal wins losses 3 Chase 0.537677 6.857143 50 43 5 David 0.541896 6.375000 57 49 Overperformers based on FirstName: firstName win_percentage rankCalculatedFinal wins losses 2 Chad 0.480082 4.750000 51 55 11 Ryan 0.446886 5.833333 36 44
# Teams whose final rank (rankCalculatedFinal) is 1.
is_first_place = df_filtered['rankCalculatedFinal'] == 1
first_place_teams = df_filtered[is_first_place]
# Mean number of acquisitions: first-place teams vs. all teams.
average_acquisitions = first_place_teams['acquisitions'].mean()
overall_average_acquisitions = df_filtered['acquisitions'].mean()
# Mean number of trades: first-place teams vs. all teams.
average_trades = first_place_teams['trades'].mean()
overall_average_trades = df_filtered['trades'].mean()
# Report the four averages, two decimals each.
print(f"Teams that finish first have an average of {average_acquisitions:.2f} waiver adds.")
print(f"On average, teams make {overall_average_acquisitions:.2f} waiver adds.")
print(f"Teams that finish first have an average of {average_trades:.2f} trades.")
print(f"On average, teams make {overall_average_trades:.2f} trades.")
Teams that finish first have an average of 26.75 waiver adds. On average, teams make 20.62 waiver adds. Teams that finish first have an average of 0.75 trades. On average, teams make 0.67 trades.