import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from time import sleep


# Seasons up to and including this one are requested from the "leagueHistory"
# endpoint, which wraps its payload in a one-element list.
# NOTE(review): the original code only routed 2015-2017 this way; seasons
# before 2015 are now routed here as well -- confirm against an older league.
_LAST_LEAGUE_HISTORY_SEASON = 2017

_ESPN_API_BASE = 'https://lm-api-reads.fantasy.espn.com/apis/v3/games/ffl'
_ESPN_VIEWS = 'view=mDraftDetail&view=mSettings&view=mTeam&view=modular&view=mNav'

# Fields copied verbatim from each draft pick.
_PICK_COLUMNS = ('roundId', 'roundPickNumber', 'teamId')

# Without a timeout, requests waits indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30

# Pause between consecutive requests to avoid hammering the API.
_SECONDS_BETWEEN_REQUESTS = 5


def _uses_league_history(season):
    """Return True when `season` must be fetched from the leagueHistory endpoint."""
    return season <= _LAST_LEAGUE_HISTORY_SEASON


def _build_league_url(league_id, season):
    """Return the league URL for `season`, choosing the endpoint by season."""
    if _uses_league_history(season):
        return f'{_ESPN_API_BASE}/leagueHistory/{league_id}?{_ESPN_VIEWS}&seasonId={season}'
    return f'{_ESPN_API_BASE}/seasons/{season}/segments/0/leagues/{league_id}?{_ESPN_VIEWS}'


def _index_by_id(items):
    """Map each item's 'id' to the item, keeping the first item seen per id."""
    index = {}
    for item in items:
        index.setdefault(item['id'], item)
    return index


def _first_round_rows(league_data, season):
    """Build one flat dict per first-round draft pick in a season's payload."""
    picks_information = league_data['draftDetail']['picks']
    members_by_id = _index_by_id(league_data['members'])
    teams_by_id = _index_by_id(league_data['teams'])

    rows = []
    for pick in picks_information:
        if pick['roundId'] != 1:
            continue

        row = {column: pick[column] for column in _PICK_COLUMNS}

        team = teams_by_id.get(pick['teamId'])
        if team is not None:
            overall = team['record']['overall']
            transactions = team['transactionCounter']

            # row['team_name'] = team['name']
            # row['team_abbrev'] = team['abbrev']
            row['rankCalculatedFinal'] = team['rankCalculatedFinal']
            # The column keeps its historical name but holds 'gamesBack'.
            row['regular_sn_overall_rank'] = overall['gamesBack']
            row['wins'] = overall['wins']
            row['losses'] = overall['losses']
            row['win_percentage'] = overall['percentage']
            # row['owners'] = team['owners']
            row['acquisitions'] = transactions['acquisitions']
            row['trades'] = transactions['trades']

            # With several owners, the last owner that resolves to a league
            # member supplies the name. A team without an 'owners' entry
            # simply gets no 'firstName' instead of raising.
            for owner_id in team.get('owners', []):
                member = members_by_id.get(owner_id)
                if member is not None:
                    row['firstName'] = member.get('firstName', 'NO_FIRST_NAME')

        row['year'] = season
        rows.append(row)

    return rows


def fetch_espn_league_data(league_id, USER_AGENT, SWID_COOKIE, ESPN_S2_COOKIES, start_season=2015, end_season=2023):
    """
    Fetch ESPN fantasy football league data for a specified range of seasons.

    One row is produced per first-round draft pick, combining the pick itself
    with the drafting team's record, transaction counts and an owner's first
    name. Seasons that cannot be fetched or parsed are reported on stdout and
    skipped, so the seasons that did succeed are still returned.

    Parameters:
    - league_id (str): The unique identifier for the ESPN league.
    - USER_AGENT (str): The user agent string for the HTTP request header.
    - SWID_COOKIE (str): The SWID cookie for ESPN authentication.
    - ESPN_S2_COOKIES (str): The ESPN S2 cookie for authentication.
    - start_season (int, optional): The starting season year. Defaults to 2015.
    - end_season (int, optional): The ending season year (inclusive). Defaults to 2023.

    Returns:
    - df (pd.DataFrame): A DataFrame containing the league data for the specified seasons.
      It is empty when no season yields data (including start_season > end_season).

    Note: If needed, uncomment team_name, team_abbrev, and owners for unique identifiers or data validation.
    """

    headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': USER_AGENT,
    }

    espn_cookies = {
        "swid": SWID_COOKIE,
        "espn_s2": ESPN_S2_COOKIES
    }

    all_data = []

    for request_index, season in enumerate(range(start_season, end_season + 1)):
        # Pause before every request except the first, so the delay also
        # applies after a failed season and is not wasted after the last one.
        if request_index:
            sleep(_SECONDS_BETWEEN_REQUESTS)

        url = _build_league_url(league_id, season)

        try:
            r = requests.get(
                url,
                headers=headers,
                cookies=espn_cookies,
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            print(f"Request failed for year {season}: {exc}. Skipping...")
            continue

        if r.status_code != 200:
            print(f"HTTP error for year {season}: {r.status_code} {r.reason}. Skipping...")
            continue

        try:
            espn_raw_data = r.json()
        except ValueError:
            # A 200 response whose body is not JSON.
            print(f"Invalid JSON for year {season}. Skipping...")
            continue

        if _uses_league_history(season):
            if not espn_raw_data:
                print(f"No league data returned for year {season}. Skipping...")
                continue
            espn_raw_data = espn_raw_data[0]

        all_data.extend(_first_round_rows(espn_raw_data, season))

    df = pd.DataFrame(all_data)
    return df

# Usage:
# The ESPN session cookies are credentials, so they are read from the
# environment instead of being stored in the source. Set ESPN_SWID and
# ESPN_S2 before running; ESPN_LEAGUE_ID is optional and falls back to the
# league analysed here.
import os

league_id = os.environ.get('ESPN_LEAGUE_ID', '459034')
SWID_COOKIE = os.environ['ESPN_SWID']
ESPN_S2_COOKIES = os.environ['ESPN_S2']
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'

df = fetch_espn_league_data(league_id, USER_AGENT, SWID_COOKIE, ESPN_S2_COOKIES)
print(df)

    roundId  roundPickNumber  teamId  rankCalculatedFinal  \
0         1                1       6                    8   
1         1                2       5                    6   
2         1                3       9                    5   
3         1                4       7                    1   
4         1                5       3                    4   
..      ...              ...     ...                  ...   
91        1                8      13                    0   
92        1                9      10                    0   
93        1               10       6                    0   
94        1               11       3                    0   
95        1               12       7                    0   

    regular_sn_overall_rank  wins  losses  win_percentage  acquisitions  \
0                       0.0     8       5        0.615385             5   
1                       0.0     5       8        0.384615            11   
2                       0.0    12       1        0.923077            11   
3                       0.0     6       7        0.461538            42   
4                       0.0     5       8        0.384615            17   
..                      ...   ...     ...             ...           ...   
91                      1.0     0       1        0.000000             2   
92                      0.0     1       0        1.000000             4   
93                      1.0     0       1        0.000000             1   
94                      1.0     0       1        0.000000             1   
95                      0.0     1       0        1.000000             0   

    trades firstName  year  
0        1   vincent  2015  
1        0   Michael  2015  
2        0     robby  2015  
3        2    Weston  2015  
4        0      Chad  2015  
..     ...       ...   ...  
91       0   william  2023  
92       0     brady  2023  
93       0      Ryan  2023  
94       0      Chad  2023  
95       0   Weston   2023  

[96 rows x 12 columns]


df.head()


# Normalise 'firstName': strip surrounding whitespace, then convert to proper case.
# Without the strip, values such as 'Weston ' and 'Weston' are treated as two
# different managers by every later groupby / value_counts.
df['firstName'] = df['firstName'].str.strip().str.title()


# #Post Data Collection: Filter of Years to Analyze if needed
# # Assuming 'year' is the column that represents years in df
# start_year = 2015
# end_year = 2023

# # Filter the dataframe by the range of years
# df = df[(df['year'] >= start_year) & (df['year'] <= end_year)]


# Number of first-round picks recorded per manager first name.
count_firstName = df.loc[:, 'firstName'].value_counts()
print(count_firstName)

Rhett      9
Chad       9
Brady      9
Casey      9
David      9
Chase      8
Ryan       7
Scott      7
Patrick    6
Weston     5
Weston     4
Dan        3
William    3
Michael    3
Robby      2
Vincent    2
Jackson    1
Name: firstName, dtype: int64


# df.to_csv('First_Round.csv', index=False)


# Table of managers (rows) by first-round pick position (columns); each cell
# is how often that manager held that pick, with 0 where it never happened.
frequency = (
    df.groupby('firstName')['roundPickNumber']
    .value_counts()
    .unstack()
    .fillna(0)
    .astype(int)
)

# Target image size in pixels (535.5 x 510, chosen for the portfolio website).
# Matplotlib sizes figures in inches, so divide by the chosen DPI.
dpi = 72
desired_width, desired_height = 535.5, 510
width_in_inches = desired_width / dpi
height_in_inches = desired_height / dpi

plt.figure(figsize=(width_in_inches, height_in_inches))
sns.heatmap(
    frequency,
    annot=True,
    cmap="YlGnBu",
    fmt='g',
    linewidths=.5,
)
plt.title("Frequency of RoundPickNumber by FirstName")

# Write the image to disk first, then display the figure.
plt.savefig('heatmap.png', dpi=dpi, bbox_inches='tight')

plt.show()


# Mean first-round pick position per manager, rounded to two decimals and
# listed from the earliest average pick to the latest.
average_pick = (
    df.groupby('firstName')['roundPickNumber']
    .mean()
    .round(2)
    .sort_values()
)

average_pick

firstName
Robby       2.50
Vincent     3.50
Chase       4.50
Michael     5.00
Rhett       5.11
Casey       5.22
Weston      5.40
Chad        5.67
Scott       5.86
Ryan        6.14
Brady       6.22
Dan         6.33
Patrick     6.50
William     7.00
David       7.33
Weston      8.75
Jackson    10.00
Name: roundPickNumber, dtype: float64


# Order the managers on the x-axis by their median first-round pick,
# lowest median first.
order = (
    df.groupby('firstName')['roundPickNumber']
    .median()
    .sort_values(ascending=True)
    .index
    .tolist()
)

plt.figure(figsize=(12, 8))
sns.boxplot(
    data=df,
    x='firstName',
    y='roundPickNumber',
    order=order,
)
plt.title("Boxplot of RoundPickNumber by FirstName")
plt.xticks(rotation=45)  # Tilt the names so neighbouring labels do not overlap
plt.show()


# Champions
# Keep only the rows whose final rank is 1.
filtered_df = df.loc[df['rankCalculatedFinal'] == 1]

# Picks from 2021 and 2022 are tallied separately and labelled with an
# asterisk (the asterisk indicates when the league turned into a 12-man league).
filtered_years_df = filtered_df.loc[filtered_df['year'].isin([2021, 2022])]
year_counts = filtered_years_df['roundPickNumber'].value_counts()
year_counts = year_counts.rename(lambda pick_number: f"{pick_number}*")

# Every other season is tallied under the plain pick number.
other_years_df = filtered_df.loc[~filtered_df['year'].isin([2021, 2022])]
other_counts = other_years_df['roundPickNumber'].value_counts()

# Stack both tallies (asterisked first) and sort from most to least frequent.
final_counts = pd.concat([year_counts, other_counts])
sorted_counts = final_counts.sort_values(ascending=False)

sorted_counts

4      2
10*    1
3*     1
5      1
8      1
9      1
10     1
Name: roundPickNumber, dtype: int64


#Top three finishers (get money back)
# Step 1: Filter the dataframe to final ranks 1-3 (inclusive).
# A rank of 0 means no final rank has been assigned (the in-progress 2023
# season), so a plain `<= 3` would count every 2023 team as a top-three
# finisher.
filtered_df = df[df['rankCalculatedFinal'].between(1, 3)]

# Step 2: Get the counts for 2021 and 2022. The asterisk indicates when the
# league turned into a 12-man league.
filtered_years_df = filtered_df[filtered_df['year'].isin([2021, 2022])]
year_counts = filtered_years_df['roundPickNumber'].value_counts()
year_counts = year_counts.rename(lambda x: f"{x}*")

# Step 3: Get counts for other years
other_years_df = filtered_df[~filtered_df['year'].isin([2021, 2022])]
other_counts = other_years_df['roundPickNumber'].value_counts()

# Merge the results using pd.concat
final_counts = pd.concat([year_counts, other_counts])

# Sort values from largest to lowest
sorted_counts = final_counts.sort_values(ascending=False)

sorted_counts

4      5
5      4
10     3
2      3
9      3
3      3
1      2
8      2
7      2
10*    2
1*     1
12*    1
3*     1
8*     1
6      1
11     1
12     1
Name: roundPickNumber, dtype: int64


# Data validation check:
# there should be one row with rankCalculatedFinal == 1 per completed season.
count = len(df.loc[df['rankCalculatedFinal'] == 1])

print(f"The number of times rankCalculatedFinal (Champions) is equal to 1 is: {count}")

The number of times rankCalculatedFinal (Champions) is equal to 1 is: 8


# Drop the 2023 season; note that this rebinds df for everything that follows.
df = df.loc[df['year'] != 2023]

# Underperformers: win percentage above the mean, yet a final rank that is
# numerically greater (i.e. worse) than the median final rank.
high_win_pct_mask = df['win_percentage'].gt(df['win_percentage'].mean())
low_playoff_rank_mask = df['rankCalculatedFinal'].gt(df['rankCalculatedFinal'].median())
underperformers_in_playoffs = df.loc[high_win_pct_mask & low_playoff_rank_mask]

# Overperformers: win percentage below the mean, yet a final rank that is
# numerically smaller (i.e. better) than the median final rank.
low_win_pct_mask = df['win_percentage'].lt(df['win_percentage'].mean())
high_playoff_rank_mask = df['rankCalculatedFinal'].lt(df['rankCalculatedFinal'].median())
overperformers_in_playoffs = df.loc[low_win_pct_mask & high_playoff_rank_mask]

print("Underperformers in Playoffs:")
print(underperformers_in_playoffs[['year', 'firstName', 'win_percentage', 'rankCalculatedFinal']])

print("\nOverperformers in Playoffs:")
print(overperformers_in_playoffs[['year', 'firstName', 'win_percentage', 'rankCalculatedFinal']])

Underperformers in Playoffs:
    year firstName  win_percentage  rankCalculatedFinal
0   2015   Vincent        0.615385                    8
6   2015     Casey        0.538462                    7
19  2016   Michael        0.692308                    7
25  2017     Brady        0.692308                    7
29  2017     Chase        0.692308                    8
64  2021     Chase        0.571429                    7
72  2022     Chase        0.500000                    8
82  2022   Patrick        0.714286                    7

Overperformers in Playoffs:
    year firstName  win_percentage  rankCalculatedFinal
3   2015    Weston        0.461538                    1
4   2015      Chad        0.384615                    4
8   2015     Rhett        0.384615                    2
11  2016     Robby        0.461538                    2
21  2017     Casey        0.384615                    2
22  2017   Michael        0.461538                    4
27  2017    Weston        0.384615                    1
30  2018     Chase        0.461538                    5
36  2018     Brady        0.461538                    4
54  2020      Ryan        0.461538                    3


# Work on a copy of the data without the 2023 season.
df_filtered = df.loc[df['year'] != 2023]

# One row per manager: mean win percentage and mean final rank,
# plus total wins and total losses.
grouped_data = (
    df_filtered
    .groupby('firstName')
    .agg({
        'win_percentage': 'mean',
        'rankCalculatedFinal': 'mean',
        'wins': 'sum',
        'losses': 'sum'
    })
    .reset_index()
)

# Underperformers: mean win percentage above the across-manager mean, yet a
# mean final rank numerically greater (worse) than the across-manager median.
high_win_pct_mask = grouped_data['win_percentage'].gt(grouped_data['win_percentage'].mean())
low_playoff_rank_mask = grouped_data['rankCalculatedFinal'].gt(grouped_data['rankCalculatedFinal'].median())
underperformers = grouped_data.loc[high_win_pct_mask & low_playoff_rank_mask]

# Overperformers: mean win percentage below the across-manager mean, yet a
# mean final rank numerically smaller (better) than the across-manager median.
low_win_pct_mask = grouped_data['win_percentage'].lt(grouped_data['win_percentage'].mean())
high_playoff_rank_mask = grouped_data['rankCalculatedFinal'].lt(grouped_data['rankCalculatedFinal'].median())
overperformers = grouped_data.loc[low_win_pct_mask & high_playoff_rank_mask]

print("Underperformers based on FirstName:")
print(underperformers[['firstName', 'win_percentage', 'rankCalculatedFinal', 'wins', 'losses']])

print("\nOverperformers based on FirstName:")
print(overperformers[['firstName', 'win_percentage', 'rankCalculatedFinal', 'wins', 'losses']])

Underperformers based on FirstName:
  firstName  win_percentage  rankCalculatedFinal  wins  losses
3     Chase        0.537677             6.857143    50      43
5     David        0.541896             6.375000    57      49

Overperformers based on FirstName:
   firstName  win_percentage  rankCalculatedFinal  wins  losses
2       Chad        0.480082             4.750000    51      55
11      Ryan        0.446886             5.833333    36      44


# Rows for the teams whose final rank is 1.
first_place_teams = df_filtered.loc[df_filtered['rankCalculatedFinal'] == 1]

# Acquisitions: first-place teams versus every team.
average_acquisitions = first_place_teams['acquisitions'].mean()
print(f"Teams that finish first have an average of {average_acquisitions:.2f} waiver adds.")

overall_average_acquisitions = df_filtered['acquisitions'].mean()
print(f"On average, teams make {overall_average_acquisitions:.2f} waiver adds.")


# Trades: first-place teams versus every team.
average_trades = first_place_teams['trades'].mean()
print(f"Teams that finish first have an average of {average_trades:.2f} trades.")

overall_average_trades = df_filtered['trades'].mean()
print(f"On average, teams make {overall_average_trades:.2f} trades.")

Teams that finish first have an average of 26.75 waiver adds.
On average, teams make 20.62 waiver adds.
Teams that finish first have an average of 0.75 trades.
On average, teams make 0.67 trades.

Project Overview: ESPN Fantasy Football Data Analysis

Objective:

To understand the performance trends, player behaviors, and strategies of teams in an ESPN fantasy football league across multiple seasons.

Key Steps:

1. Data Extraction:

2. Data Processing and Aggregation:

3. Data Visualization:

4. Insightful Analysis:

5. Value Proposition:

Instructions for Obtaining Request URL, league_id, SWID_COOKIE, and ESPN_S2_COOKIES:

	roundId	roundPickNumber	teamId	rankCalculatedFinal	wins	losses	win_percentage	acquisitions	trades	firstName	year
0	1	1	6	8	8	5	0.615385	5	1	vincent	2015
1	1	2	5	6	5	8	0.384615	11	0	Michael	2015
2	1	3	9	5	12	1	0.923077	11	0	robby	2015
3	1	4	7	1	6	7	0.461538	42	2	Weston	2015
4	1	5	3	4	5	8	0.384615	17	0	Chad	2015