import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import Arc, Circle, FancyBboxPatch
from matplotlib.lines import Line2D
from scipy.spatial import ConvexHull, distance
from sklearn.linear_model import LinearRegression


# Read the CSV file into a pandas DataFrame
df = pd.read_csv('take_home.csv')

# Bare expression: only has a visible effect when run as the last line of a notebook cell
df


# Summary statistics (count/mean/std/quartiles) for the numeric columns
df.describe()


# Number of missing values per column, largest first
df.isna().sum().sort_values(ascending=False)

Position     250
JerseyNum    250
Team         250
VelZ           3
AccelZ         3
CoordsZ        2
CoordsX        1
CoordsY        1
VelX           1
VelY           1
AccelX         1
AccelY         1
EntityId       0
Time           0
dtype: int64


# Inspect the rows that have no CoordsX value, then remove them from df
missing_coords_x = df['CoordsX'].isna()
print(df.loc[missing_coords_x])

df = df.loc[~missing_coords_x]

     EntityId  CoordsX  CoordsY  CoordsZ  VelX  VelY  VelZ  AccelX  AccelY  \
846      8054      NaN      NaN      NaN   NaN   NaN   NaN     NaN     NaN   

     AccelZ Position  JerseyNum Team   Time  
846     NaN        D       54.0    B  3.908


# Missing values per column, largest first (bare expression: displayed only in a notebook cell)
df.isna().sum().sort_values(ascending=False)

Position     250
JerseyNum    250
Team         250
VelZ           2
AccelZ         2
CoordsZ        1
EntityId       0
CoordsX        0
CoordsY        0
VelX           0
VelY           0
AccelX         0
AccelY         0
Time           0
dtype: int64


# # Check for outliers using IQR method
# def check_outliers(column):
#     Q1 = df[column].quantile(0.25)
#     Q3 = df[column].quantile(0.75)
#     IQR = Q3 - Q1
    
#     lower_boundary = Q1 - 1.5 * IQR
#     upper_boundary = Q3 + 1.5 * IQR
    
#     outliers = df[(df[column] < lower_boundary) | (df[column] > upper_boundary)]
    
#     return outliers

# outliers_accelX = check_outliers('AccelX')
# outliers_accelY = check_outliers('AccelY')
# outliers_accelZ = check_outliers('AccelZ')

# print("Outliers in AccelX:\n", outliers_accelX)
# print("Outliers in AccelY:\n", outliers_accelY)
# print("Outliers in AccelZ:\n", outliers_accelZ)

# # Visualizing outliers using boxplots
# plt.figure(figsize=(10, 6))
# sns.boxplot(data=df)
# plt.title("Boxplot of AccelX, AccelY, and AccelZ")
# plt.show()


# Inspect the rows that have no VelX value, then remove them from df
missing_vel_x = df['VelX'].isna()
print(df.loc[missing_vel_x])

df = df.loc[~missing_vel_x]

Empty DataFrame
Columns: [EntityId, CoordsX, CoordsY, CoordsZ, VelX, VelY, VelZ, AccelX, AccelY, AccelZ, Position, JerseyNum, Team, Time]
Index: []


# Color mapping based on Team
team_colors = {
    'A': 'blue',
    'B': 'red',
    'NaN': 'gray'  # Placeholder key: a truly missing Team never matches it and is coloured by the 'gray' fallbacks below
}


# Rows whose Team is missing map to NaN here and are then filled with gray.
colors = df['Team'].map(team_colors).fillna('gray')

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(df['CoordsX'], df['CoordsY'], df['CoordsZ'], c=colors, marker='o')


# Missing teams are replaced by the string 'nan' before de-duplicating so the legend gets
# exactly one readable 'Puck' entry instead of raw NaN labels; sorting keeps the legend
# order identical from run to run (plain set iteration order of strings is not stable).
team_labels = sorted(set(df['Team'].fillna('nan')))
legend_elements = [
    Line2D([0], [0], marker='o', color='w',
           label='Puck' if team == 'nan' else team,
           markersize=10, markerfacecolor=team_colors.get(team, 'gray'))
    for team in team_labels
]
ax.legend(handles=legend_elements)

ax.set_xlabel('CoordsX')
ax.set_ylabel('CoordsY')
ax.set_zlabel('CoordsZ')
ax.set_title('3D Scatter plot of Entity Coordinates by Team')
plt.show()


# Function to draw NHL rink with goalie creases
def draw_nhl_rink(ax=None, color='gray'):
    """
    Draw a top-down rink outline (boards, lines, circles, creases) on a matplotlib axes.

    Parameters:
    - ax: Axes to draw on; when None a new figure/axes of size (10, 6) is created.
    - color: Color of the boards, lines, circles and creases (default is 'gray').

    Returns:
    - The axes the rink was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    ax.set_facecolor('azure')

    # Shared look for every unfilled outline patch
    outline = dict(edgecolor=color, facecolor='none', lw=2)

    # Boards: 200 x 85 box centred on the origin, with rounded corners
    ax.add_patch(FancyBboxPatch((-100, -42.5), 200, 85,
                                boxstyle="round,pad=0,rounding_size=23", **outline))

    # Faceoff circles at x = -69 and x = 69, each with a green dot at its centre
    for circle_x in (-69, 69):
        for circle_y in (22, -22):
            ax.add_patch(Circle((circle_x, circle_y), 15, **outline))
        for dot_y in (22, -22):
            ax.plot(circle_x, dot_y, 'o', color='green')

    # Vertical lines given as (x, half-length): the two goal lines stop at |y| = 40,
    # the blue lines and the centre line run the full height of the box
    vertical_lines = [(-89, 40), (89, 40), (-25, 42.5), (25, 42.5), (0, 42.5)]
    for line_x, half_length in vertical_lines:
        ax.add_line(plt.Line2D([line_x, line_x], [-half_length, half_length], c=color, lw=2))

    # Centre-ice circle
    ax.add_patch(plt.Circle((0, 0), 15, **outline))

    # Goalie creases: half circles centred on each goal line, bulging toward centre ice
    for goal_x, start_deg, end_deg in ((89, 90, 270), (-89, 270, 90)):
        ax.add_patch(Arc((goal_x, 0), width=16, height=16, angle=0,
                         theta1=start_deg, theta2=end_deg, edgecolor=color, lw=2))

    # Fixed view with a small margin around the boards; equal scaling, no axis decorations
    ax.set_xlim(-105, 105)
    ax.set_ylim(-47.5, 47.5)
    ax.set_aspect('equal', 'box')
    ax.axis('off')

    return ax


# Draw the rink and scatter the data
ax = draw_nhl_rink()
colors = df['Team'].map(team_colors).fillna('gray')
scatter = ax.scatter(df['CoordsX'], df['CoordsY'], c=colors, alpha=0.6, edgecolors='w', linewidth=0.5)

# Legend: rows without a Team are shown as 'Puck'. The labels are sorted because plain
# set iteration order of strings can differ between interpreter runs, which would
# shuffle the legend entries.
team_labels = sorted(set(df['Team'].fillna('nan')))
legend_elements = [
    Line2D([0], [0], marker='o', color='w',
           label='Puck' if team == 'nan' else team,
           markersize=10, markerfacecolor=team_colors.get(team, 'gray'))
    for team in team_labels
]
ax.legend(handles=legend_elements, loc='upper left')

plt.show()


# # For Portfolio Website Ignore
# def draw_nhl_rink(ax=None, color='gray'):
#     if ax is None:
#         fig, ax = plt.subplots(figsize=(10, 6))
    
#     ax.set_facecolor('black')  # Set axes background to black
    
#     # Rink boundary with rounded corners
#     rink = FancyBboxPatch((-100, -42.5), 200, 85, boxstyle="round,pad=0,rounding_size=23", edgecolor=color, facecolor='white', lw=2)
#     ax.add_patch(rink)
    
#     left_goal_line = plt.Line2D([-89, -89], [-40, 40], c=color, lw=2)
#     right_goal_line = plt.Line2D([89, 89], [-40, 40], c=color, lw=2)
#     left_blue_line = plt.Line2D([-25, -25], [-42.5, 42.5], c=color, lw=2)
#     right_blue_line = plt.Line2D([25, 25], [-42.5, 42.5], c=color, lw=2)
#     center_line = plt.Line2D([0, 0], [-42.5, 42.5], c=color, lw=2)
#     center_circle = plt.Circle((0, 0), 15, edgecolor=color, facecolor='none', lw=2)
#     left_crease = Arc((89, 0), width=16, height=16, angle=0, theta1=90, theta2=270, edgecolor=color, lw=2)
#     right_crease = Arc((-89, 0), width=16, height=16, angle=0, theta1=270, theta2=90, edgecolor=color, lw=2)
    
#     # Offensive and Defensive circles (Left and Right)
#     for x in [-69, 69]:  # X-coordinates for the left and right circles
#         ax.add_patch(Circle((x, 22), 15, edgecolor=color, facecolor='none', lw=2))
#         ax.add_patch(Circle((x, -22), 15, edgecolor=color, facecolor='none', lw=2))
#         for y in [22, -22]:  # Y-coordinates for top and bottom dots
#             ax.plot(x, y, 'o', color='green')
    
#     ax.add_line(left_goal_line)
#     ax.add_line(right_goal_line)
#     ax.add_line(left_blue_line)
#     ax.add_line(right_blue_line)
#     ax.add_line(center_line)
#     ax.add_patch(center_circle)
#     ax.add_patch(left_crease)
#     ax.add_patch(right_crease)
    
#     ax.set_xlim(-105, 105)
#     ax.set_ylim(-47.5, 47.5)
#     ax.set_aspect('equal', 'box')
#     ax.axis('off')
    
#     return ax

# # For Portfolio Website
# # Calculate the figure size in inches
# desired_width_px, desired_height_px = 500, 500
# dpi = 100  # Adjust this value as needed
# fig_width_in = desired_width_px / dpi
# fig_height_in = desired_height_px / dpi

# fig, ax = plt.subplots(figsize=(fig_width_in, fig_height_in))
# ax = draw_nhl_rink(ax=ax)  # Using the existing 'ax'

# colors = df['Team'].map(team_colors).fillna('gray')
# scatter = ax.scatter(df['CoordsX'], df['CoordsY'], c=colors, alpha=0.6, edgecolors='w', linewidth=0.5)

# # Legend
# legend_elements = [Line2D([0], [0], marker='o', color='w', label='Puck' if team == 'nan' else team, markersize=10, markerfacecolor=team_colors.get(team, 'gray')) 
#                    for team in set(df['Team'].fillna('nan'))]
# ax.legend(handles=legend_elements, loc='upper left')

# plt.tight_layout()
# plt.savefig('rink_image.png', dpi=dpi)  # Save the figure with a specific DPI
# plt.show()


# For Portfolio Website
# Calculate the figure size in inches
desired_width_px, desired_height_px = 500, 500
dpi = 100  # Adjust this value as needed
fig_width_in = desired_width_px / dpi
fig_height_in = desired_height_px / dpi

fig, ax = plt.subplots(figsize=(fig_width_in, fig_height_in))
ax = draw_nhl_rink(ax=ax)  # Using the existing 'ax'

# Set only the figure's background color to black
fig.set_facecolor('black')

colors = df['Team'].map(team_colors).fillna('gray')
scatter = ax.scatter(df['CoordsX'], df['CoordsY'], c=colors, alpha=0.6, edgecolors='w', linewidth=0.5)

# Legend: rows without a Team are shown as 'Puck'. The labels are sorted so the saved
# image is reproducible (plain set iteration order of strings can differ between runs).
team_labels = sorted(set(df['Team'].fillna('nan')))
legend_elements = [
    Line2D([0], [0], marker='o', color='w',
           label='Puck' if team == 'nan' else team,
           markersize=10, markerfacecolor=team_colors.get(team, 'gray'))
    for team in team_labels
]
ax.legend(handles=legend_elements, loc='upper left')

plt.tight_layout()
plt.savefig('rink_image.png', dpi=dpi)  # Save the figure with a specific DPI
plt.show()


def plot_entity_coords_over_time(df, entity_id, coord='CoordsZ'):
    """
    Show how one column of a single entity changes over time.

    Parameters:
    - df: Dataframe with 'EntityId', 'Time' and the column named by coord.
    - entity_id: EntityId whose rows are plotted.
    - coord: Name of the column drawn on the y-axis (default is 'CoordsZ').

    Returns:
    - None; the figure is displayed with plt.show().
    """
    # Keep only the rows that belong to the requested entity
    rows = df.loc[df['EntityId'] == entity_id]
    times, values = rows['Time'], rows[coord]

    # One line with circular markers, in the order the rows appear in df
    _, axes = plt.subplots()
    axes.plot(times, values, marker='o', color='blue', linestyle='-')
    axes.set(
        xlabel='Time',
        ylabel=coord,
        title=f'{coord} for EntityId == {entity_id} over Time',
    )
    plt.show()


# Plot CoordsY and then CoordsZ of EntityId 1 over time, one figure each
for coord_name in ('CoordsY', 'CoordsZ'):
    plot_entity_coords_over_time(df, 1, coord_name)


# Rows that belong to EntityId == 1
entity_data = df.loc[df['EntityId'].eq(1)]

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# 3D positions of EntityId == 1
xs, ys, zs = (entity_data[column] for column in ('CoordsX', 'CoordsY', 'CoordsZ'))
scatter = ax.scatter(xs, ys, zs, c='blue', marker='o')

ax.set(
    xlabel='CoordsX',
    ylabel='CoordsY',
    zlabel='CoordsZ',
    title='3D Scatter plot of Z Coordinate for EntityId == 1',
)
plt.show()


# Rows recorded strictly between Time = 1.62 and Time = 1.66
in_window = df['Time'].gt(1.62) & df['Time'].lt(1.66)
nearby_data = df[in_window]
print(nearby_data)

     EntityId    CoordsX    CoordsY   CoordsZ       VelX       VelY      VelZ  \
353         1  42.619913   2.079347  0.000000  26.489989 -38.072351  0.000000   
354      8040  61.840918   3.551868  4.682931   3.228459   1.288098 -0.295262   
355     14086  58.757717 -25.834717  4.981964   6.318514   2.240820 -0.970498   
356      8044  68.926687 -11.563980  4.845452   0.936971   0.978526 -0.199894   
357         1  43.007685   1.426259  0.000000  24.235677 -40.817931  0.000000   
358      8034  84.723458   0.953518  5.198738   0.146683   1.368064 -1.008279   
359      8021  77.855679   7.552948  5.077283   1.210059   4.771175 -0.190689   
360         1  43.421595   0.756963  0.000000  25.869335 -41.830900  0.000000   

         AccelX      AccelY     AccelZ Position  JerseyNum Team   Time  
353  -17.203639 -115.498206   0.000000      NaN        NaN  NaN  1.624  
354   -4.180771   -2.751472   0.567719       RW       40.0    B  1.628  
355   -1.390160   -6.732454 -11.200774       RW       86.0    A  1.632  
356   -4.857288   -5.600645 -13.855313        D       44.0    B  1.636  
357 -140.894198 -171.598383   0.000000      NaN        NaN  NaN  1.640  
358   -1.341155   -8.714046  -3.177618        G       34.0    B  1.648  
359   -1.639178   -1.736494  -4.530988        D       21.0    B  1.652  
360  102.103401  -63.310421   0.000000      NaN        NaN  NaN  1.656


tolerance = 0.5


# Rows whose CoordsX lies within `tolerance` of x = 69. Both bounds are applied to
# CoordsX; testing Time against 69 + tolerance would never exclude anything.
Coords = df[(df['CoordsX'] >= 69 - tolerance) & (df['CoordsX'] <= 69 + tolerance)]
print(Coords)

     EntityId    CoordsX    CoordsY   CoordsZ       VelX      VelY      VelZ  \
0        8021  73.272591   2.351175  5.243467   6.023007  3.959640 -0.310185   
5       14038  77.234042  -7.618020  5.305041   3.886750  1.031621 -0.282560   
14       8044  71.622231 -13.645578  5.511122  -2.468628  5.033422  0.397189   
16       8034  85.024377  -1.279169  5.597798  -0.833619  0.354787 -0.500028   
18       8021  73.748806   2.615554  5.215993   5.669217  3.147376 -0.327071   
..        ...        ...        ...       ...        ...       ...       ...   
860         1  85.241260 -37.380837  0.470997   0.560875 -7.148680  8.884455   
861      8040  73.650126  -1.378924  5.232279   4.251730 -2.838572 -1.161895   
863     14086  74.073498 -28.027884  4.674300  10.829922 -4.591188  0.463799   
864         1  85.200855 -37.395350  0.323678  -2.525309 -0.907093 -9.207382   
865      8044  74.960319 -16.304413  5.487128   6.808158 -4.734654  1.006715   

         AccelX      AccelY       AccelZ Position  JerseyNum Team   Time  
0     -4.205320   -2.372893    -3.277896        D       21.0    B  0.000  
5      1.796894    5.944007     4.509041       LW       38.0    A  0.028  
14    -4.444613    5.559126     8.421545        D       44.0    B  0.068  
16    -3.570760   -2.299714   -21.327963        G       34.0    B  0.076  
18    -4.211769   -9.669797    -0.201023        D       21.0    B  0.084  
..          ...         ...          ...      ...        ...  ...    ...  
860  260.087755 -370.457649  1032.800889      NaN        NaN  NaN  3.976  
861   -2.971638   -1.488420   -14.729052       RW       40.0    B  3.980  
863   -4.546898  -17.861194     7.867935       RW       86.0    A  3.988  
864 -192.886119  390.098348 -1130.737522      NaN        NaN  NaN  3.992  
865    9.633872   -6.497119     0.930741        D       44.0    B  3.996  

[346 rows x 14 columns]


# Rows whose Time lies within `tolerance` of 1.640 (both ends inclusive)
tolerance = 0.0001
earliest, latest = 1.640 - tolerance, 1.640 + tolerance
filtered_df = df[df['Time'].between(earliest, latest)]
print(filtered_df)

     EntityId    CoordsX   CoordsY  CoordsZ       VelX       VelY  VelZ  \
357         1  43.007685  1.426259      0.0  24.235677 -40.817931   0.0   

         AccelX      AccelY  AccelZ Position  JerseyNum Team  Time  
357 -140.894198 -171.598383     0.0      NaN        NaN  NaN  1.64


def players_within_faceoff_circle(df, time_target, faceoff_center, faceoff_radius=15):
    """
    Return players within a faceoff circle at a given time.

    Each player's position is taken from that player's own sample whose timestamp is
    closest to time_target, so a sample at exactly time_target is not required.

    Parameters:
    - df: Dataframe containing player and puck data
      (needs 'EntityId', 'Time', 'CoordsX', 'CoordsY').
    - time_target: The specific time to filter on.
    - faceoff_center: Tuple (x, y) representing the center of the faceoff circle.
    - faceoff_radius: Radius of the faceoff circle (default is 15).

    Returns:
    - Dataframe with columns ['Time', 'EntityId', 'CoordsX', 'CoordsY'], one row per
      player inside (or on) the circle. When nobody qualifies an empty dataframe with
      the same columns is returned.
    """
    result_columns = ['Time', 'EntityId', 'CoordsX', 'CoordsY']
    center_x, center_y = faceoff_center

    players_in_circle = []
    for player_id in df['EntityId'].unique():
        if player_id == 1:  # EntityId 1 is the puck, not a player
            continue

        # Index is reset so the label returned by idxmin() can be used positionally
        player_data = df[df['EntityId'] == player_id].reset_index(drop=True)
        closest_row = player_data.iloc[(player_data['Time'] - time_target).abs().idxmin()]

        dist_to_center = np.hypot(closest_row['CoordsX'] - center_x,
                                  closest_row['CoordsY'] - center_y)
        if dist_to_center <= faceoff_radius:
            players_in_circle.append(closest_row)

    # Selecting columns from a frame built from an empty list would raise KeyError
    if not players_in_circle:
        return pd.DataFrame(columns=result_columns)
    return pd.DataFrame(players_in_circle)[result_columns]

# Use the function: players inside the circle centred at (69, -22) (default radius 15) at time 1.64
result = players_within_faceoff_circle(df, 1.64, (69, -22))
print(result)

     Time  EntityId    CoordsX    CoordsY
18  1.632     14086  58.757717 -25.834717
18  1.636      8044  68.926687 -11.563980


def players_close_to_point_line(df, time_target, point_coords, distance_threshold=10):
    """
    Return players close to the line formed by a given point and the puck's position at a specific time.

    The puck (EntityId 1) and every player are each located at their own sample whose
    timestamp is closest to time_target. Distances are perpendicular distances to the
    infinite line through the puck and point_coords; goalies (Position 'G') are skipped.

    Parameters:
    - df: Dataframe containing player and puck data
      (needs 'EntityId', 'Time', 'CoordsX', 'CoordsY', 'Position').
    - time_target: The specific time to filter on.
    - point_coords: Tuple (x, y) representing the given point.
    - distance_threshold: Distance threshold from the line (default is 10).

    Returns:
    - Dataframe with columns ['EntityId', 'CoordsX', 'CoordsY', 'Time'] for the players
      within the threshold; empty (same columns) when nobody qualifies.

    Raises:
    - ValueError: if df contains no puck rows, since no line can be defined.
    """
    result_columns = ['EntityId', 'CoordsX', 'CoordsY', 'Time']

    # Puck position at the sample closest in time to time_target
    puck_data = df[df['EntityId'] == 1]
    if puck_data.empty:
        raise ValueError("df contains no rows for the puck (EntityId == 1)")
    closest_puck_row = puck_data.loc[(puck_data['Time'] - time_target).abs().idxmin()]
    puck_x, puck_y = closest_puck_row['CoordsX'], closest_puck_row['CoordsY']
    point_x, point_y = point_coords

    # Line through the puck and the point in the form A*x + B*y + C = 0
    A = point_y - puck_y
    B = puck_x - point_x
    C = (point_x * puck_y) - (puck_x * point_y)
    line_norm = np.hypot(A, B)  # zero only when the puck sits exactly on the point

    close_players = []
    for player_id in df['EntityId'].unique():
        if player_id == 1:  # Exclude the puck
            continue

        # Index is reset so the label returned by idxmin() can be used positionally
        player_data = df[df['EntityId'] == player_id].reset_index(drop=True)
        closest_row = player_data.iloc[(player_data['Time'] - time_target).abs().idxmin()]

        if closest_row['Position'] == 'G':  # Goalies are not considered
            continue

        if line_norm == 0:
            # Degenerate line: fall back to the distance from the single point
            dist_to_line = np.hypot(closest_row['CoordsX'] - point_x,
                                    closest_row['CoordsY'] - point_y)
        else:
            dist_to_line = abs(A * closest_row['CoordsX'] + B * closest_row['CoordsY'] + C) / line_norm

        if dist_to_line <= distance_threshold:
            close_players.append(closest_row)

    # Selecting columns from a frame built from an empty list would raise KeyError
    if not close_players:
        return pd.DataFrame(columns=result_columns)
    return pd.DataFrame(close_players)[result_columns]

# Use the function: non-goalie players within the default threshold (10) of the line from the puck to (89, 0) at time 1.64
result = players_close_to_point_line(df, 1.64, (89, 0))
print(result)

    EntityId    CoordsX   CoordsY   Time
19      8021  77.855679  7.552948  1.652
19     14077  38.357055  4.038366  1.664
19     14038  80.777729 -0.470452  1.680
18      8028  57.145263 -1.350225  1.612
18     14021  69.023993  0.480203  1.620
18      8040  61.840918  3.551868  1.628


def expected_puck_receive_time(df, player_id, proximity_threshold=3):
    """
    Determine the time a player is expected to receive the puck.

    The player's samples are visited in the order they appear in df. For each one the
    puck sample closest in time is looked up, and the first player sample whose
    (x, y) distance to that puck sample is within proximity_threshold wins.

    Parameters:
    - df: Dataframe containing player and puck data
      (needs 'EntityId', 'Time', 'CoordsX', 'CoordsY').
    - player_id: ID of the player of interest.
    - proximity_threshold: Direct distance threshold between the player and the puck (default is 3).

    Returns:
    - Time (rounded to 2 decimals) at which the player is expected to receive the puck,
      or None when the player never gets within the threshold or df has no puck rows.
      A reception at time 0.0 is a valid result, so callers should test `is not None`.
    """
    PUCK_ID = 1

    player_data = df[df['EntityId'] == player_id]
    puck_data = df[df['EntityId'] == PUCK_ID]
    if puck_data.empty:
        return None

    # Hoisted out of the loop: the puck timestamps do not change per player sample
    puck_times = puck_data['Time'].to_numpy()

    for _, row in player_data.iterrows():
        # Puck sample closest in time to the current player sample
        nearest_pos = np.abs(puck_times - row['Time']).argmin()
        closest_puck_row = puck_data.iloc[nearest_pos]

        separation = np.hypot(row['CoordsX'] - closest_puck_row['CoordsX'],
                              row['CoordsY'] - closest_puck_row['CoordsY'])

        if separation <= proximity_threshold:
            # Returned directly so a time of 0.0 is not mistaken for "not found"
            return round(row['Time'], 2)

    return None

# Sample usage
time_received = expected_puck_receive_time(df, 14086)
# Compare against None explicitly: a reception time of 0.0 is falsy but still a valid result
if time_received is not None:
    print(f"Expected to receive puck at: {time_received} seconds")
else:
    print("Player is not expected to receive the puck within the given thresholds.")

Expected to receive puck at: 2.32 seconds


# print(player_data.head())
# print(puck_data.head())


# print(close_points.head())


def predict_puck_crossing(df, time_target, entity_id=1, goal_x=89, net_y_bounds=(-3, 3), net_z_bounds=(0, 4), time_tolerance=0.01):
    """
    Predict the puck's crossing point at the given goal x-coordinate based on its velocity and position at a specified time.

    The travel time to goal_x is estimated from the current x-velocity alone
    (x-acceleration is ignored). Y and Z are then advanced with constant-acceleration
    kinematics: pos + vel * t + 0.5 * accel * t**2.

    Parameters:
    - df: Dataframe containing puck data (needs 'EntityId', 'Time', 'CoordsX/Y/Z',
      'VelX/Y/Z', 'AccelX/Y/Z').
    - time_target: The target time for the puck's data.
    - entity_id: ID of the puck (default is 1).
    - goal_x: x-coordinate of the goal line (default is 89).
    - net_y_bounds: y-coordinate boundaries of the net (default is (-3, 3)).
    - net_z_bounds: z-coordinate boundaries of the net (default is (0, 4)).
    - time_tolerance: Tolerance around the target time (default is 0.01).

    Returns:
    - None. Prints the expected crossing point and whether it would be on the net, or a
      message when the puck is not moving toward goal_x.

    Raises:
    - ValueError: if no sample of entity_id lies within time_tolerance of time_target.
    """
    # Samples of the puck within the tolerance window, ordered by time
    in_window = ((df['EntityId'] == entity_id) &
                 (df['Time'] >= time_target - time_tolerance) &
                 (df['Time'] <= time_target + time_tolerance))
    puck_data_range = df[in_window].sort_values(by='Time')
    if puck_data_range.empty:
        raise ValueError(
            f"No sample for EntityId {entity_id} within {time_tolerance} of time {time_target}"
        )

    # Sample closest to the target time (positional lookup, independent of index labels)
    closest_pos = (puck_data_range['Time'] - time_target).abs().to_numpy().argmin()
    puck_data = puck_data_range.iloc[closest_pos]

    x1, y1, z1 = puck_data['CoordsX'], puck_data['CoordsY'], puck_data['CoordsZ']
    vx, vy, vz = puck_data['VelX'], puck_data['VelY'], puck_data['VelZ']
    acc_y, acc_z = puck_data['AccelY'], puck_data['AccelZ']

    # A zero x-velocity never reaches the goal line; a negative travel time means the
    # puck is moving away from it. Neither case has a meaningful crossing point.
    if vx == 0 or (goal_x - x1) / vx < 0:
        print(f"Puck is not moving toward CoordsX = {goal_x}; no crossing point predicted.")
        return

    # Time it would take for the puck to reach goal_x at its current x-velocity
    t = (goal_x - x1) / vx

    # Constant-acceleration displacement. Using the end-of-interval velocity for the
    # whole interval would count the acceleration term twice.
    y_cross = y1 + vy * t + 0.5 * acc_y * t ** 2
    z_cross = z1 + vz * t + 0.5 * acc_z * t ** 2

    # Check if the puck would be on net
    on_net_y = net_y_bounds[0] <= y_cross <= net_y_bounds[1]
    on_net_z = net_z_bounds[0] <= z_cross <= net_z_bounds[1]
    on_net = on_net_y and on_net_z

    print(f"Expected crossing point: CoordsX = {goal_x}, CoordsY = {y_cross:.2f}, CoordsZ = {z_cross:.2f}")
    print(f"Y-coordinate on net? {'Yes' if on_net_y else 'No'}")
    print(f"Z-coordinate on net? {'Yes' if on_net_z else 'No'}")
    print(f"Would it be on net overall? {'Yes' if on_net else 'No'}")

# Usage example: predict the goal-line crossing of the default entity (EntityId 1) from its state at time 2.424
predict_puck_crossing(df, 2.424)

Expected crossing point: CoordsX = 89, CoordsY = -9.68, CoordsZ = 10.60
Y-coordinate on net? No
Z-coordinate on net? No
Would it be on net overall? No


def predict_puck_crossing(df, time_target, num_samples=5, entity_id=1, goal_x=89, net_y_bounds=(-3, 3), net_z_bounds=(0, 4)):
    """
    Predict the puck's crossing point at the goal line by linear extrapolation of its recent path.

    The last num_samples samples at or before time_target are used to fit two straight
    lines, CoordsY(Time) and CoordsZ(Time). The arrival time at goal_x is estimated
    from the latest of those samples (its CoordsX, VelX and Time), and the fitted lines
    are evaluated at that arrival time.

    Parameters:
    - df: Dataframe containing puck data (needs 'EntityId', 'Time', 'CoordsX',
      'CoordsY', 'CoordsZ', 'VelX').
    - time_target: Only samples with Time <= time_target are used.
    - num_samples: Number of most recent samples used for the fit (default is 5).
    - entity_id: ID of the puck (default is 1).
    - goal_x: x-coordinate of the goal line (default is 89).
    - net_y_bounds: y-coordinate boundaries of the net (default is (-3, 3)).
    - net_z_bounds: z-coordinate boundaries of the net (default is (0, 4)).

    Returns:
    - None. Prints the expected crossing point and whether it would be on the net, or a
      message when the puck is not moving toward goal_x.

    Raises:
    - ValueError: if no usable sample of entity_id exists at or before time_target.
    """
    # Rows with missing values in the columns used below cannot be fitted
    required = ['Time', 'CoordsX', 'CoordsY', 'CoordsZ', 'VelX']

    # Most recent samples up to the target time; sorted so tail() really is "latest"
    puck_data = (
        df[(df['EntityId'] == entity_id) & (df['Time'] <= time_target)]
        .dropna(subset=required)
        .sort_values(by='Time')
        .tail(num_samples)
    )
    if puck_data.empty:
        raise ValueError(
            f"No usable sample for EntityId {entity_id} at or before time {time_target}"
        )

    latest = puck_data.iloc[-1]

    # A zero x-velocity never reaches the goal line; a negative travel time means the
    # puck is moving away from it. Neither case has a meaningful crossing point.
    if latest['VelX'] == 0 or (goal_x - latest['CoordsX']) / latest['VelX'] < 0:
        print(f"Puck is not moving toward CoordsX = {goal_x}; no crossing point predicted.")
        return

    X = puck_data['Time'].values.reshape(-1, 1)

    # Fit models for CoordsY and CoordsZ
    y_model = LinearRegression().fit(X, puck_data['CoordsY'])
    z_model = LinearRegression().fit(X, puck_data['CoordsZ'])

    # Travel time from the latest sample, so the arrival time is anchored at that
    # sample's own timestamp (which can be earlier than time_target)
    time_to_goal = (goal_x - latest['CoordsX']) / latest['VelX']
    predict_time = latest['Time'] + time_to_goal

    # Predict the puck's position using the models
    y_cross = y_model.predict([[predict_time]])[0]
    z_cross = z_model.predict([[predict_time]])[0]

    # Check if the puck would be on net
    on_net_y = net_y_bounds[0] <= y_cross <= net_y_bounds[1]
    on_net_z = net_z_bounds[0] <= z_cross <= net_z_bounds[1]
    on_net = on_net_y and on_net_z

    print(f"Expected crossing point: CoordsX = {goal_x}, CoordsY = {y_cross:.2f}, CoordsZ = {z_cross:.2f}")
    print(f"Y-coordinate on net? {'Yes' if on_net_y else 'No'}")
    print(f"Z-coordinate on net? {'Yes' if on_net_z else 'No'}")
    print(f"Would it be on net overall? {'Yes' if on_net else 'No'}")

# Usage example: the call resolves to the most recent definition of predict_puck_crossing; samples up to time 2.424 are used
predict_puck_crossing(df, 2.424)

Expected crossing point: CoordsX = 89, CoordsY = -4.37, CoordsZ = 2.07
Y-coordinate on net? No
Z-coordinate on net? Yes
Would it be on net overall? No


def individual_area_covered(df, team='B', min_area_threshold=0.2):
    """
    Calculate the areas covered by each individual player of a specified team.
    Identify the players who covered maximum and minimum areas.

    A player's area is the area of the convex hull of all (CoordsX, CoordsY) positions
    recorded for that player. Goalies (Position 'G') and EntityId 1 are excluded.
    Players with fewer than 3 positions, or whose positions cannot form a hull
    (e.g. all on one line), get an area of 0.

    Parameters:
    - df: DataFrame containing player data
      (needs 'Team', 'Position', 'EntityId', 'CoordsX', 'CoordsY').
    - team: Team for which the areas need to be calculated (default is 'B').
    - min_area_threshold: Areas at or below this value are treated as zero when picking
      the minimum (default is 0.2).

    Returns:
    - None. Prints the EntityId of the player with the maximum and minimum non-zero area
      coverage, or a message when the team has no eligible players.
    """
    # Imported here so the function does not depend on an extra file-level import
    from scipy.spatial import QhullError

    skaters = df[(df['Team'] == team) & (df['Position'] != 'G') & (df['EntityId'] != 1)]

    areas = {}
    # sort=False keeps players in order of first appearance
    for player_id, positions in skaters.groupby('EntityId', sort=False):
        points = positions[['CoordsX', 'CoordsY']].values

        area = 0
        if len(points) >= 3:  # A polygon needs at least 3 points
            try:
                # For 2-D input ConvexHull.volume is the enclosed area
                area = ConvexHull(points).volume
            except (QhullError, ValueError):
                # Degenerate input (e.g. collinear or repeated points): no area
                area = 0
        areas[player_id] = area

    if not areas:
        print(f"No players found for team {team}.")
        return

    max_area_player = max(areas, key=areas.get)

    # Only players above the threshold compete for the minimum
    non_zero_players = [player for player, area in areas.items() if area > min_area_threshold]
    min_area_player = min(non_zero_players, key=areas.get) if non_zero_players else None

    print(f"Player with EntityId {max_area_player} has covered the maximum area of {areas[max_area_player]:.2f} feet^2.")

    # Compared against None so that an EntityId of 0 would still be reported
    if min_area_player is not None:
        print(f"Player with EntityId {min_area_player} has covered the minimum non-zero area of {areas[min_area_player]:.2f} feet^2.")
    else:
        print("No player has covered a non-zero area.")

# Sample usage: per-player areas for the default team ('B')
individual_area_covered(df)

Player with EntityId 8028 has covered the maximum area of 81.89 feet^2.
Player with EntityId 8044 has covered the minimum non-zero area of 6.65 feet^2.


#Total Area
def calculate_team_area(df, team='B', threshold_velocity=0.7, start_time=None, end_time=None):
    """
    Calculate the area covered by active players of a specified team, excluding goalies.

    The area is that of one convex hull around every (CoordsX, CoordsY) position
    recorded for the team's active players inside the time window. A player is active
    when the mean of sqrt(VelX**2 + VelY**2) over the window exceeds threshold_velocity.

    Parameters:
    - df: DataFrame containing player data (needs 'Team', 'EntityId', 'Position',
      'Time', 'CoordsX', 'CoordsY', 'VelX', 'VelY').
    - team: Team for which the area needs to be calculated (default is 'B').
    - threshold_velocity: Minimum average velocity to consider a player as active (default is 0.7).
    - start_time: Start of the time window, inclusive (default None = earliest Time in df).
    - end_time: End of the time window, inclusive (default None = latest Time in df).

    Returns:
    - None. Prints the per-player average velocities, then the total area covered by
      active players of the team.
    """
    # Imported here so the function does not depend on an extra file-level import
    from scipy.spatial import QhullError

    # Default time bounds span the entire play
    if start_time is None:
        start_time = df['Time'].min()
    if end_time is None:
        end_time = df['Time'].max()

    # Relevant team and time window, excluding goalies and the puck (EntityId 1)
    in_scope = ((df['Team'] == team) & (df['EntityId'] != 1) & (df['Position'] != 'G')
                & (df['Time'] >= start_time) & (df['Time'] <= end_time))
    team_data = df[in_scope]

    # Average planar speed per player over the window
    speed = np.sqrt(team_data['VelX'] ** 2 + team_data['VelY'] ** 2)
    avg_velocities = speed.groupby(team_data['EntityId']).mean()
    active_players = avg_velocities[avg_velocities > threshold_velocity].index

    active_team_data = team_data[team_data['EntityId'].isin(active_players)]
    # Printed so the chosen threshold can be checked against the actual averages
    print(avg_velocities)

    # One hull over all positions of the active players
    area = 0  # Stays 0 when a hull cannot be formed
    if len(active_team_data) >= 3:  # At least 3 points required for a convex hull
        try:
            # For 2-D input ConvexHull.volume is the enclosed area
            area = ConvexHull(active_team_data[['CoordsX', 'CoordsY']].values).volume
        except QhullError:
            # Degenerate input (e.g. all positions on one line): no area
            area = 0

    print(f"Total area covered by active players on team {team} (excluding goalie): {area:.2f} feet^2")

# Sample usage: hull area for team 'B' with the default velocity threshold
calculate_team_area(df, team='B')

EntityId
8008    0.127664
8014    0.158226
8017    0.091376
8020    0.081666
8021    4.515679
8022    0.034872
8026    0.006275
8027    0.208694
8028    8.261210
8032    0.061081
8040    5.209823
8044    3.264906
8054    0.042345
8055    0.579712
8068    0.171085
8071    0.153664
8072    0.599163
8077    0.320985
dtype: float64
Total area covered by active players on team B (excluding goalie): 509.33 feet^2

	EntityId	CoordsX	CoordsY	CoordsZ	VelX	VelY	VelZ	AccelX	AccelY	AccelZ	JerseyNum	Time
count	866.000000	865.000000	865.000000	864.000000	865.000000	865.000000	863.000000	865.000000	865.000000	863.000000	616.000000	866.000000
mean	7998.729792	47.311950	-0.926946	4.077169	5.239159	-0.972334	0.010439	-0.414187	-2.498931	-0.623657	49.756494	1.995210
std	5694.952952	42.097904	22.438681	2.096739	11.051092	14.651737	4.467737	175.117528	243.649558	242.359950	26.844917	1.156147
min	1.000000	-87.531334	-47.490429	0.000000	-7.961270	-41.830900	-39.766704	-2744.232688	-3424.438536	-2052.675233	7.000000	0.000000
25%	1.000000	35.592000	-15.061167	4.148106	-0.003631	-2.863803	-0.331370	-5.282626	-10.459871	-3.507098	28.000000	0.997000
50%	8040.000000	60.169978	-1.378924	4.933929	2.195820	-0.006234	0.000000	0.001555	-0.087746	0.000000	40.000000	1.994000
75%	14038.000000	75.173162	4.153883	5.307729	6.775000	3.869563	0.309666	6.153132	4.739852	2.467336	77.000000	2.999000
max	14091.000000	86.891065	46.602153	7.263885	104.454824	81.638760	22.686833	1766.445784	3112.183039	2452.579493	91.000000	3.996000

NHL Gameplay Analysis Using Python¶

Project Highlights:¶

Key Learnings:¶

Question 1: Give a detailed description of what is happening in the play.¶

Question 1:¶

Further EDA¶

Question 2:¶

At time = 1.64, which players do you expect to be in the right faceoff circle in the offensive zone (circle centered at (CoordsX,CoordsY)=(69,-22))? What would their coordinates (CoordsX, CoordsY) be?¶

Question 3:¶

At time = 1.64, which players (not including the goalies) do you expect to be within 10 feet of the straight line connecting the puck to the point (CoordsX, CoordsY)=(89,0)? What would their coordinates (CoordsX, CoordsY) be?¶

Question 4:¶

At what time do you expect the player EntityId=14086 to have received the puck?¶

Question 5:¶

Question 6:¶

What is the maximum area and minimum area covered by the players on team B not including team B's goalie (in feet^2)? We define the area as the portion of the ice enclosed by the players on team B.¶

	EntityId	CoordsX	CoordsY	CoordsZ	VelX	VelY	VelZ	AccelX	AccelY	AccelZ	Position	JerseyNum	Team	Time
0	8021	73.272591	2.351175	5.243467	6.023007	3.959640	-0.310185	-4.205320	-2.372893	-3.277896	D	21.0	B	0.000
1	14077	29.869501	-24.063186	4.822356	8.805199	11.446608	1.473480	7.723395	-2.324801	-0.313997	D	77.0	A	0.004
2	1	32.768152	-20.286515	0.000000	6.004303	14.086539	0.000000	-67.633571	59.767687	0.000000	NaN	NaN	NaN	0.008
3	14088	-86.818391	-0.040823	5.782036	-0.757721	0.633295	0.212986	3.618049	-7.766732	1.938287	G	88.0	A	0.016
4	1	32.848668	-20.058244	0.000000	5.032239	14.266853	0.000000	-60.753913	11.269588	0.000000	NaN	NaN	NaN	0.024
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
861	8040	73.650126	-1.378924	5.232279	4.251730	-2.838572	-1.161895	-2.971638	-1.488420	-14.729052	RW	40.0	B	3.980
862	8022	13.436602	45.815682	5.154806	-0.079027	-0.093668	0.055335	-0.081229	-0.085687	0.057494	RW	22.0	B	3.984
863	14086	74.073498	-28.027884	4.674300	10.829922	-4.591188	0.463799	-4.546898	-17.861194	7.867935	RW	86.0	A	3.988
864	1	85.200855	-37.395350	0.323678	-2.525309	-0.907093	-9.207382	-192.886119	390.098348	-1130.737522	NaN	NaN	NaN	3.992
865	8044	74.960319	-16.304413	5.487128	6.808158	-4.734654	1.006715	9.633872	-6.497119	0.930741	D	44.0	B	3.996