Tools Utilized: Python, Pandas, NumPy, Matplotlib, Seaborn, SciPy, Sympy
Gained a deeper understanding of data visualization techniques, spatial data analysis, and the real-world application of geometric algorithms to sports analytics.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import Arc, Circle, FancyBboxPatch
from matplotlib.lines import Line2D
from scipy.spatial import ConvexHull, distance
from sklearn.linear_model import LinearRegression
# Read the CSV file into a pandas DataFrame
df = pd.read_csv('take_home.csv')
# Bare expression: displays the DataFrame when run as the last line of a notebook cell.
df
EntityId | CoordsX | CoordsY | CoordsZ | VelX | VelY | VelZ | AccelX | AccelY | AccelZ | Position | JerseyNum | Team | Time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 8021 | 73.272591 | 2.351175 | 5.243467 | 6.023007 | 3.959640 | -0.310185 | -4.205320 | -2.372893 | -3.277896 | D | 21.0 | B | 0.000 |
1 | 14077 | 29.869501 | -24.063186 | 4.822356 | 8.805199 | 11.446608 | 1.473480 | 7.723395 | -2.324801 | -0.313997 | D | 77.0 | A | 0.004 |
2 | 1 | 32.768152 | -20.286515 | 0.000000 | 6.004303 | 14.086539 | 0.000000 | -67.633571 | 59.767687 | 0.000000 | NaN | NaN | NaN | 0.008 |
3 | 14088 | -86.818391 | -0.040823 | 5.782036 | -0.757721 | 0.633295 | 0.212986 | 3.618049 | -7.766732 | 1.938287 | G | 88.0 | A | 0.016 |
4 | 1 | 32.848668 | -20.058244 | 0.000000 | 5.032239 | 14.266853 | 0.000000 | -60.753913 | 11.269588 | 0.000000 | NaN | NaN | NaN | 0.024 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
861 | 8040 | 73.650126 | -1.378924 | 5.232279 | 4.251730 | -2.838572 | -1.161895 | -2.971638 | -1.488420 | -14.729052 | RW | 40.0 | B | 3.980 |
862 | 8022 | 13.436602 | 45.815682 | 5.154806 | -0.079027 | -0.093668 | 0.055335 | -0.081229 | -0.085687 | 0.057494 | RW | 22.0 | B | 3.984 |
863 | 14086 | 74.073498 | -28.027884 | 4.674300 | 10.829922 | -4.591188 | 0.463799 | -4.546898 | -17.861194 | 7.867935 | RW | 86.0 | A | 3.988 |
864 | 1 | 85.200855 | -37.395350 | 0.323678 | -2.525309 | -0.907093 | -9.207382 | -192.886119 | 390.098348 | -1130.737522 | NaN | NaN | NaN | 3.992 |
865 | 8044 | 74.960319 | -16.304413 | 5.487128 | 6.808158 | -4.734654 | 1.006715 | 9.633872 | -6.497119 | 0.930741 | D | 44.0 | B | 3.996 |
866 rows × 14 columns
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()
EntityId | CoordsX | CoordsY | CoordsZ | VelX | VelY | VelZ | AccelX | AccelY | AccelZ | JerseyNum | Time | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 866.000000 | 865.000000 | 865.000000 | 864.000000 | 865.000000 | 865.000000 | 863.000000 | 865.000000 | 865.000000 | 863.000000 | 616.000000 | 866.000000 |
mean | 7998.729792 | 47.311950 | -0.926946 | 4.077169 | 5.239159 | -0.972334 | 0.010439 | -0.414187 | -2.498931 | -0.623657 | 49.756494 | 1.995210 |
std | 5694.952952 | 42.097904 | 22.438681 | 2.096739 | 11.051092 | 14.651737 | 4.467737 | 175.117528 | 243.649558 | 242.359950 | 26.844917 | 1.156147 |
min | 1.000000 | -87.531334 | -47.490429 | 0.000000 | -7.961270 | -41.830900 | -39.766704 | -2744.232688 | -3424.438536 | -2052.675233 | 7.000000 | 0.000000 |
25% | 1.000000 | 35.592000 | -15.061167 | 4.148106 | -0.003631 | -2.863803 | -0.331370 | -5.282626 | -10.459871 | -3.507098 | 28.000000 | 0.997000 |
50% | 8040.000000 | 60.169978 | -1.378924 | 4.933929 | 2.195820 | -0.006234 | 0.000000 | 0.001555 | -0.087746 | 0.000000 | 40.000000 | 1.994000 |
75% | 14038.000000 | 75.173162 | 4.153883 | 5.307729 | 6.775000 | 3.869563 | 0.309666 | 6.153132 | 4.739852 | 2.467336 | 77.000000 | 2.999000 |
max | 14091.000000 | 86.891065 | 46.602153 | 7.263885 | 104.454824 | 81.638760 | 22.686833 | 1766.445784 | 3112.183039 | 2452.579493 | 91.000000 | 3.996000 |
# Number of missing values per column, largest first.
df.isna().sum().sort_values(ascending=False)
Position 250 JerseyNum 250 Team 250 VelZ 3 AccelZ 3 CoordsZ 2 CoordsX 1 CoordsY 1 VelX 1 VelY 1 AccelX 1 AccelY 1 EntityId 0 Time 0 dtype: int64
# Flag the rows that have no CoordsX value, print them for inspection,
# then keep only the rows where CoordsX is present.
coords_x_missing = df['CoordsX'].isna()
print(df[coords_x_missing])
df = df[~coords_x_missing]
EntityId CoordsX CoordsY CoordsZ VelX VelY VelZ AccelX AccelY \ 846 8054 NaN NaN NaN NaN NaN NaN NaN NaN AccelZ Position JerseyNum Team Time 846 NaN D 54.0 B 3.908
# Re-check the per-column missing-value counts after dropping rows without CoordsX.
df.isna().sum().sort_values(ascending=False)
Position 250 JerseyNum 250 Team 250 VelZ 2 AccelZ 2 CoordsZ 1 EntityId 0 CoordsX 0 CoordsY 0 VelX 0 VelY 0 AccelX 0 AccelY 0 Time 0 dtype: int64
# # Check for outliers using IQR method
# def check_outliers(column):
# Q1 = df[column].quantile(0.25)
# Q3 = df[column].quantile(0.75)
# IQR = Q3 - Q1
# lower_boundary = Q1 - 1.5 * IQR
# upper_boundary = Q3 + 1.5 * IQR
# outliers = df[(df[column] < lower_boundary) | (df[column] > upper_boundary)]
# return outliers
# outliers_accelX = check_outliers('AccelX')
# outliers_accelY = check_outliers('AccelY')
# outliers_accelZ = check_outliers('AccelZ')
# print("Outliers in AccelX:\n", outliers_accelX)
# print("Outliers in AccelY:\n", outliers_accelY)
# print("Outliers in AccelZ:\n", outliers_accelZ)
# # Visualizing outliers using boxplots
# plt.figure(figsize=(10, 6))
# sns.boxplot(data=df)
# plt.title("Boxplot of AccelX, AccelY, and AccelZ")
# plt.show()
# Flag the rows that have no VelX value, print them for inspection,
# then keep only the rows where VelX is present.
vel_x_missing = df['VelX'].isna()
print(df[vel_x_missing])
df = df[~vel_x_missing]
Empty DataFrame Columns: [EntityId, CoordsX, CoordsY, CoordsZ, VelX, VelY, VelZ, AccelX, AccelY, AccelZ, Position, JerseyNum, Team, Time] Index: []
# Colour used for each team. Missing teams are real NaN values (not the
# string 'NaN'), so the 'NaN' key never matches; those rows are coloured by
# the fillna('gray') fallback below.
team_colors = {
    'A': 'blue',
    'B': 'red',
    'NaN': 'gray',
}
colors = df['Team'].map(team_colors).fillna('gray')

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(df['CoordsX'], df['CoordsY'], df['CoordsZ'], c=colors, marker='o')

# One legend entry per team, in sorted order so the legend is stable between
# runs. Rows with no team get a single 'Puck' entry instead of one or more
# 'nan' labels (iterating set(df['Team']) would include the NaN objects).
team_labels = sorted(df['Team'].dropna().unique())
if df['Team'].isna().any():
    team_labels.append('Puck')
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label=team, markersize=10,
           markerfacecolor=team_colors.get(team, 'gray'))
    for team in team_labels
]
ax.legend(handles=legend_elements)

ax.set_xlabel('CoordsX')
ax.set_ylabel('CoordsY')
ax.set_zlabel('CoordsZ')
ax.set_title('3D Scatter plot of Entity Coordinates by Team')
plt.show()
# Function to draw NHL rink with goalie creases
def draw_nhl_rink(ax=None, color='gray'):
    """
    Draw a simplified hockey rink outline on a Matplotlib axes.

    The drawing consists of a rounded 200 x 85 boundary centred on the origin,
    goal lines at x = +/-89, blue lines at x = +/-25, a centre line and centre
    circle, four faceoff circles at (+/-69, +/-22) with green dots, and a
    half-circle crease at each goal line opening toward centre ice.

    Parameters:
    - ax: Axes to draw on; a new 10 x 6 figure is created when None.
    - color: Colour used for every line and outline (default is 'gray').

    Returns:
    - The axes that were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))
    ax.set_facecolor('azure')

    # Shared look for every unfilled outline.
    outline = dict(edgecolor=color, facecolor='none', lw=2)

    # Rink boundary with rounded corners
    ax.add_patch(FancyBboxPatch((-100, -42.5), 200, 85,
                                boxstyle="round,pad=0,rounding_size=23", **outline))

    # Faceoff circles in both end zones, each with a dot at its centre
    for circle_x in (-69, 69):
        for circle_y in (22, -22):
            ax.add_patch(Circle((circle_x, circle_y), 15, **outline))
        for dot_y in (22, -22):
            ax.plot(circle_x, dot_y, 'o', color='green')

    # Vertical markings as (x position, half height): goal lines stop short of
    # the boards, blue lines and the centre line span the full width.
    vertical_markings = [(-89, 40), (89, 40), (-25, 42.5), (25, 42.5), (0, 42.5)]
    for line_x, half_height in vertical_markings:
        ax.add_line(Line2D([line_x, line_x], [-half_height, half_height], c=color, lw=2))

    ax.add_patch(Circle((0, 0), 15, **outline))

    # Goalie creases: half circles on the goal lines, bulging toward centre ice
    ax.add_patch(Arc((89, 0), width=16, height=16, angle=0, theta1=90, theta2=270,
                     edgecolor=color, lw=2))
    ax.add_patch(Arc((-89, 0), width=16, height=16, angle=0, theta1=270, theta2=90,
                     edgecolor=color, lw=2))

    ax.set_xlim(-105, 105)
    ax.set_ylim(-47.5, 47.5)
    ax.set_aspect('equal', 'box')
    ax.axis('off')
    return ax
# Draw the rink and scatter every tracked sample on top of it
ax = draw_nhl_rink()
colors = df['Team'].map(team_colors).fillna('gray')
scatter = ax.scatter(df['CoordsX'], df['CoordsY'], c=colors, alpha=0.6, edgecolors='w', linewidth=0.5)

# Legend: sorted so the entry order is the same on every run (set iteration
# order is not guaranteed). Rows without a team are labelled 'Puck'.
team_labels = sorted(set(df['Team'].fillna('nan')))
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Puck' if team == 'nan' else team,
           markersize=10, markerfacecolor=team_colors.get(team, 'gray'))
    for team in team_labels
]
ax.legend(handles=legend_elements, loc='upper left')
plt.show()
# # For Portfolio Website Ignore
# def draw_nhl_rink(ax=None, color='gray'):
# if ax is None:
# fig, ax = plt.subplots(figsize=(10, 6))
# ax.set_facecolor('black') # Set axes background to black
# # Rink boundary with rounded corners
# rink = FancyBboxPatch((-100, -42.5), 200, 85, boxstyle="round,pad=0,rounding_size=23", edgecolor=color, facecolor='white', lw=2)
# ax.add_patch(rink)
# left_goal_line = plt.Line2D([-89, -89], [-40, 40], c=color, lw=2)
# right_goal_line = plt.Line2D([89, 89], [-40, 40], c=color, lw=2)
# left_blue_line = plt.Line2D([-25, -25], [-42.5, 42.5], c=color, lw=2)
# right_blue_line = plt.Line2D([25, 25], [-42.5, 42.5], c=color, lw=2)
# center_line = plt.Line2D([0, 0], [-42.5, 42.5], c=color, lw=2)
# center_circle = plt.Circle((0, 0), 15, edgecolor=color, facecolor='none', lw=2)
# left_crease = Arc((89, 0), width=16, height=16, angle=0, theta1=90, theta2=270, edgecolor=color, lw=2)
# right_crease = Arc((-89, 0), width=16, height=16, angle=0, theta1=270, theta2=90, edgecolor=color, lw=2)
# # Offensive and Defensive circles (Left and Right)
# for x in [-69, 69]: # X-coordinates for the left and right circles
# ax.add_patch(Circle((x, 22), 15, edgecolor=color, facecolor='none', lw=2))
# ax.add_patch(Circle((x, -22), 15, edgecolor=color, facecolor='none', lw=2))
# for y in [22, -22]: # Y-coordinates for top and bottom dots
# ax.plot(x, y, 'o', color='green')
# ax.add_line(left_goal_line)
# ax.add_line(right_goal_line)
# ax.add_line(left_blue_line)
# ax.add_line(right_blue_line)
# ax.add_line(center_line)
# ax.add_patch(center_circle)
# ax.add_patch(left_crease)
# ax.add_patch(right_crease)
# ax.set_xlim(-105, 105)
# ax.set_ylim(-47.5, 47.5)
# ax.set_aspect('equal', 'box')
# ax.axis('off')
# return ax
# # For Portfolio Website
# # Calculate the figure size in inches
# desired_width_px, desired_height_px = 500, 500
# dpi = 100 # Adjust this value as needed
# fig_width_in = desired_width_px / dpi
# fig_height_in = desired_height_px / dpi
# fig, ax = plt.subplots(figsize=(fig_width_in, fig_height_in))
# ax = draw_nhl_rink(ax=ax) # Using the existing 'ax'
# colors = df['Team'].map(team_colors).fillna('gray')
# scatter = ax.scatter(df['CoordsX'], df['CoordsY'], c=colors, alpha=0.6, edgecolors='w', linewidth=0.5)
# # Legend
# legend_elements = [Line2D([0], [0], marker='o', color='w', label='Puck' if team == 'nan' else team, markersize=10, markerfacecolor=team_colors.get(team, 'gray'))
# for team in set(df['Team'].fillna('nan'))]
# ax.legend(handles=legend_elements, loc='upper left')
# plt.tight_layout()
# plt.savefig('rink_image.png', dpi=dpi) # Save the figure with a specific DPI
# plt.show()
# For Portfolio Website
# Figure size in inches is the desired pixel size divided by the DPI
desired_width_px, desired_height_px = 500, 500
dpi = 100  # Adjust this value as needed
fig_width_in = desired_width_px / dpi
fig_height_in = desired_height_px / dpi
fig, ax = plt.subplots(figsize=(fig_width_in, fig_height_in))
ax = draw_nhl_rink(ax=ax)  # Using the existing 'ax'

# Set only the figure's background color to black
fig.set_facecolor('black')

colors = df['Team'].map(team_colors).fillna('gray')
scatter = ax.scatter(df['CoordsX'], df['CoordsY'], c=colors, alpha=0.6, edgecolors='w', linewidth=0.5)

# Legend: sorted so the entry order is the same on every run (set iteration
# order is not guaranteed). Rows without a team are labelled 'Puck'.
team_labels = sorted(set(df['Team'].fillna('nan')))
legend_elements = [
    Line2D([0], [0], marker='o', color='w', label='Puck' if team == 'nan' else team,
           markersize=10, markerfacecolor=team_colors.get(team, 'gray'))
    for team in team_labels
]
ax.legend(handles=legend_elements, loc='upper left')
plt.tight_layout()

# Pass the figure facecolor explicitly so the black background is written to
# the file even on Matplotlib versions whose savefig default is white.
plt.savefig('rink_image.png', dpi=dpi, facecolor=fig.get_facecolor())
plt.show()
def plot_entity_coords_over_time(df, entity_id, coord='CoordsZ'):
    """
    Show how one coordinate of a single entity changes over time.

    Parameters:
    - df: Dataframe containing entity data.
    - entity_id: EntityId whose rows are plotted.
    - coord: Name of the column to plot against 'Time' (default is 'CoordsZ').

    Returns:
    - Nothing; the plot is displayed.
    """
    # Keep only the rows belonging to the requested entity
    track = df.loc[df['EntityId'] == entity_id]

    _, axes = plt.subplots()
    axes.plot(track['Time'], track[coord], marker='o', color='blue', linestyle='-')
    axes.set(
        xlabel='Time',
        ylabel=coord,
        title=f'{coord} for EntityId == {entity_id} over Time',
    )
    plt.show()
# Plot the Y and then the Z coordinate of EntityId 1 over time
for puck_coord in ('CoordsY', 'CoordsZ'):
    plot_entity_coords_over_time(df, 1, puck_coord)
During the power play, the blue team successfully entered the zone and cycled the puck back to the point. The defenseman then took charge, carrying the puck to the center of the ice and positioning himself as both a shooting threat and a pivotal passing option. He then relayed the puck to the right wing, who appeared to control it briefly before releasing what looked, in my estimation, like either a snapshot or a shot pass. This shot ricocheted off a red team defenseman's skate. Skates typically add "5-6 inches to one's height," as mentioned by rinkresult.com, and the puck's Z-coordinate around the release time (referenced from Question 5) is approximately 0.538010596 ft, i.e. about 6 inches above the ice, which is consistent with contact at skate height. The puck remained airborne for roughly 1.25 seconds after deflecting off the player's skate. The absence of any ensuing puck battle implies that the puck may have ended up in the net. Notably, the opposing winger was late to block the shot, prompting their defenseman to intervene. The bumper player moved closer to the net, anticipating a potential rebound, while another teammate screened the goalie.
Why is the puck 0.5 ft above its starting point? NHL boards typically range from 40 to 48 inches in height, which means that in our scenario the netting would begin at approximately 3.33 feet (40 inches). There is a cluster of points at CoordsZ = 6 that I would like to explore further to determine whether the puck contacted the netting.
# Rows belonging to EntityId == 1
entity_data = df.loc[df['EntityId'] == 1]

# 3D scatter of every recorded position of EntityId == 1
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(
    entity_data['CoordsX'],
    entity_data['CoordsY'],
    entity_data['CoordsZ'],
    c='blue',
    marker='o',
)
for set_axis_label, axis_name in (
    (ax.set_xlabel, 'CoordsX'),
    (ax.set_ylabel, 'CoordsY'),
    (ax.set_zlabel, 'CoordsZ'),
):
    set_axis_label(axis_name)
ax.set_title('3D Scatter plot of Z Coordinate for EntityId == 1')
plt.show()
# Every sample recorded strictly between t = 1.62 and t = 1.66
after_window_start = df['Time'] > 1.62
before_window_end = df['Time'] < 1.66
nearby_data = df[after_window_start & before_window_end]
print(nearby_data)
EntityId CoordsX CoordsY CoordsZ VelX VelY VelZ \ 353 1 42.619913 2.079347 0.000000 26.489989 -38.072351 0.000000 354 8040 61.840918 3.551868 4.682931 3.228459 1.288098 -0.295262 355 14086 58.757717 -25.834717 4.981964 6.318514 2.240820 -0.970498 356 8044 68.926687 -11.563980 4.845452 0.936971 0.978526 -0.199894 357 1 43.007685 1.426259 0.000000 24.235677 -40.817931 0.000000 358 8034 84.723458 0.953518 5.198738 0.146683 1.368064 -1.008279 359 8021 77.855679 7.552948 5.077283 1.210059 4.771175 -0.190689 360 1 43.421595 0.756963 0.000000 25.869335 -41.830900 0.000000 AccelX AccelY AccelZ Position JerseyNum Team Time 353 -17.203639 -115.498206 0.000000 NaN NaN NaN 1.624 354 -4.180771 -2.751472 0.567719 RW 40.0 B 1.628 355 -1.390160 -6.732454 -11.200774 RW 86.0 A 1.632 356 -4.857288 -5.600645 -13.855313 D 44.0 B 1.636 357 -140.894198 -171.598383 0.000000 NaN NaN NaN 1.640 358 -1.341155 -8.714046 -3.177618 G 34.0 B 1.648 359 -1.639178 -1.736494 -4.530988 D 21.0 B 1.652 360 102.103401 -63.310421 0.000000 NaN NaN NaN 1.656
# Samples whose CoordsX lies within `tolerance` of x = 69.
# Both bounds must test CoordsX; comparing 'Time' against the upper bound
# (as before) is always true here and let every row with CoordsX >= 68.5 through.
tolerance = 0.5
Coords = df[df['CoordsX'].between(69 - tolerance, 69 + tolerance)]
print(Coords)
EntityId CoordsX CoordsY CoordsZ VelX VelY VelZ \ 0 8021 73.272591 2.351175 5.243467 6.023007 3.959640 -0.310185 5 14038 77.234042 -7.618020 5.305041 3.886750 1.031621 -0.282560 14 8044 71.622231 -13.645578 5.511122 -2.468628 5.033422 0.397189 16 8034 85.024377 -1.279169 5.597798 -0.833619 0.354787 -0.500028 18 8021 73.748806 2.615554 5.215993 5.669217 3.147376 -0.327071 .. ... ... ... ... ... ... ... 860 1 85.241260 -37.380837 0.470997 0.560875 -7.148680 8.884455 861 8040 73.650126 -1.378924 5.232279 4.251730 -2.838572 -1.161895 863 14086 74.073498 -28.027884 4.674300 10.829922 -4.591188 0.463799 864 1 85.200855 -37.395350 0.323678 -2.525309 -0.907093 -9.207382 865 8044 74.960319 -16.304413 5.487128 6.808158 -4.734654 1.006715 AccelX AccelY AccelZ Position JerseyNum Team Time 0 -4.205320 -2.372893 -3.277896 D 21.0 B 0.000 5 1.796894 5.944007 4.509041 LW 38.0 A 0.028 14 -4.444613 5.559126 8.421545 D 44.0 B 0.068 16 -3.570760 -2.299714 -21.327963 G 34.0 B 0.076 18 -4.211769 -9.669797 -0.201023 D 21.0 B 0.084 .. ... ... ... ... ... ... ... 860 260.087755 -370.457649 1032.800889 NaN NaN NaN 3.976 861 -2.971638 -1.488420 -14.729052 RW 40.0 B 3.980 863 -4.546898 -17.861194 7.867935 RW 86.0 A 3.988 864 -192.886119 390.098348 -1130.737522 NaN NaN NaN 3.992 865 9.633872 -6.497119 0.930741 D 44.0 B 3.996 [346 rows x 14 columns]
# Samples recorded at t = 1.640, allowing a small tolerance on either side
tolerance = 0.0001
at_target_time = df['Time'].between(1.640 - tolerance, 1.640 + tolerance)
filtered_df = df[at_target_time]
print(filtered_df)
EntityId CoordsX CoordsY CoordsZ VelX VelY VelZ \ 357 1 43.007685 1.426259 0.0 24.235677 -40.817931 0.0 AccelX AccelY AccelZ Position JerseyNum Team Time 357 -140.894198 -171.598383 0.0 NaN NaN NaN 1.64
def players_within_faceoff_circle(df, time_target, faceoff_center, faceoff_radius=15):
    """
    Return players within a faceoff circle at a given time.

    Each player is represented by the sample whose timestamp is nearest to
    time_target (players are not sampled at identical times).

    Parameters:
    - df: Dataframe containing player and puck data.
    - time_target: The specific time to filter on.
    - faceoff_center: Tuple (x, y) representing the center of the faceoff circle.
    - faceoff_radius: Radius of the faceoff circle (default is 15).

    Returns:
    - Dataframe with columns Time, EntityId, CoordsX, CoordsY holding one row
      per player inside the circle; an empty dataframe with those columns when
      no player is inside.
    """
    columns = ['Time', 'EntityId', 'CoordsX', 'CoordsY']
    puck_id = 1
    center_x, center_y = faceoff_center

    players_in_circle = []
    for player_id in df['EntityId'].unique():
        if player_id == puck_id:  # Exclude the puck
            continue
        track = df[df['EntityId'] == player_id].reset_index(drop=True)
        if track.empty:  # e.g. a NaN EntityId never equals itself
            continue
        # Sample closest in time to the requested moment
        nearest = track.iloc[(track['Time'] - time_target).abs().idxmin()]
        gap = ((nearest['CoordsX'] - center_x) ** 2 +
               (nearest['CoordsY'] - center_y) ** 2) ** 0.5
        if gap <= faceoff_radius:
            players_in_circle.append(nearest)

    # Selecting columns from pd.DataFrame([]) raises KeyError, so build the
    # empty result explicitly.
    if not players_in_circle:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(players_in_circle)[columns]
# Players inside the faceoff circle centred at (69, -22) around t = 1.64
result = players_within_faceoff_circle(df, time_target=1.64, faceoff_center=(69, -22))
print(result)
Time EntityId CoordsX CoordsY 18 1.632 14086 58.757717 -25.834717 18 1.636 8044 68.926687 -11.563980
def players_close_to_point_line(df, time_target, point_coords, distance_threshold=10):
    """
    Return players close to the line formed by a given point and the puck's position at a specific time.

    The puck and every player are each represented by the sample whose
    timestamp is nearest to time_target. Distance is measured to the infinite
    line through the puck and the point, so players behind the puck or beyond
    the point can qualify. Goalies (Position == 'G') are skipped.

    Parameters:
    - df: Dataframe containing player and puck data.
    - time_target: The specific time to filter on.
    - point_coords: Tuple (x, y) representing the given point.
    - distance_threshold: Distance threshold from the line (default is 10).

    Returns:
    - Dataframe with columns EntityId, CoordsX, CoordsY, Time for the players
      close to the line; an empty dataframe with those columns when there are none.

    Raises:
    - ValueError: if df contains no puck rows (EntityId == 1).
    """
    columns = ['EntityId', 'CoordsX', 'CoordsY', 'Time']
    puck_id = 1
    point_x, point_y = point_coords

    # Puck sample nearest in time to time_target
    puck_track = df[df['EntityId'] == puck_id].reset_index(drop=True)
    if puck_track.empty:
        raise ValueError("df contains no puck rows (EntityId == 1)")
    puck_row = puck_track.iloc[(puck_track['Time'] - time_target).abs().idxmin()]
    puck_x, puck_y = puck_row['CoordsX'], puck_row['CoordsY']

    # Line through the puck and the point in the form a*x + b*y + c = 0
    a = point_y - puck_y
    b = puck_x - point_x
    c = (point_x * puck_y) - (puck_x * point_y)
    norm = np.sqrt(a ** 2 + b ** 2)

    close_players = []
    for player_id in df['EntityId'].unique():
        if player_id == puck_id:  # Exclude the puck
            continue
        track = df[df['EntityId'] == player_id].reset_index(drop=True)
        if track.empty:  # e.g. a NaN EntityId never equals itself
            continue
        nearest = track.iloc[(track['Time'] - time_target).abs().idxmin()]
        if nearest['Position'] == 'G':
            continue
        if norm == 0:
            # Puck sits exactly on the point, so no line is defined; fall back
            # to the plain distance from that point instead of dividing by zero.
            gap = np.sqrt((nearest['CoordsX'] - point_x) ** 2 +
                          (nearest['CoordsY'] - point_y) ** 2)
        else:
            gap = abs(a * nearest['CoordsX'] + b * nearest['CoordsY'] + c) / norm
        if gap <= distance_threshold:
            close_players.append(nearest)

    # Selecting columns from pd.DataFrame([]) raises KeyError, so build the
    # empty result explicitly.
    if not close_players:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(close_players)[columns]
# Players near the line from the puck to the point (89, 0) around t = 1.64
result = players_close_to_point_line(df, time_target=1.64, point_coords=(89, 0))
print(result)
EntityId CoordsX CoordsY Time 19 8021 77.855679 7.552948 1.652 19 14077 38.357055 4.038366 1.664 19 14038 80.777729 -0.470452 1.680 18 8028 57.145263 -1.350225 1.612 18 14021 69.023993 0.480203 1.620 18 8040 61.840918 3.551868 1.628
def expected_puck_receive_time(df, player_id, proximity_threshold=3):
    """
    Determine the time a player is expected to receive the puck.

    Walks through the player's samples in frame order and, for each one,
    compares the player's position with the puck sample nearest in time. The
    first sample at which the two are within proximity_threshold of each other
    (in the X/Y plane) is taken as the receive time.

    Parameters:
    - df: Dataframe containing player and puck data.
    - player_id: ID of the player of interest.
    - proximity_threshold: Direct distance threshold between the player and the puck (default is 3).

    Returns:
    - Time (rounded to 2 decimals) at which the player is expected to receive
      the puck, or None if the player never gets that close or df has no puck rows.
      A receive time of 0.0 is a valid result, so compare against None.
    """
    PUCK_ID = 1
    player_track = df[df['EntityId'] == player_id]
    puck_track = df[df['EntityId'] == PUCK_ID].reset_index(drop=True)
    if puck_track.empty:
        return None

    for _, sample in player_track.iterrows():
        # Puck sample closest in time to this player sample
        nearest_puck = puck_track.iloc[(puck_track['Time'] - sample['Time']).abs().idxmin()]
        gap = np.sqrt((sample['CoordsX'] - nearest_puck['CoordsX']) ** 2 +
                      (sample['CoordsY'] - nearest_puck['CoordsY']) ** 2)
        if gap <= proximity_threshold:
            # Return directly: testing the time for truthiness would discard t == 0.0
            return round(sample['Time'], 2)
    return None
# Sample usage
time_received = expected_puck_receive_time(df, 14086)
# Compare against None explicitly: a receive time of 0.0 is falsy but valid.
if time_received is not None:
    print(f"Expected to receive puck at: {time_received} seconds")
else:
    print("Player is not expected to receive the puck within the given thresholds.")
Expected to receive puck at: 2.32 seconds
# print(player_data.head())
# print(puck_data.head())
# print(close_points.head())
def _time_to_reach(displacement, velocity, acceleration):
    """Return the smallest t >= 0 with velocity*t + 0.5*acceleration*t**2 == displacement, or None."""
    if acceleration == 0:
        if velocity == 0:
            return None
        t = displacement / velocity
        return t if t >= 0 else None
    discriminant = velocity ** 2 + 2 * acceleration * displacement
    if discriminant < 0:
        return None  # the target is never reached under this acceleration
    root = np.sqrt(discriminant)
    candidates = ((-velocity - root) / acceleration, (-velocity + root) / acceleration)
    # NaN inputs fail the comparison and therefore yield None
    reachable = [t for t in candidates if t >= 0]
    return min(reachable) if reachable else None


def predict_puck_crossing(df, time_target, entity_id=1, goal_x=89, net_y_bounds=(-3, 3), net_z_bounds=(0, 4), time_tolerance=0.01):
    """
    Predict the puck's crossing point at the given goal x-coordinate based on its position, velocity and acceleration at a specified time.

    The puck sample nearest to time_target (within time_tolerance) is projected
    forward with constant acceleration: p(t) = p0 + v*t + 0.5*a*t**2. The
    travel time is the first non-negative t at which x reaches goal_x.

    Note: this name is defined a second time further down the file; once that
    definition runs, it replaces this one.

    Parameters:
    - df: Dataframe containing puck data.
    - time_target: The target time for the puck's data.
    - entity_id: ID of the puck (default is 1).
    - goal_x: x-coordinate of the goal line (default is 89).
    - net_y_bounds: y-coordinate boundaries of the net (default is (-3, 3)).
    - net_z_bounds: z-coordinate boundaries of the net (default is (0, 4)).
    - time_tolerance: Tolerance around the target time (default is 0.01).

    Returns:
    - Nothing. Prints the expected crossing point and whether it would be on
      the net, or a short message when no sample is available or the puck is
      not projected to reach goal_x.
    """
    # Puck samples in a window around the target time
    window = df[(df['EntityId'] == entity_id) &
                (df['Time'] >= time_target - time_tolerance) &
                (df['Time'] <= time_target + time_tolerance)].sort_values(by='Time')
    if window.empty:
        print(f"No data for EntityId {entity_id} within {time_tolerance} of Time = {time_target}.")
        return

    # Sample closest to the target time
    sample = window.iloc[(window['Time'] - time_target).abs().to_numpy().argmin()]
    x0, y0, z0 = sample['CoordsX'], sample['CoordsY'], sample['CoordsZ']
    vel_x, vel_y, vel_z = sample['VelX'], sample['VelY'], sample['VelZ']
    acc_x, acc_y, acc_z = sample['AccelX'], sample['AccelY'], sample['AccelZ']

    # Time for the puck to travel from x0 to goal_x
    t = _time_to_reach(goal_x - x0, vel_x, acc_x)
    if t is None:
        print(f"The puck is not projected to reach CoordsX = {goal_x} from its state at Time = {sample['Time']}.")
        return

    # Constant-acceleration displacement; multiplying the *final* velocity by t
    # would count the acceleration term twice.
    y_cross = y0 + vel_y * t + 0.5 * acc_y * t ** 2
    z_cross = z0 + vel_z * t + 0.5 * acc_z * t ** 2

    # Check if the puck would be on net
    on_net_y = net_y_bounds[0] <= y_cross <= net_y_bounds[1]
    on_net_z = net_z_bounds[0] <= z_cross <= net_z_bounds[1]
    on_net = on_net_y and on_net_z

    print(f"Expected crossing point: CoordsX = {goal_x}, CoordsY = {y_cross:.2f}, CoordsZ = {z_cross:.2f}")
    print(f"Y-coordinate on net? {'Yes' if on_net_y else 'No'}")
    print(f"Z-coordinate on net? {'Yes' if on_net_z else 'No'}")
    print(f"Would it be on net overall? {'Yes' if on_net else 'No'}")
# Project the puck from its state at t = 2.424
predict_puck_crossing(df, time_target=2.424)
Expected crossing point: CoordsX = 89, CoordsY = -9.68, CoordsZ = 10.60 Y-coordinate on net? No Z-coordinate on net? No Would it be on net overall? No
def predict_puck_crossing(df, time_target, num_samples=5, entity_id=1, goal_x=89, net_y_bounds=(-3, 3), net_z_bounds=(0, 4)):
    """
    Predict the puck's crossing point at the given goal x-coordinate by extrapolating its recent trajectory.

    Straight lines (CoordsY vs Time and CoordsZ vs Time) are fitted to the last
    num_samples puck samples at or before time_target. The time to reach
    goal_x is estimated from the latest sample's CoordsX and VelX, and the
    fitted lines are evaluated at that future time.

    Parameters:
    - df: Dataframe containing puck data.
    - time_target: The target time; only samples at or before it are used.
    - num_samples: Number of most recent samples used for the fit (default is 5).
    - entity_id: ID of the puck (default is 1).
    - goal_x: x-coordinate of the goal line (default is 89).
    - net_y_bounds: y-coordinate boundaries of the net (default is (-3, 3)).
    - net_z_bounds: z-coordinate boundaries of the net (default is (0, 4)).

    Returns:
    - Nothing. Prints the expected crossing point and whether it would be on
      the net, or a short message when no usable sample exists or the puck is
      not moving toward goal_x.
    """
    # Most recent complete samples leading up to the target time. Rows with
    # missing values are dropped because the regression cannot accept NaN, and
    # the rows are ordered by time so tail() really returns the latest ones.
    history = df[(df['EntityId'] == entity_id) & (df['Time'] <= time_target)]
    history = (history
               .dropna(subset=['Time', 'CoordsX', 'CoordsY', 'CoordsZ', 'VelX'])
               .sort_values('Time', kind='stable')
               .tail(num_samples))
    if history.empty:
        print(f"No usable data for EntityId {entity_id} at or before Time = {time_target}.")
        return

    # Time for the puck to reach the goal line at its latest x-velocity
    latest = history.iloc[-1]
    remaining_x = goal_x - latest['CoordsX']
    vel_x = latest['VelX']
    if vel_x == 0 or remaining_x / vel_x < 0:
        print(f"The puck is not moving toward CoordsX = {goal_x} at Time = {latest['Time']}.")
        return
    predict_time = time_target + remaining_x / vel_x

    # Fit models for CoordsY and CoordsZ as linear functions of time
    X = history['Time'].values.reshape(-1, 1)
    y_model = LinearRegression().fit(X, history['CoordsY'])
    z_model = LinearRegression().fit(X, history['CoordsZ'])

    # Predict the puck's position using the models
    y_cross = y_model.predict([[predict_time]])[0]
    z_cross = z_model.predict([[predict_time]])[0]

    # Check if the puck would be on net
    on_net_y = net_y_bounds[0] <= y_cross <= net_y_bounds[1]
    on_net_z = net_z_bounds[0] <= z_cross <= net_z_bounds[1]
    on_net = on_net_y and on_net_z

    print(f"Expected crossing point: CoordsX = {goal_x}, CoordsY = {y_cross:.2f}, CoordsZ = {z_cross:.2f}")
    print(f"Y-coordinate on net? {'Yes' if on_net_y else 'No'}")
    print(f"Z-coordinate on net? {'Yes' if on_net_z else 'No'}")
    print(f"Would it be on net overall? {'Yes' if on_net else 'No'}")
# Extrapolate the puck's recent trajectory from t = 2.424
predict_puck_crossing(df, time_target=2.424)
Expected crossing point: CoordsX = 89, CoordsY = -4.37, CoordsZ = 2.07 Y-coordinate on net? No Z-coordinate on net? Yes Would it be on net overall? No
The analyses below EXCLUDE players on the bench and in the penalty box.
def individual_area_covered(df, team='B', min_area=0.2):
    """
    Calculate the areas covered by each individual player of a specified team.
    Identify the players who covered maximum and minimum areas.

    A player's area is the area of the convex hull of all of that player's
    (CoordsX, CoordsY) samples. Goalies and the puck are excluded. Players
    whose samples cannot form a hull (fewer than 3 rows, or degenerate points
    such as a straight line) get an area of 0.

    Parameters:
    - df: DataFrame containing player data.
    - team: Team for which the areas need to be calculated (default is 'B').
    - min_area: Areas at or below this value are treated as zero when picking
      the minimum (default is 0.2).

    Returns:
    - Nothing. Prints the EntityId of the player with the maximum and minimum
      non-zero area coverage, or a message if the team has no skaters.
    """
    from scipy.spatial import QhullError

    # Skaters of the requested team: no goalies, no puck
    skaters = df[(df['Team'] == team) & (df['Position'] != 'G') & (df['EntityId'] != 1)]
    if skaters.empty:
        print(f"No skaters found for team {team}.")
        return

    areas = {}
    for player_id, track in skaters.groupby('EntityId', sort=False):
        points = track[['CoordsX', 'CoordsY']].values
        area = 0
        # A polygon needs at least 3 points
        if len(points) >= 3:
            try:
                # For 2-D input, ConvexHull.volume is the enclosed area
                area = ConvexHull(points).volume
            except (QhullError, ValueError):
                # Degenerate input (e.g. collinear or NaN points): no area
                area = 0
        areas[player_id] = area

    max_area_player = max(areas, key=areas.get)
    # Only players above the threshold compete for the minimum
    candidates = [player for player, area in areas.items() if area > min_area]
    min_area_player = min(candidates, key=areas.get) if candidates else None

    print(f"Player with EntityId {max_area_player} has covered the maximum area of {areas[max_area_player]:.2f} feet^2.")
    if min_area_player is not None:
        print(f"Player with EntityId {min_area_player} has covered the minimum non-zero area of {areas[min_area_player]:.2f} feet^2.")
    else:
        print("No player has covered a non-zero area.")
# Report the largest and smallest individual coverage for team B
individual_area_covered(df, team='B')
Player with EntityId 8028 has covered the maximum area of 81.89 feet^2. Player with EntityId 8044 has covered the minimum non-zero area of 6.65 feet^2.
#Total Area
def calculate_team_area(df, team='B', threshold_velocity=0.7):
    """
    Calculate the area covered by active players of a specified team, excluding goalies.

    A player is "active" when the mean of sqrt(VelX**2 + VelY**2) over all of
    that player's samples exceeds threshold_velocity. The area is that of the
    convex hull of every (CoordsX, CoordsY) sample of the active players.

    Parameters:
    - df: DataFrame containing player data.
    - team: Team for which the area needs to be calculated (default is 'B').
    - threshold_velocity: Minimum average velocity to consider a player as active (default is 0.7).

    Returns:
    - Nothing. Prints each player's average velocity, then the total area
      covered by active players of the team (0 when no hull can be formed).
    """
    from scipy.spatial import QhullError

    # Relevant team only, excluding goalies and the puck. Rows without a
    # timestamp are left out, as the previous min/max time-window filter did.
    skaters = df[(df['Team'] == team) & (df['EntityId'] != 1) & (df['Position'] != 'G')
                 & df['Time'].notna()]

    # Average planar speed per player over the entire play
    speeds = np.sqrt(skaters['VelX'] ** 2 + skaters['VelY'] ** 2)
    avg_velocities = speeds.groupby(skaters['EntityId']).mean()
    active_players = avg_velocities[avg_velocities > threshold_velocity].index

    # Samples belonging to active players
    active_team_data = skaters[skaters['EntityId'].isin(active_players)]
    print(avg_velocities)

    # Convex hull of the active players' positions over the entire play
    points = active_team_data[['CoordsX', 'CoordsY']].values
    area = 0  # Cannot form a convex hull with less than 3 points
    if len(points) >= 3:
        try:
            # For 2-D input, ConvexHull.volume is the enclosed area
            area = ConvexHull(points).volume
        except (QhullError, ValueError):
            # Degenerate input (e.g. all points on one line): no area
            area = 0
    print(f"Total area covered by active players on team {team} (excluding goalie): {area:.2f} feet^2")
# Total area covered by team B's active skaters
calculate_team_area(df, 'B')
EntityId 8008 0.127664 8014 0.158226 8017 0.091376 8020 0.081666 8021 4.515679 8022 0.034872 8026 0.006275 8027 0.208694 8028 8.261210 8032 0.061081 8040 5.209823 8044 3.264906 8054 0.042345 8055 0.579712 8068 0.171085 8071 0.153664 8072 0.599163 8077 0.320985 dtype: float64 Total area covered by active players on team B (excluding goalie): 509.33 feet^2