sportowe_wizualizacja/PassClusters.ipynb

812 lines
213 KiB
Plaintext
Raw Normal View History

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "PassClusters.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyPfMQt+YGiFxDzZsdceN8ls",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/koushikkirugulige/Football-Analytics/blob/master/PassClusters.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "8HJxpCQF07Lo",
"colab_type": "code",
"outputId": "71b81ca6-8e11-41ae-bf43-fece6a8d4244",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 153
}
},
"source": [
"!git clone https://github.com/statsbomb/open-data.git"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'open-data'...\n",
"remote: Enumerating objects: 760, done.\u001b[K\n",
"remote: Counting objects: 100% (760/760), done.\u001b[K\n",
"remote: Compressing objects: 100% (367/367), done.\u001b[K\n",
"remote: Total 9482 (delta 640), reused 504 (delta 384), pack-reused 8722\u001b[K\n",
"Receiving objects: 100% (9482/9482), 991.55 MiB | 26.66 MiB/s, done.\n",
"Resolving deltas: 100% (8387/8387), done.\n",
"Checking out files: 100% (1648/1648), done.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "j4gtYmw71qLx",
"colab_type": "code",
"colab": {}
},
"source": [
"#5503\n",
"\n",
"#change this cell to change season and player name\n",
"season = '2009/2010'\n",
"ssn = '5503'\n",
"length = int(20)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "A2aofsrG1VvE",
"colab_type": "code",
"outputId": "e142d619-15ae-4a10-f68e-3bbc45dc567e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"import os\n",
"import json\n",
"from pandas.io.json import json_normalize\n",
"import codecs\n",
"import pandas as pd\n",
"import codecs\n",
"main_df = pd.DataFrame(data=None)\n",
"path_match = \"/content/open-data/data/events/\" #location for play by play events\n",
"for root, dirs, files in os.walk(r'/content/open-data/data/matches'):\n",
" for file in files:\n",
" with open(os.path.join(root, file), \"r\") as auto:\n",
" with codecs.open(root + str('/') + file,encoding='utf-8') as data_file:\n",
" data = json.load(data_file)\n",
" df = pd.DataFrame(data=None)\n",
" df = json_normalize(data, sep = \"_\")\n",
" #for x in df.competition_country_name:\n",
" # if x == 'Spain':\n",
" # print(df.match_id)\n",
" for i in range(len(df)):\n",
" if df.iloc[i]['competition_country_name'] == 'Spain' and df.iloc[i]['season_season_name'] == season :\n",
" match_no = df.iloc[i]['match_id'] #gets match with Spain as country\n",
" match_no = str(match_no) # from int to str \n",
" with codecs.open(path_match + match_no + str(r'.json'),encoding=\"utf8\") as event_file: #open the respective file\n",
" df_match = json.load(event_file)\n",
" df_match2 = pd.DataFrame(data=None)\n",
" df_match2 = json_normalize(df_match,sep=\"_\") \n",
" \n",
" main_df = main_df.append(df_match2,ignore_index=True,sort=False) \n",
"print('Done')"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Done\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "VDmUsMN74fmX",
"colab_type": "code",
"colab": {}
},
"source": [
"messi_df = main_df.query('type_id == 30 & player_id == 5503')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "MwcFd8lx5dza",
"colab_type": "code",
"colab": {}
},
"source": [
"messi_df = messi_df[['location','pass_end_location','pass_height_name','pass_type_name','pass_body_part_name']]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "z_jbQy5Q5uvW",
"colab_type": "code",
"outputId": "b6361e15-f95a-4b2a-fffc-fc35f3d83549",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
}
},
"source": [
"messi_df.head()"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>location</th>\n",
" <th>pass_end_location</th>\n",
" <th>pass_height_name</th>\n",
" <th>pass_type_name</th>\n",
" <th>pass_body_part_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>[29.5, 55.8]</td>\n",
" <td>[33.1, 48.1]</td>\n",
" <td>Low Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>[64.7, 72.8]</td>\n",
" <td>[57.2, 74.5]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>[64.4, 73.7]</td>\n",
" <td>[54.1, 42.1]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>[80.6, 33.0]</td>\n",
" <td>[73.3, 43.8]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Right Foot</td>\n",
" </tr>\n",
" <tr>\n",
" <th>155</th>\n",
" <td>[97.9, 46.4]</td>\n",
" <td>[115.5, 53.8]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" location pass_end_location ... pass_type_name pass_body_part_name\n",
"29 [29.5, 55.8] [33.1, 48.1] ... NaN Left Foot\n",
"99 [64.7, 72.8] [57.2, 74.5] ... NaN Left Foot\n",
"111 [64.4, 73.7] [54.1, 42.1] ... NaN Left Foot\n",
"140 [80.6, 33.0] [73.3, 43.8] ... NaN Right Foot\n",
"155 [97.9, 46.4] [115.5, 53.8] ... NaN Left Foot\n",
"\n",
"[5 rows x 5 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "3UI4LKrw61Jf",
"colab_type": "code",
"colab": {}
},
"source": [
"def draw_pitch(ax):\n",
" # focus on only half of the pitch\n",
" #Pitch Outline & Centre Line\n",
" Pitch = Rectangle([0,0], width = 120, height = 80, fill = False)\n",
" #Left, Right Penalty Area and midline\n",
" LeftPenalty = Rectangle([0,22.3], width = 14.6, height = 35.3, fill = False)\n",
" RightPenalty = Rectangle([105.4,22.3], width = 14.6, height = 35.3, fill = False)\n",
" midline = ConnectionPatch([60,0], [60,80], \"data\", \"data\")\n",
"\n",
" #Left, Right 6-yard Box\n",
" LeftSixYard = Rectangle([0,32], width = 4.9, height = 16, fill = False)\n",
" RightSixYard = Rectangle([115.1,32], width = 4.9, height = 16, fill = False)\n",
"\n",
"\n",
" #Prepare Circles\n",
" centreCircle = plt.Circle((60,40),8.1, fill = False)\n",
" centreSpot = plt.Circle((60,40),0.71)\n",
" #Penalty spots and Arcs around penalty boxes\n",
" leftPenSpot = plt.Circle((9.7,40),0.71)\n",
" rightPenSpot = plt.Circle((110.3,40),0.71)\n",
" leftArc = Arc((9.7,40),height=16.2,width=16.2,angle=0,theta1=310,theta2=50)\n",
" rightArc = Arc((110.3,40),height=16.2,width=16.2,angle=0,theta1=130,theta2=230)\n",
" \n",
" element = [Pitch, LeftPenalty, RightPenalty, midline, LeftSixYard, RightSixYard, centreCircle, \n",
" centreSpot, rightPenSpot, leftPenSpot, leftArc, rightArc]\n",
" for i in element:\n",
" ax.add_patch(i)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "75LxtmrS8MU6",
"colab_type": "code",
"outputId": "bbb6053b-41fb-4824-fbde-41591025b4aa",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 320
}
},
"source": [
"import matplotlib.pyplot as plt\n",
"from matplotlib.patches import Arc, Rectangle, ConnectionPatch\n",
"from matplotlib.offsetbox import OffsetImage\n",
"\n",
"fig=plt.figure() #set up the figures\n",
"fig.set_size_inches(7, 5)\n",
"ax=fig.add_subplot(1,1,1)\n",
"draw_pitch(ax) #overlay our different objects on the pitch\n",
"plt.ylim(-2, 82)\n",
"plt.xlim(-2, 122)\n",
"#plt.plot(x_axis,y_axis,'ro')\n",
"#plt.plot(x,y,'bo')\n",
"#plt.axis('off')\n",
"\n",
"for i in range(len(messi_df)):\n",
" # annotate draw an arrow from a current position to pass_end_location\n",
" ax.annotate(\"\", xy = (messi_df.iloc[i]['pass_end_location'][0], messi_df.iloc[i]['pass_end_location'][1]), xycoords = 'data',\n",
" xytext = (messi_df.iloc[i]['location'][0], messi_df.iloc[i]['location'][1]), textcoords = 'data',\n",
" arrowprops=dict(arrowstyle=\"->\",connectionstyle=\"arc3\", color = \"blue\"),)\n",
"\"\"\"\n",
"for i in range(len(assist)):\n",
" # annotate draw an arrow from a current position to pass_end_location\n",
" ax.annotate(\"\", xy = (assist.iloc[i]['pass_end_location'][0], assist.iloc[i]['pass_end_location'][1]), xycoords = 'data',\n",
" xytext = (assist.iloc[i]['location'][0], assist.iloc[i]['location'][1]), textcoords = 'data',\n",
" arrowprops=dict(arrowstyle=\"->\",connectionstyle=\"arc3\", color = \"red\"),)\n",
"\"\"\"\n",
"\n",
"plt.show()"
],
"execution_count": 8,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAasAAAEvCAYAAAD7MO6jAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOxdeZyNZfu/nrPOcuacObOd2fcxBmMZ\nM9YY2UUJ9SKEUNlSUlSUipK0UUlakPR7XyUlRUlKiZTKni1LyG4Yxmzn/v3x7ep+znOeMzMy1Ps6\n1+fzfGbOs97r9b22+7oVIQT5yU9+8pOf/PRPJsPfXQA/+clPfvKTnyojP1j5yU9+8pOf/vHkBys/\n+clPfvLTP578YOUnP/nJT376x5MfrPzkJz/5yU//ePKDlZ/85Cc/+ekfT6Yr+bGIiAiRnJx8JT/p\nJz/5yU9++gfTDz/8cFwIEVnZfVcUrJKTk+n777+/kp/0k5/85Cc//YNJUZR9VbnPbwb0k5/85Cc/\n/eOpSmClKMo9iqJsURRls6Io7yiKEqAoSoqiKOsURdmlKMq/FUWxXO7C+slPfvKTn65OqhSsFEWJ\nI6K7iChXCFGHiIxE1IuIniKi54QQ6UR0iogGXc6C+slPfvKTn65eqqoZ0EREgYqimIgoiIgOE1Fr\nInr3j+tziejG6i+en/zkJz/56XKSEETNmhENGkT08cdExcV/d4l8kBCi0oOIRhFRIREdI6K3iSiC\niHapricQ0WYfz95ORN8T0feJiYlCj5KSkgQR+Q//4T+IBJFVEAX+A8pR2WH8C+UMEESLBdEIQRRV\nzeWpKYhGCqKugqiBIAr/B7RRVY/agqjVZXhvHUFUrwr3JQqicYJogyAqFEQ/CaKYS/x2iCCaIIie\nFEQkkpKSdPk/EX1fJRyqAlA5iWglEUUSkZmIFhNRX6oiWKmPhg0b+iqs7nk/+el/kd58803Rv39/\nr/MFBUJMmSJEdLQQs2df+XIJIcTrrwuxbp3+NbdbiFWr5O+1a4UICxNi7Fghjhyp+L3FxUI884wQ\nCQlCLF4sRL9+QjgcQjRqJMS4cXh3ZbR5sxAXLvi+/vPPQgwfLkTnzkKkpAhhNAoRGup939atQrz0\nkvf51auF6Nu38nJUB5WXo53HjhUiI0OIxES0z8VSQYH+uSlThIiLE0JRhBgwQF47dkyIJ54Q4qOP\n8L0hQ4Ro2VKIqCghbDYhatdGm2VkCHHihHzuww+FeOSRivuprEyIhx4SYsQIIcLDgS58/Pvfvvl8\ndYLVzUT0uur3rUQ0k4iOE5Hpj3NNiWh5Ze/yg9XVS2+8IcQLL4AJz5olxMsvC7Fhw99dqr+HtGB1\n/LgQEyZggvfuLcTGjZfnu2VlQuzbV/E9n3wiRESEEJ995n3trbeEyM72PLd3rxBDhwrhdAoxerQQ\nhw7Ja6WlQuzZI8TkyUK4XEKkpgrRoYMQ8fEAqoAAIcxmMMeioorLdeQI7n3wQf3rbjeY/4gRQkRG\nopw2mxBLlnje98MPEAYWLJDnysvB3F0uIT7+uOJyXAqVlgqxcqUQI0eiDWrWRH2+/75iENi9GwDP\nZV23TogHHhCiVi20aVkZjuXLhWjXDu1kNgOEpk7FN3r0ECIrC+BltwvRvr0Qd90F0P78cyEOHMDc\njIgQYsYM7/J06YJnIyOFKCyU548eFWL+fCFuvhnCgRqgvI9y3fpVJ1g1JqItBF+VQvBPjSSihUTU\n6497XiGiYZW9yw9WVy81bIjBnp0N6fWOOyDdXY3EYHXwIBi80ynE4MFC7Nz519+5ebMEudOnhTh1\nyvuegwchQRcX4/eqVULcc4/3fV9+Cab03nvy3K+/gpH9+KM853YDaL/7ToiZM4Vo1kwIqxWMOCEB\nDDMgQIjAQCHathXittuEiIkB1+ndG+Dw0EOyPL7I7Rbi+uuFMJmEaN3a89qePUI89pgQNWoIkZ4u\nxKOPQguIihJi2TLPe1evRr0WLZLnjh+HJta0qRD791dcjr9CFy5gnN92G9qvYUOA99atVXt+0SKU\nefRoIe68E+2XlQWwWrNGiE8/FeKmm4QICkL7mM0A6cBAIRo0QDtPnCjEvHlC5OVBy9IDxmXLcP/m\nzfrliItDvzmdQrz7rhDjxwuRk4P+DgqqGKTq1r1CYIV30aNEtJ2INhPRW0RkJaJUIvqOiHb9AVzW\nyt7jB6urlwoKYJpp2xYDftQoIX777e8u1d9DU6cuFDVqfP5nOxw4cGnvc7uFyM0FuKxaBcbyn//o\n39uyJe4rKoKp58MP9e9jDWTWLCG2bROiTh0hunUTYswYIbp3F6J+fUjodjuYXI8eQtx3nxBPPglg\nsVgAVOPHC7FjB0AmOBjSd0AAxoIvc6OWZs0Sol49PG+3AzhnzRLimmsAAMOHC/Htt2iHc+dQbjXQ\nCgFmHBkJ5s60Zg3Mb2PGCFFSUrWyMLnd0NpKS72vnT0Lc2peHhh5ixZCPPccyl1VOnJEiI4d8Xxw\nMOrfq5cQw4YJ0bWrNPEpihAGA4D/jjtQz/37oWllZqKdGzaE1h4a+tcExIkTgRQBAQAniwXvIgI4\n2u2+gSokBFoXfus3clXBSsG9V4Zyc3OFXgYLRVHoSpbDT38PbdpE1Lo10cKFREuWEL35JlHPnkTj\nxhElJf3dpbt4On0akVMuV9Xu37qVaMoUovffv0ApKZ/R559fT5E6SWZKS4kOHyY6eJDIZCLKy6v4\nvR99RPTgg0Q9ehC98gratWNH/XvnzSN68UWi+Hj8XrTI+54dO4iGDiX68UeiU6eIAgOJAgKIbrqJ\nKDXV83A6iRQFz5WXE732GtEjjxB16EAUGko0Zw6RxUJ05gxRzZpERUVEv/5K9NxzeG9ZGepbWur5\nP/8+dQplbt6caM0afKekhCgmBnUIC8N3y8qILlxAmxER2e1E8+cTZWcTvfce0bBhRO+/j6g3IYie\nfZZo6lSU9/rric6eRRnj4irtRiovJxo+nGj9eqJ164g++YRo0iSi9u2J3nqL6MAB3Od2o++ysojS\n0ogeftj7XYcPE91+O9EHHxB99x2eX7YMbWQ2o91LSohq1CByOIhOniTavRv1LijA/LnnHqI6dbzf\nXVCAOo8dS3TsGN61di1R3bqV11FdvthY9LHJhH4hwu/gYKLISJTVFzVvjnYqKSEiKiAhHF73KIry\ngxAit7KyXNF0S366uik7m2jaNEzar74CSD37LFFODtGNNxI98ABRevrfXcrK6dQpAMPXXxM9/TTR\nqFEV3//DD0RPPIH777yTaPToT2nt2m30ySfX08GDRL/9Rh5/T5wAE4iPJ2rZsmKwEgJARUT0+ecQ\nBOx2ed3tJvryS4BG48ZEAwYQ3XEH0ebNACWmsjKA3ssvE/30E1GfPgDWgwcBUoMHE73wggQmLa1a\nhXZwOsFs69fHebudaPJk/L9pE1GtWvjWpElE110HBmg2y4N/W61EQUFgzunpRCtXoq6tWhEZDEQD\nB+K6+pmyMqK5c4l27kTdWrTAeUUh+vRTlOnUKbTB778DHJKSwMjr1iVKTEQ9DhwAOOjRoUNox+PH\nwfg7dCD64guUzWAAGLrdRIsXQ5ARgig3l6hXLzx/7hzK9ssvRNu2Eb3+OtrYaMTzdjvR+fNEnToR\njRyJ859+SvT22wBjRQFoDRsGkNMTdk6eRH+/8grGUlAQwG358osDqg0b0HcWC8DGakV7FhVBQDt1\nCkBlNALAtRQZSdSvH9E33/CZ40TkDVZVpqqoX9V1+M2AfhLC259y4oQQDz8MU0XfvlW351c3rV3r\nGSCgpZMnEcVmt8OERoRouMmTYQ46cgSO8LvvhuknLQ3mGqsVZpqQEPwfGVkgXK5tok8fIe6/H4En\n772H7x84oG9a8kXTp6McsbEwFyUkCNGpEwIlBg2C/6hOHSGefVZGd3XqhEAEtemrb1+8x2qF6Sgv\nD6a7kBAh8vNR/
"text/plain": [
"<Figure size 504x360 with 1 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "UQsuobid9gd5",
"colab_type": "code",
"colab": {}
},
"source": [
"messi_df[['X','Y']] = pd.DataFrame(messi_df.location.values.tolist(), index= messi_df.index)\n",
"messi_df[['endX','endY']] = pd.DataFrame(messi_df.pass_end_location.values.tolist(), index= messi_df.index)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "OxiCT0hn-W4b",
"colab_type": "code",
"outputId": "376ac331-3689-437c-af90-d553c4086510",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
}
},
"source": [
"messi_df"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>location</th>\n",
" <th>pass_end_location</th>\n",
" <th>pass_height_name</th>\n",
" <th>pass_type_name</th>\n",
" <th>pass_body_part_name</th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>endX</th>\n",
" <th>endY</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>[29.5, 55.8]</td>\n",
" <td>[33.1, 48.1]</td>\n",
" <td>Low Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" <td>29.5</td>\n",
" <td>55.8</td>\n",
" <td>33.1</td>\n",
" <td>48.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>[64.7, 72.8]</td>\n",
" <td>[57.2, 74.5]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" <td>64.7</td>\n",
" <td>72.8</td>\n",
" <td>57.2</td>\n",
" <td>74.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>111</th>\n",
" <td>[64.4, 73.7]</td>\n",
" <td>[54.1, 42.1]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" <td>64.4</td>\n",
" <td>73.7</td>\n",
" <td>54.1</td>\n",
" <td>42.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>[80.6, 33.0]</td>\n",
" <td>[73.3, 43.8]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Right Foot</td>\n",
" <td>80.6</td>\n",
" <td>33.0</td>\n",
" <td>73.3</td>\n",
" <td>43.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>155</th>\n",
" <td>[97.9, 46.4]</td>\n",
" <td>[115.5, 53.8]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" <td>97.9</td>\n",
" <td>46.4</td>\n",
" <td>115.5</td>\n",
" <td>53.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127962</th>\n",
" <td>[69.9, 56.5]</td>\n",
" <td>[75.1, 73.7]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" <td>69.9</td>\n",
" <td>56.5</td>\n",
" <td>75.1</td>\n",
" <td>73.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127969</th>\n",
" <td>[85.4, 60.7]</td>\n",
" <td>[90.3, 62.4]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Right Foot</td>\n",
" <td>85.4</td>\n",
" <td>60.7</td>\n",
" <td>90.3</td>\n",
" <td>62.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128096</th>\n",
" <td>[75.3, 47.8]</td>\n",
" <td>[93.0, 45.7]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Right Foot</td>\n",
" <td>75.3</td>\n",
" <td>47.8</td>\n",
" <td>93.0</td>\n",
" <td>45.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128221</th>\n",
" <td>[87.9, 26.6]</td>\n",
" <td>[81.4, 37.8]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" <td>87.9</td>\n",
" <td>26.6</td>\n",
" <td>81.4</td>\n",
" <td>37.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128301</th>\n",
" <td>[83.7, 43.2]</td>\n",
" <td>[91.3, 64.7]</td>\n",
" <td>Ground Pass</td>\n",
" <td>NaN</td>\n",
" <td>Left Foot</td>\n",
" <td>83.7</td>\n",
" <td>43.2</td>\n",
" <td>91.3</td>\n",
" <td>64.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1642 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" location pass_end_location pass_height_name ... Y endX endY\n",
"29 [29.5, 55.8] [33.1, 48.1] Low Pass ... 55.8 33.1 48.1\n",
"99 [64.7, 72.8] [57.2, 74.5] Ground Pass ... 72.8 57.2 74.5\n",
"111 [64.4, 73.7] [54.1, 42.1] Ground Pass ... 73.7 54.1 42.1\n",
"140 [80.6, 33.0] [73.3, 43.8] Ground Pass ... 33.0 73.3 43.8\n",
"155 [97.9, 46.4] [115.5, 53.8] Ground Pass ... 46.4 115.5 53.8\n",
"... ... ... ... ... ... ... ...\n",
"127962 [69.9, 56.5] [75.1, 73.7] Ground Pass ... 56.5 75.1 73.7\n",
"127969 [85.4, 60.7] [90.3, 62.4] Ground Pass ... 60.7 90.3 62.4\n",
"128096 [75.3, 47.8] [93.0, 45.7] Ground Pass ... 47.8 93.0 45.7\n",
"128221 [87.9, 26.6] [81.4, 37.8] Ground Pass ... 26.6 81.4 37.8\n",
"128301 [83.7, 43.2] [91.3, 64.7] Ground Pass ... 43.2 91.3 64.7\n",
"\n",
"[1642 rows x 9 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "hZw7T6aT85n0",
"colab_type": "code",
"colab": {}
},
"source": [
"from sklearn.cluster import KMeans\n",
"import numpy as np"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "YS7-VHD488l2",
"colab_type": "code",
"colab": {}
},
"source": [
"np.random.seed(42)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "hYO1QlOx8-4G",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create training data for clustering\n",
"training_data = []\n",
"training_data = messi_df[['X','Y','endX','endY']].to_numpy()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "4D0bvtBY_QIs",
"colab_type": "code",
"colab": {}
},
"source": [
"cluster_model = KMeans(n_clusters=25).fit(training_data)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TFlNwR-N_mXB",
"colab_type": "code",
"colab": {}
},
"source": [
"cluster_df = pd.DataFrame(data=cluster_model.cluster_centers_,columns= ['X','Y','endX','endY'])"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "S7eLnuOlB2lF",
"colab_type": "code",
"outputId": "a493fbfa-2e9f-41e0-dfd7-07d193deaa49",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
}
},
"source": [
"cluster_df.head()"
],
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X</th>\n",
" <th>Y</th>\n",
" <th>endX</th>\n",
" <th>endY</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>69.526437</td>\n",
" <td>73.794253</td>\n",
" <td>61.916092</td>\n",
" <td>70.040230</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>91.892045</td>\n",
" <td>33.001136</td>\n",
" <td>104.770455</td>\n",
" <td>24.712500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>56.681333</td>\n",
" <td>40.832000</td>\n",
" <td>50.676000</td>\n",
" <td>40.288000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>86.620000</td>\n",
" <td>48.557778</td>\n",
" <td>96.874444</td>\n",
" <td>34.186667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>78.813235</td>\n",
" <td>36.172059</td>\n",
" <td>83.302941</td>\n",
" <td>49.801471</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X Y endX endY\n",
"0 69.526437 73.794253 61.916092 70.040230\n",
"1 91.892045 33.001136 104.770455 24.712500\n",
"2 56.681333 40.832000 50.676000 40.288000\n",
"3 86.620000 48.557778 96.874444 34.186667\n",
"4 78.813235 36.172059 83.302941 49.801471"
]
},
"metadata": {
"tags": []
},
"execution_count": 16
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "S15d3IpX_uGW",
"colab_type": "code",
"outputId": "2d0127d8-373c-460b-e35b-3237c51c7f46",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 609
}
},
"source": [
"import matplotlib.pyplot as plt\n",
"from matplotlib.patches import Arc, Rectangle, ConnectionPatch\n",
"from matplotlib.offsetbox import OffsetImage\n",
"plt.style.use('classic')\n",
"fig=plt.figure() #set up the figures\n",
"fig.set_size_inches(16, 9)\n",
"ax=fig.add_subplot(1,1,1)\n",
"draw_pitch(ax) #overlay our different objects on the pitch\n",
"plt.ylim(-2, 82)\n",
"plt.xlim(-2, 122)\n",
"plt.axis('off')\n",
"y_cary_end = 0\n",
"y_loc = 0\n",
"ax.set_facecolor(\"grey\")\n",
"for i in range(len(cluster_df)):\n",
" # annotate draw an arrow from a current position to pass_end_location\n",
" ax.annotate(\"\", xy = (cluster_df.iloc[i]['endX'], cluster_df.iloc[i]['endY']), xycoords = 'data',\n",
" xytext = (cluster_df.iloc[i].X, cluster_df.iloc[i].Y), textcoords = 'data',\n",
" arrowprops=dict(arrowstyle=\"->\",connectionstyle=\"arc3\", color = \"seagreen\"),)\n",
"\"\"\"\n",
"for i in range(len(assist)):\n",
" # annotate draw an arrow from a current position to pass_end_location\n",
" ax.annotate(\"\", xy = (assist.iloc[i]['pass_end_location'][0], assist.iloc[i]['pass_end_location'][1]), xycoords = 'data',\n",
" xytext = (assist.iloc[i]['location'][0], assist.iloc[i]['location'][1]), textcoords = 'data',\n",
" arrowprops=dict(arrowstyle=\"fancy\",connectionstyle=\"arc3\", color = \"red\"),)\n",
"\"\"\"\n",
"\n",
"plt.show()"
],
"execution_count": 36,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/AAAAJQCAYAAAApNI9UAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAMTQAADE0B0s6tTgAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdeZiU9Z3v/U9V9Va97/ROb9B0AyIo\ni7iBgIBGjRvGTOIWNTNJjs7EOJmTc2Uyk1xnTsbkeR4nmWSOozG4RKNmcY2oCAiyg4qATUNvdDfd\n0Pte61338wdIREG6sbvuqq7367q8rkRt6hOD3fWp7+/+/mzr1683BQAAAAAAQprd6gAAAAAAAODs\nKPAAAAAAAIQBCjwAAAAAAGGAAg8AAAAAQBigwAMAAAAAEAYo8AAAAAAAhAEKPAAAAAAAYYACDwAA\nAABAGKDAAwAAAAAQBijwAAAAAACEAQo8AAAAAABhgAIPAAAAAEAYoMADAAAAABAGKPAAAAAAAIQB\nCjwAAAAAAGGAAg8AAAAAQBigwAMAAAAAEAYo8AAAAAAAhAEKPAAAAAAAYYACDwAAAABAGIga7xfw\ner3y+Xzj/TIAAOCE9evXS5IWL15scRIAACJLdHS0YmJixu3Xt61fv94cr1/c6/Xq1ltvVXd393i9\nBAAAAAAAISE9PV3PPvvsuJX4cZ3A+3w+dXd36/nnn1d8fPx4vhQAADjh0UcflSTdc889FicBACBy\nDA8Pa9WqVfL5fOFZ4D8WHx+vhISEYLwUAAARLzo6WpL42QsAwATDEjsAAAAAAMIABR4AAAAAgDBA\ngQcAAAAAIAxQ4AEAAAAACAMUeAAAAAAAwgAFHgAAAACAMECBBwAAAAAgDFDgAQAAAAAIAxR4AAAA\nAADCAAUeAAAAAIAwQIEHAAAAACAMUOABAAAAAAgDFHgAAAAAAMIABR4AAAAAgDBAgQcAAAAAIAxQ\n4AEAAAAACAMUeAAAAAAAwgAFHgAAAACAMECBBwAAAAAgDFDgAQAAAABhyzRNtQx36rUjO3TU1WN1\nnHEVZXUAAAAAAABGq8vTr93dtdrddUjd3gHNTC1RnCPa6ljjigIPAAAAAAgLgz6XPuip167uQ2oa\naldFcoGW5c7RzNRixU7w8i5R4AEAAAAAIcxr+LS3t1G7ug+ppr9FRfFZuiBjiu4uW67EaKfV8YKK\nAg8AAAAACCmGGdDB/hbt6jqkD3sblBaTpAvTy3VT0SXKiE22Op5lKPAAAAAAAMuZpqnDQ+3a3X1I\n73XXyWGzaU76FN0/7cvKd2bIZrNZHdFyFHgAAAAAgGXa3b3a1XVIu7sPadDv1qy0Ut1eulTlSbmy\n27g47ZMo8AAAAACAoOrzDun9njrt6jqkVleXpqdM1jUFCzQ9pUjRdmrqmfBPBgAAAAAw7tyGV3t6\n6rWr65BqB9tUlpiji7OqNCutVPFRsVbHCwsUeAAAAADAuPAHDFX3NWlX9yHt7z2s7LhUXZgxRX9T\nslipMYlWxws7FHgAAAAAwJgJmKbqB9u0u+uQPuipV5wjRhekl+t7VTcqx5ludbywRoEHAAAAAHxh\nrcNd2tV9SO9118ob8Ov8tFLdU75CJYk5bJAfIxR4AAAAAMA56fYMaHd3rXZ3H1Knp18zU4t1U9Gl\nqkwukMPusDrehEOBBwAAAACM2JDfrQ966rW765Aaho6pIilfS3LO13mpJYp1RFsdb0KjwAMAAAAA\nPpc34Nf+3kbt6q5VdV+TCuIzdWH6FN1RtkzJ0fFWx4sYFHgAAAAAwGcEzIAO9h/R7u5D2tPToJTo\neF2QMUVfLrhIWXEpVseLSBR4AAAAAIggpmlqc8dHmpKcr0lxqZ/5a83DHdrVdUjv99RJkmanlek7\nFdeoMD6LZXQWo8ADAAAAQAR5s+09bWrfp6qUopN/rsPdp93dh7S765D6fMOalVaqr5VcoSlJebLb\n7BamxSdR4AEAAAAgQqw7ukfvtO/VdyquVZTdoXeO7dXu7kNqGe5UVUqRrsqfp+mpkxVjpyqGIv5f\nAQAAAIAIsKl9n95o263Fk2bppeatOjhwRCUJk7Qgc5pmpZUqISrO6og4Cwo8AACISC3DnZKkgvhM\ni5MAwPh77cgOvdn2nhw2u97vrtXcjKm6ZfJlSo9NsjoaRoECDwAAIoY/YGhPT702te/TEVeXriu4\niAIPICLU9Lcozh4tT8CndnevNnfsV3Vfk7LjUvXlwoXc3x4mKPAAAGDC6/MOaUvHR9rSWa0Ye5Qu\nzZ6hezOuUnxUrNXRACAovlt5gyTJCBjq9Q2pxzuobs+AhvxuOVhSFzYo8AAAYEIyTVMNg0e1sX2f\nPuxtUEVygb5avEgVyYWycw0SgAjlsDuUEZusjNhkidPzYYcCDwAAJhSv4dPu7lptat+nbu+A5mdO\n0/+cfouy4lKsjgYAwBdCgQcAABNCl6df77bv17bOA0qJSdBl2TN0QfoUnusEAEwYFHgAABC2Aqap\ng/0t2tS+T9X9zZqZWqxvlC9XWWKubByTBwBMMBR4AAAQdtyGVzs6a7SpfZ+GDa8WZlXq5smXKjUm\n0epoAACMGwo8AAAIG0dd3drUvl87uw4qx5mm5XkX6Py0MkXZHVZHAxBEbsOr147sUEVygWakFlsd\nBwgaCjwAAAhphhnQ/t7D2ti+Tw2DRzUnvUzfrrhGkxOyrY4GwAIH+pr17OF3lB2boqU5s62OAwQV\nBR4AAISkQZ9LWzsPaHPHfpmSLsmq0h2lS5UY7bQ6GgALuA2vXmzeqve6a3Vd4UVamFnJrgtEHAo8\nAAAIKc1DHdrYvk/vd9eqODFH1xderBmpk+Ww2a2OFnYaB4/p6YZ1urHoYlWmFFkdBzhnB/qa9fvD\n7ygrNkX/NH2V0mO5wByRiQIPAAAs5w8Y+qCnTpva96vV1aW5GVP1QNWNynWmWx0tLLUMd+r1IztV\n3d8swwzw4QfCltvw6qXmrdrdXatrCxbo4qwqpu6IaBR4AABgmV7voLZ0VGtLx0eKc8Tokuzp+tuM\nq+SMirU6Wlg6MtylNa27jl+plzJZNh0vOsUJkyxOBoxeTX+Lnm3coMzYFH1/+s3KiE22OhIs0uMd\n1Oq6t/QPlddbHcVyFHgAABBUpmmqfvCoNrXv04e9DZqWXKi/KblCFckFsjNZO2dGwNAjh/6iWWkl\neqDyBj1y6C+amzFVhwaOKMYRbXU8YMTchlcvt2zTrq5DurZggRZmVfG9IcJ1ewbU6xuyOkZIoMAD\nAICg8Bg+7e4+pE3t+9XjHdCCzEr9YMZXlMlUbUw47A79eNbXZZqmflP3hvKcGcqKTdGQ3211tJPa\n3b060N8ij+GTx/DKE/BpyO/RkeFOLcudrQszplodERar6W/R7xs3KCM2mak7TnIbXjkdMVbHCAkU\neAAAMK46Pf16t32/tnVWKy0mSZdlz9AF6eVMhcfJpo79ahrq0D9W3aQXW7aG1B6B1uEu1fS3KNYe\npVhHjGLt0Woaalenp08p0QlWx4OFPjl1v6Zgvi7Oms7UHSe5Da/i7PzMkCjwAABgHARMUzX9LdrY\nvlc1/S06L7VE95SvVGliDguoxlHzUIdeadmmv51ytRKjnerxDGhWaonVsU46P71M56eXnfzvXsOn\n/b2N+lL+fE1JzrcwGax0sP+Inm3coPTYJKbuOC13wKc4JvCSKPAAAGAMufwebe+q0bvt++UyvLo4\nq0q3TL5MqTGJVkeb8NyGV6vr39LSnNkqS8qVJN1ZdqUSouIsTnZmrx7ZofioOF2RM8vqKLCAx/Dp\n5ZZt2tl1kKk7Ppfb72G56QkUeAAAMCK+gF/1g0dVkVzwmb/W5urWpvZ92tl1UHnODK3Iu1Dnp5Uq\nyu6wIGnkMU1Tzx/eqPSYJC3LnX3yzydGOy1M9flqB1q1tbNaD1bdJDvX3EWcQyem7mkxifrH6Tez\nCwOfy8UR+pMo8
"text/plain": [
"<Figure size 1280x720 with 1 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
}
]
}