sportowe_wizualizacja/Dribbles by player and season.ipynb

246 lines
78 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#change this cell to change season and player name\n",
2020-05-18 09:45:27 +02:00
"season = '2009/2010'\n",
"ssn = '0506'\n",
"length = int(20)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Done\n"
]
}
],
"source": [
"import os\n",
"import json\n",
"import codecs\n",
"import pandas as pd\n",
"import codecs\n",
2020-05-18 09:45:27 +02:00
"import StatsbombPitch as sb\n",
"main_df = pd.DataFrame(data=None)\n",
2020-05-18 09:45:27 +02:00
"path_match = \"/home/kirugulige/Documents/Football-Analytics/open-data-master/data/events/\" #location for play by play events\n",
"for root, dirs, files in os.walk('/home/kirugulige/Documents/Football-Analytics/open-data-master/data/matches'):\n",
" for file in files:\n",
" with open(os.path.join(root, file), \"r\") as auto:\n",
2020-05-18 09:45:27 +02:00
" with codecs.open(root + str('/') + file,encoding='utf-8') as data_file:\n",
" data = json.load(data_file)\n",
" df = pd.DataFrame(data=None)\n",
2020-05-18 09:45:27 +02:00
" df = pd.json_normalize(data, sep = \"_\")\n",
" #for x in df.competition_country_name:\n",
" # if x == 'Spain':\n",
" # print(df.match_id)\n",
" for i in range(len(df)):\n",
" if df.iloc[i]['competition_country_name'] == 'Spain' and df.iloc[i]['season_season_name'] == season :\n",
" match_no = df.iloc[i]['match_id'] #gets match with Spain as country\n",
" match_no = str(match_no) # from int to str \n",
" with codecs.open(path_match + match_no + str(r'.json'),encoding=\"utf8\") as event_file: #open the respective file\n",
" df_match = json.load(event_file)\n",
" df_match2 = pd.DataFrame(data=None)\n",
2020-05-18 09:45:27 +02:00
" df_match2 = pd.json_normalize(df_match,sep=\"_\") \n",
" \n",
" main_df = main_df.append(df_match2,ignore_index=True,sort=False) \n",
"print('Done')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
2020-05-18 09:45:27 +02:00
"#change player_id to get different player 5503 = Messi\n",
"Player = main_df.query('player_id == 5503 & type_id ==43 & play_pattern_id ==1 & duration >= 1.50')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"player_name = Player.player_name.iloc[0]\n",
"#df[['a','b']]\n",
"Player = Player [['location','carry_end_location']]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from math import sqrt\n",
"distance= []\n",
"for i in range(len(Player)):\n",
" distance.append(sqrt((Player.iloc[i]['carry_end_location'][0] - Player.iloc[i]['location'][0])**2 + ((Player.iloc[i]['carry_end_location'][1] - Player.iloc[i]['location'][1])**2)))\n",
2020-05-18 09:45:27 +02:00
" #using distance formula above (sqrt((x2-x1)^2 + (y2-y1)^2))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"Player['dribble_distance'] = distance"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>location</th>\n",
" <th>carry_end_location</th>\n",
" <th>dribble_distance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2020-05-18 09:45:27 +02:00
" <th>109</th>\n",
" <td>[64.4, 78.2]</td>\n",
" <td>[64.4, 73.7]</td>\n",
" <td>4.500000</td>\n",
" </tr>\n",
" <tr>\n",
2020-05-18 09:45:27 +02:00
" <th>800</th>\n",
" <td>[53.3, 25.5]</td>\n",
" <td>[54.4, 27.9]</td>\n",
" <td>2.640076</td>\n",
" </tr>\n",
" <tr>\n",
2020-05-18 09:45:27 +02:00
" <th>1000</th>\n",
" <td>[75.5, 58.8]</td>\n",
" <td>[71.0, 69.7]</td>\n",
" <td>11.792370</td>\n",
" </tr>\n",
" <tr>\n",
2020-05-18 09:45:27 +02:00
" <th>1015</th>\n",
" <td>[94.6, 65.1]</td>\n",
" <td>[89.9, 58.0]</td>\n",
" <td>8.514693</td>\n",
" </tr>\n",
" <tr>\n",
2020-05-18 09:45:27 +02:00
" <th>1417</th>\n",
" <td>[67.0, 37.0]</td>\n",
" <td>[85.8, 21.6]</td>\n",
" <td>24.302263</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" location carry_end_location dribble_distance\n",
2020-05-18 09:45:27 +02:00
"109 [64.4, 78.2] [64.4, 73.7] 4.500000\n",
"800 [53.3, 25.5] [54.4, 27.9] 2.640076\n",
"1000 [75.5, 58.8] [71.0, 69.7] 11.792370\n",
"1015 [94.6, 65.1] [89.9, 58.0] 8.514693\n",
"1417 [67.0, 37.0] [85.8, 21.6] 24.302263"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Player.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
2020-05-18 09:45:27 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmsAAAGhCAYAAAA+1/OrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xd4VFX+x/H3mT6TMqmUECRUIVSVInUBC7oWdm0Loq4VdfXn6lpXXV3brrquveLaG1bsYgNUOoh0kd4F0ttMMu38/riTGEICAZLcSfJ9PU8ekrnte2cmzCfn3HuO0lojhBBCCCFik8XsAoQQQgghRN0krAkhhBBCxDAJa0IIIYQQMUzCmhBCCCFEDJOwJoQQQggRwySsCSGEEELEMAlrQgghhBAxTMKaaJaUUiOVUr80wXFeVkrd29jHiR5rtFJqez3XdSul5iilTmnsuhqSUmqVUmq02XVUUko9q5T6x0Fu84VS6s+NsO4BX//qz59S6p9Kqdf3s+5mpdTxh3osIUTskLAmYlpdHzha6x+01keaUVNN0Q8+rZS6uQkP+xzwsNb6s4PdsFq902o83j/6+KyGKrImrXVvrXWd+1dKDVZKfa6UKlRK5SulFiqlLmrEeq7QWt8TPfY+Aaa2QKS1Pllr/Uo991/vdeu5v/0+f2ZSSh2rlPo6+rrlKKXeVUq1r7ZcKaUeUErlRb8eUEqpassHKKV+VEr5ov8OqLYsSSn1ilJqT/Trn7Ucf6hSaq5Sqo1S6i2l1E6lVFH0j5ohNdY9Vym1RSlVppT6UCmVUm3Z1UqpxUqpCqXUy7Uc5zil1JponTOVUp0O/9kTYv8krAlx+P4M5AMXNNYBlFK26j9rrS/QWr9/GLvMAYYqpVKrPfZnYO1h7POwKKWGAjOA74BuQCpwJXCyWTXFipqvf4xKBqYAWUAnoAR4qdryycAfgP5AP+A04HIApZQD+Ah4PbqfV4CPoo8DPAJ4ovseDJxfS4g/BfgciAcWAccAKdF9faaUio8eqzfGHzvnA20BH/B0tf3sBO4FXqx5gkqpNOAD4B/RfS8G3q7PkyPEYdFay5d8xewXsBk4vpbHRwPbq/3cC5gFFAKrgNOrLXsZeAr4DOMDZAHQtdrynsDXGIHrF+CcGtveu5/64qL7nAAEgIHVlmUBGiMEbQVygduqLXdH918ArAZurHFOm4GbgeVABWADMoD3McLWJuCaausPxvjwKAZ2Y7S81VbzaGA78CxwVfQxK7ADuAOYVc/n5vfRukui294QfTwN+DT6WuQDPwCW/b2e0WWzgaf281xfCMyu8ZgGulV7Pv8LbAGKovtzR5eNAOZGa9oGXFj99Y2+jn4gApRGv86NvqbB6M/LotvMAi6tXhPwUPR13AScXK2+qnVrOZ9Def2rnj/gn8B7GGGhBFgC9K+x/d+j+y7ACE6uOn5/Dvt9Vcv5HQ2UVPt5LjC52s+XAPOj358YfQ+pasu3AidFv88FBlVbdivwQ43jLQGOrqOWYuCY6Pf/At6stqxr9HVOqLHNvcDLNR6bDMyt8fvvB3oe7v918iVf+/uSljXR7Cml7MAnwFdAG+D/gDeUUtW7SScAd2H81b4euC+6bRxGGHkzuu0E4GmlVHY9D38Gxgf5u8CXGMGsphHAkcBxwB1KqV7Rx+/E+KDoCoyrY9uJGC0GSRhB4hNgGdAhur9rlVLjous+BjymtU6M7vOdA9T+Kr+1Bo4DVmK0KgD1em5eAC7XWicAfTBaxQCuxwiD6RgtF7dihKo6KaU8wFCM8HGoHsJoTRmG0epxExCJdlN9ATwRrWkAsLT6hlrrMowWvJ1a6/jo15sYH+xvR3/uX8dxh2AE2TTgQeCF6t17+3FQr7/WOlTL8vEY770UjNfpw+jvQ6VJ0X13BXoAt9fcgVLKQsO+ryqNwvjDqVLv6DEqLYs+Vrlsuda6+vtkebXlAKrG932qnUN7jPfaT7Wc3wDAgfF7v08dWusNGGGtRz3Oqea2ZcCGGnUK0eAkrImW4FiMro/7tdYBrfUMjJadidXWmaa1Xhj9wHsD4wMb4FRgs9b6Ja11SGv9E0YLw9n1PPafMT7MwxgflhNqfFgC3KW19mutl2H8R1/5oX8OcJ/WOl9rvQ14vJb9P6613qa19gODgHSt9d3R89wIPI8RosBoAeqmlErTWpdqrefvr3Ct9VwgJRpqL8AIb9Ud6LkJAtlKqUStdYHWekm1x9sDnbTWQW1cX7jfsIYRoi3ArwdYr1bRwHEx8Fet9Q6tdVhrPVdrXYHRQvaN1vqtaD15Wuul+9/jQdmitX4++h54BePc29Zju4N9/Wvzo9b6Pa11EHgYcGH8PlR6Mrp9PsYfKBNr2UeDvq8AlFL9MFppb6z2cDxGi2elIiA+GmxrLqtcnhD9fjpwi1IqQSnVDeO19lRb9/fA9JrvM6VUIvAaxu9g5f4PdKz9OZxthThkEtZES5ABbNNaR6o9tgWjlaDSrmrf+zD+0wXj2poh0QvaC5VShRitEe0OdFClVEdgDEb4A+OaGxdGS0h1dR07A6NLrnrNNVVf3gnIqFHrrfwWDC7BaB1Yo5RapJQ69UDngPFBdnX0PKbVWHag5+ZMjA/JLUqp76LXnAH8B6MV4yul1Eal1C31qKMAo+Ww/YFWrEMaxnO/oZZlHet4vKFUvb5aa1/02/g61q3uYF//2lQtj77/t0f3W9v2W2osq9Sg76tomPoCIzj/UG1RKZBY7edEoDQasGouq1xeEv3+GozuxnUYv2dvRc+10u8xrlerXocbo8Vwvtb63/upo+ax9udwthXikElYEy3BTqBjtHWl0hEY18AcyDbgO611UrWveK31lfXY9nyM36FPlFK7gI0YgaFeQzVgtCJ1rFFzTdVbCrYBm2rUmqC1/j2A1nqd1noiRpflA8B70a7M/XkN+AvwebWgUf14dT43WutFWuvx0eN9SLR7TGtdorW+XmvdBTgd+JtS6rj9FRE99jyMAFiXMqq1piilqgfqXKAco5uupm11PL5PGfV8rKEc7Otfm6rto+//TKp1Zdey/+rLKjXY+yra5fwNcI/W+rUai1fxW6sy0e9XVVvWr0b3cb/K5dHWx0la63Za694Yv3cLo8e0A7/D6LKvrMOJ8Z7cTvQmhrrqUEp1AZzU7+aamtvGYby3VtW5hRANQMKaaA7sSilXta+ad8YtwGixukkpZVfGOFSnAVPrse9PgR5KqfOj29qVUoOqXVe2P3/GuA5uQLWvM4Hf17jLsi7vAH9XSiUrpTIxrrXbn4VAiVLqZmWMs2ZVSvVRSg0CUEqdp5RKj7awFEa3idS5N0BrvQnjg+62WhbX+dwopRxKqUlKKW+0C6648lhKqVOVUt2iH7xFQPhAdUTdBFyolLqx8vlTxnAila/jMqC3MoZ4cGFcYF95HhGMu/ceVkplRJ+bodEP7TeA45VS5yilbEqpVFVtWIhqdgOpSilvjceyavwh0FAO9vWvzTFKqTOivxPXYtyIUL2b8iqlVKYyhqa4jdrvXGyQ95VSqgPGdYtPaq2freU4r2IE9w5KqQyMaxtfji6bhfE+uUYp5VRKXR19fEZ0312jr5tVKXUyxoX+leMfjsC43q04uq4d49pHP/DnGi3uYLwfTlPGWI1xwN3AB1rrkuj2tuj7ywpYa/yfMw3oo5Q6M7rOHdFjr6nlfIVoMBLWRHPwOcZ/vJVf/6y+UGsdwAhnJ2O0sDwNXFCf/0Cj/0GfiHF9zk6MLq0HMP7SrpNS6liM7qOntNa7qn19jNEFWNu1QTXdhdE1tQnj5oiaLRE1aw1jXEc2ILpNLvA/oDJcnASsUkqVYlwUPmE/1zpV3+9srfU+LS71eG7OBzYrpYqBKzC6SAG6Y7SulGK0lj2ttZ5ZjzrmAmOjXxuVUvkYQ0F8Hl2+FuOD9RuM7rDZNXZxA7ACY9iG/GitFq31Voxusuujjy9l7xaeyuOvwehe2xjtDszAuHgfIE8ptaTmNofpoF7/OnwE/AmjG/l84IxoeK70ZnTfGzG6gvcZ4LkB31eXAl2AfyqlSiu
"text/plain": [
2020-05-18 09:45:27 +02:00
"<Figure size 748.8x489.6 with 1 Axes>"
]
},
2020-05-18 09:45:27 +02:00
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
2020-05-18 09:45:27 +02:00
"sb.sb_pitch(\"#195905\",\"#faf0e6\",\"horizontal\",\"full\")\n",
"plt.gca().invert_yaxis()\n",
"y_cary_end = 0\n",
"y_loc = 0\n",
"for i in range(len(Player)):\n",
" y_cary_end = -2*(Player.iloc[i]['carry_end_location'][1] - 40) + Player.iloc[i]['carry_end_location'][1] \n",
" y_loc = -2*(Player.iloc[i]['location'][1] - 40) + Player.iloc[i]['location'][1] \n",
" if Player.iloc[i]['carry_end_location'][0] >= 90 and Player.iloc[i]['dribble_distance'] >= length:\n",
2020-05-18 09:45:27 +02:00
" plt.annotate(\"\", xy = (Player.iloc[i]['carry_end_location'][0],Player.iloc[i]['carry_end_location'][1]), xycoords = 'data',\n",
" xytext = (Player.iloc[i]['location'][0],Player.iloc[i]['location'][1] ), textcoords = 'data',\n",
" arrowprops=dict(arrowstyle=\"->\",connectionstyle=\"arc3\", color = \"blue\"),)\n",
2020-05-18 09:45:27 +02:00
" plt.title(player_name +str(' dribbles ') + season)\n",
" \n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2020-05-18 09:45:27 +02:00
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}