2019-07-26 17:58:36 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# Notebook parameters: edit this cell to change the season and the minimum carry length.\n",
"# (The player is selected by player_id in the query cell further down.)\n",
"season = '2009/2010'  # compared against season_season_name when selecting matches\n",
"ssn = '0506'  # not referenced anywhere else in this notebook\n",
"length = 20  # minimum dribble_distance a carry needs in order to be drawn"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 2,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"Done\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
"import os\n",
"import json\n",
"import codecs\n",
"import pandas as pd\n",
"import StatsbombPitch as sb\n",
"\n",
"# NOTE: absolute, machine-specific paths - point both at your own copy of the open-data files.\n",
"path_match = \"/home/kirugulige/Documents/Football-Analytics/open-data-master/data/events/\" #location for play by play events\n",
"path_matches = '/home/kirugulige/Documents/Football-Analytics/open-data-master/data/matches' #location of the match list files\n",
"\n",
"# Collect one events frame per selected match and concatenate once at the end:\n",
"# DataFrame.append copied the whole frame on every call and was removed in pandas 2.0.\n",
"match_frames = []\n",
"for root, dirs, files in os.walk(path_matches):\n",
"    for file in files:\n",
"        with codecs.open(os.path.join(root, file), encoding='utf-8') as data_file:\n",
"            data = json.load(data_file)\n",
"        df = pd.json_normalize(data, sep=\"_\")\n",
"        if df.empty:\n",
"            continue  # nothing listed in this file, so the filter columns do not exist\n",
"        # rows whose competition country is Spain and whose season matches the configured one\n",
"        wanted = (df['competition_country_name'] == 'Spain') & (df['season_season_name'] == season)\n",
"        for match_no in df.loc[wanted, 'match_id']:\n",
"            # the events of a match are read from <path_match><match_id>.json\n",
"            with codecs.open(path_match + str(match_no) + '.json', encoding='utf8') as event_file:\n",
"                events = json.load(event_file)\n",
"            match_frames.append(pd.json_normalize(events, sep=\"_\"))\n",
"\n",
"# pd.concat raises on an empty list, so fall back to an empty frame (the previous result when nothing matched)\n",
"main_df = pd.concat(match_frames, ignore_index=True, sort=False) if match_frames else pd.DataFrame()\n",
"print('Done')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2020-05-18 09:45:27 +02:00
|
|
"# Change PLAYER_ID to analyse a different player (5503 = Messi).\n",
"PLAYER_ID = 5503\n",
"# Keep this player's events with type_id 43, play_pattern_id 1 and a duration of at least 1.5.\n",
"# NOTE(review): presumably these ids mean carries in regular play - confirm against the StatsBomb event spec.\n",
"Player = main_df.query('player_id == @PLAYER_ID & type_id == 43 & play_pattern_id == 1 & duration >= 1.50')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# Fail early with a readable message instead of an IndexError from .iloc[0] on an empty frame.\n",
"if Player.empty:\n",
"    raise ValueError('No events matched the player/season filter; check the player id and season.')\n",
"player_name = Player.player_name.iloc[0]  # used later for the plot title\n",
"# Keep only the start and end coordinates; .copy() gives an independent frame so that\n",
"# adding a column later does not trigger SettingWithCopyWarning.\n",
"Player = Player[['location', 'carry_end_location']].copy()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"from math import sqrt\n",
"# Straight-line length of every carry, using the distance formula sqrt((x2-x1)^2 + (y2-y1)^2)\n",
"# on the [x, y] pairs stored in 'location' (start) and 'carry_end_location' (end).\n",
"distance = [\n",
"    sqrt((end[0] - start[0])**2 + ((end[1] - start[1])**2))\n",
"    for start, end in zip(Player['location'], Player['carry_end_location'])\n",
"]"
|
2019-07-26 17:58:36 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 6,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# assign() returns a new frame with the extra column, so this cell never writes into a\n",
"# frame that was sliced from another one (no SettingWithCopyWarning) and can be re-run safely.\n",
"Player = Player.assign(dribble_distance=distance)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
|
|
|
" <th>location</th>\n",
|
|
|
|
" <th>carry_end_location</th>\n",
|
|
|
|
" <th>dribble_distance</th>\n",
|
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
2020-05-18 09:45:27 +02:00
|
|
|
" <th>109</th>\n",
|
|
|
|
" <td>[64.4, 78.2]</td>\n",
|
|
|
|
" <td>[64.4, 73.7]</td>\n",
|
|
|
|
" <td>4.500000</td>\n",
|
2019-07-26 17:58:36 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2020-05-18 09:45:27 +02:00
|
|
|
" <th>800</th>\n",
|
|
|
|
" <td>[53.3, 25.5]</td>\n",
|
|
|
|
" <td>[54.4, 27.9]</td>\n",
|
|
|
|
" <td>2.640076</td>\n",
|
2019-07-26 17:58:36 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2020-05-18 09:45:27 +02:00
|
|
|
" <th>1000</th>\n",
|
|
|
|
" <td>[75.5, 58.8]</td>\n",
|
|
|
|
" <td>[71.0, 69.7]</td>\n",
|
|
|
|
" <td>11.792370</td>\n",
|
2019-07-26 17:58:36 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2020-05-18 09:45:27 +02:00
|
|
|
" <th>1015</th>\n",
|
|
|
|
" <td>[94.6, 65.1]</td>\n",
|
|
|
|
" <td>[89.9, 58.0]</td>\n",
|
|
|
|
" <td>8.514693</td>\n",
|
2019-07-26 17:58:36 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
2020-05-18 09:45:27 +02:00
|
|
|
" <th>1417</th>\n",
|
|
|
|
" <td>[67.0, 37.0]</td>\n",
|
|
|
|
" <td>[85.8, 21.6]</td>\n",
|
|
|
|
" <td>24.302263</td>\n",
|
2019-07-26 17:58:36 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
|
|
|
" location carry_end_location dribble_distance\n",
|
2020-05-18 09:45:27 +02:00
|
|
|
"109 [64.4, 78.2] [64.4, 73.7] 4.500000\n",
|
|
|
|
"800 [53.3, 25.5] [54.4, 27.9] 2.640076\n",
|
|
|
|
"1000 [75.5, 58.8] [71.0, 69.7] 11.792370\n",
|
|
|
|
"1015 [94.6, 65.1] [89.9, 58.0] 8.514693\n",
|
|
|
|
"1417 [67.0, 37.0] [85.8, 21.6] 24.302263"
|
2019-07-26 17:58:36 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
"# Preview the first five carries together with their computed distance.\n",
"Player.head(5)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 8,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
2020-05-18 09:45:27 +02:00
|
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmsAAAGhCAYAAAA+1/OrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xd4VFX+x/H3mT6TMqmUECRUIVSVInUBC7oWdm0Loq4VdfXn6lpXXV3brrquveLaG1bsYgNUOoh0kd4F0ttMMu38/riTGEICAZLcSfJ9PU8ekrnte2cmzCfn3HuO0lojhBBCCCFik8XsAoQQQgghRN0krAkhhBBCxDAJa0IIIYQQMUzCmhBCCCFEDJOwJoQQQggRwySsCSGEEELEMAlrQgghhBAxTMKaaJaUUiOVUr80wXFeVkrd29jHiR5rtFJqez3XdSul5iilTmnsuhqSUmqVUmq02XVUUko9q5T6x0Fu84VS6s+NsO4BX//qz59S6p9Kqdf3s+5mpdTxh3osIUTskLAmYlpdHzha6x+01keaUVNN0Q8+rZS6uQkP+xzwsNb6s4PdsFq902o83j/6+KyGKrImrXVvrXWd+1dKDVZKfa6UKlRK5SulFiqlLmrEeq7QWt8TPfY+Aaa2QKS1Pllr/Uo991/vdeu5v/0+f2ZSSh2rlPo6+rrlKKXeVUq1r7ZcKaUeUErlRb8eUEqpassHKKV+VEr5ov8OqLYsSSn1ilJqT/Trn7Ucf6hSaq5Sqo1S6i2l1E6lVFH0j5ohNdY9Vym1RSlVppT6UCmVUm3Z1UqpxUqpCqXUy7Uc5zil1JponTOVUp0O/9kTYv8krAlx+P4M5AMXNNYBlFK26j9rrS/QWr9/GLvMAYYqpVKrPfZnYO1h7POwKKWGAjOA74BuQCpwJXCyWTXFipqvf4xKBqYAWUAnoAR4qdryycAfgP5AP+A04HIApZQD+Ah4PbqfV4CPoo8DPAJ4ovseDJxfS4g/BfgciAcWAccAKdF9faaUio8eqzfGHzvnA20BH/B0tf3sBO4FXqx5gkqpNOAD4B/RfS8G3q7PkyPEYdFay5d8xewXsBk4vpbHRwPbq/3cC5gFFAKrgNOrLXsZeAr4DOMDZAHQtdrynsDXGIHrF+CcGtveu5/64qL7nAAEgIHVlmUBGiMEbQVygduqLXdH918ArAZurHFOm4GbgeVABWADMoD3McLWJuCaausPxvjwKAZ2Y7S81VbzaGA78CxwVfQxK7ADuAOYVc/n5vfRukui294QfTwN+DT6WuQDPwCW/b2e0WWzgaf281xfCMyu8ZgGulV7Pv8LbAGKovtzR5eNAOZGa9oGXFj99Y2+jn4gApRGv86NvqbB6M/LotvMAi6tXhPwUPR13AScXK2+qnVrOZ9Def2rnj/gn8B7GGGhBFgC9K+x/d+j+y7ACE6uOn5/Dvt9Vcv5HQ2UVPt5LjC52s+XAPOj358YfQ+pasu3AidFv88FBlVbdivwQ43jLQGOrqOWYuCY6Pf/At6stqxr9HVOqLHNvcDLNR6bDMyt8fvvB3oe7v918iVf+/uSljXR7Cml7MAnwFdAG+D/gDeUUtW7SScAd2H81b4euC+6bRxGGHkzuu0E4GmlVHY9D38Gxgf5u8CXGMGsphHAkcBxwB1KqV7Rx+/E+KDoCoyrY9uJGC0GSRhB4hNgGdAhur9rlVLjous+BjymtU6M7vOdA9T+Kr+1Bo4DVmK0KgD1em5eAC7XWicAfTBaxQCuxwiD6RgtF7dihKo6KaU8wFCM8HGoHsJoTRmG0epxExCJdlN9ATwRrWkAsLT6hlrrMowWvJ1a6/jo15sYH+xvR3/uX8dxh2AE2TTgQeCF6t17+3FQr7/WOlTL8vEY770UjNfpw+jvQ6VJ0X13BXoAt9fcgVLKQsO+ryqNwvjDqVLv6DEqLYs+Vrlsuda6+vtkebXlAKrG932qnU
N7jPfaT7Wc3wDAgfF7v08dWusNGGGtRz3Oqea2ZcCGGnUK0eAkrImW4FiMro/7tdYBrfUMjJadidXWmaa1Xhj9wHsD4wMb4FRgs9b6Ja11SGv9E0YLw9n1PPafMT7MwxgflhNqfFgC3KW19mutl2H8R1/5oX8OcJ/WOl9rvQ14vJb9P6613qa19gODgHSt9d3R89wIPI8RosBoAeqmlErTWpdqrefvr3Ct9VwgJRpqL8AIb9Ud6LkJAtlKqUStdYHWekm1x9sDnbTWQW1cX7jfsIYRoi3ArwdYr1bRwHEx8Fet9Q6tdVhrPVdrXYHRQvaN1vqtaD15Wuul+9/jQdmitX4++h54BePc29Zju4N9/Wvzo9b6Pa11EHgYcGH8PlR6Mrp9PsYfKBNr2UeDvq8AlFL9MFppb6z2cDxGi2elIiA+GmxrLqtcnhD9fjpwi1IqQSnVDeO19lRb9/fA9JrvM6VUIvAaxu9g5f4PdKz9OZxthThkEtZES5ABbNNaR6o9tgWjlaDSrmrf+zD+0wXj2poh0QvaC5VShRitEe0OdFClVEdgDEb4A+OaGxdGS0h1dR07A6NLrnrNNVVf3gnIqFHrrfwWDC7BaB1Yo5RapJQ69UDngPFBdnX0PKbVWHag5+ZMjA/JLUqp76LXnAH8B6MV4yul1Eal1C31qKMAo+Ww/YFWrEMaxnO/oZZlHet4vKFUvb5aa1/02/g61q3uYF//2lQtj77/t0f3W9v2W2osq9Sg76tomPoCIzj/UG1RKZBY7edEoDQasGouq1xeEv3+GozuxnUYv2dvRc+10u8xrlerXocbo8Vwvtb63/upo+ax9udwthXikElYEy3BTqBjtHWl0hEY18AcyDbgO611UrWveK31lfXY9nyM36FPlFK7gI0YgaFeQzVgtCJ1rFFzTdVbCrYBm2rUmqC1/j2A1nqd1noiRpflA8B70a7M/XkN+AvwebWgUf14dT43WutFWuvx0eN9SLR7TGtdorW+XmvdBTgd+JtS6rj9FRE99jyMAFiXMqq1piilqgfqXKAco5uupm11PL5PGfV8rKEc7Otfm6rto+//TKp1Zdey/+rLKjXY+yra5fwNcI/W+rUai1fxW6sy0e9XVVvWr0b3cb/K5dHWx0la63Za694Yv3cLo8e0A7/D6LKvrMOJ8Z7cTvQmhrrqUEp1AZzU7+aamtvGYby3VtW5hRANQMKaaA7sSilXta+ad8YtwGixukkpZVfGOFSnAVPrse9PgR5KqfOj29qVUoOqXVe2P3/GuA5uQLWvM4Hf17jLsi7vAH9XSiUrpTIxrrXbn4VAiVLqZmWMs2ZVSvVRSg0CUEqdp5RKj7awFEa3idS5N0BrvQnjg+62WhbX+dwopRxKqUlKKW+0C6648lhKqVOVUt2iH7xFQPhAdUTdBFyolLqx8vlTxnAila/jMqC3MoZ4cGFcYF95HhGMu/ceVkplRJ+bodEP7TeA45VS5yilbEqpVFVtWIhqdgOpSilvjceyavwh0FAO9vWvzTFKqTOivxPXYtyIUL2b8iqlVKYyhqa4jdrvXGyQ95VSqgPGdYtPaq2freU4r2IE9w5KqQyMaxtfji6bhfE+uUYp5VRKXR19fEZ0312jr5tVKXUyxoX+leMfjsC43q04uq4d49pHP/DnGi3uYLwfTlPGWI1xwN3AB1rrkuj2tuj7ywpYa/yfMw3oo5Q6M7rOHdFjr6nlfIVoMBLWRHPwOcZ/vJVf/6y+UGsdwAhnJ2O0sDwNXFCf/0Cj/0GfiHF9zk6MLq0HMP7SrpNS6liM7qOntNa7qn19jNEFWNu1QTXdhdE1tQnj5oiaLRE1aw1jXEc2ILpNLvA/oDJcnASsUkqVYlwUPmE/1zpV3+9srfU+LS71eG7OBzYrpYqBKzC6SAG6Y7SulGK0lj2ttZ5ZjzrmAmOjXxuVUvkYQ0F8Hl2+FuOD9RuM7rDZNXZxA7ACY9iG/GitFq31Voxusuujjy9l7x
aeyuOvwehe2xjtDszAuHgfIE8ptaTmNofpoF7/OnwE/AmjG/l84IxoeK70ZnTfGzG6gvcZ4LkB31eXAl2AfyqlSiu
|
2019-07-26 17:58:36 +02:00
|
|
|
"text/plain": [
|
2020-05-18 09:45:27 +02:00
|
|
|
"<Figure size 748.8x489.6 with 1 Axes>"
|
2019-07-26 17:58:36 +02:00
|
|
|
]
|
|
|
|
},
|
2020-05-18 09:45:27 +02:00
|
|
|
"metadata": {
|
|
|
|
"needs_background": "light"
|
2019-07-26 17:58:36 +02:00
|
|
|
},
|
|
|
|
"output_type": "display_data"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
"import matplotlib.pyplot as plt\n",
"\n",
"# Only carries that end at x >= MIN_END_X are drawn.\n",
"# NOTE(review): presumably the attacking end of the pitch drawn by sb_pitch - confirm the pitch length.\n",
"MIN_END_X = 90\n",
"\n",
"sb.sb_pitch(\"#195905\",\"#faf0e6\",\"horizontal\",\"full\")\n",
"plt.gca().invert_yaxis()  # flip the y axis on the axes instead of mirroring each coordinate by hand\n",
"for start, end, carried in zip(Player['location'], Player['carry_end_location'], Player['dribble_distance']):\n",
"    if end[0] >= MIN_END_X and carried >= length:\n",
"        # arrow from where the carry started to where it ended\n",
"        plt.annotate(\"\", xy = (end[0], end[1]), xycoords = 'data',\n",
"                     xytext = (start[0], start[1]), textcoords = 'data',\n",
"                     arrowprops=dict(arrowstyle=\"->\",connectionstyle=\"arc3\", color = \"blue\"),)\n",
"\n",
"# Set the title once, outside the loop, so the figure is labelled even when no carry qualifies.\n",
"plt.title(player_name + ' dribbles ' + season)\n",
"plt.show()"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
2020-05-18 09:45:27 +02:00
|
|
|
"version": "3.6.9"
|
2019-07-26 17:58:36 +02:00
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 2
|
|
|
|
}
|