{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Change this cell to change the season and the player.\n",
    "season = '2009/2010'\n",
    "# NOTE(review): '0506' does not look like the short form of '2009/2010' - confirm.\n",
    "ssn = '0506'\n",
    "length = 20\n",
    "# Player whose events are selected further down; 5503 = Messi.\n",
    "player_id = 5503"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done\n"
     ]
    }
   ],
   "source": [
    "import codecs\n",
    "import json\n",
    "import os\n",
    "from math import sqrt\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "import StatsbombPitch as sb\n",
    "\n",
    "# Root of the open-data directory; set STATSBOMB_DATA_DIR to use another location.\n",
    "data_root = os.environ.get(\n",
    "    'STATSBOMB_DATA_DIR',\n",
    "    '/home/kirugulige/Documents/Football-Analytics/open-data-master/data',\n",
    ")\n",
    "path_matches = os.path.join(data_root, 'matches')  # match lists, walked recursively\n",
    "path_match = os.path.join(data_root, 'events', '')  # location for play by play events (trailing separator kept)\n",
    "\n",
    "\n",
    "def load_season_events(matches_dir, events_dir, season_name, country='Spain'):\n",
    "    \"\"\"Return the events of every match played in `country` during `season_name`.\n",
    "\n",
    "    Walks `matches_dir`, reads each match-list JSON file, keeps the rows whose\n",
    "    `competition_country_name` and `season_season_name` match, and loads\n",
    "    `<match_id>.json` from `events_dir` for each of them.\n",
    "\n",
    "    The per-match frames are collected in a list and concatenated once:\n",
    "    `DataFrame.append` was removed in pandas 2.0, and appending inside the\n",
    "    loop copies the accumulated frame again for every match.\n",
    "\n",
    "    Returns:\n",
    "        A single DataFrame with a fresh 0..n-1 index holding all events.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if no match for the requested country/season was found\n",
    "            (for example a wrong directory or a misspelled season name).\n",
    "    \"\"\"\n",
    "    event_frames = []\n",
    "    for root, dirs, files in os.walk(matches_dir):\n",
    "        for file in files:\n",
    "            with codecs.open(os.path.join(root, file), encoding='utf-8') as data_file:\n",
    "                matches = pd.json_normalize(json.load(data_file), sep='_')\n",
    "            if matches.empty:\n",
    "                # An empty match list has no columns to filter on.\n",
    "                continue\n",
    "            wanted = matches.loc[\n",
    "                (matches['competition_country_name'] == country)\n",
    "                & (matches['season_season_name'] == season_name),\n",
    "                'match_id',\n",
    "            ]\n",
    "            for match_no in wanted:\n",
    "                event_path = os.path.join(events_dir, str(match_no) + '.json')\n",
    "                with codecs.open(event_path, encoding='utf8') as event_file:\n",
    "                    event_frames.append(pd.json_normalize(json.load(event_file), sep='_'))\n",
    "    if not event_frames:\n",
    "        raise ValueError(\n",
    "            'No matches found for country %r and season %r under %s'\n",
    "            % (country, season_name, matches_dir)\n",
    "        )\n",
    "    return pd.concat(event_frames, ignore_index=True, sort=False)\n",
    "\n",
    "\n",
    "main_df = load_season_events(path_matches, path_match, season)\n",
    "print('Done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Change player_id in the first cell to get a different player.\n",
    "# NOTE(review): type_id 43 is presumably the carry event (the next cell reads\n",
    "# carry_end_location) and play_pattern_id 1 presumably regular play - confirm.\n",
    "Player = main_df.query(\n",
    "    'player_id == @player_id and type_id == 43 and play_pattern_id == 1 and duration >= 1.50'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "if Player.empty:\n",
    "    raise ValueError('No matching events for player_id %s in season %s' % (player_id, season))\n",
    "player_name = Player.player_name.iloc[0]\n",
    "# Keep only the start and end coordinates. Copy so that adding a column\n",
    "# later does not raise SettingWithCopyWarning on a slice of main_df.\n",
    "Player = Player[['location', 'carry_end_location']].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distance between start and end point of each row,\n",
    "# using the distance formula sqrt((x2-x1)^2 + (y2-y1)^2).\n",
    "distance = [\n",
    "    sqrt((end[0] - start[0]) ** 2 + (end[1] - start[1]) ** 2)\n",
    "    for start, end in zip(Player['location'], Player['carry_end_location'])\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "Player['dribble_distance'] = distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | location | \n", "carry_end_location | \n", "dribble_distance | \n", "
---|---|---|---|
109 | \n", "[64.4, 78.2] | \n", "[64.4, 73.7] | \n", "4.500000 | \n", "
800 | \n", "[53.3, 25.5] | \n", "[54.4, 27.9] | \n", "2.640076 | \n", "
1000 | \n", "[75.5, 58.8] | \n", "[71.0, 69.7] | \n", "11.792370 | \n", "
1015 | \n", "[94.6, 65.1] | \n", "[89.9, 58.0] | \n", "8.514693 | \n", "
1417 | \n", "[67.0, 37.0] | \n", "[85.8, 21.6] | \n", "24.302263 | \n", "