{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: edit this cell to change the season, country, player or data location.\n",
    "season = '2005/2006'        # compared with season_season_name in the match files\n",
    "ssn = '0506'                # not used in the cells below; presumably read further down - TODO confirm\n",
    "length = 20                 # not used in the cells below; presumably read further down - TODO confirm\n",
    "country = 'Spain'           # compared with competition_country_name in the match files\n",
    "target_player_id = 5503     # compared with player_id in the event data\n",
    "# Folder that contains the matches/ and events/ sub-folders.\n",
    "DATA_ROOT = r'C:\\Users\\koushik.r\\Documents\\open-data-master\\data'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done\n"
     ]
    }
   ],
   "source": [
    "import codecs  # no longer needed by this cell; kept in case later cells rely on it\n",
    "import json\n",
    "import math\n",
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "try:\n",
    "    from pandas import json_normalize  # pandas >= 1.0\n",
    "except ImportError:\n",
    "    from pandas.io.json import json_normalize  # older pandas releases\n",
    "\n",
    "\n",
    "def read_json(path):\n",
    "    \"\"\"Load one UTF-8 encoded JSON file and return the parsed object.\"\"\"\n",
    "    with open(path, encoding='utf-8') as handle:\n",
    "        return json.load(handle)\n",
    "\n",
    "\n",
    "def load_season_events(data_root, country, season):\n",
    "    \"\"\"Return the events of every match played in `country` during `season`.\n",
    "\n",
    "    Walks the matches folder under `data_root`, keeps the matches whose\n",
    "    competition_country_name and season_season_name equal the arguments,\n",
    "    and reads events/MATCH_ID.json for each of them.  The flattened event\n",
    "    frames are concatenated once at the end instead of being appended one\n",
    "    by one: that avoids re-copying the accumulated frame for every match\n",
    "    and also works on pandas versions that no longer have DataFrame.append.\n",
    "\n",
    "    Returns an empty DataFrame when no match qualifies.\n",
    "    \"\"\"\n",
    "    matches_dir = os.path.join(data_root, 'matches')\n",
    "    events_dir = os.path.join(data_root, 'events')\n",
    "    event_frames = []\n",
    "    for root, _dirs, files in os.walk(matches_dir):\n",
    "        for file_name in files:\n",
    "            if not file_name.endswith('.json'):\n",
    "                continue  # ignore stray non-JSON files in the data folder\n",
    "            matches = json_normalize(read_json(os.path.join(root, file_name)), sep='_')\n",
    "            if matches.empty:\n",
    "                continue  # an empty match list has no columns to filter on\n",
    "            wanted = matches[\n",
    "                (matches['competition_country_name'] == country)\n",
    "                & (matches['season_season_name'] == season)\n",
    "            ]\n",
    "            for match_id in wanted['match_id']:\n",
    "                events_path = os.path.join(events_dir, str(match_id) + '.json')\n",
    "                event_frames.append(json_normalize(read_json(events_path), sep='_'))\n",
    "    if not event_frames:\n",
    "        return pd.DataFrame()\n",
    "    return pd.concat(event_frames, ignore_index=True, sort=False)\n",
    "\n",
    "\n",
    "# Folder prefix of the per-match event files; name kept from the original notebook.\n",
    "path_match = os.path.join(DATA_ROOT, 'events') + os.sep\n",
    "main_df = load_season_events(DATA_ROOT, country, season)\n",
    "print('Done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "if main_df.empty:\n",
    "    raise ValueError(\n",
    "        'No events were loaded for {} {}; check DATA_ROOT, country and season.'.format(country, season)\n",
    "    )\n",
    "# Events of the configured player with type_id 43, play_pattern_id 1 and a duration of at least 1.50.\n",
    "# The result gets its own name so that re-running the next cell never sees an already-trimmed frame.\n",
    "player_events = main_df.query(\n",
    "    'player_id == @target_player_id & type_id == 43 & play_pattern_id == 1 & duration >= 1.50'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "if player_events.empty:\n",
    "    raise ValueError(\n",
    "        'No matching events for player id {} in season {}.'.format(target_player_id, season)\n",
    "    )\n",
    "player_name = player_events['player_name'].iloc[0]\n",
    "# Keep only the start/end coordinates. The copy makes Player an independent frame,\n",
    "# so adding a column later does not trigger SettingWithCopyWarning.\n",
    "Player = player_events[['location', 'carry_end_location']].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Straight-line distance between start and end point: sqrt((x2 - x1)^2 + (y2 - y1)^2).\n",
    "# Only the first two coordinates of each location are used.\n",
    "distance = [\n",
    "    math.hypot(end[0] - start[0], end[1] - start[1])\n",
    "    for start, end in zip(Player['location'], Player['carry_end_location'])\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# assign() returns a new frame, so this cell can be re-run safely; the original index is preserved.\n",
    "Player = Player.assign(dribble_distance=distance)"
   ]
  },
  { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | location | \n", "carry_end_location | \n", "dribble_distance | \n", "
---|---|---|---|
2791 | \n", "[81.9, 77.4] | \n", "[85.6, 66.6] | \n", "11.416217 | \n", "
3160 | \n", "[78.7, 7.3] | \n", "[115.6, 22.8] | \n", "40.023243 | \n", "
3765 | \n", "[45.5, 4.5] | \n", "[48.2, 6.4] | \n", "3.301515 | \n", "
4811 | \n", "[43.1, 40.0] | \n", "[50.3, 28.4] | \n", "13.652839 | \n", "
5012 | \n", "[77.0, 65.6] | \n", "[97.8, 53.3] | \n", "24.164644 | \n", "