{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Change this cell to change which seasons are analysed.\n",
    "season = ['2017/2018', '2018/2019', '2019/2020']\n",
    "# NOTE(review): `ssn` and `length` are not read by the loading cells below;\n",
    "# presumably they are consumed further down the notebook - confirm.\n",
    "ssn = '0506'\n",
    "length = 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "import codecs\n",
    "\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "import StatsbombPitch as sb\n",
    "\n",
    "# Root of the local data checkout. Set the STATSBOMB_DATA_DIR environment\n",
    "# variable to point somewhere else instead of editing the notebook.\n",
    "DATA_DIR = os.environ.get(\n",
    "    'STATSBOMB_DATA_DIR',\n",
    "    '/home/kirugulige/Documents/Football-Analytics/open-data-master/data',\n",
    ")\n",
    "MATCHES_DIR = os.path.join(DATA_DIR, 'matches')  # match-list JSON files, walked recursively\n",
    "EVENTS_DIR = os.path.join(DATA_DIR, 'events')    # play-by-play events, one '<match_id>.json' per match\n",
    "\n",
    "# Collect one frame of Barcelona shots per match and concatenate once at the\n",
    "# end: growing a DataFrame inside the loop is quadratic, and DataFrame.append\n",
    "# no longer exists in pandas 2.x.\n",
    "shot_frames = []\n",
    "for root, _dirs, files in os.walk(MATCHES_DIR):\n",
    "    for file_name in files:\n",
    "        if not file_name.endswith('.json'):\n",
    "            continue  # ignore stray non-JSON files in the data tree\n",
    "        with open(os.path.join(root, file_name), encoding='utf-8') as matches_file:\n",
    "            matches = pd.json_normalize(json.load(matches_file), sep='_')\n",
    "        if matches.empty:\n",
    "            continue  # an empty match list has no columns to filter on\n",
    "\n",
    "        # Matches played in Spain during one of the configured seasons.\n",
    "        in_scope = (\n",
    "            (matches['competition_country_name'] == 'Spain')\n",
    "            & matches['season_season_name'].isin(season)\n",
    "        )\n",
    "        for match_id in matches.loc[in_scope, 'match_id']:\n",
    "            event_path = os.path.join(EVENTS_DIR, str(match_id) + '.json')\n",
    "            with open(event_path, encoding='utf-8') as event_file:\n",
    "                events = pd.json_normalize(json.load(event_file), sep='_')\n",
    "            shots = events[(events['type_name'] == 'Shot') & (events['team_name'] == 'Barcelona')]\n",
    "            shot_frames.append(shots)\n",
    "\n",
    "# pd.concat raises on an empty list, so fall back to an empty frame.\n",
    "main_df = pd.concat(shot_frames, ignore_index=True, sort=False) if shot_frames else pd.DataFrame()\n",
    "print('Done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the shot columns used by the cells that follow.\n",
    "shot_columns = [\n",
    "    'location',\n",
    "    'shot_body_part_id',\n",
    "    'shot_end_location',\n",
    "    'shot_outcome_id',\n",
    "    'player_name',\n",
    "    'player_id',\n",
    "]\n",
    "df = main_df.loc[:, shot_columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['player_name'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['shot_outcome_id'].value_counts()"
   ]
  },
  { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | location | \n", "shot_body_part_id | \n", "shot_end_location | \n", "shot_outcome_id | \n", "player_name | \n", "player_id | \n", "
---|---|---|---|---|---|---|
3 | \n", "[99.1, 44.1] | \n", "38.0 | \n", "[120.0, 36.6, 2.2] | \n", "97.0 | \n", "Lionel Andrés Messi Cuccittini | \n", "5503.0 | \n", "
48 | \n", "[94.7, 51.6] | \n", "38.0 | \n", "[120.0, 42.4, 0.4] | \n", "97.0 | \n", "Lionel Andrés Messi Cuccittini | \n", "5503.0 | \n", "
49 | \n", "[106.7, 36.8] | \n", "38.0 | \n", "[120.0, 37.9, 0.1] | \n", "97.0 | \n", "Lionel Andrés Messi Cuccittini | \n", "5503.0 | \n", "
54 | \n", "[114.1, 41.1] | \n", "38.0 | \n", "[120.0, 39.0, 1.8] | \n", "97.0 | \n", "Lionel Andrés Messi Cuccittini | \n", "5503.0 | \n", "
62 | \n", "[109.3, 32.0] | \n", "38.0 | \n", "[120.0, 40.9, 0.2] | \n", "97.0 | \n", "Lionel Andrés Messi Cuccittini | \n", "5503.0 | \n", "