{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "xGStatsbombBarca.ipynb", "provenance": [], "authorship_tag": "ABX9TyNQ/Xflwl0BRHHBlkVWMmvt", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "" ] }, { "cell_type": "code", "metadata": { "id": "g37QpaaPZHA5", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "outputId": "c8ee2fc2-3a61-41a1-dc0a-c5b738da11d3" }, "source": [ "%%time\n", "!git clone https://github.com/statsbomb/open-data.git" ], "execution_count": 1, "outputs": [ { "output_type": "stream", "text": [ "Cloning into 'open-data'...\n", "remote: Enumerating objects: 1088, done.\u001b[K\n", "remote: Counting objects: 100% (1088/1088), done.\u001b[K\n", "remote: Compressing objects: 100% (591/591), done.\u001b[K\n", "remote: Total 9810 (delta 893), reused 674 (delta 479), pack-reused 8722\u001b[K\n", "Receiving objects: 100% (9810/9810), 995.57 MiB | 14.28 MiB/s, done.\n", "Resolving deltas: 100% (8640/8640), done.\n", "Checking out files: 100% (1648/1648), done.\n", "CPU times: user 548 ms, sys: 115 ms, total: 663 ms\n", "Wall time: 2min 44s\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "nd6vcG3uZNJb", "colab_type": "code", "colab": {} }, "source": [ "#import all modules\n", "import json\n", "import os\n", "import codecs\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from matplotlib.patches import Arc, Rectangle, ConnectionPatch\n", "from matplotlib.offsetbox import OffsetImage\n", "from matplotlib.patches import Ellipse\n", "from functools import reduce\n", "import math" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "5NMxa9NNZR5m", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 68 }, "outputId": 
"133de3d7-2515-4ec6-f479-d4c69cee4f8c" }, "source": [
 "%%time\n",
 "# Collect the shot events of every match in the selected competitions into `main_df`.\n",
 "comp = ['FIFA World Cup', 'La Liga']  # competition names whose matches are loaded\n",
 "path_match = \"/content/open-data/data/events/\"  # location for play by play events\n",
 "shot_frames = []  # one frame of shot rows per match; concatenated once after the loop\n",
 "for root, dirs, files in os.walk('/content/open-data/data/matches/'):\n",
 "    for file in files:\n",
 "        # Open each matches file a single time, decoded as UTF-8.\n",
 "        with open(os.path.join(root, file), encoding='utf-8') as data_file:\n",
 "            matches = json.load(data_file)  # assumed to be a list of match dicts\n",
 "        for match in matches:\n",
 "            # Nested key that json_normalize(sep='_') exposes as competition_competition_name.\n",
 "            if match.get('competition', {}).get('competition_name') not in comp:\n",
 "                continue\n",
 "            # The events file of a match is named after its match_id.\n",
 "            event_path = os.path.join(path_match, str(match['match_id']) + '.json')\n",
 "            with open(event_path, encoding='utf-8') as event_file:\n",
 "                events = pd.json_normalize(json.load(event_file), sep=\"_\")\n",
 "            # Flatten the whole match before filtering so the shot rows keep every event column.\n",
 "            shot_frames.append(events[events['type_name'] == \"Shot\"])\n",
 "# Concatenate once: DataFrame.append was removed in pandas 2.0 and re-copied the\n",
 "# accumulated frame on every call. pd.concat raises on an empty list, hence the guard.\n",
 "main_df = pd.concat(shot_frames, ignore_index=True, sort=False) if shot_frames else pd.DataFrame()\n",
 "print('Done')"
 ], "execution_count": 3, "outputs": [ { "output_type": "stream", "text": [ "Done\n", "CPU times: user 6min 3s, sys: 1.3 s, total: 6min 4s\n", "Wall time: 6min 4s\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "waQI6t6OVM33", "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", "height": 508 }, "outputId": "59fa3bca-64b8-4260-b33a-b34cbfdd9248" }, "source": [ "main_df.head()" ], "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", " | id | \n", "index | \n", "period | \n", "timestamp | \n", "minute | \n", "second | \n", "possession | \n", "duration | \n", "type_id | \n", "type_name | \n", "possession_team_id | \n", "possession_team_name | \n", "play_pattern_id | \n", "play_pattern_name | \n", "team_id | \n", "team_name | \n", "tactics_formation | \n", "tactics_lineup | \n", "related_events | \n", "location | \n", "player_id | \n", "player_name | \n", "position_id | \n", "position_name | \n", "pass_recipient_id | \n", "pass_recipient_name | \n", "pass_length | \n", "pass_angle | \n", "pass_height_id | \n", "pass_height_name | \n", "pass_end_location | \n", "pass_type_id | \n", "pass_type_name | \n", "pass_body_part_id | \n", "pass_body_part_name | \n", "carry_end_location | \n", "under_pressure | \n", "pass_outcome_id | \n", "pass_outcome_name | \n", "pass_aerial_won | \n", "... | \n", "substitution_outcome_id | \n", "substitution_outcome_name | \n", "substitution_replacement_id | \n", "substitution_replacement_name | \n", "shot_one_on_one | \n", "bad_behaviour_card_id | \n", "bad_behaviour_card_name | \n", "50_50_outcome_id | \n", "50_50_outcome_name | \n", "dribble_overrun | \n", "goalkeeper_punched_out | \n", "pass_miscommunication | \n", "block_deflection | \n", "pass_goal_assist | \n", "clearance_other | \n", "injury_stoppage_in_chain | \n", "shot_deflected | \n", "dribble_no_touch | \n", "pass_deflected | \n", "shot_saved_off_target | \n", "goalkeeper_shot_saved_off_target | \n", "ball_recovery_offensive | \n", "pass_straight | \n", "foul_committed_penalty | \n", "foul_won_penalty | \n", "block_save_block | \n", "shot_open_goal | \n", "goalkeeper_lost_out | \n", "goalkeeper_success_in_play | \n", "player_off_permanent | \n", "goalkeeper_shot_saved_to_post | \n", "shot_redirect | \n", "shot_saved_to_post | \n", "shot_follows_dribble | \n", "goalkeeper_success_out | \n", "half_start_late_video_start | \n", "goalkeeper_lost_in_play | \n", "goalkeeper_saved_to_post | \n", 
"pass_backheel | \n", "half_end_early_video_end | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2f046b33-685c-4122-8af2-8ceadf56c83d | \n", "294 | \n", "1 | \n", "00:06:50.216 | \n", "6 | \n", "50 | \n", "12 | \n", "0.115400 | \n", "16 | \n", "Shot | \n", "217 | \n", "Barcelona | \n", "4 | \n", "From Throw In | \n", "217 | \n", "Barcelona | \n", "NaN | \n", "NaN | \n", "[58295c63-1ffa-4e27-9258-818ea90c6b04, f514442... | \n", "[104.4, 41.8] | \n", "5503.0 | \n", "Lionel Andrés Messi Cuccittini | \n", "17.0 | \n", "Right Wing | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "855d47fc-9017-4508-8b41-0275dfb4d755 | \n", "962 | \n", "1 | \n", "00:22:27.038 | \n", "22 | \n", "27 | \n", "38 | \n", "2.046458 | \n", "16 | \n", "Shot | \n", "217 | \n", "Barcelona | \n", "2 | \n", "From Corner | \n", "217 | \n", "Barcelona | \n", "NaN | \n", "NaN | \n", "[aec80f5c-807e-47ac-8c33-092c92b222d1] | \n", "[110.8, 35.8] | \n", "5470.0 | \n", "Ivan Rakitić | \n", "10.0 | \n", "Center Defensive Midfield | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "7c69fb86-c77d-463b-8f00-503e447492a4 | \n", "1153 | \n", "1 | \n", "00:27:08.522 | \n", "27 | \n", "8 | \n", "46 | \n", "0.804175 | \n", "16 | \n", "Shot | \n", "217 | \n", "Barcelona | \n", "2 | \n", "From Corner | \n", "217 | \n", "Barcelona | \n", "NaN | \n", "NaN | \n", "[350f13e2-16cc-449d-a72d-f7ccd571fc50, 662299b... | \n", "[109.9, 40.5] | \n", "5492.0 | \n", "Samuel Yves Umtiti | \n", "5.0 | \n", "Left Center Back | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "True | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "77ea8775-f9f4-4bf7-b3f9-7635ab861ab5 | \n", "1254 | \n", "1 | \n", "00:30:13.151 | \n", "30 | \n", "13 | \n", "59 | \n", "0.380900 | \n", "16 | \n", "Shot | \n", "217 | \n", "Barcelona | \n", "3 | \n", "From Free Kick | \n", "217 | \n", "Barcelona | \n", "NaN | \n", "NaN | \n", "[30b9d0e1-5eeb-4cb0-86ea-a6e8967893e2, ae620c7... | \n", "[90.0, 36.2] | \n", "5503.0 | \n", "Lionel Andrés Messi Cuccittini | \n", "17.0 | \n", "Right Wing | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "09c2667a-4827-4871-a70a-96adb1d73243 | \n", "1381 | \n", "1 | \n", "00:33:19.875 | \n", "33 | \n", "19 | \n", "63 | \n", "0.222600 | \n", "16 | \n", "Shot | \n", "217 | \n", "Barcelona | \n", "4 | \n", "From Throw In | \n", "217 | \n", "Barcelona | \n", "NaN | \n", "NaN | \n", "[19491e5f-dd7c-47a8-994d-b6aae0630b55, a81b342... | \n", "[97.3, 28.8] | \n", "6998.0 | \n", "Rafael Alcântara do Nascimento | \n", "15.0 | \n", "Left Center Midfield | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 rows × 147 columns
\n", "\n", " | location | \n", "Distance | \n", "Angle | \n", "UnderPressure | \n", "ShotType | \n", "ShotBodyPart | \n", "ShotTechnique | \n", "ShotFirstTime | \n", "ShotOneonOne | \n", "isGoal | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "[104.4, 41.8] | \n", "16.198841 | \n", "28.422114 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "1 | \n", "0 | \n", "False | \n", "
1 | \n", "[110.8, 35.8] | \n", "10.763067 | \n", "40.465393 | \n", "0 | \n", "3 | \n", "1 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
2 | \n", "[109.9, 40.5] | \n", "10.873186 | \n", "43.128076 | \n", "1 | \n", "3 | \n", "1 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
3 | \n", "[90.0, 36.2] | \n", "30.499043 | \n", "14.956182 | \n", "0 | \n", "2 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
4 | \n", "[97.3, 28.8] | \n", "25.566766 | \n", "16.208386 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
12952 | \n", "[111.0, 27.0] | \n", "15.981653 | \n", "17.102729 | \n", "0 | \n", "3 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
12953 | \n", "[114.0, 33.0] | \n", "9.619084 | \n", "34.824489 | \n", "0 | \n", "3 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "True | \n", "
12954 | \n", "[107.0, 32.0] | \n", "15.646638 | \n", "25.606661 | \n", "0 | \n", "3 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
12955 | \n", "[97.0, 22.0] | \n", "29.376742 | \n", "12.398277 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
12956 | \n", "[109.0, 52.0] | \n", "16.508979 | \n", "19.464104 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "0 | \n", "0 | \n", "False | \n", "
12957 rows × 10 columns
\n", "\n", " | location | \n", "Distance | \n", "Angle | \n", "UnderPressure | \n", "ShotType | \n", "ShotBodyPart | \n", "ShotTechnique | \n", "ShotFirstTime | \n", "ShotOneonOne | \n", "xG | \n", "
---|---|---|---|---|---|---|---|---|---|---|
2961 | \n", "[92.2, 58.9] | \n", "33.779300 | \n", "11.289656 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "0 | \n", "0 | \n", "0.035840 | \n", "
10990 | \n", "[115.2, 45.8] | \n", "8.019390 | \n", "43.348531 | \n", "0 | \n", "3 | \n", "1 | \n", "5 | \n", "0 | \n", "1 | \n", "0.340789 | \n", "
12649 | \n", "[115.0, 41.0] | \n", "6.451010 | \n", "75.963757 | \n", "0 | \n", "3 | \n", "1 | \n", "5 | \n", "0 | \n", "0 | \n", "0.453033 | \n", "
5264 | \n", "[111.9, 32.5] | \n", "11.445052 | \n", "31.472019 | \n", "1 | \n", "3 | \n", "1 | \n", "5 | \n", "0 | \n", "0 | \n", "0.082841 | \n", "
9283 | \n", "[111.6, 37.0] | \n", "9.696832 | \n", "46.594546 | \n", "1 | \n", "3 | \n", "1 | \n", "5 | \n", "0 | \n", "0 | \n", "0.135449 | \n", "
\n", " | location | \n", "Distance | \n", "Angle | \n", "UnderPressure | \n", "ShotType | \n", "ShotBodyPart | \n", "ShotTechnique | \n", "ShotFirstTime | \n", "ShotOneonOne | \n", "xG | \n", "
---|---|---|---|---|---|---|---|---|---|---|
4624 | \n", "[119.3, 41.4] | \n", "4.068882 | \n", "157.545469 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "1 | \n", "0 | \n", "0.955584 | \n", "
3818 | \n", "[119.1, 42.6] | \n", "4.162706 | \n", "139.499608 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "1 | \n", "0 | \n", "0.927110 | \n", "
6952 | \n", "[118.4, 39.4] | \n", "4.313989 | \n", "135.619868 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "1 | \n", "0 | \n", "0.918449 | \n", "
8311 | \n", "[119.2, 37.0] | \n", "4.163095 | \n", "134.820390 | \n", "0 | \n", "3 | \n", "1 | \n", "5 | \n", "0 | \n", "1 | \n", "0.902713 | \n", "
12822 | \n", "[119.0, 43.0] | \n", "4.242641 | \n", "126.869898 | \n", "0 | \n", "3 | \n", "4 | \n", "5 | \n", "1 | \n", "0 | \n", "0.897920 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
3937 | \n", "[69.2, 77.6] | \n", "63.283107 | \n", "5.832625 | \n", "0 | \n", "3 | \n", "4 | \n", "4 | \n", "0 | \n", "0 | \n", "0.004453 | \n", "
2834 | \n", "[81.1, 5.9] | \n", "51.817864 | \n", "6.672792 | \n", "0 | \n", "2 | \n", "4 | \n", "5 | \n", "0 | \n", "0 | \n", "0.003497 | \n", "
6385 | \n", "[57.2, 34.0] | \n", "63.211517 | \n", "7.223482 | \n", "0 | \n", "3 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "0.003241 | \n", "
11776 | \n", "[62.0, 36.0] | \n", "58.274562 | \n", "7.853313 | \n", "1 | \n", "3 | \n", "2 | \n", "5 | \n", "0 | \n", "0 | \n", "0.002498 | \n", "
6659 | \n", "[51.9, 43.4] | \n", "68.301760 | \n", "6.706436 | \n", "0 | \n", "3 | \n", "2 | \n", "4 | \n", "0 | \n", "0 | \n", "0.002257 | \n", "
2592 rows × 10 columns
\n", "