2020-04-25 16:52:24 +02:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
2020-04-25 17:13:12 +02:00
"name": "xGStatsbomb.ipynb",
2020-04-25 16:52:24 +02:00
"provenance": [],
2020-05-13 14:17:39 +02:00
"collapsed_sections": [],
"authorship_tag": "ABX9TyNsyQ1grvKucvpzQ+6OWDaR",
2020-04-25 16:52:24 +02:00
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
2020-04-25 17:13:12 +02:00
"<a href=\"https://colab.research.google.com/github/koushikkirugulige/Football-Analytics/blob/master/xGStatsbomb.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
2020-04-25 16:52:24 +02:00
]
},
{
"cell_type": "code",
"metadata": {
"id": "g37QpaaPZHA5",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "e4d0b0c8-f58b-4e46-a41e-65d2743d1e50",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
"height": 187
2020-05-13 14:17:39 +02:00
}
2020-04-25 16:52:24 +02:00
},
"source": [
"%%time\n",
"!git clone https://github.com/statsbomb/open-data.git"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"Cloning into 'open-data'...\n",
2020-05-13 14:17:39 +02:00
"remote: Enumerating objects: 1133, done.\u001b[K\n",
"remote: Counting objects: 100% (1133/1133), done.\u001b[K\n",
"remote: Compressing objects: 100% (632/632), done.\u001b[K\n",
"remote: Total 9855 (delta 925), reused 690 (delta 482), pack-reused 8722\u001b[K\n",
"Receiving objects: 100% (9855/9855), 996.16 MiB | 25.43 MiB/s, done.\n",
"Resolving deltas: 100% (8672/8672), done.\n",
2020-04-25 16:52:24 +02:00
"Checking out files: 100% (1648/1648), done.\n",
2020-05-13 14:17:39 +02:00
"CPU times: user 478 ms, sys: 94.4 ms, total: 572 ms\n",
"Wall time: 2min 17s\n"
2020-04-25 16:52:24 +02:00
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nd6vcG3uZNJb",
"colab_type": "code",
"colab": {}
},
"source": [
"#import all modules\n",
"import json\n",
"import os\n",
"import codecs\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.patches import Arc, Rectangle, ConnectionPatch\n",
"from matplotlib.offsetbox import OffsetImage\n",
"from matplotlib.patches import Ellipse\n",
"from functools import reduce\n",
"import math"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5NMxa9NNZR5m",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "163c47f0-dae4-4337-af58-3d2696232bf7",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
"height": 68
2020-05-13 14:17:39 +02:00
}
2020-04-25 16:52:24 +02:00
},
"source": [
"%%time\n",
"comp = ['FIFA World Cup','La Liga']\n",
"main_df = pd.DataFrame(data=None)\n",
"path_match = \"/content/open-data/data/events/\" #location for play by play events\n",
"for root, dirs, files in os.walk('/content/open-data/data/matches/'):\n",
" for file in files:\n",
" with open(os.path.join(root, file), \"r\") as auto:\n",
" with codecs.open(root + str('/') + file,encoding='utf-8') as data_file:\n",
" data = json.load(data_file)\n",
" df = pd.DataFrame(data=None)\n",
" df = pd.json_normalize(data, sep = \"_\")\n",
" #for x in df.competition_country_name:\n",
" # if x == 'Spain':\n",
" # print(df.match_id)\n",
" #print(df['competition_competition_name'])\n",
" for i in range(len(df)):\n",
" if df.iloc[i]['competition_competition_name'] in comp :\n",
" match_no = df.iloc[i]['match_id'] #gets match with Spain as country\n",
" match_no = str(match_no) # from int to str \n",
" #print('match list \\n',match_no)\n",
" with codecs.open(path_match + match_no + str(r'.json'),encoding=\"utf8\") as event_file: #open the respective file\n",
" df_match = json.load(event_file)\n",
" df_match2 = pd.DataFrame(data=None)\n",
" df_match2 = pd.json_normalize(df_match,sep=\"_\") \n",
" df_match2 = df_match2[(df_match2['type_name'] == \"Shot\")]\n",
" main_df = main_df.append(df_match2,ignore_index=True,sort=False) \n",
"#print('total matches ',len(match_no)) \n",
"print('Done')"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Done\n",
2020-05-13 14:17:39 +02:00
"CPU times: user 6min 28s, sys: 1.27 s, total: 6min 29s\n",
"Wall time: 6min 30s\n"
2020-04-25 16:52:24 +02:00
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "waQI6t6OVM33",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "16372efd-c12f-48ec-f02c-95279925490b",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
"height": 508
2020-05-13 14:17:39 +02:00
}
2020-04-25 16:52:24 +02:00
},
"source": [
"main_df.head()"
],
2020-05-13 14:17:39 +02:00
"execution_count": 0,
2020-04-25 16:52:24 +02:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>index</th>\n",
" <th>period</th>\n",
" <th>timestamp</th>\n",
" <th>minute</th>\n",
" <th>second</th>\n",
" <th>possession</th>\n",
" <th>duration</th>\n",
" <th>type_id</th>\n",
" <th>type_name</th>\n",
" <th>possession_team_id</th>\n",
" <th>possession_team_name</th>\n",
" <th>play_pattern_id</th>\n",
" <th>play_pattern_name</th>\n",
" <th>team_id</th>\n",
" <th>team_name</th>\n",
" <th>tactics_formation</th>\n",
" <th>tactics_lineup</th>\n",
" <th>related_events</th>\n",
" <th>location</th>\n",
" <th>player_id</th>\n",
" <th>player_name</th>\n",
" <th>position_id</th>\n",
" <th>position_name</th>\n",
" <th>pass_recipient_id</th>\n",
" <th>pass_recipient_name</th>\n",
" <th>pass_length</th>\n",
" <th>pass_angle</th>\n",
" <th>pass_height_id</th>\n",
" <th>pass_height_name</th>\n",
" <th>pass_end_location</th>\n",
" <th>pass_type_id</th>\n",
" <th>pass_type_name</th>\n",
" <th>pass_body_part_id</th>\n",
" <th>pass_body_part_name</th>\n",
" <th>carry_end_location</th>\n",
" <th>under_pressure</th>\n",
" <th>pass_outcome_id</th>\n",
" <th>pass_outcome_name</th>\n",
2020-05-13 14:17:39 +02:00
" <th>ball_receipt_outcome_id</th>\n",
2020-04-25 16:52:24 +02:00
" <th>...</th>\n",
2020-05-13 14:17:39 +02:00
" <th>pass_deflected</th>\n",
" <th>block_deflection</th>\n",
2020-04-25 16:52:24 +02:00
" <th>substitution_outcome_id</th>\n",
" <th>substitution_outcome_name</th>\n",
" <th>substitution_replacement_id</th>\n",
" <th>substitution_replacement_name</th>\n",
2020-05-13 14:17:39 +02:00
" <th>ball_recovery_recovery_failure</th>\n",
" <th>dribble_overrun</th>\n",
2020-04-25 16:52:24 +02:00
" <th>50_50_outcome_id</th>\n",
" <th>50_50_outcome_name</th>\n",
2020-05-13 14:17:39 +02:00
" <th>shot_aerial_won</th>\n",
" <th>shot_open_goal</th>\n",
" <th>bad_behaviour_card_id</th>\n",
" <th>bad_behaviour_card_name</th>\n",
" <th>pass_no_touch</th>\n",
" <th>block_offensive</th>\n",
" <th>foul_committed_offensive</th>\n",
" <th>shot_saved_off_target</th>\n",
" <th>goalkeeper_shot_saved_off_target</th>\n",
" <th>miscontrol_aerial_won</th>\n",
2020-04-25 16:52:24 +02:00
" <th>goalkeeper_punched_out</th>\n",
" <th>clearance_other</th>\n",
2020-05-13 14:17:39 +02:00
" <th>ball_recovery_offensive</th>\n",
2020-04-25 16:52:24 +02:00
" <th>shot_deflected</th>\n",
" <th>dribble_no_touch</th>\n",
" <th>shot_redirect</th>\n",
2020-05-13 14:17:39 +02:00
" <th>block_save_block</th>\n",
" <th>injury_stoppage_in_chain</th>\n",
2020-04-25 16:52:24 +02:00
" <th>half_start_late_video_start</th>\n",
2020-05-13 14:17:39 +02:00
" <th>player_off_permanent</th>\n",
" <th>goalkeeper_lost_out</th>\n",
2020-04-25 16:52:24 +02:00
" <th>goalkeeper_saved_to_post</th>\n",
2020-05-13 14:17:39 +02:00
" <th>shot_follows_dribble</th>\n",
" <th>shot_saved_to_post</th>\n",
" <th>goalkeeper_shot_saved_to_post</th>\n",
2020-04-25 16:52:24 +02:00
" <th>pass_backheel</th>\n",
2020-05-13 14:17:39 +02:00
" <th>goalkeeper_lost_in_play</th>\n",
" <th>goalkeeper_success_out</th>\n",
" <th>goalkeeper_success_in_play</th>\n",
2020-04-25 16:52:24 +02:00
" <th>half_end_early_video_end</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2020-05-13 14:17:39 +02:00
" <td>fb785612-71d3-44df-aae4-da6e005756de</td>\n",
" <td>117</td>\n",
2020-04-25 16:52:24 +02:00
" <td>1</td>\n",
2020-05-13 14:17:39 +02:00
" <td>00:02:06.532</td>\n",
" <td>2</td>\n",
2020-04-25 16:52:24 +02:00
" <td>6</td>\n",
2020-05-13 14:17:39 +02:00
" <td>6</td>\n",
" <td>1.015179</td>\n",
2020-04-25 16:52:24 +02:00
" <td>16</td>\n",
" <td>Shot</td>\n",
2020-05-13 14:17:39 +02:00
" <td>222</td>\n",
" <td>Villarreal</td>\n",
" <td>1</td>\n",
" <td>Regular Play</td>\n",
" <td>222</td>\n",
" <td>Villarreal</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2020-05-13 14:17:39 +02:00
" <td>[1febb4f7-0e2c-43f7-96fe-51fbffaaf664]</td>\n",
" <td>[107.5, 27.2]</td>\n",
" <td>11386.0</td>\n",
" <td>Santiago Cazorla González</td>\n",
" <td>16.0</td>\n",
" <td>Left Midfield</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2020-05-13 14:17:39 +02:00
" <td>2dfa84fe-3579-4705-8d27-44b8917907e1</td>\n",
" <td>499</td>\n",
2020-04-25 16:52:24 +02:00
" <td>1</td>\n",
2020-05-13 14:17:39 +02:00
" <td>00:09:49.110</td>\n",
" <td>9</td>\n",
" <td>49</td>\n",
" <td>17</td>\n",
" <td>1.302674</td>\n",
2020-04-25 16:52:24 +02:00
" <td>16</td>\n",
" <td>Shot</td>\n",
2020-05-13 14:17:39 +02:00
" <td>222</td>\n",
" <td>Villarreal</td>\n",
" <td>4</td>\n",
" <td>From Throw In</td>\n",
" <td>222</td>\n",
" <td>Villarreal</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2020-05-13 14:17:39 +02:00
" <td>[1af68944-ff3d-49a1-92a0-fe6198e73e78]</td>\n",
" <td>[88.3, 50.2]</td>\n",
" <td>25921.0</td>\n",
" <td>Rubén Gracia Calmache</td>\n",
" <td>12.0</td>\n",
" <td>Right Midfield</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2020-05-13 14:17:39 +02:00
" <td>c9d92f30-2159-4a5a-a5bf-9d1163e4b33f</td>\n",
" <td>587</td>\n",
" <td>1</td>\n",
" <td>00:12:58.407</td>\n",
" <td>12</td>\n",
" <td>58</td>\n",
" <td>24</td>\n",
" <td>0.521000</td>\n",
2020-04-25 16:52:24 +02:00
" <td>16</td>\n",
" <td>Shot</td>\n",
2020-05-13 14:17:39 +02:00
" <td>222</td>\n",
" <td>Villarreal</td>\n",
" <td>5</td>\n",
" <td>Other</td>\n",
" <td>222</td>\n",
" <td>Villarreal</td>\n",
" <td>NaN</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
2020-05-13 14:17:39 +02:00
" <td>[61239f9b-052f-42ab-8a73-3c3c3841d419]</td>\n",
" <td>[108.0, 40.0]</td>\n",
" <td>20135.0</td>\n",
" <td>Marcos Antonio Senna da Silva</td>\n",
" <td>15.0</td>\n",
" <td>Left Center Midfield</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2020-05-13 14:17:39 +02:00
" <td>26b5d67b-5fce-4a5a-8b31-b879adbb61d3</td>\n",
" <td>1113</td>\n",
" <td>1</td>\n",
" <td>00:24:01.266</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>42</td>\n",
" <td>0.647279</td>\n",
2020-04-25 16:52:24 +02:00
" <td>16</td>\n",
" <td>Shot</td>\n",
" <td>217</td>\n",
" <td>Barcelona</td>\n",
2020-05-13 14:17:39 +02:00
" <td>7</td>\n",
" <td>From Goal Kick</td>\n",
2020-04-25 16:52:24 +02:00
" <td>217</td>\n",
" <td>Barcelona</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2020-05-13 14:17:39 +02:00
" <td>[1d8506ef-3a55-45c7-aba9-e31204db051e, 354b60e...</td>\n",
" <td>[107.3, 36.1]</td>\n",
" <td>4913.0</td>\n",
" <td>Bojan Krkíc Pérez</td>\n",
" <td>23.0</td>\n",
" <td>Center Forward</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2020-05-13 14:17:39 +02:00
" <td>True</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2020-05-13 14:17:39 +02:00
" <td>20528610-c092-482c-8238-6b9679328680</td>\n",
" <td>1343</td>\n",
" <td>1</td>\n",
" <td>00:28:38.374</td>\n",
" <td>28</td>\n",
" <td>38</td>\n",
" <td>49</td>\n",
" <td>1.023005</td>\n",
2020-04-25 16:52:24 +02:00
" <td>16</td>\n",
" <td>Shot</td>\n",
" <td>217</td>\n",
" <td>Barcelona</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
" <td>Regular Play</td>\n",
2020-04-25 16:52:24 +02:00
" <td>217</td>\n",
" <td>Barcelona</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2020-05-13 14:17:39 +02:00
" <td>[06385378-493b-4bd8-8731-f983c4ac28d8]</td>\n",
" <td>[105.3, 29.4]</td>\n",
" <td>5503.0</td>\n",
" <td>Lionel Andrés Messi Cuccittini</td>\n",
" <td>17.0</td>\n",
" <td>Right Wing</td>\n",
2020-04-25 16:52:24 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 147 columns</p>\n",
"</div>"
],
"text/plain": [
" id ... half_end_early_video_end\n",
2020-05-13 14:17:39 +02:00
"0 fb785612-71d3-44df-aae4-da6e005756de ... NaN\n",
"1 2dfa84fe-3579-4705-8d27-44b8917907e1 ... NaN\n",
"2 c9d92f30-2159-4a5a-a5bf-9d1163e4b33f ... NaN\n",
"3 26b5d67b-5fce-4a5a-8b31-b879adbb61d3 ... NaN\n",
"4 20528610-c092-482c-8238-6b9679328680 ... NaN\n",
2020-04-25 16:52:24 +02:00
"\n",
"[5 rows x 147 columns]"
]
},
"metadata": {
"tags": []
},
2020-05-13 14:17:39 +02:00
"execution_count": 4
2020-04-25 16:52:24 +02:00
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "WPZPi5M7ZI5o",
"colab_type": "code",
"colab": {}
},
"source": [
2020-04-25 17:11:18 +02:00
"\"\"\"Distance of shot location to centre of goal\"\"\"\n",
2020-04-25 16:52:24 +02:00
"def distFormula(coordinate):\n",
" a =(math.sqrt(((coordinate.location[0] - 120)**2) + ((coordinate.location[1] - 36)**2))) \n",
" b =(math.sqrt(((coordinate.location[0] - 120)**2) + ((coordinate.location[1] - 44)**2))) \n",
" return ((a+b)/2)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "ZRX43XViZshf",
"colab_type": "code",
"colab": {}
},
"source": [
2020-04-25 17:11:18 +02:00
"\"\"\" near x y (nx,ny) (120,44)and far x y (fx,fy) (120,36)\"\"\" \n",
2020-04-25 16:52:24 +02:00
"nx = 120\n",
"ny = 44\n",
"fx = 120\n",
"fy = 36\n",
"\n",
"goalpostLength = 8\n",
"def shot_angle(points):\n",
" len1 = (math.sqrt(((points.location[0] - nx)**2) + ((points.location[1] - ny)**2))) \n",
" len2 = (math.sqrt(((points.location[0] - fx)**2) + ((points.location[1] - fy)**2)))\n",
" ang = (len1**2 + len2**2 - goalpostLength**2)/(2 * len1 * len2)\n",
" if ang > 1:\n",
" ang = 1\n",
" elif ang < -1:\n",
" ang = -1 \n",
" angRad = math.acos(ang)\n",
" return( (angRad * 180)/math.pi) "
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Skzv7m2GcOek",
"colab_type": "code",
"colab": {}
},
"source": [
"\"\"\"If shot was taken under Pressure?\"\"\"\n",
"def under_pressure(coordinate):\n",
" if coordinate['under_pressure'] == True:\n",
" return 1\n",
" return 0"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "VRBmlZBtdbCf",
"colab_type": "code",
"colab": {}
},
"source": [
"\"\"\"The Shot type Id\"\"\"\n",
"def shot_type(coordinate):\n",
" if coordinate['shot_type_id'] == 61:\n",
" return 1\n",
" if coordinate['shot_type_id'] == 62:\n",
" return 2\n",
" if coordinate['shot_type_id'] == 87:\n",
" return 3\n",
" if coordinate['shot_type_id'] == 88:\n",
" return 4\n",
" return 5"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "SfSB2laheYY4",
"colab_type": "code",
"colab": {}
},
"source": [
"\"\"\"The Shot Body part\"\"\"\n",
"def shot_body_part(coordinate):\n",
" if coordinate['shot_body_part_id'] == 37:\n",
" return 1\n",
" if coordinate['shot_body_part_id'] == 38:\n",
" return 2\n",
" if coordinate['shot_body_part_id'] == 70:\n",
" return 3\n",
" return 4"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "XshqSKelezC2",
"colab_type": "code",
"colab": {}
},
"source": [
"\"\"\"The Shot Technique Id\"\"\"\n",
"def shot_technique(coordinate):\n",
" if coordinate['shot_technique_id'] == 89:\n",
" return 1\n",
" if coordinate['shot_technique_id'] == 90:\n",
" return 2\n",
" if coordinate['shot_technique_id'] == 91:\n",
" return 3\n",
" if coordinate['shot_technique_id'] == 92:\n",
" return 4\n",
" if coordinate['shot_technique_id'] == 93:\n",
" return 5\n",
" if coordinate['shot_technique_id'] == 94:\n",
" return 6\n",
" return 7"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "7DLY4vXtffsL",
"colab_type": "code",
"colab": {}
},
"source": [
"\"\"\"If shot was taken first time?\"\"\"\n",
"def shot_first_time(coordinate):\n",
" if coordinate['shot_first_time'] == True:\n",
" return 1\n",
" return 0"
],
"execution_count": 0,
"outputs": []
},
2020-05-13 14:17:39 +02:00
{
"cell_type": "code",
"metadata": {
"id": "7PkUfTrOwjmd",
"colab_type": "code",
"colab": {}
},
"source": [
"\"\"\" To Find if a point is inside the triangle\n",
"https://www.geeksforgeeks.org/check-whether-a-given-point-lies-inside-a-triangle-or-not/\"\"\"\n",
"def Triarea(a,b,c):\n",
"#return abs((x1*(y2-y3) + x2*(y3-y1)+ x3*(y1-y2))/2.0); \n",
" return abs((a[0] * (b[1] - c[1]) + b[0] * (c[1] - a[1]) + c[0] * (a[1] - b[1]))/2.0)\n",
"\n",
"def isInside(a,b,c,p):\n",
" A = Triarea(a,b,c)\n",
"\n",
" A1 = Triarea(a,b,p)\n",
"\n",
" A2 = Triarea(p,b,c)\n",
"\n",
" A3 = Triarea(a,p,c) \n",
" \n",
" if (round(A,2) == round((A1 + A2 + A3),2)):\n",
" return 1\n",
" return 0 "
],
"execution_count": 0,
"outputs": []
},
2020-04-25 16:52:24 +02:00
{
"cell_type": "code",
"metadata": {
"id": "U0pQQZPDf4oV",
"colab_type": "code",
"colab": {}
},
"source": [
"\"\"\"If shot was taken first time?\"\"\"\n",
"def shot_one_on_one(coordinate):\n",
" if coordinate['shot_one_on_one'] == True:\n",
" return 1\n",
" return 0"
],
"execution_count": 0,
"outputs": []
},
2020-05-13 14:17:39 +02:00
{
"cell_type": "code",
"metadata": {
"id": "C30AcDZJ9DY1",
"colab_type": "code",
"colab": {}
},
"source": [
"def infronofShot(frame):\n",
" if str(type(frame['shot_freeze_frame'])) == '<class \\'float\\'>':\n",
" return 0\n",
" if not len(frame['shot_freeze_frame']):\n",
" return 0\n",
" #print(type(frame['shot_freeze_frame']),'\\n')\n",
" loc = pd.DataFrame(frame['shot_freeze_frame'])\n",
" \n",
" #loc = loc[['location']]\n",
" X = frame['location'][0]\n",
" Y = frame['location'][1]\n",
" countgoal = 0\n",
" \n",
" for i in range(len(loc)):\n",
" \n",
" if isInside((X,Y),(120,36),(120,44),(loc['location'].iloc[i][0],loc['location'].iloc[i][1])) == 1:\n",
" countgoal +=1\n",
" return countgoal "
],
"execution_count": 0,
"outputs": []
},
2020-04-25 16:52:24 +02:00
{
"cell_type": "code",
"metadata": {
"id": "J2Qd_aEHZaja",
"colab_type": "code",
"colab": {}
},
"source": [
"main_df['Distance'] = main_df.apply(distFormula,axis = 1)\n",
"main_df['Angle'] = main_df.apply(shot_angle,axis = 1)\n",
"main_df['UnderPressure'] = main_df.apply(under_pressure,axis = 1)\n",
"main_df['ShotType'] = main_df.apply(shot_type,axis = 1)\n",
"main_df['ShotBodyPart'] = main_df.apply(shot_body_part,axis = 1)\n",
"main_df['ShotTechnique'] = main_df.apply(shot_technique,axis = 1)\n",
"main_df['ShotFirstTime'] = main_df.apply(shot_first_time,axis = 1)\n",
2020-05-13 14:17:39 +02:00
"main_df['ShotOneonOne']= main_df.apply(shot_one_on_one,axis = 1)\n",
"main_df['InFrontofGoal'] = main_df.apply(infronofShot,axis = 1)"
2020-04-25 16:52:24 +02:00
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "VbP_YO_tg694",
"colab_type": "code",
"colab": {}
},
"source": [
"goals_lst = main_df[main_df['shot_outcome_id'] == 97].index.tolist()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "e4X-RpdOguqH",
"colab_type": "code",
"colab": {}
},
"source": [
"#if shot is a goal \n",
"main_df['isGoal'] = False\n",
"goals_lst\n",
"main_df.loc[main_df.index.isin(goals_lst),'isGoal'] = True"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
2020-05-13 14:17:39 +02:00
"id": "5Y8BHzgGp2iS",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 401
},
"outputId": "2b549b10-f7a9-49d0-a290-361596e56c41"
},
"source": [
"import statsbombpitch as sb\n",
"sb.sb_pitch(\"#195905\",\"#faf0e6\",\"horizontal\",\"full\")\n",
"ilocv = 3477\n",
"plt.scatter(main_df.iloc[ilocv]['location'][0],main_df.iloc[ilocv]['location'][1],color=\"#ee3e32\",edgecolors=\"none\",zorder=10,alpha=1,s = 40 )\n",
"#plt.plt.plot((main_df.iloc[0]['location'][0],120),(main_df.iloc[0]['location'][1],44),color = 'black',zorder = 10)\n",
"#(main_df.iloc[1]['location'][1],main_df.iloc[1]['location'][0],120,zorder = 10)\n",
"#plt.plot((main_df.iloc[0]['location'][0],120),(main_df.iloc[0]['location'][1],44),color = 'black',zorder = 10)\n",
"tri = np.array([[main_df.iloc[ilocv]['location'][0],main_df.iloc[ilocv]['location'][1]],[120,36],[120,44]])\n",
"t1 = plt.Polygon(tri, color = 'blue',zorder = 8)\n",
"plt.gca().add_patch(t1)\n",
"loc = pd.DataFrame(main_df.iloc[ilocv]['shot_freeze_frame'])\n",
"for i in range(len(loc)):\n",
" plt.scatter(loc['location'].iloc[i][0],loc['location'].iloc[i][1],color=\"#ee3e32\",edgecolors=\"black\",zorder=10,alpha=1,s = 20 ) \n",
"plt.show()"
],
"execution_count": 233,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlIAAAGACAYAAABmwYzKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deXxU5cH28WsyWYYkJAiERQKJKzABgWIsFlrABUF5GhWKwQVBC9YWRbRiW7Ha6kt9VLZWUVoV91BBJAoKqIAsBonKmqFhTQRDIKzZmITMzPsHwqMWJLmznDmT3/fz8Z8wM+dKnHPmmvvc5z6OQCAgAAAA1FyY1QEAAADsiiIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgKPzH/vHcmyMLJbVuoCwAAABBqeCtSsfpfn62ESlKFAAAwBlwag8AAMAQRQoAAMDQj86R+qHhfYYrLTWtvrIAwPckJyRLkvKK8izNAaBxyczOVMaqjGo9tkYjUmmpaXInuo1CAUBNRUdFKzoq2uoYABoRd6K7RoNGNRqRkiTPHo/Sp6bX9GkAUGOzx8+WJI45ABrMyeNOdTFHCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwBBFCgAAwFB4TR6cnJCs6KhozR4/u77yNFqZ2ZnKWJVhdQwAgE0M7zNcaalpVscIOe5Et8oryqv9eEakgoA70c3OAACokbTUNLkT3VbHaPRqNCKVV5QnSUqfml4fWRotRvgAACY8ezx8Jtexmn4mMyIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABgiCIFAABswecNqPKgXz5vwOoop4RbHQAAAOBsvPl+ebP9auuK1F5vpVypYXIlWT8eRJECAABBzecNyJvt15KuHZUSG62c0nINyM5VRGuHnC6Hpdmsr3IAAKDaTp7eOnL4qNVRGoyvLKC2rkilxEZLklJio9XGFSlfmfWn+ChSAADYhDffr5IFPsVnhWvE9b/V0sWrrI7UIJwxDu31ViqntFySlFNarkJvpZwx1o5GSZzaAwDAFk53emvwpOcVNTBg+emt+uZ0OeRKDdOA7Fy1cUWq8Ns5UsHwe1OkAACwgdOd3kqMcamwrCQoCkV9cyWFKaK1Q8VlVWoa4wya35lTewAA2MDpTm/tKfMGxemthuJ0ORTZIjhGok6iSAEAYAOnTm9tylXPdTkavCVP9/3p7qAqFY0RRQoAAJtwJYWp6WCnii+v0mvzZ+iKa/pYHanRY44UAAA24nSdWDup2TnxVkeBGJECAAAwRpECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAAwRJECAAD1xucNqPKgXz5vwOoo9SLc6gAAACA0efP98mb71dYVqb3eSrlSw+RKCq0xHIoUAACocz5vQN5sv5Z07aiU2GjllJZrQHauIlo75HQ5rI5XZ0KrFgIAgKDgKwuorStSKbHRkqSU2Gi1cUXKVxZap/hqNCKVnJCs6KhozR4/u77y/JfM7ExlrMposO0BAIDac8Y4tNdbqZzS8lMjUoXeSjWNcVb7NYb3Ga601LR6TPnf3IlulVeUV/vxQX1qz53oliSKFAAANuN0OeRKDdOA7Fy1cUWq8Ns5UjU5rZeWmiZ3oluePZ56TFo7NSpSeUV5kqT0qen1keW/NOTIFwAAqFuupDBFtHaouKxKTWOcRnOjPHs8DdY7pJp3j6AekQLQuDl04qDrcDgUCITWvAqgsXC6Qmty+Q9RpABYwuFwqEPLDkppn6KUxBQltkhUQlyCEuIS1Cq+lZrFNDv12LwZearyVelQ6SEVFRdp/9H9Kiou0o59O+TZ7VHOnhwdLDlo4W8DoLGiSAFoMJ3adVL/lP7q6+6rLh26qGmTppKk477jKjhUoP1H92t74XZlbc3S0fKjuuGyGyRJc9bMUWR4pFrEtjhVtjondtawnw079dr7juzTV7u+0rLNy7QsZ5n2H91vye8IoHGhSAGoVz3O66GhvYbqyq5Xqu05bSVJObtzNO/zecrZnaOc3TnatnebKqoq/uu5qRemSpKmL5x+2teOi46Tu537xKhW+xRdfvHlGtRj0KltLN6wWHM+m6OCwwX19NsBocfnDchXFpAzJrRPydUVihSAOhcfHa8bf3qj0nunq1O7TiqvKNfynOWa8v4ULfcsr7PRouLyYq3ZtkZrtq059bOO53ZU/y791T+lv+679j7dd+19WrFlhTJWZejjjR+ryl9VJ9sGQlFjWIm8rlGkANSZ5rHNddfVd2lE3xGKjorW+rz1euiNh7TgywUq9ZY2SIbcglzlFuTqhSUvKLF5oob9bJiG/WyYZt41U3sO7tE/PvyH5mbNpVABP9BYViKvaxQpALXWLKaZxlw1RiP7jZQr0qX3st/TzI9mass3WyzNtefQHk1ZMEXTP5iufin9NHbQWP3vrf+rsQPHnihUa+bK5/dZmhEIFmdaiby4rIoi9SMoUgBqZUivIZo4ZKKaRTfT+1++r+kLp2vHvh1Wx/oen9+nTzZ9ok82faL+Kf01fvB4PXXbU7q93+36w5t/0Mb8jVZHBCxXFyuRN0YUKQBGkhOSNenmSerdqbeyt2fr4YyHlVuQa3Wss1qWc+Kqvmt7XKtHhz2q+RPma9ayWZr8/uQa3RYCCDV1sRJ5Y0SRAlBjN/e5WY8Oe1QVxyv0xzf/qIzVGbZbMPODdR9o5ZaVmnD9BN3R/w5d0/0a/e5fv9OG/A1WRwMsUxcrkVvJiisOmYofREZfOdrqCMCPckW4NPn2yfrbLX/Tmq1rdNVfrtJbq96yXYk6qcRbokdmP6IhzwyRAtKcB+bolp/fYnUswFJOl0ORLew3EuXN96tkgU/xWeEqWeCTN9/fINulSAWRiUMn6vnRzysmKsbqKMB/ad+yvd6d8K5uvOxGTV0wVSOfG6n9xaGx6OVXu77SdX+7TllbszTp5kmafPtkRUVEWR0LQDV994rDL3qkaEnXjvJm++Xz1v+XPIpUEHli7hMa0G2AMu7LUPPY5lbHAU5JSUzRuw++q3PPOVejZozStIXTbDsKdSZHy49q5HMjNWXBFN142Y16/Z7XFdckzupYAKrhTFcc+sooUo3Kvz75l8bMHKOO53bUnAfmqHV8a6sjAbr0/Ev17/v/rcqqSt349I1anrPc6kj1JhAIaPrC6bp31r3qcV4PzR4/my81CFrtmrezOkLQ+O4Vh5JOXXHojKn/05MUqSDzyaZPdOs/blWbZm307/v/rTbN2lgdCY1Y6oWpe
"text/plain": [
"<Figure size 748.8x489.6 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "w-w3CxaPylta",
2020-04-25 16:52:24 +02:00
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
2020-05-13 14:17:39 +02:00
"height": 1000
2020-04-25 16:52:24 +02:00
},
2020-05-13 14:17:39 +02:00
"outputId": "0d4f68b2-291b-44e4-82a9-cec63d91df92"
2020-04-25 16:52:24 +02:00
},
"source": [
2020-05-13 14:17:39 +02:00
"sortxg[50:100]"
2020-04-25 16:52:24 +02:00
],
2020-05-13 14:17:39 +02:00
"execution_count": 237,
2020-04-25 16:52:24 +02:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>location</th>\n",
" <th>Distance</th>\n",
" <th>Angle</th>\n",
" <th>UnderPressure</th>\n",
" <th>ShotType</th>\n",
" <th>ShotBodyPart</th>\n",
" <th>ShotTechnique</th>\n",
" <th>ShotFirstTime</th>\n",
2020-05-13 14:17:39 +02:00
" <th>InFrontofGoal</th>\n",
2020-04-25 16:52:24 +02:00
" <th>ShotOneonOne</th>\n",
2020-05-13 14:17:39 +02:00
" <th>xG</th>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>10082</th>\n",
" <td>[115.2, 43.6]</td>\n",
" <td>6.902760</td>\n",
" <td>62.487997</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
2020-05-13 14:17:39 +02:00
" <td>3</td>\n",
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>1</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.593055</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>11940</th>\n",
" <td>[118.0, 45.0]</td>\n",
" <td>5.727806</td>\n",
" <td>50.906141</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>4</td>\n",
" <td>7</td>\n",
2020-04-25 16:52:24 +02:00
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.588634</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>5946</th>\n",
" <td>[115.1, 37.6]</td>\n",
" <td>6.607503</td>\n",
" <td>70.644874</td>\n",
" <td>0</td>\n",
2020-04-25 16:52:24 +02:00
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>4</td>\n",
" <td>7</td>\n",
2020-04-25 16:52:24 +02:00
" <td>1</td>\n",
2020-05-13 14:17:39 +02:00
" <td>2</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.588151</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>4032</th>\n",
" <td>[114.3, 42.8]</td>\n",
" <td>7.348970</td>\n",
" <td>61.917732</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.582283</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>10411</th>\n",
" <td>[116.3, 40.1]</td>\n",
" <td>5.449276</td>\n",
" <td>94.443109</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>5</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>2</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.578671</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>10059</th>\n",
" <td>[117.9, 44.7]</td>\n",
" <td>5.581727</td>\n",
" <td>57.994617</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.577080</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>5721</th>\n",
" <td>[115.0, 40.0]</td>\n",
" <td>6.403124</td>\n",
" <td>77.319617</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0</td>\n",
" <td>0.573219</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>10216</th>\n",
" <td>[114.8, 38.0]</td>\n",
" <td>6.755564</td>\n",
" <td>70.123128</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>5</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.563681</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>4014</th>\n",
" <td>[112.3, 38.2]</td>\n",
" <td>8.824071</td>\n",
" <td>52.934164</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>4</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.561885</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>8422</th>\n",
" <td>[113.1, 43.4]</td>\n",
" <td>8.521922</td>\n",
" <td>51.972274</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.557730</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>6056</th>\n",
" <td>[114.0, 37.0]</td>\n",
" <td>7.651153</td>\n",
" <td>58.861028</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
2020-05-13 14:17:39 +02:00
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.552528</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
2020-05-13 14:17:39 +02:00
" <tr>\n",
" <th>1551</th>\n",
" <td>[114.4, 39.4]</td>\n",
" <td>6.899202</td>\n",
" <td>70.664392</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.549695</td>\n",
" </tr>\n",
" <tr>\n",
" <th>250</th>\n",
" <td>[113.1, 42.1]</td>\n",
" <td>8.183296</td>\n",
" <td>56.874096</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.548621</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3272</th>\n",
" <td>[113.6, 40.1]</td>\n",
" <td>7.547661</td>\n",
" <td>64.001725</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.542347</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9799</th>\n",
" <td>[113.2, 42.0]</td>\n",
" <td>8.078323</td>\n",
" <td>57.813206</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.538975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10820</th>\n",
" <td>[113.6, 41.8]</td>\n",
" <td>7.702349</td>\n",
" <td>61.154851</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.533676</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5077</th>\n",
" <td>[114.1, 43.6]</td>\n",
" <td>7.767437</td>\n",
" <td>56.055770</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.528097</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12541</th>\n",
" <td>[113.0, 44.0]</td>\n",
" <td>8.815073</td>\n",
" <td>48.814075</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.527048</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11843</th>\n",
" <td>[113.0, 38.0]</td>\n",
" <td>8.249827</td>\n",
" <td>56.546691</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.524932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3227</th>\n",
" <td>[115.6, 41.8]</td>\n",
" <td>6.099730</td>\n",
" <td>79.380345</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.522069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1710</th>\n",
" <td>[113.3, 39.1]</td>\n",
" <td>7.841507</td>\n",
" <td>61.008967</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.514230</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1796</th>\n",
" <td>[112.4, 39.7]</td>\n",
" <td>8.592467</td>\n",
" <td>55.459494</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.512211</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12751</th>\n",
" <td>[115.0, 42.0]</td>\n",
" <td>6.597707</td>\n",
" <td>71.995838</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.510614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12635</th>\n",
" <td>[118.0, 36.0]</td>\n",
" <td>5.123106</td>\n",
" <td>75.963757</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.509163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5094</th>\n",
" <td>[115.6, 43.6]</td>\n",
" <td>6.599972</td>\n",
" <td>65.125846</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.509059</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12870</th>\n",
" <td>[109.0, 41.0]</td>\n",
" <td>11.742400</td>\n",
" <td>39.699073</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.509021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12363</th>\n",
" <td>[109.0, 41.0]</td>\n",
" <td>11.742400</td>\n",
" <td>39.699073</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.509021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12622</th>\n",
" <td>[109.0, 41.0]</td>\n",
" <td>11.742400</td>\n",
" <td>39.699073</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.509021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11356</th>\n",
" <td>[109.0, 41.0]</td>\n",
" <td>11.742400</td>\n",
" <td>39.699073</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.509021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11390</th>\n",
" <td>[109.0, 41.0]</td>\n",
" <td>11.742400</td>\n",
" <td>39.699073</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.509021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12918</th>\n",
" <td>[109.0, 41.0]</td>\n",
" <td>11.742400</td>\n",
" <td>39.699073</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.509021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11495</th>\n",
" <td>[109.0, 41.0]</td>\n",
" <td>11.742400</td>\n",
" <td>39.699073</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.509021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9319</th>\n",
" <td>[114.0, 42.8]</td>\n",
" <td>7.593725</td>\n",
" <td>59.886267</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.508912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5479</th>\n",
" <td>[114.3, 42.5]</td>\n",
" <td>7.269647</td>\n",
" <td>63.495292</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.503553</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9820</th>\n",
" <td>[116.5, 37.5]</td>\n",
" <td>5.595149</td>\n",
" <td>84.897835</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0.502442</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5385</th>\n",
" <td>[112.6, 42.2]</td>\n",
" <td>8.634894</td>\n",
" <td>53.628856</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.496973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9100</th>\n",
" <td>[115.8, 44.3]</td>\n",
" <td>6.756425</td>\n",
" <td>59.073874</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0.495486</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1360</th>\n",
" <td>[113.7, 36.1]</td>\n",
" <td>8.202624</td>\n",
" <td>52.338128</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.493024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3664</th>\n",
" <td>[116.4, 43.2]</td>\n",
" <td>5.868831</td>\n",
" <td>75.963757</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.491836</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8527</th>\n",
" <td>[115.0, 43.3]</td>\n",
" <td>6.948463</td>\n",
" <td>63.561138</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.491444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10394</th>\n",
" <td>[112.4, 38.0]</td>\n",
" <td>8.770864</td>\n",
" <td>53.033726</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.490224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3910</th>\n",
" <td>[112.6, 39.5]</td>\n",
" <td>8.423397</td>\n",
" <td>56.617007</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.486898</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4346</th>\n",
" <td>[108.3, 40.3]</td>\n",
" <td>12.368128</td>\n",
" <td>37.728461</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.484093</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11026</th>\n",
" <td>[108.2, 40.1]</td>\n",
" <td>12.459894</td>\n",
" <td>37.449331</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.480520</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3793</th>\n",
" <td>[112.3, 44.5]</td>\n",
" <td>9.592653</td>\n",
" <td>44.111835</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.479089</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6938</th>\n",
" <td>[112.2, 42.0]</td>\n",
" <td>8.946530</td>\n",
" <td>51.949987</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.478999</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7928</th>\n",
" <td>[113.3, 38.2]</td>\n",
" <td>7.956833</td>\n",
" <td>59.059829</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0.477739</td>\n",
" </tr>\n",
" <tr>\n",
" <th>554</th>\n",
" <td>[114.8, 36.7]</td>\n",
" <td>7.104802</td>\n",
" <td>62.203440</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.477342</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1305</th>\n",
" <td>[108.1, 40.0]</td>\n",
" <td>12.554282</td>\n",
" <td>37.158541</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.476817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3704</th>\n",
" <td>[108.1, 40.1]</td>\n",
" <td>12.554640</td>\n",
" <td>37.156345</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.476794</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" location Distance ... ShotOneonOne xG\n",
"10082 [115.2, 43.6] 6.902760 ... 0 0.593055\n",
"11940 [118.0, 45.0] 5.727806 ... 0 0.588634\n",
"5946 [115.1, 37.6] 6.607503 ... 0 0.588151\n",
"4032 [114.3, 42.8] 7.348970 ... 0 0.582283\n",
"10411 [116.3, 40.1] 5.449276 ... 0 0.578671\n",
"10059 [117.9, 44.7] 5.581727 ... 0 0.577080\n",
"5721 [115.0, 40.0] 6.403124 ... 0 0.573219\n",
"10216 [114.8, 38.0] 6.755564 ... 1 0.563681\n",
"4014 [112.3, 38.2] 8.824071 ... 0 0.561885\n",
"8422 [113.1, 43.4] 8.521922 ... 0 0.557730\n",
"6056 [114.0, 37.0] 7.651153 ... 0 0.552528\n",
"1551 [114.4, 39.4] 6.899202 ... 0 0.549695\n",
"250 [113.1, 42.1] 8.183296 ... 0 0.548621\n",
"3272 [113.6, 40.1] 7.547661 ... 0 0.542347\n",
"9799 [113.2, 42.0] 8.078323 ... 0 0.538975\n",
"10820 [113.6, 41.8] 7.702349 ... 0 0.533676\n",
"5077 [114.1, 43.6] 7.767437 ... 0 0.528097\n",
"12541 [113.0, 44.0] 8.815073 ... 1 0.527048\n",
"11843 [113.0, 38.0] 8.249827 ... 0 0.524932\n",
"3227 [115.6, 41.8] 6.099730 ... 0 0.522069\n",
"1710 [113.3, 39.1] 7.841507 ... 0 0.514230\n",
"1796 [112.4, 39.7] 8.592467 ... 0 0.512211\n",
"12751 [115.0, 42.0] 6.597707 ... 0 0.510614\n",
"12635 [118.0, 36.0] 5.123106 ... 0 0.509163\n",
"5094 [115.6, 43.6] 6.599972 ... 0 0.509059\n",
"12870 [109.0, 41.0] 11.742400 ... 0 0.509021\n",
"12363 [109.0, 41.0] 11.742400 ... 0 0.509021\n",
"12622 [109.0, 41.0] 11.742400 ... 0 0.509021\n",
"11356 [109.0, 41.0] 11.742400 ... 0 0.509021\n",
"11390 [109.0, 41.0] 11.742400 ... 0 0.509021\n",
"12918 [109.0, 41.0] 11.742400 ... 0 0.509021\n",
"11495 [109.0, 41.0] 11.742400 ... 0 0.509021\n",
"9319 [114.0, 42.8] 7.593725 ... 0 0.508912\n",
"5479 [114.3, 42.5] 7.269647 ... 1 0.503553\n",
"9820 [116.5, 37.5] 5.595149 ... 0 0.502442\n",
"5385 [112.6, 42.2] 8.634894 ... 0 0.496973\n",
"9100 [115.8, 44.3] 6.756425 ... 0 0.495486\n",
"1360 [113.7, 36.1] 8.202624 ... 0 0.493024\n",
"3664 [116.4, 43.2] 5.868831 ... 0 0.491836\n",
"8527 [115.0, 43.3] 6.948463 ... 0 0.491444\n",
"10394 [112.4, 38.0] 8.770864 ... 0 0.490224\n",
"3910 [112.6, 39.5] 8.423397 ... 0 0.486898\n",
"4346 [108.3, 40.3] 12.368128 ... 0 0.484093\n",
"11026 [108.2, 40.1] 12.459894 ... 0 0.480520\n",
"3793 [112.3, 44.5] 9.592653 ... 1 0.479089\n",
"6938 [112.2, 42.0] 8.946530 ... 0 0.478999\n",
"7928 [113.3, 38.2] 7.956833 ... 0 0.477739\n",
"554 [114.8, 36.7] 7.104802 ... 0 0.477342\n",
"1305 [108.1, 40.0] 12.554282 ... 0 0.476817\n",
"3704 [108.1, 40.1] 12.554640 ... 0 0.476794\n",
"\n",
"[50 rows x 11 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 237
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Ybc3gad1S2Nm",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "afd2ff80-63d6-4f19-8063-c4e63998d027"
},
"source": [
"countgoal = 0\n",
"#isInside((X,Y),(120,36),(120,44),(loc['location'].iloc[i][0],loc['location'].iloc[i][1])) == 1:\n",
"for i in range(len(loc)):\n",
" if isInside((100.7,25.6),(120,36),(120,44),(loc['location'].iloc[i][0],loc['location'].iloc[i][1])) == 1:\n",
" countgoal +=1\n",
"countgoal "
],
"execution_count": 205,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"9"
]
},
"metadata": {
"tags": []
},
"execution_count": 205
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Mp8Ut4YUBZUQ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
},
"outputId": "cdcaa1bd-7e6c-47a2-f8eb-2142672d80bd"
},
"source": [
"%%time\n",
"main_df['InFrontofGoal'] = main_df.apply(infronofShot,axis = 1)"
],
"execution_count": 182,
"outputs": [
{
"output_type": "stream",
"text": [
"CPU times: user 16.9 s, sys: 39.6 ms, total: 17 s\n",
"Wall time: 16.9 s\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ddhlIVCHbZ-_",
"colab_type": "code",
"outputId": "439c3763-a163-4885-c410-03a900095f0f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 609
}
},
"source": [
"main_df[['location','Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne','InFrontofGoal','isGoal']]"
],
"execution_count": 207,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>location</th>\n",
" <th>Distance</th>\n",
" <th>Angle</th>\n",
" <th>UnderPressure</th>\n",
" <th>ShotType</th>\n",
" <th>ShotBodyPart</th>\n",
" <th>ShotTechnique</th>\n",
" <th>ShotFirstTime</th>\n",
" <th>ShotOneonOne</th>\n",
" <th>InFrontofGoal</th>\n",
" <th>isGoal</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[110.5, 36.2]</td>\n",
" <td>10.896986</td>\n",
" <td>40.593846</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>[114.2, 48.0]</td>\n",
" <td>10.186866</td>\n",
" <td>29.611685</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>[95.3, 48.3]</td>\n",
" <td>26.332306</td>\n",
" <td>16.596593</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>[103.3, 61.6]</td>\n",
" <td>27.413807</td>\n",
" <td>10.378789</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[94.9, 55.2]</td>\n",
" <td>29.543437</td>\n",
" <td>13.366677</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12952</th>\n",
" <td>[111.0, 27.0]</td>\n",
" <td>15.981653</td>\n",
" <td>17.102729</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12953</th>\n",
" <td>[114.0, 33.0]</td>\n",
" <td>9.619084</td>\n",
" <td>34.824489</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12954</th>\n",
" <td>[107.0, 32.0]</td>\n",
" <td>15.646638</td>\n",
" <td>25.606661</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12955</th>\n",
" <td>[97.0, 22.0]</td>\n",
" <td>29.376742</td>\n",
" <td>12.398277</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12956</th>\n",
" <td>[109.0, 52.0]</td>\n",
" <td>16.508979</td>\n",
" <td>19.464104</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>12957 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" location Distance Angle ... ShotOneonOne InFrontofGoal isGoal\n",
"0 [110.5, 36.2] 10.896986 40.593846 ... 0 1 False\n",
"1 [114.2, 48.0] 10.186866 29.611685 ... 0 2 False\n",
"2 [95.3, 48.3] 26.332306 16.596593 ... 0 4 False\n",
"3 [103.3, 61.6] 27.413807 10.378789 ... 0 1 False\n",
"4 [94.9, 55.2] 29.543437 13.366677 ... 0 4 False\n",
"... ... ... ... ... ... ... ...\n",
"12952 [111.0, 27.0] 15.981653 17.102729 ... 0 3 False\n",
"12953 [114.0, 33.0] 9.619084 34.824489 ... 0 1 True\n",
"12954 [107.0, 32.0] 15.646638 25.606661 ... 0 1 False\n",
"12955 [97.0, 22.0] 29.376742 12.398277 ... 0 1 False\n",
"12956 [109.0, 52.0] 16.508979 19.464104 ... 0 1 False\n",
2020-04-25 16:52:24 +02:00
"\n",
2020-05-13 14:17:39 +02:00
"[12957 rows x 11 columns]"
2020-04-25 16:52:24 +02:00
]
},
"metadata": {
"tags": []
},
2020-05-13 14:17:39 +02:00
"execution_count": 207
2020-04-25 16:52:24 +02:00
}
]
},
2020-04-25 17:11:18 +02:00
{
"cell_type": "markdown",
"metadata": {
"id": "54NSbs9R1HQD",
"colab_type": "text"
},
"source": [
"#xG Model"
]
},
2020-04-25 16:52:24 +02:00
{
"cell_type": "code",
"metadata": {
"id": "KCl0-Opqhxx-",
"colab_type": "code",
"colab": {}
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"import xgboost as xgb\n",
"from sklearn import svm\n",
"from sklearn import linear_model"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Q5_VKZrBhyRi",
"colab_type": "code",
"colab": {}
},
"source": [
2020-05-13 14:17:39 +02:00
"xgModel = main_df[['location','Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne','InFrontofGoal','isGoal']]"
2020-04-25 16:52:24 +02:00
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "WOLF5IqBh1Nx",
"colab_type": "code",
"colab": {}
},
"source": [
2020-05-13 14:17:39 +02:00
"X_train,X_test,y_train,y_test = train_test_split(xgModel[['location','Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','InFrontofGoal','ShotOneonOne']],xgModel[['isGoal']],test_size = 0.2,shuffle = True)"
2020-04-25 16:52:24 +02:00
],
"execution_count": 0,
"outputs": []
},
2020-04-25 17:11:18 +02:00
{
"cell_type": "markdown",
"metadata": {
"id": "mJrKwMAy1Pdl",
"colab_type": "text"
},
"source": [
"**Logistic Regression** Model"
]
},
2020-04-25 16:52:24 +02:00
{
"cell_type": "code",
"metadata": {
"id": "fwxMcSWQiCbw",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
"height": 71
},
"outputId": "e1df75a6-e814-4037-a0e3-6775e7761e8b"
2020-04-25 16:52:24 +02:00
},
"source": [
2020-05-13 14:17:39 +02:00
"clf = LogisticRegression(random_state=0,max_iter = 5000).fit(X_train[['Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne','InFrontofGoal']], y_train)"
2020-04-25 16:52:24 +02:00
],
2020-05-13 14:17:39 +02:00
"execution_count": 214,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
],
"name": "stderr"
}
]
2020-04-25 16:52:24 +02:00
},
{
"cell_type": "code",
"metadata": {
"id": "v2sAXpKaiS7d",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "f16daadf-0f9a-4b34-a5ca-f6e8b4eb7f94",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
"height": 51
2020-05-13 14:17:39 +02:00
}
2020-04-25 16:52:24 +02:00
},
"source": [
"#model weights\n",
"clf.coef_[0]"
],
2020-05-13 14:17:39 +02:00
"execution_count": 215,
2020-04-25 16:52:24 +02:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
2020-05-13 14:17:39 +02:00
"array([-0.05881247, 0.03193658, -0.45802499, 0.59881635, 0.21925344,\n",
" -0.00195693, 0.21331492, 0.50199851, -0.29032136])"
2020-04-25 16:52:24 +02:00
]
},
"metadata": {
"tags": []
},
2020-05-13 14:17:39 +02:00
"execution_count": 215
2020-04-25 16:52:24 +02:00
}
]
},
2020-04-25 17:11:18 +02:00
{
"cell_type": "markdown",
"metadata": {
"id": "DBQ7Hrsm1WNv",
"colab_type": "text"
},
"source": [
"**SGD** Model"
]
},
2020-04-25 16:52:24 +02:00
{
"cell_type": "code",
"metadata": {
"id": "02YCl85sicPO",
"colab_type": "code",
"colab": {}
},
"source": [
"xG = clf.predict_proba(X_test[['Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne']])[:,1]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "WmD0Au5iv0Lh",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "d451e487-f5d5-4384-d0e7-c793da528d98",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
2020-05-13 14:17:39 +02:00
"height": 173
}
2020-04-25 16:52:24 +02:00
},
"source": [
"#SGD\n",
"sgdclf = linear_model.SGDClassifier(loss='log', alpha = 0.17)\n",
2020-05-13 14:17:39 +02:00
"sgdclf.fit(X_train[['Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne','InFrontofGoal']], y_train)"
2020-04-25 16:52:24 +02:00
],
2020-05-13 14:17:39 +02:00
"execution_count": 239,
2020-04-25 16:52:24 +02:00
"outputs": [
2020-05-13 14:17:39 +02:00
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py:760: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
],
"name": "stderr"
},
2020-04-25 16:52:24 +02:00
{
"output_type": "execute_result",
"data": {
"text/plain": [
"SGDClassifier(alpha=0.17, average=False, class_weight=None,\n",
" early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n",
" l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=1000,\n",
" n_iter_no_change=5, n_jobs=None, penalty='l2', power_t=0.5,\n",
" random_state=None, shuffle=True, tol=0.001,\n",
" validation_fraction=0.1, verbose=0, warm_start=False)"
]
},
"metadata": {
"tags": []
},
2020-05-13 14:17:39 +02:00
"execution_count": 239
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xgnm32tEWJsS",
"colab_type": "text"
},
"source": [
"Xg Boost Model"
]
},
{
"cell_type": "code",
"metadata": {
"id": "gXzsHLvQWLvm",
"colab_type": "code",
"outputId": "6c27f43d-ca87-4596-e349-2e934d3f7e33",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 224
}
},
"source": [
"xgb_model = xgb.XGBClassifier(objective='binary:logistic', max_depth=4, n_estimators=100)\n",
"xgb_model.fit(X_train[['Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne','InFrontofGoal']], y_train)"
],
"execution_count": 245,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/sklearn/preprocessing/_label.py:235: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n",
"/usr/local/lib/python3.6/dist-packages/sklearn/preprocessing/_label.py:268: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
" y = column_or_1d(y, warn=True)\n"
],
"name": "stderr"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
" colsample_bynode=1, colsample_bytree=1, gamma=0,\n",
" learning_rate=0.1, max_delta_step=0, max_depth=4,\n",
" min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,\n",
" nthread=None, objective='binary:logistic', random_state=0,\n",
" reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n",
" silent=None, subsample=1, verbosity=1)"
]
},
"metadata": {
"tags": []
},
"execution_count": 245
2020-04-25 16:52:24 +02:00
}
]
},
2020-04-25 17:11:18 +02:00
{
"cell_type": "markdown",
"metadata": {
"id": "LVID9Zsn1atX",
"colab_type": "text"
},
"source": [
2020-04-25 17:13:12 +02:00
"**Predict** Shot Probability"
2020-04-25 17:11:18 +02:00
]
},
2020-04-25 16:52:24 +02:00
{
"cell_type": "code",
"metadata": {
"id": "P8yo8gQev-q8",
"colab_type": "code",
"colab": {}
},
"source": [
2020-04-25 17:11:18 +02:00
"# change model here sgcclf(SGD) or clf(LR)\n",
2020-05-13 14:17:39 +02:00
"xG = xgb_model.predict_proba(X_test[['Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne','InFrontofGoal']])[:,1]"
2020-04-25 16:52:24 +02:00
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "xL2vLcVfihcb",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"colab": {}
2020-04-25 16:52:24 +02:00
},
"source": [
"X_test['xG'] = xG\n",
2020-04-25 17:11:18 +02:00
"#X_test.head()"
2020-04-25 16:52:24 +02:00
],
2020-05-13 14:17:39 +02:00
"execution_count": 0,
"outputs": []
2020-04-25 16:52:24 +02:00
},
{
"cell_type": "code",
"metadata": {
"id": "q2BO-NLSijd5",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "94151586-c063-449a-e5f6-46d7ea262272",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
2020-05-13 14:17:39 +02:00
"height": 609
}
2020-04-25 16:52:24 +02:00
},
"source": [
"sortxg = X_test.sort_values(by = ['xG'],ascending=False)\n",
"sortxg"
],
2020-05-13 14:17:39 +02:00
"execution_count": 248,
2020-04-25 16:52:24 +02:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>location</th>\n",
" <th>Distance</th>\n",
" <th>Angle</th>\n",
" <th>UnderPressure</th>\n",
" <th>ShotType</th>\n",
" <th>ShotBodyPart</th>\n",
" <th>ShotTechnique</th>\n",
" <th>ShotFirstTime</th>\n",
2020-05-13 14:17:39 +02:00
" <th>InFrontofGoal</th>\n",
2020-04-25 16:52:24 +02:00
" <th>ShotOneonOne</th>\n",
" <th>xG</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>646</th>\n",
" <td>[118.8, 37.0]</td>\n",
" <td>4.332081</td>\n",
" <td>120.077993</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0</td>\n",
" <td>0.979459</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>10495</th>\n",
" <td>[119.3, 41.4]</td>\n",
" <td>4.068882</td>\n",
" <td>157.545469</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0</td>\n",
" <td>0.979459</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>2681</th>\n",
" <td>[119.1, 39.6]</td>\n",
" <td>4.100949</td>\n",
" <td>154.403626</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0</td>\n",
" <td>0.979459</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>5237</th>\n",
" <td>[118.7, 39.4]</td>\n",
" <td>4.210111</td>\n",
" <td>143.294745</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>4</td>\n",
" <td>3</td>\n",
2020-04-25 16:52:24 +02:00
" <td>1</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0</td>\n",
" <td>0.969388</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>7452</th>\n",
" <td>[117.7, 37.8]</td>\n",
" <td>4.766741</td>\n",
" <td>107.693813</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>2</td>\n",
2020-04-25 16:52:24 +02:00
" <td>5</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0</td>\n",
" <td>0.967390</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
2020-05-13 14:17:39 +02:00
" <td>...</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>505</th>\n",
" <td>[58.1, 28.1]</td>\n",
" <td>63.155775</td>\n",
" <td>7.132821</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
2020-05-13 14:17:39 +02:00
" <td>5</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.004881</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>11160</th>\n",
" <td>[61.8, 39.8]</td>\n",
" <td>58.337636</td>\n",
" <td>7.863251</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>3</td>\n",
2020-04-25 16:52:24 +02:00
" <td>4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.004881</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>3568</th>\n",
" <td>[63.0, 30.5]</td>\n",
" <td>57.920804</td>\n",
" <td>7.813054</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
2020-05-13 14:17:39 +02:00
" <td>4</td>\n",
2020-04-25 16:52:24 +02:00
" <td>5</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.004881</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>5084</th>\n",
" <td>[87.5, 67.5]</td>\n",
" <td>42.683234</td>\n",
" <td>8.235003</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
2020-04-25 16:52:24 +02:00
" <td>3</td>\n",
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>1</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.004615</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" <tr>\n",
2020-05-13 14:17:39 +02:00
" <th>6085</th>\n",
" <td>[58.9, 43.2]</td>\n",
" <td>61.313999</td>\n",
" <td>7.470866</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
2020-05-13 14:17:39 +02:00
" <td>5</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>2</td>\n",
2020-04-25 16:52:24 +02:00
" <td>0</td>\n",
2020-05-13 14:17:39 +02:00
" <td>0.004594</td>\n",
2020-04-25 16:52:24 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2020-05-13 14:17:39 +02:00
"<p>2592 rows × 11 columns</p>\n",
2020-04-25 16:52:24 +02:00
"</div>"
],
"text/plain": [
" location Distance ... ShotOneonOne xG\n",
2020-05-13 14:17:39 +02:00
"646 [118.8, 37.0] 4.332081 ... 0 0.979459\n",
"10495 [119.3, 41.4] 4.068882 ... 0 0.979459\n",
"2681 [119.1, 39.6] 4.100949 ... 0 0.979459\n",
"5237 [118.7, 39.4] 4.210111 ... 0 0.969388\n",
"7452 [117.7, 37.8] 4.766741 ... 0 0.967390\n",
2020-04-25 16:52:24 +02:00
"... ... ... ... ... ...\n",
2020-05-13 14:17:39 +02:00
"505 [58.1, 28.1] 63.155775 ... 0 0.004881\n",
"11160 [61.8, 39.8] 58.337636 ... 0 0.004881\n",
"3568 [63.0, 30.5] 57.920804 ... 0 0.004881\n",
"5084 [87.5, 67.5] 42.683234 ... 0 0.004615\n",
"6085 [58.9, 43.2] 61.313999 ... 0 0.004594\n",
2020-04-25 16:52:24 +02:00
"\n",
2020-05-13 14:17:39 +02:00
"[2592 rows x 11 columns]"
2020-04-25 16:52:24 +02:00
]
},
"metadata": {
"tags": []
},
2020-05-13 14:17:39 +02:00
"execution_count": 248
2020-04-25 16:52:24 +02:00
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "lrUnxsmxpvPM",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "eb22ddf4-3d3e-48de-ecdc-e3a196669746",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
2020-05-13 14:17:39 +02:00
"height": 401
}
2020-04-25 16:52:24 +02:00
},
"source": [
2020-05-13 14:17:39 +02:00
"import statsbombpitch as sb\n",
2020-04-25 16:52:24 +02:00
"sb.sb_pitch(\"#195905\",\"#faf0e6\",\"horizontal\",\"full\")\n",
"plt.gca().invert_yaxis()\n",
"for i in range(len(sortxg)):\n",
" xe = sortxg.iloc[i]['location'][0]\n",
" ye = sortxg.iloc[i]['location'][1]\n",
" \n",
" if sortxg.iloc[i]['xG'] >= 0.75:\n",
" g = plt.scatter(xe,ye,color=\"#ee3e32\",edgecolors=\"none\",zorder=10,alpha=1,s = 40 )\n",
" elif sortxg.iloc[i]['xG'] < 0.75 and sortxg.iloc[i]['xG'] >=0.5:\n",
" o = plt.scatter(xe,ye,color=\"#f68838\",edgecolors=\"none\",zorder=8,alpha=0.75,s = 30 )\n",
" elif sortxg.iloc[i]['xG'] < 0.5 and sortxg.iloc[i]['xG'] >=0.25:\n",
" a = plt.scatter(xe,ye,color=\"#fbb021\",edgecolors=\"none\",zorder=6,alpha=0.5,s = 20 ) \n",
" else:\n",
" b = plt.scatter(xe,ye,color=\"#1b8a5a\",edgecolors=\"none\",zorder=4,alpha=0.25,s = 10 ) \n",
2020-04-25 17:11:18 +02:00
"plt.axis('off')\n",
2020-04-25 16:52:24 +02:00
"plt.legend((g,o,a,b),('>=0.75','>=0.5','>=0.25','<0.25'),scatterpoints=1,loc=2,title = 'xG Value',fontsize='small', fancybox=True)\n",
"#plt.title('xG SGD model')\n",
2020-05-13 14:17:39 +02:00
"plt.savefig('xgXGBmodelFreezeFrame.png')\n",
2020-04-25 16:52:24 +02:00
"plt.show()"
],
2020-05-13 14:17:39 +02:00
"execution_count": 249,
2020-04-25 16:52:24 +02:00
"outputs": [
{
"output_type": "display_data",
"data": {
2020-05-13 14:17:39 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlIAAAGACAYAAABmwYzKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOydeXxlZX3/P8/Z75J1JrMxLALKMCgqioobIKhgXapSQKSuqLXqq/hr61JtrXVp3VC0rpRCW2FAqBRBLQpCXdERFBSVTdZZM5kkdz/r8/vj+zz3OTe5ySSZZJKQ7/v1yivJveee85xz78z55Lt8vkJKCYZhGIZhGGb2WIu9AIZhGIZhmOUKCymGYRiGYZg5wkKKYRiGYRhmjrCQYhiGYRiGmSMspBiGYRiGYeYICymGYRiGYZg54kz35IZzvJ0A1h6gtTAMwzAMwyxJtl8eiW6P7ysixSKKYRiGYRhmCji1xzAMwzAMM0dYSDEMwzAMw8yRaWukJnL/F/6I7SNVVBqNhVrPkiSVIRrJbgDZYi+FYVYUhw0dBgB4cPjBRV0HwzAri2u3XostP94yo21nJaS2j1TR2z+AoQ1rIUTXmqvHHFJKjI9XsGs30Eh2LvZyGGZFUfSLi70EhmFWGJs3bgaAhRFSlUYDQxvW4oHdD8x+ZcsYKYGR6jje+JWzF3spDLOiuOLdVwAAzv4s/9tjGGaBcYuA4+KKt180q5fNSkgBWDGRqDxCANYKPG+GYRiGWRFYNuCX6GdhA3LmpTwHtNj8oQcexrvOOx8vPfkVeM3Lz8V557wNt/3i9o5tms0WTnzaKahVax2Pn/+2v8YN139vyn2f8KTnLciaGYZhGIZ5jCMzSj/RL7N66QETUmEY4l3nnY9Xn/1KXH/ztdjyra/jfR/6Wzz68LaO7QqFACc871n4wfduaT9Wrdbw69t+jeef8vwDtVyGYRiGYVYKUgKNUaBVAbJkVi+ddyH12zvvwp+95GyEYYhmo4lXnXYm7rv7Pnzn2u/i2Kc+CSedemJ72yOPOhKvOONlk/Zx+ste3BF9+sENN+OE550AmWV467lvx9kvfy3OOP0s3Pz9Wya9duutv8S7zju//fs//+MncO3V1wEAfveb3+PNr3krXvPyc/H2N7wTw7v3zOOZMwzDMAyzbJEpkISzftmsa6T2xROPPQYnnvJ8fPEzX0YrDPEnrzgdRx51JP7n6m/h6GM2zWgfz37eCfjw+z+KsdEx9A/044brv4ezX3cmPN/DBV/+FMo9ZYzuHcPrXv0GnHTqiTOq24rjBP/y4U/hc1/9DAZXDeCG67+Hf/3MF/HhT3xof0+ZYRiGYZgVyrwLKQB427vegte+8nXwfA/v/Ye/6brNu//ib/Dwgw/j0Mcdigu+/KmO51zPxYmnPB83fvcmnHLaKfjD7+6miJQEvvCZL+L2X/wKwrKwe9cwRvaMYPXQ6n2u6aE/Poj7770ff/H6dwAAsjTF6jX7fh3DMAzDMMxULIiQGhsbR6PRQJIkiMIIhWIBRzz+cNz2i1+1t/nsVz6Nu+78HS74l8913cfpL3sxvvav/wYJiZNOPRGu6+Daq6/D6MgYLr/263BdB6c//2UIw6jzhBwHWWaq7SP1vARwxOMPx39efcn8nzDDMAzDMCuSBSk2/+gHP4a/fPfbcfrLT8PnPvl5AMDpLz8Nd9x2B2658f/a27VarSn38fRnPQ0PP/QIrvyvq3D6y14MAKhVaxhcNQDXdbD1Z7/Ejm07Jr1u/YZ1+ON9DyAKI1QqVfz8p1sBAIc97lCMjozijtvvBECpvvvuuX/ezplhGIZhmJXHvEekrvvm9XAcBy95+WlI0xSv/7M34Rc/3YpnPPt4fP7fPodPf+wCfOqjF2DV6kEUS0W85R1v7rofy7Jw6mmn4Pvf+T6e9szjAAAvecXp+Ku3vhtnnH4WNj9pMx53xGGTXrduwzq86CWn4tWnn4WDDt6ATZuPAkDpwk998RP45D99GrVqDUma4rVveA2OfMIR830JGIZhGIZZIQgpp/ZL2HCO1/Hkt9/zMxz5+MPxx11/XPCFLTV2bxvG67/88sVeBsOsKNjZnGGYA81U/+9svzzq2tl2QA05GYZhGIZhHkuwkGIYhmEYhpkjLKQYhmEYhmHmCAsphmEYhmGYOcJCimEYhmEYZo4siCHnYvO5T3wed9x+JzZsXI9//JcPwXXNaf7ghptx2X9sAQBse3gb/vzN5+K1b3wNXvaCV2LNuiEAwHl/+Sac8NxnLcraGYZhGIZZPiyIkLL37EHp51uBLEPj+KcjWbd2TvuJ4wSQEq7nzvg1d//+HuzeNYxLrvw3XPTFi3Hjd2/E6S8/rf38C158Ml7w4pMBAOed8zac/KKTAAA9PWVcfPnX5rROhmEYhmFWJvOe2iv86tfY8P5/QP/V16D/m9di/Qc+hOKtv5jTvmrVGt5y7l/g0x+7YMYu5HfcfidOeO4zAQDPef6z8evb7+i63Z7hPYiiGBsOWg8AaDQaePNr3or3nf8BjI+Nz2m9DMMwDMOsLOY3IpVlGLjsSogkaT8ksgwDW65E4+nHAc7sDjcw2I9Lv3Exfn3bHbj80iuwY/sOnHraKTj5hSfhb9/13knbf+LCj6MyXmkPMS73lDE+Vum675tuuBmnnvaC9u+XfuNi9A/047pvXo8vf+6reN8/vmdWa2UYhmEYZuUxr0LKHh2FMzIy+fFKFe6u3YgP2jCn/T7laU9GEPi48utX4+rL/xunnvaCKdNwPb09qNdqACii1dff23W7G797Ez78yQ+1f+8f6AcAnHr6qbjmG9fOaZ0MwzAMw6ws5lVIZT09yAoBrGbnMOLMdZEooTIbojDCNy6/Grd8//9wxBMOx1nnnoFNx2zC3pFRvPmct07a/hMXfhxPPu5YfP3iy/CyV70UP/3Rz/CU4548abuRPSOIoqid1oujGFJKeL6HX239FQ4+dOOs18owDMMwzMpjXoWU9DxUTnsR+q/5VsfjtVNPhiwWZ72/ZrOJwVWD+OIln4fv++3HB1cNTBmRWj20GoOrV+GNZ52HdRvW4fXn/TkA4CMf+Bj+/mMfADA5rVepVPDON/0VCsUCXM/Fh//lH2a9VoZhGIZhVh7z3rVXeflLkQwNofSTn0HIDPVnPgP15z1nTvvq6+/DS3IddzPl/73/ryY9pkUUAJz52jM6nlu1ehW2fOvrs18gwzDMSsN2gSwFZLbYK2GYJcGC2B80TngmGic8cyF2zTAMwywWfg/gBoCUQGMviymGATubMwzDMDPFUn97CwFY9uKuhWGWCCykGIZhmJkR1YA0BuImfWcY5rE5IoZhGIZZANIYaI4t9ioYZknBESmGYRiGYZg58piMSE03tHjrrb/EB//6Q9h46EGwLRtf+/qXF3GlDMMwDMMsZ5Z0RCqOE8TR7PLw+aHFhx1+GG787o2Ttnnxn7wQF1/+NRZRDMMwDMPsFwsipMqP3I51t16C9T/7d/Q8tJVaZefAQg0tvvGGH+CNZ52Hyy7ZMqd1MQzDMAzDAAuQ2ivsuhv99/+o/XvfAz9F5gaob3jSrPe1EEOLj3nSZlz7/f8GAJz/tr/GU5/+FGx+0tGzXhvDMAzDMMy8C6ni8L2THisM3zsnIaWZz6HFxZIZVfP8U56He/5wDwsphmEYhmHmxLwLqdQrTXos82Y/Zw9YmKHFtWoN5Z4yAOBXv/w1/uycV89pbQzDMAzDMPMupGobn4Li7rthJSEAILM9VA9++pz2tRBDi7/3nRvx31d8E7bt4ClPezKe9ozj5rQ2hmEYhmGYeRdSSXEAO48/F8Xdd0NIicaao5AGPXPa10IMLX7VWX+KV531p3NaD8MwDMMwTJ4F8ZHK/DJqBz9tIXbNMAzDMAyzZFjSPlIMwzAMwzBLGRZSDMMwDMMwc4SFFMMwDMMwzBxhIcUwDMMwDDNHVtzQ4v+76Ye46IsXw3EcHP3Eo/Hef/gbAMCzj30+jn7iJgDA+z70Hjz+qCMXZe0MwzAMwywflnREaiGGFj/h6Cfg0m9cjEu/c
2020-04-25 16:52:24 +02:00
"text/plain": [
"<Figure size 748.8x489.6 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "DzQLtbmByqBa",
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "acf7c28d-56f8-4aa6-9862-04a340160cd3",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
2020-05-13 14:17:39 +02:00
"height": 401
}
2020-04-25 16:52:24 +02:00
},
"source": [
"import StatsbombPitch as sb\n",
"sb.sb_pitch(\"#195905\",\"#faf0e6\",\"vertical\",\"half\")\n",
"#plt.gca().invert_xaxis()\n",
"for i in range(len(sortxg)):\n",
" xe = sortxg.iloc[i]['location'][0]\n",
" ye = sortxg.iloc[i]['location'][1]\n",
" \n",
" if sortxg.iloc[i]['xG'] >= 0.75:\n",
" g = plt.scatter(ye,xe,color=\"#ee3e32\",edgecolors=\"none\",zorder=10,alpha=1,s = 40 )\n",
" elif sortxg.iloc[i]['xG'] < 0.75 and sortxg.iloc[i]['xG'] >=0.5:\n",
" o = plt.scatter(ye,xe,color=\"#f68838\",edgecolors=\"none\",zorder=8,alpha=0.75,s = 30 )\n",
" elif sortxg.iloc[i]['xG'] < 0.5 and sortxg.iloc[i]['xG'] >=0.25:\n",
" a = plt.scatter(ye,xe,color=\"#fbb021\",edgecolors=\"none\",zorder=6,alpha=0.5,s = 20 ) \n",
" else:\n",
" b = plt.scatter(ye,xe,color=\"#1b8a5a\",edgecolors=\"none\",zorder=4,alpha=0.25,s = 10 ) \n",
2020-04-25 17:11:18 +02:00
"plt.axis('off')\n",
2020-04-25 16:52:24 +02:00
"plt.legend((g,o,a,b),('>=0.75','>=0.5','>=0.25','<0.25'),scatterpoints=1,loc=3,title = 'xG Value',fontsize='small', fancybox=True,edgecolor = 'black',framealpha = 2\n",
" )\n",
"\n",
"\n",
"#ax = plt.subplot()\n",
"\n",
"#plt.savefig('MessiValverdeEraScatter.png')\n",
"plt.show()"
],
2020-05-13 14:17:39 +02:00
"execution_count": 0,
2020-04-25 16:52:24 +02:00
"outputs": [
{
"output_type": "display_data",
"data": {
2020-05-13 14:17:39 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlIAAAGACAYAAABmwYzKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOy9d5Rk2V3n+bn3PhM+0ldmZZbprrbVRralRg4kQUsIpJaEk8QuA4cdZs/ODCBgZoA9LKDh7O4wy8HszGjRMHip1SCMDsIISS1kaSPbkrpV1d3lMysrfYaP9969d/+4LyJ9VVZ1dRn1+5zTp7PCPBPxIu43fub7E9ZaMjIyMjIyMjIyLh55tQ8gIyMjIyMjI+N6JRNSGRkZGRkZGRmXSCakMjIyMjIyMjIukUxIZWRkZGRkZGRcIpmQysjIyMjIyMi4RDIhlZGRkZGRkZFxiXjnu3Pvu4JZYM8VOpaMjIyMjIyMjGuSmQ9EYrvbLxSRykRURkZGRkZGRsYOZKm9jIyMjIyMjIxL5LypvYyMjIz1vPNV7+T+e+6/2ofxnPPhxz7MA5994GofRkZGxnVAFpHKyMjYNfffcz+Hpw5f7cN4Tjk8dfh5IRYzMjIuD+J8s/b2vivY9s73/9T7uWv/Xc/ZQWVc2+T8HACduHOVj+Rbh+vlNS2GRQCa3eZVPpLnjuvlHK+Xa+Z6IntNn9987dTX+OHf/uEd79+p2PySUnuH9hyiGBav+S+ajOcGJdXVPoTrn/U/UcT185pqo6/2ITznXC/neL1cM9cT2Wv6/KUYFjm059AlPfeShNSJ+ROcmD/BO37zHZe004zrmw+++4MA2fv/LIiXLCZ2asofEPzZzz8IZK9pxu7JPoeXn+w1ff7Se+8vhazYPCPjKqBKYOsgPIEIrsw+TWRJ6hbhCbwKCLFtlPqSeGGhyJsHBslLyWfqNf5+deWCz/GtILCCpjSX7Th2S8VIGsJgLt9LkJGR8TwlE1IZGVcBGQiC4Su7iusG2ARsYrE5gQgvz3ZvDEN+bnxvX5gdCHMIBH+3urzt430r+PerE7ytNUjeSh4NGvzy4DSnvGjX+wyF4LuqAxwIQo502nyytspuEnKv6pT4D6sT3JjkmJcx7y3P8WBpadf7vVpYa8GCkJnyy8i41siEVEbGNYzVFtMB4Tvx9WwQPhCDECAuYynIK0sVRs4qhmcVKoGVEcN3jFd2FFI/WdvDu5rD/X+/LCrx3oWDfO/YUe5M8hSs5EtBi1js3AjzHyamuDWfB+AV5Qo35/K8d272vMe5Lwn4fxcPEKTNyqPG5/9YneSsF/PpXP1iT/uyYCKLjUHmdxZJVlviZYs14JVB5Z/ddXC+ffbvy4FQmWjLyNgNmZDKyLiGSVbhQEFx38E8QUnw8EKXx5fiS9qWVxZugZTPfpEsSck7hke5PZ/n1pmQfSf9/n17zkheXM9Dafvnvr05uOW2gzrk7+ZuYb92YbIFEfMzw6f4Ytja8tgbw7Avonq8slTmA4vzrOqd41Lf2xroi6jNx3M1hJSJLfGKizTJWOAP7PC4CGx6WqYDKr/943a1z2TdPiOBv+6tsHrdfV2BP3Tp+8nIeD6R+UhlZFzDeMLylpsLlANJQQleN5FjOLz4j621rj7KtIHLEGj4yfG9vLZSZdwPmJrxt9w/sepzKN6YOwyt4C2tAQp2++PviSiAEevzuwsHyW1TxBQI93wVw8iMZOyUIt+SeBeo+Qrt9vcHO9z+nGNZ6948T5mYDNIIonBRpMu2z80Bv3X3WbNzNDAjI2MjmZDKyLiGKQ1KwlAgQ5eGEQhK3sUv/KYNumXRbYvexrXEGotuWnTnwgvooFLckS+kTwQ/3v54xvSawBrRHn917mb+r+V920aF7JZVHfIofmVlcsvtRzptmksJtz8WMPW0z94THrd9MeCNK9XzHvdH86u7uj0vJa8tV3lDdYBB9dy1w8tA4JUFKu+K/3dCKEEwIglGBSr37ESf9Hfep2tCSO+rZmm9jIzdkqX2MjKuAiZOUygXqHtqAtOJZl/oPqorkeFs+xJ8juQOf6foBui2EzNCgAwFoYI3TRXYW1DMtjV/e7pNR1sia0msdREgAfWqoby6caMNoflqsJaW+1f1MQ7onavbxQ5hsld3yltus0DpKYGXbHzOz66O85H8Cqtq+9fnyaDDr1Wn+enaOCWriDF8sLjEhwtrHYYDSvGeqf0Me04E/uDQCP9x+jQnou6Ox/5sUIULCxZrXN2S8NkxmmitxUYgvAunbc+3T5UX8GyjXhkZzzMyIZXxvMFaC+bqFtFaY9FdQ7ICCHcsMhAutbNDfPivT7Y4POjjCcGTKzHRJbgFqJxACLDGFRJvOS679e9XjOU4WHJfEfuLHq/eE/KxmQ5NY/hEbYU3VF2BzfRNCQce98jH7gRiDO8ZmKa1ztbgxVHh4g+aHQSWhbu6W1f7EMndcYHPqJ3rnR4oOeF0UxJyRkUsbRJd31Ud6IsogJyUvG1omN+cnUEAA4GkrS0dfWVSX9Za4iWL1U4k+UNsew0nq2C6FiHdY7JC8YyMK0cmpDKeF/QXpMQV63qVK7/QmK7rvooW08iPL5C+JY40umURvsBau8XfKbFccoH5emS48zl7ZdBSICT99NHmFGLRX/v3Hy/Mc6Td5vZ8gTNRl0dH67yyXaZgJZ/K1VlQyYbnTquI2+KLD3X8Q36rH9Wru2XkDqGZaXVhC4WWNDwetLfcPqAU31YqM6g8ajrp2ykMKQ9PwPcdLLC34JFYyz+cafNULdmyje2w1mJagLyEjju7VmhuY4gWDWiBKm56WOKuKWvcf5ezK/PZYhOL7rhar2fbeZqRcS2SCamM5wVWOw8lcILmslRcXySm6zqj+rsWIEPoTKfHZy2ma1C5K78KCinwNmXRnlyJOVTxEAgslidWNoq5R5oNHmk2yAnBrcU8jwdtpuPthczvlxZ4TaeMvy7s1sGQO0+Z5j/lavxGdaulwf2t7dvbjqsOx/xLS8HtCwJ+eXI/Y57PuO8z6nsc73aJreWxZoPDgz57C+7r0hOC75jI8VStsatt64arT+txMWKq976YDqAspuOe665hh00sMgATO6Ei/WtLrMQrLqKmWxCMZF5YGd96ZEIq43mBUC4iYyKL3EVdynOBzIHouGJembP4FYnwoTtnXJpPcE21fzxdT/iz4y0m8q5Garq1tfbotlyen5uYJC/dgX+qtsr75s9tedxXwhY/MnqMH6uPslf7PBo2OSsj/vfaxmLyORHzU8MnWVAJM972UTi1Q5fdp3O7Ezbb8ZaBIfJSUjeac3HMsO9Rkor3L87zkZUl7h7aaD9/Jd8mVRCogku5JtpZGKi86HfZRUuu3k7lXSH5NU3WDJjxLUgmpDKeFwjR8+m5eguNDATh2Nb9F29RyFyaVguuoZwMMNPSzGwjoHr82OhYX0QBfHulymcbNZ5ob02dPR60effwqQ23WQE/1hhlj/b557DBr1fPXjCq9PeFFe7rbOzQM1j+tnDhsTQ7MeCtfRUu6YQlnfDFRoOzccT9g0M83myw0NWMhAqL5XNzu498uTSce3+3q0/bLUL0vJ0EJnFpamvAdCwyFP2I67WGVxWYTs/G4RoXehkZl0AmpDIyrjJeQSIvMG+v15WF5JpK3UwFWzvx9gXhtkJqOx4oLfHARY5o+cd8jf9WPsePNUbJW8mq0PxmdZZvpHVPgRC8bXCIV5UrCASPNes8sLhAZHcOhzzWbHA4X8DvuqCJCeHuYoGXlJyr6NsHh/kvZ8/yCdukmRhWI4sA3jiV55aKRy22fOR0i/nOWoG9idLOzHBr2vTZYiPcgQpnWyB9gdrBAPVqI32B3Go1lpHxLUMmpDIyrgP6dTYC/MHLI6assSRpg5tXurRowdFOm1tyG4vIj5xHRAVCUFaKxcSFT0Y9j+8dGGLE9/lys8HHa9t7PW3mv1bm+KPSAnt1wCnVpSOdSDoYhPzK5D7uLrhq7IbRDHquzusPF+bwheC7q4Pcns9zOuryN8vL1I3ma4tNBk/AgZYThjOVm
2020-04-25 16:52:24 +02:00
"text/plain": [
"<Figure size 748.8x489.6 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
2020-05-13 14:17:39 +02:00
"id": "YGtDnKSjdfPU",
2020-04-25 16:52:24 +02:00
"colab_type": "code",
2020-05-13 14:17:39 +02:00
"outputId": "d2ad66b4-dbbb-4223-9295-bf9e704666ca",
2020-04-25 16:52:24 +02:00
"colab": {
"base_uri": "https://localhost:8080/",
2020-05-13 14:17:39 +02:00
"height": 401
}
2020-04-25 16:52:24 +02:00
},
"source": [
2020-05-13 14:17:39 +02:00
"import StatsbombPitch as sb\n",
"sb.sb_pitch(\"#195905\",\"#faf0e6\",\"vertical\",\"half\")\n",
"#plt.gca().invert_xaxis()\n",
"for i in range(len(sortxg)):\n",
" xe = sortxg.iloc[i]['location'][0]\n",
" ye = sortxg.iloc[i]['location'][1]\n",
" \n",
" if sortxg.iloc[i]['xG'] >= 0.75:\n",
" g = plt.scatter(ye,xe,color=\"#ee3e32\",edgecolors=\"none\",zorder=10,alpha=1,s = 40 )\n",
" elif sortxg.iloc[i]['xG'] < 0.75 and sortxg.iloc[i]['xG'] >=0.5:\n",
" o = plt.scatter(ye,xe,color=\"#f68838\",edgecolors=\"none\",zorder=8,alpha=0.75,s = 30 )\n",
" elif sortxg.iloc[i]['xG'] < 0.5 and sortxg.iloc[i]['xG'] >=0.25:\n",
" a = plt.scatter(ye,xe,color=\"#fbb021\",edgecolors=\"none\",zorder=6,alpha=0.5,s = 20 ) \n",
" else:\n",
" b = plt.scatter(ye,xe,color=\"black\",edgecolors=\"none\",zorder=4,alpha=0.25,s = 10 ) \n",
"plt.axis('off')\n",
"plt.legend((g,o,a,b),('>=0.75','>=0.5','>=0.25','<0.25'),scatterpoints=1,loc=3,title = 'xG Value',fontsize='small', fancybox=True,edgecolor = 'black',framealpha = 2\n",
" )\n",
"\n",
"\n",
"#ax = plt.subplot()\n",
"\n",
"#plt.savefig('MessiValverdeEraScatter.png')\n",
"plt.show()"
2020-04-25 16:52:24 +02:00
],
2020-05-13 14:17:39 +02:00
"execution_count": 0,
2020-04-25 16:52:24 +02:00
"outputs": [
{
2020-05-13 14:17:39 +02:00
"output_type": "display_data",
2020-04-25 16:52:24 +02:00
"data": {
2020-05-13 14:17:39 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlIAAAGACAYAAABmwYzKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nOy9d7Rk113n+9l7n1C5br6duxVaoWUFK1mOQsYBe9kYjAHbzADG8HjrvSHNzGOxFgwMDDOwYHi8mfeI85Z5wDgDXgLb2MY2DnKQZFuWZFmh1epWd997+/aNleuEvff7Y1fVzbeDWupudD5rael2hROqTtX+1i98f8JaS0ZGRkZGRkZGxrkjL/YBZGRkZGRkZGRcrmRCKiMjIyMjIyPjPMmEVEZGRkZGRkbGeZIJqYyMjIyMjIyM8yQTUhkZGRkZGRkZ50kmpDIyMjIyMjIyzhNvuzt3vTs4BUy+QMeSkZGRkZGRkXFJMv2BWGx2+5kiUpmIysjIyMjIyMjYgiy1l5GRkZGRkZFxnmyb2svIyMhYzbte9S7edsfbLvZhPO/c++C9fPC+D17sw8jIyLgMyCJSGRkZZ83b7ngbh/YcutiH8bxyaM+hF4VYzMjIuDCI7Wbt7Xp3sOmd7/+F93Pjvhuft4PKuLTJ+TkAukn3Ih/Jvxwul9e0GBYBaEWti3wkzx+XyzleLtfM5UT2mr64efT4o/zYf/uxLe/fqtj8vFJ7V01eRTEsXvJfNBnPD0qqi30Ilz+rf6KIy+c11UZf7EN43rlczvFyuWYuJ7LX9MVLMSxy1eRV5/Xc8xJSx+aOcWzuGO/8w3ee104zLm8+9EsfAsje/+dAsmgxiVNT/pDgI7/yYeDyfk2vi3MUreThoE266e+2jAtJ9jm88GSv6YuX/nt/PmTF5hkZFwFVAtsA4QlE8MLs08SWtGERnsCrgBAXRu2MaY//Z2E/NyYFAOZkwr8dOc63wva2z/OtILCCljTntV9poWQldWHgHE+lYiRNYTCZ4MvIyHiOZMXmGRkXARkIglGJXxXbChoBVHyBfwE+qboJNgXTtdj4uW+vz68u7xqIKIBx4/OHi/vw7ebn5VvBry7v4mszh3hg5gb+Yu4K9qXnpiZ/qDXMZ09dx9dmbuCTs9dwT6d8Vs97VbfEP8we5GszN/D5U9fxo82Rc9rvxcJaizVb17NmZGRcPDIhlZFxiRJKeOeVRd57TZmfuabM7sJzq98Qfu//AsQFKgWRFu7pVjbcPmZ8bokLmzwDfr4+ybtbo+St+/q5My7xJ/MHEAZujPO8LCpuKcIA7uoW+a3lPUwad0L7dcgfLu7jiiTc9lj3pgH/98J+rkxdQfG48fn12m5e0z07EfZ8YGKLbm0vkqy2JAuWeN6iO89dTG23z8F9OhNtGRlnSyakMjIuUW4eDdiRd4onEHD3+PZC4Ux4ZYE/IvBHBcJ7bjmtkpT89Pgkv7/vAEJsvui2xeYpu7e3hjfcdkCHfPL0NXxo7mreN38ln525ltuizYXY29sbo0g+ku9vD217zG9pDxFs8pW32fG8EJjEkixb0qYlrW/zuBisBiyY59hMZtJV+6ytvc/qre/LyMjYmkxIZWRconi9lJ/VFt0B0QXdPr9IgbWuPsp0OOd6os34+R27uKdSZUcQbJma7K4TUqEVfH97iKLdPBy2T68IxTHr82fzB8htUsQUbBGtCs5wYuFWz9sm+vW8Ylnp3tymTEwGvQiiAJm/gPtcfymtui9LI2ZknD2ZkMrIuER5dCmmkRisAWMtX5uKsOn5bct0nAjTHYvexLXEGpfS0d0zL6DDSnFDvhctsqC2qNie0P7g7zHt8bHZg/zO0l78TQSP3bCqQx7Ff1zeveH2T+e3CJec4dC3et762/NSck+5yhurQwyr568dXgYCryxQeVf8vxVCCYIxSTAuULnnJvqkv/U+XRNC775qVoWfkXG2ZF17GRkXAZNYsG4x3YpGYvnrI00mQ8nigmWpY7ZdcLdFbvF3D91kUH8jBMhQECp4854CuwqKUx3NJ0506GpLbC2ptS5iJqBRNZRrazfaFJqHg5WuvZ9tTLBfb52aFFtEk169Sf3SPxZq/Ptawg7jr7n9Xa1R/rw8R01t7gP1eNDlt6tT/GJ9ByWrSDB8qLjIvYXlwWOGlOK39uxj1HPb/pGRMf7T1AmOxdGWx/5cUIUzCxZrLDbp1bht8XBrXQOB8JzwOt99qryA5xr1ysh4kZFFpDJeNFh78YtorbGkHU08b4gXDdGcwdQtXtpEbBJuijQcbxuaeYs/cv61TSon8Id6EYdNSo9WDzjo//2KiRwHSh6BFOwrerx60gmhljF8rr4iPqauTun4K7mpBMNvDU3RXmVrcOsWhednYlOBZWHCbPwNGCK5Kdl+Px8sLXLPjid41/jTvHbHE/zu0MwacfL66tBARAHkpOQHR0Z7xwLDgSR3BqFyIbHWkiy62qVkyW55Dac13GMWL/41npHxYiOLSGW8KOgvSDYFlQev8sKnLkzkFsN4oRf58aFSFLzrYJHJ2lewymcyJ5ntnp+v0pmQ4dbn7JVBS4GQDNJHpXWireiv/Puv5ud4stPh+nyBk3HEA+MNXtkpU7CSL+YazKu1onBKxVyXnHuo41P55Q23vToqI7cIzUypM/s6tKXhkaCz4fYhpXh5qcyw8qjrlH5ca0R5eAJ+6ECBXQWP1Fo+dbLD4frZ5VmttZg2IHsRn3PB9grNAZtAvGBAC1Rx3cNSd01Z4/67UF2ZFwKbWnTX1XptF4HNyLhcyYRUxosCqxnUF5nIckEqrs8RE7nC8f6uhYC7rgypSAkWhE545VjI353cuMg/3wgp8NZl0R5fTriq4iEQWCzfXU7W3H9/q8n9rSY5Ibi2mOeRoMNUsrmQeV9pntd0y/irguBdDLltguJfyNX5g+qpDbe/bYvuvKOqyzP++aXg9gYBv7F7HxOezw7fZ9z3OBpFJNbyYKvJoWGfXQX3dekJwffszHG43jyrbevm2iaBcxFT/ffFdAFlMV33XHcNg69rvOdgicI18J2ZhM9Nd5H+pSVWkmWL1aDbEIy5c8rI+JdEJqQyXhQI5SIyJrbIs6hLeT6QORBdV8wrcxa/IgkqEukBolebdAkl259upHzkaJudeVcjNdXeWHt0XS7Pv9+5m3zvwL9Yr/Hnc7MbHvftsM2Pjz/Dexrj7NI+D4QtZmTMr9bXFpOflgm/MPIs8ypl2ks2bAdAbdFl96Xc2Qmbzfj+oRHyUtIwmtkkYdT3KEnF+xfm+PjyIjeNrDUMfSHfJlVw6VhrIdXOwmBsSDHW/hbD0eNcUfZYigy37gs5JTVP1s6zI+GFIMs6ZvwLJBNSGS8KhBD4Q3AxIlF9ZCAIJ9bu/6FazMF9PsIDKxQPLFxAy/ELwHRbM72JgOrznvGJgYgCuLtS5b5mne92NkbVHgk6/NLo8TW3WQHvaY4zqX2+Fjb5verMGaNK/1hY5g3d6prbDJZPrCoax8JtcYFR43F/0NqyAL3PkLfyVbioUxZ1yv3NBu9fmAPg8eWYG0d8xkKFxfKV02cf+XJpOJc2lbmzftoGhBD4I25bP3xNkXw6h68bXFPxeGQpoZNait4lpMR7eFWB6fZtHLJoVMa/PDIhlZFxEanFlr862uRNb7qTVOY53vp/N31cvysLyQVL3UgBB3upu6frCel5RAv2BBs78fYG4aZCajM+WFrk44VlhozHSRVzNpZOn8nX+ePyLO9pjpO3kprQ/GH1FI8FHXwreE9jjPc2xyn1/Kq6GH5zeIq/L2yst+rzYKvJofzaQvWjaZufubZEXgkeXUr44JEWE3lFKzXUYosAvm9PnmsqHvXE8vETbeZW1beZuNeZGW5Mmz4XQgWV3sygRJYRQlBQgqXIcKS+eRTvYiJ9gfTP/LiMjMuVTEhlZFxkIg2xt7279qDORoA/fG5iajSUaAvL8coiL4C37y+wt+i+AqbbPh891uZcfRif6na4Jre2iPzJLURU2Uje2h5ml/Z5MGhxX9jgV+q7eEdrmADJcRXxH
2020-04-25 16:52:24 +02:00
"text/plain": [
2020-05-13 14:17:39 +02:00
"<Figure size 748.8x489.6 with 1 Axes>"
2020-04-25 16:52:24 +02:00
]
},
"metadata": {
2020-05-13 14:17:39 +02:00
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "EHBsENu7wRim",
"colab_type": "code",
"outputId": "50ba187e-e87c-4c03-f0b8-deb082050d95",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 340
}
},
"source": [
"#[['Distance','Angle','UnderPressure','ShotType','ShotBodyPart','ShotTechnique','ShotFirstTime','ShotOneonOne']]\n",
"xgb_model.predict_proba(pd.DataFrame([12.55,37.156,0,4,2,2,0,0]))[:,1]"
],
"execution_count": 0,
"outputs": [
{
"output_type": "error",
"ename": "ValueError",
"evalue": "ignored",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-43-9dff4c73a403>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mxgb_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_proba\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m12.55\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m37.156\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/xgboost/sklearn.py\u001b[0m in \u001b[0;36mpredict_proba\u001b[0;34m(self, data, ntree_limit, validate_features)\u001b[0m\n\u001b[1;32m 832\u001b[0m class_probs = self.get_booster().predict(test_dmatrix,\n\u001b[1;32m 833\u001b[0m \u001b[0mntree_limit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mntree_limit\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 834\u001b[0;31m validate_features=validate_features)\n\u001b[0m\u001b[1;32m 835\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobjective\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"multi:softprob\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 836\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mclass_probs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/xgboost/core.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, data, output_margin, ntree_limit, pred_leaf, pred_contribs, approx_contribs, pred_interactions, validate_features)\u001b[0m\n\u001b[1;32m 1282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalidate_features\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1284\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_features\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1285\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1286\u001b[0m \u001b[0mlength\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mc_bst_ulong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/dist-packages/xgboost/core.py\u001b[0m in \u001b[0;36m_validate_features\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1689\u001b[0m raise ValueError(msg.format(self.feature_names,\n\u001b[0;32m-> 1690\u001b[0;31m data.feature_names))\n\u001b[0m\u001b[1;32m 1691\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1692\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_split_value_histogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeature\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfmap\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbins\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mas_pandas\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: feature_names mismatch: ['Distance', 'Angle', 'UnderPressure', 'ShotType', 'ShotBodyPart', 'ShotTechnique', 'ShotFirstTime', 'ShotOneonOne'] ['0']\nexpected ShotOneonOne, ShotBodyPart, ShotTechnique, ShotType, ShotFirstTime, Distance, UnderPressure, Angle in input data\ntraining data did not have the following fields: 0"
]
2020-04-25 16:52:24 +02:00
}
]
}
]
2020-04-25 17:13:12 +02:00
}