{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ad6b7dc7", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in /home/osboxes/.local/lib/python3.8/site-packages (1.5.12)\n", "Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.14.0)\n", "Requirement already satisfied: urllib3 in /usr/lib/python3/dist-packages (from kaggle) (1.25.8)\n", "Requirement already satisfied: python-dateutil in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (2.8.2)\n", "Requirement already satisfied: certifi in /usr/lib/python3/dist-packages (from kaggle) (2019.11.28)\n", "Requirement already satisfied: requests in /usr/lib/python3/dist-packages (from kaggle) (2.22.0)\n", "Requirement already satisfied: tqdm in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (4.63.0)\n", "Requirement already satisfied: python-slugify in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (6.1.1)\n", "Requirement already satisfied: text-unidecode>=1.3 in /home/osboxes/.local/lib/python3.8/site-packages (from python-slugify->kaggle) (1.3)\n" ] } ], "source": [ "%pip install --user kaggle==1.5.12" ] }, { "cell_type": "code", "execution_count": 2, "id": "4ab2c14f", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: pandas in /home/osboxes/.local/lib/python3.8/site-packages (1.4.1)\r\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (2.8.2)\r\n", "Requirement already satisfied: pytz>=2020.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (2022.1)\r\n", "Requirement already satisfied: numpy>=1.18.5; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) 
(1.22.3)\r\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas) (1.14.0)\r\n" ] } ], "source": [ "%pip install --user pandas==1.4.1" ] }, { "cell_type": "code", "execution_count": 4, "id": "c0597767", "metadata": {}, "outputs": [], "source": [ "!mkdir -p ~/.kaggle" ] }, { "cell_type": "code", "execution_count": 6, "id": "2465b1e9", "metadata": {}, "outputs": [], "source": [ "!cp ~/Downloads/kaggle.json ~/.kaggle/kaggle.json\n", "!chmod 600 ~/.kaggle/kaggle.json" ] }, { "cell_type": "code", "execution_count": 7, "id": "faa7e821", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/osboxes/.kaggle/kaggle.json'\n", "Downloading extended-football-stats-for-european-leagues-xg.zip to /home/osboxes/jupyter_dir/notebooks/IUM\n", " 73%|███████████████████████████▋ | 1.00M/1.37M [00:00<00:00, 4.92MB/s]\n", "100%|██████████████████████████████████████| 1.37M/1.37M [00:00<00:00, 6.55MB/s]\n" ] } ], "source": [ "!kaggle datasets download -d slehkyi/extended-football-stats-for-european-leagues-xg" ] }, { "cell_type": "code", "execution_count": 12, "id": "d5b18a91", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: extended-football-stats-for-european-leagues-xg.zip\n", " inflating: understat.com.csv \n", " inflating: understat_per_game.csv \n" ] } ], "source": [ "!unzip -o extended-football-stats-for-european-leagues-xg.zip" ] }, { "cell_type": "code", "execution_count": 9, "id": "0283db51", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting seaborn\n", " Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)\n", "\u001b[K |████████████████████████████████| 
292 kB 2.0 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: pandas>=0.23 in /home/osboxes/.local/lib/python3.8/site-packages (from seaborn) (1.4.1)\n", "Requirement already satisfied: numpy>=1.15 in /home/osboxes/.local/lib/python3.8/site-packages (from seaborn) (1.22.3)\n", "Collecting matplotlib>=2.2\n", " Downloading matplotlib-3.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.3 MB)\n", "\u001b[K |████████████████████████████████| 11.3 MB 5.7 MB/s eta 0:00:01 |██████████████████████ | 7.7 MB 5.7 MB/s eta 0:00:01 |█████████████████████████ | 8.8 MB 5.7 MB/s eta 0:00:01\n", "\u001b[?25hCollecting scipy>=1.0\n", " Downloading scipy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.6 MB)\n", "\u001b[K |████████████████████████████████| 41.6 MB 27 kB/s eta 0:00:011 |███▊ | 4.9 MB 5.0 MB/s eta 0:00:08 |██████ | 7.8 MB 7.9 MB/s eta 0:00:05 |██████████████████ | 23.3 MB 4.5 MB/s eta 0:00:05 |██████████████████▎ | 23.7 MB 4.5 MB/s eta 0:00:04 |█████████████████████▍ | 27.8 MB 10.0 MB/s eta 0:00:02 |███████████████████████████▏ | 35.3 MB 7.2 MB/s eta 0:00:01 |████████████████████████████▍ | 36.9 MB 7.2 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2022.1)\n", "Collecting fonttools>=4.22.0\n", " Downloading fonttools-4.31.1-py3-none-any.whl (899 kB)\n", "\u001b[K |████████████████████████████████| 899 kB 3.2 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: pillow>=6.2.0 in /usr/lib/python3/dist-packages (from matplotlib>=2.2->seaborn) (7.0.0)\n", "Requirement already satisfied: pyparsing>=2.2.1 in /home/osboxes/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (3.0.7)\n", "Collecting kiwisolver>=1.0.1\n", " Downloading 
kiwisolver-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n", "\u001b[K |████████████████████████████████| 1.2 MB 12.4 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /home/osboxes/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (21.3)\n", "Collecting cycler>=0.10\n", " Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas>=0.23->seaborn) (1.14.0)\n", "Installing collected packages: fonttools, kiwisolver, cycler, matplotlib, scipy, seaborn\n", "Successfully installed cycler-0.11.0 fonttools-4.31.1 kiwisolver-1.4.0 matplotlib-3.5.1 scipy-1.8.0 seaborn-0.11.2\n" ] } ], "source": [ "%pip install --user seaborn==0.11.2" ] }, { "cell_type": "code", "execution_count": 13, "id": "2cd1e392", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ ",,position,team,matches,wins,draws,loses,scored,missed,pts,xG,xG_diff,npxG,xGA,xGA_diff,npxGA,npxGD,ppda_coef,oppda_coef,deep,deep_allowed,xpts,xpts_diff\r\n", "La_liga,2014,1,Barcelona,38,30,4,4,110,21,94,102.98015200000002,-7.019847999999982,97.77721200000002,28.44429270000001,7.444292700000009,24.727906700000005,73.04930530000001,5.683534703382723,16.367592989090525,489,114,94.08129999999998,0.0812999999999846\r\n", "La_liga,2014,2,Real Madrid,38,30,2,6,118,38,92,95.76624299999999,-22.23375700000001,86.10389499999998,42.607198000000004,4.607198000000004,38.890805,47.213090000000015,10.209085456325049,12.929510106152211,351,153,81.7489,-10.251099999999994\r\n", "La_liga,2014,3,Atletico Madrid,38,23,9,6,67,29,78,57.047670000000004,-9.952329999999996,52.588007999999995,29.069107100000004,0.06910710000000364,26.839271100000005,25.748736900000008,8.982028430893806,9.237090640679776,197,123,73.13530000000003,-4.864699999999971\r\n", 
"La_liga,2014,4,Valencia,38,22,11,5,70,32,77,55.06250000000001,-14.937499999999993,49.703978,39.392571999999994,7.392571999999994,33.44647700000001,16.257500999999998,8.709827299105736,7.870224725817145,203,172,63.7068,-13.293199999999999\r\n" ] } ], "source": [ "!head -n 5 understat.com.csv" ] }, { "cell_type": "code", "execution_count": 10, "id": "12a3ddce", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | league | \n", "year | \n", "position | \n", "team | \n", "matches | \n", "wins | \n", "draws | \n", "loses | \n", "scored | \n", "missed | \n", "... | \n", "xGA | \n", "xGA_diff | \n", "npxGA | \n", "npxGD | \n", "ppda_coef | \n", "oppda_coef | \n", "deep | \n", "deep_allowed | \n", "xpts | \n", "xpts_diff | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "La_liga | \n", "2014 | \n", "1 | \n", "Barcelona | \n", "38 | \n", "30 | \n", "4 | \n", "4 | \n", "110 | \n", "21 | \n", "... | \n", "28.444293 | \n", "7.444293 | \n", "24.727907 | \n", "73.049305 | \n", "5.683535 | \n", "16.367593 | \n", "489 | \n", "114 | \n", "94.0813 | \n", "0.0813 | \n", "
1 | \n", "La_liga | \n", "2014 | \n", "2 | \n", "Real Madrid | \n", "38 | \n", "30 | \n", "2 | \n", "6 | \n", "118 | \n", "38 | \n", "... | \n", "42.607198 | \n", "4.607198 | \n", "38.890805 | \n", "47.213090 | \n", "10.209085 | \n", "12.929510 | \n", "351 | \n", "153 | \n", "81.7489 | \n", "-10.2511 | \n", "
2 | \n", "La_liga | \n", "2014 | \n", "3 | \n", "Atletico Madrid | \n", "38 | \n", "23 | \n", "9 | \n", "6 | \n", "67 | \n", "29 | \n", "... | \n", "29.069107 | \n", "0.069107 | \n", "26.839271 | \n", "25.748737 | \n", "8.982028 | \n", "9.237091 | \n", "197 | \n", "123 | \n", "73.1353 | \n", "-4.8647 | \n", "
3 | \n", "La_liga | \n", "2014 | \n", "4 | \n", "Valencia | \n", "38 | \n", "22 | \n", "11 | \n", "5 | \n", "70 | \n", "32 | \n", "... | \n", "39.392572 | \n", "7.392572 | \n", "33.446477 | \n", "16.257501 | \n", "8.709827 | \n", "7.870225 | \n", "203 | \n", "172 | \n", "63.7068 | \n", "-13.2932 | \n", "
4 | \n", "La_liga | \n", "2014 | \n", "5 | \n", "Sevilla | \n", "38 | \n", "23 | \n", "7 | \n", "8 | \n", "71 | \n", "45 | \n", "... | \n", "47.862742 | \n", "2.862742 | \n", "41.916529 | \n", "20.178070 | \n", "8.276148 | \n", "9.477805 | \n", "305 | \n", "168 | \n", "67.3867 | \n", "-8.6133 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
679 | \n", "RFPL | \n", "2019 | \n", "12 | \n", "PFC Sochi | \n", "30 | \n", "8 | \n", "9 | \n", "13 | \n", "40 | \n", "39 | \n", "... | \n", "38.850259 | \n", "-0.149741 | \n", "32.780898 | \n", "-0.096048 | \n", "12.838079 | \n", "10.562327 | \n", "175 | \n", "206 | \n", "38.6587 | \n", "5.6587 | \n", "
680 | \n", "RFPL | \n", "2019 | \n", "13 | \n", "FK Akhmat | \n", "30 | \n", "7 | \n", "10 | \n", "13 | \n", "27 | \n", "46 | \n", "... | \n", "40.626196 | \n", "-5.373804 | \n", "38.363370 | \n", "-10.495864 | \n", "11.199502 | \n", "10.806357 | \n", "124 | \n", "206 | \n", "36.5424 | \n", "5.5424 | \n", "
681 | \n", "RFPL | \n", "2019 | \n", "14 | \n", "Krylya Sovetov Samara | \n", "30 | \n", "8 | \n", "7 | \n", "15 | \n", "33 | \n", "40 | \n", "... | \n", "42.980693 | \n", "2.980693 | \n", "37.550114 | \n", "-7.777201 | \n", "11.949903 | \n", "10.080858 | \n", "103 | \n", "215 | \n", "36.3363 | \n", "5.3363 | \n", "
682 | \n", "RFPL | \n", "2019 | \n", "15 | \n", "FC Tambov | \n", "30 | \n", "9 | \n", "4 | \n", "17 | \n", "37 | \n", "41 | \n", "... | \n", "39.747938 | \n", "-1.252062 | \n", "34.468003 | \n", "-12.231948 | \n", "14.666049 | \n", "9.192768 | \n", "150 | \n", "270 | \n", "29.2413 | \n", "-1.7587 | \n", "
683 | \n", "RFPL | \n", "2019 | \n", "16 | \n", "FC Orenburg | \n", "30 | \n", "7 | \n", "6 | \n", "17 | \n", "28 | \n", "52 | \n", "... | \n", "37.169797 | \n", "-14.830203 | \n", "32.644130 | \n", "0.201339 | \n", "12.830908 | \n", "9.464581 | \n", "153 | \n", "215 | \n", "39.2364 | \n", "12.2364 | \n", "
684 rows × 24 columns
\n", "\n", " | league | \n", "year | \n", "position | \n", "team | \n", "matches | \n", "wins | \n", "draws | \n", "loses | \n", "scored | \n", "missed | \n", "... | \n", "xGA | \n", "xGA_diff | \n", "npxGA | \n", "npxGD | \n", "ppda_coef | \n", "oppda_coef | \n", "deep | \n", "deep_allowed | \n", "xpts | \n", "xpts_diff | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "684 | \n", "684.000000 | \n", "684.000000 | \n", "684 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "... | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "6.840000e+02 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "684.000000 | \n", "
unique | \n", "6 | \n", "NaN | \n", "NaN | \n", "168 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
top | \n", "La_liga | \n", "NaN | \n", "NaN | \n", "Barcelona | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
freq | \n", "120 | \n", "NaN | \n", "NaN | \n", "6 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
mean | \n", "NaN | \n", "2016.500000 | \n", "10.061404 | \n", "NaN | \n", "35.935673 | \n", "13.434211 | \n", "9.067251 | \n", "13.434211 | \n", "48.190058 | \n", "48.190058 | \n", "... | \n", "47.064744 | \n", "-1.125315 | \n", "42.902596 | \n", "-4.155221e-17 | \n", "10.911784 | \n", "10.911772 | \n", "208.676901 | \n", "208.676901 | \n", "49.539598 | \n", "0.169715 | \n", "
std | \n", "NaN | \n", "1.709075 | \n", "5.580165 | \n", "NaN | \n", "3.203487 | \n", "5.880962 | \n", "2.941824 | \n", "5.510278 | \n", "17.605374 | \n", "13.866509 | \n", "... | \n", "11.781399 | \n", "6.663632 | \n", "11.002013 | \n", "1.929269e+01 | \n", "2.521398 | \n", "3.301410 | \n", "83.888073 | \n", "54.713624 | \n", "13.559213 | \n", "7.156998 | \n", "
min | \n", "NaN | \n", "2014.000000 | \n", "1.000000 | \n", "NaN | \n", "27.000000 | \n", "2.000000 | \n", "2.000000 | \n", "1.000000 | \n", "13.000000 | \n", "15.000000 | \n", "... | \n", "16.838674 | \n", "-29.175087 | \n", "16.084399 | \n", "-4.220877e+01 | \n", "5.683535 | \n", "4.394458 | \n", "76.000000 | \n", "83.000000 | \n", "17.907700 | \n", "-24.721600 | \n", "
25% | \n", "NaN | \n", "2015.000000 | \n", "5.000000 | \n", "NaN | \n", "34.000000 | \n", "9.000000 | \n", "7.000000 | \n", "9.000000 | \n", "36.000000 | \n", "38.000000 | \n", "... | \n", "38.916186 | \n", "-5.698828 | \n", "35.474606 | \n", "-1.325816e+01 | \n", "9.090617 | \n", "8.809866 | \n", "151.750000 | \n", "170.000000 | \n", "39.466550 | \n", "-4.498400 | \n", "
50% | \n", "NaN | \n", "2016.500000 | \n", "10.000000 | \n", "NaN | \n", "38.000000 | \n", "12.000000 | \n", "9.000000 | \n", "14.000000 | \n", "45.000000 | \n", "48.000000 | \n", "... | \n", "47.310924 | \n", "-0.918895 | \n", "43.031911 | \n", "-3.127901e+00 | \n", "10.562543 | \n", "10.347047 | \n", "188.000000 | \n", "205.000000 | \n", "47.102100 | \n", "0.116050 | \n", "
75% | \n", "NaN | \n", "2018.000000 | \n", "15.000000 | \n", "NaN | \n", "38.000000 | \n", "16.000000 | \n", "11.000000 | \n", "17.000000 | \n", "56.000000 | \n", "58.000000 | \n", "... | \n", "54.834899 | \n", "3.381834 | \n", "50.263465 | \n", "9.740049e+00 | \n", "12.434874 | \n", "12.187434 | \n", "242.000000 | \n", "246.250000 | \n", "56.942025 | \n", "4.912775 | \n", "
max | \n", "NaN | \n", "2019.000000 | \n", "20.000000 | \n", "NaN | \n", "38.000000 | \n", "32.000000 | \n", "18.000000 | \n", "29.000000 | \n", "118.000000 | \n", "94.000000 | \n", "... | \n", "88.432186 | \n", "16.370737 | \n", "78.535447 | \n", "7.304931e+01 | \n", "21.896752 | \n", "30.468113 | \n", "582.000000 | \n", "375.000000 | \n", "94.380000 | \n", "23.047500 | \n", "
11 rows × 24 columns
\n", "