ium_478839/IUM02-roboczy.ipynb
2022-03-21 04:53:44 -04:00

40 KiB
Raw Permalink Blame History

# Install the Kaggle CLI. %pip runs pip for the interpreter backing this kernel,
# whereas !pip runs whichever pip happens to be first on PATH.
%pip install --user kaggle
Requirement already satisfied: kaggle in /home/osboxes/.local/lib/python3.8/site-packages (1.5.12)
Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.14.0)
Requirement already satisfied: urllib3 in /usr/lib/python3/dist-packages (from kaggle) (1.25.8)
Requirement already satisfied: python-dateutil in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (2.8.2)
Requirement already satisfied: certifi in /usr/lib/python3/dist-packages (from kaggle) (2019.11.28)
Requirement already satisfied: requests in /usr/lib/python3/dist-packages (from kaggle) (2.22.0)
Requirement already satisfied: tqdm in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (4.63.0)
Requirement already satisfied: python-slugify in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (6.1.1)
Requirement already satisfied: text-unidecode>=1.3 in /home/osboxes/.local/lib/python3.8/site-packages (from python-slugify->kaggle) (1.3)
# Install pandas into the environment of the running kernel (%pip, not !pip),
# so the later `import pandas` is guaranteed to see the same installation.
%pip install --user pandas
Requirement already satisfied: pandas in /home/osboxes/.local/lib/python3.8/site-packages (1.4.1)
Requirement already satisfied: python-dateutil>=2.8.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (2022.1)
Requirement already satisfied: numpy>=1.18.5; platform_machine != "aarch64" and platform_machine != "arm64" and python_version < "3.10" in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (1.22.3)
Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas) (1.14.0)
# Create the Kaggle config directory. -p makes the cell safe to re-run:
# plain mkdir fails with "File exists" once the directory is there.
!mkdir -p ~/.kaggle
mkdir: cannot create directory /home/osboxes/.kaggle: File exists
# Copy the downloaded API token into the Kaggle config directory and make it
# readable by the owner only; the kaggle CLI warns when other users can read it.
# ~ is used instead of a hard-coded home directory so the cell works for any user.
!cp ~/Downloads/kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
# Download the dataset archive (a .zip) into the current working directory
# using the Kaggle CLI and the API token from ~/.kaggle/kaggle.json.
!kaggle datasets download -d slehkyi/extended-football-stats-for-european-leagues-xg
Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/osboxes/.kaggle/kaggle.json'
Downloading extended-football-stats-for-european-leagues-xg.zip to /home/osboxes/jupyter_dir/notebooks/IUM
 73%|███████████████████████████▋          | 1.00M/1.37M [00:00<00:00, 4.92MB/s]
100%|██████████████████████████████████████| 1.37M/1.37M [00:00<00:00, 6.55MB/s]
# Extract the downloaded archive in place; -o overwrites existing files without
# prompting, so the cell can be re-run non-interactively.
!unzip -o extended-football-stats-for-european-leagues-xg.zip
Archive:  extended-football-stats-for-european-leagues-xg.zip
  inflating: understat.com.csv       
  inflating: understat_per_game.csv  
# Install seaborn (pulls in matplotlib and scipy) for the interpreter backing
# this kernel; %pip avoids installing into a different Python than the one in use.
%pip install --user seaborn
Collecting seaborn
  Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)
     |████████████████████████████████| 292 kB 2.0 MB/s eta 0:00:01
[?25hRequirement already satisfied: pandas>=0.23 in /home/osboxes/.local/lib/python3.8/site-packages (from seaborn) (1.4.1)
Requirement already satisfied: numpy>=1.15 in /home/osboxes/.local/lib/python3.8/site-packages (from seaborn) (1.22.3)
Collecting matplotlib>=2.2
  Downloading matplotlib-3.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.3 MB)
     |████████████████████████████████| 11.3 MB 5.7 MB/s eta 0:00:01     |██████████████████████          | 7.7 MB 5.7 MB/s eta 0:00:01     |█████████████████████████       | 8.8 MB 5.7 MB/s eta 0:00:01
[?25hCollecting scipy>=1.0
  Downloading scipy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.6 MB)
     |████████████████████████████████| 41.6 MB 27 kB/s  eta 0:00:011   |███▊                            | 4.9 MB 5.0 MB/s eta 0:00:08     |██████                          | 7.8 MB 7.9 MB/s eta 0:00:05     |██████████████████              | 23.3 MB 4.5 MB/s eta 0:00:05     |██████████████████▎             | 23.7 MB 4.5 MB/s eta 0:00:04     |█████████████████████▍          | 27.8 MB 10.0 MB/s eta 0:00:02     |███████████████████████████▏    | 35.3 MB 7.2 MB/s eta 0:00:01     |████████████████████████████▍   | 36.9 MB 7.2 MB/s eta 0:00:01
[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2022.1)
Collecting fonttools>=4.22.0
  Downloading fonttools-4.31.1-py3-none-any.whl (899 kB)
     |████████████████████████████████| 899 kB 3.2 MB/s eta 0:00:01
[?25hRequirement already satisfied: pillow>=6.2.0 in /usr/lib/python3/dist-packages (from matplotlib>=2.2->seaborn) (7.0.0)
Requirement already satisfied: pyparsing>=2.2.1 in /home/osboxes/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (3.0.7)
Collecting kiwisolver>=1.0.1
  Downloading kiwisolver-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)
     |████████████████████████████████| 1.2 MB 12.4 MB/s eta 0:00:01
[?25hRequirement already satisfied: packaging>=20.0 in /home/osboxes/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (21.3)
Collecting cycler>=0.10
  Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)
Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas>=0.23->seaborn) (1.14.0)
Installing collected packages: fonttools, kiwisolver, cycler, matplotlib, scipy, seaborn
Successfully installed cycler-0.11.0 fonttools-4.31.1 kiwisolver-1.4.0 matplotlib-3.5.1 scipy-1.8.0 seaborn-0.11.2
# Peek at the first five raw lines (header + four rows) of the season-level CSV.
# NOTE(review): the archive listing shows understat.com.csv and
# understat_per_game.csv, not understat.csv; presumably the file was renamed
# or copied in a step that is not part of this notebook. Confirm.
!head -n 5 understat.csv
,,position,team,matches,wins,draws,loses,scored,missed,pts,xG,xG_diff,npxG,xGA,xGA_diff,npxGA,npxGD,ppda_coef,oppda_coef,deep,deep_allowed,xpts,xpts_diff
La_liga,2014,1,Barcelona,38,30,4,4,110,21,94,102.98015200000002,-7.019847999999982,97.77721200000002,28.44429270000001,7.444292700000009,24.727906700000005,73.04930530000001,5.683534703382723,16.367592989090525,489,114,94.08129999999998,0.0812999999999846
La_liga,2014,2,Real Madrid,38,30,2,6,118,38,92,95.76624299999999,-22.23375700000001,86.10389499999998,42.607198000000004,4.607198000000004,38.890805,47.213090000000015,10.209085456325049,12.929510106152211,351,153,81.7489,-10.251099999999994
La_liga,2014,3,Atletico Madrid,38,23,9,6,67,29,78,57.047670000000004,-9.952329999999996,52.588007999999995,29.069107100000004,0.06910710000000364,26.839271100000005,25.748736900000008,8.982028430893806,9.237090640679776,197,123,73.13530000000003,-4.864699999999971
La_liga,2014,4,Valencia,38,22,11,5,70,32,77,55.06250000000001,-14.937499999999993,49.703978,39.392571999999994,7.392571999999994,33.44647700000001,16.257500999999998,8.709827299105736,7.870224725817145,203,172,63.7068,-13.293199999999999
import pandas as pd

# The raw header starts with two empty column names (",,position,..."), which
# pandas loads as "Unnamed: 0" and "Unnamed: 1". Name them explicitly so the
# frame has the league/year columns used in the rest of the analysis.
# rename() ignores keys that are not present, so a file whose header already
# carries these names is loaded unchanged.
understat = pd.read_csv('understat.csv').rename(
    columns={'Unnamed: 0': 'league', 'Unnamed: 1': 'year'}
)

# Bare expression: displays the frame as the cell output.
understat
league year position team matches wins draws loses scored missed ... xGA xGA_diff npxGA npxGD ppda_coef oppda_coef deep deep_allowed xpts xpts_diff
0 La_liga 2014 1 Barcelona 38 30 4 4 110 21 ... 28.444293 7.444293 24.727907 73.049305 5.683535 16.367593 489 114 94.0813 0.0813
1 La_liga 2014 2 Real Madrid 38 30 2 6 118 38 ... 42.607198 4.607198 38.890805 47.213090 10.209085 12.929510 351 153 81.7489 -10.2511
2 La_liga 2014 3 Atletico Madrid 38 23 9 6 67 29 ... 29.069107 0.069107 26.839271 25.748737 8.982028 9.237091 197 123 73.1353 -4.8647
3 La_liga 2014 4 Valencia 38 22 11 5 70 32 ... 39.392572 7.392572 33.446477 16.257501 8.709827 7.870225 203 172 63.7068 -13.2932
4 La_liga 2014 5 Sevilla 38 23 7 8 71 45 ... 47.862742 2.862742 41.916529 20.178070 8.276148 9.477805 305 168 67.3867 -8.6133
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
679 RFPL 2019 12 PFC Sochi 30 8 9 13 40 39 ... 38.850259 -0.149741 32.780898 -0.096048 12.838079 10.562327 175 206 38.6587 5.6587
680 RFPL 2019 13 FK Akhmat 30 7 10 13 27 46 ... 40.626196 -5.373804 38.363370 -10.495864 11.199502 10.806357 124 206 36.5424 5.5424
681 RFPL 2019 14 Krylya Sovetov Samara 30 8 7 15 33 40 ... 42.980693 2.980693 37.550114 -7.777201 11.949903 10.080858 103 215 36.3363 5.3363
682 RFPL 2019 15 FC Tambov 30 9 4 17 37 41 ... 39.747938 -1.252062 34.468003 -12.231948 14.666049 9.192768 150 270 29.2413 -1.7587
683 RFPL 2019 16 FC Orenburg 30 7 6 17 28 52 ... 37.169797 -14.830203 32.644130 0.201339 12.830908 9.464581 153 215 39.2364 12.2364

684 rows × 24 columns

# Summary statistics for every column: include='all' adds the non-numeric
# columns (count / unique / top / freq) next to the numeric ones
# (mean / std / min / quartiles / max); non-applicable cells show NaN.
understat.describe(include='all')
league year position team matches wins draws loses scored missed ... xGA xGA_diff npxGA npxGD ppda_coef oppda_coef deep deep_allowed xpts xpts_diff
count 684 684.000000 684.000000 684 684.000000 684.000000 684.000000 684.000000 684.000000 684.000000 ... 684.000000 684.000000 684.000000 6.840000e+02 684.000000 684.000000 684.000000 684.000000 684.000000 684.000000
unique 6 NaN NaN 168 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
top La_liga NaN NaN Barcelona NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
freq 120 NaN NaN 6 NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
mean NaN 2016.500000 10.061404 NaN 35.935673 13.434211 9.067251 13.434211 48.190058 48.190058 ... 47.064744 -1.125315 42.902596 -4.155221e-17 10.911784 10.911772 208.676901 208.676901 49.539598 0.169715
std NaN 1.709075 5.580165 NaN 3.203487 5.880962 2.941824 5.510278 17.605374 13.866509 ... 11.781399 6.663632 11.002013 1.929269e+01 2.521398 3.301410 83.888073 54.713624 13.559213 7.156998
min NaN 2014.000000 1.000000 NaN 27.000000 2.000000 2.000000 1.000000 13.000000 15.000000 ... 16.838674 -29.175087 16.084399 -4.220877e+01 5.683535 4.394458 76.000000 83.000000 17.907700 -24.721600
25% NaN 2015.000000 5.000000 NaN 34.000000 9.000000 7.000000 9.000000 36.000000 38.000000 ... 38.916186 -5.698828 35.474606 -1.325816e+01 9.090617 8.809866 151.750000 170.000000 39.466550 -4.498400
50% NaN 2016.500000 10.000000 NaN 38.000000 12.000000 9.000000 14.000000 45.000000 48.000000 ... 47.310924 -0.918895 43.031911 -3.127901e+00 10.562543 10.347047 188.000000 205.000000 47.102100 0.116050
75% NaN 2018.000000 15.000000 NaN 38.000000 16.000000 11.000000 17.000000 56.000000 58.000000 ... 54.834899 3.381834 50.263465 9.740049e+00 12.434874 12.187434 242.000000 246.250000 56.942025 4.912775
max NaN 2019.000000 20.000000 NaN 38.000000 32.000000 18.000000 29.000000 118.000000 94.000000 ... 88.432186 16.370737 78.535447 7.304931e+01 21.896752 30.468113 582.000000 375.000000 94.380000 23.047500

11 rows × 24 columns

# Count the lines in the raw file; this figure includes the header line.
!wc -l understat.csv
685 understat.csv
# Show the header and the first four data rows of the raw file again, to see
# the column order before slicing fields by position with cut.
!head -n 5 understat.csv
,,position,team,matches,wins,draws,loses,scored,missed,pts,xG,xG_diff,npxG,xGA,xGA_diff,npxGA,npxGD,ppda_coef,oppda_coef,deep,deep_allowed,xpts,xpts_diff
La_liga,2014,1,Barcelona,38,30,4,4,110,21,94,102.98015200000002,-7.019847999999982,97.77721200000002,28.44429270000001,7.444292700000009,24.727906700000005,73.04930530000001,5.683534703382723,16.367592989090525,489,114,94.08129999999998,0.0812999999999846
La_liga,2014,2,Real Madrid,38,30,2,6,118,38,92,95.76624299999999,-22.23375700000001,86.10389499999998,42.607198000000004,4.607198000000004,38.890805,47.213090000000015,10.209085456325049,12.929510106152211,351,153,81.7489,-10.251099999999994
La_liga,2014,3,Atletico Madrid,38,23,9,6,67,29,78,57.047670000000004,-9.952329999999996,52.588007999999995,29.069107100000004,0.06910710000000364,26.839271100000005,25.748736900000008,8.982028430893806,9.237090640679776,197,123,73.13530000000003,-4.864699999999971
La_liga,2014,4,Valencia,38,22,11,5,70,32,77,55.06250000000001,-14.937499999999993,49.703978,39.392571999999994,7.392571999999994,33.44647700000001,16.257500999999998,8.709827299105736,7.870224725817145,203,172,63.7068,-13.293199999999999
# Frequency table of the 5th comma-separated field (the "matches" column).
# The header line is counted too, so the column name itself appears once.
# NOTE(review): cut splits on every comma and does not understand CSV quoting;
# this assumes no field contains an embedded comma. Confirm.
!cut -f 5 -d "," understat.csv | sort | uniq -c
      2 27
     18 28
     96 30
    108 34
     10 37
    450 38
      1 matches
# List completely empty lines with their line numbers (-n); -P selects
# Perl-compatible regex syntax. No output means there are no empty lines.
! grep -P "^$" -n understat.csv