1058 lines
40 KiB
Plaintext
1058 lines
40 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "ad6b7dc7",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: kaggle in /home/osboxes/.local/lib/python3.8/site-packages (1.5.12)\n",
|
||
"Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.14.0)\n",
|
||
"Requirement already satisfied: urllib3 in /usr/lib/python3/dist-packages (from kaggle) (1.25.8)\n",
|
||
"Requirement already satisfied: python-dateutil in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (2.8.2)\n",
|
||
"Requirement already satisfied: certifi in /usr/lib/python3/dist-packages (from kaggle) (2019.11.28)\n",
|
||
"Requirement already satisfied: requests in /usr/lib/python3/dist-packages (from kaggle) (2.22.0)\n",
|
||
"Requirement already satisfied: tqdm in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (4.63.0)\n",
|
||
"Requirement already satisfied: python-slugify in /home/osboxes/.local/lib/python3.8/site-packages (from kaggle) (6.1.1)\n",
|
||
"Requirement already satisfied: text-unidecode>=1.3 in /home/osboxes/.local/lib/python3.8/site-packages (from python-slugify->kaggle) (1.3)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install --user kaggle==1.5.12"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "4ab2c14f",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: pandas in /home/osboxes/.local/lib/python3.8/site-packages (1.4.1)\r\n",
|
||
"Requirement already satisfied: python-dateutil>=2.8.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (2.8.2)\r\n",
|
||
"Requirement already satisfied: pytz>=2020.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (2022.1)\r\n",
|
||
"Requirement already satisfied: numpy>=1.18.5; platform_machine != \"aarch64\" and platform_machine != \"arm64\" and python_version < \"3.10\" in /home/osboxes/.local/lib/python3.8/site-packages (from pandas) (1.22.3)\r\n",
|
||
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas) (1.14.0)\r\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install --user pandas==1.4.1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "c0597767",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"mkdir: cannot create directory ‘/home/osboxes/.kaggle’: File exists\r\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!mkdir -p ~/.kaggle"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "2465b1e9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"!cp ~/Downloads/kaggle.json ~/.kaggle/kaggle.json && chmod 600 ~/.kaggle/kaggle.json"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "faa7e821",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/osboxes/.kaggle/kaggle.json'\n",
|
||
"Downloading extended-football-stats-for-european-leagues-xg.zip to /home/osboxes/jupyter_dir/notebooks/IUM\n",
|
||
" 73%|███████████████████████████▋ | 1.00M/1.37M [00:00<00:00, 4.92MB/s]\n",
|
||
"100%|██████████████████████████████████████| 1.37M/1.37M [00:00<00:00, 6.55MB/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!kaggle datasets download -d slehkyi/extended-football-stats-for-european-leagues-xg"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "d5b18a91",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Archive: extended-football-stats-for-european-leagues-xg.zip\n",
|
||
" inflating: understat.com.csv \n",
|
||
" inflating: understat_per_game.csv \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!unzip -o extended-football-stats-for-european-leagues-xg.zip && ([ -e understat.csv ] || cp understat.com.csv understat.csv)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "0283db51",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Collecting seaborn\n",
|
||
" Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)\n",
|
||
"\u001b[K |████████████████████████████████| 292 kB 2.0 MB/s eta 0:00:01\n",
|
||
"\u001b[?25hRequirement already satisfied: pandas>=0.23 in /home/osboxes/.local/lib/python3.8/site-packages (from seaborn) (1.4.1)\n",
|
||
"Requirement already satisfied: numpy>=1.15 in /home/osboxes/.local/lib/python3.8/site-packages (from seaborn) (1.22.3)\n",
|
||
"Collecting matplotlib>=2.2\n",
|
||
" Downloading matplotlib-3.5.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.3 MB)\n",
|
||
"\u001b[K |████████████████████████████████| 11.3 MB 5.7 MB/s eta 0:00:01 |██████████████████████ | 7.7 MB 5.7 MB/s eta 0:00:01 |█████████████████████████ | 8.8 MB 5.7 MB/s eta 0:00:01\n",
|
||
"\u001b[?25hCollecting scipy>=1.0\n",
|
||
" Downloading scipy-1.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.6 MB)\n",
|
||
"\u001b[K |████████████████████████████████| 41.6 MB 27 kB/s eta 0:00:011 |███▊ | 4.9 MB 5.0 MB/s eta 0:00:08 |██████ | 7.8 MB 7.9 MB/s eta 0:00:05 |██████████████████ | 23.3 MB 4.5 MB/s eta 0:00:05 |██████████████████▎ | 23.7 MB 4.5 MB/s eta 0:00:04 |█████████████████████▍ | 27.8 MB 10.0 MB/s eta 0:00:02 |███████████████████████████▏ | 35.3 MB 7.2 MB/s eta 0:00:01 |████████████████████████████▍ | 36.9 MB 7.2 MB/s eta 0:00:01\n",
|
||
"\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2.8.2)\n",
|
||
"Requirement already satisfied: pytz>=2020.1 in /home/osboxes/.local/lib/python3.8/site-packages (from pandas>=0.23->seaborn) (2022.1)\n",
|
||
"Collecting fonttools>=4.22.0\n",
|
||
" Downloading fonttools-4.31.1-py3-none-any.whl (899 kB)\n",
|
||
"\u001b[K |████████████████████████████████| 899 kB 3.2 MB/s eta 0:00:01\n",
|
||
"\u001b[?25hRequirement already satisfied: pillow>=6.2.0 in /usr/lib/python3/dist-packages (from matplotlib>=2.2->seaborn) (7.0.0)\n",
|
||
"Requirement already satisfied: pyparsing>=2.2.1 in /home/osboxes/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (3.0.7)\n",
|
||
"Collecting kiwisolver>=1.0.1\n",
|
||
" Downloading kiwisolver-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)\n",
|
||
"\u001b[K |████████████████████████████████| 1.2 MB 12.4 MB/s eta 0:00:01\n",
|
||
"\u001b[?25hRequirement already satisfied: packaging>=20.0 in /home/osboxes/.local/lib/python3.8/site-packages (from matplotlib>=2.2->seaborn) (21.3)\n",
|
||
"Collecting cycler>=0.10\n",
|
||
" Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)\n",
|
||
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas>=0.23->seaborn) (1.14.0)\n",
|
||
"Installing collected packages: fonttools, kiwisolver, cycler, matplotlib, scipy, seaborn\n",
|
||
"Successfully installed cycler-0.11.0 fonttools-4.31.1 kiwisolver-1.4.0 matplotlib-3.5.1 scipy-1.8.0 seaborn-0.11.2\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install --user seaborn==0.11.2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "2cd1e392",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
",,position,team,matches,wins,draws,loses,scored,missed,pts,xG,xG_diff,npxG,xGA,xGA_diff,npxGA,npxGD,ppda_coef,oppda_coef,deep,deep_allowed,xpts,xpts_diff\r\n",
|
||
"La_liga,2014,1,Barcelona,38,30,4,4,110,21,94,102.98015200000002,-7.019847999999982,97.77721200000002,28.44429270000001,7.444292700000009,24.727906700000005,73.04930530000001,5.683534703382723,16.367592989090525,489,114,94.08129999999998,0.0812999999999846\r\n",
|
||
"La_liga,2014,2,Real Madrid,38,30,2,6,118,38,92,95.76624299999999,-22.23375700000001,86.10389499999998,42.607198000000004,4.607198000000004,38.890805,47.213090000000015,10.209085456325049,12.929510106152211,351,153,81.7489,-10.251099999999994\r\n",
|
||
"La_liga,2014,3,Atletico Madrid,38,23,9,6,67,29,78,57.047670000000004,-9.952329999999996,52.588007999999995,29.069107100000004,0.06910710000000364,26.839271100000005,25.748736900000008,8.982028430893806,9.237090640679776,197,123,73.13530000000003,-4.864699999999971\r\n",
|
||
"La_liga,2014,4,Valencia,38,22,11,5,70,32,77,55.06250000000001,-14.937499999999993,49.703978,39.392571999999994,7.392571999999994,33.44647700000001,16.257500999999998,8.709827299105736,7.870224725817145,203,172,63.7068,-13.293199999999999\r\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!head -n 5 understat.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "12a3ddce",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>league</th>\n",
|
||
" <th>year</th>\n",
|
||
" <th>position</th>\n",
|
||
" <th>team</th>\n",
|
||
" <th>matches</th>\n",
|
||
" <th>wins</th>\n",
|
||
" <th>draws</th>\n",
|
||
" <th>loses</th>\n",
|
||
" <th>scored</th>\n",
|
||
" <th>missed</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>xGA</th>\n",
|
||
" <th>xGA_diff</th>\n",
|
||
" <th>npxGA</th>\n",
|
||
" <th>npxGD</th>\n",
|
||
" <th>ppda_coef</th>\n",
|
||
" <th>oppda_coef</th>\n",
|
||
" <th>deep</th>\n",
|
||
" <th>deep_allowed</th>\n",
|
||
" <th>xpts</th>\n",
|
||
" <th>xpts_diff</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Barcelona</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>110</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>28.444293</td>\n",
|
||
" <td>7.444293</td>\n",
|
||
" <td>24.727907</td>\n",
|
||
" <td>73.049305</td>\n",
|
||
" <td>5.683535</td>\n",
|
||
" <td>16.367593</td>\n",
|
||
" <td>489</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>94.0813</td>\n",
|
||
" <td>0.0813</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Real Madrid</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>118</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>42.607198</td>\n",
|
||
" <td>4.607198</td>\n",
|
||
" <td>38.890805</td>\n",
|
||
" <td>47.213090</td>\n",
|
||
" <td>10.209085</td>\n",
|
||
" <td>12.929510</td>\n",
|
||
" <td>351</td>\n",
|
||
" <td>153</td>\n",
|
||
" <td>81.7489</td>\n",
|
||
" <td>-10.2511</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Atletico Madrid</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>67</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>29.069107</td>\n",
|
||
" <td>0.069107</td>\n",
|
||
" <td>26.839271</td>\n",
|
||
" <td>25.748737</td>\n",
|
||
" <td>8.982028</td>\n",
|
||
" <td>9.237091</td>\n",
|
||
" <td>197</td>\n",
|
||
" <td>123</td>\n",
|
||
" <td>73.1353</td>\n",
|
||
" <td>-4.8647</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Valencia</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>32</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>39.392572</td>\n",
|
||
" <td>7.392572</td>\n",
|
||
" <td>33.446477</td>\n",
|
||
" <td>16.257501</td>\n",
|
||
" <td>8.709827</td>\n",
|
||
" <td>7.870225</td>\n",
|
||
" <td>203</td>\n",
|
||
" <td>172</td>\n",
|
||
" <td>63.7068</td>\n",
|
||
" <td>-13.2932</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Sevilla</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>45</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>47.862742</td>\n",
|
||
" <td>2.862742</td>\n",
|
||
" <td>41.916529</td>\n",
|
||
" <td>20.178070</td>\n",
|
||
" <td>8.276148</td>\n",
|
||
" <td>9.477805</td>\n",
|
||
" <td>305</td>\n",
|
||
" <td>168</td>\n",
|
||
" <td>67.3867</td>\n",
|
||
" <td>-8.6133</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>679</th>\n",
|
||
" <td>RFPL</td>\n",
|
||
" <td>2019</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>PFC Sochi</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>40</td>\n",
|
||
" <td>39</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>38.850259</td>\n",
|
||
" <td>-0.149741</td>\n",
|
||
" <td>32.780898</td>\n",
|
||
" <td>-0.096048</td>\n",
|
||
" <td>12.838079</td>\n",
|
||
" <td>10.562327</td>\n",
|
||
" <td>175</td>\n",
|
||
" <td>206</td>\n",
|
||
" <td>38.6587</td>\n",
|
||
" <td>5.6587</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>680</th>\n",
|
||
" <td>RFPL</td>\n",
|
||
" <td>2019</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>FK Akhmat</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>27</td>\n",
|
||
" <td>46</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>40.626196</td>\n",
|
||
" <td>-5.373804</td>\n",
|
||
" <td>38.363370</td>\n",
|
||
" <td>-10.495864</td>\n",
|
||
" <td>11.199502</td>\n",
|
||
" <td>10.806357</td>\n",
|
||
" <td>124</td>\n",
|
||
" <td>206</td>\n",
|
||
" <td>36.5424</td>\n",
|
||
" <td>5.5424</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>681</th>\n",
|
||
" <td>RFPL</td>\n",
|
||
" <td>2019</td>\n",
|
||
" <td>14</td>\n",
|
||
" <td>Krylya Sovetov Samara</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>40</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>42.980693</td>\n",
|
||
" <td>2.980693</td>\n",
|
||
" <td>37.550114</td>\n",
|
||
" <td>-7.777201</td>\n",
|
||
" <td>11.949903</td>\n",
|
||
" <td>10.080858</td>\n",
|
||
" <td>103</td>\n",
|
||
" <td>215</td>\n",
|
||
" <td>36.3363</td>\n",
|
||
" <td>5.3363</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>682</th>\n",
|
||
" <td>RFPL</td>\n",
|
||
" <td>2019</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>FC Tambov</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>37</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>39.747938</td>\n",
|
||
" <td>-1.252062</td>\n",
|
||
" <td>34.468003</td>\n",
|
||
" <td>-12.231948</td>\n",
|
||
" <td>14.666049</td>\n",
|
||
" <td>9.192768</td>\n",
|
||
" <td>150</td>\n",
|
||
" <td>270</td>\n",
|
||
" <td>29.2413</td>\n",
|
||
" <td>-1.7587</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>683</th>\n",
|
||
" <td>RFPL</td>\n",
|
||
" <td>2019</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>FC Orenburg</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>28</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>37.169797</td>\n",
|
||
" <td>-14.830203</td>\n",
|
||
" <td>32.644130</td>\n",
|
||
" <td>0.201339</td>\n",
|
||
" <td>12.830908</td>\n",
|
||
" <td>9.464581</td>\n",
|
||
" <td>153</td>\n",
|
||
" <td>215</td>\n",
|
||
" <td>39.2364</td>\n",
|
||
" <td>12.2364</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>684 rows × 24 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" league year position team matches wins draws \\\n",
|
||
"0 La_liga 2014 1 Barcelona 38 30 4 \n",
|
||
"1 La_liga 2014 2 Real Madrid 38 30 2 \n",
|
||
"2 La_liga 2014 3 Atletico Madrid 38 23 9 \n",
|
||
"3 La_liga 2014 4 Valencia 38 22 11 \n",
|
||
"4 La_liga 2014 5 Sevilla 38 23 7 \n",
|
||
".. ... ... ... ... ... ... ... \n",
|
||
"679 RFPL 2019 12 PFC Sochi 30 8 9 \n",
|
||
"680 RFPL 2019 13 FK Akhmat 30 7 10 \n",
|
||
"681 RFPL 2019 14 Krylya Sovetov Samara 30 8 7 \n",
|
||
"682 RFPL 2019 15 FC Tambov 30 9 4 \n",
|
||
"683 RFPL 2019 16 FC Orenburg 30 7 6 \n",
|
||
"\n",
|
||
" loses scored missed ... xGA xGA_diff npxGA npxGD \\\n",
|
||
"0 4 110 21 ... 28.444293 7.444293 24.727907 73.049305 \n",
|
||
"1 6 118 38 ... 42.607198 4.607198 38.890805 47.213090 \n",
|
||
"2 6 67 29 ... 29.069107 0.069107 26.839271 25.748737 \n",
|
||
"3 5 70 32 ... 39.392572 7.392572 33.446477 16.257501 \n",
|
||
"4 8 71 45 ... 47.862742 2.862742 41.916529 20.178070 \n",
|
||
".. ... ... ... ... ... ... ... ... \n",
|
||
"679 13 40 39 ... 38.850259 -0.149741 32.780898 -0.096048 \n",
|
||
"680 13 27 46 ... 40.626196 -5.373804 38.363370 -10.495864 \n",
|
||
"681 15 33 40 ... 42.980693 2.980693 37.550114 -7.777201 \n",
|
||
"682 17 37 41 ... 39.747938 -1.252062 34.468003 -12.231948 \n",
|
||
"683 17 28 52 ... 37.169797 -14.830203 32.644130 0.201339 \n",
|
||
"\n",
|
||
" ppda_coef oppda_coef deep deep_allowed xpts xpts_diff \n",
|
||
"0 5.683535 16.367593 489 114 94.0813 0.0813 \n",
|
||
"1 10.209085 12.929510 351 153 81.7489 -10.2511 \n",
|
||
"2 8.982028 9.237091 197 123 73.1353 -4.8647 \n",
|
||
"3 8.709827 7.870225 203 172 63.7068 -13.2932 \n",
|
||
"4 8.276148 9.477805 305 168 67.3867 -8.6133 \n",
|
||
".. ... ... ... ... ... ... \n",
|
||
"679 12.838079 10.562327 175 206 38.6587 5.6587 \n",
|
||
"680 11.199502 10.806357 124 206 36.5424 5.5424 \n",
|
||
"681 11.949903 10.080858 103 215 36.3363 5.3363 \n",
|
||
"682 14.666049 9.192768 150 270 29.2413 -1.7587 \n",
|
||
"683 12.830908 9.464581 153 215 39.2364 12.2364 \n",
|
||
"\n",
|
||
"[684 rows x 24 columns]"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"understat = pd.read_csv('understat.csv').rename(columns={'Unnamed: 0': 'league', 'Unnamed: 1': 'year'})\n",
|
||
"understat"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "e969975f",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>league</th>\n",
|
||
" <th>year</th>\n",
|
||
" <th>position</th>\n",
|
||
" <th>team</th>\n",
|
||
" <th>matches</th>\n",
|
||
" <th>wins</th>\n",
|
||
" <th>draws</th>\n",
|
||
" <th>loses</th>\n",
|
||
" <th>scored</th>\n",
|
||
" <th>missed</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>xGA</th>\n",
|
||
" <th>xGA_diff</th>\n",
|
||
" <th>npxGA</th>\n",
|
||
" <th>npxGD</th>\n",
|
||
" <th>ppda_coef</th>\n",
|
||
" <th>oppda_coef</th>\n",
|
||
" <th>deep</th>\n",
|
||
" <th>deep_allowed</th>\n",
|
||
" <th>xpts</th>\n",
|
||
" <th>xpts_diff</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>684</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>6.840000e+02</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" <td>684.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>168</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Barcelona</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>120</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2016.500000</td>\n",
|
||
" <td>10.061404</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>35.935673</td>\n",
|
||
" <td>13.434211</td>\n",
|
||
" <td>9.067251</td>\n",
|
||
" <td>13.434211</td>\n",
|
||
" <td>48.190058</td>\n",
|
||
" <td>48.190058</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>47.064744</td>\n",
|
||
" <td>-1.125315</td>\n",
|
||
" <td>42.902596</td>\n",
|
||
" <td>-4.155221e-17</td>\n",
|
||
" <td>10.911784</td>\n",
|
||
" <td>10.911772</td>\n",
|
||
" <td>208.676901</td>\n",
|
||
" <td>208.676901</td>\n",
|
||
" <td>49.539598</td>\n",
|
||
" <td>0.169715</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.709075</td>\n",
|
||
" <td>5.580165</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.203487</td>\n",
|
||
" <td>5.880962</td>\n",
|
||
" <td>2.941824</td>\n",
|
||
" <td>5.510278</td>\n",
|
||
" <td>17.605374</td>\n",
|
||
" <td>13.866509</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>11.781399</td>\n",
|
||
" <td>6.663632</td>\n",
|
||
" <td>11.002013</td>\n",
|
||
" <td>1.929269e+01</td>\n",
|
||
" <td>2.521398</td>\n",
|
||
" <td>3.301410</td>\n",
|
||
" <td>83.888073</td>\n",
|
||
" <td>54.713624</td>\n",
|
||
" <td>13.559213</td>\n",
|
||
" <td>7.156998</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2014.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>27.000000</td>\n",
|
||
" <td>2.000000</td>\n",
|
||
" <td>2.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>13.000000</td>\n",
|
||
" <td>15.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>16.838674</td>\n",
|
||
" <td>-29.175087</td>\n",
|
||
" <td>16.084399</td>\n",
|
||
" <td>-4.220877e+01</td>\n",
|
||
" <td>5.683535</td>\n",
|
||
" <td>4.394458</td>\n",
|
||
" <td>76.000000</td>\n",
|
||
" <td>83.000000</td>\n",
|
||
" <td>17.907700</td>\n",
|
||
" <td>-24.721600</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2015.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>34.000000</td>\n",
|
||
" <td>9.000000</td>\n",
|
||
" <td>7.000000</td>\n",
|
||
" <td>9.000000</td>\n",
|
||
" <td>36.000000</td>\n",
|
||
" <td>38.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>38.916186</td>\n",
|
||
" <td>-5.698828</td>\n",
|
||
" <td>35.474606</td>\n",
|
||
" <td>-1.325816e+01</td>\n",
|
||
" <td>9.090617</td>\n",
|
||
" <td>8.809866</td>\n",
|
||
" <td>151.750000</td>\n",
|
||
" <td>170.000000</td>\n",
|
||
" <td>39.466550</td>\n",
|
||
" <td>-4.498400</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2016.500000</td>\n",
|
||
" <td>10.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>38.000000</td>\n",
|
||
" <td>12.000000</td>\n",
|
||
" <td>9.000000</td>\n",
|
||
" <td>14.000000</td>\n",
|
||
" <td>45.000000</td>\n",
|
||
" <td>48.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>47.310924</td>\n",
|
||
" <td>-0.918895</td>\n",
|
||
" <td>43.031911</td>\n",
|
||
" <td>-3.127901e+00</td>\n",
|
||
" <td>10.562543</td>\n",
|
||
" <td>10.347047</td>\n",
|
||
" <td>188.000000</td>\n",
|
||
" <td>205.000000</td>\n",
|
||
" <td>47.102100</td>\n",
|
||
" <td>0.116050</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2018.000000</td>\n",
|
||
" <td>15.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>38.000000</td>\n",
|
||
" <td>16.000000</td>\n",
|
||
" <td>11.000000</td>\n",
|
||
" <td>17.000000</td>\n",
|
||
" <td>56.000000</td>\n",
|
||
" <td>58.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>54.834899</td>\n",
|
||
" <td>3.381834</td>\n",
|
||
" <td>50.263465</td>\n",
|
||
" <td>9.740049e+00</td>\n",
|
||
" <td>12.434874</td>\n",
|
||
" <td>12.187434</td>\n",
|
||
" <td>242.000000</td>\n",
|
||
" <td>246.250000</td>\n",
|
||
" <td>56.942025</td>\n",
|
||
" <td>4.912775</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2019.000000</td>\n",
|
||
" <td>20.000000</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>38.000000</td>\n",
|
||
" <td>32.000000</td>\n",
|
||
" <td>18.000000</td>\n",
|
||
" <td>29.000000</td>\n",
|
||
" <td>118.000000</td>\n",
|
||
" <td>94.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>88.432186</td>\n",
|
||
" <td>16.370737</td>\n",
|
||
" <td>78.535447</td>\n",
|
||
" <td>7.304931e+01</td>\n",
|
||
" <td>21.896752</td>\n",
|
||
" <td>30.468113</td>\n",
|
||
" <td>582.000000</td>\n",
|
||
" <td>375.000000</td>\n",
|
||
" <td>94.380000</td>\n",
|
||
" <td>23.047500</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>11 rows × 24 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" league year position team matches wins \\\n",
|
||
"count 684 684.000000 684.000000 684 684.000000 684.000000 \n",
|
||
"unique 6 NaN NaN 168 NaN NaN \n",
|
||
"top La_liga NaN NaN Barcelona NaN NaN \n",
|
||
"freq 120 NaN NaN 6 NaN NaN \n",
|
||
"mean NaN 2016.500000 10.061404 NaN 35.935673 13.434211 \n",
|
||
"std NaN 1.709075 5.580165 NaN 3.203487 5.880962 \n",
|
||
"min NaN 2014.000000 1.000000 NaN 27.000000 2.000000 \n",
|
||
"25% NaN 2015.000000 5.000000 NaN 34.000000 9.000000 \n",
|
||
"50% NaN 2016.500000 10.000000 NaN 38.000000 12.000000 \n",
|
||
"75% NaN 2018.000000 15.000000 NaN 38.000000 16.000000 \n",
|
||
"max NaN 2019.000000 20.000000 NaN 38.000000 32.000000 \n",
|
||
"\n",
|
||
" draws loses scored missed ... xGA \\\n",
|
||
"count 684.000000 684.000000 684.000000 684.000000 ... 684.000000 \n",
|
||
"unique NaN NaN NaN NaN ... NaN \n",
|
||
"top NaN NaN NaN NaN ... NaN \n",
|
||
"freq NaN NaN NaN NaN ... NaN \n",
|
||
"mean 9.067251 13.434211 48.190058 48.190058 ... 47.064744 \n",
|
||
"std 2.941824 5.510278 17.605374 13.866509 ... 11.781399 \n",
|
||
"min 2.000000 1.000000 13.000000 15.000000 ... 16.838674 \n",
|
||
"25% 7.000000 9.000000 36.000000 38.000000 ... 38.916186 \n",
|
||
"50% 9.000000 14.000000 45.000000 48.000000 ... 47.310924 \n",
|
||
"75% 11.000000 17.000000 56.000000 58.000000 ... 54.834899 \n",
|
||
"max 18.000000 29.000000 118.000000 94.000000 ... 88.432186 \n",
|
||
"\n",
|
||
" xGA_diff npxGA npxGD ppda_coef oppda_coef \\\n",
|
||
"count 684.000000 684.000000 6.840000e+02 684.000000 684.000000 \n",
|
||
"unique NaN NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN NaN \n",
|
||
"mean -1.125315 42.902596 -4.155221e-17 10.911784 10.911772 \n",
|
||
"std 6.663632 11.002013 1.929269e+01 2.521398 3.301410 \n",
|
||
"min -29.175087 16.084399 -4.220877e+01 5.683535 4.394458 \n",
|
||
"25% -5.698828 35.474606 -1.325816e+01 9.090617 8.809866 \n",
|
||
"50% -0.918895 43.031911 -3.127901e+00 10.562543 10.347047 \n",
|
||
"75% 3.381834 50.263465 9.740049e+00 12.434874 12.187434 \n",
|
||
"max 16.370737 78.535447 7.304931e+01 21.896752 30.468113 \n",
|
||
"\n",
|
||
" deep deep_allowed xpts xpts_diff \n",
|
||
"count 684.000000 684.000000 684.000000 684.000000 \n",
|
||
"unique NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN \n",
|
||
"mean 208.676901 208.676901 49.539598 0.169715 \n",
|
||
"std 83.888073 54.713624 13.559213 7.156998 \n",
|
||
"min 76.000000 83.000000 17.907700 -24.721600 \n",
|
||
"25% 151.750000 170.000000 39.466550 -4.498400 \n",
|
||
"50% 188.000000 205.000000 47.102100 0.116050 \n",
|
||
"75% 242.000000 246.250000 56.942025 4.912775 \n",
|
||
"max 582.000000 375.000000 94.380000 23.047500 \n",
|
||
"\n",
|
||
"[11 rows x 24 columns]"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"understat.describe(include='all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "136925a1",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"685 understat.csv\r\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!wc -l understat.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "9478eaa2",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
",,position,team,matches,wins,draws,loses,scored,missed,pts,xG,xG_diff,npxG,xGA,xGA_diff,npxGA,npxGD,ppda_coef,oppda_coef,deep,deep_allowed,xpts,xpts_diff\r\n",
|
||
"La_liga,2014,1,Barcelona,38,30,4,4,110,21,94,102.98015200000002,-7.019847999999982,97.77721200000002,28.44429270000001,7.444292700000009,24.727906700000005,73.04930530000001,5.683534703382723,16.367592989090525,489,114,94.08129999999998,0.0812999999999846\r\n",
|
||
"La_liga,2014,2,Real Madrid,38,30,2,6,118,38,92,95.76624299999999,-22.23375700000001,86.10389499999998,42.607198000000004,4.607198000000004,38.890805,47.213090000000015,10.209085456325049,12.929510106152211,351,153,81.7489,-10.251099999999994\r\n",
|
||
"La_liga,2014,3,Atletico Madrid,38,23,9,6,67,29,78,57.047670000000004,-9.952329999999996,52.588007999999995,29.069107100000004,0.06910710000000364,26.839271100000005,25.748736900000008,8.982028430893806,9.237090640679776,197,123,73.13530000000003,-4.864699999999971\r\n",
|
||
"La_liga,2014,4,Valencia,38,22,11,5,70,32,77,55.06250000000001,-14.937499999999993,49.703978,39.392571999999994,7.392571999999994,33.44647700000001,16.257500999999998,8.709827299105736,7.870224725817145,203,172,63.7068,-13.293199999999999\r\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!head -n 5 understat.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "60448bf7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" 2 27\r\n",
|
||
" 18 28\r\n",
|
||
" 96 30\r\n",
|
||
" 108 34\r\n",
|
||
" 10 37\r\n",
|
||
" 450 38\r\n",
|
||
" 1 matches\r\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!cut -f 5 -d \",\" understat.csv | sort | uniq -c"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "f7668054",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"! grep -P \"^$\" -n understat.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "cba7e932",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.10"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|