544 lines
17 KiB
Plaintext
544 lines
17 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "7100de70",
|
||
"metadata": {},
|
||
"source": [
|
||
"1. Pobieranie bazy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "17518731",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/osboxes/.kaggle/kaggle.json'\n",
|
||
"Downloading extended-football-stats-for-european-leagues-xg.zip to /home/osboxes/jupyter_dir/notebooks/IUM03\n",
|
||
" 73%|███████████████████████████▋ | 1.00M/1.37M [00:00<00:00, 5.12MB/s]\n",
|
||
"100%|██████████████████████████████████████| 1.37M/1.37M [00:00<00:00, 3.95MB/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!kaggle datasets download -d slehkyi/extended-football-stats-for-european-leagues-xg"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "0d9abb90",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Archive: extended-football-stats-for-european-leagues-xg.zip\n",
|
||
" inflating: understat.com.csv \n",
|
||
" inflating: understat_per_game.csv \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!unzip -o extended-football-stats-for-european-leagues-xg.zip"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "edcb45ce",
|
||
"metadata": {},
|
||
"source": [
|
||
"2. Zmiana nazwy pliku"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "cb6595a3",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"!mv understat.com.csv understat.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "b4aea8ec",
|
||
"metadata": {},
|
||
"source": [
|
||
"3. Zmiana nazwy kolumn"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "6c519048",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"understat = pd.read_csv('understat.csv')\n",
|
||
"understat_per_game = pd.read_csv('understat_per_game.csv')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"id": "fc730d10",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# pandas labels header-less CSV columns 'Unnamed: N'; give the first two real names.\n",
"# A single non-in-place rename keeps this cell safe to re-run (missing keys are ignored).\n",
"understat = understat.rename(columns={'Unnamed: 0': 'league', 'Unnamed: 1': 'year'})"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"id": "5b636526",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>league</th>\n",
|
||
" <th>year</th>\n",
|
||
" <th>position</th>\n",
|
||
" <th>team</th>\n",
|
||
" <th>matches</th>\n",
|
||
" <th>wins</th>\n",
|
||
" <th>draws</th>\n",
|
||
" <th>loses</th>\n",
|
||
" <th>scored</th>\n",
|
||
" <th>missed</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>xGA</th>\n",
|
||
" <th>xGA_diff</th>\n",
|
||
" <th>npxGA</th>\n",
|
||
" <th>npxGD</th>\n",
|
||
" <th>ppda_coef</th>\n",
|
||
" <th>oppda_coef</th>\n",
|
||
" <th>deep</th>\n",
|
||
" <th>deep_allowed</th>\n",
|
||
" <th>xpts</th>\n",
|
||
" <th>xpts_diff</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>Barcelona</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>110</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>28.444293</td>\n",
|
||
" <td>7.444293</td>\n",
|
||
" <td>24.727907</td>\n",
|
||
" <td>73.049305</td>\n",
|
||
" <td>5.683535</td>\n",
|
||
" <td>16.367593</td>\n",
|
||
" <td>489</td>\n",
|
||
" <td>114</td>\n",
|
||
" <td>94.0813</td>\n",
|
||
" <td>0.0813</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>Real Madrid</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>30</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>118</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>42.607198</td>\n",
|
||
" <td>4.607198</td>\n",
|
||
" <td>38.890805</td>\n",
|
||
" <td>47.213090</td>\n",
|
||
" <td>10.209085</td>\n",
|
||
" <td>12.929510</td>\n",
|
||
" <td>351</td>\n",
|
||
" <td>153</td>\n",
|
||
" <td>81.7489</td>\n",
|
||
" <td>-10.2511</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>Atletico Madrid</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>67</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>29.069107</td>\n",
|
||
" <td>0.069107</td>\n",
|
||
" <td>26.839271</td>\n",
|
||
" <td>25.748737</td>\n",
|
||
" <td>8.982028</td>\n",
|
||
" <td>9.237091</td>\n",
|
||
" <td>197</td>\n",
|
||
" <td>123</td>\n",
|
||
" <td>73.1353</td>\n",
|
||
" <td>-4.8647</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>Valencia</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>11</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>32</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>39.392572</td>\n",
|
||
" <td>7.392572</td>\n",
|
||
" <td>33.446477</td>\n",
|
||
" <td>16.257501</td>\n",
|
||
" <td>8.709827</td>\n",
|
||
" <td>7.870225</td>\n",
|
||
" <td>203</td>\n",
|
||
" <td>172</td>\n",
|
||
" <td>63.7068</td>\n",
|
||
" <td>-13.2932</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>La_liga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>Sevilla</td>\n",
|
||
" <td>38</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>71</td>\n",
|
||
" <td>45</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>47.862742</td>\n",
|
||
" <td>2.862742</td>\n",
|
||
" <td>41.916529</td>\n",
|
||
" <td>20.178070</td>\n",
|
||
" <td>8.276148</td>\n",
|
||
" <td>9.477805</td>\n",
|
||
" <td>305</td>\n",
|
||
" <td>168</td>\n",
|
||
" <td>67.3867</td>\n",
|
||
" <td>-8.6133</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 24 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" league year position team matches wins draws loses \\\n",
|
||
"0 La_liga 2014 1 Barcelona 38 30 4 4 \n",
|
||
"1 La_liga 2014 2 Real Madrid 38 30 2 6 \n",
|
||
"2 La_liga 2014 3 Atletico Madrid 38 23 9 6 \n",
|
||
"3 La_liga 2014 4 Valencia 38 22 11 5 \n",
|
||
"4 La_liga 2014 5 Sevilla 38 23 7 8 \n",
|
||
"\n",
|
||
" scored missed ... xGA xGA_diff npxGA npxGD ppda_coef \\\n",
|
||
"0 110 21 ... 28.444293 7.444293 24.727907 73.049305 5.683535 \n",
|
||
"1 118 38 ... 42.607198 4.607198 38.890805 47.213090 10.209085 \n",
|
||
"2 67 29 ... 29.069107 0.069107 26.839271 25.748737 8.982028 \n",
|
||
"3 70 32 ... 39.392572 7.392572 33.446477 16.257501 8.709827 \n",
|
||
"4 71 45 ... 47.862742 2.862742 41.916529 20.178070 8.276148 \n",
|
||
"\n",
|
||
" oppda_coef deep deep_allowed xpts xpts_diff \n",
|
||
"0 16.367593 489 114 94.0813 0.0813 \n",
|
||
"1 12.929510 351 153 81.7489 -10.2511 \n",
|
||
"2 9.237091 197 123 73.1353 -4.8647 \n",
|
||
"3 7.870225 203 172 63.7068 -13.2932 \n",
|
||
"4 9.477805 305 168 67.3867 -8.6133 \n",
|
||
"\n",
|
||
"[5 rows x 24 columns]"
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"understat.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"id": "cbf956d9",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>league</th>\n",
|
||
" <th>year</th>\n",
|
||
" <th>h_a</th>\n",
|
||
" <th>xG</th>\n",
|
||
" <th>xGA</th>\n",
|
||
" <th>npxG</th>\n",
|
||
" <th>npxGA</th>\n",
|
||
" <th>deep</th>\n",
|
||
" <th>deep_allowed</th>\n",
|
||
" <th>scored</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>ppda_coef</th>\n",
|
||
" <th>ppda_att</th>\n",
|
||
" <th>ppda_def</th>\n",
|
||
" <th>oppda_coef</th>\n",
|
||
" <th>oppda_att</th>\n",
|
||
" <th>oppda_def</th>\n",
|
||
" <th>team</th>\n",
|
||
" <th>xG_diff</th>\n",
|
||
" <th>xGA_diff</th>\n",
|
||
" <th>xpts_diff</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Bundesliga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>h</td>\n",
|
||
" <td>2.57012</td>\n",
|
||
" <td>1.198420</td>\n",
|
||
" <td>2.57012</td>\n",
|
||
" <td>1.198420</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>9.625000</td>\n",
|
||
" <td>231</td>\n",
|
||
" <td>24</td>\n",
|
||
" <td>21.850000</td>\n",
|
||
" <td>437</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>Bayern Munich</td>\n",
|
||
" <td>0.57012</td>\n",
|
||
" <td>0.198420</td>\n",
|
||
" <td>-0.6514</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Bundesliga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>a</td>\n",
|
||
" <td>1.50328</td>\n",
|
||
" <td>1.307950</td>\n",
|
||
" <td>1.50328</td>\n",
|
||
" <td>1.307950</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4.756098</td>\n",
|
||
" <td>195</td>\n",
|
||
" <td>41</td>\n",
|
||
" <td>17.695652</td>\n",
|
||
" <td>407</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>Bayern Munich</td>\n",
|
||
" <td>0.50328</td>\n",
|
||
" <td>0.307950</td>\n",
|
||
" <td>0.5143</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Bundesliga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>h</td>\n",
|
||
" <td>1.22987</td>\n",
|
||
" <td>0.310166</td>\n",
|
||
" <td>1.22987</td>\n",
|
||
" <td>0.310166</td>\n",
|
||
" <td>13</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5.060606</td>\n",
|
||
" <td>167</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>16.961538</td>\n",
|
||
" <td>441</td>\n",
|
||
" <td>26</td>\n",
|
||
" <td>Bayern Munich</td>\n",
|
||
" <td>-0.77013</td>\n",
|
||
" <td>0.310166</td>\n",
|
||
" <td>-0.8412</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Bundesliga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>a</td>\n",
|
||
" <td>1.03519</td>\n",
|
||
" <td>0.203118</td>\n",
|
||
" <td>1.03519</td>\n",
|
||
" <td>0.203118</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4.423077</td>\n",
|
||
" <td>115</td>\n",
|
||
" <td>26</td>\n",
|
||
" <td>9.446809</td>\n",
|
||
" <td>444</td>\n",
|
||
" <td>47</td>\n",
|
||
" <td>Bayern Munich</td>\n",
|
||
" <td>1.03519</td>\n",
|
||
" <td>0.203118</td>\n",
|
||
" <td>1.1367</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Bundesliga</td>\n",
|
||
" <td>2014</td>\n",
|
||
" <td>h</td>\n",
|
||
" <td>3.48286</td>\n",
|
||
" <td>0.402844</td>\n",
|
||
" <td>3.48286</td>\n",
|
||
" <td>0.402844</td>\n",
|
||
" <td>23</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4.250000</td>\n",
|
||
" <td>170</td>\n",
|
||
" <td>40</td>\n",
|
||
" <td>44.800000</td>\n",
|
||
" <td>448</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>Bayern Munich</td>\n",
|
||
" <td>-0.51714</td>\n",
|
||
" <td>0.402844</td>\n",
|
||
" <td>-0.0713</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 29 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" league year h_a xG xGA npxG npxGA deep \\\n",
|
||
"0 Bundesliga 2014 h 2.57012 1.198420 2.57012 1.198420 5 \n",
|
||
"1 Bundesliga 2014 a 1.50328 1.307950 1.50328 1.307950 10 \n",
|
||
"2 Bundesliga 2014 h 1.22987 0.310166 1.22987 0.310166 13 \n",
|
||
"3 Bundesliga 2014 a 1.03519 0.203118 1.03519 0.203118 6 \n",
|
||
"4 Bundesliga 2014 h 3.48286 0.402844 3.48286 0.402844 23 \n",
|
||
"\n",
|
||
" deep_allowed scored ... ppda_coef ppda_att ppda_def oppda_coef \\\n",
|
||
"0 4 2 ... 9.625000 231 24 21.850000 \n",
|
||
"1 1 1 ... 4.756098 195 41 17.695652 \n",
|
||
"2 3 2 ... 5.060606 167 33 16.961538 \n",
|
||
"3 2 0 ... 4.423077 115 26 9.446809 \n",
|
||
"4 2 4 ... 4.250000 170 40 44.800000 \n",
|
||
"\n",
|
||
" oppda_att oppda_def team xG_diff xGA_diff xpts_diff \n",
|
||
"0 437 20 Bayern Munich 0.57012 0.198420 -0.6514 \n",
|
||
"1 407 23 Bayern Munich 0.50328 0.307950 0.5143 \n",
|
||
"2 441 26 Bayern Munich -0.77013 0.310166 -0.8412 \n",
|
||
"3 444 47 Bayern Munich 1.03519 0.203118 1.1367 \n",
|
||
"4 448 10 Bayern Munich -0.51714 0.402844 -0.0713 \n",
|
||
"\n",
|
||
"[5 rows x 29 columns]"
|
||
]
|
||
},
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"understat_per_game.head()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.10"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|