dodanie ium03

This commit is contained in:
osboxes.org 2022-03-27 07:37:55 -04:00
parent 61ca567dee
commit d92f4f8161

543
IUM03.ipynb Normal file
View File

@ -0,0 +1,543 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7100de70",
"metadata": {},
"source": [
"1. Pobieranie zbioru danych z Kaggle"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "17518731",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/osboxes/.kaggle/kaggle.json'\n",
"Downloading extended-football-stats-for-european-leagues-xg.zip to /home/osboxes/jupyter_dir/notebooks/IUM03\n",
" 73%|███████████████████████████▋ | 1.00M/1.37M [00:00<00:00, 5.12MB/s]\n",
"100%|██████████████████████████████████████| 1.37M/1.37M [00:00<00:00, 3.95MB/s]\n"
]
}
],
"source": [
"!kaggle datasets download -d slehkyi/extended-football-stats-for-european-leagues-xg"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0d9abb90",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: extended-football-stats-for-european-leagues-xg.zip\n",
" inflating: understat.com.csv \n",
" inflating: understat_per_game.csv \n"
]
}
],
"source": [
"!unzip -o extended-football-stats-for-european-leagues-xg.zip"
]
},
{
"cell_type": "markdown",
"id": "edcb45ce",
"metadata": {},
"source": [
"2. Zmiana nazwy pliku `understat.com.csv` na `understat.csv`"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "cb6595a3",
"metadata": {},
"outputs": [],
"source": [
"mv understat.com.csv understat.csv"
]
},
{
"cell_type": "markdown",
"id": "b4aea8ec",
"metadata": {},
"source": [
"3. Wczytanie danych i zmiana nazw kolumn"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6c519048",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"understat = pd.read_csv('understat.csv')\n",
"understat_per_game = pd.read_csv('understat_per_game.csv')"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "fc730d10",
"metadata": {},
"outputs": [],
"source": [
"understat.rename( columns={'Unnamed: 0':'league'}, inplace=True)\n",
"understat.rename( columns={'Unnamed: 1':'year'}, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "5b636526",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>league</th>\n",
" <th>year</th>\n",
" <th>position</th>\n",
" <th>team</th>\n",
" <th>matches</th>\n",
" <th>wins</th>\n",
" <th>draws</th>\n",
" <th>loses</th>\n",
" <th>scored</th>\n",
" <th>missed</th>\n",
" <th>...</th>\n",
" <th>xGA</th>\n",
" <th>xGA_diff</th>\n",
" <th>npxGA</th>\n",
" <th>npxGD</th>\n",
" <th>ppda_coef</th>\n",
" <th>oppda_coef</th>\n",
" <th>deep</th>\n",
" <th>deep_allowed</th>\n",
" <th>xpts</th>\n",
" <th>xpts_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>La_liga</td>\n",
" <td>2014</td>\n",
" <td>1</td>\n",
" <td>Barcelona</td>\n",
" <td>38</td>\n",
" <td>30</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>110</td>\n",
" <td>21</td>\n",
" <td>...</td>\n",
" <td>28.444293</td>\n",
" <td>7.444293</td>\n",
" <td>24.727907</td>\n",
" <td>73.049305</td>\n",
" <td>5.683535</td>\n",
" <td>16.367593</td>\n",
" <td>489</td>\n",
" <td>114</td>\n",
" <td>94.0813</td>\n",
" <td>0.0813</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>La_liga</td>\n",
" <td>2014</td>\n",
" <td>2</td>\n",
" <td>Real Madrid</td>\n",
" <td>38</td>\n",
" <td>30</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>118</td>\n",
" <td>38</td>\n",
" <td>...</td>\n",
" <td>42.607198</td>\n",
" <td>4.607198</td>\n",
" <td>38.890805</td>\n",
" <td>47.213090</td>\n",
" <td>10.209085</td>\n",
" <td>12.929510</td>\n",
" <td>351</td>\n",
" <td>153</td>\n",
" <td>81.7489</td>\n",
" <td>-10.2511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>La_liga</td>\n",
" <td>2014</td>\n",
" <td>3</td>\n",
" <td>Atletico Madrid</td>\n",
" <td>38</td>\n",
" <td>23</td>\n",
" <td>9</td>\n",
" <td>6</td>\n",
" <td>67</td>\n",
" <td>29</td>\n",
" <td>...</td>\n",
" <td>29.069107</td>\n",
" <td>0.069107</td>\n",
" <td>26.839271</td>\n",
" <td>25.748737</td>\n",
" <td>8.982028</td>\n",
" <td>9.237091</td>\n",
" <td>197</td>\n",
" <td>123</td>\n",
" <td>73.1353</td>\n",
" <td>-4.8647</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>La_liga</td>\n",
" <td>2014</td>\n",
" <td>4</td>\n",
" <td>Valencia</td>\n",
" <td>38</td>\n",
" <td>22</td>\n",
" <td>11</td>\n",
" <td>5</td>\n",
" <td>70</td>\n",
" <td>32</td>\n",
" <td>...</td>\n",
" <td>39.392572</td>\n",
" <td>7.392572</td>\n",
" <td>33.446477</td>\n",
" <td>16.257501</td>\n",
" <td>8.709827</td>\n",
" <td>7.870225</td>\n",
" <td>203</td>\n",
" <td>172</td>\n",
" <td>63.7068</td>\n",
" <td>-13.2932</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>La_liga</td>\n",
" <td>2014</td>\n",
" <td>5</td>\n",
" <td>Sevilla</td>\n",
" <td>38</td>\n",
" <td>23</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>71</td>\n",
" <td>45</td>\n",
" <td>...</td>\n",
" <td>47.862742</td>\n",
" <td>2.862742</td>\n",
" <td>41.916529</td>\n",
" <td>20.178070</td>\n",
" <td>8.276148</td>\n",
" <td>9.477805</td>\n",
" <td>305</td>\n",
" <td>168</td>\n",
" <td>67.3867</td>\n",
" <td>-8.6133</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 24 columns</p>\n",
"</div>"
],
"text/plain": [
" league year position team matches wins draws loses \\\n",
"0 La_liga 2014 1 Barcelona 38 30 4 4 \n",
"1 La_liga 2014 2 Real Madrid 38 30 2 6 \n",
"2 La_liga 2014 3 Atletico Madrid 38 23 9 6 \n",
"3 La_liga 2014 4 Valencia 38 22 11 5 \n",
"4 La_liga 2014 5 Sevilla 38 23 7 8 \n",
"\n",
" scored missed ... xGA xGA_diff npxGA npxGD ppda_coef \\\n",
"0 110 21 ... 28.444293 7.444293 24.727907 73.049305 5.683535 \n",
"1 118 38 ... 42.607198 4.607198 38.890805 47.213090 10.209085 \n",
"2 67 29 ... 29.069107 0.069107 26.839271 25.748737 8.982028 \n",
"3 70 32 ... 39.392572 7.392572 33.446477 16.257501 8.709827 \n",
"4 71 45 ... 47.862742 2.862742 41.916529 20.178070 8.276148 \n",
"\n",
" oppda_coef deep deep_allowed xpts xpts_diff \n",
"0 16.367593 489 114 94.0813 0.0813 \n",
"1 12.929510 351 153 81.7489 -10.2511 \n",
"2 9.237091 197 123 73.1353 -4.8647 \n",
"3 7.870225 203 172 63.7068 -13.2932 \n",
"4 9.477805 305 168 67.3867 -8.6133 \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"understat.head()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "cbf956d9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>league</th>\n",
" <th>year</th>\n",
" <th>h_a</th>\n",
" <th>xG</th>\n",
" <th>xGA</th>\n",
" <th>npxG</th>\n",
" <th>npxGA</th>\n",
" <th>deep</th>\n",
" <th>deep_allowed</th>\n",
" <th>scored</th>\n",
" <th>...</th>\n",
" <th>ppda_coef</th>\n",
" <th>ppda_att</th>\n",
" <th>ppda_def</th>\n",
" <th>oppda_coef</th>\n",
" <th>oppda_att</th>\n",
" <th>oppda_def</th>\n",
" <th>team</th>\n",
" <th>xG_diff</th>\n",
" <th>xGA_diff</th>\n",
" <th>xpts_diff</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Bundesliga</td>\n",
" <td>2014</td>\n",
" <td>h</td>\n",
" <td>2.57012</td>\n",
" <td>1.198420</td>\n",
" <td>2.57012</td>\n",
" <td>1.198420</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>9.625000</td>\n",
" <td>231</td>\n",
" <td>24</td>\n",
" <td>21.850000</td>\n",
" <td>437</td>\n",
" <td>20</td>\n",
" <td>Bayern Munich</td>\n",
" <td>0.57012</td>\n",
" <td>0.198420</td>\n",
" <td>-0.6514</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Bundesliga</td>\n",
" <td>2014</td>\n",
" <td>a</td>\n",
" <td>1.50328</td>\n",
" <td>1.307950</td>\n",
" <td>1.50328</td>\n",
" <td>1.307950</td>\n",
" <td>10</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>4.756098</td>\n",
" <td>195</td>\n",
" <td>41</td>\n",
" <td>17.695652</td>\n",
" <td>407</td>\n",
" <td>23</td>\n",
" <td>Bayern Munich</td>\n",
" <td>0.50328</td>\n",
" <td>0.307950</td>\n",
" <td>0.5143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Bundesliga</td>\n",
" <td>2014</td>\n",
" <td>h</td>\n",
" <td>1.22987</td>\n",
" <td>0.310166</td>\n",
" <td>1.22987</td>\n",
" <td>0.310166</td>\n",
" <td>13</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>5.060606</td>\n",
" <td>167</td>\n",
" <td>33</td>\n",
" <td>16.961538</td>\n",
" <td>441</td>\n",
" <td>26</td>\n",
" <td>Bayern Munich</td>\n",
" <td>-0.77013</td>\n",
" <td>0.310166</td>\n",
" <td>-0.8412</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Bundesliga</td>\n",
" <td>2014</td>\n",
" <td>a</td>\n",
" <td>1.03519</td>\n",
" <td>0.203118</td>\n",
" <td>1.03519</td>\n",
" <td>0.203118</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>4.423077</td>\n",
" <td>115</td>\n",
" <td>26</td>\n",
" <td>9.446809</td>\n",
" <td>444</td>\n",
" <td>47</td>\n",
" <td>Bayern Munich</td>\n",
" <td>1.03519</td>\n",
" <td>0.203118</td>\n",
" <td>1.1367</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Bundesliga</td>\n",
" <td>2014</td>\n",
" <td>h</td>\n",
" <td>3.48286</td>\n",
" <td>0.402844</td>\n",
" <td>3.48286</td>\n",
" <td>0.402844</td>\n",
" <td>23</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>4.250000</td>\n",
" <td>170</td>\n",
" <td>40</td>\n",
" <td>44.800000</td>\n",
" <td>448</td>\n",
" <td>10</td>\n",
" <td>Bayern Munich</td>\n",
" <td>-0.51714</td>\n",
" <td>0.402844</td>\n",
" <td>-0.0713</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" league year h_a xG xGA npxG npxGA deep \\\n",
"0 Bundesliga 2014 h 2.57012 1.198420 2.57012 1.198420 5 \n",
"1 Bundesliga 2014 a 1.50328 1.307950 1.50328 1.307950 10 \n",
"2 Bundesliga 2014 h 1.22987 0.310166 1.22987 0.310166 13 \n",
"3 Bundesliga 2014 a 1.03519 0.203118 1.03519 0.203118 6 \n",
"4 Bundesliga 2014 h 3.48286 0.402844 3.48286 0.402844 23 \n",
"\n",
" deep_allowed scored ... ppda_coef ppda_att ppda_def oppda_coef \\\n",
"0 4 2 ... 9.625000 231 24 21.850000 \n",
"1 1 1 ... 4.756098 195 41 17.695652 \n",
"2 3 2 ... 5.060606 167 33 16.961538 \n",
"3 2 0 ... 4.423077 115 26 9.446809 \n",
"4 2 4 ... 4.250000 170 40 44.800000 \n",
"\n",
" oppda_att oppda_def team xG_diff xGA_diff xpts_diff \n",
"0 437 20 Bayern Munich 0.57012 0.198420 -0.6514 \n",
"1 407 23 Bayern Munich 0.50328 0.307950 0.5143 \n",
"2 441 26 Bayern Munich -0.77013 0.310166 -0.8412 \n",
"3 444 47 Bayern Munich 1.03519 0.203118 1.1367 \n",
"4 448 10 Bayern Munich -0.51714 0.402844 -0.0713 \n",
"\n",
"[5 rows x 29 columns]"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"understat_per_game.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}