ium_434760/Zadanie 1.ipynb
2021-04-11 17:20:04 +02:00

1005 lines
102 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Instalacja pakietów i przygotowanie datasetu"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in c:\\programdata\\anaconda3\\lib\\site-packages (1.5.12)\n",
"Requirement already satisfied: six>=1.10 in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n",
"Requirement already satisfied: requests in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n",
"Requirement already satisfied: python-slugify in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (4.0.1)\n",
"Requirement already satisfied: urllib3 in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n",
"Requirement already satisfied: python-dateutil in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n",
"Requirement already satisfied: tqdm in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n",
"Requirement already satisfied: certifi in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: pandas in c:\\programdata\\anaconda3\\lib\\site-packages (1.1.3)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n",
"Requirement already satisfied: pytz>=2017.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (2020.1)\n",
"Requirement already satisfied: numpy>=1.15.4 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
"Requirement already satisfied: numpy in c:\\programdata\\anaconda3\\lib\\site-packages (1.19.2)\n"
]
}
],
"source": [
"# %pip (unlike !pip) installs into the environment of the running kernel.\n",
"# scikit-learn and seaborn are imported later in this notebook, so they are listed too.\n",
"%pip install kaggle pandas numpy scikit-learn seaborn"
]
},
{
"cell_type": "code",
"execution_count": 208,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading fifa19.zip to C:\\Users\\Ania\\Desktop\\AITECH\\[IUM] Inżynieria uczenia maszynowego\\ium_434760\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0.00/2.18M [00:00<?, ?B/s]\n",
" 46%|####5 | 1.00M/2.18M [00:00<00:00, 4.74MB/s]\n",
" 92%|#########1| 2.00M/2.18M [00:00<00:00, 5.31MB/s]\n",
"100%|##########| 2.18M/2.18M [00:00<00:00, 5.93MB/s]\n"
]
}
],
"source": [
"# Fetch the FIFA 19 dataset archive (fifa19.zip) from Kaggle.\n",
"!kaggle datasets download -d karangadiya/fifa19"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Unpack the downloaded archive into the current working directory.\n",
"with zipfile.ZipFile('fifa19.zip', mode='r') as archive:\n",
"    archive.extractall(path='.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Normalizacja i usuwanie artefaktów"
]
},
{
"cell_type": "code",
"execution_count": 210,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Remove artefacts: drop the players that have no release clause.\n",
"df = pd.read_csv('data.csv')\n",
"# notna() and notnull() are aliases, so one filter is sufficient.\n",
"df = df[df[\"Release Clause\"].notna()]\n",
"# index=False keeps the row index out of the file; otherwise every\n",
"# save/load round trip adds one more 'Unnamed: 0' column.\n",
"df.to_csv('data.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {},
"outputs": [],
"source": [
"# Normalisation\n",
"df = pd.read_csv('data.csv')\n",
"\n",
"# Scale Overall to 0-1. The mean check keeps the cell re-runnable once\n",
"# data.csv already holds the scaled values.\n",
"if df[\"Overall\"].mean() > 1:\n",
"    df[\"Overall\"] = df[\"Overall\"] / 100\n",
"\n",
"# Release Clause is text with a euro sign and a K/M suffix. After it has been\n",
"# converted and saved it is read back as a number, so parse it only while it is text.\n",
"if not pd.api.types.is_numeric_dtype(df[\"Release Clause\"]):\n",
"    clause = df[\"Release Clause\"].str.replace(\"€\", \"\", regex=False)\n",
"    amount = clause.str.replace(r'[KM]+$', '', regex=True).astype(float)\n",
"    # A value without a K/M suffix is taken as a plain amount (multiplier 1)\n",
"    # instead of failing on the NaN -> int cast.\n",
"    multiplier = clause.str.extract(r'([KM])$', expand=False).map({'K': 1000, 'M': 1000000}).fillna(1)\n",
"    df[\"Release Clause\"] = amount * multiplier\n",
"\n",
"# index=False: do not add another 'Unnamed: 0' column on every save.\n",
"df.to_csv('data.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Podział na train/dev/test"
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('data.csv')\n",
"\n",
"# Fixed seed so that Restart & Run All reproduces the same split.\n",
"SEED = 42\n",
"\n",
"# 60% train; the remaining 40% is halved into dev and test (20% each).\n",
"train, dev = train_test_split(df, train_size=0.6, test_size=0.4, shuffle=True, random_state=SEED)\n",
"# The held-out part is already shuffled, so it is cut in two without reshuffling.\n",
"dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=False)\n",
"\n",
"# Every row must land in exactly one of the three subsets.\n",
"assert len(train) + len(dev) + len(test) == len(df)\n",
"\n",
"# index=False keeps the shuffled row index out of the saved files.\n",
"test.to_csv('test.csv', index=False)\n",
"dev.to_csv('dev.csv', index=False)\n",
"train.to_csv('train.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Odczyt danych"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test dataset length: 3329\n",
"Dev dataset length: 3329\n",
"Train dataset length: 9985\n",
"Whole dataset length: 16643\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load the full dataset and the three subsets saved by the split step.\n",
"data, train, test, dev = (\n",
"    pd.read_csv(csv_path)\n",
"    for csv_path in ('data.csv', 'train.csv', 'test.csv', 'dev.csv')\n",
")\n",
"\n",
"# Report the size of each subset and of the whole dataset.\n",
"print(\n",
"    f\"Test dataset length: {len(test)}\",\n",
"    f\"Dev dataset length: {len(dev)}\",\n",
"    f\"Train dataset length: {len(train)}\",\n",
"    f\"Whole dataset length: {len(data)}\",\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 214,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Unnamed: 0.1</th>\n",
" <th>Unnamed: 0.1.1</th>\n",
" <th>ID</th>\n",
" <th>Name</th>\n",
" <th>Age</th>\n",
" <th>Photo</th>\n",
" <th>Nationality</th>\n",
" <th>Flag</th>\n",
" <th>Overall</th>\n",
" <th>...</th>\n",
" <th>Composure</th>\n",
" <th>Marking</th>\n",
" <th>StandingTackle</th>\n",
" <th>SlidingTackle</th>\n",
" <th>GKDiving</th>\n",
" <th>GKHandling</th>\n",
" <th>GKKicking</th>\n",
" <th>GKPositioning</th>\n",
" <th>GKReflexes</th>\n",
" <th>Release Clause</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>158023</td>\n",
" <td>L. Messi</td>\n",
" <td>31</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/158023.png</td>\n",
" <td>Argentina</td>\n",
" <td>https://cdn.sofifa.org/flags/52.png</td>\n",
" <td>0.94</td>\n",
" <td>...</td>\n",
" <td>96.0</td>\n",
" <td>33.0</td>\n",
" <td>28.0</td>\n",
" <td>26.0</td>\n",
" <td>6.0</td>\n",
" <td>11.0</td>\n",
" <td>15.0</td>\n",
" <td>14.0</td>\n",
" <td>8.0</td>\n",
" <td>226500000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>20801</td>\n",
" <td>Cristiano Ronaldo</td>\n",
" <td>33</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/20801.png</td>\n",
" <td>Portugal</td>\n",
" <td>https://cdn.sofifa.org/flags/38.png</td>\n",
" <td>0.94</td>\n",
" <td>...</td>\n",
" <td>95.0</td>\n",
" <td>28.0</td>\n",
" <td>31.0</td>\n",
" <td>23.0</td>\n",
" <td>7.0</td>\n",
" <td>11.0</td>\n",
" <td>15.0</td>\n",
" <td>14.0</td>\n",
" <td>11.0</td>\n",
" <td>127100000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>190871</td>\n",
" <td>Neymar Jr</td>\n",
" <td>26</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/190871.png</td>\n",
" <td>Brazil</td>\n",
" <td>https://cdn.sofifa.org/flags/54.png</td>\n",
" <td>0.92</td>\n",
" <td>...</td>\n",
" <td>94.0</td>\n",
" <td>27.0</td>\n",
" <td>24.0</td>\n",
" <td>33.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>15.0</td>\n",
" <td>15.0</td>\n",
" <td>11.0</td>\n",
" <td>228100000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>193080</td>\n",
" <td>De Gea</td>\n",
" <td>27</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/193080.png</td>\n",
" <td>Spain</td>\n",
" <td>https://cdn.sofifa.org/flags/45.png</td>\n",
" <td>0.91</td>\n",
" <td>...</td>\n",
" <td>68.0</td>\n",
" <td>15.0</td>\n",
" <td>21.0</td>\n",
" <td>13.0</td>\n",
" <td>90.0</td>\n",
" <td>85.0</td>\n",
" <td>87.0</td>\n",
" <td>88.0</td>\n",
" <td>94.0</td>\n",
" <td>138600000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>192985</td>\n",
" <td>K. De Bruyne</td>\n",
" <td>27</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/192985.png</td>\n",
" <td>Belgium</td>\n",
" <td>https://cdn.sofifa.org/flags/7.png</td>\n",
" <td>0.91</td>\n",
" <td>...</td>\n",
" <td>88.0</td>\n",
" <td>68.0</td>\n",
" <td>58.0</td>\n",
" <td>51.0</td>\n",
" <td>15.0</td>\n",
" <td>13.0</td>\n",
" <td>5.0</td>\n",
" <td>10.0</td>\n",
" <td>13.0</td>\n",
" <td>196400000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16638</th>\n",
" <td>16638</td>\n",
" <td>18202</td>\n",
" <td>18202</td>\n",
" <td>238813</td>\n",
" <td>J. Lundstram</td>\n",
" <td>19</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/238813.png</td>\n",
" <td>England</td>\n",
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
" <td>0.47</td>\n",
" <td>...</td>\n",
" <td>45.0</td>\n",
" <td>40.0</td>\n",
" <td>48.0</td>\n",
" <td>47.0</td>\n",
" <td>10.0</td>\n",
" <td>13.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" <td>9.0</td>\n",
" <td>143000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16639</th>\n",
" <td>16639</td>\n",
" <td>18203</td>\n",
" <td>18203</td>\n",
" <td>243165</td>\n",
" <td>N. Christoffersson</td>\n",
" <td>19</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/243165.png</td>\n",
" <td>Sweden</td>\n",
" <td>https://cdn.sofifa.org/flags/46.png</td>\n",
" <td>0.47</td>\n",
" <td>...</td>\n",
" <td>42.0</td>\n",
" <td>22.0</td>\n",
" <td>15.0</td>\n",
" <td>19.0</td>\n",
" <td>10.0</td>\n",
" <td>9.0</td>\n",
" <td>9.0</td>\n",
" <td>5.0</td>\n",
" <td>12.0</td>\n",
" <td>113000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16640</th>\n",
" <td>16640</td>\n",
" <td>18204</td>\n",
" <td>18204</td>\n",
" <td>241638</td>\n",
" <td>B. Worman</td>\n",
" <td>16</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/241638.png</td>\n",
" <td>England</td>\n",
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
" <td>0.47</td>\n",
" <td>...</td>\n",
" <td>41.0</td>\n",
" <td>32.0</td>\n",
" <td>13.0</td>\n",
" <td>11.0</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>10.0</td>\n",
" <td>6.0</td>\n",
" <td>13.0</td>\n",
" <td>165000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16641</th>\n",
" <td>16641</td>\n",
" <td>18205</td>\n",
" <td>18205</td>\n",
" <td>246268</td>\n",
" <td>D. Walker-Rice</td>\n",
" <td>17</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/246268.png</td>\n",
" <td>England</td>\n",
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
" <td>0.47</td>\n",
" <td>...</td>\n",
" <td>46.0</td>\n",
" <td>20.0</td>\n",
" <td>25.0</td>\n",
" <td>27.0</td>\n",
" <td>14.0</td>\n",
" <td>6.0</td>\n",
" <td>14.0</td>\n",
" <td>8.0</td>\n",
" <td>9.0</td>\n",
" <td>143000.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16642</th>\n",
" <td>16642</td>\n",
" <td>18206</td>\n",
" <td>18206</td>\n",
" <td>246269</td>\n",
" <td>G. Nugent</td>\n",
" <td>16</td>\n",
" <td>https://cdn.sofifa.org/players/4/19/246269.png</td>\n",
" <td>England</td>\n",
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
" <td>0.46</td>\n",
" <td>...</td>\n",
" <td>43.0</td>\n",
" <td>40.0</td>\n",
" <td>43.0</td>\n",
" <td>50.0</td>\n",
" <td>10.0</td>\n",
" <td>15.0</td>\n",
" <td>9.0</td>\n",
" <td>12.0</td>\n",
" <td>9.0</td>\n",
" <td>165000.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>16643 rows × 91 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Unnamed: 0.1 Unnamed: 0.1.1 ID Name \\\n",
"0 0 0 0 158023 L. Messi \n",
"1 1 1 1 20801 Cristiano Ronaldo \n",
"2 2 2 2 190871 Neymar Jr \n",
"3 3 3 3 193080 De Gea \n",
"4 4 4 4 192985 K. De Bruyne \n",
"... ... ... ... ... ... \n",
"16638 16638 18202 18202 238813 J. Lundstram \n",
"16639 16639 18203 18203 243165 N. Christoffersson \n",
"16640 16640 18204 18204 241638 B. Worman \n",
"16641 16641 18205 18205 246268 D. Walker-Rice \n",
"16642 16642 18206 18206 246269 G. Nugent \n",
"\n",
" Age Photo Nationality \\\n",
"0 31 https://cdn.sofifa.org/players/4/19/158023.png Argentina \n",
"1 33 https://cdn.sofifa.org/players/4/19/20801.png Portugal \n",
"2 26 https://cdn.sofifa.org/players/4/19/190871.png Brazil \n",
"3 27 https://cdn.sofifa.org/players/4/19/193080.png Spain \n",
"4 27 https://cdn.sofifa.org/players/4/19/192985.png Belgium \n",
"... ... ... ... \n",
"16638 19 https://cdn.sofifa.org/players/4/19/238813.png England \n",
"16639 19 https://cdn.sofifa.org/players/4/19/243165.png Sweden \n",
"16640 16 https://cdn.sofifa.org/players/4/19/241638.png England \n",
"16641 17 https://cdn.sofifa.org/players/4/19/246268.png England \n",
"16642 16 https://cdn.sofifa.org/players/4/19/246269.png England \n",
"\n",
" Flag Overall ... Composure Marking \\\n",
"0 https://cdn.sofifa.org/flags/52.png 0.94 ... 96.0 33.0 \n",
"1 https://cdn.sofifa.org/flags/38.png 0.94 ... 95.0 28.0 \n",
"2 https://cdn.sofifa.org/flags/54.png 0.92 ... 94.0 27.0 \n",
"3 https://cdn.sofifa.org/flags/45.png 0.91 ... 68.0 15.0 \n",
"4 https://cdn.sofifa.org/flags/7.png 0.91 ... 88.0 68.0 \n",
"... ... ... ... ... ... \n",
"16638 https://cdn.sofifa.org/flags/14.png 0.47 ... 45.0 40.0 \n",
"16639 https://cdn.sofifa.org/flags/46.png 0.47 ... 42.0 22.0 \n",
"16640 https://cdn.sofifa.org/flags/14.png 0.47 ... 41.0 32.0 \n",
"16641 https://cdn.sofifa.org/flags/14.png 0.47 ... 46.0 20.0 \n",
"16642 https://cdn.sofifa.org/flags/14.png 0.46 ... 43.0 40.0 \n",
"\n",
" StandingTackle SlidingTackle GKDiving GKHandling GKKicking \\\n",
"0 28.0 26.0 6.0 11.0 15.0 \n",
"1 31.0 23.0 7.0 11.0 15.0 \n",
"2 24.0 33.0 9.0 9.0 15.0 \n",
"3 21.0 13.0 90.0 85.0 87.0 \n",
"4 58.0 51.0 15.0 13.0 5.0 \n",
"... ... ... ... ... ... \n",
"16638 48.0 47.0 10.0 13.0 7.0 \n",
"16639 15.0 19.0 10.0 9.0 9.0 \n",
"16640 13.0 11.0 6.0 5.0 10.0 \n",
"16641 25.0 27.0 14.0 6.0 14.0 \n",
"16642 43.0 50.0 10.0 15.0 9.0 \n",
"\n",
" GKPositioning GKReflexes Release Clause \n",
"0 14.0 8.0 226500000.0 \n",
"1 14.0 11.0 127100000.0 \n",
"2 15.0 11.0 228100000.0 \n",
"3 88.0 94.0 138600000.0 \n",
"4 10.0 13.0 196400000.0 \n",
"... ... ... ... \n",
"16638 8.0 9.0 143000.0 \n",
"16639 5.0 12.0 113000.0 \n",
"16640 6.0 13.0 165000.0 \n",
"16641 8.0 9.0 143000.0 \n",
"16642 12.0 9.0 165000.0 \n",
"\n",
"[16643 rows x 91 columns]"
]
},
"execution_count": 214,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview only the first rows instead of rendering the whole frame.\n",
"data.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Minimum, maksimum, średnia, mediana, odchylenie standardowe"
]
},
{
"cell_type": "code",
"execution_count": 217,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overall zawodnika (0-1):\n",
"Minimum: 0.46\n",
"Maksimum: 0.94\n",
"Średnia: 0.6616277113501784\n",
"Mediana: 0.66\n",
"Odchylenie standardowe: 0.07008236149926617\n"
]
}
],
"source": [
"# Descriptive statistics of the Overall column.\n",
"overall = data[\"Overall\"]\n",
"\n",
"print(\"Overall zawodnika (0-1):\")\n",
"print(\n",
"    f\"Minimum: {overall.min()}\",\n",
"    f\"Maksimum: {overall.max()}\",\n",
"    f\"Średnia: {overall.mean()}\",\n",
"    f\"Mediana: {overall.median()}\",\n",
"    f\"Odchylenie standardowe: {overall.std()}\",\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 218,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Wiek zawodnika:\n",
"Minimum: 16\n",
"Maksimum: 45\n",
"Średnia: 25.226221234152497\n",
"Mediana: 25.0\n",
"Odchylenie standardowe: 4.71658785571582\n"
]
}
],
"source": [
"# Descriptive statistics of the Age column.\n",
"age = data[\"Age\"]\n",
"\n",
"print(\n",
"    \"Wiek zawodnika:\",\n",
"    f\"Minimum: {age.min()}\",\n",
"    f\"Maksimum: {age.max()}\",\n",
"    f\"Średnia: {age.mean()}\",\n",
"    f\"Mediana: {age.median()}\",\n",
"    f\"Odchylenie standardowe: {age.std()}\",\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Liczba zawodników dla poszczególnych narodowości (top 10)"
]
},
{
"cell_type": "code",
"execution_count": 219,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 219,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Ten most frequent nationalities as a bar chart.\n",
"ax = data[\"Nationality\"].value_counts().head(10).plot(kind=\"bar\")\n",
"# Title and axis labels let the figure stand alone; the trailing ';' hides the repr.\n",
"ax.set(title=\"Liczba zawodników wg narodowości (top 10)\", xlabel=\"Narodowość\", ylabel=\"Liczba zawodników\");"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Top 10 najlepszych i najgorszych drużyn względem średniego Overall"
]
},
{
"cell_type": "code",
"execution_count": 220,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Overall</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Club</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Juventus</th>\n",
" <td>0.822800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Napoli</th>\n",
" <td>0.800417</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Inter</th>\n",
" <td>0.796190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Real Madrid</th>\n",
" <td>0.782424</td>\n",
" </tr>\n",
" <tr>\n",
" <th>FC Barcelona</th>\n",
" <td>0.780303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Milan</th>\n",
" <td>0.775417</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Paris Saint-Germain</th>\n",
" <td>0.774333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Roma</th>\n",
" <td>0.774000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Manchester United</th>\n",
" <td>0.772424</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SL Benfica</th>\n",
" <td>0.770741</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Overall\n",
"Club \n",
"Juventus 0.822800\n",
"Napoli 0.800417\n",
"Inter 0.796190\n",
"Real Madrid 0.782424\n",
"FC Barcelona 0.780303\n",
"Milan 0.775417\n",
"Paris Saint-Germain 0.774333\n",
"Roma 0.774000\n",
"Manchester United 0.772424\n",
"SL Benfica 0.770741"
]
},
"execution_count": 220,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean Overall per club, ten highest-rated clubs first.\n",
"(\n",
"    data.groupby(\"Club\")[[\"Overall\"]]\n",
"    .mean()\n",
"    .sort_values(by=\"Overall\", ascending=False)\n",
"    .head(10)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Overall</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Club</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>St. Patrick's Athletic</th>\n",
" <td>0.577826</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Cambridge United</th>\n",
" <td>0.572593</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Waterford FC</th>\n",
" <td>0.570000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Morecambe</th>\n",
" <td>0.569600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Crewe Alexandra</th>\n",
" <td>0.566667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Sligo Rovers</th>\n",
" <td>0.566316</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Derry City</th>\n",
" <td>0.555882</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bohemian FC</th>\n",
" <td>0.550000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Limerick FC</th>\n",
" <td>0.545263</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bray Wanderers</th>\n",
" <td>0.536522</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Overall\n",
"Club \n",
"St. Patrick's Athletic 0.577826\n",
"Cambridge United 0.572593\n",
"Waterford FC 0.570000\n",
"Morecambe 0.569600\n",
"Crewe Alexandra 0.566667\n",
"Sligo Rovers 0.566316\n",
"Derry City 0.555882\n",
"Bohemian FC 0.550000\n",
"Limerick FC 0.545263\n",
"Bray Wanderers 0.536522"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean Overall per club, sorted from best to worst; the last ten rows are the weakest clubs.\n",
"(\n",
"    data.groupby(\"Club\")[[\"Overall\"]]\n",
"    .mean()\n",
"    .sort_values(by=\"Overall\", ascending=False)\n",
"    .tail(10)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Top 10 klauzul odstępnego"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"29 Isco\n",
"11 T. Kroos\n",
"16 H. Kane\n",
"7 L. Suárez\n",
"17 A. Griezmann\n",
"25 K. Mbappé\n",
"5 E. Hazard\n",
"4 K. De Bruyne\n",
"0 L. Messi\n",
"2 Neymar Jr\n",
"Name: Name, dtype: object"
]
},
"execution_count": 227,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sort descending and take the head: missing values are placed last by sort_values,\n",
"# so they cannot end up in the top 10, and the highest clause is listed first.\n",
"# The clause value is shown next to the player's name.\n",
"data.sort_values(\"Release Clause\", ascending=False).head(10)[[\"Name\", \"Release Clause\"]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Zależność między wiekiem a overall zawodników dla top 10 klubów"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.FacetGrid at 0x202e9f658b0>"
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 499.225x360 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"sns.set_theme()\n",
"\n",
"# Restrict the plot to the 10 clubs with the highest mean Overall.\n",
"clubs = data[[\"Club\", \"Overall\"]].groupby(\"Club\", as_index=False).mean().sort_values(\"Overall\", ascending=False).head(10)[\"Club\"]\n",
"top_clubs_players = data[data[\"Club\"].isin(clubs)]\n",
"\n",
"# The trailing ';' suppresses the FacetGrid repr in the cell output.\n",
"sns.relplot(data=top_clubs_players, x=\"Overall\", y=\"Age\", hue=\"Club\");"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}