1002 lines
102 KiB
Plaintext
1002 lines
102 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Instalacja pakietów i przygotowanie datasetu"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 207,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: kaggle in c:\\programdata\\anaconda3\\lib\\site-packages (1.5.12)\n",
|
||
"Requirement already satisfied: six>=1.10 in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n",
|
||
"Requirement already satisfied: requests in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n",
|
||
"Requirement already satisfied: python-slugify in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (4.0.1)\n",
|
||
"Requirement already satisfied: urllib3 in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n",
|
||
"Requirement already satisfied: python-dateutil in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n",
|
||
"Requirement already satisfied: tqdm in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n",
|
||
"Requirement already satisfied: certifi in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n",
|
||
"Requirement already satisfied: chardet<4,>=3.0.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n",
|
||
"Requirement already satisfied: idna<3,>=2.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n",
|
||
"Requirement already satisfied: text-unidecode>=1.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
|
||
"Requirement already satisfied: pandas in c:\\programdata\\anaconda3\\lib\\site-packages (1.1.3)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.7.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n",
|
||
"Requirement already satisfied: pytz>=2017.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (2020.1)\n",
|
||
"Requirement already satisfied: numpy>=1.15.4 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n",
|
||
"Requirement already satisfied: six>=1.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
|
||
"Requirement already satisfied: numpy in c:\\programdata\\anaconda3\\lib\\site-packages (1.19.2)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!pip install kaggle\n",
|
||
"!pip install pandas\n",
|
||
"!pip install numpy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 208,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Downloading fifa19.zip to C:\\Users\\Ania\\Desktop\\AITECH\\[IUM] Inżynieria uczenia maszynowego\\ium_434760\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\n",
|
||
" 0%| | 0.00/2.18M [00:00<?, ?B/s]\n",
|
||
" 46%|####5 | 1.00M/2.18M [00:00<00:00, 4.74MB/s]\n",
|
||
" 92%|#########1| 2.00M/2.18M [00:00<00:00, 5.31MB/s]\n",
|
||
"100%|##########| 2.18M/2.18M [00:00<00:00, 5.93MB/s]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!kaggle datasets download -d karangadiya/fifa19"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 209,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import zipfile\n",
|
||
"\n",
|
||
"with zipfile.ZipFile('fifa19.zip', 'r') as zip_ref:\n",
|
||
" zip_ref.extractall('.')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Normalizacja i usuwanie artefaktów"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 210,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"#Usuwanie artefaktów\n",
|
||
"df=pd.read_csv('data.csv')\n",
|
||
"df = df[df[\"Release Clause\"].notna()]\n",
|
||
"df = df[df[\"Release Clause\"].notnull()]\n",
|
||
"df.to_csv('data.csv')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 211,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"#Normalizacja\n",
|
||
"df=pd.read_csv('data.csv')\n",
|
||
"if df[\"Overall\"].mean() > 1:\n",
|
||
" df[\"Overall\"]= df[\"Overall\"]/100 \n",
|
||
"df[\"Release Clause\"] = df[\"Release Clause\"].str.replace(\"€\", \"\")\n",
|
||
"\n",
|
||
"df[\"Release Clause\"] = (df[\"Release Clause\"].replace(r'[KM]+$', '', regex=True).astype(float) * \n",
|
||
" df[\"Release Clause\"].str.extract(r'[\\d\\.]+([KM]+)', expand=False)\n",
|
||
" .replace(['K','M'], [1000, 1000000]).astype(int))\n",
|
||
"df.to_csv('data.csv')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Podział na train/dev/test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 212,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"\n",
|
||
"df=pd.read_csv('data.csv')\n",
|
||
"train, dev = train_test_split(df, train_size=0.6, test_size=0.4, shuffle=True)\n",
|
||
"dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=False)\n",
|
||
"\n",
|
||
"test.to_csv('test.csv') \n",
|
||
"dev.to_csv('dev.csv') \n",
|
||
"train.to_csv('train.csv')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Odczyt danych"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 213,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Test dataset length: 3329\n",
|
||
"Dev dataset length: 3329\n",
|
||
"Train dataset length: 9985\n",
|
||
"Whole dataset length: 16643\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"data = pd.read_csv('data.csv')\n",
|
||
"train = pd.read_csv('train.csv')\n",
|
||
"test = pd.read_csv('test.csv')\n",
|
||
"dev = pd.read_csv('dev.csv')\n",
|
||
"\n",
|
||
"print(f\"Test dataset length: {len(test)}\")\n",
|
||
"print(f\"Dev dataset length: {len(dev)}\")\n",
|
||
"print(f\"Train dataset length: {len(train)}\")\n",
|
||
"print(f\"Whole dataset length: {len(data)}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 214,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Unnamed: 0</th>\n",
|
||
" <th>Unnamed: 0.1</th>\n",
|
||
" <th>Unnamed: 0.1.1</th>\n",
|
||
" <th>ID</th>\n",
|
||
" <th>Name</th>\n",
|
||
" <th>Age</th>\n",
|
||
" <th>Photo</th>\n",
|
||
" <th>Nationality</th>\n",
|
||
" <th>Flag</th>\n",
|
||
" <th>Overall</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>Composure</th>\n",
|
||
" <th>Marking</th>\n",
|
||
" <th>StandingTackle</th>\n",
|
||
" <th>SlidingTackle</th>\n",
|
||
" <th>GKDiving</th>\n",
|
||
" <th>GKHandling</th>\n",
|
||
" <th>GKKicking</th>\n",
|
||
" <th>GKPositioning</th>\n",
|
||
" <th>GKReflexes</th>\n",
|
||
" <th>Release Clause</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>158023</td>\n",
|
||
" <td>L. Messi</td>\n",
|
||
" <td>31</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/158023.png</td>\n",
|
||
" <td>Argentina</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/52.png</td>\n",
|
||
" <td>0.94</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>96.0</td>\n",
|
||
" <td>33.0</td>\n",
|
||
" <td>28.0</td>\n",
|
||
" <td>26.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>226500000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>20801</td>\n",
|
||
" <td>Cristiano Ronaldo</td>\n",
|
||
" <td>33</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/20801.png</td>\n",
|
||
" <td>Portugal</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/38.png</td>\n",
|
||
" <td>0.94</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>95.0</td>\n",
|
||
" <td>28.0</td>\n",
|
||
" <td>31.0</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>127100000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>190871</td>\n",
|
||
" <td>Neymar Jr</td>\n",
|
||
" <td>26</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/190871.png</td>\n",
|
||
" <td>Brazil</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/54.png</td>\n",
|
||
" <td>0.92</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>94.0</td>\n",
|
||
" <td>27.0</td>\n",
|
||
" <td>24.0</td>\n",
|
||
" <td>33.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>228100000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>193080</td>\n",
|
||
" <td>De Gea</td>\n",
|
||
" <td>27</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/193080.png</td>\n",
|
||
" <td>Spain</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/45.png</td>\n",
|
||
" <td>0.91</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>68.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>90.0</td>\n",
|
||
" <td>85.0</td>\n",
|
||
" <td>87.0</td>\n",
|
||
" <td>88.0</td>\n",
|
||
" <td>94.0</td>\n",
|
||
" <td>138600000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>192985</td>\n",
|
||
" <td>K. De Bruyne</td>\n",
|
||
" <td>27</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/192985.png</td>\n",
|
||
" <td>Belgium</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/7.png</td>\n",
|
||
" <td>0.91</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>88.0</td>\n",
|
||
" <td>68.0</td>\n",
|
||
" <td>58.0</td>\n",
|
||
" <td>51.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>196400000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16638</th>\n",
|
||
" <td>16638</td>\n",
|
||
" <td>18202</td>\n",
|
||
" <td>18202</td>\n",
|
||
" <td>238813</td>\n",
|
||
" <td>J. Lundstram</td>\n",
|
||
" <td>19</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/238813.png</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
|
||
" <td>0.47</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>45.0</td>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>48.0</td>\n",
|
||
" <td>47.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>143000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16639</th>\n",
|
||
" <td>16639</td>\n",
|
||
" <td>18203</td>\n",
|
||
" <td>18203</td>\n",
|
||
" <td>243165</td>\n",
|
||
" <td>N. Christoffersson</td>\n",
|
||
" <td>19</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/243165.png</td>\n",
|
||
" <td>Sweden</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/46.png</td>\n",
|
||
" <td>0.47</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>42.0</td>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>113000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16640</th>\n",
|
||
" <td>16640</td>\n",
|
||
" <td>18204</td>\n",
|
||
" <td>18204</td>\n",
|
||
" <td>241638</td>\n",
|
||
" <td>B. Worman</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/241638.png</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
|
||
" <td>0.47</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>41.0</td>\n",
|
||
" <td>32.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>165000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16641</th>\n",
|
||
" <td>16641</td>\n",
|
||
" <td>18205</td>\n",
|
||
" <td>18205</td>\n",
|
||
" <td>246268</td>\n",
|
||
" <td>D. Walker-Rice</td>\n",
|
||
" <td>17</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/246268.png</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
|
||
" <td>0.47</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>46.0</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>25.0</td>\n",
|
||
" <td>27.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>143000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16642</th>\n",
|
||
" <td>16642</td>\n",
|
||
" <td>18206</td>\n",
|
||
" <td>18206</td>\n",
|
||
" <td>246269</td>\n",
|
||
" <td>G. Nugent</td>\n",
|
||
" <td>16</td>\n",
|
||
" <td>https://cdn.sofifa.org/players/4/19/246269.png</td>\n",
|
||
" <td>England</td>\n",
|
||
" <td>https://cdn.sofifa.org/flags/14.png</td>\n",
|
||
" <td>0.46</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>43.0</td>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>43.0</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>165000.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>16643 rows × 91 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Unnamed: 0 Unnamed: 0.1 Unnamed: 0.1.1 ID Name \\\n",
|
||
"0 0 0 0 158023 L. Messi \n",
|
||
"1 1 1 1 20801 Cristiano Ronaldo \n",
|
||
"2 2 2 2 190871 Neymar Jr \n",
|
||
"3 3 3 3 193080 De Gea \n",
|
||
"4 4 4 4 192985 K. De Bruyne \n",
|
||
"... ... ... ... ... ... \n",
|
||
"16638 16638 18202 18202 238813 J. Lundstram \n",
|
||
"16639 16639 18203 18203 243165 N. Christoffersson \n",
|
||
"16640 16640 18204 18204 241638 B. Worman \n",
|
||
"16641 16641 18205 18205 246268 D. Walker-Rice \n",
|
||
"16642 16642 18206 18206 246269 G. Nugent \n",
|
||
"\n",
|
||
" Age Photo Nationality \\\n",
|
||
"0 31 https://cdn.sofifa.org/players/4/19/158023.png Argentina \n",
|
||
"1 33 https://cdn.sofifa.org/players/4/19/20801.png Portugal \n",
|
||
"2 26 https://cdn.sofifa.org/players/4/19/190871.png Brazil \n",
|
||
"3 27 https://cdn.sofifa.org/players/4/19/193080.png Spain \n",
|
||
"4 27 https://cdn.sofifa.org/players/4/19/192985.png Belgium \n",
|
||
"... ... ... ... \n",
|
||
"16638 19 https://cdn.sofifa.org/players/4/19/238813.png England \n",
|
||
"16639 19 https://cdn.sofifa.org/players/4/19/243165.png Sweden \n",
|
||
"16640 16 https://cdn.sofifa.org/players/4/19/241638.png England \n",
|
||
"16641 17 https://cdn.sofifa.org/players/4/19/246268.png England \n",
|
||
"16642 16 https://cdn.sofifa.org/players/4/19/246269.png England \n",
|
||
"\n",
|
||
" Flag Overall ... Composure Marking \\\n",
|
||
"0 https://cdn.sofifa.org/flags/52.png 0.94 ... 96.0 33.0 \n",
|
||
"1 https://cdn.sofifa.org/flags/38.png 0.94 ... 95.0 28.0 \n",
|
||
"2 https://cdn.sofifa.org/flags/54.png 0.92 ... 94.0 27.0 \n",
|
||
"3 https://cdn.sofifa.org/flags/45.png 0.91 ... 68.0 15.0 \n",
|
||
"4 https://cdn.sofifa.org/flags/7.png 0.91 ... 88.0 68.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"16638 https://cdn.sofifa.org/flags/14.png 0.47 ... 45.0 40.0 \n",
|
||
"16639 https://cdn.sofifa.org/flags/46.png 0.47 ... 42.0 22.0 \n",
|
||
"16640 https://cdn.sofifa.org/flags/14.png 0.47 ... 41.0 32.0 \n",
|
||
"16641 https://cdn.sofifa.org/flags/14.png 0.47 ... 46.0 20.0 \n",
|
||
"16642 https://cdn.sofifa.org/flags/14.png 0.46 ... 43.0 40.0 \n",
|
||
"\n",
|
||
" StandingTackle SlidingTackle GKDiving GKHandling GKKicking \\\n",
|
||
"0 28.0 26.0 6.0 11.0 15.0 \n",
|
||
"1 31.0 23.0 7.0 11.0 15.0 \n",
|
||
"2 24.0 33.0 9.0 9.0 15.0 \n",
|
||
"3 21.0 13.0 90.0 85.0 87.0 \n",
|
||
"4 58.0 51.0 15.0 13.0 5.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"16638 48.0 47.0 10.0 13.0 7.0 \n",
|
||
"16639 15.0 19.0 10.0 9.0 9.0 \n",
|
||
"16640 13.0 11.0 6.0 5.0 10.0 \n",
|
||
"16641 25.0 27.0 14.0 6.0 14.0 \n",
|
||
"16642 43.0 50.0 10.0 15.0 9.0 \n",
|
||
"\n",
|
||
" GKPositioning GKReflexes Release Clause \n",
|
||
"0 14.0 8.0 226500000.0 \n",
|
||
"1 14.0 11.0 127100000.0 \n",
|
||
"2 15.0 11.0 228100000.0 \n",
|
||
"3 88.0 94.0 138600000.0 \n",
|
||
"4 10.0 13.0 196400000.0 \n",
|
||
"... ... ... ... \n",
|
||
"16638 8.0 9.0 143000.0 \n",
|
||
"16639 5.0 12.0 113000.0 \n",
|
||
"16640 6.0 13.0 165000.0 \n",
|
||
"16641 8.0 9.0 143000.0 \n",
|
||
"16642 12.0 9.0 165000.0 \n",
|
||
"\n",
|
||
"[16643 rows x 91 columns]"
|
||
]
|
||
},
|
||
"execution_count": 214,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Minimum, maksimum, średnia, mediana, odchylenie standardowe"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 217,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Overall zawodnika (0-1):\n",
|
||
"Minimum: 0.46\n",
|
||
"Maksimum: 0.94\n",
|
||
"Średnia: 0.6616277113501784\n",
|
||
"Mediana: 0.66\n",
|
||
"Odchylenie standardowe: 0.07008236149926617\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"overall = data[\"Overall\"]\n",
|
||
"print(\"Overall zawodnika (0-1):\")\n",
|
||
"print(f\"Minimum: {overall.min()}\")\n",
|
||
"print(f\"Maksimum: {overall.max()}\")\n",
|
||
"\n",
|
||
"print(f\"Średnia: {overall.mean()}\")\n",
|
||
"print(f\"Mediana: {overall.median()}\")\n",
|
||
"print(f\"Odchylenie standardowe: {overall.std()}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 218,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Wiek zawodnika:\n",
|
||
"Minimum: 16\n",
|
||
"Maksimum: 45\n",
|
||
"Średnia: 25.226221234152497\n",
|
||
"Mediana: 25.0\n",
|
||
"Odchylenie standardowe: 4.71658785571582\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"age = data[\"Age\"]\n",
|
||
"print(\"Wiek zawodnika:\")\n",
|
||
"print(f\"Minimum: {age.min()}\")\n",
|
||
"print(f\"Maksimum: {age.max()}\")\n",
|
||
"\n",
|
||
"print(f\"Średnia: {age.mean()}\")\n",
|
||
"print(f\"Mediana: {age.median()}\")\n",
|
||
"print(f\"Odchylenie standardowe: {age.std()}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Liczba zawodników dla poszczególnych narodowości (top 10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 219,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<AxesSubplot:>"
|
||
]
|
||
},
|
||
"execution_count": 219,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"data[\"Nationality\"].value_counts().head(10).plot(kind=\"bar\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Top 10 najlepszych i najgorszych drużyn względem średniego Overall"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 220,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Overall</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Club</th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Juventus</th>\n",
|
||
" <td>0.822800</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Napoli</th>\n",
|
||
" <td>0.800417</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Inter</th>\n",
|
||
" <td>0.796190</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Real Madrid</th>\n",
|
||
" <td>0.782424</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>FC Barcelona</th>\n",
|
||
" <td>0.780303</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Milan</th>\n",
|
||
" <td>0.775417</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Paris Saint-Germain</th>\n",
|
||
" <td>0.774333</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Roma</th>\n",
|
||
" <td>0.774000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Manchester United</th>\n",
|
||
" <td>0.772424</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>SL Benfica</th>\n",
|
||
" <td>0.770741</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Overall\n",
|
||
"Club \n",
|
||
"Juventus 0.822800\n",
|
||
"Napoli 0.800417\n",
|
||
"Inter 0.796190\n",
|
||
"Real Madrid 0.782424\n",
|
||
"FC Barcelona 0.780303\n",
|
||
"Milan 0.775417\n",
|
||
"Paris Saint-Germain 0.774333\n",
|
||
"Roma 0.774000\n",
|
||
"Manchester United 0.772424\n",
|
||
"SL Benfica 0.770741"
|
||
]
|
||
},
|
||
"execution_count": 220,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data[[\"Club\", \"Overall\"]].groupby(\"Club\").mean().sort_values(\"Overall\", ascending=False).head(10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 224,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Overall</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Club</th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>St. Patrick's Athletic</th>\n",
|
||
" <td>0.577826</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Cambridge United</th>\n",
|
||
" <td>0.572593</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Waterford FC</th>\n",
|
||
" <td>0.570000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Morecambe</th>\n",
|
||
" <td>0.569600</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Crewe Alexandra</th>\n",
|
||
" <td>0.566667</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Sligo Rovers</th>\n",
|
||
" <td>0.566316</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Derry City</th>\n",
|
||
" <td>0.555882</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Bohemian FC</th>\n",
|
||
" <td>0.550000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Limerick FC</th>\n",
|
||
" <td>0.545263</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Bray Wanderers</th>\n",
|
||
" <td>0.536522</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Overall\n",
|
||
"Club \n",
|
||
"St. Patrick's Athletic 0.577826\n",
|
||
"Cambridge United 0.572593\n",
|
||
"Waterford FC 0.570000\n",
|
||
"Morecambe 0.569600\n",
|
||
"Crewe Alexandra 0.566667\n",
|
||
"Sligo Rovers 0.566316\n",
|
||
"Derry City 0.555882\n",
|
||
"Bohemian FC 0.550000\n",
|
||
"Limerick FC 0.545263\n",
|
||
"Bray Wanderers 0.536522"
|
||
]
|
||
},
|
||
"execution_count": 224,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data[[\"Club\", \"Overall\"]].groupby(\"Club\").mean().sort_values(\"Overall\", ascending=False).tail(10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Top 10 klauzul uwolnienia"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 227,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"29 Isco\n",
|
||
"11 T. Kroos\n",
|
||
"16 H. Kane\n",
|
||
"7 L. Suárez\n",
|
||
"17 A. Griezmann\n",
|
||
"25 K. Mbappé\n",
|
||
"5 E. Hazard\n",
|
||
"4 K. De Bruyne\n",
|
||
"0 L. Messi\n",
|
||
"2 Neymar Jr\n",
|
||
"Name: Name, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 227,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.sort_values(\"Release Clause\").tail(10)[\"Name\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Zależność między wiekiem a overall zawodników dla top 10 klubów"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 228,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<seaborn.axisgrid.FacetGrid at 0x202e9f658b0>"
|
||
]
|
||
},
|
||
"execution_count": 228,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "\n",
|
||
"text/plain": [
|
||
"<Figure size 499.225x360 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"import seaborn as sns\n",
|
||
"sns.set_theme()\n",
|
||
"\n",
|
||
"#Wyświetlenie danych tylko dla top 10 klubów względem overall\n",
|
||
"clubs = data[[\"Club\", \"Overall\"]].groupby(\"Club\", as_index=False).mean().sort_values(\"Overall\", ascending=False).head(10)[\"Club\"]\n",
|
||
"\n",
|
||
"data[data[\"Club\"].isin(clubs)]\n",
|
||
"sns.relplot(data=data[data[\"Club\"].isin(clubs)], x=\"Overall\", y=\"Age\", hue=\"Club\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.8.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|