{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Instalacja pakietów i przygotowanie datasetu" ] }, { "cell_type": "code", "execution_count": 207, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: kaggle in c:\\programdata\\anaconda3\\lib\\site-packages (1.5.12)\n", "Requirement already satisfied: six>=1.10 in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (1.15.0)\n", "Requirement already satisfied: requests in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2.24.0)\n", "Requirement already satisfied: python-slugify in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (4.0.1)\n", "Requirement already satisfied: urllib3 in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (1.25.11)\n", "Requirement already satisfied: python-dateutil in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2.8.1)\n", "Requirement already satisfied: tqdm in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (4.50.2)\n", "Requirement already satisfied: certifi in c:\\programdata\\anaconda3\\lib\\site-packages (from kaggle) (2020.6.20)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.0.4)\n", "Requirement already satisfied: idna<3,>=2.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.10)\n", "Requirement already satisfied: text-unidecode>=1.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: pandas in c:\\programdata\\anaconda3\\lib\\site-packages (1.1.3)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\n", "Requirement already satisfied: pytz>=2017.2 in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (2020.1)\n", "Requirement already satisfied: numpy>=1.15.4 
in c:\\programdata\\anaconda3\\lib\\site-packages (from pandas) (1.19.2)\n", "Requirement already satisfied: six>=1.5 in c:\\programdata\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n", "Requirement already satisfied: numpy in c:\\programdata\\anaconda3\\lib\\site-packages (1.19.2)\n" ] } ], "source": [
 "# %pip installs into the environment of the running kernel.\n",
 "%pip install kaggle\n",
 "%pip install pandas\n",
 "%pip install numpy"
] }, { "cell_type": "code", "execution_count": 208, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading fifa19.zip to C:\\Users\\Ania\\Desktop\\AITECH\\[IUM] Inżynieria uczenia maszynowego\\ium_434760\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", " 0%| | 0.00/2.18M [00:00 1:\n", " df[\"Overall\"]= df[\"Overall\"]/100 \n", "df[\"Release Clause\"] = df[\"Release Clause\"].str.replace(\"€\", \"\")\n", "\n", "df[\"Release Clause\"] = (df[\"Release Clause\"].replace(r'[KM]+$', '', regex=True).astype(float) * \n", " df[\"Release Clause\"].str.extract(r'[\\d\\.]+([KM]+)', expand=False)\n", " .replace(['K','M'], [1000, 1000000]).astype(int))\n", "df.to_csv('data.csv', index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Podział na train/dev/test" ] }, { "cell_type": "code", "execution_count": 212, "metadata": {}, "outputs": [], "source": [
 "import pandas as pd\n",
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "# Fixed seed so that Restart & Run All reproduces exactly the same split.\n",
 "SPLIT_SEED = 42\n",
 "\n",
 "df = pd.read_csv('data.csv')\n",
 "# 60% of the rows go to train; the remaining 40% is held out.\n",
 "train, dev = train_test_split(df, train_size=0.6, test_size=0.4, shuffle=True, random_state=SPLIT_SEED)\n",
 "# The held-out rows are already shuffled, so they are cut in half by position into dev and test.\n",
 "dev, test = train_test_split(dev, train_size=0.5, test_size=0.5, shuffle=False)\n",
 "\n",
 "# index=False keeps the row index out of the files, so reading them back\n",
 "# does not add another unnamed index column.\n",
 "test.to_csv('test.csv', index=False)\n",
 "dev.to_csv('dev.csv', index=False)\n",
 "train.to_csv('train.csv', index=False)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Odczyt danych" ] }, { "cell_type": "code", "execution_count": 213, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test dataset length: 3329\n", "Dev dataset length: 3329\n", 
"Train dataset length: 9985\n", "Whole dataset length: 16643\n" ] } ], "source": [
 "import pandas as pd\n",
 "\n",
 "# Load the full dataset, then the three split files in the order train, test, dev.\n",
 "data = pd.read_csv('data.csv')\n",
 "train, test, dev = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv', 'dev.csv'))\n",
 "\n",
 "# Report the row count of every split next to the row count of the whole dataset.\n",
 "print(f\"Test dataset length: {len(test)}\")\n",
 "print(f\"Dev dataset length: {len(dev)}\")\n",
 "print(f\"Train dataset length: {len(train)}\")\n",
 "print(f\"Whole dataset length: {len(data)}\")"
] }, { "cell_type": "code", "execution_count": 214, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0Unnamed: 0.1Unnamed: 0.1.1IDNameAgePhotoNationalityFlagOverall...ComposureMarkingStandingTackleSlidingTackleGKDivingGKHandlingGKKickingGKPositioningGKReflexesRelease Clause
0000158023L. Messi31https://cdn.sofifa.org/players/4/19/158023.pngArgentinahttps://cdn.sofifa.org/flags/52.png0.94...96.033.028.026.06.011.015.014.08.0226500000.0
111120801Cristiano Ronaldo33https://cdn.sofifa.org/players/4/19/20801.pngPortugalhttps://cdn.sofifa.org/flags/38.png0.94...95.028.031.023.07.011.015.014.011.0127100000.0
2222190871Neymar Jr26https://cdn.sofifa.org/players/4/19/190871.pngBrazilhttps://cdn.sofifa.org/flags/54.png0.92...94.027.024.033.09.09.015.015.011.0228100000.0
3333193080De Gea27https://cdn.sofifa.org/players/4/19/193080.pngSpainhttps://cdn.sofifa.org/flags/45.png0.91...68.015.021.013.090.085.087.088.094.0138600000.0
4444192985K. De Bruyne27https://cdn.sofifa.org/players/4/19/192985.pngBelgiumhttps://cdn.sofifa.org/flags/7.png0.91...88.068.058.051.015.013.05.010.013.0196400000.0
..................................................................
16638166381820218202238813J. Lundstram19https://cdn.sofifa.org/players/4/19/238813.pngEnglandhttps://cdn.sofifa.org/flags/14.png0.47...45.040.048.047.010.013.07.08.09.0143000.0
16639166391820318203243165N. Christoffersson19https://cdn.sofifa.org/players/4/19/243165.pngSwedenhttps://cdn.sofifa.org/flags/46.png0.47...42.022.015.019.010.09.09.05.012.0113000.0
16640166401820418204241638B. Worman16https://cdn.sofifa.org/players/4/19/241638.pngEnglandhttps://cdn.sofifa.org/flags/14.png0.47...41.032.013.011.06.05.010.06.013.0165000.0
16641166411820518205246268D. Walker-Rice17https://cdn.sofifa.org/players/4/19/246268.pngEnglandhttps://cdn.sofifa.org/flags/14.png0.47...46.020.025.027.014.06.014.08.09.0143000.0
16642166421820618206246269G. Nugent16https://cdn.sofifa.org/players/4/19/246269.pngEnglandhttps://cdn.sofifa.org/flags/14.png0.46...43.040.043.050.010.015.09.012.09.0165000.0
\n", "

16643 rows × 91 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 Unnamed: 0.1 Unnamed: 0.1.1 ID Name \\\n", "0 0 0 0 158023 L. Messi \n", "1 1 1 1 20801 Cristiano Ronaldo \n", "2 2 2 2 190871 Neymar Jr \n", "3 3 3 3 193080 De Gea \n", "4 4 4 4 192985 K. De Bruyne \n", "... ... ... ... ... ... \n", "16638 16638 18202 18202 238813 J. Lundstram \n", "16639 16639 18203 18203 243165 N. Christoffersson \n", "16640 16640 18204 18204 241638 B. Worman \n", "16641 16641 18205 18205 246268 D. Walker-Rice \n", "16642 16642 18206 18206 246269 G. Nugent \n", "\n", " Age Photo Nationality \\\n", "0 31 https://cdn.sofifa.org/players/4/19/158023.png Argentina \n", "1 33 https://cdn.sofifa.org/players/4/19/20801.png Portugal \n", "2 26 https://cdn.sofifa.org/players/4/19/190871.png Brazil \n", "3 27 https://cdn.sofifa.org/players/4/19/193080.png Spain \n", "4 27 https://cdn.sofifa.org/players/4/19/192985.png Belgium \n", "... ... ... ... \n", "16638 19 https://cdn.sofifa.org/players/4/19/238813.png England \n", "16639 19 https://cdn.sofifa.org/players/4/19/243165.png Sweden \n", "16640 16 https://cdn.sofifa.org/players/4/19/241638.png England \n", "16641 17 https://cdn.sofifa.org/players/4/19/246268.png England \n", "16642 16 https://cdn.sofifa.org/players/4/19/246269.png England \n", "\n", " Flag Overall ... Composure Marking \\\n", "0 https://cdn.sofifa.org/flags/52.png 0.94 ... 96.0 33.0 \n", "1 https://cdn.sofifa.org/flags/38.png 0.94 ... 95.0 28.0 \n", "2 https://cdn.sofifa.org/flags/54.png 0.92 ... 94.0 27.0 \n", "3 https://cdn.sofifa.org/flags/45.png 0.91 ... 68.0 15.0 \n", "4 https://cdn.sofifa.org/flags/7.png 0.91 ... 88.0 68.0 \n", "... ... ... ... ... ... \n", "16638 https://cdn.sofifa.org/flags/14.png 0.47 ... 45.0 40.0 \n", "16639 https://cdn.sofifa.org/flags/46.png 0.47 ... 42.0 22.0 \n", "16640 https://cdn.sofifa.org/flags/14.png 0.47 ... 41.0 32.0 \n", "16641 https://cdn.sofifa.org/flags/14.png 0.47 ... 46.0 20.0 \n", "16642 https://cdn.sofifa.org/flags/14.png 0.46 ... 
43.0 40.0 \n", "\n", " StandingTackle SlidingTackle GKDiving GKHandling GKKicking \\\n", "0 28.0 26.0 6.0 11.0 15.0 \n", "1 31.0 23.0 7.0 11.0 15.0 \n", "2 24.0 33.0 9.0 9.0 15.0 \n", "3 21.0 13.0 90.0 85.0 87.0 \n", "4 58.0 51.0 15.0 13.0 5.0 \n", "... ... ... ... ... ... \n", "16638 48.0 47.0 10.0 13.0 7.0 \n", "16639 15.0 19.0 10.0 9.0 9.0 \n", "16640 13.0 11.0 6.0 5.0 10.0 \n", "16641 25.0 27.0 14.0 6.0 14.0 \n", "16642 43.0 50.0 10.0 15.0 9.0 \n", "\n", " GKPositioning GKReflexes Release Clause \n", "0 14.0 8.0 226500000.0 \n", "1 14.0 11.0 127100000.0 \n", "2 15.0 11.0 228100000.0 \n", "3 88.0 94.0 138600000.0 \n", "4 10.0 13.0 196400000.0 \n", "... ... ... ... \n", "16638 8.0 9.0 143000.0 \n", "16639 5.0 12.0 113000.0 \n", "16640 6.0 13.0 165000.0 \n", "16641 8.0 9.0 143000.0 \n", "16642 12.0 9.0 165000.0 \n", "\n", "[16643 rows x 91 columns]" ] }, "execution_count": 214, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Minimum, maksimum, średnia, mediana, odchylenie standardowe" ] }, { "cell_type": "code", "execution_count": 217, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overall zawodnika (0-1):\n", "Minimum: 0.46\n", "Maksimum: 0.94\n", "Średnia: 0.6616277113501784\n", "Mediana: 0.66\n", "Odchylenie standardowe: 0.07008236149926617\n" ] } ], "source": [
 "def print_summary(series, title):\n",
 "    \"\"\"Print basic descriptive statistics of a numeric pandas Series.\n",
 "\n",
 "    Args:\n",
 "        series: numeric pandas Series to summarise.\n",
 "        title: header line printed before the statistics.\n",
 "    \"\"\"\n",
 "    print(title)\n",
 "    print(f\"Minimum: {series.min()}\")\n",
 "    print(f\"Maksimum: {series.max()}\")\n",
 "    print(f\"Średnia: {series.mean()}\")\n",
 "    print(f\"Mediana: {series.median()}\")\n",
 "    # Series.std() is the sample standard deviation (ddof=1 by default).\n",
 "    print(f\"Odchylenie standardowe: {series.std()}\")\n",
 "\n",
 "\n",
 "overall = data[\"Overall\"]\n",
 "print_summary(overall, \"Overall zawodnika (0-1):\")"
] }, { "cell_type": "code", "execution_count": 218, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wiek zawodnika:\n", "Minimum: 16\n", "Maksimum: 45\n", "Średnia: 25.226221234152497\n", "Mediana: 25.0\n", "Odchylenie standardowe: 4.71658785571582\n" ] } ], "source": [
 "# Relies on print_summary defined in the previous cell.\n",
 "age = data[\"Age\"]\n",
 "print_summary(age, \"Wiek zawodnika:\")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Liczba zawodników dla poszczególnych narodowości (top 10)" ] }, { "cell_type": "code", "execution_count": 219, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 219, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Count players per nationality and draw the 10 largest groups as a bar chart.\n",
 "(\n",
 "    data[\"Nationality\"]\n",
 "    .value_counts()\n",
 "    .iloc[:10]\n",
 "    .plot.bar()\n",
 ")"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Top 10 najlepszych i najgorszych drużyn względem średniego Overall" ] }, { "cell_type": "code", "execution_count": 220, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Overall
Club
Juventus0.822800
Napoli0.800417
Inter0.796190
Real Madrid0.782424
FC Barcelona0.780303
Milan0.775417
Paris Saint-Germain0.774333
Roma0.774000
Manchester United0.772424
SL Benfica0.770741
\n", "
" ], "text/plain": [ " Overall\n", "Club \n", "Juventus 0.822800\n", "Napoli 0.800417\n", "Inter 0.796190\n", "Real Madrid 0.782424\n", "FC Barcelona 0.780303\n", "Milan 0.775417\n", "Paris Saint-Germain 0.774333\n", "Roma 0.774000\n", "Manchester United 0.772424\n", "SL Benfica 0.770741" ] }, "execution_count": 220, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Mean Overall per club, sorted from best to worst; show the first 10 clubs.\n",
 "(\n",
 "    data.groupby(\"Club\")[[\"Overall\"]]\n",
 "    .mean()\n",
 "    .sort_values(by=\"Overall\", ascending=False)\n",
 "    .iloc[:10]\n",
 ")"
] }, { "cell_type": "code", "execution_count": 224, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Overall
Club
St. Patrick's Athletic0.577826
Cambridge United0.572593
Waterford FC0.570000
Morecambe0.569600
Crewe Alexandra0.566667
Sligo Rovers0.566316
Derry City0.555882
Bohemian FC0.550000
Limerick FC0.545263
Bray Wanderers0.536522
\n", "
" ], "text/plain": [ " Overall\n", "Club \n", "St. Patrick's Athletic 0.577826\n", "Cambridge United 0.572593\n", "Waterford FC 0.570000\n", "Morecambe 0.569600\n", "Crewe Alexandra 0.566667\n", "Sligo Rovers 0.566316\n", "Derry City 0.555882\n", "Bohemian FC 0.550000\n", "Limerick FC 0.545263\n", "Bray Wanderers 0.536522" ] }, "execution_count": 224, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[[\"Club\", \"Overall\"]].groupby(\"Club\").mean().sort_values(\"Overall\", ascending=False).tail(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Top 10 klauzul uwolnienia" ] }, { "cell_type": "code", "execution_count": 227, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "29 Isco\n", "11 T. Kroos\n", "16 H. Kane\n", "7 L. Suárez\n", "17 A. Griezmann\n", "25 K. Mbappé\n", "5 E. Hazard\n", "4 K. De Bruyne\n", "0 L. Messi\n", "2 Neymar Jr\n", "Name: Name, dtype: object" ] }, "execution_count": 227, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.sort_values(\"Release Clause\").tail(10)[\"Name\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zależność między wiekiem a overall zawodników dla top 10 klubów" ] }, { "cell_type": "code", "execution_count": 228, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 228, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "import seaborn as sns\n",
 "sns.set_theme()\n",
 "\n",
 "# Restrict the plot to the 10 clubs with the highest mean Overall.\n",
 "clubs = data[[\"Club\", \"Overall\"]].groupby(\"Club\", as_index=False).mean().sort_values(\"Overall\", ascending=False).head(10)[\"Club\"]\n",
 "\n",
 "# Filter the players once and plot from that frame.\n",
 "top_clubs_players = data[data[\"Club\"].isin(clubs)]\n",
 "sns.relplot(data=top_clubs_players, x=\"Overall\", y=\"Age\", hue=\"Club\")"
] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }