153 lines
11 KiB
Plaintext
153 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 69,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"All data: 376884\n",
|
|
"Train size: 226125\n",
|
|
"Test size: 75384\n",
|
|
"Validate size: 75375\n",
|
|
" date home_team away_team home_score away_score tournament \\\n",
|
|
"count 41876 41876 41876 41876.000000 41876.000000 41876 \n",
|
|
"unique 15232 308 306 NaN NaN 112 \n",
|
|
"top 2012-02-29 brazil uruguay NaN NaN friendly \n",
|
|
"freq 66 570 543 NaN NaN 17136 \n",
|
|
"mean NaN NaN NaN 1.744293 1.186503 NaN \n",
|
|
"std NaN NaN NaN 1.752248 1.403053 NaN \n",
|
|
"min NaN NaN NaN 0.000000 0.000000 NaN \n",
|
|
"25% NaN NaN NaN 1.000000 0.000000 NaN \n",
|
|
"50% NaN NaN NaN 1.000000 1.000000 NaN \n",
|
|
"75% NaN NaN NaN 2.000000 2.000000 NaN \n",
|
|
"max NaN NaN NaN 31.000000 21.000000 NaN \n",
|
|
"\n",
|
|
" city country neutral \n",
|
|
"count 41876 41876 41876 \n",
|
|
"unique 2026 266 2 \n",
|
|
"top kuala lumpur united states False \n",
|
|
"freq 589 1160 31557 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"376884\n",
|
|
"\n",
|
|
" date\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" home_team\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" away_team\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" home_score\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" away_score\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" tournament\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" city\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" country\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n",
|
|
"\n",
|
|
" neutral\n",
|
|
"AxesSubplot(0.125,0.125;0.775x0.755)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEICAYAAABfz4NwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAASmklEQVR4nO3df6zddX3H8eeLVrFTYfy4ENYW20mTCWxW6bouJIuz2+h0WTGD7JIpXdKkhtREM2MCZpvuRxfYokSSQYLBUQgTOtTQTNgkRWNcGHghzFKw80YQajtaAbH+AG1974/zufH0cnp/tvdcOM9H8s35nvf3+/ne9zdp87rf7+d7zk1VIUnSCf1uQJI0PxgIkiTAQJAkNQaCJAkwECRJjYEgSQJgYb8bmKnTTz+9li1b1u82JOkV5aGHHvpeVQ312vaKDYRly5YxMjLS7zYk6RUlyXeOts1bRpIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1LxiP5j2SrHsyi/2u4VXlSevfne/W5BetbxCkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSmkkDIcnrkjyY5H+S7EryN61+apJ7k3yrvZ7SNeaqJKNJdie5qKt+QZKdbdt1SdLqJya5o9UfSLLsOJyrJGkCU7lCeAl4Z1W9FVgJrEuyBrgS2FFVK4Ad7T1JzgWGgfOAdcD1SRa0Y90AbAJWtGVdq28Enq+qc4BrgWtmf2qSpOmYNBCq44ft7WvaUsB6YGurbwUubuvrgdur6qWqegIYBVYnOQs4qarur6oCbhk3ZuxYdwJrx64eJElzY0pzCEkWJHkE2A/cW1UPAGdW1T6A9npG230x8HTX8D2ttritj68fMaaqDgEvAKfN4HwkSTM0pUCoqsNVtRJYQue3/fMn2L3Xb/Y1QX2iMUceONmUZCTJyIEDBybpWpI0HdN6yqiqvg98hc69/2fabSDa6/622x5gadewJcDeVl/So37EmCQLgZOB53r8/BuralVVrRoaGppO65KkSUzlKaOhJL/c1hcBvwd8E9gObGi7bQDuauvbgeH25NByOpPHD7bbSgeTrGnzA5ePGzN2rEuA+9o8gyRpjkzl207PAra2J4VOALZV1b8nuR/YlmQj8BRwKUBV7UqyDXgMOARsrqrD7VhXADcDi4B72gJwE3BrklE6VwbDx+LkJElTN2kgVNU3gLf1qD8LrD3KmC3Alh71EeBl8w9V9SItUCRJ/eEnlSVJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKmZNBCSLE3y5SSPJ9mV5IOt/vEk303ySFve1TXmqiSjSXYnuairfkGSnW3bdUnS6icmuaPVH0iy7DicqyRpAlO5QjgEfLiq3gKsATYnObdtu7aqVrblboC2bRg4D1gHXJ9kQdv/BmATsKIt61p9I/B8VZ0DXAtcM/tTkyRNx6SBUFX7qurhtn4QeBxYPMGQ9cDtVfVSVT0BjAKrk5wFnFRV91dVAbcAF3eN2drW7wTWjl09SJLmxrTmENqtnLcBD7TSB5J8I8lnkpzSaouBp7uG7Wm1xW19fP2IMVV1CHgBOK3Hz9+UZCTJyIEDB6bTuiRpElMOhCRvAD4HfKiqfkDn9s+bgZXAPuATY7v2GF4T1Ccac2Sh6saqWlVVq4aGhqbauiRpCqYUCEleQycMbquqzwNU1TNVdbiqfg58Gljddt8DLO0avgTY2+pLetSPGJNkIXAy8NxMTkiSNDNTecoowE3A41X1ya76WV27vQd4tK1vB4bbk0PL6UweP1hV+4CDSda0Y14O3NU1ZkNbvwS4r80zSJLmyMIp7HMh8D5gZ5JHWu2jwGVJVtK5tfMk8H6AqtqVZBvwGJ0nlDZX1eE27g
rgZmARcE9boBM4tyYZpXNlMDybk5IkTd+kgVBVX6P3Pf67JxizBdjSoz4CnN+j/iJw6WS9SJKOHz+pLEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJmEIgJFma5MtJHk+yK8kHW/3UJPcm+VZ7PaVrzFVJRpPsTnJRV/2CJDvbtuuSpNVPTHJHqz+QZNlxOFdJ0gSmcoVwCPhwVb0FWANsTnIucCWwo6pWADvae9q2YeA8YB1wfZIF7Vg3AJuAFW1Z1+obgeer6hzgWuCaY3BukqRpmDQQqmpfVT3c1g8CjwOLgfXA1rbbVuDitr4euL2qXqqqJ4BRYHWSs4CTqur+qirglnFjxo51J7B27OpBkjQ3pjWH0G7lvA14ADizqvZBJzSAM9pui4Gnu4btabXFbX18/YgxVXUIeAE4bTq9SZJmZ8qBkOQNwOeAD1XVDybatUetJqhPNGZ8D5uSjCQZOXDgwGQtS5KmYUqBkOQ1dMLgtqr6fCs/024D0V73t/oeYGnX8CXA3lZf0qN+xJgkC4GTgefG91FVN1bVqqpaNTQ0NJXWJUlTNJWnjALcBDxeVZ/s2rQd2NDWNwB3ddWH25NDy+lMHj/YbisdTLKmHfPycWPGjnUJcF+bZ5AkzZGFU9jnQuB9wM4kj7TaR4GrgW1JNgJPAZcCVNWuJNuAx+g8obS5qg63cVcANwOLgHvaAp3AuTXJKJ0rg+HZnZYkabomDYSq+hq97/EDrD3KmC3Alh71EeD8HvUXaYEiSeoPP6ksSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJzaSBkOQzSfYnebSr9vEk303ySFve1bXtqiSjSXYnuairfkGSnW3bdUnS6icmuaPVH0iy7BifoyRpCqZyhXAzsK5H/dqqWtmWuwGSnAsMA+e1MdcnWdD2vwHYBKxoy9gxNwLPV9U5wLXANTM8F0nSLEwaCFX1VeC5KR5vPXB7Vb1UVU8Ao8DqJGcBJ1XV/VVVwC3AxV1jtrb1O4G1Y1cPkqS5M5s5hA8k+Ua7pXRKqy0Gnu7aZ0+rLW7r4+tHjKmqQ8ALwGmz6EuSNAMzDYQbgDcDK4F9wCdavddv9jVBfaIxL5NkU5KRJCMHDhyYVsOSpInNKBCq6pmqOlxVPwc+Daxum/YAS7t2XQLsbfUlPepHjEmyEDiZo9yiqqobq2pVVa0aGhqaSeuSpKOYUSC0OYEx7wHGnkDaDgy3J4eW05k8frCq9gEHk6xp8wOXA3d1jdnQ1i8B7mvzDJKkObRwsh2SfBZ4B3B6kj3Ax4B3JFlJ59bOk8D7AapqV5JtwGPAIWBzVR1uh7qCzhNLi4B72gJwE3BrklE6VwbDx+C8JEnTNGkgVNVlPco3TbD/FmBLj/oIcH6P+ovApZP1IUk6vvyksiQJMBAkSY2BIEkCDARJUmMgSJKAKTxlJOnVadmVX+x3C68qT1797n63MGteIUiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUTBoIST6TZH+SR7tqpya5N8m32uspXduuSjKaZHeSi7rqFyTZ2bZdlyStfmKSO1r9gSTLjvE5SpKmYCpXCDcD68bVrgR2VNUKYEd7T5JzgWHgvDbm+iQL2pgbgE3AiraMHXMj8HxVnQNcC1wz05ORJM3cpIFQVV8FnhtXXg9sbetbgYu76rdX1UtV9QQwCqxOchZwUlXdX1UF3DJuzNix7gTWjl09SJLmzkznEM
6sqn0A7fWMVl8MPN21355WW9zWx9ePGFNVh4AXgNNm2JckaYaO9aRyr9/sa4L6RGNefvBkU5KRJCMHDhyYYYuSpF5mGgjPtNtAtNf9rb4HWNq13xJgb6sv6VE/YkyShcDJvPwWFQBVdWNVraqqVUNDQzNsXZLUy0wDYTuwoa1vAO7qqg+3J4eW05k8frDdVjqYZE2bH7h83JixY10C3NfmGSRJc2jhZDsk+SzwDuD0JHuAjwFXA9uSbASeAi4FqKpdSbYBjwGHgM1Vdbgd6go6TywtAu5pC8BNwK1JRulcGQwfkzOTJE3LpIFQVZcdZdPao+y/BdjSoz4CnN+j/iItUCRJ/eMnlSVJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKmZVSAkeTLJziSPJBlptVOT3JvkW+31lK79r0oymmR3kou66he044wmuS5JZtOXJGn6jsUVwu9W1cqqWtXeXwnsqKoVwI72niTnAsPAecA64PokC9qYG4BNwIq2rDsGfUmSpuF43DJaD2xt61uBi7vqt1fVS1X1BDAKrE5yFnBSVd1fVQXc0jVGkjRHZhsIBXwpyUNJNrXamVW1D6C9ntHqi4Gnu8buabXFbX18XZI0hxbOcvyFVbU3yRnAvUm+OcG+veYFaoL6yw/QCZ1NAGefffZ0e5UkTWBWVwhVtbe97ge+AKwGnmm3gWiv+9vue4ClXcOXAHtbfUmPeq+fd2NVraqqVUNDQ7NpXZI0zowDIcnrk7xxbB34A+BRYDuwoe22AbirrW8HhpOcmGQ5ncnjB9ttpYNJ1rSniy7vGiNJmiOzuWV0JvCF9oToQuBfq+o/knwd2JZkI/AUcClAVe1Ksg14DDgEbK6qw+1YVwA3A4uAe9oiSZpDMw6Eqvo28NYe9WeBtUcZswXY0qM+Apw/014kSbPnJ5UlSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpmTeBkGRdkt1JRpNc2e9+JGnQzItASLIA+GfgD4FzgcuSnNvfriRpsMyLQABWA6NV9e2q+ilwO7C+zz1J0kCZL4GwGHi66/2eVpMkzZGF/W6gSY9avWynZBOwqb39YZLdx7WrwXI68L1+NzGZXNPvDtQH/ts8tt50tA3zJRD2AEu73i8B9o7fqapuBG6cq6YGSZKRqlrV7z6k8fy3OXfmyy2jrwMrkixP8lpgGNje554kaaDMiyuEqjqU5APAfwILgM9U1a4+tyVJA2VeBAJAVd0N3N3vPgaYt+I0X/lvc46k6mVzt5KkATRf5hAkSX1mIEiSAANB0jyTjvcm+ev2/uwkq/vd1yAwEAZYkl9K8ldJPt3er0jyR/3uSwPveuC3gcva+4N0vutMx5mBMNj+BXiJzn8+6HxA8O/7144EwG9V1WbgRYCqeh54bX9bGgwGwmB7c1X9I/AzgKr6Cb2/RkSaSz9r34BcAEmGgJ/3t6XBYCAMtp8mWcQv/uO9mc4Vg9RP1wFfAM5IsgX4GvAP/W1pMPg5hAGW5PeBv6TzNyi+BFwI/HlVfaWffUlJfg1YS+eKdUdVPd7nlgaCgTDgkpwGrKHzH++/q2ref6ukXt2SnN2rXlVPzXUvg8ZAGGBJLgQeqaofJXkv8HbgU1X1nT63pgGWZCed25gBXgcsB3ZX1Xl9bWwAOIcw2G4AfpzkrcBHgO8At/S3JQ26qvr1qvqN9rqCzl9U/Fq/+xoEBsJgO1SdS8T1wHVV9SngjX3uSTpCVT0M/Ga/+xgE8+bbTtUXB5NcBbwX+J32qN9r+tyTBlySv+h6ewKdW5kH+t
TOQPEKYbD9KZ3HTDdW1f/R+TvW/9TfliTe2LWcCHyRzlWsjjMnlSXNG+0q9eqq+ki/exlE3jIaQEkO0j6MNn4TUFV10hy3JJFkYfvriW/vdy+DykAYQFXlxLHmowfpzBc8kmQ78G/Aj8Y2VtXn+9XYoDAQRJIz6DzvDfgBIPXdqcCzwDv5xecRCjAQjjMDYYAl+WPgE8CvAPuBNwGPA34ASP1wRnvC6FF+EQRjnOycAz5lNNj+js7XVvxvVS2n890x/9XfljTAFgBvaMsbu9bHFh1nXiEMtp9V1bNJTkhyQlV9Ock1/W5KA2tfVf1tv5sYZAbCYPt+kjcAXwVuS7IfONTnnjS4/FscfebnEAZQkrOr6qkkrwd+QufW4Z8BJwO3VdWzfW1QAynJqVX1XL/7GGQGwgBK8nBVvb2tf66q/qTfPUnqPyeVB1P3pfmv9q0LSfOKgTCY6ijrkgaYt4wGUJLDdD4BGmAR8OOxTfjVFdLAMhAkSYC3jCRJjYEgSQIMBElSYyBIkgADQZLU/D9Olzuv8JG2MwAAAABJRU5ErkJggg==\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import preprocessing\n",
"import kaggle\n",
"\n",
"# Fixed seed so the train/validation/test split is reproducible across runs.\n",
"SEED = 42\n",
"# Maximum number of distinct values drawn per bar chart; columns with\n",
"# thousands of unique values are unreadable and slow to render in full.\n",
"TOP_N = 20\n",
"\n",
"kaggle.api.authenticate()\n",
"\n",
"# Download and unpack the dataset into the working directory.\n",
"kaggle.api.dataset_download_files('martj42/international-football-results-from-1872-to-2017', path='.', unzip=True)\n",
"\n",
"results = pd.read_csv('results.csv')\n",
"\n",
"# Drop rows containing NaN. dropna() returns a new frame rather than\n",
"# modifying in place, so the result has to be assigned.\n",
"results = results.dropna()\n",
"\n",
"# Normalisation: lower-case the free-text columns.\n",
"for column in ['home_team', 'away_team', 'tournament', 'city', 'country']:\n",
"    results[column] = results[column].str.lower()\n",
"\n",
"# Split 6:2:2 (train : validation : test): hold out 40% of the rows,\n",
"# then cut the hold-out in half.\n",
"train, holdout = train_test_split(results, test_size=0.4, random_state=SEED)\n",
"valid, test = train_test_split(holdout, test_size=0.5, random_state=SEED)\n",
"\n",
"# Report row counts; DataFrame.size would be rows * columns instead.\n",
"print(\"All data: \", len(results))\n",
"print(\"Train size: \", len(train))\n",
"print(\"Test size: \", len(test))\n",
"print(\"Validate size: \", len(valid))\n",
"print(results.describe(include='all'))\n",
"\n",
"# Check that the three subsets together cover the whole dataset.\n",
"subset_rows = len(train) + len(test) + len(valid)\n",
"assert subset_rows == len(results), 'split lost or duplicated rows'\n",
"print(subset_rows)\n",
"\n",
"# One bar chart per column showing its TOP_N most frequent values.\n",
"for col in results.columns:\n",
"    top_counts = results[col].value_counts().head(TOP_N)\n",
"    # Plot via a one-column DataFrame: DataFrame.plot opens a new figure on\n",
"    # each call, whereas Series.plot draws onto the current axes and would\n",
"    # overlay every column's bars in a single chart.\n",
"    top_counts.to_frame(name='count').plot(kind=\"bar\", title=col, legend=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|