Sportowe/1_zaj.ipynb

546 lines
17 KiB
Plaintext
Raw Normal View History

2022-10-17 15:15:14 +02:00
{
"cells": [
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 1,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 2,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
2022-10-17 18:30:39 +02:00
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Wk</th>\n",
" <th>Day</th>\n",
" <th>Date</th>\n",
" <th>Time</th>\n",
" <th>Home</th>\n",
" <th>Score</th>\n",
" <th>Away</th>\n",
" <th>Attendance</th>\n",
" <th>Venue</th>\n",
" <th>Referee</th>\n",
" <th>Match Report</th>\n",
" <th>Notes</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>Fri</td>\n",
" <td>2020-08-21</td>\n",
" <td>18:00</td>\n",
" <td>Zagłębie Lubin</td>\n",
" <td>21</td>\n",
" <td>Lech Poznań</td>\n",
" <td>3968.0</td>\n",
" <td>Stadion Zagłębia Lubin</td>\n",
" <td>Bartosz Frankowski</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>Sat</td>\n",
" <td>2020-08-22</td>\n",
" <td>15:00</td>\n",
" <td>Cracovia</td>\n",
" <td>21</td>\n",
" <td>Pogoń Szczecin</td>\n",
" <td>4053.0</td>\n",
" <td>Stadion Cracovii</td>\n",
" <td>Paweł Raczkowski</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>Sat</td>\n",
" <td>2020-08-22</td>\n",
" <td>17:30</td>\n",
" <td>Śląsk Wrocław</td>\n",
" <td>20</td>\n",
" <td>Piast Gliwice</td>\n",
" <td>5259.0</td>\n",
" <td>Stadion Miejski</td>\n",
" <td>Wojciech Myć</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>Sat</td>\n",
" <td>2020-08-22</td>\n",
" <td>20:00</td>\n",
" <td>RKS Raków</td>\n",
" <td>12</td>\n",
" <td>Legia Warsaw</td>\n",
" <td>1985.0</td>\n",
" <td>Stadion GKS-u</td>\n",
" <td>Jarosław Przybył</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>Sun</td>\n",
" <td>2020-08-23</td>\n",
" <td>12:30</td>\n",
" <td>Wisła Płock</td>\n",
" <td>11</td>\n",
" <td>Stal Mielec</td>\n",
" <td>1318.0</td>\n",
" <td>Stadion im. Kazimierza Górskiego</td>\n",
" <td>Sebastian Jarzębak</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>275</th>\n",
" <td>30.0</td>\n",
" <td>Sun</td>\n",
" <td>2021-05-16</td>\n",
" <td>17:30</td>\n",
" <td>Pogoń Szczecin</td>\n",
" <td>13</td>\n",
" <td>RKS Raków</td>\n",
" <td>NaN</td>\n",
" <td>Stadion Miejski im. Floriana Krygiera</td>\n",
" <td>Sebastian Krasny</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>276</th>\n",
" <td>30.0</td>\n",
" <td>Sun</td>\n",
" <td>2021-05-16</td>\n",
" <td>17:30</td>\n",
" <td>Piast Gliwice</td>\n",
" <td>23</td>\n",
" <td>Wisła Kraków</td>\n",
" <td>NaN</td>\n",
" <td>Stadion Miejski</td>\n",
" <td>Mariusz Zlotek</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>277</th>\n",
" <td>30.0</td>\n",
" <td>Sun</td>\n",
" <td>2021-05-16</td>\n",
" <td>17:30</td>\n",
" <td>Cracovia</td>\n",
" <td>01</td>\n",
" <td>Warta Poznań</td>\n",
" <td>3670.0</td>\n",
" <td>Stadion Cracovii</td>\n",
" <td>Pawel Malec</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>278</th>\n",
" <td>30.0</td>\n",
" <td>Sun</td>\n",
" <td>2021-05-16</td>\n",
" <td>17:30</td>\n",
" <td>Śląsk Wrocław</td>\n",
" <td>11</td>\n",
" <td>Stal Mielec</td>\n",
" <td>NaN</td>\n",
" <td>Stadion Miejski</td>\n",
" <td>Tomasz Kwiatkowski</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>279</th>\n",
" <td>30.0</td>\n",
" <td>Sun</td>\n",
" <td>2021-05-16</td>\n",
" <td>17:30</td>\n",
" <td>Wisła Płock</td>\n",
" <td>40</td>\n",
" <td>Zagłębie Lubin</td>\n",
" <td>NaN</td>\n",
" <td>Stadion im. Kazimierza Górskiego</td>\n",
" <td>Paweł Raczkowski</td>\n",
" <td>Match Report</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>280 rows × 12 columns</p>\n",
"</div>"
],
2022-10-17 15:15:14 +02:00
"text/plain": [
" Wk Day Date Time Home Score Away \\\n",
"0 1.0 Fri 2020-08-21 18:00 Zagłębie Lubin 21 Lech Poznań \n",
"1 1.0 Sat 2020-08-22 15:00 Cracovia 21 Pogoń Szczecin \n",
"2 1.0 Sat 2020-08-22 17:30 Śląsk Wrocław 20 Piast Gliwice \n",
"3 1.0 Sat 2020-08-22 20:00 RKS Raków 12 Legia Warsaw \n",
"4 1.0 Sun 2020-08-23 12:30 Wisła Płock 11 Stal Mielec \n",
".. ... ... ... ... ... ... ... \n",
"275 30.0 Sun 2021-05-16 17:30 Pogoń Szczecin 13 RKS Raków \n",
"276 30.0 Sun 2021-05-16 17:30 Piast Gliwice 23 Wisła Kraków \n",
"277 30.0 Sun 2021-05-16 17:30 Cracovia 01 Warta Poznań \n",
"278 30.0 Sun 2021-05-16 17:30 Śląsk Wrocław 11 Stal Mielec \n",
"279 30.0 Sun 2021-05-16 17:30 Wisła Płock 40 Zagłębie Lubin \n",
"\n",
" Attendance Venue Referee \\\n",
"0 3968.0 Stadion Zagłębia Lubin Bartosz Frankowski \n",
"1 4053.0 Stadion Cracovii Paweł Raczkowski \n",
"2 5259.0 Stadion Miejski Wojciech Myć \n",
"3 1985.0 Stadion GKS-u Jarosław Przybył \n",
"4 1318.0 Stadion im. Kazimierza Górskiego Sebastian Jarzębak \n",
".. ... ... ... \n",
"275 NaN Stadion Miejski im. Floriana Krygiera Sebastian Krasny \n",
"276 NaN Stadion Miejski Mariusz Zlotek \n",
"277 3670.0 Stadion Cracovii Pawel Malec \n",
"278 NaN Stadion Miejski Tomasz Kwiatkowski \n",
"279 NaN Stadion im. Kazimierza Górskiego Paweł Raczkowski \n",
"\n",
" Match Report Notes \n",
"0 Match Report NaN \n",
"1 Match Report NaN \n",
"2 Match Report NaN \n",
"3 Match Report NaN \n",
"4 Match Report NaN \n",
".. ... ... \n",
"275 Match Report NaN \n",
"276 Match Report NaN \n",
"277 Match Report NaN \n",
"278 Match Report NaN \n",
"279 Match Report NaN \n",
"\n",
"[280 rows x 12 columns]"
2022-10-17 18:30:39 +02:00
]
2022-10-17 15:15:14 +02:00
},
2022-10-17 18:30:39 +02:00
"execution_count": 2,
2022-10-17 15:15:14 +02:00
"metadata": {},
2022-10-17 18:30:39 +02:00
"output_type": "execute_result"
2022-10-17 15:15:14 +02:00
}
],
"source": [
"# Read the raw match table from the CSV file and display it as the cell output.\n",
"scores_data = pd.read_csv('dane/scores.csv')\n",
"scores_data"
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 3,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"45"
]
},
2022-10-17 18:30:39 +02:00
"execution_count": 3,
2022-10-17 15:15:14 +02:00
"metadata": {},
2022-10-17 18:30:39 +02:00
"output_type": "execute_result"
2022-10-17 15:15:14 +02:00
}
],
"source": [
"# Keep only the columns used later and drop every row that has a missing value in any of them.\n",
"scores_data = scores_data[['Day', 'Date', 'Attendance', 'Home', 'Away']].dropna()\n",
"# Number of rows that survived the filtering.\n",
"scores_data.shape[0]"
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 5,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Reference league table (club code, club name, final position) mirrored by win_table_20.\n",
"# NOTE(review): presumably the standings of the season preceding the one in scores.csv - confirm.\n",
"# PIA  Piast Gliwice          1\n",
"# LEG  Legia Warszawa         2\n",
"# LGD  Lechia Gdańsk          3\n",
"# CRA  Cracovia               4\n",
"# JAG  Jagiellonia Białystok  5\n",
"# ZLU  Zagłębie Lubin         6\n",
"# POG  Pogoń Szczecin         7\n",
"# LPO  Lech Poznań            8\n",
"# WKR  Wisła Kraków           9\n",
"# KOR  Korona Kielce          10\n",
"# GZA  Górnik Zabrze          11\n",
"# ARK  Arka Gdynia            12\n",
"# ŚLĄ  Śląsk Wrocław          13\n",
"# WPŁ  Wisła Płock            14\n",
"#\n",
"# The names below are compared with the Home/Away columns using an exact string match,\n",
"# so they must use the spelling found in the data. The data spells the club as\n",
"# 'Legia Warsaw' (not 'Legia Warszawa'), hence the English form in the list.\n",
"# NOTE(review): verify the remaining names against scores_data['Home'].unique().\n",
"win_table_20 = [\n",
"    \"Piast Gliwice\",\n",
"    \"Legia Warsaw\",\n",
"    \"Lechia Gdańsk\",\n",
"    \"Cracovia\",\n",
"    \"Jagiellonia Białystok\",\n",
"    \"Zagłębie Lubin\",\n",
"    \"Pogoń Szczecin\",\n",
"    \"Lech Poznań\",\n",
"    \"Wisła Kraków\",\n",
"    \"Korona Kielce\",\n",
"    \"Górnik Zabrze\",\n",
"    \"Arka Gdynia\",\n",
"    \"Śląsk Wrocław\",\n",
"    \"Wisła Płock\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2020-08-21'"
]
},
2022-10-17 18:30:39 +02:00
"execution_count": 6,
2022-10-17 15:15:14 +02:00
"metadata": {},
2022-10-17 18:30:39 +02:00
"output_type": "execute_result"
2022-10-17 15:15:14 +02:00
}
],
"source": [
"# Peek at the 'Date' value of the row labelled 0 to see how dates are formatted.\n",
"scores_data.loc[0, 'Date']"
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 9,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [
{
"name": "stdout",
2022-10-17 18:30:39 +02:00
"output_type": "stream",
2022-10-17 15:15:14 +02:00
"text": [
2022-10-17 18:30:39 +02:00
"WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.\n",
"Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.\n",
"To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.\n",
2022-10-17 15:15:14 +02:00
"Defaulting to user installation because normal site-packages is not writeable\n",
"Collecting wolframalpha\n",
" Downloading wolframalpha-5.0.0-py3-none-any.whl (7.5 kB)\n",
"Collecting jaraco.context\n",
" Downloading jaraco.context-4.1.2-py3-none-any.whl (4.7 kB)\n",
2022-10-17 18:30:39 +02:00
"Requirement already satisfied: xmltodict in /home/mikolaj/.local/lib/python3.8/site-packages (from wolframalpha) (0.12.0)\n",
"Collecting more-itertools\n",
" Downloading more_itertools-8.14.0-py3-none-any.whl (52 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.2/52.2 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: more-itertools, jaraco.context, wolframalpha\n",
"Successfully installed jaraco.context-4.1.2 more-itertools-8.14.0 wolframalpha-5.0.0\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m22.3\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
2022-10-17 15:15:14 +02:00
]
}
],
"source": [
2022-10-17 18:30:39 +02:00
"# %pip installs into the environment of the running kernel; the version is pinned for reproducibility.\n",
"%pip install wolframalpha==5.0.0"
2022-10-17 15:15:14 +02:00
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 11,
2022-10-17 15:15:14 +02:00
"metadata": {},
2022-10-17 18:30:39 +02:00
"outputs": [],
"source": [
"import wolframalpha"
]
2022-10-17 15:15:14 +02:00
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 12,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2022-10-17 18:30:39 +02:00
"'23'"
2022-10-17 15:15:14 +02:00
]
},
2022-10-17 18:30:39 +02:00
"execution_count": 12,
2022-10-17 15:15:14 +02:00
"metadata": {},
2022-10-17 18:30:39 +02:00
"output_type": "execute_result"
2022-10-17 15:15:14 +02:00
}
],
"source": [
"import time\n",
"import re\n",
2022-10-17 18:30:39 +02:00
"def check_weather(date: str, city: str = 'Warsaw') -> int:\n",
"    \"\"\"Look up the average temperature (in °C) for a date and city via Wolfram|Alpha.\n",
"\n",
"    The question 'Weather <date> <city>' is sent to the API and the first\n",
"    'average: <N> °C' fragment found in the stringified response is parsed.\n",
"\n",
"    Args:\n",
"        date: Date text appended to the query, e.g. '2020-08-22'.\n",
"        city: City name appended to the query.\n",
"\n",
"    Returns:\n",
"        The average temperature as an integer number of degrees Celsius.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If the WOLFRAMALPHA_APP_ID environment variable is not set.\n",
"        ValueError: If the response contains no 'average: <N> °C' fragment.\n",
"    \"\"\"\n",
"    import os  # local import so this cell does not depend on an edit elsewhere\n",
"\n",
"    # The app id is a credential: it is read from the environment instead of\n",
"    # being committed to the notebook.\n",
"    api_id = os.environ.get('WOLFRAMALPHA_APP_ID')\n",
"    if not api_id:\n",
"        raise RuntimeError(\n",
"            'Set the WOLFRAMALPHA_APP_ID environment variable to your Wolfram|Alpha app id.'\n",
"        )\n",
"\n",
"    question = 'Weather ' + date + ' ' + city\n",
"    client = wolframalpha.Client(api_id)\n",
"    response_text = str(client.query(question))\n",
"\n",
"    # An optional sign is accepted (ASCII hyphen or the typographic minus) so that\n",
"    # sub-zero temperatures are parsed too.\n",
"    match = re.search(r'average: ([-−]?\\d+) °C', response_text)\n",
"    if match is None:\n",
"        raise ValueError('No average temperature found in the response for: ' + question)\n",
"    # int() does not understand the typographic minus, so normalise it first.\n",
"    return int(match.group(1).replace('−', '-'))\n",
2022-10-17 15:15:14 +02:00
"# Manual check of the lookup for a single date, using the default city.\n",
"check_weather('2020-08-22')"
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 13,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [],
"source": [
"# Maps the weekday abbreviations used in the 'Day' column to numbers (Mon=1 ... Sun=7).\n",
"# NOTE(review): the 'nan' key only matches the literal string 'nan'; a genuinely missing\n",
"# value is a float NaN and would not hit it. The entry is kept for compatibility.\n",
"days = {\n",
"    'Mon': 1,\n",
"    'Tue': 2,\n",
"    'Wed': 3,\n",
"    'Thu': 4,\n",
"    'Fri': 5,\n",
"    'Sat': 6,\n",
"    'Sun': 7,\n",
"    'nan': 0,\n",
"}"
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 24,
2022-10-17 15:15:14 +02:00
"metadata": {},
2022-10-17 18:30:39 +02:00
"outputs": [
{
"data": {
"text/plain": [
"['14', '17', '17', '18', '14', '14']"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
2022-10-17 15:15:14 +02:00
"source": [
"# Turn the match table into plain Python lists, one entry per match row.\n",
"final_data = np.array([])\n",
"\n",
"# Weekday number of every match.\n",
"days_num = [days[day_name] for day_name in scores_data['Day']]\n",
"\n",
"# 1 when the club appears in win_table_20, otherwise 0.\n",
"is_home_top = [int(club in win_table_20) for club in scores_data['Home']]\n",
"is_away_top = [int(club in win_table_20) for club in scores_data['Away']]\n",
"\n",
"# Only the last six match dates are looked up here.\n",
"weather = [check_weather(match_date) for match_date in scores_data['Date'][-6:]]\n",
"\n",
"attendedce = list(scores_data['Attendance'])\n",
"weather"
2022-10-17 15:15:14 +02:00
]
},
{
"cell_type": "code",
2022-10-17 18:30:39 +02:00
"execution_count": 25,
2022-10-17 15:15:14 +02:00
"metadata": {},
"outputs": [],
2022-10-17 18:30:39 +02:00
"source": [
"# Average temperatures in °C, one value per row of scores_data, as obtained from\n",
"# check_weather; presumably hard-coded to avoid querying the API on every run.\n",
"# Stored as ints to agree with check_weather's declared return type.\n",
"weather = [\n",
"    23, 23, 23, 23, 20, 20, 20, 19, 17, 17, 18, 18, 18, 17, 17,\n",
"    17, 14, 14, 16, 16, 16, 17, 17, 18, 12, 12, 12, 12, 12, 13,\n",
"    13, 15, 19, 19, 19, 19, 19, 11, 14, 14, 17, 17, 18, 14, 14,\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(45, 45)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: the number of temperatures should equal the number of match rows.\n",
"len(weather), len(scores_data['Date'])"
]
2022-10-17 15:15:14 +02:00
}
2022-10-17 18:30:39 +02:00
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.10 64-bit",
"metadata": {
"interpreter": {
"hash": "767d51c1340bd893661ea55ea3124f6de3c7a262a8b4abca0554b478b1e2ff90"
}
},
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10-final"
},
"orig_nbformat": 2
},
"nbformat": 4,
"nbformat_minor": 2
}