13 KiB
13 KiB
import pandas as pd
import numpy as np
# Read the match list from the local CSV file into a DataFrame.
scores_data = pd.read_csv(filepath_or_buffer='dane/scores.csv')
# Bare expression: the notebook renders the DataFrame as the cell output.
scores_data
Wk | Day | Date | Time | Home | Score | Away | Attendance | Venue | Referee | Match Report | Notes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | Fri | 2020-08-21 | 18:00 | Zagłębie Lubin | 2–1 | Lech Poznań | 3968.0 | Stadion Zagłębia Lubin | Bartosz Frankowski | Match Report | NaN |
1 | 1.0 | Sat | 2020-08-22 | 15:00 | Cracovia | 2–1 | Pogoń Szczecin | 4053.0 | Stadion Cracovii | Paweł Raczkowski | Match Report | NaN |
2 | 1.0 | Sat | 2020-08-22 | 17:30 | Śląsk Wrocław | 2–0 | Piast Gliwice | 5259.0 | Stadion Miejski | Wojciech Myć | Match Report | NaN |
3 | 1.0 | Sat | 2020-08-22 | 20:00 | RKS Raków | 1–2 | Legia Warsaw | 1985.0 | Stadion GKS-u | Jarosław Przybył | Match Report | NaN |
4 | 1.0 | Sun | 2020-08-23 | 12:30 | Wisła Płock | 1–1 | Stal Mielec | 1318.0 | Stadion im. Kazimierza Górskiego | Sebastian Jarzębak | Match Report | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
275 | 30.0 | Sun | 2021-05-16 | 17:30 | Pogoń Szczecin | 1–3 | RKS Raków | NaN | Stadion Miejski im. Floriana Krygiera | Sebastian Krasny | Match Report | NaN |
276 | 30.0 | Sun | 2021-05-16 | 17:30 | Piast Gliwice | 2–3 | Wisła Kraków | NaN | Stadion Miejski | Mariusz Zlotek | Match Report | NaN |
277 | 30.0 | Sun | 2021-05-16 | 17:30 | Cracovia | 0–1 | Warta Poznań | 3670.0 | Stadion Cracovii | Pawel Malec | Match Report | NaN |
278 | 30.0 | Sun | 2021-05-16 | 17:30 | Śląsk Wrocław | 1–1 | Stal Mielec | NaN | Stadion Miejski | Tomasz Kwiatkowski | Match Report | NaN |
279 | 30.0 | Sun | 2021-05-16 | 17:30 | Wisła Płock | 4–0 | Zagłębie Lubin | NaN | Stadion im. Kazimierza Górskiego | Paweł Raczkowski | Match Report | NaN |
280 rows × 12 columns
# Narrow the frame to the five columns used below, then discard every row
# that has a missing value in any of them.
scores_data = (
    scores_data
    .loc[:, ['Day', 'Date', 'Attendance', 'Home', 'Away']]
    .dropna()
)
# Number of matches left after dropping incomplete rows.
scores_data.shape[0]
45
# PIA Piast Gliwice Simple gold crown.svg 1
# LEG Legia Warszawa 2
# LGD Lechia Gdańsk Simple gold cup.svg 3
# CRA Cracovia 4
# JAG Jagiellonia Białystok 5
# ZLU Zagłębie Lubin 6
# POG Pogoń Szczecin 7
# LPO Lech Poznań 8
# WKR Wisła Kraków 9
# KOR Korona Kielce 10
# GZA Górnik Zabrze 11
# ARK Arka Gdynia 12
# ŚLĄ Śląsk Wrocław 13
# WPŁ Wisła Płock 14
# Clubs from the reference league table, in table order (position 1 first).
# Names must match the spelling used in the 'Home'/'Away' columns of the
# scores dataset, because membership is tested with plain string equality.
win_table_20 = [
    "Piast Gliwice",
    # The dataset spells this club "Legia Warsaw" (not "Legia Warszawa"),
    # so the English spelling is used here to make the lookup succeed.
    "Legia Warsaw",
    "Lechia Gdańsk",
    "Cracovia",
    # NOTE(review): the spellings of the clubs not visible in the dataset
    # preview (e.g. Jagiellonia, Górnik, Lechia) should be checked against
    # scores_data['Home'].unique().
    "Jagiellonia Białystok",
    "Zagłębie Lubin",
    "Pogoń Szczecin",
    "Lech Poznań",
    "Wisła Kraków",
    "Korona Kielce",
    "Górnik Zabrze",
    "Arka Gdynia",
    "Śląsk Wrocław",
    "Wisła Płock",
]
# Peek at the first remaining date. Positional access is used because
# dropna() keeps the original row labels, so label 0 is not guaranteed
# to exist in the filtered frame.
scores_data['Date'].iloc[0]
'2020-08-21'
!pip install wolframalpha
Defaulting to user installation because normal site-packages is not writeable Collecting wolframalpha Downloading wolframalpha-5.0.0-py3-none-any.whl (7.5 kB) Collecting xmltodict Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB) Requirement already satisfied: more-itertools in /usr/lib/python3/dist-packages (from wolframalpha) (4.2.0) Collecting jaraco.context Downloading jaraco.context-4.1.2-py3-none-any.whl (4.7 kB) Installing collected packages: xmltodict, jaraco.context, wolframalpha Successfully installed jaraco.context-4.1.2 wolframalpha-5.0.0 xmltodict-0.13.0 [33mWARNING: You are using pip version 21.2.4; however, version 22.3 is available. You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
import wolframalpha
import time
import re
def check_weather(date: str, city: str = 'Warsaw', client=None):
    """Return the average temperature reported for *city* on *date*.

    The question ``'Weather <date> <city>'`` is sent to Wolfram|Alpha and the
    first ``average: <n> °C`` entry in the textual form of the response is
    extracted.

    Args:
        date: Date text inserted verbatim into the query, e.g. '2020-08-22'.
        city: City name inserted verbatim into the query.
        client: Optional object exposing ``query(question)``. When omitted,
            a ``wolframalpha.Client`` is created with the built-in app id.

    Returns:
        The average temperature in degrees Celsius as a string of digits,
        prefixed with '-' for sub-zero values.

    Raises:
        IndexError: If the response contains no ``average: <n> °C`` entry.
    """
    if client is None:
        # NOTE(review): the app id is hard-coded in source; consider loading
        # it from configuration or the environment instead.
        api_id = '5KAEPX-EXX246XAW7'
        client = wolframalpha.Client(api_id)

    question = 'Weather ' + date + ' ' + city
    response_text = str(client.query(question))

    # Accept an optional sign (ASCII hyphen or U+2212 MINUS SIGN) so that
    # sub-zero averages are matched instead of being skipped.
    found = re.search(r'average: ([-\u2212]?\d+) °C', response_text)
    if found is None:
        raise IndexError(
            'no average temperature found in response for ' + repr(question)
        )
    # Normalise the typographic minus so callers can pass the value to int().
    return found.group(1).replace('\u2212', '-')
# Sample lookup for a single date, using the default city.
check_weather(date='2020-08-22')
'7'
# List the distinct values that occur in the 'Day' column.
scores_data.loc[:, 'Day'].unique()
# Weekday abbreviation -> numeric code (Mon=1 ... Sun=7).
days = {
    abbreviation: code
    for code, abbreviation in enumerate(
        ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'),
        start=1,
    )
}
# The string 'nan' is mapped to code 0.
days['nan'] = 0
# Build one feature list per column of interest; every list has one entry
# per row of scores_data, in row order.
final_data = np.array([])

# Numeric weekday code of each match.
days_num = [days[d] for d in scores_data['Day']]

# 1 when the home / away club appears in win_table_20, otherwise 0.
is_home_top = [int(team in win_table_20) for team in scores_data['Home']]
is_away_top = [int(team in win_table_20) for team in scores_data['Away']]

# Average temperature on each match date. The original loop was a copy of
# the away-team loop: it appended to is_away_top and left this list empty.
# NOTE(review): check_weather is called with its default city for every
# match; pass the venue city if per-location weather is wanted.
weather = [check_weather(d) for d in scores_data['Date']]