17 KiB
17 KiB
import pandas as pd
import numpy as np
# Read the match table from the CSV file and display it.
scores_data = pd.read_csv('dane/scores.csv')
scores_data
Wk | Day | Date | Time | Home | Score | Away | Attendance | Venue | Referee | Match Report | Notes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | Fri | 2020-08-21 | 18:00 | Zagłębie Lubin | 2–1 | Lech Poznań | 3968.0 | Stadion Zagłębia Lubin | Bartosz Frankowski | Match Report | NaN |
1 | 1.0 | Sat | 2020-08-22 | 15:00 | Cracovia | 2–1 | Pogoń Szczecin | 4053.0 | Stadion Cracovii | Paweł Raczkowski | Match Report | NaN |
2 | 1.0 | Sat | 2020-08-22 | 17:30 | Śląsk Wrocław | 2–0 | Piast Gliwice | 5259.0 | Stadion Miejski | Wojciech Myć | Match Report | NaN |
3 | 1.0 | Sat | 2020-08-22 | 20:00 | RKS Raków | 1–2 | Legia Warsaw | 1985.0 | Stadion GKS-u | Jarosław Przybył | Match Report | NaN |
4 | 1.0 | Sun | 2020-08-23 | 12:30 | Wisła Płock | 1–1 | Stal Mielec | 1318.0 | Stadion im. Kazimierza Górskiego | Sebastian Jarzębak | Match Report | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
275 | 30.0 | Sun | 2021-05-16 | 17:30 | Pogoń Szczecin | 1–3 | RKS Raków | NaN | Stadion Miejski im. Floriana Krygiera | Sebastian Krasny | Match Report | NaN |
276 | 30.0 | Sun | 2021-05-16 | 17:30 | Piast Gliwice | 2–3 | Wisła Kraków | NaN | Stadion Miejski | Mariusz Zlotek | Match Report | NaN |
277 | 30.0 | Sun | 2021-05-16 | 17:30 | Cracovia | 0–1 | Warta Poznań | 3670.0 | Stadion Cracovii | Pawel Malec | Match Report | NaN |
278 | 30.0 | Sun | 2021-05-16 | 17:30 | Śląsk Wrocław | 1–1 | Stal Mielec | NaN | Stadion Miejski | Tomasz Kwiatkowski | Match Report | NaN |
279 | 30.0 | Sun | 2021-05-16 | 17:30 | Wisła Płock | 4–0 | Zagłębie Lubin | NaN | Stadion im. Kazimierza Górskiego | Paweł Raczkowski | Match Report | NaN |
280 rows × 12 columns
# Keep only the five columns used below, then discard every match that has a
# missing value in any of them. The trailing expression shows how many
# matches are left.
scores_data = scores_data.loc[
    :, ['Day', 'Date', 'Attendance', 'Home', 'Away']
].dropna()
scores_data.shape[0]
45
# PIA Piast Gliwice Simple gold crown.svg 1
# LEG Legia Warszawa 2
# LGD Lechia Gdańsk Simple gold cup.svg 3
# CRA Cracovia 4
# JAG Jagiellonia Białystok 5
# ZLU Zagłębie Lubin 6
# POG Pogoń Szczecin 7
# LPO Lech Poznań 8
# WKR Wisła Kraków 9
# KOR Korona Kielce 10
# GZA Górnik Zabrze 11
# ARK Arka Gdynia 12
# ŚLĄ Śląsk Wrocław 13
# WPŁ Wisła Płock 14
# Clubs from the league table listed in the comment above, in the same order
# (positions 1-14).
#
# The spellings have to match the 'Home'/'Away' values of scores.csv exactly,
# because membership is checked with plain string equality.
# NOTE(review): only the names that appear in the displayed rows of the
# dataset could be checked; "Lechia Gdańsk", "Jagiellonia Białystok",
# "Korona Kielce", "Górnik Zabrze" and "Arka Gdynia" should be verified
# against the CSV.
win_table_20 = [
    "Piast Gliwice",
    # The dataset spells this club "Legia Warsaw"; the previous entry
    # "Legia Warszawa" never matched any row.
    "Legia Warsaw",
    "Lechia Gdańsk",
    "Cracovia",
    "Jagiellonia Białystok",
    "Zagłębie Lubin",
    "Pogoń Szczecin",
    "Lech Poznań",
    "Wisła Kraków",
    "Korona Kielce",
    "Górnik Zabrze",
    "Arka Gdynia",
    "Śląsk Wrocław",
    "Wisła Płock",
]
# Show the first remaining date. Positional access is used on purpose: after
# dropna() the index keeps its original labels, so the label 0 only exists
# when the very first row of the CSV survived the filtering.
scores_data['Date'].iloc[0]
'2020-08-21'
!pip3 install wolframalpha
WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip. Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue. To avoid this problem you can invoke Python with '-m pip' instead of running pip directly. Defaulting to user installation because normal site-packages is not writeable Collecting wolframalpha Downloading wolframalpha-5.0.0-py3-none-any.whl (7.5 kB) Collecting jaraco.context Downloading jaraco.context-4.1.2-py3-none-any.whl (4.7 kB) Requirement already satisfied: xmltodict in /home/mikolaj/.local/lib/python3.8/site-packages (from wolframalpha) (0.12.0) Collecting more-itertools Downloading more_itertools-8.14.0-py3-none-any.whl (52 kB) [2K [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.2/52.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m [?25hInstalling collected packages: more-itertools, jaraco.context, wolframalpha Successfully installed jaraco.context-4.1.2 more-itertools-8.14.0 wolframalpha-5.0.0 [1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m22.3[0m [1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
import wolframalpha
import time
import re
def check_weather(date: str, city: str = 'Warsaw') -> int:
    """Return the average temperature Wolfram|Alpha reports for a date and city.

    Sends the free-text query ``'Weather <date> <city>'`` and extracts the
    first ``average: <n> °C`` figure from the stringified response.

    Args:
        date: Date text inserted verbatim into the query, e.g. '2020-08-22'.
        city: City name inserted verbatim into the query.

    Returns:
        The average temperature in whole degrees Celsius.

    Raises:
        IndexError: If the response contains no ``average: <n> °C`` figure.
            The exception type is the one the previous implementation raised
            in that situation; it now carries a descriptive message.
    """
    import os

    # SECURITY: an API key committed to source is visible to everyone with
    # access to the file. Prefer supplying it through the environment; the
    # literal is kept only as a fallback so existing runs keep working.
    api_id = os.environ.get('WOLFRAM_ALPHA_APP_ID', '5KAEPX-EXX246XAW7')
    question = 'Weather ' + date + ' ' + city
    client = wolframalpha.Client(api_id)
    res = client.query(question)
    response_text = str(res)

    # Accept an optional sign so sub-zero averages are parsed instead of being
    # skipped. Both ASCII '-' and the Unicode minus sign are allowed.
    # NOTE(review): assumes the sign is written directly before the digits —
    # confirm against a real winter response.
    match = re.search(r'average: ([-\u2212]?\d+) °C', response_text)
    if match is None:
        raise IndexError(
            'no average temperature found in the response to ' + repr(question)
        )
    # int() does not understand the Unicode minus sign, so normalise it first.
    return int(match.group(1).replace('\u2212', '-'))
# Try the lookup on a single date, using the default city ('Warsaw').
check_weather('2020-08-22')
'23'
# List the distinct labels present in the 'Day' column.
scores_data['Day'].unique()
# Lookup from weekday abbreviation to its number (Mon=1 ... Sun=7).
# The extra key is the *string* 'nan', mapped to 0.
days = dict(
    zip(
        ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'nan'),
        (1, 2, 3, 4, 5, 6, 7, 0),
    )
)
# Starts out as an empty array; nothing in this section fills it.
final_data = np.array([])

# Weekday of every match as a number, looked up in `days`.
days_num = [days[d] for d in scores_data['Day']]

# 1 when the club is listed in win_table_20, otherwise 0.
is_home_top = [int(d in win_table_20) for d in scores_data['Home']]
is_away_top = [int(d in win_table_20) for d in scores_data['Away']]

# Only the last six dates are looked up here, so at this point `weather` is
# shorter than the other lists. Kept as an explicit loop because every
# iteration performs a remote query.
weather = []
for d in scores_data['Date'][-6:]:
    temp = check_weather(d)
    weather.append(temp)

# Attendance figures as a plain Python list.
attendedce = list(scores_data['Attendance'])
weather
['14', '17', '17', '18', '14', '14']
# Hard-coded temperatures, one per remaining match (45 values).
# NOTE(review): presumably in the same order as scores_data['Date'] — the last
# six values equal the result of the six-date lookup loop; confirm the rest.
# NOTE: the values are strings, whereas check_weather() returns an int.
weather = [
    '23', '23', '23', '23', '20', '20', '20', '19',
    '17', '17', '18', '18', '18', '17', '17', '17',
    '14', '14', '16', '16', '16', '17', '17', '18',
    '12', '12', '12', '12', '12', '13', '13', '15',
    '19', '19', '19', '19', '19', '11', '14', '14',
    '17', '17', '18', '14', '14',
]
# Compare the number of temperature entries with the number of match dates.
len(weather), len(scores_data['Date'])
(45, 45)