Sportowe/1_zaj.ipynb

100 KiB
Raw Permalink Blame History

import pandas as pd
import numpy as np
# Load the raw match table from the CSV file into a DataFrame; the bare name
# on the last line makes the notebook display it.
scores_data = pd.read_csv('dane/scores.csv')
scores_data
Wk Day Date Time Home Score Away Attendance Venue Referee Match Report Notes
0 1.0 Fri 2020-08-21 18:00 Zagłębie Lubin 21 Lech Poznań 3968.0 Stadion Zagłębia Lubin Bartosz Frankowski Match Report NaN
1 1.0 Sat 2020-08-22 15:00 Cracovia 21 Pogoń Szczecin 4053.0 Stadion Cracovii Paweł Raczkowski Match Report NaN
2 1.0 Sat 2020-08-22 17:30 Śląsk Wrocław 20 Piast Gliwice 5259.0 Stadion Miejski Wojciech Myć Match Report NaN
3 1.0 Sat 2020-08-22 20:00 RKS Raków 12 Legia Warsaw 1985.0 Stadion GKS-u Jarosław Przybył Match Report NaN
4 1.0 Sun 2020-08-23 12:30 Wisła Płock 11 Stal Mielec 1318.0 Stadion im. Kazimierza Górskiego Sebastian Jarzębak Match Report NaN
... ... ... ... ... ... ... ... ... ... ... ... ...
275 30.0 Sun 2021-05-16 17:30 Pogoń Szczecin 13 RKS Raków NaN Stadion Miejski im. Floriana Krygiera Sebastian Krasny Match Report NaN
276 30.0 Sun 2021-05-16 17:30 Piast Gliwice 23 Wisła Kraków NaN Stadion Miejski Mariusz Zlotek Match Report NaN
277 30.0 Sun 2021-05-16 17:30 Cracovia 01 Warta Poznań 3670.0 Stadion Cracovii Pawel Malec Match Report NaN
278 30.0 Sun 2021-05-16 17:30 Śląsk Wrocław 11 Stal Mielec NaN Stadion Miejski Tomasz Kwiatkowski Match Report NaN
279 30.0 Sun 2021-05-16 17:30 Wisła Płock 40 Zagłębie Lubin NaN Stadion im. Kazimierza Górskiego Paweł Raczkowski Match Report NaN

280 rows × 12 columns


# Keep only the six columns used below, then drop every row that has a
# missing value in any of them, and show how many matches remain.
scores_data = scores_data.loc[
    :, ['Day', 'Date', 'Score', 'Attendance', 'Home', 'Away']
].dropna()
len(scores_data)
45
import re

# For every match, in row order, record whether the HOME team won the last
# match it appeared in (as home or away side) among the rows of scores_data.
# A team that has not been seen yet counts as "did not win" (False).
# NOTE(review): scores_data has already been filtered with dropna(), so
# "previous match" means the previous match that is still in the table.
previous_won = []
win_history = {}  # team name -> True if the team won its most recent match

for idx, row in scores_data.iterrows():
    # Extract the two goal counts as integers. Splitting on an empty
    # separator raises ValueError, and comparing the pieces as strings
    # would rank "10" below "9".
    goals = re.findall(r"\d+", row["Score"])
    if len(goals) != 2:
        raise ValueError(f"Unrecognised score {row['Score']!r} in row {idx}")
    home_goals, away_goals = map(int, goals)

    previous_won.append(win_history.get(row["Home"], False))

    # A draw is a win for neither side, so each team is compared explicitly
    # instead of giving the away team "not home_won".
    win_history[row["Home"]] = home_goals > away_goals
    win_history[row["Away"]] = away_goals > home_goals

previous_won
[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 True,
 False]
# Reference league standings the lists below are copied from
# (team code, club, final position):
# PIA   Piast Gliwice (gold crown icon)    1
# LEG   Legia Warszawa                     2
# LGD   Lechia Gdańsk (gold cup icon)      3
# CRA   Cracovia                           4
# JAG   Jagiellonia Białystok              5
# ZLU   Zagłębie Lubin                     6
# POG   Pogoń Szczecin                     7
# LPO   Lech Poznań                        8
# WKR   Wisła Kraków                       9
# KOR   Korona Kielce                      10
# GZA   Górnik Zabrze                      11
# ARK   Arka Gdynia                        12
# ŚLĄ   Śląsk Wrocław                      13
# WPŁ   Wisła Płock                        14

# Clubs in table order (index 0 = first place). These names are matched
# against the 'Home' / 'Away' columns of the scores table with `in`, so they
# must use exactly the spelling found there: the scores table writes
# "Legia Warsaw", not "Legia Warszawa".
# NOTE(review): only part of the scores table is visible here; confirm that
# the remaining club names also match its spelling.
win_table_20 = [
    "Piast Gliwice",
    "Legia Warsaw",
    "Lechia Gdańsk",
    "Cracovia",
    "Jagiellonia Białystok",
    "Zagłębie Lubin",
    "Pogoń Szczecin",
    "Lech Poznań",
    "Wisła Kraków",
    "Korona Kielce",
    "Górnik Zabrze",
    "Arka Gdynia",
    "Śląsk Wrocław",
    "Wisła Płock",
]

# The four best-placed clubs, derived from the full list so the two can
# never drift apart.
win_table_4 = win_table_20[:4]
# Date of the row whose index label is 0 (label-based lookup, not positional).
scores_data.loc[0, 'Date']
'2020-08-21'
!pip3 install wolframalpha
Defaulting to user installation because normal site-packages is not writeable
Collecting wolframalpha
  Downloading wolframalpha-5.0.0-py3-none-any.whl (7.5 kB)
Collecting jaraco.context
  Downloading jaraco.context-4.1.2-py3-none-any.whl (4.7 kB)
Collecting more-itertools
  Downloading more_itertools-8.14.0-py3-none-any.whl (52 kB)
Collecting xmltodict
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict, more-itertools, jaraco.context, wolframalpha
Successfully installed jaraco.context-4.1.2 more-itertools-8.14.0 wolframalpha-5.0.0 xmltodict-0.13.0
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: Ignoring invalid distribution -ip (c:\users\mrpol\appdata\roaming\python\python39\site-packages)
WARNING: You are using pip version 21.1.1; however, version 22.2.2 is available.
You should consider upgrading via the 'c:\Program Files\Python39\python.exe -m pip install --upgrade pip' command.
import wolframalpha
import time
import re
def check_weather(date: str, city: str = 'Warsaw') -> int:
    """Return the average temperature in °C that Wolfram|Alpha reports.

    Sends the query ``"Weather <date> <city>"`` to Wolfram|Alpha and extracts
    the first ``average: <n> °C`` fragment from the textual form of the
    response.

    Args:
        date: Date to ask about, inserted into the query verbatim
            (e.g. ``'2020-08-22'``).
        city: City name, inserted into the query verbatim.

    Returns:
        The average temperature as a whole number of degrees Celsius;
        negative values are supported.

    Raises:
        IndexError: If the response contains no ``average: <n> °C`` fragment.
    """
    # NOTE(review): the API key is hard-coded in the source; consider loading
    # it from configuration or the environment instead of committing it.
    api_id = '5KAEPX-EXX246XAW7'
    question = 'Weather ' + date + ' ' + city
    client = wolframalpha.Client(api_id)
    response_text = str(client.query(question))

    # Accept an optional sign (ASCII hyphen or Unicode minus) so sub-zero
    # averages are parsed instead of being skipped.
    match = re.search(r'average: ([-−]?\d+) °C', response_text)
    if match is None:
        # Same exception type as indexing an empty findall() result, but with
        # a message that says what went wrong.
        raise IndexError(f'no average temperature found for query {question!r}')
    return int(match.group(1).replace('−', '-'))
# Example lookup for one date with the default city; this calls
# check_weather, which queries the Wolfram|Alpha service.
check_weather('2020-08-22')
23
scores_data['Day'].unique()

# Weekday abbreviation -> number (Mon=1 ... Sun=7); the string 'nan' maps to 0.
days = dict(zip(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], range(1, 8)))
days['nan'] = 0

# Numeric weekday of every match, in row order.
days_num = [days[day_name] for day_name in scores_data['Day']]
# 0/1 flags per match: is the home (resp. away) club listed in the full
# reference table, and is it listed among its top four?
is_home_top = [int(team in win_table_20) for team in scores_data['Home']]
is_home_top_4 = [int(team in win_table_4) for team in scores_data['Home']]
is_away_top = [int(team in win_table_20) for team in scores_data['Away']]
is_away_top_4 = [int(team in win_table_4) for team in scores_data['Away']]
    

# The temperatures below were originally fetched one date at a time with
# check_weather(d) over scores_data['Date'] and are kept here as literal
# values, stored as strings, so the service is not queried on every run.
attendedce = list(scores_data['Attendance'])
weather = [
    '23', '23', '23', '23', '20', '20', '20', '19', '17',
    '17', '18', '18', '18', '17', '17', '17', '14', '14',
    '16', '16', '16', '17', '17', '18', '12', '12', '12',
    '12', '12', '13', '13', '15', '19', '19', '19', '19',
    '19', '11', '14', '14', '17', '17', '18', '14', '14',
]
# Sanity check: one temperature per remaining match.
len(weather), len(scores_data['Date'])
(45, 45)
# Feature matrix with one ROW per feature and one COLUMN per match:
# weekday number, home-club-in-table flag, away-club-in-table flag,
# temperature, attendance.
# The temperatures are stored as strings; passing them unconverted makes
# NumPy coerce the whole matrix to a string dtype, so they are converted
# here and a float dtype is requested explicitly.
final_data = np.array(
    [days_num, is_home_top, is_away_top, [float(t) for t in weather], attendedce],
    dtype=float,
)
final_data
array([['5', '6', '6', '6', '7', '7', '7', '1', '5', '5', '6', '6', '6',
        '7', '7', '7', '5', '5', '6', '6', '6', '7', '7', '1', '5', '5',
        '6', '6', '6', '7', '7', '1', '5', '5', '6', '6', '6', '1', '5',
        '5', '6', '6', '7', '7', '7'],
       ['1', '1', '1', '0', '1', '0', '1', '0', '1', '0', '1', '1', '0',
        '1', '0', '1', '0', '1', '1', '0', '1', '1', '1', '0', '0', '1',
        '1', '1', '0', '1', '1', '1', '1', '1', '1', '1', '0', '0', '0',
        '1', '0', '1', '1', '1', '1'],
       ['1', '1', '1', '0', '0', '1', '0', '1', '0', '1', '0', '1', '0',
        '1', '1', '1', '0', '0', '0', '1', '1', '1', '1', '1', '0', '1',
        '0', '1', '1', '1', '0', '0', '0', '1', '0', '0', '1', '1', '1',
        '1', '0', '1', '1', '1', '0'],
       ['23', '23', '23', '23', '20', '20', '20', '19', '17', '17', '18',
        '18', '18', '17', '17', '17', '14', '14', '16', '16', '16', '17',
        '17', '18', '12', '12', '12', '12', '12', '13', '13', '15', '19',
        '19', '19', '19', '19', '11', '14', '14', '17', '17', '18', '14',
        '14'],
       ['3968.0', '4053.0', '5259.0', '1985.0', '1318.0', '2381.0',
        '7368.0', '5070.0', '2486.0', '3420.0', '5424.0', '5044.0',
        '10000.0', '2578.0', '4308.0', '8226.0', '5065.0', '1500.0',
        '5439.0', '1872.0', '11535.0', '1877.0', '10270.0', '1675.0',
        '4397.0', '7007.0', '4507.0', '1821.0', '11000.0', '2533.0',
        '17546.0', '2649.0', '1411.0', '12492.0', '5127.0', '4877.0',
        '5881.0', '2714.0', '1631.0', '6438.0', '2987.0', '2882.0',
        '3215.0', '4890.0', '3670.0']], dtype='<U32')
import matplotlib.pyplot as plt

# Convert the temperatures to floats, then scatter attendance against
# temperature (blue circle markers).
weather = list(map(float, weather))

plt.plot(weather, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x200ef599d00>]
# 2x2 correlation matrix between temperature and attendance; the
# off-diagonal entries are the correlation coefficient.
temperature_values = np.array(weather)
attendance_values = np.array(attendedce)
r = np.corrcoef(temperature_values, attendance_values)
r
array([[ 1.        , -0.11323007],
       [-0.11323007,  1.        ]])
import matplotlib.pyplot as plt

# Scatter plot of attendance against the weekday number (blue circle markers).
plt.plot(days_num, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x200ef7433a0>]
# Convert the weekday numbers to floats and compute their 2x2 correlation
# matrix with attendance.
days_num = list(map(float, days_num))
r = np.corrcoef(np.array(days_num), np.array(attendedce))
r
array([[1.       , 0.1964238],
       [0.1964238, 1.       ]])
import matplotlib.pyplot as plt

# Scatter plot of attendance against the 0/1 flag "home club is in
# win_table_20" (blue circle markers).
plt.plot(is_home_top, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x7feb0ee77a00>]
import matplotlib.pyplot as plt

# Scatter plot of attendance against the 0/1 flag "home club is in
# win_table_4" (blue circle markers).
plt.plot(is_home_top_4, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x200f07a6ac0>]
# Scatter plot of attendance against the 0/1 flag "away club is in
# win_table_20" (blue circle markers).
plt.plot(is_away_top, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x7feb0ee5edf0>]
# Scatter plot of attendance against the 0/1 flag "away club is in
# win_table_4" (blue circle markers).
plt.plot(is_away_top_4, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x200f07dcc10>]
# Flag is truthy only when BOTH the home and the away club are in
# win_table_20; then scatter attendance against it (blue circle markers).
is_home_and_away_top = [
    home_flag and away_flag
    for home_flag, away_flag in zip(is_home_top, is_away_top)
]
plt.plot(is_home_and_away_top, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x200f14b06a0>]
# 2x2 correlation matrix between the "both clubs in the table" flag
# (converted to floats) and attendance.
temp = list(map(float, is_home_and_away_top))
r = np.corrcoef(np.array(temp), np.array(attendedce))
r
array([[1.       , 0.1384707],
       [0.1384707, 1.       ]])
# Scatter plot of attendance against the boolean "home team won its previous
# match" flag (blue circle markers).
plt.plot(previous_won, attendedce, 'bo')  
[<matplotlib.lines.Line2D at 0x200f58aaa60>]