#!/usr/bin/env python
# coding: utf-8
"""Store FiveThirtyEight La Liga title predictions after each matchday.

Provenance: added as ``laliga.py`` by commit
b7316839890df748dad63fc830bc416617ae7a49 (Koushik R Kirugulige,
Mon, 12 Oct 2020 17:17:54 +0530), subject "laliga 538 prediction storage":
copies all the La Liga predictions after completion of each matchday and
stores them in a CSV.
Data source: https://projects.fivethirtyeight.com/soccer-predictions/la-liga/

Workflow (see ``main``):
1. Compare yesterday's date (MM/DD/YYYY) with the ``Date`` column of
   ``LaLigaMatchday.csv``; stop if yesterday did not close a matchday.
2. Scrape the forecast table and keep the team name and the
   "win league" percentage.
3. Create ``la liga.csv`` on the first run; on later runs append the new
   percentage to each team's stored list of percentages.
"""

import ast
import datetime
import logging
import os
import re

import pandas as pd

FORECAST_URL = 'https://projects.fivethirtyeight.com/soccer-predictions/la-liga/'
MATCHDAY_FILE = 'LaLigaMatchday.csv'
RAW_WIN_COLUMN = 'win La Ligawin league'

# Output CSV that accumulates one percentage per recorded matchday.
filename = 'la liga.csv'

# Root logger; main() attaches the laliga.log file handler to it.
logger = logging.getLogger()


def clean(team_name):
    """Return *team_name* with every " pts" marker and all digits removed.

    Example: ``"Barcelona12 pts"`` becomes ``"Barcelona"``.
    """
    team_name = re.sub(r" pts", "", team_name)
    return re.sub(r"[0-9]+", "", team_name)


def merge_col(row):
    """Return the stored 'Win League' history of *row* plus its new value.

    *row* must provide the keys ``'Win League'`` (history so far) and
    ``'new column'`` (latest percentage).  The history may be:

    * a single number or numeric string (only one matchday stored so far),
    * a list, or
    * the textual form of a list such as ``"[45, 40]"`` -- this is what
      ``pandas.read_csv`` returns once a list has been written to the CSV.

    Raises ``ValueError`` if a scalar value cannot be converted to ``int``.
    """
    history = row['Win League']
    if isinstance(history, str) and history.strip().startswith('['):
        # A list round-tripped through the CSV comes back as text; without
        # parsing it, int("[45, 40]") would fail from the third run onwards.
        history = ast.literal_eval(history.strip())

    if isinstance(history, (list, tuple)):
        lst = list(history)
    else:
        lst = [int(history)]

    lst.append(int(row['new column']))
    return lst


def main():
    """Record the latest predictions if yesterday closed a matchday."""
    logging.basicConfig(filename="laliga.log", format='%(asctime)s %(message)s', filemode='w')
    logger.setLevel(logging.DEBUG)

    # Yesterday in MM/DD/YYYY, the format compared against the Date column.
    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%m/%d/%Y")
    df_match_day = pd.read_csv(MATCHDAY_FILE)

    if yesterday not in df_match_day.Date.values:
        logger.info("not an end of matchday")
        return

    # Row index of the matching date in the matchday file.
    matchday_no = df_match_day.index[df_match_day['Date'] == yesterday].to_list()[0]
    logger.info("it is end of matchday number: %d", matchday_no)

    tables = pd.read_html(FORECAST_URL, attrs={'class': 'forecast-table'}, header=2)
    df = pd.DataFrame(tables[0])
    df = df[['team', RAW_WIN_COLUMN]].rename(columns={RAW_WIN_COLUMN: 'Win League'})

    # regex is passed explicitly: pandas 2.0 changed the default of
    # Series.str.replace to regex=False, which would silently stop the old
    # r'\%' / r'\<' patterns from matching.  '>' is stripped as well so a
    # bound such as ">99%" (if the site reports one) still converts to int.
    df['Win League'] = df['Win League'].str.replace(r'[%<>]', '', regex=True)
    df['team'] = df['team'].apply(clean)
    df = df.sort_values(by=['team'])

    if not os.path.exists(filename):
        # First run: nothing to merge with yet, just store today's table.
        df.to_csv(filename, index=False, encoding='utf-8-sig')
        return

    main_df = pd.read_csv(filename)
    print(main_df.head())

    # Both frames carry 'Win League'; merge suffixes them _x (stored) and
    # _y (new), which are renamed to the keys merge_col expects.
    main_df = pd.merge(main_df, df, on='team')
    main_df = main_df.rename(columns={'Win League_x': 'Win League', 'Win League_y': 'new column'})

    main_df['Win League'] = main_df.apply(merge_col, axis=1)
    main_df = main_df.drop(['new column'], axis=1)
    main_df.to_csv(filename, index=False, encoding='utf-8-sig')
    logger.info("Done writing to %s", filename)


if __name__ == "__main__":
    main()