SystemyRozmyte/data_filters.py

157 lines
5.4 KiB
Python

import pandas as pd
from fuzzy import *
def save_to_csv(filename, dataframe):
    """Append ``dataframe`` to the CSV file ``filename``.

    The file is opened in append mode and the index is not written. A header
    row is emitted whenever ``dataframe`` is non-empty, i.e. on every call
    that writes rows, not only on the first one.

    Args:
        filename: Target handed to ``DataFrame.to_csv``.
        dataframe: Frame whose rows are appended.
    """
    # ``DataFrame.append`` was removed in pandas 2.0; ``dataframe.empty`` is
    # the same check the old ``pd.DataFrame().append(dataframe).empty`` made.
    # NOTE(review): repeated calls on the same file repeat the header row;
    # confirm whether callers rely on that.
    dataframe.to_csv(filename, mode='a', index=False, header=not dataframe.empty)
def split_to_parts(dataframe, part_size):
    """Lazily yield consecutive positional slices of ``dataframe``.

    Each yielded slice holds at most ``part_size`` rows; the last one may be
    shorter. Nothing is yielded for an empty frame.
    """
    chunk_starts = range(0, len(dataframe), part_size)
    yield from (
        dataframe.iloc[start:start + part_size] for start in chunk_starts
    )
def przetwarzaj_co_50_rekordow(plik_wejsciowy, plik_wyjsciowy):
    """Read the CSV file ``plik_wejsciowy`` with pandas.

    NOTE(review): as written, the loaded frame is discarded, nothing is
    returned and ``plik_wyjsciowy`` is never used. The name suggests
    processing in batches of 50 records; confirm whether the rest of the
    implementation is missing.
    """
    pd.read_csv(plik_wejsciowy)
def generateTrainingData(dataframe):
    """Return the match-identifying columns plus the raw home/away stats.

    The selected columns are, in order: ``season``, ``date``, ``home_team``,
    ``away_team``, ``result_full`` and then the ``home_``/``away_`` pair for
    each of ``passes``, ``possession`` and ``shots``.
    """
    match_columns = ['season', 'date', 'home_team', 'away_team', 'result_full']
    stat_columns = [
        f'{side}_{stat}'
        for stat in ('passes', 'possession', 'shots')
        for side in ('home', 'away')
    ]
    return dataframe[match_columns + stat_columns]
def generateFuzzyLogicData(dataframe):
    """Return the match-identifying columns plus the computed ``c_*`` features.

    Every feature is selected as a ``c_home_*`` / ``c_away_*`` pair. The
    previous list repeated the ``c_away_*`` name for the shots, defence and
    ``aggression_5btw`` features, which produced duplicate columns and dropped
    the home-side values.

    NOTE(review): this assumes the frame carries the ``c_home_*`` counterparts
    of those features; confirm against the code that computes them.
    """
    columns = [
        'season', 'date', 'home_team', 'away_team', 'result_full',
        'c_home_form_5m', 'c_away_form_5m',
        # Disabled: 'c_home_passes', 'c_away_passes',
        # 'c_home_possession', 'c_away_possession',
        # 'c_home_shots', 'c_away_shots',
        'c_home_diff_5m', 'c_away_diff_5m',
        'c_home_form_5s', 'c_away_form_5s',
        'c_home_diff_5s', 'c_away_diff_5s',
        'c_home_aggression_5m', 'c_away_aggression_5m',
        'c_home_shots_5m', 'c_away_shots_5m',
        'c_home_shots_5btw', 'c_away_shots_5btw',
        'c_home_defence_5m', 'c_away_defence_5m',
        'c_home_defence_5btw', 'c_away_defence_5btw',
        'c_home_passing_5m', 'c_away_passing_5m',
        'c_home_passing_5btw', 'c_away_passing_5btw',
        'c_home_aggression_5btw', 'c_away_aggression_5btw',
        # Disabled: 'c_home_aggression_season', 'c_away_aggression_season',
        # 'c_home_form_season', 'c_away_form_season',
        # 'c_home_diff_season', 'c_away_diff_season',
    ]
    return dataframe[columns]
def last5Matches(season, teamA, data, df):
    """Return up to five most recent matches of ``teamA`` in ``season``.

    Only matches dated strictly before ``data`` are considered, with
    ``teamA`` on either the home or the away side.

    Args:
        season: Value matched against the ``season`` column.
        teamA: Team name matched against ``home_team`` / ``away_team``.
        data: Cut-off date; anything ``pd.to_datetime`` accepts.
        df: Match frame with ``season``, ``date``, ``home_team`` and
            ``away_team`` columns.

    Returns:
        Tuple of (selected rows, newest first; column-name suffix ``"_5m"``).
    """
    plays = (df['home_team'] == teamA) | (df['away_team'] == teamA)
    subset = df[(df['season'] == season) & plays]
    before_given_date = subset[pd.to_datetime(subset['date']) < pd.to_datetime(data)]
    # Sort on the parsed dates, not the raw column: the filter above compares
    # datetimes, and non-ISO date strings do not sort chronologically as text.
    before_given_date = before_given_date.sort_values(
        by='date', key=pd.to_datetime, ascending=False
    )
    return before_given_date.head(5), "_5m"
def last5MatchesBtwTeams(teamA, teamB, data, df):
    """Return up to five most recent matches between ``teamA`` and ``teamB``.

    Matches from any season qualify, with either team at home, as long as
    they are dated strictly before ``data``.

    Args:
        teamA: First team name.
        teamB: Second team name.
        data: Cut-off date; anything ``pd.to_datetime`` accepts.
        df: Match frame with ``date``, ``home_team`` and ``away_team`` columns.

    Returns:
        Tuple of (selected rows, newest first; column-name suffix ``"_5btw"``).
    """
    involves_a = (df['home_team'] == teamA) | (df['away_team'] == teamA)
    involves_b = (df['home_team'] == teamB) | (df['away_team'] == teamB)
    subset = df[involves_a & involves_b]
    before_given_date = subset[pd.to_datetime(subset['date']) < pd.to_datetime(data)]
    # Sort on the parsed dates, not the raw column: the filter above compares
    # datetimes, and non-ISO date strings do not sort chronologically as text.
    before_given_date = before_given_date.sort_values(
        by='date', key=pd.to_datetime, ascending=False
    )
    return before_given_date.head(5), "_5btw"
def seasonMatches(season, teamA, data, df):
    """Return every match of ``teamA`` in ``season`` played before ``data``.

    Args:
        season: Value matched against the ``season`` column.
        teamA: Team name matched against ``home_team`` / ``away_team``.
        data: Cut-off date (exclusive); anything ``pd.to_datetime`` accepts.
        df: Match frame with ``season``, ``date``, ``home_team`` and
            ``away_team`` columns.

    Returns:
        Tuple of (selected rows, newest first; column-name suffix ``"_s"``).
    """
    # Select the records of the given team (home or away) in the given season.
    plays = (df['home_team'] == teamA) | (df['away_team'] == teamA)
    subset = df[(df['season'] == season) & plays]
    # Keep only the matches dated before the given date.
    before_given_date = subset[pd.to_datetime(subset['date']) < pd.to_datetime(data)]
    # Sort newest first on the parsed dates: the filter above compares
    # datetimes, and non-ISO date strings do not sort chronologically as text.
    before_given_date = before_given_date.sort_values(
        by='date', key=pd.to_datetime, ascending=False
    )
    return before_given_date, "_s"
def getResult(score, teamHome):
    """Classify a final score from one team's point of view.

    Args:
        score: Text of the form ``"<home goals>-<away goals>"``.
        teamHome: ``True`` when the team of interest played at home,
            ``False`` when it played away.

    Returns:
        ``"win"``, ``"draw"`` or ``"loss"``.
    """
    home_text, away_text = score.split('-')
    home_goals = int(home_text)
    away_goals = int(away_text)
    if home_goals == away_goals:
        return "draw"
    home_side_won = home_goals > away_goals
    # Explicit comparisons are kept so that a flag that is neither True nor
    # False still falls through to "loss".
    if (home_side_won and teamHome == True) or (not home_side_won and teamHome == False):
        return "win"
    return "loss"
# def calculateAggression(matches, team):
# aggression = 0
# for index, row in matches.iterrows():
# if team == row['home_team']:
# yellow_cards = row['home_yellow_cards']
# red_cards = row['home_red_cards']
# else:
# yellow_cards = row['away_yellow_cards']
# red_cards = row['away_red_cards']
# aggression_result = calculateFuzzyAggression(yellow_cards, red_cards)
# #print(aggression_result['aggression'])
# aggression = aggression + aggression_result['aggression']
# if matches.shape[0] != 0:
# aggression_avg = aggression / matches.shape[0]
# else:
# aggression_avg = 0
# return aggression_avg
def calculatePoints(matches, team):
    """Return the average points per match earned by ``team`` in ``matches``.

    A win is worth 3 points, a draw 1 and anything else 0. The outcome of
    each row is taken from ``getResult`` applied to ``result_full``. Returns
    0 when ``matches`` has no rows.
    """
    match_count = matches.shape[0]
    if match_count == 0:
        return 0
    points_by_outcome = {"win": 3, "draw": 1}
    total = 0
    for _, match in matches.iterrows():
        played_at_home = bool(team == match['home_team'])
        outcome = getResult(match['result_full'], played_at_home)
        total += points_by_outcome.get(outcome, 0)
    return total / match_count
def calculateGoalDifference(matches, team):
    """Return the summed goal difference of ``team`` over ``matches``.

    Each ``result_full`` value is read as ``"<home goals>-<away goals>"``.
    Rows where ``team`` is the home side add home minus away goals; every
    other row adds away minus home goals. The result is a total, not an
    average, and is 0 for an empty frame.
    """
    total = 0
    for _, match in matches.iterrows():
        is_home = bool(team == match['home_team'])
        home_text, away_text = match['result_full'].split('-')
        home_margin = int(home_text) - int(away_text)
        total += home_margin if is_home else -home_margin
    return total
def calculateColumn(matches, team, column_name):
    """Return the mean of ``column_name`` over the rows of ``matches``.

    Args:
        matches: Frame of matches to average over.
        team: Currently has no effect on the result (see note below).
        column_name: Name of the column to average.

    Returns:
        The sum of the column divided by the number of rows, or 0 when
        ``matches`` has no rows.
    """
    # NOTE(review): the previous implementation branched on whether ``team``
    # was the home side but read the same column in both branches, so the
    # branch was dead code. It has been collapsed. If a side-specific column
    # (e.g. a home/away variant of ``column_name``) was intended, that
    # selection still needs to be implemented.
    match_count = matches.shape[0]
    if match_count == 0:
        return 0
    return sum(matches[column_name]) / match_count