added prototype prediction model
This commit is contained in:
parent
7a16f22192
commit
2fff850afc
81
data_filters.py
Normal file
81
data_filters.py
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from simpful import *
|
||||||
|
|
||||||
|
def generateTrainingData(dataframe):
    """Return the subset of ``dataframe`` holding the raw match columns.

    Args:
        dataframe: pandas DataFrame containing every column listed below;
            a missing column raises ``KeyError``.

    Returns:
        A DataFrame with the season/date/team/result columns followed by
        the home and away passes, possession and shots columns.
    """
    identifying = ['season', 'date', 'home_team', 'away_team', 'result_full']
    statistics = [
        'home_passes', 'away_passes',
        'home_possession', 'away_possession',
        'home_shots', 'away_shots',
    ]
    return dataframe[identifying + statistics]
|
||||||
|
|
||||||
|
|
||||||
|
def generateFuzzyLogicData(dataframe):
    """Return the subset of ``dataframe`` holding the categorised columns.

    Args:
        dataframe: pandas DataFrame containing every column listed below;
            a missing column raises ``KeyError``.

    Returns:
        A DataFrame with the season/date/team/result columns followed by
        the ``c_``-prefixed passes, possession, shots, form and diff
        columns for the home and away side.
    """
    identifying = ['season', 'date', 'home_team', 'away_team', 'result_full']
    categorised = [
        'c_home_passes', 'c_away_passes',
        'c_home_possession', 'c_away_possession',
        'c_home_shots', 'c_away_shots',
        'c_home_form', 'c_away_form',
        'c_home_diff', 'c_away_diff',
    ]
    return dataframe[identifying + categorised]
|
||||||
|
|
||||||
|
def last5Matches(season, teamA, data, df):
    """Return up to five most recent matches of a team before a given date.

    Args:
        season: value compared against the ``season`` column.
        teamA: team name; a match qualifies when the team appears in
            either ``home_team`` or ``away_team``.
        data: cut-off date (anything ``pd.to_datetime`` accepts); only
            matches strictly before it are kept.
        df: pandas DataFrame with ``season``, ``date``, ``home_team`` and
            ``away_team`` columns.

    Returns:
        A DataFrame with at most five rows, newest match first. It is
        empty when the team has no earlier match in that season.
    """
    # Select the matches of the requested season that involve the team.
    involves_team = (df['home_team'] == teamA) | (df['away_team'] == teamA)
    subset = df[(df['season'] == season) & involves_team]

    # Keep only the matches played strictly before the cut-off date.
    played_before = pd.to_datetime(subset['date']) < pd.to_datetime(data)
    before_given_date = subset[played_before]

    # Order newest first. Sort on parsed dates, not on the raw strings, so
    # the ordering agrees with the date filter above even when the column
    # is not stored in a lexicographically sortable (ISO) format.
    before_given_date = before_given_date.sort_values(
        by='date', ascending=False, key=pd.to_datetime
    )

    # Take the five most recent matches.
    return before_given_date.head(5)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def getResult(score, teamHome):
    """Classify a ``"H-A"`` score string as ``"win"``, ``"draw"`` or ``"loss"``.

    Args:
        score: string of the form ``"<home goals>-<away goals>"``.
        teamHome: ``True`` to judge the score for the home side,
            ``False`` to judge it for the away side.

    Returns:
        ``"draw"`` when both sides scored equally, ``"win"`` when the
        selected side scored more, otherwise ``"loss"``.

    Raises:
        ValueError: if ``score`` does not split into two integers.
    """
    home_text, away_text = score.split('-')
    home_goals = int(home_text)
    away_goals = int(away_text)

    if home_goals == away_goals:
        return "draw"

    # With draws excluded, the selected side won exactly when "we are the
    # home side" matches "the home side scored more".
    home_won = home_goals > away_goals
    if teamHome == home_won:
        return "win"
    return "loss"
|
||||||
|
|
||||||
|
|
||||||
|
def calculatePoints(matches, team):
    """Return the average points per match earned by ``team``.

    Each row's ``result_full`` is classified with ``getResult``; a win
    counts 3 points, a draw 1 and anything else 0. The team is treated as
    the home side when it equals the row's ``home_team`` and as the away
    side otherwise.

    Args:
        matches: pandas DataFrame with ``home_team`` and ``result_full``
            columns.
        team: team identifier compared against ``home_team``.

    Returns:
        Total points divided by the number of rows, or ``0`` when
        ``matches`` has no rows.
    """
    match_count = matches.shape[0]
    if match_count == 0:
        return 0

    points_by_outcome = {"win": 3, "draw": 1}
    total = 0
    for _, match in matches.iterrows():
        is_home = bool(team == match['home_team'])
        outcome = getResult(match['result_full'], is_home)
        total += points_by_outcome.get(outcome, 0)
    return total / match_count
|
||||||
|
|
||||||
|
|
||||||
|
def calculateGoalDifference(matches, team):
    """Return the summed goal difference of ``team`` over ``matches``.

    Each ``result_full`` value is read as ``"<home goals>-<away goals>"``.
    When ``team`` equals the row's ``home_team`` the home-minus-away margin
    is added; for every other row the away-minus-home margin is added.

    Args:
        matches: pandas DataFrame with ``home_team`` and ``result_full``
            columns.
        team: team identifier compared against ``home_team``.

    Returns:
        The accumulated goal difference as an int (``0`` for no rows).
    """
    total = 0
    for _, match in matches.iterrows():
        home_goals, away_goals = map(int, match['result_full'].split('-'))
        margin = home_goals - away_goals
        total += margin if team == match['home_team'] else -margin
    return total
|
128
main.py
128
main.py
@ -1,7 +1,14 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from simpful import *
|
from simpful import *
|
||||||
|
from rules import *
|
||||||
|
from data_filters import *
|
||||||
|
from sklearn.ensemble import RandomForestClassifier
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
from sklearn.metrics import classification_report
|
||||||
|
|
||||||
|
|
||||||
df = pd.read_csv('df_full_premierleague.csv')
|
|
||||||
|
|
||||||
|
|
||||||
# Ostatnie 5 spotkań
|
# Ostatnie 5 spotkań
|
||||||
@ -12,95 +19,64 @@ df = pd.read_csv('df_full_premierleague.csv')
|
|||||||
#Podania ponizej 300-400 słabo powyżej 500 dużo
|
#Podania ponizej 300-400 słabo powyżej 500 dużo
|
||||||
|
|
||||||
|
|
||||||
def generateTrainingData(dataframe):
|
|
||||||
columns = ['season','date','home_team','away_team','result_full','home_passes','away_passes',
|
|
||||||
'home_possession','away_possession','home_shots','away_shots']
|
|
||||||
return dataframe[columns]
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
df = pd.read_csv('df_full_premierleague.csv')
|
||||||
|
|
||||||
def last5Matches(sezon, druzynaA, data, df):
|
result = last5Matches('10/11', 'Stoke City', '2010-10-02', df)
|
||||||
# Wybierz rekordy dla danej pary drużyn i sezonu
|
#print(result.to_markdown())
|
||||||
subset = df[((df['season'] == sezon) & ((df['home_team'] == druzynaA) | (df['away_team'] == druzynaA)))]
|
#print(result)
|
||||||
|
result = last5Matches('10/11', 'Blackburn Rovers', '2010-10-02', df)
|
||||||
|
#print(result.to_markdown())
|
||||||
|
#print(result)
|
||||||
|
|
||||||
# Filtruj dane, aby zawierały te przed daną datą
|
print(calculatePoints(result,'Blackburn Rovers'))
|
||||||
przed_dana_data = subset[pd.to_datetime(subset['date']) < pd.to_datetime(data)]
|
print(calculateGoalDifference(result, 'Blackburn Rovers'))
|
||||||
|
|
||||||
# Posortuj wg daty w odwrotnej kolejności
|
df = generateTrainingData(df)
|
||||||
przed_dana_data = przed_dana_data.sort_values(by='date', ascending=False)
|
df = add_column(df, categorize_passes, "c_away_passes", "away_passes")
|
||||||
|
df = add_column(df, categorize_passes, "c_home_passes", "home_passes")
|
||||||
|
|
||||||
# Wybierz 5 ostatnich przed daną datą
|
df = add_column(df, categorize_possesion, "c_away_possession", "away_possession")
|
||||||
ostatnie_przed_data = przed_dana_data.head(5)
|
df = add_column(df, categorize_possesion, "c_home_possession", "home_possession")
|
||||||
|
|
||||||
return ostatnie_przed_data
|
df = add_column(df, categorize_shots, "c_away_shots", "away_shots")
|
||||||
|
df = add_column(df, categorize_shots, "c_home_shots", "home_shots")
|
||||||
|
print(df.columns)
|
||||||
|
|
||||||
|
df = add_column(df, get_points_home(df), "c_home_form")
|
||||||
|
df = add_column(df, get_points_away(df), "c_away_form")
|
||||||
|
|
||||||
|
df = add_column(df, get_diff_home(df), "c_home_diff")
|
||||||
|
df = add_column(df, get_diff_away(df), "c_away_diff")
|
||||||
|
|
||||||
|
df = generateFuzzyLogicData(df)
|
||||||
|
|
||||||
def getResult(score,teamHome):
|
label_encoder = LabelEncoder()
|
||||||
x,y = score.split('-')
|
df['season'] = label_encoder.fit_transform(df['season'])
|
||||||
x = int(x)
|
df['c_home_result'] = get_result_list(df,True)
|
||||||
y = int(y)
|
# The away-side label has to be judged from the away team's perspective;
# passing True here only duplicated the home-side result column.
df['c_away_result'] = get_result_list(df, False)
|
||||||
|
temp = df[['home_team', 'away_team']].stack()
|
||||||
|
temp[:] = temp.factorize()[0]
|
||||||
|
df[['home_team', 'away_team']] = temp.unstack()
|
||||||
|
X = df.drop(['result_full', 'date', 'c_home_result', 'c_away_result'], axis=1)
|
||||||
|
y = df['c_home_result']
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
if (x > y and teamHome == True) or (x < y and teamHome == False):
|
model = RandomForestClassifier(n_estimators=100, random_state=42)
|
||||||
return "win"
|
model.fit(X_train, y_train)
|
||||||
elif x == y:
|
|
||||||
return "draw"
|
|
||||||
else:
|
|
||||||
return "loss"
|
|
||||||
|
|
||||||
|
y_pred = model.predict(X_test)
|
||||||
|
|
||||||
def calculatePoints(matches, team):
|
accuracy = accuracy_score(y_test, y_pred)
|
||||||
points = 0
|
print(f'Dokładność modelu: {accuracy}')
|
||||||
for index, row in matches.iterrows():
|
|
||||||
if team == row['home_team']:
|
|
||||||
teamHome = True
|
|
||||||
else:
|
|
||||||
teamHome = False
|
|
||||||
x = getResult(row['result_full'], teamHome)
|
|
||||||
print(x)
|
|
||||||
if x == "win":
|
|
||||||
points = points + 3
|
|
||||||
elif x == "draw":
|
|
||||||
points = points + 1
|
|
||||||
return points
|
|
||||||
|
|
||||||
|
accuracy = accuracy_score(y_test, y_pred)
|
||||||
|
print(f'Dokładność modelu: {accuracy}')
|
||||||
|
print(classification_report(y_test, y_pred))
|
||||||
|
|
||||||
def calculateGoalDifference(matches, team):
|
result = last5Matches('10/11', 'Manchester United', '2010-12-16', df)
|
||||||
goal_diff = 0
|
print(calculatePoints(result,'Manchester United'))
|
||||||
for index, row in matches.iterrows():
|
print(calculateGoalDifference(result, 'Manchester United'))
|
||||||
if team == row['home_team']:
|
|
||||||
teamHome = True
|
|
||||||
else:
|
|
||||||
teamHome = False
|
|
||||||
x,y = row['result_full'].split('-')
|
|
||||||
x = int(x)
|
|
||||||
y = int(y)
|
|
||||||
if teamHome:
|
|
||||||
goal_diff = goal_diff + (x-y)
|
|
||||||
else:
|
|
||||||
goal_diff = goal_diff + (y-x)
|
|
||||||
return goal_diff
|
|
||||||
|
|
||||||
def categorize_passes(pass_count):
|
|
||||||
if pass_count < 400:
|
|
||||||
return 0 #słabo
|
|
||||||
elif 400 <= pass_count <= 500:
|
|
||||||
return 1 #średnio
|
|
||||||
else:
|
|
||||||
return 2 #dużo
|
|
||||||
|
|
||||||
wynik = last5Matches('10/11', 'Stoke City', '2010-10-02', df)
|
|
||||||
#print(wynik.to_markdown())
|
|
||||||
print(wynik)
|
|
||||||
#wynik = last5Matches('10/11', 'Blackburn Rovers', '2010-10-02', df)
|
|
||||||
#print(wynik.to_markdown())
|
|
||||||
#print(wynik)
|
|
||||||
|
|
||||||
print(calculatePoints(wynik,'Stoke City'))
|
|
||||||
print(calculateGoalDifference(wynik, 'Stoke City'))
|
|
||||||
|
|
||||||
df = generateTrainingData(df)
|
|
||||||
print(df)
|
|
||||||
|
|
||||||
|
|
||||||
|
108
rules.py
108
rules.py
@ -1,5 +1,7 @@
|
|||||||
import simpful
|
import simpful
|
||||||
|
from data_filters import *
|
||||||
|
import pandas as pd
|
||||||
|
'''
|
||||||
def kategoryzuj_strzaly(ilosc_strzalow):
|
def kategoryzuj_strzaly(ilosc_strzalow):
|
||||||
FS = FuzzySystem()
|
FS = FuzzySystem()
|
||||||
TLV = AutoTriangle(3, terms=['mało', 'średnio', 'dużo'], universe_of_discourse=[0, 25])
|
TLV = AutoTriangle(3, terms=['mało', 'średnio', 'dużo'], universe_of_discourse=[0, 25])
|
||||||
@ -39,3 +41,107 @@ def kategorie_strzalow(druzyna, sezon, data, df):
|
|||||||
ostatnie_spotkania['cat_shots'] = shots
|
ostatnie_spotkania['cat_shots'] = shots
|
||||||
|
|
||||||
return ostatnie_spotkania
|
return ostatnie_spotkania
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
def categorize_shots(shots):
    """Map a shot count onto a three-level category.

    Returns:
        ``0`` for 6 or fewer shots, ``2`` for 12 or more, ``1`` otherwise.
    """
    if shots <= 6:
        return 0
    return 2 if shots >= 12 else 1
|
||||||
|
|
||||||
|
def categorize_passes(pass_count):
    """Map a pass count onto a three-level category.

    Returns:
        ``0`` (few) below 400 passes, ``1`` (medium) from 400 up to and
        including 500, ``2`` (many) otherwise.
    """
    if pass_count < 400:
        return 0  # few
    # At this point only the upper bound of the medium band is undecided.
    return 1 if pass_count <= 500 else 2  # medium / many
|
||||||
|
|
||||||
|
def categorize_possesion(shots):
    """Map a possession value onto a three-level category.

    Args:
        shots: the value to categorise. NOTE(review): the parameter name
            looks like a leftover from ``categorize_shots``; the callers
            pass possession columns. The name is kept for compatibility.

    Returns:
        ``0`` for values of 40 or less, ``2`` for 56 or more, ``1``
        otherwise.
    """
    if shots <= 40:
        return 0
    return 2 if shots >= 56 else 1
|
||||||
|
|
||||||
|
def categorize_points(data, row, teamHome):
    """Categorise the recent form of one side of a match.

    The side's team is looked up in ``row``, its preceding matches are
    fetched with ``last5Matches`` and scored with ``calculatePoints``.

    Args:
        data: DataFrame of all matches, searched for earlier fixtures.
        row: the match being described; ``season``, ``date``,
            ``home_team`` and ``away_team`` are read from it.
        teamHome: truthy to rate the home team, falsy for the away team.

    Returns:
        ``0`` when the points value is at most 1, ``2`` when it is at
        least 2, and ``1`` in between.
    """
    team = row['home_team'] if teamHome else row['away_team']
    recent_matches = last5Matches(row['season'], team, row['date'], data)
    average_points = calculatePoints(recent_matches, team)

    if average_points <= 1:
        return 0
    if average_points >= 2:
        return 2
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
def get_points_home(data):
    """Return the home-side form category for every row of ``data``.

    Each row is passed to ``categorize_points`` with ``teamHome=True``;
    the resulting list follows the row order of ``data``.
    """
    return [categorize_points(data, match, True) for _, match in data.iterrows()]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_points_away(data):
    """Return the away-side form category for every row of ``data``.

    Each row is passed to ``categorize_points`` with ``teamHome=False``;
    the resulting list follows the row order of ``data``.
    """
    return [categorize_points(data, match, False) for _, match in data.iterrows()]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def categorize_diff(data, row, teamHome):
    """Categorise the recent goal difference of one side of a match.

    The side's team is looked up in ``row``, its preceding matches are
    fetched with ``last5Matches`` and summed with
    ``calculateGoalDifference``.

    Args:
        data: DataFrame of all matches, searched for earlier fixtures.
        row: the match being described; ``season``, ``date``,
            ``home_team`` and ``away_team`` are read from it.
        teamHome: truthy to rate the home team, falsy for the away team.

    Returns:
        ``0`` when the goal difference is zero or negative, else ``1``.
    """
    team = row['home_team'] if teamHome else row['away_team']
    recent_matches = last5Matches(row['season'], team, row['date'], data)
    goal_difference = calculateGoalDifference(recent_matches, team)
    return 0 if goal_difference <= 0 else 1
|
||||||
|
|
||||||
|
|
||||||
|
def get_diff_home(data):
    """Return the home-side goal-difference category for every row.

    Each row of ``data`` is passed to ``categorize_diff`` with
    ``teamHome=True``; the resulting list follows the row order.
    """
    return [categorize_diff(data, match, True) for _, match in data.iterrows()]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_diff_away(data):
    """Return the away-side goal-difference category for every row.

    Each row of ``data`` is passed to ``categorize_diff`` with
    ``teamHome=False``; the resulting list follows the row order.
    """
    return [categorize_diff(data, match, False) for _, match in data.iterrows()]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def add_column(data_frame, transform_function, new_column, existing_column=None):
    """Set ``new_column`` on ``data_frame`` and return the frame.

    Args:
        data_frame: pandas DataFrame to extend; it is modified in place.
        transform_function: when ``existing_column`` is given, a callable
            applied to each value of that column. When it is omitted,
            this argument is used directly as the column's values (for
            example a list with one entry per row).
        new_column: name of the column to create or overwrite.
        existing_column: optional name of the column to derive from.

    Returns:
        The same ``data_frame`` object, now containing ``new_column``.
    """
    # Identity test rather than ``!= None``: the latter dispatches to the
    # argument's own comparison operator.
    if existing_column is not None:
        new_column_values = data_frame[existing_column].apply(transform_function)
    else:
        new_column_values = transform_function
    data_frame[new_column] = new_column_values
    return data_frame
|
||||||
|
|
||||||
|
def get_result_list(df, home_team):
    """Return the ``getResult`` outcome for every ``result_full`` value.

    Args:
        df: DataFrame with a ``result_full`` column of score strings.
        home_team: forwarded unchanged to ``getResult`` as its
            ``teamHome`` argument for every row.

    Returns:
        A list of outcome strings in the row order of ``df``.
    """
    return [getResult(score, home_team) for score in df['result_full']]
|
||||||
|
Loading…
Reference in New Issue
Block a user