260 lines
15 KiB
Python
260 lines
15 KiB
Python
import pandas as pd
|
|
from simpful import *
|
|
from rules import *
|
|
from data_filters import *
|
|
from fuzzy import *
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.metrics import accuracy_score
|
|
from sklearn.preprocessing import LabelEncoder
|
|
from sklearn.metrics import classification_report
|
|
from sklearn.ensemble import GradientBoostingClassifier
|
|
|
|
|
|
# Last 5 matches
|
|
# Form: 0-6 points = poor, 6-10 = average, 10-15 points = good
|
|
# Goal difference: negative / positive
|
|
# Shots: 6 or fewer = few, in between = average, 12 and up = many
|
|
# Ball possession: poor = 30-40, average = 40-55, good = 56-64
|
|
# Passes: below 300-400 = poor, above 500 = many
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the prepared match dataset, derive the
    # fuzzy-logic features, encode the categorical columns, then train and
    # evaluate a random-forest classifier that predicts `c_home_result`.

    # NOTE(review): the disabled pipeline below ends with
    # save_to_csv("df_parts", part), so this CSV presumably comes from it -
    # confirm before regenerating the data.
    df = pd.read_csv('df_parts.csv')

    # Disabled feature-generation pipeline. It is a bare string literal used
    # as an expression statement, so none of it is executed; it is kept only
    # as a reference for how the feature columns were built.
    '''

    df = pd.read_csv('df_full_premierleague.csv')
    result = last5Matches('10/11', 'Stoke City', '2010-10-02', df)[0]
    #print(result.to_markdown())
    #print(result)
    result = last5Matches('10/11', 'Blackburn Rovers', '2010-10-02', df)[0]
    #print(result.to_markdown())
    #print(result)

    print(calculatePoints(result,'Blackburn Rovers'))
    print(calculateGoalDifference(result, 'Blackburn Rovers'))


    # df = generateTrainingData(df)
    # df = add_column(df, categorize_passes, "c_away_passes", "away_passes")
    # df = add_column(df, categorize_passes, "c_home_passes", "home_passes")

    # df = add_column(df, categorize_possesion, "c_away_possession", "away_possession")
    # df = add_column(df, categorize_possesion, "c_home_possession", "home_possession")

    # df = add_column(df, categorize_shots, "c_away_shots", "away_shots")
    # df = add_column(df, categorize_shots, "c_home_shots", "home_shots")
    # print(df.columns)
    ###############################################################################################
    # df = add_column(df, getColumnMethod(df, True, 'home_yellow_cards', seasonMatches), "c_home_yellow_cards_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_yellow_cards', seasonMatches), "c_away_yellow_cards_s")
    # df = add_column(df, getColumnMethod(df, True, 'home_red_cards', seasonMatches), "c_home_red_cards_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_red_cards', seasonMatches), "c_away_red_cards_s")
    df = df.sort_values(by='date', ascending=False)

    df = add_column(df, getColumnMethod(df, True, 'home_yellow_cards', last5Matches), "c_home_yellow_cards_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_yellow_cards', last5Matches), "c_away_yellow_cards_5m")
    df = add_column(df, getColumnMethod(df, True, 'home_red_cards', last5Matches), "c_home_red_cards_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_red_cards', last5Matches), "c_away_red_cards_5m")

    df = add_column(df, getColumnMethod5Btw(df, True, 'home_yellow_cards', last5MatchesBtwTeams),
                    "c_home_yellow_cards_5btw")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_yellow_cards', last5MatchesBtwTeams),
                    "c_away_yellow_cards_5btw")
    df = add_column(df, getColumnMethod5Btw(df, True, 'home_red_cards', last5MatchesBtwTeams), "c_home_red_cards_5btw")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_red_cards', last5MatchesBtwTeams), "c_away_red_cards_5btw")



    ###################################################################################################################
    # df = add_column(df, getColumnMethod(df, True, 'home_shots', seasonMatches), "c_home_shots_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_shots', seasonMatches), "c_away_shots_s")
    # df = add_column(df, getColumnMethod(df, True, 'home_shots_on_target', seasonMatches), "c_home_shots_on_target_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_shots_on_target', seasonMatches), "c_away_shots_on_target_s")

    df = add_column(df, getColumnMethod(df, True, 'home_shots', last5Matches), "c_home_shots_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_shots', last5Matches), "c_away_shots_5m")
    df = add_column(df, getColumnMethod(df, True, 'home_shots_on_target', last5Matches), "c_home_shots_on_target_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_shots_on_target', last5Matches), "c_away_shots_on_target_5m")

    df = add_column(df, getColumnMethod5Btw(df, True, 'home_shots', last5MatchesBtwTeams), "c_home_shots")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_shots', last5MatchesBtwTeams), "c_away_shots")
    df = add_column(df, getColumnMethod5Btw(df, True, 'home_shots_on_target', last5MatchesBtwTeams),
                    "c_home_shots_on_target_5btw")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_shots_on_target', last5MatchesBtwTeams),
                    "c_away_shots_on_target_5btw")


    ###################################################################################################################################
    # df = add_column(df, getColumnMethod(df, True, 'home_tackles', seasonMatches), "c_home_tackles_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_tackles', seasonMatches), "c_away_tackles_s")
    # df = add_column(df, getColumnMethod(df, True, 'home_clearances', seasonMatches), "c_home_clearances_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_clearances', seasonMatches), "c_away_clearances_s")

    df = add_column(df, getColumnMethod(df, True, 'home_tackles', last5Matches), "c_home_tackles_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_tackles', last5Matches), "c_away_tackles_5m")
    df = add_column(df, getColumnMethod(df, True, 'home_clearances', last5Matches), "c_home_clearances_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_clearances', last5Matches), "c_away_clearances_5m")

    df = add_column(df, getColumnMethod5Btw(df, True, 'home_tackles', last5MatchesBtwTeams), "c_home_tackles_5btw")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_tackles', last5MatchesBtwTeams), "c_away_tackles_5btw")
    df = add_column(df, getColumnMethod5Btw(df, True, 'home_clearances', last5MatchesBtwTeams),
                    "c_home_clearances_5btw")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_clearances', last5MatchesBtwTeams),
                    "c_away_clearances_5btw")



    ####################################################################################################################################
    # df = add_column(df, getColumnMethod(df, True, 'home_passes', seasonMatches), "c_home_passes_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_passes', seasonMatches), "c_away_passes_s")
    # df = add_column(df, getColumnMethod(df, True, 'home_possession', seasonMatches), "c_home_possession_s")
    # df = add_column(df, getColumnMethod(df, True, 'away_possession', seasonMatches), "c_away_possession_s")

    df = add_column(df, getColumnMethod(df, True, 'home_passes', last5Matches), "c_home_passes_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_passes', last5Matches), "c_away_passes_5m")
    df = add_column(df, getColumnMethod(df, True, 'home_possession', last5Matches), "c_home_possession_5m")
    df = add_column(df, getColumnMethod(df, False, 'away_possession', last5Matches), "c_away_possession_5m")

    df = add_column(df, getColumnMethod5Btw(df, True, 'home_passes', last5MatchesBtwTeams), "c_home_passes_5btw")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_passes', last5MatchesBtwTeams), "c_away_passes_5btw")
    df = add_column(df, getColumnMethod5Btw(df, True, 'home_possession', last5MatchesBtwTeams),
                    "c_home_possession_5btw")
    df = add_column(df, getColumnMethod5Btw(df, False, 'away_possession', last5MatchesBtwTeams),
                    "c_away_possession_5btw")

    # TU


    ##########################################################################################################################################

    df = add_column(df, get_method(df, True, categorize_points, last5Matches), "c_home_form_5m")
    df = add_column(df, get_method(df, False, categorize_points, last5Matches), "c_away_form_5m")

    df = add_column(df, get_method(df, True, categorize_diff, seasonMatches), "c_home_diff_5m")
    df = add_column(df, get_method(df, False, categorize_diff, seasonMatches), "c_away_diff_5m")

    df = add_column(df, get_method(df, True, categorize_points_Btw, last5MatchesBtwTeams), "c_home_form_5btw")
    df = add_column(df, get_method(df, False, categorize_points_Btw, last5MatchesBtwTeams), "c_away_form_5btw")

    df = add_column(df, get_method(df, True, categorize_diff_Btw, last5MatchesBtwTeams), "c_home_diff_5btw")
    df = add_column(df, get_method(df, False, categorize_diff_Btw, last5MatchesBtwTeams), "c_away_diff_5btw")

    df = add_column(df, get_method(df, True, categorize_points, seasonMatches), "c_home_form_5s")
    df = add_column(df, get_method(df, False, categorize_points, seasonMatches), "c_away_form_5s")

    df = add_column(df, get_method(df, True, categorize_diff, seasonMatches), "c_home_diff_5s")
    df = add_column(df, get_method(df, False, categorize_diff, seasonMatches), "c_away_diff_5s")

    df.to_csv('df.csv', index=False)
    #TU sie zapisuje zbior

    part_size = 50
    for part in split_to_parts(df, part_size):


        part = add_column(part,
                          getFuzzyMethod(part, calculateFuzzyAggression, True, "c_home_yellow_cards_5m", "c_home_red_cards_5m"),
                          "c_home_aggression_5m")
        part = add_column(part, getFuzzyMethod(part, calculateFuzzyAggression, False, "c_away_yellow_cards_5m",
                                               "c_away_red_cards_5m"), "c_away_aggression_5m")

        part = add_column(part, getFuzzyMethod(part, calculateFuzzyAggression, True, "c_home_yellow_cards_5btw",
                                               "c_home_red_cards_5btw"), "c_home_aggression_5btw")
        part = add_column(part, getFuzzyMethod(part, calculateFuzzyAggression, False, "c_away_yellow_cards_5btw",
                                               "c_away_red_cards_5btw"), "c_away_aggression_5btw")

        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyShots, True, "c_home_shots_5m", "c_home_shots_5m"),
                          "c_home_shots_5m")
        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyShots, False, "c_away_shots_5m", "c_away_shots_5m"),
                          "c_away_shots_5m")

        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyShots, True, "c_home_shots_on_target_5btw",
                                               "c_home_shots_on_target_5btw"), "c_home_shots_5btw")
        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyShots, False, "c_away_shots_on_target_5btw",
                                               "c_away_shots_on_target_5btw"), "c_away_shots_5btw")

        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyDefence, True, "c_home_tackles_5m", "c_home_clearances_5m"),
                          "c_home_defence_5m")
        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyDefence, False, "c_away_tackles_5m", "c_away_clearances_5m"),
                          "c_away_defence_5m")

        part = add_column(part,
                          getFuzzyMethod(part, categorizeFuzzyDefence, True, "c_home_tackles_5btw", "c_home_clearances_5btw"),
                          "c_home_defence_5btw")
        part = add_column(part,
                          getFuzzyMethod(part, categorizeFuzzyDefence, False, "c_away_tackles_5btw", "c_away_clearances_5btw"),
                          "c_away_defence_5btw")
        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyPasses, True, "c_home_passes_5m", "c_home_possession_5m"),
                          "c_home_passing_5m")
        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyPasses, False, "c_away_passes_5m", "c_away_possession_5m"),
                          "c_away_passing_5m")

        part = add_column(part, getFuzzyMethod(part, categorizeFuzzyPasses, True, "c_home_passes_5btw", "c_home_possession_5btw"),
                          "c_home_passing_5btw")
        part = add_column(part,
                          getFuzzyMethod(part, categorizeFuzzyPasses, False, "c_away_passes_5btw", "c_away_possession_5btw"),
                          "c_away_passing_5btw")


        save_to_csv("df_parts", part)
    '''

    # Derive the fuzzy-logic feature columns (helper comes from a star import).
    df = generateFuzzyLogicData(df)

    # Replace season labels with integer codes.
    label_encoder = LabelEncoder()
    df['season'] = label_encoder.fit_transform(df['season'])

    # Per-match result columns; `c_home_result` is the prediction target.
    df['c_home_result'] = get_result_list(df, True)
    # NOTE(review): this call originally passed True as well, which made the
    # column an exact copy of `c_home_result`. False follows the
    # home=True / away=False flag convention used by the other helper calls
    # in this script - confirm against get_result_list. The column is dropped
    # from the features below, so the trained model is unaffected either way.
    df['c_away_result'] = get_result_list(df, False)

    # Encode team names with ONE shared code table: stacking both columns
    # before factorizing gives a club the same integer whether it appears as
    # the home or the away side.
    temp = df[['home_team', 'away_team']].stack()
    temp[:] = temp.factorize()[0]
    df[['home_team', 'away_team']] = temp.unstack()

    # Features: everything except the raw result, the date and both derived
    # result columns (keeping those would leak the target into the inputs).
    X = df.drop(['result_full', 'date', 'c_home_result', 'c_away_result'], axis=1)
    y = df['c_home_result']
    #y = label_encoder.fit_transform(df['c_home_result'])

    # 80/20 train/test split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(n_estimators=500, random_state=42)
    #model = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, random_state = 42)

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # Report accuracy once (the original computed and printed it twice),
    # followed by the per-class precision/recall report.
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Dokładność modelu: {accuracy}')
    print(classification_report(y_test, y_pred))

    #print(model.feature_importances_)

    #print(categorize_fuzzy_passes(450,50))

    #df.to_csv('df.csv', index=False)

    # Earlier, disabled feature experiments kept for reference.
    # df = add_column(df, get_method(df, True, categorize_aggression, last5Matches), "c_home_aggression_5m")#categorize_diff
    # df = add_column(df, get_method(df, False, categorize_aggression,last5Matches), "c_away_aggression_5m")
    # df = add_column(df, get_method(df, True, categorize_points, seasonMatches), "c_home_form_season")
    # df = add_column(df, get_method(df, False, categorize_points, seasonMatches), "c_away_form_season")

    #df = add_column(df, get_method(df, True, categorize_diff, seasonMatches), "c_home_diff_season")#categorize_diff
    # df = add_column(df, get_method(df, False, categorize_diff,seasonMatches), "c_away_diff_season")

    # df = add_column(df, get_method(df, True, categorize_aggression, seasonMatches), "c_home_aggression_season")#categorize_diff
    # df = add_column(df, get_method(df, False, categorize_aggression,seasonMatches), "c_away_aggression_season")

    #df = add_column(df, get_method(df, True, categorize_points, last5MatchesBtwTeams), "c_home_form_5btw")
    #df = add_column(df, get_method(df, False, categorize_points, last5MatchesBtwTeams), "c_away_form_5btw")

    #df = add_column(df, get_method(df, True, categorize_diff, last5MatchesBtwTeams), "c_home_diff_5btw")#categorize_diff
    #df = add_column(df, get_method(df, False, categorize_diff,last5MatchesBtwTeams), "c_away_diff_5btw")