improved model, fixed typos
This commit is contained in:
parent
882c09cdc3
commit
a348fac129
@ -2,12 +2,12 @@ import pandas as pd
|
|||||||
from fuzzy import *
|
from fuzzy import *
|
||||||
|
|
||||||
|
|
||||||
def save_to_csv(filename, dataframe):
    """Append *dataframe* to the CSV file *filename*.

    The column header is written only when the target does not yet hold
    any data, so calling this repeatedly with successive chunks produces
    one well-formed CSV instead of a file with a header row repeated
    before every chunk.

    Args:
        filename: Path of the CSV file to append to (anything accepted by
            ``DataFrame.to_csv``).
        dataframe: The rows to append. An empty frame never writes a header.
    """
    import os  # local import: keeps this block self-contained

    try:
        target_has_data = os.path.getsize(filename) > 0
    except (OSError, TypeError):
        # Missing file, or *filename* is a buffer rather than a path:
        # treat the target as empty.
        target_has_data = False

    # The previous check used ``pd.DataFrame().append(...)``, which was
    # removed in pandas 2.0; ``dataframe.empty`` carries the same information.
    write_header = not dataframe.empty and not target_has_data
    dataframe.to_csv(filename, mode='a', index=False, header=write_header)


# Pre-rename name kept as an alias so call sites using it keep working.
zapisz_do_csv = save_to_csv
|
||||||
|
|
||||||
def split_to_parts(dataframe, part_size):
    """Yield consecutive row slices of *dataframe*, each at most *part_size* long.

    Args:
        dataframe: The frame to split; rows are taken positionally with
            ``iloc``, in their existing order.
        part_size: Maximum number of rows per yielded part. Must be positive.

    Yields:
        ``dataframe.iloc[i:i + part_size]`` for ``i = 0, part_size, ...``.
        The last part may be shorter. An empty frame yields nothing.

    Raises:
        ValueError: If *part_size* is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make ``range`` empty and silently drop every
    # row; zero would raise an unrelated-looking ``range()`` error.
    if part_size <= 0:
        raise ValueError(f"part_size must be positive, got {part_size!r}")

    for start in range(0, len(dataframe), part_size):
        yield dataframe.iloc[start:start + part_size]


# Pre-rename name kept as an alias so call sites using it keep working.
podziel_na_partie = split_to_parts
|
||||||
|
|
||||||
def przetwarzaj_co_50_rekordow(plik_wejsciowy, plik_wyjsciowy):
|
def przetwarzaj_co_50_rekordow(plik_wejsciowy, plik_wyjsciowy):
|
||||||
dataframe_wejsciowe = pd.read_csv(plik_wejsciowy)
|
dataframe_wejsciowe = pd.read_csv(plik_wejsciowy)
|
||||||
@ -40,16 +40,16 @@ def generateFuzzyLogicData(dataframe):
|
|||||||
|
|
||||||
|
|
||||||
def last5Matches(season, teamA, data, df):
    """Return up to five most recent matches of *teamA* before a given date.

    Args:
        season: Value compared for equality against the ``season`` column.
        teamA: Team name; a row matches when it equals either the
            ``home_team`` or the ``away_team`` column.
        data: Cut-off date (anything ``pd.to_datetime`` accepts). Only
            matches strictly earlier than this date are considered.
        df: Frame with at least the ``season``, ``home_team``,
            ``away_team`` and ``date`` columns.

    Returns:
        A tuple ``(matches, "_5m")`` where ``matches`` holds at most five
        rows of *df*, newest first, with the original columns unchanged.
    """
    # Keep only the given season's matches in which the team played,
    # whether home or away.
    involves_team = (df['home_team'] == teamA) | (df['away_team'] == teamA)
    subset = df[(df['season'] == season) & involves_team]

    # Keep only matches played strictly before the cut-off date.
    before_given_date = subset[pd.to_datetime(subset['date']) < pd.to_datetime(data)]

    # Sort newest first using the *parsed* dates. Sorting the raw column
    # would order non-ISO date strings lexicographically, which disagrees
    # with the datetime comparison used for the filter above.
    before_given_date = before_given_date.sort_values(
        by='date', ascending=False, key=pd.to_datetime
    )

    # Take the five most recent matches before the cut-off.
    last_before_date = before_given_date.head(5)

    return last_before_date, "_5m"
|
||||||
|
26
main.py
26
main.py
@ -8,8 +8,7 @@ from sklearn.model_selection import train_test_split
|
|||||||
from sklearn.metrics import accuracy_score
|
from sklearn.metrics import accuracy_score
|
||||||
from sklearn.preprocessing import LabelEncoder
|
from sklearn.preprocessing import LabelEncoder
|
||||||
from sklearn.metrics import classification_report
|
from sklearn.metrics import classification_report
|
||||||
|
from sklearn.ensemble import GradientBoostingClassifier
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Ostatnie 5 spotkań
|
# Ostatnie 5 spotkań
|
||||||
@ -24,13 +23,12 @@ from sklearn.metrics import classification_report
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
|
||||||
df = pd.read_csv('df_full_premierleague.csv')
|
df = pd.read_csv('df_parts.csv')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
df = pd.read_csv('df_full_premierleague.csv')
|
df = pd.read_csv('df_full_premierleague.csv')
|
||||||
result = last5Matches('10/11', 'Stoke City', '2010-10-02', df)[0]
|
result = last5Matches('10/11', 'Stoke City', '2010-10-02', df)[0]
|
||||||
#print(result.to_markdown())
|
#print(result.to_markdown())
|
||||||
@ -42,7 +40,6 @@ if __name__ == "__main__":
|
|||||||
print(calculatePoints(result,'Blackburn Rovers'))
|
print(calculatePoints(result,'Blackburn Rovers'))
|
||||||
print(calculateGoalDifference(result, 'Blackburn Rovers'))
|
print(calculateGoalDifference(result, 'Blackburn Rovers'))
|
||||||
|
|
||||||
'''
|
|
||||||
|
|
||||||
# df = generateTrainingData(df)
|
# df = generateTrainingData(df)
|
||||||
# df = add_column(df, categorize_passes, "c_away_passes", "away_passes")
|
# df = add_column(df, categorize_passes, "c_away_passes", "away_passes")
|
||||||
@ -158,8 +155,8 @@ if __name__ == "__main__":
|
|||||||
df.to_csv('df.csv', index=False)
|
df.to_csv('df.csv', index=False)
|
||||||
#TU sie zapisuje zbior
|
#TU sie zapisuje zbior
|
||||||
|
|
||||||
rozmiar_partii = 50
|
part_size = 50
|
||||||
for part in podziel_na_partie(df, rozmiar_partii):
|
for part in split_to_parts(df, part_size):
|
||||||
|
|
||||||
|
|
||||||
part = add_column(part,
|
part = add_column(part,
|
||||||
@ -206,8 +203,8 @@ if __name__ == "__main__":
|
|||||||
"c_away_passing_5btw")
|
"c_away_passing_5btw")
|
||||||
|
|
||||||
|
|
||||||
zapisz_do_csv("df_parts", part)
|
save_to_csv("df_parts", part)
|
||||||
|
'''
|
||||||
df = generateFuzzyLogicData(df)
|
df = generateFuzzyLogicData(df)
|
||||||
|
|
||||||
label_encoder = LabelEncoder()
|
label_encoder = LabelEncoder()
|
||||||
@ -219,9 +216,12 @@ if __name__ == "__main__":
|
|||||||
df[['home_team', 'away_team']] = temp.unstack()
|
df[['home_team', 'away_team']] = temp.unstack()
|
||||||
X = df.drop(['result_full', 'date', 'c_home_result', 'c_away_result'], axis=1)
|
X = df.drop(['result_full', 'date', 'c_home_result', 'c_away_result'], axis=1)
|
||||||
y = df['c_home_result']
|
y = df['c_home_result']
|
||||||
|
#y = label_encoder.fit_transform(df['c_home_result'])
|
||||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||||
|
|
||||||
model = RandomForestClassifier(n_estimators=100, random_state=42)
|
model = RandomForestClassifier(n_estimators=500, random_state=42)
|
||||||
|
#model = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100, random_state = 42)
|
||||||
|
|
||||||
model.fit(X_train, y_train)
|
model.fit(X_train, y_train)
|
||||||
|
|
||||||
y_pred = model.predict(X_test)
|
y_pred = model.predict(X_test)
|
||||||
@ -233,6 +233,8 @@ if __name__ == "__main__":
|
|||||||
print(f'Dokładność modelu: {accuracy}')
|
print(f'Dokładność modelu: {accuracy}')
|
||||||
print(classification_report(y_test, y_pred))
|
print(classification_report(y_test, y_pred))
|
||||||
|
|
||||||
|
#print(model.feature_importances_)
|
||||||
|
|
||||||
|
|
||||||
#print(categorize_fuzzy_passes(450,50))
|
#print(categorize_fuzzy_passes(450,50))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user