evaluate result not nan
This commit is contained in:
parent
8780f35c2f
commit
9b71dc20d6
37
init.py
37
init.py
@@ -5,14 +5,17 @@ import numpy as np
|
|||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
import matplotlib
|
import matplotlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import math
|
||||||
|
|
||||||
# Inicjalizacja danych
|
# Inicjalizacja danych
|
||||||
file_exists = exists('./df_atp.csv')
|
file_exists = exists("./df_atp.csv")
|
||||||
if not file_exists:
|
if not file_exists:
|
||||||
subprocess.run(["kaggle", "datasets", "download", "-d", "hakeem/atp-and-wta-tennis-data"])
|
subprocess.run(
|
||||||
|
["kaggle", "datasets", "download", "-d", "hakeem/atp-and-wta-tennis-data"]
|
||||||
|
)
|
||||||
subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"])
|
subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"])
|
||||||
atp_data = pd.read_csv('df_atp.csv')
|
|
||||||
print(atp_data)
|
atp_data = pd.read_csv("df_atp.csv")
|
||||||
|
|
||||||
# Średnia ilość gemów w pierwszym secie zwycięzców meczu
|
# Średnia ilość gemów w pierwszym secie zwycięzców meczu
|
||||||
print(atp_data[["Winner", "W1"]].mean())
|
print(atp_data[["Winner", "W1"]].mean())
|
||||||
@@ -30,25 +33,25 @@ print(atp_data[["Winner", "W1"]].std())
|
|||||||
print(atp_data[["Winner", "W1"]].median())
|
print(atp_data[["Winner", "W1"]].median())
|
||||||
|
|
||||||
# Zmiana nazwy nienazwanej kolumny
|
# Zmiana nazwy nienazwanej kolumny
|
||||||
atp_data.rename(columns={'Unnamed: 0':'ID'}, inplace=True)
|
atp_data.rename(columns={"Unnamed: 0": "ID"}, inplace=True)
|
||||||
|
|
||||||
# Jak często kto był zwycięzcą
|
# Jak często kto był zwycięzcą
|
||||||
print(atp_data.groupby("Winner")["ID"].nunique())
|
print(atp_data.groupby("Winner")["ID"].nunique())
|
||||||
|
|
||||||
# Normalizacja rund -1: Finał, -2: Półfinał, -3: Ćwiartka, -4: Każdy z każdym
|
# Normalizacja rund -1: Finał, -2: Półfinał, -3: Ćwiartka, -4: Każdy z każdym
|
||||||
# 1: pierwsza runda, 2: druga runda, 3: trzecia runda, 4: czwarta runda
|
# 1: pierwsza runda, 2: druga runda, 3: trzecia runda, 4: czwarta runda
|
||||||
atp_data.loc[atp_data["Round"] == 'The Final', "Round"] = -1
|
atp_data.loc[atp_data["Round"] == "The Final", "Round"] = -1
|
||||||
atp_data.loc[atp_data["Round"] == 'Semifinals', "Round"] = -2
|
atp_data.loc[atp_data["Round"] == "Semifinals", "Round"] = -2
|
||||||
atp_data.loc[atp_data["Round"] == 'Quarterfinals', "Round"] = -3
|
atp_data.loc[atp_data["Round"] == "Quarterfinals", "Round"] = -3
|
||||||
atp_data.loc[atp_data["Round"] == 'Round Robin', "Round"] = -4
|
atp_data.loc[atp_data["Round"] == "Round Robin", "Round"] = -4
|
||||||
atp_data.loc[atp_data["Round"] == '1st Round', "Round"] = 1
|
atp_data.loc[atp_data["Round"] == "1st Round", "Round"] = 1
|
||||||
atp_data.loc[atp_data["Round"] == '2nd Round', "Round"] = 2
|
atp_data.loc[atp_data["Round"] == "2nd Round", "Round"] = 2
|
||||||
atp_data.loc[atp_data["Round"] == '3rd Round', "Round"] = 3
|
atp_data.loc[atp_data["Round"] == "3rd Round", "Round"] = 3
|
||||||
atp_data.loc[atp_data["Round"] == '4th Round', "Round"] = 4
|
atp_data.loc[atp_data["Round"] == "4th Round", "Round"] = 4
|
||||||
print(atp_data["Round"])
|
print(atp_data["Round"])
|
||||||
|
|
||||||
# Czyszczenie: W polu z datą zamienimy ######## na pustego stringa
|
# Czyszczenie: W polu z datą zamienimy ######## na pustego stringa
|
||||||
atp_data.loc[atp_data["Date"] == '########', "Date"] = ''
|
atp_data.loc[atp_data["Date"] == "########", "Date"] = ""
|
||||||
print(atp_data["Date"])
|
print(atp_data["Date"])
|
||||||
|
|
||||||
# Podział na podzbiory: trenujący, testowy, walidujący w proporcjach 6:2:2
|
# Podział na podzbiory: trenujący, testowy, walidujący w proporcjach 6:2:2
|
||||||
@@ -62,6 +65,6 @@ print("\nElements of dev set: " + str(len(atp_dev)))
|
|||||||
print("\nElements of train set: " + str(len(atp_train)))
|
print("\nElements of train set: " + str(len(atp_train)))
|
||||||
|
|
||||||
# Stworzenie plików z danymi trenującymi i testowymi
|
# Stworzenie plików z danymi trenującymi i testowymi
|
||||||
atp_test.to_csv('atp_test.csv', encoding="utf-8", index=False)
|
atp_test.to_csv("atp_test.csv", encoding="utf-8", index=False)
|
||||||
atp_dev.to_csv('atp_dev.csv', encoding="utf-8", index=False)
|
atp_dev.to_csv("atp_dev.csv", encoding="utf-8", index=False)
|
||||||
atp_train.to_csv('atp_train.csv', encoding="utf-8", index=False)
|
atp_train.to_csv("atp_train.csv", encoding="utf-8", index=False)
|
||||||
|
@@ -11,13 +11,15 @@ default_epochs = 4
|
|||||||
|
|
||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|
||||||
|
|
||||||
class AtpDataset(Dataset):
|
class AtpDataset(Dataset):
|
||||||
def __init__(self, file_name):
|
def __init__(self, file_name):
|
||||||
df = pd.read_csv(file_name)
|
df = pd.read_csv(file_name, usecols=["AvgL", "AvgW"])
|
||||||
|
df = df.dropna()
|
||||||
|
|
||||||
# Loser avg and Winner avg
|
# Loser avg and Winner avg
|
||||||
x = df.iloc[:, 4].values
|
x = df.iloc[:, 1].values
|
||||||
y = df.iloc[:, 3].values
|
y = df.iloc[:, 0].values
|
||||||
|
|
||||||
self.x_train = torch.from_numpy(x)
|
self.x_train = torch.from_numpy(x)
|
||||||
self.y_train = torch.from_numpy(y)
|
self.y_train = torch.from_numpy(y)
|
||||||
@@ -76,9 +78,9 @@ def test(dataloader, model, loss_fn):
|
|||||||
|
|
||||||
|
|
||||||
def setup_args():
|
def setup_args():
|
||||||
args_parser = argparse.ArgumentParser(prefix_chars='-')
|
args_parser = argparse.ArgumentParser(prefix_chars="-")
|
||||||
args_parser.add_argument('-b', '--batchSize', type=int, default=default_batch_size)
|
args_parser.add_argument("-b", "--batchSize", type=int, default=default_batch_size)
|
||||||
args_parser.add_argument('-e', '--epochs', type=int, default=default_epochs)
|
args_parser.add_argument("-e", "--epochs", type=int, default=default_epochs)
|
||||||
return args_parser.parse_args()
|
return args_parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
@@ -87,8 +89,8 @@ print(f"Using {device} device")
|
|||||||
args = setup_args()
|
args = setup_args()
|
||||||
batch_size = args.batchSize
|
batch_size = args.batchSize
|
||||||
|
|
||||||
plant_test = AtpDataset('atp_test.csv')
|
plant_test = AtpDataset("atp_test.csv")
|
||||||
plant_train = AtpDataset('atp_train.csv')
|
plant_train = AtpDataset("atp_train.csv")
|
||||||
|
|
||||||
train_dataloader = DataLoader(plant_train, batch_size=batch_size)
|
train_dataloader = DataLoader(plant_train, batch_size=batch_size)
|
||||||
test_dataloader = DataLoader(plant_test, batch_size=batch_size)
|
test_dataloader = DataLoader(plant_test, batch_size=batch_size)
|
||||||
@@ -111,5 +113,5 @@ for t in range(epochs):
|
|||||||
test(test_dataloader, model, loss_fn)
|
test(test_dataloader, model, loss_fn)
|
||||||
print("Finish!")
|
print("Finish!")
|
||||||
|
|
||||||
torch.save(model.state_dict(), './model.zip')
|
torch.save(model.state_dict(), "./model.zip")
|
||||||
print("Model saved in ./model.zip file.")
|
print("Model saved in ./model.zip file.")
|
Loading…
Reference in New Issue
Block a user