Move subset generation to the end of the init script

2022-03-20 21:34:03 +01:00 · 2022-03-20 21:34:03 +01:00 · ee8737fc15
commit ee8737fc15
parent 16081a3564
1 changed files with 12 additions and 12 deletions
--- a/init.py
+++ b/init.py
@@ -20,18 +20,6 @@ if not file_exists:
 # Load the dataset from df_atp.csv into a DataFrame and echo it.
 atp_data = pd.read_csv('df_atp.csv')
 print(atp_data)
 # Split into subsets: training, test, validation.
 # The first call holds out 40% of the rows; the second call halves that
 # hold-out into dev and test, giving 60/20/20 overall. random_state=1 is
 # passed to both calls (presumably scikit-learn's train_test_split, where a
 # fixed seed makes the shuffle reproducible — confirm against the imports).
 atp_train, atp_test = train_test_split(atp_data, test_size=0.4, random_state=1)
 atp_dev, atp_test = train_test_split(atp_test, test_size=0.5, random_state=1)
 # Size of the full set and of each subset
 print("Elements of total set: " + str(len(atp_data)))
 print("Elements of test set: " + str(len(atp_test)))
 print("Elements of dev set: " + str(len(atp_dev)))
 print("Elements of train set: " + str(len(atp_train)))
 # Average number of games in the first set for match winners
 # NOTE(review): "Winner" presumably holds player names (non-numeric); if so,
 # pandas >= 2.0 raises on .mean() here unless numeric_only=True — confirm.
 print(atp_data[["Winner", "W1"]].mean())
@@ -77,3 +65,15 @@ print(atp_data["Round"])
 # Replace the literal placeholder '########' in the "Date" column with an
 # empty string, then print the column to inspect the result.
 atp_data.loc[atp_data["Date"] == '########', "Date"] = ''
 print(atp_data["Date"])
 # Split into subsets: training, test, validation, in 6:2:2 proportions.
 # 40% of the rows are held out first, then that hold-out is halved into
 # dev and test; random_state=1 is passed to both calls.
 atp_train, atp_test = train_test_split(atp_data, test_size=0.4, random_state=1)
 atp_dev, atp_test = train_test_split(atp_test, test_size=0.5, random_state=1)
 # Size of the full set and of each subset
 print("Elements of total set: " + str(len(atp_data)))
 print("Elements of test set: " + str(len(atp_test)))
 print("Elements of dev set: " + str(len(atp_dev)))
 print("Elements of train set: " + str(len(atp_train)))