diff --git a/create-dataset.py b/create-dataset.py index fac2bef..1303fcf 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -1,6 +1,6 @@ import pandas import os -#from sklearn.model_selection import train_test_split +from sklearn.model_selection import train_test_split CUTOFF = int(os.environ['CUTOFF']) @@ -19,8 +19,8 @@ video_games = video_games.sample(CUTOFF) X, Y = video_games, video_games # SPLIT BETWEEN DEV, TRAINS, AND TEST -#X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, random_state=1) -#X_dev, X_test, Y_dev, Y_test = train_test_split(X_temp, Y_temp, test_size=0.3, random_state=1) +X_train, X_temp, Y_train, Y_temp = train_test_split(X, Y, test_size=0.3, random_state=1) +X_dev, X_test, Y_dev, Y_test = train_test_split(X_temp, Y_temp, test_size=0.3, random_state=1) #X_train.to_csv('X_train.csv', index=False) #X_dev.to_csv('X_dev.csv', index=False)