"""Sample the salaries dataset and write train/dev/test CSV splits.

Pipeline: read ``./ium_458023/ds_salaries.csv``, drop rows containing
missing values, draw a random sample of at most ``SAMPLE_SIZE`` rows, split
it into train / dev / test partitions, and write each partition to a CSV
file in the current working directory.

Requires the ``CUTOFF`` environment variable to be set to an integer.
"""
import os

import pandas
from sklearn.model_selection import train_test_split

# NOTE(review): CUTOFF is parsed but not used by any later statement in this
# script. It is kept so that a missing or non-integer value still fails fast
# (KeyError / ValueError) exactly as before. It may have been intended as the
# sample size below -- confirm with the pipeline owner before wiring it in.
CUTOFF = int(os.environ['CUTOFF'])

SAMPLE_SIZE = 100        # maximum number of rows kept after dropping NaNs
HOLDOUT_FRACTION = 0.2   # share of the sample held out of the training set
TEST_FRACTION = 0.2      # share of the hold-out that becomes the test set
RANDOM_STATE = 1         # one seed for every random step => reproducible output

salaries = pandas.read_csv(
    './ium_458023/ds_salaries.csv',
    engine='python',
    encoding='ISO-8859-1',
    sep=',',
)
salaries = salaries.dropna()

# Cap the request at the number of available rows: DataFrame.sample raises
# ValueError when asked for more rows than exist (without replacement).
salaries = salaries.sample(
    n=min(SAMPLE_SIZE, len(salaries)),
    random_state=RANDOM_STATE,
)

# Split into train, dev and test partitions.
# The frame is passed once per call; the previous version passed it twice and
# unpacked the four results into duplicated names, which only worked because
# both inputs were the same object.
salaries_train, salaries_holdout = train_test_split(
    salaries,
    test_size=HOLDOUT_FRACTION,
    random_state=RANDOM_STATE,
)
# The hold-out is divided again: the larger part becomes dev, the smaller
# part (TEST_FRACTION of the hold-out) becomes test.
salaries_dev, salaries_test = train_test_split(
    salaries_holdout,
    test_size=TEST_FRACTION,
    random_state=RANDOM_STATE,
)

# index=False keeps the shuffled pandas row index out of the output files.
salaries_train.to_csv('salaries_train.csv', index=False)
salaries_dev.to_csv('salaries_dev.csv', index=False)
salaries_test.to_csv('salaries_test.csv', index=False)