"""Split the steel-industry energy dataset into train, test and dev CSV files.

Reads ``Steel_industry_data.csv`` from the current working directory, carves
it into three disjoint subsets with two successive seeded random splits,
prints summary statistics and the shape of each subset, and writes every
subset to its own CSV file.
"""

import pandas as pd
from sklearn.model_selection import train_test_split

# Absolute row counts (not fractions) used by the two successive splits.
_HOLDOUT_ROWS = 7008  # rows taken out of the full dataset; the rest is training data
_DEV_ROWS = 3504      # rows of the hold-out that become the dev set; the rest is test data
_SEED = 1             # fixed seed so both splits are reproducible

energy_data = pd.read_csv('Steel_industry_data.csv')

# First split: training rows versus a temporary hold-out portion.
train_data, _holdout = train_test_split(
    energy_data,
    test_size=_HOLDOUT_ROWS,
    random_state=_SEED,
)

# Second split: divide the hold-out portion into the test and dev sets.
test_data, dev_data = train_test_split(
    _holdout,
    test_size=_DEV_ROWS,
    random_state=_SEED,
)

# stats
print(energy_data.describe(include='all'))

# Report the (rows, columns) shape of each subset, in a fixed order.
for _heading, _subset in (
    ('Training set size:', train_data),
    ('Testing set size:', test_data),
    ('Dev set size:', dev_data),
):
    print(_heading)
    print(_subset.shape)

# Persist each subset without the DataFrame index column.
for _out_path, _subset in (
    ("steel_industry_data_test.csv", test_data),
    ("steel_industry_data_dev.csv", dev_data),
    ("steel_industry_data_train.csv", train_data),
):
    _subset.to_csv(_out_path, encoding="utf-8", index=False)