26 lines
864 B
Python
26 lines
864 B
Python
|
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the full dataset from the current working directory.
energy_data = pd.read_csv('Steel_industry_data.csv')

# Two-stage split with a fixed seed so the partitions are reproducible.
# Stage 1: set aside 7008 rows; everything else becomes the training set.
train_data, held_out = train_test_split(
    energy_data,
    test_size=7008,
    random_state=1,
)
# Stage 2: 3504 of the set-aside rows form the dev set; the rest is the test set.
test_data, dev_data = train_test_split(
    held_out,
    test_size=3504,
    random_state=1,
)

# stats
# Summary of every column of the unsplit data.
print(energy_data.describe(include='all'))

# Report the (rows, columns) shape of each partition, one label per partition.
for label, subset in (
    ('Training set size:', train_data),
    ('Testing set size:', test_data),
    ('Dev set size:', dev_data),
):
    print(label)
    print(subset.shape)

# For a per-partition summary, call .describe(include='all') on
# train_data, test_data or dev_data.

# Persist each partition as UTF-8 CSV without the index column
# (written in the order: test, dev, train).
for path, subset in (
    ("steel_industry_data_test.csv", test_data),
    ("steel_industry_data_dev.csv", dev_data),
    ("steel_industry_data_train.csv", train_data),
):
    subset.to_csv(path, encoding="utf-8", index=False)