ium_470607/lab5/create/create_dataset.py
2021-05-14 21:52:14 +02:00

37 lines
1.0 KiB
Python

import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# Build standardized train / test / validation CSV files from the raw
# 'smart_grid_stability_augmented.csv' data set.
#
# Steps:
#   1. Standardize every column except the last one.
#   2. Use 'stabf' as the label and discard 'stab' from the features.
#   3. Split 80% / 10% / 10% into train / test / validation.
#   4. Write each split (features + label) to its own CSV file.
df = pd.read_csv('smart_grid_stability_augmented.csv')

# Every column but the last is scaled; the last column is skipped here and
# 'stabf' is re-attached unscaled below.
# NOTE(review): the scaler is fitted on the whole data set *before* the
# split, so statistics of the test/validation rows influence the scaling.
# Consider fitting on the training split only - confirm with the owner,
# since that would change the generated values.
feature_columns = df.iloc[:, :-1]
scaler = preprocessing.StandardScaler().fit(feature_columns)
df_norm_array = scaler.transform(feature_columns)
df_norm = pd.DataFrame(data=df_norm_array, columns=feature_columns.columns)
df_norm['stabf'] = df['stabf']

# 'stab' is removed from the model inputs; 'stabf' becomes the label series.
# drop() already returns a new frame, so df_norm itself is left untouched.
df_norm_data = df_norm.drop(columns='stab')
df_norm_labels = df_norm_data.pop('stabf')

# First split: 80% train, 20% held out.
X_train, X_testAndValid, Y_train, Y_testAndValid = train_test_split(
    df_norm_data,
    df_norm_labels,
    test_size=0.2,
    random_state=42,
)

# Second split: halve the held-out part into test and validation sets.
X_test, X_valid, Y_test, Y_valid = train_test_split(
    X_testAndValid,
    Y_testAndValid,
    test_size=0.5,
    random_state=42,
)

# Re-attach the label column to each feature split.
train = pd.concat([X_train, Y_train], axis=1)
test = pd.concat([X_test, Y_test], axis=1)
valid = pd.concat([X_valid, Y_valid], axis=1)

# `index=False` keeps the row index out of the files. The previous
# `index_col=False` is a read_csv option and made to_csv raise TypeError.
train.to_csv('train.csv', index=False)
test.to_csv('test.csv', index=False)
valid.to_csv('valid.csv', index=False)