2021-05-02 22:01:32 +02:00
|
|
|
import pandas as pd
|
|
|
|
from sklearn import preprocessing
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
|
|
# Load the raw dataset; the path is relative to the current working directory.
# The steps below take every column except the last as scaler input and
# reference the columns 'stab' and 'stabf' by name.
df = pd.read_csv('smart_grid_stability_augmented.csv')
|
|
|
|
|
|
|
|
# --- Normalisation and train / hold-out split -------------------------------
# The scaler must only see training rows: fitting it on the complete dataset
# leaks the mean/variance of the test and validation rows into the training
# features. The split parameters are therefore named once and shared by the
# preliminary split (used only to fit the scaler) and the real split below.
HOLDOUT_FRACTION = 0.2  # share of rows reserved for test + validation
SPLIT_SEED = 42         # fixed seed so both splits select the same rows

# Every column except the last one (presumably the 'stabf' label, which is
# re-attached below) is fed to the scaler. This still includes 'stab', which
# is dropped after scaling.
feature_frame = df.iloc[:, 0:-1]

# Preliminary split on the raw features, solely to learn which rows make up
# the training partition. With the same number of rows, test_size and integer
# random_state, train_test_split selects the same rows as the real split.
raw_train, _raw_holdout = train_test_split(
    feature_frame,
    test_size=HOLDOUT_FRACTION,
    random_state=SPLIT_SEED)

# Fit on training rows only, then apply the same transform to every row.
scaler = preprocessing.StandardScaler().fit(raw_train)
df_norm_array = scaler.transform(feature_frame)
df_norm = pd.DataFrame(data=df_norm_array,
                       columns=feature_frame.columns)
df_norm['stabf'] = df['stabf']

# Model inputs: the scaled columns without 'stab'; 'stabf' becomes the label.
# drop() already returns a new frame, so df_norm itself is left untouched.
df_norm_data = df_norm.drop('stab', axis=1)
df_norm_labels = df_norm_data.pop('stabf')

# 80 % training rows, 20 % held out for the later test/validation split.
X_train, X_testAndValid, Y_train, Y_testAndValid = train_test_split(
    df_norm_data,
    df_norm_labels,
    test_size=HOLDOUT_FRACTION,
    random_state=SPLIT_SEED)

# Guard the coupling between the two splits: if they ever disagree, the
# scaler was fitted on rows that are not the training rows.
if not X_train.index.equals(raw_train.index):
    raise RuntimeError(
        'Scaler was fitted on rows that differ from the training partition')
|
|
|
|
|
|
|
|
# Cut the held-out portion in half with a fixed seed: the first half returned
# by train_test_split becomes the test set, the second half the validation set.
X_test, X_valid, Y_test, Y_valid = train_test_split(
    X_testAndValid, Y_testAndValid, test_size=0.5, random_state=42
)
|
|
|
|
|
|
|
|
# Re-attach each label series to its feature frame (column-wise, aligned on
# the row index) so every partition is a single table again.
train, test, valid = (
    pd.concat([features, labels], axis=1)
    for features, labels in (
        (X_train, Y_train),
        (X_test, Y_test),
        (X_valid, Y_valid),
    )
)
|
|
|
|
|
2021-05-15 15:09:56 +02:00
|
|
|
# Write each partition to its own CSV file in the working directory, without
# the row index. Files are written in the order test, valid, train.
for partition, csv_name in (
    (test, 'test.csv'),
    (valid, 'valid.csv'),
    (train, 'train-train.csv'),
):
    partition.to_csv(csv_name, index=False)
|
2021-05-14 21:52:14 +02:00
|
|
|
|