"""Prepare the smart-grid stability dataset for model training.

Reads ``smart_grid_stability_augmented.csv``, removes the ``stab`` column,
splits the rows 80/10/10 into train/test/validation partitions, standardizes
the feature columns and writes ``train.csv``, ``test.csv`` and ``valid.csv``.
Each output file holds the scaled feature columns followed by the ``stabf``
label column, without the DataFrame index.
"""

from typing import Tuple

import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

SOURCE_CSV = 'smart_grid_stability_augmented.csv'
TRAIN_CSV = 'train.csv'
TEST_CSV = 'test.csv'
VALID_CSV = 'valid.csv'

LABEL_COLUMN = 'stabf'
# Columns removed from the feature set and never written to the outputs.
# NOTE(review): `stab` is presumably the numeric score `stabf` is derived
# from, which is why it must not be used as a feature -- confirm.
DROPPED_COLUMNS = ('stab',)

RANDOM_STATE = 42

Partition = Tuple[pd.DataFrame, pd.Series]


def split_dataset(
    features: pd.DataFrame,
    labels: pd.Series,
    random_state: int = RANDOM_STATE,
) -> Tuple[Partition, Partition, Partition]:
    """Split rows into train (80%), test (10%) and validation (10%).

    Args:
        features: Feature columns, one row per sample.
        labels: Label values aligned with ``features``.
        random_state: Seed passed to both ``train_test_split`` calls.

    Returns:
        ``(train, test, valid)`` where each element is a
        ``(features, labels)`` pair.
    """
    x_train, x_rest, y_train, y_rest = train_test_split(
        features, labels, test_size=0.2, random_state=random_state)
    # The held-out 20% is halved: first half -> test, second half -> valid.
    x_test, x_valid, y_test, y_valid = train_test_split(
        x_rest, y_rest, test_size=0.5, random_state=random_state)
    return (x_train, y_train), (x_test, y_test), (x_valid, y_valid)


def scale_partition(
    scaler: preprocessing.StandardScaler,
    features: pd.DataFrame,
    labels: pd.Series,
) -> pd.DataFrame:
    """Return ``features`` scaled by ``scaler`` with ``labels`` appended.

    The scaled values are wrapped in a DataFrame that reuses the column
    names and index of ``features`` so the label column lines up row by row
    when concatenated.
    """
    scaled = pd.DataFrame(
        scaler.transform(features),
        columns=features.columns,
        index=features.index,
    )
    return pd.concat([scaled, labels], axis=1)


def main(source_csv: str = SOURCE_CSV) -> None:
    """Build the train/test/validation CSV files from ``source_csv``.

    Raises:
        KeyError: If ``stabf`` or ``stab`` is missing from the input file.
    """
    df = pd.read_csv(source_csv)

    labels = df[LABEL_COLUMN]
    # Select features by name rather than by position so the script does not
    # depend on `stabf` being the last column, and so `stab` is not scaled
    # only to be thrown away afterwards.
    features = df.drop(columns=[LABEL_COLUMN, *DROPPED_COLUMNS])

    (x_train, y_train), (x_test, y_test), (x_valid, y_valid) = split_dataset(
        features, labels)

    # Fit the scaler on the training partition only. Fitting on the full
    # dataset would leak test/validation statistics into the training
    # features and make held-out scores optimistic.
    scaler = preprocessing.StandardScaler().fit(x_train)

    scale_partition(scaler, x_test, y_test).to_csv(TEST_CSV, index=False)
    scale_partition(scaler, x_valid, y_valid).to_csv(VALID_CSV, index=False)
    scale_partition(scaler, x_train, y_train).to_csv(TRAIN_CSV, index=False)


if __name__ == '__main__':
    main()