From c0ae2dd3293664a1addcf9b11776bc394e3b667d Mon Sep 17 00:00:00 2001 From: "bartosz.maslanka.consultant" Date: Wed, 28 Jun 2023 22:39:38 +0200 Subject: [PATCH] add script --- createDataset.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 createDataset.py diff --git a/createDataset.py b/createDataset.py new file mode 100644 index 0000000..2cbe09e --- /dev/null +++ b/createDataset.py @@ -0,0 +1,25 @@ +import pandas as pd +from sklearn.preprocessing import MinMaxScaler +from sklearn.model_selection import train_test_split +gender_classification = pd.read_csv('gender_classification_v7.csv') + +gender_classification_train_final, gender_classification_test = train_test_split(gender_classification, test_size=0.2, random_state=1) +gender_classification_test_final, gender_classification_val_final = train_test_split(gender_classification_test, test_size=0.5, random_state=1) + +numeric_cols_train = gender_classification_train_final.select_dtypes(include='number').columns +numeric_cols_test = gender_classification_test_final.select_dtypes(include='number').columns +numeric_cols_val = gender_classification_val_final.select_dtypes(include='number').columns + +scaler = MinMaxScaler() + +gender_classification_train_final[numeric_cols_train] = scaler.fit_transform(gender_classification_train_final[numeric_cols_train]) +gender_classification_test_final[numeric_cols_test] = scaler.fit_transform(gender_classification_test_final[numeric_cols_test]) +gender_classification_val_final[numeric_cols_val] = scaler.fit_transform(gender_classification_val_final[numeric_cols_val]) + +gender_classification_train_final = gender_classification_train_final.dropna() +gender_classification_test_final = gender_classification_test_final.dropna() +gender_classification_val_final = gender_classification_val_final.dropna() + +gender_classification_train_final.to_csv('gender_classification_train.csv', index=False) +gender_classification_test_final.to_csv('gender_classification_test.csv', index=False) +gender_classification_val_final.to_csv('gender_classification_val.csv', index=False) \ No newline at end of file