diff --git a/createDataset/createDataset.py b/createDataset/createDataset.py
index 3963850..4c53f65 100644
--- a/createDataset/createDataset.py
+++ b/createDataset/createDataset.py
@@ -4,8 +4,8 @@ from sklearn.model_selection import train_test_split
 home_loan_train = pd.read_csv('loan_sanction_train.csv')
 home_loan_test = pd.read_csv('loan_sanction_test.csv')
 
-home_loan_val_final, home_loan_test_final = train_test_split(home_loan_test, test_size=0.5, random_state=1)
-home_loan_train_final = home_loan_train
+home_loan_train_final, home_loan_test = train_test_split(home_loan_train, test_size=0.2, random_state=1)
+home_loan_test_final, home_loan_val_final = train_test_split(home_loan_test, test_size=0.5, random_state=1)
 
 numeric_cols_train = home_loan_train_final.select_dtypes(include='number').columns
 numeric_cols_test = home_loan_test_final.select_dtypes(include='number').columns