# Fetch the dataset archive with the Kaggle command-line tool. The leading "!"
# is an IPython/Jupyter shell escape, so these two lines only run in a notebook.
# NOTE(review): presumably needs Kaggle API credentials configured - confirm.
!kaggle datasets download -d rishikeshkonapure/home-loan-approval
# Unpack the archive into the working directory; -o overwrites existing files
# without prompting, so the cell can be re-run safely.
!unzip -o home-loan-approval.zip
import pandas as pd
# Read the train and test CSV files into DataFrames in one pass.
home_loan_train, home_loan_test = (
    pd.read_csv(csv_name)
    for csv_name in ('loan_sanction_train.csv', 'loan_sanction_test.csv')
)
# Summary statistics for every column of the training frame (include="all"
# covers non-numeric columns as well as numeric ones).
home_loan_train.describe(include="all")
# Notebook cell output kept for reference (looks like the Loan_Status class counts):
# Y    422
# N    192
# Name: Loan_Status, dtype: int64
# Bar chart of the mean ApplicantIncome within each Loan_Status group.
mean_income_by_status = (
    home_loan_train[["Loan_Status", "ApplicantIncome"]]
    .groupby("Loan_Status")
    .mean()
)
mean_income_by_status.plot.bar()
import seaborn as sns
# LoanAmount against ApplicantIncome, with points coloured by Loan_Status.
sns.relplot(
    data=home_loan_train,
    x="LoanAmount",
    y="ApplicantIncome",
    hue="Loan_Status",
)
# Pairwise plots of the remaining columns; Loan_ID is dropped before plotting.
pairplot_frame = home_loan_train.drop(columns=["Loan_ID"])
sns.pairplot(data=pairplot_frame, hue="Loan_Status")
from sklearn.model_selection import train_test_split
# Split the test file 50/50 into a validation part and a final test part.
# random_state is fixed so the split is reproducible across runs.
home_loan_val_final, home_loan_test_final = train_test_split(
    home_loan_test, test_size=0.5, random_state=1
)
# Take an independent copy for preprocessing. A plain assignment would only
# alias home_loan_train, so later in-place column assignments (e.g. scaling)
# would silently overwrite the raw training data as well.
home_loan_train_final = home_loan_train.copy()
from sklearn.preprocessing import MinMaxScaler
# Numeric columns present in each split. Only the training columns drive the
# scaling below; the test/val column lists are kept so that any later code
# referring to these names keeps working.
numeric_cols_train = home_loan_train_final.select_dtypes(include='number').columns
numeric_cols_test = home_loan_test_final.select_dtypes(include='number').columns
numeric_cols_val = home_loan_val_final.select_dtypes(include='number').columns

# Fit the scaler on the training data ONLY, then apply the learned min/max to
# the validation and test splits. Calling fit_transform on each split would
# rescale every split by its own min/max, so the same raw value would map to
# different scaled values in different splits, and information from the
# held-out data would influence preprocessing.
# As a consequence, scaled val/test values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
home_loan_train_final[numeric_cols_train] = scaler.fit_transform(home_loan_train_final[numeric_cols_train])
# transform() expects exactly the columns seen during fit, hence
# numeric_cols_train is used for every split.
home_loan_test_final[numeric_cols_train] = scaler.transform(home_loan_test_final[numeric_cols_train])
home_loan_val_final[numeric_cols_train] = scaler.transform(home_loan_val_final[numeric_cols_train])

# Drop rows that contain a missing value in any column. This is done after
# scaling; MinMaxScaler ignores NaNs when fitting and passes them through.
home_loan_train_final = home_loan_train_final.dropna()
home_loan_test_final = home_loan_test_final.dropna()
home_loan_val_final = home_loan_val_final.dropna()

