ium_z487179/Zadanie_LAB02.ipynb at fd0b3dbf6cdcd2c7799d277b87e2ac1766f89b4c

# Download the home-loan-approval dataset archive via the Kaggle CLI
# (needs Kaggle API credentials configured, e.g. ~/.kaggle/kaggle.json).
!kaggle datasets download -d rishikeshkonapure/home-loan-approval

Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /Users/wojciechbatruszewicz/.kaggle/kaggle.json'
Downloading home-loan-approval.zip to /Users/wojciechbatruszewicz/InformatykaStudia/SEMESTR8/IUM/ZADANIA
  0%|                                               | 0.00/12.6k [00:00<?, ?B/s]
100%|██████████████████████████████████████| 12.6k/12.6k [00:00<00:00, 18.6MB/s]

# Extract the downloaded archive; -o overwrites existing files without
# prompting, so the cell can be re-run safely.
!unzip -o home-loan-approval.zip

Archive:  home-loan-approval.zip
  inflating: loan_sanction_test.csv  
  inflating: loan_sanction_train.csv

# Count lines in the extracted test CSV as a quick size check.
!wc -l loan_sanction_test.csv

     367 loan_sanction_test.csv

# Count lines in the extracted train CSV as a quick size check.
!wc -l loan_sanction_train.csv

     614 loan_sanction_train.csv

import pandas as pd

# Load the train/test CSVs extracted from the Kaggle archive.
home_loan_train = pd.read_csv('loan_sanction_train.csv')
home_loan_test = pd.read_csv('loan_sanction_test.csv')
# Call head() so the cell previews the first rows; the bare attribute
# `home_loan_train.head` only displays the bound-method repr.
home_loan_train.head()

<bound method NDFrame.head of       Loan_ID  Gender Married Dependents     Education Self_Employed  \
0    LP001002    Male      No          0      Graduate            No   
1    LP001003    Male     Yes          1      Graduate            No   
2    LP001005    Male     Yes          0      Graduate           Yes   
3    LP001006    Male     Yes          0  Not Graduate            No   
4    LP001008    Male      No          0      Graduate            No   
..        ...     ...     ...        ...           ...           ...   
609  LP002978  Female      No          0      Graduate            No   
610  LP002979    Male     Yes         3+      Graduate            No   
611  LP002983    Male     Yes          1      Graduate            No   
612  LP002984    Male     Yes          2      Graduate            No   
613  LP002990  Female      No          0      Graduate           Yes   

     ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0               5849                0.0         NaN             360.0   
1               4583             1508.0       128.0             360.0   
2               3000                0.0        66.0             360.0   
3               2583             2358.0       120.0             360.0   
4               6000                0.0       141.0             360.0   
..               ...                ...         ...               ...   
609             2900                0.0        71.0             360.0   
610             4106                0.0        40.0             180.0   
611             8072              240.0       253.0             360.0   
612             7583                0.0       187.0             360.0   
613             4583                0.0       133.0             360.0   

     Credit_History Property_Area Loan_Status  
0               1.0         Urban           Y  
1               1.0         Rural           N  
2               1.0         Urban           Y  
3               1.0         Urban           Y  
4               1.0         Urban           Y  
..              ...           ...         ...  
609             1.0         Rural           Y  
610             1.0         Rural           Y  
611             1.0         Urban           Y  
612             1.0         Urban           Y  
613             0.0     Semiurban           N  

[614 rows x 13 columns]>

# Summary statistics for every column, categorical as well as numeric.
home_loan_train.describe(include="all")

	Loan_ID	Gender	Married	Dependents	Education	Self_Employed	ApplicantIncome	CoapplicantIncome	LoanAmount	Loan_Amount_Term	Credit_History	Property_Area	Loan_Status
count	614	601	611	599	614	582	614.000000	614.000000	592.000000	600.00000	564.000000	614	614
unique	614	2	2	4	2	2	NaN	NaN	NaN	NaN	NaN	3	2
top	LP001002	Male	Yes	0	Graduate	No	NaN	NaN	NaN	NaN	NaN	Semiurban	Y
freq	1	489	398	345	480	500	NaN	NaN	NaN	NaN	NaN	233	422
mean	NaN	NaN	NaN	NaN	NaN	NaN	5403.459283	1621.245798	146.412162	342.00000	0.842199	NaN	NaN
std	NaN	NaN	NaN	NaN	NaN	NaN	6109.041673	2926.248369	85.587325	65.12041	0.364878	NaN	NaN
min	NaN	NaN	NaN	NaN	NaN	NaN	150.000000	0.000000	9.000000	12.00000	0.000000	NaN	NaN
25%	NaN	NaN	NaN	NaN	NaN	NaN	2877.500000	0.000000	100.000000	360.00000	1.000000	NaN	NaN
50%	NaN	NaN	NaN	NaN	NaN	NaN	3812.500000	1188.500000	128.000000	360.00000	1.000000	NaN	NaN
75%	NaN	NaN	NaN	NaN	NaN	NaN	5795.000000	2297.250000	168.000000	360.00000	1.000000	NaN	NaN
max	NaN	NaN	NaN	NaN	NaN	NaN	81000.000000	41667.000000	700.000000	480.00000	1.000000	NaN	NaN

# Count rows per Loan_Status value (Y / N) to check the class balance.
(
    home_loan_train["Loan_Status"]
    .value_counts()
)

Y    422
N    192
Name: Loan_Status, dtype: int64

# Bar chart of the Loan_Status class counts.
home_loan_train["Loan_Status"].value_counts().plot.bar()

<Axes: >

# Mean applicant income per Loan_Status group, drawn as a bar chart.
(
    home_loan_train
    .groupby("Loan_Status")[["ApplicantIncome"]]
    .mean()
    .plot(kind="bar")
)

<Axes: xlabel='Loan_Status'>

import seaborn as sns

# Apply seaborn's default theme to the plots that follow.
sns.set_theme()
# Loan amount against applicant income, coloured by loan status.
sns.relplot(
    data=home_loan_train,
    x="LoanAmount",
    y="ApplicantIncome",
    hue="Loan_Status",
)

<seaborn.axisgrid.FacetGrid at 0x13920f1c0>

# Pairwise plots of the numeric features, coloured by loan status.
# Loan_ID is dropped first: it is unique per row and carries no signal.
sns.pairplot(
    data=home_loan_train.drop(columns="Loan_ID"),
    hue="Loan_Status",
)

<seaborn.axisgrid.PairGrid at 0x11f6bdbd0>

from sklearn.model_selection import train_test_split

427 KiB Raw Blame History

427 KiB

Raw Blame History