ium_z487179/Zadanie_LAB02.ipynb
2023-03-25 12:44:06 +01:00

427 KiB

# Download the home-loan-approval dataset archive with the Kaggle CLI.
!kaggle datasets download -d rishikeshkonapure/home-loan-approval
Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /Users/wojciechbatruszewicz/.kaggle/kaggle.json'
Downloading home-loan-approval.zip to /Users/wojciechbatruszewicz/InformatykaStudia/SEMESTR8/IUM/ZADANIA
  0%|                                               | 0.00/12.6k [00:00<?, ?B/s]
100%|██████████████████████████████████████| 12.6k/12.6k [00:00<00:00, 18.6MB/s]
# Extract the downloaded archive; -o overwrites existing files without prompting.
!unzip -o home-loan-approval.zip
Archive:  home-loan-approval.zip
  inflating: loan_sanction_test.csv  
  inflating: loan_sanction_train.csv  
# Count the lines in the test CSV.
!wc -l loan_sanction_test.csv
     367 loan_sanction_test.csv
# Count the lines in the train CSV.
!wc -l loan_sanction_train.csv
     614 loan_sanction_train.csv
import pandas as pd
# Load the train and test CSV files into DataFrames.
home_loan_train = pd.read_csv('loan_sanction_train.csv')
home_loan_test = pd.read_csv('loan_sanction_test.csv')
# head must be called: the bare attribute is a bound method, so the cell would
# display its repr instead of the first five rows.
home_loan_train.head()
<bound method NDFrame.head of       Loan_ID  Gender Married Dependents     Education Self_Employed  \
0    LP001002    Male      No          0      Graduate            No   
1    LP001003    Male     Yes          1      Graduate            No   
2    LP001005    Male     Yes          0      Graduate           Yes   
3    LP001006    Male     Yes          0  Not Graduate            No   
4    LP001008    Male      No          0      Graduate            No   
..        ...     ...     ...        ...           ...           ...   
609  LP002978  Female      No          0      Graduate            No   
610  LP002979    Male     Yes         3+      Graduate            No   
611  LP002983    Male     Yes          1      Graduate            No   
612  LP002984    Male     Yes          2      Graduate            No   
613  LP002990  Female      No          0      Graduate           Yes   

     ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0               5849                0.0         NaN             360.0   
1               4583             1508.0       128.0             360.0   
2               3000                0.0        66.0             360.0   
3               2583             2358.0       120.0             360.0   
4               6000                0.0       141.0             360.0   
..               ...                ...         ...               ...   
609             2900                0.0        71.0             360.0   
610             4106                0.0        40.0             180.0   
611             8072              240.0       253.0             360.0   
612             7583                0.0       187.0             360.0   
613             4583                0.0       133.0             360.0   

     Credit_History Property_Area Loan_Status  
0               1.0         Urban           Y  
1               1.0         Rural           N  
2               1.0         Urban           Y  
3               1.0         Urban           Y  
4               1.0         Urban           Y  
..              ...           ...         ...  
609             1.0         Rural           Y  
610             1.0         Rural           Y  
611             1.0         Urban           Y  
612             1.0         Urban           Y  
613             0.0     Semiurban           N  

[614 rows x 13 columns]>
# Summary statistics for every column, not only the numeric ones.
home_loan_train.describe(include="all")
Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
count 614 601 611 599 614 582 614.000000 614.000000 592.000000 600.00000 564.000000 614 614
unique 614 2 2 4 2 2 NaN NaN NaN NaN NaN 3 2
top LP001002 Male Yes 0 Graduate No NaN NaN NaN NaN NaN Semiurban Y
freq 1 489 398 345 480 500 NaN NaN NaN NaN NaN 233 422
mean NaN NaN NaN NaN NaN NaN 5403.459283 1621.245798 146.412162 342.00000 0.842199 NaN NaN
std NaN NaN NaN NaN NaN NaN 6109.041673 2926.248369 85.587325 65.12041 0.364878 NaN NaN
min NaN NaN NaN NaN NaN NaN 150.000000 0.000000 9.000000 12.00000 0.000000 NaN NaN
25% NaN NaN NaN NaN NaN NaN 2877.500000 0.000000 100.000000 360.00000 1.000000 NaN NaN
50% NaN NaN NaN NaN NaN NaN 3812.500000 1188.500000 128.000000 360.00000 1.000000 NaN NaN
75% NaN NaN NaN NaN NaN NaN 5795.000000 2297.250000 168.000000 360.00000 1.000000 NaN NaN
max NaN NaN NaN NaN NaN NaN 81000.000000 41667.000000 700.000000 480.00000 1.000000 NaN NaN
# Count how many rows carry each Loan_Status label.
(
    home_loan_train["Loan_Status"]
    .value_counts()
)
Y    422
N    192
Name: Loan_Status, dtype: int64
# Bar chart of the number of rows per Loan_Status label.
(
    home_loan_train["Loan_Status"]
    .value_counts()
    .plot.bar()
)
<Axes: >
# Bar chart of the mean ApplicantIncome within each Loan_Status group.
(
    home_loan_train[["Loan_Status", "ApplicantIncome"]]
    .groupby("Loan_Status")
    .mean()
    .plot.bar()
)
<Axes: xlabel='Loan_Status'>
import seaborn as sns
# Apply seaborn's default theme before drawing.
sns.set_theme()
# Relational plot of LoanAmount (x) against ApplicantIncome (y), coloured by
# Loan_Status.
sns.relplot(
    data=home_loan_train,
    x="LoanAmount",
    y="ApplicantIncome",
    hue="Loan_Status",
)
<seaborn.axisgrid.FacetGrid at 0x13920f1c0>
# Pair plot of the frame with the Loan_ID column removed, coloured by
# Loan_Status.
sns.pairplot(
    data=home_loan_train.drop(columns="Loan_ID"),
    hue="Loan_Status",
)
<seaborn.axisgrid.PairGrid at 0x11f6bdbd0>
from sklearn.model_selection import train_test_split