ium_z487179/createDataset/Zadanie_LAB02.ipynb

884 lines
441 KiB
Plaintext
Raw Normal View History

2023-03-25 12:44:06 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /Users/wojciechbatruszewicz/.kaggle/kaggle.json'\n",
"Downloading home-loan-approval.zip to /Users/wojciechbatruszewicz/InformatykaStudia/SEMESTR8/IUM/ZADANIA\n",
" 0%| | 0.00/12.6k [00:00<?, ?B/s]\n",
"100%|██████████████████████████████████████| 12.6k/12.6k [00:00<00:00, 18.6MB/s]\n"
]
}
],
"source": [
"!kaggle datasets download -d rishikeshkonapure/home-loan-approval"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Archive: home-loan-approval.zip\n",
" inflating: loan_sanction_test.csv \n",
" inflating: loan_sanction_train.csv \n"
]
}
],
"source": [
"!unzip -o home-loan-approval.zip"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 367 loan_sanction_test.csv\n"
]
}
],
"source": [
"!wc -l loan_sanction_test.csv"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 614 loan_sanction_train.csv\n"
]
}
],
"source": [
"!wc -l loan_sanction_train.csv"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 30,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<bound method NDFrame.head of Loan_ID Gender Married Dependents Education Self_Employed \\\n",
"0 LP001002 Male No 0 Graduate No \n",
"1 LP001003 Male Yes 1 Graduate No \n",
"2 LP001005 Male Yes 0 Graduate Yes \n",
"3 LP001006 Male Yes 0 Not Graduate No \n",
"4 LP001008 Male No 0 Graduate No \n",
".. ... ... ... ... ... ... \n",
"609 LP002978 Female No 0 Graduate No \n",
"610 LP002979 Male Yes 3+ Graduate No \n",
"611 LP002983 Male Yes 1 Graduate No \n",
"612 LP002984 Male Yes 2 Graduate No \n",
"613 LP002990 Female No 0 Graduate Yes \n",
"\n",
" ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
"0 5849 0.0 NaN 360.0 \n",
"1 4583 1508.0 128.0 360.0 \n",
"2 3000 0.0 66.0 360.0 \n",
"3 2583 2358.0 120.0 360.0 \n",
"4 6000 0.0 141.0 360.0 \n",
".. ... ... ... ... \n",
"609 2900 0.0 71.0 360.0 \n",
"610 4106 0.0 40.0 180.0 \n",
"611 8072 240.0 253.0 360.0 \n",
"612 7583 0.0 187.0 360.0 \n",
"613 4583 0.0 133.0 360.0 \n",
"\n",
" Credit_History Property_Area Loan_Status \n",
"0 1.0 Urban Y \n",
"1 1.0 Rural N \n",
"2 1.0 Urban Y \n",
"3 1.0 Urban Y \n",
"4 1.0 Urban Y \n",
".. ... ... ... \n",
"609 1.0 Rural Y \n",
"610 1.0 Rural Y \n",
"611 1.0 Urban Y \n",
"612 1.0 Urban Y \n",
"613 0.0 Semiurban N \n",
"\n",
"[614 rows x 13 columns]>"
]
},
2023-03-25 12:47:22 +01:00
"execution_count": 30,
2023-03-25 12:44:06 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the two CSVs that the unzip cell extracted into the working directory.\n",
"home_loan_train = pd.read_csv('loan_sanction_train.csv')\n",
"home_loan_test = pd.read_csv('loan_sanction_test.csv')\n",
"# head must be *called*: the bare attribute only displays the bound-method repr\n",
"# (which embeds the whole frame) instead of the first five rows.\n",
"home_loan_train.head()"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 31,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Loan_ID</th>\n",
" <th>Gender</th>\n",
" <th>Married</th>\n",
" <th>Dependents</th>\n",
" <th>Education</th>\n",
" <th>Self_Employed</th>\n",
" <th>ApplicantIncome</th>\n",
" <th>CoapplicantIncome</th>\n",
" <th>LoanAmount</th>\n",
" <th>Loan_Amount_Term</th>\n",
" <th>Credit_History</th>\n",
" <th>Property_Area</th>\n",
" <th>Loan_Status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>614</td>\n",
" <td>601</td>\n",
" <td>611</td>\n",
" <td>599</td>\n",
" <td>614</td>\n",
" <td>582</td>\n",
" <td>614.000000</td>\n",
" <td>614.000000</td>\n",
" <td>592.000000</td>\n",
" <td>600.00000</td>\n",
" <td>564.000000</td>\n",
" <td>614</td>\n",
" <td>614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>614</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>LP001002</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>Graduate</td>\n",
" <td>No</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Semiurban</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>489</td>\n",
" <td>398</td>\n",
" <td>345</td>\n",
" <td>480</td>\n",
" <td>500</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>233</td>\n",
" <td>422</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5403.459283</td>\n",
" <td>1621.245798</td>\n",
" <td>146.412162</td>\n",
" <td>342.00000</td>\n",
" <td>0.842199</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6109.041673</td>\n",
" <td>2926.248369</td>\n",
" <td>85.587325</td>\n",
" <td>65.12041</td>\n",
" <td>0.364878</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>150.000000</td>\n",
" <td>0.000000</td>\n",
" <td>9.000000</td>\n",
" <td>12.00000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2877.500000</td>\n",
" <td>0.000000</td>\n",
" <td>100.000000</td>\n",
" <td>360.00000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3812.500000</td>\n",
" <td>1188.500000</td>\n",
" <td>128.000000</td>\n",
" <td>360.00000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5795.000000</td>\n",
" <td>2297.250000</td>\n",
" <td>168.000000</td>\n",
" <td>360.00000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>81000.000000</td>\n",
" <td>41667.000000</td>\n",
" <td>700.000000</td>\n",
" <td>480.00000</td>\n",
" <td>1.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Loan_ID Gender Married Dependents Education Self_Employed \\\n",
"count 614 601 611 599 614 582 \n",
"unique 614 2 2 4 2 2 \n",
"top LP001002 Male Yes 0 Graduate No \n",
"freq 1 489 398 345 480 500 \n",
"mean NaN NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN NaN \n",
"\n",
" ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
"count 614.000000 614.000000 592.000000 600.00000 \n",
"unique NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN \n",
"mean 5403.459283 1621.245798 146.412162 342.00000 \n",
"std 6109.041673 2926.248369 85.587325 65.12041 \n",
"min 150.000000 0.000000 9.000000 12.00000 \n",
"25% 2877.500000 0.000000 100.000000 360.00000 \n",
"50% 3812.500000 1188.500000 128.000000 360.00000 \n",
"75% 5795.000000 2297.250000 168.000000 360.00000 \n",
"max 81000.000000 41667.000000 700.000000 480.00000 \n",
"\n",
" Credit_History Property_Area Loan_Status \n",
"count 564.000000 614 614 \n",
"unique NaN 3 2 \n",
"top NaN Semiurban Y \n",
"freq NaN 233 422 \n",
"mean 0.842199 NaN NaN \n",
"std 0.364878 NaN NaN \n",
"min 0.000000 NaN NaN \n",
"25% 1.000000 NaN NaN \n",
"50% 1.000000 NaN NaN \n",
"75% 1.000000 NaN NaN \n",
"max 1.000000 NaN NaN "
]
},
2023-03-25 12:47:22 +01:00
"execution_count": 31,
2023-03-25 12:44:06 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"home_loan_train.describe(include = \"all\")"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 32,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Y 422\n",
"N 192\n",
"Name: Loan_Status, dtype: int64"
]
},
2023-03-25 12:47:22 +01:00
"execution_count": 32,
2023-03-25 12:44:06 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"home_loan_train[\"Loan_Status\"].value_counts()"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 33,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
2023-03-25 12:47:22 +01:00
"execution_count": 33,
2023-03-25 12:44:06 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2023-03-25 12:47:22 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAGdCAYAAAA8F1jjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAm0klEQVR4nO3df1DUd37H8df+mAUE1oADgqQ2FoM7NgkmhRmuE8Sa0purph2GP+w1XM8fxxlzhSEx2EnEO42YaOVEaYv0FO+YSQzk1Mmk6dxciTPXTFLHA2fS2kM0OEoSBckhYRWBlV36h8OeW7w7fizsh93nY4YJfn/A+5v9Lj6z3+8Sy+jo6KgAAAAMZA31AAAAAL8NoQIAAIxFqAAAAGMRKgAAwFiECgAAMBahAgAAjEWoAAAAYxEqAADAWPZQDxAMo6Oj8vn4vXWRwmq18HgDYYrnd+SwWi2yWCy/d7uwCBWfb1Q3bw6EegzMArvdqoSEWLnddzQy4gv1OACCiOd3ZElMjJXN9vtDhUs/AADAWIQKAAAwFqECAACMRagAAABjESoAAMBYhAoAADAWoQIAAIxFqAAAAGMRKgAAwFiECgAAMBahAgAAjEWoAAAAYxEqAADAWIQKAAAwFqECAACMZQ/1AJg6q9Uiq9US6jFmlc1mDfhnJPH5RuXzjYZ6DACYVYTKHGW1WvTQQ/Mi8i9sSXI6Y0I9wqzzen366qs7xAqAiEKozFFWq0U2m1VVb53TFzduhXoczLCHF8br5ef+RFarhVABEFEIlTnuixu3dPlaf6jHAABgRkTmdQMAADAnECoAAMBYhAoAADAWoQIAAIxFqAAAAGMRKgAAwFiECgAAMBahAgAAjEWoAAAAYxEqAADAWFMOlStXrujJJ5/UqVOn/MsuXLigoqIirVixQqtWrVJ9fX3APj6fTzU1NcrNzVVmZqY2btyozs7OqU8PAADC2pRC5e7du3r55Zd1584d/7K+vj5t2LBBjzzyiE6ePKmSkhIdOnRIJ0+e9G9TW1urxsZGVVZWqqmpSRaLRcXFxfJ4PNM/EgAAEHamFCr/9E//pNjY2IBl77zzjhwOh3bu3Kn09HQVFhZq/fr1OnLkiCTJ4/Ho2LFjKikpUV5enlwul6qrq3Xjxg01NzdP/0gAAEDYmXSotLS0qKmpSfv27QtY3traquzsbNntv/kfMufk5OjKlSvq7e1Ve3u7BgYGlJOT41/vdDq1fPlytbS0TOMQAABAuLL//k1+w+12a9u2baqoqFBqamrAuu7ubmVkZAQsS05OliRdv35d3d3dkjRuv+TkZHV1dU168P/Pbo+s+4Jttsg6XtzD445wNnZ+c57jfpMKlZ07d2rFihV69tlnx60bGhqSw+EIWBYVFSVJGh4e1uDgoCQ9cJv+/v5JDf3/Wa0WJSTE/v4NgTnO6YwJ9QjAjOM8x/0mHCrvvvuuWltb9W//9m8PXB8dHT3uptjh4WFJ0rx58xQdHS3p3r0qY5+PbRMTM72T0ucbldt95/dvGEZsNitP5gjkdg/K6/WFegxgRoz9XOM8jwxOZ8yEXj2bcKicPHlSvb29WrVqVcDyH/zgB6qvr9eiRYvU09MTsG7szwsXLtTIyIh/2eLFiwO2cblcEx3jtxoZ4aRG+PN6fZzrCHuc57jfhEOlqqpKQ0NDAcv+4i/+QqWlpfrLv/xL/fu//7saGxvl9Xpls9kkSWfOnNGSJUu0YMECxcfHKy4uTmfPnvWHitvtVltbm4qKioJ4SAAAIFxMOFQWLlz4wOULFixQWlqaCgsLdfToUW3fvl3f+c539D//8z9qaGjQrl27JN27N6WoqEhVVVVKTExUWlqa9u/fr5SUFOXn5wfnaAAAQFiZ1M20v8uCBQt09OhR7dmzRwUFBUpKStK2bdtUUFDg36a0tFQjIyOqqKjQ0NCQsrOzVV9fP+4GWwAAAEmyjI6OjoZ6iOnyen26eXMg1GPMKrvdqo
SEWJUd+IUuX5veu6ZgvvS0+Tr40ir19Q1w7R5ha+znGud5ZEhMjJ3QzbS8WR0AABiLUAEAAMYiVAAAgLEIFQAAYCxCBQAAGItQAQAAxiJUAACAsQgVAABgLEIFAAAYi1ABAADGIlQAAICxCBUAAGAsQgUAABiLUAEAAMYiVAAAgLEIFQAAYCxCBQAAGItQAQAAxiJUAACAsQgVAABgLEIFAAAYi1ABAADGIlQAAICxCBUAAGAsQgUAABiLUAEAAMYiVAAAgLEIFQAAYCxCBQAAGItQAQAAxpp0qPT29qq8vFw5OTl68skn9d3vflcdHR3+9a+88oqWLVsW8LFy5Ur/ep/Pp5qaGuXm5iozM1MbN25UZ2dncI4GAACElUmHypYtW/T555/ryJEjOnHihKKjo7V+/XoNDg5Kki5evKjnn39eH330kf/j3Xff9e9fW1urxsZGVVZWqqmpSRaLRcXFxfJ4PEE7KAAAEB4mFSp9fX16+OGHtXv3bj3++ONKT0/XCy+8oC+//FKffvqpvF6vOjo69PjjjyspKcn/kZiYKEnyeDw6duyYSkpKlJeXJ5fLperqat24cUPNzc0zcoAAAGDumlSoJCQk6MCBA3r00UclSb/+9a9VX1+vlJQULV26VFevXtXw8LDS09MfuH97e7sGBgaUk5PjX+Z0OrV8+XK1tLRM4zAAAEA4sk91xx07duidd96Rw+HQ4cOHNW/ePF26dEkWi0UNDQ368MMPZbValZeXp7KyMsXHx6u7u1uSlJqaGvC1kpOT1dXVNb0DsUfWfcE2W2QdL+7hcUc4Gzu/Oc9xvymHyre//W2tW7dOb7/9tr73ve/p+PHj+vTTT2W1WpWWlqa6ujp1dnZq3759unTpkhoaGvz3sTgcjoCvFRUVpf7+/ikfhNVqUUJC7JT3B+YKpzMm1CMAM47zHPebcqgsXbpUkrR792598sknevPNN/X6669r/fr1cjqdkqSMjAwlJSVp3bp1On/+vKKjoyXdu1dl7HNJGh4eVkzM1E9Mn29UbvedKe8/F9lsVp7MEcjtHpTX6wv1GMCMGPu5xnkeGZzOmAm9ejapUOnt7dWZM2f0jW98QzabTZJktVqVnp6unp4eWSwWf6SMycjIkCR1d3f7L/n09PRo8eLF/m16enrkcrkmM8o4IyOc1Ah/Xq+Pcx1hj/Mc95vUhcCenh5t3bpVv/zlL/3L7t69q7a2NqWnp2vr1q3atGlTwD7nz5+XdO8VGJfLpbi4OJ09e9a/3u12q62tTVlZWdM5DgAAEIYm9YqKy+XS008/rV27dqmyslJOp1N1dXVyu91av369Ll68qC1btujw4cNas2aNrly5otdee01r1671vxOoqKhIVVVVSkxMVFpamvbv36+UlBTl5+fPyAECAIC5a1KhYrFYdPDgQf3whz9UWVmZbt26paysLL311ltatGiRFi1apEOHDqmurk51dXWKj4/Xs88+q7KyMv/XKC0t1cjIiCoqKjQ0NKTs7GzV19ePu8EWAADAMjo6OhrqIabL6/Xp5s2BUI8xq+x2qxISYlV24Be6fG3q75jC3JCeNl8HX1qlvr4Brt0jbI39XOM8jwyJibETupmWN6sDAABjESoAAMBYhAoAADAWoQIAAIxFqAAAAGMRKgAAwFiECgAAMBahAgAAjEWoAAAAYxEqAADAWIQKAAAwFqECAACMRagAAABjESoAAMBYhAoAADAWoQIAAIxFqAAAAGMRKgAAwFiECgAAMBahAgAAjEWoAAAAYxEqAADAWIQKAAAwFqECAACMRagAAABjESoAAMBYhAoAADAWoQIAAIxFqAAAAGMRKgAAwFiTDpXe3l6Vl5crJydHTz75pL773e+qo6PDv/7ChQsqKirSihUrtGrVKtXX1wfs7/P5VFNTo9zcXGVmZmrjxo3q7Oyc/pEAAICwM+lQ2bJliz7//HMdOXJEJ06cUHR0tNavX6/BwUH19fVpw4YNeuSRR3Ty5E
mVlJTo0KFDOnnypH//2tpaNTY2qrKyUk1NTbJYLCouLpbH4wnqgQEAgLnPPpmN+/r69PDDD2vLli169NFHJUkvvPC
2023-03-25 12:44:06 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class balance of the target column; labelled so the figure stands alone.\n",
"ax = home_loan_train[\"Loan_Status\"].value_counts().plot(kind=\"bar\")\n",
"# Trailing semicolon keeps the return value of ax.set out of the cell output.\n",
"ax.set(title=\"Loan_Status counts (train)\", xlabel=\"Loan_Status\", ylabel=\"Number of rows\");"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 34,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: xlabel='Loan_Status'>"
]
},
2023-03-25 12:47:22 +01:00
"execution_count": 34,
2023-03-25 12:44:06 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2023-03-25 12:47:22 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAGyCAYAAAARVkUiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyPklEQVR4nO3de1iUZcLH8d8MIycRQ1IwzUOYkbmCJq9kka6utZt2YGmrNWrVQtPSEkMz1NQ0az1mrmkK5b5lauJ28rXysNbmuiaW25ZiaYamIooEgsDADO8fLpPjiYMHuJnv57r2WngONzfMPFdfn+eZGUt5eXm5AAAADGWt7QkAAABcCGIGAAAYjZgBAABGI2YAAIDRiBkAAGA0YgYAABiNmAEAAEYjZgAAgNFstT2By6G8vFxOJ+8N6CmsVguPN1BPcXx7DqvVIovFUqVtPSJmnM5yHTtWWNvTwGVgs1kVFNRQ+fknVFbmrO3pALiIOL49S5MmDeXlVbWY4TITAAAwGjEDAACMRswAAACjETMAAMBoxAwAADCaR7yaCQAgOZ1OORxltT2NGnM6LSou9pLdXiKHg5dnm87Lyyar9eKcUyFmAKCeKy8vV37+MRUVFdT2VC7Y0aNWOZ28LLu+8PMLUGBgkyq/n8y5EDMAUM9VhExAQJC8vX0u+D8ctcnLy8JZmXqgvLxcdnuJCgpyJUmNGwdf0HjEDADUY06nwxUyAQGBtT2dC2azWXnDvHrC29tHklRQkKtGjYIu6JITNwADQD3mcDgk/fIfDqAuqXheXui9XMQMAHgAky8tof66WM9LLjMBgIeyWi2yWi9/5DidfPgvLi5iBgA8kNVq0RVX+MvL6/KfoHc4nPr55xMEDS4aYgYAPJDVapGXl1Uz3tqmnw4fv2w/t2VIIz394I2yWi0XFDOFhQW6667b5e/fUKtWrVaDBg0u4iylJ54YrObNr1Jy8kR9+WW6Rox4TO+8876aN7/qov6cc/n66+0qL5ciIiLPmA/ORMwAgAf76fBx7TmQV9vTqLZ16z5RUFATHTt2TJ9+ukG/+c3tl+xn/epXEXrvvY90xRVBl+xnnG7YsEf17LPPuWIG50fM1GO1dT28NlWcMq+NU+e1jfsQ4ElWr35f3brdpCNHsvXuu2mXNGYaNGig4OArL9n4uHDETD1Vm9fD64LAQL/ansJlx30I8BQ//rhXO3Z8o/79H1JRUZGmTp2oH3/cqzZt2ko6eUmmffvrlJeXp08/3aBGjQL1+9/fp/j4P8lisbguG7344kzNmzdHR48e0Q03dNLIkUmuMU51+mWmsrIyLVmSojVrPlRu7jG1bt1WgwcPU3R0d0nS559/prfeekO7d38vh8OhsLB2Gjz4cUVFdXPNLzy8g44fz9enn26Q01muW2/tqcTEMfL399ctt3SVJL3wwiR99dW2s15auuWWrho9OlkbNqzV11//W4GBgYqLu08PPTTQtc3Wrf9SSspr+v77XQoMbKzbb79DCQlD5eXlpZKSYv31r6/rk08+Uk7OEbVu3UYDBw7Wrbf2lCT93/99oCVLUjR48DAtWDBPOTk5iozsorFjx2vp0v/VmjUfqkGDBrrvvj+6/czVq9/X0qV/1aFDh9S8eXPdfXec7r33/ov2sQXnQszUU7V1PRy142LdhwCYYPXq9+Xn56fo6JtVVlYmb29vvftump566mnXNn/720r17XuXUlLe1I4d32jGjGmyWKT4+AGubV5+eaYSE0erWbNQzZ8/VyNGPKalS9MUEBBw3p8/d+5MbdiwViNHjlF4+PVas+ZDjR07Sqmpb6mkpETPPvu0hg4dofHjn1dhYaEWLZqvyZPHu93bk5a2XA88EK/XXlui77/fpalTJ6ply6s1YMCjeu+9j3T33b/ViBGjdMcdd55zHvPnv6yRI0crMXGMPv74/7Rw4V/UqVOkIiI669tvv9GoUSP0hz/8UWPHTtDhw1maPHmcrFarBg
8epokTk7VrV4YSE8eoVavWWr/+EyUnJ2natBm65ZYekqTDh7P0t7+t1AsvzFBR0QmNGZOoP/3pj+rb9y699tobWrv2Iy1c+BfdfHOMrrmmnd57b5UWLJinxMTR6tCho77/fpdmz/6zjh7N1rBhT17AI145YqaeM/V6OACcTVlZmT755P/UvXuMfH19JUnR0Tfro49W67HHnnAta9WqjUaNekYWi0WtW7fRjz/u1TvvLNODD/7JNdYTTzylm266RZL03HPP6/e/76t16z7WPffEnfPnnzhRqA8+eFdPPvm0evfuI0l69NHH5HA4dOLECXl7N9CTTz6tuLj7XPv84Q8PaOTIJ3TsWI5CQkIlSa1bt9WQIY//d66ttW7dJ/r66+2S5LqkFRAQcN6w+t3v7tTtt98hSUpIGKpVq97R119vV0REZ73zztu6/vobNHz4yP/+vDZKSkrW0aNH9OOPe/WPf3yql16arZtvjpEkDRyYoN27v9OSJamumCkrK1Ni4mhdc007SVLXrv+jb7/9j4YNGyGLxaL4+AF6/fVF+uGHPbrmmnZasiRFDz00UH36/FaS1KJFSxUWFmrmzJf0yCOPycfn0r1xIzEDAAaq6j1xTmf9uW/OYpE2bdqknJwc9e59m2t579636bPP/q516z5Wv353S5I6d+7i9oZsHTv+Sm+9tUR5eb/8465z5xtdXwcGNtbVV7fSDz/sPu8c9u3LVGlpqW644VduyyvCRJIaNWqst95aon37MrV//z59//0uSXL7gMzWrdu47R8QEKCCguqdRT99jIYNG6q0tFSStGfP967LWhV69Pi1JGn9+rWSpE6dIt3WR0R00YIF89yWtWr1y8/w9fVV8+ZXuf6uFXFit9uVm5ur7OzDWrx4gV5//TXXPk6nU3Z7iQ4dOnjWS3gXCzEDAIapzj1xxcVeOnrUKi8vi2y2X7av7fvpavrzV69+X5I0fvyYM9a9//4q3XNPrCwWixo0aOD2+1Z0jbe3zfWzfXy83bYpLy+XzWaTzWaVxWKRxXLyb3bqCwt8fLwlnfyMqFP3rfDVV9v05JOP66abblZkZGfddttvVVJSrNGjE+XlZXWN7ePjc9r8LK5xK1itvzxmZ3unXG9v7zOWlZeX/3cc23neXffsl6KdTodsNvcsOP37c41ZXn4y1EaMGKmuXbudsb7ijNSlQswAgGGqc09coJ9Vt0UGqMxSKIvV7lru52Or1RvlD+cUqqikep/Hk5f3sz7f9A/9uvfv1O/u+9zWrX5/pTasW61P/7lNJaUObf/319p/yt9m07/S1SykufKKLDqSe0KS9Ok/0/WrTl0kSfn5P2vfvn26ve+92n/4uEpKHSosLtX+w8dd2x86WqDGVwTJy2bTpi1fyjewuWv8Z54eopu699TOHV+rQ8dIPZE40bVuzYdpkqSDRwvk8HIfu0JhcalKSh1uy47lF2v/4eNqYPOq1t9Jktq0uUY7d+5wW7ZixVJ99NFqjR//vKST72VTcZlJkv797+01PnsSFNREQUFNdODAAd1zz9Wu5evXf6LPPvu7kpMn1WjcqiJmAMBQVbkn7spAm0od/rKXOiWLw7W84mxDy5BGl3SOp6v4efYyp0rsjkq2drdh3UdyOhz63Z33K6R5K7d1/e7pr40b1mjN6r/J6SxXxo6vtfR/U9Q9pre+y/hGH61epfgBw1Rid8j+30/dXvTqLD0yJFF+/gF6+38X6IqgJuoSFaMSu+PkWx04yt22P/k3bKDbfhurt99cLD//Rmp5dRt99veP9NO+H/WrYd2UlXVI2774XF//e7uaBDfVjm+/0rK3UiRJJ04UnzF2Bafj5FsrVCzz9fXTvswfdTTnmK4MblLtv3P//g/p0Ucf1qJFr+r22+/QgQM/6a9/fV1xcfepbdtrdNNNN2vmzBclSVdf3Urr13+izz//VJMnT6v2z5JOnrHp3/9hvfbaXxQSEqKbbrpFP/ywWzNnvqTu3W8561mki4
mYAQAPVFbmVJnDqacfvLHyjS/2z3Y4VVbmrHzD03z29490w6+66KoWrc5Y1yykubp2i9Hmzzco9KqWujHqZv20f6+
2023-03-25 12:44:06 +01:00
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Mean ApplicantIncome for each Loan_Status value, drawn as a bar chart.\n",
"ax = home_loan_train[[\"Loan_Status\", \"ApplicantIncome\"]].groupby(\"Loan_Status\").mean().plot(kind=\"bar\")\n",
"# Trailing semicolon keeps the return value of ax.set out of the cell output.\n",
"ax.set(title=\"Mean ApplicantIncome by Loan_Status (train)\", ylabel=\"Mean ApplicantIncome\");"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 35,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2023-03-25 12:47:22 +01:00
"<seaborn.axisgrid.FacetGrid at 0x13f929ae0>"
2023-03-25 12:44:06 +01:00
]
},
2023-03-25 12:47:22 +01:00
"execution_count": 35,
2023-03-25 12:44:06 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHkCAYAAADfFDApAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC0A0lEQVR4nOzdd3xUVdrA8d+909NDCAm9Q+ggBCKIIM1eENu6oIKyrgXXjop9RfdVFEWXZRXYxVUXUFDXiggqUqRXIfQOSSA9k0y7975/BAbGTDAJA5kkz/fz2ZXcc+bcczLtyamKYRgGQgghhBB1mFrdFRBCCCGEqG4SEAkhhBCizpOASAghhBB1ngREQgghhKjzJCASQgghRJ0nAZEQQggh6jwJiIQQQghR50lAJIQQQog6TwIiIYQQQtR55uquQG2laTo5Oc4K51dVhXr1IsnJcaLrNXfzcGlHeJF2hBdpR3gJVTsSE6NDWCtRXaSHKEyoqoKiKKiqUt1VOSvSjvAi7Qgv0o7wUlvaIUJDAiIhhBBC1HkSEAkhhBCizpOASAghhBB1ngREQgghhKjzJCASQgghRJ0nAZEQQggh6jwJiIQQQghR50lAJIQQQog6TwIiIYQQQtR5EhAJIYQQos6TgEgIIYQQdZ4EREIIIYSo8+S0eyFESFnMChatGAMFr8mBz1dzT0MXQtQdEhAJIUJCVRUiKcK5/nsKti4F1URUt8FEdbgIp+7AkLhICBHGJCASQoREJEVkfvgMWmGO/1rejx/i3LKE+jc8RZHhqMbaCSHEmckcIiHEWbOYwbn5x4Bg6CTv8YN4Dm3FZJKPGyFE+Kr2Tyiv18vkyZMZOHAgPXr04NZbb2XdunX+9G3btjFy5Ei6d+/OwIEDmTFjRsDjdV1nypQp9O/fn27dujFmzBj2798fkCcUZQghymfWXBRvW1puunPzD5jxnscaCSFE5VR7QPSPf/yDefPm8dJLL/HZZ5/RqlUrxo4dS2ZmJrm5uYwePZoWLVowb948xo0bx1tvvcW8efP8j586dSqzZ8/mpZdeYs6cOSiKwtixY/F4PAAhKUMI8TsUBUzlj8ArZktpHiGECFPVHhAtWrSIq666iosuuojmzZvzxBNPUFRUxIYNG5g7dy5Wq5Xnn3+e1q1bM2LECO644w7ee+89ADweDzNnzmTcuHEMGDCAlJQUJk+eTGZmJgsXLgQISRlCiDPzKA6iug8rNz3qgsvw6qbzWCMhhKicag+I4uLi+OGHHzh06BCapjFnzhysVisdOnRgzZo1pKamYjaf+sszLS2NvXv3kp2dTXp6Ok6nk7S0NH96TEwMHTt2ZPXq1QAhKUMIcWaapmNv0wtrw9Zl0hxtemKq3wJdl2VmQojwVe2rzCZMmMBDDz3E4MGDMZlMqKrKW2+9RbNmzcjIyKBdu3YB+Rs0aADAkSNHyMjIAKBhw4Zl8hw9ehQgJGUIIX6fU3dQ79pH8WXuxrlpEYpqIqrHpaj1muLUbNVdPSGEOKNqD4h2795NTEwMf//730lKSuLjjz9m/PjxfPDBB7hcLqxWa0B+m630g9XtdlNSUgIQNE9+fj5ASMqoKrO54h1wJ1fg1PSVONKO8HK+2+EmErVxN2KadMZAwaerGIZBJd4KQcnzEV6kHaI2qtaA6PDhwzz22GP8+9//plevXgB06dKFXbt28fbbb2O328tMbHa73QBERERgt9uB0nlAJ/99Mo/DUbrnSSjKqApVVYiPj6z042JiasdeLdKO8CLtCC/SjvBSW9ohzk61BkSbNm3C6/XSpUuXgOvdunVjyZIlNGrUiKysrIC0kz8nJSXh8/n815o1axaQJyUlBYDk5OSzLqMqdN2goKC4wvlNJpWYGAcFBSVoml7l+1Y3aUd4kXaEF2lHeAlVO6ryx68IP9UaEJ2ct7N9+3a6du3qv75jxw6aN29O9+7dmT17NpqmYTKVrlBZsWIFLVu2JCEhgejoaK
Kioli5cqU/mCkoKGDr1q2MHDkSgNTU1LMuo6p8vsq/wTRNr9Ljwo20I7xIO8KLtCO81JZ2iLNTrQOnXbt2pVevXowfP55ffvmFffv28eabb7JixQr+9Kc/MWLECIqKipgwYQK7du1i/vz5zJo1i7vvvhsonfczcuRIJk2axKJFi0hPT+ehhx4iOTmZoUOHAoSkDCGEEELUbtXaQ6SqKlOnTuXNN9/kySefJD8/n3bt2vHvf/+b7t27AzB9+nQmTpzI8OHDSUxM5PHHH2f48OH+Mh544AF8Ph9PP/00LpeL1NRUZsyY4Z8knZCQcNZlCCGEEKJ2UwxDzqA+FzRNJyfHWeH8ZrNKfHwkubnOGt11K+0IL9KO8CLtCC+hakdiYnQIayWqi6w1FEIIIUSdJwGREEIIIeo8CYiEEEIIUedJQCSEEEKIOk8CIiGEEELUeRIQCSGEEKLOk4BICCGEEHWeBERCCCGEqPMkIBJCCCFEnScBkRBCCCHqPAmIhBBCCFHnSUAkhBBCiDpPAiIhhBBC1HkSEAkhhBCizpOASAghhBB1ngREQgghhKjzJCASQgghRJ0nAZEQQggh6jwJiIQQQghR50lAJIQQQog6TwIiIYQQQtR5EhAJIYQQos6TgEgIIYQQdZ4EREIIIYSo8yQgEkIIIUSdJwGREEIIIeo8CYiEEEIIUedJQCSEEEKIOk8CIiGEEELUeRIQCSGEEKLOk4BICCGEEHWeBERCCCGEqPMkIBJCCCFEnScBkRBCCCHqPAmIhBBCCFHnVWtAtHLlStq3bx/0f4MHDwZg27ZtjBw5ku7duzNw4EBmzJgRUIau60yZMoX+/fvTrVs3xowZw/79+wPyhKIMIYQQQtRe1RoQ9ejRg6VLlwb8b+bMmZjNZv785z+Tm5vL6NGjadGiBfPmzWPcuHG89dZbzJs3z1/G1KlTmT17Ni+99BJz5sxBURTGjh2Lx+MBCEkZQgghhKjdqjUgslqtJCYm+v8XFxfHK6+8wrBhw7jxxhuZO3cuVquV559/ntatWzNixAjuuOMO3nvvPQA8Hg8zZ85k3LhxDBgwgJSUFCZPnkxmZiYLFy4ECEkZQgghhKjdwmoO0YcffsjRo0d58sknAVizZg2pqamYzWZ/nrS0NPbu3Ut2djbp6ek4nU7S0tL86TExMXTs2JHVq1eHrAwhhBBC1G7m389yfrjdbqZNm8btt99OgwYNAMjIyKBdu3YB+U6mHTlyhIyMDAAaNmxYJs/Ro0dDVkZVmc0VjzdNJjXgvzWVtCO8SDvCi7QjvNSWdojQCJuA6PPPP8ftdjNq1Cj/NZfLhdVqDchns9mA0gCqpKQEIGie/Pz8kJVRFaqqEB8fWenHxcQ4qnzPcCLtCC/SjvAi7QgvtaUd4uyETUD02WefMWzYMOLj4/3X7HZ7mYnNbrcbgIiICOx2O1A6D+jkv0/mcTgcISujKnTdoKCguML5TSaVmBgHBQUlaJpe5ftWN2lHeJF2hBdpR3gJVTuq8sevCD9hERDl5OSwfv167r777oDrycnJZGVlBVw7+XNSUhI+n89/rVmzZgF5UlJSQlZGVfl8lX+DaZpepceFG2lHeJF2hBdpR3ipLe0QZycsBk7XrVuHoij07t074Hpqaipr165F0zT/tRUrVtCyZUsSEhJISUkhKiqKlStX+tMLCgrYunUrvXr1ClkZQgghhKjdwiIgSk9Pp2nTpmWGqEaMGEFRURETJkxg165dzJ8/n1mzZvl7kqxWKyNHjmTSpEksWrSI9PR0HnroIZKTkxk6dGjIyhBCCCFE7RYWQ2bHjx8nLi6uzPWEhASmT5/OxIkTGT58OImJiTz++OMMHz7cn+eBBx7A5/Px9NNP43K5SE1NZcaMGf5J0qEoQwghhBC1m2IYhlHdlaiNNE0nJ8dZ4fxms0p8fCS5uc4aPZYt7Qgv0o7wIu0IL6FqR2JidAhrJapLWAyZCSGEEEJUJwmIhBBCCFHnSUAkhB
BCiDpPAiIhhBBC1HkSEAkhhBCizpOASAghhBB1ngREQgghhKjzJCASQgghRJ0nAZEQQggh6jwJiIQQQghR50lAJIQ
"text/plain": [
"<Figure size 605.847x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"sns.set_theme()\n",
"sns.relplot(data=home_loan_train, x=\"LoanAmount\", y=\"ApplicantIncome\", hue=\"Loan_Status\")\n"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 36,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2023-03-25 12:47:22 +01:00
"<seaborn.axisgrid.PairGrid at 0x148786080>"
2023-03-25 12:44:06 +01:00
]
},
2023-03-25 12:47:22 +01:00
"execution_count": 36,
2023-03-25 12:44:06 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABUIAAATMCAYAAABC9v0RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdZ3gc1fn38e+ZmW3qstx7wxV3G0wngOkQWiAFSAiQCvxDEjrpkMoTEkJIQoBASAESaoBQQm82GOPecK9ykdWlLVOeF7JlC0m2JKvv73NduYLn7I7OzuzMztxzn/uYIAgCRERERERERERERLoxq6M7ICIiIiIiIiIiItLWFAgVERERERERERGRbk+BUBEREREREREREen2FAgVERERERERERGRbk+BUBEREREREREREen2FAgVERERERERERGRbk+BUBEREREREREREen2FAgVERERERERERGRbk+BUBEREREREREREen2nI7uQHfleT67dlW22foty9CjRya7dlXi+0Gb/R1pOe2j9tOrV/Z+29v6eNyX9nvLaLu1XGfadgc6FqF9j0foXNunLelzdi+t8Tk7+rcxXfZVU2hb7JWu26Kjj8fOJl2/B51Juu6DplyrSvenjNAuyrIMxhgsy3R0V6QR2kfpSfu9ZbTdWk7bbv/SZfvoc3Yv3eFzdofP0Fq0LfbSthDQ96Az0D6QdKZAqIiIiIiIiIiIiHR7CoSKiIiIiIiIiIhIt6dAqIiIiIiIiIiIiHR7mixJRKQVWJYhYpLYbpxUcQVhwngmRBCkT/FxEem+IrZPyK8C38e3IyRMFM/T+U0kHdU9H4RJmAw8z+/obkkXEbF9wn4lqV1lRAgR2DF9f0SkXSkQKiJykBw7IBrfwa6X7iOxeQUA0aETyT/pMqqcHmk1E6OIdC+WZcikgpLX/07V8tkQ+IR6DqLHyZfj5gwi6dsd3UURaSfGGLKsSkrffpSdS94B3yXUoz/5s76Mlz+UpK9bS2mcMYYsU0npW4+wc+k74HuECgaQf9Ke749+T0SkfXT40PhUKsWdd97J8ccfz5QpU/j85z/PvHnzatuXLVvGxRdfzOTJkzn++OO5//7767zf933uuusujjnmGCZNmsSXv/xl1q9fX+c1rbEOEZHGRN0yCh++pTYIChBft5Btf7uVzKC8A3smInJwMqhi+yM/pmrZuxDUZOykdm5k2z9+RKiyULPNiqSRDFPFjsdup3LRG+C7AKR2bWH7o7fhlG7U+UD2q+b7cxuVi98E3wMgVbSZ7Y/+BKdM3x8RaT8dHgj9wx/+wOOPP85tt93GU089xfDhw7nyyivZtm0bxcXFXHbZZQwdOpTHH3+cq6++mt/+9rc8/vjjte+/5557eOSRR7jtttt49NFHMcZw5ZVXkkwmAVplHSIijQk7UP7BswRu/fOFH6+katnbOI4u7ESk67Esg7t9DW7JtgZaA4pffZiI0bWSSDowxuAXbyFVtLnB9uJXHiRCvJ17JV2FZRn84s2kdm1psL34lYf0/RGRdtPhgdBXXnmFM888k6OPPpohQ4Zw4403UlFRwfz583nssccIh8P88Ic/ZMSIEZx//vl86Utf4s9//jMAyWSSBx54gKuvvprjjjuOMWPGcOedd7Jt2zZefvllgFZZh4hIY2wvTnz94kbbq9fMxwlS7dgjEZHW4TgW1Ws+arQ9sXkFts5vImnBcSzi6xc12p7cvl7nA2mUbVtUr13QaHty21rswG3HHolIOuvwQGheXh6vvfYamzZtwvM8Hn30UcLhMGPHjmXu3LnMmDEDx9lbb2bmzJmsXbuWoqIili9fTmVlJTNnzqxtz8nJYdy4cXzwwQcArbKOrugHD7zPE2+u6ehuiHR7gbGxM3Iabbcz8wiMah6JSNcTBODk9Gy03crIJkAZ7yLpIAgCnOyCRttNOEZgOvzWUjqpIAj2+3tiIhkERr8nItI+Oryi9S233MK111
7LiSeeiG3bWJbFb3/7WwYPHkxhYSGjRo2q8/revXsDsGXLFgoLCwHo169fvdds3boVoFXW0VKO03YXA7Zt1fn/fe0sjbNxewUbt1dw4Qkj26wPsn/720fS/trqePStKDkzz2HH479osD1nxhkkjY3T4Wfbzk3HS8t1xW3Xlr+Pn9QVt09LtMXnDIKAjNFHUPLmo0D9Sd9yZpyJ62ThtOOEcNqfraujrlXTTXfZFtHhk8FYtfWC95U97RS8UCaOt/9gVnfZFm2hPX8bO0Js+BSKzUMNfn9ypp2G52ThaPL4dqNjUdJZh9+ar169mpycHH7/+9/Tp08f/vWvf3HDDTfwt7/9jXg8TjgcrvP6SCQCQCKRoLq6GqDB15SWlgK0yjpawrIM+fmZLX5/U+XkxOote3vx3lpemVlRwiFlo3WkhvaRtK+2Ph5dZzTZU0+hfN6LdZbnHXMRkd6DyIi1/bmgu9Dx0nJdZdu11+/jJ3WV7XOwWvtz+kmb3uf8H9ufvqvOzWts+GRyJp2Ak53Rqn+vqbQ/D15HXqumq66+LfyUQ5/zr2PbE3fUTnYDEBk0jrwZZ+BkZzV5XV19W7S2jvptbE9+yqHPed9h25O/rvv9GTye3Omn4mR378/fWelYlHTUoYHQzZs3c9111/Hggw8yffp0ACZMmMCqVav43e9+RzQarTdhUSKRACAjI4NoNArU1Pnc8997XhOL1RzQrbGOlvD9gLKyqha//0Bs2yInJ0ZZWTWeV/fR2ZLVO7Atg+cHzFtayOjBeW3WD2nc/vaRtK4DXTi29fEIDhkzLyB76inE1y/G2DaRwYfiOpmUxS2IV7bh3+4edLy0XGfadk25iWv747GuzrR92lJbfk5nwCT6f+W3JDYuw49XEh08jiAjn3I3DMXte37T/my6jv5tTJd91RTdaVs4fcbWnA82rcCvKiUycCxk9aDcjTTpfNCdtkVzdPTx2Fk4fcfv/v4sJ6guJzJwDEFmQZO/P9J6dCxKOuvQQOjChQtJpVJMmDChzvJJkybx5ptv0r9/f7Zv316nbc+/+/Tpg+u6tcsGDx5c5zVjxowBoG/fvge9jpZy3bY/oXieX+/vFJcnGNovm9Wby9iyo4IR/RuvXyhtr6F9JO2vrfeBSwisHkTGnEBOTozi4srdf1P7vjl0vLRcV9p2HdHPrrR9DkZbfE4XQ5xs7CEzMQYqXB9c6Mjzm/Zn6+ioa9V01R22hQvEycIeNB1jDFWeT9CC80F32BatLR22x57vT3jo4eTlZuh6uRPQsSjpqEMLQuypy7lixYo6y1euXMmQIUOYMWMGH374IZ63N3X+vffeY9iwYRQUFDBmzBiysrKYM2dObXtZWRlLly6tzTBtjXV0NaUVSXIywsQiDqWVyQO/QURaTTo9URWR9KKbJRHZw/MCXNcnaL8SwdKN+O1YW1pE5JM6NBA6ceJEpk+fzg033MDs2bNZt24dv/nNb3jvvff4yle+wvnnn09FRQW33HILq1at4oknnuChhx7iq1/9KlBT1/Piiy/mjjvu4JVXXmH58uVce+219O3bl1mzZgG0yjq6mrKqJJnREJlRhzIFQkVERERERERERDp2aLxlWdxzzz385je/4aabbqK0tJRRo0bx4IMPMnnyZADuu+8+br/9ds4991x69erF9ddfz7nnnlu7jmuuuQbXdbn11luJx+PMmDGD+++/v3byo4KCgoNeR1fi+T4VVSkyow6Z0RBlVQqEioiIiIiIiIiImCDQgIa24Hk+u3a1XcFnx7HIz8/cp65KjdKKBNfe/Q7nHjOc5RuK8fyAG78wtc36IY1rbB9J6+vVK3u/7W19PO5L+71ltN1arjNtuwMdi9C+xyN0ru3TlvQ5u5fW+Jwd/duYLvuqKbQt9krXbdHRx2Nnk67fg84kXfdBU65Vpfvr0KHx0vr21ATNijlkaGi8iIiIiIiIiIgIoEBot7Mn8JkRDZEVDWmyJBERERERERERERQI7X
b2BD4zIjUZodUJl1QapbqLiIiIiIiIiIg0RIHQbqY64WLbhpBjkRmtmQurXBMmiYiIiIiIiIhImlMgtJupTnpEQzY
"text/plain": [
"<Figure size 1355.85x1250 with 30 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.pairplot(data=home_loan_train.drop(columns=[\"Loan_ID\"]), hue=\"Loan_Status\")"
]
},
{
"cell_type": "code",
2023-03-25 12:47:22 +01:00
"execution_count": 37,
2023-03-25 12:44:06 +01:00
"metadata": {},
"outputs": [],
"source": [
2023-03-25 12:47:22 +01:00
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Halve the Kaggle test file into a validation part and a final test part; the\n",
"# fixed random_state makes the split identical on every re-run.\n",
"# NOTE(review): confirm loan_sanction_test.csv contains a Loan_Status column -- if it\n",
"# does not, both parts are unlabeled and cannot be used to score a model.\n",
"home_loan_val_final, home_loan_test_final = train_test_split(home_loan_test, test_size=0.5, random_state=1)\n",
"# Copy rather than alias, so in-place changes made to the *_final frame later on\n",
"# cannot silently alter home_loan_train as well.\n",
"home_loan_train_final = home_loan_train.copy()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Loan_ID</th>\n",
" <th>Gender</th>\n",
" <th>Married</th>\n",
" <th>Dependents</th>\n",
" <th>Education</th>\n",
" <th>Self_Employed</th>\n",
" <th>ApplicantIncome</th>\n",
" <th>CoapplicantIncome</th>\n",
" <th>LoanAmount</th>\n",
" <th>Loan_Amount_Term</th>\n",
" <th>Credit_History</th>\n",
" <th>Property_Area</th>\n",
" <th>Loan_Status</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>LP001003</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>Graduate</td>\n",
" <td>No</td>\n",
" <td>0.054830</td>\n",
" <td>0.036192</td>\n",
" <td>0.172214</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Rural</td>\n",
" <td>N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>LP001005</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>Graduate</td>\n",
" <td>Yes</td>\n",
" <td>0.035250</td>\n",
" <td>0.000000</td>\n",
" <td>0.082489</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Urban</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>LP001006</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>Not Graduate</td>\n",
" <td>No</td>\n",
" <td>0.030093</td>\n",
" <td>0.056592</td>\n",
" <td>0.160637</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Urban</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>LP001008</td>\n",
" <td>Male</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>Graduate</td>\n",
" <td>No</td>\n",
" <td>0.072356</td>\n",
" <td>0.000000</td>\n",
" <td>0.191027</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Urban</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>LP001011</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>2</td>\n",
" <td>Graduate</td>\n",
" <td>Yes</td>\n",
" <td>0.065145</td>\n",
" <td>0.100703</td>\n",
" <td>0.373372</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Urban</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>609</th>\n",
" <td>LP002978</td>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>Graduate</td>\n",
" <td>No</td>\n",
" <td>0.034014</td>\n",
" <td>0.000000</td>\n",
" <td>0.089725</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Rural</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>610</th>\n",
" <td>LP002979</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>3+</td>\n",
" <td>Graduate</td>\n",
" <td>No</td>\n",
" <td>0.048930</td>\n",
" <td>0.000000</td>\n",
" <td>0.044863</td>\n",
" <td>0.358974</td>\n",
" <td>1.0</td>\n",
" <td>Rural</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>611</th>\n",
" <td>LP002983</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>1</td>\n",
" <td>Graduate</td>\n",
" <td>No</td>\n",
" <td>0.097984</td>\n",
" <td>0.005760</td>\n",
" <td>0.353111</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Urban</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>612</th>\n",
" <td>LP002984</td>\n",
" <td>Male</td>\n",
" <td>Yes</td>\n",
" <td>2</td>\n",
" <td>Graduate</td>\n",
" <td>No</td>\n",
" <td>0.091936</td>\n",
" <td>0.000000</td>\n",
" <td>0.257598</td>\n",
" <td>0.743590</td>\n",
" <td>1.0</td>\n",
" <td>Urban</td>\n",
" <td>Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613</th>\n",
" <td>LP002990</td>\n",
" <td>Female</td>\n",
" <td>No</td>\n",
" <td>0</td>\n",
" <td>Graduate</td>\n",
" <td>Yes</td>\n",
" <td>0.054830</td>\n",
" <td>0.000000</td>\n",
" <td>0.179450</td>\n",
" <td>0.743590</td>\n",
" <td>0.0</td>\n",
" <td>Semiurban</td>\n",
" <td>N</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>480 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" Loan_ID Gender Married Dependents Education Self_Employed \\\n",
"1 LP001003 Male Yes 1 Graduate No \n",
"2 LP001005 Male Yes 0 Graduate Yes \n",
"3 LP001006 Male Yes 0 Not Graduate No \n",
"4 LP001008 Male No 0 Graduate No \n",
"5 LP001011 Male Yes 2 Graduate Yes \n",
".. ... ... ... ... ... ... \n",
"609 LP002978 Female No 0 Graduate No \n",
"610 LP002979 Male Yes 3+ Graduate No \n",
"611 LP002983 Male Yes 1 Graduate No \n",
"612 LP002984 Male Yes 2 Graduate No \n",
"613 LP002990 Female No 0 Graduate Yes \n",
"\n",
" ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
"1 0.054830 0.036192 0.172214 0.743590 \n",
"2 0.035250 0.000000 0.082489 0.743590 \n",
"3 0.030093 0.056592 0.160637 0.743590 \n",
"4 0.072356 0.000000 0.191027 0.743590 \n",
"5 0.065145 0.100703 0.373372 0.743590 \n",
".. ... ... ... ... \n",
"609 0.034014 0.000000 0.089725 0.743590 \n",
"610 0.048930 0.000000 0.044863 0.358974 \n",
"611 0.097984 0.005760 0.353111 0.743590 \n",
"612 0.091936 0.000000 0.257598 0.743590 \n",
"613 0.054830 0.000000 0.179450 0.743590 \n",
"\n",
" Credit_History Property_Area Loan_Status \n",
"1 1.0 Rural N \n",
"2 1.0 Urban Y \n",
"3 1.0 Urban Y \n",
"4 1.0 Urban Y \n",
"5 1.0 Urban Y \n",
".. ... ... ... \n",
"609 1.0 Rural Y \n",
"610 1.0 Rural Y \n",
"611 1.0 Urban Y \n",
"612 1.0 Urban Y \n",
"613 0.0 Semiurban N \n",
"\n",
"[480 rows x 13 columns]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"numeric_cols_train = home_loan_train_final.select_dtypes(include='number').columns\n",
"numeric_cols_test = home_loan_test_final.select_dtypes(include='number').columns\n",
"numeric_cols_val = home_loan_val_final.select_dtypes(include='number').columns\n",
"scaler = MinMaxScaler()\n",
"home_loan_train_final[numeric_cols_train] = scaler.fit_transform(home_loan_train_final[numeric_cols_train])\n",
"home_loan_test_final[numeric_cols_test] = scaler.fit_transform(home_loan_test_final[numeric_cols_test])\n",
"home_loan_val_final[numeric_cols_val] = scaler.fit_transform(home_loan_val_final[numeric_cols_val])\n",
"\n",
"home_loan_train_final = home_loan_train_final.dropna()\n",
"home_loan_test_final = home_loan_test_final.dropna()\n",
"home_loan_val_final = home_loan_val_final.dropna()\n",
"\n",
"home_loan_train_final"
2023-03-25 12:44:06 +01:00
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "IUMEnv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}