ium_z487186/lab_02.ipynb

2146 lines
75 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from collections import Counter"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset liver (/Users/natalia.szymczyk/.cache/huggingface/datasets/mstz___liver/liver/1.0.0/3115a4001e742dc2c89457a3906d35982a649915f71f35fc5e6d025c786eeacf)\n",
"100%|██████████| 1/1 [00:00<00:00, 684.45it/s]\n"
]
}
],
"source": [
"from datasets import load_dataset\n",
"\n",
"dataset = load_dataset(\"mstz/liver\")['train']"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset({\n",
" features: ['age', 'is_male', 'total_bilirubin', 'direct_ribilubin', 'alkaline_phosphotase', 'alamine_aminotransferasi', 'aspartate_aminotransferase', 'total_proteins', 'albumin', 'albumin_to_globulin_ratio', 'class'],\n",
" num_rows: 583\n",
"})"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"dataset = dataset.to_pandas()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"train, test = train_test_split(dataset, test_size=0.2, random_state=42)\n",
"train, val = train_test_split(train, test_size=0.2, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>is_male</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>107</th>\n",
" <td>36</td>\n",
" <td>True</td>\n",
" <td>0.8</td>\n",
" <td>0.2</td>\n",
" <td>158</td>\n",
" <td>29</td>\n",
" <td>39</td>\n",
" <td>6.0</td>\n",
" <td>2.2</td>\n",
" <td>0.50</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>38</td>\n",
" <td>False</td>\n",
" <td>2.6</td>\n",
" <td>1.2</td>\n",
" <td>410</td>\n",
" <td>59</td>\n",
" <td>57</td>\n",
" <td>5.6</td>\n",
" <td>3.0</td>\n",
" <td>0.80</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>534</th>\n",
" <td>39</td>\n",
" <td>True</td>\n",
" <td>1.6</td>\n",
" <td>0.8</td>\n",
" <td>230</td>\n",
" <td>88</td>\n",
" <td>74</td>\n",
" <td>8.0</td>\n",
" <td>4.0</td>\n",
" <td>1.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>204</th>\n",
" <td>21</td>\n",
" <td>True</td>\n",
" <td>0.7</td>\n",
" <td>0.2</td>\n",
" <td>135</td>\n",
" <td>27</td>\n",
" <td>26</td>\n",
" <td>6.4</td>\n",
" <td>3.3</td>\n",
" <td>1.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>32</td>\n",
" <td>False</td>\n",
" <td>0.6</td>\n",
" <td>0.1</td>\n",
" <td>176</td>\n",
" <td>39</td>\n",
" <td>28</td>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>1.00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>42</td>\n",
" <td>True</td>\n",
" <td>6.8</td>\n",
" <td>3.2</td>\n",
" <td>630</td>\n",
" <td>25</td>\n",
" <td>47</td>\n",
" <td>6.1</td>\n",
" <td>2.3</td>\n",
" <td>0.60</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179</th>\n",
" <td>75</td>\n",
" <td>True</td>\n",
" <td>8.0</td>\n",
" <td>4.6</td>\n",
" <td>386</td>\n",
" <td>30</td>\n",
" <td>25</td>\n",
" <td>5.5</td>\n",
" <td>1.8</td>\n",
" <td>0.48</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>430</th>\n",
" <td>53</td>\n",
" <td>False</td>\n",
" <td>0.7</td>\n",
" <td>0.1</td>\n",
" <td>182</td>\n",
" <td>20</td>\n",
" <td>33</td>\n",
" <td>4.8</td>\n",
" <td>1.9</td>\n",
" <td>0.60</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>475</th>\n",
" <td>38</td>\n",
" <td>True</td>\n",
" <td>2.2</td>\n",
" <td>1.0</td>\n",
" <td>310</td>\n",
" <td>119</td>\n",
" <td>42</td>\n",
" <td>7.9</td>\n",
" <td>4.1</td>\n",
" <td>1.00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>425</th>\n",
" <td>58</td>\n",
" <td>True</td>\n",
" <td>0.4</td>\n",
" <td>0.1</td>\n",
" <td>100</td>\n",
" <td>59</td>\n",
" <td>126</td>\n",
" <td>4.3</td>\n",
" <td>2.5</td>\n",
" <td>1.40</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>372 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" age is_male total_bilirubin direct_ribilubin alkaline_phosphotase \\\n",
"107 36 True 0.8 0.2 158 \n",
"33 38 False 2.6 1.2 410 \n",
"534 39 True 1.6 0.8 230 \n",
"204 21 True 0.7 0.2 135 \n",
"48 32 False 0.6 0.1 176 \n",
".. ... ... ... ... ... \n",
"42 42 True 6.8 3.2 630 \n",
"179 75 True 8.0 4.6 386 \n",
"430 53 False 0.7 0.1 182 \n",
"475 38 True 2.2 1.0 310 \n",
"425 58 True 0.4 0.1 100 \n",
"\n",
" alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n",
"107 29 39 6.0 \n",
"33 59 57 5.6 \n",
"534 88 74 8.0 \n",
"204 27 26 6.4 \n",
"48 39 28 6.0 \n",
".. ... ... ... \n",
"42 25 47 6.1 \n",
"179 30 25 5.5 \n",
"430 20 33 4.8 \n",
"475 119 42 7.9 \n",
"425 59 126 4.3 \n",
"\n",
" albumin albumin_to_globulin_ratio class \n",
"107 2.2 0.50 1 \n",
"33 3.0 0.80 1 \n",
"534 4.0 1.00 1 \n",
"204 3.3 1.00 1 \n",
"48 3.0 1.00 0 \n",
".. ... ... ... \n",
"42 2.3 0.60 1 \n",
"179 1.8 0.48 0 \n",
"430 1.9 0.60 0 \n",
"475 4.1 1.00 1 \n",
"425 2.5 1.40 0 \n",
"\n",
"[372 rows x 11 columns]"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>is_male</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>582</th>\n",
" <td>38</td>\n",
" <td>True</td>\n",
" <td>1.0</td>\n",
" <td>0.3</td>\n",
" <td>216</td>\n",
" <td>21</td>\n",
" <td>24</td>\n",
" <td>7.3</td>\n",
" <td>4.4</td>\n",
" <td>1.50</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>453</th>\n",
" <td>46</td>\n",
" <td>True</td>\n",
" <td>0.7</td>\n",
" <td>0.2</td>\n",
" <td>224</td>\n",
" <td>40</td>\n",
" <td>23</td>\n",
" <td>7.1</td>\n",
" <td>3.0</td>\n",
" <td>0.70</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>60</td>\n",
" <td>True</td>\n",
" <td>4.0</td>\n",
" <td>1.9</td>\n",
" <td>238</td>\n",
" <td>119</td>\n",
" <td>350</td>\n",
" <td>7.1</td>\n",
" <td>3.3</td>\n",
" <td>0.80</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>75</td>\n",
" <td>False</td>\n",
" <td>0.8</td>\n",
" <td>0.2</td>\n",
" <td>188</td>\n",
" <td>20</td>\n",
" <td>29</td>\n",
" <td>4.4</td>\n",
" <td>1.8</td>\n",
" <td>0.60</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>28</td>\n",
" <td>True</td>\n",
" <td>0.6</td>\n",
" <td>0.1</td>\n",
" <td>177</td>\n",
" <td>36</td>\n",
" <td>29</td>\n",
" <td>6.9</td>\n",
" <td>4.1</td>\n",
" <td>1.40</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>236</th>\n",
" <td>22</td>\n",
" <td>True</td>\n",
" <td>0.8</td>\n",
" <td>0.2</td>\n",
" <td>300</td>\n",
" <td>57</td>\n",
" <td>40</td>\n",
" <td>7.9</td>\n",
" <td>3.8</td>\n",
" <td>0.90</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>487</th>\n",
" <td>32</td>\n",
" <td>True</td>\n",
" <td>0.7</td>\n",
" <td>0.2</td>\n",
" <td>276</td>\n",
" <td>102</td>\n",
" <td>190</td>\n",
" <td>6.0</td>\n",
" <td>2.9</td>\n",
" <td>0.93</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>34</td>\n",
" <td>True</td>\n",
" <td>6.2</td>\n",
" <td>3.0</td>\n",
" <td>240</td>\n",
" <td>1680</td>\n",
" <td>850</td>\n",
" <td>7.2</td>\n",
" <td>4.0</td>\n",
" <td>1.20</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>307</th>\n",
" <td>30</td>\n",
" <td>True</td>\n",
" <td>0.8</td>\n",
" <td>0.2</td>\n",
" <td>174</td>\n",
" <td>21</td>\n",
" <td>47</td>\n",
" <td>4.6</td>\n",
" <td>2.3</td>\n",
" <td>1.00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>512</th>\n",
" <td>56</td>\n",
" <td>True</td>\n",
" <td>1.0</td>\n",
" <td>0.3</td>\n",
" <td>195</td>\n",
" <td>22</td>\n",
" <td>28</td>\n",
" <td>5.8</td>\n",
" <td>2.6</td>\n",
" <td>0.80</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>94 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" age is_male total_bilirubin direct_ribilubin alkaline_phosphotase \\\n",
"582 38 True 1.0 0.3 216 \n",
"453 46 True 0.7 0.2 224 \n",
"89 60 True 4.0 1.9 238 \n",
"71 75 False 0.8 0.2 188 \n",
"124 28 True 0.6 0.1 177 \n",
".. ... ... ... ... ... \n",
"236 22 True 0.8 0.2 300 \n",
"487 32 True 0.7 0.2 276 \n",
"27 34 True 6.2 3.0 240 \n",
"307 30 True 0.8 0.2 174 \n",
"512 56 True 1.0 0.3 195 \n",
"\n",
" alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n",
"582 21 24 7.3 \n",
"453 40 23 7.1 \n",
"89 119 350 7.1 \n",
"71 20 29 4.4 \n",
"124 36 29 6.9 \n",
".. ... ... ... \n",
"236 57 40 7.9 \n",
"487 102 190 6.0 \n",
"27 1680 850 7.2 \n",
"307 21 47 4.6 \n",
"512 22 28 5.8 \n",
"\n",
" albumin albumin_to_globulin_ratio class \n",
"582 4.4 1.50 1 \n",
"453 3.0 0.70 0 \n",
"89 3.3 0.80 0 \n",
"71 1.8 0.60 0 \n",
"124 4.1 1.40 1 \n",
".. ... ... ... \n",
"236 3.8 0.90 1 \n",
"487 2.9 0.93 0 \n",
"27 4.0 1.20 0 \n",
"307 2.3 1.00 0 \n",
"512 2.6 0.80 1 \n",
"\n",
"[94 rows x 11 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"val"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>is_male</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>355</th>\n",
" <td>19</td>\n",
" <td>True</td>\n",
" <td>1.4</td>\n",
" <td>0.8</td>\n",
" <td>178</td>\n",
" <td>13</td>\n",
" <td>26</td>\n",
" <td>8.0</td>\n",
" <td>4.6</td>\n",
" <td>1.30</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>407</th>\n",
" <td>12</td>\n",
" <td>True</td>\n",
" <td>1.0</td>\n",
" <td>0.2</td>\n",
" <td>719</td>\n",
" <td>157</td>\n",
" <td>108</td>\n",
" <td>7.2</td>\n",
" <td>3.7</td>\n",
" <td>1.00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>90</th>\n",
" <td>60</td>\n",
" <td>True</td>\n",
" <td>5.7</td>\n",
" <td>2.8</td>\n",
" <td>214</td>\n",
" <td>412</td>\n",
" <td>850</td>\n",
" <td>7.3</td>\n",
" <td>3.2</td>\n",
" <td>0.78</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>402</th>\n",
" <td>42</td>\n",
" <td>False</td>\n",
" <td>0.5</td>\n",
" <td>0.1</td>\n",
" <td>162</td>\n",
" <td>155</td>\n",
" <td>108</td>\n",
" <td>8.1</td>\n",
" <td>4.0</td>\n",
" <td>0.90</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>268</th>\n",
" <td>40</td>\n",
" <td>True</td>\n",
" <td>14.5</td>\n",
" <td>6.4</td>\n",
" <td>358</td>\n",
" <td>50</td>\n",
" <td>75</td>\n",
" <td>5.7</td>\n",
" <td>2.1</td>\n",
" <td>0.50</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>516</th>\n",
" <td>60</td>\n",
" <td>True</td>\n",
" <td>0.9</td>\n",
" <td>0.3</td>\n",
" <td>168</td>\n",
" <td>16</td>\n",
" <td>24</td>\n",
" <td>6.7</td>\n",
" <td>3.0</td>\n",
" <td>0.80</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>305</th>\n",
" <td>54</td>\n",
" <td>False</td>\n",
" <td>1.4</td>\n",
" <td>0.7</td>\n",
" <td>195</td>\n",
" <td>36</td>\n",
" <td>16</td>\n",
" <td>7.9</td>\n",
" <td>3.7</td>\n",
" <td>0.90</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>167</th>\n",
" <td>47</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" <td>1.5</td>\n",
" <td>292</td>\n",
" <td>64</td>\n",
" <td>67</td>\n",
" <td>5.6</td>\n",
" <td>1.8</td>\n",
" <td>0.47</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>312</th>\n",
" <td>27</td>\n",
" <td>True</td>\n",
" <td>1.3</td>\n",
" <td>0.6</td>\n",
" <td>106</td>\n",
" <td>25</td>\n",
" <td>54</td>\n",
" <td>8.5</td>\n",
" <td>4.8</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>329</th>\n",
" <td>21</td>\n",
" <td>True</td>\n",
" <td>0.7</td>\n",
" <td>0.2</td>\n",
" <td>211</td>\n",
" <td>14</td>\n",
" <td>23</td>\n",
" <td>7.3</td>\n",
" <td>4.1</td>\n",
" <td>1.20</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>117 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" age is_male total_bilirubin direct_ribilubin alkaline_phosphotase \\\n",
"355 19 True 1.4 0.8 178 \n",
"407 12 True 1.0 0.2 719 \n",
"90 60 True 5.7 2.8 214 \n",
"402 42 False 0.5 0.1 162 \n",
"268 40 True 14.5 6.4 358 \n",
".. ... ... ... ... ... \n",
"516 60 True 0.9 0.3 168 \n",
"305 54 False 1.4 0.7 195 \n",
"167 47 False 3.0 1.5 292 \n",
"312 27 True 1.3 0.6 106 \n",
"329 21 True 0.7 0.2 211 \n",
"\n",
" alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n",
"355 13 26 8.0 \n",
"407 157 108 7.2 \n",
"90 412 850 7.3 \n",
"402 155 108 8.1 \n",
"268 50 75 5.7 \n",
".. ... ... ... \n",
"516 16 24 6.7 \n",
"305 36 16 7.9 \n",
"167 64 67 5.6 \n",
"312 25 54 8.5 \n",
"329 14 23 7.3 \n",
"\n",
" albumin albumin_to_globulin_ratio class \n",
"355 4.6 1.30 1 \n",
"407 3.7 1.00 0 \n",
"90 3.2 0.78 0 \n",
"402 4.0 0.90 0 \n",
"268 2.1 0.50 0 \n",
".. ... ... ... \n",
"516 3.0 0.80 0 \n",
"305 3.7 0.90 1 \n",
"167 1.8 0.47 0 \n",
"312 4.8 NaN 1 \n",
"329 4.1 1.20 1 \n",
"\n",
"[117 rows x 11 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>372.000000</td>\n",
" <td>372.000000</td>\n",
" <td>372.000000</td>\n",
" <td>372.000000</td>\n",
" <td>372.000000</td>\n",
" <td>372.000000</td>\n",
" <td>372.000000</td>\n",
" <td>372.000000</td>\n",
" <td>371.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>44.680108</td>\n",
" <td>3.415860</td>\n",
" <td>1.494355</td>\n",
" <td>286.473118</td>\n",
" <td>72.986559</td>\n",
" <td>110.147849</td>\n",
" <td>6.500269</td>\n",
" <td>3.150806</td>\n",
" <td>0.959515</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>16.054568</td>\n",
" <td>6.736683</td>\n",
" <td>2.877245</td>\n",
" <td>242.459927</td>\n",
" <td>147.472734</td>\n",
" <td>306.425153</td>\n",
" <td>1.100049</td>\n",
" <td>0.806994</td>\n",
" <td>0.336514</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.000000</td>\n",
" <td>0.400000</td>\n",
" <td>0.100000</td>\n",
" <td>63.000000</td>\n",
" <td>10.000000</td>\n",
" <td>11.000000</td>\n",
" <td>2.700000</td>\n",
" <td>0.900000</td>\n",
" <td>0.300000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>32.000000</td>\n",
" <td>0.800000</td>\n",
" <td>0.200000</td>\n",
" <td>170.000000</td>\n",
" <td>24.000000</td>\n",
" <td>25.000000</td>\n",
" <td>5.775000</td>\n",
" <td>2.575000</td>\n",
" <td>0.700000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>45.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.300000</td>\n",
" <td>205.500000</td>\n",
" <td>35.000000</td>\n",
" <td>42.000000</td>\n",
" <td>6.600000</td>\n",
" <td>3.100000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>58.000000</td>\n",
" <td>2.625000</td>\n",
" <td>1.300000</td>\n",
" <td>298.000000</td>\n",
" <td>60.000000</td>\n",
" <td>86.250000</td>\n",
" <td>7.200000</td>\n",
" <td>3.800000</td>\n",
" <td>1.100000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>85.000000</td>\n",
" <td>75.000000</td>\n",
" <td>19.700000</td>\n",
" <td>2110.000000</td>\n",
" <td>1350.000000</td>\n",
" <td>4929.000000</td>\n",
" <td>9.600000</td>\n",
" <td>5.500000</td>\n",
" <td>2.800000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age total_bilirubin direct_ribilubin alkaline_phosphotase \\\n",
"count 372.000000 372.000000 372.000000 372.000000 \n",
"mean 44.680108 3.415860 1.494355 286.473118 \n",
"std 16.054568 6.736683 2.877245 242.459927 \n",
"min 4.000000 0.400000 0.100000 63.000000 \n",
"25% 32.000000 0.800000 0.200000 170.000000 \n",
"50% 45.000000 1.000000 0.300000 205.500000 \n",
"75% 58.000000 2.625000 1.300000 298.000000 \n",
"max 85.000000 75.000000 19.700000 2110.000000 \n",
"\n",
" alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n",
"count 372.000000 372.000000 372.000000 \n",
"mean 72.986559 110.147849 6.500269 \n",
"std 147.472734 306.425153 1.100049 \n",
"min 10.000000 11.000000 2.700000 \n",
"25% 24.000000 25.000000 5.775000 \n",
"50% 35.000000 42.000000 6.600000 \n",
"75% 60.000000 86.250000 7.200000 \n",
"max 1350.000000 4929.000000 9.600000 \n",
"\n",
" albumin albumin_to_globulin_ratio \n",
"count 372.000000 371.000000 \n",
"mean 3.150806 0.959515 \n",
"std 0.806994 0.336514 \n",
"min 0.900000 0.300000 \n",
"25% 2.575000 0.700000 \n",
"50% 3.100000 1.000000 \n",
"75% 3.800000 1.100000 \n",
"max 5.500000 2.800000 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"numerical_features = ['age', 'total_bilirubin', 'direct_ribilubin', 'alkaline_phosphotase',\n",
" 'alamine_aminotransferasi', 'aspartate_aminotransferase', 'total_proteins', 'albumin',\n",
" 'albumin_to_globulin_ratio']\n",
"train[numerical_features].describe()"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Rozkład częstości dla klas:\n",
"1: 30.38%\n",
"0: 69.62%\n"
]
}
],
"source": [
"label_counter = Counter(train['class'])\n",
"print(\"\\nRozkład częstości dla klas:\")\n",
"for label in label_counter.keys():\n",
" print(f\"{label}: {label_counter[label] / len(train) * 100:.2f}%\")"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"scaler = MinMaxScaler()\n",
"train[numerical_features] = scaler.fit_transform(train[numerical_features])\n",
"test[numerical_features] = scaler.fit_transform(test[numerical_features])\n",
"val[numerical_features] = scaler.fit_transform(val[numerical_features])"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>is_male</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>107</th>\n",
" <td>0.395062</td>\n",
" <td>True</td>\n",
" <td>0.005362</td>\n",
" <td>0.005102</td>\n",
" <td>0.046409</td>\n",
" <td>0.014179</td>\n",
" <td>0.005693</td>\n",
" <td>0.478261</td>\n",
" <td>0.282609</td>\n",
" <td>0.080</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>0.419753</td>\n",
" <td>False</td>\n",
" <td>0.029491</td>\n",
" <td>0.056122</td>\n",
" <td>0.169516</td>\n",
" <td>0.036567</td>\n",
" <td>0.009353</td>\n",
" <td>0.420290</td>\n",
" <td>0.456522</td>\n",
" <td>0.200</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>534</th>\n",
" <td>0.432099</td>\n",
" <td>True</td>\n",
" <td>0.016086</td>\n",
" <td>0.035714</td>\n",
" <td>0.081583</td>\n",
" <td>0.058209</td>\n",
" <td>0.012810</td>\n",
" <td>0.768116</td>\n",
" <td>0.673913</td>\n",
" <td>0.280</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>204</th>\n",
" <td>0.209877</td>\n",
" <td>True</td>\n",
" <td>0.004021</td>\n",
" <td>0.005102</td>\n",
" <td>0.035173</td>\n",
" <td>0.012687</td>\n",
" <td>0.003050</td>\n",
" <td>0.536232</td>\n",
" <td>0.521739</td>\n",
" <td>0.280</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>0.345679</td>\n",
" <td>False</td>\n",
" <td>0.002681</td>\n",
" <td>0.000000</td>\n",
" <td>0.055203</td>\n",
" <td>0.021642</td>\n",
" <td>0.003457</td>\n",
" <td>0.478261</td>\n",
" <td>0.456522</td>\n",
" <td>0.280</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>0.469136</td>\n",
" <td>True</td>\n",
" <td>0.085791</td>\n",
" <td>0.158163</td>\n",
" <td>0.276991</td>\n",
" <td>0.011194</td>\n",
" <td>0.007320</td>\n",
" <td>0.492754</td>\n",
" <td>0.304348</td>\n",
" <td>0.120</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179</th>\n",
" <td>0.876543</td>\n",
" <td>True</td>\n",
" <td>0.101877</td>\n",
" <td>0.229592</td>\n",
" <td>0.157792</td>\n",
" <td>0.014925</td>\n",
" <td>0.002847</td>\n",
" <td>0.405797</td>\n",
" <td>0.195652</td>\n",
" <td>0.072</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>430</th>\n",
" <td>0.604938</td>\n",
" <td>False</td>\n",
" <td>0.004021</td>\n",
" <td>0.000000</td>\n",
" <td>0.058134</td>\n",
" <td>0.007463</td>\n",
" <td>0.004473</td>\n",
" <td>0.304348</td>\n",
" <td>0.217391</td>\n",
" <td>0.120</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>475</th>\n",
" <td>0.419753</td>\n",
" <td>True</td>\n",
" <td>0.024129</td>\n",
" <td>0.045918</td>\n",
" <td>0.120664</td>\n",
" <td>0.081343</td>\n",
" <td>0.006303</td>\n",
" <td>0.753623</td>\n",
" <td>0.695652</td>\n",
" <td>0.280</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>425</th>\n",
" <td>0.666667</td>\n",
" <td>True</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.018075</td>\n",
" <td>0.036567</td>\n",
" <td>0.023383</td>\n",
" <td>0.231884</td>\n",
" <td>0.347826</td>\n",
" <td>0.440</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>371 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" age is_male total_bilirubin direct_ribilubin \\\n",
"107 0.395062 True 0.005362 0.005102 \n",
"33 0.419753 False 0.029491 0.056122 \n",
"534 0.432099 True 0.016086 0.035714 \n",
"204 0.209877 True 0.004021 0.005102 \n",
"48 0.345679 False 0.002681 0.000000 \n",
".. ... ... ... ... \n",
"42 0.469136 True 0.085791 0.158163 \n",
"179 0.876543 True 0.101877 0.229592 \n",
"430 0.604938 False 0.004021 0.000000 \n",
"475 0.419753 True 0.024129 0.045918 \n",
"425 0.666667 True 0.000000 0.000000 \n",
"\n",
" alkaline_phosphotase alamine_aminotransferasi \\\n",
"107 0.046409 0.014179 \n",
"33 0.169516 0.036567 \n",
"534 0.081583 0.058209 \n",
"204 0.035173 0.012687 \n",
"48 0.055203 0.021642 \n",
".. ... ... \n",
"42 0.276991 0.011194 \n",
"179 0.157792 0.014925 \n",
"430 0.058134 0.007463 \n",
"475 0.120664 0.081343 \n",
"425 0.018075 0.036567 \n",
"\n",
" aspartate_aminotransferase total_proteins albumin \\\n",
"107 0.005693 0.478261 0.282609 \n",
"33 0.009353 0.420290 0.456522 \n",
"534 0.012810 0.768116 0.673913 \n",
"204 0.003050 0.536232 0.521739 \n",
"48 0.003457 0.478261 0.456522 \n",
".. ... ... ... \n",
"42 0.007320 0.492754 0.304348 \n",
"179 0.002847 0.405797 0.195652 \n",
"430 0.004473 0.304348 0.217391 \n",
"475 0.006303 0.753623 0.695652 \n",
"425 0.023383 0.231884 0.347826 \n",
"\n",
" albumin_to_globulin_ratio class \n",
"107 0.080 1 \n",
"33 0.200 1 \n",
"534 0.280 1 \n",
"204 0.280 1 \n",
"48 0.280 0 \n",
".. ... ... \n",
"42 0.120 1 \n",
"179 0.072 0 \n",
"430 0.120 0 \n",
"475 0.280 1 \n",
"425 0.440 0 \n",
"\n",
"[371 rows x 11 columns]"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"train.dropna(inplace=True)\n",
"test.dropna(inplace=True)\n",
"val.dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>is_male</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>107</th>\n",
" <td>0.395062</td>\n",
" <td>True</td>\n",
" <td>0.005362</td>\n",
" <td>0.005102</td>\n",
" <td>0.046409</td>\n",
" <td>0.014179</td>\n",
" <td>0.005693</td>\n",
" <td>0.478261</td>\n",
" <td>0.282609</td>\n",
" <td>0.080</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>0.419753</td>\n",
" <td>False</td>\n",
" <td>0.029491</td>\n",
" <td>0.056122</td>\n",
" <td>0.169516</td>\n",
" <td>0.036567</td>\n",
" <td>0.009353</td>\n",
" <td>0.420290</td>\n",
" <td>0.456522</td>\n",
" <td>0.200</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>534</th>\n",
" <td>0.432099</td>\n",
" <td>True</td>\n",
" <td>0.016086</td>\n",
" <td>0.035714</td>\n",
" <td>0.081583</td>\n",
" <td>0.058209</td>\n",
" <td>0.012810</td>\n",
" <td>0.768116</td>\n",
" <td>0.673913</td>\n",
" <td>0.280</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>204</th>\n",
" <td>0.209877</td>\n",
" <td>True</td>\n",
" <td>0.004021</td>\n",
" <td>0.005102</td>\n",
" <td>0.035173</td>\n",
" <td>0.012687</td>\n",
" <td>0.003050</td>\n",
" <td>0.536232</td>\n",
" <td>0.521739</td>\n",
" <td>0.280</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>0.345679</td>\n",
" <td>False</td>\n",
" <td>0.002681</td>\n",
" <td>0.000000</td>\n",
" <td>0.055203</td>\n",
" <td>0.021642</td>\n",
" <td>0.003457</td>\n",
" <td>0.478261</td>\n",
" <td>0.456522</td>\n",
" <td>0.280</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>0.469136</td>\n",
" <td>True</td>\n",
" <td>0.085791</td>\n",
" <td>0.158163</td>\n",
" <td>0.276991</td>\n",
" <td>0.011194</td>\n",
" <td>0.007320</td>\n",
" <td>0.492754</td>\n",
" <td>0.304348</td>\n",
" <td>0.120</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>179</th>\n",
" <td>0.876543</td>\n",
" <td>True</td>\n",
" <td>0.101877</td>\n",
" <td>0.229592</td>\n",
" <td>0.157792</td>\n",
" <td>0.014925</td>\n",
" <td>0.002847</td>\n",
" <td>0.405797</td>\n",
" <td>0.195652</td>\n",
" <td>0.072</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>430</th>\n",
" <td>0.604938</td>\n",
" <td>False</td>\n",
" <td>0.004021</td>\n",
" <td>0.000000</td>\n",
" <td>0.058134</td>\n",
" <td>0.007463</td>\n",
" <td>0.004473</td>\n",
" <td>0.304348</td>\n",
" <td>0.217391</td>\n",
" <td>0.120</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>475</th>\n",
" <td>0.419753</td>\n",
" <td>True</td>\n",
" <td>0.024129</td>\n",
" <td>0.045918</td>\n",
" <td>0.120664</td>\n",
" <td>0.081343</td>\n",
" <td>0.006303</td>\n",
" <td>0.753623</td>\n",
" <td>0.695652</td>\n",
" <td>0.280</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>425</th>\n",
" <td>0.666667</td>\n",
" <td>True</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.018075</td>\n",
" <td>0.036567</td>\n",
" <td>0.023383</td>\n",
" <td>0.231884</td>\n",
" <td>0.347826</td>\n",
" <td>0.440</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>371 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" age is_male total_bilirubin direct_ribilubin \\\n",
"107 0.395062 True 0.005362 0.005102 \n",
"33 0.419753 False 0.029491 0.056122 \n",
"534 0.432099 True 0.016086 0.035714 \n",
"204 0.209877 True 0.004021 0.005102 \n",
"48 0.345679 False 0.002681 0.000000 \n",
".. ... ... ... ... \n",
"42 0.469136 True 0.085791 0.158163 \n",
"179 0.876543 True 0.101877 0.229592 \n",
"430 0.604938 False 0.004021 0.000000 \n",
"475 0.419753 True 0.024129 0.045918 \n",
"425 0.666667 True 0.000000 0.000000 \n",
"\n",
" alkaline_phosphotase alamine_aminotransferasi \\\n",
"107 0.046409 0.014179 \n",
"33 0.169516 0.036567 \n",
"534 0.081583 0.058209 \n",
"204 0.035173 0.012687 \n",
"48 0.055203 0.021642 \n",
".. ... ... \n",
"42 0.276991 0.011194 \n",
"179 0.157792 0.014925 \n",
"430 0.058134 0.007463 \n",
"475 0.120664 0.081343 \n",
"425 0.018075 0.036567 \n",
"\n",
" aspartate_aminotransferase total_proteins albumin \\\n",
"107 0.005693 0.478261 0.282609 \n",
"33 0.009353 0.420290 0.456522 \n",
"534 0.012810 0.768116 0.673913 \n",
"204 0.003050 0.536232 0.521739 \n",
"48 0.003457 0.478261 0.456522 \n",
".. ... ... ... \n",
"42 0.007320 0.492754 0.304348 \n",
"179 0.002847 0.405797 0.195652 \n",
"430 0.004473 0.304348 0.217391 \n",
"475 0.006303 0.753623 0.695652 \n",
"425 0.023383 0.231884 0.347826 \n",
"\n",
" albumin_to_globulin_ratio class \n",
"107 0.080 1 \n",
"33 0.200 1 \n",
"534 0.280 1 \n",
"204 0.280 1 \n",
"48 0.280 0 \n",
".. ... ... \n",
"42 0.120 1 \n",
"179 0.072 0 \n",
"430 0.120 0 \n",
"475 0.280 1 \n",
"425 0.440 0 \n",
"\n",
"[371 rows x 11 columns]"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>is_male</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>355</th>\n",
" <td>0.174419</td>\n",
" <td>True</td>\n",
" <td>0.030000</td>\n",
" <td>0.049645</td>\n",
" <td>0.069831</td>\n",
" <td>0.002567</td>\n",
" <td>0.015385</td>\n",
" <td>0.909091</td>\n",
" <td>0.947368</td>\n",
" <td>0.666667</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>407</th>\n",
" <td>0.093023</td>\n",
" <td>True</td>\n",
" <td>0.016667</td>\n",
" <td>0.007092</td>\n",
" <td>0.436610</td>\n",
" <td>0.187420</td>\n",
" <td>0.094231</td>\n",
" <td>0.763636</td>\n",
" <td>0.710526</td>\n",
" <td>0.466667</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>90</th>\n",
" <td>0.651163</td>\n",
" <td>True</td>\n",
" <td>0.173333</td>\n",
" <td>0.191489</td>\n",
" <td>0.094237</td>\n",
" <td>0.514763</td>\n",
" <td>0.807692</td>\n",
" <td>0.781818</td>\n",
" <td>0.578947</td>\n",
" <td>0.320000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>402</th>\n",
" <td>0.441860</td>\n",
" <td>False</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.058983</td>\n",
" <td>0.184852</td>\n",
" <td>0.094231</td>\n",
" <td>0.927273</td>\n",
" <td>0.789474</td>\n",
" <td>0.400000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>268</th>\n",
" <td>0.418605</td>\n",
" <td>True</td>\n",
" <td>0.466667</td>\n",
" <td>0.446809</td>\n",
" <td>0.191864</td>\n",
" <td>0.050064</td>\n",
" <td>0.062500</td>\n",
" <td>0.490909</td>\n",
" <td>0.289474</td>\n",
" <td>0.133333</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109</th>\n",
" <td>0.372093</td>\n",
" <td>True</td>\n",
" <td>0.013333</td>\n",
" <td>0.000000</td>\n",
" <td>0.278644</td>\n",
" <td>0.017972</td>\n",
" <td>0.023077</td>\n",
" <td>0.527273</td>\n",
" <td>0.473684</td>\n",
" <td>0.400000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>516</th>\n",
" <td>0.651163</td>\n",
" <td>True</td>\n",
" <td>0.013333</td>\n",
" <td>0.014184</td>\n",
" <td>0.063051</td>\n",
" <td>0.006418</td>\n",
" <td>0.013462</td>\n",
" <td>0.672727</td>\n",
" <td>0.526316</td>\n",
" <td>0.333333</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>305</th>\n",
" <td>0.581395</td>\n",
" <td>False</td>\n",
" <td>0.030000</td>\n",
" <td>0.042553</td>\n",
" <td>0.081356</td>\n",
" <td>0.032092</td>\n",
" <td>0.005769</td>\n",
" <td>0.890909</td>\n",
" <td>0.710526</td>\n",
" <td>0.400000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>167</th>\n",
" <td>0.500000</td>\n",
" <td>False</td>\n",
" <td>0.083333</td>\n",
" <td>0.099291</td>\n",
" <td>0.147119</td>\n",
" <td>0.068036</td>\n",
" <td>0.054808</td>\n",
" <td>0.472727</td>\n",
" <td>0.210526</td>\n",
" <td>0.113333</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>329</th>\n",
" <td>0.197674</td>\n",
" <td>True</td>\n",
" <td>0.006667</td>\n",
" <td>0.007092</td>\n",
" <td>0.092203</td>\n",
" <td>0.003851</td>\n",
" <td>0.012500</td>\n",
" <td>0.781818</td>\n",
" <td>0.815789</td>\n",
" <td>0.600000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>115 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" age is_male total_bilirubin direct_ribilubin \\\n",
"355 0.174419 True 0.030000 0.049645 \n",
"407 0.093023 True 0.016667 0.007092 \n",
"90 0.651163 True 0.173333 0.191489 \n",
"402 0.441860 False 0.000000 0.000000 \n",
"268 0.418605 True 0.466667 0.446809 \n",
".. ... ... ... ... \n",
"109 0.372093 True 0.013333 0.000000 \n",
"516 0.651163 True 0.013333 0.014184 \n",
"305 0.581395 False 0.030000 0.042553 \n",
"167 0.500000 False 0.083333 0.099291 \n",
"329 0.197674 True 0.006667 0.007092 \n",
"\n",
" alkaline_phosphotase alamine_aminotransferasi \\\n",
"355 0.069831 0.002567 \n",
"407 0.436610 0.187420 \n",
"90 0.094237 0.514763 \n",
"402 0.058983 0.184852 \n",
"268 0.191864 0.050064 \n",
".. ... ... \n",
"109 0.278644 0.017972 \n",
"516 0.063051 0.006418 \n",
"305 0.081356 0.032092 \n",
"167 0.147119 0.068036 \n",
"329 0.092203 0.003851 \n",
"\n",
" aspartate_aminotransferase total_proteins albumin \\\n",
"355 0.015385 0.909091 0.947368 \n",
"407 0.094231 0.763636 0.710526 \n",
"90 0.807692 0.781818 0.578947 \n",
"402 0.094231 0.927273 0.789474 \n",
"268 0.062500 0.490909 0.289474 \n",
".. ... ... ... \n",
"109 0.023077 0.527273 0.473684 \n",
"516 0.013462 0.672727 0.526316 \n",
"305 0.005769 0.890909 0.710526 \n",
"167 0.054808 0.472727 0.210526 \n",
"329 0.012500 0.781818 0.815789 \n",
"\n",
" albumin_to_globulin_ratio class \n",
"355 0.666667 1 \n",
"407 0.466667 0 \n",
"90 0.320000 0 \n",
"402 0.400000 0 \n",
"268 0.133333 0 \n",
".. ... ... \n",
"109 0.400000 1 \n",
"516 0.333333 0 \n",
"305 0.400000 1 \n",
"167 0.113333 0 \n",
"329 0.600000 1 \n",
"\n",
"[115 rows x 11 columns]"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>is_male</th>\n",
" <th>total_bilirubin</th>\n",
" <th>direct_ribilubin</th>\n",
" <th>alkaline_phosphotase</th>\n",
" <th>alamine_aminotransferasi</th>\n",
" <th>aspartate_aminotransferase</th>\n",
" <th>total_proteins</th>\n",
" <th>albumin</th>\n",
" <th>albumin_to_globulin_ratio</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>582</th>\n",
" <td>0.403226</td>\n",
" <td>True</td>\n",
" <td>0.018100</td>\n",
" <td>0.017094</td>\n",
" <td>0.069175</td>\n",
" <td>0.005528</td>\n",
" <td>0.004090</td>\n",
" <td>0.725490</td>\n",
" <td>0.731707</td>\n",
" <td>0.846154</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>453</th>\n",
" <td>0.532258</td>\n",
" <td>True</td>\n",
" <td>0.004525</td>\n",
" <td>0.008547</td>\n",
" <td>0.074029</td>\n",
" <td>0.015075</td>\n",
" <td>0.003749</td>\n",
" <td>0.686275</td>\n",
" <td>0.390244</td>\n",
" <td>0.230769</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>0.758065</td>\n",
" <td>True</td>\n",
" <td>0.153846</td>\n",
" <td>0.153846</td>\n",
" <td>0.082524</td>\n",
" <td>0.054774</td>\n",
" <td>0.115201</td>\n",
" <td>0.686275</td>\n",
" <td>0.463415</td>\n",
" <td>0.307692</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>71</th>\n",
" <td>1.000000</td>\n",
" <td>False</td>\n",
" <td>0.009050</td>\n",
" <td>0.008547</td>\n",
" <td>0.052184</td>\n",
" <td>0.005025</td>\n",
" <td>0.005794</td>\n",
" <td>0.156863</td>\n",
" <td>0.097561</td>\n",
" <td>0.153846</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>124</th>\n",
" <td>0.241935</td>\n",
" <td>True</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.045510</td>\n",
" <td>0.013065</td>\n",
" <td>0.005794</td>\n",
" <td>0.647059</td>\n",
" <td>0.658537</td>\n",
" <td>0.769231</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>236</th>\n",
" <td>0.145161</td>\n",
" <td>True</td>\n",
" <td>0.009050</td>\n",
" <td>0.008547</td>\n",
" <td>0.120146</td>\n",
" <td>0.023618</td>\n",
" <td>0.009543</td>\n",
" <td>0.843137</td>\n",
" <td>0.585366</td>\n",
" <td>0.384615</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>487</th>\n",
" <td>0.306452</td>\n",
" <td>True</td>\n",
" <td>0.004525</td>\n",
" <td>0.008547</td>\n",
" <td>0.105583</td>\n",
" <td>0.046231</td>\n",
" <td>0.060668</td>\n",
" <td>0.470588</td>\n",
" <td>0.365854</td>\n",
" <td>0.407692</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0.338710</td>\n",
" <td>True</td>\n",
" <td>0.253394</td>\n",
" <td>0.247863</td>\n",
" <td>0.083738</td>\n",
" <td>0.839196</td>\n",
" <td>0.285617</td>\n",
" <td>0.705882</td>\n",
" <td>0.634146</td>\n",
" <td>0.615385</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>307</th>\n",
" <td>0.274194</td>\n",
" <td>True</td>\n",
" <td>0.009050</td>\n",
" <td>0.008547</td>\n",
" <td>0.043689</td>\n",
" <td>0.005528</td>\n",
" <td>0.011929</td>\n",
" <td>0.196078</td>\n",
" <td>0.219512</td>\n",
" <td>0.461538</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>512</th>\n",
" <td>0.693548</td>\n",
" <td>True</td>\n",
" <td>0.018100</td>\n",
" <td>0.017094</td>\n",
" <td>0.056432</td>\n",
" <td>0.006030</td>\n",
" <td>0.005453</td>\n",
" <td>0.431373</td>\n",
" <td>0.292683</td>\n",
" <td>0.307692</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>93 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" age is_male total_bilirubin direct_ribilubin \\\n",
"582 0.403226 True 0.018100 0.017094 \n",
"453 0.532258 True 0.004525 0.008547 \n",
"89 0.758065 True 0.153846 0.153846 \n",
"71 1.000000 False 0.009050 0.008547 \n",
"124 0.241935 True 0.000000 0.000000 \n",
".. ... ... ... ... \n",
"236 0.145161 True 0.009050 0.008547 \n",
"487 0.306452 True 0.004525 0.008547 \n",
"27 0.338710 True 0.253394 0.247863 \n",
"307 0.274194 True 0.009050 0.008547 \n",
"512 0.693548 True 0.018100 0.017094 \n",
"\n",
" alkaline_phosphotase alamine_aminotransferasi \\\n",
"582 0.069175 0.005528 \n",
"453 0.074029 0.015075 \n",
"89 0.082524 0.054774 \n",
"71 0.052184 0.005025 \n",
"124 0.045510 0.013065 \n",
".. ... ... \n",
"236 0.120146 0.023618 \n",
"487 0.105583 0.046231 \n",
"27 0.083738 0.839196 \n",
"307 0.043689 0.005528 \n",
"512 0.056432 0.006030 \n",
"\n",
" aspartate_aminotransferase total_proteins albumin \\\n",
"582 0.004090 0.725490 0.731707 \n",
"453 0.003749 0.686275 0.390244 \n",
"89 0.115201 0.686275 0.463415 \n",
"71 0.005794 0.156863 0.097561 \n",
"124 0.005794 0.647059 0.658537 \n",
".. ... ... ... \n",
"236 0.009543 0.843137 0.585366 \n",
"487 0.060668 0.470588 0.365854 \n",
"27 0.285617 0.705882 0.634146 \n",
"307 0.011929 0.196078 0.219512 \n",
"512 0.005453 0.431373 0.292683 \n",
"\n",
" albumin_to_globulin_ratio class \n",
"582 0.846154 1 \n",
"453 0.230769 0 \n",
"89 0.307692 0 \n",
"71 0.153846 0 \n",
"124 0.769231 1 \n",
".. ... ... \n",
"236 0.384615 1 \n",
"487 0.407692 0 \n",
"27 0.615385 0 \n",
"307 0.461538 0 \n",
"512 0.307692 1 \n",
"\n",
"[93 rows x 11 columns]"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"val"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "ium",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}