{ "cells": [ { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "# All imports live in this first cell: standard library first, then third-party.\n", "from collections import Counter\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from datasets import load_dataset\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import MinMaxScaler" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "# Load the 'train' split of the mstz/liver dataset.\n", "dataset = load_dataset(\"mstz/liver\")['train']" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['age', 'is_male', 'total_bilirubin', 'direct_ribilubin', 'alkaline_phosphotase', 'alamine_aminotransferasi', 'aspartate_aminotransferase', 'total_proteins', 'albumin', 'albumin_to_globulin_ratio', 'class'],\n", " num_rows: 583\n", "})" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "# Convert to a pandas DataFrame for the rest of the notebook.\n", "dataset = dataset.to_pandas()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# Two-stage split: hold out 20% of all rows as the test set, then 20% of the\n", "# remainder as the validation set (roughly 64% / 16% / 20% train / val / test).\n", "# The fixed random_state makes both splits reproducible.\n", "train, test = train_test_split(dataset, test_size=0.2, random_state=42)\n", "train, val = train_test_split(train, test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageis_maletotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratioclass
10736True0.80.215829396.02.20.501
3338False2.61.241059575.63.00.801
53439True1.60.823088748.04.01.001
20421True0.70.213527266.43.31.001
4832False0.60.117639286.03.01.000
....................................
4242True6.83.263025476.12.30.601
17975True8.04.638630255.51.80.480
43053False0.70.118220334.81.90.600
47538True2.21.0310119427.94.11.001
42558True0.40.1100591264.32.51.400
\n", "

372 rows × 11 columns

\n", "
" ], "text/plain": [ " age is_male total_bilirubin direct_ribilubin alkaline_phosphotase \\\n", "107 36 True 0.8 0.2 158 \n", "33 38 False 2.6 1.2 410 \n", "534 39 True 1.6 0.8 230 \n", "204 21 True 0.7 0.2 135 \n", "48 32 False 0.6 0.1 176 \n", ".. ... ... ... ... ... \n", "42 42 True 6.8 3.2 630 \n", "179 75 True 8.0 4.6 386 \n", "430 53 False 0.7 0.1 182 \n", "475 38 True 2.2 1.0 310 \n", "425 58 True 0.4 0.1 100 \n", "\n", " alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n", "107 29 39 6.0 \n", "33 59 57 5.6 \n", "534 88 74 8.0 \n", "204 27 26 6.4 \n", "48 39 28 6.0 \n", ".. ... ... ... \n", "42 25 47 6.1 \n", "179 30 25 5.5 \n", "430 20 33 4.8 \n", "475 119 42 7.9 \n", "425 59 126 4.3 \n", "\n", " albumin albumin_to_globulin_ratio class \n", "107 2.2 0.50 1 \n", "33 3.0 0.80 1 \n", "534 4.0 1.00 1 \n", "204 3.3 1.00 1 \n", "48 3.0 1.00 0 \n", ".. ... ... ... \n", "42 2.3 0.60 1 \n", "179 1.8 0.48 0 \n", "430 1.9 0.60 0 \n", "475 4.1 1.00 1 \n", "425 2.5 1.40 0 \n", "\n", "[372 rows x 11 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageis_maletotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratioclass
58238True1.00.321621247.34.41.501
45346True0.70.222440237.13.00.700
8960True4.01.92381193507.13.30.800
7175False0.80.218820294.41.80.600
12428True0.60.117736296.94.11.401
....................................
23622True0.80.230057407.93.80.901
48732True0.70.22761021906.02.90.930
2734True6.23.024016808507.24.01.200
30730True0.80.217421474.62.31.000
51256True1.00.319522285.82.60.801
\n", "

94 rows × 11 columns

\n", "
" ], "text/plain": [ " age is_male total_bilirubin direct_ribilubin alkaline_phosphotase \\\n", "582 38 True 1.0 0.3 216 \n", "453 46 True 0.7 0.2 224 \n", "89 60 True 4.0 1.9 238 \n", "71 75 False 0.8 0.2 188 \n", "124 28 True 0.6 0.1 177 \n", ".. ... ... ... ... ... \n", "236 22 True 0.8 0.2 300 \n", "487 32 True 0.7 0.2 276 \n", "27 34 True 6.2 3.0 240 \n", "307 30 True 0.8 0.2 174 \n", "512 56 True 1.0 0.3 195 \n", "\n", " alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n", "582 21 24 7.3 \n", "453 40 23 7.1 \n", "89 119 350 7.1 \n", "71 20 29 4.4 \n", "124 36 29 6.9 \n", ".. ... ... ... \n", "236 57 40 7.9 \n", "487 102 190 6.0 \n", "27 1680 850 7.2 \n", "307 21 47 4.6 \n", "512 22 28 5.8 \n", "\n", " albumin albumin_to_globulin_ratio class \n", "582 4.4 1.50 1 \n", "453 3.0 0.70 0 \n", "89 3.3 0.80 0 \n", "71 1.8 0.60 0 \n", "124 4.1 1.40 1 \n", ".. ... ... ... \n", "236 3.8 0.90 1 \n", "487 2.9 0.93 0 \n", "27 4.0 1.20 0 \n", "307 2.3 1.00 0 \n", "512 2.6 0.80 1 \n", "\n", "[94 rows x 11 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageis_maletotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratioclass
35519True1.40.817813268.04.61.301
40712True1.00.27191571087.23.71.000
9060True5.72.82144128507.33.20.780
40242False0.50.11621551088.14.00.900
26840True14.56.435850755.72.10.500
....................................
51660True0.90.316816246.73.00.800
30554False1.40.719536167.93.70.901
16747False3.01.529264675.61.80.470
31227True1.30.610625548.54.8NaN1
32921True0.70.221114237.34.11.201
\n", "

117 rows × 11 columns

\n", "
" ], "text/plain": [ " age is_male total_bilirubin direct_ribilubin alkaline_phosphotase \\\n", "355 19 True 1.4 0.8 178 \n", "407 12 True 1.0 0.2 719 \n", "90 60 True 5.7 2.8 214 \n", "402 42 False 0.5 0.1 162 \n", "268 40 True 14.5 6.4 358 \n", ".. ... ... ... ... ... \n", "516 60 True 0.9 0.3 168 \n", "305 54 False 1.4 0.7 195 \n", "167 47 False 3.0 1.5 292 \n", "312 27 True 1.3 0.6 106 \n", "329 21 True 0.7 0.2 211 \n", "\n", " alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n", "355 13 26 8.0 \n", "407 157 108 7.2 \n", "90 412 850 7.3 \n", "402 155 108 8.1 \n", "268 50 75 5.7 \n", ".. ... ... ... \n", "516 16 24 6.7 \n", "305 36 16 7.9 \n", "167 64 67 5.6 \n", "312 25 54 8.5 \n", "329 14 23 7.3 \n", "\n", " albumin albumin_to_globulin_ratio class \n", "355 4.6 1.30 1 \n", "407 3.7 1.00 0 \n", "90 3.2 0.78 0 \n", "402 4.0 0.90 0 \n", "268 2.1 0.50 0 \n", ".. ... ... ... \n", "516 3.0 0.80 0 \n", "305 3.7 0.90 1 \n", "167 1.8 0.47 0 \n", "312 4.8 NaN 1 \n", "329 4.1 1.20 1 \n", "\n", "[117 rows x 11 columns]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agetotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratio
count372.000000372.000000372.000000372.000000372.000000372.000000372.000000372.000000371.000000
mean44.6801083.4158601.494355286.47311872.986559110.1478496.5002693.1508060.959515
std16.0545686.7366832.877245242.459927147.472734306.4251531.1000490.8069940.336514
min4.0000000.4000000.10000063.00000010.00000011.0000002.7000000.9000000.300000
25%32.0000000.8000000.200000170.00000024.00000025.0000005.7750002.5750000.700000
50%45.0000001.0000000.300000205.50000035.00000042.0000006.6000003.1000001.000000
75%58.0000002.6250001.300000298.00000060.00000086.2500007.2000003.8000001.100000
max85.00000075.00000019.7000002110.0000001350.0000004929.0000009.6000005.5000002.800000
\n", "
" ], "text/plain": [ " age total_bilirubin direct_ribilubin alkaline_phosphotase \\\n", "count 372.000000 372.000000 372.000000 372.000000 \n", "mean 44.680108 3.415860 1.494355 286.473118 \n", "std 16.054568 6.736683 2.877245 242.459927 \n", "min 4.000000 0.400000 0.100000 63.000000 \n", "25% 32.000000 0.800000 0.200000 170.000000 \n", "50% 45.000000 1.000000 0.300000 205.500000 \n", "75% 58.000000 2.625000 1.300000 298.000000 \n", "max 85.000000 75.000000 19.700000 2110.000000 \n", "\n", " alamine_aminotransferasi aspartate_aminotransferase total_proteins \\\n", "count 372.000000 372.000000 372.000000 \n", "mean 72.986559 110.147849 6.500269 \n", "std 147.472734 306.425153 1.100049 \n", "min 10.000000 11.000000 2.700000 \n", "25% 24.000000 25.000000 5.775000 \n", "50% 35.000000 42.000000 6.600000 \n", "75% 60.000000 86.250000 7.200000 \n", "max 1350.000000 4929.000000 9.600000 \n", "\n", " albumin albumin_to_globulin_ratio \n", "count 372.000000 371.000000 \n", "mean 3.150806 0.959515 \n", "std 0.806994 0.336514 \n", "min 0.900000 0.300000 \n", "25% 2.575000 0.700000 \n", "50% 3.100000 1.000000 \n", "75% 3.800000 1.100000 \n", "max 5.500000 2.800000 " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "numerical_features = ['age', 'total_bilirubin', 'direct_ribilubin', 'alkaline_phosphotase',\n", " 'alamine_aminotransferasi', 'aspartate_aminotransferase', 'total_proteins', 'albumin',\n", " 'albumin_to_globulin_ratio']\n", "train[numerical_features].describe()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Rozkład częstości dla klas:\n", "1: 30.38%\n", "0: 69.62%\n" ] } ], "source": [ "label_counter = Counter(train['class'])\n", "print(\"\\nRozkład częstości dla klas:\")\n", "for label in label_counter.keys():\n", " print(f\"{label}: {label_counter[label] / len(train) * 100:.2f}%\")" ] }, { "cell_type": "code", 
"execution_count": 55, "metadata": {}, "outputs": [], "source": [ "scaler = MinMaxScaler()\n", "train[numerical_features] = scaler.fit_transform(train[numerical_features])\n", "test[numerical_features] = scaler.fit_transform(test[numerical_features])\n", "val[numerical_features] = scaler.fit_transform(val[numerical_features])" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageis_maletotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratioclass
1070.395062True0.0053620.0051020.0464090.0141790.0056930.4782610.2826090.0801
330.419753False0.0294910.0561220.1695160.0365670.0093530.4202900.4565220.2001
5340.432099True0.0160860.0357140.0815830.0582090.0128100.7681160.6739130.2801
2040.209877True0.0040210.0051020.0351730.0126870.0030500.5362320.5217390.2801
480.345679False0.0026810.0000000.0552030.0216420.0034570.4782610.4565220.2800
....................................
420.469136True0.0857910.1581630.2769910.0111940.0073200.4927540.3043480.1201
1790.876543True0.1018770.2295920.1577920.0149250.0028470.4057970.1956520.0720
4300.604938False0.0040210.0000000.0581340.0074630.0044730.3043480.2173910.1200
4750.419753True0.0241290.0459180.1206640.0813430.0063030.7536230.6956520.2801
4250.666667True0.0000000.0000000.0180750.0365670.0233830.2318840.3478260.4400
\n", "

371 rows × 11 columns

\n", "
" ], "text/plain": [ " age is_male total_bilirubin direct_ribilubin \\\n", "107 0.395062 True 0.005362 0.005102 \n", "33 0.419753 False 0.029491 0.056122 \n", "534 0.432099 True 0.016086 0.035714 \n", "204 0.209877 True 0.004021 0.005102 \n", "48 0.345679 False 0.002681 0.000000 \n", ".. ... ... ... ... \n", "42 0.469136 True 0.085791 0.158163 \n", "179 0.876543 True 0.101877 0.229592 \n", "430 0.604938 False 0.004021 0.000000 \n", "475 0.419753 True 0.024129 0.045918 \n", "425 0.666667 True 0.000000 0.000000 \n", "\n", " alkaline_phosphotase alamine_aminotransferasi \\\n", "107 0.046409 0.014179 \n", "33 0.169516 0.036567 \n", "534 0.081583 0.058209 \n", "204 0.035173 0.012687 \n", "48 0.055203 0.021642 \n", ".. ... ... \n", "42 0.276991 0.011194 \n", "179 0.157792 0.014925 \n", "430 0.058134 0.007463 \n", "475 0.120664 0.081343 \n", "425 0.018075 0.036567 \n", "\n", " aspartate_aminotransferase total_proteins albumin \\\n", "107 0.005693 0.478261 0.282609 \n", "33 0.009353 0.420290 0.456522 \n", "534 0.012810 0.768116 0.673913 \n", "204 0.003050 0.536232 0.521739 \n", "48 0.003457 0.478261 0.456522 \n", ".. ... ... ... \n", "42 0.007320 0.492754 0.304348 \n", "179 0.002847 0.405797 0.195652 \n", "430 0.004473 0.304348 0.217391 \n", "475 0.006303 0.753623 0.695652 \n", "425 0.023383 0.231884 0.347826 \n", "\n", " albumin_to_globulin_ratio class \n", "107 0.080 1 \n", "33 0.200 1 \n", "534 0.280 1 \n", "204 0.280 1 \n", "48 0.280 0 \n", ".. ... ... \n", "42 0.120 1 \n", "179 0.072 0 \n", "430 0.120 0 \n", "475 0.280 1 \n", "425 0.440 0 \n", "\n", "[371 rows x 11 columns]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "train.dropna(inplace=True)\n", "test.dropna(inplace=True)\n", "val.dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageis_maletotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratioclass
1070.395062True0.0053620.0051020.0464090.0141790.0056930.4782610.2826090.0801
330.419753False0.0294910.0561220.1695160.0365670.0093530.4202900.4565220.2001
5340.432099True0.0160860.0357140.0815830.0582090.0128100.7681160.6739130.2801
2040.209877True0.0040210.0051020.0351730.0126870.0030500.5362320.5217390.2801
480.345679False0.0026810.0000000.0552030.0216420.0034570.4782610.4565220.2800
....................................
420.469136True0.0857910.1581630.2769910.0111940.0073200.4927540.3043480.1201
1790.876543True0.1018770.2295920.1577920.0149250.0028470.4057970.1956520.0720
4300.604938False0.0040210.0000000.0581340.0074630.0044730.3043480.2173910.1200
4750.419753True0.0241290.0459180.1206640.0813430.0063030.7536230.6956520.2801
4250.666667True0.0000000.0000000.0180750.0365670.0233830.2318840.3478260.4400
\n", "

371 rows × 11 columns

\n", "
" ], "text/plain": [ " age is_male total_bilirubin direct_ribilubin \\\n", "107 0.395062 True 0.005362 0.005102 \n", "33 0.419753 False 0.029491 0.056122 \n", "534 0.432099 True 0.016086 0.035714 \n", "204 0.209877 True 0.004021 0.005102 \n", "48 0.345679 False 0.002681 0.000000 \n", ".. ... ... ... ... \n", "42 0.469136 True 0.085791 0.158163 \n", "179 0.876543 True 0.101877 0.229592 \n", "430 0.604938 False 0.004021 0.000000 \n", "475 0.419753 True 0.024129 0.045918 \n", "425 0.666667 True 0.000000 0.000000 \n", "\n", " alkaline_phosphotase alamine_aminotransferasi \\\n", "107 0.046409 0.014179 \n", "33 0.169516 0.036567 \n", "534 0.081583 0.058209 \n", "204 0.035173 0.012687 \n", "48 0.055203 0.021642 \n", ".. ... ... \n", "42 0.276991 0.011194 \n", "179 0.157792 0.014925 \n", "430 0.058134 0.007463 \n", "475 0.120664 0.081343 \n", "425 0.018075 0.036567 \n", "\n", " aspartate_aminotransferase total_proteins albumin \\\n", "107 0.005693 0.478261 0.282609 \n", "33 0.009353 0.420290 0.456522 \n", "534 0.012810 0.768116 0.673913 \n", "204 0.003050 0.536232 0.521739 \n", "48 0.003457 0.478261 0.456522 \n", ".. ... ... ... \n", "42 0.007320 0.492754 0.304348 \n", "179 0.002847 0.405797 0.195652 \n", "430 0.004473 0.304348 0.217391 \n", "475 0.006303 0.753623 0.695652 \n", "425 0.023383 0.231884 0.347826 \n", "\n", " albumin_to_globulin_ratio class \n", "107 0.080 1 \n", "33 0.200 1 \n", "534 0.280 1 \n", "204 0.280 1 \n", "48 0.280 0 \n", ".. ... ... \n", "42 0.120 1 \n", "179 0.072 0 \n", "430 0.120 0 \n", "475 0.280 1 \n", "425 0.440 0 \n", "\n", "[371 rows x 11 columns]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageis_maletotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratioclass
3550.174419True0.0300000.0496450.0698310.0025670.0153850.9090910.9473680.6666671
4070.093023True0.0166670.0070920.4366100.1874200.0942310.7636360.7105260.4666670
900.651163True0.1733330.1914890.0942370.5147630.8076920.7818180.5789470.3200000
4020.441860False0.0000000.0000000.0589830.1848520.0942310.9272730.7894740.4000000
2680.418605True0.4666670.4468090.1918640.0500640.0625000.4909090.2894740.1333330
....................................
1090.372093True0.0133330.0000000.2786440.0179720.0230770.5272730.4736840.4000001
5160.651163True0.0133330.0141840.0630510.0064180.0134620.6727270.5263160.3333330
3050.581395False0.0300000.0425530.0813560.0320920.0057690.8909090.7105260.4000001
1670.500000False0.0833330.0992910.1471190.0680360.0548080.4727270.2105260.1133330
3290.197674True0.0066670.0070920.0922030.0038510.0125000.7818180.8157890.6000001
\n", "

115 rows × 11 columns

\n", "
" ], "text/plain": [ " age is_male total_bilirubin direct_ribilubin \\\n", "355 0.174419 True 0.030000 0.049645 \n", "407 0.093023 True 0.016667 0.007092 \n", "90 0.651163 True 0.173333 0.191489 \n", "402 0.441860 False 0.000000 0.000000 \n", "268 0.418605 True 0.466667 0.446809 \n", ".. ... ... ... ... \n", "109 0.372093 True 0.013333 0.000000 \n", "516 0.651163 True 0.013333 0.014184 \n", "305 0.581395 False 0.030000 0.042553 \n", "167 0.500000 False 0.083333 0.099291 \n", "329 0.197674 True 0.006667 0.007092 \n", "\n", " alkaline_phosphotase alamine_aminotransferasi \\\n", "355 0.069831 0.002567 \n", "407 0.436610 0.187420 \n", "90 0.094237 0.514763 \n", "402 0.058983 0.184852 \n", "268 0.191864 0.050064 \n", ".. ... ... \n", "109 0.278644 0.017972 \n", "516 0.063051 0.006418 \n", "305 0.081356 0.032092 \n", "167 0.147119 0.068036 \n", "329 0.092203 0.003851 \n", "\n", " aspartate_aminotransferase total_proteins albumin \\\n", "355 0.015385 0.909091 0.947368 \n", "407 0.094231 0.763636 0.710526 \n", "90 0.807692 0.781818 0.578947 \n", "402 0.094231 0.927273 0.789474 \n", "268 0.062500 0.490909 0.289474 \n", ".. ... ... ... \n", "109 0.023077 0.527273 0.473684 \n", "516 0.013462 0.672727 0.526316 \n", "305 0.005769 0.890909 0.710526 \n", "167 0.054808 0.472727 0.210526 \n", "329 0.012500 0.781818 0.815789 \n", "\n", " albumin_to_globulin_ratio class \n", "355 0.666667 1 \n", "407 0.466667 0 \n", "90 0.320000 0 \n", "402 0.400000 0 \n", "268 0.133333 0 \n", ".. ... ... \n", "109 0.400000 1 \n", "516 0.333333 0 \n", "305 0.400000 1 \n", "167 0.113333 0 \n", "329 0.600000 1 \n", "\n", "[115 rows x 11 columns]" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageis_maletotal_bilirubindirect_ribilubinalkaline_phosphotasealamine_aminotransferasiaspartate_aminotransferasetotal_proteinsalbuminalbumin_to_globulin_ratioclass
5820.403226True0.0181000.0170940.0691750.0055280.0040900.7254900.7317070.8461541
4530.532258True0.0045250.0085470.0740290.0150750.0037490.6862750.3902440.2307690
890.758065True0.1538460.1538460.0825240.0547740.1152010.6862750.4634150.3076920
711.000000False0.0090500.0085470.0521840.0050250.0057940.1568630.0975610.1538460
1240.241935True0.0000000.0000000.0455100.0130650.0057940.6470590.6585370.7692311
....................................
2360.145161True0.0090500.0085470.1201460.0236180.0095430.8431370.5853660.3846151
4870.306452True0.0045250.0085470.1055830.0462310.0606680.4705880.3658540.4076920
270.338710True0.2533940.2478630.0837380.8391960.2856170.7058820.6341460.6153850
3070.274194True0.0090500.0085470.0436890.0055280.0119290.1960780.2195120.4615380
5120.693548True0.0181000.0170940.0564320.0060300.0054530.4313730.2926830.3076921
\n", "

93 rows × 11 columns

\n", "
" ], "text/plain": [ " age is_male total_bilirubin direct_ribilubin \\\n", "582 0.403226 True 0.018100 0.017094 \n", "453 0.532258 True 0.004525 0.008547 \n", "89 0.758065 True 0.153846 0.153846 \n", "71 1.000000 False 0.009050 0.008547 \n", "124 0.241935 True 0.000000 0.000000 \n", ".. ... ... ... ... \n", "236 0.145161 True 0.009050 0.008547 \n", "487 0.306452 True 0.004525 0.008547 \n", "27 0.338710 True 0.253394 0.247863 \n", "307 0.274194 True 0.009050 0.008547 \n", "512 0.693548 True 0.018100 0.017094 \n", "\n", " alkaline_phosphotase alamine_aminotransferasi \\\n", "582 0.069175 0.005528 \n", "453 0.074029 0.015075 \n", "89 0.082524 0.054774 \n", "71 0.052184 0.005025 \n", "124 0.045510 0.013065 \n", ".. ... ... \n", "236 0.120146 0.023618 \n", "487 0.105583 0.046231 \n", "27 0.083738 0.839196 \n", "307 0.043689 0.005528 \n", "512 0.056432 0.006030 \n", "\n", " aspartate_aminotransferase total_proteins albumin \\\n", "582 0.004090 0.725490 0.731707 \n", "453 0.003749 0.686275 0.390244 \n", "89 0.115201 0.686275 0.463415 \n", "71 0.005794 0.156863 0.097561 \n", "124 0.005794 0.647059 0.658537 \n", ".. ... ... ... \n", "236 0.009543 0.843137 0.585366 \n", "487 0.060668 0.470588 0.365854 \n", "27 0.285617 0.705882 0.634146 \n", "307 0.011929 0.196078 0.219512 \n", "512 0.005453 0.431373 0.292683 \n", "\n", " albumin_to_globulin_ratio class \n", "582 0.846154 1 \n", "453 0.230769 0 \n", "89 0.307692 0 \n", "71 0.153846 0 \n", "124 0.769231 1 \n", ".. ... ... 
\n", "236 0.384615 1 \n", "487 0.407692 0 \n", "27 0.615385 0 \n", "307 0.461538 0 \n", "512 0.307692 1 \n", "\n", "[93 rows x 11 columns]" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "ium", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.10" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }