{ "cells": [ { "cell_type": "markdown", "source": [ "## IUM_02" ], "metadata": { "collapsed": false }, "id": "da5635319c1475f3" }, { "cell_type": "markdown", "source": [ "#### Wymagane zależności" ], "metadata": { "collapsed": false }, "id": "5c88bd65c24cfc75" }, { "cell_type": "code", "execution_count": 19, "outputs": [], "source": [ "# Instalacja wymaganych zależności\n", "!pip install kaggle\n", "!pip install pandas\n", "!pip install scikit-learn" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:06:25.947706400Z", "start_time": "2024-03-15T19:06:25.928113300Z" } }, "id": "ae6cca2241835fba" }, { "cell_type": "markdown", "source": [ "#### Import bibliotek" ], "metadata": { "collapsed": false }, "id": "ba9581e73648e5c3" }, { "cell_type": "code", "execution_count": 3, "outputs": [], "source": [ "# Import bibliotek\n", "import pandas as pd\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import MinMaxScaler" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:35.877258600Z", "start_time": "2024-03-15T19:05:35.821429600Z" } }, "id": "5db08fde342b5463" }, { "cell_type": "markdown", "source": [ "#### 1. Pobieranie zbioru danych" ], "metadata": { "collapsed": false }, "id": "2000b14bbb95a446" }, { "cell_type": "code", "execution_count": 4, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading breast-cancer-wisconsin-data.zip to C:\\Users\\broke\\PycharmProjects\\ium_464863\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", " 0%| | 0.00/48.6k [00:00\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
diagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
id
842302M17.9910.38122.801001.00.118400.277600.30010.147100.2419...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
842517M20.5717.77132.901326.00.084740.078640.08690.070170.1812...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
84300903M19.6921.25130.001203.00.109600.159900.19740.127900.2069...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
84348301M11.4220.3877.58386.10.142500.283900.24140.105200.2597...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
84358402M20.2914.34135.101297.00.100300.132800.19800.104300.1809...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n

5 rows × 31 columns

\n" }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Wyświetlenie 5 pierwszych wierszy\n", "df.head()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.015712300Z", "start_time": "2024-03-15T19:05:44.947489400Z" } }, "id": "db9cfc5e73a4da57" }, { "cell_type": "code", "execution_count": 8, "outputs": [], "source": [ "# Normalizacja cech do wartości z przedziału [0, 1]\n", "scaler = MinMaxScaler()\n", "\n", "df[df.columns[1:]] = scaler.fit_transform(df[df.columns[1:]])" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.023766200Z", "start_time": "2024-03-15T19:05:44.995971700Z" } }, "id": "f8513c47a4a1f844" }, { "cell_type": "markdown", "source": [ "#### 3. Podział danych na zbiór treningowy, walidacyjny i testowy" ], "metadata": { "collapsed": false }, "id": "7d74496029e594b1" }, { "cell_type": "code", "execution_count": 9, "outputs": [], "source": [ "# Podział zbioru na zbiór treningowy, walidacyjny i testowy w proporcji 80/10/10\n", "df_train, df_val_test = train_test_split(df, test_size=0.2, random_state=1234)\n", "df_val, df_test = train_test_split(df_val_test, test_size=0.5, random_state=1234)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.111265300Z", "start_time": "2024-03-15T19:05:45.028046500Z" } }, "id": "651b6bf8d1dd8e6d" }, { "cell_type": "code", "execution_count": 10, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cały zbiór: 569 wierszy, 31 kolumn\n", "Zbiór treningowy: 455 wierszy, 31 kolumn\n", "Zbiór walidacyjny: 57 wierszy, 31 kolumn\n", "Zbiór testowy: 57 wierszy, 31 kolumn\n" ] } ], "source": [ "# Wymiary zbiorów i podzbiorów\n", "print(f\"Cały zbiór: {df.shape[0]} wierszy, {df.shape[1]} kolumn\")\n", "print(f\"Zbiór treningowy: {df_train.shape[0]} wierszy, {df_train.shape[1]} kolumn\")\n", "print(f\"Zbiór walidacyjny: {df_val.shape[0]} wierszy, {df_val.shape[1]} kolumn\")\n", "print(f\"Zbiór testowy: {df_test.shape[0]} wierszy, {df_test.shape[1]} kolumn\")" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.168725700Z", "start_time": "2024-03-15T19:05:45.058794500Z" } }, "id": "97f1bdbc7597c39f" }, { "cell_type": "markdown", "source": [ "#### Statystyki dla cech numerycznych (średnia, odchylenie standardowe, min, max, kwantyle)" ], "metadata": { "collapsed": false }, "id": "9014307b7d26b73f" }, { "cell_type": "code", "execution_count": 11, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 569.000000 569.000000 569.000000 569.000000 569.000000 \nmean 0.338222 0.323965 0.332935 0.216920 0.394785 \nstd 0.166787 0.145453 0.167915 0.149274 0.126967 \nmin 0.000000 0.000000 0.000000 0.000000 0.000000 \n25% 0.223342 0.218465 0.216847 0.117413 0.304595 \n50% 0.302381 0.308759 0.293345 0.172895 0.390358 \n75% 0.416442 0.408860 0.416765 0.271135 0.475490 \nmax 1.000000 1.000000 1.000000 1.000000 1.000000 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 569.000000 569.000000 569.000000 569.000000 \nmean 0.260601 0.208058 0.243137 0.379605 \nstd 0.161992 0.186785 0.192857 0.138456 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.139685 0.069260 0.100944 0.282323 \n50% 0.224679 0.144189 0.166501 0.369697 \n75% 0.340531 0.306232 0.367793 0.453030 \nmax 1.000000 1.000000 1.000000 1.000000 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 569.000000 ... 569.000000 569.000000 \nmean 0.270379 ... 0.296663 0.363998 \nstd 0.148702 ... 0.171940 0.163813 \nmin 0.000000 ... 0.000000 0.000000 \n25% 0.163016 ... 0.180719 0.241471 \n50% 0.243892 ... 0.250445 0.356876 \n75% 0.340354 ... 0.386339 0.471748 \nmax 1.000000 ... 1.000000 1.000000 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 569.000000 569.000000 569.000000 569.000000 \nmean 0.283138 0.170906 0.404138 0.220212 \nstd 0.167352 0.139932 0.150779 0.152649 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.167837 0.081130 0.300007 0.116337 \n50% 0.235320 0.123206 0.397081 0.179110 \n75% 0.373475 0.220901 0.494156 0.302520 \nmax 1.000000 1.000000 1.000000 1.000000 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 569.000000 569.000000 569.000000 \nmean 0.217403 0.393836 0.263307 \nstd 0.166633 0.225884 0.121954 \nmin 0.000000 0.000000 0.000000 \n25% 0.091454 0.223127 0.185098 \n50% 0.181070 0.343402 0.247782 \n75% 0.305831 0.554639 0.318155 \nmax 1.000000 1.000000 1.000000 \n\n fractal_dimension_worst \ncount 569.000000 \nmean 0.189596 \nstd 0.118466 \nmin 0.000000 \n25% 0.107700 \n50% 0.163977 \n75% 0.242949 \nmax 1.000000 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
count569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000...569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000
mean0.3382220.3239650.3329350.2169200.3947850.2606010.2080580.2431370.3796050.270379...0.2966630.3639980.2831380.1709060.4041380.2202120.2174030.3938360.2633070.189596
std0.1667870.1454530.1679150.1492740.1269670.1619920.1867850.1928570.1384560.148702...0.1719400.1638130.1673520.1399320.1507790.1526490.1666330.2258840.1219540.118466
min0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%0.2233420.2184650.2168470.1174130.3045950.1396850.0692600.1009440.2823230.163016...0.1807190.2414710.1678370.0811300.3000070.1163370.0914540.2231270.1850980.107700
50%0.3023810.3087590.2933450.1728950.3903580.2246790.1441890.1665010.3696970.243892...0.2504450.3568760.2353200.1232060.3970810.1791100.1810700.3434020.2477820.163977
75%0.4164420.4088600.4167650.2711350.4754900.3405310.3062320.3677930.4530300.340354...0.3863390.4717480.3734750.2209010.4941560.3025200.3058310.5546390.3181550.242949
max1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000...1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n

8 rows × 30 columns

\n
" }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Cały zbiór\n", "df.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.568275400Z", "start_time": "2024-03-15T19:05:45.073564100Z" } }, "id": "4f0c97b4de052a0c" }, { "cell_type": "code", "execution_count": 12, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 455.000000 455.000000 455.000000 455.000000 455.000000 \nmean 0.338949 0.326381 0.333644 0.217261 0.395892 \nstd 0.165349 0.145664 0.166246 0.147801 0.126845 \nmin 0.000000 0.022658 0.000000 0.000000 0.000000 \n25% 0.225235 0.219817 0.219128 0.119321 0.304776 \n50% 0.300961 0.310450 0.295833 0.170859 0.389636 \n75% 0.415259 0.411397 0.414346 0.271113 0.476393 \nmax 1.000000 0.815015 1.000000 0.999152 0.831182 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 455.000000 455.000000 455.000000 455.000000 \nmean 0.260580 0.209922 0.243100 0.381150 \nstd 0.160494 0.187617 0.192341 0.136984 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.136096 0.068030 0.100497 0.283081 \n50% 0.230262 0.145150 0.168191 0.369697 \n75% 0.340991 0.310098 0.357952 0.453030 \nmax 0.895712 1.000000 1.000000 1.000000 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 455.000000 ... 455.000000 455.000000 \nmean 0.270577 ... 0.297118 0.369023 \nstd 0.147338 ... 0.170654 0.166060 \nmin 0.000000 ... 0.000000 0.012527 \n25% 0.168176 ... 0.182675 0.248801 \n50% 0.241786 ... 0.250445 0.358742 \n75% 0.340354 ... 0.377090 0.481343 \nmax 1.000000 ... 0.896478 1.000000 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 455.000000 455.000000 455.000000 455.000000 \nmean 0.284164 0.171085 0.407890 0.221950 \nstd 0.166564 0.138560 0.153644 0.156748 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.169929 0.081781 0.303308 0.116871 \n50% 0.235370 0.123206 0.396421 0.181244 \n75% 0.369740 0.209964 0.496467 0.299318 \nmax 0.890931 0.797975 1.000000 1.000000 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 455.000000 455.000000 455.000000 \nmean 0.221686 0.394672 0.263384 \nstd 0.172039 0.226663 0.119284 \nmin 0.000000 0.000000 0.000000 \n25% 0.092212 0.221753 0.188252 \n50% 0.184505 0.347079 0.248571 \n75% 0.309265 0.558935 0.317465 \nmax 1.000000 1.000000 1.000000 \n\n fractal_dimension_worst \ncount 455.000000 \nmean 0.191552 \nstd 0.122769 \nmin 0.000000 \n25% 0.107110 \n50% 0.164305 \n75% 0.242785 \nmax 1.000000 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
count455.000000455.000000455.000000455.000000455.000000455.000000455.000000455.000000455.000000455.000000...455.000000455.000000455.000000455.000000455.000000455.000000455.000000455.000000455.000000455.000000
mean0.3389490.3263810.3336440.2172610.3958920.2605800.2099220.2431000.3811500.270577...0.2971180.3690230.2841640.1710850.4078900.2219500.2216860.3946720.2633840.191552
std0.1653490.1456640.1662460.1478010.1268450.1604940.1876170.1923410.1369840.147338...0.1706540.1660600.1665640.1385600.1536440.1567480.1720390.2266630.1192840.122769
min0.0000000.0226580.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0125270.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
25%0.2252350.2198170.2191280.1193210.3047760.1360960.0680300.1004970.2830810.168176...0.1826750.2488010.1699290.0817810.3033080.1168710.0922120.2217530.1882520.107110
50%0.3009610.3104500.2958330.1708590.3896360.2302620.1451500.1681910.3696970.241786...0.2504450.3587420.2353700.1232060.3964210.1812440.1845050.3470790.2485710.164305
75%0.4152590.4113970.4143460.2711130.4763930.3409910.3100980.3579520.4530300.340354...0.3770900.4813430.3697400.2099640.4964670.2993180.3092650.5589350.3174650.242785
max1.0000000.8150151.0000000.9991520.8311820.8957121.0000001.0000001.0000001.000000...0.8964781.0000000.8909310.7979751.0000001.0000001.0000001.0000001.0000001.000000
\n

8 rows × 30 columns

\n
" }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór treningowy\n", "df_train.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.578805Z", "start_time": "2024-03-15T19:05:45.179260600Z" } }, "id": "a6594457c1209a45" }, { "cell_type": "code", "execution_count": 13, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 57.000000 \nmean 0.334850 0.315985 0.331800 0.216332 0.399432 \nstd 0.176956 0.119165 0.181708 0.171264 0.144361 \nmin 0.089782 0.106865 0.089489 0.041357 0.167193 \n25% 0.225709 0.226581 0.223205 0.117413 0.293220 \n50% 0.282976 0.290159 0.279110 0.161909 0.396678 \n75% 0.413129 0.389922 0.405017 0.260912 0.471879 \nmax 0.967343 0.623267 0.988943 1.000000 1.000000 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.279956 0.216035 0.254466 0.387684 \nstd 0.184302 0.208557 0.203610 0.160009 \nmin 0.046500 0.003622 0.027793 0.078283 \n25% 0.165235 0.076406 0.112326 0.282323 \n50% 0.222195 0.127413 0.154026 0.381818 \n75% 0.359242 0.296626 0.371918 0.464141 \nmax 1.000000 0.879569 0.839463 0.932323 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 57.000000 ... 57.000000 57.000000 \nmean 0.281045 ... 0.294395 0.357428 \nstd 0.161084 ... 0.180500 0.130661 \nmin 0.047810 ... 0.072963 0.116205 \n25% 0.169545 ... 0.180719 0.267058 \n50% 0.258214 ... 0.225187 0.348348 \n75% 0.342249 ... 0.355034 0.463486 \nmax 0.949031 ... 1.000000 0.619670 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.281028 0.170796 0.391354 0.222054 \nstd 0.179389 0.161083 0.140480 0.133225 \nmin 0.074008 0.028264 0.085320 0.019239 \n25% 0.162757 0.082653 0.301327 0.126233 \n50% 0.228597 0.109836 0.416232 0.182893 \n75% 0.343593 0.197454 0.484911 0.308632 \nmax 1.000000 1.000000 0.786040 0.571557 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 57.000000 57.000000 57.000000 \nmean 0.209888 0.405999 0.276614 \nstd 0.152149 0.214849 0.143775 \nmin 0.006176 0.095670 0.066233 \n25% 0.107827 0.248179 0.198502 \n50% 0.164537 0.352234 0.234969 \n75% 0.312380 0.517182 0.323674 \nmax 0.613498 0.902062 0.829687 \n\n fractal_dimension_worst \ncount 57.000000 \nmean 0.187256 \nstd 0.103423 \nmin 0.028073 \n25% 0.107700 \n50% 0.155910 \n75% 0.260724 \nmax 0.460186 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
count57.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.000000...57.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.000000
mean0.3348500.3159850.3318000.2163320.3994320.2799560.2160350.2544660.3876840.281045...0.2943950.3574280.2810280.1707960.3913540.2220540.2098880.4059990.2766140.187256
std0.1769560.1191650.1817080.1712640.1443610.1843020.2085570.2036100.1600090.161084...0.1805000.1306610.1793890.1610830.1404800.1332250.1521490.2148490.1437750.103423
min0.0897820.1068650.0894890.0413570.1671930.0465000.0036220.0277930.0782830.047810...0.0729630.1162050.0740080.0282640.0853200.0192390.0061760.0956700.0662330.028073
25%0.2257090.2265810.2232050.1174130.2932200.1652350.0764060.1123260.2823230.169545...0.1807190.2670580.1627570.0826530.3013270.1262330.1078270.2481790.1985020.107700
50%0.2829760.2901590.2791100.1619090.3966780.2221950.1274130.1540260.3818180.258214...0.2251870.3483480.2285970.1098360.4162320.1828930.1645370.3522340.2349690.155910
75%0.4131290.3899220.4050170.2609120.4718790.3592420.2966260.3719180.4641410.342249...0.3550340.4634860.3435930.1974540.4849110.3086320.3123800.5171820.3236740.260724
max0.9673430.6232670.9889431.0000001.0000001.0000000.8795690.8394630.9323230.949031...1.0000000.6196701.0000001.0000000.7860400.5715570.6134980.9020620.8296870.460186
\n

8 rows × 30 columns

\n
" }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór walidacyjny\n", "df_val.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.583803900Z", "start_time": "2024-03-15T19:05:45.287918100Z" } }, "id": "5ce8c6c658be60c1" }, { "cell_type": "code", "execution_count": 14, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 57.000000 \nmean 0.335794 0.312663 0.328412 0.214790 0.381297 \nstd 0.170785 0.167714 0.169927 0.139898 0.109590 \nmin 0.075252 0.000000 0.072904 0.033001 0.146249 \n25% 0.192106 0.199188 0.185751 0.097222 0.308658 \n50% 0.315159 0.310450 0.301223 0.180445 0.390990 \n75% 0.486961 0.396348 0.481031 0.329629 0.460143 \nmax 0.692366 1.000000 0.695253 0.535949 0.578406 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.241410 0.185207 0.232105 0.359197 \nstd 0.150381 0.156133 0.188722 0.127273 \nmin 0.021839 0.002798 0.011948 0.072222 \n25% 0.129471 0.062910 0.095179 0.271717 \n50% 0.203239 0.119845 0.154573 0.361616 \n75% 0.330102 0.261246 0.383996 0.439899 \nmax 0.809214 0.658388 0.776342 0.674242 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 57.000000 ... 57.000000 57.000000 \nmean 0.258133 ... 0.295299 0.330458 \nstd 0.148563 ... 0.176543 0.173641 \nmin 0.021061 ... 0.054891 0.000000 \n25% 0.139217 ... 0.146211 0.199893 \n50% 0.246420 ... 0.256492 0.325160 \n75% 0.339090 ... 0.429740 0.425640 \nmax 0.839090 ... 0.667022 0.875533 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.277057 0.169590 0.386967 0.204497 \nstd 0.164054 0.130427 0.137458 0.138294 \nmin 0.047263 0.022046 0.111074 0.015504 \n25% 0.146023 0.064368 0.285478 0.098777 \n50% 0.245530 0.128048 0.396421 0.172318 \n75% 0.410827 0.256046 0.478307 0.280205 \nmax 0.627970 0.467902 0.674437 0.709327 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 57.000000 57.000000 57.000000 \nmean 0.190725 0.374998 0.249384 \nstd 0.132668 0.233099 0.120215 \nmin 0.002860 0.035808 0.081411 \n25% 0.086981 0.202131 0.159866 \n50% 0.155511 0.292509 0.234772 \n75% 0.275240 0.520619 0.304356 \nmax 0.563339 0.997595 0.622708 \n\n fractal_dimension_worst \ncount 57.000000 \nmean 0.176326 \nstd 0.096130 \nmin 0.001115 \n25% 0.113210 \n50% 0.160042 \n75% 0.231011 \nmax 0.481175 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst
count57.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.000000...57.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.00000057.000000
mean0.3357940.3126630.3284120.2147900.3812970.2414100.1852070.2321050.3591970.258133...0.2952990.3304580.2770570.1695900.3869670.2044970.1907250.3749980.2493840.176326
std0.1707850.1677140.1699270.1398980.1095900.1503810.1561330.1887220.1272730.148563...0.1765430.1736410.1640540.1304270.1374580.1382940.1326680.2330990.1202150.096130
min0.0752520.0000000.0729040.0330010.1462490.0218390.0027980.0119480.0722220.021061...0.0548910.0000000.0472630.0220460.1110740.0155040.0028600.0358080.0814110.001115
25%0.1921060.1991880.1857510.0972220.3086580.1294710.0629100.0951790.2717170.139217...0.1462110.1998930.1460230.0643680.2854780.0987770.0869810.2021310.1598660.113210
50%0.3151590.3104500.3012230.1804450.3909900.2032390.1198450.1545730.3616160.246420...0.2564920.3251600.2455300.1280480.3964210.1723180.1555110.2925090.2347720.160042
75%0.4869610.3963480.4810310.3296290.4601430.3301020.2612460.3839960.4398990.339090...0.4297400.4256400.4108270.2560460.4783070.2802050.2752400.5206190.3043560.231011
max0.6923661.0000000.6952530.5359490.5784060.8092140.6583880.7763420.6742420.839090...0.6670220.8755330.6279700.4679020.6744370.7093270.5633390.9975950.6227080.481175
\n

8 rows × 30 columns

\n
" }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór testowy\n", "df_test.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.588195700Z", "start_time": "2024-03-15T19:05:45.381735500Z" } }, "id": "49acb8e5dfbda89f" }, { "cell_type": "markdown", "source": [ "#### Rozkład klas w zbiorze" ], "metadata": { "collapsed": false }, "id": "1c07bc584a09d6b8" }, { "cell_type": "code", "execution_count": 15, "outputs": [ { "data": { "text/plain": "diagnosis\nB 357\nM 212\nName: count, dtype: int64" }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Cały zbiór\n", "df['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.651344400Z", "start_time": "2024-03-15T19:05:45.482422700Z" } }, "id": "1e655fa9f8e48ff9" }, { "cell_type": "code", "execution_count": 16, "outputs": [ { "data": { "text/plain": "diagnosis\nB 288\nM 167\nName: count, dtype: int64" }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór treningowy\n", "df_train['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.652345300Z", "start_time": "2024-03-15T19:05:45.493491600Z" } }, "id": "d66c931287444033" }, { "cell_type": "code", "execution_count": 17, "outputs": [ { "data": { "text/plain": "diagnosis\nB 35\nM 22\nName: count, dtype: int64" }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór walidacyjny\n", "df_val['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.652345300Z", "start_time": "2024-03-15T19:05:45.508825500Z" } }, "id": "e3f1504600da351b" }, { "cell_type": "code", "execution_count": 18, "outputs": [ { "data": { "text/plain": "diagnosis\nB 34\nM 23\nName: count, dtype: int64" }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór testowy\n", "df_test['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-15T19:05:45.653709400Z", "start_time": "2024-03-15T19:05:45.523627900Z" } }, "id": "de5b287d4c68a1ec" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }