install pandas\n", "!pip install scikit-learn" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:20:58.698061Z", "start_time": "2024-03-27T11:20:54.216389900Z" } }, "id": "ae6cca2241835fba" }, { "cell_type": "markdown", "source": [ "#### Import bibliotek" ], "metadata": { "collapsed": false }, "id": "ba9581e73648e5c3" }, { "cell_type": "code", "execution_count": 2, "outputs": [], "source": [ "# Import bibliotek\n", "import pandas as pd\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import MinMaxScaler" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:20:59.612164600Z", "start_time": "2024-03-27T11:20:58.699325900Z" } }, "id": "5db08fde342b5463" }, { "cell_type": "markdown", "source": [ "#### 1. Pobieranie zbioru danych" ], "metadata": { "collapsed": false }, "id": "2000b14bbb95a446" }, { "cell_type": "code", "execution_count": 3, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "breast-cancer-wisconsin-data.zip: Skipping, found more recently modified local copy (use --force to force download)\n" ] } ], "source": [ "# Pobranie zbioru danych\n", "!kaggle datasets download -d uciml/breast-cancer-wisconsin-data\n", "\n", "# Wypakowanie archiwum\n", "!tar -xf breast-cancer-wisconsin-data.zip" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.293628200Z", "start_time": "2024-03-27T11:20:59.612164600Z" } }, "id": "679faee0f0f27fa1" }, { "cell_type": "markdown", "source": [ "#### 2. Wczytanie danych oraz wstępne przetworzenie" ], "metadata": { "collapsed": false }, "id": "2aa06e9443f948c9" }, { "cell_type": "code", "execution_count": 4, "outputs": [], "source": [ "# Wczytanie danych, ustawienie kolumny 'id' jako indeks\n", "df = pd.read_csv('data.csv', index_col='id')\n", "\n", "# Usunięcie niepotrzebnych/błędnych kolumn\n", "df = df.drop(columns=['Unnamed: 32'])" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.312651300Z", "start_time": "2024-03-27T11:21:01.294632700Z" } }, "id": "2bc8c4be37c8fa69" }, { "cell_type": "code", "execution_count": 5, "outputs": [ { "data": { "text/plain": "diagnosis 0\nradius_mean 0\ntexture_mean 0\nperimeter_mean 0\narea_mean 0\nsmoothness_mean 0\ncompactness_mean 0\nconcavity_mean 0\nconcave points_mean 0\nsymmetry_mean 0\nfractal_dimension_mean 0\nradius_se 0\ntexture_se 0\nperimeter_se 0\narea_se 0\nsmoothness_se 0\ncompactness_se 0\nconcavity_se 0\nconcave points_se 0\nsymmetry_se 0\nfractal_dimension_se 0\nradius_worst 0\ntexture_worst 0\nperimeter_worst 0\narea_worst 0\nsmoothness_worst 0\ncompactness_worst 0\nconcavity_worst 0\nconcave points_worst 0\nsymmetry_worst 0\nfractal_dimension_worst 0\ndtype: int64" }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sprawdzenie czy istnieją wartości brakujące (NaN)\n", "df.isnull().sum()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.335790100Z", "start_time": "2024-03-27T11:21:01.311644700Z" } }, "id": "abcf61d13e9b47f1" }, { "cell_type": "code", "execution_count": 6, "outputs": [ { "data": { "text/plain": " diagnosis radius_mean texture_mean perimeter_mean area_mean \\\nid \n842302 M 17.99 10.38 122.80 1001.0 \n842517 M 20.57 17.77 132.90 1326.0 \n84300903 M 19.69 21.25 130.00 1203.0 \n84348301 M 11.42 20.38 77.58 386.1 \n84358402 M 20.29 14.34 135.10 1297.0 \n\n smoothness_mean compactness_mean concavity_mean \\\nid \n842302 0.11840 0.27760 0.3001 \n842517 0.08474 0.07864 0.0869 \n84300903 0.10960 0.15990 0.1974 \n84348301 0.14250 0.28390 0.2414 \n84358402 0.10030 0.13280 0.1980 \n\n concave points_mean symmetry_mean ... radius_worst \\\nid ... \n842302 0.14710 0.2419 ... 25.38 \n842517 0.07017 0.1812 ... 24.99 \n84300903 0.12790 0.2069 ... 23.57 \n84348301 0.10520 0.2597 ... 14.91 \n84358402 0.10430 0.1809 ... 22.54 \n\n texture_worst perimeter_worst area_worst smoothness_worst \\\nid \n842302 17.33 184.60 2019.0 0.1622 \n842517 23.41 158.80 1956.0 0.1238 \n84300903 25.53 152.50 1709.0 0.1444 \n84348301 26.50 98.87 567.7 0.2098 \n84358402 16.67 152.20 1575.0 0.1374 \n\n compactness_worst concavity_worst concave points_worst \\\nid \n842302 0.6656 0.7119 0.2654 \n842517 0.1866 0.2416 0.1860 \n84300903 0.4245 0.4504 0.2430 \n84348301 0.8663 0.6869 0.2575 \n84358402 0.2050 0.4000 0.1625 \n\n symmetry_worst fractal_dimension_worst \nid \n842302 0.4601 0.11890 \n842517 0.2750 0.08902 \n84300903 0.3613 0.08758 \n84348301 0.6638 0.17300 \n84358402 0.2364 0.07678 \n\n[5 rows x 31 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
diagnosisradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst

5 rows × 31 columns

" }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Wyświetlenie 5 pierwszych wierszy\n", "df.head()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.360029700Z", "start_time": "2024-03-27T11:21:01.325157900Z" } }, "id": "db9cfc5e73a4da57" }, { "cell_type": "code", "execution_count": 7, "outputs": [], "source": [ "# Normalizacja cech do wartości z przedziału [0, 1]\n", "scaler = MinMaxScaler()\n", "\n", "df[df.columns[1:]] = scaler.fit_transform(df[df.columns[1:]])" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.385350900Z", "start_time": "2024-03-27T11:21:01.358019600Z" } }, "id": "f8513c47a4a1f844" }, { "cell_type": "markdown", "source": [ "#### 3. Podział danych na zbiór treningowy, walidacyjny i testowy" ], "metadata": { "collapsed": false }, "id": "7d74496029e594b1" }, { "cell_type": "code", "execution_count": 8, "outputs": [], "source": [ "# Podział zbioru na zbiór treningowy, walidacyjny i testowy w proporcji 80/10/10\n", "df_train, df_val_test = train_test_split(df, test_size=0.2, random_state=1234)\n", "df_val, df_test = train_test_split(df_val_test, test_size=0.5, random_state=1234)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.416540300Z", "start_time": "2024-03-27T11:21:01.372555800Z" } }, "id": "651b6bf8d1dd8e6d" }, { "cell_type": "code", "execution_count": 9, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cały zbiór: 569 wierszy, 31 kolumn\n", "Zbiór treningowy: 455 wierszy, 31 kolumn\n", "Zbiór walidacyjny: 57 wierszy, 31 kolumn\n", "Zbiór testowy: 57 wierszy, 31 kolumn\n" ] } ], "source": [ "# Wymiary zbiorów i podzbiorów\n", "print(f\"Cały zbiór: {df.shape[0]} wierszy, {df.shape[1]} kolumn\")\n", "print(f\"Zbiór treningowy: {df_train.shape[0]} wierszy, {df_train.shape[1]} kolumn\")\n", "print(f\"Zbiór walidacyjny: {df_val.shape[0]} wierszy, {df_val.shape[1]} kolumn\")\n", "print(f\"Zbiór testowy: {df_test.shape[0]} wierszy, {df_test.shape[1]} kolumn\")" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.539113800Z", "start_time": "2024-03-27T11:21:01.388350400Z" } }, "id": "97f1bdbc7597c39f" }, { "cell_type": "markdown", "source": [ "#### Statystyki dla cech numerycznych (średnia, odchylenie standardowe, min, max, kwantyle)" ], "metadata": { "collapsed": false }, "id": "9014307b7d26b73f" }, { "cell_type": "code", "execution_count": 10, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 569.000000 569.000000 569.000000 569.000000 569.000000 \nmean 0.338222 0.323965 0.332935 0.216920 0.394785 \nstd 0.166787 0.145453 0.167915 0.149274 0.126967 \nmin 0.000000 0.000000 0.000000 0.000000 0.000000 \n25% 0.223342 0.218465 0.216847 0.117413 0.304595 \n50% 0.302381 0.308759 0.293345 0.172895 0.390358 \n75% 0.416442 0.408860 0.416765 0.271135 0.475490 \nmax 1.000000 1.000000 1.000000 1.000000 1.000000 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 569.000000 569.000000 569.000000 569.000000 \nmean 0.260601 0.208058 0.243137 0.379605 \nstd 0.161992 0.186785 0.192857 0.138456 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.139685 0.069260 0.100944 0.282323 \n50% 0.224679 0.144189 0.166501 0.369697 \n75% 0.340531 0.306232 0.367793 0.453030 \nmax 1.000000 1.000000 1.000000 1.000000 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 569.000000 ... 569.000000 569.000000 \nmean 0.270379 ... 0.296663 0.363998 \nstd 0.148702 ... 0.171940 0.163813 \nmin 0.000000 ... 0.000000 0.000000 \n25% 0.163016 ... 0.180719 0.241471 \n50% 0.243892 ... 0.250445 0.356876 \n75% 0.340354 ... 0.386339 0.471748 \nmax 1.000000 ... 1.000000 1.000000 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 569.000000 569.000000 569.000000 569.000000 \nmean 0.283138 0.170906 0.404138 0.220212 \nstd 0.167352 0.139932 0.150779 0.152649 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.167837 0.081130 0.300007 0.116337 \n50% 0.235320 0.123206 0.397081 0.179110 \n75% 0.373475 0.220901 0.494156 0.302520 \nmax 1.000000 1.000000 1.000000 1.000000 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 569.000000 569.000000 569.000000 \nmean 0.217403 0.393836 0.263307 \nstd 0.166633 0.225884 0.121954 \nmin 0.000000 0.000000 0.000000 \n25% 0.091454 0.223127 0.185098 \n50% 0.181070 0.343402 0.247782 \n75% 0.305831 0.554639 0.318155 \nmax 1.000000 1.000000 1.000000 \n\n fractal_dimension_worst \ncount 569.000000 \nmean 0.189596 \nstd 0.118466 \nmin 0.000000 \n25% 0.107700 \n50% 0.163977 \n75% 0.242949 \nmax 1.000000 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst

8 rows × 30 columns

" }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Cały zbiór\n", "df.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.651383Z", "start_time": "2024-03-27T11:21:01.403035300Z" } }, "id": "4f0c97b4de052a0c" }, { "cell_type": "code", "execution_count": 11, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 455.000000 455.000000 455.000000 455.000000 455.000000 \nmean 0.338949 0.326381 0.333644 0.217261 0.395892 \nstd 0.165349 0.145664 0.166246 0.147801 0.126845 \nmin 0.000000 0.022658 0.000000 0.000000 0.000000 \n25% 0.225235 0.219817 0.219128 0.119321 0.304776 \n50% 0.300961 0.310450 0.295833 0.170859 0.389636 \n75% 0.415259 0.411397 0.414346 0.271113 0.476393 \nmax 1.000000 0.815015 1.000000 0.999152 0.831182 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 455.000000 455.000000 455.000000 455.000000 \nmean 0.260580 0.209922 0.243100 0.381150 \nstd 0.160494 0.187617 0.192341 0.136984 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.136096 0.068030 0.100497 0.283081 \n50% 0.230262 0.145150 0.168191 0.369697 \n75% 0.340991 0.310098 0.357952 0.453030 \nmax 0.895712 1.000000 1.000000 1.000000 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 455.000000 ... 455.000000 455.000000 \nmean 0.270577 ... 0.297118 0.369023 \nstd 0.147338 ... 0.170654 0.166060 \nmin 0.000000 ... 0.000000 0.012527 \n25% 0.168176 ... 0.182675 0.248801 \n50% 0.241786 ... 0.250445 0.358742 \n75% 0.340354 ... 0.377090 0.481343 \nmax 1.000000 ... 0.896478 1.000000 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 455.000000 455.000000 455.000000 455.000000 \nmean 0.284164 0.171085 0.407890 0.221950 \nstd 0.166564 0.138560 0.153644 0.156748 \nmin 0.000000 0.000000 0.000000 0.000000 \n25% 0.169929 0.081781 0.303308 0.116871 \n50% 0.235370 0.123206 0.396421 0.181244 \n75% 0.369740 0.209964 0.496467 0.299318 \nmax 0.890931 0.797975 1.000000 1.000000 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 455.000000 455.000000 455.000000 \nmean 0.221686 0.394672 0.263384 \nstd 0.172039 0.226663 0.119284 \nmin 0.000000 0.000000 0.000000 \n25% 0.092212 0.221753 0.188252 \n50% 0.184505 0.347079 0.248571 \n75% 0.309265 0.558935 0.317465 \nmax 1.000000 1.000000 1.000000 \n\n fractal_dimension_worst \ncount 455.000000 \nmean 0.191552 \nstd 0.122769 \nmin 0.000000 \n25% 0.107110 \n50% 0.164305 \n75% 0.242785 \nmax 1.000000 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst

8 rows × 30 columns

" }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór treningowy\n", "df_train.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.652394400Z", "start_time": "2024-03-27T11:21:01.450048100Z" } }, "id": "a6594457c1209a45" }, { "cell_type": "code", "execution_count": 12, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 57.000000 \nmean 0.334850 0.315985 0.331800 0.216332 0.399432 \nstd 0.176956 0.119165 0.181708 0.171264 0.144361 \nmin 0.089782 0.106865 0.089489 0.041357 0.167193 \n25% 0.225709 0.226581 0.223205 0.117413 0.293220 \n50% 0.282976 0.290159 0.279110 0.161909 0.396678 \n75% 0.413129 0.389922 0.405017 0.260912 0.471879 \nmax 0.967343 0.623267 0.988943 1.000000 1.000000 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.279956 0.216035 0.254466 0.387684 \nstd 0.184302 0.208557 0.203610 0.160009 \nmin 0.046500 0.003622 0.027793 0.078283 \n25% 0.165235 0.076406 0.112326 0.282323 \n50% 0.222195 0.127413 0.154026 0.381818 \n75% 0.359242 0.296626 0.371918 0.464141 \nmax 1.000000 0.879569 0.839463 0.932323 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 57.000000 ... 57.000000 57.000000 \nmean 0.281045 ... 0.294395 0.357428 \nstd 0.161084 ... 0.180500 0.130661 \nmin 0.047810 ... 0.072963 0.116205 \n25% 0.169545 ... 0.180719 0.267058 \n50% 0.258214 ... 0.225187 0.348348 \n75% 0.342249 ... 0.355034 0.463486 \nmax 0.949031 ... 1.000000 0.619670 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.281028 0.170796 0.391354 0.222054 \nstd 0.179389 0.161083 0.140480 0.133225 \nmin 0.074008 0.028264 0.085320 0.019239 \n25% 0.162757 0.082653 0.301327 0.126233 \n50% 0.228597 0.109836 0.416232 0.182893 \n75% 0.343593 0.197454 0.484911 0.308632 \nmax 1.000000 1.000000 0.786040 0.571557 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 57.000000 57.000000 57.000000 \nmean 0.209888 0.405999 0.276614 \nstd 0.152149 0.214849 0.143775 \nmin 0.006176 0.095670 0.066233 \n25% 0.107827 0.248179 0.198502 \n50% 0.164537 0.352234 0.234969 \n75% 0.312380 0.517182 0.323674 \nmax 0.613498 0.902062 0.829687 \n\n fractal_dimension_worst \ncount 57.000000 \nmean 0.187256 \nstd 0.103423 \nmin 0.028073 \n25% 0.107700 \n50% 0.155910 \n75% 0.260724 \nmax 0.460186 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst

8 rows × 30 columns

" }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór walidacyjny\n", "df_val.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.654387800Z", "start_time": "2024-03-27T11:21:01.498448500Z" } }, "id": "5ce8c6c658be60c1" }, { "cell_type": "code", "execution_count": 13, "outputs": [ { "data": { "text/plain": " radius_mean texture_mean perimeter_mean area_mean smoothness_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 57.000000 \nmean 0.335794 0.312663 0.328412 0.214790 0.381297 \nstd 0.170785 0.167714 0.169927 0.139898 0.109590 \nmin 0.075252 0.000000 0.072904 0.033001 0.146249 \n25% 0.192106 0.199188 0.185751 0.097222 0.308658 \n50% 0.315159 0.310450 0.301223 0.180445 0.390990 \n75% 0.486961 0.396348 0.481031 0.329629 0.460143 \nmax 0.692366 1.000000 0.695253 0.535949 0.578406 \n\n compactness_mean concavity_mean concave points_mean symmetry_mean \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.241410 0.185207 0.232105 0.359197 \nstd 0.150381 0.156133 0.188722 0.127273 \nmin 0.021839 0.002798 0.011948 0.072222 \n25% 0.129471 0.062910 0.095179 0.271717 \n50% 0.203239 0.119845 0.154573 0.361616 \n75% 0.330102 0.261246 0.383996 0.439899 \nmax 0.809214 0.658388 0.776342 0.674242 \n\n fractal_dimension_mean ... radius_worst texture_worst \\\ncount 57.000000 ... 57.000000 57.000000 \nmean 0.258133 ... 0.295299 0.330458 \nstd 0.148563 ... 0.176543 0.173641 \nmin 0.021061 ... 0.054891 0.000000 \n25% 0.139217 ... 0.146211 0.199893 \n50% 0.246420 ... 0.256492 0.325160 \n75% 0.339090 ... 0.429740 0.425640 \nmax 0.839090 ... 0.667022 0.875533 \n\n perimeter_worst area_worst smoothness_worst compactness_worst \\\ncount 57.000000 57.000000 57.000000 57.000000 \nmean 0.277057 0.169590 0.386967 0.204497 \nstd 0.164054 0.130427 0.137458 0.138294 \nmin 0.047263 0.022046 0.111074 0.015504 \n25% 0.146023 0.064368 0.285478 0.098777 \n50% 0.245530 0.128048 0.396421 0.172318 \n75% 0.410827 0.256046 0.478307 0.280205 \nmax 0.627970 0.467902 0.674437 0.709327 \n\n concavity_worst concave points_worst symmetry_worst \\\ncount 57.000000 57.000000 57.000000 \nmean 0.190725 0.374998 0.249384 \nstd 0.132668 0.233099 0.120215 \nmin 0.002860 0.035808 0.081411 \n25% 0.086981 0.202131 0.159866 \n50% 0.155511 0.292509 0.234772 \n75% 0.275240 0.520619 0.304356 \nmax 0.563339 0.997595 0.622708 \n\n fractal_dimension_worst \ncount 57.000000 \nmean 0.176326 \nstd 0.096130 \nmin 0.001115 \n25% 0.113210 \n50% 0.160042 \n75% 0.231011 \nmax 0.481175 \n\n[8 rows x 30 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
radius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_meanfractal_dimension_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave points_worstsymmetry_worstfractal_dimension_worst

8 rows × 30 columns

" }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór testowy\n", "df_test.describe()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.668390300Z", "start_time": "2024-03-27T11:21:01.544332200Z" } }, "id": "49acb8e5dfbda89f" }, { "cell_type": "markdown", "source": [ "#### Rozkład klas w zbiorze" ], "metadata": { "collapsed": false }, "id": "1c07bc584a09d6b8" }, { "cell_type": "code", "execution_count": 14, "outputs": [ { "data": { "text/plain": "diagnosis\nB 357\nM 212\nName: count, dtype: int64" }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Cały zbiór\n", "df['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.668390300Z", "start_time": "2024-03-27T11:21:01.592136900Z" } }, "id": "1e655fa9f8e48ff9" }, { "cell_type": "code", "execution_count": 15, "outputs": [ { "data": { "text/plain": "diagnosis\nB 288\nM 167\nName: count, dtype: int64" }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór treningowy\n", "df_train['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.704119700Z", "start_time": "2024-03-27T11:21:01.607147900Z" } }, "id": "d66c931287444033" }, { "cell_type": "code", "execution_count": 16, "outputs": [ { "data": { "text/plain": "diagnosis\nB 35\nM 22\nName: count, dtype: int64" }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór walidacyjny\n", "df_val['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.704119700Z", "start_time": "2024-03-27T11:21:01.621660400Z" } }, "id": "e3f1504600da351b" }, { "cell_type": "code", "execution_count": 17, "outputs": [ { "data": { "text/plain": "diagnosis\nB 34\nM 23\nName: count, dtype: int64" }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Zbiór testowy\n", "df_test['diagnosis'].value_counts()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-27T11:21:01.705119900Z", "start_time": "2024-03-27T11:21:01.637872300Z" } }, "id": "de5b287d4c68a1ec" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }