{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "35674c19", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Use the %pip magic (not !pip) so the package is installed into the environment of the running kernel.\n", "%pip 
install opendatasets\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "5e8e5ea8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|█████████████████████████████████████████████████████████████████████████████| 25.6k/25.6k [00:00<00:00, 1.68MB/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Downloading red-wine-quality-cortez-et-al-2009.zip to .\\red-wine-quality-cortez-et-al-2009\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "import opendatasets as od\n", "od.download('https://www.kaggle.com/uciml/red-wine-quality-cortez-et-al-2009')" ] }, { "cell_type": "code", "execution_count": 6, "id": "1d0f072e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
07.40.7000.001.90.07611.034.00.997803.510.569.45
17.80.8800.002.60.09825.067.00.996803.200.689.85
27.80.7600.042.30.09215.054.00.997003.260.659.85
311.20.2800.561.90.07517.060.00.998003.160.589.86
47.40.7000.001.90.07611.034.00.997803.510.569.45
.......................................
15946.20.6000.082.00.09032.044.00.994903.450.5810.55
15955.90.5500.102.20.06239.051.00.995123.520.7611.26
15966.30.5100.132.30.07629.040.00.995743.420.7511.06
15975.90.6450.122.00.07532.044.00.995473.570.7110.25
15986.00.3100.473.60.06718.042.00.995493.390.6611.06
\n", "

1599 rows × 12 columns

\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "0 7.4 0.700 0.00 1.9 0.076 \n", "1 7.8 0.880 0.00 2.6 0.098 \n", "2 7.8 0.760 0.04 2.3 0.092 \n", "3 11.2 0.280 0.56 1.9 0.075 \n", "4 7.4 0.700 0.00 1.9 0.076 \n", "... ... ... ... ... ... \n", "1594 6.2 0.600 0.08 2.0 0.090 \n", "1595 5.9 0.550 0.10 2.2 0.062 \n", "1596 6.3 0.510 0.13 2.3 0.076 \n", "1597 5.9 0.645 0.12 2.0 0.075 \n", "1598 6.0 0.310 0.47 3.6 0.067 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "0 11.0 34.0 0.99780 3.51 0.56 \n", "1 25.0 67.0 0.99680 3.20 0.68 \n", "2 15.0 54.0 0.99700 3.26 0.65 \n", "3 17.0 60.0 0.99800 3.16 0.58 \n", "4 11.0 34.0 0.99780 3.51 0.56 \n", "... ... ... ... ... ... \n", "1594 32.0 44.0 0.99490 3.45 0.58 \n", "1595 39.0 51.0 0.99512 3.52 0.76 \n", "1596 29.0 40.0 0.99574 3.42 0.75 \n", "1597 32.0 44.0 0.99547 3.57 0.71 \n", "1598 18.0 42.0 0.99549 3.39 0.66 \n", "\n", " alcohol quality \n", "0 9.4 5 \n", "1 9.8 5 \n", "2 9.8 5 \n", "3 9.8 6 \n", "4 9.4 5 \n", "... ... ... 
\n", "1594 10.5 5 \n", "1595 11.2 6 \n", "1596 11.0 6 \n", "1597 10.2 5 \n", "1598 11.0 6 \n", "\n", "[1599 rows x 12 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "wine=pd.read_csv('./red-wine-quality-cortez-et-al-2009/winequality-red.csv')\n", "wine" ] }, { "cell_type": "code", "execution_count": 7, "id": "9a675582", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "8 17\n", "7 193\n", "6 618\n", "5 660\n", "4 51\n", "3 10\n", "Name: quality, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "# An integer test_size is an absolute number of held-out rows, not a fraction.\n", "TEST_SIZE = 50\n", "# Fixed seed so the split is reproducible across kernel restarts.\n", "SPLIT_SEED = 1\n", "\n", "# Stratify on the target so both subsets keep the quality-grade proportions of the full dataset.\n", "wine_train, wine_test = train_test_split(\n", "    wine,\n", "    test_size=TEST_SIZE,\n", "    random_state=SPLIT_SEED,\n", "    stratify=wine[\"quality\"],\n", ")\n", "\n", "# Sanity checks: the split must partition the dataset without losing or duplicating rows.\n", "assert len(wine_test) == TEST_SIZE\n", "assert len(wine_train) + len(wine_test) == len(wine)\n", "\n", "# The index of the displayed Series is the wine quality grade.\n", "wine_train[\"quality\"].value_counts().sort_index(ascending=False)" ] }, { "cell_type": "markdown", "id": "e32b25a0", "metadata": {}, "source": [ "## Wielkość zbioru i podzbiorów" ] }, { "cell_type": "markdown", "id": "14c56dcd", "metadata": {}, "source": [ "#### Dla całego zbioru" ] }, { "cell_type": "code", "execution_count": 8, "id": "3197a613", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
07.40.700.001.90.07611.034.00.99783.510.569.45
17.80.880.002.60.09825.067.00.99683.200.689.85
27.80.760.042.30.09215.054.00.99703.260.659.85
311.20.280.561.90.07517.060.00.99803.160.589.86
47.40.700.001.90.07611.034.00.99783.510.569.45
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "0 7.4 0.70 0.00 1.9 0.076 \n", "1 7.8 0.88 0.00 2.6 0.098 \n", "2 7.8 0.76 0.04 2.3 0.092 \n", "3 11.2 0.28 0.56 1.9 0.075 \n", "4 7.4 0.70 0.00 1.9 0.076 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "0 11.0 34.0 0.9978 3.51 0.56 \n", "1 25.0 67.0 0.9968 3.20 0.68 \n", "2 15.0 54.0 0.9970 3.26 0.65 \n", "3 17.0 60.0 0.9980 3.16 0.58 \n", "4 11.0 34.0 0.9978 3.51 0.56 \n", "\n", " alcohol quality \n", "0 9.4 5 \n", "1 9.8 5 \n", "2 9.8 5 \n", "3 9.8 6 \n", "4 9.4 5 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine.head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "18dcd194", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
count1599.0000001599.0000001599.0000001599.0000001599.0000001599.0000001599.0000001599.0000001599.0000001599.0000001599.0000001599.000000
mean8.3196370.5278210.2709762.5388060.08746715.87492246.4677920.9967473.3111130.65814910.4229835.636023
std1.7410960.1790600.1948011.4099280.04706510.46015732.8953240.0018870.1543860.1695071.0656680.807569
min4.6000000.1200000.0000000.9000000.0120001.0000006.0000000.9900702.7400000.3300008.4000003.000000
25%7.1000000.3900000.0900001.9000000.0700007.00000022.0000000.9956003.2100000.5500009.5000005.000000
50%7.9000000.5200000.2600002.2000000.07900014.00000038.0000000.9967503.3100000.62000010.2000006.000000
75%9.2000000.6400000.4200002.6000000.09000021.00000062.0000000.9978353.4000000.73000011.1000006.000000
max15.9000001.5800001.00000015.5000000.61100072.000000289.0000001.0036904.0100002.00000014.9000008.000000
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar \\\n", "count 1599.000000 1599.000000 1599.000000 1599.000000 \n", "mean 8.319637 0.527821 0.270976 2.538806 \n", "std 1.741096 0.179060 0.194801 1.409928 \n", "min 4.600000 0.120000 0.000000 0.900000 \n", "25% 7.100000 0.390000 0.090000 1.900000 \n", "50% 7.900000 0.520000 0.260000 2.200000 \n", "75% 9.200000 0.640000 0.420000 2.600000 \n", "max 15.900000 1.580000 1.000000 15.500000 \n", "\n", " chlorides free sulfur dioxide total sulfur dioxide density \\\n", "count 1599.000000 1599.000000 1599.000000 1599.000000 \n", "mean 0.087467 15.874922 46.467792 0.996747 \n", "std 0.047065 10.460157 32.895324 0.001887 \n", "min 0.012000 1.000000 6.000000 0.990070 \n", "25% 0.070000 7.000000 22.000000 0.995600 \n", "50% 0.079000 14.000000 38.000000 0.996750 \n", "75% 0.090000 21.000000 62.000000 0.997835 \n", "max 0.611000 72.000000 289.000000 1.003690 \n", "\n", " pH sulphates alcohol quality \n", "count 1599.000000 1599.000000 1599.000000 1599.000000 \n", "mean 3.311113 0.658149 10.422983 5.636023 \n", "std 0.154386 0.169507 1.065668 0.807569 \n", "min 2.740000 0.330000 8.400000 3.000000 \n", "25% 3.210000 0.550000 9.500000 5.000000 \n", "50% 3.310000 0.620000 10.200000 6.000000 \n", "75% 3.400000 0.730000 11.100000 6.000000 \n", "max 4.010000 2.000000 14.900000 8.000000 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine.describe()" ] }, { "cell_type": "code", "execution_count": 10, "id": "0948ca45", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "8 18\n", "7 199\n", "6 638\n", "5 681\n", "4 53\n", "3 10\n", "Name: quality, dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine[\"quality\"].value_counts().sort_index(ascending=False)" ] }, { "cell_type": "code", "execution_count": 11, "id": "7245500d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD1CAYAAACrz7WZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQ6klEQVR4nO3df6xfd13H8edrLauMnxu7q2UttiQF6ZRtcC2YGSMUWWFmXYzTixEbUqx/FMFoYjowEv5oMv9RSXQmzUBLRGqHLCuQAE1xGBNcufsBo/vhyjbWa0d7mfJDRgrt3v5xz8KX7nt7v+393vtdP3s+kuV8zvv7Oee8T9q87um533OWqkKS1JbzRt2AJGn4DHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYtHXUDABdffHGtXr161G1I0jnlzjvv/HZVjfX7bM5wT/Jq4F96Sq8E/gL4WFdfDTwK/HZV/W+3zQ3AFuAk8N6q+vzpjrF69WomJyfnPBFJ0k8k+eZsn815W6aqHqyqK6rqCuD1wJPArcB2YH9VrQX2d+skWQdMAJcBG4GbkiyZ70lIkgZ3pvfcNwDfqKpvApuAXV19F3BdN94E7K6q41X1CHAIWD+EXiVJAzrTcJ8APtGNl1fV4wDd8pKufilwuGebqa4mSVokA4d7kvOBa4Fb5prap/aMF9gk2ZpkMsnk9PT0oG1IkgZwJlfubwPuqqqj3frRJCsAuuWxrj4FrOrZbiVw5NSdVdXOqhqvqvGxsb6/7JUknaUzCfd38JNbMgB7gc3deDNwW099IsmyJGuAtcCB+TYqSRrcQN9zT3IB8OvAH/aUbwT2JNkCPAZcD1BVB5PsAe4DTgDbqurkULuWJJ3WQOFeVU8CLzul9gQz357pN38HsGPe3UmSzsqz4glVqUWrt392UY/36I3XLOrx9Ozmu2UkqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CBfHKaR8cVa0sLxyl2SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYNFO5JXprkk0keSHJ/kl9OclGSfUke6pYX9sy/IcmhJA8muXrh2pck9TPolfuHgc9V1c8DlwP3A9uB/VW1FtjfrZNkHTABXAZsBG5KsmTYjUuSZjdnuCd5MfCrwEcAqupHVfUdYBOwq5u2C7iuG28CdlfV8ap6BDgErB9u25Kk0xnkyv2VwDTwD0nuTnJzkhcAy6vqcYBueUk3/1LgcM/2U11NkrRIBgn3pcDrgL+vqiuBH9DdgplF+tTqGZOSrUkmk0xOT08P1KwkaTCDhPsUMFVVd3Trn2Qm7I8mWQHQLY/1zF/Vs/1K4MipO62qnVU1XlXjY2NjZ9u/JKmPOcO9qr4FHE7y6q60AbgP2Ats7mqbgdu68V5gIsmyJGuAtcCBoXYtSTqtQV/5+0fAx5OcDzwMvIuZHwx7kmwBHgOuB6iqg0n2MPMD4ASwrapODr1zSdKsBgr3qroHGO/z0YZZ5u8Adpx9W5Kk+fAJVUlqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KCBwj3Jo0nuTXJPksmudlGSfUke6pYX9sy/IcmhJA8muXqhmpck9XcmV+5vqqorqmq8W98O7K+qtcD+bp0k64AJ4DJgI3BTkiVD7FmSNIf53JbZBOzqxruA63rqu6vqeFU9AhwC1s/jOJKkMzRouBfwhSR3Jtna1ZZX1eMA3fKSrn4pcLhn26mu9lOSbE0ymWRyenr67LqXJPW1dMB5
V1XVkSSXAPuSPHCauelTq2cUqnYCOwHGx8ef8bkk6ewNdOVeVUe65THgVmZusxxNsgKgWx7rpk8Bq3o2XwkcGVbDkqS5zRnuSV6Q5EVPj4G3Al8H9gKbu2mbgdu68V5gIsmyJGuAtcCBYTcuSZrdILdllgO3Jnl6/j9X1eeSfAXYk2QL8BhwPUBVHUyyB7gPOAFsq6qTC9K9JKmvOcO9qh4GLu9TfwLYMMs2O4Ad8+5OknRWfEJVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEDh3uSJUnuTvKZbv2iJPuSPNQtL+yZe0OSQ0keTHL1QjQuSZrdmVy5vw+4v2d9O7C/qtYC+7t1kqwDJoDLgI3ATUmWDKddSdIgBgr3JCuBa4Cbe8qbgF3deBdwXU99d1Udr6pHgEPA+qF0K0kayKBX7n8D/BnwVE9teVU9DtAtL+nqlwKHe+ZNdTVJ0iKZM9yT/AZwrKruHHCf6VOrPvvdmmQyyeT09PSAu5YkDWKQK/ergGuTPArsBt6c5J+Ao0lWAHTLY938KWBVz/YrgSOn7rSqdlbVeFWNj42NzeMUJEmnmjPcq+qGqlpZVauZ+UXpF6vq94C9wOZu2mbgtm68F5hIsizJGmAtcGDonUuSZrV0HtveCOxJsgV4DLgeoKoOJtkD3AecALZV1cl5dypJGtgZhXtV3Q7c3o2fADbMMm8HsGOevUmSzpJPqEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aM5wT/IzSQ4k+WqSg0k+1NUvSrIvyUPd8sKebW5IcijJg0muXsgTkCQ90yBX7seBN1fV5cAVwMYkbwS2A/urai2wv1snyTpgArgM2AjclGTJAvQuSZrFnOFeM/6vW31e918Bm4BdXX0XcF033gTsrqrjVfUIcAhYP8ymJUmnN9A99yRLktwDHAP2VdUdwPKqehygW17STb8UONyz+VRXkyQtkoHCvapOVtUVwEpgfZJfOM309NvFMyYlW5NMJpmcnp4eqFlJ0mDO6NsyVfUd4HZm7qUfTbICoFse66ZNAat6NlsJHOmzr51VNV5V42NjY2feuSRpVoN8W2YsyUu78fOBtwAPAHuBzd20zcBt3XgvMJFkWZI1wFrgwJD7liSdxtIB5qwAdnXfeDkP2FNVn0nyZWBPki3AY8D1AFV1MMke4D7gBLCtqk4uTPuSpH7mDPeq+hpwZZ/6E8CGWbbZAeyYd3eSpLPiE6qS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBc4Z7klVJ/i3J/UkOJnlfV78oyb4kD3XLC3u2uSHJoSQPJrl6IU9AkvRMg1y5nwD+tKpeA7wR2JZkHbAd2F9Va4H93TrdZxPAZcBG4KYkSxaieUlSf3OGe1U9XlV3dePvA/cDlwKbgF3dtF3Add14E7C7qo5X1SPAIWD9kPuWJJ3GGd1zT7IauBK4A1heVY/DzA8A4JJu2qXA4Z7NprqaJGmRLB10YpIXAv8K/HFVfS/JrFP71KrP/rYCWwFe8YpXDNrGc8rq7Z9d1OM9euM1i3o8SQtnoCv3JM9jJtg/XlWf6spHk6zoPl8BHOvqU8Cqns1XAkdO3WdV7ayq8aoaHxsbO9v+JUl9DPJtmQAfAe6vqr/q+WgvsLkbbwZu66lPJFmWZA2wFjgwvJYlSXMZ5LbMVcA7gXuT3NPV3g/cCOxJsgV4DLgeoKoOJtkD
3MfMN222VdXJYTcuSZrdnOFeVf9B//voABtm2WYHsGMefUmS5sEnVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaNGe4J/lokmNJvt5TuyjJviQPdcsLez67IcmhJA8muXqhGpckzW6QK/d/BDaeUtsO7K+qtcD+bp0k64AJ4LJum5uSLBlat5KkgcwZ7lX178D/nFLeBOzqxruA63rqu6vqeFU9AhwC1g+nVUnSoM72nvvyqnocoFte0tUvBQ73zJvqapKkRTTsX6imT636Tky2JplMMjk9PT3kNiTpue1sw/1okhUA3fJYV58CVvXMWwkc6beDqtpZVeNVNT42NnaWbUiS+jnbcN8LbO7Gm4HbeuoTSZYlWQOsBQ7Mr0VJ0plaOteEJJ8Afg24OMkU8EHgRmBPki3AY8D1AFV1MMke4D7gBLCtqk4uUO+SpFnMGe5V9Y5ZPtowy/wdwI75NCVJmh+fUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAbN+YSqJJ1q9fbPLurxHr3xmkU9Xgu8cpekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoHP69QM+Ai1J/XnlLkkNWrAr9yQbgQ8DS4Cbq+rGhTqWJA1TC3cFFuTKPckS4O+AtwHrgHckWbcQx5IkPdNC3ZZZDxyqqoer6kfAbmDTAh1LknSKVNXwd5r8FrCxqt7drb8TeENVvadnzlZga7f6auDBoTcyu4uBby/i8Rab53dua/n8Wj43WPzz+7mqGuv3wULdc0+f2k/9FKmqncDOBTr+aSWZrKrxURx7MXh+57aWz6/lc4Nn1/kt1G2ZKWBVz/pK4MgCHUuSdIqFCvevAGuTrElyPjAB7F2gY0mSTrEgt2Wq6kSS9wCfZ+arkB+tqoMLcayzNJLbQYvI8zu3tXx+LZ8bPIvOb0F+oSpJGi2fUJWkBhnuktQgw12SGtR8uCc5P8nvJ3lLt/67Sf42ybYkzxt1f/OV5L1JVs0989yU5A1JXtyNn5/kQ0k+neQvk7xk1P0NU5JfSfInSd466l4WSpKPjbqHYUqyPskvdeN13Z/f20fdFzwHfqGa5OPMfCvoAuA7wAuBTwEbmDn/zaPrbv6SfBf4AfAN4BPALVU1PdquhifJQeDy7htYO4EngU8y8+d3eVX95kgbnIckB6pqfTf+A2AbcCvwVuDT5/rL9pKc+vXnAG8CvghQVdcuelNDlOSDzLw/aymwD3gDcDvwFuDzVbVjdN09N8L9a1X12iRLgf8GXl5VJ5ME+GpVvXbELc5LkruB1zPzF+p3gGuBO5kJ+k9V1fdH2N68Jbm/ql7Tje+qqtf1fHZPVV0xsubmKcndVXVlN/4K8Paqmk7yAuA/q+oXR9vh/CS5C7gPuJmZJ9TDzN/LCYCq+tLoupu/JPcCVwDLgG8BK6vqe0meD9wx6mxp/rYMcF73INWLmLl6f/qf8suAc/62DFBV9VRVfaGqtgAvB24CNgIPj7a1ofh6knd1468mGQdI8irgx6NrayjOS3Jhkpcxc6E1DVBVPwBOjLa1oRhn5kLjA8B3q+p24IdV9aVzPdg7J6rqZFU9CXyjqr4HUFU/BJ4abWvn+P+JaUAfAR5g5mGqDwC3JHkYeCMzb6s81/3Ue3yq6sfMPA28t7uCONe9G/hwkj9n5oVMX05yGDjcfXYuewkz4RegkvxsVX0ryQvp/36mc0pVPQX8dZJbuuVR2sqcHyW5oAv31z9d7H4XNPJwb/62DECSlwNU1ZEkL2XmFsZjVXVgpI0NQZJXVdV/jbqPhZbkRcArmQmHqao6OuKWFkySC4DlVfXIqHsZpiTXAFdV1ftH3cswJFlWVcf71C8GVlTVvSNo
6yd9PBfCXZKea54L99wl6TnHcJekBhnuktQgw12SGmS4S1KD/h+8IJoxDOTRpAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Bar chart of how many wines received each quality grade in the full dataset.\n", "ax = wine[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")\n", "# Label the figure so it stands alone; the trailing ';' hides the repr of ax.set().\n", "ax.set(title=\"Wine quality distribution (full dataset)\", xlabel=\"quality\", ylabel=\"number of wines\");" ] }, { "cell_type": "markdown", "id": "931ba82d", "metadata": {}, "source": [ "#### Dla podzbioru *train*" ] }, { "cell_type": "code", "execution_count": 12, "id": "f2d00efe", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
14537.60.490.331.90.07427.085.00.997063.410.589.05
12956.60.630.004.30.09351.077.50.995583.200.459.55
7788.30.430.303.40.0797.034.00.997883.360.6110.55
6928.60.490.512.00.42216.062.00.997903.031.179.05
1666.80.640.102.10.08518.0101.00.995603.340.5210.25
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "1453 7.6 0.49 0.33 1.9 0.074 \n", "1295 6.6 0.63 0.00 4.3 0.093 \n", "778 8.3 0.43 0.30 3.4 0.079 \n", "692 8.6 0.49 0.51 2.0 0.422 \n", "166 6.8 0.64 0.10 2.1 0.085 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "1453 27.0 85.0 0.99706 3.41 0.58 \n", "1295 51.0 77.5 0.99558 3.20 0.45 \n", "778 7.0 34.0 0.99788 3.36 0.61 \n", "692 16.0 62.0 0.99790 3.03 1.17 \n", "166 18.0 101.0 0.99560 3.34 0.52 \n", "\n", " alcohol quality \n", "1453 9.0 5 \n", "1295 9.5 5 \n", "778 10.5 5 \n", "692 9.0 5 \n", "166 10.2 5 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine_train.head()" ] }, { "cell_type": "code", "execution_count": 13, "id": "e074e787", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
count1549.0000001549.0000001549.0000001549.0000001549.0000001549.0000001549.0000001549.0000001549.0000001549.0000001549.0000001549.000000
mean8.3275660.5281280.2712522.5299870.08694415.83215046.4151070.9967463.3104840.65672710.4191415.635249
std1.7446920.1801520.1942491.3802020.04373210.45052232.8844540.0018770.1542690.1665581.0672450.807313
min4.6000000.1200000.0000000.9000000.0120001.0000006.0000000.9900702.8600000.3300008.4000003.000000
25%7.1000000.3900000.0900001.9000000.0700007.00000022.0000000.9956003.2100000.5500009.5000005.000000
50%7.9000000.5200000.2600002.2000000.07900013.00000038.0000000.9967503.3100000.62000010.1000006.000000
75%9.2000000.6400000.4300002.6000000.09000021.00000062.0000000.9978603.4000000.73000011.1000006.000000
max15.9000001.5800000.79000015.5000000.46700072.000000289.0000001.0036904.0100001.98000014.9000008.000000
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar \\\n", "count 1549.000000 1549.000000 1549.000000 1549.000000 \n", "mean 8.327566 0.528128 0.271252 2.529987 \n", "std 1.744692 0.180152 0.194249 1.380202 \n", "min 4.600000 0.120000 0.000000 0.900000 \n", "25% 7.100000 0.390000 0.090000 1.900000 \n", "50% 7.900000 0.520000 0.260000 2.200000 \n", "75% 9.200000 0.640000 0.430000 2.600000 \n", "max 15.900000 1.580000 0.790000 15.500000 \n", "\n", " chlorides free sulfur dioxide total sulfur dioxide density \\\n", "count 1549.000000 1549.000000 1549.000000 1549.000000 \n", "mean 0.086944 15.832150 46.415107 0.996746 \n", "std 0.043732 10.450522 32.884454 0.001877 \n", "min 0.012000 1.000000 6.000000 0.990070 \n", "25% 0.070000 7.000000 22.000000 0.995600 \n", "50% 0.079000 13.000000 38.000000 0.996750 \n", "75% 0.090000 21.000000 62.000000 0.997860 \n", "max 0.467000 72.000000 289.000000 1.003690 \n", "\n", " pH sulphates alcohol quality \n", "count 1549.000000 1549.000000 1549.000000 1549.000000 \n", "mean 3.310484 0.656727 10.419141 5.635249 \n", "std 0.154269 0.166558 1.067245 0.807313 \n", "min 2.860000 0.330000 8.400000 3.000000 \n", "25% 3.210000 0.550000 9.500000 5.000000 \n", "50% 3.310000 0.620000 10.100000 6.000000 \n", "75% 3.400000 0.730000 11.100000 6.000000 \n", "max 4.010000 1.980000 14.900000 8.000000 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine_train.describe()" ] }, { "cell_type": "code", "execution_count": 14, "id": "34f511dd", "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "8 17\n", "7 193\n", "6 618\n", "5 660\n", "4 51\n", "3 10\n", "Name: quality, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine_train[\"quality\"].value_counts().sort_index(ascending=False) #indexy oznaczają jakość wina" ] }, { "cell_type": "code", "execution_count": 15, "id": "466eb483", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD1CAYAAACrz7WZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPwUlEQVR4nO3dbawcV33H8e8vNrg8kzQ3rhObOkiG4rQkgVtDlaoqGBGXVDiqGtVUpRYK9RvTUrVS5UAlxAtL6RtapDaVrEDrqoBlKCgGJMAyDVUlGnNDAsF5aEySxrcO9oWKhwIy2Pn3xU7Uxdnru/HdvZt7/P1I0Zw5c2bmP7L12/HZnUmqCklSWy6adAGSpNEz3CWpQYa7JDXIcJekBhnuktQgw12SGrRy0gUAXHrppbV+/fpJlyFJy8rdd9/97aqaGrTtWRHu69evZ2ZmZtJlSNKykuS/5tvmtIwkNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQc+Kh5ikFq3f9dklPd9jt96wpOfTs5t37pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3yrZCaGN+aKI3PUHfuSV6a5BNJHkzyQJJfS3JJkoNJHu6WF/eNvyXJ0SQPJbl+fOVLkgYZdlrmg8DnquqXgKuBB4BdwKGq2gAc6tZJshHYBlwFbAFuS7Ji1IVLkua3YLgneTHwG8CHAKrqJ1X1XWArsLcbthe4sWtvBfZV1amqehQ4CmwabdmSpHMZ5s795cAc8A9J7klye5IXAKur6gmAbnlZN/4K4Fjf/rNdnyRpiQwT7iuB1wB/X1XXAj+km4KZRwb01dMGJTuSzCSZmZubG6pYSdJwhgn3WWC2qu7q1j9BL+xPJFkD0C1P9o1f17f/WuD42Qetqj1VNV1V01NTU+dbvyRpgAXDvaq+BRxL8squazNwP3AA2N71bQfu6NoHgG1JViW5EtgAHB5p1ZKkcxr2d+5/DHwkyXOBR4B30Ptg2J/kZuBx4CaAqjqSZD+9D4DTwM6qOjPyyiVJ8xoq3KvqXmB6wKbN84zfDew+/7IkSYvh6wckqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatBQ4Z7ksST3Jbk3yUzXd0mSg0ke7pYX942/JcnRJA8luX5cxUuSBnsmd+5vqKprqmq6W98FHKqqDcChbp0kG4FtwFXAFuC2JCtGWLMkaQGLmZbZCuzt2nuBG/v691XVqap6FDgKbFrEeSRJz9Cw4V7AF5LcnWRH17e6qp4A6JaXdf1XAMf69p3t+iRJS2TlkOOuq6rjSS4DDiZ58BxjM6Cvnjao9yGxA+BlL3vZkGVIkoYx1J17VR3vlieBT9GbZjmRZA1AtzzZDZ8F1vXtvhY4PuCYe6pquqqmp6amzv8KJElPs2C4J3lBkhc91QbeDHwDOABs74ZtB+7o2geAbUlWJbkS2AAcHnXhkqT5DTMtsxr4VJKnxn+0qj6X5CvA/iQ3A48DNwFU1ZEk+4H7gdPAzqo6M5bqJUkDLRjuVfUIcPWA/u8Am+fZZzewe9HVSZLOi0+oSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBg0d7kl
WJLknyWe69UuSHEzycLe8uG/sLUmOJnkoyfXjKFySNL9ncuf+buCBvvVdwKGq2gAc6tZJshHYBlwFbAFuS7JiNOVKkoYxVLgnWQvcANze170V2Nu19wI39vXvq6pTVfUocBTYNJJqJUlDGfbO/W+AvwCe7OtbXVVPAHTLy7r+K4BjfeNmuz5J0hJZMNyT/DZwsqruHvKYGdBXA467I8lMkpm5ubkhDy1JGsYwd+7XAW9N8hiwD3hjkn8GTiRZA9AtT3bjZ4F1ffuvBY6ffdCq2lNV01U1PTU1tYhLkCSdbcFwr6pbqmptVa2n90XpF6vqD4ADwPZu2Hbgjq59ANiWZFWSK4ENwOGRVy5JmtfKRex7K7A/yc3A48BNAFV1JMl+4H7gNLCzqs4sulJJ0tCeUbhX1Z3AnV37O8DmecbtBnYvsjZJ0nnyCVVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGrRguCf5uSSHk3wtyZEk7+/6L0lyMMnD3fLivn1uSXI0yUNJrh/nBUiSnm6YO/dTwBur6mrgGmBLktcDu4BDVbUBONStk2QjsA24CtgC3JZkxRhqlyTNY8Fwr57/7Vaf0/1XwFZgb9e/F7ixa28F9lXVqap6FDgKbBpl0ZKkcxtqzj3JiiT3AieBg1V1F7C6qp4A6JaXdcOvAI717T7b9Z19zB1JZpLMzM3NLeISJElnGyrcq+pMVV0DrAU2JfnlcwzPoEMMOOaeqpququmpqamhipUkDecZ/Vqmqr4L3ElvLv1EkjUA3fJkN2wWWNe321rg+GILlSQNb5hfy0wleWnXfh7wJuBB4ACwvRu2Hbijax8AtiVZleRKYANweMR1S5LOYeUQY9YAe7tfvFwE7K+qzyT5MrA/yc3A48BNAFV1JMl+4H7gNLCzqs6Mp3xJ0iALhntVfR24dkD/d4DN8+yzG9i96OokSefFJ1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDFgz3JOuS/GuSB5IcSfLurv+SJAeTPNwtL+7b55YkR5M8lOT6cV6AJOnphrlzPw38eVW9Cng9sDPJRmAXcKiqNgCHunW6bduAq4AtwG1JVoyjeEnSYAuGe1U9UVVf7do/AB4ArgC2Anu7YXuBG7v2VmBfVZ2qqkeBo8CmEdctSTqHZzTnnmQ9cC1wF7C6qp6A3gcAcFk37ArgWN9us12fJGmJDB3uSV4I/Avwp1X1/XMNHdBXA463I8lMkpm5ublhy5AkDWGocE/yHHrB/pGq+mTXfSLJmm77GuBk1z8LrOvbfS1w/OxjVtWeqpququmpqanzrV+SNMDKhQYkCfAh4IGq+kDfpgPAduDWbnlHX/9Hk3wAuBzYABweZdEXivW7Pruk53vs1huW9HySxmfBcAeuA94O3Jfk3q7vPfRCfX+Sm4HHgZsAqupIkv3A/fR+abOzqs6MunBJ0vwWDPeq+ncGz6MDbJ5nn93A7kXUJUlaBJ9QlaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDVow3JN8OMnJJN/o67skycEkD3fLi/u23ZLkaJKHklw/rsIlSfMb5s79H4EtZ/XtAg5V1QbgULdOko3ANuCqbp/bkqwYWbWSpKEsGO5V9W/A/5zVvRXY27X3Ajf29e+rqlNV9ShwFNg0mlIlScM63zn31VX1BEC3vKzrvwI41jdutuu
TJC2hUX+hmgF9NXBgsiPJTJKZubm5EZchSRe28w33E0nWAHTLk13/LLCub9xa4PigA1TVnqqarqrpqamp8yxDkjTI+Yb7AWB7194O3NHXvy3JqiRXAhuAw4srUZL0TK1caECSjwG/CVyaZBZ4H3ArsD/JzcDjwE0AVXUkyX7gfuA0sLOqzoypdknSPBYM96p62zybNs8zfjewezFFSZIWxydUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVowdcPSNLZ1u/67JKe77Fbb1jS87XAO3dJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDVrWrx/wEWhJGsw7d0lq0Nju3JNsAT4IrABur6pbx3UuSRqlFmYFxnLnnmQF8HfAbwEbgbcl2TiOc0mSnm5c0zKbgKNV9UhV/QTYB2wd07kkSWdJVY3+oMnvAluq6p3d+tuB11XVu/rG7AB2dKuvBB4aeSHzuxT49hKeb6l5fctby9fX8rXB0l/fL1bV1KAN45pzz4C+n/kUqao9wJ4xnf+cksxU1fQkzr0UvL7lreXra/na4Nl1feOalpkF1vWtrwWOj+lckqSzjCvcvwJsSHJlkucC24ADYzqXJOksY5mWqarTSd4FfJ7eTyE/XFVHxnGu8zSR6aAl5PUtby1fX8vXBs+i6xvLF6qSpMnyCVVJapDhLkkNMtwlqUHNh3uS5yb5wyRv6tZ/P8nfJtmZ5DmTrm+xkvxJknULj1yekrwuyYu79vOSvD/Jp5P8VZKXTLq+UUry60n+LMmbJ13LuCT5p0nXMEpJNiX51a69sfvze8uk64IL4AvVJB+h96ug5wPfBV4IfBLYTO/6t0+uusVL8j3gh8A3gY8BH6+quclWNTpJjgBXd7/A2gP8CPgEvT+/q6vqdyZa4CIkOVxVm7r2HwE7gU8BbwY+vdxftpfk7J8/B3gD8EWAqnrrkhc1QkneR+/9WSuBg8DrgDuBNwGfr6rdk6vuwgj3r1fVq5OsBP4buLyqziQJ8LWqevWES1yUJPcAr6X3F+r3gLcCd9ML+k9W1Q8mWN6iJXmgql7Vtb9aVa/p23ZvVV0zseIWKck9VXVt1/4K8JaqmkvyAuA/qupXJlvh4iT5KnA/cDu9J9RD7+/lNoCq+tLkqlu8JPcB1wCrgG8Ba6vq+0meB9w16WxpfloGuKh7kOpF9O7en/qn/Cpg2U/LAFVVT1bVF6rqZuBy4DZgC/DIZEsbiW8keUfX/lqSaYAkrwB+OrmyRuKiJBcn+Xl6N1pzAFX1Q+D0ZEsbiWl6NxrvBb5XVXcCP66qLy33YO+crqozVfUj4JtV9X2Aqvox8ORkS1vm/yemIX0IeJDew1TvBT6e5BHg9fTeVrnc/cx7fKrqp/SeBj7Q3UEsd+8EPpjkL+m9kOnLSY4Bx7pty9lL6IVfgEryC1X1rSQvZPD7mZaVqnoS+OskH++WJ2grc36S5PlduL/2qc7uu6CJh3vz0zIASS4HqKrjSV5Kbwrj8ao6PNHCRiDJK6rqPyddx7gleRHwcnrhMFtVJyZc0tgkeT6wuqoenXQto5TkBuC6qnrPpGsZhSSrqurUgP5LgTVVdd8Eyvr/Oi6EcJekC82FMOcuSRccw12SGmS4S1KDDHdJapDhLkkN+j8WqSlJMiyYvAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Bar chart of how many wines received each quality grade in the train subset.\n", "ax = wine_train[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")\n", "# Label the figure so it stands alone; the trailing ';' hides the repr of ax.set().\n", "ax.set(title=\"Wine quality distribution (train subset)\", xlabel=\"quality\", ylabel=\"number of wines\");" ] }, { "cell_type": "markdown", "id": "040a1d8b", "metadata": {}, "source": [ "#### Dla podzbioru *test*\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 16, "id": "d6b697ec", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
8569.30.360.391.50.08041.055.00.996523.470.7310.96
11426.90.450.112.40.0436.012.00.993543.300.6511.46
53812.90.350.495.80.0665.035.01.001403.200.6612.07
13246.70.460.241.70.07718.034.00.994803.390.6010.66
2888.70.520.092.50.09120.049.00.997603.340.8610.67
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar chlorides \\\n", "856 9.3 0.36 0.39 1.5 0.080 \n", "1142 6.9 0.45 0.11 2.4 0.043 \n", "538 12.9 0.35 0.49 5.8 0.066 \n", "1324 6.7 0.46 0.24 1.7 0.077 \n", "288 8.7 0.52 0.09 2.5 0.091 \n", "\n", " free sulfur dioxide total sulfur dioxide density pH sulphates \\\n", "856 41.0 55.0 0.99652 3.47 0.73 \n", "1142 6.0 12.0 0.99354 3.30 0.65 \n", "538 5.0 35.0 1.00140 3.20 0.66 \n", "1324 18.0 34.0 0.99480 3.39 0.60 \n", "288 20.0 49.0 0.99760 3.34 0.86 \n", "\n", " alcohol quality \n", "856 10.9 6 \n", "1142 11.4 6 \n", "538 12.0 7 \n", "1324 10.6 6 \n", "288 10.6 7 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine_test.head()" ] }, { "cell_type": "code", "execution_count": 17, "id": "bc91d2fb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fixed acidityvolatile aciditycitric acidresidual sugarchloridesfree sulfur dioxidetotal sulfur dioxidedensitypHsulphatesalcoholquality
count50.00000050.00000050.00000050.00000050.0000050.00000050.00000050.00000050.00000050.00000050.00000050.000000
mean8.0740000.5183000.2624002.8120000.1036417.20000048.1000000.9967793.3306000.70220010.5420005.660000
std1.6228990.1421970.2131552.1377690.1074610.77790633.5256530.0021990.1583380.2420351.0186210.823383
min5.6000000.3100000.0000001.5000000.038003.0000008.0000000.9929202.7400000.3700009.0000004.000000
25%6.9000000.4025000.0950001.9000000.0732510.00000025.2500000.9954453.2600000.5900009.7250005.000000
50%7.6500000.5000000.2450002.2000000.0800015.00000036.5000000.9965603.3200000.65500010.3500006.000000
75%9.1500000.6250000.4000002.6750000.0862523.75000062.0000000.9976003.4000000.77000011.1750006.000000
max12.9000000.9800001.00000015.4000000.6110055.000000143.0000001.0036903.7100002.00000012.8000008.000000
\n", "
" ], "text/plain": [ " fixed acidity volatile acidity citric acid residual sugar \\\n", "count 50.000000 50.000000 50.000000 50.000000 \n", "mean 8.074000 0.518300 0.262400 2.812000 \n", "std 1.622899 0.142197 0.213155 2.137769 \n", "min 5.600000 0.310000 0.000000 1.500000 \n", "25% 6.900000 0.402500 0.095000 1.900000 \n", "50% 7.650000 0.500000 0.245000 2.200000 \n", "75% 9.150000 0.625000 0.400000 2.675000 \n", "max 12.900000 0.980000 1.000000 15.400000 \n", "\n", " chlorides free sulfur dioxide total sulfur dioxide density \\\n", "count 50.00000 50.000000 50.000000 50.000000 \n", "mean 0.10364 17.200000 48.100000 0.996779 \n", "std 0.10746 10.777906 33.525653 0.002199 \n", "min 0.03800 3.000000 8.000000 0.992920 \n", "25% 0.07325 10.000000 25.250000 0.995445 \n", "50% 0.08000 15.000000 36.500000 0.996560 \n", "75% 0.08625 23.750000 62.000000 0.997600 \n", "max 0.61100 55.000000 143.000000 1.003690 \n", "\n", " pH sulphates alcohol quality \n", "count 50.000000 50.000000 50.000000 50.000000 \n", "mean 3.330600 0.702200 10.542000 5.660000 \n", "std 0.158338 0.242035 1.018621 0.823383 \n", "min 2.740000 0.370000 9.000000 4.000000 \n", "25% 3.260000 0.590000 9.725000 5.000000 \n", "50% 3.320000 0.655000 10.350000 6.000000 \n", "75% 3.400000 0.770000 11.175000 6.000000 \n", "max 3.710000 2.000000 12.800000 8.000000 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine_test.describe()" ] }, { "cell_type": "code", "execution_count": 18, "id": "72ce755c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8 1\n", "7 6\n", "6 20\n", "5 21\n", "4 2\n", "Name: quality, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine_test[\"quality\"].value_counts().sort_index(ascending=False) #indexy oznaczają jakość wina" ] }, { "cell_type": "code", "execution_count": 19, "id": "fc355d95", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "" ] }, 
"execution_count": 19, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD1CAYAAABeMT4pAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQu0lEQVR4nO3df6xfdX3H8edLfiwKTFAuyK9atlQiOkF3UzFsCYiQUgg4Y2a7RZnDVQ1kmvnHOl3c9h9mUTMHk3RCgISBMkExVIEwJ5KA0GJBEJDa4ahltGjkh5hg9b0/7ml6vX6/vbffc3u/5cPzkXzzPefz+Zxz3veb9tXTzz3ne1JVSJLa9bJxFyBJ2rMMeklqnEEvSY0z6CWpcQa9JDXOoJekxu077gIGOfTQQ2vx4sXjLkOSXjTWr1//VFVNDOrbK4N+8eLFrFu3btxlSNKLRpIfDetz6kaSGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuL3yhilJe9bi1TeNuwQAHrvorHGX8JLgGb0kNc6gl6TGGfSS1DiDXpIaZ9BLUuNmDfokxyT5ZpKHkjyY5CNd+6uS3Jrk0e79kCHbL0vySJKNSVbP9w8gSdq1uZzRbwc+VlWvB04CLkhyPLAauK2qlgC3deu/Ick+wCXAmcDxwMpuW0nSApk16Kvqiaq6t1t+FngIOAo4F7iyG3Yl8M4Bmy8FNlbVpqp6Abi2206StEB2a44+yWLgzcB3gMOr6gmY+scAOGzAJkcBj09b39y1SZIWyJzvjE1yIPBl4KNV9UySOW02oK2G7H8VsApg0aJFcy1LmjPvBtVL1ZzO6JPsx1TIX11V13fNTyY5ous/Atg6YNPNwDHT1o8Gtgw6RlWtqarJqpqcmBj4fFtJ0gjmctVNgMuAh6rqM9O6bgTO65bPA746YPN7gCVJjk2yP7Ci206StEDmckZ/MvBe4O1JNnSv5cBFwOlJHgVO79ZJcmSStQBVtR24ELiZqV/ifqmqHtwDP4ckaYhZ5+ir6g4Gz7UDnDZg/BZg+bT1tcDaUQuUJPXjnbGS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMbN+uCRJJcDZwNbq+qNXdsXgeO6IQcDP6uqEwds+xjwLPArYHtVTc5L1ZKkOZs16IErgIuBq3Y0VNV7diwn+TTw9C62P7Wqnhq1QElSP3N5lODtSRYP6useHP6nwNvnuS5J0jzpO0f/x8CTVfXokP4CbkmyPsmqnseSJI1gLlM3u7ISuGYX/SdX1ZYkhwG3Jnm4qm4fNLD7h2AVwKJFi3qWJUnaYeQz+iT7Au8CvjhsTFVt6d63AjcAS3cxdk1VTVbV5MTExKhlSZJm6DN18w7g4araPKgzyQFJDtqxDJwBPNDjeJKkEcwa9EmuAe4EjkuyOcn5XdcKZkzbJDkyydpu9XDgjiT3AXcDN1XVN+avdEnSXMzlqpuVQ9r/YkDbFmB5t7wJOKFnfZKknrwzVpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuLk8YeryJFuTPDCt7R+T/DjJhu61fMi2y5I8kmRjktXzWbgkaW7mckZ/BbBsQPtnq+rE7rV2ZmeSfYBLgDOB44GVSY7vU6wkaffNGvRVdTvw0xH2vRTYWFWbquoF4Frg3BH2I0nqoc8c/YVJ7u+mdg4Z0H8U8Pi09c1dmyRpAY0a9J8Hfh84EXgC+PSAMRnQVsN2mGRVknVJ1m3btm3EsiRJM40U9FX1ZFX9qqp+Dfw7U9M0
M20Gjpm2fjSwZRf7XFNVk1U1OTExMUpZkqQBRgr6JEdMW/0T4IEBw+4BliQ5Nsn+wArgxlGOJ0ka3b6zDUhyDXAKcGiSzcA/AKckOZGpqZjHgA92Y48EvlBVy6tqe5ILgZuBfYDLq+rBPfFDSJKGmzXoq2rlgObLhozdAiyftr4W+K1LLyVJC8c7YyWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcrEHfPfx7a5IHprX9c5KHu4eD35Dk4CHbPpbke0k2JFk3j3VLkuZoLmf0VwDLZrTdCryxqt4E/AD4u11sf2pVnVhVk6OVKEnqY9agr6rbgZ/OaLulqrZ3q3cx9eBvSdJeaD7m6P8S+PqQvgJuSbI+yap5OJYkaTfN+szYXUnyCWA7cPWQISdX1ZYkhwG3Jnm4+x/CoH2tAlYBLFq0qE9ZkqRpRj6jT3IecDbw51VVg8Z0DwunqrYCNwBLh+2vqtZU1WRVTU5MTIxaliRphpGCPsky4G+Bc6rq+SFjDkhy0I5l4AzggUFjJUl7zlwur7wGuBM4LsnmJOcDFwMHMTUdsyHJpd3YI5Os7TY9HLgjyX3A3cBNVfWNPfJTSJKGmnWOvqpWDmi+bMjYLcDybnkTcEKv6iRJvXlnrCQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcXN5wtTlSbYmeWBa26uS3Jrk0e79kCHbLkvySJKNSVbPZ+GSpLmZyxn9FcCyGW2rgduqaglwW7f+G5LsA1wCnAkcD6xMcnyvaiVJu23WoK+q24Gfzmg+F7iyW74SeOeATZcCG6tqU1W9AFzbbSdJWkCjztEfXlVPAHTvhw0YcxTw+LT1zV2bJGkB7clfxmZAWw0dnKxKsi7Jum3btu3BsiTppWXUoH8yyREA3fvWAWM2A8dMWz8a2DJsh1W1pqomq2pyYmJixLIkSTONGvQ3Aud1y+cBXx0w5h5gSZJjk+wPrOi2kyQtoLlcXnkNcCdwXJLNSc4HLgJOT/IocHq3TpIjk6wFqKrtwIXAzcBDwJeq6sE982NIkobZd7YBVbVySNdpA8ZuAZZPW18LrB25OklSb94ZK0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklq3MhBn+S4JBumvZ5J8tEZY05J8vS0MZ/sXbEkabfM+oSpYarqEeBEgCT7AD8Gbhgw9NtVdfaox5Ek9TNfUzenAT+sqh/N0/4kSfNkvoJ+BXDNkL63JbkvydeTvGHYDpKsSrIuybpt27bNU1mSpN5Bn2R/4BzgugHd9wKvraoTgH8FvjJsP1W1pqomq2pyYmKib1mSpM58nNGfCdxbVU/O7KiqZ6rquW55LbBfkkPn4ZiSpDmaj6BfyZBpmySvSZJueWl3vJ/MwzElSXM08lU3AEleAZwOfHBa24cAqupS4N3Ah5NsB34BrKiq6nNMSdLu6RX0VfU88OoZbZdOW74YuLjPMSRJ/XhnrCQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNa7XnbHa+y1efdO4SwDgsYvOGncJ0kuWZ/SS1DiDXpIaZ9BLUuMMeklqnEEvSY3rFfRJHkvyvSQbkqwb0J8kn0uyMcn9Sd7S53iSpN03H5dXnlpVTw3pOxNY0r3eCny+e5ckLZA9PXVzLnBVTbkLODjJEXv4mJKkafoGfQG3JFmfZNWA/qOAx6etb+7aJEkLpO/UzclVtSXJYcCtSR6uqtun9WfANgMfDt79Q7EKYNGiRT3LkiTt0OuMvqq2dO9bgRuApTOGbAaOmbZ+NLBlyL7WVNVkVU1OTEz0KUuSNM3I
QZ/kgCQH7VgGzgAemDHsRuB93dU3JwFPV9UTI1crSdptfaZuDgduSLJjP/9RVd9I8iGAqroUWAssBzYCzwPv71euJGl3jRz0VbUJOGFA+6XTlgu4YNRjSJL6885YSWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1Lj+jxK8Jgk30zyUJIHk3xkwJhTkjydZEP3+mS/ciVJu6vPowS3Ax+rqnu7Z8euT3JrVX1/xrhvV9XZPY4jSeph5DP6qnqiqu7tlp8FHgKOmq/CJEnzY17m6JMsBt4MfGdA99uS3Jfk60neMB/HkyTNXZ+pGwCSHAh8GfhoVT0zo/te4LVV9VyS5cBXgCVD9rMKWAWwaNGivmVJkjq9zuiT7MdUyF9dVdfP7K+qZ6rquW55LbBfkkMH7auq1lTVZFVNTkxM9ClLkjRNn6tuAlwGPFRVnxky5jXdOJIs7Y73k1GPKUnafX2mbk4G3gt8L8mGru3jwCKAqroUeDfw4STbgV8AK6qqehxTkrSbRg76qroDyCxjLgYuHvUYkqT+ev8yVpJezBavvmncJQDw2EVn7bF9+xUIktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuCbvjH0p3OkmSXPlGb0kNc6gl6TGGfSS1DiDXpIaZ9BLUuP6PjN2WZJHkmxMsnpAf5J8ruu/P8lb+hxPkrT7+jwzdh/gEuBM4HhgZZLjZww7E1jSvVYBnx/1eJKk0fQ5o18KbKyqTVX1AnAtcO6MMecCV9WUu4CDkxzR45iSpN3U54apo4DHp61vBt46hzFHAU/M3FmSVUyd9QM8l+SRHrXNh0OBp/rsIJ+ap0rGz89iJz+LnfwsdtobPovXDuvoE/SDHgxeI4yZaqxaA6zpUc+8SrKuqibHXcfewM9iJz+LnfwsdtrbP4s+UzebgWOmrR8NbBlhjCRpD+oT9PcAS5Icm2R/YAVw44wxNwLv666+OQl4uqp+a9pGkrTnjDx1U1Xbk1wI3AzsA1xeVQ8m+VDXfymwFlgObASeB97fv+QFs9dMI+0F/Cx28rPYyc9ip736s0jVwClzSVIjvDNWkhpn0EtS4wx6SWqcQQ8k2T/J+5K8o1v/syQXJ7kgyX7jrm8hJfnrJMfMPrJ9Sd6a5He75Zcn+ackX0vyqSSvHHd945Tkj5L8TZIzxl3LuCW5atw1zMZfxgJJrmbqCqRXAD8DDgSuB05j6jM6b3zVLawkTwM/B34IXANcV1XbxlvVeCR5EDihu8JsDVNXjv0nU38uTqiqd421wAWU5O6qWtot/xVwAXADcAbwtaq6aJz1LZQkMy8hD3Aq8F8AVXXOghc1BwY9kOT+qnpTkn2BHwNHVtWvkgS4r6reNOYSF0yS7wJ/CLwDeA9wDrCeqdC/vqqeHWN5CyrJQ1X1+m753qp6y7S+DVV14tiKW2BJvltVb+6W7wGWV9W2JAcAd1XVH4y3woWR5F7g+8AXmLrLP0z93VgBUFXfGl91wzl1M+Vl3U1fBzF1Vr/jv+W/A7ykpm6AqqpfV9UtVXU+cCTwb8AyYNN4S1twDyTZce/HfUkmAZK8Dvjl+Moai5clOSTJq5k6QdwGUFU/B7aPt7QFNcnUic8nmLoB9L+BX1TVt/bWkId+33XTksuAh5m68esTwHVJNgEnMfWtnC8lv/H9RFX1S6bucL4xycvHU9LYfAD4lyR/z9QXVt2Z5HGmvqjvA2OtbOG9kqmAC1BJXlNV/5fkQAZ/p1WTqurXwGeTXNe9P8mLIEeduukkORKgqrYkOZipqYv/raq7x1rYAkvyuqr6wbjr2JskOQj4Pab+Qm+uqifHXNJeI8krgMOr6n/GXcs4JDkLOLmqPj7uWnbFoJekxjlHL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuP8HxYaI3Mtg3D4AAAAASUVO
RK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "wine_test[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")" ] }, { "cell_type": "markdown", "id": "518f05c2", "metadata": {}, "source": [ "## Normalizacja" ] }, { "cell_type": "markdown", "id": "0d904976", "metadata": {}, "source": [ "# Podział z wyróżnieniem data/target" ] }, { "cell_type": "code", "execution_count": 20, "id": "2f1c75ab", "metadata": {}, "outputs": [], "source": [ "# Select features and target by column name rather than by position, so the split\n", "# stays correct even if the column order of `wine` changes.\n", "x_train, x_test, y_train, y_test = train_test_split(\n", "    wine.drop(columns=\"quality\"),\n", "    wine[\"quality\"],\n", "    test_size=0.2,\n", "    random_state=1,\n", "    stratify=wine[\"quality\"],  # preserve the quality class proportions in both subsets\n", ")" ] }, { "cell_type": "code", "execution_count": 21, "id": "c2b16170", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1279" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(y_train)  # number of training samples" ] }, { "cell_type": "code", "execution_count": 22, "id": "772560b4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "320" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(y_test)  # number of test samples" ] }, { "cell_type": "markdown", "id": "fd77e875", "metadata": {}, "source": [ "## Normalizacja" ] }, { "cell_type": "code", "execution_count": 23, "id": "a4ac6f00", "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "\n", "# Fit the scaler on the training features only and reuse it for the test features,\n", "# so the test split does not influence the scaling ranges.\n", "norm = MinMaxScaler()\n", "norm_fit = norm.fit(x_train)\n", "norm_x_train = norm_fit.transform(x_train)\n", "norm_x_test = norm_fit.transform(x_test)" ] }, { "cell_type": "code", "execution_count": 27, "id": "be0d1121", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.31858407, 0.15702479, 0.50632911, 0.0890411 , 0.1010989 ,\n", " 0.07042254, 0.01413428, 0.38839941, 0.39130435, 0.21212121,\n", " 0.43076923],\n", " [0.26548673, 0.14049587, 0.62025316, 0.12328767, 0.17582418,\n", " 0.33802817, 0.19081272, 0.51615272, 0.39130435, 0.16969697,\n", " 
0.26153846],\n", " [0.23893805, 0.17355372, 0.59493671, 0.08219178, 0.14285714,\n", " 0.05633803, 0.01766784, 0.42070485, 0.40869565, 0.12121212,\n", " 0.29230769],\n", " [0.19469027, 0.31404959, 0.13924051, 0.04109589, 0.13846154,\n", " 0.21126761, 0.15194346, 0.39500734, 0.43478261, 0.27878788,\n", " 0.16923077],\n", " [0.27433628, 0.65702479, 0.15189873, 0.0890411 , 0.28791209,\n", " 0.08450704, 0.06007067, 0.46475771, 0.42608696, 0.19393939,\n", " 0.27692308]])" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "norm_x_train[:5]" ] }, { "cell_type": "markdown", "id": "1af8555b", "metadata": {}, "source": [ "## Nie ma żadnych wartości null do uzupełnienia" ] }, { "cell_type": "code", "execution_count": 24, "id": "670062c0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "fixed acidity 0\n", "volatile acidity 0\n", "citric acid 0\n", "residual sugar 0\n", "chlorides 0\n", "free sulfur dioxide 0\n", "total sulfur dioxide 0\n", "density 0\n", "pH 0\n", "sulphates 0\n", "alcohol 0\n", "quality 0\n", "dtype: int64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wine.isnull().sum()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }