ium_444354/lab2.ipynb

1893 lines
76 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 39,
"id": "35674c19",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: opendatasets in c:\\users\\riraa\\anaconda3\\lib\\site-packages (0.1.20)\n",
"Requirement already satisfied: click in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from opendatasets) (7.1.2)\n",
"Requirement already satisfied: kaggle in c:\\users\\riraa\\appdata\\roaming\\python\\python38\\site-packages (from opendatasets) (1.5.12)\n",
"Requirement already satisfied: tqdm in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from opendatasets) (4.59.0)\n",
"Requirement already satisfied: python-slugify in c:\\users\\riraa\\appdata\\roaming\\python\\python38\\site-packages (from kaggle->opendatasets) (6.1.1)\n",
"Requirement already satisfied: python-dateutil in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from kaggle->opendatasets) (2.8.1)\n",
"Requirement already satisfied: requests in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from kaggle->opendatasets) (2.25.1)\n",
"Requirement already satisfied: urllib3 in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from kaggle->opendatasets) (1.26.4)\n",
"Requirement already satisfied: certifi in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from kaggle->opendatasets) (2020.12.5)\n",
"Requirement already satisfied: six>=1.10 in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from kaggle->opendatasets) (1.15.0)\n",
"Requirement already satisfied: text-unidecode>=1.3 in c:\\users\\riraa\\appdata\\roaming\\python\\python38\\site-packages (from python-slugify->kaggle->opendatasets) (1.3)\n",
"Requirement already satisfied: idna<3,>=2.5 in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from requests->kaggle->opendatasets) (2.10)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\riraa\\anaconda3\\lib\\site-packages (from requests->kaggle->opendatasets) (4.0.0)\n"
]
}
],
"source": [
"!pip install opendatasets\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "5e8e5ea8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Skipping, found downloaded files in \".\\red-wine-quality-cortez-et-al-2009\" (use force=True to force download)\n"
]
}
],
"source": [
"import opendatasets as od\n",
"od.download('https://www.kaggle.com/uciml/red-wine-quality-cortez-et-al-2009')"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "1d0f072e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"0 7.4 0.700 0.00 1.9 0.076 \n",
"1 7.8 0.880 0.00 2.6 0.098 \n",
"2 7.8 0.760 0.04 2.3 0.092 \n",
"3 11.2 0.280 0.56 1.9 0.075 \n",
"4 7.4 0.700 0.00 1.9 0.076 \n",
"... ... ... ... ... ... \n",
"1594 6.2 0.600 0.08 2.0 0.090 \n",
"1595 5.9 0.550 0.10 2.2 0.062 \n",
"1596 6.3 0.510 0.13 2.3 0.076 \n",
"1597 5.9 0.645 0.12 2.0 0.075 \n",
"1598 6.0 0.310 0.47 3.6 0.067 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.99780 3.51 0.56 \n",
"1 25.0 67.0 0.99680 3.20 0.68 \n",
"2 15.0 54.0 0.99700 3.26 0.65 \n",
"3 17.0 60.0 0.99800 3.16 0.58 \n",
"4 11.0 34.0 0.99780 3.51 0.56 \n",
"... ... ... ... ... ... \n",
"1594 32.0 44.0 0.99490 3.45 0.58 \n",
"1595 39.0 51.0 0.99512 3.52 0.76 \n",
"1596 29.0 40.0 0.99574 3.42 0.75 \n",
"1597 32.0 44.0 0.99547 3.57 0.71 \n",
"1598 18.0 42.0 0.99549 3.39 0.66 \n",
"\n",
" alcohol quality \n",
"0 9.4 5 \n",
"1 9.8 5 \n",
"2 9.8 5 \n",
"3 9.8 6 \n",
"4 9.4 5 \n",
"... ... ... \n",
"1594 10.5 5 \n",
"1595 11.2 6 \n",
"1596 11.0 6 \n",
"1597 10.2 5 \n",
"1598 11.0 6 \n",
"\n",
"[1599 rows x 12 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"wine=pd.read_csv('./red-wine-quality-cortez-et-al-2009/winequality-red.csv')\n",
"wine"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "9a675582",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"8 17\n",
"7 193\n",
"6 618\n",
"5 660\n",
"4 51\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"wine_train, wine_test = train_test_split(wine, test_size=50, random_state=1,stratify=wine[\"quality\"])\n",
"wine_train[\"quality\"].value_counts().sort_index(ascending=False) "
]
},
{
"cell_type": "markdown",
"id": "e32b25a0",
"metadata": {},
"source": [
"## Wielkość zbioru i podzbiorów"
]
},
{
"cell_type": "markdown",
"id": "14c56dcd",
"metadata": {},
"source": [
"#### Dla całego zbioru"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "3197a613",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.70</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.9978</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.88</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.9968</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.76</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.9970</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.28</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.9980</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.70</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.9978</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"0 7.4 0.70 0.00 1.9 0.076 \n",
"1 7.8 0.88 0.00 2.6 0.098 \n",
"2 7.8 0.76 0.04 2.3 0.092 \n",
"3 11.2 0.28 0.56 1.9 0.075 \n",
"4 7.4 0.70 0.00 1.9 0.076 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.9978 3.51 0.56 \n",
"1 25.0 67.0 0.9968 3.20 0.68 \n",
"2 15.0 54.0 0.9970 3.26 0.65 \n",
"3 17.0 60.0 0.9980 3.16 0.58 \n",
"4 11.0 34.0 0.9978 3.51 0.56 \n",
"\n",
" alcohol quality \n",
"0 9.4 5 \n",
"1 9.8 5 \n",
"2 9.8 5 \n",
"3 9.8 6 \n",
"4 9.4 5 "
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.head()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "18dcd194",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.319637</td>\n",
" <td>0.527821</td>\n",
" <td>0.270976</td>\n",
" <td>2.538806</td>\n",
" <td>0.087467</td>\n",
" <td>15.874922</td>\n",
" <td>46.467792</td>\n",
" <td>0.996747</td>\n",
" <td>3.311113</td>\n",
" <td>0.658149</td>\n",
" <td>10.422983</td>\n",
" <td>5.636023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.741096</td>\n",
" <td>0.179060</td>\n",
" <td>0.194801</td>\n",
" <td>1.409928</td>\n",
" <td>0.047065</td>\n",
" <td>10.460157</td>\n",
" <td>32.895324</td>\n",
" <td>0.001887</td>\n",
" <td>0.154386</td>\n",
" <td>0.169507</td>\n",
" <td>1.065668</td>\n",
" <td>0.807569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.420000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997835</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar \\\n",
"count 1599.000000 1599.000000 1599.000000 1599.000000 \n",
"mean 8.319637 0.527821 0.270976 2.538806 \n",
"std 1.741096 0.179060 0.194801 1.409928 \n",
"min 4.600000 0.120000 0.000000 0.900000 \n",
"25% 7.100000 0.390000 0.090000 1.900000 \n",
"50% 7.900000 0.520000 0.260000 2.200000 \n",
"75% 9.200000 0.640000 0.420000 2.600000 \n",
"max 15.900000 1.580000 1.000000 15.500000 \n",
"\n",
" chlorides free sulfur dioxide total sulfur dioxide density \\\n",
"count 1599.000000 1599.000000 1599.000000 1599.000000 \n",
"mean 0.087467 15.874922 46.467792 0.996747 \n",
"std 0.047065 10.460157 32.895324 0.001887 \n",
"min 0.012000 1.000000 6.000000 0.990070 \n",
"25% 0.070000 7.000000 22.000000 0.995600 \n",
"50% 0.079000 14.000000 38.000000 0.996750 \n",
"75% 0.090000 21.000000 62.000000 0.997835 \n",
"max 0.611000 72.000000 289.000000 1.003690 \n",
"\n",
" pH sulphates alcohol quality \n",
"count 1599.000000 1599.000000 1599.000000 1599.000000 \n",
"mean 3.311113 0.658149 10.422983 5.636023 \n",
"std 0.154386 0.169507 1.065668 0.807569 \n",
"min 2.740000 0.330000 8.400000 3.000000 \n",
"25% 3.210000 0.550000 9.500000 5.000000 \n",
"50% 3.310000 0.620000 10.200000 6.000000 \n",
"75% 3.400000 0.730000 11.100000 6.000000 \n",
"max 4.010000 2.000000 14.900000 8.000000 "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.describe()"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "0948ca45",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"8 18\n",
"7 199\n",
"6 638\n",
"5 681\n",
"4 53\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine[\"quality\"].value_counts().sort_index(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "7245500d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD1CAYAAACrz7WZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQ6klEQVR4nO3df6xfd13H8edrLauMnxu7q2UttiQF6ZRtcC2YGSMUWWFmXYzTixEbUqx/FMFoYjowEv5oMv9RSXQmzUBLRGqHLCuQAE1xGBNcufsBo/vhyjbWa0d7mfJDRgrt3v5xz8KX7nt7v+393vtdP3s+kuV8zvv7Oee8T9q87um533OWqkKS1JbzRt2AJGn4DHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYtHXUDABdffHGtXr161G1I0jnlzjvv/HZVjfX7bM5wT/Jq4F96Sq8E/gL4WFdfDTwK/HZV/W+3zQ3AFuAk8N6q+vzpjrF69WomJyfnPBFJ0k8k+eZsn815W6aqHqyqK6rqCuD1wJPArcB2YH9VrQX2d+skWQdMAJcBG4GbkiyZ70lIkgZ3pvfcNwDfqKpvApuAXV19F3BdN94E7K6q41X1CHAIWD+EXiVJAzrTcJ8APtGNl1fV4wDd8pKufilwuGebqa4mSVokA4d7kvOBa4Fb5prap/aMF9gk2ZpkMsnk9PT0oG1IkgZwJlfubwPuqqqj3frRJCsAuuWxrj4FrOrZbiVw5NSdVdXOqhqvqvGxsb6/7JUknaUzCfd38JNbMgB7gc3deDNwW099IsmyJGuAtcCB+TYqSRrcQN9zT3IB8OvAH/aUbwT2JNkCPAZcD1BVB5PsAe4DTgDbqurkULuWJJ3WQOFeVU8CLzul9gQz357pN38HsGPe3UmSzsqz4glVqUWrt392UY/36I3XLOrx9Ozmu2UkqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CBfHKaR8cVa0sLxyl2SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYNFO5JXprkk0keSHJ/kl9OclGSfUke6pYX9sy/IcmhJA8muXrh2pck9TPolfuHgc9V1c8DlwP3A9uB/VW1FtjfrZNkHTABXAZsBG5KsmTYjUuSZjdnuCd5MfCrwEcAqupHVfUdYBOwq5u2C7iuG28CdlfV8ap6BDgErB9u25Kk0xnkyv2VwDTwD0nuTnJzkhcAy6vqcYBueUk3/1LgcM/2U11NkrRIBgn3pcDrgL+vqiuBH9DdgplF+tTqGZOSrUkmk0xOT08P1KwkaTCDhPsUMFVVd3Trn2Qm7I8mWQHQLY/1zF/Vs/1K4MipO62qnVU1XlXjY2NjZ9u/JKmPOcO9qr4FHE7y6q60AbgP2Ats7mqbgdu68V5gIsmyJGuAtcCBoXYtSTqtQV/5+0fAx5OcDzwMvIuZHwx7kmwBHgOuB6iqg0n2MPMD4ASwrapODr1zSdKsBgr3qroHGO/z0YZZ5u8Adpx9W5Kk+fAJVUlqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KCBwj3Jo0nuTXJPksmudlGSfUke6pYX9sy/IcmhJA8muXqhmpck9XcmV+5vqqorqmq8W98O7K+qtcD+bp0k64AJ4DJgI3BTkiVD7FmSNIf53JbZBOzqxruA63rqu6vqeFU9AhwC1s/jOJKkMzRouBfwhSR3Jtna1ZZX1eMA3fKSrn4pcLhn26mu9lOSbE0ymWRyenr67LqXJPW1dMB5V1XVkSSXAPuSPHCauelTq2cUqnYCOwHGx8ef8bkk6ewNdOVeVUe65THgVmZusxxNsgKgWx7rpk8Bq3o2XwkcGVbDkqS5zRnuSV6Q5EVPj4G3Al8H9gKbu2mbgdu68V5gIsmyJGuAtcCBYTcuSZrdILdllgO3Jnl6/j9X1eeSfAXYk2QL8BhwPUBVHUyyB7gPOAFsq6qTC9K9JKmvOcO9qh4GLu9TfwLYMMs2O4Ad8+5OknRWfEJVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEDh3uSJUnuTvKZbv2iJPuSPNQtL+yZe0OSQ0keTHL1QjQuSZrdmVy5vw+4v2d9O7C/qtYC+7t1kqwDJoDLgI3ATUmWDKddSdIgBgr3JCuBa4Cbe8qbgF3deBdwXU99d1Udr6pHgEPA+qF0K0kayKBX7n8D/BnwVE9teVU9DtAtL+nqlwKHe+ZNdTVJ0iKZM9yT/AZwrKruHHCf6VOrPvvdmmQyyeT09PSAu5YkDWKQK/ergGuTPArsBt6c5J+Ao0lWAHTLY938KWBVz/YrgSOn7rSqdlbVeFWNj42NzeMUJEmnmjPcq+qGqlpZVauZ+UXpF6vq94C9wOZu2mbgtm68F5hIsizJGmAtcGDonUuSZrV0HtveCOxJsgV4DLgeoKoOJtkD3AecALZV1cl5dypJGtgZhXtV3Q7c3o2fADbMMm8HsGOevUmSzpJPqEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aM5wT/IzSQ4k+WqSg0k+1NUvSrIvyUPd8sKebW5IcijJg0muXsgTkCQ90yBX7seBN1fV5cAVwMYkbwS2A/urai2wv1snyTpgArgM2AjclGTJAvQuSZrFnOFeM/6vW31e918Bm4BdXX0XcF033gTsrqrjVfUIcAhYP8ymJUmnN9A99yRLktwDHAP2VdUdwPKqehygW17STb8UONyz+VRXkyQtkoHCvapOVtUVwEpgfZJfOM309NvFMyYlW5NMJpmcnp4eqFlJ0mDO6NsyVfUd4HZm7qUfTbICoFse66ZNAat6NlsJHOmzr51VNV5V42NjY2feuSRpVoN8W2YsyUu78fOBtwAPAHuBzd20zcBt3XgvMJFkWZI1wFrgwJD7liSdxtIB5qwAdnXfeDkP2FNVn0nyZWBPki3AY8D1AFV1MMke4D7gBLCtqk4uTPuSpH7mDPeq+hpwZZ/6E8CGWbbZAeyYd3eSpLPiE6qS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBc4Z7klVJ/i3J/UkOJnlfV78oyb4kD3XLC3u2uSHJoSQPJrl6IU9AkvRMg1y5nwD+tKpeA7wR2JZkHbAd2F9Va4H93TrdZxPAZcBG4KYkSxaieUlSf3OGe1U9XlV3dePvA/cDlwKbgF3dtF3Add14E7C7qo5X1SPAIWD9kPuWJJ3GGd1zT7IauBK4A1heVY/DzA8A4JJu2qXA4Z7NprqaJGmRLB10YpIXAv8K/HFVfS/JrFP71KrP/rYCWwFe8YpXDNrGc8rq7Z9d1OM9euM1i3o8SQtnoCv3JM9jJtg/XlWf6spHk6zoPl8BHOvqU8Cqns1XAkdO3WdV7ayq8aoaHxsbO9v+JUl9DPJtmQAfAe6vqr/q+WgvsLkbbwZu66lPJFmWZA2wFjgwvJYlSXMZ5LbMVcA7gXuT3NPV3g/cCOxJsgV4DLgeoKoOJtkD3MfMN222VdXJYTcuSZrdnOFeVf9B//voABtm2WYHsGMefUmS5sEnVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaNGe4J/lokmNJvt5TuyjJviQPdcsLez67IcmhJA8muXqhGpckzW6QK/d/BDaeUtsO7K+qtcD+bp0k64AJ4LJum5uSLBlat5KkgcwZ7lX178D/nFLeBOzqxruA63rqu6vqeFU9AhwC1g+nVUnSoM72nvvyqnocoFte0tUvBQ73zJvqapKkRTTsX6imT636Tky2JplMMjk9PT3kNiTpue1sw/1okhUA3fJYV58CVvXMWwkc6beDqtpZVeNVNT42NnaWbUiS+jnbcN8LbO7Gm4HbeuoTSZYlWQOsBQ7Mr0VJ0plaOteEJJ8Afg24OMkU8EHgRmBPki3AY8D1AFV1MMke4D7gBLCtqk4uUO+SpFnMGe5V9Y5ZPtowy/wdwI75NCVJmh+fUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAbN+YSqJJ1q9fbPLurxHr3xmkU9Xgu8cpekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoHP69QM+Ai1J/XnlLkkNWrAr9yQbgQ8DS4Cbq+rGhTqWJA1TC3cFFuTKPckS4O+AtwHrgHckWbcQx5IkPdNC3ZZZDxyqqoer6kfAbmDTAh1LknSKVNXwd5r8FrCxqt7drb8TeENVvadnzlZga7f6auDBoTcyu4uBby/i8Rab53dua/n8Wj43WPzz+7mqGuv3wULdc0+f2k/9FKmqncDOBTr+aSWZrKrxURx7MXh+57aWz6/lc4Nn1/kt1G2ZKWBVz/pK4MgCHUuSdIqFCvevAGuTrElyPjAB7F2gY0mSTrEgt2Wq6kSS9wCfZ+arkB+tqoMLcayzNJLbQYvI8zu3tXx+LZ8bPIvOb0F+oSpJGi2fUJWkBhnuktQgw12SGtR8uCc5P8nvJ3lLt/67Sf42ybYkzxt1f/OV5L1JVs0989yU5A1JXtyNn5/kQ0k+neQvk7xk1P0NU5JfSfInSd466l4WSpKPjbqHYUqyPskvdeN13Z/f20fdFzwHfqGa5OPMfCvoAuA7wAuBTwEbmDn/zaPrbv6SfBf4AfAN4BPALVU1PdquhifJQeDy7htYO4EngU8y8+d3eVX95kgbnIckB6pqfTf+A2AbcCvwVuDT5/rL9pKc+vXnAG8CvghQVdcuelNDlOSDzLw/aymwD3gDcDvwFuDzVbVjdN09N8L9a1X12iRLgf8GXl5VJ5ME+GpVvXbELc5LkruB1zPzF+p3gGuBO5kJ+k9V1fdH2N68Jbm/ql7Tje+qqtf1fHZPVV0xsubmKcndVXVlN/4K8Paqmk7yAuA/q+oXR9vh/CS5C7gPuJmZJ9TDzN/LCYCq+tLoupu/JPcCVwDLgG8BK6vqe0meD9wx6mxp/rYMcF73INWLmLl6f/qf8suAc/62DFBV9VRVfaGqtgAvB24CNgIPj7a1ofh6knd1468mGQdI8irgx6NrayjOS3Jhkpcxc6E1DVBVPwBOjLa1oRhn5kLjA8B3q+p24IdV9aVzPdg7J6rqZFU9CXyjqr4HUFU/BJ4abWvn+P+JaUAfAR5g5mGqDwC3JHkYeCMzb6s81/3Ue3yq6sfMPA28t7uCONe9G/hwkj9n5oVMX05yGDjcfXYuewkz4RegkvxsVX0ryQvp/36mc0pVPQX8dZJbuuVR2sqcHyW5oAv31z9d7H4XNPJwb/62DECSlwNU1ZEkL2XmFsZjVXVgpI0NQZJXVdV/jbqPhZbkRcArmQmHqao6OuKWFkySC4DlVfXIqHsZpiTXAFdV1ftH3cswJFlWVcf71C8GVlTVvSNo6yd9PBfCXZKea54L99wl6TnHcJekBhnuktQgw12SGmS4S1KD/h+8IJoxDOTRpAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"wine[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")"
]
},
{
"cell_type": "markdown",
"id": "931ba82d",
"metadata": {},
"source": [
"#### Dla podzbioru *train*"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "f2d00efe",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1453</th>\n",
" <td>7.6</td>\n",
" <td>0.49</td>\n",
" <td>0.33</td>\n",
" <td>1.9</td>\n",
" <td>0.074</td>\n",
" <td>27.0</td>\n",
" <td>85.0</td>\n",
" <td>0.99706</td>\n",
" <td>3.41</td>\n",
" <td>0.58</td>\n",
" <td>9.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1295</th>\n",
" <td>6.6</td>\n",
" <td>0.63</td>\n",
" <td>0.00</td>\n",
" <td>4.3</td>\n",
" <td>0.093</td>\n",
" <td>51.0</td>\n",
" <td>77.5</td>\n",
" <td>0.99558</td>\n",
" <td>3.20</td>\n",
" <td>0.45</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>778</th>\n",
" <td>8.3</td>\n",
" <td>0.43</td>\n",
" <td>0.30</td>\n",
" <td>3.4</td>\n",
" <td>0.079</td>\n",
" <td>7.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99788</td>\n",
" <td>3.36</td>\n",
" <td>0.61</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>692</th>\n",
" <td>8.6</td>\n",
" <td>0.49</td>\n",
" <td>0.51</td>\n",
" <td>2.0</td>\n",
" <td>0.422</td>\n",
" <td>16.0</td>\n",
" <td>62.0</td>\n",
" <td>0.99790</td>\n",
" <td>3.03</td>\n",
" <td>1.17</td>\n",
" <td>9.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>6.8</td>\n",
" <td>0.64</td>\n",
" <td>0.10</td>\n",
" <td>2.1</td>\n",
" <td>0.085</td>\n",
" <td>18.0</td>\n",
" <td>101.0</td>\n",
" <td>0.99560</td>\n",
" <td>3.34</td>\n",
" <td>0.52</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"1453 7.6 0.49 0.33 1.9 0.074 \n",
"1295 6.6 0.63 0.00 4.3 0.093 \n",
"778 8.3 0.43 0.30 3.4 0.079 \n",
"692 8.6 0.49 0.51 2.0 0.422 \n",
"166 6.8 0.64 0.10 2.1 0.085 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"1453 27.0 85.0 0.99706 3.41 0.58 \n",
"1295 51.0 77.5 0.99558 3.20 0.45 \n",
"778 7.0 34.0 0.99788 3.36 0.61 \n",
"692 16.0 62.0 0.99790 3.03 1.17 \n",
"166 18.0 101.0 0.99560 3.34 0.52 \n",
"\n",
" alcohol quality \n",
"1453 9.0 5 \n",
"1295 9.5 5 \n",
"778 10.5 5 \n",
"692 9.0 5 \n",
"166 10.2 5 "
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "e074e787",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.327566</td>\n",
" <td>0.528128</td>\n",
" <td>0.271252</td>\n",
" <td>2.529987</td>\n",
" <td>0.086944</td>\n",
" <td>15.832150</td>\n",
" <td>46.415107</td>\n",
" <td>0.996746</td>\n",
" <td>3.310484</td>\n",
" <td>0.656727</td>\n",
" <td>10.419141</td>\n",
" <td>5.635249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.744692</td>\n",
" <td>0.180152</td>\n",
" <td>0.194249</td>\n",
" <td>1.380202</td>\n",
" <td>0.043732</td>\n",
" <td>10.450522</td>\n",
" <td>32.884454</td>\n",
" <td>0.001877</td>\n",
" <td>0.154269</td>\n",
" <td>0.166558</td>\n",
" <td>1.067245</td>\n",
" <td>0.807313</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.860000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>13.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.430000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997860</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>0.790000</td>\n",
" <td>15.500000</td>\n",
" <td>0.467000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>1.980000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar \\\n",
"count 1549.000000 1549.000000 1549.000000 1549.000000 \n",
"mean 8.327566 0.528128 0.271252 2.529987 \n",
"std 1.744692 0.180152 0.194249 1.380202 \n",
"min 4.600000 0.120000 0.000000 0.900000 \n",
"25% 7.100000 0.390000 0.090000 1.900000 \n",
"50% 7.900000 0.520000 0.260000 2.200000 \n",
"75% 9.200000 0.640000 0.430000 2.600000 \n",
"max 15.900000 1.580000 0.790000 15.500000 \n",
"\n",
" chlorides free sulfur dioxide total sulfur dioxide density \\\n",
"count 1549.000000 1549.000000 1549.000000 1549.000000 \n",
"mean 0.086944 15.832150 46.415107 0.996746 \n",
"std 0.043732 10.450522 32.884454 0.001877 \n",
"min 0.012000 1.000000 6.000000 0.990070 \n",
"25% 0.070000 7.000000 22.000000 0.995600 \n",
"50% 0.079000 13.000000 38.000000 0.996750 \n",
"75% 0.090000 21.000000 62.000000 0.997860 \n",
"max 0.467000 72.000000 289.000000 1.003690 \n",
"\n",
" pH sulphates alcohol quality \n",
"count 1549.000000 1549.000000 1549.000000 1549.000000 \n",
"mean 3.310484 0.656727 10.419141 5.635249 \n",
"std 0.154269 0.166558 1.067245 0.807313 \n",
"min 2.860000 0.330000 8.400000 3.000000 \n",
"25% 3.210000 0.550000 9.500000 5.000000 \n",
"50% 3.310000 0.620000 10.100000 6.000000 \n",
"75% 3.400000 0.730000 11.100000 6.000000 \n",
"max 4.010000 1.980000 14.900000 8.000000 "
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_train.describe()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "34f511dd",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"8 17\n",
"7 193\n",
"6 618\n",
"5 660\n",
"4 51\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_train[\"quality\"].value_counts().sort_index(ascending=False) #indexy oznaczają jakość wina"
]
},
{
"cell_type": "markdown",
"id": "29e301ee",
"metadata": {},
"source": [
"#### Sortowanie jest po to, by szły od najlepszego do najgorszego, zamiast po największej ilość próbek"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "466eb483",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD1CAYAAACrz7WZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPwUlEQVR4nO3dbawcV33H8e8vNrg8kzQ3rhObOkiG4rQkgVtDlaoqGBGXVDiqGtVUpRYK9RvTUrVS5UAlxAtL6RtapDaVrEDrqoBlKCgGJMAyDVUlGnNDAsF5aEySxrcO9oWKhwIy2Pn3xU7Uxdnru/HdvZt7/P1I0Zw5c2bmP7L12/HZnUmqCklSWy6adAGSpNEz3CWpQYa7JDXIcJekBhnuktQgw12SGrRy0gUAXHrppbV+/fpJlyFJy8rdd9/97aqaGrTtWRHu69evZ2ZmZtJlSNKykuS/5tvmtIwkNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQc+Kh5ikFq3f9dklPd9jt96wpOfTs5t37pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3yrZCaGN+aKI3PUHfuSV6a5BNJHkzyQJJfS3JJkoNJHu6WF/eNvyXJ0SQPJbl+fOVLkgYZdlrmg8DnquqXgKuBB4BdwKGq2gAc6tZJshHYBlwFbAFuS7Ji1IVLkua3YLgneTHwG8CHAKrqJ1X1XWArsLcbthe4sWtvBfZV1amqehQ4CmwabdmSpHMZ5s795cAc8A9J7klye5IXAKur6gmAbnlZN/4K4Fjf/rNdnyRpiQwT7iuB1wB/X1XXAj+km4KZRwb01dMGJTuSzCSZmZubG6pYSdJwhgn3WWC2qu7q1j9BL+xPJFkD0C1P9o1f17f/WuD42Qetqj1VNV1V01NTU+dbvyRpgAXDvaq+BRxL8squazNwP3AA2N71bQfu6NoHgG1JViW5EtgAHB5p1ZKkcxr2d+5/DHwkyXOBR4B30Ptg2J/kZuBx4CaAqjqSZD+9D4DTwM6qOjPyyiVJ8xoq3KvqXmB6wKbN84zfDew+/7IkSYvh6wckqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatBQ4Z7ksST3Jbk3yUzXd0mSg0ke7pYX942/JcnRJA8luX5cxUuSBnsmd+5vqKprqmq6W98FHKqqDcChbp0kG4FtwFXAFuC2JCtGWLMkaQGLmZbZCuzt2nuBG/v691XVqap6FDgKbFrEeSRJz9Cw4V7AF5LcnWRH17e6qp4A6JaXdf1XAMf69p3t+iRJS2TlkOOuq6rjSS4DDiZ58BxjM6Cvnjao9yGxA+BlL3vZkGVIkoYx1J17VR3vlieBT9GbZjmRZA1AtzzZDZ8F1vXtvhY4PuCYe6pquqqmp6amzv8KJElPs2C4J3lBkhc91QbeDHwDOABs74ZtB+7o2geAbUlWJbkS2AAcHnXhkqT5DTMtsxr4VJKnxn+0qj6X5CvA/iQ3A48DNwFU1ZEk+4H7gdPAzqo6M5bqJUkDLRjuVfUIcPWA/u8Am+fZZzewe9HVSZLOi0+oSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBg0d7klWJLknyWe69UuSHEzycLe8uG/sLUmOJnkoyfXjKFySNL9ncuf+buCBvvVdwKGq2gAc6tZJshHYBlwFbAFuS7JiNOVKkoYxVLgnWQvcANze170V2Nu19wI39vXvq6pTVfUocBTYNJJqJUlDGfbO/W+AvwCe7OtbXVVPAHTLy7r+K4BjfeNmuz5J0hJZMNyT/DZwsqruHvKYGdBXA467I8lMkpm5ubkhDy1JGsYwd+7XAW9N8hiwD3hjkn8GTiRZA9AtT3bjZ4F1ffuvBY6ffdCq2lNV01U1PTU1tYhLkCSdbcFwr6pbqmptVa2n90XpF6vqD4ADwPZu2Hbgjq59ANiWZFWSK4ENwOGRVy5JmtfKRex7K7A/yc3A48BNAFV1JMl+4H7gNLCzqs4sulJJ0tCeUbhX1Z3AnV37O8DmecbtBnYvsjZJ0nnyCVVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGrRguCf5uSSHk3wtyZEk7+/6L0lyMMnD3fLivn1uSXI0yUNJrh/nBUiSnm6YO/dTwBur6mrgGmBLktcDu4BDVbUBONStk2QjsA24CtgC3JZkxRhqlyTNY8Fwr57/7Vaf0/1XwFZgb9e/F7ixa28F9lXVqap6FDgKbBpl0ZKkcxtqzj3JiiT3AieBg1V1F7C6qp4A6JaXdcOvAI717T7b9Z19zB1JZpLMzM3NLeISJElnGyrcq+pMVV0DrAU2JfnlcwzPoEMMOOaeqpququmpqamhipUkDecZ/Vqmqr4L3ElvLv1EkjUA3fJkN2wWWNe321rg+GILlSQNb5hfy0wleWnXfh7wJuBB4ACwvRu2Hbijax8AtiVZleRKYANweMR1S5LOYeUQY9YAe7tfvFwE7K+qzyT5MrA/yc3A48BNAFV1JMl+4H7gNLCzqs6Mp3xJ0iALhntVfR24dkD/d4DN8+yzG9i96OokSefFJ1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDFgz3JOuS/GuSB5IcSfLurv+SJAeTPNwtL+7b55YkR5M8lOT6cV6AJOnphrlzPw38eVW9Cng9sDPJRmAXcKiqNgCHunW6bduAq4AtwG1JVoyjeEnSYAuGe1U9UVVf7do/AB4ArgC2Anu7YXuBG7v2VmBfVZ2qqkeBo8CmEdctSTqHZzTnnmQ9cC1wF7C6qp6A3gcAcFk37ArgWN9us12fJGmJDB3uSV4I/Avwp1X1/XMNHdBXA463I8lMkpm5ublhy5AkDWGocE/yHHrB/pGq+mTXfSLJmm77GuBk1z8LrOvbfS1w/OxjVtWeqpququmpqanzrV+SNMDKhQYkCfAh4IGq+kDfpgPAduDWbnlHX/9Hk3wAuBzYABweZdEXivW7Pruk53vs1huW9HySxmfBcAeuA94O3Jfk3q7vPfRCfX+Sm4HHgZsAqupIkv3A/fR+abOzqs6MunBJ0vwWDPeq+ncGz6MDbJ5nn93A7kXUJUlaBJ9QlaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDVow3JN8OMnJJN/o67skycEkD3fLi/u23ZLkaJKHklw/rsIlSfMb5s79H4EtZ/XtAg5V1QbgULdOko3ANuCqbp/bkqwYWbWSpKEsGO5V9W/A/5zVvRXY27X3Ajf29e+rqlNV9ShwFNg0mlIlScM63zn31VX1BEC3vKzrvwI41jdutuuTJC2hUX+hmgF9NXBgsiPJTJKZubm5EZchSRe28w33E0nWAHTLk13/LLCub9xa4PigA1TVnqqarqrpqamp8yxDkjTI+Yb7AWB7194O3NHXvy3JqiRXAhuAw4srUZL0TK1caECSjwG/CVyaZBZ4H3ArsD/JzcDjwE0AVXUkyX7gfuA0sLOqzoypdknSPBYM96p62zybNs8zfjewezFFSZIWxydUJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVowdcPSNLZ1u/67JKe77Fbb1jS87XAO3dJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDVrWrx/wEWhJGsw7d0lq0Nju3JNsAT4IrABur6pbx3UuSRqlFmYFxnLnnmQF8HfAbwEbgbcl2TiOc0mSnm5c0zKbgKNV9UhV/QTYB2wd07kkSWdJVY3+oMnvAluq6p3d+tuB11XVu/rG7AB2dKuvBB4aeSHzuxT49hKeb6l5fctby9fX8rXB0l/fL1bV1KAN45pzz4C+n/kUqao9wJ4xnf+cksxU1fQkzr0UvL7lreXra/na4Nl1feOalpkF1vWtrwWOj+lckqSzjCvcvwJsSHJlkucC24ADYzqXJOksY5mWqarTSd4FfJ7eTyE/XFVHxnGu8zSR6aAl5PUtby1fX8vXBs+i6xvLF6qSpMnyCVVJapDhLkkNMtwlqUHNh3uS5yb5wyRv6tZ/P8nfJtmZ5DmTrm+xkvxJknULj1yekrwuyYu79vOSvD/Jp5P8VZKXTLq+UUry60n+LMmbJ13LuCT5p0nXMEpJNiX51a69sfvze8uk64IL4AvVJB+h96ug5wPfBV4IfBLYTO/6t0+uusVL8j3gh8A3gY8BH6+quclWNTpJjgBXd7/A2gP8CPgEvT+/q6vqdyZa4CIkOVxVm7r2HwE7gU8BbwY+vdxftpfk7J8/B3gD8EWAqnrrkhc1QkneR+/9WSuBg8DrgDuBNwGfr6rdk6vuwgj3r1fVq5OsBP4buLyqziQJ8LWqevWES1yUJPcAr6X3F+r3gLcCd9ML+k9W1Q8mWN6iJXmgql7Vtb9aVa/p23ZvVV0zseIWKck9VXVt1/4K8JaqmkvyAuA/qupXJlvh4iT5KnA/cDu9J9RD7+/lNoCq+tLkqlu8JPcB1wCrgG8Ba6vq+0meB9w16WxpfloGuKh7kOpF9O7en/qn/Cpg2U/LAFVVT1bVF6rqZuBy4DZgC/DIZEsbiW8keUfX/lqSaYAkrwB+OrmyRuKiJBcn+Xl6N1pzAFX1Q+D0ZEsbiWl6NxrvBb5XVXcCP66qLy33YO+crqozVfUj4JtV9X2Aqvox8ORkS1vm/yemIX0IeJDew1TvBT6e5BHg9fTeVrnc/cx7fKrqp/SeBj7Q3UEsd+8EPpjkL+m9kOnLSY4Bx7pty9lL6IVfgEryC1X1rSQvZPD7mZaVqnoS+OskH++WJ2grc36S5PlduL/2qc7uu6CJh3vz0zIASS4HqKrjSV5Kbwrj8ao6PNHCRiDJK6rqPyddx7gleRHwcnrhMFtVJyZc0tgkeT6wuqoenXQto5TkBuC6qnrPpGsZhSSrqurUgP5LgTVVdd8Eyvr/Oi6EcJekC82FMOcuSRccw12SGmS4S1KDDHdJapDhLkkN+j8WqSlJMiyYvAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"wine_train[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")"
]
},
{
"cell_type": "markdown",
"id": "040a1d8b",
"metadata": {},
"source": [
"#### Dla podzbioru *test*\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "d6b697ec",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>856</th>\n",
" <td>9.3</td>\n",
" <td>0.36</td>\n",
" <td>0.39</td>\n",
" <td>1.5</td>\n",
" <td>0.080</td>\n",
" <td>41.0</td>\n",
" <td>55.0</td>\n",
" <td>0.99652</td>\n",
" <td>3.47</td>\n",
" <td>0.73</td>\n",
" <td>10.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1142</th>\n",
" <td>6.9</td>\n",
" <td>0.45</td>\n",
" <td>0.11</td>\n",
" <td>2.4</td>\n",
" <td>0.043</td>\n",
" <td>6.0</td>\n",
" <td>12.0</td>\n",
" <td>0.99354</td>\n",
" <td>3.30</td>\n",
" <td>0.65</td>\n",
" <td>11.4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>538</th>\n",
" <td>12.9</td>\n",
" <td>0.35</td>\n",
" <td>0.49</td>\n",
" <td>5.8</td>\n",
" <td>0.066</td>\n",
" <td>5.0</td>\n",
" <td>35.0</td>\n",
" <td>1.00140</td>\n",
" <td>3.20</td>\n",
" <td>0.66</td>\n",
" <td>12.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1324</th>\n",
" <td>6.7</td>\n",
" <td>0.46</td>\n",
" <td>0.24</td>\n",
" <td>1.7</td>\n",
" <td>0.077</td>\n",
" <td>18.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99480</td>\n",
" <td>3.39</td>\n",
" <td>0.60</td>\n",
" <td>10.6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>288</th>\n",
" <td>8.7</td>\n",
" <td>0.52</td>\n",
" <td>0.09</td>\n",
" <td>2.5</td>\n",
" <td>0.091</td>\n",
" <td>20.0</td>\n",
" <td>49.0</td>\n",
" <td>0.99760</td>\n",
" <td>3.34</td>\n",
" <td>0.86</td>\n",
" <td>10.6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"856 9.3 0.36 0.39 1.5 0.080 \n",
"1142 6.9 0.45 0.11 2.4 0.043 \n",
"538 12.9 0.35 0.49 5.8 0.066 \n",
"1324 6.7 0.46 0.24 1.7 0.077 \n",
"288 8.7 0.52 0.09 2.5 0.091 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"856 41.0 55.0 0.99652 3.47 0.73 \n",
"1142 6.0 12.0 0.99354 3.30 0.65 \n",
"538 5.0 35.0 1.00140 3.20 0.66 \n",
"1324 18.0 34.0 0.99480 3.39 0.60 \n",
"288 20.0 49.0 0.99760 3.34 0.86 \n",
"\n",
" alcohol quality \n",
"856 10.9 6 \n",
"1142 11.4 6 \n",
"538 12.0 7 \n",
"1324 10.6 6 \n",
"288 10.6 7 "
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "bc91d2fb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.00000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.074000</td>\n",
" <td>0.518300</td>\n",
" <td>0.262400</td>\n",
" <td>2.812000</td>\n",
" <td>0.10364</td>\n",
" <td>17.200000</td>\n",
" <td>48.100000</td>\n",
" <td>0.996779</td>\n",
" <td>3.330600</td>\n",
" <td>0.702200</td>\n",
" <td>10.542000</td>\n",
" <td>5.660000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.622899</td>\n",
" <td>0.142197</td>\n",
" <td>0.213155</td>\n",
" <td>2.137769</td>\n",
" <td>0.10746</td>\n",
" <td>10.777906</td>\n",
" <td>33.525653</td>\n",
" <td>0.002199</td>\n",
" <td>0.158338</td>\n",
" <td>0.242035</td>\n",
" <td>1.018621</td>\n",
" <td>0.823383</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>5.600000</td>\n",
" <td>0.310000</td>\n",
" <td>0.000000</td>\n",
" <td>1.500000</td>\n",
" <td>0.03800</td>\n",
" <td>3.000000</td>\n",
" <td>8.000000</td>\n",
" <td>0.992920</td>\n",
" <td>2.740000</td>\n",
" <td>0.370000</td>\n",
" <td>9.000000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>6.900000</td>\n",
" <td>0.402500</td>\n",
" <td>0.095000</td>\n",
" <td>1.900000</td>\n",
" <td>0.07325</td>\n",
" <td>10.000000</td>\n",
" <td>25.250000</td>\n",
" <td>0.995445</td>\n",
" <td>3.260000</td>\n",
" <td>0.590000</td>\n",
" <td>9.725000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.650000</td>\n",
" <td>0.500000</td>\n",
" <td>0.245000</td>\n",
" <td>2.200000</td>\n",
" <td>0.08000</td>\n",
" <td>15.000000</td>\n",
" <td>36.500000</td>\n",
" <td>0.996560</td>\n",
" <td>3.320000</td>\n",
" <td>0.655000</td>\n",
" <td>10.350000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.150000</td>\n",
" <td>0.625000</td>\n",
" <td>0.400000</td>\n",
" <td>2.675000</td>\n",
" <td>0.08625</td>\n",
" <td>23.750000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997600</td>\n",
" <td>3.400000</td>\n",
" <td>0.770000</td>\n",
" <td>11.175000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>12.900000</td>\n",
" <td>0.980000</td>\n",
" <td>1.000000</td>\n",
" <td>15.400000</td>\n",
" <td>0.61100</td>\n",
" <td>55.000000</td>\n",
" <td>143.000000</td>\n",
" <td>1.003690</td>\n",
" <td>3.710000</td>\n",
" <td>2.000000</td>\n",
" <td>12.800000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar \\\n",
"count 50.000000 50.000000 50.000000 50.000000 \n",
"mean 8.074000 0.518300 0.262400 2.812000 \n",
"std 1.622899 0.142197 0.213155 2.137769 \n",
"min 5.600000 0.310000 0.000000 1.500000 \n",
"25% 6.900000 0.402500 0.095000 1.900000 \n",
"50% 7.650000 0.500000 0.245000 2.200000 \n",
"75% 9.150000 0.625000 0.400000 2.675000 \n",
"max 12.900000 0.980000 1.000000 15.400000 \n",
"\n",
" chlorides free sulfur dioxide total sulfur dioxide density \\\n",
"count 50.00000 50.000000 50.000000 50.000000 \n",
"mean 0.10364 17.200000 48.100000 0.996779 \n",
"std 0.10746 10.777906 33.525653 0.002199 \n",
"min 0.03800 3.000000 8.000000 0.992920 \n",
"25% 0.07325 10.000000 25.250000 0.995445 \n",
"50% 0.08000 15.000000 36.500000 0.996560 \n",
"75% 0.08625 23.750000 62.000000 0.997600 \n",
"max 0.61100 55.000000 143.000000 1.003690 \n",
"\n",
" pH sulphates alcohol quality \n",
"count 50.000000 50.000000 50.000000 50.000000 \n",
"mean 3.330600 0.702200 10.542000 5.660000 \n",
"std 0.158338 0.242035 1.018621 0.823383 \n",
"min 2.740000 0.370000 9.000000 4.000000 \n",
"25% 3.260000 0.590000 9.725000 5.000000 \n",
"50% 3.320000 0.655000 10.350000 6.000000 \n",
"75% 3.400000 0.770000 11.175000 6.000000 \n",
"max 3.710000 2.000000 12.800000 8.000000 "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test.describe()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "72ce755c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8 1\n",
"7 6\n",
"6 20\n",
"5 21\n",
"4 2\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test[\"quality\"].value_counts().sort_index(ascending=False) #indexy oznaczają jakość wina"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "fc355d95",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD1CAYAAABeMT4pAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQu0lEQVR4nO3df6xfdX3H8edLfiwKTFAuyK9atlQiOkF3UzFsCYiQUgg4Y2a7RZnDVQ1kmvnHOl3c9h9mUTMHk3RCgISBMkExVIEwJ5KA0GJBEJDa4ahltGjkh5hg9b0/7ml6vX6/vbffc3u/5cPzkXzzPefz+Zxz3veb9tXTzz3ne1JVSJLa9bJxFyBJ2rMMeklqnEEvSY0z6CWpcQa9JDXOoJekxu077gIGOfTQQ2vx4sXjLkOSXjTWr1//VFVNDOrbK4N+8eLFrFu3btxlSNKLRpIfDetz6kaSGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuL3yhilJe9bi1TeNuwQAHrvorHGX8JLgGb0kNc6gl6TGGfSS1DiDXpIaZ9BLUuNmDfokxyT5ZpKHkjyY5CNd+6uS3Jrk0e79kCHbL0vySJKNSVbP9w8gSdq1uZzRbwc+VlWvB04CLkhyPLAauK2qlgC3deu/Ick+wCXAmcDxwMpuW0nSApk16Kvqiaq6t1t+FngIOAo4F7iyG3Yl8M4Bmy8FNlbVpqp6Abi2206StEB2a44+yWLgzcB3gMOr6gmY+scAOGzAJkcBj09b39y1SZIWyJzvjE1yIPBl4KNV9UySOW02oK2G7H8VsApg0aJFcy1LmjPvBtVL1ZzO6JPsx1TIX11V13fNTyY5ous/Atg6YNPNwDHT1o8Gtgw6RlWtqarJqpqcmBj4fFtJ0gjmctVNgMuAh6rqM9O6bgTO65bPA746YPN7gCVJjk2yP7Ci206StEDmckZ/MvBe4O1JNnSv5cBFwOlJHgVO79ZJcmSStQBVtR24ELiZqV/ifqmqHtwDP4ckaYhZ5+ir6g4Gz7UDnDZg/BZg+bT1tcDaUQuUJPXjnbGS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMbN+uCRJJcDZwNbq+qNXdsXgeO6IQcDP6uqEwds+xjwLPArYHtVTc5L1ZKkOZs16IErgIuBq3Y0VNV7diwn+TTw9C62P7Wqnhq1QElSP3N5lODtSRYP6useHP6nwNvnuS5J0jzpO0f/x8CTVfXokP4CbkmyPsmqnseSJI1gLlM3u7ISuGYX/SdX1ZYkhwG3Jnm4qm4fNLD7h2AVwKJFi3qWJUnaYeQz+iT7Au8CvjhsTFVt6d63AjcAS3cxdk1VTVbV5MTExKhlSZJm6DN18w7g4araPKgzyQFJDtqxDJwBPNDjeJKkEcwa9EmuAe4EjkuyOcn5XdcKZkzbJDkyydpu9XDgjiT3AXcDN1XVN+avdEnSXMzlqpuVQ9r/YkDbFmB5t7wJOKFnfZKknrwzVpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuLk8YeryJFuTPDCt7R+T/DjJhu61fMi2y5I8kmRjktXzWbgkaW7mckZ/BbBsQPtnq+rE7rV2ZmeSfYBLgDOB44GVSY7vU6wkaffNGvRVdTvw0xH2vRTYWFWbquoF4Frg3BH2I0nqoc8c/YVJ7u+mdg4Z0H8U8Pi09c1dmyRpAY0a9J8Hfh84EXgC+PSAMRnQVsN2mGRVknVJ1m3btm3EsiRJM40U9FX1ZFX9qqp+Dfw7U9M0M20Gjpm2fjSwZRf7XFNVk1U1OTExMUpZkqQBRgr6JEdMW/0T4IEBw+4BliQ5Nsn+wArgxlGOJ0ka3b6zDUhyDXAKcGiSzcA/AKckOZGpqZjHgA92Y48EvlBVy6tqe5ILgZuBfYDLq+rBPfFDSJKGmzXoq2rlgObLhozdAiyftr4W+K1LLyVJC8c7YyWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcrEHfPfx7a5IHprX9c5KHu4eD35Dk4CHbPpbke0k2JFk3j3VLkuZoLmf0VwDLZrTdCryxqt4E/AD4u11sf2pVnVhVk6OVKEnqY9agr6rbgZ/OaLulqrZ3q3cx9eBvSdJeaD7m6P8S+PqQvgJuSbI+yap5OJYkaTfN+szYXUnyCWA7cPWQISdX1ZYkhwG3Jnm4+x/CoH2tAlYBLFq0qE9ZkqRpRj6jT3IecDbw51VVg8Z0DwunqrYCNwBLh+2vqtZU1WRVTU5MTIxaliRphpGCPsky4G+Bc6rq+SFjDkhy0I5l4AzggUFjJUl7zlwur7wGuBM4LsnmJOcDFwMHMTUdsyHJpd3YI5Os7TY9HLgjyX3A3cBNVfWNPfJTSJKGmnWOvqpWDmi+bMjYLcDybnkTcEKv6iRJvXlnrCQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcXN5wtTlSbYmeWBa26uS3Jrk0e79kCHbLkvySJKNSVbPZ+GSpLmZyxn9FcCyGW2rgduqaglwW7f+G5LsA1wCnAkcD6xMcnyvaiVJu23WoK+q24Gfzmg+F7iyW74SeOeATZcCG6tqU1W9AFzbbSdJWkCjztEfXlVPAHTvhw0YcxTw+LT1zV2bJGkB7clfxmZAWw0dnKxKsi7Jum3btu3BsiTppWXUoH8yyREA3fvWAWM2A8dMWz8a2DJsh1W1pqomq2pyYmJixLIkSTONGvQ3Aud1y+cBXx0w5h5gSZJjk+wPrOi2kyQtoLlcXnkNcCdwXJLNSc4HLgJOT/IocHq3TpIjk6wFqKrtwIXAzcBDwJeq6sE982NIkobZd7YBVbVySNdpA8ZuAZZPW18LrB25OklSb94ZK0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklq3MhBn+S4JBumvZ5J8tEZY05J8vS0MZ/sXbEkabfM+oSpYarqEeBEgCT7AD8Gbhgw9NtVdfaox5Ek9TNfUzenAT+sqh/N0/4kSfNkvoJ+BXDNkL63JbkvydeTvGHYDpKsSrIuybpt27bNU1mSpN5Bn2R/4BzgugHd9wKvraoTgH8FvjJsP1W1pqomq2pyYmKib1mSpM58nNGfCdxbVU/O7KiqZ6rquW55LbBfkkPn4ZiSpDmaj6BfyZBpmySvSZJueWl3vJ/MwzElSXM08lU3AEleAZwOfHBa24cAqupS4N3Ah5NsB34BrKiq6nNMSdLu6RX0VfU88OoZbZdOW74YuLjPMSRJ/XhnrCQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNa7XnbHa+y1efdO4SwDgsYvOGncJ0kuWZ/SS1DiDXpIaZ9BLUuMMeklqnEEvSY3rFfRJHkvyvSQbkqwb0J8kn0uyMcn9Sd7S53iSpN03H5dXnlpVTw3pOxNY0r3eCny+e5ckLZA9PXVzLnBVTbkLODjJEXv4mJKkafoGfQG3JFmfZNWA/qOAx6etb+7aJEkLpO/UzclVtSXJYcCtSR6uqtun9WfANgMfDt79Q7EKYNGiRT3LkiTt0OuMvqq2dO9bgRuApTOGbAaOmbZ+NLBlyL7WVNVkVU1OTEz0KUuSNM3IQZ/kgCQH7VgGzgAemDHsRuB93dU3JwFPV9UTI1crSdptfaZuDgduSLJjP/9RVd9I8iGAqroUWAssBzYCzwPv71euJGl3jRz0VbUJOGFA+6XTlgu4YNRjSJL6885YSWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1Lj+jxK8Jgk30zyUJIHk3xkwJhTkjydZEP3+mS/ciVJu6vPowS3Ax+rqnu7Z8euT3JrVX1/xrhvV9XZPY4jSeph5DP6qnqiqu7tlp8FHgKOmq/CJEnzY17m6JMsBt4MfGdA99uS3Jfk60neMB/HkyTNXZ+pGwCSHAh8GfhoVT0zo/te4LVV9VyS5cBXgCVD9rMKWAWwaNGivmVJkjq9zuiT7MdUyF9dVdfP7K+qZ6rquW55LbBfkkMH7auq1lTVZFVNTkxM9ClLkjRNn6tuAlwGPFRVnxky5jXdOJIs7Y73k1GPKUnafX2mbk4G3gt8L8mGru3jwCKAqroUeDfw4STbgV8AK6qqehxTkrSbRg76qroDyCxjLgYuHvUYkqT+ev8yVpJezBavvmncJQDw2EVn7bF9+xUIktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuCbvjH0p3OkmSXPlGb0kNc6gl6TGGfSS1DiDXpIaZ9BLUuP6PjN2WZJHkmxMsnpAf5J8ruu/P8lb+hxPkrT7+jwzdh/gEuBM4HhgZZLjZww7E1jSvVYBnx/1eJKk0fQ5o18KbKyqTVX1AnAtcO6MMecCV9WUu4CDkxzR45iSpN3U54apo4DHp61vBt46hzFHAU/M3FmSVUyd9QM8l+SRHrXNh0OBp/rsIJ+ap0rGz89iJz+LnfwsdtobPovXDuvoE/SDHgxeI4yZaqxaA6zpUc+8SrKuqibHXcfewM9iJz+LnfwsdtrbP4s+UzebgWOmrR8NbBlhjCRpD+oT9PcAS5Icm2R/YAVw44wxNwLv666+OQl4uqp+a9pGkrTnjDx1U1Xbk1wI3AzsA1xeVQ8m+VDXfymwFlgObASeB97fv+QFs9dMI+0F/Cx28rPYyc9ip736s0jVwClzSVIjvDNWkhpn0EtS4wx6SWqcQQ8k2T/J+5K8o1v/syQXJ7kgyX7jrm8hJfnrJMfMPrJ9Sd6a5He75Zcn+ackX0vyqSSvHHd945Tkj5L8TZIzxl3LuCW5atw1zMZfxgJJrmbqCqRXAD8DDgSuB05j6jM6b3zVLawkTwM/B34IXANcV1XbxlvVeCR5EDihu8JsDVNXjv0nU38uTqiqd421wAWU5O6qWtot/xVwAXADcAbwtaq6aJz1LZQkMy8hD3Aq8F8AVXXOghc1BwY9kOT+qnpTkn2BHwNHVtWvkgS4r6reNOYSF0yS7wJ/CLwDeA9wDrCeqdC/vqqeHWN5CyrJQ1X1+m753qp6y7S+DVV14tiKW2BJvltVb+6W7wGWV9W2JAcAd1XVH4y3woWR5F7g+8AXmLrLP0z93VgBUFXfGl91wzl1M+Vl3U1fBzF1Vr/jv+W/A7ykpm6AqqpfV9UtVXU+cCTwb8AyYNN4S1twDyTZce/HfUkmAZK8Dvjl+Moai5clOSTJq5k6QdwGUFU/B7aPt7QFNcnUic8nmLoB9L+BX1TVt/bWkId+33XTksuAh5m68esTwHVJNgEnMfWtnC8lv/H9RFX1S6bucL4xycvHU9LYfAD4lyR/z9QXVt2Z5HGmvqjvA2OtbOG9kqmAC1BJXlNV/5fkQAZ/p1WTqurXwGeTXNe9P8mLIEeduukkORKgqrYkOZipqYv/raq7x1rYAkvyuqr6wbjr2JskOQj4Pab+Qm+uqifHXNJeI8krgMOr6n/GXcs4JDkLOLmqPj7uWnbFoJekxjlHL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuP8HxYaI3Mtg3D4AAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"wine_test[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")"
]
},
{
"cell_type": "markdown",
"id": "0d904976",
"metadata": {},
"source": [
"# Podział z wyróżnieniem data/remain"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "2f1c75ab",
"metadata": {},
"outputs": [],
"source": [
"X_train,X_rem,y_train,y_rem = train_test_split(wine.iloc[:,:-1],wine.iloc[:,-1], test_size=0.2, random_state=1,stratify=wine[\"quality\"])"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "c2b16170",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1279"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_train.value_counts().sum()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "772560b4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"320"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_rem.value_counts().sum()"
]
},
{
"cell_type": "markdown",
"id": "e6bca841",
"metadata": {},
"source": [
"#### Mamy teraz podział 8:2, a chcemy mieć 8:1:1, więc pozostały zbiór dzielimy na pół\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "c6bca605",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1279, 11)\n",
"(1279,)\n",
"(160, 11)\n",
"(160,)\n",
"(160, 11)\n",
"(160,)\n"
]
},
{
"data": {
"text/plain": [
"(None, None)"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_valid, X_test, y_valid, y_test = train_test_split(X_rem,y_rem, test_size=0.5)\n",
"\n",
"print(X_train.shape), print(y_train.shape)\n",
"print(X_valid.shape), print(y_valid.shape)\n",
"print(X_test.shape), print(y_test.shape)"
]
},
{
"cell_type": "markdown",
"id": "fd77e875",
"metadata": {},
"source": [
"## Normalizacja"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "a4ac6f00",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"norm = MinMaxScaler()\n",
"norm_fit = norm.fit(X_train)\n",
"norm_X_train = norm_fit.transform(X_train)\n",
"norm_X_test = norm_fit.transform(X_test)\n",
"norm_X_valid =norm_fit.transform(X_valid)"
]
},
{
"cell_type": "markdown",
"id": "599c4102",
"metadata": {},
"source": [
"###### Wygląd po normalizacji: mieści się w zakresie [0,1]"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "be0d1121",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([0.26548673, 0.14049587, 0.62025316, 0.12328767, 0.17582418,\n",
" 0.33802817, 0.19081272, 0.51615272, 0.39130435, 0.16969697,\n",
" 0.26153846])"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"norm_X_train[1]"
]
},
{
"cell_type": "markdown",
"id": "1af8555b",
"metadata": {},
"source": [
"## Nie ma żadnych null'i do wypełnienia"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "670062c0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"fixed acidity 0\n",
"volatile acidity 0\n",
"citric acid 0\n",
"residual sugar 0\n",
"chlorides 0\n",
"free sulfur dioxide 0\n",
"total sulfur dioxide 0\n",
"density 0\n",
"pH 0\n",
"sulphates 0\n",
"alcohol 0\n",
"quality 0\n",
"dtype: int64"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.isnull().sum()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}