ium_444354/.ipynb_checkpoints/lab2-checkpoint.ipynb

1863 lines
76 KiB
Plaintext
Raw Normal View History

2022-03-15 11:50:54 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "35674c19",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Use %pip (not !pip) so the package is installed into the environment of the running kernel.\n",
"# The version is pinned to the one this notebook was last run with.\n",
"%pip install -q opendatasets==0.1.20"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5e8e5ea8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████████████████████████████████████████████| 25.6k/25.6k [00:00<00:00, 1.68MB/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading red-wine-quality-cortez-et-al-2009.zip to .\\red-wine-quality-cortez-et-al-2009\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import opendatasets as od\n",
"\n",
"# Kaggle page of the red-wine quality dataset (Cortez et al., 2009).\n",
"# The archive is downloaded into a folder named after the dataset, under the working directory.\n",
"DATASET_URL = 'https://www.kaggle.com/uciml/red-wine-quality-cortez-et-al-2009'\n",
"od.download(DATASET_URL)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1d0f072e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"0 7.4 0.700 0.00 1.9 0.076 \n",
"1 7.8 0.880 0.00 2.6 0.098 \n",
"2 7.8 0.760 0.04 2.3 0.092 \n",
"3 11.2 0.280 0.56 1.9 0.075 \n",
"4 7.4 0.700 0.00 1.9 0.076 \n",
"... ... ... ... ... ... \n",
"1594 6.2 0.600 0.08 2.0 0.090 \n",
"1595 5.9 0.550 0.10 2.2 0.062 \n",
"1596 6.3 0.510 0.13 2.3 0.076 \n",
"1597 5.9 0.645 0.12 2.0 0.075 \n",
"1598 6.0 0.310 0.47 3.6 0.067 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.99780 3.51 0.56 \n",
"1 25.0 67.0 0.99680 3.20 0.68 \n",
"2 15.0 54.0 0.99700 3.26 0.65 \n",
"3 17.0 60.0 0.99800 3.16 0.58 \n",
"4 11.0 34.0 0.99780 3.51 0.56 \n",
"... ... ... ... ... ... \n",
"1594 32.0 44.0 0.99490 3.45 0.58 \n",
"1595 39.0 51.0 0.99512 3.52 0.76 \n",
"1596 29.0 40.0 0.99574 3.42 0.75 \n",
"1597 32.0 44.0 0.99547 3.57 0.71 \n",
"1598 18.0 42.0 0.99549 3.39 0.66 \n",
"\n",
" alcohol quality \n",
"0 9.4 5 \n",
"1 9.8 5 \n",
"2 9.8 5 \n",
"3 9.8 6 \n",
"4 9.4 5 \n",
"... ... ... \n",
"1594 10.5 5 \n",
"1595 11.2 6 \n",
"1596 11.0 6 \n",
"1597 10.2 5 \n",
"1598 11.0 6 \n",
"\n",
"[1599 rows x 12 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the CSV from the folder created by the opendatasets download.\n",
"csv_path = './red-wine-quality-cortez-et-al-2009/winequality-red.csv'\n",
"wine = pd.read_csv(csv_path)\n",
"wine"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9a675582",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"8 17\n",
"7 193\n",
"6 618\n",
"5 660\n",
"4 51\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"TEST_SIZE = 50  # an integer test_size is an absolute number of rows held out for the test subset\n",
"SEED = 1  # fixed seed so the split is reproducible\n",
"\n",
"# Stratify on quality so both subsets keep roughly the class proportions of the full set.\n",
"wine_train, wine_test = train_test_split(\n",
"    wine, test_size=TEST_SIZE, random_state=SEED, stratify=wine[\"quality\"]\n",
")\n",
"\n",
"# Sanity checks: the two subsets partition the full set and the test subset has the requested size.\n",
"assert len(wine_train) + len(wine_test) == len(wine)\n",
"assert len(wine_test) == TEST_SIZE\n",
"\n",
"wine_train[\"quality\"].value_counts().sort_index(ascending=False)"
]
},
{
"cell_type": "markdown",
"id": "e32b25a0",
"metadata": {},
"source": [
"## Wielkość zbioru i podzbiorów"
]
},
{
"cell_type": "markdown",
"id": "14c56dcd",
"metadata": {},
"source": [
"#### Dla całego zbioru"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3197a613",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.70</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.9978</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.88</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.9968</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.76</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.9970</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.28</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.9980</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.70</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.9978</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"0 7.4 0.70 0.00 1.9 0.076 \n",
"1 7.8 0.88 0.00 2.6 0.098 \n",
"2 7.8 0.76 0.04 2.3 0.092 \n",
"3 11.2 0.28 0.56 1.9 0.075 \n",
"4 7.4 0.70 0.00 1.9 0.076 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.9978 3.51 0.56 \n",
"1 25.0 67.0 0.9968 3.20 0.68 \n",
"2 15.0 54.0 0.9970 3.26 0.65 \n",
"3 17.0 60.0 0.9980 3.16 0.58 \n",
"4 11.0 34.0 0.9978 3.51 0.56 \n",
"\n",
" alcohol quality \n",
"0 9.4 5 \n",
"1 9.8 5 \n",
"2 9.8 5 \n",
"3 9.8 6 \n",
"4 9.4 5 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "18dcd194",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.319637</td>\n",
" <td>0.527821</td>\n",
" <td>0.270976</td>\n",
" <td>2.538806</td>\n",
" <td>0.087467</td>\n",
" <td>15.874922</td>\n",
" <td>46.467792</td>\n",
" <td>0.996747</td>\n",
" <td>3.311113</td>\n",
" <td>0.658149</td>\n",
" <td>10.422983</td>\n",
" <td>5.636023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.741096</td>\n",
" <td>0.179060</td>\n",
" <td>0.194801</td>\n",
" <td>1.409928</td>\n",
" <td>0.047065</td>\n",
" <td>10.460157</td>\n",
" <td>32.895324</td>\n",
" <td>0.001887</td>\n",
" <td>0.154386</td>\n",
" <td>0.169507</td>\n",
" <td>1.065668</td>\n",
" <td>0.807569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.420000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997835</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar \\\n",
"count 1599.000000 1599.000000 1599.000000 1599.000000 \n",
"mean 8.319637 0.527821 0.270976 2.538806 \n",
"std 1.741096 0.179060 0.194801 1.409928 \n",
"min 4.600000 0.120000 0.000000 0.900000 \n",
"25% 7.100000 0.390000 0.090000 1.900000 \n",
"50% 7.900000 0.520000 0.260000 2.200000 \n",
"75% 9.200000 0.640000 0.420000 2.600000 \n",
"max 15.900000 1.580000 1.000000 15.500000 \n",
"\n",
" chlorides free sulfur dioxide total sulfur dioxide density \\\n",
"count 1599.000000 1599.000000 1599.000000 1599.000000 \n",
"mean 0.087467 15.874922 46.467792 0.996747 \n",
"std 0.047065 10.460157 32.895324 0.001887 \n",
"min 0.012000 1.000000 6.000000 0.990070 \n",
"25% 0.070000 7.000000 22.000000 0.995600 \n",
"50% 0.079000 14.000000 38.000000 0.996750 \n",
"75% 0.090000 21.000000 62.000000 0.997835 \n",
"max 0.611000 72.000000 289.000000 1.003690 \n",
"\n",
" pH sulphates alcohol quality \n",
"count 1599.000000 1599.000000 1599.000000 1599.000000 \n",
"mean 3.311113 0.658149 10.422983 5.636023 \n",
"std 0.154386 0.169507 1.065668 0.807569 \n",
"min 2.740000 0.330000 8.400000 3.000000 \n",
"25% 3.210000 0.550000 9.500000 5.000000 \n",
"50% 3.310000 0.620000 10.200000 6.000000 \n",
"75% 3.400000 0.730000 11.100000 6.000000 \n",
"max 4.010000 2.000000 14.900000 8.000000 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.describe()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0948ca45",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"8 18\n",
"7 199\n",
"6 638\n",
"5 681\n",
"4 53\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine[\"quality\"].value_counts().sort_index(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7245500d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD1CAYAAACrz7WZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQ6klEQVR4nO3df6xfd13H8edrLauMnxu7q2UttiQF6ZRtcC2YGSMUWWFmXYzTixEbUqx/FMFoYjowEv5oMv9RSXQmzUBLRGqHLCuQAE1xGBNcufsBo/vhyjbWa0d7mfJDRgrt3v5xz8KX7nt7v+393vtdP3s+kuV8zvv7Oee8T9q87um533OWqkKS1JbzRt2AJGn4DHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYtHXUDABdffHGtXr161G1I0jnlzjvv/HZVjfX7bM5wT/Jq4F96Sq8E/gL4WFdfDTwK/HZV/W+3zQ3AFuAk8N6q+vzpjrF69WomJyfnPBFJ0k8k+eZsn815W6aqHqyqK6rqCuD1wJPArcB2YH9VrQX2d+skWQdMAJcBG4GbkiyZ70lIkgZ3pvfcNwDfqKpvApuAXV19F3BdN94E7K6q41X1CHAIWD+EXiVJAzrTcJ8APtGNl1fV4wDd8pKufilwuGebqa4mSVokA4d7kvOBa4Fb5prap/aMF9gk2ZpkMsnk9PT0oG1IkgZwJlfubwPuqqqj3frRJCsAuuWxrj4FrOrZbiVw5NSdVdXOqhqvqvGxsb6/7JUknaUzCfd38JNbMgB7gc3deDNwW099IsmyJGuAtcCB+TYqSRrcQN9zT3IB8OvAH/aUbwT2JNkCPAZcD1BVB5PsAe4DTgDbqurkULuWJJ3WQOFeVU8CLzul9gQz357pN38HsGPe3UmSzsqz4glVqUWrt392UY/36I3XLOrx9Ozmu2UkqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CBfHKaR8cVa0sLxyl2SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYNFO5JXprkk0keSHJ/kl9OclGSfUke6pYX9sy/IcmhJA8muXrh2pck9TPolfuHgc9V1c8DlwP3A9uB/VW1FtjfrZNkHTABXAZsBG5KsmTYjUuSZjdnuCd5MfCrwEcAqupHVfUdYBOwq5u2C7iuG28CdlfV8ap6BDgErB9u25Kk0xnkyv2VwDTwD0nuTnJzkhcAy6vqcYBueUk3/1LgcM/2U11NkrRIBgn3pcDrgL+vqiuBH9DdgplF+tTqGZOSrUkmk0xOT08P1KwkaTCDhPsUMFVVd3Trn2Qm7I8mWQHQLY/1zF/Vs/1K4MipO62qnVU1XlXjY2NjZ9u/JKmPOcO9qr4FHE7y6q60AbgP2Ats7mqbgdu68V5gIsmyJGuAtcCBoXYtSTqtQV/5+0fAx5OcDzwMvIuZHwx7kmwBHgOuB6iqg0n2MPMD4ASwrapODr1zSdKsBgr3qroHGO/z0YZZ5u8Adpx9W5Kk+fAJVUlqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KCBwj3Jo0nuTXJPksmudlGSfUke6pYX9sy/IcmhJA8muXqhmpck9XcmV+5vqqorqmq8W98O7K+qtcD+bp0k64AJ4DJgI3BTkiVD7FmSNIf53JbZBOzqxruA63rqu6vqeFU9AhwC1s/jOJKkMzRouBfwhSR3Jtna1ZZX1eMA3fKSrn4pcLhn26mu9lOSbE0ymWRyenr67LqXJPW1dMB5V1XVkSSXAPuSPHCauelTq2cUqnYCOwHGx8ef8bkk6ewNdOVeVUe65THgVmZusxxNsgKgWx7rpk8Bq3o2XwkcGVbDkqS5zR
nuSV6Q5EVPj4G3Al8H9gKbu2mbgdu68V5gIsmyJGuAtcCBYTcuSZrdILdllgO3Jnl6/j9X1eeSfAXYk2QL8BhwPUBVHUyyB7gPOAFsq6qTC9K9JKmvOcO9qh4GLu9TfwLYMMs2O4Ad8+5OknRWfEJVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEDh3uSJUnuTvKZbv2iJPuSPNQtL+yZe0OSQ0keTHL1QjQuSZrdmVy5vw+4v2d9O7C/qtYC+7t1kqwDJoDLgI3ATUmWDKddSdIgBgr3JCuBa4Cbe8qbgF3deBdwXU99d1Udr6pHgEPA+qF0K0kayKBX7n8D/BnwVE9teVU9DtAtL+nqlwKHe+ZNdTVJ0iKZM9yT/AZwrKruHHCf6VOrPvvdmmQyyeT09PSAu5YkDWKQK/ergGuTPArsBt6c5J+Ao0lWAHTLY938KWBVz/YrgSOn7rSqdlbVeFWNj42NzeMUJEmnmjPcq+qGqlpZVauZ+UXpF6vq94C9wOZu2mbgtm68F5hIsizJGmAtcGDonUuSZrV0HtveCOxJsgV4DLgeoKoOJtkD3AecALZV1cl5dypJGtgZhXtV3Q7c3o2fADbMMm8HsGOevUmSzpJPqEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aM5wT/IzSQ4k+WqSg0k+1NUvSrIvyUPd8sKebW5IcijJg0muXsgTkCQ90yBX7seBN1fV5cAVwMYkbwS2A/urai2wv1snyTpgArgM2AjclGTJAvQuSZrFnOFeM/6vW31e918Bm4BdXX0XcF033gTsrqrjVfUIcAhYP8ymJUmnN9A99yRLktwDHAP2VdUdwPKqehygW17STb8UONyz+VRXkyQtkoHCvapOVtUVwEpgfZJfOM309NvFMyYlW5NMJpmcnp4eqFlJ0mDO6NsyVfUd4HZm7qUfTbICoFse66ZNAat6NlsJHOmzr51VNV5V42NjY2feuSRpVoN8W2YsyUu78fOBtwAPAHuBzd20zcBt3XgvMJFkWZI1wFrgwJD7liSdxtIB5qwAdnXfeDkP2FNVn0nyZWBPki3AY8D1AFV1MMke4D7gBLCtqk4uTPuSpH7mDPeq+hpwZZ/6E8CGWbbZAeyYd3eSpLPiE6qS1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBc4Z7klVJ/i3J/UkOJnlfV78oyb4kD3XLC3u2uSHJoSQPJrl6IU9AkvRMg1y5nwD+tKpeA7wR2JZkHbAd2F9Va4H93TrdZxPAZcBG4KYkSxaieUlSf3OGe1U9XlV3dePvA/cDlwKbgF3dtF3Add14E7C7qo5X1SPAIWD9kPuWJJ3GGd1zT7IauBK4A1heVY/DzA8A4JJu2qXA4Z7NprqaJGmRLB10YpIXAv8K/HFVfS/JrFP71KrP/rYCWwFe8YpXDNrGc8rq7Z9d1OM9euM1i3o8SQtnoCv3JM9jJtg/XlWf6spHk6zoPl8BHOvqU8Cqns1XAkdO3WdV7ayq8aoaHxsbO9v+JUl9DPJtmQAfAe6vqr/q+WgvsLkbbwZu66lPJFmWZA2wFjgwvJYlSXMZ5LbMVcA7gXuT3NPV3g/cCOxJsgV4DLgeoKoOJtkD3MfMN222VdXJYTcuSZrdnOFeVf9B//voABtm2WYHsGMefUmS5sEnVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJap
DhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaNGe
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Keep the Axes returned by pandas so the chart can be given a title and axis labels.\n",
"ax = wine[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")\n",
"# The trailing semicolon suppresses the text repr that would otherwise be echoed under the cell.\n",
"ax.set(title=\"Wine quality distribution (full dataset)\", xlabel=\"quality\", ylabel=\"number of wines\");"
]
},
{
"cell_type": "markdown",
"id": "931ba82d",
"metadata": {},
"source": [
"#### Dla podzbioru *train*"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f2d00efe",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1453</th>\n",
" <td>7.6</td>\n",
" <td>0.49</td>\n",
" <td>0.33</td>\n",
" <td>1.9</td>\n",
" <td>0.074</td>\n",
" <td>27.0</td>\n",
" <td>85.0</td>\n",
" <td>0.99706</td>\n",
" <td>3.41</td>\n",
" <td>0.58</td>\n",
" <td>9.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1295</th>\n",
" <td>6.6</td>\n",
" <td>0.63</td>\n",
" <td>0.00</td>\n",
" <td>4.3</td>\n",
" <td>0.093</td>\n",
" <td>51.0</td>\n",
" <td>77.5</td>\n",
" <td>0.99558</td>\n",
" <td>3.20</td>\n",
" <td>0.45</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>778</th>\n",
" <td>8.3</td>\n",
" <td>0.43</td>\n",
" <td>0.30</td>\n",
" <td>3.4</td>\n",
" <td>0.079</td>\n",
" <td>7.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99788</td>\n",
" <td>3.36</td>\n",
" <td>0.61</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>692</th>\n",
" <td>8.6</td>\n",
" <td>0.49</td>\n",
" <td>0.51</td>\n",
" <td>2.0</td>\n",
" <td>0.422</td>\n",
" <td>16.0</td>\n",
" <td>62.0</td>\n",
" <td>0.99790</td>\n",
" <td>3.03</td>\n",
" <td>1.17</td>\n",
" <td>9.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>6.8</td>\n",
" <td>0.64</td>\n",
" <td>0.10</td>\n",
" <td>2.1</td>\n",
" <td>0.085</td>\n",
" <td>18.0</td>\n",
" <td>101.0</td>\n",
" <td>0.99560</td>\n",
" <td>3.34</td>\n",
" <td>0.52</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"1453 7.6 0.49 0.33 1.9 0.074 \n",
"1295 6.6 0.63 0.00 4.3 0.093 \n",
"778 8.3 0.43 0.30 3.4 0.079 \n",
"692 8.6 0.49 0.51 2.0 0.422 \n",
"166 6.8 0.64 0.10 2.1 0.085 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"1453 27.0 85.0 0.99706 3.41 0.58 \n",
"1295 51.0 77.5 0.99558 3.20 0.45 \n",
"778 7.0 34.0 0.99788 3.36 0.61 \n",
"692 16.0 62.0 0.99790 3.03 1.17 \n",
"166 18.0 101.0 0.99560 3.34 0.52 \n",
"\n",
" alcohol quality \n",
"1453 9.0 5 \n",
"1295 9.5 5 \n",
"778 10.5 5 \n",
"692 9.0 5 \n",
"166 10.2 5 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e074e787",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" <td>1549.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.327566</td>\n",
" <td>0.528128</td>\n",
" <td>0.271252</td>\n",
" <td>2.529987</td>\n",
" <td>0.086944</td>\n",
" <td>15.832150</td>\n",
" <td>46.415107</td>\n",
" <td>0.996746</td>\n",
" <td>3.310484</td>\n",
" <td>0.656727</td>\n",
" <td>10.419141</td>\n",
" <td>5.635249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.744692</td>\n",
" <td>0.180152</td>\n",
" <td>0.194249</td>\n",
" <td>1.380202</td>\n",
" <td>0.043732</td>\n",
" <td>10.450522</td>\n",
" <td>32.884454</td>\n",
" <td>0.001877</td>\n",
" <td>0.154269</td>\n",
" <td>0.166558</td>\n",
" <td>1.067245</td>\n",
" <td>0.807313</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.860000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>13.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.430000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997860</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>0.790000</td>\n",
" <td>15.500000</td>\n",
" <td>0.467000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>1.980000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar \\\n",
"count 1549.000000 1549.000000 1549.000000 1549.000000 \n",
"mean 8.327566 0.528128 0.271252 2.529987 \n",
"std 1.744692 0.180152 0.194249 1.380202 \n",
"min 4.600000 0.120000 0.000000 0.900000 \n",
"25% 7.100000 0.390000 0.090000 1.900000 \n",
"50% 7.900000 0.520000 0.260000 2.200000 \n",
"75% 9.200000 0.640000 0.430000 2.600000 \n",
"max 15.900000 1.580000 0.790000 15.500000 \n",
"\n",
" chlorides free sulfur dioxide total sulfur dioxide density \\\n",
"count 1549.000000 1549.000000 1549.000000 1549.000000 \n",
"mean 0.086944 15.832150 46.415107 0.996746 \n",
"std 0.043732 10.450522 32.884454 0.001877 \n",
"min 0.012000 1.000000 6.000000 0.990070 \n",
"25% 0.070000 7.000000 22.000000 0.995600 \n",
"50% 0.079000 13.000000 38.000000 0.996750 \n",
"75% 0.090000 21.000000 62.000000 0.997860 \n",
"max 0.467000 72.000000 289.000000 1.003690 \n",
"\n",
" pH sulphates alcohol quality \n",
"count 1549.000000 1549.000000 1549.000000 1549.000000 \n",
"mean 3.310484 0.656727 10.419141 5.635249 \n",
"std 0.154269 0.166558 1.067245 0.807313 \n",
"min 2.860000 0.330000 8.400000 3.000000 \n",
"25% 3.210000 0.550000 9.500000 5.000000 \n",
"50% 3.310000 0.620000 10.100000 6.000000 \n",
"75% 3.400000 0.730000 11.100000 6.000000 \n",
"max 4.010000 1.980000 14.900000 8.000000 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_train.describe()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "34f511dd",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"8 17\n",
"7 193\n",
"6 618\n",
"5 660\n",
"4 51\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The index values are the wine quality scores; the values are the number of rows per score.\n",
"train_quality_counts = wine_train[\"quality\"].value_counts()\n",
"train_quality_counts.sort_index(ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "466eb483",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD1CAYAAACrz7WZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPwUlEQVR4nO3dbawcV33H8e8vNrg8kzQ3rhObOkiG4rQkgVtDlaoqGBGXVDiqGtVUpRYK9RvTUrVS5UAlxAtL6RtapDaVrEDrqoBlKCgGJMAyDVUlGnNDAsF5aEySxrcO9oWKhwIy2Pn3xU7Uxdnru/HdvZt7/P1I0Zw5c2bmP7L12/HZnUmqCklSWy6adAGSpNEz3CWpQYa7JDXIcJekBhnuktQgw12SGrRy0gUAXHrppbV+/fpJlyFJy8rdd9/97aqaGrTtWRHu69evZ2ZmZtJlSNKykuS/5tvmtIwkNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQc+Kh5ikFq3f9dklPd9jt96wpOfTs5t37pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ3yrZCaGN+aKI3PUHfuSV6a5BNJHkzyQJJfS3JJkoNJHu6WF/eNvyXJ0SQPJbl+fOVLkgYZdlrmg8DnquqXgKuBB4BdwKGq2gAc6tZJshHYBlwFbAFuS7Ji1IVLkua3YLgneTHwG8CHAKrqJ1X1XWArsLcbthe4sWtvBfZV1amqehQ4CmwabdmSpHMZ5s795cAc8A9J7klye5IXAKur6gmAbnlZN/4K4Fjf/rNdnyRpiQwT7iuB1wB/X1XXAj+km4KZRwb01dMGJTuSzCSZmZubG6pYSdJwhgn3WWC2qu7q1j9BL+xPJFkD0C1P9o1f17f/WuD42Qetqj1VNV1V01NTU+dbvyRpgAXDvaq+BRxL8squazNwP3AA2N71bQfu6NoHgG1JViW5EtgAHB5p1ZKkcxr2d+5/DHwkyXOBR4B30Ptg2J/kZuBx4CaAqjqSZD+9D4DTwM6qOjPyyiVJ8xoq3KvqXmB6wKbN84zfDew+/7IkSYvh6wckqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatBQ4Z7ksST3Jbk3yUzXd0mSg0ke7pYX942/JcnRJA8luX5cxUuSBnsmd+5vqKprqmq6W98FHKqqDcChbp0kG4FtwFXAFuC2JCtGWLMkaQGLmZbZCuzt2nuBG/v691XVqap6FDgKbFrEeSRJz9Cw4V7AF5LcnWRH17e6qp4A6JaXdf1XAMf69p3t+iRJS2TlkOOuq6rjSS4DDiZ58BxjM6Cvnjao9yGxA+BlL3vZkGVIkoYx1J17VR3vlieBT9GbZjmRZA1AtzzZDZ8F1vXtvhY4PuCYe6pquqqmp6amzv8KJElPs2C4J3lBkhc91QbeDHwDOABs74ZtB+7o2geAbUlWJbkS2AAcHnXhkqT5DTMtsxr4VJKnxn+0qj6X5CvA/iQ3A48DNwFU1ZEk+4H7gdPAzqo6M5bqJUkDLRjuVfUIcPWA/u8Am+fZZzewe9HVSZLOi0+oSlKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBg0d7klWJLknyWe69UuSHEzycLe8uG/sLUmOJnkoyfXjKFySNL9ncuf+buCBvvVdwKGq2gAc6tZJshHYBlwFbAFuS7JiNOVKkoYxVLgnWQvcANze170V2Nu19wI39vXvq6pTVfUocBTYNJ
JqJUlDGfbO/W+AvwCe7OtbXVVPAHTLy7r+K4BjfeNmuz5J0hJZMNyT/DZwsqruHvKYGdBXA467I8lMkpm5ubkhDy1JGsYwd+7XAW9N8hiwD3hjkn8GTiRZA9AtT3bjZ4F1ffuvBY6ffdCq2lNV01U1PTU1tYhLkCSdbcFwr6pbqmptVa2n90XpF6vqD4ADwPZu2Hbgjq59ANiWZFWSK4ENwOGRVy5JmtfKRex7K7A/yc3A48BNAFV1JMl+4H7gNLCzqs4sulJJ0tCeUbhX1Z3AnV37O8DmecbtBnYvsjZJ0nnyCVVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGrRguCf5uSSHk3wtyZEk7+/6L0lyMMnD3fLivn1uSXI0yUNJrh/nBUiSnm6YO/dTwBur6mrgGmBLktcDu4BDVbUBONStk2QjsA24CtgC3JZkxRhqlyTNY8Fwr57/7Vaf0/1XwFZgb9e/F7ixa28F9lXVqap6FDgKbBpl0ZKkcxtqzj3JiiT3AieBg1V1F7C6qp4A6JaXdcOvAI717T7b9Z19zB1JZpLMzM3NLeISJElnGyrcq+pMVV0DrAU2JfnlcwzPoEMMOOaeqpququmpqamhipUkDecZ/Vqmqr4L3ElvLv1EkjUA3fJkN2wWWNe321rg+GILlSQNb5hfy0wleWnXfh7wJuBB4ACwvRu2Hbijax8AtiVZleRKYANweMR1S5LOYeUQY9YAe7tfvFwE7K+qzyT5MrA/yc3A48BNAFV1JMl+4H7gNLCzqs6Mp3xJ0iALhntVfR24dkD/d4DN8+yzG9i96OokSefFJ1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDFgz3JOuS/GuSB5IcSfLurv+SJAeTPNwtL+7b55YkR5M8lOT6cV6AJOnphrlzPw38eVW9Cng9sDPJRmAXcKiqNgCHunW6bduAq4AtwG1JVoyjeEnSYAuGe1U9UVVf7do/AB4ArgC2Anu7YXuBG7v2VmBfVZ2qqkeBo8CmEdctSTqHZzTnnmQ9cC1wF7C6qp6A3gcAcFk37ArgWN9us12fJGmJDB3uSV4I/Avwp1X1/XMNHdBXA463I8lMkpm5ublhy5AkDWGocE/yHHrB/pGq+mTXfSLJmm77GuBk1z8LrOvbfS1w/OxjVtWeqpququmpqanzrV+SNMDKhQYkCfAh4IGq+kDfpgPAduDWbnlHX/9Hk3wAuBzYABweZdEXivW7Pruk53vs1huW9HySxmfBcAeuA94O3Jfk3q7vPfRCfX+Sm4HHgZsAqupIkv3A/fR+abOzqs6MunBJ0vwWDPeq+ncGz6MDbJ5nn93A7kXUJUlaBJ9QlaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDVow3JN8OMnJJN/o67skycEkD3fLi/u23ZLkaJKHklw/rsIlSfMb5s79H4EtZ/XtAg5V1QbgULdOko3ANuCqbp/bkqwYWbWSpKEsGO5V9W/A/5zVvRXY27X3Ajf29e+rqlNV9ShwFNg0mlIlScM63zn31VX1BEC3vKzrvwI41jdutuuTJC2hUX+hmgF9NXBgsiPJTJKZubm5EZchSRe28w33E0nWAHTLk13/LLCub9xa4PigA1TVnqqarqrpqamp8yxDkjTI+Yb7AWB7194O3NHXvy3JqiRXAhuAw4srUZL0TK1caECSjw
G/CVyaZBZ4H3ArsD/JzcDjwE0AVXUkyX7gfuA0sLOqzoypdknSPBYM96p62zybNs8zfjewezFFSZIWxydUJalBhrs
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"wine_train[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")"
]
},
  {
   "cell_type": "markdown",
   "id": "040a1d8b",
   "metadata": {},
   "source": [
    "#### Dla podzbioru *test*"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d6b697ec",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>856</th>\n",
" <td>9.3</td>\n",
" <td>0.36</td>\n",
" <td>0.39</td>\n",
" <td>1.5</td>\n",
" <td>0.080</td>\n",
" <td>41.0</td>\n",
" <td>55.0</td>\n",
" <td>0.99652</td>\n",
" <td>3.47</td>\n",
" <td>0.73</td>\n",
" <td>10.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1142</th>\n",
" <td>6.9</td>\n",
" <td>0.45</td>\n",
" <td>0.11</td>\n",
" <td>2.4</td>\n",
" <td>0.043</td>\n",
" <td>6.0</td>\n",
" <td>12.0</td>\n",
" <td>0.99354</td>\n",
" <td>3.30</td>\n",
" <td>0.65</td>\n",
" <td>11.4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>538</th>\n",
" <td>12.9</td>\n",
" <td>0.35</td>\n",
" <td>0.49</td>\n",
" <td>5.8</td>\n",
" <td>0.066</td>\n",
" <td>5.0</td>\n",
" <td>35.0</td>\n",
" <td>1.00140</td>\n",
" <td>3.20</td>\n",
" <td>0.66</td>\n",
" <td>12.0</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1324</th>\n",
" <td>6.7</td>\n",
" <td>0.46</td>\n",
" <td>0.24</td>\n",
" <td>1.7</td>\n",
" <td>0.077</td>\n",
" <td>18.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99480</td>\n",
" <td>3.39</td>\n",
" <td>0.60</td>\n",
" <td>10.6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>288</th>\n",
" <td>8.7</td>\n",
" <td>0.52</td>\n",
" <td>0.09</td>\n",
" <td>2.5</td>\n",
" <td>0.091</td>\n",
" <td>20.0</td>\n",
" <td>49.0</td>\n",
" <td>0.99760</td>\n",
" <td>3.34</td>\n",
" <td>0.86</td>\n",
" <td>10.6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar chlorides \\\n",
"856 9.3 0.36 0.39 1.5 0.080 \n",
"1142 6.9 0.45 0.11 2.4 0.043 \n",
"538 12.9 0.35 0.49 5.8 0.066 \n",
"1324 6.7 0.46 0.24 1.7 0.077 \n",
"288 8.7 0.52 0.09 2.5 0.091 \n",
"\n",
" free sulfur dioxide total sulfur dioxide density pH sulphates \\\n",
"856 41.0 55.0 0.99652 3.47 0.73 \n",
"1142 6.0 12.0 0.99354 3.30 0.65 \n",
"538 5.0 35.0 1.00140 3.20 0.66 \n",
"1324 18.0 34.0 0.99480 3.39 0.60 \n",
"288 20.0 49.0 0.99760 3.34 0.86 \n",
"\n",
" alcohol quality \n",
"856 10.9 6 \n",
"1142 11.4 6 \n",
"538 12.0 7 \n",
"1324 10.6 6 \n",
"288 10.6 7 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "bc91d2fb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.00000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.074000</td>\n",
" <td>0.518300</td>\n",
" <td>0.262400</td>\n",
" <td>2.812000</td>\n",
" <td>0.10364</td>\n",
" <td>17.200000</td>\n",
" <td>48.100000</td>\n",
" <td>0.996779</td>\n",
" <td>3.330600</td>\n",
" <td>0.702200</td>\n",
" <td>10.542000</td>\n",
" <td>5.660000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.622899</td>\n",
" <td>0.142197</td>\n",
" <td>0.213155</td>\n",
" <td>2.137769</td>\n",
" <td>0.10746</td>\n",
" <td>10.777906</td>\n",
" <td>33.525653</td>\n",
" <td>0.002199</td>\n",
" <td>0.158338</td>\n",
" <td>0.242035</td>\n",
" <td>1.018621</td>\n",
" <td>0.823383</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>5.600000</td>\n",
" <td>0.310000</td>\n",
" <td>0.000000</td>\n",
" <td>1.500000</td>\n",
" <td>0.03800</td>\n",
" <td>3.000000</td>\n",
" <td>8.000000</td>\n",
" <td>0.992920</td>\n",
" <td>2.740000</td>\n",
" <td>0.370000</td>\n",
" <td>9.000000</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>6.900000</td>\n",
" <td>0.402500</td>\n",
" <td>0.095000</td>\n",
" <td>1.900000</td>\n",
" <td>0.07325</td>\n",
" <td>10.000000</td>\n",
" <td>25.250000</td>\n",
" <td>0.995445</td>\n",
" <td>3.260000</td>\n",
" <td>0.590000</td>\n",
" <td>9.725000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.650000</td>\n",
" <td>0.500000</td>\n",
" <td>0.245000</td>\n",
" <td>2.200000</td>\n",
" <td>0.08000</td>\n",
" <td>15.000000</td>\n",
" <td>36.500000</td>\n",
" <td>0.996560</td>\n",
" <td>3.320000</td>\n",
" <td>0.655000</td>\n",
" <td>10.350000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.150000</td>\n",
" <td>0.625000</td>\n",
" <td>0.400000</td>\n",
" <td>2.675000</td>\n",
" <td>0.08625</td>\n",
" <td>23.750000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997600</td>\n",
" <td>3.400000</td>\n",
" <td>0.770000</td>\n",
" <td>11.175000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>12.900000</td>\n",
" <td>0.980000</td>\n",
" <td>1.000000</td>\n",
" <td>15.400000</td>\n",
" <td>0.61100</td>\n",
" <td>55.000000</td>\n",
" <td>143.000000</td>\n",
" <td>1.003690</td>\n",
" <td>3.710000</td>\n",
" <td>2.000000</td>\n",
" <td>12.800000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid residual sugar \\\n",
"count 50.000000 50.000000 50.000000 50.000000 \n",
"mean 8.074000 0.518300 0.262400 2.812000 \n",
"std 1.622899 0.142197 0.213155 2.137769 \n",
"min 5.600000 0.310000 0.000000 1.500000 \n",
"25% 6.900000 0.402500 0.095000 1.900000 \n",
"50% 7.650000 0.500000 0.245000 2.200000 \n",
"75% 9.150000 0.625000 0.400000 2.675000 \n",
"max 12.900000 0.980000 1.000000 15.400000 \n",
"\n",
" chlorides free sulfur dioxide total sulfur dioxide density \\\n",
"count 50.00000 50.000000 50.000000 50.000000 \n",
"mean 0.10364 17.200000 48.100000 0.996779 \n",
"std 0.10746 10.777906 33.525653 0.002199 \n",
"min 0.03800 3.000000 8.000000 0.992920 \n",
"25% 0.07325 10.000000 25.250000 0.995445 \n",
"50% 0.08000 15.000000 36.500000 0.996560 \n",
"75% 0.08625 23.750000 62.000000 0.997600 \n",
"max 0.61100 55.000000 143.000000 1.003690 \n",
"\n",
" pH sulphates alcohol quality \n",
"count 50.000000 50.000000 50.000000 50.000000 \n",
"mean 3.330600 0.702200 10.542000 5.660000 \n",
"std 0.158338 0.242035 1.018621 0.823383 \n",
"min 2.740000 0.370000 9.000000 4.000000 \n",
"25% 3.260000 0.590000 9.725000 5.000000 \n",
"50% 3.320000 0.655000 10.350000 6.000000 \n",
"75% 3.400000 0.770000 11.175000 6.000000 \n",
"max 3.710000 2.000000 12.800000 8.000000 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test.describe()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "72ce755c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8 1\n",
"7 6\n",
"6 20\n",
"5 21\n",
"4 2\n",
"Name: quality, dtype: int64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
   ],
   "source": [
    "wine_test[\"quality\"].value_counts().sort_index(ascending=False)  # indeks = jakość wina, wartość = liczba próbek"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "fc355d95",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD1CAYAAABeMT4pAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQu0lEQVR4nO3df6xfdX3H8edLfiwKTFAuyK9atlQiOkF3UzFsCYiQUgg4Y2a7RZnDVQ1kmvnHOl3c9h9mUTMHk3RCgISBMkExVIEwJ5KA0GJBEJDa4ahltGjkh5hg9b0/7ml6vX6/vbffc3u/5cPzkXzzPefz+Zxz3veb9tXTzz3ne1JVSJLa9bJxFyBJ2rMMeklqnEEvSY0z6CWpcQa9JDXOoJekxu077gIGOfTQQ2vx4sXjLkOSXjTWr1//VFVNDOrbK4N+8eLFrFu3btxlSNKLRpIfDetz6kaSGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuL3yhilJe9bi1TeNuwQAHrvorHGX8JLgGb0kNc6gl6TGGfSS1DiDXpIaZ9BLUuNmDfokxyT5ZpKHkjyY5CNd+6uS3Jrk0e79kCHbL0vySJKNSVbP9w8gSdq1uZzRbwc+VlWvB04CLkhyPLAauK2qlgC3deu/Ick+wCXAmcDxwMpuW0nSApk16Kvqiaq6t1t+FngIOAo4F7iyG3Yl8M4Bmy8FNlbVpqp6Abi2206StEB2a44+yWLgzcB3gMOr6gmY+scAOGzAJkcBj09b39y1SZIWyJzvjE1yIPBl4KNV9UySOW02oK2G7H8VsApg0aJFcy1LmjPvBtVL1ZzO6JPsx1TIX11V13fNTyY5ous/Atg6YNPNwDHT1o8Gtgw6RlWtqarJqpqcmBj4fFtJ0gjmctVNgMuAh6rqM9O6bgTO65bPA746YPN7gCVJjk2yP7Ci206StEDmckZ/MvBe4O1JNnSv5cBFwOlJHgVO79ZJcmSStQBVtR24ELiZqV/ifqmqHtwDP4ckaYhZ5+ir6g4Gz7UDnDZg/BZg+bT1tcDaUQuUJPXjnbGS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMbN+uCRJJcDZwNbq+qNXdsXgeO6IQcDP6uqEwds+xjwLPArYHtVTc5L1ZKkOZs16IErgIuBq3Y0VNV7diwn+TTw9C62P7Wqnhq1QElSP3N5lODtSRYP6useHP6nwNvnuS5J0jzpO0f/x8CTVfXokP4CbkmyPsmqnseSJI1gLlM3u7ISuGYX/SdX1ZYkhwG3Jnm4qm4fNLD7h2AVwKJFi3qWJUnaYeQz+iT7Au8CvjhsTFVt6d63AjcAS3cxdk1VTVbV5MTExKhlSZJm6DN18w7g4araPKgzyQFJDtqxDJwBPNDjeJKkEcwa9EmuAe4EjkuyOcn5XdcKZkzbJDkyydpu9XDgjiT3AXcDN1XVN+avdEnSXMzlqpuVQ9r/YkDbFmB5t7wJOKFnfZKknrwzVpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuLk8YeryJFuTPDCt7R+T/DjJhu61fMi2y5I8kmRjktXzWbgkaW7mckZ/BbBsQPtnq+rE7rV2ZmeSfYBLgDOB44GVSY7vU6wkaffNGvRVdTvw0xH2vRTYWFWbquoF4Frg3BH2I0nqoc8c/YVJ7u+mdg4Z0H8U8Pi09c1dmyRpAY0a9J8Hfh84EXgC+PSAMRnQVsN2mGRVknVJ1m3btm3EsiRJM40U9FX1ZFX9qqp+Dfw7U9M0M20Gjpm2fjSwZRf7XFNVk1U1OTExMUpZkqQBRgr6JEdMW/0T4IEBw+4BliQ5Nsn+wArgxlGOJ0ka3b6zDUhyDX
AKcGiSzcA/AKckOZGpqZjHgA92Y48EvlBVy6tqe5ILgZuBfYDLq+rBPfFDSJKGmzXoq2rlgObLhozdAiyftr4W+K1LLyVJC8c7YyWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWrcrEHfPfx7a5IHprX9c5KHu4eD35Dk4CHbPpbke0k2JFk3j3VLkuZoLmf0VwDLZrTdCryxqt4E/AD4u11sf2pVnVhVk6OVKEnqY9agr6rbgZ/OaLulqrZ3q3cx9eBvSdJeaD7m6P8S+PqQvgJuSbI+yap5OJYkaTfN+szYXUnyCWA7cPWQISdX1ZYkhwG3Jnm4+x/CoH2tAlYBLFq0qE9ZkqRpRj6jT3IecDbw51VVg8Z0DwunqrYCNwBLh+2vqtZU1WRVTU5MTIxaliRphpGCPsky4G+Bc6rq+SFjDkhy0I5l4AzggUFjJUl7zlwur7wGuBM4LsnmJOcDFwMHMTUdsyHJpd3YI5Os7TY9HLgjyX3A3cBNVfWNPfJTSJKGmnWOvqpWDmi+bMjYLcDybnkTcEKv6iRJvXlnrCQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcXN5wtTlSbYmeWBa26uS3Jrk0e79kCHbLkvySJKNSVbPZ+GSpLmZyxn9FcCyGW2rgduqaglwW7f+G5LsA1wCnAkcD6xMcnyvaiVJu23WoK+q24Gfzmg+F7iyW74SeOeATZcCG6tqU1W9AFzbbSdJWkCjztEfXlVPAHTvhw0YcxTw+LT1zV2bJGkB7clfxmZAWw0dnKxKsi7Jum3btu3BsiTppWXUoH8yyREA3fvWAWM2A8dMWz8a2DJsh1W1pqomq2pyYmJixLIkSTONGvQ3Aud1y+cBXx0w5h5gSZJjk+wPrOi2kyQtoLlcXnkNcCdwXJLNSc4HLgJOT/IocHq3TpIjk6wFqKrtwIXAzcBDwJeq6sE982NIkobZd7YBVbVySNdpA8ZuAZZPW18LrB25OklSb94ZK0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklq3MhBn+S4JBumvZ5J8tEZY05J8vS0MZ/sXbEkabfM+oSpYarqEeBEgCT7AD8Gbhgw9NtVdfaox5Ek9TNfUzenAT+sqh/N0/4kSfNkvoJ+BXDNkL63JbkvydeTvGHYDpKsSrIuybpt27bNU1mSpN5Bn2R/4BzgugHd9wKvraoTgH8FvjJsP1W1pqomq2pyYmKib1mSpM58nNGfCdxbVU/O7KiqZ6rquW55LbBfkkPn4ZiSpDmaj6BfyZBpmySvSZJueWl3vJ/MwzElSXM08lU3AEleAZwOfHBa24cAqupS4N3Ah5NsB34BrKiq6nNMSdLu6RX0VfU88OoZbZdOW74YuLjPMSRJ/XhnrCQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNa7XnbHa+y1efdO4SwDgsYvOGncJ0kuWZ/SS1DiDXpIaZ9BLUuMMeklqnEEvSY3rFfRJHkvyvSQbkqwb0J8kn0uyMcn9Sd7S53iSpN03H5dXnlpVTw3pOxNY0r3eCny+e5ckLZA9PXVzLnBVTbkLODjJEXv4mJKkafoGfQG3JFmfZNWA/qOAx6etb+7aJEkLpO/UzclVtSXJYcCtSR6uqtun9WfANgMfDt79Q7EKYNGiRT3LkiTt0OuMvqq2dO9bgRuApTOGbAaOmbZ+NLBlyL7WVNVkVU1OTEz0KUuSNM3IQZ/kgCQH7VgGzgAemDHsRuB93dU3JwFPV9UTI1crSdptfaZuDgduSLJjP/9RVd9I8iGAqroUWAssBzYCzwPv71
euJGl3jRz0VbUJOGFA+6XTlgu4YNRjSJL6885YSWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMekl
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"wine_test[\"quality\"].value_counts().sort_index(ascending=False).plot(kind=\"bar\")"
]
},
{
"cell_type": "markdown",
"id": "518f05c2",
"metadata": {},
"source": [
"## Normalizacja"
]
},
{
"cell_type": "markdown",
"id": "0d904976",
"metadata": {},
"source": [
"# Podział z wyróżnieniem data/target"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2f1c75ab",
"metadata": {},
"outputs": [],
"source": [
"x_train,x_test,y_train,y_test = train_test_split(wine.iloc[:,:-1],wine.iloc[:,-1], test_size=0.2, random_state=1,stratify=wine[\"quality\"])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "c2b16170",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1279"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_train.value_counts().sum()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "772560b4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"320"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_test.value_counts().sum()"
]
},
{
"cell_type": "markdown",
"id": "fd77e875",
"metadata": {},
"source": [
"## Normalizacja"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "a4ac6f00",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import MinMaxScaler\n",
"norm = MinMaxScaler()\n",
"norm_fit = norm.fit(x_train)\n",
"norm_x_train = norm_fit.transform(x_train)\n",
"norm_x_test = norm_fit.transform(x_test)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "be0d1121",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.31858407, 0.15702479, 0.50632911, 0.0890411 , 0.1010989 ,\n",
" 0.07042254, 0.01413428, 0.38839941, 0.39130435, 0.21212121,\n",
" 0.43076923],\n",
" [0.26548673, 0.14049587, 0.62025316, 0.12328767, 0.17582418,\n",
" 0.33802817, 0.19081272, 0.51615272, 0.39130435, 0.16969697,\n",
" 0.26153846],\n",
" [0.23893805, 0.17355372, 0.59493671, 0.08219178, 0.14285714,\n",
" 0.05633803, 0.01766784, 0.42070485, 0.40869565, 0.12121212,\n",
" 0.29230769],\n",
" [0.19469027, 0.31404959, 0.13924051, 0.04109589, 0.13846154,\n",
" 0.21126761, 0.15194346, 0.39500734, 0.43478261, 0.27878788,\n",
" 0.16923077],\n",
" [0.27433628, 0.65702479, 0.15189873, 0.0890411 , 0.28791209,\n",
" 0.08450704, 0.06007067, 0.46475771, 0.42608696, 0.19393939,\n",
" 0.27692308]])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"norm_x_train[:5]"
]
},
  {
   "cell_type": "markdown",
   "id": "1af8555b",
   "metadata": {},
   "source": [
    "## Brak wartości null do uzupełnienia"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "670062c0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"fixed acidity 0\n",
"volatile acidity 0\n",
"citric acid 0\n",
"residual sugar 0\n",
"chlorides 0\n",
"free sulfur dioxide 0\n",
"total sulfur dioxide 0\n",
"density 0\n",
"pH 0\n",
"sulphates 0\n",
"alcohol 0\n",
"quality 0\n",
"dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine.isnull().sum()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}