ium_434788/Zadanie_02_434788.ipynb
2021-05-13 11:58:46 +02:00

3306 lines
150 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "IUM_1_434788.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "shaFKPEixPn4"
},
"source": [
"# 1. Pobranie zbioru danych z Repozytorium"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-03GDjWtxD7W",
"outputId": "3cefd33d-3ef4-4c16-963e-ffa6e9e781de"
},
"source": [
"!curl -OL https://git.wmi.amu.edu.pl/s434788/ium_434788/raw/branch/master/winequality-red.csv"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 98k 0 98k 0 0 74502 0 --:--:-- 0:00:01 --:--:-- 74502\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "sAUNi0ylxWUm",
"outputId": "fe879388-072d-4845-f3b5-f06a4fca5f1e"
},
"source": [
"import pandas as pd\n",
"wine=pd.read_csv('winequality-red.csv')\n",
"wine"
],
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 5\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 5\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 6\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 5\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 6\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 6\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 5\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 6\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4H-i6DJlxduP"
},
"source": [
"# 2. Podział na zbiory test/train przy pomocy SciKit + (poprawka z 26.03.2021 przy pomocy basha)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Rf49qKC-eqEU"
},
"source": [
"## 2.1 SciKit"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nZO_naLatT0o"
},
"source": [
"Próbowałem również podzielić zbiór na podzbiory Train:Dev:Test w proporcji 6:2:2 przy pomocy basha, ale uznałem, że wygodniejsze jest korzystanie z `train_test_split()`. Docelowo podział będzie zwracał 4 zmienne: `X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)`, jednak chciałem zachować konwencję z przykładu z ćwiczeń."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ebHl5Aw1uuK1"
},
"source": [
"Dokumentacja funkcji `train_test_split`: https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html"
]
},
{
"cell_type": "code",
"metadata": {
"id": "X88VMhb0x3gJ"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"wine_train, wine_test = train_test_split(wine, test_size=360,train_size=959, random_state=1)"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OzjEfgNOyAWs",
"outputId": "7e7bb70f-2b1e-422c-9500-d411884d8d5a"
},
"source": [
"wine_test[\"quality\"].value_counts()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 155\n",
"6 149\n",
"7 37\n",
"4 16\n",
"8 2\n",
"3 1\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SpQZIuSxyAd0",
"outputId": "96505a9a-d2e7-44a1-b2cf-ee40d6d7d3d0"
},
"source": [
"wine_train[\"quality\"].value_counts()"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 400\n",
"6 388\n",
"7 125\n",
"4 30\n",
"8 11\n",
"3 5\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YK0491tAeupD"
},
"source": [
"## 2.2 Bash"
]
},
{
"cell_type": "code",
"metadata": {
"id": "1idNUz-9eyfJ"
},
"source": [
"!head -n 1 winequality-red.csv > header.csv\n",
"!tail -n +2 winequality-red.csv | shuf > data.shuffled\n",
"\n",
"!head -n 266 data.shuffled > wine.data.test\n",
"!head -n 532 data.shuffled | tail -n 266 > wine.data.dev\n",
"!tail -n +333 data.shuffled > wine.data.train\n",
"\n",
"!cat header.csv wine.data.test > test.csv\n",
"!cat header.csv wine.data.dev > dev.csv\n",
"!cat header.csv wine.data.train > train.csv"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-C4RRDH2fFEp",
"outputId": "93944a72-838c-4e2b-a907-de4b0902fcb1"
},
"source": [
"!wc -l test.csv\n",
"!wc -l dev.csv\n",
"!wc -l train.csv"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"267 test.csv\n",
"267 dev.csv\n",
"1268 train.csv\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wLlI-k_jfb70"
},
"source": [
"wine_test_bash=pd.read_csv('test.csv')\n",
"wine_dev_bash=pd.read_csv('dev.csv')\n",
"wine_train_bash=pd.read_csv('train.csv')"
],
"execution_count": 8,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "wAq8KmNdyNOm"
},
"source": [
"# 3. Statystyki dla zbiorów"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Wcq9YSTfXbs1"
},
"source": [
"from matplotlib import pyplot as plt\n",
"import seaborn as sns"
],
"execution_count": 9,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "EjDFpgdPy_of"
},
"source": [
"## 3.1. Zbiór Train (bash)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "SscUak3AydG0",
"outputId": "5f0bd8df-1753-4211-e3a6-8ce2685146f9"
},
"source": [
"wine_train_bash"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10.0</td>\n",
" <td>0.380</td>\n",
" <td>0.38</td>\n",
" <td>1.6</td>\n",
" <td>0.169</td>\n",
" <td>27.0</td>\n",
" <td>90.0</td>\n",
" <td>0.99914</td>\n",
" <td>3.15</td>\n",
" <td>0.65</td>\n",
" <td>8.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6.7</td>\n",
" <td>0.460</td>\n",
" <td>0.24</td>\n",
" <td>1.7</td>\n",
" <td>0.077</td>\n",
" <td>18.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99480</td>\n",
" <td>3.39</td>\n",
" <td>0.60</td>\n",
" <td>10.6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.2</td>\n",
" <td>0.695</td>\n",
" <td>0.13</td>\n",
" <td>2.0</td>\n",
" <td>0.076</td>\n",
" <td>12.0</td>\n",
" <td>20.0</td>\n",
" <td>0.99546</td>\n",
" <td>3.29</td>\n",
" <td>0.54</td>\n",
" <td>10.1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12.5</td>\n",
" <td>0.600</td>\n",
" <td>0.49</td>\n",
" <td>4.3</td>\n",
" <td>0.100</td>\n",
" <td>5.0</td>\n",
" <td>14.0</td>\n",
" <td>1.00100</td>\n",
" <td>3.25</td>\n",
" <td>0.74</td>\n",
" <td>11.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8.3</td>\n",
" <td>0.560</td>\n",
" <td>0.22</td>\n",
" <td>2.4</td>\n",
" <td>0.082</td>\n",
" <td>10.0</td>\n",
" <td>86.0</td>\n",
" <td>0.99830</td>\n",
" <td>3.37</td>\n",
" <td>0.62</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1262</th>\n",
" <td>7.8</td>\n",
" <td>0.560</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.082</td>\n",
" <td>7.0</td>\n",
" <td>28.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.37</td>\n",
" <td>0.50</td>\n",
" <td>9.4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1263</th>\n",
" <td>5.8</td>\n",
" <td>0.680</td>\n",
" <td>0.02</td>\n",
" <td>1.8</td>\n",
" <td>0.087</td>\n",
" <td>21.0</td>\n",
" <td>94.0</td>\n",
" <td>0.99440</td>\n",
" <td>3.54</td>\n",
" <td>0.52</td>\n",
" <td>10.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1264</th>\n",
" <td>7.7</td>\n",
" <td>0.630</td>\n",
" <td>0.08</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>15.0</td>\n",
" <td>27.0</td>\n",
" <td>0.99670</td>\n",
" <td>3.32</td>\n",
" <td>0.54</td>\n",
" <td>9.5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1265</th>\n",
" <td>7.1</td>\n",
" <td>0.600</td>\n",
" <td>0.00</td>\n",
" <td>1.8</td>\n",
" <td>0.074</td>\n",
" <td>16.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99720</td>\n",
" <td>3.47</td>\n",
" <td>0.70</td>\n",
" <td>9.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1266</th>\n",
" <td>10.4</td>\n",
" <td>0.610</td>\n",
" <td>0.49</td>\n",
" <td>2.1</td>\n",
" <td>0.200</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>0.99940</td>\n",
" <td>3.16</td>\n",
" <td>0.63</td>\n",
" <td>8.4</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1267 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 10.0 0.380 0.38 ... 0.65 8.5 5\n",
"1 6.7 0.460 0.24 ... 0.60 10.6 6\n",
"2 7.2 0.695 0.13 ... 0.54 10.1 5\n",
"3 12.5 0.600 0.49 ... 0.74 11.9 6\n",
"4 8.3 0.560 0.22 ... 0.62 9.5 5\n",
"... ... ... ... ... ... ... ...\n",
"1262 7.8 0.560 0.12 ... 0.50 9.4 6\n",
"1263 5.8 0.680 0.02 ... 0.52 10.0 5\n",
"1264 7.7 0.630 0.08 ... 0.54 9.5 6\n",
"1265 7.1 0.600 0.00 ... 0.70 9.9 6\n",
"1266 10.4 0.610 0.49 ... 0.63 8.4 3\n",
"\n",
"[1267 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hZAn8j4byMF2",
"outputId": "c47596aa-0d54-490f-c892-6ee5987a372d"
},
"source": [
"wine_train_bash[\"quality\"].value_counts()"
],
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 550\n",
"6 498\n",
"7 157\n",
"4 39\n",
"8 15\n",
"3 8\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "EOEuj8sRyL8v",
"outputId": "d2f102f6-d10c-4dc4-ae3f-fd34dc4e5985"
},
"source": [
"wine_train_bash.describe(include='all')"
],
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.344199</td>\n",
" <td>0.525888</td>\n",
" <td>0.273891</td>\n",
" <td>2.574033</td>\n",
" <td>0.087419</td>\n",
" <td>15.889897</td>\n",
" <td>46.146014</td>\n",
" <td>0.996799</td>\n",
" <td>3.310016</td>\n",
" <td>0.655730</td>\n",
" <td>10.396725</td>\n",
" <td>5.632991</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.789253</td>\n",
" <td>0.177804</td>\n",
" <td>0.196141</td>\n",
" <td>1.453463</td>\n",
" <td>0.046754</td>\n",
" <td>10.603674</td>\n",
" <td>32.734818</td>\n",
" <td>0.001893</td>\n",
" <td>0.154047</td>\n",
" <td>0.166206</td>\n",
" <td>1.042353</td>\n",
" <td>0.806931</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.700000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
" <td>0.370000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.071000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995660</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.080000</td>\n",
" <td>13.000000</td>\n",
" <td>37.000000</td>\n",
" <td>0.996800</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.300000</td>\n",
" <td>0.640000</td>\n",
" <td>0.430000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>22.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997870</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.000000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>72.000000</td>\n",
" <td>278.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 1267.000000 1267.000000 ... 1267.000000 1267.000000\n",
"mean 8.344199 0.525888 ... 10.396725 5.632991\n",
"std 1.789253 0.177804 ... 1.042353 0.806931\n",
"min 4.700000 0.120000 ... 8.400000 3.000000\n",
"25% 7.100000 0.390000 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.200000 6.000000\n",
"75% 9.300000 0.640000 ... 11.000000 6.000000\n",
"max 15.900000 1.580000 ... 14.900000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 12
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JWXJ2CZQuylE"
},
"source": [
"Testowy wykres: średnia wartość `volatile acidity` dla każdej oceny `quality` (zbiór Train)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 408
},
"id": "HbsfwCL7XpNe",
"outputId": "249d8110-1b17-41ad-e1b1-18b0aa12ff06"
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine_train_bash)"
],
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504f98950>"
]
},
"metadata": {
"tags": []
},
"execution_count": 13
},
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1W_oRCVczIgJ"
},
"source": [
"## 3.2. Zbiór Test (bash)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "LJzygNqKzOWY",
"outputId": "d4f8dd3b-793c-4e02-a6ea-fbdb8fbf7a19"
},
"source": [
"wine_test_bash"
],
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.1</td>\n",
" <td>0.60</td>\n",
" <td>0.01</td>\n",
" <td>2.3</td>\n",
" <td>0.079</td>\n",
" <td>24.0</td>\n",
" <td>37.0</td>\n",
" <td>0.99514</td>\n",
" <td>3.40</td>\n",
" <td>0.61</td>\n",
" <td>10.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.61</td>\n",
" <td>0.29</td>\n",
" <td>1.6</td>\n",
" <td>0.114</td>\n",
" <td>9.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99740</td>\n",
" <td>3.26</td>\n",
" <td>1.56</td>\n",
" <td>9.1</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.1</td>\n",
" <td>0.63</td>\n",
" <td>0.06</td>\n",
" <td>2.0</td>\n",
" <td>0.083</td>\n",
" <td>8.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99855</td>\n",
" <td>3.67</td>\n",
" <td>0.73</td>\n",
" <td>9.6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>9.1</td>\n",
" <td>0.30</td>\n",
" <td>0.41</td>\n",
" <td>2.0</td>\n",
" <td>0.068</td>\n",
" <td>10.0</td>\n",
" <td>24.0</td>\n",
" <td>0.99523</td>\n",
" <td>3.27</td>\n",
" <td>0.85</td>\n",
" <td>11.7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>9.0</td>\n",
" <td>0.46</td>\n",
" <td>0.31</td>\n",
" <td>2.8</td>\n",
" <td>0.093</td>\n",
" <td>19.0</td>\n",
" <td>98.0</td>\n",
" <td>0.99815</td>\n",
" <td>3.32</td>\n",
" <td>0.63</td>\n",
" <td>9.5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>261</th>\n",
" <td>7.2</td>\n",
" <td>0.60</td>\n",
" <td>0.04</td>\n",
" <td>2.5</td>\n",
" <td>0.076</td>\n",
" <td>18.0</td>\n",
" <td>88.0</td>\n",
" <td>0.99745</td>\n",
" <td>3.53</td>\n",
" <td>0.55</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>262</th>\n",
" <td>8.4</td>\n",
" <td>0.67</td>\n",
" <td>0.19</td>\n",
" <td>2.2</td>\n",
" <td>0.093</td>\n",
" <td>11.0</td>\n",
" <td>75.0</td>\n",
" <td>0.99736</td>\n",
" <td>3.20</td>\n",
" <td>0.59</td>\n",
" <td>9.2</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>263</th>\n",
" <td>8.8</td>\n",
" <td>0.61</td>\n",
" <td>0.19</td>\n",
" <td>4.0</td>\n",
" <td>0.094</td>\n",
" <td>30.0</td>\n",
" <td>69.0</td>\n",
" <td>0.99787</td>\n",
" <td>3.22</td>\n",
" <td>0.50</td>\n",
" <td>10.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>264</th>\n",
" <td>9.6</td>\n",
" <td>0.68</td>\n",
" <td>0.24</td>\n",
" <td>2.2</td>\n",
" <td>0.087</td>\n",
" <td>5.0</td>\n",
" <td>28.0</td>\n",
" <td>0.99880</td>\n",
" <td>3.14</td>\n",
" <td>0.60</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>265</th>\n",
" <td>10.5</td>\n",
" <td>0.43</td>\n",
" <td>0.35</td>\n",
" <td>3.3</td>\n",
" <td>0.092</td>\n",
" <td>24.0</td>\n",
" <td>70.0</td>\n",
" <td>0.99798</td>\n",
" <td>3.21</td>\n",
" <td>0.69</td>\n",
" <td>10.5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>266 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.1 0.60 0.01 ... 0.61 10.9 6\n",
"1 7.8 0.61 0.29 ... 1.56 9.1 5\n",
"2 7.1 0.63 0.06 ... 0.73 9.6 5\n",
"3 9.1 0.30 0.41 ... 0.85 11.7 7\n",
"4 9.0 0.46 0.31 ... 0.63 9.5 6\n",
".. ... ... ... ... ... ... ...\n",
"261 7.2 0.60 0.04 ... 0.55 9.5 5\n",
"262 8.4 0.67 0.19 ... 0.59 9.2 4\n",
"263 8.8 0.61 0.19 ... 0.50 10.0 6\n",
"264 9.6 0.68 0.24 ... 0.60 10.2 5\n",
"265 10.5 0.43 0.35 ... 0.69 10.5 6\n",
"\n",
"[266 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1IAtBylEzS8w",
"outputId": "1f047c20-f723-490d-ada3-474f5d14db3a"
},
"source": [
"wine_test_bash[\"quality\"].value_counts()"
],
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"6 109\n",
"5 108\n",
"7 37\n",
"4 8\n",
"8 2\n",
"3 2\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "V-9cwcrczS-3",
"outputId": "a8a26e7f-a2c4-4a44-c91a-6ce57be85386"
},
"source": [
"wine_test_bash.describe(include='all')"
],
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.245865</td>\n",
" <td>0.529455</td>\n",
" <td>0.266203</td>\n",
" <td>2.373308</td>\n",
" <td>0.086823</td>\n",
" <td>15.840226</td>\n",
" <td>47.447368</td>\n",
" <td>0.996499</td>\n",
" <td>3.313195</td>\n",
" <td>0.676241</td>\n",
" <td>10.569925</td>\n",
" <td>5.665414</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.526175</td>\n",
" <td>0.181583</td>\n",
" <td>0.191968</td>\n",
" <td>1.005345</td>\n",
" <td>0.046159</td>\n",
" <td>10.163096</td>\n",
" <td>34.610379</td>\n",
" <td>0.001772</td>\n",
" <td>0.158871</td>\n",
" <td>0.187786</td>\n",
" <td>1.149728</td>\n",
" <td>0.808497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.180000</td>\n",
" <td>0.000000</td>\n",
" <td>1.200000</td>\n",
" <td>0.039000</td>\n",
" <td>1.000000</td>\n",
" <td>7.000000</td>\n",
" <td>0.990840</td>\n",
" <td>2.880000</td>\n",
" <td>0.390000</td>\n",
" <td>9.000000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.200000</td>\n",
" <td>0.392500</td>\n",
" <td>0.100000</td>\n",
" <td>1.900000</td>\n",
" <td>0.068000</td>\n",
" <td>7.000000</td>\n",
" <td>22.250000</td>\n",
" <td>0.995318</td>\n",
" <td>3.200000</td>\n",
" <td>0.560000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>8.000000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.100000</td>\n",
" <td>0.078000</td>\n",
" <td>14.000000</td>\n",
" <td>40.000000</td>\n",
" <td>0.996520</td>\n",
" <td>3.310000</td>\n",
" <td>0.640000</td>\n",
" <td>10.250000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.100000</td>\n",
" <td>0.630000</td>\n",
" <td>0.400000</td>\n",
" <td>2.500000</td>\n",
" <td>0.092000</td>\n",
" <td>21.000000</td>\n",
" <td>62.750000</td>\n",
" <td>0.997600</td>\n",
" <td>3.400000</td>\n",
" <td>0.750000</td>\n",
" <td>11.400000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>13.300000</td>\n",
" <td>1.330000</td>\n",
" <td>0.740000</td>\n",
" <td>8.800000</td>\n",
" <td>0.467000</td>\n",
" <td>51.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.002600</td>\n",
" <td>3.900000</td>\n",
" <td>1.980000</td>\n",
" <td>14.000000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 266.000000 266.000000 ... 266.000000 266.000000\n",
"mean 8.245865 0.529455 ... 10.569925 5.665414\n",
"std 1.526175 0.181583 ... 1.149728 0.808497\n",
"min 4.600000 0.180000 ... 9.000000 3.000000\n",
"25% 7.200000 0.392500 ... 9.500000 5.000000\n",
"50% 8.000000 0.520000 ... 10.250000 6.000000\n",
"75% 9.100000 0.630000 ... 11.400000 6.000000\n",
"max 13.300000 1.330000 ... 14.000000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 16
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wzaUXARnu824"
},
"source": [
"Testowy wykres: średnia wartość `volatile acidity` dla każdej oceny `quality` (zbiór Test)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "3GksWzExaHV7",
"outputId": "21b77c09-445c-4e06-fcea-6f26d3717870"
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine_test_bash)"
],
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504747ad0>"
]
},
"metadata": {
"tags": []
},
"execution_count": 17
},
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "w5xmkUgGzdxs"
},
"source": [
"## 3.3. Cały zbiór"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "thGHHVJXzeGe",
"outputId": "a1bbe5c6-3aef-4a70-82ec-adc2b9d6daf5"
},
"source": [
"wine"
],
"execution_count": 18,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 5\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 5\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 6\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 5\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 6\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 6\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 5\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 6\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ua_ctPpVzeKJ",
"outputId": "da95e47b-9e44-42e0-efc0-66631dba99f1"
},
"source": [
"wine[\"quality\"].value_counts()"
],
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 681\n",
"6 638\n",
"7 199\n",
"4 53\n",
"8 18\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "-06v1i7XzeOz",
"outputId": "b0da7e9b-98aa-4af6-8131-359a54c2ac69"
},
"source": [
"wine.describe(include='all')"
],
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.319637</td>\n",
" <td>0.527821</td>\n",
" <td>0.270976</td>\n",
" <td>2.538806</td>\n",
" <td>0.087467</td>\n",
" <td>15.874922</td>\n",
" <td>46.467792</td>\n",
" <td>0.996747</td>\n",
" <td>3.311113</td>\n",
" <td>0.658149</td>\n",
" <td>10.422983</td>\n",
" <td>5.636023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.741096</td>\n",
" <td>0.179060</td>\n",
" <td>0.194801</td>\n",
" <td>1.409928</td>\n",
" <td>0.047065</td>\n",
" <td>10.460157</td>\n",
" <td>32.895324</td>\n",
" <td>0.001887</td>\n",
" <td>0.154386</td>\n",
" <td>0.169507</td>\n",
" <td>1.065668</td>\n",
" <td>0.807569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.420000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997835</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 1599.000000 1599.000000 ... 1599.000000 1599.000000\n",
"mean 8.319637 0.527821 ... 10.422983 5.636023\n",
"std 1.741096 0.179060 ... 1.065668 0.807569\n",
"min 4.600000 0.120000 ... 8.400000 3.000000\n",
"25% 7.100000 0.390000 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.200000 6.000000\n",
"75% 9.200000 0.640000 ... 11.100000 6.000000\n",
"max 15.900000 1.580000 ... 14.900000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 20
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t8Y53QPyu_fO"
},
"source": [
"Testowy Wykres (quality, volatile acidity)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "hEe3BYcJaKnF",
"outputId": "cd03275d-d09e-4517-ef76-22b40d9ffa9e"
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine)"
],
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504262bd0>"
]
},
"metadata": {
"tags": []
},
"execution_count": 21
},
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "M4hd_N8EgH57"
},
"source": [
"## 3.4. zbiór Dev (bash)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "XT3hrfW3gOxH",
"outputId": "98ef6303-7f2b-4341-e6ad-c19af8750ccc"
},
"source": [
"wine_dev_bash"
],
"execution_count": 22,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8.0</td>\n",
" <td>0.705</td>\n",
" <td>0.05</td>\n",
" <td>1.9</td>\n",
" <td>0.074</td>\n",
" <td>8.0</td>\n",
" <td>19.0</td>\n",
" <td>0.99620</td>\n",
" <td>3.34</td>\n",
" <td>0.95</td>\n",
" <td>10.5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.6</td>\n",
" <td>0.665</td>\n",
" <td>0.10</td>\n",
" <td>1.5</td>\n",
" <td>0.066</td>\n",
" <td>27.0</td>\n",
" <td>55.0</td>\n",
" <td>0.99655</td>\n",
" <td>3.39</td>\n",
" <td>0.51</td>\n",
" <td>9.3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.550</td>\n",
" <td>0.35</td>\n",
" <td>2.2</td>\n",
" <td>0.074</td>\n",
" <td>21.0</td>\n",
" <td>66.0</td>\n",
" <td>0.99740</td>\n",
" <td>3.25</td>\n",
" <td>0.56</td>\n",
" <td>9.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13.0</td>\n",
" <td>0.320</td>\n",
" <td>0.65</td>\n",
" <td>2.6</td>\n",
" <td>0.093</td>\n",
" <td>15.0</td>\n",
" <td>47.0</td>\n",
" <td>0.99960</td>\n",
" <td>3.05</td>\n",
" <td>0.61</td>\n",
" <td>10.6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8.8</td>\n",
" <td>0.610</td>\n",
" <td>0.30</td>\n",
" <td>2.8</td>\n",
" <td>0.088</td>\n",
" <td>17.0</td>\n",
" <td>46.0</td>\n",
" <td>0.99760</td>\n",
" <td>3.26</td>\n",
" <td>0.51</td>\n",
" <td>9.3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>261</th>\n",
" <td>13.8</td>\n",
" <td>0.490</td>\n",
" <td>0.67</td>\n",
" <td>3.0</td>\n",
" <td>0.093</td>\n",
" <td>6.0</td>\n",
" <td>15.0</td>\n",
" <td>0.99860</td>\n",
" <td>3.02</td>\n",
" <td>0.93</td>\n",
" <td>12.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>262</th>\n",
" <td>7.1</td>\n",
" <td>0.750</td>\n",
" <td>0.01</td>\n",
" <td>2.2</td>\n",
" <td>0.059</td>\n",
" <td>11.0</td>\n",
" <td>18.0</td>\n",
" <td>0.99242</td>\n",
" <td>3.39</td>\n",
" <td>0.40</td>\n",
" <td>12.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>263</th>\n",
" <td>9.9</td>\n",
" <td>0.350</td>\n",
" <td>0.41</td>\n",
" <td>2.3</td>\n",
" <td>0.083</td>\n",
" <td>11.0</td>\n",
" <td>61.0</td>\n",
" <td>0.99820</td>\n",
" <td>3.21</td>\n",
" <td>0.50</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>264</th>\n",
" <td>6.5</td>\n",
" <td>0.520</td>\n",
" <td>0.11</td>\n",
" <td>1.8</td>\n",
" <td>0.073</td>\n",
" <td>13.0</td>\n",
" <td>38.0</td>\n",
" <td>0.99550</td>\n",
" <td>3.34</td>\n",
" <td>0.52</td>\n",
" <td>9.3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>265</th>\n",
" <td>6.8</td>\n",
" <td>0.670</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.080</td>\n",
" <td>22.0</td>\n",
" <td>39.0</td>\n",
" <td>0.99701</td>\n",
" <td>3.40</td>\n",
" <td>0.74</td>\n",
" <td>9.7</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>266 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 8.0 0.705 0.05 ... 0.95 10.5 6\n",
"1 7.6 0.665 0.10 ... 0.51 9.3 5\n",
"2 7.8 0.550 0.35 ... 0.56 9.2 5\n",
"3 13.0 0.320 0.65 ... 0.61 10.6 5\n",
"4 8.8 0.610 0.30 ... 0.51 9.3 4\n",
".. ... ... ... ... ... ... ...\n",
"261 13.8 0.490 0.67 ... 0.93 12.0 6\n",
"262 7.1 0.750 0.01 ... 0.40 12.8 6\n",
"263 9.9 0.350 0.41 ... 0.50 9.5 5\n",
"264 6.5 0.520 0.11 ... 0.52 9.3 5\n",
"265 6.8 0.670 0.00 ... 0.74 9.7 5\n",
"\n",
"[266 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lhRktuxPgOsC",
"outputId": "612e6163-0b66-4495-fdc1-2a0813efe37e"
},
"source": [
"wine_dev_bash[\"quality\"].value_counts()"
],
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 115\n",
"6 113\n",
"7 24\n",
"4 9\n",
"8 3\n",
"3 2\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "FmOQIZMSgOnK",
"outputId": "a7f4b4e8-36a0-4a07-cce4-98caa71ff7d0"
},
"source": [
"wine_dev_bash.describe(include='all')"
],
"execution_count": 24,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.273684</td>\n",
" <td>0.540075</td>\n",
" <td>0.253008</td>\n",
" <td>2.523308</td>\n",
" <td>0.088620</td>\n",
" <td>15.398496</td>\n",
" <td>43.973684</td>\n",
" <td>0.996749</td>\n",
" <td>3.317895</td>\n",
" <td>0.649774</td>\n",
" <td>10.453321</td>\n",
" <td>5.590226</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.720592</td>\n",
" <td>0.193856</td>\n",
" <td>0.190330</td>\n",
" <td>1.380498</td>\n",
" <td>0.055825</td>\n",
" <td>10.002219</td>\n",
" <td>30.518712</td>\n",
" <td>0.001930</td>\n",
" <td>0.152003</td>\n",
" <td>0.176930</td>\n",
" <td>1.058010</td>\n",
" <td>0.777841</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.900000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>1.300000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>8.000000</td>\n",
" <td>0.990640</td>\n",
" <td>2.870000</td>\n",
" <td>0.330000</td>\n",
" <td>8.500000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.396250</td>\n",
" <td>0.080000</td>\n",
" <td>1.900000</td>\n",
" <td>0.068250</td>\n",
" <td>8.000000</td>\n",
" <td>20.000000</td>\n",
" <td>0.995525</td>\n",
" <td>3.210000</td>\n",
" <td>0.542500</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.240000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>13.000000</td>\n",
" <td>37.000000</td>\n",
" <td>0.996720</td>\n",
" <td>3.320000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.648750</td>\n",
" <td>0.390000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>20.000000</td>\n",
" <td>60.000000</td>\n",
" <td>0.997877</td>\n",
" <td>3.430000</td>\n",
" <td>0.720000</td>\n",
" <td>11.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.600000</td>\n",
" <td>1.580000</td>\n",
" <td>0.760000</td>\n",
" <td>13.800000</td>\n",
" <td>0.611000</td>\n",
" <td>66.000000</td>\n",
" <td>141.000000</td>\n",
" <td>1.003150</td>\n",
" <td>3.720000</td>\n",
" <td>1.950000</td>\n",
" <td>14.000000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 266.000000 266.000000 ... 266.000000 266.000000\n",
"mean 8.273684 0.540075 ... 10.453321 5.590226\n",
"std 1.720592 0.193856 ... 1.058010 0.777841\n",
"min 4.900000 0.120000 ... 8.500000 3.000000\n",
"25% 7.100000 0.396250 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.200000 6.000000\n",
"75% 9.200000 0.648750 ... 11.200000 6.000000\n",
"max 15.600000 1.580000 ... 14.000000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "j3Z6noeZgOjC",
"outputId": "de24703b-50d4-4059-d5e6-ddc0c0f3356c"
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine_dev_bash)"
],
"execution_count": 25,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504166f50>"
]
},
"metadata": {
"tags": []
},
"execution_count": 25
},
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ftWOC-do2Pq-"
},
"source": [
"# 4. Normalizacja"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Wm0EM2hj4s6V"
},
"source": [
"Normalizacja kolumny 'quality' na wartości od 0 do 20. Nie jest ona konieczna ale została stworzona w celach demonstracyjnych"
]
},
{
"cell_type": "code",
"metadata": {
"id": "EkZQ6Hpy2Tj_"
},
"source": [
"wine[\"quality\"]=((wine[\"quality\"]-wine[\"quality\"].min())/(wine[\"quality\"].max()-wine[\"quality\"].min()))*20"
],
"execution_count": 26,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "_bQgYfct3Tir",
"outputId": "8b50d411-b47b-4d4d-d3eb-606d7c134de0"
},
"source": [
"wine"
],
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 8.0\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 8.0\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 12.0\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 8.0\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 12.0\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 12.0\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 8.0\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 12.0\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 27
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "I1AwZoyN4RHs",
"outputId": "15a7bca4-8bbe-4749-80b8-5eede667aa07"
},
"source": [
"wine[\"quality\"].value_counts()"
],
"execution_count": 28,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"8.0 681\n",
"12.0 638\n",
"16.0 199\n",
"4.0 53\n",
"20.0 18\n",
"0.0 10\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 28
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XBU3z_of414w"
},
"source": [
"# 5. Usuwanie artefaktów"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KCstRwQp5-X1"
},
"source": [
"### Całe szczęscie nie ma w moim zbiorze ani pustych linijek, ani przykładów z niepoprawnymi wartościami"
]
},
{
"cell_type": "code",
"metadata": {
"id": "EJqksTP545UV"
},
"source": [
"# Znajdźmy pustą linijkę:\n",
"! grep -P \"^$\" -n winequality-red.csv"
],
"execution_count": 29,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "8DuoPn3Fa0kP"
},
"source": [
"Szukanie wartości \"NA\": https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "REYF2AWjz_lr",
"outputId": "01c5cd70-a37e-433f-bde3-d0c855c96c2e"
},
"source": [
"wine.isnull().sum()"
],
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"fixed acidity 0\n",
"volatile acidity 0\n",
"citric acid 0\n",
"residual sugar 0\n",
"chlorides 0\n",
"free sulfur dioxide 0\n",
"total sulfur dioxide 0\n",
"density 0\n",
"pH 0\n",
"sulphates 0\n",
"alcohol 0\n",
"quality 0\n",
"dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 30
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "RbkqNj9_akcU"
},
"source": [
"wine.dropna(inplace=True) "
],
"execution_count": 31,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "4WylJo9malyG",
"outputId": "95a9b3f4-a7f5-4f61-fdbe-918dbca2d72c"
},
"source": [
"wine"
],
"execution_count": 32,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 8.0\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 8.0\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 12.0\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 8.0\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 12.0\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 12.0\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 8.0\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 12.0\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 32
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "iqsJ9Bfngy-m"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}