ium_434788/Zadanie_02_434788.ipynb

3306 lines
150 KiB
Plaintext
Raw Normal View History

2021-03-21 22:43:47 +01:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "IUM_1_434788.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "shaFKPEixPn4"
},
"source": [
"# 1. Pobranie zbioru danych z Repozytorium"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-03GDjWtxD7W",
"outputId": "3cefd33d-3ef4-4c16-963e-ffa6e9e781de"
2021-03-21 22:43:47 +01:00
},
"source": [
"!curl -OL https://git.wmi.amu.edu.pl/s434788/ium_434788/raw/branch/master/winequality-red.csv"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 98k 0 98k 0 0 74502 0 --:--:-- 0:00:01 --:--:-- 74502\n"
2021-03-21 22:43:47 +01:00
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "sAUNi0ylxWUm",
"outputId": "fe879388-072d-4845-f3b5-f06a4fca5f1e"
2021-03-21 22:43:47 +01:00
},
"source": [
"import pandas as pd\n",
"wine=pd.read_csv('winequality-red.csv')\n",
"wine"
],
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 5\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 5\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 6\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 5\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 6\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 6\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 5\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 6\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4H-i6DJlxduP"
},
"source": [
"# 2. Podział na zbiory test/train przy pomocy SciKit + (poprawka z 26.03.2021 przy pomocy basha)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Rf49qKC-eqEU"
},
"source": [
"## 2.1 SciKit"
2021-03-21 22:43:47 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nZO_naLatT0o"
},
"source": [
"Próbowałem również podzielić dane na podzbiory Train:Dev:Test w proporcji 6:2:2 przy pomocy basha, ale uznałem, że wygodniejsze jest korzystanie z \"train_test_split()\". Docelowo podział będzie dokonywany na 4 zmienne: `X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)`, jednak chciałem zachować konwencję z przykładu z ćwiczeń."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ebHl5Aw1uuK1"
},
"source": [
"https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html"
]
},
{
"cell_type": "code",
"metadata": {
"id": "X88VMhb0x3gJ"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Fixed-size, seeded split: 959 training rows and 360 test rows.\n",
"# 959 + 360 = 1319, so 280 of the 1599 rows end up in neither split.\n",
"wine_train, wine_test = train_test_split(\n",
"    wine,\n",
"    train_size=959,\n",
"    test_size=360,\n",
"    random_state=1,\n",
")"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OzjEfgNOyAWs",
"outputId": "7e7bb70f-2b1e-422c-9500-d411884d8d5a"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_test[\"quality\"].value_counts()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 155\n",
"6 149\n",
"7 37\n",
"4 16\n",
"8 2\n",
"3 1\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SpQZIuSxyAd0",
"outputId": "96505a9a-d2e7-44a1-b2cf-ee40d6d7d3d0"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_train[\"quality\"].value_counts()"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 400\n",
"6 388\n",
"7 125\n",
"4 30\n",
"8 11\n",
"3 5\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "YK0491tAeupD"
},
"source": [
"## 2.2 Bash"
]
},
{
"cell_type": "code",
"metadata": {
"id": "1idNUz-9eyfJ"
},
"source": [
"# Shuffle the data rows (header kept aside) and cut them into three disjoint parts:\n",
"# rows 1-266 -> test, rows 267-532 -> dev, rows 533-end -> train.\n",
"!head -n 1 winequality-red.csv > header.csv\n",
"!tail -n +2 winequality-red.csv | shuf > data.shuffled\n",
"\n",
"!head -n 266 data.shuffled > wine.data.test\n",
"!head -n 532 data.shuffled | tail -n 266 > wine.data.dev\n",
"# Train has to start right after the last dev row (533); starting at 333 would\n",
"# copy 200 dev rows into the training set.\n",
"!tail -n +533 data.shuffled > wine.data.train\n",
"\n",
"# Put the header back on top of every part so pandas can read the files directly.\n",
"!cat header.csv wine.data.test > test.csv\n",
"!cat header.csv wine.data.dev > dev.csv\n",
"!cat header.csv wine.data.train > train.csv"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-C4RRDH2fFEp",
"outputId": "93944a72-838c-4e2b-a907-de4b0902fcb1"
},
"source": [
"!wc -l test.csv\n",
"!wc -l dev.csv\n",
"!wc -l train.csv"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"text": [
"267 test.csv\n",
"267 dev.csv\n",
"1268 train.csv\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "wLlI-k_jfb70"
},
"source": [
"wine_test_bash=pd.read_csv('test.csv')\n",
"wine_dev_bash=pd.read_csv('dev.csv')\n",
"wine_train_bash=pd.read_csv('train.csv')"
],
"execution_count": 8,
"outputs": []
},
2021-03-21 22:43:47 +01:00
{
"cell_type": "markdown",
"metadata": {
"id": "wAq8KmNdyNOm"
},
"source": [
"# 3. Statystyki dla zbiorów"
2021-03-21 22:43:47 +01:00
]
},
{
"cell_type": "code",
"metadata": {
"id": "Wcq9YSTfXbs1"
},
"source": [
"from matplotlib import pyplot as plt\n",
"import seaborn as sns"
],
"execution_count": 9,
2021-03-21 22:43:47 +01:00
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "EjDFpgdPy_of"
},
"source": [
"## 3.1. Zbiór Train (bash)"
2021-03-21 22:43:47 +01:00
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "SscUak3AydG0",
"outputId": "5f0bd8df-1753-4211-e3a6-8ce2685146f9"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_train_bash"
2021-03-21 22:43:47 +01:00
],
"execution_count": 10,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10.0</td>\n",
" <td>0.380</td>\n",
" <td>0.38</td>\n",
" <td>1.6</td>\n",
" <td>0.169</td>\n",
" <td>27.0</td>\n",
" <td>90.0</td>\n",
" <td>0.99914</td>\n",
" <td>3.15</td>\n",
" <td>0.65</td>\n",
" <td>8.5</td>\n",
2021-03-21 22:43:47 +01:00
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6.7</td>\n",
" <td>0.460</td>\n",
" <td>0.24</td>\n",
" <td>1.7</td>\n",
" <td>0.077</td>\n",
" <td>18.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99480</td>\n",
" <td>3.39</td>\n",
" <td>0.60</td>\n",
" <td>10.6</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.2</td>\n",
" <td>0.695</td>\n",
" <td>0.13</td>\n",
" <td>2.0</td>\n",
" <td>0.076</td>\n",
" <td>12.0</td>\n",
" <td>20.0</td>\n",
" <td>0.99546</td>\n",
" <td>3.29</td>\n",
" <td>0.54</td>\n",
" <td>10.1</td>\n",
2021-03-21 22:43:47 +01:00
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>12.5</td>\n",
" <td>0.600</td>\n",
" <td>0.49</td>\n",
" <td>4.3</td>\n",
" <td>0.100</td>\n",
" <td>5.0</td>\n",
" <td>14.0</td>\n",
" <td>1.00100</td>\n",
" <td>3.25</td>\n",
" <td>0.74</td>\n",
" <td>11.9</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8.3</td>\n",
" <td>0.560</td>\n",
" <td>0.22</td>\n",
" <td>2.4</td>\n",
" <td>0.082</td>\n",
" <td>10.0</td>\n",
" <td>86.0</td>\n",
" <td>0.99830</td>\n",
" <td>3.37</td>\n",
" <td>0.62</td>\n",
" <td>9.5</td>\n",
2021-03-21 22:43:47 +01:00
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1262</th>\n",
" <td>7.8</td>\n",
" <td>0.560</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.082</td>\n",
" <td>7.0</td>\n",
" <td>28.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.37</td>\n",
" <td>0.50</td>\n",
" <td>9.4</td>\n",
" <td>6</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>1263</th>\n",
" <td>5.8</td>\n",
" <td>0.680</td>\n",
" <td>0.02</td>\n",
" <td>1.8</td>\n",
" <td>0.087</td>\n",
2021-03-21 22:43:47 +01:00
" <td>21.0</td>\n",
" <td>94.0</td>\n",
" <td>0.99440</td>\n",
" <td>3.54</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.52</td>\n",
" <td>10.0</td>\n",
2021-03-21 22:43:47 +01:00
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1264</th>\n",
" <td>7.7</td>\n",
" <td>0.630</td>\n",
" <td>0.08</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
2021-03-21 22:43:47 +01:00
" <td>15.0</td>\n",
" <td>27.0</td>\n",
" <td>0.99670</td>\n",
2021-03-21 22:43:47 +01:00
" <td>3.32</td>\n",
" <td>0.54</td>\n",
" <td>9.5</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1265</th>\n",
" <td>7.1</td>\n",
" <td>0.600</td>\n",
" <td>0.00</td>\n",
" <td>1.8</td>\n",
" <td>0.074</td>\n",
" <td>16.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99720</td>\n",
" <td>3.47</td>\n",
" <td>0.70</td>\n",
" <td>9.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1266</th>\n",
" <td>10.4</td>\n",
" <td>0.610</td>\n",
" <td>0.49</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.1</td>\n",
" <td>0.200</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>0.99940</td>\n",
" <td>3.16</td>\n",
" <td>0.63</td>\n",
" <td>8.4</td>\n",
" <td>3</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1267 rows × 12 columns</p>\n",
2021-03-21 22:43:47 +01:00
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 10.0 0.380 0.38 ... 0.65 8.5 5\n",
"1 6.7 0.460 0.24 ... 0.60 10.6 6\n",
"2 7.2 0.695 0.13 ... 0.54 10.1 5\n",
"3 12.5 0.600 0.49 ... 0.74 11.9 6\n",
"4 8.3 0.560 0.22 ... 0.62 9.5 5\n",
2021-03-21 22:43:47 +01:00
"... ... ... ... ... ... ... ...\n",
"1262 7.8 0.560 0.12 ... 0.50 9.4 6\n",
"1263 5.8 0.680 0.02 ... 0.52 10.0 5\n",
"1264 7.7 0.630 0.08 ... 0.54 9.5 6\n",
"1265 7.1 0.600 0.00 ... 0.70 9.9 6\n",
"1266 10.4 0.610 0.49 ... 0.63 8.4 3\n",
2021-03-21 22:43:47 +01:00
"\n",
"[1267 rows x 12 columns]"
2021-03-21 22:43:47 +01:00
]
},
"metadata": {
"tags": []
},
"execution_count": 10
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hZAn8j4byMF2",
"outputId": "c47596aa-0d54-490f-c892-6ee5987a372d"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_train_bash[\"quality\"].value_counts()"
2021-03-21 22:43:47 +01:00
],
"execution_count": 11,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 550\n",
"6 498\n",
"7 157\n",
"4 39\n",
"8 15\n",
"3 8\n",
2021-03-21 22:43:47 +01:00
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 11
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "EOEuj8sRyL8v",
"outputId": "d2f102f6-d10c-4dc4-ae3f-fd34dc4e5985"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_train_bash.describe(include='all')"
2021-03-21 22:43:47 +01:00
],
"execution_count": 12,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
" <td>1267.000000</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.344199</td>\n",
" <td>0.525888</td>\n",
" <td>0.273891</td>\n",
" <td>2.574033</td>\n",
" <td>0.087419</td>\n",
" <td>15.889897</td>\n",
" <td>46.146014</td>\n",
" <td>0.996799</td>\n",
" <td>3.310016</td>\n",
" <td>0.655730</td>\n",
" <td>10.396725</td>\n",
" <td>5.632991</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.789253</td>\n",
" <td>0.177804</td>\n",
" <td>0.196141</td>\n",
" <td>1.453463</td>\n",
" <td>0.046754</td>\n",
" <td>10.603674</td>\n",
" <td>32.734818</td>\n",
" <td>0.001893</td>\n",
" <td>0.154047</td>\n",
" <td>0.166206</td>\n",
" <td>1.042353</td>\n",
" <td>0.806931</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.700000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.370000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.071000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995660</td>\n",
" <td>3.210000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.200000</td>\n",
" <td>0.080000</td>\n",
" <td>13.000000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>37.000000</td>\n",
" <td>0.996800</td>\n",
" <td>3.310000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.300000</td>\n",
" <td>0.640000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.430000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>22.000000</td>\n",
" <td>62.000000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.997870</td>\n",
" <td>3.400000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.730000</td>\n",
" <td>11.000000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>72.000000</td>\n",
" <td>278.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 1267.000000 1267.000000 ... 1267.000000 1267.000000\n",
"mean 8.344199 0.525888 ... 10.396725 5.632991\n",
"std 1.789253 0.177804 ... 1.042353 0.806931\n",
"min 4.700000 0.120000 ... 8.400000 3.000000\n",
"25% 7.100000 0.390000 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.200000 6.000000\n",
"75% 9.300000 0.640000 ... 11.000000 6.000000\n",
"max 15.900000 1.580000 ... 14.900000 8.000000\n",
2021-03-21 22:43:47 +01:00
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 12
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JWXJ2CZQuylE"
},
"source": [
"Testowy Wykres (quality, volatile acidity)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 408
2021-03-21 22:43:47 +01:00
},
"id": "HbsfwCL7XpNe",
"outputId": "249d8110-1b17-41ad-e1b1-18b0aa12ff06"
2021-03-21 22:43:47 +01:00
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine_train_bash)"
2021-03-21 22:43:47 +01:00
],
"execution_count": 13,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504f98950>"
2021-03-21 22:43:47 +01:00
]
},
"metadata": {
"tags": []
},
"execution_count": 13
2021-03-21 22:43:47 +01:00
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAF2CAYAAAAmzk/1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAZiklEQVR4nO3dfbRlZ10f8O8vk4SENBDKjEbzYlIbsaki4JiiQUgFNEFMWkQlCigLiLZEEZRZuGijoO2qI9YXDNqIvKiYACnoYIOxFQEFwUwgvISAjuElM3LJhBjeMST59Y97Rm+GmTt3wuz73Dn381nrrnP23s8955uzspLv3c9z9q7uDgAAq+uI0QEAANYjJQwAYAAlDABgACUMAGAAJQwAYAAlDABggMlKWFW9tKpurqr37ef4D1XVe6rqvVX1tqr6pqmyAACsNVOeCXt5knOXOf6hJI/o7m9M8vNJLpswCwDAmnLkVC/c3W+pqtOWOf62JZtvT3LyVFkAANaayUrYQXpqkjesZODGjRv7tNNOmzYNAMAhcO21197S3Zv2dWx4Cauqf5/FEvawZcZclOSiJDn11FOzffv2VUoHAHDPVdVH9nds6Lcjq+qBSV6S5ILu/sT+xnX3Zd29ubs3b9q0zzIJAHBYGVbCqurUJK9N8qTu/ptROQAARphsOrKqLk9yTpKNVbUzyc8mOSpJuvu3klyS5P5JXlxVSXJHd2+eKg8AwFoy5bcjLzzA8acledpU7w8AsJa5Yj4AwABKGADAAEoYAMAAShgAwABKGADAAEoYAMAAShgAwABKGADAAMNv4L1ebNmyJQsLCznxxBOzdevW0XEAgMGUsFWysLCQXbt2jY4BAKwRpiMBAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABpishFXVS6vq5qp6336OV1X9elXtqKr3VNVDpsoCALDWTHkm7OVJzl3m+HlJzpj9XJTkNyfMAgCwpkxWwrr7LUluXWbIBUl+txe9PckJVfVVU+UBAFhLRq4JOynJTUu2d872AQDMvcNiYX5VXVRV26tq++7du0fHAQD4so0sYbuSnLJk++TZvi/R3Zd19+bu3rxp06ZVCQcAMKWRJWxbkifPviX50CSf7O6PDcwDALBqjpzqhavq8iTnJNlYVTuT/GySo5Kku38ryVVJHpNkR5LPJXnKVFkAANaayUpYd194gOOd5BlTvT8AwFp2WCzMBwCYN0oYAMAAShgAwABKGADAAEoYAMAAShgAwABKGADAAEoYAMAAShgAwABKGADAAEoYAMAAShgAwABKGADAAEoYAMAAR44OMJVvfs7vjo5wN8ff8ulsSPLRWz69prJd+0tPHh0BANYlZ8IAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAaYtIRV1blV9cGq2lFVz93H8VOr6s+r6l1V9Z6qesyUeQAA1orJSlhVbUhyaZLzkpyZ5MKqOnOvYf8lyau7+8FJnpDkxVPlAQBYS6Y8E3ZWkh3dfWN3357kiiQX7DWmk9xn9vy+Sf5+wjwAAGvGlCXspCQ3LdneOdu31M8leWJV7UxyVZIf39cLVdVFVbW9qrbv3r17iqwAAK
tq9ML8C5O8vLtPTvKYJL9XVV+Sqbsv6+7N3b1506ZNqx4SAOBQm7KE7UpyypLtk2f7lnpqklcnSXf/VZJjkmycMBMAwJowZQm7JskZVXV6VR2dxYX32/Ya89Ekj0ySqvo3WSxh5hsBgLk3WQnr7juSXJzk6iQ3ZPFbkNdX1Quq6vzZsJ9K8vSqeneSy5P8SHf3VJkAANaKI6d88e6+KosL7pfuu2TJ8/cnOXvKDAAAa9HohfkAAOuSEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADDAAUtYVV1bVc+oqvutRiAAgPVgJWfCfiDJVye5pqquqKrvqqqaONfcuevo43Lnve6Tu44+bnQUAGANOPJAA7p7R5LnVdV/TfLYJC9NcmdVvSzJr3X3rRNnnAufPeM7R0dYd7Zs2ZKFhYWceOKJ2bp16+g4AHA3ByxhSVJVD0zylCSPSfK/k7wyycOSvDHJgyZLB1+GhYWF7Nq1a3QMANinA5awqro2yW1JfifJc7v7H2eH3lFVZ08ZDgBgXq3kTNj3dfeNS3dU1end/aHuftxEuQAA5tpKFuZfucJ9AACs0H7PhFXV1yf5t0nuW1VLz3jdJ8kxUwcDAJhny01HPiCL34Y8Icn3LNn/6SRPnzIUAMC8228J6+4/SvJHVfWt3f1Xq5gJAGDuLTcduaW7tyb5waq6cO/j3f0TkyYDAJhjy01H3jB73L4aQQAA1pPlpiNfP3t8xerFAQBYH5abjnx9kt7f8e4+f5JEAADrwHLTkS+cPT4uyYlJfn+2fWGSj08ZCgBg3i03HfnmJKmqX+7uzUsOvb6qrBMDAPgyrOSK+cdV1b/as1FVpyc5brpIAADzbyX3jnxWkjdV1Y1JKsnXJPnRSVMBAMy5A5aw7v6TqjojydfPdn2gu/9x2lgAAPNtuW9Hfkd3v3Gv+0YmyddWVbr7tRNn4zDz0Rd84+gId3PHrf8yyZG549aPrKlsp17y3tERAFgDljsT9ogkb8zd7xu5RydRwgAA7qHlvh35s7PHp6xeHACA9eGA346sqv9eVScs2b5fVf3CtLEAAObbSi5RcV5337Zno7v/IcljposEADD/VlLCNlTVvfZsVNWxSe61zHgAAA5gJdcJe2WSP6uql822n5LETb0BAL4MK7lO2C9W1XuSPHK26+e7++ppYwEAzLeVnAlLd78hyRsmzgIAsG6s5NuRD62qa6rqM1V1e1XdWVWfWo1wAADzaiUL838jyYVJ/jbJsUmeluTSKUMBAMy7lZSwdPeOJBu6+87uflmSc6eNBQAw31ayJuxzVXV0kuuqamuSj2WF5Q0AgH1bSZl60mzcxUk+m+SUJN+7khevqnOr6oNVtaOqnrufMd9fVe+vquur6g9WGhwA4HC2kktUfGT29AtJnr/SF66qDVlcO/boJDuTXFNV27r7/UvGnJHkZ5Kc3d3/UFVfcTDhAQAOV1NOK56VZEd339jdtye5IskFe415epJLZ7dCSnffPGEeAIA1Y8oSdlKSm5Zs75ztW+rrknxdVb21qt5eVftc8F9VF1XV9qravnv37oniAgCsnhWXsKq69wTvf2SSM5Kck8XLYPx2VZ2w96Duvqy7N3f35k2bNk0QAwBgda3kYq3fVlXvT/KB2fY3VdWLV/Dau7K4iH+Pk2f7ltqZZFt3f7G7P5Tkb7JYygAA5tpKzoT9SpLvSvKJJOnudyd5+Ap+75okZ1TV6bNLXDwhyba9xvxhFs+Cpao2ZnF68sYVJYcD2HjMXfnKY+/IxmPuGh1l3diyZUue/OQnZ8uWLaOjAKx5K7135E1VtXTXnSv4nTuq6uIkVyfZkOSl3X19Vb0gyfbu3jY79p2zM213JnlOd3/iYP8hYF9++oG3jY6w7iwsLGTXrr1PeAOwLyspYTdV1bcl6ao6Kskzk9ywkhfv7quSXLXXvkuWPO8kz579AACsGy
uZjvyxJM/I4jcbdyV50GwbAIB7aCUXa70lyQ+tQhbgIJ39orNHR7ibo287OkfkiNx0201rKttbf/ytoyMAf
2021-03-21 22:43:47 +01:00
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1W_oRCVczIgJ"
},
"source": [
"## 3.2. Zbiór Test (bash)"
2021-03-21 22:43:47 +01:00
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "LJzygNqKzOWY",
"outputId": "d4f8dd3b-793c-4e02-a6ea-fbdb8fbf7a19"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_test_bash"
2021-03-21 22:43:47 +01:00
],
"execution_count": 14,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.1</td>\n",
" <td>0.60</td>\n",
" <td>0.01</td>\n",
" <td>2.3</td>\n",
" <td>0.079</td>\n",
" <td>24.0</td>\n",
" <td>37.0</td>\n",
" <td>0.99514</td>\n",
" <td>3.40</td>\n",
" <td>0.61</td>\n",
" <td>10.9</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.61</td>\n",
" <td>0.29</td>\n",
" <td>1.6</td>\n",
" <td>0.114</td>\n",
" <td>9.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99740</td>\n",
" <td>3.26</td>\n",
" <td>1.56</td>\n",
" <td>9.1</td>\n",
" <td>5</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2021-03-21 22:43:47 +01:00
" <td>7.1</td>\n",
" <td>0.63</td>\n",
" <td>0.06</td>\n",
" <td>2.0</td>\n",
" <td>0.083</td>\n",
" <td>8.0</td>\n",
" <td>29.0</td>\n",
" <td>0.99855</td>\n",
" <td>3.67</td>\n",
" <td>0.73</td>\n",
" <td>9.6</td>\n",
" <td>5</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>9.1</td>\n",
" <td>0.30</td>\n",
" <td>0.41</td>\n",
" <td>2.0</td>\n",
" <td>0.068</td>\n",
" <td>10.0</td>\n",
2021-03-21 22:43:47 +01:00
" <td>24.0</td>\n",
" <td>0.99523</td>\n",
" <td>3.27</td>\n",
" <td>0.85</td>\n",
" <td>11.7</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>9.0</td>\n",
" <td>0.46</td>\n",
" <td>0.31</td>\n",
" <td>2.8</td>\n",
" <td>0.093</td>\n",
" <td>19.0</td>\n",
" <td>98.0</td>\n",
" <td>0.99815</td>\n",
" <td>3.32</td>\n",
" <td>0.63</td>\n",
" <td>9.5</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>261</th>\n",
" <td>7.2</td>\n",
" <td>0.60</td>\n",
" <td>0.04</td>\n",
" <td>2.5</td>\n",
" <td>0.076</td>\n",
" <td>18.0</td>\n",
" <td>88.0</td>\n",
" <td>0.99745</td>\n",
" <td>3.53</td>\n",
" <td>0.55</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>262</th>\n",
" <td>8.4</td>\n",
" <td>0.67</td>\n",
" <td>0.19</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.2</td>\n",
" <td>0.093</td>\n",
" <td>11.0</td>\n",
" <td>75.0</td>\n",
" <td>0.99736</td>\n",
" <td>3.20</td>\n",
" <td>0.59</td>\n",
" <td>9.2</td>\n",
" <td>4</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>263</th>\n",
" <td>8.8</td>\n",
" <td>0.61</td>\n",
" <td>0.19</td>\n",
" <td>4.0</td>\n",
" <td>0.094</td>\n",
" <td>30.0</td>\n",
" <td>69.0</td>\n",
" <td>0.99787</td>\n",
2021-03-21 22:43:47 +01:00
" <td>3.22</td>\n",
" <td>0.50</td>\n",
" <td>10.0</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>264</th>\n",
" <td>9.6</td>\n",
" <td>0.68</td>\n",
" <td>0.24</td>\n",
" <td>2.2</td>\n",
" <td>0.087</td>\n",
" <td>5.0</td>\n",
" <td>28.0</td>\n",
" <td>0.99880</td>\n",
" <td>3.14</td>\n",
" <td>0.60</td>\n",
" <td>10.2</td>\n",
2021-03-21 22:43:47 +01:00
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>265</th>\n",
" <td>10.5</td>\n",
" <td>0.43</td>\n",
" <td>0.35</td>\n",
" <td>3.3</td>\n",
" <td>0.092</td>\n",
" <td>24.0</td>\n",
" <td>70.0</td>\n",
" <td>0.99798</td>\n",
" <td>3.21</td>\n",
" <td>0.69</td>\n",
" <td>10.5</td>\n",
" <td>6</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>266 rows × 12 columns</p>\n",
2021-03-21 22:43:47 +01:00
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.1 0.60 0.01 ... 0.61 10.9 6\n",
"1 7.8 0.61 0.29 ... 1.56 9.1 5\n",
"2 7.1 0.63 0.06 ... 0.73 9.6 5\n",
"3 9.1 0.30 0.41 ... 0.85 11.7 7\n",
"4 9.0 0.46 0.31 ... 0.63 9.5 6\n",
".. ... ... ... ... ... ... ...\n",
"261 7.2 0.60 0.04 ... 0.55 9.5 5\n",
"262 8.4 0.67 0.19 ... 0.59 9.2 4\n",
"263 8.8 0.61 0.19 ... 0.50 10.0 6\n",
"264 9.6 0.68 0.24 ... 0.60 10.2 5\n",
"265 10.5 0.43 0.35 ... 0.69 10.5 6\n",
2021-03-21 22:43:47 +01:00
"\n",
"[266 rows x 12 columns]"
2021-03-21 22:43:47 +01:00
]
},
"metadata": {
"tags": []
},
"execution_count": 14
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1IAtBylEzS8w",
"outputId": "1f047c20-f723-490d-ada3-474f5d14db3a"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_test_bash[\"quality\"].value_counts()"
2021-03-21 22:43:47 +01:00
],
"execution_count": 15,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"6 109\n",
"5 108\n",
2021-03-21 22:43:47 +01:00
"7 37\n",
"4 8\n",
2021-03-21 22:43:47 +01:00
"8 2\n",
"3 2\n",
2021-03-21 22:43:47 +01:00
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 15
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "V-9cwcrczS-3",
"outputId": "a8a26e7f-a2c4-4a44-c91a-6ce57be85386"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_test_bash.describe(include='all')"
2021-03-21 22:43:47 +01:00
],
"execution_count": 16,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.245865</td>\n",
" <td>0.529455</td>\n",
" <td>0.266203</td>\n",
" <td>2.373308</td>\n",
" <td>0.086823</td>\n",
" <td>15.840226</td>\n",
" <td>47.447368</td>\n",
" <td>0.996499</td>\n",
" <td>3.313195</td>\n",
" <td>0.676241</td>\n",
" <td>10.569925</td>\n",
" <td>5.665414</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.526175</td>\n",
" <td>0.181583</td>\n",
" <td>0.191968</td>\n",
" <td>1.005345</td>\n",
" <td>0.046159</td>\n",
" <td>10.163096</td>\n",
" <td>34.610379</td>\n",
" <td>0.001772</td>\n",
" <td>0.158871</td>\n",
" <td>0.187786</td>\n",
" <td>1.149728</td>\n",
" <td>0.808497</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.180000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.000000</td>\n",
" <td>1.200000</td>\n",
" <td>0.039000</td>\n",
" <td>1.000000</td>\n",
" <td>7.000000</td>\n",
" <td>0.990840</td>\n",
" <td>2.880000</td>\n",
" <td>0.390000</td>\n",
" <td>9.000000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.200000</td>\n",
" <td>0.392500</td>\n",
" <td>0.100000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>1.900000</td>\n",
" <td>0.068000</td>\n",
" <td>7.000000</td>\n",
" <td>22.250000</td>\n",
" <td>0.995318</td>\n",
" <td>3.200000</td>\n",
" <td>0.560000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>8.000000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.100000</td>\n",
" <td>0.078000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>14.000000</td>\n",
" <td>40.000000</td>\n",
" <td>0.996520</td>\n",
" <td>3.310000</td>\n",
" <td>0.640000</td>\n",
" <td>10.250000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.100000</td>\n",
" <td>0.630000</td>\n",
" <td>0.400000</td>\n",
" <td>2.500000</td>\n",
" <td>0.092000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>21.000000</td>\n",
" <td>62.750000</td>\n",
" <td>0.997600</td>\n",
" <td>3.400000</td>\n",
" <td>0.750000</td>\n",
" <td>11.400000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>13.300000</td>\n",
" <td>1.330000</td>\n",
" <td>0.740000</td>\n",
" <td>8.800000</td>\n",
" <td>0.467000</td>\n",
" <td>51.000000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>289.000000</td>\n",
" <td>1.002600</td>\n",
" <td>3.900000</td>\n",
" <td>1.980000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>14.000000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 266.000000 266.000000 ... 266.000000 266.000000\n",
"mean 8.245865 0.529455 ... 10.569925 5.665414\n",
"std 1.526175 0.181583 ... 1.149728 0.808497\n",
"min 4.600000 0.180000 ... 9.000000 3.000000\n",
"25% 7.200000 0.392500 ... 9.500000 5.000000\n",
"50% 8.000000 0.520000 ... 10.250000 6.000000\n",
"75% 9.100000 0.630000 ... 11.400000 6.000000\n",
"max 13.300000 1.330000 ... 14.000000 8.000000\n",
2021-03-21 22:43:47 +01:00
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 16
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wzaUXARnu824"
},
"source": [
"Testowy wykres: średnia wartość `volatile acidity` dla każdej wartości `quality` (zbiór `wine_test_bash`)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "3GksWzExaHV7",
"outputId": "21b77c09-445c-4e06-fcea-6f26d3717870"
2021-03-21 22:43:47 +01:00
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine_test_bash)"
2021-03-21 22:43:47 +01:00
],
"execution_count": 17,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504747ad0>"
2021-03-21 22:43:47 +01:00
]
},
"metadata": {
"tags": []
},
"execution_count": 17
2021-03-21 22:43:47 +01:00
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAFzCAYAAAB2A95GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAY1ElEQVR4nO3dfZBlZ10n8O8vM8RADC+a6GgSTNaNuFERcIxoFCgRTRCTXQRlXEApNboSRVCnsNhCwd2tcoDVVaNulgXxBSJmQQc3EFcRUJSYCYSXENExEDIjbSa8vxqS/PaPvqOdyUx3zzCnn+7bn09V173nnKfP/eZWKvn2c96quwMAwNo6YXQAAIDNSAkDABhACQMAGEAJAwAYQAkDABhACQMAGGDr6ABH69RTT+2zzjprdAwAgBVdd911t3X3aYfbtuFK2FlnnZU9e/aMjgEAsKKquvlI2xyOBAAYQAkDABhACQMAGGCyElZVL6mqW6vqXUfYXlX1K1W1t6reUVUPmyoLAMB6M+VM2G8luWCZ7RcmOWf2c0mS35gwCwDAujJZCevuNyX50DJDLk7y273oLUnuX1VfMlUeAID1ZOQ5YacnuWXJ8r7ZOgCAubchTsyvqkuqak9V7Tlw4MDoOAAAn7ORJWx/kjOXLJ8xW3cP3X15d2/v7u2nnXbYm84CAGwoI0vY7iRPnV0l+fAkH+3uDwzMAwCwZiZ7bFFVvSLJo5KcWlX7kvxcknslSXf/ZpKrkjw2yd4kn0rytKmyAACsN5OVsO7escL2TvL0qT4fAGA923AP8N6odu7cmYWFhWzbti27du0aHQcAGEwJWyMLCwvZv/+w1x0AAJvQhrhFBQDAvFHCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABtg6OgBMZefOnVlYWMi2bduya9eu0XEA4G6UMObWwsJC9u/fPzoGAByWw5EAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAA0xawqrqgqp6T1XtrapnH2b7A6vqz6vqbVX1jqp67JR5AADWi8lKWFVtSXJZkguTnJtkR1Wde8iw/5zkld390CRPSvLrU+UBAFhPppwJOy/J3u6+qbtvT3JFkosPGdNJ7jt7f78k/zhhHgCAdWPKEnZ6kluWLO+brVvq55M8uar2JbkqyY8fbkdVdUlV7amqPQcOHJgiKwDAmhp9Yv6OJL/V3WckeWyS36mqe2Tq7su7e3t3bz/ttNPWPCQAwPE2ZQnbn+TMJctnzNYt9YNJXpkk3f3XSU5KcuqEmQAA1oUpS9i1Sc6pqrOr6sQsnni/+5Ax70/y6CSpqn+XxRLmeCMAMPcmK2HdfUeSS5NcneTGLF4FeUNVPb+qLpoN+6kkP1xVb0/yiiQ/0N09VSYAgPVi65Q77+6rsnjC/dJ1z13y/t1Jzp8yAwDAejT6xHwAgE1JCQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYYOvoAMyH9z//a0ZHuIc7PvQFSbbmjg/dvK7yPfC57xwdAYB1wEwYAMAAShgAwABKGADAAEoYAMAAShgAwABKGADAAEoYAMAAc3mfsK/7md8eHeEeTrnt49mS5P23fXxd5bvuBU8dHQEANiUzYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKG
EAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAA2wdHQCYHzt37szCwkK2bduWXbt2jY4DsK6tOBNWVddV1dOr6gFrEQjYuBYWFrJ///4sLCyMjgKw7q3mcOT3JvnSJNdW1RVV9R1VVRPnAgCYayuWsO7e293PSfIVSV6e5CVJbq6q51XVF0wdEABgHq3qxPyqenCSFyV5QZL/k+SJST6W5PXTRQMAmF+rOicsyS8luTbJg7v7J7r7mu5+UZKbVvjdC6rqPVW1t6qefYQx31NV766qG6rq5cfyDwEAsNGs5urIJ3b33cpWVZ3d3e/t7scf6ZeqakuSy5I8Jsm+LJ5Ttru7371kzDlJfjbJ+d394ar6omP6pwAA2GBWczjyylWuO9R5SfZ2903dfXuSK5JcfMiYH05yWXd/OEm6+9ZV7BcAYMM74kxYVX1lkq9Kcr+qWjrjdd8kJ61i36cnuWXJ8r4k33DImK+Yfdabk2xJ8vPd/bpV7BsAYENb7nDkg5I8Lsn9k3zXkvUfz+IM1vH6/HOSPCrJGUneVFVf090fWTqoqi5JckmSPPCBDzxOHw0AMM4RS1h3/1GSP6qqb+zuvz6Gfe9PcuaS5TNm65bal+Sa7v5skvdW1d9lsZRde0iWy5NcniTbt2/vY8gCALCuLHc4cmd370ryfVW149Dt3f0TK+z72iTnVNXZWSxfT0ryfYeM+cMkO5K8tKpOzeLhyWWvuAQAmAfLHY68cfa651h23N13VNWlSa7O4vleL+nuG6rq+Un2dPfu2bZvr6p3J7kzyc909weP5fNgszn/V88fHeEeTvzIiTkhJ+SWj9yyrvK9+cffPDoCwD0sdzjyNbPXlx3rzrv7qiRXHbLuuUved5JnzX4AADaN5Q5HvibJEc+/6u6LJkkEALAJLHc48oWz18cn2Zbkd2fLO5L805ShAADm3XKHI9+YJFX1ou7evmTTa6rqmM4TAwBg0WoeW3RyVf2bg48uml3tePK0seBzd+pJdyW5Y/YKwEa1c+fOLCwsZNu2bdm1a9foOMfNakrYM5O8oapuSlJJvizJj0yaCo6Dn37wR1YeBMC6t7CwkP37D73V6Ma3Ygnr7tfNHrT9lbNVf9vd/zxtLACA+bbc1ZHf2t2vP+S5kUny5VWV7n7VxNkAAObWcjNhj0zy+tz9uZEHdRIlDADgGC13deTPzV6ftnZxAAA2hxNWGlBV/62q7r9k+QFV9V+mjQVsRH2fzl0n35W+zxHv8wzAzIolLMmF3f0vl5l194eTPHa6SMBG9dnzP5vbH3N7Pnv+Z0dHAVj3VlPCtlTV5x1cqKp7J/m8ZcYDALCC1dwn7PeS/FlVvXS2/LQkx/xQbwAAVnefsF+sqnckefRs1S9099XTxgIAmG+rmQlLd782yWsnzgLAUZrXx7nAZrCaqyMfXlXXVtUnqur2qrqzqj62FuEAWN7Bx7ksLCyMjgIcpdWcmP9rSXYk+fsk907yQ0kumzIUAMC8W00JS3fvTbKlu+/s7pcmuWDaWAAA820154R9qqpOTHJ9Ve1K8oGssrwBAHB4qylTT5mNuzTJJ5OcmeS7pwwFADDvVnOLiptnbz+T5HnTxgEA2BwcVgQAGEAJAwAYYNUlrKruM2UQAIDNZMVzwqrqm5K8OMnnJ3lgVX1tkh/p7h+bOhzAevLGRzxydIR7+PTWLUlVPr1v37rK98g3vXF0BFj3VjMT9ktJviPJB5Oku9+e5BFThgIAmHervVnrLYesunOCLAAAm8ZqbtZ6y+yQZFfVvZI8I8mN08YCAJhvq5kJ+9EkT09yepL9SR4yWwYA4Bit5mattyX5j2uQBQBg0zhiCauqX03SR9re3T8xSSIAgE1guZmwPWuWAgBgkzliCevul61lEACAzWS5w5G/3N
0/WVWvyWEOS3b3RZMmAwCYY8sdjvyd2esL1yIIADDef33yE0ZHuIcP3frRxdeFD6yrfM/53Ss/p99f7nDkd
2021-03-21 22:43:47 +01:00
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "w5xmkUgGzdxs"
},
"source": [
"## 3.3. Cały zbiór"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "thGHHVJXzeGe",
"outputId": "a1bbe5c6-3aef-4a70-82ec-adc2b9d6daf5"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine"
],
"execution_count": 18,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 5\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 5\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 6\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 5\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 6\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 6\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 5\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 6\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ua_ctPpVzeKJ",
"outputId": "da95e47b-9e44-42e0-efc0-66631dba99f1"
},
"source": [
"wine[\"quality\"].value_counts()"
],
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 681\n",
"6 638\n",
"7 199\n",
"4 53\n",
"8 18\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "-06v1i7XzeOz",
"outputId": "b0da7e9b-98aa-4af6-8131-359a54c2ac69"
},
"source": [
"wine.describe(include='all')"
],
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.319637</td>\n",
" <td>0.527821</td>\n",
" <td>0.270976</td>\n",
" <td>2.538806</td>\n",
" <td>0.087467</td>\n",
" <td>15.874922</td>\n",
" <td>46.467792</td>\n",
" <td>0.996747</td>\n",
" <td>3.311113</td>\n",
" <td>0.658149</td>\n",
" <td>10.422983</td>\n",
" <td>5.636023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.741096</td>\n",
" <td>0.179060</td>\n",
" <td>0.194801</td>\n",
" <td>1.409928</td>\n",
" <td>0.047065</td>\n",
" <td>10.460157</td>\n",
" <td>32.895324</td>\n",
" <td>0.001887</td>\n",
" <td>0.154386</td>\n",
" <td>0.169507</td>\n",
" <td>1.065668</td>\n",
" <td>0.807569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.420000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997835</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 1599.000000 1599.000000 ... 1599.000000 1599.000000\n",
"mean 8.319637 0.527821 ... 10.422983 5.636023\n",
"std 1.741096 0.179060 ... 1.065668 0.807569\n",
"min 4.600000 0.120000 ... 8.400000 3.000000\n",
"25% 7.100000 0.390000 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.200000 6.000000\n",
"75% 9.200000 0.640000 ... 11.100000 6.000000\n",
"max 15.900000 1.580000 ... 14.900000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 20
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t8Y53QPyu_fO"
},
"source": [
"Testowy wykres: średnia wartość `volatile acidity` dla każdej wartości `quality` (cały zbiór `wine`)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "hEe3BYcJaKnF",
"outputId": "cd03275d-d09e-4517-ef76-22b40d9ffa9e"
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine)"
],
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504262bd0>"
]
},
"metadata": {
"tags": []
},
"execution_count": 21
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAFzCAYAAAB2A95GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYzUlEQVR4nO3de/TndV0n8OeLQQJZvNTMNgUUbEu2VKY2sRalnswCM9g1LWm18pjUrpSXcg4e91ja7p7jqFtbYi1rmV2UiNXCdowu3sqSGAQviNaEIjP5i0HzriHw2j9+36kfw1y+g/P5vX/z/T0e5/zO9/u5/L7fJ5/Dgefv/f5cqrsDAMDqOmZ0AACA9UgJAwAYQAkDABhACQMAGEAJAwAYQAkDABjg2NEBDtfGjRv7tNNOGx0DAOCQrr322tu6e9P+th11Jey0007Ljh07RscAADikqrr5QNtMRwIADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAxw1D3A+2i1devWLC0tZfPmzdm2bdvoOADAYErYKllaWsru3btHxwAA1gjTkQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAANMVsKq6ter6taqeu8BtldV/VJV7ayqd1fVw6bKAgCw1kw5EvYbSc45yPZzk5wx+7kwya9MmAUAYE2ZrIR199uSfOwgu5yf5Dd72TuSPKCqvmKqPAAAa8nIc8JOTnLLiuVds3UAAAvvqDgxv6ourKodVbVjz549o+MAAHzRRpaw3UlOXbF8ymzdPXT3pd29pbu3bNq0aVXCAQBMaWQJuzLJD8+uknx4kk9090cG5gEAWDXHTvXBVfXaJI9KsrGqdiX52ST3SZLu/tUk25M8NsnOJJ9N8tSpsgAArDWTlbDuvuAQ2zvJM6b6fgCAteyoODEfAGDRKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADHDs6wBS++bm/OTrCPZx026eyIcmHb/vUmsp37Ut+eHQEAFiXjIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADDBpCauqc6rqA1W1s6ou3s/2r6qqN1fVdVX17qp67JR5AADWislKWFVtSHJJknOTnJnkgqo6c5/d/muSy7v7oUmelOQVU+UBAFhLphwJOyvJzu6+qbtvT3JZkvP32aeT3G/2/v5J/n7CPAAAa8aUJezkJLesWN41W7fSzyV5clXtSrI9yU/u74Oq6sKq2lFVO/bs2TNFVgCAVTX6xPwLkvxGd5+S5LFJfquq7pGpuy/t7i3dvWXTpk2rHhIA4EibsoTtTnLqiuVTZutWelqSy5Oku/8qyfFJNk6YCQBgTZiyhF2T5IyqOr2qjsvyifdX7rPPh5M8Okmq6t9luYSZbwQAFt5kJay770hyUZKrktyY5asgb6iqF1XVebPdfjrJ06vqXUlem+RHu7unygQAsFYcO+WHd/f2LJ9wv3LdC1a8f1+Ss6fMAACwFo0+MR8AYF1SwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAaY9D5hMNLWrVuztLSUzZs3Z9u2baPjAMDdKGEsrKWlpezeve/jSgFgbTAdCQAwgBIGADCAEgYAMI
ASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwwLGjA6wXdx134t1eF82HX/SNoyPcwx0f+9Ikx+aOj928pvJ91QveMzoCAGuAErZKPnPGd4+OAACsIYecjqyqa6vqGVX1wNUIBACwHsxzTtgPJvnKJNdU1WVV9T1VVRPnAgBYaIcsYd29s7ufn+Rrk7wmya8nubmqXlhVXzp1QACARTTX1ZFV9eAkL0vykiT/N8kTk3wyyZumiwYAsLgOeWJ+VV2b5ONJfi3Jxd39T7NNV1fV2VOGAwBYVPNcHfnE7r5p5YqqOr27P9jdj58oFwDAQptnOvKKOdcBADCnA46EVdXXJfn6JPevqpUjXvdLcvzUwQAAFtnBpiMflORxSR6Q5PtWrP9UkqdPGQoAYNEdsIR19x8k+YOq+tbu/qtVzAQAsPAONh25tbu3Jfmhqrpg3+3d/VOTJgMAWGAHm468cfa6YzWCAACsJwebjnzD7PXVqxcHAGB9ONh05BuS9IG2d/d5kyQCAFgHDjYd+dLZ6+OTbE7y27PlC5L8w5ShAAAW3cGmI9+aJFX1su7esmLTG6rKeWIAAF+Eee6Yf2JV/Zu9C1V1epITp4sER8bG4+/Kl59wRzYef9foKABwD/M8O/LZSd5SVTclqSRfneTHJ00FR8DPPPjjoyMAwAEdsoR19x9V1RlJvm626v3d/U/TxgIAWGwHuzryO7v7Tfs8NzJJvqaq0t2vmzgbAMDCOthI2COTvCl3f27kXp1ECQMAuJcOdnXkz85en7p6cQAA1odDXh1ZVf+jqh6wYvmBVfXf5vnwqjqnqj5QVTur6uID7PMDVfW+qrqhql4zf3QAgKPXPLeoOLe7//kys+7+xySPPdQvVdWGJJckOTfJmUkuqKoz99nnjCTPS3J2d399kmcdRnYAgKPWPCVsQ1V9yd6FqjohyZccZP+9zkqys7tv6u7bk1yW5Px99nl6kktmxS7dfet8sQEAjm7zlLDfSfJnVfW0qnpakj9JMs9DvU9OcsuK5V2zdSt9bZKvraq3V9U7quqc/X1QVV1YVTuqaseePXvm+GoAgLVtnvuEvbiq3p3k0bNVP9/dVx3B7z8jyaOSnJLkbVX1jSunP2cZLk1yaZJs2bLlgA8VBwA4Wsxzx/x09xuTvPEwP3t3klNXLJ8yW7fSriRXd/cXknywqv4my6XsmsP8LmAN2Lp1a5aWlrJ58+Zs27ZtdByANW2eqyMfXlXXVNWnq+r2qrqzqj45x2dfk+SMqjq9qo5L8qQkV+6zz+9neRQsVbUxy9OTNx3WPwGwZiwtLWX37t1ZWloaHQVgzZvnnLCXJ7kgyd8mOSHJj2X5qseD6u47klyU5KokNya5vLtvqKoXVdV5s92uSvLRqnpfkjcneW53f/Tw/zEAAI4u805H7qyqDd19Z5JXVdV1Wb61xKF+b3uS7fuse8GK953kObMfAIB1Y54S9tnZdOL1VbUtyUcy3wgaAAAHME8Je0qWS9dFSZ6d5ZPtv3/KUMChnf3LZ4+OcA/Hffy4HJNjcsvHb1lT+d7+k28fHQHgHua5RcXNs7efT/LCaeMAAKwPphUBAAZQwgAABpi7hFXVfacMAhz9+r6du068K31fD7YAOJR5btb6bbP7eL1/tvxNVfWKyZMBR50vnP2F3P6Y2/OFs78wOgrAmjfPSNgvJPmeJB9Nku5+V5JHTBkKAGDRzTUd2d237LPqzgmyAACsG/PcJ+yWqvq2JF1V90nyzCw/hggAgHtpnpGwn0jyjCQnJ9md5CGzZQAA7qV5btZ6W5L/tApZAADWjQOWsKr65SQHvM68u39qkkQAAOvAwUbCdqxaCgCAdeaAJay7X72aQQAA1pODTUf+Ync/q6rekP1MS3b3eZMmAwBYYAebjvyt2etLVyMIAMB6crDpyGtnbx/S3f9r5baqemaSt04ZDABgkc
1zn7Af2c+6Hz3COQAA1pWDnRN2QZIfSnJ6VV25YtNJST42dTAADm3r1q1ZWlrK5s2bs23bttFxgMNwsHPC/
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "M4hd_N8EgH57"
},
"source": [
"## 3.4. Zbiór Dev (bash)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "XT3hrfW3gOxH",
"outputId": "98ef6303-7f2b-4341-e6ad-c19af8750ccc"
},
"source": [
"wine_dev_bash"
],
"execution_count": 22,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8.0</td>\n",
" <td>0.705</td>\n",
" <td>0.05</td>\n",
" <td>1.9</td>\n",
" <td>0.074</td>\n",
" <td>8.0</td>\n",
" <td>19.0</td>\n",
" <td>0.99620</td>\n",
" <td>3.34</td>\n",
" <td>0.95</td>\n",
" <td>10.5</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.6</td>\n",
" <td>0.665</td>\n",
" <td>0.10</td>\n",
" <td>1.5</td>\n",
" <td>0.066</td>\n",
" <td>27.0</td>\n",
" <td>55.0</td>\n",
" <td>0.99655</td>\n",
" <td>3.39</td>\n",
" <td>0.51</td>\n",
" <td>9.3</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.550</td>\n",
" <td>0.35</td>\n",
" <td>2.2</td>\n",
" <td>0.074</td>\n",
" <td>21.0</td>\n",
" <td>66.0</td>\n",
" <td>0.99740</td>\n",
" <td>3.25</td>\n",
" <td>0.56</td>\n",
" <td>9.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>13.0</td>\n",
" <td>0.320</td>\n",
" <td>0.65</td>\n",
" <td>2.6</td>\n",
" <td>0.093</td>\n",
" <td>15.0</td>\n",
" <td>47.0</td>\n",
" <td>0.99960</td>\n",
" <td>3.05</td>\n",
" <td>0.61</td>\n",
" <td>10.6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8.8</td>\n",
" <td>0.610</td>\n",
" <td>0.30</td>\n",
" <td>2.8</td>\n",
" <td>0.088</td>\n",
" <td>17.0</td>\n",
" <td>46.0</td>\n",
" <td>0.99760</td>\n",
" <td>3.26</td>\n",
" <td>0.51</td>\n",
" <td>9.3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>261</th>\n",
" <td>13.8</td>\n",
" <td>0.490</td>\n",
" <td>0.67</td>\n",
" <td>3.0</td>\n",
" <td>0.093</td>\n",
" <td>6.0</td>\n",
" <td>15.0</td>\n",
" <td>0.99860</td>\n",
" <td>3.02</td>\n",
" <td>0.93</td>\n",
" <td>12.0</td>\n",
" <td>6</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>262</th>\n",
" <td>7.1</td>\n",
" <td>0.750</td>\n",
" <td>0.01</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.2</td>\n",
" <td>0.059</td>\n",
" <td>11.0</td>\n",
" <td>18.0</td>\n",
" <td>0.99242</td>\n",
" <td>3.39</td>\n",
" <td>0.40</td>\n",
" <td>12.8</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>263</th>\n",
" <td>9.9</td>\n",
" <td>0.350</td>\n",
" <td>0.41</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.3</td>\n",
" <td>0.083</td>\n",
2021-03-21 22:43:47 +01:00
" <td>11.0</td>\n",
" <td>61.0</td>\n",
" <td>0.99820</td>\n",
" <td>3.21</td>\n",
" <td>0.50</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>264</th>\n",
" <td>6.5</td>\n",
" <td>0.520</td>\n",
" <td>0.11</td>\n",
" <td>1.8</td>\n",
" <td>0.073</td>\n",
" <td>13.0</td>\n",
" <td>38.0</td>\n",
" <td>0.99550</td>\n",
" <td>3.34</td>\n",
" <td>0.52</td>\n",
" <td>9.3</td>\n",
2021-03-21 22:43:47 +01:00
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>265</th>\n",
" <td>6.8</td>\n",
" <td>0.670</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.080</td>\n",
" <td>22.0</td>\n",
" <td>39.0</td>\n",
" <td>0.99701</td>\n",
" <td>3.40</td>\n",
" <td>0.74</td>\n",
" <td>9.7</td>\n",
" <td>5</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>266 rows × 12 columns</p>\n",
2021-03-21 22:43:47 +01:00
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 8.0 0.705 0.05 ... 0.95 10.5 6\n",
"1 7.6 0.665 0.10 ... 0.51 9.3 5\n",
"2 7.8 0.550 0.35 ... 0.56 9.2 5\n",
"3 13.0 0.320 0.65 ... 0.61 10.6 5\n",
"4 8.8 0.610 0.30 ... 0.51 9.3 4\n",
".. ... ... ... ... ... ... ...\n",
"261 13.8 0.490 0.67 ... 0.93 12.0 6\n",
"262 7.1 0.750 0.01 ... 0.40 12.8 6\n",
"263 9.9 0.350 0.41 ... 0.50 9.5 5\n",
"264 6.5 0.520 0.11 ... 0.52 9.3 5\n",
"265 6.8 0.670 0.00 ... 0.74 9.7 5\n",
2021-03-21 22:43:47 +01:00
"\n",
"[266 rows x 12 columns]"
2021-03-21 22:43:47 +01:00
]
},
"metadata": {
"tags": []
},
"execution_count": 22
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lhRktuxPgOsC",
"outputId": "612e6163-0b66-4495-fdc1-2a0813efe37e"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_dev_bash[\"quality\"].value_counts()"
2021-03-21 22:43:47 +01:00
],
"execution_count": 23,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 115\n",
"6 113\n",
"7 24\n",
"4 9\n",
"8 3\n",
"3 2\n",
2021-03-21 22:43:47 +01:00
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "FmOQIZMSgOnK",
"outputId": "a7f4b4e8-36a0-4a07-cce4-98caa71ff7d0"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine_dev_bash.describe(include='all')"
2021-03-21 22:43:47 +01:00
],
"execution_count": 24,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
" <td>266.000000</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.273684</td>\n",
" <td>0.540075</td>\n",
" <td>0.253008</td>\n",
" <td>2.523308</td>\n",
" <td>0.088620</td>\n",
" <td>15.398496</td>\n",
" <td>43.973684</td>\n",
" <td>0.996749</td>\n",
" <td>3.317895</td>\n",
" <td>0.649774</td>\n",
" <td>10.453321</td>\n",
" <td>5.590226</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.720592</td>\n",
" <td>0.193856</td>\n",
" <td>0.190330</td>\n",
" <td>1.380498</td>\n",
" <td>0.055825</td>\n",
" <td>10.002219</td>\n",
" <td>30.518712</td>\n",
" <td>0.001930</td>\n",
" <td>0.152003</td>\n",
" <td>0.176930</td>\n",
" <td>1.058010</td>\n",
" <td>0.777841</td>\n",
2021-03-21 22:43:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.900000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>1.300000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>8.000000</td>\n",
" <td>0.990640</td>\n",
" <td>2.870000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.330000</td>\n",
" <td>8.500000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.396250</td>\n",
" <td>0.080000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>1.900000</td>\n",
" <td>0.068250</td>\n",
" <td>8.000000</td>\n",
" <td>20.000000</td>\n",
" <td>0.995525</td>\n",
2021-03-21 22:43:47 +01:00
" <td>3.210000</td>\n",
" <td>0.542500</td>\n",
2021-03-21 22:43:47 +01:00
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.240000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>13.000000</td>\n",
" <td>37.000000</td>\n",
" <td>0.996720</td>\n",
" <td>3.320000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.648750</td>\n",
" <td>0.390000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>20.000000</td>\n",
" <td>60.000000</td>\n",
" <td>0.997877</td>\n",
" <td>3.430000</td>\n",
" <td>0.720000</td>\n",
" <td>11.200000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.600000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>1.580000</td>\n",
" <td>0.760000</td>\n",
" <td>13.800000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>0.611000</td>\n",
" <td>66.000000</td>\n",
" <td>141.000000</td>\n",
" <td>1.003150</td>\n",
" <td>3.720000</td>\n",
" <td>1.950000</td>\n",
" <td>14.000000</td>\n",
2021-03-21 22:43:47 +01:00
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 266.000000 266.000000 ... 266.000000 266.000000\n",
"mean 8.273684 0.540075 ... 10.453321 5.590226\n",
"std 1.720592 0.193856 ... 1.058010 0.777841\n",
"min 4.900000 0.120000 ... 8.500000 3.000000\n",
"25% 7.100000 0.396250 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.200000 6.000000\n",
"75% 9.200000 0.648750 ... 11.200000 6.000000\n",
"max 15.600000 1.580000 ... 14.000000 8.000000\n",
2021-03-21 22:43:47 +01:00
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 24
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "j3Z6noeZgOjC",
"outputId": "de24703b-50d4-4059-d5e6-ddc0c0f3356c"
2021-03-21 22:43:47 +01:00
},
"source": [
"# Bar plot of 'volatile acidity' per 'quality' value in wine_dev_bash.\n",
"fig = plt.figure(figsize=(10, 6))\n",
"sns.barplot(data=wine_dev_bash, x='quality', y='volatile acidity')"
2021-03-21 22:43:47 +01:00
],
"execution_count": 25,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f2504166f50>"
2021-03-21 22:43:47 +01:00
]
},
"metadata": {
"tags": []
},
"execution_count": 25
2021-03-21 22:43:47 +01:00
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAFzCAYAAAB2A95GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAa8klEQVR4nO3de7SddX3n8feHhIgwINZkiiVgaCdosd6PeMEqU7QGVJgqWmLFy1LTzhjrrWZwOYMVO7OWQUdbRZ2UIl5BpKjBieJUBawW5KCIXMRJUSApxwQQL3jBwHf+ODvtyTHnnB2S5/z22ef9WuusvZ/n+Z29P+zFgs/5Pc/+PakqJEmSNLv2ah1AkiRpPrKESZIkNWAJkyRJasASJkmS1IAlTJIkqQFLmCRJUgMLWwfYVYsXL65ly5a1jiFJkjSjK6+88raqWrKzY3OuhC1btozR0dHWMSRJkmaU5Kapjnk6UpIkqQFLmCRJUgOWMEmSpAYsYZIkSQ1YwiRJkhqwhEmSJDXQWQlLclaSLUmumWbM0UmuSnJtkku6yiJJkjRoupwJOxtYMdXBJAcC7wOOr6qHA8/vMIskSdJA6ayEVdWlwB3TDHkhcEFV3dwbv6WrLJIkSYOm5TVhhwMPTHJxkiuTvHiqgUlWJRlNMrp169ZZjChJktSNliVsIfA44FnAM4H/nuTwnQ2sqnVVNVJVI0uW7PT2S5IkSXNKy3tHbgJur6q7gLuSXAo8Cvhuw0ySJEmzomUJ+wzw3iQLgUXAE4B3NczTqTVr1jA2NsZBBx3E2rVrW8eRJEmNdVbCkpwDHA0sTrIJeAuwN0BVfaCqrk/yeeBq4F7gzKqacjmLuW5sbIzNmze3jiFJkgZEZyWsqlb2MeZ04PSuMkiSJA0qV8yXJElqwBImSZLUgCVMkiSpAUuYJElSA5YwSZKkBixhkiRJDVjCJEmSGrCESZIkNWAJkyRJasASJkmS1IAlTJIkqQFLmCRJUgOWMEmSpAYsYZIkSQ1YwiRJkhqwhEmSJDVgCZMkSWrAEiZJktSAJUySJKkBS5gkSVIDljBJkqQGLGGSJEkNWMIkSZIasIRJkiQ1YAmTJElqwBImSZLUgCVMkiSpAUuYJElSA5YwSZKkBixhkiRJDXRWwpKclWRLkmtmGPf4JNuSnNhVFkmSpEHT5UzY2cCK6QYkWQC8HfhChzkkSZIGTmclrKouBe6YYdirgb8HtnSVQ5IkaRA1uyYsycHAHwHvb5VBkiSplZYX5r8b+K9Vde9MA5OsSjKaZHTr1q2zEE2SJKlbCxu+9whwbhKAxcBxSbZV1acnD6yqdcA6gJGRkZrVlJIkSR1oVsKq6rDtz5OcDXx2ZwVMkiRpGHVWwpKcAxwNLE6yCXgLsDdAVX2gq/eVJEmaCzorYVW1chfGvrSrHJIkSYPIFfMlSZIasIRJkiQ1YAmTJElqwBImSZLUgCVMkiSpAUuYJElSA5YwSZKkBixhkiRJDVjCJEmSGrCESZIkNWAJkyRJasASJkmS1IAlTJIkqQFLmCRJUgMLWwfoyuPe+OHWEXaw/20/YQFw820/GahsV57+4tYRJEmal5wJkyRJasASJkmS1IAlTJIkqQFLmCRJUgOWMEmSpAYsYZIkSQ1YwiRJkhqwhEmSJDVgCZMkSWrAEiZJktSAJUySJKkBS5gkSVIDljBJkqQGLGGSJEkNWMIkSZIa6KyEJTkryZYk10xx/E+SXJ3k20m+luRRXWWRJEkaNF3OhJ0NrJjm+PeAp1XVI4C3Aes6zCJJkjRQFnb1wlV1aZJl0xz/2oTNy4ClXWWRJEkaNINyTdjLgc+1DiFJkjRbOpsJ61eS/8h4CXvKNGNWAasADj300FlKJkmS1J2mM2FJHgmcCZxQVbdPNa6q1lXVSFWNLFmyZPYCSpIkdaRZCUtyKHABcHJVfbdVDkmSpBY6Ox2Z5BzgaGBxkk3AW4
C9AarqA8CpwIOA9yUB2FZVI13lkSRJGiRdfjty5QzHXwG8oqv3lyRJGmSD8u1ISZKkecUSJkmS1IAlTJIkqQFLmCRJUgOWMEmSpAYsYZIkSQ1YwiRJkhqwhEmSJDVgCZMkSWrAEiZJktSAJUySJKkBS5gkSVIDljBJkqQGLGGSJEkNWMIkSZIasIRJkiQ1YAmTJElqwBImSZLUgCVMkiSpAUuYJElSA5YwSZKkBixhkiRJDVjCJEmSGrCESZIkNWAJkyRJasASJkmS1IAlTJIkqQFLmCRJUgOWMEmSpAYsYZIkSQ1YwiRJkhrorIQlOSvJliTXTHE8Sf4mycYkVyd5bFdZBsG9i/bjnvsdwL2L9msdRZIkDYCFHb722cB7gQ9PcfxYYHnv5wnA+3uPQ+mu5X/YOoIkSRogM86EJbkyyauSPHBXXriqLgXumGbICcCHa9xlwIFJHrwr7yFJkjRX9XM68o+B3wKuSHJukmcmyR5474OBWyZsb+rt+zVJViUZTTK6devWPfDWkiRJbc1YwqpqY1W9GTgc+DhwFnBTkrcm+Y2uA/YyrKuqkaoaWbJkyWy8pSRJUqf6ujA/ySOBdwKnA38PPB/4MfCl3XjvzcAhE7aX9vZJkiQNvRkvzE9yJXAn8HfAKVX1y96hy5MctRvvvR5YneRcxi/I/1FV3bobrydJkjRn9PPtyOdX1Y0TdyQ5rKq+V1XPneqXkpwDHA0sTrIJeAuwN0BVfQDYABwHbAR+BrzsPv0TSFNYs2YNY2NjHHTQQaxdu7Z1HEmSdtBPCTsfmLyG1/nA46b7papaOcPxAl7Vx/tL98nY2BibN3uGW5I0mKYsYUkeBjwceECSiTNeBwD7dB1MkiRpmE03E/ZQ4NnAgcBzJuz/CfDKLkNJkiQNuylLWFV9BvhMkidV1T/NYiZJkqShN93pyDVVtRZ4YZJfu76rqv6802SSJElDbLrTkdf3HkdnI4gkSdJ8Mt3pyAt7jx+avTiSJEnzw3SnIy8EaqrjVXV8J4kkSZLmgelOR76j9/hc4CDgo73tlcAPugwlSZI07KY7HXkJQJJ3VtXIhEMXJvE6MUmSpN3Qzw2890vy29s3khwG7NddJEmSpOHXz22LXgdcnORGIMBDgD/tNJUkSdKQm7GEVdXnkywHHtbb9Z2q+mW3sSRJkobbdN+O/IOq+tKk+0YC/E4SquqCjrNJkiQNrelmwp4GfIkd7xu5XQGWMEmSpPtoum9HvqX3+LLZiyNJkjQ/zPjtyCT/M8mBE7YfmOSvuo0lSZI03PpZouLYqrpz+0ZV/RA4rrtIkiRJw6+fErYgyf22byS5P3C/acZLkiRpBv2sE/Yx4ItJPtjbfhngTb0lSZJ2Qz/rhL09ydXAMb1db6uqi7qNJUmSNNz6mQmjqj4HfK7jLJIkSfNGP9+OfGKSK5L8NMndSe5J8uPZCCdJkjSs+pkJey9wEvBJYAR4MXB4l6E0N9182iNaR9jBtjt+A1jItjtuGqhsh5767dYRJEkDoJ9vR1JVG4EFVXVPVX0QWNFtLEmSpOHWz0zYz5IsAq5Ksha4lT7LmyRJknaunzJ1cm/cauAu4BDgeV2GkiRJGnb9LFFxU+/pL4C3dhtHkiRpfvC0oiRJUgOWMEmSpAb6LmFJ9u0yiCRJ0nzSz2KtT05yHfCd3vajkryvnxdPsiLJDUk2JjllJ8cPTfLlJN9McnWS43b5n0CSJGkO6mcm7F3AM4HbAarqW8BTZ/qlJAuAM4BjgSOAlUmOmDTsvwHnVdVjGF8Qtq9yJ0mSNNf1u1jrLZN23dPHrx0JbKyqG6vqbuBc4ITJLw0c0Hv+AOBf+skjSZI01/WzWOstSZ4MVJK9gdcA1/fxewcDE8vbJuAJk8b8JfCFJK8G9gOe3sfrSpIkzXn9zIT9GfAqxkvVZuDRve09YSVwdlUtBY4DPpLk1zIlWZVkNMno1q1b99BbS5IktdPPYq23AX9yH157M+Or62+3tL
dvopfTuw9lVf1Tkn2AxcCWSRnWAesARkZG6j5kkSRJGihTlrAk72H8mq2dqqo/n+G1rwCWJzmM8fJ1EvDCS
2021-03-21 22:43:47 +01:00
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ftWOC-do2Pq-"
},
"source": [
"# 4. Normalizacja"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Wm0EM2hj4s6V"
},
"source": [
"Normalizacja kolumny 'quality' do zakresu od 0 do 20. Nie jest ona konieczna, ale została wykonana w celach demonstracyjnych."
]
},
{
"cell_type": "code",
"metadata": {
"id": "EkZQ6Hpy2Tj_"
},
"source": [
"# Min-max scale 'quality' onto the 0-20 range; the column is overwritten in place.\n",
"wine[\"quality\"] = wine[\"quality\"].pipe(lambda q: (q - q.min()) / (q.max() - q.min()) * 20)"
],
"execution_count": 26,
2021-03-21 22:43:47 +01:00
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "_bQgYfct3Tir",
"outputId": "8b50d411-b47b-4d4d-d3eb-606d7c134de0"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine"
],
"execution_count": 27,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 8.0\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 8.0\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 12.0\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 8.0\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 12.0\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 12.0\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 8.0\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 12.0\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 27
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "I1AwZoyN4RHs",
"outputId": "15a7bca4-8bbe-4749-80b8-5eede667aa07"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine[\"quality\"].value_counts()"
],
"execution_count": 28,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"8.0 681\n",
"12.0 638\n",
"16.0 199\n",
"4.0 53\n",
"20.0 18\n",
"0.0 10\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 28
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XBU3z_of414w"
},
"source": [
"# 5. Usuwanie artefaktów"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KCstRwQp5-X1"
},
"source": [
"### Na szczęście w moim zbiorze nie ma ani pustych linijek, ani przykładów z niepoprawnymi wartościami."
]
},
{
"cell_type": "code",
"metadata": {
"id": "EJqksTP545UV"
},
"source": [
"# Znajdźmy pustą linijkę:\n",
"! grep -P \"^$\" -n winequality-red.csv"
],
"execution_count": 29,
2021-03-21 22:43:47 +01:00
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "8DuoPn3Fa0kP"
},
"source": [
"Szukanie wartości \"NA\": https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "REYF2AWjz_lr",
"outputId": "01c5cd70-a37e-433f-bde3-d0c855c96c2e"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine.isnull().sum()"
],
"execution_count": 30,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"fixed acidity 0\n",
"volatile acidity 0\n",
"citric acid 0\n",
"residual sugar 0\n",
"chlorides 0\n",
"free sulfur dioxide 0\n",
"total sulfur dioxide 0\n",
"density 0\n",
"pH 0\n",
"sulphates 0\n",
"alcohol 0\n",
"quality 0\n",
"dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 30
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "RbkqNj9_akcU"
},
"source": [
"# Drop rows containing missing values; rebinding avoids the discouraged inplace=True.\n",
"wine = wine.dropna()"
],
"execution_count": 31,
2021-03-21 22:43:47 +01:00
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "4WylJo9malyG",
"outputId": "95a9b3f4-a7f5-4f61-fdbe-918dbca2d72c"
2021-03-21 22:43:47 +01:00
},
"source": [
"wine"
],
"execution_count": 32,
2021-03-21 22:43:47 +01:00
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 8.0\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 8.0\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 12.0\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 8.0\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 12.0\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 12.0\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 8.0\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 12.0\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 32
2021-03-21 22:43:47 +01:00
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "iqsJ9Bfngy-m"
},
"source": [
""
],
"execution_count": null,
"outputs": []
2021-03-21 22:43:47 +01:00
}
]
}