ium_434788/IUM_1_434788.ipynb

2688 lines
119 KiB
Plaintext
Raw Normal View History

2021-03-21 22:43:47 +01:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "IUM_1_434788.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "shaFKPEixPn4"
},
"source": [
"# 1. Pobranie zbioru danych z Repozytorium"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-03GDjWtxD7W",
"outputId": "35c4ed64-62c4-47f9-a407-571b072bf831"
},
"source": [
"!curl -OL https://git.wmi.amu.edu.pl/s434788/ium_434788/raw/branch/master/winequality-red.csv"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 98k 0 98k 0 0 282k 0 --:--:-- --:--:-- --:--:-- 281k\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "sAUNi0ylxWUm",
"outputId": "27072275-680f-4154-bdf2-e952a63ab25e"
},
"source": [
"import pandas as pd\n",
"wine=pd.read_csv('winequality-red.csv')\n",
"wine"
],
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 5\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 5\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 6\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 5\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 6\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 6\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 5\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 6\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4H-i6DJlxduP"
},
"source": [
"# 2. Podział na zbiory test/train przy pomocy scikit-learn"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nZO_naLatT0o"
},
"source": [
"Próbowałem również podzielić dane na podzbiory Train:Dev:Test w proporcji 6:2:2 przy pomocy basha, ale uznałem, że wygodniejsze jest korzystanie z \"train_test_split()\". Docelowo podział będzie dokonywany na 4 zmienne: `X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)`, jednak chciałem zachować konwencję z przykładu z ćwiczeń."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ebHl5Aw1uuK1"
},
"source": [
"https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html"
]
},
{
"cell_type": "code",
"metadata": {
"id": "X88VMhb0x3gJ"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"wine_train, wine_test = train_test_split(wine, test_size=360,train_size=959, random_state=1)"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OzjEfgNOyAWs",
"outputId": "0035e197-98c9-4a15-c1f9-23742d6a0595"
},
"source": [
"wine_test[\"quality\"].value_counts()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 155\n",
"6 149\n",
"7 37\n",
"4 16\n",
"8 2\n",
"3 1\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SpQZIuSxyAd0",
"outputId": "6302469b-8853-45ea-b4d4-eae6078e96cf"
},
"source": [
"wine_train[\"quality\"].value_counts()"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 400\n",
"6 388\n",
"7 125\n",
"4 30\n",
"8 11\n",
"3 5\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wAq8KmNdyNOm"
},
"source": [
"# 3. Statystyki dla zbiorów"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Wcq9YSTfXbs1"
},
"source": [
"from matplotlib import pyplot as plt\n",
"import seaborn as sns"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "EjDFpgdPy_of"
},
"source": [
"## 3.1. Zbiór Train"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "SscUak3AydG0",
"outputId": "edcf5523-066f-4c75-bc9c-1628a413edf7"
},
"source": [
"wine_train"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1589</th>\n",
" <td>6.6</td>\n",
" <td>0.725</td>\n",
" <td>0.20</td>\n",
" <td>7.8</td>\n",
" <td>0.073</td>\n",
" <td>29.0</td>\n",
" <td>79.0</td>\n",
" <td>0.99770</td>\n",
" <td>3.29</td>\n",
" <td>0.54</td>\n",
" <td>9.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>854</th>\n",
" <td>9.3</td>\n",
" <td>0.360</td>\n",
" <td>0.39</td>\n",
" <td>1.5</td>\n",
" <td>0.080</td>\n",
" <td>41.0</td>\n",
" <td>55.0</td>\n",
" <td>0.99652</td>\n",
" <td>3.47</td>\n",
" <td>0.73</td>\n",
" <td>10.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>7.3</td>\n",
" <td>0.670</td>\n",
" <td>0.26</td>\n",
" <td>1.8</td>\n",
" <td>0.401</td>\n",
" <td>16.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99690</td>\n",
" <td>3.16</td>\n",
" <td>1.14</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1106</th>\n",
" <td>8.2</td>\n",
" <td>0.230</td>\n",
" <td>0.42</td>\n",
" <td>1.9</td>\n",
" <td>0.069</td>\n",
" <td>9.0</td>\n",
" <td>17.0</td>\n",
" <td>0.99376</td>\n",
" <td>3.21</td>\n",
" <td>0.54</td>\n",
" <td>12.3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>650</th>\n",
" <td>10.7</td>\n",
" <td>0.430</td>\n",
" <td>0.39</td>\n",
" <td>2.2</td>\n",
" <td>0.106</td>\n",
" <td>8.0</td>\n",
" <td>32.0</td>\n",
" <td>0.99860</td>\n",
" <td>2.89</td>\n",
" <td>0.50</td>\n",
" <td>9.6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>526</th>\n",
" <td>7.3</td>\n",
" <td>0.365</td>\n",
" <td>0.49</td>\n",
" <td>2.5</td>\n",
" <td>0.088</td>\n",
" <td>39.0</td>\n",
" <td>106.0</td>\n",
" <td>0.99660</td>\n",
" <td>3.36</td>\n",
" <td>0.78</td>\n",
" <td>11.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>583</th>\n",
" <td>12.0</td>\n",
" <td>0.280</td>\n",
" <td>0.49</td>\n",
" <td>1.9</td>\n",
" <td>0.074</td>\n",
" <td>10.0</td>\n",
" <td>21.0</td>\n",
" <td>0.99760</td>\n",
" <td>2.98</td>\n",
" <td>0.66</td>\n",
" <td>9.9</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>975</th>\n",
" <td>7.2</td>\n",
" <td>0.410</td>\n",
" <td>0.30</td>\n",
" <td>2.1</td>\n",
" <td>0.083</td>\n",
" <td>35.0</td>\n",
" <td>72.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.44</td>\n",
" <td>0.52</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>566</th>\n",
" <td>8.7</td>\n",
" <td>0.700</td>\n",
" <td>0.24</td>\n",
" <td>2.5</td>\n",
" <td>0.226</td>\n",
" <td>5.0</td>\n",
" <td>15.0</td>\n",
" <td>0.99910</td>\n",
" <td>3.32</td>\n",
" <td>0.60</td>\n",
" <td>9.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1232</th>\n",
" <td>7.6</td>\n",
" <td>0.430</td>\n",
" <td>0.29</td>\n",
" <td>2.1</td>\n",
" <td>0.075</td>\n",
" <td>19.0</td>\n",
" <td>66.0</td>\n",
" <td>0.99718</td>\n",
" <td>3.40</td>\n",
" <td>0.64</td>\n",
" <td>9.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>959 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"1589 6.6 0.725 0.20 ... 0.54 9.2 5\n",
"854 9.3 0.360 0.39 ... 0.73 10.9 6\n",
"83 7.3 0.670 0.26 ... 1.14 9.4 5\n",
"1106 8.2 0.230 0.42 ... 0.54 12.3 6\n",
"650 10.7 0.430 0.39 ... 0.50 9.6 5\n",
"... ... ... ... ... ... ... ...\n",
"526 7.3 0.365 0.49 ... 0.78 11.0 5\n",
"583 12.0 0.280 0.49 ... 0.66 9.9 7\n",
"975 7.2 0.410 0.30 ... 0.52 9.4 5\n",
"566 8.7 0.700 0.24 ... 0.60 9.0 6\n",
"1232 7.6 0.430 0.29 ... 0.64 9.5 5\n",
"\n",
"[959 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hZAn8j4byMF2",
"outputId": "9d7d799f-7018-4b9d-9d4c-153abe0a53f3"
},
"source": [
"wine_train[\"quality\"].value_counts()"
],
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 400\n",
"6 388\n",
"7 125\n",
"4 30\n",
"8 11\n",
"3 5\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "EOEuj8sRyL8v",
"outputId": "cb7c369c-9bdd-457a-98fe-3537c569730b"
},
"source": [
"wine_train.describe(include='all')"
],
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.00000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" <td>959.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.329093</td>\n",
" <td>0.526809</td>\n",
" <td>0.269864</td>\n",
" <td>2.493743</td>\n",
" <td>0.088230</td>\n",
" <td>15.883733</td>\n",
" <td>45.738790</td>\n",
" <td>0.996736</td>\n",
" <td>3.31048</td>\n",
" <td>0.661481</td>\n",
" <td>10.433160</td>\n",
" <td>5.657977</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.808394</td>\n",
" <td>0.175221</td>\n",
" <td>0.198377</td>\n",
" <td>1.262329</td>\n",
" <td>0.050555</td>\n",
" <td>10.485739</td>\n",
" <td>31.897095</td>\n",
" <td>0.001925</td>\n",
" <td>0.15462</td>\n",
" <td>0.171639</td>\n",
" <td>1.084349</td>\n",
" <td>0.805654</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.74000</td>\n",
" <td>0.370000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.400000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995540</td>\n",
" <td>3.21000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.250000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>37.000000</td>\n",
" <td>0.996770</td>\n",
" <td>3.31000</td>\n",
" <td>0.620000</td>\n",
" <td>10.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.300000</td>\n",
" <td>0.635000</td>\n",
" <td>0.430000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>22.000000</td>\n",
" <td>61.000000</td>\n",
" <td>0.997870</td>\n",
" <td>3.40000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.330000</td>\n",
" <td>1.000000</td>\n",
" <td>15.400000</td>\n",
" <td>0.610000</td>\n",
" <td>72.000000</td>\n",
" <td>278.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.01000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 959.000000 959.000000 ... 959.000000 959.000000\n",
"mean 8.329093 0.526809 ... 10.433160 5.657977\n",
"std 1.808394 0.175221 ... 1.084349 0.805654\n",
"min 4.600000 0.120000 ... 8.400000 3.000000\n",
"25% 7.100000 0.400000 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.100000 6.000000\n",
"75% 9.300000 0.635000 ... 11.100000 6.000000\n",
"max 15.900000 1.330000 ... 14.900000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 9
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JWXJ2CZQuylE"
},
"source": [
"Testowy Wykres (quality, volatile acidity)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "HbsfwCL7XpNe",
"outputId": "3e95f7ba-b73f-4861-e3a5-c3ec3029e3a5"
},
"source": [
"# Section 3.1 describes the training split, so plot wine_train (not the full `wine` frame).\n",
"# seaborn's barplot shows the mean of 'volatile acidity' for each 'quality' value.\n",
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine_train)"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f8a1e433c10>"
]
},
"metadata": {
"tags": []
},
"execution_count": 10
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAFzCAYAAAB2A95GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYuElEQVR4nO3de7BlZ1km8OdNNzGQCaCmtZ0kmowT0OggYJtBo0CJOAliMoOoiQMqhURniCIoXbGYQkFnqmhgdETUiSjiBTBkQIPTGFQuKgqmA+GSBJw2ENItx3S4X4WQd/44O3rS6cvu0Ot8u/f5/apO7b3W+s7eT3alkues79trVXcHAID1ddzoAAAAG5ESBgAwgBIGADCAEgYAMIASBgAwgBIGADDA5tEBjtTJJ5/cp59++ugYAACHdc0119za3VsOdOyYK2Gnn356du3aNToGAMBhVdVNBztmOhIAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGCAY+4G3seq7du3Z2VlJVu3bs2OHTtGxwEABlPC1snKykr27t07OgYAsCBMRwIADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAyghAEADKCEAQAMMFkJq6rfqqpbqupdBzleVfXLVbW7qt5RVQ+eKgsAwKKZ8kzYbyc59xDHz0ty5uzn4iS/NmEWAICFMlkJ6+6/SPKhQwy5IMnv9Ko3J7lvVX3FVHkAABbJyDVhpyS5ec32ntk+AICld0wszK+qi6tqV1Xt2rdv3+g4AABfsJElbG+S09ZsnzrbdxfdfVl3b+vubVu2bFmXcAAAUxpZwq5M8oOzb0k+JMlHu/sDA/MAAKybzVO9cFW9LMnDk5xcVXuS/GySeyRJd/96kp1JHpVkd5JPJXnCVFkAABbNZCWsuy86zPFO8uSp3h8AYJEdEwvzAQCWjRIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMMDm0QGm8I1P/53REe7ipFs/nk1J3n/rxxcq3zXP/cHREQBgQ3ImDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGCASUtYVZ1bVe+pqt1VdekBjn9lVb2+qt5WVe+oqkdNmQcAYFFMVsKqalOSFyY5L8lZSS6qqrP2G/bfklze3Q9KcmGSX50qDwDAIpnyTNjZSXZ3943d/dkkL09ywX5jOsm9Z8/vk+QfJswDALAwpixhpyS5ec32ntm+tX4uyeOqak+SnUl+/EAvVFUXV9Wuqtq1b9++KbICAKyr0QvzL0ry2919apJHJfndqrpLpu6+rLu3dfe2LVu2rHtIAICjbcoStjfJaWu2T53tW+uJSS5Pku7+myQnJDl5wkwAAAthyhJ2dZIzq+qMqjo+qwvvr9xvzPuTPCJJquprs1rCzDcCAEtvshLW3bcluSTJVUluyOq3IK+rqmdX1fmzYT+V5ElV9fYkL0vyw93dU2UCAFgUm6d88e7emdUF92v3PXPN8+uTnDNlBgCARTR6YT4AwIakhAEADKCEAQAMoIQBAAyghAEADKCEAQAMoIQBAAww6XXCYKTt27dnZWUlW7duzY4dO0bHAYA7UcJYWisrK9m7d//blQ
LAYjAdCQAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwwObRATaK248/8U6PAMDGdtgSVlXXJPmtJC/t7g9PH2k5ffLM7xwdAQBYIPOcCfv+JE9IcnVV7Ury4iSv7e6eNBnHlPc/+9+NjnAXt33oS5Jszm0fummh8n3lM985OgIAC+Cwa8K6e3d3PyPJ/ZK8NKtnxW6qqmdV1ZdMHRAAYBnNtTC/qh6Q5PlJnpvk/yT53iQfS/K66aIBACyvedeEfSTJbya5tLv/aXboLVV1zpThAACW1Txrwr63u29cu6Oqzuju93b3YybKBQCw1OaZjrxizn0AAMzpoGfCquprknxdkvtU1dozXvdOcsLUwQAAltmhpiPvn+TRSe6b5LvX7P94kidNGQoAYNkdtIR19x8l+aOq+ubu/pt1zAQAsPQONR25vbt3JPmBqrpo/+Pd/ROTJgMAWGKHmo68Yfa4az2CAABsJIeajnz17PEl6xcHAGBjONR05KuTHPT+kN19/iSJAAA2gENNRz5v9viYJFuT/N5s+6Ik/zhlKACAZXeo6cg3JklVPb+7t6059Oqqsk4MAOALMM8V80+sqn9zx0ZVnZHkxOkiAQAsv3nuHfnUJG+oqhuTVJKvSvKjk6YCAFhyhy1h3f0nVXVmkq+Z7Xp3d//TtLEAAJbbob4d+e3d/br97huZJF9dVenuV06cDb4gJ59we5LbZo8AsFgOdSbsYUlelzvfN/IOnUQJY6H99AM+MjoCABzUob4d+bOzxyesXxwAgI3hsN+OrKr/UVX3XbP9xVX1C9PGAgBYbvNcouK87v7neZ3u/nCSR00XCQBg+c1TwjZV1RfdsVFV90zyRYcYDwDAYcxTwn4/yZ9X1ROr6olJ/jTJXDf1rqpzq+o9VbW7qi49yJjvq6rrq+q6qnrp/NEBAI5d81wn7DlV9Y4kj5jt+vnuvupwv1dVm5K8MMkjk+xJcnVVXdnd168Zc2aSn0lyTnd/uKq+7O78QwAAHGvmuWJ+uvs1SV5zhK99dpLd3X1jklTVy5NckOT6NWOelOSFs3Vm6e5bjvA9AACOSfN8O/IhVXV1VX2iqj5bVZ+vqo/N8dqnJLl5zfae2b617pfkflX1pqp6c1Wde5AMF1fVrqratW/fvjneGgBgsc2zJuxXklyU5P8luWeSH8nqNOPRsDnJmUkePnuP31h7OYw7dPdl3b2tu7dt2bLlKL01AMA4805H7q6qTd39+SQvrqq3ZXUt16HsTXLamu1TZ/vW2pPkLd39uSTvraq/y2opu3qu9MBC2b59e1ZWVrJ169bs2LFjdByAhTZPCftUVR2f5Nqq2pHkA5nvDNrVSc6sqjOyWr4uTPID+435w6yeAXtxVZ2c1enJG+cNDyyWlZWV7N27/99aABzIPGXq8bNxlyT5ZFbPbn3P4X6pu2+b/c5VSW5Icnl3X1dVz66q82fDrkrywaq6Psnrkzy9uz945P8YAADHlnkuUXHT7OlnkjzrSF68u3cm2bnfvmeued5Jnjb7AQDYMOZaEwYsnnNecM7oCHdx/EeOz3E5Ljd/5OaFyvemH3/T6AgAdzHPdCQAAEfZ3CWsqu41ZRAAgI1knou1fsts4fy7Z9vfUFW/OnkyAIAlNs+ZsF9M8h+SfDBJuvvtSR46ZSgAgGU378Vab66qtbs+P00c4FjW9+rcntvT9+rRUQAW3jwl7Oaq+pYkXVX3SPKUrF73C+BOPnfO50ZHADhmzDMd+WNJnpzVm2/vTfLA2TYAAHfTPBdrvTXJf16HLAAAG8ZBS1hVvSDJQRd2dPdPTJIIAGADONSZsF3rlgIAYIM5aAnr7pesZxAAgI3kUNORv9TdP1lVr84BpiW7+/xJkwEALLFDTUf+7uzxeesRBABgIznUdOQ1s6cP7O7/tfZYVT0lyRunDAYAsMzmuU7YDx1g3w8f5RwAABvKod
aEXZTkB5KcUVVXrjl0UpIPTR0MAGCZHWpN2F8n+UCSk5M8f83+jyd5x5ShAACW3aHWhN2U5KYk37x+cQAAN
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1W_oRCVczIgJ"
},
"source": [
"## 3.2. Zbiór Test"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "LJzygNqKzOWY",
"outputId": "b5751c9a-6f9e-48b8-88c2-0b6feec17af9"
},
"source": [
"wine_test"
],
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>8.8</td>\n",
" <td>0.410</td>\n",
" <td>0.64</td>\n",
" <td>2.2</td>\n",
" <td>0.093</td>\n",
" <td>9.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99860</td>\n",
" <td>3.54</td>\n",
" <td>0.66</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1283</th>\n",
" <td>8.7</td>\n",
" <td>0.630</td>\n",
" <td>0.28</td>\n",
" <td>2.7</td>\n",
" <td>0.096</td>\n",
" <td>17.0</td>\n",
" <td>69.0</td>\n",
" <td>0.99734</td>\n",
" <td>3.26</td>\n",
" <td>0.63</td>\n",
" <td>10.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>408</th>\n",
" <td>10.4</td>\n",
" <td>0.340</td>\n",
" <td>0.58</td>\n",
" <td>3.7</td>\n",
" <td>0.174</td>\n",
" <td>6.0</td>\n",
" <td>16.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.19</td>\n",
" <td>0.70</td>\n",
" <td>11.3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1281</th>\n",
" <td>7.1</td>\n",
" <td>0.460</td>\n",
" <td>0.20</td>\n",
" <td>1.9</td>\n",
" <td>0.077</td>\n",
" <td>28.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99560</td>\n",
" <td>3.37</td>\n",
" <td>0.64</td>\n",
" <td>10.4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1118</th>\n",
" <td>7.1</td>\n",
" <td>0.390</td>\n",
" <td>0.12</td>\n",
" <td>2.1</td>\n",
" <td>0.065</td>\n",
" <td>14.0</td>\n",
" <td>24.0</td>\n",
" <td>0.99252</td>\n",
" <td>3.30</td>\n",
" <td>0.53</td>\n",
" <td>13.3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1461</th>\n",
" <td>6.2</td>\n",
" <td>0.785</td>\n",
" <td>0.00</td>\n",
" <td>2.1</td>\n",
" <td>0.060</td>\n",
" <td>6.0</td>\n",
" <td>13.0</td>\n",
" <td>0.99664</td>\n",
" <td>3.59</td>\n",
" <td>0.61</td>\n",
" <td>10.0</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1016</th>\n",
" <td>8.9</td>\n",
" <td>0.380</td>\n",
" <td>0.40</td>\n",
" <td>2.2</td>\n",
" <td>0.068</td>\n",
" <td>12.0</td>\n",
" <td>28.0</td>\n",
" <td>0.99486</td>\n",
" <td>3.27</td>\n",
" <td>0.75</td>\n",
" <td>12.6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1412</th>\n",
" <td>8.2</td>\n",
" <td>0.240</td>\n",
" <td>0.34</td>\n",
" <td>5.1</td>\n",
" <td>0.062</td>\n",
" <td>8.0</td>\n",
" <td>22.0</td>\n",
" <td>0.99740</td>\n",
" <td>3.22</td>\n",
" <td>0.94</td>\n",
" <td>10.9</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>424</th>\n",
" <td>7.7</td>\n",
" <td>0.960</td>\n",
" <td>0.20</td>\n",
" <td>2.0</td>\n",
" <td>0.047</td>\n",
" <td>15.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99550</td>\n",
" <td>3.36</td>\n",
" <td>0.44</td>\n",
" <td>10.9</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>120</th>\n",
" <td>7.3</td>\n",
" <td>1.070</td>\n",
" <td>0.09</td>\n",
" <td>1.7</td>\n",
" <td>0.178</td>\n",
" <td>10.0</td>\n",
" <td>89.0</td>\n",
" <td>0.99620</td>\n",
" <td>3.30</td>\n",
" <td>0.57</td>\n",
" <td>9.0</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>360 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"75 8.8 0.410 0.64 ... 0.66 10.5 5\n",
"1283 8.7 0.630 0.28 ... 0.63 10.2 6\n",
"408 10.4 0.340 0.58 ... 0.70 11.3 6\n",
"1281 7.1 0.460 0.20 ... 0.64 10.4 6\n",
"1118 7.1 0.390 0.12 ... 0.53 13.3 6\n",
"... ... ... ... ... ... ... ...\n",
"1461 6.2 0.785 0.00 ... 0.61 10.0 4\n",
"1016 8.9 0.380 0.40 ... 0.75 12.6 7\n",
"1412 8.2 0.240 0.34 ... 0.94 10.9 6\n",
"424 7.7 0.960 0.20 ... 0.44 10.9 5\n",
"120 7.3 1.070 0.09 ... 0.57 9.0 5\n",
"\n",
"[360 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1IAtBylEzS8w",
"outputId": "538fe66b-8dc1-4338-d0d8-492fbef2940e"
},
"source": [
"wine_test[\"quality\"].value_counts()"
],
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 155\n",
"6 149\n",
"7 37\n",
"4 16\n",
"8 2\n",
"3 1\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "V-9cwcrczS-3",
"outputId": "78da725e-3105-4d3e-8880-00dbc8cf8910"
},
"source": [
"wine_test.describe(include='all')"
],
"execution_count": 13,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" <td>360.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.348611</td>\n",
" <td>0.518764</td>\n",
" <td>0.275444</td>\n",
" <td>2.542222</td>\n",
" <td>0.086114</td>\n",
" <td>16.093056</td>\n",
" <td>48.777778</td>\n",
" <td>0.996747</td>\n",
" <td>3.301083</td>\n",
" <td>0.653833</td>\n",
" <td>10.368889</td>\n",
" <td>5.586111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.580574</td>\n",
" <td>0.182554</td>\n",
" <td>0.182508</td>\n",
" <td>1.528465</td>\n",
" <td>0.043445</td>\n",
" <td>10.421097</td>\n",
" <td>35.005778</td>\n",
" <td>0.001792</td>\n",
" <td>0.145379</td>\n",
" <td>0.168306</td>\n",
" <td>1.041729</td>\n",
" <td>0.767245</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>5.000000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.042000</td>\n",
" <td>3.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.870000</td>\n",
" <td>0.370000</td>\n",
" <td>8.700000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.200000</td>\n",
" <td>0.380000</td>\n",
" <td>0.120000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>8.000000</td>\n",
" <td>23.000000</td>\n",
" <td>0.995760</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>8.000000</td>\n",
" <td>0.500000</td>\n",
" <td>0.270000</td>\n",
" <td>2.150000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>40.000000</td>\n",
" <td>0.996645</td>\n",
" <td>3.300000</td>\n",
" <td>0.620000</td>\n",
" <td>10.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.420000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>65.750000</td>\n",
" <td>0.997683</td>\n",
" <td>3.390000</td>\n",
" <td>0.720000</td>\n",
" <td>11.000000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.600000</td>\n",
" <td>1.115000</td>\n",
" <td>0.790000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>68.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>3.750000</td>\n",
" <td>1.950000</td>\n",
" <td>14.000000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 360.000000 360.000000 ... 360.000000 360.000000\n",
"mean 8.348611 0.518764 ... 10.368889 5.586111\n",
"std 1.580574 0.182554 ... 1.041729 0.767245\n",
"min 5.000000 0.120000 ... 8.700000 3.000000\n",
"25% 7.200000 0.380000 ... 9.500000 5.000000\n",
"50% 8.000000 0.500000 ... 10.100000 6.000000\n",
"75% 9.200000 0.640000 ... 11.000000 6.000000\n",
"max 15.600000 1.115000 ... 14.000000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wzaUXARnu824"
},
"source": [
"Testowy wykres: średnia wartość 'volatile acidity' dla każdej oceny 'quality' (wykres rysowany na danych z `wine`)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "3GksWzExaHV7",
"outputId": "e7076b4b-79b9-4c9b-a1e8-44b5897175ce"
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine)"
],
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f8a0bec96d0>"
]
},
"metadata": {
"tags": []
},
"execution_count": 14
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAFzCAYAAAB2A95GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYvklEQVR4nO3de5ClZ10n8O8vM8RANlx0Zh1Ngsm6ETcqAo4RjQIlXhLEZBdBExeilEt0lygXZSoUWyjobhUDrK6KulkE8QIRsqDBHYirCChKzISESwhYYyBkRtpMRO5gSPLbP/qMdjozPT1h3n56Tn8+VV3nvJc+55tTqeTbz/Oe563uDgAAa+u40QEAADYiJQwAYAAlDABgACUMAGAAJQwAYAAlDABggM2jAxypLVu29GmnnTY6BgDAYV177bW3dffWgx075krYaaedlt27d4+OAQBwWFV186GOmY4EABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAY4Ji7gfexaseOHVlYWMi2bduyc+fO0XEAgMGUsDWysLCQffv2jY4BAKwTpiMBAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABlDCAAAGUMIAAAZQwgAABpishFXVK6rq1qp63yGOV1X9clXtqar3VNUjpsoCALDeTDkS9ltJzlnh+LlJzpj9XJzk1yfMAgCwrkxWwrr77Uk+tsIp5yf57V70ziQPrKqvmCoPAMB6MvKasJOT3LJke+9s3z1U1cVVtbuqdu/fv39NwgEATOmYuDC/uy/r7u3dvX3r1q2j4wAAfNFGlrB9SU5dsn3KbB8AwNwbWcKuTHLR7FuSj0zyie7+6MA8AABrZvNUL1xVr0nymCRbqmpvkp9Ncp8k6e7fSLIryeOS7Eny2SRPnSoLAMB6M1kJ6+4LD3O8kzx9qvcHAFjPjokL8wEA5o0SBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADDA5tEBpvBNz/nt0RHu4aTbPpVNST5y26fWVb5rX3zR6AgAsCEZCQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYYNISVlXnVNUHq2pPVV16kOMPrqo/q6rrquo9VfW4KfMAAKwXk5WwqtqU5GVJzk1yZpILq+rMZaf91ySv7e6HJ7kgya9NlQcAYD2ZciTsrCR7uvum7r49yeVJzl92Tie5/+z5A5L83YR5AADWjSlL2MlJblmyvXe2b6mfS/LkqtqbZFeSnzzYC1XVxVW1u6p279+/f4qsAABravSF+Rcm+a3uPiXJ45L8TlXdI1N3X9bd27t7+9atW9c8JADA0TZlCduX5NQl26fM9i31Y0lemyTd/VdJTkiyZcJMAADrwpQl7JokZ1TV6VV1fBYvvL9y2TkfSfLYJKmqf5fFEma+EQCYe5OVsO6+I8klSa5KcmMWvwV5Q1W9sKrOm53200meVlXvTvKaJD/a3T1VJgCA9WLzlC/e3buyeMH90n3PX/L8/UnOnjIDAMB6NPrCfACADUkJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYYNLFWmGkHTt2ZGFhIdu2bcvOnTtHxwGAu1HCmFsLCwvZt2/5PeMBYH0wHQkAMI
ASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwgBIGADCAEgYAMMDm0QE2iruOP/FujwDAxqaErZHPnPE9oyMAAOuIEsZR8ZEXfsPoCPdwx8e+NMnm3PGxm9dVvgc//72jIwCwDhz2mrCquraqnl5VD1qLQAAAG8FqLsz/oSRfmeSaqrq8qr63qmriXAAAc+2wJay793T385J8TZJXJ3lFkpur6gVV9aVTBwQAmEerWqKiqh6a5KVJXpzk/yR5UpJPJnnLdNEAAObXYS/Mr6prk3w8yW8mubS7/2l26OqqOnvKcAAA82o13458UnfftHRHVZ3e3R/q7idMlAsAYK6tZjryilXuAwBglQ45ElZVX5vk65I8oKqWjnjdP8kJUwcDAJhnK01HPiTJ45M8MMn3L9n/qSRPmzIUAMC8O2QJ6+4/TPKHVfWt3f1Xa5gJAGDurTQduaO7dyb54aq6cPnx7v6pSZMBAMyxlaYjb5w97l6LIAAAG8lK05FvnD2+au3iAABsDCtNR74xSR/qeHefN0kiAIANYKXpyJfMHp+QZFuS351tX5jk76cMBQAw71aajnxbklTVS7t7+5JDb6wq14kBAHwRVrNi/olV9W8ObFTV6UlOnC4SHB1bTrgrX37fO7LlhLtGRwGAe1jNvSOfleStVXVTkkryVUl+fNJUcBT8zEM/PjoCABzSYUtYd7+5qs5I8rWzXR/o7n+aNhYAwHxb6duR39ndb1l238gk+eqqSne/fuJsAABza6WRsEcneUvuft/IAzqJEgYAcC+t9O3In509PnXt4gAAbAyH/XZkVf33qnrgku0HVdUvrObFq+qcqvpgVe2pqksPcc4PVtX7q+qGqnr16qMDABy7VrNExbnd/c9fM+vuf0zyuMP9UlVtSvKyJOcmOTPJhVV15rJzzkjy3CRnd/fXJXnmEWQHADhmraaEbaqqLzmwUVX3TfIlK5x/wFlJ9nT3Td19e5LLk5y/7JynJXnZrNilu29dXWwAgGPbatYJ+70kf1pVr5xtPzXJam7qfXKSW5Zs703yLcvO+Zokqap3JNmU5Oe6+82reG0AgGPaatYJe1FVvSfJY2e7fr67rzqK739GksckOSXJ26vqG5ZOfyZJVV2c5OIkefCDH3yU3hoAYJzVjISlu9+U5E1H+Nr7kpy6ZPuU2b6l9ia5uru/kORDVfU3WSxl1yx7/8uSXJYk27dv7yPMAQCw7qzm25GPrKprqurTVXV7Vd1ZVZ9cxWtfk+SMqjq9qo5PckGSK5ed8wdZHAVLVW3J4vTkTUf0TwCsGzt27MhFF12UHTt2jI4CsO6tZiTsV7NYoF6XZHuSizK7lmsl3X1HVV2S5KosXu/1iu6+oapemGR3d185O/Y9VfX+JHcmeU53/8O9+0cBRltYWMi+fcsHvAE4mNVOR+6pqk3dfWeSV1bVdVlcWuJwv7crya5l+56/5HknefbsBwBgw1hNCfvsbDrx+qrameSjWd3SFgAAHMJqSthTsli6LknyrCxebP8DU4YCDu/sXzl7dIR7OP7jx+e4HJdbPn7Lusr3jp98x+gIAPewmiUqbp49/XySF0wbBwBgYzCtCAAwgBIGADDAqktYVd1vyiAAABvJahZr/bbZOl4fmG1/Y1X92uTJgGNO369z14l3pe/nxhYAh7Oab0f+YpLvzWy1++5+d1U9atJUwDHpC2d/YXQEgGPGqqYju/uWZbvunCALAMCGsZqRsFuq6tuSdFXdJ8kzktw4bSwAgPm2mpGwn0jy9CQnJ9mX5GGzbQAA7qXVLNZ6W5L/uAZZAAA2jEOWsKr6lSSH/IpTd//UJIkAADaAlUbCdq9ZCgCADeaQJay7X7WWQQAANpKVpiN/qbufWVVvzEGmJbv7vEmTAQDMsZWmI39n9viStQgCALCRrDQdee3s6cO6+38uPVZVz0jytimDAQDMs9WsE/YjB9n3o0c5BwDAhr
LSNWEXJvnhJKdX1ZVLDp2U5GNTBwPg8Hbs2JGFhYVs27YtO3fuHB0HOAIrXRP2l0k+mmRLkpcu2f+pJO+ZM
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "w5xmkUgGzdxs"
},
"source": [
"## 3.3. Cały zbiór"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "thGHHVJXzeGe",
"outputId": "84c93f9c-516a-43fd-d4d8-656907ba66ac"
},
"source": [
"wine"
],
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 5\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 5\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 6\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 5\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 5\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 6\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 6\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 5\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 6\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ua_ctPpVzeKJ",
"outputId": "358ff4d0-bc4b-489e-dd00-b3cf31b4ccfd"
},
"source": [
"wine[\"quality\"].value_counts()"
],
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"5 681\n",
"6 638\n",
"7 199\n",
"4 53\n",
"8 18\n",
"3 10\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 16
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 297
},
"id": "-06v1i7XzeOz",
"outputId": "54a6e104-8137-41a5-a65c-41bf2ff5203f"
},
"source": [
"wine.describe(include='all')"
],
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" <td>1599.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>8.319637</td>\n",
" <td>0.527821</td>\n",
" <td>0.270976</td>\n",
" <td>2.538806</td>\n",
" <td>0.087467</td>\n",
" <td>15.874922</td>\n",
" <td>46.467792</td>\n",
" <td>0.996747</td>\n",
" <td>3.311113</td>\n",
" <td>0.658149</td>\n",
" <td>10.422983</td>\n",
" <td>5.636023</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.741096</td>\n",
" <td>0.179060</td>\n",
" <td>0.194801</td>\n",
" <td>1.409928</td>\n",
" <td>0.047065</td>\n",
" <td>10.460157</td>\n",
" <td>32.895324</td>\n",
" <td>0.001887</td>\n",
" <td>0.154386</td>\n",
" <td>0.169507</td>\n",
" <td>1.065668</td>\n",
" <td>0.807569</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>4.600000</td>\n",
" <td>0.120000</td>\n",
" <td>0.000000</td>\n",
" <td>0.900000</td>\n",
" <td>0.012000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.990070</td>\n",
" <td>2.740000</td>\n",
" <td>0.330000</td>\n",
" <td>8.400000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7.100000</td>\n",
" <td>0.390000</td>\n",
" <td>0.090000</td>\n",
" <td>1.900000</td>\n",
" <td>0.070000</td>\n",
" <td>7.000000</td>\n",
" <td>22.000000</td>\n",
" <td>0.995600</td>\n",
" <td>3.210000</td>\n",
" <td>0.550000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.900000</td>\n",
" <td>0.520000</td>\n",
" <td>0.260000</td>\n",
" <td>2.200000</td>\n",
" <td>0.079000</td>\n",
" <td>14.000000</td>\n",
" <td>38.000000</td>\n",
" <td>0.996750</td>\n",
" <td>3.310000</td>\n",
" <td>0.620000</td>\n",
" <td>10.200000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>9.200000</td>\n",
" <td>0.640000</td>\n",
" <td>0.420000</td>\n",
" <td>2.600000</td>\n",
" <td>0.090000</td>\n",
" <td>21.000000</td>\n",
" <td>62.000000</td>\n",
" <td>0.997835</td>\n",
" <td>3.400000</td>\n",
" <td>0.730000</td>\n",
" <td>11.100000</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.000000</td>\n",
" <td>15.500000</td>\n",
" <td>0.611000</td>\n",
" <td>72.000000</td>\n",
" <td>289.000000</td>\n",
" <td>1.003690</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>8.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity ... alcohol quality\n",
"count 1599.000000 1599.000000 ... 1599.000000 1599.000000\n",
"mean 8.319637 0.527821 ... 10.422983 5.636023\n",
"std 1.741096 0.179060 ... 1.065668 0.807569\n",
"min 4.600000 0.120000 ... 8.400000 3.000000\n",
"25% 7.100000 0.390000 ... 9.500000 5.000000\n",
"50% 7.900000 0.520000 ... 10.200000 6.000000\n",
"75% 9.200000 0.640000 ... 11.100000 6.000000\n",
"max 15.900000 1.580000 ... 14.900000 8.000000\n",
"\n",
"[8 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 17
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "t8Y53QPyu_fO"
},
"source": [
"Testowy wykres: średnia wartość 'volatile acidity' dla każdej oceny 'quality' (cały zbiór `wine`)"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 405
},
"id": "hEe3BYcJaKnF",
"outputId": "ccaf5bc0-889b-453b-f50b-eae3f8c50ee6"
},
"source": [
"fig = plt.figure(figsize = (10,6))\n",
"sns.barplot(x = 'quality', y = 'volatile acidity', data = wine)"
],
"execution_count": 18,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f8a0be6e9d0>"
]
},
"metadata": {
"tags": []
},
"execution_count": 18
},
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAFzCAYAAAB2A95GAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAYxUlEQVR4nO3de5ClZ10n8O8vM4RANgE0o+PmYrJuRKMi4BjRKFAiboKY7CIocUGllOguUQRlKhRbKLi7VY6wumLUzSIXL4AxCzq4gbDKTVFiJhAuSUDHQMiMtElAbgGEJL/9o0+wmcxMnwnz9tNz+vOp6jrnvfQ535xKJd9+nue8b3V3AABYW0eNDgAAsBEpYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADbB4d4FCdcMIJfeqpp46OAQCwqquvvvrW7t6yv2NHXAk79dRTs2vXrtExAABWVVU3HuiY6UgAgAGUMACAAZQwAIABlDAAgAGUMACAAZQwAIABlDAAgAGUMACAAZQwAIABlDAAgAGUMACAAZQwAIABjrgbeB+ptm/fnqWlpWzdujU7duwYHQcAGEwJWyNLS0vZu3fv6BgAwDphOhIAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABggMlKWFW9pKpurqr3HuB4VdWvV9Xuqnp3VT10qiwAAOvNlCNhL0ty9kGOn5Pk9NnPBUl+a8IsAADrymQlrLvfmuSjBznlvCS/28venuT+VfVVU+UBAFhPRq4JOzHJTSu298z2AQAsvCNiYX5VXVBVu6pq1y233DI6DgDAl2xkCdub5OQV2yfN9t1Nd1/S3du6e9uWLVvWJBwAwJRGlrCdSX5k9i3JhyX5eHd/eGAeAIA1s3mqF66qVyZ5ZJITqmpPkl9Icq8k6e7fTnJ5ksck2Z3k00meMlUWAID1ZrIS1t3nr3K8kzxtqvcHAFjPjoiF+QAAi0YJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGEAJAwAYQAkDABhACQMAGGDz6ABT+JZn/e7oCHdz3K2fzKYkH7r1k+sq39W/8iOjIwDAhmQkDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGAAJQwAYAAlDABgACUMAGCASUtYVZ1dVe+vqt1VddF+jp9SVW+qqndW1bur6jFT5gEAWC8mK2FVtSnJxUnOSXJGkvOr6ox9TvsvSS7t7ockeWKS35wqDwDAejLlSNiZSXZ39w3d/bkkr0py3j7ndJLjZ8/vl+QfJswDALBuTFnCTkxy04rtPbN9K/1ikidV1Z4klyf56f29UFVdUFW7qmrXLbfcMkVWAIA1NXph/vlJXtbdJyV5TJLfq6q7ZeruS7p7W3dv27Jly5qHBAA43KYsYXuTnLxi+6TZvpV+PMmlSdLdf53kmCQnTJgJAGBdmLKEXZXk9Ko6raqOzvLC+537nPOhJI9Kkqr6+iyXMPONAMDCm6yEdfftSS5MckWS67P8Lchrq+r5VXXu7LSfS/LUqnpXklcm+bHu7qkyAQCsF5unfPHuvjzLC+5X7nvuiufXJTlrygwAAOvR6IX5AAAbkhIGADCAEgYAMIASBgAwgBIGADCAEgYAMIASBgAwwKTXCYORtm/fnqWlpWzdujU7duwYHQcAvogSxsJaWlrK3r373q4UANYH05EAAA
MoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADKGEAAAMoYQAAAyhhAAADbB4dgMXwoed/0+gId3P7R78syebc/tEb11W+U577ntERAFgHVh0Jq6qrq+ppVfWAtQi0qO48+tjcce/jc+fRx46OAgCsA/OMhP1QkqckuaqqdiV5aZI3dHdPmmzB3Hb6946OAACsI6uOhHX37u5+TpKvTfKKJC9JcmNVPa+qvmzqgAAAi2iuhflV9aAkL0zyK0n+T5InJPlEkjdOFw0AYHGtOh1ZVVcn+ViS30lyUXf/8+zQlVV11pThAAAW1Txrwp7Q3Tes3FFVp3X3B7r7cRPlAgBYaPNMR1425z4AAOZ0wJGwqvq6JN+Q5H5VtXLE6/gkx0wdDABgkR1sOvKBSR6b5P5Jvn/F/k8meeqUoQAAFt0BS1h3/0mSP6mqb+/uv17DTAAAC+9g05Hbu3tHkh+uqvP3Pd7dPzNpMgCABXaw6cjrZ4+71iIIAMBGcrDpyNfOHl++dnEAADaGg01HvjbJAe8P2d3nTpIIAGADONh05Atmj49LsjXJ78+2z0/yj1OGAgBYdAebjnxLklTVC7t724pDr60q68QAAL4E81wx/9iq+jd3bVTVaUmOnS4SAMDim+fekc9I8uaquiFJJfnqJD85aSo4DE445s4kt88eAWB9WbWEdffrq+r0JF832/W+7v7naWPBl+7nH/Sx0REA4IAO9u3I7+7uN+5z38gk+ZqqSne/euJsAAAL62AjYY9I8sZ88X0j79JJlDAAgHvoYN+O/IXZ41PWLg4AwMaw6rcjq+q/V9X9V2w/oKr+67SxAAAW2zyXqDinu7+wwrm7/ynJY+Z58ao6u6reX1W7q+qiA5zzg1V1XVVdW1WvmC82AMCRbZ5LVGyqqnvf9Y3IqrpPknuv9ktVtSnJxUkenWRPkquqamd3X7finNOTPDvJWd39T1X1FffkHwIA4EgzTwn7gyR/XlUvnW0/Jck8N/U+M8nu7r4hSarqVUnOS3LdinOemuTi2ehauvvmeYMDABzJ5rlO2C9X1buTPGq265e6+4o5XvvEJDet2N6T5Nv2Oedrk6Sq3pZkU5Jf7O7X7/tCVXVBkguS5JRTTpnjrQEA1rd5RsLS3a9L8rqJ3v/0JI9MclKSt1bVN61cgzZ7/0uSXJIk27Zt6wlyAACsqVVLWFU9LMmLknx9kqOzPGJ1W3cfv8qv7k1y8ortk2b7VtqT5Mru/nySD1TV32a5lF01X3xgPdm+fXuWlpaydevW7NixY3QcgHVtnm9H/kaS85P8XZL7JPmJLC+4X81VSU6vqtOq6ugkT0yyc59z/jjLo2CpqhOyPD15w1zJgXVnaWkpe/fuzdLS0ugoAOvePCUs3b07yabuvqO7X5rk7Dl+5/YkFya5Isn1SS7t7mur6vlVde7stCuSfKSqrkvypiTP6u6P3JN/EACAI8k8a8I+PRvJuqaqdiT5cOYvb5cnuXyffc9d8byTPHP2AwCwYcxTpp48O+/CJLdleZ3XD0wZCgBg0c1ziYobZ08/m+R508YB5nXWi84aHeFujv7Y0TkqR+Wmj920rvK97affNjoCwN3MNa0IAMDhpYQBAAwwdwmrqvtOGQQAYCNZtYRV1XfMLiHxvtn2N1fVb06eDABggc0zEvarSf5dko8kSXe/K8nDpwwFHJn6vp07j70zfV93FwNYzbz3jrypqlbuumOaOMCR7PNnfX50BIAjxjwl7Kaq+o4kXVX3SvL0LF8BHwCAe2ie6cifSvK0JCdm+QbcD55tAwBwD81zsdZbk/zHNcgCALBhHLCEVdWLkhxwdW13/8wkiQAANoCDjYTtWrMUAAAbzAFLWHe/fC2DAABsJAebjvy17v7Zqnpt9jMt2d3nTpoMAGCBHWw68vdmjy9YiyAAABvJwaYjr549fXB3/8+Vx6rq6UneMmUwAIBFNs91wn
50P/t+7DDnAADYUA62Juz8JD+c5LSq2rni0HFJPjp1MACARXawNWF/leTDSU5I8sIV+z+Z5N1ThgJgPtu3b
"text/plain": [
"<Figure size 720x432 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ftWOC-do2Pq-"
},
"source": [
"# 4. Normalizacja"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Wm0EM2hj4s6V"
},
"source": [
"Normalizacja kolumny 'quality' do zakresu od 0 do 20. Nie jest ona konieczna, ale została wykonana w celach demonstracyjnych."
]
},
{
"cell_type": "code",
"metadata": {
"id": "EkZQ6Hpy2Tj_"
},
"source": [
"wine[\"quality\"]=((wine[\"quality\"]-wine[\"quality\"].min())/(wine[\"quality\"].max()-wine[\"quality\"].min()))*20"
],
"execution_count": 19,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "_bQgYfct3Tir",
"outputId": "b15a9922-3a36-47af-eb8d-762adf593ae7"
},
"source": [
"wine"
],
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 8.0\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 8.0\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 12.0\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 8.0\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 12.0\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 12.0\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 8.0\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 12.0\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "I1AwZoyN4RHs",
"outputId": "81a417a4-236b-41e1-8d26-4462b2e13711"
},
"source": [
"wine[\"quality\"].value_counts()"
],
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"8.0 681\n",
"12.0 638\n",
"16.0 199\n",
"4.0 53\n",
"20.0 18\n",
"0.0 10\n",
"Name: quality, dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 21
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XBU3z_of414w"
},
"source": [
"# 5. Usuwanie artefaktów"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KCstRwQp5-X1"
},
"source": [
"### Na szczęście w moim zbiorze nie ma ani pustych linijek, ani przykładów z niepoprawnymi wartościami"
]
},
{
"cell_type": "code",
"metadata": {
"id": "EJqksTP545UV"
},
"source": [
"# Znajdźmy pustą linijkę:\n",
"! grep -P \"^$\" -n winequality-red.csv"
],
"execution_count": 22,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "8DuoPn3Fa0kP"
},
"source": [
"Szukanie i usuwanie brakujących wartości (\"NA\"). Dokumentacja `DataFrame.dropna`: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dropna.html"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "REYF2AWjz_lr",
"outputId": "148c1b42-d301-4208-e09a-2333ce73c5e1"
},
"source": [
"wine.isnull().sum()"
],
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"fixed acidity 0\n",
"volatile acidity 0\n",
"citric acid 0\n",
"residual sugar 0\n",
"chlorides 0\n",
"free sulfur dioxide 0\n",
"total sulfur dioxide 0\n",
"density 0\n",
"pH 0\n",
"sulphates 0\n",
"alcohol 0\n",
"quality 0\n",
"dtype: int64"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "RbkqNj9_akcU"
},
"source": [
"wine.dropna(inplace=True) "
],
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 419
},
"id": "4WylJo9malyG",
"outputId": "8858109b-c7e8-4ddc-de07-790bcb39c5a4"
},
"source": [
"wine"
],
"execution_count": 25,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed acidity</th>\n",
" <th>volatile acidity</th>\n",
" <th>citric acid</th>\n",
" <th>residual sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free sulfur dioxide</th>\n",
" <th>total sulfur dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.880</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.99680</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.760</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.99700</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.280</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.99800</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.700</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.99780</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1594</th>\n",
" <td>6.2</td>\n",
" <td>0.600</td>\n",
" <td>0.08</td>\n",
" <td>2.0</td>\n",
" <td>0.090</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99490</td>\n",
" <td>3.45</td>\n",
" <td>0.58</td>\n",
" <td>10.5</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1595</th>\n",
" <td>5.9</td>\n",
" <td>0.550</td>\n",
" <td>0.10</td>\n",
" <td>2.2</td>\n",
" <td>0.062</td>\n",
" <td>39.0</td>\n",
" <td>51.0</td>\n",
" <td>0.99512</td>\n",
" <td>3.52</td>\n",
" <td>0.76</td>\n",
" <td>11.2</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1596</th>\n",
" <td>6.3</td>\n",
" <td>0.510</td>\n",
" <td>0.13</td>\n",
" <td>2.3</td>\n",
" <td>0.076</td>\n",
" <td>29.0</td>\n",
" <td>40.0</td>\n",
" <td>0.99574</td>\n",
" <td>3.42</td>\n",
" <td>0.75</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1597</th>\n",
" <td>5.9</td>\n",
" <td>0.645</td>\n",
" <td>0.12</td>\n",
" <td>2.0</td>\n",
" <td>0.075</td>\n",
" <td>32.0</td>\n",
" <td>44.0</td>\n",
" <td>0.99547</td>\n",
" <td>3.57</td>\n",
" <td>0.71</td>\n",
" <td>10.2</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1598</th>\n",
" <td>6.0</td>\n",
" <td>0.310</td>\n",
" <td>0.47</td>\n",
" <td>3.6</td>\n",
" <td>0.067</td>\n",
" <td>18.0</td>\n",
" <td>42.0</td>\n",
" <td>0.99549</td>\n",
" <td>3.39</td>\n",
" <td>0.66</td>\n",
" <td>11.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1599 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" fixed acidity volatile acidity citric acid ... sulphates alcohol quality\n",
"0 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"1 7.8 0.880 0.00 ... 0.68 9.8 8.0\n",
"2 7.8 0.760 0.04 ... 0.65 9.8 8.0\n",
"3 11.2 0.280 0.56 ... 0.58 9.8 12.0\n",
"4 7.4 0.700 0.00 ... 0.56 9.4 8.0\n",
"... ... ... ... ... ... ... ...\n",
"1594 6.2 0.600 0.08 ... 0.58 10.5 8.0\n",
"1595 5.9 0.550 0.10 ... 0.76 11.2 12.0\n",
"1596 6.3 0.510 0.13 ... 0.75 11.0 12.0\n",
"1597 5.9 0.645 0.12 ... 0.71 10.2 8.0\n",
"1598 6.0 0.310 0.47 ... 0.66 11.0 12.0\n",
"\n",
"[1599 rows x 12 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 25
}
]
}
]
}