2023-03-25 11:59:49 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import sklearn.model_selection"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset wine (C:/Users/s487176/.cache/huggingface/datasets/mstz___wine/wine/1.0.0/0913b614badc418a000d75d098776831f39ebf5ee208ecd3cfad4d5db1418d76)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a8f1b9db0c8b41e1904e16e22ae351e0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from datasets import load_dataset\n",
"\n",
"dataset = load_dataset(\"mstz/wine\", \"wine\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset({\n",
" features: ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality', 'color'],\n",
" num_rows: 6497\n",
"})"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset[\"train\"]"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"wine_dataset = pd.DataFrame(dataset[\"train\"])"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed_acidity</th>\n",
" <th>volatile_acidity</th>\n",
" <th>citric_acid</th>\n",
" <th>residual_sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free_sulfur_dioxide</th>\n",
" <th>total_sulfur_dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" <th>color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7.4</td>\n",
" <td>0.70</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.9978</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>7.8</td>\n",
" <td>0.88</td>\n",
" <td>0.00</td>\n",
" <td>2.6</td>\n",
" <td>0.098</td>\n",
" <td>25.0</td>\n",
" <td>67.0</td>\n",
" <td>0.9968</td>\n",
" <td>3.20</td>\n",
" <td>0.68</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>7.8</td>\n",
" <td>0.76</td>\n",
" <td>0.04</td>\n",
" <td>2.3</td>\n",
" <td>0.092</td>\n",
" <td>15.0</td>\n",
" <td>54.0</td>\n",
" <td>0.9970</td>\n",
" <td>3.26</td>\n",
" <td>0.65</td>\n",
" <td>9.8</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.2</td>\n",
" <td>0.28</td>\n",
" <td>0.56</td>\n",
" <td>1.9</td>\n",
" <td>0.075</td>\n",
" <td>17.0</td>\n",
" <td>60.0</td>\n",
" <td>0.9980</td>\n",
" <td>3.16</td>\n",
" <td>0.58</td>\n",
" <td>9.8</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>7.4</td>\n",
" <td>0.70</td>\n",
" <td>0.00</td>\n",
" <td>1.9</td>\n",
" <td>0.076</td>\n",
" <td>11.0</td>\n",
" <td>34.0</td>\n",
" <td>0.9978</td>\n",
" <td>3.51</td>\n",
" <td>0.56</td>\n",
" <td>9.4</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar chlorides \\\n",
"0 7.4 0.70 0.00 1.9 0.076 \n",
"1 7.8 0.88 0.00 2.6 0.098 \n",
"2 7.8 0.76 0.04 2.3 0.092 \n",
"3 11.2 0.28 0.56 1.9 0.075 \n",
"4 7.4 0.70 0.00 1.9 0.076 \n",
"\n",
" free_sulfur_dioxide total_sulfur_dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.9978 3.51 0.56 \n",
"1 25.0 67.0 0.9968 3.20 0.68 \n",
"2 15.0 54.0 0.9970 3.26 0.65 \n",
"3 17.0 60.0 0.9980 3.16 0.58 \n",
"4 11.0 34.0 0.9978 3.51 0.56 \n",
"\n",
" alcohol quality color \n",
"0 9.4 5 0 \n",
"1 9.8 5 0 \n",
"2 9.8 5 0 \n",
"3 9.8 6 0 \n",
"4 9.4 5 0 "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset.head()  # preview the data"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed_acidity</th>\n",
" <th>volatile_acidity</th>\n",
" <th>citric_acid</th>\n",
" <th>residual_sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free_sulfur_dioxide</th>\n",
" <th>total_sulfur_dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" <th>color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>7.215307</td>\n",
" <td>0.339666</td>\n",
" <td>0.318633</td>\n",
" <td>5.443235</td>\n",
" <td>0.056034</td>\n",
" <td>30.525319</td>\n",
" <td>115.744574</td>\n",
" <td>0.994697</td>\n",
" <td>3.218501</td>\n",
" <td>0.531268</td>\n",
" <td>10.491801</td>\n",
" <td>5.818378</td>\n",
" <td>0.753886</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1.296434</td>\n",
" <td>0.164636</td>\n",
" <td>0.145318</td>\n",
" <td>4.757804</td>\n",
" <td>0.035034</td>\n",
" <td>17.749400</td>\n",
" <td>56.521855</td>\n",
" <td>0.002999</td>\n",
" <td>0.160787</td>\n",
" <td>0.148806</td>\n",
" <td>1.192712</td>\n",
" <td>0.873255</td>\n",
" <td>0.430779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>3.800000</td>\n",
" <td>0.080000</td>\n",
" <td>0.000000</td>\n",
" <td>0.600000</td>\n",
" <td>0.009000</td>\n",
" <td>1.000000</td>\n",
" <td>6.000000</td>\n",
" <td>0.987110</td>\n",
" <td>2.720000</td>\n",
" <td>0.220000</td>\n",
" <td>8.000000</td>\n",
" <td>3.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>6.400000</td>\n",
" <td>0.230000</td>\n",
" <td>0.250000</td>\n",
" <td>1.800000</td>\n",
" <td>0.038000</td>\n",
" <td>17.000000</td>\n",
" <td>77.000000</td>\n",
" <td>0.992340</td>\n",
" <td>3.110000</td>\n",
" <td>0.430000</td>\n",
" <td>9.500000</td>\n",
" <td>5.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7.000000</td>\n",
" <td>0.290000</td>\n",
" <td>0.310000</td>\n",
" <td>3.000000</td>\n",
" <td>0.047000</td>\n",
" <td>29.000000</td>\n",
" <td>118.000000</td>\n",
" <td>0.994890</td>\n",
" <td>3.210000</td>\n",
" <td>0.510000</td>\n",
" <td>10.300000</td>\n",
" <td>6.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>7.700000</td>\n",
" <td>0.400000</td>\n",
" <td>0.390000</td>\n",
" <td>8.100000</td>\n",
" <td>0.065000</td>\n",
" <td>41.000000</td>\n",
" <td>156.000000</td>\n",
" <td>0.996990</td>\n",
" <td>3.320000</td>\n",
" <td>0.600000</td>\n",
" <td>11.300000</td>\n",
" <td>6.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>15.900000</td>\n",
" <td>1.580000</td>\n",
" <td>1.660000</td>\n",
" <td>65.800000</td>\n",
" <td>0.611000</td>\n",
" <td>289.000000</td>\n",
" <td>440.000000</td>\n",
" <td>1.038980</td>\n",
" <td>4.010000</td>\n",
" <td>2.000000</td>\n",
" <td>14.900000</td>\n",
" <td>9.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 7.215307 0.339666 0.318633 5.443235 \n",
"std 1.296434 0.164636 0.145318 4.757804 \n",
"min 3.800000 0.080000 0.000000 0.600000 \n",
"25% 6.400000 0.230000 0.250000 1.800000 \n",
"50% 7.000000 0.290000 0.310000 3.000000 \n",
"75% 7.700000 0.400000 0.390000 8.100000 \n",
"max 15.900000 1.580000 1.660000 65.800000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.056034 30.525319 115.744574 0.994697 \n",
"std 0.035034 17.749400 56.521855 0.002999 \n",
"min 0.009000 1.000000 6.000000 0.987110 \n",
"25% 0.038000 17.000000 77.000000 0.992340 \n",
"50% 0.047000 29.000000 118.000000 0.994890 \n",
"75% 0.065000 41.000000 156.000000 0.996990 \n",
"max 0.611000 289.000000 440.000000 1.038980 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 3.218501 0.531268 10.491801 5.818378 0.753886 \n",
"std 0.160787 0.148806 1.192712 0.873255 0.430779 \n",
"min 2.720000 0.220000 8.000000 3.000000 0.000000 \n",
"25% 3.110000 0.430000 9.500000 5.000000 1.000000 \n",
"50% 3.210000 0.510000 10.300000 6.000000 1.000000 \n",
"75% 3.320000 0.600000 11.300000 6.000000 1.000000 \n",
"max 4.010000 2.000000 14.900000 9.000000 1.000000 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAGbCAYAAAAx9RHcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAeNklEQVR4nO3df0xV9/3H8de9XH4J3g5RuLSG1uHEkW1F6zWQDMdMvqzZ3BZGl2URs7pqWWdGh2Omi/SXzq5LUTubudaotaYSZwbp1nRZTU2WtYtSYO2aFVmrVWM7gVIptyJwy733+4e5d15tC5dJL2/u85EY5ZzP+eRjOYc+PfdwcYRCoZAAAACMcsZ7AQAAAP8LYgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0V7wX8GkIhUIKBnmj40ThdDr4fAPTFNd34nA6HXI4HOMamxAxEwyGdP78YLyXgU+By+VUVlaGfL6LGh0Nxns5AK4hru/EMmtWhpKSxhczvMwEAABMI2YAAIBpxAwAADAt5pjp6elRYWHhVb9aWlokScePH1d1dbWKi4u1fPly7d+/P+r4YDCoHTt2qKysTMXFxVq7dq3Onj0bNWasOQAAAMJifgC4q6tLqampeuGFF6KeMp45c6b6+/u1evVqLV++XA8++KBeffVVPfjgg8rIyFBVVZUkaefOnWpqatLDDz8sj8ejRx55RGvWrNGzzz6rlJSUcc0BAAAQFnPMvPHGG7rpppuUk5Nz1b6nnnpKycnJ2rRpk1wulwoKCnTmzBnt2rVLVVVV8vv92rt3r+rr61VeXi5J2r59u8rKynT48GGtWLFChw4d+sQ5AAAALhfzy0z//ve/VVBQ8JH72tvbtXTpUrlc/22kkpISnT59Wn19ferq6tLg4KBKS0sj+91ut4qKitTW1jauOQAAAC43oTszWVlZWrlypU6dOqUbb7xRd911l5YtW6bu7m4tWLAganz4Ds65c+fU3d0tScrLy7tqTHjfWHPMnj071iVLuvT+BJj+kpKcUb8DmD64vvFxYoqZ0dFRvfXWW5o/f77uueceZWZm6rnnntOdd96pJ598UsPDw0pJSYk6JjU1VZI0MjKioaEhSfrIMQMDA5I05hwT4XQ6lJWVMaFjYZPbnR7vJQCYJFzfuFJMMeNyudTa2qqkpCSlpaVJkr7whS/ozTff1J49e5SWlia/3x91TDhAZsyYETnG7/dH/hwek55+6eQca46JCAZD8vkuTuhY2JKU5JTbnS6fb0iBAO8QCkwnXN+Jxe1OH/dduJhfZsrIuPoOx+c+9zm99NJL8ng86u3tjdoX/jg3N1ejo6ORbfn5+VFjCgsLJWnMOSaKt75OLIFAkM85ME1xfeNKMb3w+Oabb2rx4sVqbW2N2v6vf/1L8+fPl9frVUdHhwKBQGTfsWPHNG/ePGVnZ2vhwoXKzMyMOt7n86mzs1Ner1eSxpwDAADgcjHFTEFBgT772c9q06ZNam9v18mTJ/WrX/1Kr776qu666y5VVVXpwoUL2rhxo06cOKGWlhbt27dPNTU1ki49K1NdXa3GxkYdOXJEXV1dqqurk8fjUUVFhSSNOQcAAMDlHKFQKKafpd7X16etW7fqxRdflM/nU1FRkerr67VkyRJJ0muvvaYtW7aos7NTc+bM0Q9/+ENVV1dHjg8EAtq2bZtaWlo0PDwsr9er++67T3Pnzo2MGWuOWAUCQX5qdoII/1Td/v5BbkMD0wzXd2K59FOzx3fPJeaYsShRY8bpdMjpHN+PT58uEvkBwWAwpGBw2l/OSGDETGKJJWZifgAYNjidDn3mMzMS9v0YEvFbNwOBoN5//yJBAyDhEDPTlNPpUFKSU40HOvR2zwfxXg4m2dzcmapfeYucTgcxAyDhEDPT3Ns9H+jkOwPxXgYAAJMmMV+DAAAA0wYxAwAATCNmAACAacQMAAAwjZgBAACmET
MAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYNqEY+bUqVNatGiRWlpaItuOHz+u6upqFRcXa/ny5dq/f3/UMcFgUDt27FBZWZmKi4u1du1anT17NmrMWHMAAABcbkIx8+GHH6q+vl4XL16MbOvv79fq1auVn5+v5uZmrVu3To2NjWpubo6M2blzp5qamrR582YdPHhQwWBQa9askd/vH/ccAAAAl3NN5KDHHntMmZmZUdsOHTqk5ORkbdq0SS6XSwUFBTpz5ox27dqlqqoq+f1+7d27V/X19SovL5ckbd++XWVlZTp8+LBWrFgx5hwAAABXivnOTFtbm37/+9/r4Ycfjtre3t6upUuXyuX6bx+VlJTo9OnT6uvrU1dXlwYHB1VaWhrZ73a7VVRUpLa2tnHNAQAAcKWY7sz4fD5t2LBBDQ0NysvLi9rX3d2tBQsWRG3LycmRJJ07d07d3d2SdNVxOTk5kX1jzTF79uxYlhvF5UqsZ52TkhLr74tL+LxjOguf35znuFJMMfPAAw9o0aJF+uY3v3nVvuHhYaWkpERtS01NlSSNjIxoaGhIkj5yzMDAwLjmmCin06GsrIwJHw9Y4Xanx3sJwKTjPMeVxh0zzzzzjNrb2/Xss89+5P60tLTIg7xh4QCZMWOG0tLSJEl+vz/y5/CY9PT0cc0xUcFgSD7fxbEHTiNJSU4u+ATk8w0pEAjGexnApAh/XeM8Twxud/q478KNO2aam5v13nvvRR7eDbv//vv15z//WR6PR729vVH7wh/n5uZqdHQ0si0/Pz9qTGFhoSSNOcf/YnSUEx/TXyAQ5FzHtMd5jiuNO2YaGxs1PDwcta2iokK1tbX61re+pT/+8Y86ePCgAoGAkpKSJEnHjh3TvHnzlJ2drZkzZyozM1Otra2RmPH5fOrs7FR1dbUkyev1fuIcAAAAVxr3U1S5ubm68cYbo35JUnZ2tnJzc1VVVaULFy5o48aNOnHihFpaWrRv3z7V1NRIuvSsTHV1tRobG3XkyBF1dXWprq5OHo9HFRUVkjTmHAAAAFea0PvMfJTs7Gzt3r1bW7ZsUWVlpebMmaMNGzaosrIyMqa2tlajo6NqaGjQ8PCwvF6v9uzZo+Tk5HHPAQAAcDlHKBQKxXsRky0QCOr8+cF4L+NT5XI5lZWVoZ9u+6tOvjMQ7+VgkhXccJ0eXV+u/v5BniXAtBX+usZ5nhhmzcoY9wPAfLM+AAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAA
AwjZgBAACmETMAAMA0YgYAAJgWc8y89957+vnPf66SkhItWrRId955p06ePBnZf/z4cVVXV6u4uFjLly/X/v37o44
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of how many samples fall into each value of the color column.\n",
"# The trailing semicolon suppresses the Axes repr so only the figure is shown.\n",
"wine_dataset[\"color\"].value_counts().plot(\n",
"    kind=\"bar\", title=\"Samples per color class\", xlabel=\"color\", ylabel=\"count\"\n",
");"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.2964337577998153"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset[\"fixed_acidity\"].std()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([], dtype=int64), array([], dtype=int64))"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"np.where(pd.isnull(wine_dataset))  # check whether any missing values exist"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Max-abs scaling: divide every column by its largest absolute value so that\n",
"# all values fall within [-1, 1]. Done as one vectorised division instead of\n",
"# a per-column Python loop; the resulting values are the same.\n",
"# NOTE(review): the scale factors are computed on the full dataset, i.e. before\n",
"# the train/test split performed further down, and the color and quality\n",
"# columns are scaled as well - confirm that this is intended.\n",
"wine_dataset = wine_dataset.div(wine_dataset.abs().max(), axis=\"columns\")"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed_acidity</th>\n",
" <th>volatile_acidity</th>\n",
" <th>citric_acid</th>\n",
" <th>residual_sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free_sulfur_dioxide</th>\n",
" <th>total_sulfur_dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" <th>color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" <td>6497.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>0.453793</td>\n",
" <td>0.214978</td>\n",
" <td>0.191948</td>\n",
" <td>0.082724</td>\n",
" <td>0.091708</td>\n",
" <td>0.105624</td>\n",
" <td>0.263056</td>\n",
" <td>0.957378</td>\n",
" <td>0.802619</td>\n",
" <td>0.265634</td>\n",
" <td>0.704148</td>\n",
" <td>0.646486</td>\n",
" <td>0.753886</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.081537</td>\n",
" <td>0.104200</td>\n",
" <td>0.087541</td>\n",
" <td>0.072307</td>\n",
" <td>0.057338</td>\n",
" <td>0.061417</td>\n",
" <td>0.128459</td>\n",
" <td>0.002886</td>\n",
" <td>0.040097</td>\n",
" <td>0.074403</td>\n",
" <td>0.080048</td>\n",
" <td>0.097028</td>\n",
" <td>0.430779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.238994</td>\n",
" <td>0.050633</td>\n",
" <td>0.000000</td>\n",
" <td>0.009119</td>\n",
" <td>0.014730</td>\n",
" <td>0.003460</td>\n",
" <td>0.013636</td>\n",
" <td>0.950076</td>\n",
" <td>0.678304</td>\n",
" <td>0.110000</td>\n",
" <td>0.536913</td>\n",
" <td>0.333333</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>0.402516</td>\n",
" <td>0.145570</td>\n",
" <td>0.150602</td>\n",
" <td>0.027356</td>\n",
" <td>0.062193</td>\n",
" <td>0.058824</td>\n",
" <td>0.175000</td>\n",
" <td>0.955110</td>\n",
" <td>0.775561</td>\n",
" <td>0.215000</td>\n",
" <td>0.637584</td>\n",
" <td>0.555556</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>0.440252</td>\n",
" <td>0.183544</td>\n",
" <td>0.186747</td>\n",
" <td>0.045593</td>\n",
" <td>0.076923</td>\n",
" <td>0.100346</td>\n",
" <td>0.268182</td>\n",
" <td>0.957564</td>\n",
" <td>0.800499</td>\n",
" <td>0.255000</td>\n",
" <td>0.691275</td>\n",
" <td>0.666667</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>0.484277</td>\n",
" <td>0.253165</td>\n",
" <td>0.234940</td>\n",
" <td>0.123100</td>\n",
" <td>0.106383</td>\n",
" <td>0.141869</td>\n",
" <td>0.354545</td>\n",
" <td>0.959585</td>\n",
" <td>0.827930</td>\n",
" <td>0.300000</td>\n",
" <td>0.758389</td>\n",
" <td>0.666667</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.453793 0.214978 0.191948 0.082724 \n",
"std 0.081537 0.104200 0.087541 0.072307 \n",
"min 0.238994 0.050633 0.000000 0.009119 \n",
"25% 0.402516 0.145570 0.150602 0.027356 \n",
"50% 0.440252 0.183544 0.186747 0.045593 \n",
"75% 0.484277 0.253165 0.234940 0.123100 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.091708 0.105624 0.263056 0.957378 \n",
"std 0.057338 0.061417 0.128459 0.002886 \n",
"min 0.014730 0.003460 0.013636 0.950076 \n",
"25% 0.062193 0.058824 0.175000 0.955110 \n",
"50% 0.076923 0.100346 0.268182 0.957564 \n",
"75% 0.106383 0.141869 0.354545 0.959585 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.802619 0.265634 0.704148 0.646486 0.753886 \n",
"std 0.040097 0.074403 0.080048 0.097028 0.430779 \n",
"min 0.678304 0.110000 0.536913 0.333333 0.000000 \n",
"25% 0.775561 0.215000 0.637584 0.555556 1.000000 \n",
"50% 0.800499 0.255000 0.691275 0.666667 1.000000 \n",
"75% 0.827930 0.300000 0.758389 0.666667 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset.describe(include='all')  # check the values after normalization"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"652 1.000000\n",
"442 0.981132\n",
"557 0.981132\n",
"554 0.974843\n",
"555 0.974843\n",
"243 0.943396\n",
"244 0.943396\n",
"544 0.899371\n",
"3125 0.893082\n",
"374 0.880503\n",
"Name: fixed_acidity, dtype: float64"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset[\"fixed_acidity\"].nlargest(10)  # check that the highest values look sensible"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 4408\n",
"0.0 1439\n",
"Name: color, dtype: int64"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Split into train (90%) and a hold-out part (10%), stratified on color so\n",
"# that both parts keep the class ratio of the full dataset.\n",
"wine_train, wine_test = train_test_split(\n",
"    wine_dataset,\n",
"    test_size=0.1,\n",
"    random_state=1,\n",
"    stratify=wine_dataset[\"color\"],\n",
")\n",
"wine_train[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 490\n",
"0.0 160\n",
"Name: color, dtype: int64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Split the hold-out part in half into test and validation sets, stratified on color.\n",
"# NOTE(review): wine_test is rebound from itself here, so running this cell a\n",
"# second time would halve the test set again - run it only once per kernel session.\n",
"wine_test, wine_val = sklearn.model_selection.train_test_split(\n",
"    wine_test,\n",
"    test_size=0.5,\n",
"    random_state=1,\n",
"    stratify=wine_test[\"color\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 245\n",
"0.0 80\n",
"Name: color, dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 245\n",
"0.0 80\n",
"Name: color, dtype: int64"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_val[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns\n",
"sns.set_theme()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(wine_dataset.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
2023-03-25 12:01:55 +01:00
"outputs": [
{
"data": {
"text/plain": [
"<seaborn.axisgrid.PairGrid at 0x1dfc8961690>"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAC+kAAAt/CAYAAAArLjYDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddXwc17XA8d+dmUUxS7bMzOzYThyHqUka5qRpkzZp0+YFCimlzJRym6RJ2jA2zA2bmZlkWbKYtTTw/lhbtqJdmUQrne/n89LnubvS7Ghn5s65556rHMdxEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCHEcdO6eweEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCiN5CkvSFEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCiA4iSfpCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghRAeRJH0hhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQooNIkr4QQgghhBBCCCGEEEIIIYQQQgghhBBCCCGEEEII0UEkSV8IIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCCGE6CCSpC+EEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCdBBJ0hdCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghOogk6QshhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQHcTo7h1IFJZlU13ddNw/R9MUmZlJVFc3YdtOB+xZzySfs3fp6s+Zk5PSYT/rSM/dvvK37EnkmHe9rjjm3XH+Hiv5DnY+OcZdo6OOc085f+V70/XkmHePjjzucv72TXK8u15HH/OecO7K96j7yLHvPh1x7HvC+XuAfJe6lhzvrtUZx7snnb89jXy/j44cr6PT2+6/IjY5LxJPV/3NOur8lXO3e8k53jN15t9F7r0Hyfe/fXJ82tcdx6ezzl/5W8sxkM+fWHlXomeQSvpdTNMUSik0TXX3rnQq+Zy9S1/4nH3hM/Y0csy7nhzz1uR4dD45xl2jtx3n3vZ5EoEc8+7RG497b/xMPZkc767XG495b/xMiUKOfffpbce+t32enk6Od9eS49215HgfHTleR0eOV98gf+fEI38zcTTk+9Izyd+la8hxbp8cn/b1puPTmz7Lserrx0A+f9/+/OLYSJK+EEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhBBCCNFBJElfCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQgghhOggkqQvhBBCCCGEEEIIIYQQQgghhBBCCCGEEEIIIYQQQnSQhEvS/+c//8n111/f7mtqamq4++67mTFjBjNnzuRHP/oRgUCgi/ZQCCE6h1Lgdhu43Tqaprp7d4QQvZCua7jdOi6X3t27IkSHcrl03G4dXU+4xx8hRDs0TeF2R89vIUTXkn6jEH2b3IOF6N0MQ5P4sxAJzDCkry6EaOtAjNwwJEYuhBBdzTA0HMeR56w+TGJpQvRtRnfvwNF4/PHHue+++5g+fXq7r7v99tsJBAI88sgj1NfX893vfpfm5mZ+9atfddGeCiFEx7KVoqohxP+WbiMUtjllan8Kc5PRHae7d00I0QtomiJsw/qd1Sxet4+sdC+nTR9AssdAyXVGJDKlaI7YfLR0F3srmpg0IptJI3Lw6grLsrt774QQx0gpMFFsL6nno1V7Sfa5OWPGANKS3Ghy3xKiU2maImTDqq2VrNhUTl6Wn1OmFpLk0cGW80+I3k4piKDYUVzH/DUlpCZ5OH36AFL9LrkHC5HgbKXYVxvk8Xe2EJT4sxAJSdMVQdNh4dp9bNhZzeB+qZw4oQC/S8OWvroQfZbSojHyd1YUUbSvgXFDs5g6KgefobAsuTYIIUSnUoqmsMXrS3awr6qZqaNyGTc0E4+mpH/WRygFEadtLC3N75I8DCH6kIRI0i8rK+MHP/gBixcvZvDgwe
2+duXKlSxZsoTXX3+dYcOGAfDjH/+Ym2++mbvuuou8vLwu2GMhhOg4tlI8+9423lq8u2XbhyuLGTM4k7uuniIDJUKI4xayHL5//0LKaw6uPPTSRzu49ZIJzByVC3KdEQlIaYote+v51X+WtuQMRoMfbn7+5TkkuXQc+W4LkZBMFD9/ZCk7S+tbtr2xcBeXnjqcc2cNkiRBITqJUtAcsfnuPxZQ0xBq2f7CB9u48+qpjB+UIf1GIXo5E8WP/7WY4vLGlm2vzd/J1WeN5PRpA+QeLESCihd/Hj0ok7uvkfizEIlA0xTVjRG++48FBEImEI2DPf3OZu69aRYDs/2SCCZEH6Q0xa7yJn760GKs/deA+WtKSPIa/OzLJ5LuM+TaIIQQnUVTbNhdy++eWN4SMv1kdQnpKR5+dusc/IaSUGofYKL4ycNL2FPW0LLtQCztjGkDJFFfiD4iIdayWr9+PS6Xi5dffplJkya1+9ply5aRk5PTkqAPMHPmTJRSLF++vLN3VQghOpRSivLaQKsBkgM27qpm8YYydD0hLuVCiB5KaRqPv725VYL+Af/871oCplQbF4kpZDn87vHlbYr61jeF+cuzq7FlRUkhEpJuaLy9pKhVgv4Bz7+/jbrmcDfslRB9g6M0HnhpfasEfYjm5f/xqZWEZWBfiF5NNzRe+WRnqwT9A558ewuNQbMb9koIcbyUIm78edPuahav3yfxZyESQMSG3z+5oiVB/wDTcvj1o8sISV9diD4pbDn85rFlLQn6BzQFTf7w5AoicmkQQohOE7Yc7ntqRZtE/NqGEPe/uA5HyUBlb6cbGq/N39UqQf+AJ9/eQoPE0oToMxKikv5pp53GaaeddkSvLSsro6CgoNU2t9tNeno6paWlx7UfhnH8gcgDwczeHtSUz9m7JPrnPJJzt6d+Rk3XeGNh2wGSA16fv5NZY/Nwd8D1qav11GPemyXiMe+Ie288iXg8OkPAtJm/uiRmm+PA6q0VzJvUD8s6+mR9OcZdo6ce52M9fzvi8ygFO/fWE44zyWTT7hqCEZsUj37Mv6M36anfod6uJx/37jx/DydkOby1KH7/+P1lxVxz5khM0+q0fegpevJ3qLfq6cf8WM7do/lMTWGLVVvLY7ZZtsO24lomD8uSKnxHqKd/n3qznnjsj+fZt6s+T8hyeGdJUdz2T1bv5ZKTh2L28onePfH705slwvHuzNhVV9ANjTfbiT+/tmAXs8blJ2T8ubMlwvezJ+mJxyvRz99DNTSbMSfSATQGItQ2hCnI8Hb6fvTEv7NoXyL+zXrTudvZSiubaY6TALh7XwPBsEWa78hThhLx+9IXJNLfJZHP30Q6zt1Bjk9rSik27azGtGLHSVdvrSBoOiS7E+d4HTh/5W995McgZDkxJ8Qf8PGqvVw6L/FiaX39O9DXP784NgmRpH80AoEAbre7zXaPx0MoFIrxjiOjaYqMjKTj2bVWUlN9HfazOlKodAehkq0kTzwFzeU57p/XUz9nR5PP2XMd7bnb0z5jOGLRHIzEbQ+GLQyXTkZaz9rvo9HTjnlfkCjHvKPvvfEkyvHoLKGqpjZVVA4VDFvHfYz6+jHuKj3pOHfE+Xu8nye0s6bddge65BqTSHrSd6gv6WnHvSecv+2prA0QCsdPwG8KRkhJ6fzEg56kp32H+oKeeMyP99w9ks/UVN7Q7hLMwbBFWpr/mPehr+qJ36e+oqcc+4569u3sz1NZGyAUiX8Pbg6apKT0jGPaFXrK96ev6KnHu6tiV50pErFoaif+HOoF8efO1lO/nz1VTzleveH8PVR1c2277RHL7tLP21P+zuLIJcrfrLedu51tR1nsyTsHHGuMPFG+L31NT/+79Jbzt6cf5+4mx+egUKSy3XalEmecMtb5K3/rwx+Dil4eS+vr34G+/vnF0el1Sfper5dwuO3y9qFQCL//2AcKbduhvr75eHYNiM6iSU31UV8fOKaKtJ3JsUzqn/0VdkMldTvWkX
zGrcf8s3ry5+xI8jk7R0d2RI/03O2pf0tNU8yb0p/lm2JXK5w1Lh/dcaipaeqS/YnYDjUNYXaXNZCV6qVfThKaUhS
"text/plain": [
"<Figure size 3076.25x3000 with 156 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2023-03-25 11:59:49 +01:00
"source": [
"sns.pairplot(data=wine_dataset, hue=\"color\")"
]
},
{
"cell_type": "code",
2023-03-25 12:01:55 +01:00
"execution_count": 65,
2023-03-25 11:59:49 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed_acidity</th>\n",
" <th>volatile_acidity</th>\n",
" <th>citric_acid</th>\n",
" <th>residual_sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free_sulfur_dioxide</th>\n",
" <th>total_sulfur_dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" <th>color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.448244</td>\n",
" <td>0.217069</td>\n",
" <td>0.180630</td>\n",
" <td>0.078990</td>\n",
" <td>0.088742</td>\n",
" <td>0.103024</td>\n",
" <td>0.257462</td>\n",
" <td>0.957255</td>\n",
" <td>0.803553</td>\n",
" <td>0.263877</td>\n",
" <td>0.703930</td>\n",
" <td>0.646154</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.753846</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.074301</td>\n",
" <td>0.107627</td>\n",
" <td>0.078046</td>\n",
" <td>0.070045</td>\n",
" <td>0.051400</td>\n",
" <td>0.054750</td>\n",
" <td>0.125165</td>\n",
" <td>0.002786</td>\n",
" <td>0.039808</td>\n",
" <td>0.072275</td>\n",
" <td>0.078704</td>\n",
" <td>0.095014</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.431433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.314465</td>\n",
" <td>0.063291</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.000000</td>\n",
2023-03-25 12:01:55 +01:00
" <td>0.012158</td>\n",
" <td>0.031097</td>\n",
" <td>0.010381</td>\n",
" <td>0.020455</td>\n",
" <td>0.951116</td>\n",
" <td>0.713217</td>\n",
" <td>0.130000</td>\n",
" <td>0.570470</td>\n",
" <td>0.333333</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.402516</td>\n",
" <td>0.145570</td>\n",
" <td>0.144578</td>\n",
" <td>0.027356</td>\n",
" <td>0.060556</td>\n",
" <td>0.058824</td>\n",
" <td>0.168182</td>\n",
" <td>0.955168</td>\n",
" <td>0.775561</td>\n",
" <td>0.210000</td>\n",
" <td>0.637584</td>\n",
" <td>0.555556</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.433962</td>\n",
" <td>0.177215</td>\n",
" <td>0.180723</td>\n",
" <td>0.042553</td>\n",
" <td>0.078560</td>\n",
" <td>0.100346</td>\n",
" <td>0.261364</td>\n",
" <td>0.957478</td>\n",
" <td>0.800499</td>\n",
" <td>0.250000</td>\n",
" <td>0.691275</td>\n",
" <td>0.666667</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.471698</td>\n",
" <td>0.253165</td>\n",
" <td>0.222892</td>\n",
" <td>0.113982</td>\n",
" <td>0.101473</td>\n",
" <td>0.141869</td>\n",
" <td>0.343182</td>\n",
" <td>0.959354</td>\n",
" <td>0.827930</td>\n",
" <td>0.300000</td>\n",
" <td>0.758389</td>\n",
" <td>0.666667</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.817610</td>\n",
" <td>0.569620</td>\n",
" <td>0.445783</td>\n",
" <td>0.334347</td>\n",
" <td>0.679214</td>\n",
" <td>0.231834</td>\n",
" <td>0.575000</td>\n",
" <td>0.965264</td>\n",
" <td>0.917706</td>\n",
" <td>0.585000</td>\n",
" <td>0.939597</td>\n",
" <td>1.000000</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.448244 0.217069 0.180630 0.078990 \n",
"std 0.074301 0.107627 0.078046 0.070045 \n",
"min 0.314465 0.063291 0.000000 0.012158 \n",
"25% 0.402516 0.145570 0.144578 0.027356 \n",
"50% 0.433962 0.177215 0.180723 0.042553 \n",
"75% 0.471698 0.253165 0.222892 0.113982 \n",
"max 0.817610 0.569620 0.445783 0.334347 \n",
2023-03-25 11:59:49 +01:00
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.088742 0.103024 0.257462 0.957255 \n",
"std 0.051400 0.054750 0.125165 0.002786 \n",
"min 0.031097 0.010381 0.020455 0.951116 \n",
"25% 0.060556 0.058824 0.168182 0.955168 \n",
"50% 0.078560 0.100346 0.261364 0.957478 \n",
"75% 0.101473 0.141869 0.343182 0.959354 \n",
"max 0.679214 0.231834 0.575000 0.965264 \n",
2023-03-25 11:59:49 +01:00
"\n",
" pH sulphates alcohol quality color \n",
"count 325.000000 325.000000 325.000000 325.000000 325.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.803553 0.263877 0.703930 0.646154 0.753846 \n",
"std 0.039808 0.072275 0.078704 0.095014 0.431433 \n",
"min 0.713217 0.130000 0.570470 0.333333 0.000000 \n",
"25% 0.775561 0.210000 0.637584 0.555556 1.000000 \n",
"50% 0.800499 0.250000 0.691275 0.666667 1.000000 \n",
"75% 0.827930 0.300000 0.758389 0.666667 1.000000 \n",
"max 0.917706 0.585000 0.939597 1.000000 1.000000 "
2023-03-25 11:59:49 +01:00
]
},
2023-03-25 12:01:55 +01:00
"execution_count": 65,
2023-03-25 11:59:49 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test.describe()"
]
},
{
"cell_type": "code",
2023-03-25 12:01:55 +01:00
"execution_count": 66,
2023-03-25 11:59:49 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed_acidity</th>\n",
" <th>volatile_acidity</th>\n",
" <th>citric_acid</th>\n",
" <th>residual_sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free_sulfur_dioxide</th>\n",
" <th>total_sulfur_dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" <th>color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" <td>5847.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.453848</td>\n",
" <td>0.215061</td>\n",
" <td>0.192235</td>\n",
" <td>0.082331</td>\n",
" <td>0.092161</td>\n",
" <td>0.105659</td>\n",
" <td>0.262894</td>\n",
" <td>0.957364</td>\n",
" <td>0.802569</td>\n",
" <td>0.265798</td>\n",
" <td>0.704326</td>\n",
" <td>0.646732</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.753891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.081742</td>\n",
" <td>0.104315</td>\n",
" <td>0.088036</td>\n",
" <td>0.071982</td>\n",
" <td>0.058619</td>\n",
" <td>0.061749</td>\n",
" <td>0.128256</td>\n",
" <td>0.002882</td>\n",
" <td>0.039880</td>\n",
" <td>0.074864</td>\n",
" <td>0.079852</td>\n",
" <td>0.096928</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.430780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.238994</td>\n",
" <td>0.050633</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.000000</td>\n",
2023-03-25 12:01:55 +01:00
" <td>0.009119</td>\n",
" <td>0.014730</td>\n",
" <td>0.003460</td>\n",
" <td>0.013636</td>\n",
" <td>0.950076</td>\n",
" <td>0.678304</td>\n",
" <td>0.110000</td>\n",
" <td>0.536913</td>\n",
" <td>0.333333</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.402516</td>\n",
" <td>0.145570</td>\n",
" <td>0.150602</td>\n",
" <td>0.027356</td>\n",
" <td>0.062193</td>\n",
" <td>0.058824</td>\n",
" <td>0.176136</td>\n",
" <td>0.955071</td>\n",
" <td>0.775561</td>\n",
" <td>0.215000</td>\n",
" <td>0.637584</td>\n",
" <td>0.555556</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.440252</td>\n",
" <td>0.183544</td>\n",
" <td>0.186747</td>\n",
" <td>0.045593</td>\n",
" <td>0.076923</td>\n",
" <td>0.100346</td>\n",
" <td>0.268182</td>\n",
" <td>0.957516</td>\n",
" <td>0.800499</td>\n",
" <td>0.255000</td>\n",
" <td>0.691275</td>\n",
" <td>0.666667</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.484277</td>\n",
" <td>0.253165</td>\n",
" <td>0.234940</td>\n",
" <td>0.123100</td>\n",
" <td>0.106383</td>\n",
" <td>0.141869</td>\n",
" <td>0.353409</td>\n",
" <td>0.959581</td>\n",
" <td>0.827930</td>\n",
" <td>0.300000</td>\n",
" <td>0.758389</td>\n",
" <td>0.666667</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
2023-03-25 12:01:55 +01:00
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.453848 0.215061 0.192235 0.082331 \n",
"std 0.081742 0.104315 0.088036 0.071982 \n",
"min 0.238994 0.050633 0.000000 0.009119 \n",
"25% 0.402516 0.145570 0.150602 0.027356 \n",
"50% 0.440252 0.183544 0.186747 0.045593 \n",
"75% 0.484277 0.253165 0.234940 0.123100 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
2023-03-25 11:59:49 +01:00
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.092161 0.105659 0.262894 0.957364 \n",
"std 0.058619 0.061749 0.128256 0.002882 \n",
"min 0.014730 0.003460 0.013636 0.950076 \n",
"25% 0.062193 0.058824 0.176136 0.955071 \n",
"50% 0.076923 0.100346 0.268182 0.957516 \n",
"75% 0.106383 0.141869 0.353409 0.959581 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
2023-03-25 11:59:49 +01:00
"\n",
" pH sulphates alcohol quality color \n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 5847.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.802569 0.265798 0.704326 0.646732 0.753891 \n",
"std 0.039880 0.074864 0.079852 0.096928 0.430780 \n",
"min 0.678304 0.110000 0.536913 0.333333 0.000000 \n",
"25% 0.775561 0.215000 0.637584 0.555556 1.000000 \n",
"50% 0.800499 0.255000 0.691275 0.666667 1.000000 \n",
"75% 0.827930 0.300000 0.758389 0.666667 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 "
2023-03-25 11:59:49 +01:00
]
},
2023-03-25 12:01:55 +01:00
"execution_count": 66,
2023-03-25 11:59:49 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_train.describe()"
]
},
{
"cell_type": "code",
2023-03-25 12:01:55 +01:00
"execution_count": 67,
2023-03-25 11:59:49 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fixed_acidity</th>\n",
" <th>volatile_acidity</th>\n",
" <th>citric_acid</th>\n",
" <th>residual_sugar</th>\n",
" <th>chlorides</th>\n",
" <th>free_sulfur_dioxide</th>\n",
" <th>total_sulfur_dioxide</th>\n",
" <th>density</th>\n",
" <th>pH</th>\n",
" <th>sulphates</th>\n",
" <th>alcohol</th>\n",
" <th>quality</th>\n",
" <th>color</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" <td>325.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.458355</td>\n",
" <td>0.211412</td>\n",
" <td>0.198091</td>\n",
" <td>0.093521</td>\n",
" <td>0.086537</td>\n",
" <td>0.107596</td>\n",
" <td>0.271556</td>\n",
" <td>0.957757</td>\n",
" <td>0.802570</td>\n",
" <td>0.264446</td>\n",
" <td>0.701160</td>\n",
" <td>0.642393</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.753846</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.084621</td>\n",
" <td>0.098749</td>\n",
" <td>0.086862</td>\n",
" <td>0.079346</td>\n",
" <td>0.035141</td>\n",
" <td>0.061805</td>\n",
" <td>0.135185</td>\n",
" <td>0.003031</td>\n",
" <td>0.044183</td>\n",
" <td>0.068086</td>\n",
" <td>0.084939</td>\n",
" <td>0.100957</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.431433</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.295597</td>\n",
" <td>0.056962</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.000000</td>\n",
2023-03-25 12:01:55 +01:00
" <td>0.012158</td>\n",
" <td>0.019640</td>\n",
" <td>0.010381</td>\n",
" <td>0.018182</td>\n",
" <td>0.950413</td>\n",
" <td>0.715711</td>\n",
" <td>0.140000</td>\n",
" <td>0.563758</td>\n",
" <td>0.333333</td>\n",
2023-03-25 11:59:49 +01:00
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.402516</td>\n",
" <td>0.145570</td>\n",
" <td>0.156627</td>\n",
" <td>0.030395</td>\n",
" <td>0.063830</td>\n",
" <td>0.055363</td>\n",
" <td>0.179545</td>\n",
" <td>0.955456</td>\n",
" <td>0.773067</td>\n",
" <td>0.215000</td>\n",
" <td>0.630872</td>\n",
" <td>0.555556</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.446541</td>\n",
" <td>0.183544</td>\n",
" <td>0.186747</td>\n",
" <td>0.069149</td>\n",
" <td>0.078560</td>\n",
" <td>0.100346</td>\n",
" <td>0.284091</td>\n",
" <td>0.957978</td>\n",
" <td>0.800499</td>\n",
" <td>0.250000</td>\n",
" <td>0.684564</td>\n",
" <td>0.666667</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.490566</td>\n",
" <td>0.253165</td>\n",
" <td>0.240964</td>\n",
" <td>0.133739</td>\n",
" <td>0.098200</td>\n",
" <td>0.155709</td>\n",
" <td>0.370455</td>\n",
" <td>0.960028</td>\n",
" <td>0.827930</td>\n",
" <td>0.305000</td>\n",
" <td>0.758389</td>\n",
" <td>0.666667</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
2023-03-25 12:01:55 +01:00
" <td>0.943396</td>\n",
" <td>0.746835</td>\n",
" <td>0.445783</td>\n",
" <td>0.480243</td>\n",
" <td>0.278232</td>\n",
" <td>0.266436</td>\n",
" <td>0.570455</td>\n",
" <td>0.972396</td>\n",
" <td>1.000000</td>\n",
" <td>0.570000</td>\n",
" <td>0.939597</td>\n",
" <td>0.888889</td>\n",
2023-03-25 11:59:49 +01:00
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.458355 0.211412 0.198091 0.093521 \n",
"std 0.084621 0.098749 0.086862 0.079346 \n",
"min 0.295597 0.056962 0.000000 0.012158 \n",
"25% 0.402516 0.145570 0.156627 0.030395 \n",
"50% 0.446541 0.183544 0.186747 0.069149 \n",
"75% 0.490566 0.253165 0.240964 0.133739 \n",
"max 0.943396 0.746835 0.445783 0.480243 \n",
2023-03-25 11:59:49 +01:00
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.086537 0.107596 0.271556 0.957757 \n",
"std 0.035141 0.061805 0.135185 0.003031 \n",
"min 0.019640 0.010381 0.018182 0.950413 \n",
"25% 0.063830 0.055363 0.179545 0.955456 \n",
"50% 0.078560 0.100346 0.284091 0.957978 \n",
"75% 0.098200 0.155709 0.370455 0.960028 \n",
"max 0.278232 0.266436 0.570455 0.972396 \n",
2023-03-25 11:59:49 +01:00
"\n",
" pH sulphates alcohol quality color \n",
"count 325.000000 325.000000 325.000000 325.000000 325.000000 \n",
2023-03-25 12:01:55 +01:00
"mean 0.802570 0.264446 0.701160 0.642393 0.753846 \n",
"std 0.044183 0.068086 0.084939 0.100957 0.431433 \n",
"min 0.715711 0.140000 0.563758 0.333333 0.000000 \n",
"25% 0.773067 0.215000 0.630872 0.555556 1.000000 \n",
"50% 0.800499 0.250000 0.684564 0.666667 1.000000 \n",
"75% 0.827930 0.305000 0.758389 0.666667 1.000000 \n",
"max 1.000000 0.570000 0.939597 0.888889 1.000000 "
2023-03-25 11:59:49 +01:00
]
},
2023-03-25 12:01:55 +01:00
"execution_count": 67,
2023-03-25 11:59:49 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_val.describe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}