{
"cells": [
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import sklearn.model_selection"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"url = \"https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv\"\n",
"save_path = \"Wine_Quality_Data.csv\"\n",
"\n",
"# Upper bound (in seconds) on waiting for the server; without a timeout\n",
"# requests.get() can block forever on an unresponsive host.\n",
"download_timeout = 30\n",
"\n",
"# Fetch the raw CSV. raise_for_status() turns an HTTP 4xx/5xx answer into an\n",
"# exception so an error page is never written to disk as if it were data.\n",
"response = requests.get(url, timeout=download_timeout)\n",
"response.raise_for_status()\n",
"\n",
"# Binary mode stores the payload byte-for-byte, with no newline or encoding translation.\n",
"with open(save_path, \"wb\") as f:\n",
"    f.write(response.content)\n"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"# Load the CSV written by the download cell. Reusing save_path keeps the file\n",
"# name defined in one place instead of repeating the literal.\n",
"wine_dataset = pd.read_csv(save_path)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" 5 | \n",
" red | \n",
"
\n",
" \n",
" 1 | \n",
" 7.8 | \n",
" 0.88 | \n",
" 0.00 | \n",
" 2.6 | \n",
" 0.098 | \n",
" 25.0 | \n",
" 67.0 | \n",
" 0.9968 | \n",
" 3.20 | \n",
" 0.68 | \n",
" 9.8 | \n",
" 5 | \n",
" red | \n",
"
\n",
" \n",
" 2 | \n",
" 7.8 | \n",
" 0.76 | \n",
" 0.04 | \n",
" 2.3 | \n",
" 0.092 | \n",
" 15.0 | \n",
" 54.0 | \n",
" 0.9970 | \n",
" 3.26 | \n",
" 0.65 | \n",
" 9.8 | \n",
" 5 | \n",
" red | \n",
"
\n",
" \n",
" 3 | \n",
" 11.2 | \n",
" 0.28 | \n",
" 0.56 | \n",
" 1.9 | \n",
" 0.075 | \n",
" 17.0 | \n",
" 60.0 | \n",
" 0.9980 | \n",
" 3.16 | \n",
" 0.58 | \n",
" 9.8 | \n",
" 6 | \n",
" red | \n",
"
\n",
" \n",
" 4 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" 5 | \n",
" red | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar chlorides \\\n",
"0 7.4 0.70 0.00 1.9 0.076 \n",
"1 7.8 0.88 0.00 2.6 0.098 \n",
"2 7.8 0.76 0.04 2.3 0.092 \n",
"3 11.2 0.28 0.56 1.9 0.075 \n",
"4 7.4 0.70 0.00 1.9 0.076 \n",
"\n",
" free_sulfur_dioxide total_sulfur_dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.9978 3.51 0.56 \n",
"1 25.0 67.0 0.9968 3.20 0.68 \n",
"2 15.0 54.0 0.9970 3.26 0.65 \n",
"3 17.0 60.0 0.9980 3.16 0.58 \n",
"4 11.0 34.0 0.9978 3.51 0.56 \n",
"\n",
" alcohol quality color \n",
"0 9.4 5 red \n",
"1 9.8 5 red \n",
"2 9.8 5 red \n",
"3 9.8 6 red \n",
"4 9.4 5 red "
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the loaded data.\n",
"wine_dataset.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"# Encode the wine color as a binary flag: red -> 1, white -> 0.\n",
"color_codes = {\"red\": 1, \"white\": 0}\n",
"wine_dataset[\"color\"] = wine_dataset[\"color\"].replace(color_codes)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 7.215307 | \n",
" 0.339666 | \n",
" 0.318633 | \n",
" 5.443235 | \n",
" 0.056034 | \n",
" 30.525319 | \n",
" 115.744574 | \n",
" 0.994697 | \n",
" 3.218501 | \n",
" 0.531268 | \n",
" 10.491801 | \n",
" 5.818378 | \n",
" 0.246114 | \n",
"
\n",
" \n",
" std | \n",
" 1.296434 | \n",
" 0.164636 | \n",
" 0.145318 | \n",
" 4.757804 | \n",
" 0.035034 | \n",
" 17.749400 | \n",
" 56.521855 | \n",
" 0.002999 | \n",
" 0.160787 | \n",
" 0.148806 | \n",
" 1.192712 | \n",
" 0.873255 | \n",
" 0.430779 | \n",
"
\n",
" \n",
" min | \n",
" 3.800000 | \n",
" 0.080000 | \n",
" 0.000000 | \n",
" 0.600000 | \n",
" 0.009000 | \n",
" 1.000000 | \n",
" 6.000000 | \n",
" 0.987110 | \n",
" 2.720000 | \n",
" 0.220000 | \n",
" 8.000000 | \n",
" 3.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 6.400000 | \n",
" 0.230000 | \n",
" 0.250000 | \n",
" 1.800000 | \n",
" 0.038000 | \n",
" 17.000000 | \n",
" 77.000000 | \n",
" 0.992340 | \n",
" 3.110000 | \n",
" 0.430000 | \n",
" 9.500000 | \n",
" 5.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 7.000000 | \n",
" 0.290000 | \n",
" 0.310000 | \n",
" 3.000000 | \n",
" 0.047000 | \n",
" 29.000000 | \n",
" 118.000000 | \n",
" 0.994890 | \n",
" 3.210000 | \n",
" 0.510000 | \n",
" 10.300000 | \n",
" 6.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.700000 | \n",
" 0.400000 | \n",
" 0.390000 | \n",
" 8.100000 | \n",
" 0.065000 | \n",
" 41.000000 | \n",
" 156.000000 | \n",
" 0.996990 | \n",
" 3.320000 | \n",
" 0.600000 | \n",
" 11.300000 | \n",
" 6.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" max | \n",
" 15.900000 | \n",
" 1.580000 | \n",
" 1.660000 | \n",
" 65.800000 | \n",
" 0.611000 | \n",
" 289.000000 | \n",
" 440.000000 | \n",
" 1.038980 | \n",
" 4.010000 | \n",
" 2.000000 | \n",
" 14.900000 | \n",
" 9.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 7.215307 0.339666 0.318633 5.443235 \n",
"std 1.296434 0.164636 0.145318 4.757804 \n",
"min 3.800000 0.080000 0.000000 0.600000 \n",
"25% 6.400000 0.230000 0.250000 1.800000 \n",
"50% 7.000000 0.290000 0.310000 3.000000 \n",
"75% 7.700000 0.400000 0.390000 8.100000 \n",
"max 15.900000 1.580000 1.660000 65.800000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.056034 30.525319 115.744574 0.994697 \n",
"std 0.035034 17.749400 56.521855 0.002999 \n",
"min 0.009000 1.000000 6.000000 0.987110 \n",
"25% 0.038000 17.000000 77.000000 0.992340 \n",
"50% 0.047000 29.000000 118.000000 0.994890 \n",
"75% 0.065000 41.000000 156.000000 0.996990 \n",
"max 0.611000 289.000000 440.000000 1.038980 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 3.218501 0.531268 10.491801 5.818378 0.246114 \n",
"std 0.160787 0.148806 1.192712 0.873255 0.430779 \n",
"min 2.720000 0.220000 8.000000 3.000000 0.000000 \n",
"25% 3.110000 0.430000 9.500000 5.000000 0.000000 \n",
"50% 3.210000 0.510000 10.300000 6.000000 0.000000 \n",
"75% 3.320000 0.600000 11.300000 6.000000 0.000000 \n",
"max 4.010000 2.000000 14.900000 9.000000 1.000000 "
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for every column, including the encoded color flag.\n",
"wine_dataset.describe(include=\"all\")"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAGbCAYAAAAx9RHcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAeNUlEQVR4nO3df0xV9/3H8de9XH4J3g5RuLSG1uHEkW1F6zWQDMdMvqzZ3BZGl2URs7pqWWdGh2Omi/SXzq5LUTubudaotaYSZwbp1nRZTU2WtYtSYO2aFVmrVWM7gVIptyJwy733+4e5d15tC5cVL2/u85EY5ZzP+eRjOYc+PfdwcYRCoZAAAACMcsZ7AQAAAP8LYgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0V7wXcC2EQiEFg7zRcaJwOh18voFpius7cTidDjkcjnGNTYiYCQZDOn9+MN7LwDXgcjmVlZUhn++iRkeD8V4OgE8R13dimTUrQ0lJ44sZXmYCAACmETMAAMA0YgYAAJgWc8z09PSosLDwql8tLS2SpOPHj6u6ulrFxcVavny59u/fH3V8MBjUjh07VFZWpuLiYq1du1Znz56NGjPWHAAAAGExPwDc1dWl1NRUvfDCC1FPGc+cOVP9/f1avXq1li9frgcffFCvvvqqHnzwQWVkZKiqqkqStHPnTjU1Nenhhx+Wx+PRI488ojVr1ujZZ59VSkrKuOYAAAAIizlm3njjDd10003Kycm5at9TTz2l5ORkbdq0SS6XSwUFBTpz5ox27dqlqqoq+f1+7d27V/X19SovL5ckbd++XWVlZTp8+LBWrFihQ4cOfeIcAAAAl4v5ZaZ///vfKigo+Mh97e3tWrp0qVyu/zZSSUmJTp8+rb6+PnV1dWlwcFClpaWR/W63W0VFRWpraxvXHAAAAJeb0J2ZrKwsrVy5UqdOndKNN96ou+66S8uWLVN3d7cWLFgQNT58B+fcuXPq7u6WJOXl5V01JrxvrDlmz54d65IlXXp/Akx/SUnOqN8BTB9c3/g4McXM6Oio3nrrLc2fP1/33HOPMjMz9dxzz+nOO+/Uk08+qeHhYaWkpEQdk5qaKkkaGRnR0NCQJH3kmIGBAUkac46JcDodysrKmNCxsMntTo/3EgBMEq5vXCmmmHG5XGptbVVSUpLS0tIkSV/4whf05ptvas+ePUpLS5Pf7486JhwgM2bMiBzj9/sjfw6PSU+/dHKONcdEBIMh+XwXJ3QsbElKcsrtTpfPN6RAgHcIBaYTru/E4nanj/suXMwvM2VkXH2H43Of+5xeeukleTwe9fb2Ru0Lf5ybm6vR0dHItvz8/KgxhYWFkjTmHBPFW18nlkAgyOccmKa4vnGlmF54fPPNN7V48WK1trZGbf/Xv/6l+fPny+v1qqOjQ4FAILLv2LFjmjdvnrKzs7Vw4UJlZmZGHe/z+dTZ2Smv1ytJY84BAABwuZhipqCgQJ/97Ge1adMmtbe36+TJk/rVr36lV199VXfddZeqqqp04cIFbdy4USdOnFBLS4v27dunmpoaSZeelamurlZjY6OOHDmirq4u1dXVyePxqKKiQpLGnAMAAOByjlAoFNPPUu/r69PWrVv14osvyufzqaioSPX19VqyZIkk6bXXXtOWLVvU2dmpOXPm6Ic//KGqq6sjxwcCAW3btk0tLS0aHh6W1+vVfffdp7lz50bGjDVHrAKBID81O0GEf6puf/8gt6GBaYbrO7Fc+qnZ47vnEnPMWJSoMeN0OuR0ju/Hp08XifyAYDAYUjA47S9nJDBiJrHEEjMxPwAMG5xOhz7zmRkJ+34Mifitm4FAUO+/f5GgAZBwiJlpyul0KCnJqcYDHXq754N4LweTbG7uTNWvvEVOp4OYAZBwiJlp7u2eD3TynYF4LwMAgEmTmK9BAACAaYOYAQAAphEzAADANGIGAACYRswAAADTiB
kAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMG3CMXPq1CktWrRILS0tkW3Hjx9XdXW1iouLtXz5cu3fvz/qmGAwqB07dqisrEzFxcVau3atzp49GzVmrDkAAAAuN6GY+fDDD1VfX6+LFy9GtvX392v16tXKz89Xc3Oz1q1bp8bGRjU3N0fG7Ny5U01NTdq8ebMOHjyoYDCoNWvWyO/3j3sOAACAy7kmctBjjz2mzMzMqG2HDh1ScnKyNm3aJJfLpYKCAp05c0a7du1SVVWV/H6/9u7dq/r6epWXl0uStm/frrKyMh0+fFgrVqwYcw4AAIArxXxnpq2tTb///e/18MMPR21vb2/X0qVL5XL9t49KSkp0+vRp9fX1qaurS4ODgyotLY3sd7vdKioqUltb27jmAAAAuFJMd2Z8Pp82bNighoYG5eXlRe3r7u7WggULorbl5ORIks6dO6fu7m5Juuq4nJycyL6x5pg9e3Ysy43iciXWs85JSYn198UlfN4xnYXPb85zXCmmmHnggQe0aNEiffOb37xq3/DwsFJSUqK2paamSpJGRkY0NDQkSR85ZmBgYFxzTJTT6VBWVsaEjwescLvT470EYNJxnuNK446ZZ555Ru3t7Xr22Wc/cn9aWlrkQd6wcIDMmDFDaWlpkiS/3x/5c3hMenr6uOaYqGAwJJ/v4tgDp5GkJCcXfALy+YYUCATjvQxgUoS/rnGeJwa3O33cd+HGHTPNzc167733Ig/vht1///3685//LI/Ho97e3qh94Y9zc3M1Ojoa2Zafnx81prCwUJLGnON/MTrKiY/pLxAIcq5j2uM8x5XGHTONjY0aHh6O2lZRUaHa2lp961vf0h//+EcdPHhQgUBASUlJkqRjx45p3rx5ys7O1syZM5WZmanW1tZIzPh8PnV2dqq6ulqS5PV6P3EOAACAK437Karc3FzdeOONUb8kKTs7W7m5uaqqqtKFCxe0ceNGnThxQi0tLdq3b59qamokXXpWprq6Wo2NjTpy5Ii6urpUV1cnj8ejiooKSRpzDgAAgCtN6H1mPkp2drZ2796tLVu2qLKyUnPmzNGGDRtUWVkZGVNbW6vR0VE1NDRoeHhYXq9Xe/bsUXJy8rjnAAAAuJwjFAqF4r2IyRYIBHX+/GC8l3FNuVxOZWVl6Kfb/qqT7wzEezmYZAU3XKdH15erv3+QZwkwbYW/rnGeJ4ZZszLG/QAw36wPAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAA
BMI2YAAIBpxAwAADCNmAEAAKbFHDPvvfeefv7zn6ukpESLFi3SnXfeqZMnT0b2Hz9+XNXV1SouLtby5cu1f//+qOODwaB27NihsrIyFRcXa+3atTp79mzUmLHmAAAACIs5ZtatW6czZ85o165d+sMf/qC0tDTdfvvtGhoaUn9/v1avXq38/Hw1Nzdr3bp1amxsVHNzc+T4nTt3qqmpSZs3b9bBgwcVDAa1Zs0a+f1+SRrXHAAAAGGuWAYPDAzohhtuUE1NjRYsWCBJ+vGPf6xvf/vbevPNN3X06FElJydr06ZNcrlcKigoiIRPVVWV/H6/9u7dq/r6epWXl0uStm/frrKyMh0+fFgrVqzQoUOHPnEOAACAy8V0Z+a6667T1q1bIyFz/vx57du3Tx6PR/Pnz1d7e7uWLl0ql+u/jVRSUqLTp0+rr69PXV1dGhwcVGlpaWS/2+1WUVGR2traJGnMOQAAAC4X052Zy9177706dOiQUlJS9Lvf/U4zZsxQd3d3JHTCcnJyJEnnzp1Td3e3JCkvL++qMeF9Y80xe/bsCa3X5UqsZ52TkhLr74tL+LxjOguf35znuNKEY+YHP/iBvve97+nAgQNat26dmpqaNDw8rJSUlKhxqampkqSRkRENDQ1J0keOGRgYkKQx55gIp9OhrKyMCR0LWOJ2p8d7CcCk4zzHlSYcM/Pnz5ckbdmyRf/85z/19NNPKy0tLfIgb1g4QGbMmKG0tDRJkt/vj/w5PCY9/dLJOdYcExEMhuTzXZzQsVYlJTm54BOQzzekQCAY72UAkyL8dY3zPDG43enjvgsXU8ycP39eR48e1de+9rXIMy1Op1Pz589Xb2+vPB6Pent7o44Jf5ybm6vR0dHItvz8/KgxhYWFkjTmHBM1OsqJj+kvEAhyrmPa4zzHlWJ64bGvr0/r16/X0aNHI9s+/PBDdXZ2qqCgQF6vVx0dHQoEApH9x44d07x585Sdna2FCxcqMzNTra2tkf0+n0+dnZ3yer2SNOYcAAAAl4spZhYsWKBly5bpl7/8pdra2vTGG2/onnvukc/n0+23366qqipduHBBGzdu1IkTJ9TS0qJ9+/appqZG0qVnZaqrq9XY2KgjR46oq6tLdXV18ng8qqiokKQx5wAAALhczM/MbNu2TVu3blVdXZ0++OADLVmyRAcOHND1118vSdq9e7e2bNmiyspKzZkzRxs2bFBlZWXk+NraWo2OjqqhoUHDw8Pyer3as2ePkpOTJUnZ2dljzgEAABDmCIVCoXgvYrIFAkGdPz8Y72VcUy6XU1lZGfrptr/q5DsD8V4OJlnBDdfp0fXl6u8f5FkCTFvhr2uc54lh1qyMcT8AzDfrAwAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMizlm3n//fd13331atmyZFi9erO9///tqb2+P7D969Ki+853v6Oabb9att96q5557Lur4kZERPfjggyotLdWiRYv0s5/9TOfPn48aM9YcAAAAYTHHzPr16/XKK69o27Ztam5u1uc//3ndcccdeuutt3Ty5EnV1NSorKxMLS0t+u53v6sNGzbo6NGjkeMfeOABvfTSS3
rsscf01FNP6a233lJtbW1k/3jmAAAACHPFMvjMmTP6+9//rqamJt1yyy2SpHvvvVcvvviinn32Wb333nsqLCxUXV2dJKmgoECdnZ3avXu3SktL1dPTo2eeeUaPP/64lixZIknatm2bbr31Vr3yyitatGiRnnrqqU+cAwAA4HIx3ZnJysrSrl279MUvfjGyzeFwyOFwyOfzqb29/argKCkpUUdHh0KhkDo6OiLbwubNm6fc3Fy1tbVJ0phzAAAAXC6mmHG73frKV76ilJSUyLbnn39eZ86cUVlZmbq7u+XxeKKOycnJ0dDQkPr7+9XT06OsrCylpqZeNaa7u1uSxpwDAADgcjG9zHSlf/zjH/rFL36hiooKlZeXa3h4OCp0JEU+9vv9Ghoaumq/JKWmpmpkZESSxpxjolyuxPrGraSkxPr74hI+75jOwuc35zmuNOGYeeGFF1RfX6/FixersbFR0qUouTI4wh+np6crLS3tI4NkZGRE6enp45pjIpxOh7KyMiZ0LGCJ2z2xawSwhPMcV5pQzDz99NPasmWLbr31Vv3617+O3DnJy8tTb29v1Nje3l7NmDFDM2fOlMfj0fvvvy+/3x9196W3t1e5ubnjmmMigsGQfL6LEzrWqqQkJxd8AvL5hhQIBOO9DGBShL+ucZ4nBrc7fdx34WKOmaamJm3evFmrVq3Sxo0b5XA4IvuWLFmil19+OWr8sWPHtHjxYjmdTt1yyy0KBoPq6OiIPOR76tQp9fT0yOv1jmuOiRod5cTH9BcIBDnXMe1xnuNKMdXBqVOn9NBDD+n//u//VFNTo76+Pr377rt699139cEHH2jVqlV67bXX1NjYqJMnT2rv3r36y1/+ojVr1kiScnNz9Y1vfEMNDQ1qbW3Va6+9pvXr12vp0qUqLi6WpDHnAAAAuJwjFMP3Oz/++OPavn37R+6rrKzUww8/rL/97W965JFHdPr0ac2dO1c/+clP9PWvfz0y7uLFi3rooYf0/PPPS5KWLVumhoYGZWVlRcaMNUesAoGgzp8fnPDxFrlcTmVlZein2/6qk+8MxHs5mGQFN1ynR9eXq79/kH+xYtoKf13jPE8Ms2ZljPtlpphixipihpiZ7ogZJAJiJrHEEjN8fxsAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA04gZAABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA01zxXgAAIHZOp0NOpyPey7imkpKcUb8nkmAwpGAwFO9lTFnEDAAY43Q69JnPzEjI/6lLktudHu8lXHOBQFDvv3+RoPkYxAwAGON0OpSU5FTjgQ693fNBvJeDSTY3d6bqV94ip9NBzHwMYgYAjHq75wOdfGcg3ssA4i4x71ECAIBpg5gBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAacQMAAAwjZgBAACmETMAAMC0/ylmnnjiCa1atSpq2/Hjx1VdXa3i4mItX75c+/fvj9ofDAa1Y8cOlZWVqbi4WGvXrtXZs2djmgMAACBswjFz4MABPfroo1Hb+vv7tXr1auXn56u5uVnr1q1TY2OjmpubI2N27typpqYmbd68WQcPHlQwGNSaNWvk9/vHPQcAAECYK9YDenp6dP/996u1tVU33XRT1L5Dhw4pOTlZmzZtksvlUkFBgc6cOaNdu3apqqpKfr9fe/fuVX19vcrLyyVJ27dvV1lZmQ4fPqwVK1aMOQcAAM
DlYr4z8/rrrys5OVl/+tOfdPPNN0fta29v19KlS+Vy/beRSkpKdPr0afX19amrq0uDg4MqLS2N7He73SoqKlJbW9u45gAAALhczHdmli9fruXLl3/kvu7ubi1YsCBqW05OjiTp3Llz6u7uliTl5eVdNSa8b6w5Zs+eHeuSJUkuV2I965yUlFh/X1zC5z0x8HlOTHzeP17MMfNJhoeHlZKSErUtNTVVkjQyMqKhoSFJ+sgxAwMD45pjIpxOh7KyMiZ0LGCJ250e7yUAmCRc3x/vU42ZtLS0yIO8YeEAmTFjhtLS0iRJfr8/8ufwmPT09HHNMRHBYEg+38UJHWtVUpKTEz8B+XxDCgSC8V4GJhnXd2JKtOvb7U4f992oTzVmPB6Pent7o7aFP87NzdXo6GhkW35+ftSYwsLCcc0xUaOjiXMCIHEFAkHOdWCa4vr+eJ/qC3Ber1cdHR0KBAKRbceOHdO8efOUnZ2thQsXKjMzU62trZH9Pp9PnZ2d8nq945oDAADgcp9qzFRVVenChQvauHGjTpw4oZaWFu3bt081NTWSLj0rU11drcbGRh05ckRdXV2qq6uTx+NRRUXFuOYAAAC43Kf6MlN2drZ2796tLVu2qLKyUnPmzNGGDRtUWVkZGVNbW6vR0VE1NDRoeHhYXq9Xe/bsUXJy8rjnAAAACHOEQqFQvBcx2QKBoM6fH4z3Mq4pl8uprKwM/XTbX3XynYF4LweTrOCG6/To+nL19w/ymnoC4PpOLIl6fc+alTHuB4D5pnUAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMI2YAQAAphEzAADANGIGAACYRswAAADTiBkAAGAaMQMAAEwjZgAAgGnEDAAAMG1KxkwwGNSOHTtUVlam4uJirV27VmfPno33sgAAwBQ0JWNm586dampq0ubNm3Xw4EEFg0GtWbNGfr8/3ksDAABTzJSLGb/fr71796q2tlbl5eVauHChtm/fru7ubh0+fDjeywMAAFPMlIuZrq4uDQ4OqrS0NLLN7XarqKhIbW1tcVwZAACYilzxXsCVuru7JUl5eXlR23NyciL7YuV0OjRrVsb/vDZLHI5Lvz+wtlSjgWB8F4NJ50q69O+S665LVygU58Vg0nF9J5ZEvb6dTse4x065mBkaGpIkpaSkRG1PTU3VwMDAhOZ0OBxKShr/f5Tp5DMzU+O9BFxDTueUu9mKScT1nVi4vj/elPsvk5aWJklXPew7MjKi9PT0eCwJAABMYVMuZsIvL/X29kZt7+3tVW5ubjyWBAAAprApFzMLFy5UZmamWltbI9t8Pp86Ozvl9XrjuDIAADAVTblnZlJSUlRdXa3GxkbNmjVLN9xwgx555BF5PB5VVFTEe3kAAGCKmXIxI0m1tbUaHR1VQ0ODhoeH5fV6tWfPHiUnJ8d7aQAAYIpxhEKJ9I1eAABguplyz8wAAADEgpgBAACmETMAAMA0YgYAAJhGzAAAANOIGQAAYBoxAwAATCNmAACAaVPyHYCB8RgdHdXhw4fV1tamc+fOye/3Kz09Xbm5ufJ6vaqoqFBSUlK8lwkAmGS8AzBMevvtt3XHHXeop6dHRUVFysnJUWpqqkZGRtTb26vOzk5df/312r17t66//vp4LxcAMImIGZ
h05513KhAI6NFHH9XMmTOv2u/z+VRXV6fk5GQ9/vjjcVghAOBaIWZg0qJFi3Tw4EEVFhZ+7Jiuri6tXLlSHR0d13BlAD4Nq1atksPhGNfY/fv3T/JqMNXxzAxMmjlzpnp6ej4xZv7zn/8oLS3tGq4KwKfly1/+sn7zm99o3rx5+tKXvhTv5WCKI2Zg0m233aZ77rlHd999t0pKSpSXl6eUlBT5/X719PTo5ZdfVmNjo2677bZ4LxXABNTU1CgzM1Nbt27VE088oblz58Z7SZjCeJkJJoVCIf32t7/Vk08+qYsXL161PyMjQytXrtTdd98tp5N3IACs+tGPfqSUlBTt2LEj3kvBFEbMwLQPP/xQx48fV09Pj4aGhpSWliaPx6OFCxcqJSUl3ssD8D/q7e3V66+/rq9+9avxXgqmMGIGAACYxv13AABgGjEDAABMI2YAAIBpxAwAADCNmAEAAKYRMwAAwDRiBgAAmEbMAAAA0/4fapCwNl9f+CcAAAAASUVORK5CYII=",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class balance of the encoded color flag (1 = red, 0 = white).\n",
"ax = wine_dataset[\"color\"].value_counts().plot(kind=\"bar\")\n",
"# Label the figure so it can be read on its own. The trailing semicolon keeps\n",
"# the return value of ax.set() out of the cell output.\n",
"ax.set(title=\"Wine samples per color\", xlabel=\"color (1 = red, 0 = white)\", ylabel=\"number of samples\");"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.2964337577998153"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Standard deviation of fixed acidity on its original (unscaled) values.\n",
"wine_dataset.loc[:, \"fixed_acidity\"].std()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([], dtype=int64), array([], dtype=int64))"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"# Check for missing values: np.where returns the (row, column) positions of\n",
"# every null cell, so two empty arrays mean the dataset has no gaps.\n",
"missing_mask = wine_dataset.isnull()\n",
"np.where(missing_mask)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"# Max-abs scaling: divide each column by its largest absolute value so every\n",
"# column ends up within [-1, 1].\n",
"column_scale = wine_dataset.abs().max()\n",
"# A column whose values are all zero has scale 0; dividing by it would turn the\n",
"# whole column into NaN (0/0), so such columns are divided by 1 and stay zero.\n",
"column_scale = column_scale.replace(0, 1)\n",
"wine_dataset = wine_dataset / column_scale\n",
"# NOTE(review): the scale is computed on the full dataset before the train/test\n",
"# split, and the `quality` and `color` columns are scaled as well -- confirm\n",
"# this is intended."
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.453793 | \n",
" 0.214978 | \n",
" 0.191948 | \n",
" 0.082724 | \n",
" 0.091708 | \n",
" 0.105624 | \n",
" 0.263056 | \n",
" 0.957378 | \n",
" 0.802619 | \n",
" 0.265634 | \n",
" 0.704148 | \n",
" 0.646486 | \n",
" 0.246114 | \n",
"
\n",
" \n",
" std | \n",
" 0.081537 | \n",
" 0.104200 | \n",
" 0.087541 | \n",
" 0.072307 | \n",
" 0.057338 | \n",
" 0.061417 | \n",
" 0.128459 | \n",
" 0.002886 | \n",
" 0.040097 | \n",
" 0.074403 | \n",
" 0.080048 | \n",
" 0.097028 | \n",
" 0.430779 | \n",
"
\n",
" \n",
" min | \n",
" 0.238994 | \n",
" 0.050633 | \n",
" 0.000000 | \n",
" 0.009119 | \n",
" 0.014730 | \n",
" 0.003460 | \n",
" 0.013636 | \n",
" 0.950076 | \n",
" 0.678304 | \n",
" 0.110000 | \n",
" 0.536913 | \n",
" 0.333333 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.402516 | \n",
" 0.145570 | \n",
" 0.150602 | \n",
" 0.027356 | \n",
" 0.062193 | \n",
" 0.058824 | \n",
" 0.175000 | \n",
" 0.955110 | \n",
" 0.775561 | \n",
" 0.215000 | \n",
" 0.637584 | \n",
" 0.555556 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.440252 | \n",
" 0.183544 | \n",
" 0.186747 | \n",
" 0.045593 | \n",
" 0.076923 | \n",
" 0.100346 | \n",
" 0.268182 | \n",
" 0.957564 | \n",
" 0.800499 | \n",
" 0.255000 | \n",
" 0.691275 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.484277 | \n",
" 0.253165 | \n",
" 0.234940 | \n",
" 0.123100 | \n",
" 0.106383 | \n",
" 0.141869 | \n",
" 0.354545 | \n",
" 0.959585 | \n",
" 0.827930 | \n",
" 0.300000 | \n",
" 0.758389 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" max | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.453793 0.214978 0.191948 0.082724 \n",
"std 0.081537 0.104200 0.087541 0.072307 \n",
"min 0.238994 0.050633 0.000000 0.009119 \n",
"25% 0.402516 0.145570 0.150602 0.027356 \n",
"50% 0.440252 0.183544 0.186747 0.045593 \n",
"75% 0.484277 0.253165 0.234940 0.123100 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.091708 0.105624 0.263056 0.957378 \n",
"std 0.057338 0.061417 0.128459 0.002886 \n",
"min 0.014730 0.003460 0.013636 0.950076 \n",
"25% 0.062193 0.058824 0.175000 0.955110 \n",
"50% 0.076923 0.100346 0.268182 0.957564 \n",
"75% 0.106383 0.141869 0.354545 0.959585 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.802619 0.265634 0.704148 0.646486 0.246114 \n",
"std 0.040097 0.074403 0.080048 0.097028 0.430779 \n",
"min 0.678304 0.110000 0.536913 0.333333 0.000000 \n",
"25% 0.775561 0.215000 0.637584 0.555556 0.000000 \n",
"50% 0.800499 0.255000 0.691275 0.666667 0.000000 \n",
"75% 0.827930 0.300000 0.758389 0.666667 0.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 "
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-check the summary statistics after normalization (each column's max is expected to be 1).\n",
"wine_dataset.describe(include=\"all\")"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"652 1.000000\n",
"442 0.981132\n",
"557 0.981132\n",
"554 0.974843\n",
"555 0.974843\n",
"243 0.943396\n",
"244 0.943396\n",
"544 0.899371\n",
"3125 0.893082\n",
"374 0.880503\n",
"Name: fixed_acidity, dtype: float64"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the ten largest fixed-acidity values to check that the top of the range looks plausible.\n",
"wine_dataset[\"fixed_acidity\"].nlargest(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.0 4408\n",
"1.0 1439\n",
"Name: color, dtype: int64"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Split into train (90%) and test (10%). Stratifying on color keeps the same\n",
"# red/white ratio in both parts; random_state pins the shuffle.\n",
"wine_train, wine_test = train_test_split(\n",
"    wine_dataset,\n",
"    test_size=0.1,\n",
"    random_state=1,\n",
"    stratify=wine_dataset[\"color\"],\n",
")\n",
"wine_train[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.0 490\n",
"1.0 160\n",
"Name: color, dtype: int64"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class counts in the held-out part, to compare against the training counts.\n",
"wine_test.loc[:, \"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"# Split the 10% hold-out in half: one half is the test set, the other the validation set.\n",
"# The hold-out is rebuilt from wine_dataset rather than read from `wine_test`, because this\n",
"# cell rebinds `wine_test`; reading it would halve the test set again on every re-run.\n",
"# NOTE: the arguments of this first call must stay identical to the train/test split cell\n",
"# so that exactly the same hold-out rows are reproduced.\n",
"wine_holdout = sklearn.model_selection.train_test_split(\n",
"    wine_dataset, test_size=0.1, random_state=1, stratify=wine_dataset[\"color\"]\n",
")[1]\n",
"wine_test, wine_val = sklearn.model_selection.train_test_split(\n",
"    wine_holdout, test_size=0.5, random_state=1, stratify=wine_holdout[\"color\"]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.0 245\n",
"1.0 80\n",
"Name: color, dtype: int64"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class counts in the test set after the test/validation split.\n",
"wine_test.loc[:, \"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.0 245\n",
"1.0 80\n",
"Name: color, dtype: int64"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class counts in the validation set; these should match the test set.\n",
"wine_val.loc[:, \"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns\n",
"\n",
"# Switch plotting to seaborn's default theme.\n",
"sns.set_theme()"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of columns in the dataset.\n",
"wine_dataset.shape[1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
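"# Pair plot of all columns, colored by wine color. Left disabled.\n",
"# NOTE(review): the reason is not recorded -- presumably it is slow to render; confirm.\n",
"# sns.pairplot(data=wine_dataset, hue=\"color\")"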
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.460126 | \n",
" 0.209883 | \n",
" 0.197294 | \n",
" 0.083839 | \n",
" 0.096352 | \n",
" 0.105307 | \n",
" 0.272028 | \n",
" 0.957685 | \n",
" 0.799770 | \n",
" 0.266477 | \n",
" 0.691389 | \n",
" 0.636239 | \n",
" 0.246154 | \n",
"
\n",
" \n",
" std | \n",
" 0.087321 | \n",
" 0.100971 | \n",
" 0.086532 | \n",
" 0.072172 | \n",
" 0.066017 | \n",
" 0.061895 | \n",
" 0.131981 | \n",
" 0.002780 | \n",
" 0.038640 | \n",
" 0.082243 | \n",
" 0.073293 | \n",
" 0.088732 | \n",
" 0.431433 | \n",
"
\n",
" \n",
" min | \n",
" 0.308176 | \n",
" 0.066456 | \n",
" 0.000000 | \n",
" 0.010638 | \n",
" 0.026187 | \n",
" 0.003460 | \n",
" 0.020455 | \n",
" 0.952030 | \n",
" 0.698254 | \n",
" 0.115000 | \n",
" 0.577181 | \n",
" 0.333333 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.408805 | \n",
" 0.139241 | \n",
" 0.156627 | \n",
" 0.027356 | \n",
" 0.062193 | \n",
" 0.058824 | \n",
" 0.188636 | \n",
" 0.955322 | \n",
" 0.773067 | \n",
" 0.215000 | \n",
" 0.630872 | \n",
" 0.555556 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.440252 | \n",
" 0.189873 | \n",
" 0.186747 | \n",
" 0.048632 | \n",
" 0.078560 | \n",
" 0.100346 | \n",
" 0.275000 | \n",
" 0.957978 | \n",
" 0.795511 | \n",
" 0.250000 | \n",
" 0.671141 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.484277 | \n",
" 0.240506 | \n",
" 0.246988 | \n",
" 0.121581 | \n",
" 0.116203 | \n",
" 0.145329 | \n",
" 0.356818 | \n",
" 0.959787 | \n",
" 0.822943 | \n",
" 0.305000 | \n",
" 0.738255 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" max | \n",
" 0.943396 | \n",
" 0.715190 | \n",
" 0.469880 | \n",
" 0.303191 | \n",
" 0.764321 | \n",
" 0.479239 | \n",
" 0.781818 | \n",
" 0.966034 | \n",
" 0.895262 | \n",
" 0.975000 | \n",
" 0.906040 | \n",
" 0.888889 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.460126 0.209883 0.197294 0.083839 \n",
"std 0.087321 0.100971 0.086532 0.072172 \n",
"min 0.308176 0.066456 0.000000 0.010638 \n",
"25% 0.408805 0.139241 0.156627 0.027356 \n",
"50% 0.440252 0.189873 0.186747 0.048632 \n",
"75% 0.484277 0.240506 0.246988 0.121581 \n",
"max 0.943396 0.715190 0.469880 0.303191 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.096352 0.105307 0.272028 0.957685 \n",
"std 0.066017 0.061895 0.131981 0.002780 \n",
"min 0.026187 0.003460 0.020455 0.952030 \n",
"25% 0.062193 0.058824 0.188636 0.955322 \n",
"50% 0.078560 0.100346 0.275000 0.957978 \n",
"75% 0.116203 0.145329 0.356818 0.959787 \n",
"max 0.764321 0.479239 0.781818 0.966034 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 325.000000 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.799770 0.266477 0.691389 0.636239 0.246154 \n",
"std 0.038640 0.082243 0.073293 0.088732 0.431433 \n",
"min 0.698254 0.115000 0.577181 0.333333 0.000000 \n",
"25% 0.773067 0.215000 0.630872 0.555556 0.000000 \n",
"50% 0.795511 0.250000 0.671141 0.666667 0.000000 \n",
"75% 0.822943 0.305000 0.738255 0.666667 0.000000 \n",
"max 0.895262 0.975000 0.906040 0.888889 1.000000 "
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the test split.\n",
"wine_test.describe()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.453724 | \n",
" 0.215128 | \n",
" 0.192091 | \n",
" 0.082877 | \n",
" 0.091656 | \n",
" 0.105899 | \n",
" 0.262834 | \n",
" 0.957374 | \n",
" 0.802637 | \n",
" 0.265601 | \n",
" 0.704572 | \n",
" 0.646846 | \n",
" 0.246109 | \n",
"
\n",
" \n",
" std | \n",
" 0.081597 | \n",
" 0.104319 | \n",
" 0.087166 | \n",
" 0.072487 | \n",
" 0.057502 | \n",
" 0.061908 | \n",
" 0.128388 | \n",
" 0.002899 | \n",
" 0.040030 | \n",
" 0.074400 | \n",
" 0.080399 | \n",
" 0.097212 | \n",
" 0.430780 | \n",
"
\n",
" \n",
" min | \n",
" 0.238994 | \n",
" 0.050633 | \n",
" 0.000000 | \n",
" 0.009119 | \n",
" 0.014730 | \n",
" 0.003460 | \n",
" 0.013636 | \n",
" 0.950076 | \n",
" 0.678304 | \n",
" 0.110000 | \n",
" 0.536913 | \n",
" 0.333333 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.402516 | \n",
" 0.145570 | \n",
" 0.150602 | \n",
" 0.027356 | \n",
" 0.062193 | \n",
" 0.058824 | \n",
" 0.175000 | \n",
" 0.955110 | \n",
" 0.775561 | \n",
" 0.215000 | \n",
" 0.637584 | \n",
" 0.555556 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.440252 | \n",
" 0.183544 | \n",
" 0.186747 | \n",
" 0.045593 | \n",
" 0.076923 | \n",
" 0.100346 | \n",
" 0.268182 | \n",
" 0.957555 | \n",
" 0.800499 | \n",
" 0.255000 | \n",
" 0.691275 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.484277 | \n",
" 0.259494 | \n",
" 0.234940 | \n",
" 0.123100 | \n",
" 0.106383 | \n",
" 0.141869 | \n",
" 0.354545 | \n",
" 0.959585 | \n",
" 0.827930 | \n",
" 0.300000 | \n",
" 0.758389 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" max | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 \n",
"mean 0.453724 0.215128 0.192091 0.082877 \n",
"std 0.081597 0.104319 0.087166 0.072487 \n",
"min 0.238994 0.050633 0.000000 0.009119 \n",
"25% 0.402516 0.145570 0.150602 0.027356 \n",
"50% 0.440252 0.183544 0.186747 0.045593 \n",
"75% 0.484277 0.259494 0.234940 0.123100 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 \n",
"mean 0.091656 0.105899 0.262834 0.957374 \n",
"std 0.057502 0.061908 0.128388 0.002899 \n",
"min 0.014730 0.003460 0.013636 0.950076 \n",
"25% 0.062193 0.058824 0.175000 0.955110 \n",
"50% 0.076923 0.100346 0.268182 0.957555 \n",
"75% 0.106383 0.141869 0.354545 0.959585 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 5847.000000 \n",
"mean 0.802637 0.265601 0.704572 0.646846 0.246109 \n",
"std 0.040030 0.074400 0.080399 0.097212 0.430780 \n",
"min 0.678304 0.110000 0.536913 0.333333 0.000000 \n",
"25% 0.775561 0.215000 0.637584 0.555556 0.000000 \n",
"50% 0.800499 0.255000 0.691275 0.666667 0.000000 \n",
"75% 0.827930 0.300000 0.758389 0.666667 0.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 "
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics (count, mean, std, min, quartiles, max) for every column of wine_train.\n",
"wine_train.describe()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.448708 | \n",
" 0.217381 | \n",
" 0.184022 | \n",
" 0.078864 | \n",
" 0.088017 | \n",
" 0.100985 | \n",
" 0.258073 | \n",
" 0.957147 | \n",
" 0.805141 | \n",
" 0.265385 | \n",
" 0.709269 | \n",
" 0.650256 | \n",
" 0.246154 | \n",
"
\n",
" \n",
" std | \n",
" 0.073960 | \n",
" 0.105388 | \n",
" 0.094736 | \n",
" 0.069232 | \n",
" 0.043159 | \n",
" 0.051174 | \n",
" 0.126120 | \n",
" 0.002746 | \n",
" 0.042584 | \n",
" 0.065946 | \n",
" 0.079198 | \n",
" 0.101225 | \n",
" 0.431433 | \n",
"
\n",
" \n",
" min | \n",
" 0.301887 | \n",
" 0.075949 | \n",
" 0.000000 | \n",
" 0.012158 | \n",
" 0.026187 | \n",
" 0.006920 | \n",
" 0.018182 | \n",
" 0.950731 | \n",
" 0.683292 | \n",
" 0.150000 | \n",
" 0.570470 | \n",
" 0.333333 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.402516 | \n",
" 0.145570 | \n",
" 0.138554 | \n",
" 0.028875 | \n",
" 0.062193 | \n",
" 0.058824 | \n",
" 0.179545 | \n",
" 0.954879 | \n",
" 0.775561 | \n",
" 0.215000 | \n",
" 0.637584 | \n",
" 0.555556 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.433962 | \n",
" 0.177215 | \n",
" 0.186747 | \n",
" 0.042553 | \n",
" 0.076923 | \n",
" 0.100346 | \n",
" 0.256818 | \n",
" 0.957189 | \n",
" 0.805486 | \n",
" 0.260000 | \n",
" 0.697987 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.484277 | \n",
" 0.253165 | \n",
" 0.234940 | \n",
" 0.117021 | \n",
" 0.101473 | \n",
" 0.138408 | \n",
" 0.356818 | \n",
" 0.959306 | \n",
" 0.830424 | \n",
" 0.305000 | \n",
" 0.758389 | \n",
" 0.666667 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" max | \n",
" 0.798742 | \n",
" 0.696203 | \n",
" 0.602410 | \n",
" 0.303191 | \n",
" 0.436989 | \n",
" 0.280277 | \n",
" 0.575000 | \n",
" 0.962935 | \n",
" 0.935162 | \n",
" 0.490000 | \n",
" 0.953020 | \n",
" 0.888889 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.448708 0.217381 0.184022 0.078864 \n",
"std 0.073960 0.105388 0.094736 0.069232 \n",
"min 0.301887 0.075949 0.000000 0.012158 \n",
"25% 0.402516 0.145570 0.138554 0.028875 \n",
"50% 0.433962 0.177215 0.186747 0.042553 \n",
"75% 0.484277 0.253165 0.234940 0.117021 \n",
"max 0.798742 0.696203 0.602410 0.303191 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.088017 0.100985 0.258073 0.957147 \n",
"std 0.043159 0.051174 0.126120 0.002746 \n",
"min 0.026187 0.006920 0.018182 0.950731 \n",
"25% 0.062193 0.058824 0.179545 0.954879 \n",
"50% 0.076923 0.100346 0.256818 0.957189 \n",
"75% 0.101473 0.138408 0.356818 0.959306 \n",
"max 0.436989 0.280277 0.575000 0.962935 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 325.000000 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.805141 0.265385 0.709269 0.650256 0.246154 \n",
"std 0.042584 0.065946 0.079198 0.101225 0.431433 \n",
"min 0.683292 0.150000 0.570470 0.333333 0.000000 \n",
"25% 0.775561 0.215000 0.637584 0.555556 0.000000 \n",
"50% 0.805486 0.260000 0.697987 0.666667 0.000000 \n",
"75% 0.830424 0.305000 0.758389 0.666667 0.000000 \n",
"max 0.935162 0.490000 0.953020 0.888889 1.000000 "
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics (count, mean, std, min, quartiles, max) for every column of wine_val.\n",
"wine_val.describe()"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from torch import nn\n",
"from torch.utils.data import DataLoader, Dataset"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"class TabularDataset(Dataset):\n",
"    \"\"\"Map-style dataset over a table whose last column is the target.\n",
"\n",
"    Every column except the last is returned as the feature vector and the\n",
"    last column is returned as the label. All values are held as float32.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, data):\n",
"        # `data` must expose `.values` (e.g. a pandas DataFrame).\n",
"        self.data = data.values.astype('float32')\n",
"\n",
"    def __len__(self):\n",
"        # One sample per row of the underlying array.\n",
"        return self.data.shape[0]\n",
"\n",
"    def __getitem__(self, index):\n",
"        features = self.data[index, :-1]\n",
"        target = self.data[index, -1]\n",
"        return torch.tensor(features), torch.tensor(target)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"batch_size = 64\n",
"\n",
"# Wrap each split so that the last column is served as the label.\n",
"train_dataset = TabularDataset(wine_train)\n",
"test_dataset = TabularDataset(wine_test)\n",
"\n",
"# Only the training batches are reshuffled; the test order stays fixed.\n",
"train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)\n",
"test_dataloader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"class TabularModel(nn.Module):\n",
"    \"\"\"Two-layer MLP classifier: Linear -> ReLU -> Linear.\n",
"\n",
"    `forward` returns raw, unnormalised logits of shape (batch, output_dim).\n",
"    nn.CrossEntropyLoss applies log-softmax itself, so feeding it softmax\n",
"    probabilities flattens the gradients and bounds the loss from below\n",
"    (about 0.313 for two classes). Use `predict_proba` for probabilities.\n",
"\n",
"    Args:\n",
"        input_dim: number of input features.\n",
"        hidden_dim: width of the hidden layer.\n",
"        output_dim: number of classes (logits per sample).\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, input_dim, hidden_dim, output_dim):\n",
"        super().__init__()\n",
"        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
"        self.relu = nn.ReLU()\n",
"        self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
"        # Kept for inference-time probabilities only; not applied in forward().\n",
"        self.softmax = nn.Softmax(dim=1)\n",
"\n",
"    def forward(self, x):\n",
"        \"\"\"Return class logits for a (batch, input_dim) float tensor.\"\"\"\n",
"        hidden = self.relu(self.fc1(x))\n",
"        return self.fc2(hidden)\n",
"\n",
"    def predict_proba(self, x):\n",
"        \"\"\"Return softmax class probabilities, computed without gradients.\"\"\"\n",
"        with torch.no_grad():\n",
"            return self.softmax(self(x))"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"# Seed torch's global RNG so weight initialisation is repeatable across runs.\n",
"torch.manual_seed(42)\n",
"\n",
"# Every column except the last one (the label) is a feature.\n",
"input_dim = wine_train.shape[1] - 1\n",
"hidden_dim = 32\n",
"output_dim = 2  # binary label\n",
"learning_rate = 0.01\n",
"\n",
"# Built exactly once: this cell replaces two back-to-back cells that each\n",
"# constructed the model, loss and optimizer, the first set being discarded.\n",
"model = TabularModel(input_dim, hidden_dim, output_dim)\n",
"criterion = nn.CrossEntropyLoss()\n",
"optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1, loss: 0.4864\n",
"Epoch 3, loss: 0.3413\n",
"Epoch 5, loss: 0.3345\n",
"Epoch 7, loss: 0.3337\n",
"Epoch 9, loss: 0.3331\n",
"Finished Training\n"
]
}
],
"source": [
"num_epochs = 10\n",
"log_every = 2  # report the mean loss on every 2nd epoch (1, 3, 5, ...)\n",
"\n",
"model.train()\n",
"for epoch in range(num_epochs):\n",
"    running_loss = 0.0\n",
"    for inputs, labels in train_dataloader:\n",
"        # CrossEntropyLoss needs integer class indices, so cast the labels.\n",
"        labels = labels.long()\n",
"        optimizer.zero_grad()\n",
"        outputs = model(inputs)\n",
"        loss = criterion(outputs, labels)\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"        running_loss += loss.item()\n",
"\n",
"    # Mean per-batch loss over the epoch that just finished.\n",
"    if epoch % log_every == 0:\n",
"        print(f'Epoch {epoch + 1}, loss: {running_loss / len(train_dataloader):.4f}')\n",
"\n",
"print('Finished Training')"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy on test set: 98 %\n"
]
}
],
"source": [
"# Evaluate in inference mode: no gradient tracking, layers in eval behaviour.\n",
"model.eval()\n",
"correct = 0\n",
"total = 0\n",
"with torch.no_grad():\n",
"    for inputs, labels in test_dataloader:\n",
"        outputs = model(inputs.float())\n",
"        # The predicted class is the index of the largest score in each row.\n",
"        predicted = outputs.argmax(dim=1)\n",
"        total += labels.size(0)\n",
"        # Compare integer class indices with integer labels explicitly.\n",
"        correct += (predicted == labels.long()).sum().item()\n",
"print('Accuracy on test set: %d %%' % (100 * correct / total))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}