{
"cells": [
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import sklearn.model_selection"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset wine (C:/Users/s487176/.cache/huggingface/datasets/mstz___wine/wine/1.0.0/0913b614badc418a000d75d098776831f39ebf5ee208ecd3cfad4d5db1418d76)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a8f1b9db0c8b41e1904e16e22ae351e0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from datasets import load_dataset\n",
"\n",
"dataset = load_dataset(\"mstz/wine\", \"wine\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset({\n",
" features: ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality', 'color'],\n",
" num_rows: 6497\n",
"})"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset[\"train\"]"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# Convert the \"train\" split to a pandas DataFrame. `Dataset.to_pandas()` is the\n",
"# conversion provided by the `datasets` library; it avoids having pandas consume\n",
"# the dataset row by row, as `pd.DataFrame(dataset[\"train\"])` does.\n",
"wine_dataset = dataset[\"train\"].to_pandas()"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 7.8 | \n",
" 0.88 | \n",
" 0.00 | \n",
" 2.6 | \n",
" 0.098 | \n",
" 25.0 | \n",
" 67.0 | \n",
" 0.9968 | \n",
" 3.20 | \n",
" 0.68 | \n",
" 9.8 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 7.8 | \n",
" 0.76 | \n",
" 0.04 | \n",
" 2.3 | \n",
" 0.092 | \n",
" 15.0 | \n",
" 54.0 | \n",
" 0.9970 | \n",
" 3.26 | \n",
" 0.65 | \n",
" 9.8 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 11.2 | \n",
" 0.28 | \n",
" 0.56 | \n",
" 1.9 | \n",
" 0.075 | \n",
" 17.0 | \n",
" 60.0 | \n",
" 0.9980 | \n",
" 3.16 | \n",
" 0.58 | \n",
" 9.8 | \n",
" 6 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 7.4 | \n",
" 0.70 | \n",
" 0.00 | \n",
" 1.9 | \n",
" 0.076 | \n",
" 11.0 | \n",
" 34.0 | \n",
" 0.9978 | \n",
" 3.51 | \n",
" 0.56 | \n",
" 9.4 | \n",
" 5 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar chlorides \\\n",
"0 7.4 0.70 0.00 1.9 0.076 \n",
"1 7.8 0.88 0.00 2.6 0.098 \n",
"2 7.8 0.76 0.04 2.3 0.092 \n",
"3 11.2 0.28 0.56 1.9 0.075 \n",
"4 7.4 0.70 0.00 1.9 0.076 \n",
"\n",
" free_sulfur_dioxide total_sulfur_dioxide density pH sulphates \\\n",
"0 11.0 34.0 0.9978 3.51 0.56 \n",
"1 25.0 67.0 0.9968 3.20 0.68 \n",
"2 15.0 54.0 0.9970 3.26 0.65 \n",
"3 17.0 60.0 0.9980 3.16 0.58 \n",
"4 11.0 34.0 0.9978 3.51 0.56 \n",
"\n",
" alcohol quality color \n",
"0 9.4 5 0 \n",
"1 9.8 5 0 \n",
"2 9.8 5 0 \n",
"3 9.8 6 0 \n",
"4 9.4 5 0 "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset.head()  # preview the first rows of the data"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 7.215307 | \n",
" 0.339666 | \n",
" 0.318633 | \n",
" 5.443235 | \n",
" 0.056034 | \n",
" 30.525319 | \n",
" 115.744574 | \n",
" 0.994697 | \n",
" 3.218501 | \n",
" 0.531268 | \n",
" 10.491801 | \n",
" 5.818378 | \n",
" 0.753886 | \n",
"
\n",
" \n",
" std | \n",
" 1.296434 | \n",
" 0.164636 | \n",
" 0.145318 | \n",
" 4.757804 | \n",
" 0.035034 | \n",
" 17.749400 | \n",
" 56.521855 | \n",
" 0.002999 | \n",
" 0.160787 | \n",
" 0.148806 | \n",
" 1.192712 | \n",
" 0.873255 | \n",
" 0.430779 | \n",
"
\n",
" \n",
" min | \n",
" 3.800000 | \n",
" 0.080000 | \n",
" 0.000000 | \n",
" 0.600000 | \n",
" 0.009000 | \n",
" 1.000000 | \n",
" 6.000000 | \n",
" 0.987110 | \n",
" 2.720000 | \n",
" 0.220000 | \n",
" 8.000000 | \n",
" 3.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 6.400000 | \n",
" 0.230000 | \n",
" 0.250000 | \n",
" 1.800000 | \n",
" 0.038000 | \n",
" 17.000000 | \n",
" 77.000000 | \n",
" 0.992340 | \n",
" 3.110000 | \n",
" 0.430000 | \n",
" 9.500000 | \n",
" 5.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 7.000000 | \n",
" 0.290000 | \n",
" 0.310000 | \n",
" 3.000000 | \n",
" 0.047000 | \n",
" 29.000000 | \n",
" 118.000000 | \n",
" 0.994890 | \n",
" 3.210000 | \n",
" 0.510000 | \n",
" 10.300000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.700000 | \n",
" 0.400000 | \n",
" 0.390000 | \n",
" 8.100000 | \n",
" 0.065000 | \n",
" 41.000000 | \n",
" 156.000000 | \n",
" 0.996990 | \n",
" 3.320000 | \n",
" 0.600000 | \n",
" 11.300000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 15.900000 | \n",
" 1.580000 | \n",
" 1.660000 | \n",
" 65.800000 | \n",
" 0.611000 | \n",
" 289.000000 | \n",
" 440.000000 | \n",
" 1.038980 | \n",
" 4.010000 | \n",
" 2.000000 | \n",
" 14.900000 | \n",
" 9.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 7.215307 0.339666 0.318633 5.443235 \n",
"std 1.296434 0.164636 0.145318 4.757804 \n",
"min 3.800000 0.080000 0.000000 0.600000 \n",
"25% 6.400000 0.230000 0.250000 1.800000 \n",
"50% 7.000000 0.290000 0.310000 3.000000 \n",
"75% 7.700000 0.400000 0.390000 8.100000 \n",
"max 15.900000 1.580000 1.660000 65.800000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.056034 30.525319 115.744574 0.994697 \n",
"std 0.035034 17.749400 56.521855 0.002999 \n",
"min 0.009000 1.000000 6.000000 0.987110 \n",
"25% 0.038000 17.000000 77.000000 0.992340 \n",
"50% 0.047000 29.000000 118.000000 0.994890 \n",
"75% 0.065000 41.000000 156.000000 0.996990 \n",
"max 0.611000 289.000000 440.000000 1.038980 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 3.218501 0.531268 10.491801 5.818378 0.753886 \n",
"std 0.160787 0.148806 1.192712 0.873255 0.430779 \n",
"min 2.720000 0.220000 8.000000 3.000000 0.000000 \n",
"25% 3.110000 0.430000 9.500000 5.000000 1.000000 \n",
"50% 3.210000 0.510000 10.300000 6.000000 1.000000 \n",
"75% 3.320000 0.600000 11.300000 6.000000 1.000000 \n",
"max 4.010000 2.000000 14.900000 9.000000 1.000000 "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class balance: number of samples for each value of the \"color\" label.\n",
"ax = wine_dataset[\"color\"].value_counts().plot(kind=\"bar\")\n",
"# A title and axis labels let the figure stand on its own; the trailing semicolon\n",
"# keeps the textual repr of the returned objects out of the cell output.\n",
"ax.set(title=\"Samples per color class\", xlabel=\"color\", ylabel=\"count\");"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.2964337577998153"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset[\"fixed_acidity\"].std()"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([], dtype=int64), array([], dtype=int64))"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"# Check for missing values: np.where returns the (row, column) positions of the\n",
"# null entries, so two empty arrays mean there are none.\n",
"null_mask = wine_dataset.isnull()\n",
"np.where(null_mask)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Max-abs normalization: divide every feature column by its largest absolute\n",
"# value so that the features share a comparable [-1, 1] scale.\n",
"# The class label \"color\" is skipped: it is used for stratified splitting and as\n",
"# the plot hue, and dividing it would only turn the integer labels into floats.\n",
"# NOTE(review): the scale factors are computed on the full dataset, i.e. before the\n",
"# train/test split, so test-set statistics leak into the training data.\n",
"LABEL_COLUMN = \"color\"\n",
"for column in wine_dataset.columns:\n",
"    if column == LABEL_COLUMN:\n",
"        continue\n",
"    max_abs = wine_dataset[column].abs().max()\n",
"    if max_abs != 0:  # an all-zero column would otherwise turn into NaN (0 / 0)\n",
"        wine_dataset[column] = wine_dataset[column] / max_abs"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
" 6497.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.453793 | \n",
" 0.214978 | \n",
" 0.191948 | \n",
" 0.082724 | \n",
" 0.091708 | \n",
" 0.105624 | \n",
" 0.263056 | \n",
" 0.957378 | \n",
" 0.802619 | \n",
" 0.265634 | \n",
" 0.704148 | \n",
" 0.646486 | \n",
" 0.753886 | \n",
"
\n",
" \n",
" std | \n",
" 0.081537 | \n",
" 0.104200 | \n",
" 0.087541 | \n",
" 0.072307 | \n",
" 0.057338 | \n",
" 0.061417 | \n",
" 0.128459 | \n",
" 0.002886 | \n",
" 0.040097 | \n",
" 0.074403 | \n",
" 0.080048 | \n",
" 0.097028 | \n",
" 0.430779 | \n",
"
\n",
" \n",
" min | \n",
" 0.238994 | \n",
" 0.050633 | \n",
" 0.000000 | \n",
" 0.009119 | \n",
" 0.014730 | \n",
" 0.003460 | \n",
" 0.013636 | \n",
" 0.950076 | \n",
" 0.678304 | \n",
" 0.110000 | \n",
" 0.536913 | \n",
" 0.333333 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 0.402516 | \n",
" 0.145570 | \n",
" 0.150602 | \n",
" 0.027356 | \n",
" 0.062193 | \n",
" 0.058824 | \n",
" 0.175000 | \n",
" 0.955110 | \n",
" 0.775561 | \n",
" 0.215000 | \n",
" 0.637584 | \n",
" 0.555556 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.440252 | \n",
" 0.183544 | \n",
" 0.186747 | \n",
" 0.045593 | \n",
" 0.076923 | \n",
" 0.100346 | \n",
" 0.268182 | \n",
" 0.957564 | \n",
" 0.800499 | \n",
" 0.255000 | \n",
" 0.691275 | \n",
" 0.666667 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.484277 | \n",
" 0.253165 | \n",
" 0.234940 | \n",
" 0.123100 | \n",
" 0.106383 | \n",
" 0.141869 | \n",
" 0.354545 | \n",
" 0.959585 | \n",
" 0.827930 | \n",
" 0.300000 | \n",
" 0.758389 | \n",
" 0.666667 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.453793 0.214978 0.191948 0.082724 \n",
"std 0.081537 0.104200 0.087541 0.072307 \n",
"min 0.238994 0.050633 0.000000 0.009119 \n",
"25% 0.402516 0.145570 0.150602 0.027356 \n",
"50% 0.440252 0.183544 0.186747 0.045593 \n",
"75% 0.484277 0.253165 0.234940 0.123100 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.091708 0.105624 0.263056 0.957378 \n",
"std 0.057338 0.061417 0.128459 0.002886 \n",
"min 0.014730 0.003460 0.013636 0.950076 \n",
"25% 0.062193 0.058824 0.175000 0.955110 \n",
"50% 0.076923 0.100346 0.268182 0.957564 \n",
"75% 0.106383 0.141869 0.354545 0.959585 \n",
"max 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 6497.000000 6497.000000 6497.000000 6497.000000 6497.000000 \n",
"mean 0.802619 0.265634 0.704148 0.646486 0.753886 \n",
"std 0.040097 0.074403 0.080048 0.097028 0.430779 \n",
"min 0.678304 0.110000 0.536913 0.333333 0.000000 \n",
"25% 0.775561 0.215000 0.637584 0.555556 1.000000 \n",
"50% 0.800499 0.255000 0.691275 0.666667 1.000000 \n",
"75% 0.827930 0.300000 0.758389 0.666667 1.000000 \n",
"max 1.000000 1.000000 1.000000 1.000000 1.000000 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset.describe(include='all')  # check the values after normalization"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"652 1.000000\n",
"442 0.981132\n",
"557 0.981132\n",
"554 0.974843\n",
"555 0.974843\n",
"243 0.943396\n",
"244 0.943396\n",
"544 0.899371\n",
"3125 0.893082\n",
"374 0.880503\n",
"Name: fixed_acidity, dtype: float64"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_dataset[\"fixed_acidity\"].nlargest(10)  # check whether the highest values make sense"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 4408\n",
"0.0 1439\n",
"Name: color, dtype: int64"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Split into a training part (90%) and a hold-out part (10%, wine_test), stratified\n",
"# on \"color\" so both parts keep the class proportions; random_state fixes the shuffle.\n",
"wine_train, wine_test = train_test_split(\n",
"    wine_dataset,\n",
"    test_size=0.1,\n",
"    random_state=1,\n",
"    stratify=wine_dataset[\"color\"],\n",
")\n",
"wine_train[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 490\n",
"0.0 160\n",
"Name: color, dtype: int64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Split the rows that are not in wine_train evenly into test and validation sets,\n",
"# stratified on \"color\". The hold-out is rebuilt from wine_dataset and wine_train\n",
"# rather than read from wine_test, so re-running this cell does not halve\n",
"# wine_test again on every execution.\n",
"holdout = wine_dataset.drop(index=wine_train.index)\n",
"wine_test, wine_val = sklearn.model_selection.train_test_split(\n",
"    holdout,\n",
"    test_size=0.5,\n",
"    random_state=1,\n",
"    stratify=holdout[\"color\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 245\n",
"0.0 80\n",
"Name: color, dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0 245\n",
"0.0 80\n",
"Name: color, dtype: int64"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_val[\"color\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns\n",
"sns.set_theme()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of columns in the dataset (second entry of the (rows, columns) shape).\n",
"wine_dataset.shape[1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"sns.pairplot(data=wine_dataset, hue=\"color\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 7.127077 | \n",
" 0.342969 | \n",
" 0.299846 | \n",
" 5.197538 | \n",
" 0.054222 | \n",
" 29.773846 | \n",
" 113.283077 | \n",
" 0.994568 | \n",
" 3.222246 | \n",
" 0.527754 | \n",
" 10.488564 | \n",
" 5.815385 | \n",
" 0.753846 | \n",
"
\n",
" \n",
" std | \n",
" 1.181391 | \n",
" 0.170050 | \n",
" 0.129556 | \n",
" 4.608978 | \n",
" 0.031405 | \n",
" 15.822670 | \n",
" 55.072566 | \n",
" 0.002895 | \n",
" 0.159630 | \n",
" 0.144550 | \n",
" 1.172682 | \n",
" 0.855128 | \n",
" 0.431433 | \n",
"
\n",
" \n",
" min | \n",
" 5.000000 | \n",
" 0.100000 | \n",
" 0.000000 | \n",
" 0.800000 | \n",
" 0.019000 | \n",
" 3.000000 | \n",
" 9.000000 | \n",
" 0.988190 | \n",
" 2.860000 | \n",
" 0.260000 | \n",
" 8.500000 | \n",
" 3.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 6.400000 | \n",
" 0.230000 | \n",
" 0.240000 | \n",
" 1.800000 | \n",
" 0.037000 | \n",
" 17.000000 | \n",
" 74.000000 | \n",
" 0.992400 | \n",
" 3.110000 | \n",
" 0.420000 | \n",
" 9.500000 | \n",
" 5.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 6.900000 | \n",
" 0.280000 | \n",
" 0.300000 | \n",
" 2.800000 | \n",
" 0.048000 | \n",
" 29.000000 | \n",
" 115.000000 | \n",
" 0.994800 | \n",
" 3.210000 | \n",
" 0.500000 | \n",
" 10.300000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.500000 | \n",
" 0.400000 | \n",
" 0.370000 | \n",
" 7.500000 | \n",
" 0.062000 | \n",
" 41.000000 | \n",
" 151.000000 | \n",
" 0.996750 | \n",
" 3.320000 | \n",
" 0.600000 | \n",
" 11.300000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 13.000000 | \n",
" 0.900000 | \n",
" 0.740000 | \n",
" 22.000000 | \n",
" 0.415000 | \n",
" 67.000000 | \n",
" 253.000000 | \n",
" 1.002890 | \n",
" 3.680000 | \n",
" 1.170000 | \n",
" 14.000000 | \n",
" 9.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 7.127077 0.342969 0.299846 5.197538 \n",
"std 1.181391 0.170050 0.129556 4.608978 \n",
"min 5.000000 0.100000 0.000000 0.800000 \n",
"25% 6.400000 0.230000 0.240000 1.800000 \n",
"50% 6.900000 0.280000 0.300000 2.800000 \n",
"75% 7.500000 0.400000 0.370000 7.500000 \n",
"max 13.000000 0.900000 0.740000 22.000000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.054222 29.773846 113.283077 0.994568 \n",
"std 0.031405 15.822670 55.072566 0.002895 \n",
"min 0.019000 3.000000 9.000000 0.988190 \n",
"25% 0.037000 17.000000 74.000000 0.992400 \n",
"50% 0.048000 29.000000 115.000000 0.994800 \n",
"75% 0.062000 41.000000 151.000000 0.996750 \n",
"max 0.415000 67.000000 253.000000 1.002890 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 325.000000 325.000000 325.000000 325.000000 325.000000 \n",
"mean 3.222246 0.527754 10.488564 5.815385 0.753846 \n",
"std 0.159630 0.144550 1.172682 0.855128 0.431433 \n",
"min 2.860000 0.260000 8.500000 3.000000 0.000000 \n",
"25% 3.110000 0.420000 9.500000 5.000000 1.000000 \n",
"50% 3.210000 0.500000 10.300000 6.000000 1.000000 \n",
"75% 3.320000 0.600000 11.300000 6.000000 1.000000 \n",
"max 3.680000 1.170000 14.000000 9.000000 1.000000 "
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_test.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
" 5847.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 7.216179 | \n",
" 0.339796 | \n",
" 0.319111 | \n",
" 5.417402 | \n",
" 0.056310 | \n",
" 30.535403 | \n",
" 115.673508 | \n",
" 0.994682 | \n",
" 3.218303 | \n",
" 0.531596 | \n",
" 10.494455 | \n",
" 5.820592 | \n",
" 0.753891 | \n",
"
\n",
" \n",
" std | \n",
" 1.299695 | \n",
" 0.164817 | \n",
" 0.146141 | \n",
" 4.736399 | \n",
" 0.035816 | \n",
" 17.845522 | \n",
" 56.432512 | \n",
" 0.002995 | \n",
" 0.159919 | \n",
" 0.149728 | \n",
" 1.189801 | \n",
" 0.872353 | \n",
" 0.430780 | \n",
"
\n",
" \n",
" min | \n",
" 3.800000 | \n",
" 0.080000 | \n",
" 0.000000 | \n",
" 0.600000 | \n",
" 0.009000 | \n",
" 1.000000 | \n",
" 6.000000 | \n",
" 0.987110 | \n",
" 2.720000 | \n",
" 0.220000 | \n",
" 8.000000 | \n",
" 3.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 6.400000 | \n",
" 0.230000 | \n",
" 0.250000 | \n",
" 1.800000 | \n",
" 0.038000 | \n",
" 17.000000 | \n",
" 77.500000 | \n",
" 0.992300 | \n",
" 3.110000 | \n",
" 0.430000 | \n",
" 9.500000 | \n",
" 5.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 7.000000 | \n",
" 0.290000 | \n",
" 0.310000 | \n",
" 3.000000 | \n",
" 0.047000 | \n",
" 29.000000 | \n",
" 118.000000 | \n",
" 0.994840 | \n",
" 3.210000 | \n",
" 0.510000 | \n",
" 10.300000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.700000 | \n",
" 0.400000 | \n",
" 0.390000 | \n",
" 8.100000 | \n",
" 0.065000 | \n",
" 41.000000 | \n",
" 155.500000 | \n",
" 0.996985 | \n",
" 3.320000 | \n",
" 0.600000 | \n",
" 11.300000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 15.900000 | \n",
" 1.580000 | \n",
" 1.660000 | \n",
" 65.800000 | \n",
" 0.611000 | \n",
" 289.000000 | \n",
" 440.000000 | \n",
" 1.038980 | \n",
" 4.010000 | \n",
" 2.000000 | \n",
" 14.900000 | \n",
" 9.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 \n",
"mean 7.216179 0.339796 0.319111 5.417402 \n",
"std 1.299695 0.164817 0.146141 4.736399 \n",
"min 3.800000 0.080000 0.000000 0.600000 \n",
"25% 6.400000 0.230000 0.250000 1.800000 \n",
"50% 7.000000 0.290000 0.310000 3.000000 \n",
"75% 7.700000 0.400000 0.390000 8.100000 \n",
"max 15.900000 1.580000 1.660000 65.800000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 \n",
"mean 0.056310 30.535403 115.673508 0.994682 \n",
"std 0.035816 17.845522 56.432512 0.002995 \n",
"min 0.009000 1.000000 6.000000 0.987110 \n",
"25% 0.038000 17.000000 77.500000 0.992300 \n",
"50% 0.047000 29.000000 118.000000 0.994840 \n",
"75% 0.065000 41.000000 155.500000 0.996985 \n",
"max 0.611000 289.000000 440.000000 1.038980 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 5847.000000 5847.000000 5847.000000 5847.000000 5847.000000 \n",
"mean 3.218303 0.531596 10.494455 5.820592 0.753891 \n",
"std 0.159919 0.149728 1.189801 0.872353 0.430780 \n",
"min 2.720000 0.220000 8.000000 3.000000 0.000000 \n",
"25% 3.110000 0.430000 9.500000 5.000000 1.000000 \n",
"50% 3.210000 0.510000 10.300000 6.000000 1.000000 \n",
"75% 3.320000 0.600000 11.300000 6.000000 1.000000 \n",
"max 4.010000 2.000000 14.900000 9.000000 1.000000 "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_train.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" fixed_acidity | \n",
" volatile_acidity | \n",
" citric_acid | \n",
" residual_sugar | \n",
" chlorides | \n",
" free_sulfur_dioxide | \n",
" total_sulfur_dioxide | \n",
" density | \n",
" pH | \n",
" sulphates | \n",
" alcohol | \n",
" quality | \n",
" color | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
" 325.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 7.287846 | \n",
" 0.334031 | \n",
" 0.328831 | \n",
" 6.153692 | \n",
" 0.052874 | \n",
" 31.095385 | \n",
" 119.484615 | \n",
" 0.995091 | \n",
" 3.218308 | \n",
" 0.528892 | \n",
" 10.447282 | \n",
" 5.781538 | \n",
" 0.753846 | \n",
"
\n",
" \n",
" std | \n",
" 1.345471 | \n",
" 0.156023 | \n",
" 0.144192 | \n",
" 5.220944 | \n",
" 0.021471 | \n",
" 17.861741 | \n",
" 59.481580 | \n",
" 0.003150 | \n",
" 0.177176 | \n",
" 0.136171 | \n",
" 1.265593 | \n",
" 0.908617 | \n",
" 0.431433 | \n",
"
\n",
" \n",
" min | \n",
" 4.700000 | \n",
" 0.090000 | \n",
" 0.000000 | \n",
" 0.800000 | \n",
" 0.012000 | \n",
" 3.000000 | \n",
" 8.000000 | \n",
" 0.987460 | \n",
" 2.870000 | \n",
" 0.280000 | \n",
" 8.400000 | \n",
" 3.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 6.400000 | \n",
" 0.230000 | \n",
" 0.260000 | \n",
" 2.000000 | \n",
" 0.039000 | \n",
" 16.000000 | \n",
" 79.000000 | \n",
" 0.992700 | \n",
" 3.100000 | \n",
" 0.430000 | \n",
" 9.400000 | \n",
" 5.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 7.100000 | \n",
" 0.290000 | \n",
" 0.310000 | \n",
" 4.550000 | \n",
" 0.048000 | \n",
" 29.000000 | \n",
" 125.000000 | \n",
" 0.995320 | \n",
" 3.210000 | \n",
" 0.500000 | \n",
" 10.200000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 7.800000 | \n",
" 0.400000 | \n",
" 0.400000 | \n",
" 8.800000 | \n",
" 0.060000 | \n",
" 45.000000 | \n",
" 163.000000 | \n",
" 0.997450 | \n",
" 3.320000 | \n",
" 0.610000 | \n",
" 11.300000 | \n",
" 6.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" max | \n",
" 15.000000 | \n",
" 1.180000 | \n",
" 0.740000 | \n",
" 31.600000 | \n",
" 0.170000 | \n",
" 77.000000 | \n",
" 251.000000 | \n",
" 1.010300 | \n",
" 4.010000 | \n",
" 1.140000 | \n",
" 14.000000 | \n",
" 8.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" fixed_acidity volatile_acidity citric_acid residual_sugar \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 7.287846 0.334031 0.328831 6.153692 \n",
"std 1.345471 0.156023 0.144192 5.220944 \n",
"min 4.700000 0.090000 0.000000 0.800000 \n",
"25% 6.400000 0.230000 0.260000 2.000000 \n",
"50% 7.100000 0.290000 0.310000 4.550000 \n",
"75% 7.800000 0.400000 0.400000 8.800000 \n",
"max 15.000000 1.180000 0.740000 31.600000 \n",
"\n",
" chlorides free_sulfur_dioxide total_sulfur_dioxide density \\\n",
"count 325.000000 325.000000 325.000000 325.000000 \n",
"mean 0.052874 31.095385 119.484615 0.995091 \n",
"std 0.021471 17.861741 59.481580 0.003150 \n",
"min 0.012000 3.000000 8.000000 0.987460 \n",
"25% 0.039000 16.000000 79.000000 0.992700 \n",
"50% 0.048000 29.000000 125.000000 0.995320 \n",
"75% 0.060000 45.000000 163.000000 0.997450 \n",
"max 0.170000 77.000000 251.000000 1.010300 \n",
"\n",
" pH sulphates alcohol quality color \n",
"count 325.000000 325.000000 325.000000 325.000000 325.000000 \n",
"mean 3.218308 0.528892 10.447282 5.781538 0.753846 \n",
"std 0.177176 0.136171 1.265593 0.908617 0.431433 \n",
"min 2.870000 0.280000 8.400000 3.000000 0.000000 \n",
"25% 3.100000 0.430000 9.400000 5.000000 1.000000 \n",
"50% 3.210000 0.500000 10.200000 6.000000 1.000000 \n",
"75% 3.320000 0.610000 11.300000 6.000000 1.000000 \n",
"max 4.010000 1.140000 14.000000 8.000000 1.000000 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wine_val.describe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}