{ "cells": [ { "cell_type": "markdown", "id": "b8487a98-1b51-46f1-b727-719575945544", "metadata": { "tags": [] }, "source": [ "### Pobieranie zbioru i pakietów" ] }, { "cell_type": "code", "execution_count": 1, "id": "800bc7a7-aa60-4db8-b170-a5a7340520aa", "metadata": { "ExecuteTime": { "start_time": "2024-03-24T15:19:23.899243Z", "end_time": "2024-03-24T15:19:50.743948Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting kaggle\n", " Downloading kaggle-1.6.6.tar.gz (84 kB)\n", " ---------------------------------------- 84.6/84.6 kB 2.4 MB/s eta 0:00:00\n", " Preparing metadata (setup.py): started\n", " Preparing metadata (setup.py): finished with status 'done'\n", "Requirement already satisfied: six>=1.10 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (1.16.0)\n", "Requirement already satisfied: certifi in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (2022.12.7)\n", "Requirement already satisfied: python-dateutil in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (2.8.2)\n", "Requirement already satisfied: requests in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (2.28.1)\n", "Requirement already satisfied: tqdm in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (4.64.1)\n", "Requirement already satisfied: python-slugify in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (5.0.2)\n", "Requirement already satisfied: urllib3 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (1.26.14)\n", "Requirement already satisfied: bleach in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from kaggle) (4.1.0)\n", "Requirement already satisfied: webencodings in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n", "Requirement already satisfied: packaging in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from bleach->kaggle) (22.0)\n", "Requirement already satisfied: text-unidecode>=1.3 in 
c:\\users\\adamw\\anaconda3\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n", "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from requests->kaggle) (3.4)\n", "Requirement already satisfied: charset-normalizer<3,>=2 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from requests->kaggle) (2.0.4)\n", "Requirement already satisfied: colorama in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from tqdm->kaggle) (0.4.6)\n", "Building wheels for collected packages: kaggle\n", " Building wheel for kaggle (setup.py): started\n", " Building wheel for kaggle (setup.py): finished with status 'done'\n", " Created wheel for kaggle: filename=kaggle-1.6.6-py3-none-any.whl size=111955 sha256=23592736409344e3027e92f5ac103680cd5efb348835a123a68118e729e02b66\n", " Stored in directory: c:\\users\\adamw\\appdata\\local\\pip\\cache\\wheels\\54\\6e\\ff\\d5ab6af2287a2d0c5b8cea9328fb14940ca253fe60214a99c8\n", "Successfully built kaggle\n", "Installing collected packages: kaggle\n", "Successfully installed kaggle-1.6.6\n", "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: pandas in c:\\users\\adamw\\anaconda3\\lib\\site-packages (1.5.3)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas) (2022.7)\n", "Requirement already satisfied: numpy>=1.21.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas) (1.23.5)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n", "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: numpy in c:\\users\\adamw\\anaconda3\\lib\\site-packages (1.23.5)\n", "Note: you may need to restart the kernel to use 
updated packages.\n", "Requirement already satisfied: scikit-learn in c:\\users\\adamw\\anaconda3\\lib\\site-packages (1.2.1)\n", "Requirement already satisfied: numpy>=1.17.3 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (1.23.5)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (2.2.0)\n", "Requirement already satisfied: joblib>=1.1.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (1.1.1)\n", "Requirement already satisfied: scipy>=1.3.2 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from scikit-learn) (1.10.0)\n", "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: seaborn in c:\\users\\adamw\\anaconda3\\lib\\site-packages (0.12.2)\n", "Requirement already satisfied: numpy!=1.24.0,>=1.17 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from seaborn) (1.23.5)\n", "Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from seaborn) (3.7.0)\n", "Requirement already satisfied: pandas>=0.25 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from seaborn) (1.5.3)\n", "Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9)\n", "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.5)\n", "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.25.0)\n", "Requirement already satisfied: pillow>=6.2.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (9.4.0)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n", "Requirement 
already satisfied: packaging>=20.0 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (22.0)\n", "Requirement already satisfied: cycler>=0.10 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n", "Requirement already satisfied: pytz>=2020.1 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from pandas>=0.25->seaborn) (2022.7)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\adamw\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install kaggle\n", "%pip install pandas\n", "%pip install numpy\n", "%pip install scikit-learn\n", "%pip install seaborn" ] }, { "cell_type": "code", "execution_count": 3, "id": "f132ca66-2325-48e0-8bf8-ff983d8ad1ce", "metadata": { "ExecuteTime": { "start_time": "2024-03-24T15:21:05.151558Z", "end_time": "2024-03-24T15:21:12.115431Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading 1-5-million-beer-reviews-from-beer-advocate.zip to C:\\Users\\adamw\\REPOS\\ium_464979\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", " 0%| | 0.00/32.5M [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexbrewery_idbrewery_namereview_timereview_overallreview_aromareview_appearancereview_profilenamebeer_stylereview_palatereview_tastebeer_namebeer_abvbeer_beerid
0010325Vecchio Birraio12348178231.5000002.0000002.500000stculesHefeweizen1.5000001.500000Sausa Weizen5.00000047986
1110325Vecchio Birraio12359150973.0000002.5000003.000000stculesEnglish Strong Ale3.0000003.000000Red Moon6.20000048213
2210325Vecchio Birraio12359166043.0000002.5000003.000000stculesForeign / Export Stout3.0000003.000000Black Horse Black Beer6.50000048215
3310325Vecchio Birraio12347251453.0000003.0000003.500000stculesGerman Pilsener2.5000003.000000Sausa Pils5.00000047969
441075Caldera Brewing Company12937352064.0000004.5000004.000000johnmichaelsenAmerican Double / Imperial IPA4.0000004.500000Cauldron DIPA7.70000064883
\n", "" ], "text/plain": [ " index brewery_id brewery_name review_time review_overall \\\n", "0 0 10325 Vecchio Birraio 1234817823 1.500000 \n", "1 1 10325 Vecchio Birraio 1235915097 3.000000 \n", "2 2 10325 Vecchio Birraio 1235916604 3.000000 \n", "3 3 10325 Vecchio Birraio 1234725145 3.000000 \n", "4 4 1075 Caldera Brewing Company 1293735206 4.000000 \n", "\n", " review_aroma review_appearance review_profilename \\\n", "0 2.000000 2.500000 stcules \n", "1 2.500000 3.000000 stcules \n", "2 2.500000 3.000000 stcules \n", "3 3.000000 3.500000 stcules \n", "4 4.500000 4.000000 johnmichaelsen \n", "\n", " beer_style review_palate review_taste \\\n", "0 Hefeweizen 1.500000 1.500000 \n", "1 English Strong Ale 3.000000 3.000000 \n", "2 Foreign / Export Stout 3.000000 3.000000 \n", "3 German Pilsener 2.500000 3.000000 \n", "4 American Double / Imperial IPA 4.000000 4.500000 \n", "\n", " beer_name beer_abv beer_beerid \n", "0 Sausa Weizen 5.000000 47986 \n", "1 Red Moon 6.200000 48213 \n", "2 Black Horse Black Beer 6.500000 48215 \n", "3 Sausa Pils 5.000000 47969 \n", "4 Cauldron DIPA 7.700000 64883 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Read the beer review export into a DataFrame and preview its first rows.\n",
"beers = pd.read_csv(\"beer_reviews.csv\")\n",
"\n",
"beers.head(n=5)"
] }, { "cell_type": "code", "execution_count": 9, "id": "f54a599d-9cee-4b1f-9be1-c7bad6129760", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 1586614 entries, 0 to 1586613\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 index 1586614 non-null int64 \n", " 1 brewery_id 1586614 non-null int64 \n", " 2 brewery_name 1586599 non-null object \n", " 3 review_time 1586614 non-null int64 \n", " 4 review_overall 1586614 non-null float64\n", " 5 review_aroma 1586614 non-null float64\n", " 6 review_appearance 1586614 non-null float64\n", " 7 review_profilename 1586266 non-null object \n", " 8 beer_style 
1586614 non-null object \n", " 9 review_palate 1586614 non-null float64\n", " 10 review_taste 1586614 non-null float64\n", " 11 beer_name 1586614 non-null object \n", " 12 beer_abv 1518829 non-null float64\n", " 13 beer_beerid 1586614 non-null int64 \n", "dtypes: float64(6), int64(4), object(4)\n", "memory usage: 169.5+ MB\n" ] } ], "source": [ "beers.info()" ] },
{ "cell_type": "markdown", "id": "81107f1b-bfd2-40ce-b1dd-a98be02c0e9f", "metadata": {}, "source": [ "### Czyszczenie" ] },
{ "cell_type": "code", "execution_count": 49, "id": "a1c7ea8b-b9a4-4098-8e31-32ae0cf22075", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "index 0\n", "brewery_id 0\n", "brewery_name 0\n", "review_time 0\n", "review_overall 0\n", "review_aroma 0\n", "review_appearance 0\n", "review_profilename 0\n", "beer_style 0\n", "review_palate 0\n", "review_taste 0\n", "beer_name 0\n", "beer_abv 0\n", "beer_beerid 0\n", "dtype: int64" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Drop reviews that lack a brewery name, reviewer profile name or ABV.\n",
"# Reassigning instead of inplace=True keeps the cell safe to re-run.\n",
"beers = beers.dropna(subset=['brewery_name', 'review_profilename', 'beer_abv'])\n",
"\n",
"# Remaining missing values per column (expected to be all zeros).\n",
"beers.isnull().sum()"
] },
{ "cell_type": "markdown", "id": "46e10b78-fb00-4f7e-9c40-7ee08ebeeffe", "metadata": {}, "source": [ "### Podział na podzbiory" ] },
{ "cell_type": "code", "execution_count": 24, "id": "e5f6f028-dfcb-4cc5-9bd1-bc9bd51c0a31", "metadata": {}, "outputs": [],
"source": [
"# The split runs before normalisation so the scaler can be fitted on the training part only.\n",
"# 80% train; the remaining 20% is halved into dev and test (10% each).\n",
"beers_train, beers_dev_test = train_test_split(beers, test_size=0.2, random_state=1234)\n",
"beers_dev, beers_test = train_test_split(beers_dev_test, test_size=0.5, random_state=1234)"
] },
{ "cell_type": "code", "execution_count": 25, "id": "c9feafcc-3591-4d7b-8282-f0f2e2ebd782", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Liczba kolumn w każdym zbiorze: 14 kolumn\n", "Całość: 1518478 rekordów \n", "Train: 1214782 rekordów\n", "Dev: 151848 rekordów\n", "Test: 151848 rekordów\n" ] } ],
"source": [
"# Report the column count and the row count of the full frame and of each split.\n",
"print(f\"Liczba kolumn w każdym zbiorze: {beers.shape[1]} kolumn\")\n",
"print(f\"Całość: {beers.shape[0]} rekordów \")\n",
"print(f\"Train: {beers_train.shape[0]} rekordów\")\n",
"print(f\"Dev: {beers_dev.shape[0]} rekordów\")\n",
"print(f\"Test: {beers_test.shape[0]} rekordów\")"
] },
{ "cell_type": "markdown", "id": "7e79db21-7b02-4f76-972f-da3092a0d22c", "metadata": {}, "source": [ "### Normalizacja" ] },
{ "cell_type": "code", "execution_count": 22, "id": "e83dd914-b8cf-4e72-a9ea-f4e7f2f63791", "metadata": {}, "outputs": [],
"source": [
"# Columns passed through the min-max scaler; listed once so fit and transform always agree.\n",
"# NOTE(review): beer_beerid looks like an identifier rather than a measurement; it is still\n",
"# scaled because the summary cells further down read it - confirm whether it should be.\n",
"scaled_columns = ['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']\n",
"\n",
"# Fit on the training split only so dev/test values do not influence the scaling parameters.\n",
"scaler = MinMaxScaler().fit(beers_train[scaled_columns])\n",
"\n",
"\n",
"def scale_features(frame):\n",
"    \"\"\"Return a copy of frame with scaled_columns transformed by the train-fitted scaler.\"\"\"\n",
"    scaled = frame.copy()\n",
"    scaled[scaled_columns] = scaler.transform(frame[scaled_columns])\n",
"    return scaled\n",
"\n",
"\n",
"beers_train, beers_dev, beers_test = (scale_features(part) for part in (beers_train, beers_dev, beers_test))\n",
"# The full frame gets the same transform because the overview cells that follow summarise it.\n",
"beers = scale_features(beers)"
] },
{ "cell_type": "markdown", "id": "c811f83b-351e-45c0-bb0f-c1cf68afd669", "metadata": {}, "source": [ "### Przegląd danych" ] },
{ "cell_type": "code", "execution_count": 51, "id": "75ffb6e4-3780-4e5f-b151-4b2929237e2a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Suma różnych piw: 44075\n", "Suma różnych styli: 104\n", "Suma różnych browarów: 5155\n" ] } ],
"source": [
"# Number of distinct beer names, styles and brewery names in the cleaned frame.\n",
"print(f\"Suma różnych piw: {beers['beer_name'].nunique()}\")\n",
"print(f\"Suma różnych styli: {beers['beer_style'].nunique()}\")\n",
"print(f\"Suma różnych browarów: {beers['brewery_name'].nunique()}\")"
] },
{ "cell_type": "code", "execution_count": 76, "id": "02571b1b-471c-4339-8422-c8fc27ce6055", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ],
"source": [
"# Each row of beers is one review, so value_counts() yields the number of reviews per style.\n",
"style_counts = beers['beer_style'].value_counts()\n",
"top_15_styles = style_counts.head(15)\n",
"\n",
"fig, ax = plt.subplots()\n",
"ax.bar(top_15_styles.index, top_15_styles.values)\n",
"ax.set_xlabel('Styl')\n",
"ax.set_ylabel('Liczba opinii')\n",
"ax.set_title('Liczba opinii dla 15 najliczniejszych stylów')\n",
"# Style names are long, so the tick labels are drawn vertically.\n",
"ax.tick_params(axis='x', labelrotation=90)\n",
"fig.tight_layout()\n",
"plt.show()"
] }, { "cell_type": "code", "execution_count": 91, "id": "0f1a2572-db91-4d8f-ad73-69327e60a606", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
review_overallLiczba opini
beer_name
90 Minute IPA0.8290973289
Old Rasputin Russian Imperial Stout0.8348233110
Sierra Nevada Celebration Ale0.8337112999
India Pale Ale0.7707772960
Two Hearted Ale0.8660432727
\n", "
" ], "text/plain": [ " review_overall Liczba opini\n", "beer_name \n", "90 Minute IPA 0.829097 3289\n", "Old Rasputin Russian Imperial Stout 0.834823 3110\n", "Sierra Nevada Celebration Ale 0.833711 2999\n", "India Pale Ale 0.770777 2960\n", "Two Hearted Ale 0.866043 2727" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Mean overall score and number of reviews per beer name, computed in a single groupby pass.\n",
"# The column label 'Liczba opini' is kept exactly as before so the resulting frame is unchanged.\n",
"reviews = (\n",
"    beers.groupby('beer_name')['review_overall']\n",
"    .agg(['mean', 'count'])\n",
"    .rename(columns={'mean': 'review_overall', 'count': 'Liczba opini'})\n",
"    .sort_values(by='Liczba opini', ascending=False)\n",
")\n",
"# Most-reviewed beers first.\n",
"reviews.head()"
] }, { "cell_type": "code", "execution_count": 32, "id": "20444c91-b0be-44c8-ba99-b24290a054a0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
review_overallreview_aromareview_appearancereview_palatereview_tastebeer_abvbeer_beerid
count1518478.0001518478.0001518478.0001518478.0001518478.0001518478.0001518478.000
mean0.7650.6870.7700.6880.7010.1220.277
std0.1430.1740.1230.1700.1820.0400.282
min0.0000.0000.0000.0000.0000.0000.000
25%0.7000.6250.7000.6250.6250.0900.021
50%0.8000.7500.8000.7500.7500.1120.166
75%0.9000.7500.8000.7500.8750.1470.507
max1.0001.0001.0001.0001.0001.0001.000
\n", "
" ], "text/plain": [ " review_overall review_aroma review_appearance review_palate \\\n", "count 1518478.000 1518478.000 1518478.000 1518478.000 \n", "mean 0.765 0.687 0.770 0.688 \n", "std 0.143 0.174 0.123 0.170 \n", "min 0.000 0.000 0.000 0.000 \n", "25% 0.700 0.625 0.700 0.625 \n", "50% 0.800 0.750 0.800 0.750 \n", "75% 0.900 0.750 0.800 0.750 \n", "max 1.000 1.000 1.000 1.000 \n", "\n", " review_taste beer_abv beer_beerid \n", "count 1518478.000 1518478.000 1518478.000 \n", "mean 0.701 0.122 0.277 \n", "std 0.182 0.040 0.282 \n", "min 0.000 0.000 0.000 \n", "25% 0.625 0.090 0.021 \n", "50% 0.750 0.112 0.166 \n", "75% 0.875 0.147 0.507 \n", "max 1.000 1.000 1.000 " ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Summary statistics of the numeric columns for the full frame, rendered with three decimals.\n",
"(\n",
"    beers.loc[:, ['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']]\n",
"    .describe()\n",
"    .applymap('{:0.3f}'.format)\n",
")"
] }, { "cell_type": "code", "execution_count": 33, "id": "98febfcb-f801-4fed-88c8-2c188cae111c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
review_overallreview_aromareview_appearancereview_palatereview_tastebeer_abvbeer_beerid
count1214782.01214782.01214782.01214782.01214782.01214782.01214782.0
mean0.80.70.80.70.70.10.3
std0.10.20.10.20.20.00.3
min0.00.00.00.00.00.00.0
25%0.70.60.70.60.60.10.0
50%0.80.80.80.80.80.10.2
75%0.90.80.80.80.90.10.5
max1.01.01.01.01.01.01.0
\n", "
" ], "text/plain": [ " review_overall review_aroma review_appearance review_palate \\\n", "count 1214782.0 1214782.0 1214782.0 1214782.0 \n", "mean 0.8 0.7 0.8 0.7 \n", "std 0.1 0.2 0.1 0.2 \n", "min 0.0 0.0 0.0 0.0 \n", "25% 0.7 0.6 0.7 0.6 \n", "50% 0.8 0.8 0.8 0.8 \n", "75% 0.9 0.8 0.8 0.8 \n", "max 1.0 1.0 1.0 1.0 \n", "\n", " review_taste beer_abv beer_beerid \n", "count 1214782.0 1214782.0 1214782.0 \n", "mean 0.7 0.1 0.3 \n", "std 0.2 0.0 0.3 \n", "min 0.0 0.0 0.0 \n", "25% 0.6 0.1 0.0 \n", "50% 0.8 0.1 0.2 \n", "75% 0.9 0.1 0.5 \n", "max 1.0 1.0 1.0 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Summary statistics of the numeric columns for the training split, rendered with one decimal.\n",
"(\n",
"    beers_train.loc[:, ['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']]\n",
"    .describe()\n",
"    .applymap('{:0.1f}'.format)\n",
")"
] }, { "cell_type": "code", "execution_count": 34, "id": "9b675fc2-42d8-4d3a-b6b9-3b35a0b8ab08", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
review_overallreview_aromareview_appearancereview_palatereview_tastebeer_abvbeer_beerid
count151848.0151848.0151848.0151848.0151848.0151848.0151848.0
mean0.80.70.80.70.70.10.3
std0.10.20.10.20.20.00.3
min0.00.00.00.00.00.00.0
25%0.70.60.70.60.60.10.0
50%0.80.80.80.80.80.10.2
75%0.90.80.80.80.90.10.5
max1.01.01.01.01.00.71.0
\n", "
" ], "text/plain": [ " review_overall review_aroma review_appearance review_palate \\\n", "count 151848.0 151848.0 151848.0 151848.0 \n", "mean 0.8 0.7 0.8 0.7 \n", "std 0.1 0.2 0.1 0.2 \n", "min 0.0 0.0 0.0 0.0 \n", "25% 0.7 0.6 0.7 0.6 \n", "50% 0.8 0.8 0.8 0.8 \n", "75% 0.9 0.8 0.8 0.8 \n", "max 1.0 1.0 1.0 1.0 \n", "\n", " review_taste beer_abv beer_beerid \n", "count 151848.0 151848.0 151848.0 \n", "mean 0.7 0.1 0.3 \n", "std 0.2 0.0 0.3 \n", "min 0.0 0.0 0.0 \n", "25% 0.6 0.1 0.0 \n", "50% 0.8 0.1 0.2 \n", "75% 0.9 0.1 0.5 \n", "max 1.0 0.7 1.0 " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Summary statistics of the numeric columns for the dev split, rendered with one decimal.\n",
"(\n",
"    beers_dev.loc[:, ['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']]\n",
"    .describe()\n",
"    .applymap('{:0.1f}'.format)\n",
")"
] }, { "cell_type": "code", "execution_count": 35, "id": "fa018c6f-4093-414a-aef3-48cedb1d82d2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
review_overallreview_aromareview_appearancereview_palatereview_tastebeer_abvbeer_beerid
count151848.0151848.0151848.0151848.0151848.0151848.0151848.0
mean0.80.70.80.70.70.10.3
std0.10.20.10.20.20.00.3
min0.00.00.00.00.00.00.0
25%0.70.60.70.60.60.10.0
50%0.80.80.80.80.80.10.2
75%0.90.80.80.80.90.10.5
max1.01.01.01.01.00.71.0
\n", "
" ], "text/plain": [ " review_overall review_aroma review_appearance review_palate \\\n", "count 151848.0 151848.0 151848.0 151848.0 \n", "mean 0.8 0.7 0.8 0.7 \n", "std 0.1 0.2 0.1 0.2 \n", "min 0.0 0.0 0.0 0.0 \n", "25% 0.7 0.6 0.7 0.6 \n", "50% 0.8 0.8 0.8 0.8 \n", "75% 0.9 0.8 0.8 0.8 \n", "max 1.0 1.0 1.0 1.0 \n", "\n", " review_taste beer_abv beer_beerid \n", "count 151848.0 151848.0 151848.0 \n", "mean 0.7 0.1 0.3 \n", "std 0.2 0.0 0.3 \n", "min 0.0 0.0 0.0 \n", "25% 0.6 0.1 0.0 \n", "50% 0.8 0.1 0.2 \n", "75% 0.9 0.1 0.5 \n", "max 1.0 0.7 1.0 " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ],
"source": [
"# Summary statistics of the numeric columns for the test split, rendered with one decimal.\n",
"(\n",
"    beers_test.loc[:, ['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']]\n",
"    .describe()\n",
"    .applymap('{:0.1f}'.format)\n",
")"
] }, { "cell_type": "code", "execution_count": null, "id": "8ce7d432-8d8a-40b0-a247-300f9a39ad44", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } }, "nbformat": 4, "nbformat_minor": 5 }