1249 lines
107 KiB
Plaintext
1249 lines
107 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "b8487a98-1b51-46f1-b727-719575945544",
|
||
|
"metadata": {
|
||
|
"tags": []
|
||
|
},
|
||
|
"source": [
|
||
|
"### Pobieranie zbioru i pakietów"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 38,
|
||
|
"id": "800bc7a7-aa60-4db8-b170-a5a7340520aa",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||
|
"Requirement already satisfied: kaggle in /home/students/s464979/.local/lib/python3.9/site-packages (1.6.6)\n",
|
||
|
"Requirement already satisfied: bleach in /usr/local/lib/python3.9/dist-packages (from kaggle) (5.0.1)\n",
|
||
|
"Requirement already satisfied: certifi in /usr/local/lib/python3.9/dist-packages (from kaggle) (2022.9.14)\n",
|
||
|
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.8.2)\n",
|
||
|
"Requirement already satisfied: python-slugify in /home/students/s464979/.local/lib/python3.9/site-packages (from kaggle) (8.0.4)\n",
|
||
|
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from kaggle) (2.28.1)\n",
|
||
|
"Requirement already satisfied: six>=1.10 in /usr/lib/python3/dist-packages (from kaggle) (1.16.0)\n",
|
||
|
"Requirement already satisfied: tqdm in /usr/local/lib/python3.9/dist-packages (from kaggle) (4.64.1)\n",
|
||
|
"Requirement already satisfied: urllib3 in /usr/local/lib/python3.9/dist-packages (from kaggle) (1.26.12)\n",
|
||
|
"Requirement already satisfied: webencodings in /usr/local/lib/python3.9/dist-packages (from bleach->kaggle) (0.5.1)\n",
|
||
|
"Requirement already satisfied: text-unidecode>=1.3 in /home/students/s464979/.local/lib/python3.9/site-packages (from python-slugify->kaggle) (1.3)\n",
|
||
|
"Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (2.1.1)\n",
|
||
|
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->kaggle) (3.4)\n",
|
||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||
|
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||
|
"Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.3.5)\n",
|
||
|
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n",
|
||
|
"Requirement already satisfied: pytz>=2017.3 in /usr/lib/python3/dist-packages (from pandas) (2021.1)\n",
|
||
|
"Requirement already satisfied: numpy>=1.17.3 in /home/students/s464979/.local/lib/python3.9/site-packages (from pandas) (1.26.4)\n",
|
||
|
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas) (1.16.0)\n",
|
||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||
|
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||
|
"Requirement already satisfied: numpy in /home/students/s464979/.local/lib/python3.9/site-packages (1.26.4)\n",
|
||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||
|
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||
|
"Requirement already satisfied: scikit-learn in /usr/lib/python3/dist-packages (0.23.2)\n",
|
||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
||
|
"Defaulting to user installation because normal site-packages is not writeable\n",
|
||
|
"Requirement already satisfied: seaborn in /usr/local/lib/python3.9/dist-packages (0.12.0)\n",
|
||
|
"Requirement already satisfied: numpy>=1.17 in /home/students/s464979/.local/lib/python3.9/site-packages (from seaborn) (1.26.4)\n",
|
||
|
"Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.9/dist-packages (from seaborn) (1.3.5)\n",
|
||
|
"Requirement already satisfied: matplotlib>=3.1 in /usr/local/lib/python3.9/dist-packages (from seaborn) (3.6.2)\n",
|
||
|
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (1.0.6)\n",
|
||
|
"Requirement already satisfied: cycler>=0.10 in /usr/lib/python3/dist-packages (from matplotlib>=3.1->seaborn) (0.10.0)\n",
|
||
|
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (4.38.0)\n",
|
||
|
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/lib/python3/dist-packages (from matplotlib>=3.1->seaborn) (1.3.1)\n",
|
||
|
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (21.3)\n",
|
||
|
"Requirement already satisfied: pillow>=6.2.0 in /home/students/s464979/.local/lib/python3.9/site-packages (from matplotlib>=3.1->seaborn) (10.2.0)\n",
|
||
|
"Requirement already satisfied: pyparsing>=2.2.1 in /usr/lib/python3/dist-packages (from matplotlib>=3.1->seaborn) (2.4.7)\n",
|
||
|
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.9/dist-packages (from matplotlib>=3.1->seaborn) (2.8.2)\n",
|
||
|
"Requirement already satisfied: pytz>=2017.3 in /usr/lib/python3/dist-packages (from pandas>=0.25->seaborn) (2021.1)\n",
|
||
|
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib>=3.1->seaborn) (1.16.0)\n",
|
||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"%pip install kaggle\n",
|
||
|
"%pip install pandas\n",
|
||
|
"%pip install numpy\n",
|
||
|
"%pip install scikit-learn\n",
|
||
|
"%pip install seaborn"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "f132ca66-2325-48e0-8bf8-ff983d8ad1ce",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"!kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "cd162ce4-9ca6-4631-86fc-55aa3704a1fb",
|
||
|
"metadata": {
|
||
|
"tags": []
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"!kaggle datasets download -d"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "8516dbcf-1628-4059-a212-7bb36641151f",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"!unzip -o 1-5-million-beer-reviews-from-beer-advocate.zip"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 43,
|
||
|
"id": "399983f4-351b-485e-835f-03caf3302743",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import pandas as pd\n",
|
||
|
"import seaborn as sns\n",
|
||
|
"from sklearn.model_selection import train_test_split\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.preprocessing import MinMaxScaler\n",
|
||
|
"\n",
|
||
|
"pd.set_option('float_format', '{:f}'.format)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "205c3fcf-fa0e-4d62-92f0-e8c72737e8b3",
|
||
|
"metadata": {
|
||
|
"tags": []
|
||
|
},
|
||
|
"source": [
|
||
|
"## Wczytywanie danych"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"id": "69561c75-0140-4f3d-93de-391aa344755c",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>index</th>\n",
|
||
|
" <th>brewery_id</th>\n",
|
||
|
" <th>brewery_name</th>\n",
|
||
|
" <th>review_time</th>\n",
|
||
|
" <th>review_overall</th>\n",
|
||
|
" <th>review_aroma</th>\n",
|
||
|
" <th>review_appearance</th>\n",
|
||
|
" <th>review_profilename</th>\n",
|
||
|
" <th>beer_style</th>\n",
|
||
|
" <th>review_palate</th>\n",
|
||
|
" <th>review_taste</th>\n",
|
||
|
" <th>beer_name</th>\n",
|
||
|
" <th>beer_abv</th>\n",
|
||
|
" <th>beer_beerid</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>10325</td>\n",
|
||
|
" <td>Vecchio Birraio</td>\n",
|
||
|
" <td>1234817823</td>\n",
|
||
|
" <td>1.500000</td>\n",
|
||
|
" <td>2.000000</td>\n",
|
||
|
" <td>2.500000</td>\n",
|
||
|
" <td>stcules</td>\n",
|
||
|
" <td>Hefeweizen</td>\n",
|
||
|
" <td>1.500000</td>\n",
|
||
|
" <td>1.500000</td>\n",
|
||
|
" <td>Sausa Weizen</td>\n",
|
||
|
" <td>5.000000</td>\n",
|
||
|
" <td>47986</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>1</td>\n",
|
||
|
" <td>10325</td>\n",
|
||
|
" <td>Vecchio Birraio</td>\n",
|
||
|
" <td>1235915097</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>2.500000</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>stcules</td>\n",
|
||
|
" <td>English Strong Ale</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>Red Moon</td>\n",
|
||
|
" <td>6.200000</td>\n",
|
||
|
" <td>48213</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>2</td>\n",
|
||
|
" <td>10325</td>\n",
|
||
|
" <td>Vecchio Birraio</td>\n",
|
||
|
" <td>1235916604</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>2.500000</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>stcules</td>\n",
|
||
|
" <td>Foreign / Export Stout</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>Black Horse Black Beer</td>\n",
|
||
|
" <td>6.500000</td>\n",
|
||
|
" <td>48215</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>3</td>\n",
|
||
|
" <td>10325</td>\n",
|
||
|
" <td>Vecchio Birraio</td>\n",
|
||
|
" <td>1234725145</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>3.500000</td>\n",
|
||
|
" <td>stcules</td>\n",
|
||
|
" <td>German Pilsener</td>\n",
|
||
|
" <td>2.500000</td>\n",
|
||
|
" <td>3.000000</td>\n",
|
||
|
" <td>Sausa Pils</td>\n",
|
||
|
" <td>5.000000</td>\n",
|
||
|
" <td>47969</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>4</td>\n",
|
||
|
" <td>1075</td>\n",
|
||
|
" <td>Caldera Brewing Company</td>\n",
|
||
|
" <td>1293735206</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" <td>4.500000</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" <td>johnmichaelsen</td>\n",
|
||
|
" <td>American Double / Imperial IPA</td>\n",
|
||
|
" <td>4.000000</td>\n",
|
||
|
" <td>4.500000</td>\n",
|
||
|
" <td>Cauldron DIPA</td>\n",
|
||
|
" <td>7.700000</td>\n",
|
||
|
" <td>64883</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" index brewery_id brewery_name review_time review_overall \\\n",
|
||
|
"0 0 10325 Vecchio Birraio 1234817823 1.500000 \n",
|
||
|
"1 1 10325 Vecchio Birraio 1235915097 3.000000 \n",
|
||
|
"2 2 10325 Vecchio Birraio 1235916604 3.000000 \n",
|
||
|
"3 3 10325 Vecchio Birraio 1234725145 3.000000 \n",
|
||
|
"4 4 1075 Caldera Brewing Company 1293735206 4.000000 \n",
|
||
|
"\n",
|
||
|
" review_aroma review_appearance review_profilename \\\n",
|
||
|
"0 2.000000 2.500000 stcules \n",
|
||
|
"1 2.500000 3.000000 stcules \n",
|
||
|
"2 2.500000 3.000000 stcules \n",
|
||
|
"3 3.000000 3.500000 stcules \n",
|
||
|
"4 4.500000 4.000000 johnmichaelsen \n",
|
||
|
"\n",
|
||
|
" beer_style review_palate review_taste \\\n",
|
||
|
"0 Hefeweizen 1.500000 1.500000 \n",
|
||
|
"1 English Strong Ale 3.000000 3.000000 \n",
|
||
|
"2 Foreign / Export Stout 3.000000 3.000000 \n",
|
||
|
"3 German Pilsener 2.500000 3.000000 \n",
|
||
|
"4 American Double / Imperial IPA 4.000000 4.500000 \n",
|
||
|
"\n",
|
||
|
" beer_name beer_abv beer_beerid \n",
|
||
|
"0 Sausa Weizen 5.000000 47986 \n",
|
||
|
"1 Red Moon 6.200000 48213 \n",
|
||
|
"2 Black Horse Black Beer 6.500000 48215 \n",
|
||
|
"3 Sausa Pils 5.000000 47969 \n",
|
||
|
"4 Cauldron DIPA 7.700000 64883 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"beers=pd.read_csv('beer_reviews.csv')\n",
|
||
|
"\n",
|
||
|
"beers.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"id": "f54a599d-9cee-4b1f-9be1-c7bad6129760",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
|
"RangeIndex: 1586614 entries, 0 to 1586613\n",
|
||
|
"Data columns (total 14 columns):\n",
|
||
|
" # Column Non-Null Count Dtype \n",
|
||
|
"--- ------ -------------- ----- \n",
|
||
|
" 0 index 1586614 non-null int64 \n",
|
||
|
" 1 brewery_id 1586614 non-null int64 \n",
|
||
|
" 2 brewery_name 1586599 non-null object \n",
|
||
|
" 3 review_time 1586614 non-null int64 \n",
|
||
|
" 4 review_overall 1586614 non-null float64\n",
|
||
|
" 5 review_aroma 1586614 non-null float64\n",
|
||
|
" 6 review_appearance 1586614 non-null float64\n",
|
||
|
" 7 review_profilename 1586266 non-null object \n",
|
||
|
" 8 beer_style 1586614 non-null object \n",
|
||
|
" 9 review_palate 1586614 non-null float64\n",
|
||
|
" 10 review_taste 1586614 non-null float64\n",
|
||
|
" 11 beer_name 1586614 non-null object \n",
|
||
|
" 12 beer_abv 1518829 non-null float64\n",
|
||
|
" 13 beer_beerid 1586614 non-null int64 \n",
|
||
|
"dtypes: float64(6), int64(4), object(4)\n",
|
||
|
"memory usage: 169.5+ MB\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Print each column's dtype and non-null count, plus the frame's memory usage.\n",
|
||
|
"beers.info()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "81107f1b-bfd2-40ce-b1dd-a98be02c0e9f",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Czyszczenie danych"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 49,
|
||
|
"id": "a1c7ea8b-b9a4-4098-8e31-32ae0cf22075",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"index 0\n",
|
||
|
"brewery_id 0\n",
|
||
|
"brewery_name 0\n",
|
||
|
"review_time 0\n",
|
||
|
"review_overall 0\n",
|
||
|
"review_aroma 0\n",
|
||
|
"review_appearance 0\n",
|
||
|
"review_profilename 0\n",
|
||
|
"beer_style 0\n",
|
||
|
"review_palate 0\n",
|
||
|
"review_taste 0\n",
|
||
|
"beer_name 0\n",
|
||
|
"beer_abv 0\n",
|
||
|
"beer_beerid 0\n",
|
||
|
"dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 49,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"beers.dropna(subset=['brewery_name'], inplace=True)\n",
|
||
|
"beers.dropna(subset=['review_profilename'], inplace=True)\n",
|
||
|
"beers.dropna(subset=['beer_abv'], inplace=True)\n",
|
||
|
"\n",
|
||
|
"beers.isnull().sum()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "7e79db21-7b02-4f76-972f-da3092a0d22c",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Normalizacja"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 22,
|
||
|
"id": "e83dd914-b8cf-4e72-a9ea-f4e7f2f63791",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"scaler = MinMaxScaler()\n",
|
||
|
"\n",
|
||
|
"beers[['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']] = scaler.fit_transform(beers[['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']])"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "46e10b78-fb00-4f7e-9c40-7ee08ebeeffe",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Podział na podzbiory"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 24,
|
||
|
"id": "e5f6f028-dfcb-4cc5-9bd1-bc9bd51c0a31",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Hold out 20% of the rows, then halve that hold-out into dev and test (80/10/10); the fixed random_state makes both splits reproducible.\n",
|
||
|
"beers_train, beers_dev_test = train_test_split(beers, test_size=0.2, random_state=1234)\n",
|
||
|
"beers_dev, beers_test = train_test_split(beers_dev_test, test_size=0.5, random_state=1234)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 25,
|
||
|
"id": "c9feafcc-3591-4d7b-8282-f0f2e2ebd782",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Liczba kolumn w każdym zbiorze: 14 kolumn\n",
|
||
|
"Całość: 1518478 rekordów \n",
|
||
|
"Train: 1214782 rekordów\n",
|
||
|
"Dev: 151848 rekordów\n",
|
||
|
"Test: 151848 rekordów\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(f\"Liczba kolumn w każdym zbiorze: {beers.shape[1]} kolumn\")\n",
|
||
|
"print(f\"Całość: {beers.shape[0]} rekordów \")\n",
|
||
|
"print(f\"Train: {beers_train.shape[0]} rekordów\")\n",
|
||
|
"print(f\"Dev: {beers_dev.shape[0]} rekordów\")\n",
|
||
|
"print(f\"Test: {beers_test.shape[0]} rekordów\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "c811f83b-351e-45c0-bb0f-c1cf68afd669",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"### Przegląd danych"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 51,
|
||
|
"id": "75ffb6e4-3780-4e5f-b151-4b2929237e2a",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Suma różnych piw: 44075\n",
|
||
|
"Suma różnych styli: 104\n",
|
||
|
"Suma różnych browarów: 5155\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(f\"Suma różnych piw: {beers['beer_name'].nunique()}\")\n",
|
||
|
"print(f\"Suma różnych styli: {beers['beer_style'].nunique()}\")\n",
|
||
|
"print(f\"Suma różnych browarów: {beers['brewery_name'].nunique()}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 76,
|
||
|
"id": "02571b1b-471c-4339-8422-c8fc27ce6055",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAHWCAYAAAD6oMSKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAADA6klEQVR4nOzdd1gU1/c/8PeC9K5SBOmggh2NvaCgWGIv2BHsATFgARNFsWCJPfoRC4gYY8MSuyiigmBFsKGCotjAijQFhfv7wx/zZQUpCTODcF7Ps4/u7LDnLLvsnp2591wJY4yBEEIIIYT88GTEToAQQgghhFQMKuwIIYQQQqoIKuwIIYQQQqoIKuwIIYQQQqoIKuwIIYQQQqoIKuwIIYQQQqoIKuwIIYQQQqoIKuwIIYQQQqoIKuwIqWK2b9+O9evXl7hPVFQUFixYgPfv3wuUFSGEECFQYUdIJTB//nxIJJL/fD9Hjx7FlClT0Lx58+/u8/z5c/Tr1w+KiorQ0tL6zzEB4PHjx5BIJAgKCqqQ+ysrW1tb2Nraip5HeRT3XJuYmGDs2LHiJFQBOUgkEsyfP79C86loEokEbm5uYqdRom9/j0FBQZBIJHj8+LFoOZEfDxV2hAis4M362rVrFXq/z58/x7hx47Bjxw60b9++2H2+fPkCR0dHDB06FLNmzarQ+IRUZ1FRUZg/fz7S0tLEToVUczXEToAQUjHi4uKwYcMGDBo06Lv7xMfHY8CAAfDw8KjQ2MbGxvj48SPk5OQq9H6ri/v370NGRtzv2f8lh48fP6JGjer9cRIVFQVfX1+MHTsWmpqaFXKfo0ePxrBhw6CgoFAh90eqh+r9l0hIFdKrV69S92ncuDEaN25c4bElEgkUFRUr/H6ri8rwwf1fcqDnnh+ysrKQlZUVOw3yg6FTsYRUUl++fMHChQthbm4OBQUFmJiY4LfffkNOTo7UfteuXYODgwNq164NJSUlmJqawsXFRWqf/Px8rF27Fo0bN4aioiK0tbXRo0ePUk8H29raolGjRrh+/TratWvH3b+/v7/Uft+ObTt8+DAkEglu3rzJ7bN//35IJBIMHDhQ6metrKzg6OhY6u9j8+bNMDc3h5KSElq1aoWIiIhSfwYAbt68ibFjx8LMzAyKiorQ09ODi4sL3r59W+rPnjt3DhKJBHv37sXixYtRt25dKCoqws7ODomJiVL7RkREYMiQITAyMoKCggIMDQ3h4eGBjx8/lhqnuPFtaWlp8PDwgImJCRQUFFC3bl2MGTMGb9684X5GIpEUezl37hyA/xvPl5iYyB1J0tDQgLOzM7Kzs8uUw6+//gpDQ0MoKCjAwsICy5YtQ35+vtR+344Ny8jIwK+//srlrqOjg27duiEmJgbA/w1HKO5SMGayc+fOaNq0abG/r/r168PBwYG7Xp7X96FDh9CoUSMoKCigYcOGOHnyZLExvvXnn3+iYcOGUFZWhpaWFlq2bIm///6b+z3PnDkTAGBqaso9lsePH5frcXyLxtiRf4OO2BFSSY0fPx7bt2/H4MGDMX36dFy+fBlLlixBfHw8Dh48CAB49eoVunfvDm1tbXh7e0NTUxOPHz/GgQMHpO5r3LhxCAoKQs+ePTF+/Hh8+fIFERERuHTpElq2bFliHu/fv0evXr0wdOhQDB8+HHv37sWUKVMgLy9fpIAs0KFDB0gkEly4cAFNmjQB8LXwkZGRQWRkJLff69evce/evVIHtQcEBGDSpElo164dfv31Vzx69Ah9+/ZFzZo1YWhoWOLPnj59Go8ePYKzszP09PRw584dbN68GXfu3MGlS5fKNGll6dKlkJGRwYwZM/DhwwcsX74cI0eOxOXLl7l99u3bh+zsbEyZMgW1atXClStX8Oeff+LZs2fYt29fqTEKy8zMRMeOHREfHw8XFxfY2NjgzZs3OHz4MJ49e4batWtjzZo1yMzMlPq51atXIzY2FrVq1ZLaPnToUJiamm
LJkiWIiYnB1q1boaOjg2XLln03h+zsbHTu3BnPnz/HpEmTYGRkhKioKMyePRsvX77EmjVrvvuzkydPRkhICNzc3GBtbY23b98iMjIS8fHxsLGxQadOnbBjxw6pn3ny5AnmzJkDHR0dAF9PQ06YMAG3b99Go0aNuP2uXr2KBw8eYM6cOdy2sr6+IyMjceDAAfzyyy9QU1PDunXrMGjQICQnJxf5nRW2ZcsWuLu7Y/DgwZg2bRo+ffqEmzdv4vLlyxgxYgQGDhyIBw8eYNeuXVi9ejVq164NANDW1i7X4yCkQjBCiKC2bdvGALCrV69y2+bNm8cK/znGxsYyAGz8+PFSPztjxgwGgJ09e5YxxtjBgweL3Ne3zp49ywAwd3f3Irfl5+eXmGvnzp0ZALZy5UpuW05ODmvWrBnT0dFhubm5jDHGkpKSGAC2bds2br+GDRuyoUOHctdtbGzYkCFDGAAWHx/PGGPswIEDDACLi4v7bg65ublMR0eHNWvWjOXk5HDbN2/ezACwzp07c9uKyyM7O7vIfe7atYsBYBcuXCjx8YeHhzMAzMrKSir22rVrGQB269atEuMsWbKESSQS9uTJE27bt881Y4wZGxszJycn7rqPjw8DwA4cOFDkPr/3nO3du5cBYAsWLCgSy8XFRWrfAQMGsFq1apWYw8KFC5mKigp78OCB1H7e3t5MVlaWJScnc9sAsHnz5nHXNTQ0mKura7F5Fufjx4+sRYsWTF9fn718+ZIxxlhaWhpTVFRkXl5eUvu6u7szFRUVlpmZyRgr++sbAJOXl2eJiYnctri4OAaA/fnnnyXm169fP9awYcMS9/njjz8YAJaUlCS1vayPoyDHwr/HgveKb++TkJLQqVhCKqHjx48DADw9PaW2T58+HQBw7NgxAOAGaR89ehSfP38u9r4KToHOmzevyG1lOVpVo0YNTJo0ibsuLy+PSZMm4dWrV7h+/fp3f65jx47c6dKMjAzExcVh4sSJqF27Nrc9IiICmpqaUkcyvnXt2jW8evUKkydPhry8PLd97Nix0NDQKDV/JSUl7v+fPn3Cmzdv0KZNGwDgTg2WxtnZWSp2x44dAQCPHj0qNk5WVhbevHmDdu3agTGGGzdulClOgf3796Np06YYMGBAkduKe87u3r0LFxcX9OvXr9gjQJMnT5a63rFjR7x9+xbp6enfzWHfvn3o2LEjtLS08ObNG+5ib2+PvLw8XLhw4bs/q6mpicuXL+PFixclPUzOL7/8glu3bmH//v3Q09MDAGhoaKBfv37YtWsXGGMAgLy8POzZswf9+/eHiooKgPK9vu3t7WFubs5db9KkCdTV1aWex+89nmfPnuHq1atlejyFlfVxEFJRqLAjpBJ68uQJZGRkYGFhIbVdT08PmpqaePLkCYCv45AGDRoEX19f1K5dG/369cO2bdukxuE9fPgQ+vr6qFmz5r/KRV9fv8iHT7169QCgxLE/HTt2xMuXL5GYmIioqChIJBK0bdtWquCLiIhA+/btS5yNWfBYLS0tpbbLycnBzMys1PzfvXuHadOmQVdXF0pKStDW1oapqSkA4MOHD6X+PAAYGRlJXS/o/1e4wXNycjLGjh2LmjVrQlVVFdra2ujcuXO54hR4+PBhicVuYenp6Rg4cCAMDAwQHBxcbOFXlvy/lZCQgJMnT0JbW1vqYm9vD+DrMIDvWb58OW7fvg1DQ0O0atUK8+fP/27xtGnTJmzbtg1//vknV3AXGDNmDJKTk7nXy5kzZ5CamorRo0dz+5Tn9f3t7wH4+rsorVG3l5cXVFVV0apVK1haWsLV1RUXL14sNV55HgchFYUKO0IqsdKOqEkkEoSEhCA6Ohpubm54/vw5XFxc0KJFiyLjr4TWoUMHAMCFCxcQEREBGxsbqKiocIVdZmYmbty4wR394svQoUOxZcsWTJ48GQcOHEBoaCg3YP7bSQDf872ZiYWPwHTr1g3Hjh2Dl5cXDh06hNOnT3OTScoa59
8YO3YsXrx4gUOHDkFdXb3YfUrLvzj5+fno1q0bTp8+XeylpLY6Q4cOxaNHj/Dnn39CX18ff/zxBxo2bIgTJ05I7Xf
|
||
|
"text/plain": [
|
||
|
"<Figure size 640x480 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"style_counts = beers['beer_style'].value_counts()\n",
|
||
|
"\n",
|
||
|
"top_15_styles = style_counts.head(15) \n",
|
||
|
"\n",
|
||
|
"plt.bar(top_15_styles.index, top_15_styles.values)\n",
|
||
|
"plt.xlabel('Styl')\n",
|
||
|
"plt.ylabel('Liczba piw')\n",
|
||
|
"plt.title('Ilość piw dla naliczniejszych styli')\n",
|
||
|
"plt.xticks(rotation=90)\n",
|
||
|
"plt.tight_layout()\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 91,
|
||
|
"id": "0f1a2572-db91-4d8f-ad73-69327e60a606",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>review_overall</th>\n",
|
||
|
" <th>Liczba opini</th>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>beer_name</th>\n",
|
||
|
" <th></th>\n",
|
||
|
" <th></th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>90 Minute IPA</th>\n",
|
||
|
" <td>0.829097</td>\n",
|
||
|
" <td>3289</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>Old Rasputin Russian Imperial Stout</th>\n",
|
||
|
" <td>0.834823</td>\n",
|
||
|
" <td>3110</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>Sierra Nevada Celebration Ale</th>\n",
|
||
|
" <td>0.833711</td>\n",
|
||
|
" <td>2999</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>India Pale Ale</th>\n",
|
||
|
" <td>0.770777</td>\n",
|
||
|
" <td>2960</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>Two Hearted Ale</th>\n",
|
||
|
" <td>0.866043</td>\n",
|
||
|
" <td>2727</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" review_overall Liczba opini\n",
|
||
|
"beer_name \n",
|
||
|
"90 Minute IPA 0.829097 3289\n",
|
||
|
"Old Rasputin Russian Imperial Stout 0.834823 3110\n",
|
||
|
"Sierra Nevada Celebration Ale 0.833711 2999\n",
|
||
|
"India Pale Ale 0.770777 2960\n",
|
||
|
"Two Hearted Ale 0.866043 2727"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 91,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"reviews = pd.DataFrame(beers.groupby('beer_name')['review_overall'].mean())\n",
|
||
|
"reviews['Liczba opini'] = pd.DataFrame(beers.groupby('beer_name')['review_overall'].count())\n",
|
||
|
"reviews = reviews.sort_values(by=['Liczba opini'], ascending=False)\n",
|
||
|
"reviews.head()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 32,
|
||
|
"id": "20444c91-b0be-44c8-ba99-b24290a054a0",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>review_overall</th>\n",
|
||
|
" <th>review_aroma</th>\n",
|
||
|
" <th>review_appearance</th>\n",
|
||
|
" <th>review_palate</th>\n",
|
||
|
" <th>review_taste</th>\n",
|
||
|
" <th>beer_abv</th>\n",
|
||
|
" <th>beer_beerid</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>count</th>\n",
|
||
|
" <td>1518478.000</td>\n",
|
||
|
" <td>1518478.000</td>\n",
|
||
|
" <td>1518478.000</td>\n",
|
||
|
" <td>1518478.000</td>\n",
|
||
|
" <td>1518478.000</td>\n",
|
||
|
" <td>1518478.000</td>\n",
|
||
|
" <td>1518478.000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <td>0.765</td>\n",
|
||
|
" <td>0.687</td>\n",
|
||
|
" <td>0.770</td>\n",
|
||
|
" <td>0.688</td>\n",
|
||
|
" <td>0.701</td>\n",
|
||
|
" <td>0.122</td>\n",
|
||
|
" <td>0.277</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>std</th>\n",
|
||
|
" <td>0.143</td>\n",
|
||
|
" <td>0.174</td>\n",
|
||
|
" <td>0.123</td>\n",
|
||
|
" <td>0.170</td>\n",
|
||
|
" <td>0.182</td>\n",
|
||
|
" <td>0.040</td>\n",
|
||
|
" <td>0.282</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>min</th>\n",
|
||
|
" <td>0.000</td>\n",
|
||
|
" <td>0.000</td>\n",
|
||
|
" <td>0.000</td>\n",
|
||
|
" <td>0.000</td>\n",
|
||
|
" <td>0.000</td>\n",
|
||
|
" <td>0.000</td>\n",
|
||
|
" <td>0.000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>25%</th>\n",
|
||
|
" <td>0.700</td>\n",
|
||
|
" <td>0.625</td>\n",
|
||
|
" <td>0.700</td>\n",
|
||
|
" <td>0.625</td>\n",
|
||
|
" <td>0.625</td>\n",
|
||
|
" <td>0.090</td>\n",
|
||
|
" <td>0.021</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>50%</th>\n",
|
||
|
" <td>0.800</td>\n",
|
||
|
" <td>0.750</td>\n",
|
||
|
" <td>0.800</td>\n",
|
||
|
" <td>0.750</td>\n",
|
||
|
" <td>0.750</td>\n",
|
||
|
" <td>0.112</td>\n",
|
||
|
" <td>0.166</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>75%</th>\n",
|
||
|
" <td>0.900</td>\n",
|
||
|
" <td>0.750</td>\n",
|
||
|
" <td>0.800</td>\n",
|
||
|
" <td>0.750</td>\n",
|
||
|
" <td>0.875</td>\n",
|
||
|
" <td>0.147</td>\n",
|
||
|
" <td>0.507</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>max</th>\n",
|
||
|
" <td>1.000</td>\n",
|
||
|
" <td>1.000</td>\n",
|
||
|
" <td>1.000</td>\n",
|
||
|
" <td>1.000</td>\n",
|
||
|
" <td>1.000</td>\n",
|
||
|
" <td>1.000</td>\n",
|
||
|
" <td>1.000</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" review_overall review_aroma review_appearance review_palate \\\n",
|
||
|
"count 1518478.000 1518478.000 1518478.000 1518478.000 \n",
|
||
|
"mean 0.765 0.687 0.770 0.688 \n",
|
||
|
"std 0.143 0.174 0.123 0.170 \n",
|
||
|
"min 0.000 0.000 0.000 0.000 \n",
|
||
|
"25% 0.700 0.625 0.700 0.625 \n",
|
||
|
"50% 0.800 0.750 0.800 0.750 \n",
|
||
|
"75% 0.900 0.750 0.800 0.750 \n",
|
||
|
"max 1.000 1.000 1.000 1.000 \n",
|
||
|
"\n",
|
||
|
" review_taste beer_abv beer_beerid \n",
|
||
|
"count 1518478.000 1518478.000 1518478.000 \n",
|
||
|
"mean 0.701 0.122 0.277 \n",
|
||
|
"std 0.182 0.040 0.282 \n",
|
||
|
"min 0.000 0.000 0.000 \n",
|
||
|
"25% 0.625 0.090 0.021 \n",
|
||
|
"50% 0.750 0.112 0.166 \n",
|
||
|
"75% 0.875 0.147 0.507 \n",
|
||
|
"max 1.000 1.000 1.000 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 32,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"beers[['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']].describe().applymap(lambda x: f\"{x:0.3f}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 33,
|
||
|
"id": "98febfcb-f801-4fed-88c8-2c188cae111c",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>review_overall</th>\n",
|
||
|
" <th>review_aroma</th>\n",
|
||
|
" <th>review_appearance</th>\n",
|
||
|
" <th>review_palate</th>\n",
|
||
|
" <th>review_taste</th>\n",
|
||
|
" <th>beer_abv</th>\n",
|
||
|
" <th>beer_beerid</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>count</th>\n",
|
||
|
" <td>1214782.0</td>\n",
|
||
|
" <td>1214782.0</td>\n",
|
||
|
" <td>1214782.0</td>\n",
|
||
|
" <td>1214782.0</td>\n",
|
||
|
" <td>1214782.0</td>\n",
|
||
|
" <td>1214782.0</td>\n",
|
||
|
" <td>1214782.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>std</th>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>min</th>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>25%</th>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>50%</th>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>75%</th>\n",
|
||
|
" <td>0.9</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.9</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.5</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>max</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" review_overall review_aroma review_appearance review_palate \\\n",
|
||
|
"count 1214782.0 1214782.0 1214782.0 1214782.0 \n",
|
||
|
"mean 0.8 0.7 0.8 0.7 \n",
|
||
|
"std 0.1 0.2 0.1 0.2 \n",
|
||
|
"min 0.0 0.0 0.0 0.0 \n",
|
||
|
"25% 0.7 0.6 0.7 0.6 \n",
|
||
|
"50% 0.8 0.8 0.8 0.8 \n",
|
||
|
"75% 0.9 0.8 0.8 0.8 \n",
|
||
|
"max 1.0 1.0 1.0 1.0 \n",
|
||
|
"\n",
|
||
|
" review_taste beer_abv beer_beerid \n",
|
||
|
"count 1214782.0 1214782.0 1214782.0 \n",
|
||
|
"mean 0.7 0.1 0.3 \n",
|
||
|
"std 0.2 0.0 0.3 \n",
|
||
|
"min 0.0 0.0 0.0 \n",
|
||
|
"25% 0.6 0.1 0.0 \n",
|
||
|
"50% 0.8 0.1 0.2 \n",
|
||
|
"75% 0.9 0.1 0.5 \n",
|
||
|
"max 1.0 1.0 1.0 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 33,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"beers_train[['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']].describe().applymap(lambda x: f\"{x:0.1f}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 34,
|
||
|
"id": "9b675fc2-42d8-4d3a-b6b9-3b35a0b8ab08",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>review_overall</th>\n",
|
||
|
" <th>review_aroma</th>\n",
|
||
|
" <th>review_appearance</th>\n",
|
||
|
" <th>review_palate</th>\n",
|
||
|
" <th>review_taste</th>\n",
|
||
|
" <th>beer_abv</th>\n",
|
||
|
" <th>beer_beerid</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>count</th>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>std</th>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>min</th>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>25%</th>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>50%</th>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>75%</th>\n",
|
||
|
" <td>0.9</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.9</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.5</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>max</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" review_overall review_aroma review_appearance review_palate \\\n",
|
||
|
"count 151848.0 151848.0 151848.0 151848.0 \n",
|
||
|
"mean 0.8 0.7 0.8 0.7 \n",
|
||
|
"std 0.1 0.2 0.1 0.2 \n",
|
||
|
"min 0.0 0.0 0.0 0.0 \n",
|
||
|
"25% 0.7 0.6 0.7 0.6 \n",
|
||
|
"50% 0.8 0.8 0.8 0.8 \n",
|
||
|
"75% 0.9 0.8 0.8 0.8 \n",
|
||
|
"max 1.0 1.0 1.0 1.0 \n",
|
||
|
"\n",
|
||
|
" review_taste beer_abv beer_beerid \n",
|
||
|
"count 151848.0 151848.0 151848.0 \n",
|
||
|
"mean 0.7 0.1 0.3 \n",
|
||
|
"std 0.2 0.0 0.3 \n",
|
||
|
"min 0.0 0.0 0.0 \n",
|
||
|
"25% 0.6 0.1 0.0 \n",
|
||
|
"50% 0.8 0.1 0.2 \n",
|
||
|
"75% 0.9 0.1 0.5 \n",
|
||
|
"max 1.0 0.7 1.0 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 34,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"beers_dev[['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']].describe().applymap(lambda x: f\"{x:0.1f}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 35,
|
||
|
"id": "fa018c6f-4093-414a-aef3-48cedb1d82d2",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>review_overall</th>\n",
|
||
|
" <th>review_aroma</th>\n",
|
||
|
" <th>review_appearance</th>\n",
|
||
|
" <th>review_palate</th>\n",
|
||
|
" <th>review_taste</th>\n",
|
||
|
" <th>beer_abv</th>\n",
|
||
|
" <th>beer_beerid</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>count</th>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" <td>151848.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>std</th>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.3</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>min</th>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>25%</th>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.6</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>50%</th>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>75%</th>\n",
|
||
|
" <td>0.9</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.8</td>\n",
|
||
|
" <td>0.9</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>0.5</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>max</th>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" <td>0.7</td>\n",
|
||
|
" <td>1.0</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" review_overall review_aroma review_appearance review_palate \\\n",
|
||
|
"count 151848.0 151848.0 151848.0 151848.0 \n",
|
||
|
"mean 0.8 0.7 0.8 0.7 \n",
|
||
|
"std 0.1 0.2 0.1 0.2 \n",
|
||
|
"min 0.0 0.0 0.0 0.0 \n",
|
||
|
"25% 0.7 0.6 0.7 0.6 \n",
|
||
|
"50% 0.8 0.8 0.8 0.8 \n",
|
||
|
"75% 0.9 0.8 0.8 0.8 \n",
|
||
|
"max 1.0 1.0 1.0 1.0 \n",
|
||
|
"\n",
|
||
|
" review_taste beer_abv beer_beerid \n",
|
||
|
"count 151848.0 151848.0 151848.0 \n",
|
||
|
"mean 0.7 0.1 0.3 \n",
|
||
|
"std 0.2 0.0 0.3 \n",
|
||
|
"min 0.0 0.0 0.0 \n",
|
||
|
"25% 0.6 0.1 0.0 \n",
|
||
|
"50% 0.8 0.1 0.2 \n",
|
||
|
"75% 0.9 0.1 0.5 \n",
|
||
|
"max 1.0 0.7 1.0 "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 35,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"beers_test[['review_overall', 'review_aroma', 'review_appearance', 'review_palate', 'review_taste', 'beer_abv', 'beer_beerid']].describe().applymap(lambda x: f\"{x:0.1f}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "8ce7d432-8d8a-40b0-a247-300f9a39ad44",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.9.2"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|