{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# OPIS\n", "\n", "#### Dataset zawiera dane dotyczące cen awokado Hass i ich sprzedaży w wybranych regionach Stanów Zjednoczonych.\n", "\n", "#### Opis kolumn:\n", "- Date - data obserwacji\n", "- AveragePrice - średnia cena pojedynczego awokado\n", "- type - zwykłe lub organiczne\n", "- year - rok obserwacji\n", "- Region - miasto/region obserwacji\n", "- Total Volume - liczba sprzedanych awokado\n", "- 4046 - liczba sprzedanych awokado z kodem PLU 4046 (małe)\n", "- 4225 - liczba sprzedanych awokado z kodem PLU 4225 (duże)\n", "- 4770 - liczba sprzedanych awokado z kodem PLU 4770 (bardzo duże)\n" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "import sys\n", "\n", "# Install every dependency with the interpreter that runs this kernel.\n", "# A bare `python3` may resolve to a different environment, in which case the\n", "# package would be installed somewhere the notebook cannot import it from.\n", "# `scikit-learn` is the actual distribution; `sklearn` on PyPI is only a\n", "# placeholder that depends on it.\n", "!{sys.executable} -m pip install kaggle pandas scikit-learn" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Pobranie zbioru." ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "!kaggle datasets download -d timmate/avocado-prices-2020" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: avocado-prices-2020.zip\n", " inflating: avocado-updated-2020.csv \n" ] } ], "source": [ "!unzip -o avocado-prices-2020.zip\n" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "date,average_price,total_volume,4046,4225,4770,total_bags,small_bags,large_bags,xlarge_bags,type,year,geography\r\n", "2015-01-04,1.22,40873.28,2819.5,28287.42,49.9,9716.46,9186.93,529.53,0.0,conventional,2015,Albany\r\n", "2015-01-04,1.79,1373.95,57.42,153.88,0.0,1162.65,1162.65,0.0,0.0,organic,2015,Albany\r\n", "2015-01-04,1.0,435021.49,364302.39,23821.16,82.15,46815.79,16707.15,30108.64,0.0,conventional,2015,Atlanta\r\n", "2015-01-04,1.76,3846.69,1500.15,938.35,0.0,1408.19,1071.35,336.84,0.0,organic,2015,Atlanta\r\n" ] } ], "source": [ "!head -n 5 avocado-updated-2020.csv" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Usunięcie zbędnej kolumny (redundantne dane)."
] }, { "cell_type": "code", "execution_count": 25, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>average_price</th>\n", " <th>total_volume</th>\n", " <th>4046</th>\n", " <th>4225</th>\n", " <th>4770</th>\n", " <th>total_bags</th>\n", " <th>small_bags</th>\n", " <th>large_bags</th>\n", " <th>xlarge_bags</th>\n", " <th>type</th>\n", " <th>geography</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2015-01-04</td>\n", " <td>1.22</td>\n", " <td>40873.28</td>\n", " <td>2819.50</td>\n", " <td>28287.42</td>\n", " <td>49.90</td>\n", " <td>9716.46</td>\n", " <td>9186.93</td>\n", " <td>529.53</td>\n", " <td>0.00</td>\n", " <td>conventional</td>\n", " <td>Albany</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2015-01-04</td>\n", " <td>1.79</td>\n", " <td>1373.95</td>\n", " <td>57.42</td>\n", " <td>153.88</td>\n", " <td>0.00</td>\n", " <td>1162.65</td>\n", " <td>1162.65</td>\n", " <td>0.00</td>\n", " <td>0.00</td>\n", " <td>organic</td>\n", " <td>Albany</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2015-01-04</td>\n", " <td>1.00</td>\n", " <td>435021.49</td>\n", " <td>364302.39</td>\n", " <td>23821.16</td>\n", " <td>82.15</td>\n", " <td>46815.79</td>\n", " <td>16707.15</td>\n", " <td>30108.64</td>\n", " <td>0.00</td>\n", " <td>conventional</td>\n", " <td>Atlanta</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2015-01-04</td>\n", " <td>1.76</td>\n", " <td>3846.69</td>\n", " <td>1500.15</td>\n", " <td>938.35</td>\n", " <td>0.00</td>\n", " <td>1408.19</td>\n", " <td>1071.35</td>\n", " 
<td>336.84</td>\n", " <td>0.00</td>\n", " <td>organic</td>\n", " <td>Atlanta</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>2015-01-04</td>\n", " <td>1.08</td>\n", " <td>788025.06</td>\n", " <td>53987.31</td>\n", " <td>552906.04</td>\n", " <td>39995.03</td>\n", " <td>141136.68</td>\n", " <td>137146.07</td>\n", " <td>3990.61</td>\n", " <td>0.00</td>\n", " <td>conventional</td>\n", " <td>Baltimore/Washington</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>33040</th>\n", " <td>2020-11-29</td>\n", " <td>1.47</td>\n", " <td>1583056.27</td>\n", " <td>67544.48</td>\n", " <td>97996.46</td>\n", " <td>2617.17</td>\n", " <td>1414878.10</td>\n", " <td>906711.52</td>\n", " <td>480191.83</td>\n", " <td>27974.75</td>\n", " <td>organic</td>\n", " <td>Total U.S.</td>\n", " </tr>\n", " <tr>\n", " <th>33041</th>\n", " <td>2020-11-29</td>\n", " <td>0.91</td>\n", " <td>5811114.22</td>\n", " <td>1352877.53</td>\n", " <td>589061.83</td>\n", " <td>19741.90</td>\n", " <td>3790665.29</td>\n", " <td>2197611.02</td>\n", " <td>1531530.14</td>\n", " <td>61524.13</td>\n", " <td>conventional</td>\n", " <td>West</td>\n", " </tr>\n", " <tr>\n", " <th>33042</th>\n", " <td>2020-11-29</td>\n", " <td>1.48</td>\n", " <td>289961.27</td>\n", " <td>13273.75</td>\n", " <td>19341.09</td>\n", " <td>636.51</td>\n", " <td>256709.92</td>\n", " <td>122606.21</td>\n", " <td>134103.71</td>\n", " <td>0.00</td>\n", " <td>organic</td>\n", " <td>West</td>\n", " </tr>\n", " <tr>\n", " <th>33043</th>\n", " <td>2020-11-29</td>\n", " <td>0.67</td>\n", " <td>822818.75</td>\n", " <td>234688.01</td>\n", " <td>80205.15</td>\n", " <td>10543.63</td>\n", " <td>497381.96</td>\n", " <td>285764.11</td>\n", " <td>210808.02</td>\n", " <td>809.83</td>\n", " 
<td>conventional</td>\n", " <td>West Tex/New Mexico</td>\n", " </tr>\n", " <tr>\n", " <th>33044</th>\n", " <td>2020-11-29</td>\n", " <td>1.35</td>\n", " <td>24106.58</td>\n", " <td>1236.96</td>\n", " <td>617.80</td>\n", " <td>1564.98</td>\n", " <td>20686.84</td>\n", " <td>17824.52</td>\n", " <td>2862.32</td>\n", " <td>0.00</td>\n", " <td>organic</td>\n", " <td>West Tex/New Mexico</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>33045 rows × 12 columns</p>\n", "</div>" ], "text/plain": [ " date average_price total_volume 4046 4225 \\\n", "0 2015-01-04 1.22 40873.28 2819.50 28287.42 \n", "1 2015-01-04 1.79 1373.95 57.42 153.88 \n", "2 2015-01-04 1.00 435021.49 364302.39 23821.16 \n", "3 2015-01-04 1.76 3846.69 1500.15 938.35 \n", "4 2015-01-04 1.08 788025.06 53987.31 552906.04 \n", "... ... ... ... ... ... \n", "33040 2020-11-29 1.47 1583056.27 67544.48 97996.46 \n", "33041 2020-11-29 0.91 5811114.22 1352877.53 589061.83 \n", "33042 2020-11-29 1.48 289961.27 13273.75 19341.09 \n", "33043 2020-11-29 0.67 822818.75 234688.01 80205.15 \n", "33044 2020-11-29 1.35 24106.58 1236.96 617.80 \n", "\n", " 4770 total_bags small_bags large_bags xlarge_bags \\\n", "0 49.90 9716.46 9186.93 529.53 0.00 \n", "1 0.00 1162.65 1162.65 0.00 0.00 \n", "2 82.15 46815.79 16707.15 30108.64 0.00 \n", "3 0.00 1408.19 1071.35 336.84 0.00 \n", "4 39995.03 141136.68 137146.07 3990.61 0.00 \n", "... ... ... ... ... ... \n", "33040 2617.17 1414878.10 906711.52 480191.83 27974.75 \n", "33041 19741.90 3790665.29 2197611.02 1531530.14 61524.13 \n", "33042 636.51 256709.92 122606.21 134103.71 0.00 \n", "33043 10543.63 497381.96 285764.11 210808.02 809.83 \n", "33044 1564.98 20686.84 17824.52 2862.32 0.00 \n", "\n", " type geography \n", "0 conventional Albany \n", "1 organic Albany \n", "2 conventional Atlanta \n", "3 organic Atlanta \n", "4 conventional Baltimore/Washington \n", "... ... ... \n", "33040 organic Total U.S. 
\n", "33041 conventional West \n", "33042 organic West \n", "33043 conventional West Tex/New Mexico \n", "33044 organic West Tex/New Mexico \n", "\n", "[33045 rows x 12 columns]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ],
"source": [ "import pandas as pd\n", "\n", "# Columns kept in the working dataset: everything from the raw file except\n", "# 'year', which repeats information already carried by 'date'.\n", "new = ['date', 'average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags', 'type', 'geography']\n", "\n", "avocado_with_year = pd.read_csv('avocado-updated-2020.csv')\n", "# .copy() yields an independent frame rather than a slice of the raw data,\n", "# so the saved CSV does not have to be read back in.\n", "avocado = avocado_with_year[new].copy()\n", "avocado.to_csv(\"avocado.csv\", index=False)\n", "avocado" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Podział zbioru na train/dev/test." ] },
{ "cell_type": "code", "execution_count": 26, "metadata": { "scrolled": false }, "outputs": [], "source": [ "# The split below no longer needs numpy; the import is kept in case later cells use `np`.\n", "import numpy as np\n", "\n", "# Fixed seed so the shuffle - and therefore the split and every summary\n", "# computed from it - is identical on each run.\n", "SEED = 42\n", "\n", "# Row positions at which the shuffled data is cut: 60% / 20% / 20%.\n", "train_end = int(.6 * len(avocado))\n", "validate_end = int(.8 * len(avocado))\n", "\n", "avocado_shuffled = avocado.sample(frac=1, random_state=SEED)\n", "avocado_train = avocado_shuffled.iloc[:train_end]\n", "avocado_validate = avocado_shuffled.iloc[train_end:validate_end]\n", "avocado_test = avocado_shuffled.iloc[validate_end:]\n", "\n", "# The three subsets must partition the dataset: no row lost, none duplicated.\n", "assert len(avocado_train) + len(avocado_validate) + len(avocado_test) == len(avocado)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Podsumowanie zbioru i poszczególnych podzbiorów." ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Wielkości zbioru i podzbiorów." ] },
{ "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Avocado:             33045\n", "Avocado (train) :    19827\n", "Avocado (validate):  6609\n", "Avocado (test)       6609\n" ] } ], "source": [ "# len() is the number of rows (observations); np.size() would count every\n", "# cell, i.e. rows x columns, which says nothing about the split proportions.\n", "print(\"Avocado: \".ljust(20), len(avocado))\n", "print(\"Avocado (train) : \".ljust(20), len(avocado_train))\n", "print(\"Avocado (validate): \".ljust(20), len(avocado_validate))\n", "print(\"Avocado (test) \".ljust(20), len(avocado_test))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Podsumowanie zbioru avocado."
] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>average_price</th>\n", " <th>total_volume</th>\n", " <th>4046</th>\n", " <th>4225</th>\n", " <th>4770</th>\n", " <th>total_bags</th>\n", " <th>small_bags</th>\n", " <th>large_bags</th>\n", " <th>xlarge_bags</th>\n", " <th>type</th>\n", " <th>geography</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>33045</td>\n", " <td>33045.000000</td>\n", " <td>3.304500e+04</td>\n", " <td>3.304500e+04</td>\n", " <td>3.304500e+04</td>\n", " <td>3.304500e+04</td>\n", " <td>3.304500e+04</td>\n", " <td>3.304500e+04</td>\n", " <td>3.304500e+04</td>\n", " <td>3.304500e+04</td>\n", " <td>33045</td>\n", " <td>33045</td>\n", " </tr>\n", " <tr>\n", " <th>unique</th>\n", " <td>306</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>54</td>\n", " </tr>\n", " <tr>\n", " <th>top</th>\n", " <td>2017-10-01</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>conventional</td>\n", " <td>Atlanta</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>108</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>16524</td>\n", " <td>612</td>\n", " 
</tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>NaN</td>\n", " <td>1.379941</td>\n", " <td>9.683997e+05</td>\n", " <td>3.023914e+05</td>\n", " <td>2.797693e+05</td>\n", " <td>2.148255e+04</td>\n", " <td>3.646735e+05</td>\n", " <td>2.501980e+05</td>\n", " <td>1.067329e+05</td>\n", " <td>7.742585e+03</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>NaN</td>\n", " <td>0.378972</td>\n", " <td>3.934533e+06</td>\n", " <td>1.301026e+06</td>\n", " <td>1.151052e+06</td>\n", " <td>1.001607e+05</td>\n", " <td>1.564004e+06</td>\n", " <td>1.037734e+06</td>\n", " <td>5.167226e+05</td>\n", " <td>4.819803e+04</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>NaN</td>\n", " <td>0.440000</td>\n", " <td>8.456000e+01</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>NaN</td>\n", " <td>1.100000</td>\n", " <td>1.511895e+04</td>\n", " <td>7.673100e+02</td>\n", " <td>2.712470e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>9.121860e+03</td>\n", " <td>6.478630e+03</td>\n", " <td>4.662900e+02</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>NaN</td>\n", " <td>1.350000</td>\n", " <td>1.291170e+05</td>\n", " <td>1.099477e+04</td>\n", " <td>2.343600e+04</td>\n", " <td>1.780900e+02</td>\n", " <td>5.322224e+04</td>\n", " <td>3.687699e+04</td>\n", " <td>6.375860e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>NaN</td>\n", " <td>1.620000</td>\n", " <td>5.058285e+05</td>\n", " <td>1.190219e+05</td>\n", " <td>1.352389e+05</td>\n", " <td>5.096530e+03</td>\n", " <td>1.744314e+05</td>\n", " <td>1.206624e+05</td>\n", " 
<td>4.041723e+04</td>\n", " <td>8.044400e+02</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>NaN</td>\n", " <td>3.250000</td>\n", " <td>6.371614e+07</td>\n", " <td>2.274362e+07</td>\n", " <td>2.047057e+07</td>\n", " <td>2.546439e+06</td>\n", " <td>3.168919e+07</td>\n", " <td>2.055041e+07</td>\n", " <td>1.332760e+07</td>\n", " <td>1.403184e+06</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date average_price total_volume 4046 4225 \\\n", "count 33045 33045.000000 3.304500e+04 3.304500e+04 3.304500e+04 \n", "unique 306 NaN NaN NaN NaN \n", "top 2017-10-01 NaN NaN NaN NaN \n", "freq 108 NaN NaN NaN NaN \n", "mean NaN 1.379941 9.683997e+05 3.023914e+05 2.797693e+05 \n", "std NaN 0.378972 3.934533e+06 1.301026e+06 1.151052e+06 \n", "min NaN 0.440000 8.456000e+01 0.000000e+00 0.000000e+00 \n", "25% NaN 1.100000 1.511895e+04 7.673100e+02 2.712470e+03 \n", "50% NaN 1.350000 1.291170e+05 1.099477e+04 2.343600e+04 \n", "75% NaN 1.620000 5.058285e+05 1.190219e+05 1.352389e+05 \n", "max NaN 3.250000 6.371614e+07 2.274362e+07 2.047057e+07 \n", "\n", " 4770 total_bags small_bags large_bags xlarge_bags \\\n", "count 3.304500e+04 3.304500e+04 3.304500e+04 3.304500e+04 3.304500e+04 \n", "unique NaN NaN NaN NaN NaN \n", "top NaN NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN NaN \n", "mean 2.148255e+04 3.646735e+05 2.501980e+05 1.067329e+05 7.742585e+03 \n", "std 1.001607e+05 1.564004e+06 1.037734e+06 5.167226e+05 4.819803e+04 \n", "min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n", "25% 0.000000e+00 9.121860e+03 6.478630e+03 4.662900e+02 0.000000e+00 \n", "50% 1.780900e+02 5.322224e+04 3.687699e+04 6.375860e+03 0.000000e+00 \n", "75% 5.096530e+03 1.744314e+05 1.206624e+05 4.041723e+04 8.044400e+02 \n", "max 2.546439e+06 3.168919e+07 2.055041e+07 1.332760e+07 1.403184e+06 \n", "\n", " type geography \n", "count 33045 33045 \n", "unique 2 54 \n", 
"top conventional Atlanta \n", "freq 16524 612 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN " ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avocado.describe(include = 'all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Podsumowanie podzbioru train." ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>average_price</th>\n", " <th>total_volume</th>\n", " <th>4046</th>\n", " <th>4225</th>\n", " <th>4770</th>\n", " <th>total_bags</th>\n", " <th>small_bags</th>\n", " <th>large_bags</th>\n", " <th>xlarge_bags</th>\n", " <th>type</th>\n", " <th>geography</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>19827</td>\n", " <td>19827.000000</td>\n", " <td>1.982700e+04</td>\n", " <td>1.982700e+04</td>\n", " <td>1.982700e+04</td>\n", " <td>1.982700e+04</td>\n", " <td>1.982700e+04</td>\n", " <td>1.982700e+04</td>\n", " <td>1.982700e+04</td>\n", " <td>1.982700e+04</td>\n", " <td>19827</td>\n", " <td>19827</td>\n", " </tr>\n", " <tr>\n", " <th>unique</th>\n", " <td>306</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>54</td>\n", " </tr>\n", " <tr>\n", " <th>top</th>\n", " <td>2018-09-23</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>organic</td>\n", " <td>Sacramento</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>77</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>9954</td>\n", " <td>404</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>NaN</td>\n", " <td>1.380658</td>\n", " <td>9.503549e+05</td>\n", " <td>2.955048e+05</td>\n", " <td>2.762023e+05</td>\n", " <td>2.117442e+04</td>\n", " <td>3.573659e+05</td>\n", " <td>2.448356e+05</td>\n", " <td>1.049736e+05</td>\n", " <td>7.556707e+03</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>NaN</td>\n", " <td>0.377988</td>\n", " <td>3.896388e+06</td>\n", " <td>1.285945e+06</td>\n", " <td>1.147780e+06</td>\n", " <td>1.008332e+05</td>\n", " <td>1.548676e+06</td>\n", " <td>1.023617e+06</td>\n", " <td>5.161354e+05</td>\n", " <td>4.776408e+04</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>NaN</td>\n", " <td>0.460000</td>\n", " <td>2.534500e+02</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>NaN</td>\n", " <td>1.100000</td>\n", " <td>1.509891e+04</td>\n", " <td>7.560400e+02</td>\n", " <td>2.695640e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>9.095285e+03</td>\n", " <td>6.430960e+03</td>\n", " <td>4.678750e+02</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>NaN</td>\n", " <td>1.350000</td>\n", " <td>1.275485e+05</td>\n", " <td>1.086294e+04</td>\n", " <td>2.337789e+04</td>\n", " <td>1.714100e+02</td>\n", " 
<td>5.240743e+04</td>\n", " <td>3.663295e+04</td>\n", " <td>6.148990e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>NaN</td>\n", " <td>1.610000</td>\n", " <td>4.996119e+05</td>\n", " <td>1.174216e+05</td>\n", " <td>1.337254e+05</td>\n", " <td>4.976950e+03</td>\n", " <td>1.721448e+05</td>\n", " <td>1.193927e+05</td>\n", " <td>3.875767e+04</td>\n", " <td>7.391950e+02</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>NaN</td>\n", " <td>3.170000</td>\n", " <td>6.371614e+07</td>\n", " <td>2.113740e+07</td>\n", " <td>2.047057e+07</td>\n", " <td>2.546439e+06</td>\n", " <td>3.168919e+07</td>\n", " <td>2.055041e+07</td>\n", " <td>1.332760e+07</td>\n", " <td>1.403184e+06</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date average_price total_volume 4046 4225 \\\n", "count 19827 19827.000000 1.982700e+04 1.982700e+04 1.982700e+04 \n", "unique 306 NaN NaN NaN NaN \n", "top 2018-09-23 NaN NaN NaN NaN \n", "freq 77 NaN NaN NaN NaN \n", "mean NaN 1.380658 9.503549e+05 2.955048e+05 2.762023e+05 \n", "std NaN 0.377988 3.896388e+06 1.285945e+06 1.147780e+06 \n", "min NaN 0.460000 2.534500e+02 0.000000e+00 0.000000e+00 \n", "25% NaN 1.100000 1.509891e+04 7.560400e+02 2.695640e+03 \n", "50% NaN 1.350000 1.275485e+05 1.086294e+04 2.337789e+04 \n", "75% NaN 1.610000 4.996119e+05 1.174216e+05 1.337254e+05 \n", "max NaN 3.170000 6.371614e+07 2.113740e+07 2.047057e+07 \n", "\n", " 4770 total_bags small_bags large_bags xlarge_bags \\\n", "count 1.982700e+04 1.982700e+04 1.982700e+04 1.982700e+04 1.982700e+04 \n", "unique NaN NaN NaN NaN NaN \n", "top NaN NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN NaN \n", "mean 2.117442e+04 3.573659e+05 2.448356e+05 1.049736e+05 7.556707e+03 \n", "std 1.008332e+05 1.548676e+06 1.023617e+06 5.161354e+05 4.776408e+04 \n", "min 0.000000e+00 0.000000e+00 0.000000e+00 
0.000000e+00 0.000000e+00 \n", "25% 0.000000e+00 9.095285e+03 6.430960e+03 4.678750e+02 0.000000e+00 \n", "50% 1.714100e+02 5.240743e+04 3.663295e+04 6.148990e+03 0.000000e+00 \n", "75% 4.976950e+03 1.721448e+05 1.193927e+05 3.875767e+04 7.391950e+02 \n", "max 2.546439e+06 3.168919e+07 2.055041e+07 1.332760e+07 1.403184e+06 \n", "\n", " type geography \n", "count 19827 19827 \n", "unique 2 54 \n", "top organic Sacramento \n", "freq 9954 404 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avocado_train.describe(include= 'all' )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Podsumowanie podzbioru validate." ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>average_price</th>\n", " <th>total_volume</th>\n", " <th>4046</th>\n", " <th>4225</th>\n", " <th>4770</th>\n", " <th>total_bags</th>\n", " <th>small_bags</th>\n", " <th>large_bags</th>\n", " <th>xlarge_bags</th>\n", " <th>type</th>\n", " <th>geography</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>6609</td>\n", " <td>6609.000000</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6609</td>\n", " <td>6609</td>\n", " </tr>\n", " <tr>\n", " 
<th>unique</th>\n", " <td>306</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>54</td>\n", " </tr>\n", " <tr>\n", " <th>top</th>\n", " <td>2020-05-03</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>organic</td>\n", " <td>Jacksonville</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>35</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>3365</td>\n", " <td>149</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>NaN</td>\n", " <td>1.382624</td>\n", " <td>9.914296e+05</td>\n", " <td>3.140144e+05</td>\n", " <td>2.827458e+05</td>\n", " <td>2.172480e+04</td>\n", " <td>3.729031e+05</td>\n", " <td>2.567059e+05</td>\n", " <td>1.085372e+05</td>\n", " <td>7.660065e+03</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>NaN</td>\n", " <td>0.380997</td>\n", " <td>4.042527e+06</td>\n", " <td>1.341419e+06</td>\n", " <td>1.181393e+06</td>\n", " <td>1.021178e+05</td>\n", " <td>1.596924e+06</td>\n", " <td>1.065783e+06</td>\n", " <td>5.196275e+05</td>\n", " <td>4.795256e+04</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>NaN</td>\n", " <td>0.440000</td>\n", " <td>8.456000e+01</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>NaN</td>\n", " <td>1.100000</td>\n", " <td>1.486299e+04</td>\n", " <td>7.570000e+02</td>\n", " 
<td>2.534810e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>9.007310e+03</td>\n", " <td>6.281480e+03</td>\n", " <td>4.562400e+02</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>NaN</td>\n", " <td>1.350000</td>\n", " <td>1.241199e+05</td>\n", " <td>1.023778e+04</td>\n", " <td>2.204006e+04</td>\n", " <td>1.674700e+02</td>\n", " <td>5.247009e+04</td>\n", " <td>3.492217e+04</td>\n", " <td>6.458780e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>NaN</td>\n", " <td>1.620000</td>\n", " <td>5.026773e+05</td>\n", " <td>1.207824e+05</td>\n", " <td>1.307007e+05</td>\n", " <td>5.104000e+03</td>\n", " <td>1.706264e+05</td>\n", " <td>1.197749e+05</td>\n", " <td>4.128634e+04</td>\n", " <td>7.951300e+02</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>NaN</td>\n", " <td>3.250000</td>\n", " <td>6.250565e+07</td>\n", " <td>2.274362e+07</td>\n", " <td>2.044550e+07</td>\n", " <td>1.800066e+06</td>\n", " <td>2.666884e+07</td>\n", " <td>1.740824e+07</td>\n", " <td>1.077854e+07</td>\n", " <td>1.123540e+06</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date average_price total_volume 4046 4225 \\\n", "count 6609 6609.000000 6.609000e+03 6.609000e+03 6.609000e+03 \n", "unique 306 NaN NaN NaN NaN \n", "top 2020-05-03 NaN NaN NaN NaN \n", "freq 35 NaN NaN NaN NaN \n", "mean NaN 1.382624 9.914296e+05 3.140144e+05 2.827458e+05 \n", "std NaN 0.380997 4.042527e+06 1.341419e+06 1.181393e+06 \n", "min NaN 0.440000 8.456000e+01 0.000000e+00 0.000000e+00 \n", "25% NaN 1.100000 1.486299e+04 7.570000e+02 2.534810e+03 \n", "50% NaN 1.350000 1.241199e+05 1.023778e+04 2.204006e+04 \n", "75% NaN 1.620000 5.026773e+05 1.207824e+05 1.307007e+05 \n", "max NaN 3.250000 6.250565e+07 2.274362e+07 2.044550e+07 \n", "\n", " 4770 total_bags 
small_bags large_bags xlarge_bags \\\n", "count 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 \n", "unique NaN NaN NaN NaN NaN \n", "top NaN NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN NaN \n", "mean 2.172480e+04 3.729031e+05 2.567059e+05 1.085372e+05 7.660065e+03 \n", "std 1.021178e+05 1.596924e+06 1.065783e+06 5.196275e+05 4.795256e+04 \n", "min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n", "25% 0.000000e+00 9.007310e+03 6.281480e+03 4.562400e+02 0.000000e+00 \n", "50% 1.674700e+02 5.247009e+04 3.492217e+04 6.458780e+03 0.000000e+00 \n", "75% 5.104000e+03 1.706264e+05 1.197749e+05 4.128634e+04 7.951300e+02 \n", "max 1.800066e+06 2.666884e+07 1.740824e+07 1.077854e+07 1.123540e+06 \n", "\n", " type geography \n", "count 6609 6609 \n", "unique 2 54 \n", "top organic Jacksonville \n", "freq 3365 149 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avocado_validate.describe(include = 'all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Podsumowanie podzbioru test." 
] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>average_price</th>\n", " <th>total_volume</th>\n", " <th>4046</th>\n", " <th>4225</th>\n", " <th>4770</th>\n", " <th>total_bags</th>\n", " <th>small_bags</th>\n", " <th>large_bags</th>\n", " <th>xlarge_bags</th>\n", " <th>type</th>\n", " <th>geography</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>6609</td>\n", " <td>6609.000000</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6.609000e+03</td>\n", " <td>6609</td>\n", " <td>6609</td>\n", " </tr>\n", " <tr>\n", " <th>unique</th>\n", " <td>306</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>54</td>\n", " </tr>\n", " <tr>\n", " <th>top</th>\n", " <td>2020-06-21</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>conventional</td>\n", " <td>California</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>33</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>3407</td>\n", " <td>143</td>\n", " 
</tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>NaN</td>\n", " <td>1.375107</td>\n", " <td>9.995041e+05</td>\n", " <td>3.114282e+05</td>\n", " <td>2.874940e+05</td>\n", " <td>2.216469e+04</td>\n", " <td>3.783667e+05</td>\n", " <td>2.597775e+05</td>\n", " <td>1.102065e+05</td>\n", " <td>8.382739e+03</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>NaN</td>\n", " <td>0.379902</td>\n", " <td>3.939225e+06</td>\n", " <td>1.305043e+06</td>\n", " <td>1.130053e+06</td>\n", " <td>9.608845e+04</td>\n", " <td>1.576553e+06</td>\n", " <td>1.051335e+06</td>\n", " <td>5.156234e+05</td>\n", " <td>4.971697e+04</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>NaN</td>\n", " <td>0.480000</td>\n", " <td>3.855500e+02</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>NaN</td>\n", " <td>1.090000</td>\n", " <td>1.544873e+04</td>\n", " <td>8.225900e+02</td>\n", " <td>2.903380e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>9.358110e+03</td>\n", " <td>6.834760e+03</td>\n", " <td>4.706000e+02</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>NaN</td>\n", " <td>1.330000</td>\n", " <td>1.409398e+05</td>\n", " <td>1.233835e+04</td>\n", " <td>2.530639e+04</td>\n", " <td>2.074500e+02</td>\n", " <td>5.576654e+04</td>\n", " <td>3.897502e+04</td>\n", " <td>7.182140e+03</td>\n", " <td>0.000000e+00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>NaN</td>\n", " <td>1.610000</td>\n", " <td>5.330085e+05</td>\n", " <td>1.221341e+05</td>\n", " <td>1.453971e+05</td>\n", " <td>5.358790e+03</td>\n", " <td>1.833669e+05</td>\n", " <td>1.254250e+05</td>\n", " 
<td>4.531138e+04</td>\n", " <td>1.012940e+03</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>NaN</td>\n", " <td>3.000000</td>\n", " <td>5.453235e+07</td>\n", " <td>1.707665e+07</td>\n", " <td>1.789639e+07</td>\n", " <td>1.993645e+06</td>\n", " <td>2.735245e+07</td>\n", " <td>1.791382e+07</td>\n", " <td>1.063102e+07</td>\n", " <td>1.181516e+06</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date average_price total_volume 4046 4225 \\\n", "count 6609 6609.000000 6.609000e+03 6.609000e+03 6.609000e+03 \n", "unique 306 NaN NaN NaN NaN \n", "top 2020-06-21 NaN NaN NaN NaN \n", "freq 33 NaN NaN NaN NaN \n", "mean NaN 1.375107 9.995041e+05 3.114282e+05 2.874940e+05 \n", "std NaN 0.379902 3.939225e+06 1.305043e+06 1.130053e+06 \n", "min NaN 0.480000 3.855500e+02 0.000000e+00 0.000000e+00 \n", "25% NaN 1.090000 1.544873e+04 8.225900e+02 2.903380e+03 \n", "50% NaN 1.330000 1.409398e+05 1.233835e+04 2.530639e+04 \n", "75% NaN 1.610000 5.330085e+05 1.221341e+05 1.453971e+05 \n", "max NaN 3.000000 5.453235e+07 1.707665e+07 1.789639e+07 \n", "\n", " 4770 total_bags small_bags large_bags xlarge_bags \\\n", "count 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 \n", "unique NaN NaN NaN NaN NaN \n", "top NaN NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN NaN \n", "mean 2.216469e+04 3.783667e+05 2.597775e+05 1.102065e+05 8.382739e+03 \n", "std 9.608845e+04 1.576553e+06 1.051335e+06 5.156234e+05 4.971697e+04 \n", "min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n", "25% 0.000000e+00 9.358110e+03 6.834760e+03 4.706000e+02 0.000000e+00 \n", "50% 2.074500e+02 5.576654e+04 3.897502e+04 7.182140e+03 0.000000e+00 \n", "75% 5.358790e+03 1.833669e+05 1.254250e+05 4.531138e+04 1.012940e+03 \n", "max 1.993645e+06 2.735245e+07 1.791382e+07 1.063102e+07 1.181516e+06 \n", "\n", " type geography \n", "count 6609 6609 \n", "unique 2 54 \n", "top 
conventional California \n", "freq 3407 143 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avocado_test.describe(include = 'all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Rozkład częstości przykładów dla poszczególnych klas." ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Atlanta 612\n", "St. Louis 612\n", "New York 612\n", "Indianapolis 612\n", "Sacramento 612\n", "Spokane 612\n", "Philadelphia 612\n", "South Carolina 612\n", "West 612\n", "San Francisco 612\n", "Orlando 612\n", "Southeast 612\n", "Miami/Ft. Lauderdale 612\n", "Nashville 612\n", "Syracuse 612\n", "Columbus 612\n", "Detroit 612\n", "Northern New England 612\n", "Buffalo/Rochester 612\n", "Raleigh/Greensboro 612\n", "Midsouth 612\n", "Boise 612\n", "San Diego 612\n", "Hartford/Springfield 612\n", "Los Angeles 612\n", "Total U.S. 612\n", "Dallas/Ft. 
Worth 612\n", "Great Lakes 612\n", "Roanoke 612\n", "Plains 612\n", "California 612\n", "Portland 612\n", "Grand Rapids 612\n", "Harrisburg/Scranton 612\n", "Charlotte 612\n", "Cincinnati/Dayton 612\n", "Richmond/Norfolk 612\n", "Houston 612\n", "South Central 612\n", "Northeast 612\n", "Seattle 612\n", "Jacksonville 612\n", "Baltimore/Washington 612\n", "Pittsburgh 612\n", "Louisville 612\n", "Boston 612\n", "Tampa 612\n", "Phoenix/Tucson 612\n", "Chicago 612\n", "Denver 612\n", "Las Vegas 612\n", "Albany 612\n", "New Orleans/Mobile 612\n", "West Tex/New Mexico 609\n", "Name: geography, dtype: int64" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avocado.geography.value_counts() " ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "California 143\n", "Grand Rapids 139\n", "Roanoke 139\n", "Las Vegas 139\n", "Spokane 137\n", "Plains 135\n", "Seattle 134\n", "Louisville 132\n", "Atlanta 131\n", "Syracuse 130\n", "New York 130\n", "Nashville 129\n", "Raleigh/Greensboro 129\n", "Miami/Ft. Lauderdale 128\n", "Phoenix/Tucson 128\n", "Orlando 128\n", "Hartford/Springfield 127\n", "San Francisco 127\n", "South Central 127\n", "Charlotte 126\n", "Richmond/Norfolk 126\n", "West 126\n", "Tampa 124\n", "Los Angeles 124\n", "South Carolina 122\n", "Great Lakes 122\n", "Total U.S. 122\n", "Northeast 121\n", "Cincinnati/Dayton 121\n", "Columbus 121\n", "Baltimore/Washington 119\n", "Pittsburgh 119\n", "Jacksonville 119\n", "Portland 119\n", "West Tex/New Mexico 118\n", "Midsouth 118\n", "Houston 117\n", "Chicago 116\n", "Buffalo/Rochester 116\n", "New Orleans/Mobile 116\n", "Philadelphia 115\n", "San Diego 115\n", "Indianapolis 115\n", "Northern New England 114\n", "Boston 114\n", "Boise 114\n", "Southeast 114\n", "Dallas/Ft. Worth 113\n", "Detroit 113\n", "Albany 112\n", "Denver 111\n", "St. 
Louis 111\n", "Harrisburg/Scranton 104\n", "Sacramento 100\n", "Name: geography, dtype: int64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avocado_test.geography.value_counts() " ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Sacramento 404\n", "Albany 398\n", "Northern New England 390\n", "Harrisburg/Scranton 388\n", "St. Louis 385\n", "Columbus 384\n", "Boise 382\n", "Indianapolis 381\n", "Detroit 380\n", "South Carolina 378\n", "West Tex/New Mexico 378\n", "Southeast 378\n", "Nashville 377\n", "Denver 377\n", "Los Angeles 377\n", "Great Lakes 376\n", "San Diego 375\n", "Cincinnati/Dayton 374\n", "Boston 374\n", "South Central 373\n", "New Orleans/Mobile 373\n", "Richmond/Norfolk 371\n", "Seattle 371\n", "Total U.S. 371\n", "Buffalo/Rochester 370\n", "Northeast 369\n", "Charlotte 368\n", "Atlanta 368\n", "Chicago 367\n", "San Francisco 366\n", "Midsouth 366\n", "Philadelphia 365\n", "New York 363\n", "Portland 363\n", "Syracuse 362\n", "Grand Rapids 361\n", "Louisville 361\n", "Roanoke 361\n", "Dallas/Ft. Worth 360\n", "Orlando 359\n", "Tampa 359\n", "Houston 359\n", "Hartford/Springfield 358\n", "Pittsburgh 357\n", "West 356\n", "Miami/Ft. 
Lauderdale 354\n", "Baltimore/Washington 353\n", "Phoenix/Tucson 353\n", "Raleigh/Greensboro 345\n", "Jacksonville 344\n", "Las Vegas 339\n", "California 336\n", "Plains 335\n", "Spokane 335\n", "Name: geography, dtype: int64" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avocado_train.geography.value_counts() " ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<AxesSubplot:>" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEvCAYAAACnuq2HAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAXV0lEQVR4nO3df7DddX3n8efLRFC0SDBXqkkwqaTuBKqVpoDrttPCLgSLhrHqwtQlq1mz0yLVbWcU3NllF2VGrFtWqtCmEgmU8kOKklaUzSIVdys/giA/pdwFMcnwI5IAVrZg6Hv/OJ8sh8u9JLnn3nsOOc/HzJl7vu/v53vO+2Tu5HW/3+/nfL+pKiRJw+1l/W5AktR/hoEkyTCQJBkGkiQMA0kShoEkCZjd7wYma+7cubVw4cJ+tyFJLym33HLLj6tqZGz9JRsGCxcuZMOGDf1uQ5JeUpI8OF7dw0SSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CSxEv4S2cvFQtP/Xq/W9hj/PAzv9XvFvYo/m5OrZf676d7BpIkw0CStAthkGRNkkeT3DmmfkqSHyS5K8lnu+qnJRlNcm+SY7rqy1ptNMmpXfVFSW5s9cuS7DVVH06StGt2Zc/gAmBZdyHJbwLLgbdW1cHA51p9CXACcHDb5twks5LMAr4IHAssAU5sYwHOAs6uqoOAbcDKXj+UJGn37DQMqup6YOuY8u8Cn6mqp9uYR1t9OXBpVT1dVQ8Ao8Bh7TFaVfdX1TPApcDyJAGOBK5o268Fju/tI0mSdtdkzxn8IvBr7fDOt5P8aqvPAzZ2jdvUahPVXws8XlXbx9QlSTNoslNLZwP7A0cAvwpcnuQXpqyrCSRZBawCOPDAA6f77SRpaEx2z2ATcGV13AT8EzAX2Aws6Bo3v9Umqj8G7Jdk9pj6uKpqdVUtraqlIyMvuFGPJGmSJhsGXwN+EyDJLwJ7AT8G1gEnJNk7ySJgMXATcDOwuM0c2ovOSeZ1VVXAdcB72+uuAK6aZE+SpEna6WGiJJcAvwHMTbIJOB1YA6xp002fAVa0/9jvSnI5cDewHTi5qp5tr/MR4BpgFrCmqu5qb/EJ4NIknwZuBc6fws8nSdoFOw2DqjpxglUfmGD8mcCZ49SvBq4ep34/ndlGkqQ+8RvIkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJLELoRBkjVJHm13NRu77g+TVJK5bTlJzkkymuT2JId2jV2R5L72WNFV/5Ukd7RtzkmSqfpwkqRdsyt7BhcAy8YWkywAjgZ+1FU+ls59jxcDq4Dz2tj96dwu83A6dzU7Pcmcts15wIe7tnvBe0mSptdOw6Cqrge2jrPqbODj
QHXVlgMXVscNwH5JXg8cA6yvqq1VtQ1YDyxr6/atqhvaPZQvBI7v6RNJknbbpM4ZJFkObK6q749ZNQ/Y2LW8qdVerL5pnLokaQbN3t0NkuwDfJLOIaIZlWQVncNPHHjggTP99pK0x5rMnsGbgEXA95P8EJgPfC/JzwObgQVdY+e32ovV549TH1dVra6qpVW1dGRkZBKtS5LGs9thUFV3VNXrqmphVS2kc2jn0Kp6GFgHnNRmFR0BPFFVDwHXAEcnmdNOHB8NXNPWPZnkiDaL6CTgqin6bJKkXbQrU0svAb4LvDnJpiQrX2T41cD9wCjw58DvAVTVVuBTwM3tcUar0cZ8qW3zf4BvTO6jSJIma6fnDKrqxJ2sX9j1vICTJxi3BlgzTn0DcMjO+pAkTR+/gSxJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSezanc7WJHk0yZ1dtT9K8oMktyf5apL9utadlmQ0yb1JjumqL2u10SSndtUXJbmx1S9LstcUfj5J0i7YlT2DC4BlY2rrgUOq6i3A3wOnASRZApwAHNy2OTfJrCSzgC8CxwJLgBPbWICzgLOr6iBgG/Bit9WUJE2DnYZBVV0PbB1T+x9Vtb0t3gDMb8+XA5dW1dNV9QCd+xof1h6jVXV/VT0DXAosTxLgSOCKtv1a4PjePpIkaXdNxTmDD/HcTeznARu71m1qtYnqrwUe7wqWHXVJ0gzqKQyS/EdgO3Dx1LSz0/dblWRDkg1btmyZibeUpKEw6TBI8m+B44Dfqapq5c3Agq5h81ttovpjwH5JZo+pj6uqVlfV0qpaOjIyMtnWJUljTCoMkiwDPg68u6qe6lq1Djghyd5JFgGLgZuAm4HFbebQXnROMq9rIXId8N62/Qrgqsl9FEnSZO3K1NJLgO8Cb06yKclK4AvAzwHrk9yW5E8Bquou4HLgbuCbwMlV9Ww7J/AR4BrgHuDyNhbgE8AfJBmlcw7h/Cn9hJKknZq9swFVdeI45Qn/w66qM4Ezx6lfDVw9Tv1+OrONJEl94jeQJUmGgSTJMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJXbvT2Zokjya5s6u2f5L1Se5rP+e0epKck2Q0ye1JDu3aZkUbf1+SFV31X0lyR9vmnCSZ6g8pSXpxu7JncAGwbEztVODaqloMXNuWAY6lc9/jxcAq4DzohAdwOnA4nbuanb4jQNqYD3dtN/a9JEnTbKdhUFXXA1vHlJcDa9vztcDxXfULq+MGYL8krweOAdZX1daq2gasB5a1dftW1Q1VVcCFXa8lSZohkz1ncEBVPdSePwwc0J7PAzZ2jdvUai9W3zROXZI0g3o+gdz+oq8p6GWnkqxKsiHJhi1btszEW0rSUJhsGDzSDvHQfj7a6puBBV3j5rfai9Xnj1MfV1WtrqqlVbV0ZGRkkq1LksaabBisA3bMCFoBXNVVP6nNKjoCeKIdTroGODrJnHbi+GjgmrbuySRHtFlEJ3W9liRphsze2YAklwC/AcxNsonOrKDPAJcnWQk8CLy/Db8aeCcwCjwFfBCgqrYm+RRwcxt3RlXtOCn9e3RmLL0S+EZ7SJJm0E7DoKpOnGDVUeOMLeDkCV5nDbBmnPoG4JCd9SFJmj5+A1mSZBhIkgwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkugxDJL8hyR3JbkzySVJXpFkUZIbk4wmuSzJXm3s3m15tK1f2PU6p7X6vUmO6fEzSZJ206TDIMk84PeBpVV1CDALOAE4Czi7qg4CtgEr2yYrgW2tfnYbR5IlbbuDgWXAuUlmTbYvSdLu6/Uw0WzglUlmA/sADwFHAle09WuB49vz5W2Ztv6oJGn1S6vq6ap6gM79kw/r
sS9J0m6YdBhU1Wbgc8CP6ITAE8AtwONVtb0N2wTMa8/nARvbttvb+Nd218fZRpI0A3o5TDSHzl/1i4A3AK+ic5hn2iRZlWRDkg1btmyZzreSpKHSy2Gifwk8UFVbqupnwJXAO4D92mEjgPnA5vZ8M7AAoK1/DfBYd32cbZ6nqlZX1dKqWjoyMtJD65Kkbr2EwY+AI5Ls0479HwXcDVwHvLeNWQFc1Z6va8u09d+qqmr1E9pso0XAYuCmHvqSJO2m2TsfMr6qujHJFcD3gO3ArcBq4OvApUk+3Wrnt03OBy5KMgpspTODiKq6K8nldIJkO3ByVT072b4kSbtv0mEAUFWnA6ePKd/POLOBquofgfdN8DpnAmf20oskafL8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJNFjGCTZL8kVSX6Q5J4kb0+yf5L1Se5rP+e0sUlyTpLRJLcnObTrdVa08fclWTHxO0qSpkOvewafB75ZVf8MeCtwD3AqcG1VLQaubcsAx9K5v/FiYBVwHkCS/encLe1wOndIO31HgEiSZsakwyDJa4Bfp93juKqeqarHgeXA2jZsLXB8e74cuLA6bgD2S/J64BhgfVVtraptwHpg2WT7kiTtvl72DBYBW4AvJ7k1yZeSvAo4oKoeamMeBg5oz+cBG7u239RqE9UlSTOklzCYDRwKnFdVbwN+ynOHhACoqgKqh/d4niSrkmxIsmHLli1T9bKSNPR6CYNNwKaqurEtX0EnHB5ph39oPx9t6zcDC7q2n99qE9VfoKpWV9XSqlo6MjLSQ+uSpG6TDoOqehjYmOTNrXQUcDewDtgxI2gFcFV7vg44qc0qOgJ4oh1OugY4OsmcduL46FaTJM2Q2T1ufwpwcZK9gPuBD9IJmMuTrAQeBN7fxl4NvBMYBZ5qY6mqrUk+Bdzcxp1RVVt77EuStBt6CoOqug1YOs6qo8YZW8DJE7zOGmBNL71IkibPbyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJKQiDJLOS3Jrkb9ryoiQ3JhlNclm7JSZJ9m7Lo239wq7XOK3V701yTK89SZJ2z1TsGXwUuKdr+Szg7Ko6CNgGrGz1lcC2Vj+7jSPJEuAE4GBgGXBukllT0JckaRf1FAZJ5gO/BXypLQc4EriiDVkLHN+eL2/LtPVHtfHLgUur6umqegAYBQ7rpS9J0u7pdc/gvwMfB/6pLb8WeLyqtrflTcC89nwesBGgrX+ijf//9XG2eZ4kq5JsSLJhy5YtPbYuSdph0mGQ5Djg0aq6ZQr7eVFVtbqqllbV0pGRkZl6W0na483uYdt3AO9O8k7gFcC+wOeB/ZLMbn/9zwc2t/GbgQXApiSzgdcAj3XVd+jeRpI0Aya9Z1BVp1XV/KpaSOcE8Leq6neA64D3tmErgKva83Vtmbb+W1VVrX5Cm220CFgM3DTZviRJu6+XPYOJfAK4NMmngVuB81v9fOCiJKPAVjoBQlXdleRy4G5gO3ByVT07DX1JkiYwJWFQVX8L/G17fj/jzAaqqn8E3jfB9mcCZ05FL5Kk3ec3kCVJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkiR7CIMmCJNcluTvJXUk+2ur7J1mf5L72c06rJ8k5SUaT3J7k0K7XWtHG35dkxUTvKUmaHr3sGWwH/rCqlgBHACcnWQKcClxbVYuBa9sywLF07m+8GFgFnAed8ABOBw6nc4e003cEiCRpZkw6DKrqoar6Xnv+E+AeYB6wHFjbhq0Fjm/PlwMXVscNwH5JXg8cA6yvqq1VtQ1YDyybbF+SpN03JecMkiwE3gbcCBxQVQ+1VQ8DB7Tn84CNXZttarWJ
6uO9z6okG5Js2LJly1S0LkliCsIgyauBvwI+VlVPdq+rqgKq1/foer3VVbW0qpaOjIxM1ctK0tDrKQySvJxOEFxcVVe28iPt8A/t56OtvhlY0LX5/FabqC5JmiG9zCYKcD5wT1X9cdeqdcCOGUErgKu66ie1WUVHAE+0w0nXAEcnmdNOHB/dapKkGTK7h23fAfwb4I4kt7XaJ4HPAJcnWQk8CLy/rbsaeCcwCjwFfBCgqrYm+RRwcxt3RlVt7aEvSdJumnQYVNX/AjLB6qPGGV/AyRO81hpgzWR7kST1xm8gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kSAxQGSZYluTfJaJJT+92PJA2TgQiDJLOALwLHAkuAE5Ms6W9XkjQ8BiIMgMOA0aq6v6qeAS4Flve5J0kaGpO+B/IUmwds7FreBBw+dlCSVcCqtvgPSe6dgd6GwVzgx/1uYmdyVr87UJ/4+zm13jhecVDCYJdU1Wpgdb/72NMk2VBVS/vdhzQefz9nxqAcJtoMLOhant9qkqQZMChhcDOwOMmiJHsBJwDr+tyTJA2NgThMVFXbk3wEuAaYBaypqrv63NYw8dCbBpm/nzMgVdXvHiRJfTYoh4kkSX1kGEiSDANJkmEgSWJAZhNp5iU5Gbi4qh5vy3OAE6vq3L42pqGV5A9ebH1V/fFM9TKM3DMYXh/eEQQAVbUN+HD/2pH4uZ08NI3cMxhes5Kk2tziduXYvfrck4ZYVf3XfvcwzAyD4fVN4LIkf9aW/32rSX2V5BXASuBg4BU76lX1ob41NQQ8TDS8PgFcB/xue1wLfLyvHUkdFwE/DxwDfJvOtcp+0teOhoDfQJY0UJLcWlVvS3J7Vb0lycuB71TVEf3ubU/mYaIhk+Tyqnp/kjuAF/wlUFVv6UNbUreftZ+PJzkEeBh4XR/7GQqGwfD5aPt5XF+7kCa2uk11/k90rl78auA/97elPZ+HiSRJ7hkMqyTvAc6is/ud9qiq2revjWnoJdkb+G1gIV3/R1XVGf3qaRgYBsPrs8C7quqefjcijXEV8ARwC/B0n3sZGobB8HrEINCAml9Vy/rdxLAxDIbXhiSXAV+j66+vqrqybx1JHX+X5Jeq6o5+NzJMPIE8pJJ8eZxy+S1P9VuSu4GDgAfo/KGy43yW056nkWEgaaAkeeN49ap6cKZ7GSaGwZDy+i8aZEneCvxaW/xOVX2/n/0MA69NNLy8/osGUpKPAhfTmfb8OuAvkpzS3672fO4ZDCmv/6JBleR24O1V9dO2/Crgu54zmF7uGQyvsdd/eQ1e/0WDIcCzXcvPtpqmkVNLh5fXf9Gg+jJwY5KvtuXjgfP7185w8DCRpIGT5FDgX7TF71TVrf3sZxgYBkNqgpuPPwHcUlW3zXA7Ekn2raonk+w/3vqq2jrTPQ0Tw2BIJflLYCnw1610HHA7nYuDfaWqPtun1jSkkvxNVR2X5AGef6+NHV86+4U+tTYUDIMhleR64J1V9Q9t+dXA14FldPYOlvSzP0kzy9lEw+t1PP+KkD8DDqiq/4tXilQfJbl2V2qaWs4mGl4X05mxcVVbfhfwl21O9939a0vDqn0rfh9gbpvptmM66b7AvL41NiQ8TDSEkoTON44PAN7Ryv+7qjb0rysNu/bN448BbwA281wYPAn8eVV9oU+tDQXDYEgluaOqfqnffUhjJTmlqv6k330MG8NgSCVZC3yhqm7udy/SWEn+OS+87eWFfWtoCBgGQyrJD4DFwA+Bn+I14zUgklwEvAm4jecuS1FV9ft9a2oIGAZDql0zfg7PXSb4euBxrxmvfktyD7Ck/M9pRjm1dHgdT+cy1nOBkfb83f1sSGrupHN5dc0g9wyGlJcJ1qBKch3wy8BNPP/+3P6xMo38nsHw8jLBGlT/pd8NDCPDYHh5mWANpKr6djuntbiq/meSfYBZ/e5rT+dh
oiHmZYI1iJJ8GFgF7F9Vb0qyGPjTqjqqz63t0QwDSQMlyW3AYcCNVfW2VvNLktPM2USSBs3TVfXMjoUks3n+Ja01DQwDSYPm20k+Cbwyyb8CvsJz993QNPEwkaSBkuRlwErgaDoz3K4BvuSX0KaXYSBpoCR5D/D1qvK+GjPIw0SSBs27gL9PclGS49o5A00z9wwkDZwkLweOBf41nenP66vq3/W3qz2bYSBpILVAWAZ8EPj1qprb55b2aB4mkjRQkhyb5ALgPuC3gS/hheumnXsGkgZKkkuAy4BveBJ55hgGkiQPE0kaLEnek+S+JE8keTLJT5I82e++9nTuGUgaKElGgXdV1T397mWYuGcgadA8YhDMPPcMJA2UJJ+nM3voazz/TmdX9qunYeA3+yQNmn2Bp+hcm2iHAgyDaeSegSTJcwaSBkuS+Um+muTR9virJPP73deezjCQNGi+DKwD3tAef91qmkYeJpI0UJLcVlW/vLOappZ7BpIGzWNJPpBkVnt8AHis303t6dwzkDRQkrwR+BPg7XRmEf0dcEpVbexrY3s4w0DSQEmyFvhYVW1ry/sDn6uqD/W3sz2bh4kkDZq37AgCgKraCrytj/0MBcNA0qB5WZI5OxbanoFfkJ1m/gNLGjT/Dfhukq+05fcBZ/axn6HgOQNJAyfJEuDItvitqrq7n/0MA8NAkuQ5A0mSYSBJwjCQJGEYSJIwDCRJwP8DT8gr8aoFiX8AAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "pd.value_counts(avocado['type']).plot.bar()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "<AxesSubplot:>" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYMAAAEvCAYAAACnuq2HAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASaklEQVR4nO3de7BdZX3G8e9jIl4LBIlUE8ZQzbQTr9AMYm2djrQQFAzjrTi1ZjA1nRZvbWcUnGlpVTri2FLRSpsKGCgKiBfwyqSAl1ZFDkJBQMsZEUmGy9EEUKkX7K9/7DdlJ54Qztk5Z22yvp+ZPXu973rXPr9kMnnOete71k5VIUnqt0d0XYAkqXuGgSTJMJAkGQaSJAwDSRKGgSQJWNh1AbO1//7717Jly7ouQ5IeNq6++urvV9Xi6fY9bMNg2bJlTExMdF2GJD1sJLl1Z/ucJpIkGQaSJMNAkoRhIEniIYRBkrOS3JXkm0N9+yXZmOTm9r6o9SfJ6Ukmk1yX5JChY9a08TcnWTPU/5tJrm/HnJ4ku/sPKUl6cA/lzOBDwKod+k4ELquq5cBlrQ1wFLC8vdYBZ8AgPICTgecChwInbwuQNuZ1Q8ft+LMkSXNsl2FQVV8CtuzQvRrY0LY3AMcO9Z9TA18D9k3yJOBIYGNVbamqrcBGYFXbt3dVfa0Gz9I+Z+izJEnzZLbXDA6oqtvb9h3AAW17CXDb0LhNre/B+jdN0z+tJOuSTCSZmJqammXpkqQdjXzTWVVVknn5hpyqWg+sB1i5cuXYfyvPshM/03UJe5TvvuvFXZcg7bFme2ZwZ5viob3f1fo3AwcOjVva+h6sf+k0/ZKkeTTbM4NLgDXAu9r7xUP9r09yPoOLxfdU1e1JLgX+buii8RHASVW1Jcm9SQ4DrgReA7xvljVJmgHPXHevh/uZ6y7DIMlHgN8F9k+yicGqoHcBFyZZC9wKvLIN/yzwImASuA84HqD9p/8O4Ko27u1Vte2i9J8xWLH0GOBz7SVJmke7DIOqetVOdh0+zdgCTtjJ55wFnDVN/wTwjF3VIUmaO96BLEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEmMGAZJ/jzJDUm+meQjSR6d5KAkVyaZTHJBkr3a2Ee19mTbv2zoc05q/d9OcuSIfyZJ0gzNOgySLAHeCKysqmcAC4DjgFOB06rqacBWYG07ZC2wtfWf1saRZEU77unAKuADSRbMti5J0syNOk20EHhMkoXAY4HbgRcCF7X9G4Bj2/bq1qbtPzxJWv/5VfXTqroFmAQOHbEuSdIMzDoMqmoz8B7gewxC4B7gauDuqrq/DdsELGnbS4Db2rH3t/FPGO6f5hhJ0jwYZZpoEYPf6g8Cngw8jsE0z5xJsi7JRJKJqampufxRktQro0wT/R5wS1VNVdXPgY8Dzwf2bdNGAEuBzW17M3AgQNu/D/CD4f5pjtlOVa2vqpVVtXLx4sUjlC5JGjZKGHwPOCzJY9vc/+HAjcAVwMvbmDXAxW37ktam7b+8qqr1H9dWGx0ELAe+PkJdkqQZWrjrIdOrqiuTXAR8A7gfuAZYD3wGOD/JO1vfme2QM4Fzk0wCWxisIKKqbkhyIYMguR84oap+Mdu6JEkzN+swAKiqk4GTd+j+DtOsBqqqnwCv2MnnnAKcMkotkqTZ8w5kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCQxYhgk2TfJRUm+leSmJM9Lsl+SjUl
ubu+L2tgkOT3JZJLrkhwy9Dlr2vibk6wZ9Q8lSZqZUc8M3gt8vqp+A3g2cBNwInBZVS0HLmttgKOA5e21DjgDIMl+wMnAc4FDgZO3BYgkaX7MOgyS7AO8ADgToKp+VlV3A6uBDW3YBuDYtr0aOKcGvgbsm+RJwJHAxqraUlVbgY3AqtnWJUmauVHODA4CpoCzk1yT5INJHgccUFW3tzF3AAe07SXAbUPHb2p9O+uXJM2TUcJgIXAIcEZVHQz8mAemhACoqgJqhJ+xnSTrkkwkmZiamtpdHytJvTdKGGwCNlXVla19EYNwuLNN/9De72r7NwMHDh2/tPXtrP+XVNX6qlpZVSsXL148QumSpGGzDoOqugO4Lcmvt67DgRuBS4BtK4LWABe37UuA17RVRYcB97TppEuBI5IsaheOj2h9kqR5snDE498AnJdkL+A7wPEMAubCJGuBW4FXtrGfBV4ETAL3tbFU1ZYk7wCuauPeXlVbRqxLkjQDI4VBVV0LrJxm1+HTjC3ghJ18zlnAWaPUIkmaPe9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiR2QxgkWZDkmiSfbu2DklyZZDLJBUn2av2Pau3Jtn/Z0Gec1Pq/neTIUWuSJM3M7jgzeBNw01D7VOC0qnoasBVY2/rXAltb/2ltHElWAMcBTwdWAR9IsmA31CVJeohGCoMkS4EXAx9s7QAvBC5qQzYAx7bt1a1N2394G78aOL+qflpVtwCTwKGj1CVJmplRzwz+EXgL8L+t/QTg7qq6v7U3AUva9hLgNoC2/542/v/7pzlGkjQPZh0GSY4G7qqqq3djPbv6meuSTCSZmJqamq8fK0l7vFHODJ4PvCTJd4HzGUwPvRfYN8nCNmYpsLltbwYOBGj79wF+MNw/zTHbqar1VbWyqlYuXrx4hNIlScNmHQZVdVJVLa2qZQwuAF9eVX8IXAG8vA1bA1zcti9pbdr+y6uqWv9xbbXRQcBy4OuzrUuSNHMLdz1kxt4KnJ/kncA1wJmt/0zg3CSTwBYGAUJV3ZDkQuBG4H7ghKr6xRzUJUnaid0SBlX1BeALbfs7TLMaqKp+ArxiJ8efApyyO2qRJM2cdyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIkRwiDJgUmuSHJjkhuSvKn175dkY5Kb2/ui1p8kpyeZTHJdkkOGPmtNG39zkjWj/7EkSTMxypnB/cBfVtUK4DDghCQrgBOBy6pqOXBZawMcBSxvr3XAGTAID+Bk4LnAocDJ2wJEkjQ/Zh0GVXV7VX2jbf8QuAlYAqwGNrRhG4Bj2/Zq4Jwa+Bqwb5InAUcCG6tqS1VtBTYCq2ZblyRp5nbLNYMky4CDgSuBA6rq9rbrDuCAtr0EuG3osE2tb2f9kqR5MnIYJHk88DHgzVV17/C+qiqgRv0ZQz9rXZKJJBNTU1O762MlqfdGCoMkj2QQBOdV1cdb951t+of2flfr3wwcOHT40ta3s/5fUlXrq2plVa1cvHjxKKVLkoaMspoowJnATVX1D0O7LgG2rQhaA1w81P+atqroMOCeNp10KXBEkkXtwvERrU+SNE8WjnDs84E/Aq5Pcm3rexvwLuDCJGuBW4FXtn2fBV4ETAL3AccDVNWWJO8Armrj3l5VW0aoS5I0Q7MOg6r6DyA72X34NOMLOGEnn3UWcNZsa5EkjcY7kCVJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRK
GgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CSxBiFQZJVSb6dZDLJiV3XI0l9MhZhkGQB8E/AUcAK4FVJVnRblST1x1iEAXAoMFlV36mqnwHnA6s7rkmSemNh1wU0S4DbhtqbgOfuOCjJOmBda/4oybfnobY+2B/4ftdF7EpO7boCdcR/n7vPU3a2Y1zC4CGpqvXA+q7r2NMkmaiqlV3XIU3Hf5/zY1ymiTYDBw61l7Y+SdI8GJcwuApYnuSgJHsBxwGXdFyTJPXGWEwTVdX9SV4PXAosAM6qqhs6LqtPnHrTOPPf5zxIVXVdgySpY+MyTSRJ6pBhIEkyDCRJhoEkiTFZTaT5l+QE4Lyquru1FwGvqqoPdFqYeivJXzzY/qr6h/mqpY88M+iv120LAoCq2gq8rrtyJH5lFy/NIc8M+mtBklRbW9yeHLtXxzWpx6rqb7uuoc8Mg/76PHBBkn9p7T9pfVKnkjwaWAs8HXj0tv6qem1nRfWA00T99VbgCuBP2+sy4C2dViQNnAv8KnAk8EUGzyr7YacV9YB3IEsaK0muqaqDk1xXVc9K8kjgy1V1WNe17cmcJuqZJBdW1SuTXA/80m8CVfWsDsqShv28vd+d5BnAHcATO6ynFwyD/nlTez+60yqknVvfljr/FYOnFz8e+OtuS9rzOU0kSfLMoK+SvBQ4lcHpd9qrqmrvTgtT7yV5FPAyYBlD/0dV1du7qqkPDIP+ejdwTFXd1HUh0g4uBu4BrgZ+2nEtvWEY9NedBoHG1NKqWtV1EX1jGPTXRJILgE8y9NtXVX28s4qkga8keWZVXd91IX3iBeSeSnL2NN3lXZ7qWpIbgacBtzD4RWXb9SyXPc8hw0DSWEnylOn6q+rW+a6lTwyDnvL5LxpnSZ4N/E5rfrmq/qvLevrAZxP1l89/0VhK8ibgPAbLnp8I/FuSN3Rb1Z7PM4Oe8vkvGldJrgOeV1U/bu3HAV/1msHc8sygv3Z8/ss++PwXjYcAvxhq/6L1aQ65tLS/fP6LxtXZwJVJPtHaxwJndldOPzhNJGnsJDkE+O3W/HJVXdNlPX1gGPTUTr58/B7g6qq6dp7LkUiyd1Xdm2S/6fZX1Zb5rqlPDIOeSvJhYCXwqdZ1NHAdg4eDfbSq3t1RaeqpJJ+uqqOT3ML237Wx7aazX+uotF4wDHoqyZeAF1XVj1r78cBngFUMzg5WdFmfpPnlaqL+eiLbPxHy58ABVfU/+KRIdSjJZQ+lT7uXq4n66zwGKzYubu1jgA+3Nd03dleW+qrdFf9YYP+20m3bctK9gSWdFdYTThP1UJIwuOP4AOD5rfs/q2qiu6rUd+3O4zcDTwY280AY3Av8a1W9v6PSesEw6Kkk11fVM7uuQ9pRkjdU1fu6rqNvDIOeSrIBeH9VXdV1LdKOkvwWv/y1l+d0VlAPGAY9leRbwHLgu8CP8ZnxGhNJzgWeClzLA4+lqKp6Y2dF9YBh0FPtmfGLeOAxwV8C7vaZ8epakpuAFeV/TvPKpaX9dSyDx1jvDyxu2y/psiCp+SaDx6trHnlm0FM+JljjKskVwHOAr7P993P7y8oc8j6D/vIxwRpXf9N1AX1kGPSXjwnWWKqqL7ZrWsur6t+TPBZY0HVdezqniXrMxwRrHCV5HbAO2K+qnppkOfDPVXV4x6Xt0QwDSWMlybXAocCVVXVw6/MmyTnmaiJJ4+anVfWzbY0kC9n+kdaaA4aBpHHzxSRvAx6T5PeBj/LA925ojjhNJGmsJHkEsBY4gsEKt0uBD3oT2twyDCSNlSQvBT5TVX6vxjxymkjSuDkG+O8k5yY5ul0z0BzzzEDS2EnySOAo4A8YLH/eWFV/3G1VezbDQNJYaoGwCjgeeEFV7d9xSXs0p4kkjZUkRyX5EHAz8DL
gg/jgujnnmYGksZLkI8AFwOe8iDx/DANJktNEksZLkpcmuTnJPUnuTfLDJPd2XdeezjMDSWMlySRwTFXd1HUtfeKZgaRxc6dBMP88M5A0VpK8l8HqoU+y/TedfbyrmvrAO/skjZu9gfsYPJtomwIMgznkmYEkyWsGksZLkqVJPpHkrvb6WJKlXde1pzMMJI2bs4FLgCe316dan+aQ00SSxkqSa6vqObvq0+7lmYGkcfODJK9OsqC9Xg38oOui9nSeGUgaK0meArwPeB6DVURfAd5QVbd1WtgezjCQNFaSbADeXFVbW3s/4D1V9dpuK9uzOU0kadw8a1sQAFTVFuDgDuvpBcNA0rh5RJJF2xrtzMAbZOeYf8GSxs3fA19N8tHWfgVwSof19ILXDCSNnSQrgBe25uVVdWOX9fSBYSBJ8pqBJMkwkCRhGEiSMAwkSRgGkiTg/wAZuUhOdJB+3AAAAABJRU5ErkJggg==\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "pd.value_counts(avocado_train['type']).plot.bar()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<AxesSubplot:>" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEvCAYAAACpPxGtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAVKklEQVR4nO3df7DldX3f8efL5YcmaljKleIuutRsJ7Mmuji3gDVtrY6wUA3ENBZmEimhru2A1UmmDTptMRpmNBNlYqK0GNDVoIi/wkZJ6AYZ0SYCi274KeWWH8PuIGxcfmhoSaDv/nE+Ww7r3b13d++eL57P8zFz5ny/7+/3e877zO6+7nc/5/P93lQVkqQ+PGfoBiRJk2PoS1JHDH1J6oihL0kdMfQlqSOGviR15KChG9iTI444olatWjV0G5L0Y+Wmm27666qamW/bgqGf5LnAdcChbf8vVNX5ST4J/DPg0bbrv66qLUkC/B5wCvB4q3+7vdaZwH9q+/92VW3Y03uvWrWKzZs3L9SiJGlMkvt2t20xZ/pPAK+rqh8mORj4ZpI/bdv+Q1V9YZf9TwZWt8fxwEXA8UkOB84HZoECbkqysaoe3ruPI0naVwuO6dfID9vqwe2xp8t4TwU+1Y77FnBYkqOAk4BNVbWjBf0mYN3+tS9J2huL+iI3ybIkW4CHGAX39W3TBUluTnJhkkNbbQVw/9jhW1ttd3VJ0oQsKvSr6qmqWgusBI5L8rPAu4GfAf4RcDjwm0vRUJL1STYn2bx9+/aleElJUrNXUzar6hHgWmBdVT3QhnCeAD4BHNd22wYcPXbYylbbXX3X97i4qmaranZmZt4vnyVJ+2jB0E8yk+Swtvw84A3Ad9s4PW22zmnAre2QjcBbM3IC8GhVPQBcDZyYZHmS5cCJrSZJmpDFzN45CtiQZBmjHxJXVNVXknwtyQwQYAvwb9v+VzGarjnHaMrmWQBVtSPJ+4Eb237vq6odS/ZJJEkLyrP5fvqzs7PlPH1J2jtJbqqq2fm2PauvyP1xseq8rw7dwlS59wP/YugWpKnlvXckqSOGviR1xNCXpI4Y+pLUEUN
fkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFvuCZNOW8IuHSm4WaAnulLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktSRBUM/yXOT3JDkr5LcluS3Wv2YJNcnmUvyuSSHtPqhbX2ubV819lrvbvU7k5x0wD6VJGleiznTfwJ4XVW9ElgLrEtyAvBB4MKq+mngYeDstv/ZwMOtfmHbjyRrgNOBlwPrgI8lWbaEn0WStIAFQ79GfthWD26PAl4HfKHVNwCnteVT2zpt++uTpNUvr6onquoeYA44bik+hCRpcRY1pp9kWZItwEPAJuB/AY9U1ZNtl63Aira8ArgfoG1/FPh74/V5jpEkTcCiQr+qnqqqtcBKRmfnP3OgGkqyPsnmJJu3b99+oN5Gkrq0V7N3quoR4Frg1cBhSXbesG0lsK0tbwOOBmjbfwr4/nh9nmPG3+PiqpqtqtmZmZm9aU+StIDFzN6ZSXJYW34e8AbgDkbh/y/bbmcCV7bljW2dtv1rVVWtfnqb3XMMsBq4YYk+hyRpERZza+WjgA1tps1zgCuq6itJbgcuT/LbwHeAS9r+lwCfTjIH7GA0Y4equi3JFcDtwJPAOVX11NJ+HEnSniwY+lV1M3DsPPW7mWf2TVX9H+CXd/NaFwAX7H2bkqSl4BW5ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIwuGfpKjk1yb5PYktyV5Z6u/N8m2JFva45SxY96dZC7JnUlOGquva7W5JOcdmI8kSdqdgxaxz5PAb1TVt5O8ALgpyaa27cKq+t3xnZOsAU4HXg68GPjzJP+wbf4o8AZgK3Bjko1VdftSfBBJ0sIWDP2qegB4oC3/IMkdwIo9HHIqcHlVPQHck2QOOK5tm6uquwGSXN72NfQlaUL2akw/ySrgWOD6Vjo3yc1JLk2yvNVWAPePHba11XZX3/U91ifZnGTz9u3b96Y9SdICFh36SZ4PfBF4V1U9BlwEvAxYy+h/Ah9aioaq6uKqmq2q2ZmZmaV4SUlSs5gxfZIczCjwL6uqLwFU1YNj2z8OfKWtbgOOHjt8Zauxh7okaQIWM3snwCXAHVX14bH6UWO7/SJwa1veCJye5NAkxwCrgRuAG4HVSY5JcgijL3s3Ls3HkCQtxmLO9F8D/CpwS5ItrfYe4Iwka4EC7gXeDlBVtyW5gtEXtE8C51TVUwBJzgWuBpYBl1bVbUv2SSRJC1rM7J1vApln01V7OOYC4IJ56lft6ThJ0oHlFbmS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHVkw9JMcneTaJLcnuS3JO1v98CSbktzVnpe3epJ8JMlckpuTvGrstc5s+9+V5MwD97EkSfNZzJn+k8BvVNUa4ATgnCRrgPOAa6pqNXBNWwc4GVjdHuuBi2D0QwI4HzgeOA44f+cPCknSZCwY+lX1QFV9uy3/ALgDWAGcCmxou20ATmvLpwKfqpFvAYclOQo4CdhUVTuq6mFgE7BuKT+MJGnP9mpMP8kq4FjgeuDIqnqgbfoecGRbXgHcP3bY1lbbXV2SNCGLDv0kzwe+CLyrqh4b31ZVBdRSNJRkfZLNSTZv3759KV5SktQsKvSTHMwo8C+rqi+18oNt2Ib2/FCrbwOOHjt8Zavtrv4MVXVxVc1W1ezMzMzefBZJ0gIWM3snwCXAHVX14bFNG4GdM3DOBK4cq7+1zeI5AXi0DQNdDZyYZHn7AvfEVpMkTchBi9jnNcCvArck2dJq7wE+AFyR5GzgPuAtbdtVwCnAHPA4cBZAVe1I8n7
gxrbf+6pqx1J8CEnS4iwY+lX1TSC72fz6efYv4JzdvNalwKV706Akael4Ra4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRxYM/SSXJnkoya1jtfcm2ZZkS3ucMrbt3UnmktyZ5KSx+rpWm0ty3tJ/FEnSQhZzpv9JYN089Quram17XAWQZA1wOvDydszHkixLsgz4KHAysAY4o+0rSZqggxbaoaquS7Jqka93KnB5VT0B3JNkDjiubZurqrsBklze9r1971uWJO2r/RnTPzfJzW34Z3mrrQDuH9tna6vtri5JmqB9Df2LgJcBa4EHgA8tVUNJ1ifZnGTz9u3bl+plJUnsY+hX1YNV9VRV/V/g4zw9hLMNOHps15Wttrv6fK99cVXNVtXszMzMvrQnSdqNfQr9JEeNrf4isHNmz0bg9CSHJjkGWA3cANwIrE5yTJJDGH3Zu3Hf25Yk7YsFv8hN8lngtcARSbYC5wOvTbIWKOBe4O0AVXVbkisYfUH7JHBOVT3VXudc4GpgGXBpVd221B9GkrRni5m9c8Y85Uv2sP8FwAXz1K8Crtqr7iRJS8orciWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcWDP0klyZ5KMmtY7XDk2xKcld7Xt7qSfKRJHNJbk7yqrFjzmz735XkzAPzcSRJe7KYM/1PAut2qZ0HXFNVq4Fr2jrAycDq9lgPXASjHxLA+cDxwHHA+Tt/UEiSJmfB0K+q64Adu5RPBTa05Q3AaWP1T9XIt4DDkhwFnARsqqodVfUwsIkf/UEiSTrA9nVM/8iqeqAtfw84si2vAO4f229rq+2uLkmaoP3+IreqCqgl6AWAJOuTbE6yefv27Uv1spIk9j30H2zDNrTnh1p9G3D02H4rW2139R9RVRdX1WxVzc7MzOxje5Kk+exr6G8Eds7AORO4cqz+1jaL5wTg0TYMdDVwYpLl7QvcE1tNkjRBBy20Q5LPAq8FjkiyldEsnA8AVyQ5G7gPeEvb/SrgFGAOeBw4C6CqdiR5P3Bj2+99VbXrl8OSpANswdCvqjN2s+n18+xbwDm7eZ1LgUv3qjtJ0pLyilxJ6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0JekjuxX6Ce5N8ktSbYk2dxqhyfZlOSu9ry81ZPkI0nmktyc5FVL8QEkSYu3FGf6/7yq1lbVbFs/D7imqlYD17R1gJOB1e2xHrhoCd5bkrQXDsTwzqnAhra8AThtrP6pGvkWcFiSow7A+0uSdmN/Q7+A/57kpiTrW+3IqnqgLX8POLItrwDuHzt2a6tJkibkoP08/ueraluSFwGbknx3fGNVVZLamxdsPzzWA7zkJS/Zz/YkSeP260y/qra154eALwPHAQ/uHLZpzw+13bcBR48dvrLVdn3Ni6tqtqpmZ2Zm9qc9SdIu9jn0k/xkkhfsXAZOBG4FNgJntt3OBK5syxuBt7ZZPCcAj44NA0mSJmB/hneOBL6cZOfrfKaq/izJjcAVSc4G7gPe0va/CjgFmAMeB87aj/eWJO2DfQ79qrobeOU89e8Dr5+nXsA5+/p+kqT95xW5ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIxMP/STrkty
ZZC7JeZN+f0nq2URDP8ky4KPAycAa4IwkaybZgyT1bNJn+scBc1V1d1X9LXA5cOqEe5Ckbh004fdbAdw/tr4VOH58hyTrgfVt9YdJ7pxQbz04AvjroZtYSD44dAcayLP+7+eP0d/Nl+5uw6RDf0FVdTFw8dB9TKMkm6tqdug+pPn493MyJj28sw04emx9ZatJkiZg0qF/I7A6yTFJDgFOBzZOuAdJ6tZEh3eq6skk5wJXA8uAS6vqtkn20DmHzfRs5t/PCUhVDd2DJGlCvCJXkjpi6EtSRwx9SeqIoS9JHXnWXZylpZHk1/e0vao+PKlepPkkOQe4rKoeaevLgTOq6mODNjblPNOfXi9Y4CEN7W07Ax+gqh4G3jZcO33wTH9KVdVvDd2DtIBlSVJt3ni7C+8hA/c09Qz9KZfkucDZwMuB5+6sV9WvDdaUNPJnwOeS/Le2/vZW0wHk8M70+zTw94GTgK8zut/RDwbtSBr5TeBa4N+1xzXAfxy0ow54Re6US/Kdqjo2yc1V9YokBwPfqKoThu5N0uQ5vDP9/q49P5LkZ4HvAS8asB91LskVVfWWJLcAP3LWWVWvGKCtbhj60+/iNhXuPzO6o+nzgf8ybEvq3Dvb8xsH7aJTDu9IUkc8059ySQ4FfglYxdifd1W9b6ieJIAkbwY+yGi4Me1RVfXCQRubcob+9LsSeBS4CXhi4F6kcb8DvKmq7hi6kZ4Y+tNvZVWtG7oJaR4PGviTZ+hPv79I8nNVdcvQjUi72Jzkc8AfM/a/0Kr60mAddcAvcqdcktuBnwbuYfQPa+e4qdPiNKgkn5inXF4tfmAZ+lMuyUvnq1fVfZPuRdLwDP0OJHkl8E/a6jeq6q+G7EcC7ws1FO+9M+WSvBO4jNG0uBcBf5TkHcN2JQHeF2oQnulPuSQ3A6+uqr9p6z8J/KVj+hqa94Uahmf60y/AU2PrT7WaNLRd7wv1U3hfqAPOKZvT7xPA9Um+3NZPAy4Zrh3p//O+UANweKcDSV4F/Hxb/UZVfWfIfiQNx9CfUkleWFWPJTl8vu1VtWPSPUnjkvz6POVHgZuqasuE2+mGoT+lknylqt6Y5B6eec/ynRdn/YOBWpMASPIZYBb4k1Z6I3Azo5sDfr6qfmeg1qaaoS9pEEmuA06pqh+29ecDXwXWMTrbXzNkf9PK2TtTLsk1i6lJA3gRz7zz698BR1bV/8Y7wh4wzt6ZUu1qx58AjmgzJHZO03whsGKwxqSnXcZoZtmVbf1NwGfatSS3D9fWdHN4Z0q1K3HfBbwY2MbTof8Y8PGq+oOBWpNIEkZX4B4JvKaV/0dVbR6uqz4Y+lMuyTuq6veH7kPaVZJbqurnhu6jN4Z+B5L8Y3701yV+arCGJCDJBuAPqurGoXvpiaE/5ZJ8GngZsIWnb8dQVfXvB2tKApJ8F1gN3Av8Df6uh4kw9KdckjuANeUftJ5l2u96WM7Tt/2+DnjE3/VwYDllc/rdyuj2tdKzzWmMbq98BDDTln9hyIZ64Jn+lEtyLbAWuIFn/h5S/3FpUN72exjO059+7x26AWk3vO33AAz9KVdVX29jp6ur6s+T/ASwbOi+JLzt9yAc3plySd4GrAcOr6qXJVkN/Neqev3ArUne9nsAhv6US7IFOA64vqqObTUvipE65eyd6fdEVf3tzpUkB/HMWy1L6oihP/2+nuQ9wPOSvAH4PE/fv1xSZxzemXJJngOcDZzIaGbE1cAferGW1CdDf8oleTPw1ary/uSSHN7pwJuA/5nk00ne2Mb0JXXKM/0OJDkYOBn4V4ymx22qqn8zbFeShmDod6IF/zrgLOCfVtURA7ckaQAO70y5JCcn+SRwF/BLwB/iDdikbnmmP+WSfBb4HPCnfpkrydCXpI44vDPlkrw5yV1JHk3yWJIfJHls6L4kDcMz/SmXZA54U1XdMXQvkobnmf70e9DAl7STZ/pTLsnvMZqt88c88zdnfWmoniQNx6szp98LgccZ3Xt
npwIMfalDnulLUkcc059ySVYm+XKSh9rji0lWDt2XpGEY+tPvE8BG4MXt8SetJqlDDu9MuSRbqmrtQjVJffBMf/p9P8mvJFnWHr8CfH/opiQNwzP9KZfkpcDvA69mNGvnL4B3VNX9gzYmaRCG/pRLsgF4V1U93NYPB363qn5t2M4kDcHhnen3ip2BD1BVO4BjB+xH0oAM/en3nCTLd660M30vypM65T/+6fch4C+TfL6t/zJwwYD9SBqQY/odSLIGeF1b/VpV3T5kP5KGY+hLUkcc05ekjhj6ktQRQ1+SOmLoS1JHDH1J6sj/A+qwMSDo1Gd2AAAAAElFTkSuQmCC\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "pd.value_counts(avocado_test['type']).plot.bar()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "<AxesSubplot:>" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASkElEQVR4nO3df6zddX3H8ecbSikKoyjkDkrdrZEw0I4NbxBDYq6yQYXFkgxNA9HWYLo4RFy6zGKy1apETFSGbNM0lq0asHRopAOca4ATsz+o/BCtUBl3UKAdilKoVgF39b0/zufitd7be27v+XHP/TwfyUm/Pz7n+/18+j33db7n8/2ez4nMRJJUh8N6XQFJUvcY+pJUEUNfkipi6EtSRQx9SarIvF5X4GCOP/74HBwcbPt2f/7zn/PKV76y7dvtNdvVf+Zq22xXb91///0/ycwTJlo3q0N/cHCQ++67r+3bbTQaDA8Pt327vWa7+s9cbZvt6q2IeGKydXbvSFJFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRWb1N3I1PTv27GPV2tt7su9d11zYk/1Kmh7P9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxGEY1BaDHRz+Yc3S0UmHl3D4B2l6PNOXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqSEuhHxF/HREPRcT3I+IrEbEgIpZExPaIGImImyNifil7ZJkfKesHx23nqrL8kYg4v0NtkiRNYsrQj4hFwAeBocx8A3A4sAL4FHBtZr4OeA64rDzlMuC5svzaUo6IOL087/XAMuCfI+Lw9jZHknQwrXbvzAOOioh5wCuAp4G3AbeU9ZuAi8r08jJPWX9uRERZvjkzX8rMx4ER4KwZt0CS1LIpf0QlM/dExKeBJ4EXgP8E7geez8zRUmw3sKhMLwKeKs8djYh9wKvL8nvGbXr8c14WEauB1QADAwM0Go3pt2oK+/fv78h2e23gqOYPjsw1B2tXvx/HufpatF2z15ShHxHH0TxLXwI8D/wbze6ZjsjMDcAGgKGhoRweHm77PhqNBp3Ybq9df+OtfGbH3PsxtDVLRydt165Lh7tbmTabq69F2zV7tdK986fA45n548
z8P+BrwDnAwtLdA3AysKdM7wEWA5T1xwLPjl8+wXMkSV3QSug/CZwdEa8offPnAg8DdwMXlzIrgVvL9NYyT1l/V2ZmWb6i3N2zBDgF+HZ7miFJakUrffrbI+IW4AFgFPgOze6X24HNEfGJsmxjecpG4MsRMQLspXnHDpn5UERsofmGMQpcnpm/anN7JEkH0VIHcGauA9YdsPgxJrj7JjNfBN45yXauBq6eZh0lSW3iN3IlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKtBT6EbEwIm6JiB9ExM6IeHNEvCoitkXEo+Xf40rZiIjPRcRIRHwvIs4ct52VpfyjEbGyU42SJE2s1TP964D/yMw/BM4AdgJrgTsz8xTgzjIP8HbglPJYDXweICJeBawD3gScBawbe6OQJHXHlKEfEccCbwE2AmTmLzPzeWA5sKkU2wRcVKaXA1/KpnuAhRFxInA+sC0z92bmc8A2YFkb2yJJmsK8FsosAX4M/EtEnAHcD1wJDGTm06XMD4GBMr0IeGrc83eXZZMt/y0RsZrmJwQGBgZoNBqttqVl+/fv78h2e23gKFizdLTX1Wi7g7Wr34/jXH0t2q7Zq5XQnwecCVyRmdsj4jp+05UDQGZmRGQ7KpSZG4ANAENDQzk8PNyOzf6WRqNBJ7bba9ffeCuf2dHKIe0va5aOTtquXZcOd7cybTZXX4u2a/ZqpU9/N7A7M7eX+Vtovgn8qHTbUP59pqzfAywe9/yTy7LJlkuSumTK0M/MHwJPRcSpZdG5wMPAVmDsDpyVwK1leivwnnIXz9nAvtIN9E3gvIg4rlzAPa8skyR1Sat9AVcAN0bEfOAx4L003zC2RMRlwBPAu0rZO4ALgBHgF6Usmbk3Ij4O3FvKfSwz97alFZKklrQU+pn5IDA0wapzJyibwOWTbOcG4IZp1E86qMG1t/ds37uuubBn+5YOld/IlaSKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekisy9IRkrtvSwx9m1YF3X9zv44k1d36ekQ+OZviRVxNCXpIoY+pJUEUNfkirihVzN2K4Fl3R0+43D1v/OBWovHkuHxjN9SaqIoS9JFTH0Jakihr4kVcQLuR3Qq99t/dczerJbSX3EM31JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkVaDv2IODwivhMRt5X5JRGxPSJGIuLmiJhflh9Z5kfK+sFx27iqLH8kIs5ve2skSQc1nTP9K4Gd4+Y/BVybma8DngMuK8svA54ry68t5YiI04EVwOuBZcA/R8ThM6u+JGk6Wgr9iDgZuBD4YpkP4G3ALaXIJuCiMr28zFPWn1vKLwc2Z+ZLmfk4MAKc1YY2SJJa1Opv5P4D8LfAMWX+1cDzmTla5ncDi8r0IuApgMwcjYh9pfwi4J5x2xz/nJdFxGpgNcDAwACNRqPFKrZu//79HdnumDVLR6cu1AH7jzyJxqnre7LvTpqoXWt+3Zv/4/Ha8Rrq9GuxV2zX7DVl6EfEnwPPZOb9ETHc6Qpl5gZgA8DQ0FAOD7d/l41Gg05sd8yqnv0w+uMMP7KuJ/vupMap63+nXatevKlHtfmNXZcOz3gbnX4t9ortmr1aOdM/B3hHRFwALAB+D7gOWBgR88rZ/snAnlJ+D7AY2B0R84BjgWfHLR8z/jmSpC6Ysk8/M6/KzJMzc5Dmhdi7MvNS4G7g4lJsJXBrmd5a5inr78rMLMtXlLt7lgCnAN9uW0
skSVNqtU9/Ih8GNkfEJ4DvABvL8o3AlyNiBNhL842CzHwoIrYADwOjwOWZ+asZ7F+SNE3TCv3MbACNMv0YE9x9k5kvAu+c5PlXA1dPt5KSpPbwG7mSVBFDX5IqYuhLUkUMfUmqiKEvSRWZyS2bUs/sWnBJ1/c5OAu+BSzNlGf6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0JakiDq3cIb0Y+rfB+q7vU1J/8Uxfkipi6EtSRQx9SaqIoS9JFfFCrnSIBtfePuNtrFk6yqppbmfXNRfOeL+ql2f6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkioyZehHxOKIuDsiHo6IhyLiyrL8VRGxLSIeLf8eV5ZHRHwuIkYi4nsRcea4ba0s5R+NiJWda5YkaSKtnOmPAmsy83TgbODyiDgdWAvcmZmnAHeWeYC3A6eUx2rg89B8kwDWAW8CzgLWjb1RSJK6Y8rQz8ynM/OBMv0zYCewCFgObCrFNgEXlenlwJey6R5gYUScCJwPbMvMvZn5HLANWNbOxkiSDi4ys/XCEYPAt4A3AE9m5sKyPIDnMnNhRNwGXJOZ/1XW3Ql8GBgGFmTmJ8ryvwNeyMxPH7CP1TQ/ITAwMPDGzZs3z6R9E9q/fz9HH31027c7ZseefSw97PGObX8y+488iaNf+t+u77fTZku7dvx6Sdu3OXAU/OiF6T1n6aJj216Pduv031iv9Eu73vrWt96fmUMTrWt5aOWIOBr4KvChzPxpM+ebMjMjovV3j4PIzA3ABoChoaEcHh5ux2Z/S6PRoBPbHbNq7e3sWrCuY9ufTOPU9Qw/0v39dtpsadeqF29q+zbXLB3lMzumN8L5rkuH216Pduv031ivzIV2tXT3TkQcQTPwb8zMr5XFPyrdNpR/nynL9wCLxz395LJssuWSpC5p5e6dADYCOzPzs+NWbQXG7sBZCdw6bvl7yl08ZwP7MvNp4JvAeRFxXLmAe15ZJknqklY+V54DvBvYEREPlmUfAa4BtkTEZcATwLvKujuAC4AR4BfAewEyc29EfBy4t5T7WGbubUcj1PTw/PlcseQ1Pdn3jsef7Ml+JU3PlKFfLsjGJKvPnaB8ApdPsq0bgBumU0H1h6UdfLN5/0HezHyzkabH38iVWrRrwSVt32bjsPVTXvQf7MAFZNXL0J8Ddm4+CYDXnpBsuX604/t711W+bKR+5dg7klQRQ1+SKuLndE3blk92vgtpjF1JUnt5pi9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkW8CVqz2pZPjvLEFZMPL7GTk9q2r9NW9P7XuaROM/SlYrojhTrCp/qR3TuSVBFDX5IqYuhLUkXs0++AY05by1K697OFW+jeAGiS+ptn+pJUEc/0O6Sbww+rLoNrb+/Zvnddc2HP9q328Exfkipi6EtSRQx9SaqIffpSMd3rMDMdAsJhH9QLnulLUkUMfUmqiN070iy3a8ElXd/n4Is3dX2f6o45HfqT3c+8Zukoqzp4r/Mxp3Vs05pDli55De+fP58rHN1TXWT3jiRVxNCXpIoY+pJUkTndpz/mwAthjcPWs2vBuo7tr5sjbErSdHimL0kVqeJMX5qNpvrR98kcyjeB/favxhj6klrW6rDO7b4t2iGd28fQlyqwdJrfBTiGtRMub/U7Aod63cwvhXVe10M/IpYB1wGHA1/MzGs6ta9jTmu+cMcurI4NqPXiFUewc/PMBss6GH++ULNNu37Up5WuJbuSZreuhn5EHA78E/BnwG7g3ojYmpkPd7MekmanSYec+Gjn9nmwTxdzsVup22f6ZwEjmfkYQERsBpYDhr40R+zcfFLHP0230zf4m0nX7fz6AevW/SV8dPnMd/rRfTPfxiGKzOzeziIuBpZl5vvK/LuBN2XmB8aVWQ2sLrOnAo
90oCrHAz/pwHZ7zXb1n7naNtvVW3+QmSdMtGLWXcjNzA3Ahk7uIyLuy8yhTu6jF2xX/5mrbbNds1e3v5y1B1g8bv7kskyS1AXdDv17gVMiYklEzAdWAFu7XAdJqlZXu3cyczQiPgB8k+Ytmzdk5kPdrEPR0e6jHrJd/Weuts12zVJdvZArSeotB1yTpIoY+pJUkTkd+hGxLCIeiYiRiPidwUQiYlVE/DgiHiyP9/WintMRETdExDMR8f1J1kdEfK60+XsRcWa363goWmjXcETsG3es/r7bdTwUEbE4Iu6OiIcj4qGIuHKCMn13zFpsV78eswUR8e2I+G5p2/oJyhwZETeXY7Y9IgZ7UNVDk5lz8kHzQvH/AK8F5gPfBU4/oMwq4B97XddptustwJnA9ydZfwHwDSCAs4Htva5zm9o1DNzW63oeQrtOBM4s08cA/z3B67DvjlmL7erXYxbA0WX6CGA7cPYBZf4K+EKZXgHc3Ot6t/qYy2f6Lw/5kJm/BMaGfOhrmfktYO9BiiwHvpRN9wALI+LE7tTu0LXQrr6UmU9n5gNl+mfATmDRAcX67pi12K6+VI7D/jJ7RHkceMfLcmBTmb4FODcioktVnJG5HPqLgKfGze9m4hflX5SP1LdExOIJ1vebVtvdj95cPnJ/IyJe3+vKTFfpAvgTmmeO4/X1MTtIu6BPj1lEHB4RDwLPANsyc9JjlpmjwD7g1V2t5CGay6Hfin8HBjPzj4Bt/OadW7PPAzTHEzkDuB74em+rMz0RcTTwVeBDmfnTXtenXaZoV98es8z8VWb+Mc1RA86KiDf0uEptM5dDf8ohHzLz2cx8qcx+EXhjl+rWSXNyqIvM/OnYR+7MvAM4IiKO73G1WhIRR9AMxhsz82sTFOnLYzZVu/r5mI3JzOeBu4FlB6x6+ZhFxDzgWODZrlbuEM3l0J9yyIcD+k3fQbNfst9tBd5T7gg5G9iXmU/3ulIzFRG/P9ZnGhFn0Xztzvo/slLnjcDOzPzsJMX67pi10q4+PmYnRMTCMn0Uzd//+MEBxbYCK8v0xcBdWa7qznazbpTNdslJhnyIiI8B92XmVuCDEfEOYJTmRcRVPatwiyLiKzTvijg+InYD62heaCIzvwDcQfNukBHgF8B7e1PT6WmhXRcD74+IUeAFYEWf/JGdA7wb2FH6iAE+As2fc+vjY9ZKu/r1mJ0IbIrmjz4dBmzJzNsOyI6NwJcjYoRmdqzoXXWnx2EYJKkic7l7R5J0AENfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVeT/AdHBg3mBM54VAAAAAElFTkSuQmCC\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "avocado['average_price'].hist()\n", "avocado_train['average_price'].hist()\n", "avocado_validate['average_price'].hist()\n", "avocado_test['average_price'].hist()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Normalizacja wartości." 
] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>average_price</th>\n", " <th>total_volume</th>\n", " <th>4046</th>\n", " <th>4225</th>\n", " <th>4770</th>\n", " <th>total_bags</th>\n", " <th>small_bags</th>\n", " <th>large_bags</th>\n", " <th>xlarge_bags</th>\n", " <th>type</th>\n", " <th>geography</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2015-01-04</td>\n", " <td>0.277580</td>\n", " <td>0.000640</td>\n", " <td>0.000124</td>\n", " <td>0.001382</td>\n", " <td>0.000020</td>\n", " <td>0.000307</td>\n", " <td>0.000447</td>\n", " <td>0.000040</td>\n", " <td>0.000000</td>\n", " <td>conventional</td>\n", " <td>Albany</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2015-01-04</td>\n", " <td>0.480427</td>\n", " <td>0.000020</td>\n", " <td>0.000003</td>\n", " <td>0.000008</td>\n", " <td>0.000000</td>\n", " <td>0.000037</td>\n", " <td>0.000057</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>Albany</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2015-01-04</td>\n", " <td>0.199288</td>\n", " <td>0.006826</td>\n", " <td>0.016018</td>\n", " <td>0.001164</td>\n", " <td>0.000032</td>\n", " <td>0.001477</td>\n", " <td>0.000813</td>\n", " <td>0.002259</td>\n", " <td>0.000000</td>\n", " <td>conventional</td>\n", " <td>Atlanta</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2015-01-04</td>\n", " <td>0.469751</td>\n", " <td>0.000059</td>\n", " <td>0.000066</td>\n", " <td>0.000046</td>\n", " <td>0.000000</td>\n", " 
<td>0.000044</td>\n", " <td>0.000052</td>\n", " <td>0.000025</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>Atlanta</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>2015-01-04</td>\n", " <td>0.227758</td>\n", " <td>0.012366</td>\n", " <td>0.002374</td>\n", " <td>0.027010</td>\n", " <td>0.015706</td>\n", " <td>0.004454</td>\n", " <td>0.006674</td>\n", " <td>0.000299</td>\n", " <td>0.000000</td>\n", " <td>conventional</td>\n", " <td>Baltimore/Washington</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>33040</th>\n", " <td>2020-11-29</td>\n", " <td>0.366548</td>\n", " <td>0.024844</td>\n", " <td>0.002970</td>\n", " <td>0.004787</td>\n", " <td>0.001028</td>\n", " <td>0.044649</td>\n", " <td>0.044121</td>\n", " <td>0.036030</td>\n", " <td>0.019937</td>\n", " <td>organic</td>\n", " <td>Total U.S.</td>\n", " </tr>\n", " <tr>\n", " <th>33041</th>\n", " <td>2020-11-29</td>\n", " <td>0.167260</td>\n", " <td>0.091202</td>\n", " <td>0.059484</td>\n", " <td>0.028776</td>\n", " <td>0.007753</td>\n", " <td>0.119620</td>\n", " <td>0.106938</td>\n", " <td>0.114914</td>\n", " <td>0.043846</td>\n", " <td>conventional</td>\n", " <td>West</td>\n", " </tr>\n", " <tr>\n", " <th>33042</th>\n", " <td>2020-11-29</td>\n", " <td>0.370107</td>\n", " <td>0.004550</td>\n", " <td>0.000584</td>\n", " <td>0.000945</td>\n", " <td>0.000250</td>\n", " <td>0.008101</td>\n", " <td>0.005966</td>\n", " <td>0.010062</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>West</td>\n", " </tr>\n", " <tr>\n", " <th>33043</th>\n", " <td>2020-11-29</td>\n", " <td>0.081851</td>\n", " <td>0.012913</td>\n", " <td>0.010319</td>\n", " <td>0.003918</td>\n", " <td>0.004141</td>\n", " <td>0.015696</td>\n", " <td>0.013906</td>\n", " 
<td>0.015817</td>\n", " <td>0.000577</td>\n", " <td>conventional</td>\n", " <td>West Tex/New Mexico</td>\n", " </tr>\n", " <tr>\n", " <th>33044</th>\n", " <td>2020-11-29</td>\n", " <td>0.323843</td>\n", " <td>0.000377</td>\n", " <td>0.000054</td>\n", " <td>0.000030</td>\n", " <td>0.000615</td>\n", " <td>0.000653</td>\n", " <td>0.000867</td>\n", " <td>0.000215</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>West Tex/New Mexico</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>33045 rows × 12 columns</p>\n", "</div>" ], "text/plain": [ " date average_price total_volume 4046 4225 4770 \\\n", "0 2015-01-04 0.277580 0.000640 0.000124 0.001382 0.000020 \n", "1 2015-01-04 0.480427 0.000020 0.000003 0.000008 0.000000 \n", "2 2015-01-04 0.199288 0.006826 0.016018 0.001164 0.000032 \n", "3 2015-01-04 0.469751 0.000059 0.000066 0.000046 0.000000 \n", "4 2015-01-04 0.227758 0.012366 0.002374 0.027010 0.015706 \n", "... ... ... ... ... ... ... \n", "33040 2020-11-29 0.366548 0.024844 0.002970 0.004787 0.001028 \n", "33041 2020-11-29 0.167260 0.091202 0.059484 0.028776 0.007753 \n", "33042 2020-11-29 0.370107 0.004550 0.000584 0.000945 0.000250 \n", "33043 2020-11-29 0.081851 0.012913 0.010319 0.003918 0.004141 \n", "33044 2020-11-29 0.323843 0.000377 0.000054 0.000030 0.000615 \n", "\n", " total_bags small_bags large_bags xlarge_bags type \\\n", "0 0.000307 0.000447 0.000040 0.000000 conventional \n", "1 0.000037 0.000057 0.000000 0.000000 organic \n", "2 0.001477 0.000813 0.002259 0.000000 conventional \n", "3 0.000044 0.000052 0.000025 0.000000 organic \n", "4 0.004454 0.006674 0.000299 0.000000 conventional \n", "... ... ... ... ... ... 
\n", "33040 0.044649 0.044121 0.036030 0.019937 organic \n", "33041 0.119620 0.106938 0.114914 0.043846 conventional \n", "33042 0.008101 0.005966 0.010062 0.000000 organic \n", "33043 0.015696 0.013906 0.015817 0.000577 conventional \n", "33044 0.000653 0.000867 0.000215 0.000000 organic \n", "\n", " geography \n", "0 Albany \n", "1 Albany \n", "2 Atlanta \n", "3 Atlanta \n", "4 Baltimore/Washington \n", "... ... \n", "33040 Total U.S. \n", "33041 West \n", "33042 West \n", "33043 West Tex/New Mexico \n", "33044 West Tex/New Mexico \n", "\n", "[33045 rows x 12 columns]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Min-max normalization, based on https://www.journaldev.com/45109/normalize-data-in-python\n", "from sklearn import preprocessing\n", "\n", "# Select the float64 columns once and rescale each of them to [0, 1].\n", "num_columns = avocado.select_dtypes(include='float64').columns\n", "scaler = preprocessing.MinMaxScaler()\n", "# Write the scaled array straight back into those columns: rows are matched by\n", "# position, so the result does not depend on avocado having a default 0..n-1 index.\n", "avocado[num_columns] = scaler.fit_transform(avocado[num_columns])\n", "# NOTE(review): the scaler is fitted on the whole frame (average_price included);\n", "# avocado_train / avocado_validate / avocado_test, if already split off, are not\n", "# rescaled by this cell - confirm that this is intended.\n", "\n", "avocado" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Usunięcie artefaktów." 
] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "date 0\n", "average_price 0\n", "total_volume 0\n", "4046 0\n", "4225 0\n", "4770 0\n", "total_bags 0\n", "small_bags 0\n", "large_bags 0\n", "xlarge_bags 0\n", "type 0\n", "geography 0\n", "dtype: int64" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count the missing values in every column.\n", "avocado.isna().sum()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>average_price</th>\n", " <th>total_volume</th>\n", " <th>4046</th>\n", " <th>4225</th>\n", " <th>4770</th>\n", " <th>total_bags</th>\n", " <th>small_bags</th>\n", " <th>large_bags</th>\n", " <th>xlarge_bags</th>\n", " <th>type</th>\n", " <th>geography</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2015-01-04</td>\n", " <td>0.277580</td>\n", " <td>0.000640</td>\n", " <td>0.000124</td>\n", " <td>0.001382</td>\n", " <td>0.000020</td>\n", " <td>0.000307</td>\n", " <td>0.000447</td>\n", " <td>0.000040</td>\n", " <td>0.000000</td>\n", " <td>conventional</td>\n", " <td>Albany</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2015-01-04</td>\n", " <td>0.480427</td>\n", " <td>0.000020</td>\n", " <td>0.000003</td>\n", " <td>0.000008</td>\n", " <td>0.000000</td>\n", " <td>0.000037</td>\n", " <td>0.000057</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>Albany</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2015-01-04</td>\n", " <td>0.199288</td>\n", " 
<td>0.006826</td>\n", " <td>0.016018</td>\n", " <td>0.001164</td>\n", " <td>0.000032</td>\n", " <td>0.001477</td>\n", " <td>0.000813</td>\n", " <td>0.002259</td>\n", " <td>0.000000</td>\n", " <td>conventional</td>\n", " <td>Atlanta</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2015-01-04</td>\n", " <td>0.469751</td>\n", " <td>0.000059</td>\n", " <td>0.000066</td>\n", " <td>0.000046</td>\n", " <td>0.000000</td>\n", " <td>0.000044</td>\n", " <td>0.000052</td>\n", " <td>0.000025</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>Atlanta</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>2015-01-04</td>\n", " <td>0.227758</td>\n", " <td>0.012366</td>\n", " <td>0.002374</td>\n", " <td>0.027010</td>\n", " <td>0.015706</td>\n", " <td>0.004454</td>\n", " <td>0.006674</td>\n", " <td>0.000299</td>\n", " <td>0.000000</td>\n", " <td>conventional</td>\n", " <td>Baltimore/Washington</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>33040</th>\n", " <td>2020-11-29</td>\n", " <td>0.366548</td>\n", " <td>0.024844</td>\n", " <td>0.002970</td>\n", " <td>0.004787</td>\n", " <td>0.001028</td>\n", " <td>0.044649</td>\n", " <td>0.044121</td>\n", " <td>0.036030</td>\n", " <td>0.019937</td>\n", " <td>organic</td>\n", " <td>Total U.S.</td>\n", " </tr>\n", " <tr>\n", " <th>33041</th>\n", " <td>2020-11-29</td>\n", " <td>0.167260</td>\n", " <td>0.091202</td>\n", " <td>0.059484</td>\n", " <td>0.028776</td>\n", " <td>0.007753</td>\n", " <td>0.119620</td>\n", " <td>0.106938</td>\n", " <td>0.114914</td>\n", " <td>0.043846</td>\n", " <td>conventional</td>\n", " <td>West</td>\n", " </tr>\n", " <tr>\n", " <th>33042</th>\n", " <td>2020-11-29</td>\n", " <td>0.370107</td>\n", " <td>0.004550</td>\n", " <td>0.000584</td>\n", " 
<td>0.000945</td>\n", " <td>0.000250</td>\n", " <td>0.008101</td>\n", " <td>0.005966</td>\n", " <td>0.010062</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>West</td>\n", " </tr>\n", " <tr>\n", " <th>33043</th>\n", " <td>2020-11-29</td>\n", " <td>0.081851</td>\n", " <td>0.012913</td>\n", " <td>0.010319</td>\n", " <td>0.003918</td>\n", " <td>0.004141</td>\n", " <td>0.015696</td>\n", " <td>0.013906</td>\n", " <td>0.015817</td>\n", " <td>0.000577</td>\n", " <td>conventional</td>\n", " <td>West Tex/New Mexico</td>\n", " </tr>\n", " <tr>\n", " <th>33044</th>\n", " <td>2020-11-29</td>\n", " <td>0.323843</td>\n", " <td>0.000377</td>\n", " <td>0.000054</td>\n", " <td>0.000030</td>\n", " <td>0.000615</td>\n", " <td>0.000653</td>\n", " <td>0.000867</td>\n", " <td>0.000215</td>\n", " <td>0.000000</td>\n", " <td>organic</td>\n", " <td>West Tex/New Mexico</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>33045 rows × 12 columns</p>\n", "</div>" ], "text/plain": [ " date average_price total_volume 4046 4225 4770 \\\n", "0 2015-01-04 0.277580 0.000640 0.000124 0.001382 0.000020 \n", "1 2015-01-04 0.480427 0.000020 0.000003 0.000008 0.000000 \n", "2 2015-01-04 0.199288 0.006826 0.016018 0.001164 0.000032 \n", "3 2015-01-04 0.469751 0.000059 0.000066 0.000046 0.000000 \n", "4 2015-01-04 0.227758 0.012366 0.002374 0.027010 0.015706 \n", "... ... ... ... ... ... ... 
\n", "33040 2020-11-29 0.366548 0.024844 0.002970 0.004787 0.001028 \n", "33041 2020-11-29 0.167260 0.091202 0.059484 0.028776 0.007753 \n", "33042 2020-11-29 0.370107 0.004550 0.000584 0.000945 0.000250 \n", "33043 2020-11-29 0.081851 0.012913 0.010319 0.003918 0.004141 \n", "33044 2020-11-29 0.323843 0.000377 0.000054 0.000030 0.000615 \n", "\n", " total_bags small_bags large_bags xlarge_bags type \\\n", "0 0.000307 0.000447 0.000040 0.000000 conventional \n", "1 0.000037 0.000057 0.000000 0.000000 organic \n", "2 0.001477 0.000813 0.002259 0.000000 conventional \n", "3 0.000044 0.000052 0.000025 0.000000 organic \n", "4 0.004454 0.006674 0.000299 0.000000 conventional \n", "... ... ... ... ... ... \n", "33040 0.044649 0.044121 0.036030 0.019937 organic \n", "33041 0.119620 0.106938 0.114914 0.043846 conventional \n", "33042 0.008101 0.005966 0.010062 0.000000 organic \n", "33043 0.015696 0.013906 0.015817 0.000577 conventional \n", "33044 0.000653 0.000867 0.000215 0.000000 organic \n", "\n", " geography \n", "0 Albany \n", "1 Albany \n", "2 Atlanta \n", "3 Atlanta \n", "4 Baltimore/Washington \n", "... ... \n", "33040 Total U.S. \n", "33041 West \n", "33042 West \n", "33043 West Tex/New Mexico \n", "33044 West Tex/New Mexico \n", "\n", "[33045 rows x 12 columns]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# dropna() returns a new frame and leaves avocado untouched; keep the result under\n", "# its own name so the cleaned data is available afterwards, not just displayed.\n", "avocado_clean = avocado.dropna()\n", "avocado_clean" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 4 }