2506 lines
115 KiB
Plaintext
2506 lines
115 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# OPIS\n",
|
||
"\n",
|
||
"#### Dataset zawiera dane dotyczące cen awokado Hass i ich sprzedaży w wybranych regionach Stanów Zjednoczonych.\n",
|
||
"\n",
|
||
"#### Opis kolumn:\n",
|
||
"- date - data obserwacji\n",
|
||
"- average_price - średnia cena pojedynczego awokado\n",
|
||
"- type - zwykłe lub organiczne\n",
|
||
"- year - rok obserwacji (kolumna usuwana w dalszej części jako redundantna względem date)\n",
|
||
"- geography - miasto/region obserwacji\n",
|
||
"- total_volume - liczba sprzedanych awokado\n",
|
||
"- 4046 - liczba sprzedanych awokado z kodem PLU 4046 (małe)\n",
|
||
"- 4225 - liczba sprzedanych awokado z kodem PLU 4225 (duże)\n",
|
||
"- 4770 - liczba sprzedanych awokado z kodem PLU 4770 (bardzo duże)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Requirement already satisfied: kaggle in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (1.5.12)\n",
|
||
"Requirement already satisfied: six>=1.10 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (1.15.0)\n",
|
||
"Requirement already satisfied: requests in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (2.25.1)\n",
|
||
"Requirement already satisfied: python-dateutil in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (2.8.1)\n",
|
||
"Requirement already satisfied: python-slugify in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (4.0.1)\n",
|
||
"Requirement already satisfied: urllib3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (1.26.2)\n",
|
||
"Requirement already satisfied: tqdm in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (4.59.0)\n",
|
||
"Requirement already satisfied: certifi in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (2020.12.5)\n",
|
||
"Requirement already satisfied: text-unidecode>=1.3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from python-slugify->kaggle) (1.3)\n",
|
||
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from requests->kaggle) (4.0.0)\n",
|
||
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from requests->kaggle) (2.10)\n",
|
||
"OOOOOOOOO /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/bin/python\n",
|
||
"Requirement already satisfied: pandas in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (1.2.3)\n",
|
||
"Requirement already satisfied: numpy>=1.16.5 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from pandas) (1.20.1)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from pandas) (2.8.1)\n",
|
||
"Requirement already satisfied: pytz>=2017.3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from pandas) (2020.4)\n",
|
||
"Requirement already satisfied: six>=1.5 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
|
||
"Requirement already satisfied: sklearn in /usr/local/lib/python3.9/site-packages (0.0)\n",
|
||
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.9/site-packages (from sklearn) (0.24.1)\n",
|
||
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (1.0.1)\n",
|
||
"Requirement already satisfied: scipy>=0.19.1 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (1.6.1)\n",
|
||
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (2.1.0)\n",
|
||
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (1.20.1)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import sys\n",
|
||
"!{sys.executable} -m pip install kaggle\n",
|
||
"!echo OOOOOOOOO {sys.executable}\n",
|
||
"!{sys.executable} -m pip install pandas\n",
|
||
"!python3 -m pip install sklearn"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Pobranie zbioru."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"!kaggle datasets download -d timmate/avocado-prices-2020"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Archive: avocado-prices-2020.zip\n",
|
||
" inflating: avocado-updated-2020.csv \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!unzip -o avocado-prices-2020.zip\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"date,average_price,total_volume,4046,4225,4770,total_bags,small_bags,large_bags,xlarge_bags,type,year,geography\r\n",
|
||
"2015-01-04,1.22,40873.28,2819.5,28287.42,49.9,9716.46,9186.93,529.53,0.0,conventional,2015,Albany\r\n",
|
||
"2015-01-04,1.79,1373.95,57.42,153.88,0.0,1162.65,1162.65,0.0,0.0,organic,2015,Albany\r\n",
|
||
"2015-01-04,1.0,435021.49,364302.39,23821.16,82.15,46815.79,16707.15,30108.64,0.0,conventional,2015,Atlanta\r\n",
|
||
"2015-01-04,1.76,3846.69,1500.15,938.35,0.0,1408.19,1071.35,336.84,0.0,organic,2015,Atlanta\r\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"!head -n 5 avocado-updated-2020.csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Usunięcie zbędnej kolumny `year` (dane redundantne względem `date`)."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>total_volume</th>\n",
|
||
" <th>4046</th>\n",
|
||
" <th>4225</th>\n",
|
||
" <th>4770</th>\n",
|
||
" <th>total_bags</th>\n",
|
||
" <th>small_bags</th>\n",
|
||
" <th>large_bags</th>\n",
|
||
" <th>xlarge_bags</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>geography</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>1.22</td>\n",
|
||
" <td>40873.28</td>\n",
|
||
" <td>2819.50</td>\n",
|
||
" <td>28287.42</td>\n",
|
||
" <td>49.90</td>\n",
|
||
" <td>9716.46</td>\n",
|
||
" <td>9186.93</td>\n",
|
||
" <td>529.53</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Albany</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>1.79</td>\n",
|
||
" <td>1373.95</td>\n",
|
||
" <td>57.42</td>\n",
|
||
" <td>153.88</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>1162.65</td>\n",
|
||
" <td>1162.65</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Albany</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>1.00</td>\n",
|
||
" <td>435021.49</td>\n",
|
||
" <td>364302.39</td>\n",
|
||
" <td>23821.16</td>\n",
|
||
" <td>82.15</td>\n",
|
||
" <td>46815.79</td>\n",
|
||
" <td>16707.15</td>\n",
|
||
" <td>30108.64</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Atlanta</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>1.76</td>\n",
|
||
" <td>3846.69</td>\n",
|
||
" <td>1500.15</td>\n",
|
||
" <td>938.35</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>1408.19</td>\n",
|
||
" <td>1071.35</td>\n",
|
||
" <td>336.84</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Atlanta</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>1.08</td>\n",
|
||
" <td>788025.06</td>\n",
|
||
" <td>53987.31</td>\n",
|
||
" <td>552906.04</td>\n",
|
||
" <td>39995.03</td>\n",
|
||
" <td>141136.68</td>\n",
|
||
" <td>137146.07</td>\n",
|
||
" <td>3990.61</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Baltimore/Washington</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33040</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>1.47</td>\n",
|
||
" <td>1583056.27</td>\n",
|
||
" <td>67544.48</td>\n",
|
||
" <td>97996.46</td>\n",
|
||
" <td>2617.17</td>\n",
|
||
" <td>1414878.10</td>\n",
|
||
" <td>906711.52</td>\n",
|
||
" <td>480191.83</td>\n",
|
||
" <td>27974.75</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Total U.S.</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33041</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.91</td>\n",
|
||
" <td>5811114.22</td>\n",
|
||
" <td>1352877.53</td>\n",
|
||
" <td>589061.83</td>\n",
|
||
" <td>19741.90</td>\n",
|
||
" <td>3790665.29</td>\n",
|
||
" <td>2197611.02</td>\n",
|
||
" <td>1531530.14</td>\n",
|
||
" <td>61524.13</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>West</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33042</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>1.48</td>\n",
|
||
" <td>289961.27</td>\n",
|
||
" <td>13273.75</td>\n",
|
||
" <td>19341.09</td>\n",
|
||
" <td>636.51</td>\n",
|
||
" <td>256709.92</td>\n",
|
||
" <td>122606.21</td>\n",
|
||
" <td>134103.71</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>West</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33043</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.67</td>\n",
|
||
" <td>822818.75</td>\n",
|
||
" <td>234688.01</td>\n",
|
||
" <td>80205.15</td>\n",
|
||
" <td>10543.63</td>\n",
|
||
" <td>497381.96</td>\n",
|
||
" <td>285764.11</td>\n",
|
||
" <td>210808.02</td>\n",
|
||
" <td>809.83</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>West Tex/New Mexico</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33044</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>1.35</td>\n",
|
||
" <td>24106.58</td>\n",
|
||
" <td>1236.96</td>\n",
|
||
" <td>617.80</td>\n",
|
||
" <td>1564.98</td>\n",
|
||
" <td>20686.84</td>\n",
|
||
" <td>17824.52</td>\n",
|
||
" <td>2862.32</td>\n",
|
||
" <td>0.00</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>West Tex/New Mexico</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>33045 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date average_price total_volume 4046 4225 \\\n",
|
||
"0 2015-01-04 1.22 40873.28 2819.50 28287.42 \n",
|
||
"1 2015-01-04 1.79 1373.95 57.42 153.88 \n",
|
||
"2 2015-01-04 1.00 435021.49 364302.39 23821.16 \n",
|
||
"3 2015-01-04 1.76 3846.69 1500.15 938.35 \n",
|
||
"4 2015-01-04 1.08 788025.06 53987.31 552906.04 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"33040 2020-11-29 1.47 1583056.27 67544.48 97996.46 \n",
|
||
"33041 2020-11-29 0.91 5811114.22 1352877.53 589061.83 \n",
|
||
"33042 2020-11-29 1.48 289961.27 13273.75 19341.09 \n",
|
||
"33043 2020-11-29 0.67 822818.75 234688.01 80205.15 \n",
|
||
"33044 2020-11-29 1.35 24106.58 1236.96 617.80 \n",
|
||
"\n",
|
||
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
|
||
"0 49.90 9716.46 9186.93 529.53 0.00 \n",
|
||
"1 0.00 1162.65 1162.65 0.00 0.00 \n",
|
||
"2 82.15 46815.79 16707.15 30108.64 0.00 \n",
|
||
"3 0.00 1408.19 1071.35 336.84 0.00 \n",
|
||
"4 39995.03 141136.68 137146.07 3990.61 0.00 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"33040 2617.17 1414878.10 906711.52 480191.83 27974.75 \n",
|
||
"33041 19741.90 3790665.29 2197611.02 1531530.14 61524.13 \n",
|
||
"33042 636.51 256709.92 122606.21 134103.71 0.00 \n",
|
||
"33043 10543.63 497381.96 285764.11 210808.02 809.83 \n",
|
||
"33044 1564.98 20686.84 17824.52 2862.32 0.00 \n",
|
||
"\n",
|
||
" type geography \n",
|
||
"0 conventional Albany \n",
|
||
"1 organic Albany \n",
|
||
"2 conventional Atlanta \n",
|
||
"3 organic Atlanta \n",
|
||
"4 conventional Baltimore/Washington \n",
|
||
"... ... ... \n",
|
||
"33040 organic Total U.S. \n",
|
||
"33041 conventional West \n",
|
||
"33042 organic West \n",
|
||
"33043 conventional West Tex/New Mexico \n",
|
||
"33044 organic West Tex/New Mexico \n",
|
||
"\n",
|
||
"[33045 rows x 12 columns]"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"avocado_with_year = pd.read_csv('avocado-updated-2020.csv')\n",
|
||
"avocado_with_year\n",
|
||
"\n",
|
||
"new = ['date', 'average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags', 'type', 'geography']\n",
|
||
"avocado = avocado_with_year[new]\n",
|
||
"avocado.to_csv(\"avocado.csv\", index=False)\n",
|
||
"avocado = pd.read_csv('avocado.csv')\n",
|
||
"avocado"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Podział zbioru na train/dev/test."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"scrolled": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"avocado_train, avocado_validate, avocado_test = np.split(avocado.sample(frac=1), [int(.6*len(avocado)), int(.8*len(avocado))])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Podsumowanie zbioru i poszczególnych podzbiorów."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Wielkości zbioru i podzbiorów."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Avocado: 396540\n",
|
||
"Avocado (train) : 237924\n",
|
||
"Avocado (validate): 79308\n",
|
||
"Avocado (test) 79308\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"Avocado: \".ljust(20), np.size(avocado))\n",
|
||
"print(\"Avocado (train) : \".ljust(20), np.size(avocado_train))\n",
|
||
"print(\"Avocado (validate): \".ljust(20), np.size(avocado_validate))\n",
|
||
"print(\"Avocado (test) \".ljust(20), np.size(avocado_test))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Podsumowanie zbioru avocado."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>total_volume</th>\n",
|
||
" <th>4046</th>\n",
|
||
" <th>4225</th>\n",
|
||
" <th>4770</th>\n",
|
||
" <th>total_bags</th>\n",
|
||
" <th>small_bags</th>\n",
|
||
" <th>large_bags</th>\n",
|
||
" <th>xlarge_bags</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>geography</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>33045</td>\n",
|
||
" <td>33045.000000</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>3.304500e+04</td>\n",
|
||
" <td>33045</td>\n",
|
||
" <td>33045</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>306</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>54</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>2017-10-01</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Atlanta</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>108</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>16524</td>\n",
|
||
" <td>612</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.379941</td>\n",
|
||
" <td>9.683997e+05</td>\n",
|
||
" <td>3.023914e+05</td>\n",
|
||
" <td>2.797693e+05</td>\n",
|
||
" <td>2.148255e+04</td>\n",
|
||
" <td>3.646735e+05</td>\n",
|
||
" <td>2.501980e+05</td>\n",
|
||
" <td>1.067329e+05</td>\n",
|
||
" <td>7.742585e+03</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.378972</td>\n",
|
||
" <td>3.934533e+06</td>\n",
|
||
" <td>1.301026e+06</td>\n",
|
||
" <td>1.151052e+06</td>\n",
|
||
" <td>1.001607e+05</td>\n",
|
||
" <td>1.564004e+06</td>\n",
|
||
" <td>1.037734e+06</td>\n",
|
||
" <td>5.167226e+05</td>\n",
|
||
" <td>4.819803e+04</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.440000</td>\n",
|
||
" <td>8.456000e+01</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.100000</td>\n",
|
||
" <td>1.511895e+04</td>\n",
|
||
" <td>7.673100e+02</td>\n",
|
||
" <td>2.712470e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>9.121860e+03</td>\n",
|
||
" <td>6.478630e+03</td>\n",
|
||
" <td>4.662900e+02</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.350000</td>\n",
|
||
" <td>1.291170e+05</td>\n",
|
||
" <td>1.099477e+04</td>\n",
|
||
" <td>2.343600e+04</td>\n",
|
||
" <td>1.780900e+02</td>\n",
|
||
" <td>5.322224e+04</td>\n",
|
||
" <td>3.687699e+04</td>\n",
|
||
" <td>6.375860e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.620000</td>\n",
|
||
" <td>5.058285e+05</td>\n",
|
||
" <td>1.190219e+05</td>\n",
|
||
" <td>1.352389e+05</td>\n",
|
||
" <td>5.096530e+03</td>\n",
|
||
" <td>1.744314e+05</td>\n",
|
||
" <td>1.206624e+05</td>\n",
|
||
" <td>4.041723e+04</td>\n",
|
||
" <td>8.044400e+02</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.250000</td>\n",
|
||
" <td>6.371614e+07</td>\n",
|
||
" <td>2.274362e+07</td>\n",
|
||
" <td>2.047057e+07</td>\n",
|
||
" <td>2.546439e+06</td>\n",
|
||
" <td>3.168919e+07</td>\n",
|
||
" <td>2.055041e+07</td>\n",
|
||
" <td>1.332760e+07</td>\n",
|
||
" <td>1.403184e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date average_price total_volume 4046 4225 \\\n",
|
||
"count 33045 33045.000000 3.304500e+04 3.304500e+04 3.304500e+04 \n",
|
||
"unique 306 NaN NaN NaN NaN \n",
|
||
"top 2017-10-01 NaN NaN NaN NaN \n",
|
||
"freq 108 NaN NaN NaN NaN \n",
|
||
"mean NaN 1.379941 9.683997e+05 3.023914e+05 2.797693e+05 \n",
|
||
"std NaN 0.378972 3.934533e+06 1.301026e+06 1.151052e+06 \n",
|
||
"min NaN 0.440000 8.456000e+01 0.000000e+00 0.000000e+00 \n",
|
||
"25% NaN 1.100000 1.511895e+04 7.673100e+02 2.712470e+03 \n",
|
||
"50% NaN 1.350000 1.291170e+05 1.099477e+04 2.343600e+04 \n",
|
||
"75% NaN 1.620000 5.058285e+05 1.190219e+05 1.352389e+05 \n",
|
||
"max NaN 3.250000 6.371614e+07 2.274362e+07 2.047057e+07 \n",
|
||
"\n",
|
||
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
|
||
"count 3.304500e+04 3.304500e+04 3.304500e+04 3.304500e+04 3.304500e+04 \n",
|
||
"unique NaN NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN NaN \n",
|
||
"mean 2.148255e+04 3.646735e+05 2.501980e+05 1.067329e+05 7.742585e+03 \n",
|
||
"std 1.001607e+05 1.564004e+06 1.037734e+06 5.167226e+05 4.819803e+04 \n",
|
||
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
||
"25% 0.000000e+00 9.121860e+03 6.478630e+03 4.662900e+02 0.000000e+00 \n",
|
||
"50% 1.780900e+02 5.322224e+04 3.687699e+04 6.375860e+03 0.000000e+00 \n",
|
||
"75% 5.096530e+03 1.744314e+05 1.206624e+05 4.041723e+04 8.044400e+02 \n",
|
||
"max 2.546439e+06 3.168919e+07 2.055041e+07 1.332760e+07 1.403184e+06 \n",
|
||
"\n",
|
||
" type geography \n",
|
||
"count 33045 33045 \n",
|
||
"unique 2 54 \n",
|
||
"top conventional Atlanta \n",
|
||
"freq 16524 612 \n",
|
||
"mean NaN NaN \n",
|
||
"std NaN NaN \n",
|
||
"min NaN NaN \n",
|
||
"25% NaN NaN \n",
|
||
"50% NaN NaN \n",
|
||
"75% NaN NaN \n",
|
||
"max NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"avocado.describe(include = 'all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Podsumowanie podzbioru train."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>total_volume</th>\n",
|
||
" <th>4046</th>\n",
|
||
" <th>4225</th>\n",
|
||
" <th>4770</th>\n",
|
||
" <th>total_bags</th>\n",
|
||
" <th>small_bags</th>\n",
|
||
" <th>large_bags</th>\n",
|
||
" <th>xlarge_bags</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>geography</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>19827</td>\n",
|
||
" <td>19827.000000</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>1.982700e+04</td>\n",
|
||
" <td>19827</td>\n",
|
||
" <td>19827</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>306</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>54</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>2018-09-23</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Sacramento</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>77</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>9954</td>\n",
|
||
" <td>404</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.380658</td>\n",
|
||
" <td>9.503549e+05</td>\n",
|
||
" <td>2.955048e+05</td>\n",
|
||
" <td>2.762023e+05</td>\n",
|
||
" <td>2.117442e+04</td>\n",
|
||
" <td>3.573659e+05</td>\n",
|
||
" <td>2.448356e+05</td>\n",
|
||
" <td>1.049736e+05</td>\n",
|
||
" <td>7.556707e+03</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.377988</td>\n",
|
||
" <td>3.896388e+06</td>\n",
|
||
" <td>1.285945e+06</td>\n",
|
||
" <td>1.147780e+06</td>\n",
|
||
" <td>1.008332e+05</td>\n",
|
||
" <td>1.548676e+06</td>\n",
|
||
" <td>1.023617e+06</td>\n",
|
||
" <td>5.161354e+05</td>\n",
|
||
" <td>4.776408e+04</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.460000</td>\n",
|
||
" <td>2.534500e+02</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.100000</td>\n",
|
||
" <td>1.509891e+04</td>\n",
|
||
" <td>7.560400e+02</td>\n",
|
||
" <td>2.695640e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>9.095285e+03</td>\n",
|
||
" <td>6.430960e+03</td>\n",
|
||
" <td>4.678750e+02</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.350000</td>\n",
|
||
" <td>1.275485e+05</td>\n",
|
||
" <td>1.086294e+04</td>\n",
|
||
" <td>2.337789e+04</td>\n",
|
||
" <td>1.714100e+02</td>\n",
|
||
" <td>5.240743e+04</td>\n",
|
||
" <td>3.663295e+04</td>\n",
|
||
" <td>6.148990e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.610000</td>\n",
|
||
" <td>4.996119e+05</td>\n",
|
||
" <td>1.174216e+05</td>\n",
|
||
" <td>1.337254e+05</td>\n",
|
||
" <td>4.976950e+03</td>\n",
|
||
" <td>1.721448e+05</td>\n",
|
||
" <td>1.193927e+05</td>\n",
|
||
" <td>3.875767e+04</td>\n",
|
||
" <td>7.391950e+02</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.170000</td>\n",
|
||
" <td>6.371614e+07</td>\n",
|
||
" <td>2.113740e+07</td>\n",
|
||
" <td>2.047057e+07</td>\n",
|
||
" <td>2.546439e+06</td>\n",
|
||
" <td>3.168919e+07</td>\n",
|
||
" <td>2.055041e+07</td>\n",
|
||
" <td>1.332760e+07</td>\n",
|
||
" <td>1.403184e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date average_price total_volume 4046 4225 \\\n",
|
||
"count 19827 19827.000000 1.982700e+04 1.982700e+04 1.982700e+04 \n",
|
||
"unique 306 NaN NaN NaN NaN \n",
|
||
"top 2018-09-23 NaN NaN NaN NaN \n",
|
||
"freq 77 NaN NaN NaN NaN \n",
|
||
"mean NaN 1.380658 9.503549e+05 2.955048e+05 2.762023e+05 \n",
|
||
"std NaN 0.377988 3.896388e+06 1.285945e+06 1.147780e+06 \n",
|
||
"min NaN 0.460000 2.534500e+02 0.000000e+00 0.000000e+00 \n",
|
||
"25% NaN 1.100000 1.509891e+04 7.560400e+02 2.695640e+03 \n",
|
||
"50% NaN 1.350000 1.275485e+05 1.086294e+04 2.337789e+04 \n",
|
||
"75% NaN 1.610000 4.996119e+05 1.174216e+05 1.337254e+05 \n",
|
||
"max NaN 3.170000 6.371614e+07 2.113740e+07 2.047057e+07 \n",
|
||
"\n",
|
||
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
|
||
"count 1.982700e+04 1.982700e+04 1.982700e+04 1.982700e+04 1.982700e+04 \n",
|
||
"unique NaN NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN NaN \n",
|
||
"mean 2.117442e+04 3.573659e+05 2.448356e+05 1.049736e+05 7.556707e+03 \n",
|
||
"std 1.008332e+05 1.548676e+06 1.023617e+06 5.161354e+05 4.776408e+04 \n",
|
||
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
||
"25% 0.000000e+00 9.095285e+03 6.430960e+03 4.678750e+02 0.000000e+00 \n",
|
||
"50% 1.714100e+02 5.240743e+04 3.663295e+04 6.148990e+03 0.000000e+00 \n",
|
||
"75% 4.976950e+03 1.721448e+05 1.193927e+05 3.875767e+04 7.391950e+02 \n",
|
||
"max 2.546439e+06 3.168919e+07 2.055041e+07 1.332760e+07 1.403184e+06 \n",
|
||
"\n",
|
||
" type geography \n",
|
||
"count 19827 19827 \n",
|
||
"unique 2 54 \n",
|
||
"top organic Sacramento \n",
|
||
"freq 9954 404 \n",
|
||
"mean NaN NaN \n",
|
||
"std NaN NaN \n",
|
||
"min NaN NaN \n",
|
||
"25% NaN NaN \n",
|
||
"50% NaN NaN \n",
|
||
"75% NaN NaN \n",
|
||
"max NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"avocado_train.describe(include= 'all' )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Podsumowanie podzbioru validate."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>total_volume</th>\n",
|
||
" <th>4046</th>\n",
|
||
" <th>4225</th>\n",
|
||
" <th>4770</th>\n",
|
||
" <th>total_bags</th>\n",
|
||
" <th>small_bags</th>\n",
|
||
" <th>large_bags</th>\n",
|
||
" <th>xlarge_bags</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>geography</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6609</td>\n",
|
||
" <td>6609.000000</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6609</td>\n",
|
||
" <td>6609</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>306</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>54</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>2020-05-03</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Jacksonville</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>35</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3365</td>\n",
|
||
" <td>149</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.382624</td>\n",
|
||
" <td>9.914296e+05</td>\n",
|
||
" <td>3.140144e+05</td>\n",
|
||
" <td>2.827458e+05</td>\n",
|
||
" <td>2.172480e+04</td>\n",
|
||
" <td>3.729031e+05</td>\n",
|
||
" <td>2.567059e+05</td>\n",
|
||
" <td>1.085372e+05</td>\n",
|
||
" <td>7.660065e+03</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.380997</td>\n",
|
||
" <td>4.042527e+06</td>\n",
|
||
" <td>1.341419e+06</td>\n",
|
||
" <td>1.181393e+06</td>\n",
|
||
" <td>1.021178e+05</td>\n",
|
||
" <td>1.596924e+06</td>\n",
|
||
" <td>1.065783e+06</td>\n",
|
||
" <td>5.196275e+05</td>\n",
|
||
" <td>4.795256e+04</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.440000</td>\n",
|
||
" <td>8.456000e+01</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.100000</td>\n",
|
||
" <td>1.486299e+04</td>\n",
|
||
" <td>7.570000e+02</td>\n",
|
||
" <td>2.534810e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>9.007310e+03</td>\n",
|
||
" <td>6.281480e+03</td>\n",
|
||
" <td>4.562400e+02</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.350000</td>\n",
|
||
" <td>1.241199e+05</td>\n",
|
||
" <td>1.023778e+04</td>\n",
|
||
" <td>2.204006e+04</td>\n",
|
||
" <td>1.674700e+02</td>\n",
|
||
" <td>5.247009e+04</td>\n",
|
||
" <td>3.492217e+04</td>\n",
|
||
" <td>6.458780e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.620000</td>\n",
|
||
" <td>5.026773e+05</td>\n",
|
||
" <td>1.207824e+05</td>\n",
|
||
" <td>1.307007e+05</td>\n",
|
||
" <td>5.104000e+03</td>\n",
|
||
" <td>1.706264e+05</td>\n",
|
||
" <td>1.197749e+05</td>\n",
|
||
" <td>4.128634e+04</td>\n",
|
||
" <td>7.951300e+02</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.250000</td>\n",
|
||
" <td>6.250565e+07</td>\n",
|
||
" <td>2.274362e+07</td>\n",
|
||
" <td>2.044550e+07</td>\n",
|
||
" <td>1.800066e+06</td>\n",
|
||
" <td>2.666884e+07</td>\n",
|
||
" <td>1.740824e+07</td>\n",
|
||
" <td>1.077854e+07</td>\n",
|
||
" <td>1.123540e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date average_price total_volume 4046 4225 \\\n",
|
||
"count 6609 6609.000000 6.609000e+03 6.609000e+03 6.609000e+03 \n",
|
||
"unique 306 NaN NaN NaN NaN \n",
|
||
"top 2020-05-03 NaN NaN NaN NaN \n",
|
||
"freq 35 NaN NaN NaN NaN \n",
|
||
"mean NaN 1.382624 9.914296e+05 3.140144e+05 2.827458e+05 \n",
|
||
"std NaN 0.380997 4.042527e+06 1.341419e+06 1.181393e+06 \n",
|
||
"min NaN 0.440000 8.456000e+01 0.000000e+00 0.000000e+00 \n",
|
||
"25% NaN 1.100000 1.486299e+04 7.570000e+02 2.534810e+03 \n",
|
||
"50% NaN 1.350000 1.241199e+05 1.023778e+04 2.204006e+04 \n",
|
||
"75% NaN 1.620000 5.026773e+05 1.207824e+05 1.307007e+05 \n",
|
||
"max NaN 3.250000 6.250565e+07 2.274362e+07 2.044550e+07 \n",
|
||
"\n",
|
||
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
|
||
"count 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 \n",
|
||
"unique NaN NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN NaN \n",
|
||
"mean 2.172480e+04 3.729031e+05 2.567059e+05 1.085372e+05 7.660065e+03 \n",
|
||
"std 1.021178e+05 1.596924e+06 1.065783e+06 5.196275e+05 4.795256e+04 \n",
|
||
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
||
"25% 0.000000e+00 9.007310e+03 6.281480e+03 4.562400e+02 0.000000e+00 \n",
|
||
"50% 1.674700e+02 5.247009e+04 3.492217e+04 6.458780e+03 0.000000e+00 \n",
|
||
"75% 5.104000e+03 1.706264e+05 1.197749e+05 4.128634e+04 7.951300e+02 \n",
|
||
"max 1.800066e+06 2.666884e+07 1.740824e+07 1.077854e+07 1.123540e+06 \n",
|
||
"\n",
|
||
" type geography \n",
|
||
"count 6609 6609 \n",
|
||
"unique 2 54 \n",
|
||
"top organic Jacksonville \n",
|
||
"freq 3365 149 \n",
|
||
"mean NaN NaN \n",
|
||
"std NaN NaN \n",
|
||
"min NaN NaN \n",
|
||
"25% NaN NaN \n",
|
||
"50% NaN NaN \n",
|
||
"75% NaN NaN \n",
|
||
"max NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"avocado_validate.describe(include = 'all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Podsumowanie podzbioru test."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>total_volume</th>\n",
|
||
" <th>4046</th>\n",
|
||
" <th>4225</th>\n",
|
||
" <th>4770</th>\n",
|
||
" <th>total_bags</th>\n",
|
||
" <th>small_bags</th>\n",
|
||
" <th>large_bags</th>\n",
|
||
" <th>xlarge_bags</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>geography</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>6609</td>\n",
|
||
" <td>6609.000000</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6.609000e+03</td>\n",
|
||
" <td>6609</td>\n",
|
||
" <td>6609</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unique</th>\n",
|
||
" <td>306</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>54</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>top</th>\n",
|
||
" <td>2020-06-21</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>California</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>freq</th>\n",
|
||
" <td>33</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3407</td>\n",
|
||
" <td>143</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.375107</td>\n",
|
||
" <td>9.995041e+05</td>\n",
|
||
" <td>3.114282e+05</td>\n",
|
||
" <td>2.874940e+05</td>\n",
|
||
" <td>2.216469e+04</td>\n",
|
||
" <td>3.783667e+05</td>\n",
|
||
" <td>2.597775e+05</td>\n",
|
||
" <td>1.102065e+05</td>\n",
|
||
" <td>8.382739e+03</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.379902</td>\n",
|
||
" <td>3.939225e+06</td>\n",
|
||
" <td>1.305043e+06</td>\n",
|
||
" <td>1.130053e+06</td>\n",
|
||
" <td>9.608845e+04</td>\n",
|
||
" <td>1.576553e+06</td>\n",
|
||
" <td>1.051335e+06</td>\n",
|
||
" <td>5.156234e+05</td>\n",
|
||
" <td>4.971697e+04</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0.480000</td>\n",
|
||
" <td>3.855500e+02</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.090000</td>\n",
|
||
" <td>1.544873e+04</td>\n",
|
||
" <td>8.225900e+02</td>\n",
|
||
" <td>2.903380e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>9.358110e+03</td>\n",
|
||
" <td>6.834760e+03</td>\n",
|
||
" <td>4.706000e+02</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.330000</td>\n",
|
||
" <td>1.409398e+05</td>\n",
|
||
" <td>1.233835e+04</td>\n",
|
||
" <td>2.530639e+04</td>\n",
|
||
" <td>2.074500e+02</td>\n",
|
||
" <td>5.576654e+04</td>\n",
|
||
" <td>3.897502e+04</td>\n",
|
||
" <td>7.182140e+03</td>\n",
|
||
" <td>0.000000e+00</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1.610000</td>\n",
|
||
" <td>5.330085e+05</td>\n",
|
||
" <td>1.221341e+05</td>\n",
|
||
" <td>1.453971e+05</td>\n",
|
||
" <td>5.358790e+03</td>\n",
|
||
" <td>1.833669e+05</td>\n",
|
||
" <td>1.254250e+05</td>\n",
|
||
" <td>4.531138e+04</td>\n",
|
||
" <td>1.012940e+03</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3.000000</td>\n",
|
||
" <td>5.453235e+07</td>\n",
|
||
" <td>1.707665e+07</td>\n",
|
||
" <td>1.789639e+07</td>\n",
|
||
" <td>1.993645e+06</td>\n",
|
||
" <td>2.735245e+07</td>\n",
|
||
" <td>1.791382e+07</td>\n",
|
||
" <td>1.063102e+07</td>\n",
|
||
" <td>1.181516e+06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date average_price total_volume 4046 4225 \\\n",
|
||
"count 6609 6609.000000 6.609000e+03 6.609000e+03 6.609000e+03 \n",
|
||
"unique 306 NaN NaN NaN NaN \n",
|
||
"top 2020-06-21 NaN NaN NaN NaN \n",
|
||
"freq 33 NaN NaN NaN NaN \n",
|
||
"mean NaN 1.375107 9.995041e+05 3.114282e+05 2.874940e+05 \n",
|
||
"std NaN 0.379902 3.939225e+06 1.305043e+06 1.130053e+06 \n",
|
||
"min NaN 0.480000 3.855500e+02 0.000000e+00 0.000000e+00 \n",
|
||
"25% NaN 1.090000 1.544873e+04 8.225900e+02 2.903380e+03 \n",
|
||
"50% NaN 1.330000 1.409398e+05 1.233835e+04 2.530639e+04 \n",
|
||
"75% NaN 1.610000 5.330085e+05 1.221341e+05 1.453971e+05 \n",
|
||
"max NaN 3.000000 5.453235e+07 1.707665e+07 1.789639e+07 \n",
|
||
"\n",
|
||
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
|
||
"count 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 \n",
|
||
"unique NaN NaN NaN NaN NaN \n",
|
||
"top NaN NaN NaN NaN NaN \n",
|
||
"freq NaN NaN NaN NaN NaN \n",
|
||
"mean 2.216469e+04 3.783667e+05 2.597775e+05 1.102065e+05 8.382739e+03 \n",
|
||
"std 9.608845e+04 1.576553e+06 1.051335e+06 5.156234e+05 4.971697e+04 \n",
|
||
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
|
||
"25% 0.000000e+00 9.358110e+03 6.834760e+03 4.706000e+02 0.000000e+00 \n",
|
||
"50% 2.074500e+02 5.576654e+04 3.897502e+04 7.182140e+03 0.000000e+00 \n",
|
||
"75% 5.358790e+03 1.833669e+05 1.254250e+05 4.531138e+04 1.012940e+03 \n",
|
||
"max 1.993645e+06 2.735245e+07 1.791382e+07 1.063102e+07 1.181516e+06 \n",
|
||
"\n",
|
||
" type geography \n",
|
||
"count 6609 6609 \n",
|
||
"unique 2 54 \n",
|
||
"top conventional California \n",
|
||
"freq 3407 143 \n",
|
||
"mean NaN NaN \n",
|
||
"std NaN NaN \n",
|
||
"min NaN NaN \n",
|
||
"25% NaN NaN \n",
|
||
"50% NaN NaN \n",
|
||
"75% NaN NaN \n",
|
||
"max NaN NaN "
|
||
]
|
||
},
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"avocado_test.describe(include = 'all')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Rozkład częstości przykładów dla poszczególnych klas."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Atlanta 612\n",
|
||
"St. Louis 612\n",
|
||
"New York 612\n",
|
||
"Indianapolis 612\n",
|
||
"Sacramento 612\n",
|
||
"Spokane 612\n",
|
||
"Philadelphia 612\n",
|
||
"South Carolina 612\n",
|
||
"West 612\n",
|
||
"San Francisco 612\n",
|
||
"Orlando 612\n",
|
||
"Southeast 612\n",
|
||
"Miami/Ft. Lauderdale 612\n",
|
||
"Nashville 612\n",
|
||
"Syracuse 612\n",
|
||
"Columbus 612\n",
|
||
"Detroit 612\n",
|
||
"Northern New England 612\n",
|
||
"Buffalo/Rochester 612\n",
|
||
"Raleigh/Greensboro 612\n",
|
||
"Midsouth 612\n",
|
||
"Boise 612\n",
|
||
"San Diego 612\n",
|
||
"Hartford/Springfield 612\n",
|
||
"Los Angeles 612\n",
|
||
"Total U.S. 612\n",
|
||
"Dallas/Ft. Worth 612\n",
|
||
"Great Lakes 612\n",
|
||
"Roanoke 612\n",
|
||
"Plains 612\n",
|
||
"California 612\n",
|
||
"Portland 612\n",
|
||
"Grand Rapids 612\n",
|
||
"Harrisburg/Scranton 612\n",
|
||
"Charlotte 612\n",
|
||
"Cincinnati/Dayton 612\n",
|
||
"Richmond/Norfolk 612\n",
|
||
"Houston 612\n",
|
||
"South Central 612\n",
|
||
"Northeast 612\n",
|
||
"Seattle 612\n",
|
||
"Jacksonville 612\n",
|
||
"Baltimore/Washington 612\n",
|
||
"Pittsburgh 612\n",
|
||
"Louisville 612\n",
|
||
"Boston 612\n",
|
||
"Tampa 612\n",
|
||
"Phoenix/Tucson 612\n",
|
||
"Chicago 612\n",
|
||
"Denver 612\n",
|
||
"Las Vegas 612\n",
|
||
"Albany 612\n",
|
||
"New Orleans/Mobile 612\n",
|
||
"West Tex/New Mexico 609\n",
|
||
"Name: geography, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"avocado.geography.value_counts() "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"California 143\n",
|
||
"Grand Rapids 139\n",
|
||
"Roanoke 139\n",
|
||
"Las Vegas 139\n",
|
||
"Spokane 137\n",
|
||
"Plains 135\n",
|
||
"Seattle 134\n",
|
||
"Louisville 132\n",
|
||
"Atlanta 131\n",
|
||
"Syracuse 130\n",
|
||
"New York 130\n",
|
||
"Nashville 129\n",
|
||
"Raleigh/Greensboro 129\n",
|
||
"Miami/Ft. Lauderdale 128\n",
|
||
"Phoenix/Tucson 128\n",
|
||
"Orlando 128\n",
|
||
"Hartford/Springfield 127\n",
|
||
"San Francisco 127\n",
|
||
"South Central 127\n",
|
||
"Charlotte 126\n",
|
||
"Richmond/Norfolk 126\n",
|
||
"West 126\n",
|
||
"Tampa 124\n",
|
||
"Los Angeles 124\n",
|
||
"South Carolina 122\n",
|
||
"Great Lakes 122\n",
|
||
"Total U.S. 122\n",
|
||
"Northeast 121\n",
|
||
"Cincinnati/Dayton 121\n",
|
||
"Columbus 121\n",
|
||
"Baltimore/Washington 119\n",
|
||
"Pittsburgh 119\n",
|
||
"Jacksonville 119\n",
|
||
"Portland 119\n",
|
||
"West Tex/New Mexico 118\n",
|
||
"Midsouth 118\n",
|
||
"Houston 117\n",
|
||
"Chicago 116\n",
|
||
"Buffalo/Rochester 116\n",
|
||
"New Orleans/Mobile 116\n",
|
||
"Philadelphia 115\n",
|
||
"San Diego 115\n",
|
||
"Indianapolis 115\n",
|
||
"Northern New England 114\n",
|
||
"Boston 114\n",
|
||
"Boise 114\n",
|
||
"Southeast 114\n",
|
||
"Dallas/Ft. Worth 113\n",
|
||
"Detroit 113\n",
|
||
"Albany 112\n",
|
||
"Denver 111\n",
|
||
"St. Louis 111\n",
|
||
"Harrisburg/Scranton 104\n",
|
||
"Sacramento 100\n",
|
||
"Name: geography, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"avocado_test.geography.value_counts() "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Sacramento 404\n",
|
||
"Albany 398\n",
|
||
"Northern New England 390\n",
|
||
"Harrisburg/Scranton 388\n",
|
||
"St. Louis 385\n",
|
||
"Columbus 384\n",
|
||
"Boise 382\n",
|
||
"Indianapolis 381\n",
|
||
"Detroit 380\n",
|
||
"South Carolina 378\n",
|
||
"West Tex/New Mexico 378\n",
|
||
"Southeast 378\n",
|
||
"Nashville 377\n",
|
||
"Denver 377\n",
|
||
"Los Angeles 377\n",
|
||
"Great Lakes 376\n",
|
||
"San Diego 375\n",
|
||
"Cincinnati/Dayton 374\n",
|
||
"Boston 374\n",
|
||
"South Central 373\n",
|
||
"New Orleans/Mobile 373\n",
|
||
"Richmond/Norfolk 371\n",
|
||
"Seattle 371\n",
|
||
"Total U.S. 371\n",
|
||
"Buffalo/Rochester 370\n",
|
||
"Northeast 369\n",
|
||
"Charlotte 368\n",
|
||
"Atlanta 368\n",
|
||
"Chicago 367\n",
|
||
"San Francisco 366\n",
|
||
"Midsouth 366\n",
|
||
"Philadelphia 365\n",
|
||
"New York 363\n",
|
||
"Portland 363\n",
|
||
"Syracuse 362\n",
|
||
"Grand Rapids 361\n",
|
||
"Louisville 361\n",
|
||
"Roanoke 361\n",
|
||
"Dallas/Ft. Worth 360\n",
|
||
"Orlando 359\n",
|
||
"Tampa 359\n",
|
||
"Houston 359\n",
|
||
"Hartford/Springfield 358\n",
|
||
"Pittsburgh 357\n",
|
||
"West 356\n",
|
||
"Miami/Ft. Lauderdale 354\n",
|
||
"Baltimore/Washington 353\n",
|
||
"Phoenix/Tucson 353\n",
|
||
"Raleigh/Greensboro 345\n",
|
||
"Jacksonville 344\n",
|
||
"Las Vegas 339\n",
|
||
"California 336\n",
|
||
"Plains 335\n",
|
||
"Spokane 335\n",
|
||
"Name: geography, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"avocado_train.geography.value_counts() "
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<AxesSubplot:>"
|
||
]
|
||
},
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.value_counts(avocado['type']).plot.bar()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<AxesSubplot:>"
|
||
]
|
||
},
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEvCAYAAACnuq2HAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASaklEQVR4nO3de7BdZX3G8e9jIl4LBIlUE8ZQzbQTr9AMYm2djrQQFAzjrTi1ZjA1nRZvbWcUnGlpVTri2FLRSpsKGCgKiBfwyqSAl1ZFDkJBQMsZEUmGy9EEUKkX7K9/7DdlJ54Qztk5Z22yvp+ZPXu973rXPr9kMnnOete71k5VIUnqt0d0XYAkqXuGgSTJMJAkGQaSJAwDSRKGgSQJWNh1AbO1//7717Jly7ouQ5IeNq6++urvV9Xi6fY9bMNg2bJlTExMdF2GJD1sJLl1Z/ucJpIkGQaSJMNAkoRhIEniIYRBkrOS3JXkm0N9+yXZmOTm9r6o9SfJ6Ukmk1yX5JChY9a08TcnWTPU/5tJrm/HnJ4ku/sPKUl6cA/lzOBDwKod+k4ELquq5cBlrQ1wFLC8vdYBZ8AgPICTgecChwInbwuQNuZ1Q8ft+LMkSXNsl2FQVV8CtuzQvRrY0LY3AMcO9Z9TA18D9k3yJOBIYGNVbamqrcBGYFXbt3dVfa0Gz9I+Z+izJEnzZLbXDA6oqtvb9h3AAW17CXDb0LhNre/B+jdN0z+tJOuSTCSZmJqammXpkqQdjXzTWVVVknn5hpyqWg+sB1i5cuXYfyvPshM/03UJe5TvvuvFXZcg7bFme2ZwZ5viob3f1fo3AwcOjVva+h6sf+k0/ZKkeTTbM4NLgDXAu9r7xUP9r09yPoOLxfdU1e1JLgX+buii8RHASVW1Jcm9SQ4DrgReA7xvljVJmgHPXHevh/uZ6y7DIMlHgN8F9k+yicGqoHcBFyZZC9wKvLIN/yzwImASuA84HqD9p/8O4Ko27u1Vte2i9J8xWLH0GOBz7SVJmke7DIOqetVOdh0+zdgCTtjJ55wFnDVN/wTwjF3VIUmaO96BLEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEmMGAZJ/jzJDUm+meQjSR6d5KAkVyaZTHJBkr3a2Ee19mTbv2zoc05q/d9OcuSIfyZJ0gzNOgySLAHeCKysqmcAC4DjgFOB06rqacBWYG07ZC2wtfWf1saRZEU77unAKuADSRbMti5J0syNOk20EHhMkoXAY4HbgRcCF7X9G4Bj2/bq1qbtPzxJWv/5VfXTqroFmAQOHbEuSdIMzDoMqmoz8B7gewxC4B7gauDuqrq/DdsELGnbS4Db2rH3t/FPGO6f5hhJ0jwYZZpoEYPf6g8Cngw8jsE0z5xJsi7JRJKJqampufxRktQro0wT/R5wS1VNVdXPgY8Dzwf2bdNGAEuBzW17M3AgQNu/D/CD4f5pjtlOVa2vqpVVtXLx4sUjlC5JGjZKGHwPOCzJY9vc/+HAjcAVwMvbmDXAxW37ktam7b+8qqr1H9dWGx0ELAe+PkJdkqQZWrjrIdOrqiuTXAR8A7gfuAZYD3wGOD/JO1vfme2QM4Fzk0wCWxisIKKqbkhyIYMguR84oap+Mdu6JEkzN+swAKiqk4GTd+j+DtOsBqqqnwCv2MnnnAKcMkotkqTZ8w5kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCQxYhgk2TfJRUm+le
SmJM9Lsl+SjUlubu+L2tgkOT3JZJLrkhwy9Dlr2vibk6wZ9Q8lSZqZUc8M3gt8vqp+A3g2cBNwInBZVS0HLmttgKOA5e21DjgDIMl+wMnAc4FDgZO3BYgkaX7MOgyS7AO8ADgToKp+VlV3A6uBDW3YBuDYtr0aOKcGvgbsm+RJwJHAxqraUlVbgY3AqtnWJUmauVHODA4CpoCzk1yT5INJHgccUFW3tzF3AAe07SXAbUPHb2p9O+uXJM2TUcJgIXAIcEZVHQz8mAemhACoqgJqhJ+xnSTrkkwkmZiamtpdHytJvTdKGGwCNlXVla19EYNwuLNN/9De72r7NwMHDh2/tPXtrP+XVNX6qlpZVSsXL148QumSpGGzDoOqugO4Lcmvt67DgRuBS4BtK4LWABe37UuA17RVRYcB97TppEuBI5IsaheOj2h9kqR5snDE498AnJdkL+A7wPEMAubCJGuBW4FXtrGfBV4ETAL3tbFU1ZYk7wCuauPeXlVbRqxLkjQDI4VBVV0LrJxm1+HTjC3ghJ18zlnAWaPUIkmaPe9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiR2QxgkWZDkmiSfbu2DklyZZDLJBUn2av2Pau3Jtn/Z0Gec1Pq/neTIUWuSJM3M7jgzeBNw01D7VOC0qnoasBVY2/rXAltb/2ltHElWAMcBTwdWAR9IsmA31CVJeohGCoMkS4EXAx9s7QAvBC5qQzYAx7bt1a1N2394G78aOL+qflpVtwCTwKGj1CVJmplRzwz+EXgL8L+t/QTg7qq6v7U3AUva9hLgNoC2/542/v/7pzlGkjQPZh0GSY4G7qqqq3djPbv6meuSTCSZmJqamq8fK0l7vFHODJ4PvCTJd4HzGUwPvRfYN8nCNmYpsLltbwYOBGj79wF+MNw/zTHbqar1VbWyqlYuXrx4hNIlScNmHQZVdVJVLa2qZQwuAF9eVX8IXAG8vA1bA1zcti9pbdr+y6uqWv9xbbXRQcBy4OuzrUuSNHMLdz1kxt4KnJ/kncA1wJmt/0zg3CSTwBYGAUJV3ZDkQuBG4H7ghKr6xRzUJUnaid0SBlX1BeALbfs7TLMaqKp+ArxiJ8efApyyO2qRJM2cdyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIkRwiDJgUmuSHJjkhuSvKn175dkY5Kb2/ui1p8kpyeZTHJdkkOGPmtNG39zkjWj/7EkSTMxypnB/cBfVtUK4DDghCQrgBOBy6pqOXBZawMcBSxvr3XAGTAID+Bk4LnAocDJ2wJEkjQ/Zh0GVXV7VX2jbf8QuAlYAqwGNrRhG4Bj2/Zq4Jwa+Bqwb5InAUcCG6tqS1VtBTYCq2ZblyRp5nbLNYMky4CDgSuBA6rq9rbrDuCAtr0EuG3osE2tb2f9kqR5MnIYJHk88DHgzVV17/C+qiqgRv0ZQz9rXZKJJBNTU1O762MlqfdGCoMkj2QQBOdV1cdb951t+of2flfr3wwcOHT40ta3s/5fUlXrq2plVa1cvHjxKKVLkoaMspoowJnATVX1D0O7LgG2rQhaA1w81P+atqroMOCeNp10KXBEkkXtwvERrU+SNE8WjnDs84E/Aq5Pcm3rexvwLuDCJGuBW4FXtn2fBV4ETAL3AccDVNWWJO8Armrj3l5VW0aoS5I0Q7MOg6r6DyA72X34NOMLOGEnn3UWcNZsa5EkjcY7kCVJhoEkyTCQJGEYSJIwDC
RJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CSxBiFQZJVSb6dZDLJiV3XI0l9MhZhkGQB8E/AUcAK4FVJVnRblST1x1iEAXAoMFlV36mqnwHnA6s7rkmSemNh1wU0S4DbhtqbgOfuOCjJOmBda/4oybfnobY+2B/4ftdF7EpO7boCdcR/n7vPU3a2Y1zC4CGpqvXA+q7r2NMkmaiqlV3XIU3Hf5/zY1ymiTYDBw61l7Y+SdI8GJcwuApYnuSgJHsBxwGXdFyTJPXGWEwTVdX9SV4PXAosAM6qqhs6LqtPnHrTOPPf5zxIVXVdgySpY+MyTSRJ6pBhIEkyDCRJhoEkiTFZTaT5l+QE4Lyquru1FwGvqqoPdFqYeivJXzzY/qr6h/mqpY88M+iv120LAoCq2gq8rrtyJH5lFy/NIc8M+mtBklRbW9yeHLtXxzWpx6rqb7uuoc8Mg/76PHBBkn9p7T9pfVKnkjwaWAs8HXj0tv6qem1nRfWA00T99VbgCuBP2+sy4C2dViQNnAv8KnAk8EUGzyr7YacV9YB3IEsaK0muqaqDk1xXVc9K8kjgy1V1WNe17cmcJuqZJBdW1SuTXA/80m8CVfWsDsqShv28vd+d5BnAHcATO6ynFwyD/nlTez+60yqknVvfljr/FYOnFz8e+OtuS9rzOU0kSfLMoK+SvBQ4lcHpd9qrqmrvTgtT7yV5FPAyYBlD/0dV1du7qqkPDIP+ejdwTFXd1HUh0g4uBu4BrgZ+2nEtvWEY9NedBoHG1NKqWtV1EX1jGPTXRJILgE8y9NtXVX28s4qkga8keWZVXd91IX3iBeSeSnL2NN3lXZ7qWpIbgacBtzD4RWXb9SyXPc8hw0DSWEnylOn6q+rW+a6lTwyDnvL5LxpnSZ4N/E5rfrmq/qvLevrAZxP1l89/0VhK8ibgPAbLnp8I/FuSN3Rb1Z7PM4Oe8vkvGldJrgOeV1U/bu3HAV/1msHc8sygv3Z8/ss++PwXjYcAvxhq/6L1aQ65tLS/fP6LxtXZwJVJPtHaxwJndldOPzhNJGnsJDkE+O3W/HJVXdNlPX1gGPTUTr58/B7g6qq6dp7LkUiyd1Xdm2S/6fZX1Zb5rqlPDIOeSvJhYCXwqdZ1NHAdg4eDfbSq3t1RaeqpJJ+uqqOT3ML237Wx7aazX+uotF4wDHoqyZeAF1XVj1r78cBngFUMzg5WdFmfpPnlaqL+eiLbPxHy58ABVfU/+KRIdSjJZQ+lT7uXq4n66zwGKzYubu1jgA+3Nd03dleW+qrdFf9YYP+20m3bctK9gSWdFdYTThP1UJIwuOP4AOD5rfs/q2qiu6rUd+3O4zcDTwY280AY3Av8a1W9v6PSesEw6Kkk11fVM7uuQ9pRkjdU1fu6rqNvDIOeSrIBeH9VXdV1LdKOkvwWv/y1l+d0VlAPGAY9leRbwHLgu8CP8ZnxGhNJzgWeClzLA4+lqKp6Y2dF9YBh0FPtmfGLeOAxwV8C7vaZ8epakpuAFeV/TvPKpaX9dSyDx1jvDyxu2y/psiCp+SaDx6trHnlm0FM+JljjKskVwHOAr7P993P7y8oc8j6D/vIxwRpXf9N1AX1kGPSXjwnWWKqqL7ZrWsur6t+TPBZY0HVdezqniXrMxwRrHCV5HbAO2K+qnppkOfDPVXV4x6Xt0QwDSWMlybXAocCVVXVw6/MmyTnmaiJJ4+anVfWzbY0kC9n+kdaaA4aBpHHzxSRvAx6T5PeBj/LA925ojjhNJGmsJHkEsBY4gsEKt0uBD3oT2twyDCSNlSQvBT5TVX6vxjxymkjSuDkG+O8k5yY5ul0z0BzzzEDS2EnySOAo4A8YLH/eWFV/3G1VezbDQNJYaoGwCjgeeEFV7d9xSXs0p4kkjZ
UkRyX5EHAz8DLgg/jgujnnmYGksZLkI8AFwOe8iDx/DANJktNEksZLkpcmuTnJPUnuTfLDJPd2XdeezjMDSWMlySRwTFXd1HUtfeKZgaRxc6dBMP88M5A0VpK8l8HqoU+y/TedfbyrmvrAO/skjZu9gfsYPJtomwIMgznkmYEkyWsGksZLkqVJPpHkrvb6WJKlXde1pzMMJI2bs4FLgCe316dan+aQ00SSxkqSa6vqObvq0+7lmYGkcfODJK9OsqC9Xg38oOui9nSeGUgaK0meArwPeB6DVURfAd5QVbd1WtgezjCQNFaSbADeXFVbW3s/4D1V9dpuK9uzOU0kadw8a1sQAFTVFuDgDuvpBcNA0rh5RJJF2xrtzMAbZOeYf8GSxs3fA19N8tHWfgVwSof19ILXDCSNnSQrgBe25uVVdWOX9fSBYSBJ8pqBJMkwkCRhGEiSMAwkSRgGkiTg/wAZuUhOdJB+3AAAAABJRU5ErkJggg==\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.value_counts(avocado_train['type']).plot.bar()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<AxesSubplot:>"
|
||
]
|
||
},
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEvCAYAAACpPxGtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAVKklEQVR4nO3df7DldX3f8efL5YcmaljKleIuutRsJ7Mmuji3gDVtrY6wUA3ENBZmEimhru2A1UmmDTptMRpmNBNlYqK0GNDVoIi/wkZJ6AYZ0SYCi274KeWWH8PuIGxcfmhoSaDv/nE+Ww7r3b13d++eL57P8zFz5ny/7+/3e877zO6+7nc/5/P93lQVkqQ+PGfoBiRJk2PoS1JHDH1J6oihL0kdMfQlqSOGviR15KChG9iTI444olatWjV0G5L0Y+Wmm27666qamW/bgqGf5LnAdcChbf8vVNX5ST4J/DPg0bbrv66qLUkC/B5wCvB4q3+7vdaZwH9q+/92VW3Y03uvWrWKzZs3L9SiJGlMkvt2t20xZ/pPAK+rqh8mORj4ZpI/bdv+Q1V9YZf9TwZWt8fxwEXA8UkOB84HZoECbkqysaoe3ruPI0naVwuO6dfID9vqwe2xp8t4TwU+1Y77FnBYkqOAk4BNVbWjBf0mYN3+tS9J2huL+iI3ybIkW4CHGAX39W3TBUluTnJhkkNbbQVw/9jhW1ttd3VJ0oQsKvSr6qmqWgusBI5L8rPAu4GfAf4RcDjwm0vRUJL1STYn2bx9+/aleElJUrNXUzar6hHgWmBdVT3QhnCeAD4BHNd22wYcPXbYylbbXX3X97i4qmaranZmZt4vnyVJ+2jB0E8yk+Swtvw84A3Ad9s4PW22zmnAre2QjcBbM3IC8GhVPQBcDZyYZHmS5cCJrSZJmpDFzN45CtiQZBmjHxJXVNVXknwtyQwQYAvwb9v+VzGarjnHaMrmWQBVtSPJ+4Eb237vq6odS/ZJJEkLyrP5fvqzs7PlPH1J2jtJbqqq2fm2PauvyP1xseq8rw7dwlS59wP/YugWpKnlvXckqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFvuCZNOW8IuHSm4WaAnulLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktSRBUM/yXOT3JDkr5LcluS3Wv2YJNcnmUvyuSSHtPqhbX2ubV819lrvbvU7k5x0wD6VJGleiznTfwJ4XVW9ElgLrEtyAvBB4MKq+mngYeDstv/ZwMOtfmHbjyRrgNOBlwPrgI8lWbaEn0WStIAFQ79GfthWD26PAl4HfKHVNwCnteVT2zpt++uTpNUvr6onquoeYA44bik+hCRpcRY1pp9kWZItwEPAJuB/AY9U1ZNtl63Aira8ArgfoG1/FPh74/V5jpEkTcCiQr+qnqqqtcBKRmfnP3OgGkqyPsnmJJu3b99+oN5Gkrq0V7N3quoR4Frg1cBhSXbesG0lsK0tbwOOBmjbfwr4/nh9nmPG3+PiqpqtqtmZmZm9aU+StIDFzN6ZSXJYW34e8AbgDkbh/y/bbmcCV7bljW2dtv1rVVWtfnqb3XMMsBq4YYk+hyRpERZza+WjgA1tps1zgCuq6itJbgcuT/LbwHeAS9r+lwCfTjIH7GA0Y4equi3JFcDtwJPAOVX11NJ+HEnSniwY+lV1M3DsPPW7mWf2TVX9H+CXd/NaFwAX7H2bkqSl4BW5ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIwuGfpKjk1yb5PYktyV5Z6u/N8m2JFva45SxY96dZC7JnUlOGquva7W5JOcdmI8kSdqdgxaxz5PAb1TVt5O8ALgpyaa27c
Kq+t3xnZOsAU4HXg68GPjzJP+wbf4o8AZgK3Bjko1VdftSfBBJ0sIWDP2qegB4oC3/IMkdwIo9HHIqcHlVPQHck2QOOK5tm6uquwGSXN72NfQlaUL2akw/ySrgWOD6Vjo3yc1JLk2yvNVWAPePHba11XZX3/U91ifZnGTz9u3b96Y9SdICFh36SZ4PfBF4V1U9BlwEvAxYy+h/Ah9aioaq6uKqmq2q2ZmZmaV4SUlSs5gxfZIczCjwL6uqLwFU1YNj2z8OfKWtbgOOHjt8Zauxh7okaQIWM3snwCXAHVX14bH6UWO7/SJwa1veCJye5NAkxwCrgRuAG4HVSY5JcgijL3s3Ls3HkCQtxmLO9F8D/CpwS5ItrfYe4Iwka4EC7gXeDlBVtyW5gtEXtE8C51TVUwBJzgWuBpYBl1bVbUv2SSRJC1rM7J1vApln01V7OOYC4IJ56lft6ThJ0oHlFbmS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHVkw9JMcneTaJLcnuS3JO1v98CSbktzVnpe3epJ8JMlckpuTvGrstc5s+9+V5MwD97EkSfNZzJn+k8BvVNUa4ATgnCRrgPOAa6pqNXBNWwc4GVjdHuuBi2D0QwI4HzgeOA44f+cPCknSZCwY+lX1QFV9uy3/ALgDWAGcCmxou20ATmvLpwKfqpFvAYclOQo4CdhUVTuq6mFgE7BuKT+MJGnP9mpMP8kq4FjgeuDIqnqgbfoecGRbXgHcP3bY1lbbXV2SNCGLDv0kzwe+CLyrqh4b31ZVBdRSNJRkfZLNSTZv3759KV5SktQsKvSTHMwo8C+rqi+18oNt2Ib2/FCrbwOOHjt8Zavtrv4MVXVxVc1W1ezMzMzefBZJ0gIWM3snwCXAHVX14bFNG4GdM3DOBK4cq7+1zeI5AXi0DQNdDZyYZHn7AvfEVpMkTchBi9jnNcCvArck2dJq7wE+AFyR5GzgPuAtbdtVwCnAHPA4cBZAVe1I8n7gxrbf+6pqx1J8CEnS4iwY+lX1TSC72fz6efYv4JzdvNalwKV706Akael4Ra4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRxYM/SSXJnkoya1jtfcm2ZZkS3ucMrbt3UnmktyZ5KSx+rpWm0ty3tJ/FEnSQhZzpv9JYN089Quram17XAWQZA1wOvDydszHkixLsgz4KHAysAY4o+0rSZqggxbaoaquS7Jqka93KnB5VT0B3JNkDjiubZurqrsBklze9r1971uWJO2r/RnTPzfJzW34Z3mrrQDuH9tna6vtri5JmqB9Df2LgJcBa4EHgA8tVUNJ1ifZnGTz9u3bl+plJUnsY+hX1YNV9VRV/V/g4zw9hLMNOHps15Wttrv6fK99cVXNVtXszMzMvrQnSdqNfQr9JEeNrf4isHNmz0bg9CSHJjkGWA3cANwIrE5yTJJDGH3Zu3Hf25Yk7YsFv8hN8lngtcARSbYC5wOvTbIWKOBe4O0AVXVbkisYfUH7JHBOVT3VXudc4GpgGXBpVd221B9GkrRni5m9c8Y85Uv2sP8FwAXz1K8Crtqr7iRJS8orciWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcWDP0klyZ5KMmtY7XDk2xKcld7Xt7qSfKRJHNJbk7yqrFjzmz735XkzAPzcSRJe7KYM/1PAut2qZ0HXFNVq4Fr2jrAycDq9lgPXASjHx
LA+cDxwHHA+Tt/UEiSJmfB0K+q64Adu5RPBTa05Q3AaWP1T9XIt4DDkhwFnARsqqodVfUwsIkf/UEiSTrA9nVM/8iqeqAtfw84si2vAO4f229rq+2uLkmaoP3+IreqCqgl6AWAJOuTbE6yefv27Uv1spIk9j30H2zDNrTnh1p9G3D02H4rW2139R9RVRdX1WxVzc7MzOxje5Kk+exr6G8Eds7AORO4cqz+1jaL5wTg0TYMdDVwYpLl7QvcE1tNkjRBBy20Q5LPAq8FjkiyldEsnA8AVyQ5G7gPeEvb/SrgFGAOeBw4C6CqdiR5P3Bj2+99VbXrl8OSpANswdCvqjN2s+n18+xbwDm7eZ1LgUv3qjtJ0pLyilxJ6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0JekjuxX6Ce5N8ktSbYk2dxqhyfZlOSu9ry81ZPkI0nmktyc5FVL8QEkSYu3FGf6/7yq1lbVbFs/D7imqlYD17R1gJOB1e2xHrhoCd5bkrQXDsTwzqnAhra8AThtrP6pGvkWcFiSow7A+0uSdmN/Q7+A/57kpiTrW+3IqnqgLX8POLItrwDuHzt2a6tJkibkoP08/ueraluSFwGbknx3fGNVVZLamxdsPzzWA7zkJS/Zz/YkSeP260y/qra154eALwPHAQ/uHLZpzw+13bcBR48dvrLVdn3Ni6tqtqpmZ2Zm9qc9SdIu9jn0k/xkkhfsXAZOBG4FNgJntt3OBK5syxuBt7ZZPCcAj44NA0mSJmB/hneOBL6cZOfrfKaq/izJjcAVSc4G7gPe0va/CjgFmAMeB87aj/eWJO2DfQ79qrobeOU89e8Dr5+nXsA5+/p+kqT95xW5ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIxMP/STrktyZZC7JeZN+f0nq2URDP8ky4KPAycAa4IwkaybZgyT1bNJn+scBc1V1d1X9LXA5cOqEe5Ckbh004fdbAdw/tr4VOH58hyTrgfVt9YdJ7pxQbz04AvjroZtYSD44dAcayLP+7+eP0d/Nl+5uw6RDf0FVdTFw8dB9TKMkm6tqdug+pPn493MyJj28sw04emx9ZatJkiZg0qF/I7A6yTFJDgFOBzZOuAdJ6tZEh3eq6skk5wJXA8uAS6vqtkn20DmHzfRs5t/PCUhVDd2DJGlCvCJXkjpi6EtSRwx9SeqIoS9JHXnWXZylpZHk1/e0vao+PKlepPkkOQe4rKoeaevLgTOq6mODNjblPNOfXi9Y4CEN7W07Ax+gqh4G3jZcO33wTH9KVdVvDd2DtIBlSVJt3ni7C+8hA/c09Qz9KZfkucDZwMuB5+6sV9WvDdaUNPJnwOeS/Le2/vZW0wHk8M70+zTw94GTgK8zut/RDwbtSBr5TeBa4N+1xzXAfxy0ow54Re6US/Kdqjo2yc1V9YokBwPfqKoThu5N0uQ5vDP9/q49P5LkZ4HvAS8asB91LskVVfWWJLcAP3LWWVWvGKCtbhj60+/iNhXuPzO6o+nzgf8ybEvq3Dvb8xsH7aJTDu9IUkc8059ySQ4FfglYxdifd1W9b6ieJIAkbwY+yGi4Me1RVfXCQRubcob+9LsSeBS4CXhi4F6kcb8DvKmq7hi6kZ4Y+tNvZVWtG7oJaR4PGviTZ+hPv79I8nNVdcvQjUi72Jzkc8AfM/a/0Kr60mAddcAvcqdcktuBnwbuYfQPa+e4qdPiNKgkn5inXF4tfmAZ+lMuyUvnq1fVfZPuRdLwDP0OJHkl8E/a6jeq6q+G7EcC7ws1FO+9M+WSvBO4jNG0uBcBf5TkHcN2JQ
HeF2oQnulPuSQ3A6+uqr9p6z8J/KVj+hqa94Uahmf60y/AU2PrT7WaNLRd7wv1U3hfqAPOKZvT7xPA9Um+3NZPAy4Zrh3p//O+UANweKcDSV4F/Hxb/UZVfWfIfiQNx9CfUkleWFWPJTl8vu1VtWPSPUnjkvz6POVHgZuqasuE2+mGoT+lknylqt6Y5B6eec/ynRdn/YOBWpMASPIZYBb4k1Z6I3Azo5sDfr6qfmeg1qaaoS9pEEmuA06pqh+29ecDXwXWMTrbXzNkf9PK2TtTLsk1i6lJA3gRz7zz698BR1bV/8Y7wh4wzt6ZUu1qx58AjmgzJHZO03whsGKwxqSnXcZoZtmVbf1NwGfatSS3D9fWdHN4Z0q1K3HfBbwY2MbTof8Y8PGq+oOBWpNIEkZX4B4JvKaV/0dVbR6uqz4Y+lMuyTuq6veH7kPaVZJbqurnhu6jN4Z+B5L8Y3701yV+arCGJCDJBuAPqurGoXvpiaE/5ZJ8GngZsIWnb8dQVfXvB2tKApJ8F1gN3Av8Df6uh4kw9KdckjuANeUftJ5l2u96WM7Tt/2+DnjE3/VwYDllc/rdyuj2tdKzzWmMbq98BDDTln9hyIZ64Jn+lEtyLbAWuIFn/h5S/3FpUN72exjO059+7x26AWk3vO33AAz9KVdVX29jp6ur6s+T/ASwbOi+JLzt9yAc3plySd4GrAcOr6qXJVkN/Neqev3ArUne9nsAhv6US7IFOA64vqqObTUvipE65eyd6fdEVf3tzpUkB/HMWy1L6oihP/2+nuQ9wPOSvAH4PE/fv1xSZxzemXJJngOcDZzIaGbE1cAferGW1CdDf8oleTPw1ary/uSSHN7pwJuA/5nk00ne2Mb0JXXKM/0OJDkYOBn4V4ymx22qqn8zbFeShmDod6IF/zrgLOCfVtURA7ckaQAO70y5JCcn+SRwF/BLwB/iDdikbnmmP+WSfBb4HPCnfpkrydCXpI44vDPlkrw5yV1JHk3yWJIfJHls6L4kDcMz/SmXZA54U1XdMXQvkobnmf70e9DAl7STZ/pTLsnvMZqt88c88zdnfWmoniQNx6szp98LgccZ3XtnpwIMfalDnulLUkcc059ySVYm+XKSh9rji0lWDt2XpGEY+tPvE8BG4MXt8SetJqlDDu9MuSRbqmrtQjVJffBMf/p9P8mvJFnWHr8CfH/opiQNwzP9KZfkpcDvA69mNGvnL4B3VNX9gzYmaRCG/pRLsgF4V1U93NYPB363qn5t2M4kDcHhnen3ip2BD1BVO4BjB+xH0oAM/en3nCTLd660M30vypM65T/+6fch4C+TfL6t/zJwwYD9SBqQY/odSLIGeF1b/VpV3T5kP5KGY+hLUkcc05ekjhj6ktQRQ1+SOmLoS1JHDH1J6sj/A+qwMSDo1Gd2AAAAAElFTkSuQmCC\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Bar chart of how many rows of avocado_test carry each value of the 'type' column.\n",
"# Series.value_counts() is used because the top-level pd.value_counts() helper is deprecated.\n",
"avocado_test['type'].value_counts().plot.bar()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<AxesSubplot:>"
|
||
]
|
||
},
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
},
|
||
{
|
||
"data": {
|
||
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASkElEQVR4nO3df6zddX3H8ecbSikKoyjkDkrdrZEw0I4NbxBDYq6yQYXFkgxNA9HWYLo4RFy6zGKy1apETFSGbNM0lq0asHRopAOca4ATsz+o/BCtUBl3UKAdilKoVgF39b0/zufitd7be27v+XHP/TwfyUm/Pz7n+/18+j33db7n8/2ez4nMRJJUh8N6XQFJUvcY+pJUEUNfkipi6EtSRQx9SarIvF5X4GCOP/74HBwcbPt2f/7zn/PKV76y7dvtNdvVf+Zq22xXb91///0/ycwTJlo3q0N/cHCQ++67r+3bbTQaDA8Pt327vWa7+s9cbZvt6q2IeGKydXbvSFJFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRWb1N3I1PTv27GPV2tt7su9d11zYk/1Kmh7P9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxGEY1BaDHRz+Yc3S0UmHl3D4B2l6PNOXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqSEuhHxF/HREPRcT3I+IrEbEgIpZExPaIGImImyNifil7ZJkfKesHx23nqrL8kYg4v0NtkiRNYsrQj4hFwAeBocx8A3A4sAL4FHBtZr4OeA64rDzlMuC5svzaUo6IOL087/XAMuCfI+Lw9jZHknQwrXbvzAOOioh5wCuAp4G3AbeU9ZuAi8r08jJPWX9uRERZvjkzX8rMx4ER4KwZt0CS1LIpf0QlM/dExKeBJ4EXgP8E7geez8zRUmw3sKhMLwKeKs8djYh9wKvL8nvGbXr8c14WEauB1QADAwM0Go3pt2oK+/fv78h2e23gqOYPjsw1B2tXvx/HufpatF2z15ShHxHH0TxLXwI8D/wbze6ZjsjMDcAGgKGhoRweHm77PhqNBp3Ybq9df+OtfGbH3PsxtDVLRydt165Lh7tbmTabq69F2zV7tdK986fA45n548z8P+BrwDnAwtLdA3AysKdM7wEWA5T1xwLPjl8+wXMkSV3QSug/CZwdEa8offPnAg8DdwMXlzIrgVvL9NYyT1l/V2ZmWb6i3N2zBDgF+HZ7miFJakUrffrbI+IW4AFgFPgOze6X24HNEfGJsmxjecpG4MsRMQLspXnHDpn5UERsofmGMQpcnpm/anN7JEkH0VIHcGauA9YdsPgxJrj7JjNfBN45yXauBq6eZh0lSW3iN3IlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKtBT6EbEwIm6JiB9ExM6IeHNEvCoitkXEo+Xf40rZiIjPRcRIRHwvIs4ct52VpfyjEbGyU42SJE2s1TP964D/yMw/BM4AdgJrgTsz8xTgzjIP8HbglPJYDXweICJeBawD3gScBawbe6OQJHXHlKEfEccCbwE2AmTmLzPzeWA5sKkU2wRcVKaXA1/KpnuAhRFxInA+sC0z92bmc8A2YFkb2yJJmsK8FsosAX4M/EtEnAHcD1wJDGTm06XMD4GBMr0IeGrc83eXZZMt/y0RsZrmJwQGBgZoNBqttqVl+/fv78h2e23gKFizdLTX1Wi7g7Wr34/jXH0t2q
7Zq5XQnwecCVyRmdsj4jp+05UDQGZmRGQ7KpSZG4ANAENDQzk8PNyOzf6WRqNBJ7bba9ffeCuf2dHKIe0va5aOTtquXZcOd7cybTZXX4u2a/ZqpU9/N7A7M7eX+Vtovgn8qHTbUP59pqzfAywe9/yTy7LJlkuSumTK0M/MHwJPRcSpZdG5wMPAVmDsDpyVwK1leivwnnIXz9nAvtIN9E3gvIg4rlzAPa8skyR1Sat9AVcAN0bEfOAx4L003zC2RMRlwBPAu0rZO4ALgBHgF6Usmbk3Ij4O3FvKfSwz97alFZKklrQU+pn5IDA0wapzJyibwOWTbOcG4IZp1E86qMG1t/ds37uuubBn+5YOld/IlaSKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekisy9IRkrtvSwx9m1YF3X9zv44k1d36ekQ+OZviRVxNCXpIoY+pJUEUNfkirihVzN2K4Fl3R0+43D1v/OBWovHkuHxjN9SaqIoS9JFTH0Jakihr4kVcQLuR3Qq99t/dczerJbSX3EM31JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkVaDv2IODwivhMRt5X5JRGxPSJGIuLmiJhflh9Z5kfK+sFx27iqLH8kIs5ve2skSQc1nTP9K4Gd4+Y/BVybma8DngMuK8svA54ry68t5YiI04EVwOuBZcA/R8ThM6u+JGk6Wgr9iDgZuBD4YpkP4G3ALaXIJuCiMr28zFPWn1vKLwc2Z+ZLmfk4MAKc1YY2SJJa1Opv5P4D8LfAMWX+1cDzmTla5ncDi8r0IuApgMwcjYh9pfwi4J5x2xz/nJdFxGpgNcDAwACNRqPFKrZu//79HdnumDVLR6cu1AH7jzyJxqnre7LvTpqoXWt+3Zv/4/Ha8Rrq9GuxV2zX7DVl6EfEnwPPZOb9ETHc6Qpl5gZgA8DQ0FAOD7d/l41Gg05sd8yqnv0w+uMMP7KuJ/vupMap63+nXatevKlHtfmNXZcOz3gbnX4t9ortmr1aOdM/B3hHRFwALAB+D7gOWBgR88rZ/snAnlJ+D7AY2B0R84BjgWfHLR8z/jmSpC6Ysk8/M6/KzJMzc5Dmhdi7MvNS4G7g4lJsJXBrmd5a5inr78rMLMtXlLt7lgCnAN9uW0skSVNqtU9/Ih8GNkfEJ4DvABvL8o3AlyNiBNhL842CzHwoIrYADwOjwOWZ+asZ7F+SNE3TCv3MbACNMv0YE9x9k5kvAu+c5PlXA1dPt5KSpPbwG7mSVBFDX5IqYuhLUkUMfUmqiKEvSRWZyS2bUs/sWnBJ1/c5OAu+BSzNlGf6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0JakiDq3cIb0Y+rfB+q7vU1J/8Uxfkipi6EtSRQx9SaqIoS9JFfFCrnSIBtfePuNtrFk6yqppbmfXNRfOeL+ql2f6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkioyZehHxOKIuDsiHo6IhyLiyrL8VRGxLSIeLf8eV5ZHRHwuIkYi4nsRcea4ba0s5R+NiJWda5YkaSKtnOmPAmsy83TgbODyiDgdWAvcmZmnAHeWeYC3A6eUx2rg89B8kwDWAW8CzgLWjb1RSJK6Y8rQz8ynM/OBMv0zYCewCFgObCrFNgEXlenlwJey6R5gYUScCJwPbMvMvZn5HLANWNbOxkiSDi4ys/XCEYPAt4A3AE9m5sKyPIDnMnNhRNwGXJOZ/1XW3Ql8GBgGFmTmJ8ryvwNeyMxPH7CP1TQ/ITAwMPDGzZs3z6R9E9q/fz9HH31027c7ZseefSw97PGObX8y+488iaNf+t+u77fTZku7dvx6Sdu3OXAU/OiF6T1n6a
Jj216Pduv031iv9Eu73vrWt96fmUMTrWt5aOWIOBr4KvChzPxpM+ebMjMjovV3j4PIzA3ABoChoaEcHh5ux2Z/S6PRoBPbHbNq7e3sWrCuY9ufTOPU9Qw/0v39dtpsadeqF29q+zbXLB3lMzumN8L5rkuH216Pduv031ivzIV2tXT3TkQcQTPwb8zMr5XFPyrdNpR/nynL9wCLxz395LJssuWSpC5p5e6dADYCOzPzs+NWbQXG7sBZCdw6bvl7yl08ZwP7MvNp4JvAeRFxXLmAe15ZJknqklY+V54DvBvYEREPlmUfAa4BtkTEZcATwLvKujuAC4AR4BfAewEyc29EfBy4t5T7WGbubUcj1PTw/PlcseQ1Pdn3jsef7Ml+JU3PlKFfLsjGJKvPnaB8ApdPsq0bgBumU0H1h6UdfLN5/0HezHyzkabH38iVWrRrwSVt32bjsPVTXvQf7MAFZNXL0J8Ddm4+CYDXnpBsuX604/t711W+bKR+5dg7klQRQ1+SKuLndE3blk92vgtpjF1JUnt5pi9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkW8CVqz2pZPjvLEFZMPL7GTk9q2r9NW9P7XuaROM/SlYrojhTrCp/qR3TuSVBFDX5IqYuhLUkXs0++AY05by1K697OFW+jeAGiS+ptn+pJUEc/0O6Sbww+rLoNrb+/Zvnddc2HP9q328Exfkipi6EtSRQx9SaqIffpSMd3rMDMdAsJhH9QLnulLUkUMfUmqiN070iy3a8ElXd/n4Is3dX2f6o45HfqT3c+8Zukoqzp4r/Mxp3Vs05pDli55De+fP58rHN1TXWT3jiRVxNCXpIoY+pJUkTndpz/mwAthjcPWs2vBuo7tr5sjbErSdHimL0kVqeJMX5qNpvrR98kcyjeB/favxhj6klrW6rDO7b4t2iGd28fQlyqwdJrfBTiGtRMub/U7Aod63cwvhXVe10M/IpYB1wGHA1/MzGs6ta9jTmu+cMcurI4NqPXiFUewc/PMBss6GH++ULNNu37Up5WuJbuSZreuhn5EHA78E/BnwG7g3ojYmpkPd7MekmanSYec+Gjn9nmwTxdzsVup22f6ZwEjmfkYQERsBpYDhr40R+zcfFLHP0230zf4m0nX7fz6AevW/SV8dPnMd/rRfTPfxiGKzOzeziIuBpZl5vvK/LuBN2XmB8aVWQ2sLrOnAo90oCrHAz/pwHZ7zXb1n7naNtvVW3+QmSdMtGLWXcjNzA3Ahk7uIyLuy8yhTu6jF2xX/5mrbbNds1e3v5y1B1g8bv7kskyS1AXdDv17gVMiYklEzAdWAFu7XAdJqlZXu3cyczQiPgB8k+Ytmzdk5kPdrEPR0e6jHrJd/Weuts12zVJdvZArSeotB1yTpIoY+pJUkTkd+hGxLCIeiYiRiPidwUQiYlVE/DgiHiyP9/WintMRETdExDMR8f1J1kdEfK60+XsRcWa363goWmjXcETsG3es/r7bdTwUEbE4Iu6OiIcj4qGIuHKCMn13zFpsV78eswUR8e2I+G5p2/oJyhwZETeXY7Y9IgZ7UNVDk5lz8kHzQvH/AK8F5gPfBU4/oMwq4B97XddptustwJnA9ydZfwHwDSCAs4Htva5zm9o1DNzW63oeQrtOBM4s08cA/z3B67DvjlmL7erXYxbA0WX6CGA7cPYBZf4K+EKZXgHc3Ot6t/qYy2f6Lw/5kJm/BMaGfOhrmfktYO9BiiwHvpRN9wALI+LE7tTu0LXQrr6UmU9n5gNl+mfATmDRAcX67pi12K6+VI7D/jJ7RHkceMfLcmBTmb4FODcioktVnJG5HPqLgKfGze9m4hflX5SP1LdExOIJ1vebVtvdj95cPnJ/IyJe3+vKTFfpAvgTmmeO4/X1MTtIu6BPj1lEHB4RDwLPANsyc9JjlpmjwD7g1V2t5CGay6Hfin8HBjPzj4Bt/OadW7PPAzTHEzkDuB74em+rMz0RcTTwVe
BDmfnTXtenXaZoV98es8z8VWb+Mc1RA86KiDf0uEptM5dDf8ohHzLz2cx8qcx+EXhjl+rWSXNyqIvM/OnYR+7MvAM4IiKO73G1WhIRR9AMxhsz82sTFOnLYzZVu/r5mI3JzOeBu4FlB6x6+ZhFxDzgWODZrlbuEM3l0J9yyIcD+k3fQbNfst9tBd5T7gg5G9iXmU/3ulIzFRG/P9ZnGhFn0Xztzvo/slLnjcDOzPzsJMX67pi10q4+PmYnRMTCMn0Uzd//+MEBxbYCK8v0xcBdWa7qznazbpTNdslJhnyIiI8B92XmVuCDEfEOYJTmRcRVPatwiyLiKzTvijg+InYD62heaCIzvwDcQfNukBHgF8B7e1PT6WmhXRcD74+IUeAFYEWf/JGdA7wb2FH6iAE+As2fc+vjY9ZKu/r1mJ0IbIrmjz4dBmzJzNsOyI6NwJcjYoRmdqzoXXWnx2EYJKkic7l7R5J0AENfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVeT/AdHBg3mBM54VAAAAAElFTkSuQmCC\n",
|
||
"text/plain": [
|
||
"<Figure size 432x288 with 1 Axes>"
|
||
]
|
||
},
|
||
"metadata": {
|
||
"needs_background": "light"
|
||
},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Overlay the average_price histograms of the full frame and of the\n",
"# train/validate/test frames on shared axes; transparency plus a legend keeps\n",
"# the four layers distinguishable.\n",
"price_frames = {\n",
"    'avocado': avocado,\n",
"    'avocado_train': avocado_train,\n",
"    'avocado_validate': avocado_validate,\n",
"    'avocado_test': avocado_test,\n",
"}\n",
"for frame_name, frame in price_frames.items():\n",
"    # Series.hist draws on the current axes and returns them.\n",
"    price_axes = frame['average_price'].hist(alpha=0.5, label=frame_name)\n",
"price_axes.set_xlabel('average_price')\n",
"price_axes.set_ylabel('rows per bin')\n",
"price_axes.legend()\n",
"price_axes"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Normalizacja wartości."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>total_volume</th>\n",
|
||
" <th>4046</th>\n",
|
||
" <th>4225</th>\n",
|
||
" <th>4770</th>\n",
|
||
" <th>total_bags</th>\n",
|
||
" <th>small_bags</th>\n",
|
||
" <th>large_bags</th>\n",
|
||
" <th>xlarge_bags</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>geography</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.277580</td>\n",
|
||
" <td>0.000640</td>\n",
|
||
" <td>0.000124</td>\n",
|
||
" <td>0.001382</td>\n",
|
||
" <td>0.000020</td>\n",
|
||
" <td>0.000307</td>\n",
|
||
" <td>0.000447</td>\n",
|
||
" <td>0.000040</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Albany</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.480427</td>\n",
|
||
" <td>0.000020</td>\n",
|
||
" <td>0.000003</td>\n",
|
||
" <td>0.000008</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000037</td>\n",
|
||
" <td>0.000057</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Albany</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.199288</td>\n",
|
||
" <td>0.006826</td>\n",
|
||
" <td>0.016018</td>\n",
|
||
" <td>0.001164</td>\n",
|
||
" <td>0.000032</td>\n",
|
||
" <td>0.001477</td>\n",
|
||
" <td>0.000813</td>\n",
|
||
" <td>0.002259</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Atlanta</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.469751</td>\n",
|
||
" <td>0.000059</td>\n",
|
||
" <td>0.000066</td>\n",
|
||
" <td>0.000046</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000044</td>\n",
|
||
" <td>0.000052</td>\n",
|
||
" <td>0.000025</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Atlanta</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.227758</td>\n",
|
||
" <td>0.012366</td>\n",
|
||
" <td>0.002374</td>\n",
|
||
" <td>0.027010</td>\n",
|
||
" <td>0.015706</td>\n",
|
||
" <td>0.004454</td>\n",
|
||
" <td>0.006674</td>\n",
|
||
" <td>0.000299</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Baltimore/Washington</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33040</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.366548</td>\n",
|
||
" <td>0.024844</td>\n",
|
||
" <td>0.002970</td>\n",
|
||
" <td>0.004787</td>\n",
|
||
" <td>0.001028</td>\n",
|
||
" <td>0.044649</td>\n",
|
||
" <td>0.044121</td>\n",
|
||
" <td>0.036030</td>\n",
|
||
" <td>0.019937</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Total U.S.</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33041</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.167260</td>\n",
|
||
" <td>0.091202</td>\n",
|
||
" <td>0.059484</td>\n",
|
||
" <td>0.028776</td>\n",
|
||
" <td>0.007753</td>\n",
|
||
" <td>0.119620</td>\n",
|
||
" <td>0.106938</td>\n",
|
||
" <td>0.114914</td>\n",
|
||
" <td>0.043846</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>West</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33042</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.370107</td>\n",
|
||
" <td>0.004550</td>\n",
|
||
" <td>0.000584</td>\n",
|
||
" <td>0.000945</td>\n",
|
||
" <td>0.000250</td>\n",
|
||
" <td>0.008101</td>\n",
|
||
" <td>0.005966</td>\n",
|
||
" <td>0.010062</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>West</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33043</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.081851</td>\n",
|
||
" <td>0.012913</td>\n",
|
||
" <td>0.010319</td>\n",
|
||
" <td>0.003918</td>\n",
|
||
" <td>0.004141</td>\n",
|
||
" <td>0.015696</td>\n",
|
||
" <td>0.013906</td>\n",
|
||
" <td>0.015817</td>\n",
|
||
" <td>0.000577</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>West Tex/New Mexico</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33044</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.323843</td>\n",
|
||
" <td>0.000377</td>\n",
|
||
" <td>0.000054</td>\n",
|
||
" <td>0.000030</td>\n",
|
||
" <td>0.000615</td>\n",
|
||
" <td>0.000653</td>\n",
|
||
" <td>0.000867</td>\n",
|
||
" <td>0.000215</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>West Tex/New Mexico</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>33045 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date average_price total_volume 4046 4225 4770 \\\n",
|
||
"0 2015-01-04 0.277580 0.000640 0.000124 0.001382 0.000020 \n",
|
||
"1 2015-01-04 0.480427 0.000020 0.000003 0.000008 0.000000 \n",
|
||
"2 2015-01-04 0.199288 0.006826 0.016018 0.001164 0.000032 \n",
|
||
"3 2015-01-04 0.469751 0.000059 0.000066 0.000046 0.000000 \n",
|
||
"4 2015-01-04 0.227758 0.012366 0.002374 0.027010 0.015706 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"33040 2020-11-29 0.366548 0.024844 0.002970 0.004787 0.001028 \n",
|
||
"33041 2020-11-29 0.167260 0.091202 0.059484 0.028776 0.007753 \n",
|
||
"33042 2020-11-29 0.370107 0.004550 0.000584 0.000945 0.000250 \n",
|
||
"33043 2020-11-29 0.081851 0.012913 0.010319 0.003918 0.004141 \n",
|
||
"33044 2020-11-29 0.323843 0.000377 0.000054 0.000030 0.000615 \n",
|
||
"\n",
|
||
" total_bags small_bags large_bags xlarge_bags type \\\n",
|
||
"0 0.000307 0.000447 0.000040 0.000000 conventional \n",
|
||
"1 0.000037 0.000057 0.000000 0.000000 organic \n",
|
||
"2 0.001477 0.000813 0.002259 0.000000 conventional \n",
|
||
"3 0.000044 0.000052 0.000025 0.000000 organic \n",
|
||
"4 0.004454 0.006674 0.000299 0.000000 conventional \n",
|
||
"... ... ... ... ... ... \n",
|
||
"33040 0.044649 0.044121 0.036030 0.019937 organic \n",
|
||
"33041 0.119620 0.106938 0.114914 0.043846 conventional \n",
|
||
"33042 0.008101 0.005966 0.010062 0.000000 organic \n",
|
||
"33043 0.015696 0.013906 0.015817 0.000577 conventional \n",
|
||
"33044 0.000653 0.000867 0.000215 0.000000 organic \n",
|
||
"\n",
|
||
" geography \n",
|
||
"0 Albany \n",
|
||
"1 Albany \n",
|
||
"2 Atlanta \n",
|
||
"3 Atlanta \n",
|
||
"4 Baltimore/Washington \n",
|
||
"... ... \n",
|
||
"33040 Total U.S. \n",
|
||
"33041 West \n",
|
||
"33042 West \n",
|
||
"33043 West Tex/New Mexico \n",
|
||
"33044 West Tex/New Mexico \n",
|
||
"\n",
|
||
"[33045 rows x 12 columns]"
|
||
]
|
||
},
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Min-max normalisation, based on https://www.journaldev.com/45109/normalize-data-in-python\n",
"from sklearn import preprocessing\n",
"\n",
"# Rescale every float64 column of avocado to the [0, 1] range, in place.\n",
"num_columns = avocado.select_dtypes(include='float64').columns\n",
"num_values = avocado[num_columns].values\n",
"scaler = preprocessing.MinMaxScaler()\n",
"x_scaled = scaler.fit_transform(num_values)\n",
"# Reuse avocado's own index: column assignment aligns on index labels, so a\n",
"# fresh 0..n-1 index would write NaN into every row whose label does not match.\n",
"avocado_normalized = pd.DataFrame(x_scaled, columns=num_columns, index=avocado.index)\n",
"avocado[num_columns] = avocado_normalized\n",
"\n",
"avocado"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Usunięcie artefaktów."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"date 0\n",
|
||
"average_price 0\n",
|
||
"total_volume 0\n",
|
||
"4046 0\n",
|
||
"4225 0\n",
|
||
"4770 0\n",
|
||
"total_bags 0\n",
|
||
"small_bags 0\n",
|
||
"large_bags 0\n",
|
||
"xlarge_bags 0\n",
|
||
"type 0\n",
|
||
"geography 0\n",
|
||
"dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Per-column count of missing values in avocado.\n",
"avocado.isna().sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>date</th>\n",
|
||
" <th>average_price</th>\n",
|
||
" <th>total_volume</th>\n",
|
||
" <th>4046</th>\n",
|
||
" <th>4225</th>\n",
|
||
" <th>4770</th>\n",
|
||
" <th>total_bags</th>\n",
|
||
" <th>small_bags</th>\n",
|
||
" <th>large_bags</th>\n",
|
||
" <th>xlarge_bags</th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>geography</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.277580</td>\n",
|
||
" <td>0.000640</td>\n",
|
||
" <td>0.000124</td>\n",
|
||
" <td>0.001382</td>\n",
|
||
" <td>0.000020</td>\n",
|
||
" <td>0.000307</td>\n",
|
||
" <td>0.000447</td>\n",
|
||
" <td>0.000040</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Albany</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.480427</td>\n",
|
||
" <td>0.000020</td>\n",
|
||
" <td>0.000003</td>\n",
|
||
" <td>0.000008</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000037</td>\n",
|
||
" <td>0.000057</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Albany</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.199288</td>\n",
|
||
" <td>0.006826</td>\n",
|
||
" <td>0.016018</td>\n",
|
||
" <td>0.001164</td>\n",
|
||
" <td>0.000032</td>\n",
|
||
" <td>0.001477</td>\n",
|
||
" <td>0.000813</td>\n",
|
||
" <td>0.002259</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Atlanta</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.469751</td>\n",
|
||
" <td>0.000059</td>\n",
|
||
" <td>0.000066</td>\n",
|
||
" <td>0.000046</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000044</td>\n",
|
||
" <td>0.000052</td>\n",
|
||
" <td>0.000025</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Atlanta</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2015-01-04</td>\n",
|
||
" <td>0.227758</td>\n",
|
||
" <td>0.012366</td>\n",
|
||
" <td>0.002374</td>\n",
|
||
" <td>0.027010</td>\n",
|
||
" <td>0.015706</td>\n",
|
||
" <td>0.004454</td>\n",
|
||
" <td>0.006674</td>\n",
|
||
" <td>0.000299</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>Baltimore/Washington</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33040</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.366548</td>\n",
|
||
" <td>0.024844</td>\n",
|
||
" <td>0.002970</td>\n",
|
||
" <td>0.004787</td>\n",
|
||
" <td>0.001028</td>\n",
|
||
" <td>0.044649</td>\n",
|
||
" <td>0.044121</td>\n",
|
||
" <td>0.036030</td>\n",
|
||
" <td>0.019937</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>Total U.S.</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33041</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.167260</td>\n",
|
||
" <td>0.091202</td>\n",
|
||
" <td>0.059484</td>\n",
|
||
" <td>0.028776</td>\n",
|
||
" <td>0.007753</td>\n",
|
||
" <td>0.119620</td>\n",
|
||
" <td>0.106938</td>\n",
|
||
" <td>0.114914</td>\n",
|
||
" <td>0.043846</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>West</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33042</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.370107</td>\n",
|
||
" <td>0.004550</td>\n",
|
||
" <td>0.000584</td>\n",
|
||
" <td>0.000945</td>\n",
|
||
" <td>0.000250</td>\n",
|
||
" <td>0.008101</td>\n",
|
||
" <td>0.005966</td>\n",
|
||
" <td>0.010062</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>West</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33043</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.081851</td>\n",
|
||
" <td>0.012913</td>\n",
|
||
" <td>0.010319</td>\n",
|
||
" <td>0.003918</td>\n",
|
||
" <td>0.004141</td>\n",
|
||
" <td>0.015696</td>\n",
|
||
" <td>0.013906</td>\n",
|
||
" <td>0.015817</td>\n",
|
||
" <td>0.000577</td>\n",
|
||
" <td>conventional</td>\n",
|
||
" <td>West Tex/New Mexico</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>33044</th>\n",
|
||
" <td>2020-11-29</td>\n",
|
||
" <td>0.323843</td>\n",
|
||
" <td>0.000377</td>\n",
|
||
" <td>0.000054</td>\n",
|
||
" <td>0.000030</td>\n",
|
||
" <td>0.000615</td>\n",
|
||
" <td>0.000653</td>\n",
|
||
" <td>0.000867</td>\n",
|
||
" <td>0.000215</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>organic</td>\n",
|
||
" <td>West Tex/New Mexico</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>33045 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" date average_price total_volume 4046 4225 4770 \\\n",
|
||
"0 2015-01-04 0.277580 0.000640 0.000124 0.001382 0.000020 \n",
|
||
"1 2015-01-04 0.480427 0.000020 0.000003 0.000008 0.000000 \n",
|
||
"2 2015-01-04 0.199288 0.006826 0.016018 0.001164 0.000032 \n",
|
||
"3 2015-01-04 0.469751 0.000059 0.000066 0.000046 0.000000 \n",
|
||
"4 2015-01-04 0.227758 0.012366 0.002374 0.027010 0.015706 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"33040 2020-11-29 0.366548 0.024844 0.002970 0.004787 0.001028 \n",
|
||
"33041 2020-11-29 0.167260 0.091202 0.059484 0.028776 0.007753 \n",
|
||
"33042 2020-11-29 0.370107 0.004550 0.000584 0.000945 0.000250 \n",
|
||
"33043 2020-11-29 0.081851 0.012913 0.010319 0.003918 0.004141 \n",
|
||
"33044 2020-11-29 0.323843 0.000377 0.000054 0.000030 0.000615 \n",
|
||
"\n",
|
||
" total_bags small_bags large_bags xlarge_bags type \\\n",
|
||
"0 0.000307 0.000447 0.000040 0.000000 conventional \n",
|
||
"1 0.000037 0.000057 0.000000 0.000000 organic \n",
|
||
"2 0.001477 0.000813 0.002259 0.000000 conventional \n",
|
||
"3 0.000044 0.000052 0.000025 0.000000 organic \n",
|
||
"4 0.004454 0.006674 0.000299 0.000000 conventional \n",
|
||
"... ... ... ... ... ... \n",
|
||
"33040 0.044649 0.044121 0.036030 0.019937 organic \n",
|
||
"33041 0.119620 0.106938 0.114914 0.043846 conventional \n",
|
||
"33042 0.008101 0.005966 0.010062 0.000000 organic \n",
|
||
"33043 0.015696 0.013906 0.015817 0.000577 conventional \n",
|
||
"33044 0.000653 0.000867 0.000215 0.000000 organic \n",
|
||
"\n",
|
||
" geography \n",
|
||
"0 Albany \n",
|
||
"1 Albany \n",
|
||
"2 Atlanta \n",
|
||
"3 Atlanta \n",
|
||
"4 Baltimore/Washington \n",
|
||
"... ... \n",
|
||
"33040 Total U.S. \n",
|
||
"33041 West \n",
|
||
"33042 West \n",
|
||
"33043 West Tex/New Mexico \n",
|
||
"33044 West Tex/New Mexico \n",
|
||
"\n",
|
||
"[33045 rows x 12 columns]"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# dropna() returns a new frame and leaves avocado itself untouched, so bind the\n",
"# result to a name instead of only displaying it.\n",
"avocado_clean = avocado.dropna()\n",
"avocado_clean"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.9.1"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|