ium_434742/laby-inz-um.ipynb

2506 lines
115 KiB
Plaintext
Raw Permalink Normal View History

2021-03-21 16:37:25 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# OPIS\n",
"\n",
"#### Dataset zawiera dane dotyczące cen awokado Hass i ich sprzedaży w wybranych regionach Stanów Zjednoczonych.\n",
"\n",
"#### Opis kolumn:\n",
"- date - data obserwacji\n",
"- average_price - średnia cena pojedynczego awokado\n",
"- type - zwykłe (conventional) lub organiczne (organic)\n",
"- year - rok obserwacji\n",
"- geography - miasto/region obserwacji\n",
"- total_volume - liczba sprzedanych awokado\n",
"- 4046 - liczba sprzedanych awokado z kodem PLU 4046 (małe)\n",
"- 4225 - liczba sprzedanych awokado z kodem PLU 4225 (duże)\n",
"- 4770 - liczba sprzedanych awokado z kodem PLU 4770 (bardzo duże)\n",
"- total_bags, small_bags, large_bags, xlarge_bags - liczba sprzedanych toreb awokado (łącznie oraz w podziale na rozmiar torby)\n"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 20,
2021-03-21 16:37:25 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (1.5.12)\n",
"Requirement already satisfied: six>=1.10 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (1.15.0)\n",
2021-03-21 16:49:08 +01:00
"Requirement already satisfied: requests in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (2.25.1)\n",
2021-03-21 16:37:25 +01:00
"Requirement already satisfied: python-dateutil in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (2.8.1)\n",
"Requirement already satisfied: python-slugify in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (4.0.1)\n",
2021-03-21 16:49:08 +01:00
"Requirement already satisfied: urllib3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (1.26.2)\n",
"Requirement already satisfied: tqdm in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (4.59.0)\n",
"Requirement already satisfied: certifi in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from kaggle) (2020.12.5)\n",
2021-03-21 16:37:25 +01:00
"Requirement already satisfied: text-unidecode>=1.3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from requests->kaggle) (4.0.0)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from requests->kaggle) (2.10)\n",
"OOOOOOOOO /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/bin/python\n",
"Requirement already satisfied: pandas in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (1.2.3)\n",
"Requirement already satisfied: numpy>=1.16.5 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from pandas) (1.20.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from pandas) (2.8.1)\n",
2021-03-21 16:49:08 +01:00
"Requirement already satisfied: pytz>=2017.3 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from pandas) (2020.4)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/Cellar/jupyterlab/3.0.0_1/libexec/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas) (1.15.0)\n",
"Requirement already satisfied: sklearn in /usr/local/lib/python3.9/site-packages (0.0)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.9/site-packages (from sklearn) (0.24.1)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (1.0.1)\n",
"Requirement already satisfied: scipy>=0.19.1 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (1.6.1)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (2.1.0)\n",
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.9/site-packages (from scikit-learn->sklearn) (1.20.1)\n"
2021-03-21 16:37:25 +01:00
]
}
],
"source": [
"import sys\n",
"\n",
"# Install every dependency with the interpreter that runs this kernel, so the\n",
"# packages land in the same environment the notebook imports from.\n",
"!{sys.executable} -m pip install kaggle\n",
"# Print the kernel's interpreter path for reference.\n",
"!echo OOOOOOOOO {sys.executable}\n",
"!{sys.executable} -m pip install pandas\n",
"# The PyPI name `sklearn` is only a deprecated alias; the real distribution is\n",
"# `scikit-learn`. A bare `python3` may also point at a different environment\n",
"# than the kernel, so use sys.executable here as well.\n",
"!{sys.executable} -m pip install scikit-learn"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pobranie zbioru."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 23,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [],
"source": [
"!kaggle datasets download -d timmate/avocado-prices-2020"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 22,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2021-03-21 16:49:08 +01:00
"Archive: avocado-prices-2020.zip\n",
" inflating: avocado-updated-2020.csv \n"
2021-03-21 16:37:25 +01:00
]
}
],
"source": [
"!unzip -o avocado-prices-2020.zip\n"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 24,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"date,average_price,total_volume,4046,4225,4770,total_bags,small_bags,large_bags,xlarge_bags,type,year,geography\r\n",
"2015-01-04,1.22,40873.28,2819.5,28287.42,49.9,9716.46,9186.93,529.53,0.0,conventional,2015,Albany\r\n",
"2015-01-04,1.79,1373.95,57.42,153.88,0.0,1162.65,1162.65,0.0,0.0,organic,2015,Albany\r\n",
"2015-01-04,1.0,435021.49,364302.39,23821.16,82.15,46815.79,16707.15,30108.64,0.0,conventional,2015,Atlanta\r\n",
"2015-01-04,1.76,3846.69,1500.15,938.35,0.0,1408.19,1071.35,336.84,0.0,organic,2015,Atlanta\r\n"
]
}
],
"source": [
"!head -n 5 avocado-updated-2020.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Usunięcie zbędnej kolumny (redundantne dane)."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 25,
2021-03-21 16:37:25 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>average_price</th>\n",
" <th>total_volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>total_bags</th>\n",
" <th>small_bags</th>\n",
" <th>large_bags</th>\n",
" <th>xlarge_bags</th>\n",
" <th>type</th>\n",
" <th>geography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-01-04</td>\n",
" <td>1.22</td>\n",
" <td>40873.28</td>\n",
" <td>2819.50</td>\n",
" <td>28287.42</td>\n",
" <td>49.90</td>\n",
" <td>9716.46</td>\n",
" <td>9186.93</td>\n",
" <td>529.53</td>\n",
" <td>0.00</td>\n",
" <td>conventional</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-01-04</td>\n",
" <td>1.79</td>\n",
" <td>1373.95</td>\n",
" <td>57.42</td>\n",
" <td>153.88</td>\n",
" <td>0.00</td>\n",
" <td>1162.65</td>\n",
" <td>1162.65</td>\n",
" <td>0.00</td>\n",
" <td>0.00</td>\n",
" <td>organic</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-01-04</td>\n",
" <td>1.00</td>\n",
" <td>435021.49</td>\n",
" <td>364302.39</td>\n",
" <td>23821.16</td>\n",
" <td>82.15</td>\n",
" <td>46815.79</td>\n",
" <td>16707.15</td>\n",
" <td>30108.64</td>\n",
" <td>0.00</td>\n",
" <td>conventional</td>\n",
" <td>Atlanta</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-01-04</td>\n",
" <td>1.76</td>\n",
" <td>3846.69</td>\n",
" <td>1500.15</td>\n",
" <td>938.35</td>\n",
" <td>0.00</td>\n",
" <td>1408.19</td>\n",
" <td>1071.35</td>\n",
" <td>336.84</td>\n",
" <td>0.00</td>\n",
" <td>organic</td>\n",
" <td>Atlanta</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-01-04</td>\n",
" <td>1.08</td>\n",
" <td>788025.06</td>\n",
" <td>53987.31</td>\n",
" <td>552906.04</td>\n",
" <td>39995.03</td>\n",
" <td>141136.68</td>\n",
" <td>137146.07</td>\n",
" <td>3990.61</td>\n",
" <td>0.00</td>\n",
" <td>conventional</td>\n",
" <td>Baltimore/Washington</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33040</th>\n",
" <td>2020-11-29</td>\n",
" <td>1.47</td>\n",
" <td>1583056.27</td>\n",
" <td>67544.48</td>\n",
" <td>97996.46</td>\n",
" <td>2617.17</td>\n",
" <td>1414878.10</td>\n",
" <td>906711.52</td>\n",
" <td>480191.83</td>\n",
" <td>27974.75</td>\n",
" <td>organic</td>\n",
" <td>Total U.S.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33041</th>\n",
" <td>2020-11-29</td>\n",
" <td>0.91</td>\n",
" <td>5811114.22</td>\n",
" <td>1352877.53</td>\n",
" <td>589061.83</td>\n",
" <td>19741.90</td>\n",
" <td>3790665.29</td>\n",
" <td>2197611.02</td>\n",
" <td>1531530.14</td>\n",
" <td>61524.13</td>\n",
" <td>conventional</td>\n",
" <td>West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33042</th>\n",
" <td>2020-11-29</td>\n",
" <td>1.48</td>\n",
" <td>289961.27</td>\n",
" <td>13273.75</td>\n",
" <td>19341.09</td>\n",
" <td>636.51</td>\n",
" <td>256709.92</td>\n",
" <td>122606.21</td>\n",
" <td>134103.71</td>\n",
" <td>0.00</td>\n",
" <td>organic</td>\n",
" <td>West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33043</th>\n",
" <td>2020-11-29</td>\n",
" <td>0.67</td>\n",
" <td>822818.75</td>\n",
" <td>234688.01</td>\n",
" <td>80205.15</td>\n",
" <td>10543.63</td>\n",
" <td>497381.96</td>\n",
" <td>285764.11</td>\n",
" <td>210808.02</td>\n",
" <td>809.83</td>\n",
" <td>conventional</td>\n",
" <td>West Tex/New Mexico</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33044</th>\n",
" <td>2020-11-29</td>\n",
" <td>1.35</td>\n",
" <td>24106.58</td>\n",
" <td>1236.96</td>\n",
" <td>617.80</td>\n",
" <td>1564.98</td>\n",
" <td>20686.84</td>\n",
" <td>17824.52</td>\n",
" <td>2862.32</td>\n",
" <td>0.00</td>\n",
" <td>organic</td>\n",
" <td>West Tex/New Mexico</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>33045 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" date average_price total_volume 4046 4225 \\\n",
"0 2015-01-04 1.22 40873.28 2819.50 28287.42 \n",
"1 2015-01-04 1.79 1373.95 57.42 153.88 \n",
"2 2015-01-04 1.00 435021.49 364302.39 23821.16 \n",
"3 2015-01-04 1.76 3846.69 1500.15 938.35 \n",
"4 2015-01-04 1.08 788025.06 53987.31 552906.04 \n",
"... ... ... ... ... ... \n",
"33040 2020-11-29 1.47 1583056.27 67544.48 97996.46 \n",
"33041 2020-11-29 0.91 5811114.22 1352877.53 589061.83 \n",
"33042 2020-11-29 1.48 289961.27 13273.75 19341.09 \n",
"33043 2020-11-29 0.67 822818.75 234688.01 80205.15 \n",
"33044 2020-11-29 1.35 24106.58 1236.96 617.80 \n",
"\n",
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
"0 49.90 9716.46 9186.93 529.53 0.00 \n",
"1 0.00 1162.65 1162.65 0.00 0.00 \n",
"2 82.15 46815.79 16707.15 30108.64 0.00 \n",
"3 0.00 1408.19 1071.35 336.84 0.00 \n",
"4 39995.03 141136.68 137146.07 3990.61 0.00 \n",
"... ... ... ... ... ... \n",
"33040 2617.17 1414878.10 906711.52 480191.83 27974.75 \n",
"33041 19741.90 3790665.29 2197611.02 1531530.14 61524.13 \n",
"33042 636.51 256709.92 122606.21 134103.71 0.00 \n",
"33043 10543.63 497381.96 285764.11 210808.02 809.83 \n",
"33044 1564.98 20686.84 17824.52 2862.32 0.00 \n",
"\n",
" type geography \n",
"0 conventional Albany \n",
"1 organic Albany \n",
"2 conventional Atlanta \n",
"3 organic Atlanta \n",
"4 conventional Baltimore/Washington \n",
"... ... ... \n",
"33040 organic Total U.S. \n",
"33041 conventional West \n",
"33042 organic West \n",
"33043 conventional West Tex/New Mexico \n",
"33044 organic West Tex/New Mexico \n",
"\n",
"[33045 rows x 12 columns]"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 25,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load the raw Kaggle export; it still contains the `year` column.\n",
"avocado_with_year = pd.read_csv('avocado-updated-2020.csv')\n",
"\n",
"# Columns to keep: every column of the raw file except `year`.\n",
"new = [\n",
"    'date', 'average_price', 'total_volume',\n",
"    '4046', '4225', '4770',\n",
"    'total_bags', 'small_bags', 'large_bags', 'xlarge_bags',\n",
"    'type', 'geography',\n",
"]\n",
"\n",
"# Write the reduced table to avocado.csv, then reload it so `avocado` is a\n",
"# standalone frame read back from that file.\n",
"avocado_with_year[new].to_csv(\"avocado.csv\", index=False)\n",
"avocado = pd.read_csv('avocado.csv')\n",
"avocado"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Podział zbioru na train/dev/test."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 26,
2021-03-21 16:37:25 +01:00
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Fixed seed so the shuffle, and therefore the membership of each subset,\n",
"# is identical on every Restart & Run All.\n",
"SPLIT_SEED = 42\n",
"\n",
"# Shuffle all rows once, then cut at 60% and 80% for a 60/20/20 split.\n",
"avocado_shuffled = avocado.sample(frac=1, random_state=SPLIT_SEED)\n",
"train_end = int(.6 * len(avocado))\n",
"validate_end = int(.8 * len(avocado))\n",
"\n",
"# Plain positional slices give the same three consecutive parts that\n",
"# np.split(frame, [train_end, validate_end]) would produce.\n",
"avocado_train = avocado_shuffled.iloc[:train_end]\n",
"avocado_validate = avocado_shuffled.iloc[train_end:validate_end]\n",
"avocado_test = avocado_shuffled.iloc[validate_end:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Podsumowanie zbioru i poszczególnych podzbiorów."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Wielkości zbioru i podzbiorów."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 27,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Avocado: 396540\n",
"Avocado (train) : 237924\n",
"Avocado (validate): 79308\n",
"Avocado (test) 79308\n"
]
}
],
"source": [
"# Report the number of rows (observations) in each set. np.size() on a\n",
"# DataFrame counts rows x columns, which overstates every set by a factor\n",
"# equal to the number of columns.\n",
"print(\"Avocado: \".ljust(20), len(avocado))\n",
"print(\"Avocado (train) : \".ljust(20), len(avocado_train))\n",
"print(\"Avocado (validate): \".ljust(20), len(avocado_validate))\n",
"print(\"Avocado (test) \".ljust(20), len(avocado_test))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Podsumowanie zbioru avocado."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 28,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>average_price</th>\n",
" <th>total_volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>total_bags</th>\n",
" <th>small_bags</th>\n",
" <th>large_bags</th>\n",
" <th>xlarge_bags</th>\n",
" <th>type</th>\n",
" <th>geography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>33045</td>\n",
" <td>33045.000000</td>\n",
" <td>3.304500e+04</td>\n",
" <td>3.304500e+04</td>\n",
" <td>3.304500e+04</td>\n",
" <td>3.304500e+04</td>\n",
" <td>3.304500e+04</td>\n",
" <td>3.304500e+04</td>\n",
" <td>3.304500e+04</td>\n",
" <td>3.304500e+04</td>\n",
" <td>33045</td>\n",
" <td>33045</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>306</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2021-03-21 16:49:08 +01:00
" <td>2017-10-01</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>conventional</td>\n",
2021-03-21 16:49:08 +01:00
" <td>Atlanta</td>\n",
2021-03-21 16:37:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>108</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>16524</td>\n",
" <td>612</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>1.379941</td>\n",
" <td>9.683997e+05</td>\n",
" <td>3.023914e+05</td>\n",
" <td>2.797693e+05</td>\n",
" <td>2.148255e+04</td>\n",
" <td>3.646735e+05</td>\n",
" <td>2.501980e+05</td>\n",
" <td>1.067329e+05</td>\n",
" <td>7.742585e+03</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>0.378972</td>\n",
" <td>3.934533e+06</td>\n",
" <td>1.301026e+06</td>\n",
" <td>1.151052e+06</td>\n",
" <td>1.001607e+05</td>\n",
" <td>1.564004e+06</td>\n",
" <td>1.037734e+06</td>\n",
" <td>5.167226e+05</td>\n",
" <td>4.819803e+04</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>0.440000</td>\n",
" <td>8.456000e+01</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>1.100000</td>\n",
" <td>1.511895e+04</td>\n",
" <td>7.673100e+02</td>\n",
" <td>2.712470e+03</td>\n",
" <td>0.000000e+00</td>\n",
" <td>9.121860e+03</td>\n",
" <td>6.478630e+03</td>\n",
" <td>4.662900e+02</td>\n",
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>1.350000</td>\n",
" <td>1.291170e+05</td>\n",
" <td>1.099477e+04</td>\n",
" <td>2.343600e+04</td>\n",
" <td>1.780900e+02</td>\n",
" <td>5.322224e+04</td>\n",
" <td>3.687699e+04</td>\n",
" <td>6.375860e+03</td>\n",
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>1.620000</td>\n",
" <td>5.058285e+05</td>\n",
" <td>1.190219e+05</td>\n",
" <td>1.352389e+05</td>\n",
" <td>5.096530e+03</td>\n",
" <td>1.744314e+05</td>\n",
" <td>1.206624e+05</td>\n",
" <td>4.041723e+04</td>\n",
" <td>8.044400e+02</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>3.250000</td>\n",
" <td>6.371614e+07</td>\n",
" <td>2.274362e+07</td>\n",
" <td>2.047057e+07</td>\n",
" <td>2.546439e+06</td>\n",
" <td>3.168919e+07</td>\n",
" <td>2.055041e+07</td>\n",
" <td>1.332760e+07</td>\n",
" <td>1.403184e+06</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date average_price total_volume 4046 4225 \\\n",
"count 33045 33045.000000 3.304500e+04 3.304500e+04 3.304500e+04 \n",
"unique 306 NaN NaN NaN NaN \n",
2021-03-21 16:49:08 +01:00
"top 2017-10-01 NaN NaN NaN NaN \n",
2021-03-21 16:37:25 +01:00
"freq 108 NaN NaN NaN NaN \n",
"mean NaN 1.379941 9.683997e+05 3.023914e+05 2.797693e+05 \n",
"std NaN 0.378972 3.934533e+06 1.301026e+06 1.151052e+06 \n",
"min NaN 0.440000 8.456000e+01 0.000000e+00 0.000000e+00 \n",
"25% NaN 1.100000 1.511895e+04 7.673100e+02 2.712470e+03 \n",
"50% NaN 1.350000 1.291170e+05 1.099477e+04 2.343600e+04 \n",
"75% NaN 1.620000 5.058285e+05 1.190219e+05 1.352389e+05 \n",
"max NaN 3.250000 6.371614e+07 2.274362e+07 2.047057e+07 \n",
"\n",
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
"count 3.304500e+04 3.304500e+04 3.304500e+04 3.304500e+04 3.304500e+04 \n",
"unique NaN NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN NaN \n",
"mean 2.148255e+04 3.646735e+05 2.501980e+05 1.067329e+05 7.742585e+03 \n",
"std 1.001607e+05 1.564004e+06 1.037734e+06 5.167226e+05 4.819803e+04 \n",
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
"25% 0.000000e+00 9.121860e+03 6.478630e+03 4.662900e+02 0.000000e+00 \n",
"50% 1.780900e+02 5.322224e+04 3.687699e+04 6.375860e+03 0.000000e+00 \n",
"75% 5.096530e+03 1.744314e+05 1.206624e+05 4.041723e+04 8.044400e+02 \n",
"max 2.546439e+06 3.168919e+07 2.055041e+07 1.332760e+07 1.403184e+06 \n",
"\n",
2021-03-21 16:49:08 +01:00
" type geography \n",
"count 33045 33045 \n",
"unique 2 54 \n",
"top conventional Atlanta \n",
"freq 16524 612 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN "
2021-03-21 16:37:25 +01:00
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 28,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for every column of the full set; include='all' also\n",
"# reports count/unique/top/freq for the non-numeric columns.\n",
"avocado.describe(include='all')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Podsumowanie podzbioru train."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 29,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>average_price</th>\n",
" <th>total_volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>total_bags</th>\n",
" <th>small_bags</th>\n",
" <th>large_bags</th>\n",
" <th>xlarge_bags</th>\n",
" <th>type</th>\n",
" <th>geography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>19827</td>\n",
" <td>19827.000000</td>\n",
" <td>1.982700e+04</td>\n",
" <td>1.982700e+04</td>\n",
" <td>1.982700e+04</td>\n",
" <td>1.982700e+04</td>\n",
" <td>1.982700e+04</td>\n",
" <td>1.982700e+04</td>\n",
" <td>1.982700e+04</td>\n",
" <td>1.982700e+04</td>\n",
" <td>19827</td>\n",
" <td>19827</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>306</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2021-03-21 16:49:08 +01:00
" <td>2018-09-23</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>organic</td>\n",
2021-03-21 16:49:08 +01:00
" <td>Sacramento</td>\n",
2021-03-21 16:37:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
2021-03-21 16:49:08 +01:00
" <td>77</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>9954</td>\n",
" <td>404</td>\n",
2021-03-21 16:37:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.380658</td>\n",
" <td>9.503549e+05</td>\n",
" <td>2.955048e+05</td>\n",
" <td>2.762023e+05</td>\n",
" <td>2.117442e+04</td>\n",
" <td>3.573659e+05</td>\n",
" <td>2.448356e+05</td>\n",
" <td>1.049736e+05</td>\n",
" <td>7.556707e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.377988</td>\n",
" <td>3.896388e+06</td>\n",
" <td>1.285945e+06</td>\n",
" <td>1.147780e+06</td>\n",
" <td>1.008332e+05</td>\n",
" <td>1.548676e+06</td>\n",
" <td>1.023617e+06</td>\n",
" <td>5.161354e+05</td>\n",
" <td>4.776408e+04</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.460000</td>\n",
" <td>2.534500e+02</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>1.100000</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.509891e+04</td>\n",
" <td>7.560400e+02</td>\n",
" <td>2.695640e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
2021-03-21 16:49:08 +01:00
" <td>9.095285e+03</td>\n",
" <td>6.430960e+03</td>\n",
" <td>4.678750e+02</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>1.350000</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.275485e+05</td>\n",
" <td>1.086294e+04</td>\n",
" <td>2.337789e+04</td>\n",
" <td>1.714100e+02</td>\n",
" <td>5.240743e+04</td>\n",
" <td>3.663295e+04</td>\n",
" <td>6.148990e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.610000</td>\n",
" <td>4.996119e+05</td>\n",
" <td>1.174216e+05</td>\n",
" <td>1.337254e+05</td>\n",
" <td>4.976950e+03</td>\n",
" <td>1.721448e+05</td>\n",
" <td>1.193927e+05</td>\n",
" <td>3.875767e+04</td>\n",
" <td>7.391950e+02</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>3.170000</td>\n",
" <td>6.371614e+07</td>\n",
" <td>2.113740e+07</td>\n",
2021-03-21 16:37:25 +01:00
" <td>2.047057e+07</td>\n",
" <td>2.546439e+06</td>\n",
2021-03-21 16:49:08 +01:00
" <td>3.168919e+07</td>\n",
" <td>2.055041e+07</td>\n",
" <td>1.332760e+07</td>\n",
2021-03-21 16:37:25 +01:00
" <td>1.403184e+06</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date average_price total_volume 4046 4225 \\\n",
"count 19827 19827.000000 1.982700e+04 1.982700e+04 1.982700e+04 \n",
"unique 306 NaN NaN NaN NaN \n",
2021-03-21 16:49:08 +01:00
"top 2018-09-23 NaN NaN NaN NaN \n",
"freq 77 NaN NaN NaN NaN \n",
"mean NaN 1.380658 9.503549e+05 2.955048e+05 2.762023e+05 \n",
"std NaN 0.377988 3.896388e+06 1.285945e+06 1.147780e+06 \n",
"min NaN 0.460000 2.534500e+02 0.000000e+00 0.000000e+00 \n",
"25% NaN 1.100000 1.509891e+04 7.560400e+02 2.695640e+03 \n",
"50% NaN 1.350000 1.275485e+05 1.086294e+04 2.337789e+04 \n",
"75% NaN 1.610000 4.996119e+05 1.174216e+05 1.337254e+05 \n",
"max NaN 3.170000 6.371614e+07 2.113740e+07 2.047057e+07 \n",
2021-03-21 16:37:25 +01:00
"\n",
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
"count 1.982700e+04 1.982700e+04 1.982700e+04 1.982700e+04 1.982700e+04 \n",
"unique NaN NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN NaN \n",
2021-03-21 16:49:08 +01:00
"mean 2.117442e+04 3.573659e+05 2.448356e+05 1.049736e+05 7.556707e+03 \n",
"std 1.008332e+05 1.548676e+06 1.023617e+06 5.161354e+05 4.776408e+04 \n",
2021-03-21 16:37:25 +01:00
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
2021-03-21 16:49:08 +01:00
"25% 0.000000e+00 9.095285e+03 6.430960e+03 4.678750e+02 0.000000e+00 \n",
"50% 1.714100e+02 5.240743e+04 3.663295e+04 6.148990e+03 0.000000e+00 \n",
"75% 4.976950e+03 1.721448e+05 1.193927e+05 3.875767e+04 7.391950e+02 \n",
"max 2.546439e+06 3.168919e+07 2.055041e+07 1.332760e+07 1.403184e+06 \n",
2021-03-21 16:37:25 +01:00
"\n",
" type geography \n",
"count 19827 19827 \n",
"unique 2 54 \n",
2021-03-21 16:49:08 +01:00
"top organic Sacramento \n",
"freq 9954 404 \n",
2021-03-21 16:37:25 +01:00
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN "
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 29,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for every column of the train subset; include='all' also\n",
"# reports count/unique/top/freq for the non-numeric columns.\n",
"avocado_train.describe(include='all')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Podsumowanie podzbioru validate."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 30,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>average_price</th>\n",
" <th>total_volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>total_bags</th>\n",
" <th>small_bags</th>\n",
" <th>large_bags</th>\n",
" <th>xlarge_bags</th>\n",
" <th>type</th>\n",
" <th>geography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6609</td>\n",
" <td>6609.000000</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6609</td>\n",
" <td>6609</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>306</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2021-03-21 16:49:08 +01:00
" <td>2020-05-03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>organic</td>\n",
" <td>Jacksonville</td>\n",
2021-03-21 16:37:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
2021-03-21 16:49:08 +01:00
" <td>35</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>3365</td>\n",
" <td>149</td>\n",
2021-03-21 16:37:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.382624</td>\n",
" <td>9.914296e+05</td>\n",
" <td>3.140144e+05</td>\n",
" <td>2.827458e+05</td>\n",
" <td>2.172480e+04</td>\n",
" <td>3.729031e+05</td>\n",
" <td>2.567059e+05</td>\n",
" <td>1.085372e+05</td>\n",
" <td>7.660065e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.380997</td>\n",
" <td>4.042527e+06</td>\n",
" <td>1.341419e+06</td>\n",
" <td>1.181393e+06</td>\n",
" <td>1.021178e+05</td>\n",
" <td>1.596924e+06</td>\n",
" <td>1.065783e+06</td>\n",
" <td>5.196275e+05</td>\n",
" <td>4.795256e+04</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.440000</td>\n",
" <td>8.456000e+01</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.100000</td>\n",
" <td>1.486299e+04</td>\n",
" <td>7.570000e+02</td>\n",
" <td>2.534810e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
2021-03-21 16:49:08 +01:00
" <td>9.007310e+03</td>\n",
" <td>6.281480e+03</td>\n",
" <td>4.562400e+02</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.350000</td>\n",
" <td>1.241199e+05</td>\n",
" <td>1.023778e+04</td>\n",
" <td>2.204006e+04</td>\n",
" <td>1.674700e+02</td>\n",
" <td>5.247009e+04</td>\n",
" <td>3.492217e+04</td>\n",
" <td>6.458780e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.620000</td>\n",
" <td>5.026773e+05</td>\n",
" <td>1.207824e+05</td>\n",
" <td>1.307007e+05</td>\n",
" <td>5.104000e+03</td>\n",
" <td>1.706264e+05</td>\n",
" <td>1.197749e+05</td>\n",
" <td>4.128634e+04</td>\n",
" <td>7.951300e+02</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>3.250000</td>\n",
" <td>6.250565e+07</td>\n",
" <td>2.274362e+07</td>\n",
" <td>2.044550e+07</td>\n",
" <td>1.800066e+06</td>\n",
" <td>2.666884e+07</td>\n",
" <td>1.740824e+07</td>\n",
" <td>1.077854e+07</td>\n",
" <td>1.123540e+06</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date average_price total_volume 4046 4225 \\\n",
"count 6609 6609.000000 6.609000e+03 6.609000e+03 6.609000e+03 \n",
"unique 306 NaN NaN NaN NaN \n",
2021-03-21 16:49:08 +01:00
"top 2020-05-03 NaN NaN NaN NaN \n",
"freq 35 NaN NaN NaN NaN \n",
"mean NaN 1.382624 9.914296e+05 3.140144e+05 2.827458e+05 \n",
"std NaN 0.380997 4.042527e+06 1.341419e+06 1.181393e+06 \n",
"min NaN 0.440000 8.456000e+01 0.000000e+00 0.000000e+00 \n",
"25% NaN 1.100000 1.486299e+04 7.570000e+02 2.534810e+03 \n",
"50% NaN 1.350000 1.241199e+05 1.023778e+04 2.204006e+04 \n",
"75% NaN 1.620000 5.026773e+05 1.207824e+05 1.307007e+05 \n",
"max NaN 3.250000 6.250565e+07 2.274362e+07 2.044550e+07 \n",
2021-03-21 16:37:25 +01:00
"\n",
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
"count 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 \n",
"unique NaN NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN NaN \n",
2021-03-21 16:49:08 +01:00
"mean 2.172480e+04 3.729031e+05 2.567059e+05 1.085372e+05 7.660065e+03 \n",
"std 1.021178e+05 1.596924e+06 1.065783e+06 5.196275e+05 4.795256e+04 \n",
2021-03-21 16:37:25 +01:00
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
2021-03-21 16:49:08 +01:00
"25% 0.000000e+00 9.007310e+03 6.281480e+03 4.562400e+02 0.000000e+00 \n",
"50% 1.674700e+02 5.247009e+04 3.492217e+04 6.458780e+03 0.000000e+00 \n",
"75% 5.104000e+03 1.706264e+05 1.197749e+05 4.128634e+04 7.951300e+02 \n",
"max 1.800066e+06 2.666884e+07 1.740824e+07 1.077854e+07 1.123540e+06 \n",
2021-03-21 16:37:25 +01:00
"\n",
2021-03-21 16:49:08 +01:00
" type geography \n",
"count 6609 6609 \n",
"unique 2 54 \n",
"top organic Jacksonville \n",
"freq 3365 149 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN "
2021-03-21 16:37:25 +01:00
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 30,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for every column of avocado_validate (numeric and non-numeric).\n",
"avocado_validate.describe(include='all')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Podsumowanie podzbioru test."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 31,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>average_price</th>\n",
" <th>total_volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>total_bags</th>\n",
" <th>small_bags</th>\n",
" <th>large_bags</th>\n",
" <th>xlarge_bags</th>\n",
" <th>type</th>\n",
" <th>geography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6609</td>\n",
" <td>6609.000000</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6.609000e+03</td>\n",
" <td>6609</td>\n",
" <td>6609</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>306</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>54</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2021-03-21 16:49:08 +01:00
" <td>2020-06-21</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>conventional</td>\n",
" <td>California</td>\n",
2021-03-21 16:37:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
2021-03-21 16:49:08 +01:00
" <td>33</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>3407</td>\n",
" <td>143</td>\n",
2021-03-21 16:37:25 +01:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.375107</td>\n",
" <td>9.995041e+05</td>\n",
" <td>3.114282e+05</td>\n",
" <td>2.874940e+05</td>\n",
" <td>2.216469e+04</td>\n",
" <td>3.783667e+05</td>\n",
" <td>2.597775e+05</td>\n",
" <td>1.102065e+05</td>\n",
" <td>8.382739e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.379902</td>\n",
" <td>3.939225e+06</td>\n",
" <td>1.305043e+06</td>\n",
" <td>1.130053e+06</td>\n",
" <td>9.608845e+04</td>\n",
" <td>1.576553e+06</td>\n",
" <td>1.051335e+06</td>\n",
" <td>5.156234e+05</td>\n",
" <td>4.971697e+04</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.480000</td>\n",
" <td>3.855500e+02</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>1.090000</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.544873e+04</td>\n",
" <td>8.225900e+02</td>\n",
" <td>2.903380e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
2021-03-21 16:49:08 +01:00
" <td>9.358110e+03</td>\n",
" <td>6.834760e+03</td>\n",
" <td>4.706000e+02</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>1.330000</td>\n",
" <td>1.409398e+05</td>\n",
" <td>1.233835e+04</td>\n",
" <td>2.530639e+04</td>\n",
" <td>2.074500e+02</td>\n",
" <td>5.576654e+04</td>\n",
" <td>3.897502e+04</td>\n",
" <td>7.182140e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>0.000000e+00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>1.610000</td>\n",
2021-03-21 16:49:08 +01:00
" <td>5.330085e+05</td>\n",
" <td>1.221341e+05</td>\n",
" <td>1.453971e+05</td>\n",
" <td>5.358790e+03</td>\n",
" <td>1.833669e+05</td>\n",
" <td>1.254250e+05</td>\n",
" <td>4.531138e+04</td>\n",
" <td>1.012940e+03</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
2021-03-21 16:49:08 +01:00
" <td>3.000000</td>\n",
" <td>5.453235e+07</td>\n",
" <td>1.707665e+07</td>\n",
" <td>1.789639e+07</td>\n",
2021-03-21 16:37:25 +01:00
" <td>1.993645e+06</td>\n",
2021-03-21 16:49:08 +01:00
" <td>2.735245e+07</td>\n",
" <td>1.791382e+07</td>\n",
" <td>1.063102e+07</td>\n",
" <td>1.181516e+06</td>\n",
2021-03-21 16:37:25 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date average_price total_volume 4046 4225 \\\n",
"count 6609 6609.000000 6.609000e+03 6.609000e+03 6.609000e+03 \n",
"unique 306 NaN NaN NaN NaN \n",
2021-03-21 16:49:08 +01:00
"top 2020-06-21 NaN NaN NaN NaN \n",
"freq 33 NaN NaN NaN NaN \n",
"mean NaN 1.375107 9.995041e+05 3.114282e+05 2.874940e+05 \n",
"std NaN 0.379902 3.939225e+06 1.305043e+06 1.130053e+06 \n",
"min NaN 0.480000 3.855500e+02 0.000000e+00 0.000000e+00 \n",
"25% NaN 1.090000 1.544873e+04 8.225900e+02 2.903380e+03 \n",
"50% NaN 1.330000 1.409398e+05 1.233835e+04 2.530639e+04 \n",
"75% NaN 1.610000 5.330085e+05 1.221341e+05 1.453971e+05 \n",
"max NaN 3.000000 5.453235e+07 1.707665e+07 1.789639e+07 \n",
2021-03-21 16:37:25 +01:00
"\n",
" 4770 total_bags small_bags large_bags xlarge_bags \\\n",
"count 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 6.609000e+03 \n",
"unique NaN NaN NaN NaN NaN \n",
"top NaN NaN NaN NaN NaN \n",
"freq NaN NaN NaN NaN NaN \n",
2021-03-21 16:49:08 +01:00
"mean 2.216469e+04 3.783667e+05 2.597775e+05 1.102065e+05 8.382739e+03 \n",
"std 9.608845e+04 1.576553e+06 1.051335e+06 5.156234e+05 4.971697e+04 \n",
2021-03-21 16:37:25 +01:00
"min 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
2021-03-21 16:49:08 +01:00
"25% 0.000000e+00 9.358110e+03 6.834760e+03 4.706000e+02 0.000000e+00 \n",
"50% 2.074500e+02 5.576654e+04 3.897502e+04 7.182140e+03 0.000000e+00 \n",
"75% 5.358790e+03 1.833669e+05 1.254250e+05 4.531138e+04 1.012940e+03 \n",
"max 1.993645e+06 2.735245e+07 1.791382e+07 1.063102e+07 1.181516e+06 \n",
2021-03-21 16:37:25 +01:00
"\n",
2021-03-21 16:49:08 +01:00
" type geography \n",
"count 6609 6609 \n",
"unique 2 54 \n",
"top conventional California \n",
"freq 3407 143 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN "
2021-03-21 16:37:25 +01:00
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 31,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics for every column of avocado_test (numeric and non-numeric).\n",
"avocado_test.describe(include='all')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Rozkład częstości przykładów dla poszczególnych klas."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 32,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Atlanta 612\n",
"St. Louis 612\n",
"New York 612\n",
"Indianapolis 612\n",
"Sacramento 612\n",
"Spokane 612\n",
"Philadelphia 612\n",
"South Carolina 612\n",
"West 612\n",
"San Francisco 612\n",
"Orlando 612\n",
"Southeast 612\n",
"Miami/Ft. Lauderdale 612\n",
"Nashville 612\n",
"Syracuse 612\n",
"Columbus 612\n",
"Detroit 612\n",
"Northern New England 612\n",
"Buffalo/Rochester 612\n",
"Raleigh/Greensboro 612\n",
"Midsouth 612\n",
"Boise 612\n",
"San Diego 612\n",
"Hartford/Springfield 612\n",
"Los Angeles 612\n",
"Total U.S. 612\n",
"Dallas/Ft. Worth 612\n",
"Great Lakes 612\n",
"Roanoke 612\n",
"Plains 612\n",
"California 612\n",
"Portland 612\n",
"Grand Rapids 612\n",
"Harrisburg/Scranton 612\n",
"Charlotte 612\n",
"Cincinnati/Dayton 612\n",
"Richmond/Norfolk 612\n",
"Houston 612\n",
"South Central 612\n",
"Northeast 612\n",
"Seattle 612\n",
"Jacksonville 612\n",
"Baltimore/Washington 612\n",
"Pittsburgh 612\n",
"Louisville 612\n",
"Boston 612\n",
"Tampa 612\n",
"Phoenix/Tucson 612\n",
"Chicago 612\n",
"Denver 612\n",
"Las Vegas 612\n",
"Albany 612\n",
"New Orleans/Mobile 612\n",
"West Tex/New Mexico 609\n",
"Name: geography, dtype: int64"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 32,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per value of the geography column in avocado.\n",
"avocado['geography'].value_counts()"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 33,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-03-21 16:49:08 +01:00
"California 143\n",
"Grand Rapids 139\n",
"Roanoke 139\n",
"Las Vegas 139\n",
"Spokane 137\n",
"Plains 135\n",
"Seattle 134\n",
"Louisville 132\n",
"Atlanta 131\n",
"Syracuse 130\n",
"New York 130\n",
"Nashville 129\n",
"Raleigh/Greensboro 129\n",
"Miami/Ft. Lauderdale 128\n",
"Phoenix/Tucson 128\n",
"Orlando 128\n",
"Hartford/Springfield 127\n",
"San Francisco 127\n",
"South Central 127\n",
"Charlotte 126\n",
"Richmond/Norfolk 126\n",
"West 126\n",
"Tampa 124\n",
"Los Angeles 124\n",
"South Carolina 122\n",
"Great Lakes 122\n",
"Total U.S. 122\n",
"Northeast 121\n",
"Cincinnati/Dayton 121\n",
"Columbus 121\n",
"Baltimore/Washington 119\n",
"Pittsburgh 119\n",
"Jacksonville 119\n",
"Portland 119\n",
"West Tex/New Mexico 118\n",
"Midsouth 118\n",
"Houston 117\n",
"Chicago 116\n",
"Buffalo/Rochester 116\n",
"New Orleans/Mobile 116\n",
"Philadelphia 115\n",
"San Diego 115\n",
"Indianapolis 115\n",
"Northern New England 114\n",
"Boston 114\n",
"Boise 114\n",
"Southeast 114\n",
"Dallas/Ft. Worth 113\n",
"Detroit 113\n",
"Albany 112\n",
"Denver 111\n",
"St. Louis 111\n",
"Harrisburg/Scranton 104\n",
"Sacramento 100\n",
2021-03-21 16:37:25 +01:00
"Name: geography, dtype: int64"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 33,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per value of the geography column in avocado_test.\n",
"avocado_test['geography'].value_counts()"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 34,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-03-21 16:49:08 +01:00
"Sacramento 404\n",
"Albany 398\n",
"Northern New England 390\n",
"Harrisburg/Scranton 388\n",
"St. Louis 385\n",
"Columbus 384\n",
"Boise 382\n",
"Indianapolis 381\n",
"Detroit 380\n",
"South Carolina 378\n",
"West Tex/New Mexico 378\n",
"Southeast 378\n",
"Nashville 377\n",
"Denver 377\n",
"Los Angeles 377\n",
"Great Lakes 376\n",
"San Diego 375\n",
"Cincinnati/Dayton 374\n",
"Boston 374\n",
"South Central 373\n",
"New Orleans/Mobile 373\n",
"Richmond/Norfolk 371\n",
"Seattle 371\n",
"Total U.S. 371\n",
"Buffalo/Rochester 370\n",
"Northeast 369\n",
"Charlotte 368\n",
"Atlanta 368\n",
"Chicago 367\n",
"San Francisco 366\n",
"Midsouth 366\n",
"Philadelphia 365\n",
"New York 363\n",
"Portland 363\n",
"Syracuse 362\n",
"Grand Rapids 361\n",
"Louisville 361\n",
"Roanoke 361\n",
"Dallas/Ft. Worth 360\n",
"Orlando 359\n",
"Tampa 359\n",
"Houston 359\n",
"Hartford/Springfield 358\n",
"Pittsburgh 357\n",
"West 356\n",
"Miami/Ft. Lauderdale 354\n",
"Baltimore/Washington 353\n",
"Phoenix/Tucson 353\n",
"Raleigh/Greensboro 345\n",
"Jacksonville 344\n",
"Las Vegas 339\n",
"California 336\n",
"Plains 335\n",
"Spokane 335\n",
2021-03-21 16:37:25 +01:00
"Name: geography, dtype: int64"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 34,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows per value of the geography column in avocado_train.\n",
"avocado_train['geography'].value_counts()"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 37,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 37,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2021-03-21 16:49:08 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEvCAYAAACnuq2HAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAXV0lEQVR4nO3df7DddX3n8efLRFC0SDBXqkkwqaTuBKqVpoDrttPCLgSLhrHqwtQlq1mz0yLVbWcU3NllF2VGrFtWqtCmEgmU8kOKklaUzSIVdys/giA/pdwFMcnwI5IAVrZg6Hv/OJ8sh8u9JLnn3nsOOc/HzJl7vu/v53vO+2Tu5HW/3+/nfL+pKiRJw+1l/W5AktR/hoEkyTCQJBkGkiQMA0kShoEkCZjd7wYma+7cubVw4cJ+tyFJLym33HLLj6tqZGz9JRsGCxcuZMOGDf1uQ5JeUpI8OF7dw0SSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CSxEv4S2cvFQtP/Xq/W9hj/PAzv9XvFvYo/m5OrZf676d7BpIkw0CStAthkGRNkkeT3DmmfkqSHyS5K8lnu+qnJRlNcm+SY7rqy1ptNMmpXfVFSW5s9cuS7DVVH06StGt2Zc/gAmBZdyHJbwLLgbdW1cHA51p9CXACcHDb5twks5LMAr4IHAssAU5sYwHOAs6uqoOAbcDKXj+UJGn37DQMqup6YOuY8u8Cn6mqp9uYR1t9OXBpVT1dVQ8Ao8Bh7TFaVfdX1TPApcDyJAGOBK5o268Fju/tI0mSdtdkzxn8IvBr7fDOt5P8aqvPAzZ2jdvUahPVXws8XlXbx9QlSTNoslNLZwP7A0cAvwpcnuQXpqyrCSRZBawCOPDAA6f77SRpaEx2z2ATcGV13AT8EzAX2Aws6Bo3v9Umqj8G7Jdk9pj6uKpqdVUtraqlIyMvuFGPJGmSJhsGXwN+EyDJLwJ7AT8G1gEnJNk7ySJgMXATcDOwuM0c2ovOSeZ1VVXAdcB72+uuAK6aZE+SpEna6WGiJJcAvwHMTbIJOB1YA6xp002fAVa0/9jvSnI5cDewHTi5qp5tr/MR4BpgFrCmqu5qb/EJ4NIknwZuBc6fws8nSdoFOw2DqjpxglUfmGD8mcCZ49SvBq4ep34/ndlGkqQ+8RvIkiTDQJJkGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJLELoRBkjVJHm13NRu77g+TVJK5bTlJzkkymuT2JId2jV2R5L72WNFV/5Ukd7RtzkmSqfpwkqRdsyt7BhcAy8YWkywAjgZ+1FU+ls59jxcDq4Dz2tj96dwu83A6dzU7Pcmcts15wIe7tnvBe0mSptdOw6Cqrge2jrPqbODjQHXVlgMXVscNwH5JXg8cA6yvqq1VtQ1YDyxr6/atqhvaPZQvBI7v6RNJknbbpM4ZJFkObK6q749ZNQ/Y2LW8qdVerL5pnLokaQbN3t0NkuwDfJLOIaIZlWQVncNPHHjggTP99pK0x5rMnsGbgEXA95P8EJgPfC/JzwObgQVdY+e32ovV549TH1dVra6qpVW1dGRkZBKtS5LGs9thUFV3VNXrqmphVS2kc2jn0Kp6GFgHnNRmFR0BPFFVDwHXAEcnmdNOHB8NXNPWPZnkiDaL6CTgqin6bJKkXbQrU0svAb4LvDnJpiQrX2T41cD9wCjw58DvAVTVVuBTwM3tcUar0cZ8qW3zf4BvTO6jSJIma6fnDKrqxJ2sX9j1vICTJxi3BlgzTn0DcMjO+pAkTR+/gSxJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSezanc7WJHk0yZ1dtT9K8oMktyf5apL9utadlmQ0yb1JjumqL2u10SSndtUXJbmx1S9LstcUfj5J0i7YlT2DC4BlY2rrgU
Oq6i3A3wOnASRZApwAHNy2OTfJrCSzgC8CxwJLgBPbWICzgLOr6iBgG/Bit9WUJE2DnYZBVV0PbB1T+x9Vtb0t3gDMb8+XA5dW1dNV9QCd+xof1h6jVXV/VT0DXAosTxLgSOCKtv1a4PjePpIkaXdNxTmDD/HcTeznARu71m1qtYnqrwUe7wqWHXVJ0gzqKQyS/EdgO3Dx1LSz0/dblWRDkg1btmyZibeUpKEw6TBI8m+B44Dfqapq5c3Agq5h81ttovpjwH5JZo+pj6uqVlfV0qpaOjIyMtnWJUljTCoMkiwDPg68u6qe6lq1Djghyd5JFgGLgZuAm4HFbebQXnROMq9rIXId8N62/Qrgqsl9FEnSZO3K1NJLgO8Cb06yKclK4AvAzwHrk9yW5E8Bquou4HLgbuCbwMlV9Ww7J/AR4BrgHuDyNhbgE8AfJBmlcw7h/Cn9hJKknZq9swFVdeI45Qn/w66qM4Ezx6lfDVw9Tv1+OrONJEl94jeQJUmGgSTJMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJXbvT2Zokjya5s6u2f5L1Se5rP+e0epKck2Q0ye1JDu3aZkUbf1+SFV31X0lyR9vmnCSZ6g8pSXpxu7JncAGwbEztVODaqloMXNuWAY6lc9/jxcAq4DzohAdwOnA4nbuanb4jQNqYD3dtN/a9JEnTbKdhUFXXA1vHlJcDa9vztcDxXfULq+MGYL8krweOAdZX1daq2gasB5a1dftW1Q1VVcCFXa8lSZohkz1ncEBVPdSePwwc0J7PAzZ2jdvUai9W3zROXZI0g3o+gdz+oq8p6GWnkqxKsiHJhi1btszEW0rSUJhsGDzSDvHQfj7a6puBBV3j5rfai9Xnj1MfV1WtrqqlVbV0ZGRkkq1LksaabBisA3bMCFoBXNVVP6nNKjoCeKIdTroGODrJnHbi+GjgmrbuySRHtFlEJ3W9liRphsze2YAklwC/AcxNsonOrKDPAJcnWQk8CLy/Db8aeCcwCjwFfBCgqrYm+RRwcxt3RlXtOCn9e3RmLL0S+EZ7SJJm0E7DoKpOnGDVUeOMLeDkCV5nDbBmnPoG4JCd9SFJmj5+A1mSZBhIkgwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkugxDJL8hyR3JbkzySVJXpFkUZIbk4wmuSzJXm3s3m15tK1f2PU6p7X6vUmO6fEzSZJ206TDIMk84PeBpVV1CDALOAE4Czi7qg4CtgEr2yYrgW2tfnYbR5IlbbuDgWXAuUlmTbYvSdLu6/Uw0WzglUlmA/sADwFHAle09WuB49vz5W2Ztv6oJGn1S6vq6ap6gM79kw/rsS9J0m6YdBhU1Wbgc8CP6ITAE8AtwONVtb0N2wTMa8/nARvbttvb+Nd218fZRpI0A3o5TDSHzl/1i4A3AK+ic5hn2iRZlWRDkg1btmyZzreSpKHSy2Gifwk8UFVbqupnwJXAO4D92mEjgPnA5vZ8M7AAoK1/DfBYd32cbZ6nqlZX1dKqWjoyMtJD65Kkbr2EwY+AI5Ls0479HwXcDVwHvLeNWQFc1Z6va8u09d+qqmr1E9pso0XAYuCmHvqSJO2m2TsfMr6qujHJFcD3gO3ArcBq4OvApUk+3Wrnt03OBy5KMgpspTODiKq6K8nldIJkO3ByVT072b4kSbtv0mEAUFWnA6ePKd/POLOBquofgfdN8DpnAmf20oskafL8BrIkyTCQJBkGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJNFjGCTZL8kVSX6Q5J4kb0+yf5L1Se5rP+e0sUlyTpLRJLcnObTrdVa08fclWTHxO0qSpkOvewafB75ZVf8MeCtwD3AqcG1VLQaubcsAx9K5v/FiYBVwHk
CS/encLe1wOndIO31HgEiSZsakwyDJa4Bfp93juKqeqarHgeXA2jZsLXB8e74cuLA6bgD2S/J64BhgfVVtraptwHp
2021-03-21 16:37:25 +01:00
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of row counts per avocado type in avocado.\n",
"# Series.value_counts() is used because the top-level pd.value_counts() is deprecated since pandas 2.1.\n",
"avocado['type'].value_counts().plot.bar()"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 38,
2021-03-21 16:37:25 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 38,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2021-03-21 16:49:08 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEvCAYAAACnuq2HAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASaklEQVR4nO3de7BdZX3G8e9jIl4LBIlUE8ZQzbQTr9AMYm2djrQQFAzjrTi1ZjA1nRZvbWcUnGlpVTri2FLRSpsKGCgKiBfwyqSAl1ZFDkJBQMsZEUmGy9EEUKkX7K9/7DdlJ54Qztk5Z22yvp+ZPXu973rXPr9kMnnOete71k5VIUnqt0d0XYAkqXuGgSTJMJAkGQaSJAwDSRKGgSQJWNh1AbO1//7717Jly7ouQ5IeNq6++urvV9Xi6fY9bMNg2bJlTExMdF2GJD1sJLl1Z/ucJpIkGQaSJMNAkoRhIEniIYRBkrOS3JXkm0N9+yXZmOTm9r6o9SfJ6Ukmk1yX5JChY9a08TcnWTPU/5tJrm/HnJ4ku/sPKUl6cA/lzOBDwKod+k4ELquq5cBlrQ1wFLC8vdYBZ8AgPICTgecChwInbwuQNuZ1Q8ft+LMkSXNsl2FQVV8CtuzQvRrY0LY3AMcO9Z9TA18D9k3yJOBIYGNVbamqrcBGYFXbt3dVfa0Gz9I+Z+izJEnzZLbXDA6oqtvb9h3AAW17CXDb0LhNre/B+jdN0z+tJOuSTCSZmJqammXpkqQdjXzTWVVVknn5hpyqWg+sB1i5cuXYfyvPshM/03UJe5TvvuvFXZcg7bFme2ZwZ5viob3f1fo3AwcOjVva+h6sf+k0/ZKkeTTbM4NLgDXAu9r7xUP9r09yPoOLxfdU1e1JLgX+buii8RHASVW1Jcm9SQ4DrgReA7xvljVJmgHPXHevh/uZ6y7DIMlHgN8F9k+yicGqoHcBFyZZC9wKvLIN/yzwImASuA84HqD9p/8O4Ko27u1Vte2i9J8xWLH0GOBz7SVJmke7DIOqetVOdh0+zdgCTtjJ55wFnDVN/wTwjF3VIUmaO96BLEkyDCRJhoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEmMGAZJ/jzJDUm+meQjSR6d5KAkVyaZTHJBkr3a2Ee19mTbv2zoc05q/d9OcuSIfyZJ0gzNOgySLAHeCKysqmcAC4DjgFOB06rqacBWYG07ZC2wtfWf1saRZEU77unAKuADSRbMti5J0syNOk20EHhMkoXAY4HbgRcCF7X9G4Bj2/bq1qbtPzxJWv/5VfXTqroFmAQOHbEuSdIMzDoMqmoz8B7gewxC4B7gauDuqrq/DdsELGnbS4Db2rH3t/FPGO6f5hhJ0jwYZZpoEYPf6g8Cngw8jsE0z5xJsi7JRJKJqampufxRktQro0wT/R5wS1VNVdXPgY8Dzwf2bdNGAEuBzW17M3AgQNu/D/CD4f5pjtlOVa2vqpVVtXLx4sUjlC5JGjZKGHwPOCzJY9vc/+HAjcAVwMvbmDXAxW37ktam7b+8qqr1H9dWGx0ELAe+PkJdkqQZWrjrIdOrqiuTXAR8A7gfuAZYD3wGOD/JO1vfme2QM4Fzk0wCWxisIKKqbkhyIYMguR84oap+Mdu6JEkzN+swAKiqk4GTd+j+DtOsBqqqnwCv2MnnnAKcMkotkqTZ8w5kSZJhIEkyDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCQxYhgk2TfJRUm+le
SmJM9Lsl+SjUlubu+L2tgkOT3JZJLrkhwy9Dlr2vibk6wZ9Q8lSZqZUc8M3gt8vqp+A3g2cBNwInBZVS0HLmttgKOA5e21DjgDIMl+wMnAc4FDgZO3BYgkaX7MOgyS7AO8ADgToKp+VlV3A6uBDW3YBuDYtr0aOKcGvgbsm+RJwJHAxqraUlVbgY3AqtnWJUmauVHODA4CpoCzk1yT5INJHgccUFW3tzF3AAe07SXAbUPHb2p9O+uXJM2TUcJgIXAIcEZVHQz8mAemhACoqgJqhJ+xnSTrkkwkmZiamtpdHytJvTdKGGwCNlXVla19EYNwuLNN/9De72r7NwMHDh2/tPXtrP+XVNX6qlpZVSsXL148QumSpGGzDoOqugO4Lcmvt67DgRuBS4BtK4LWABe37UuA17RVRYcB97TppEuBI5IsaheOj2h9kqR5snDE498AnJdkL+A7wPEMAubCJGuBW4FXtrGfBV4ETAL3tbFU1ZYk7wCuauPeXlVbRqxLkjQDI4VBVV0LrJxm1+HTjC3ghJ18zlnAWaPUIkmaPe9AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiR2QxgkWZDkmiSfbu2DklyZZDLJBUn2av2Pau3Jtn/Z0Gec1Pq/neTIUWuSJM3M7jgzeBNw01D7VOC0qnoasBVY2/rXAltb/2ltHElWAMcBTwdWAR9IsmA31CVJeohGCoMkS4EXAx9s7QAvBC5qQzYAx7bt1a1N2394G78aOL+qflpVtwCTwKGj1CVJmplRzwz+EXgL8L+t/QTg7qq6v7U3AUva9hLgNoC2/542/v/7pzlGkjQPZh0GSY4G7qqqq3djPbv6meuSTCSZmJqamq8fK0l7vFHODJ4PvCTJd4HzGUwPvRfYN8nCNmYpsLltbwYOBGj79wF+MNw/zTHbqar1VbWyqlYuXrx4hNIlScNmHQZVdVJVLa2qZQwuAF9eVX8IXAG8vA1bA1zcti9pbdr+y6uqWv9xbbXRQcBy4OuzrUuSNHMLdz1kxt4KnJ/kncA1wJmt/0zg3CSTwBYGAUJV3ZDkQuBG4H7ghKr6xRzUJUnaid0SBlX1BeALbfs7TLMaqKp+ArxiJ8efApyyO2qRJM2cdyBLkgwDSZJhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIkRwiDJgUmuSHJjkhuSvKn175dkY5Kb2/ui1p8kpyeZTHJdkkOGPmtNG39zkjWj/7EkSTMxypnB/cBfVtUK4DDghCQrgBOBy6pqOXBZawMcBSxvr3XAGTAID+Bk4LnAocDJ2wJEkjQ/Zh0GVXV7VX2jbf8QuAlYAqwGNrRhG4Bj2/Zq4Jwa+Bqwb5InAUcCG6tqS1VtBTYCq2ZblyRp5nbLNYMky4CDgSuBA6rq9rbrDuCAtr0EuG3osE2tb2f9kqR5MnIYJHk88DHgzVV17/C+qiqgRv0ZQz9rXZKJJBNTU1O762MlqfdGCoMkj2QQBOdV1cdb951t+of2flfr3wwcOHT40ta3s/5fUlXrq2plVa1cvHjxKKVLkoaMspoowJnATVX1D0O7LgG2rQhaA1w81P+atqroMOCeNp10KXBEkkXtwvERrU+SNE8WjnDs84E/Aq5Pcm3rexvwLuDCJGuBW4FXtn2fBV4ETAL3AccDVNWWJO8Armrj3l5VW0aoS5I0Q7MOg6r6DyA72X34NOMLOGEnn3UWcNZsa5EkjcY7kCVJhoEkyTCQJGEYSJIwDC
RJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpI
2021-03-21 16:37:25 +01:00
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of row counts per avocado type in avocado_train.\n",
"# Series.value_counts() is used because the top-level pd.value_counts() is deprecated since pandas 2.1.\n",
"avocado_train['type'].value_counts().plot.bar()"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 39,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 39,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2021-03-21 16:49:08 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEvCAYAAACpPxGtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAVKklEQVR4nO3df7DldX3f8efL5YcmaljKleIuutRsJ7Mmuji3gDVtrY6wUA3ENBZmEimhru2A1UmmDTptMRpmNBNlYqK0GNDVoIi/wkZJ6AYZ0SYCi274KeWWH8PuIGxcfmhoSaDv/nE+Ww7r3b13d++eL57P8zFz5ny/7+/3e877zO6+7nc/5/P93lQVkqQ+PGfoBiRJk2PoS1JHDH1J6oihL0kdMfQlqSOGviR15KChG9iTI444olatWjV0G5L0Y+Wmm27666qamW/bgqGf5LnAdcChbf8vVNX5ST4J/DPg0bbrv66qLUkC/B5wCvB4q3+7vdaZwH9q+/92VW3Y03uvWrWKzZs3L9SiJGlMkvt2t20xZ/pPAK+rqh8mORj4ZpI/bdv+Q1V9YZf9TwZWt8fxwEXA8UkOB84HZoECbkqysaoe3ruPI0naVwuO6dfID9vqwe2xp8t4TwU+1Y77FnBYkqOAk4BNVbWjBf0mYN3+tS9J2huL+iI3ybIkW4CHGAX39W3TBUluTnJhkkNbbQVw/9jhW1ttd3VJ0oQsKvSr6qmqWgusBI5L8rPAu4GfAf4RcDjwm0vRUJL1STYn2bx9+/aleElJUrNXUzar6hHgWmBdVT3QhnCeAD4BHNd22wYcPXbYylbbXX3X97i4qmaranZmZt4vnyVJ+2jB0E8yk+Swtvw84A3Ad9s4PW22zmnAre2QjcBbM3IC8GhVPQBcDZyYZHmS5cCJrSZJmpDFzN45CtiQZBmjHxJXVNVXknwtyQwQYAvwb9v+VzGarjnHaMrmWQBVtSPJ+4Eb237vq6odS/ZJJEkLyrP5fvqzs7PlPH1J2jtJbqqq2fm2PauvyP1xseq8rw7dwlS59wP/YugWpKnlvXckqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFvuCZNOW8IuHSm4WaAnulLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktSRBUM/yXOT3JDkr5LcluS3Wv2YJNcnmUvyuSSHtPqhbX2ubV819lrvbvU7k5x0wD6VJGleiznTfwJ4XVW9ElgLrEtyAvBB4MKq+mngYeDstv/ZwMOtfmHbjyRrgNOBlwPrgI8lWbaEn0WStIAFQ79GfthWD26PAl4HfKHVNwCnteVT2zpt++uTpNUvr6onquoeYA44bik+hCRpcRY1pp9kWZItwEPAJuB/AY9U1ZNtl63Aira8ArgfoG1/FPh74/V5jpEkTcCiQr+qnqqqtcBKRmfnP3OgGkqyPsnmJJu3b99+oN5Gkrq0V7N3quoR4Frg1cBhSXbesG0lsK0tbwOOBmjbfwr4/nh9nmPG3+PiqpqtqtmZmZm9aU+StIDFzN6ZSXJYW34e8AbgDkbh/y/bbmcCV7bljW2dtv1rVVWtfnqb3XMMsBq4YYk+hyRpERZza+WjgA1tps1zgCuq6itJbgcuT/LbwHeAS9r+lwCfTjIH7GA0Y4equi3JFcDtwJPAOVX11NJ+HEnSniwY+lV1M3DsPPW7mWf2TVX9H+CXd/NaFwAX7H2bkqSl4BW5ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpIwuGfpKjk1yb5PYktyV5Z6u/N8m2JFva45SxY96dZC7JnUlOGquva7W5JOcdmI8kSdqdgxaxz5PAb1TVt5O8ALgpyaa27c
Kq+t3xnZOsAU4HXg68GPjzJP+wbf4o8AZgK3Bjko1VdftSfBBJ0sIWDP2qegB4oC3/IMkdwIo9HHIqcHlVPQHck2QOOK5tm6uquwGSXN72NfQlaUL2akw/ySrgWOD6Vjo3yc1JLk2yvNVWAPePHba11XZX3/U91ifZnGTz9u3b96Y9SdICFh36SZ4PfBF4V1U9BlwEvAxYy+h/Ah9aioaq6uKqmq2q2ZmZmaV4SUlSs5gxfZIczCjwL6uqLwFU1YNj2z8OfKWtbgOOHjt8Zauxh7okaQIWM3snwCXAHVX14bH6UWO7/SJwa1veCJye5NAkxwCrgRuAG4HVSY5JcgijL3s3Ls3HkCQtxmLO9F8D/CpwS5ItrfYe4Iwka4EC7gXeDlBVtyW5gtEXtE8C51TVUwBJzgWuBpYBl1bVbUv2SSRJC1rM7J1vApln01V7OOYC4IJ56lft6ThJ0oHlFbmS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHVkw9JMcneTaJLcnuS3JO1v98CSbktzVnpe3epJ8JMlckpuTvGrstc5s+9+V5MwD97EkSfNZzJn+k8BvVNUa4ATgnCRrgPOAa6pqNXBNWwc4GVjdHuuBi2D0QwI4HzgeOA44f+cPCknSZCwY+lX1QFV9uy3/ALgDWAGcCmxou20ATmvLpwKfqpFvAYclOQo4CdhUVTuq6mFgE7BuKT+MJGnP9mpMP8kq4FjgeuDIqnqgbfoecGRbXgHcP3bY1lbbXV2SNCGLDv0kzwe+CLyrqh4b31ZVBdRSNJRkfZLNSTZv3759KV5SktQsKvSTHMwo8C+rqi+18oNt2Ib2/FCrbwOOHjt8Zavtrv4MVXVxVc1W1ezMzMzefBZJ0gIWM3snwCXAHVX14bFNG4GdM3DOBK4cq7+1zeI5AXi0DQNdDZyYZHn7AvfEVpMkTchBi9jnNcCvArck2dJq7wE+AFyR5GzgPuAtbdtVwCnAHPA4cBZAVe1I8n7gxrbf+6pqx1J8CEnS4iwY+lX1TSC72fz6efYv4JzdvNalwKV706Akael4Ra4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRxYM/SSXJnkoya1jtfcm2ZZkS3ucMrbt3UnmktyZ5KSx+rpWm0ty3tJ/FEnSQhZzpv9JYN089Quram17XAWQZA1wOvDydszHkixLsgz4KHAysAY4o+0rSZqggxbaoaquS7Jqka93KnB5VT0B3JNkDjiubZurqrsBklze9r1971uWJO2r/RnTPzfJzW34Z3mrrQDuH9tna6vtri5JmqB9Df2LgJcBa4EHgA8tVUNJ1ifZnGTz9u3bl+plJUnsY+hX1YNV9VRV/V/g4zw9hLMNOHps15Wttrv6fK99cVXNVtXszMzMvrQnSdqNfQr9JEeNrf4isHNmz0bg9CSHJjkGWA3cANwIrE5yTJJDGH3Zu3Hf25Yk7YsFv8hN8lngtcARSbYC5wOvTbIWKOBe4O0AVXVbkisYfUH7JHBOVT3VXudc4GpgGXBpVd221B9GkrRni5m9c8Y85Uv2sP8FwAXz1K8Crtqr7iRJS8orciWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcWDP0klyZ5KMmtY7XDk2xKcld7Xt7qSfKRJHNJbk7yqrFjzmz735XkzAPzcSRJe7KYM/1PAut2qZ0HXFNVq4Fr2jrAycDq9lgPXASjHx
LA+cDxwHHA+Tt/UEiSJmfB0K+q64Adu5RPBTa05Q3AaWP1T9XIt4DDkhwFnARsqqodVfUwsIkf/UEiSTrA9nVM/8i
2021-03-21 16:37:25 +01:00
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of row counts per avocado type in avocado_test.\n",
"# Series.value_counts() is used because the top-level pd.value_counts() is deprecated since pandas 2.1.\n",
"avocado_test['type'].value_counts().plot.bar()"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 40,
2021-03-21 16:37:25 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot:>"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 40,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2021-03-21 16:49:08 +01:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAASkElEQVR4nO3df6zddX3H8ecbSikKoyjkDkrdrZEw0I4NbxBDYq6yQYXFkgxNA9HWYLo4RFy6zGKy1apETFSGbNM0lq0asHRopAOca4ATsz+o/BCtUBl3UKAdilKoVgF39b0/zufitd7be27v+XHP/TwfyUm/Pz7n+/18+j33db7n8/2ez4nMRJJUh8N6XQFJUvcY+pJUEUNfkipi6EtSRQx9SarIvF5X4GCOP/74HBwcbPt2f/7zn/PKV76y7dvtNdvVf+Zq22xXb91///0/ycwTJlo3q0N/cHCQ++67r+3bbTQaDA8Pt327vWa7+s9cbZvt6q2IeGKydXbvSFJFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRWb1N3I1PTv27GPV2tt7su9d11zYk/1Kmh7P9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxGEY1BaDHRz+Yc3S0UmHl3D4B2l6PNOXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqSEuhHxF/HREPRcT3I+IrEbEgIpZExPaIGImImyNifil7ZJkfKesHx23nqrL8kYg4v0NtkiRNYsrQj4hFwAeBocx8A3A4sAL4FHBtZr4OeA64rDzlMuC5svzaUo6IOL087/XAMuCfI+Lw9jZHknQwrXbvzAOOioh5wCuAp4G3AbeU9ZuAi8r08jJPWX9uRERZvjkzX8rMx4ER4KwZt0CS1LIpf0QlM/dExKeBJ4EXgP8E7geez8zRUmw3sKhMLwKeKs8djYh9wKvL8nvGbXr8c14WEauB1QADAwM0Go3pt2oK+/fv78h2e23gqOYPjsw1B2tXvx/HufpatF2z15ShHxHH0TxLXwI8D/wbze6ZjsjMDcAGgKGhoRweHm77PhqNBp3Ybq9df+OtfGbH3PsxtDVLRydt165Lh7tbmTabq69F2zV7tdK986fA45n548z8P+BrwDnAwtLdA3AysKdM7wEWA5T1xwLPjl8+wXMkSV3QSug/CZwdEa8offPnAg8DdwMXlzIrgVvL9NYyT1l/V2ZmWb6i3N2zBDgF+HZ7miFJakUrffrbI+IW4AFgFPgOze6X24HNEfGJsmxjecpG4MsRMQLspXnHDpn5UERsofmGMQpcnpm/anN7JEkH0VIHcGauA9YdsPgxJrj7JjNfBN45yXauBq6eZh0lSW3iN3IlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekihj6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKtBT6EbEwIm6JiB9ExM6IeHNEvCoitkXEo+Xf40rZiIjPRcRIRHwvIs4ct52VpfyjEbGyU42SJE2s1TP964D/yMw/BM4AdgJrgTsz8xTgzjIP8HbglPJYDXweICJeBawD3gScBawbe6OQJHXHlKEfEccCbwE2AmTmLzPzeWA5sKkU2wRcVKaXA1/KpnuAhRFxInA+sC0z92bmc8A2YFkb2yJJmsK8FsosAX4M/EtEnAHcD1wJDGTm06XMD4GBMr0IeGrc83eXZZMt/y0RsZrmJwQGBgZoNBqttqVl+/fv78h2e23gKFizdLTX1Wi7g7Wr34/jXH0t2q
7Zq5XQnwecCVyRmdsj4jp+05UDQGZmRGQ7KpSZG4ANAENDQzk8PNyOzf6WRqNBJ7bba9ffeCuf2dHKIe0va5aOTtquXZcOd7cybTZXX4u2a/ZqpU9/N7A7M7eX+Vtovgn8qHTbUP59pqzfAywe9/yTy7LJlkuSumTK0M/MHwJPRcSpZdG5wMPAVmDsDpyVwK1leivwnnIXz9nAvtIN9E3gvIg4rlzAPa8skyR1Sat9AVcAN0bEfOAx4L003zC2RMRlwBPAu0rZO4ALgBHgF6Usmbk3Ij4O3FvKfSwz97alFZKklrQU+pn5IDA0wapzJyibwOWTbOcG4IZp1E86qMG1t/ds37uuubBn+5YOld/IlaSKGPqSVBFDX5IqYuhLUkUMfUmqiKEvSRUx9CWpIoa+JFXE0Jekisy9IRkrtvSwx9m1YF3X9zv44k1d36ekQ+OZviRVxNCXpIoY+pJUEUNfkirihVzN2K4Fl3R0+43D1v/OBWovHkuHxjN9SaqIoS9JFTH0Jakihr4kVcQLuR3Qq99t/dczerJbSX3EM31JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0Jakihr4kVcTQl6SKGPqSVBFDX5IqYuhLUkVaDv2IODwivhMRt5X5JRGxPSJGIuLmiJhflh9Z5kfK+sFx27iqLH8kIs5ve2skSQc1nTP9K4Gd4+Y/BVybma8DngMuK8svA54ry68t5YiI04EVwOuBZcA/R8ThM6u+JGk6Wgr9iDgZuBD4YpkP4G3ALaXIJuCiMr28zFPWn1vKLwc2Z+ZLmfk4MAKc1YY2SJJa1Opv5P4D8LfAMWX+1cDzmTla5ncDi8r0IuApgMwcjYh9pfwi4J5x2xz/nJdFxGpgNcDAwACNRqPFKrZu//79HdnumDVLR6cu1AH7jzyJxqnre7LvTpqoXWt+3Zv/4/Ha8Rrq9GuxV2zX7DVl6EfEnwPPZOb9ETHc6Qpl5gZgA8DQ0FAOD7d/l41Gg05sd8yqnv0w+uMMP7KuJ/vupMap63+nXatevKlHtfmNXZcOz3gbnX4t9ortmr1aOdM/B3hHRFwALAB+D7gOWBgR88rZ/snAnlJ+D7AY2B0R84BjgWfHLR8z/jmSpC6Ysk8/M6/KzJMzc5Dmhdi7MvNS4G7g4lJsJXBrmd5a5inr78rMLMtXlLt7lgCnAN9uW0skSVNqtU9/Ih8GNkfEJ4DvABvL8o3AlyNiBNhL842CzHwoIrYADwOjwOWZ+asZ7F+SNE3TCv3MbACNMv0YE9x9k5kvAu+c5PlXA1dPt5KSpPbwG7mSVBFDX5IqYuhLUkUMfUmqiKEvSRWZyS2bUs/sWnBJ1/c5OAu+BSzNlGf6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkipi6EtSRQx9SaqIoS9JFTH0JakiDq3cIb0Y+rfB+q7vU1J/8Uxfkipi6EtSRQx9SaqIoS9JFfFCrnSIBtfePuNtrFk6yqppbmfXNRfOeL+ql2f6klQRQ1+SKmLoS1JFDH1JqoihL0kVMfQlqSKGviRVxNCXpIoY+pJUEUNfkioyZehHxOKIuDsiHo6IhyLiyrL8VRGxLSIeLf8eV5ZHRHwuIkYi4nsRcea4ba0s5R+NiJWda5YkaSKtnOmPAmsy83TgbODyiDgdWAvcmZmnAHeWeYC3A6eUx2rg89B8kwDWAW8CzgLWjb1RSJK6Y8rQz8ynM/OBMv0zYCewCFgObCrFNgEXlenlwJey6R5gYUScCJwPbMvMvZn5HLANWNbOxkiSDi4ys/XCEYPAt4A3AE9m5sKyPIDnMnNhRNwGXJOZ/1XW3Ql8GBgGFmTmJ8ryvwNeyMxPH7CP1TQ/ITAwMPDGzZs3z6R9E9q/fz9HH31027c7ZseefSw97PGObX8y+488iaNf+t+u77fTZku7dvx6Sdu3OXAU/OiF6T1n6a
Jj216Pduv031iv9Eu73vrWt96fmUMTrWt5aOWIOBr4KvChzPxpM+ebMjMjovV3j4PIzA3ABoChoaEcHh5ux2Z/S6P
2021-03-21 16:37:25 +01:00
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Overlay the average_price distribution of the full dataset and of each split\n",
"# on a single axes; labels + legend make the four series distinguishable.\n",
"ax = avocado['average_price'].hist(alpha=0.5, label='all')\n",
"avocado_train['average_price'].hist(ax=ax, alpha=0.5, label='train')\n",
"avocado_validate['average_price'].hist(ax=ax, alpha=0.5, label='validate')\n",
"avocado_test['average_price'].hist(ax=ax, alpha=0.5, label='test')\n",
"ax.set(title='average_price distribution per split', xlabel='average_price', ylabel='count')\n",
"ax.legend();"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Normalizacja wartości."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 41,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>average_price</th>\n",
" <th>total_volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>total_bags</th>\n",
" <th>small_bags</th>\n",
" <th>large_bags</th>\n",
" <th>xlarge_bags</th>\n",
" <th>type</th>\n",
" <th>geography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-01-04</td>\n",
" <td>0.277580</td>\n",
" <td>0.000640</td>\n",
" <td>0.000124</td>\n",
" <td>0.001382</td>\n",
" <td>0.000020</td>\n",
" <td>0.000307</td>\n",
" <td>0.000447</td>\n",
" <td>0.000040</td>\n",
" <td>0.000000</td>\n",
" <td>conventional</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-01-04</td>\n",
" <td>0.480427</td>\n",
" <td>0.000020</td>\n",
" <td>0.000003</td>\n",
" <td>0.000008</td>\n",
" <td>0.000000</td>\n",
" <td>0.000037</td>\n",
" <td>0.000057</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>organic</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-01-04</td>\n",
" <td>0.199288</td>\n",
" <td>0.006826</td>\n",
" <td>0.016018</td>\n",
" <td>0.001164</td>\n",
" <td>0.000032</td>\n",
" <td>0.001477</td>\n",
" <td>0.000813</td>\n",
" <td>0.002259</td>\n",
" <td>0.000000</td>\n",
" <td>conventional</td>\n",
" <td>Atlanta</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-01-04</td>\n",
" <td>0.469751</td>\n",
" <td>0.000059</td>\n",
" <td>0.000066</td>\n",
" <td>0.000046</td>\n",
" <td>0.000000</td>\n",
" <td>0.000044</td>\n",
" <td>0.000052</td>\n",
" <td>0.000025</td>\n",
" <td>0.000000</td>\n",
" <td>organic</td>\n",
" <td>Atlanta</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-01-04</td>\n",
" <td>0.227758</td>\n",
" <td>0.012366</td>\n",
" <td>0.002374</td>\n",
" <td>0.027010</td>\n",
" <td>0.015706</td>\n",
" <td>0.004454</td>\n",
" <td>0.006674</td>\n",
" <td>0.000299</td>\n",
" <td>0.000000</td>\n",
" <td>conventional</td>\n",
" <td>Baltimore/Washington</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33040</th>\n",
" <td>2020-11-29</td>\n",
" <td>0.366548</td>\n",
" <td>0.024844</td>\n",
" <td>0.002970</td>\n",
" <td>0.004787</td>\n",
" <td>0.001028</td>\n",
" <td>0.044649</td>\n",
" <td>0.044121</td>\n",
" <td>0.036030</td>\n",
" <td>0.019937</td>\n",
" <td>organic</td>\n",
" <td>Total U.S.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33041</th>\n",
" <td>2020-11-29</td>\n",
" <td>0.167260</td>\n",
" <td>0.091202</td>\n",
" <td>0.059484</td>\n",
" <td>0.028776</td>\n",
" <td>0.007753</td>\n",
" <td>0.119620</td>\n",
" <td>0.106938</td>\n",
" <td>0.114914</td>\n",
" <td>0.043846</td>\n",
" <td>conventional</td>\n",
" <td>West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33042</th>\n",
" <td>2020-11-29</td>\n",
" <td>0.370107</td>\n",
" <td>0.004550</td>\n",
" <td>0.000584</td>\n",
" <td>0.000945</td>\n",
" <td>0.000250</td>\n",
" <td>0.008101</td>\n",
" <td>0.005966</td>\n",
" <td>0.010062</td>\n",
" <td>0.000000</td>\n",
" <td>organic</td>\n",
" <td>West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33043</th>\n",
" <td>2020-11-29</td>\n",
" <td>0.081851</td>\n",
" <td>0.012913</td>\n",
" <td>0.010319</td>\n",
" <td>0.003918</td>\n",
" <td>0.004141</td>\n",
" <td>0.015696</td>\n",
" <td>0.013906</td>\n",
" <td>0.015817</td>\n",
" <td>0.000577</td>\n",
" <td>conventional</td>\n",
" <td>West Tex/New Mexico</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33044</th>\n",
" <td>2020-11-29</td>\n",
" <td>0.323843</td>\n",
" <td>0.000377</td>\n",
" <td>0.000054</td>\n",
" <td>0.000030</td>\n",
" <td>0.000615</td>\n",
" <td>0.000653</td>\n",
" <td>0.000867</td>\n",
" <td>0.000215</td>\n",
" <td>0.000000</td>\n",
" <td>organic</td>\n",
" <td>West Tex/New Mexico</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>33045 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" date average_price total_volume 4046 4225 4770 \\\n",
"0 2015-01-04 0.277580 0.000640 0.000124 0.001382 0.000020 \n",
"1 2015-01-04 0.480427 0.000020 0.000003 0.000008 0.000000 \n",
"2 2015-01-04 0.199288 0.006826 0.016018 0.001164 0.000032 \n",
"3 2015-01-04 0.469751 0.000059 0.000066 0.000046 0.000000 \n",
"4 2015-01-04 0.227758 0.012366 0.002374 0.027010 0.015706 \n",
"... ... ... ... ... ... ... \n",
"33040 2020-11-29 0.366548 0.024844 0.002970 0.004787 0.001028 \n",
"33041 2020-11-29 0.167260 0.091202 0.059484 0.028776 0.007753 \n",
"33042 2020-11-29 0.370107 0.004550 0.000584 0.000945 0.000250 \n",
"33043 2020-11-29 0.081851 0.012913 0.010319 0.003918 0.004141 \n",
"33044 2020-11-29 0.323843 0.000377 0.000054 0.000030 0.000615 \n",
"\n",
" total_bags small_bags large_bags xlarge_bags type \\\n",
"0 0.000307 0.000447 0.000040 0.000000 conventional \n",
"1 0.000037 0.000057 0.000000 0.000000 organic \n",
"2 0.001477 0.000813 0.002259 0.000000 conventional \n",
"3 0.000044 0.000052 0.000025 0.000000 organic \n",
"4 0.004454 0.006674 0.000299 0.000000 conventional \n",
"... ... ... ... ... ... \n",
"33040 0.044649 0.044121 0.036030 0.019937 organic \n",
"33041 0.119620 0.106938 0.114914 0.043846 conventional \n",
"33042 0.008101 0.005966 0.010062 0.000000 organic \n",
"33043 0.015696 0.013906 0.015817 0.000577 conventional \n",
"33044 0.000653 0.000867 0.000215 0.000000 organic \n",
"\n",
" geography \n",
"0 Albany \n",
"1 Albany \n",
"2 Atlanta \n",
"3 Atlanta \n",
"4 Baltimore/Washington \n",
"... ... \n",
"33040 Total U.S. \n",
"33041 West \n",
"33042 West \n",
"33043 West Tex/New Mexico \n",
"33044 West Tex/New Mexico \n",
"\n",
"[33045 rows x 12 columns]"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 41,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Min-max scale every float64 column of `avocado`.\n",
"# Based on https://www.journaldev.com/45109/normalize-data-in-python\n",
"from sklearn import preprocessing\n",
"\n",
"num_columns = avocado.select_dtypes(include='float64').columns\n",
"num_values = avocado[num_columns].values\n",
"scaler = preprocessing.MinMaxScaler()\n",
"x_scaled = scaler.fit_transform(num_values)\n",
"# Keep avocado's own index on the scaled frame so it lines up row-for-row with\n",
"# the source even when `avocado` does not have a default 0..n-1 index.\n",
"avocado_normalized = pd.DataFrame(x_scaled, columns=num_columns, index=avocado.index)\n",
"# Write the scaled values back positionally (no index alignment involved).\n",
"avocado[num_columns] = x_scaled\n",
"# NOTE(review): avocado_train / avocado_validate / avocado_test are plotted in an\n",
"# earlier cell, so they appear to be created before this scaling - confirm\n",
"# whether the splits are meant to be scaled as well.\n",
"\n",
"avocado"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Usunięcie artefaktów."
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 42,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"date 0\n",
"average_price 0\n",
"total_volume 0\n",
"4046 0\n",
"4225 0\n",
"4770 0\n",
"total_bags 0\n",
"small_bags 0\n",
"large_bags 0\n",
"xlarge_bags 0\n",
"type 0\n",
"geography 0\n",
"dtype: int64"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 42,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count missing values per column.\n",
"avocado.isna().sum()"
]
},
{
"cell_type": "code",
2021-03-21 16:49:08 +01:00
"execution_count": 43,
2021-03-21 16:37:25 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>average_price</th>\n",
" <th>total_volume</th>\n",
" <th>4046</th>\n",
" <th>4225</th>\n",
" <th>4770</th>\n",
" <th>total_bags</th>\n",
" <th>small_bags</th>\n",
" <th>large_bags</th>\n",
" <th>xlarge_bags</th>\n",
" <th>type</th>\n",
" <th>geography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-01-04</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.277580</td>\n",
" <td>0.000640</td>\n",
" <td>0.000124</td>\n",
" <td>0.001382</td>\n",
" <td>0.000020</td>\n",
" <td>0.000307</td>\n",
" <td>0.000447</td>\n",
" <td>0.000040</td>\n",
" <td>0.000000</td>\n",
2021-03-21 16:37:25 +01:00
" <td>conventional</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-01-04</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.480427</td>\n",
" <td>0.000020</td>\n",
" <td>0.000003</td>\n",
" <td>0.000008</td>\n",
" <td>0.000000</td>\n",
" <td>0.000037</td>\n",
" <td>0.000057</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2021-03-21 16:37:25 +01:00
" <td>organic</td>\n",
" <td>Albany</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-01-04</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.199288</td>\n",
" <td>0.006826</td>\n",
" <td>0.016018</td>\n",
" <td>0.001164</td>\n",
" <td>0.000032</td>\n",
" <td>0.001477</td>\n",
" <td>0.000813</td>\n",
" <td>0.002259</td>\n",
" <td>0.000000</td>\n",
2021-03-21 16:37:25 +01:00
" <td>conventional</td>\n",
" <td>Atlanta</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-01-04</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.469751</td>\n",
" <td>0.000059</td>\n",
" <td>0.000066</td>\n",
" <td>0.000046</td>\n",
" <td>0.000000</td>\n",
" <td>0.000044</td>\n",
" <td>0.000052</td>\n",
" <td>0.000025</td>\n",
" <td>0.000000</td>\n",
2021-03-21 16:37:25 +01:00
" <td>organic</td>\n",
" <td>Atlanta</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-01-04</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.227758</td>\n",
" <td>0.012366</td>\n",
" <td>0.002374</td>\n",
" <td>0.027010</td>\n",
" <td>0.015706</td>\n",
" <td>0.004454</td>\n",
" <td>0.006674</td>\n",
" <td>0.000299</td>\n",
" <td>0.000000</td>\n",
2021-03-21 16:37:25 +01:00
" <td>conventional</td>\n",
" <td>Baltimore/Washington</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33040</th>\n",
" <td>2020-11-29</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.366548</td>\n",
" <td>0.024844</td>\n",
" <td>0.002970</td>\n",
" <td>0.004787</td>\n",
" <td>0.001028</td>\n",
" <td>0.044649</td>\n",
" <td>0.044121</td>\n",
" <td>0.036030</td>\n",
" <td>0.019937</td>\n",
2021-03-21 16:37:25 +01:00
" <td>organic</td>\n",
" <td>Total U.S.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33041</th>\n",
" <td>2020-11-29</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.167260</td>\n",
" <td>0.091202</td>\n",
" <td>0.059484</td>\n",
" <td>0.028776</td>\n",
" <td>0.007753</td>\n",
" <td>0.119620</td>\n",
" <td>0.106938</td>\n",
" <td>0.114914</td>\n",
" <td>0.043846</td>\n",
2021-03-21 16:37:25 +01:00
" <td>conventional</td>\n",
" <td>West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33042</th>\n",
" <td>2020-11-29</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.370107</td>\n",
" <td>0.004550</td>\n",
" <td>0.000584</td>\n",
" <td>0.000945</td>\n",
" <td>0.000250</td>\n",
" <td>0.008101</td>\n",
" <td>0.005966</td>\n",
" <td>0.010062</td>\n",
" <td>0.000000</td>\n",
2021-03-21 16:37:25 +01:00
" <td>organic</td>\n",
" <td>West</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33043</th>\n",
" <td>2020-11-29</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.081851</td>\n",
" <td>0.012913</td>\n",
" <td>0.010319</td>\n",
" <td>0.003918</td>\n",
" <td>0.004141</td>\n",
" <td>0.015696</td>\n",
" <td>0.013906</td>\n",
" <td>0.015817</td>\n",
" <td>0.000577</td>\n",
2021-03-21 16:37:25 +01:00
" <td>conventional</td>\n",
" <td>West Tex/New Mexico</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33044</th>\n",
" <td>2020-11-29</td>\n",
2021-03-21 16:49:08 +01:00
" <td>0.323843</td>\n",
" <td>0.000377</td>\n",
" <td>0.000054</td>\n",
" <td>0.000030</td>\n",
" <td>0.000615</td>\n",
" <td>0.000653</td>\n",
" <td>0.000867</td>\n",
" <td>0.000215</td>\n",
" <td>0.000000</td>\n",
2021-03-21 16:37:25 +01:00
" <td>organic</td>\n",
" <td>West Tex/New Mexico</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>33045 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
2021-03-21 16:49:08 +01:00
" date average_price total_volume 4046 4225 4770 \\\n",
"0 2015-01-04 0.277580 0.000640 0.000124 0.001382 0.000020 \n",
"1 2015-01-04 0.480427 0.000020 0.000003 0.000008 0.000000 \n",
"2 2015-01-04 0.199288 0.006826 0.016018 0.001164 0.000032 \n",
"3 2015-01-04 0.469751 0.000059 0.000066 0.000046 0.000000 \n",
"4 2015-01-04 0.227758 0.012366 0.002374 0.027010 0.015706 \n",
"... ... ... ... ... ... ... \n",
"33040 2020-11-29 0.366548 0.024844 0.002970 0.004787 0.001028 \n",
"33041 2020-11-29 0.167260 0.091202 0.059484 0.028776 0.007753 \n",
"33042 2020-11-29 0.370107 0.004550 0.000584 0.000945 0.000250 \n",
"33043 2020-11-29 0.081851 0.012913 0.010319 0.003918 0.004141 \n",
"33044 2020-11-29 0.323843 0.000377 0.000054 0.000030 0.000615 \n",
2021-03-21 16:37:25 +01:00
"\n",
2021-03-21 16:49:08 +01:00
" total_bags small_bags large_bags xlarge_bags type \\\n",
"0 0.000307 0.000447 0.000040 0.000000 conventional \n",
"1 0.000037 0.000057 0.000000 0.000000 organic \n",
"2 0.001477 0.000813 0.002259 0.000000 conventional \n",
"3 0.000044 0.000052 0.000025 0.000000 organic \n",
"4 0.004454 0.006674 0.000299 0.000000 conventional \n",
"... ... ... ... ... ... \n",
"33040 0.044649 0.044121 0.036030 0.019937 organic \n",
"33041 0.119620 0.106938 0.114914 0.043846 conventional \n",
"33042 0.008101 0.005966 0.010062 0.000000 organic \n",
"33043 0.015696 0.013906 0.015817 0.000577 conventional \n",
"33044 0.000653 0.000867 0.000215 0.000000 organic \n",
2021-03-21 16:37:25 +01:00
"\n",
2021-03-21 16:49:08 +01:00
" geography \n",
"0 Albany \n",
"1 Albany \n",
"2 Atlanta \n",
"3 Atlanta \n",
"4 Baltimore/Washington \n",
"... ... \n",
"33040 Total U.S. \n",
"33041 West \n",
"33042 West \n",
"33043 West Tex/New Mexico \n",
"33044 West Tex/New Mexico \n",
2021-03-21 16:37:25 +01:00
"\n",
"[33045 rows x 12 columns]"
]
},
2021-03-21 16:49:08 +01:00
"execution_count": 43,
2021-03-21 16:37:25 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# dropna() returns a new frame instead of modifying `avocado`; keep the result\n",
"# under its own name so the cleaned data is actually available afterwards.\n",
"avocado_clean = avocado.dropna()\n",
"avocado_clean"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}