Patch for avocado-preprocessing.py: fetch the dataset from Kaggle before loading it.

Adds `import kaggle`, calls kaggle.api.authenticate(), and downloads and unzips
the 'timmate/avocado-prices-2020' dataset into the current directory ahead of
the pd.read_csv('avocado-updated-2020.csv') call. Also removes the bare
`avocado_with_year` expression statement that followed the read.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Blank context lines were placed so the hunk matches its header
counts (-1,10 +1,14); confirm their exact position against the original blobs
(index c47aa75..ba7656f) before applying.

diff --git a/avocado-preprocessing.py b/avocado-preprocessing.py
index c47aa75..ba7656f 100644
--- a/avocado-preprocessing.py
+++ b/avocado-preprocessing.py
@@ -1,10 +1,14 @@
+import kaggle
 import pandas as pd
 import numpy as np
 from sklearn import preprocessing
 
+# kaggle
+kaggle.api.authenticate()
+kaggle.api.dataset_download_files('timmate/avocado-prices-2020', path='.', unzip=True)
+
 
 avocado_with_year = pd.read_csv('avocado-updated-2020.csv')
-avocado_with_year
 
 new = ['date', 'average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags', 'type', 'geography']
 avocado = avocado_with_year[new]