diff --git a/script.py b/script.py index f9fd03c..6493924 100644 --- a/script.py +++ b/script.py @@ -16,17 +16,22 @@ def install_dependencies(): def unzip_package(): """Unzip dataset""" + print('Unzipping dataset...') os.system('unzip -o car-prices-poland.zip') + print('Dataset unzipped') def download_dataset(): """Download kaggle dataset.""" + print('Downloading dataset...') os.system('kaggle datasets download -d aleksandrglotov/car-prices-poland') + print('Dataset downloaded') def divide_dataset(dataset): """Split dataset to dev, train, test datasets. """ - + print('Dividing dataset...') + os.system('tail -n +2 Car_Prices_Poland_Kaggle.csv | shuf > ./Car_Prices_Poland_Kaggle_shuf.csv') len1 = len(dataset) // 6 @@ -41,6 +46,8 @@ def divide_dataset(dataset): os.system('cat Car_Prices_Poland_Kaggle_train.csv | wc -l') os.system('cat Car_Prices_Poland_Kaggle_dev.csv | wc -l') os.system('cat Car_Prices_Poland_Kaggle_test.csv | wc -l') + + print('Dataset divided') def get_statistics(dataset):