#!/bin/bash
#
# Downloads the "car-prices-poland" dataset with the Kaggle CLI, shuffles its
# data rows, keeps at most CUTOFF of them and splits the result into three
# disjoint files in the current directory:
#
#   car_prices.csv.test   - first  1/6 of the shuffled rows
#   car_prices.csv.dev    - second 1/6 of the shuffled rows
#   car_prices.csv.train  - everything after the first 2/6
#
# Usage: <script> CUTOFF
#   CUTOFF - maximum number of data rows to keep (non-negative integer)
#
# Requires: kaggle, unzip, shuf (GNU coreutils).

set -euo pipefail

readonly DATASET='aleksandrglotov/car-prices-poland'
readonly ARCHIVE='car-prices-poland.zip'
readonly SOURCE_CSV='./Car_Prices_Poland_Kaggle.csv'
readonly SHUFFLED_CSV='./Car_Prices_Poland_Kaggle_shuf.csv'
readonly TEST_FILE='car_prices.csv.test'
readonly DEV_FILE='car_prices.csv.dev'
readonly TRAIN_FILE='car_prices.csv.train'

# Prints a message to stderr and aborts the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Prints the number of lines in the file given as $1.
count_lines() {
  wc -l < "$1"
}

main() {
  local cutoff=${1-}
  [[ "$cutoff" =~ ^[0-9]+$ ]] \
    || die "Usage: ${0##*/} CUTOFF   (CUTOFF must be a non-negative integer)"

  echo 'Downloading Dataset'
  kaggle datasets download -d "$DATASET"
  echo 'Dataset downloaded'

  echo 'Unzippig Dataset'
  unzip -o "$ARCHIVE"
  echo 'Dataset unzipped'

  local len
  len=$(count_lines "$SOURCE_CSV")
  echo 'Initial dataset count:' "$len"
  echo 'CUTOFF VALUE: ' "$cutoff"

  echo 'Skip first header row and shuffle'
  # The intermediate shuffled file is removed on every exit path.
  trap 'rm -f -- "$SHUFFLED_CSV"' EXIT
  # tail -n +2 skips the header row (the `head -n -1` example from the
  # materials did not work here). shuf -n emits at most CUTOFF shuffled rows
  # and, unlike `shuf | head`, cannot die from SIGPIPE under pipefail.
  tail -n +2 "$SOURCE_CSV" | shuf -n "$cutoff" > "$SHUFFLED_CSV"
  echo 'Shuffled'

  len=$(count_lines "$SHUFFLED_CSV")
  echo 'Dataset count after cutoff:' "$len"

  local len1=$(( len / 6 ))        # size of the test split and of the dev split
  local len2=$(( len1 * 2 + 1 ))   # 1-based line where the train split starts
  echo 'len: '"$len"
  echo 'len1: '"$len1"
  echo 'len2: '"$len2"

  echo 'Divide and save to files'
  # test: lines 1 .. len1
  head -n "$len1" "$SHUFFLED_CSV" > "$TEST_FILE"
  # dev: lines len1+1 .. 2*len1. Take the first 2*len1 lines and keep the last
  # len1 of them, so dev does not overlap the test split.
  head -n "$(( len1 * 2 ))" "$SHUFFLED_CSV" | tail -n "$len1" > "$DEV_FILE"
  # train: lines 2*len1+1 .. end
  tail -n "+${len2}" "$SHUFFLED_CSV" > "$TRAIN_FILE"

  echo 'Divided datasets count'
  wc -l "$DEV_FILE" "$TEST_FILE" "$TRAIN_FILE"
}

main "$@"