38 lines
1.0 KiB
Bash
Executable File
38 lines
1.0 KiB
Bash
Executable File
#!/bin/bash
#
# Download the "car-prices-poland" Kaggle dataset, keep a random sample of
# its data rows and split that sample into test / dev / train files.
#
# Usage: <script> CUTOFF
#   CUTOFF  maximum number of randomly chosen data rows to keep
#           (non-negative integer)
#
# Files written to the current directory (N = rows kept after the cutoff):
#   car_prices.csv.test   rows 1 .. N/6            of the shuffled sample
#   car_prices.csv.dev    rows N/6+1 .. 2*(N/6)    of the shuffled sample
#   car_prices.csv.train  rows 2*(N/6)+1 .. N      of the shuffled sample
# (N/6 is integer division.)
#
# Requires: kaggle CLI, unzip, head, tail, wc, shuf.

readonly DATASET='aleksandrglotov/car-prices-poland'
readonly ARCHIVE='car-prices-poland.zip'
readonly RAW_CSV='Car_Prices_Poland_Kaggle.csv'
readonly SHUFFLED_CSV='Car_Prices_Poland_Kaggle_shuf.csv'
readonly OUT_PREFIX='car_prices.csv'

# Print a short usage message to stderr.
usage() {
  printf 'Usage: %s CUTOFF\n' "${0##*/}" >&2
  printf '  CUTOFF  number of shuffled data rows to keep (integer >= 0)\n' >&2
}

# count_lines FILE
# Print the number of lines in FILE as a bare integer.
count_lines() {
  local n
  n=$(wc -l < "$1") || return
  # Arithmetic expansion strips the padding some wc implementations add.
  printf '%s\n' "$(( n ))"
}

# Download the dataset archive with the kaggle CLI and unpack it in place.
# Returns non-zero as soon as either step fails.
fetch_dataset() {
  echo 'Downloading Dataset'
  kaggle datasets download -d "$DATASET" || return
  echo 'Dataset downloaded'

  echo 'Unzipping Dataset'
  unzip -o "$ARCHIVE" || return
  echo 'Dataset unzipped'
}

# sample_rows SRC LIMIT DST
# Skip the header (first line) of SRC, shuffle the remaining rows and write
# at most LIMIT of them to DST.
sample_rows() {
  local src=$1 limit=$2 dst=$3
  # NOTE: the original author reported that the `head -n -1` example from the
  # materials did not work; `tail -n +2` is what skips the header row.
  # `shuf -n` replaces `shuf | head -n`, so shuf never gets a SIGPIPE.
  tail -n +2 -- "$src" | shuf -n "$limit" > "$dst"
}

# split_dataset SRC PART
# Split SRC into three consecutive, non-overlapping slices:
#   ${OUT_PREFIX}.test   first PART lines
#   ${OUT_PREFIX}.dev    next PART lines
#   ${OUT_PREFIX}.train  everything after the first 2*PART lines
split_dataset() {
  local src=$1 part=$2
  local two_parts=$(( part * 2 ))

  head -n "$part" -- "$src" > "${OUT_PREFIX}.test" || return
  # Take the first 2*PART lines and keep the last PART of them, so dev is the
  # second slice and does not repeat the rows already written to test.
  head -n "$two_parts" -- "$src" | tail -n "$part" > "${OUT_PREFIX}.dev" \
    || return
  tail -n "+$(( two_parts + 1 ))" -- "$src" > "${OUT_PREFIX}.train" || return
}

# main CUTOFF
# Run the whole pipeline. Returns 2 on a usage error, otherwise the status of
# the first failing step (0 on success).
main() {
  local cutoff=${1:-}
  if [[ ! $cutoff =~ ^[0-9]+$ ]]; then
    usage
    return 2
  fi

  fetch_dataset || return

  if [[ ! -f $RAW_CSV ]]; then
    printf 'error: %s not found after unzip\n' "$RAW_CSV" >&2
    return 1
  fi

  local len len1 len2 status
  len=$(count_lines "./$RAW_CSV") || return
  echo 'Initial dataset count:' "$len"

  echo 'CUTOFF VALUE: ' "$cutoff"

  echo 'Skip first header row and shuffle'
  sample_rows "$RAW_CSV" "$cutoff" "./$SHUFFLED_CSV" || return
  echo 'Shuffled'

  len=$(count_lines "./$SHUFFLED_CSV") || return
  echo 'Dataset count after cutoff:' "$len"

  len1=$(( len / 6 ))        # size of the test slice and of the dev slice
  len2=$(( len1 * 2 + 1 ))   # 1-based line where the train slice starts
  echo 'len: '"$len"
  echo 'len1: '"$len1"
  echo 'len2: '"$len2"

  echo 'Divide and save to files'
  split_dataset "$SHUFFLED_CSV" "$len1"
  status=$?
  # Remove the intermediate file whether or not the split succeeded.
  rm -f -- "./$SHUFFLED_CSV"
  (( status == 0 )) || return "$status"

  echo 'Divided datasets count'
  wc -l -- "$OUT_PREFIX".*
}

main "$@"