2021-03-28 22:16:42 +02:00
|
|
|
#!/bin/bash
|
2021-03-28 22:00:27 +02:00
|
|
|
# Download the dataset archive with the Kaggle CLI, unpack it, and write a
# randomly permuted copy of the CSV to Global_Superstore2.csv.shuf.
# A failed download only warns, so an archive already on disk from an earlier
# run can still be used; a failed unzip or shuffle aborts the script instead of
# silently producing empty splits.
kaggle datasets download -d apoorvaappz/global-super-store-dataset \
  || echo "warning: kaggle download failed; trying an existing archive" >&2

# -o: overwrite previously extracted files without prompting.
unzip -o global-super-store-dataset.zip || exit 1

# shuf reads the file directly; there is no need to pipe it through cat.
# NOTE(review): every line is permuted, so if the CSV starts with a header row
# that row lands at a random position in the output — confirm this is intended.
shuf Global_Superstore2.csv > Global_Superstore2.csv.shuf || exit 1
|
2021-03-28 22:21:27 +02:00
|
|
|
# Print the first command-line argument exactly as it was given.
printf '%s\n' "${1-}"

#######################################
# Translate the requested cutoff into the number of lines to use.
# Arguments: $1 - requested line count; 0 or empty selects the default
# Outputs:   the effective cutoff on stdout; an error message on stderr
# Returns:   0 on success, 1 if $1 is not a non-negative integer
#######################################
resolve_cutoff() {
  local requested=${1:-0}
  # NOTE(review): 51291 is presumably the total line count of
  # Global_Superstore2.csv — confirm against the dataset.
  local -r default_cutoff=51291

  if [[ ! "$requested" =~ ^[0-9]+$ ]]; then
    printf 'error: cutoff must be a non-negative integer, got: %s\n' \
      "$requested" >&2
    return 1
  fi

  # Force base 10 so a value with leading zeros is not parsed as octal later.
  requested=$(( 10#$requested ))

  # The operator needs its own word: without spaces, `[ $1="0" ]` is a
  # one-argument test on a non-empty string and is therefore always true.
  if [[ "$requested" == "0" ]]; then
    printf '%s\n' "$default_cutoff"
  else
    printf '%s\n' "$requested"
  fi
}

# CUTOFF is consumed by the split step further down the script.
CUTOFF=$(resolve_cutoff "${1-}") || exit 1
|
|
|
|
#######################################
# Split the first CUTOFF lines of a file into test/dev/train parts.
# The test and dev parts each get CUTOFF/5 lines (integer division); the
# train part gets the remaining lines up to CUTOFF.
# Arguments: $1 - input file (already shuffled)
#            $2 - cutoff: number of leading input lines to distribute
#            $3 - output prefix (default: Global_Superstore2.csv); the parts
#                 are written to <prefix>.test, <prefix>.dev, <prefix>.train
# Returns:   exit status of the last pipeline
#######################################
split_dataset() {
  local src=$1
  local cutoff=$2
  local prefix=${3:-Global_Superstore2.csv}
  local part=$(( cutoff / 5 ))

  # Lines 1..part
  head -n "$part" -- "$src" > "${prefix}.test"
  # Lines part+1..2*part
  head -n "$(( part * 2 ))" -- "$src" | tail -n "$part" > "${prefix}.dev"
  # Lines 2*part+1..cutoff. Capping at the cutoff keeps the train part in
  # proportion when the cutoff is smaller than the file; when the cutoff
  # equals the file length this is the same as taking everything that is left.
  head -n "$cutoff" -- "$src" | tail -n "+$(( part * 2 + 1 ))" > "${prefix}.train"
}

# CUTOFF is set earlier in the script; fall back to 0 if it is missing so the
# arithmetic inside split_dataset stays well-formed.
split_dataset Global_Superstore2.csv.shuf "${CUTOFF:-0}"

# The shuffled intermediate file is no longer needed once the parts exist.
rm -- Global_Superstore2.csv.shuf
|
|
|
|
# Let's check whether the sizes add up: print the line count of every file
# whose name starts with Global_Superstore2.csv.
split_outputs=(Global_Superstore2.csv*)
wc -l "${split_outputs[@]}"
|