ium_426206/skrypt.sh

20 lines
740 B
Bash
Raw Normal View History

2021-03-28 22:16:42 +02:00
#!/bin/bash
#
# Download the Global Superstore dataset from Kaggle and split it into
# test / dev / train files.
#
# Usage: skrypt.sh N
#   N = 0   use the whole CSV
#   N > 0   use only the first N lines of the CSV
#
# Files written to the current directory:
#   Global_Superstore22.csv        the (possibly truncated) working copy
#   Global_Superstore2.csv.test    first  N/5 shuffled lines
#   Global_Superstore2.csv.dev     second N/5 shuffled lines
#   Global_Superstore2.csv.train   all remaining shuffled lines
#
# Requires: kaggle CLI (with credentials configured), unzip, shuf.

set -euo pipefail

readonly DATASET='apoorvaappz/global-super-store-dataset'
readonly ARCHIVE='global-super-store-dataset.zip'
readonly SOURCE_CSV='Global_Superstore2.csv'
readonly WORK_CSV='Global_Superstore22.csv'
readonly SHUFFLED="${SOURCE_CSV}.shuf"
# Cutoff used when the whole file is requested (N = 0).
# NOTE(review): presumably the line count of the full CSV including its
# header row -- confirm against the downloaded file.
readonly FULL_CUTOFF=51291

# Print an error message to stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Print usage to stderr and abort with the conventional "bad usage" status.
usage() {
  printf 'Usage: %s N\n  N = 0 uses the whole dataset, N > 0 the first N lines.\n' \
    "${0##*/}" >&2
  exit 2
}

main() {
  # Validate the argument before doing any network or disk work, so a typo
  # does not cost a download and does not leave half-written split files.
  (( $# == 1 )) || usage
  [[ "$1" =~ ^[0-9]+$ ]] || usage
  local -r requested=$1

  command -v kaggle >/dev/null || die 'kaggle CLI not found in PATH'
  kaggle datasets download -d "$DATASET" || die "download of ${DATASET} failed"

  # unzip exits with status 1 for mere warnings (extraction still completed);
  # only higher statuses are treated as fatal here.
  local rc=0
  unzip -o "$ARCHIVE" || rc=$?
  (( rc <= 1 )) || die "unzip of ${ARCHIVE} failed (status ${rc})"
  [[ -f "$SOURCE_CSV" ]] || die "${SOURCE_CSV} not found after extraction"

  local cutoff
  if [[ "$requested" == "0" ]]; then
    cutoff=$FULL_CUTOFF
    cp -- "$SOURCE_CSV" "$WORK_CSV"
  else
    cutoff=$requested
    head -n "$requested" -- "$SOURCE_CSV" > "$WORK_CSV"
  fi

  # Drop the last line of the working copy, then shuffle what is left.
  # NOTE(review): the first line (likely the CSV header -- confirm) is NOT
  # removed, so it is shuffled into one of the splits like any other row.
  # Left as-is because consumers of the split files are not visible here.
  head -n -1 -- "$WORK_CSV" | shuf > "$SHUFFLED"

  # test and dev each get one fifth of the cutoff; train gets the remainder.
  local -r fifth=$(( cutoff / 5 ))
  head -n "$fifth" -- "$SHUFFLED" > "${SOURCE_CSV}.test"
  head -n "$(( fifth * 2 ))" -- "$SHUFFLED" \
    | tail -n "$fifth" > "${SOURCE_CSV}.dev"
  tail -n "+$(( fifth * 2 + 1 ))" -- "$SHUFFLED" > "${SOURCE_CSV}.train"

  # Remove the intermediate file before the size report so it is not listed.
  rm -- "$SHUFFLED"

  # Let's check that the sizes add up:
  wc -l Global_Superstore2*
}

main "$@"