ium_487176/getdata.sh

23 lines
646 B
Bash
Raw Normal View History

2023-04-17 21:45:40 +02:00
#!/bin/bash
2023-04-17 21:56:22 +02:00
# Force the byte-wise "C" locale for every command started from here on.
export LC_ALL=C

# Fetch the Wine Quality CSV. Stop immediately on a failed download so the
# split below never runs against an empty or truncated wine.csv.
if ! wget -O wine.csv https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv; then
  printf 'error: could not download wine.csv\n' >&2
  exit 1
fi

# Data rows = total lines minus the header line.
# grep -c '' also counts a final line that lacks a trailing newline, which
# `wc -l` (it counts newline characters) would silently drop.
total_lines=$(grep -c '' wine.csv)
num_rows=$((total_lines - 1))
if ((num_rows < 1)); then
  printf 'error: wine.csv contains no data rows\n' >&2
  exit 1
fi
2023-04-17 21:52:24 +02:00
# ---------------------------------------------------------------------------
# Split wine.csv into train.csv / val.csv / test.csv (80% / 10% / 10%).
#
# Arguments: $1 - optional CUTOFF: number of data rows the split sizes are
#                 computed from. Defaults to all data rows (num_rows).
# Inputs:    wine.csv (header line + data rows) and num_rows, both prepared
#            earlier in this script.
# Outputs:   train.csv, val.csv and test.csv, each beginning with the header.
# Exit code: 1 on unusable input data, 2 on an invalid CUTOFF argument.
# ---------------------------------------------------------------------------

# num_rows comes from the row count above; refuse to go on if it is not a
# positive integer (e.g. the download produced an empty file).
if ! [[ "${num_rows:-}" =~ ^[0-9]+$ ]] || ((10#$num_rows == 0)); then
  printf 'error: wine.csv has no usable data rows (num_rows=%s)\n' "${num_rows:-}" >&2
  exit 1
fi
num_rows=$((10#$num_rows))

# Default to the *value* of num_rows, and validate user input before it ever
# reaches an arithmetic context (which would otherwise evaluate it as code).
CUTOFF=${1:-$num_rows}
if ! [[ "$CUTOFF" =~ ^[0-9]+$ ]]; then
  printf 'error: CUTOFF must be a non-negative integer, got: %s\n' "$CUTOFF" >&2
  exit 2
fi
CUTOFF=$((10#$CUTOFF)) # 10# forces base 10 so "08" is not parsed as octal

# A cutoff larger than the dataset would make the val/test slices overlap the
# training rows, so clamp it to what is actually available.
if ((CUTOFF > num_rows)); then
  printf 'warning: CUTOFF %s exceeds %s available rows; using %s\n' \
    "$CUTOFF" "$num_rows" "$num_rows" >&2
  CUTOFF=$num_rows
fi

train_size=$((CUTOFF * 80 / 100))
test_size=$((CUTOFF * 10 / 100)) # also used as the validation-set size
header=$(head -n 1 wine.csv)

# Shuffle the data rows into a private temp file that is removed on any exit.
shuffled=$(mktemp) || {
  printf 'error: mktemp failed\n' >&2
  exit 1
}
trap 'rm -f -- "$shuffled"' EXIT

if ! tail -n +2 wine.csv | shuf >"$shuffled"; then
  printf 'error: could not shuffle wine.csv\n' >&2
  exit 1
fi

# train: the first train_size shuffled rows.
{
  printf '%s\n' "$header"
  head -n "$train_size" "$shuffled"
} >train.csv

# val: the test_size rows that immediately follow the training rows.
{
  printf '%s\n' "$header"
  head -n "$((train_size + test_size))" "$shuffled" | tail -n "$test_size"
} >val.csv

# test: the last test_size shuffled rows. Because
# train_size + 2 * test_size <= CUTOFF <= num_rows, these never overlap the
# train or val slices.
{
  printf '%s\n' "$header"
  tail -n "$test_size" "$shuffled"
} >test.csv