# ium_487176/getdata.sh

# 21 lines
# 587 B
# Bash

# Download the Wine Quality CSV and split it into train/val/test sets.
#
# Usage: getdata.sh [CUTOFF]
#   CUTOFF  Number of data rows the split sizes are computed from
#           (default: every data row). Values above the row count are
#           clamped so the three splits can never share rows.
#
# Writes wine.csv, train.csv (80% of CUTOFF), val.csv (10%) and test.csv
# (10%) to the current directory; each split starts with the CSV header.
# Requires Bash, wget, shuf and mktemp.
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

readonly DATA_URL='https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv'

# wget -O creates/truncates wine.csv even when the download fails, so drop
# the leftover instead of splitting an empty or partial file.
wget -O wine.csv "$DATA_URL" || {
  rm -f -- wine.csv
  die "failed to download $DATA_URL"
}

# Shuffle into a private temp file that is removed on every exit path.
shuffled_data=$(mktemp) || die "mktemp failed"
trap 'rm -f -- "$shuffled_data"' EXIT

header=$(head -n 1 wine.csv)
tail -n +2 wine.csv | shuf > "$shuffled_data"

# Count the rows that are actually available to the split (header excluded).
num_rows=$(( $(wc -l < "$shuffled_data") ))
(( num_rows > 0 )) || die "wine.csv contains no data rows"

CUTOFF=${1:-$num_rows}
[[ "$CUTOFF" =~ ^[0-9]+$ ]] \
  || die "CUTOFF must be a non-negative integer, got '$CUTOFF'"
CUTOFF=$(( 10#$CUTOFF ))   # force base 10 so a leading zero is not read as octal
if (( CUTOFF > num_rows )); then
  # A larger cutoff would make train/val/test overlap (data leakage).
  printf 'warning: CUTOFF %d exceeds the %d available rows; using %d\n' \
    "$CUTOFF" "$num_rows" "$num_rows" >&2
  CUTOFF=$num_rows
fi

train_size=$(( CUTOFF * 80 / 100 ))
test_size=$(( CUTOFF * 10 / 100 ))

# train: the first train_size shuffled rows.
{
  printf '%s\n' "$header"
  head -n "$train_size" "$shuffled_data"
} > train.csv

# val: the test_size rows that follow the training rows. head feeds tail
# (not the reverse) so no stage exits early and trips pipefail via SIGPIPE.
{
  printf '%s\n' "$header"
  head -n "$(( train_size + test_size ))" "$shuffled_data" | tail -n "$test_size"
} > val.csv

# test: the last test_size shuffled rows; disjoint from train and val because
# train_size + 2 * test_size <= CUTOFF <= num_rows.
{
  printf '%s\n' "$header"
  tail -n "$test_size" "$shuffled_data"
} > test.csv