ium_487176/getdata.sh
2023-04-17 21:50:09 +02:00

22 lines
623 B
Bash

#!/bin/bash
#
# Download the Wine Quality CSV and split it into train/val/test CSV files.
#
# Usage: getdata.sh [CUTOFF]
#   CUTOFF  Number of data rows the split sizes are computed from.
#           Defaults to the number of data rows in the downloaded file and
#           is capped at that number so the three splits never overlap.
#
# Split sizes: train = 80% of CUTOFF, val = 10% of CUTOFF, test = 10% of
# CUTOFF (integer division). Every output file starts with the CSV header.
#
# Files written to the current directory:
#   wine.csv, train.csv, val.csv, test.csv
set -euo pipefail

readonly DATA_URL='https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv'

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Scratch file for the shuffled data rows; the trap removes it on every exit
# path, including failures part-way through.
shuffled=$(mktemp) || die "could not create a temporary file"
trap 'rm -f -- "$shuffled"' EXIT

# Stop on a download error instead of splitting an empty or partial file.
wget -O wine.csv "$DATA_URL" || die "download failed: $DATA_URL"

# awk counts a final line even when it has no trailing newline, which
# `wc -l` would miss. Subtract one for the header row.
num_rows=$(( $(awk 'END { print NR }' wine.csv) - 1 ))
(( num_rows > 0 )) || die "wine.csv contains no data rows"
printf 'num_rows=%s\n' "$num_rows"

# Validate the argument before it reaches arithmetic expansion: bash would
# otherwise evaluate an arbitrary string there as an expression.
cutoff=${1:-$num_rows}
[[ "$cutoff" =~ ^[0-9]+$ ]] \
  || die "CUTOFF must be a non-negative integer, got '$cutoff'"
# Force base 10 so a value such as 08 is not parsed as (invalid) octal.
cutoff=$(( 10#$cutoff ))

# The test split is taken from the end of the shuffled rows, so a cutoff
# larger than the data would make it overlap with train/val.
if (( cutoff > num_rows )); then
  printf 'warning: CUTOFF %s exceeds %s data rows; using %s\n' \
    "$cutoff" "$num_rows" "$num_rows" >&2
  cutoff=$num_rows
fi

train_size=$(( cutoff * 80 / 100 ))
test_size=$(( cutoff * 10 / 100 ))

header=$(head -n 1 wine.csv)
tail -n +2 wine.csv | shuf > "$shuffled"

# train: the first train_size shuffled rows.
{
  printf '%s\n' "$header"
  head -n "$train_size" "$shuffled"
} > train.csv

# val: the test_size rows that immediately follow the training rows.
{
  printf '%s\n' "$header"
  head -n "$(( train_size + test_size ))" "$shuffled" | tail -n "$test_size"
} > val.csv

# test: the last test_size shuffled rows.
{
  printf '%s\n' "$header"
  tail -n "$test_size" "$shuffled"
} > test.csv