2023-04-17 21:45:40 +02:00
|
|
|
#!/bin/bash
#
# Download the Wine Quality CSV and split it into train/val/test files.
#
# Usage: <this-script> [CUTOFF]
#   CUTOFF  Number of data rows (header excluded) to distribute across the
#           splits. Defaults to every data row in the downloaded file; values
#           larger than the file are clamped to the file's row count.
#
# Files written to the current directory:
#   wine.csv   raw download
#   train.csv  header + first 80% of CUTOFF shuffled rows
#   val.csv    header + the next 10% of CUTOFF shuffled rows
#   test.csv   header + the last 10% of CUTOFF rows of the shuffled data

set -euo pipefail

export LC_ALL=C

readonly DATA_URL='https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv'

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Scratch space for the in-flight download and the shuffled rows; removed on
# every exit path so a failed run leaves no stray files behind.
workdir=$(mktemp -d) || die "mktemp failed"
cleanup() { rm -rf -- "$workdir"; }
trap cleanup EXIT

# Download to the scratch dir first so a failed or partial transfer never
# replaces an existing wine.csv.
wget -O "$workdir/wine.csv" "$DATA_URL" || die "download failed: $DATA_URL"
[[ -s "$workdir/wine.csv" ]] || die "downloaded file is empty: $DATA_URL"
mv -- "$workdir/wine.csv" wine.csv

header=$(head -n 1 wine.csv)

shuffled="$workdir/shuffled_data.csv"
tail -n +2 wine.csv | shuf >"$shuffled"

# Count rows from the shuffled copy: shuf newline-terminates every line, so a
# source file lacking a trailing newline is not undercounted. The arithmetic
# expansion also strips any padding some wc implementations emit.
num_rows=$(($(wc -l <"$shuffled")))

CUTOFF=${1:-$num_rows}
# Validate before any arithmetic: $(( )) would otherwise evaluate arbitrary
# text as an expression.
[[ "$CUTOFF" =~ ^[0-9]+$ ]] \
  || die "CUTOFF must be a non-negative integer, got '$CUTOFF'"
CUTOFF=$((10#$CUTOFF)) # force base 10 so a leading zero is not read as octal
if ((CUTOFF > num_rows)); then
  # A larger cutoff would make the split windows overlap and leak rows
  # between train, val and test.
  printf 'warning: CUTOFF %d exceeds the %d available rows; using %d\n' \
    "$CUTOFF" "$num_rows" "$num_rows" >&2
  CUTOFF=$num_rows
fi

train_size=$((CUTOFF * 80 / 100))
test_size=$((CUTOFF * 10 / 100))

#######################################
# Write the CSV header followed by a contiguous window of shuffled rows.
# Globals:   header (read), shuffled (read)
# Arguments: $1 - output file
#            $2 - 1-based index of the first row to include
#            $3 - number of rows to include (0 yields a header-only file)
#######################################
write_split() {
  local out=$1 first=$2 count=$3
  {
    printf '%s\n' "$header"
    # awk reads the whole file, so no pipeline stage can be killed by SIGPIPE
    # (which pipefail would turn into a script failure).
    awk -v first="$first" -v count="$count" \
      'NR >= first && NR < first + count' "$shuffled"
  } >"$out"
}

write_split train.csv 1 "$train_size"
write_split val.csv "$((train_size + 1))" "$test_size"
# The test split is taken from the tail of the shuffled data.
write_split test.csv "$((num_rows - test_size + 1))" "$test_size"
|