#!/bin/bash
#
# Download the Wine Quality CSV and split it into train/val/test CSV files.
#
# Usage: <this-script> [CUTOFF]
#   CUTOFF  Number of data rows the split sizes are computed from.
#           Defaults to the number of data rows in the downloaded file and is
#           clamped to that number so the splits can never share rows.
#
# Files written to the current directory:
#   wine.csv   the downloaded dataset (header + data rows)
#   train.csv  header + the first 80% of CUTOFF rows of the shuffled data
#   val.csv    header + the next 10% of CUTOFF rows of the shuffled data
#   test.csv   header + the last 10% of CUTOFF rows of the shuffled data
#
# Split sizes use integer division, so a few rows may end up in no split.
# The shuffle is unseeded: every run produces a different partition.

set -euo pipefail

readonly DATA_URL='https://huggingface.co/datasets/mstz/wine/raw/main/Wine_Quality_Data.csv'
readonly DATA_FILE='wine.csv'

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Write the CSV header followed by the rows read from stdin to file $1.
write_split() {
  local out_file=$1
  printf '%s\n' "$header" > "$out_file"
  cat >> "$out_file"
}

for tool in wget shuf awk; do
  command -v "$tool" > /dev/null || die "required tool not found: $tool"
done

wget -O "$DATA_FILE" "$DATA_URL" || die "failed to download $DATA_URL"

# Count records with awk rather than `wc -l` so that a final line without a
# trailing newline is still counted; subtract one for the header row.
total_lines=$(awk 'END { print NR }' "$DATA_FILE")
num_rows=$((total_lines - 1))
echo "num_rows=$num_rows"
((num_rows >= 1)) || die "$DATA_FILE contains no data rows"

CUTOFF=${1:-$num_rows}
# Only plain digits are accepted: anything else would be evaluated as an
# expression by shell arithmetic below.
[[ "$CUTOFF" =~ ^[0-9]+$ ]] \
  || die "CUTOFF must be a non-negative integer, got: $CUTOFF"
# Force base 10 so that a leading zero is not interpreted as octal.
CUTOFF=$((10#$CUTOFF))
if ((CUTOFF > num_rows)); then
  # A cutoff beyond the available rows would make the splits overlap.
  printf 'CUTOFF %d exceeds available rows; using %d\n' \
    "$CUTOFF" "$num_rows" >&2
  CUTOFF=$num_rows
fi

train_size=$((CUTOFF * 80 / 100))
test_size=$((CUTOFF * 10 / 100))

header=$(head -n 1 "$DATA_FILE")

# Shuffled data rows go to a private temp file that is removed on any exit.
shuffled=$(mktemp "${TMPDIR:-/tmp}/wine_shuffled.XXXXXX") \
  || die "mktemp failed"
cleanup() { rm -f -- "$shuffled"; }
trap cleanup EXIT

tail -n +2 "$DATA_FILE" | shuf > "$shuffled"

# train: rows 1..train_size
head -n "$train_size" "$shuffled" | write_split train.csv

# val: rows train_size+1..train_size+test_size
head -n "$((train_size + test_size))" "$shuffled" \
  | tail -n "$test_size" \
  | write_split val.csv

# test: the last test_size rows of the shuffled file
tail -n "$test_size" "$shuffled" | write_split test.csv