# 2023-04-20 22:45:53 +02:00
# Download the Indian Liver Patient Dataset (ILPD) CSV to liver.data.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as if it were the dataset; --location follows
# redirects. Stop here on failure so that nothing downstream shuffles and
# splits a missing or bogus file.
curl --fail --location -o liver.data \
  'https://archive.ics.uci.edu/ml/machine-learning-databases/00225/Indian%20Liver%20Patient%20Dataset%20(ILPD).csv' \
  || {
    printf 'error: could not download the ILPD dataset\n' >&2
    exit 1
  }

# Write a randomly ordered copy of the rows to liver.data.shuf.
# `sort -R` orders lines by a random hash of their content, so identical rows
# always end up next to each other; `shuf` produces a real random permutation.
# Keep `sort -R` only as a fallback for systems that do not ship `shuf`.
if command -v shuf >/dev/null 2>&1; then
  shuf liver.data > liver.data.shuf
else
  sort -R liver.data > liver.data.shuf
fi
# 2023-04-20 23:46:06 +02:00
#######################################
# Split the shuffled dataset into train/dev/test files.
#
# Without CUTOFF the split is fixed: rows 1-120 go to test, rows 121-240 to
# dev, and everything from row 241 on to train.
# With CUTOFF=N only the first N rows are used, in file order:
#   train = first 60% of N, dev = the next N - 80% of N rows,
#   test  = the following 20% of N (percentages are rounded down).
# The three parts add up to exactly N rows.
#
# Globals:
#   CUTOFF     (read)    optional non-negative integer row budget
#   cut_train  (written) number of train rows: 60 * CUTOFF / 100
#   cut_test   (written) 80% mark of CUTOFF: 80 * CUTOFF / 100
#   cut_dev    (written) number of dev rows: CUTOFF - cut_test
# Inputs:  liver.data.shuf in the current directory
# Outputs: liver.data.train, liver.data.dev, liver.data.test
# Returns: 1 if CUTOFF is set but is not a non-negative integer; 0 otherwise
#          (head/tail errors are printed to stderr but are not propagated).
#######################################
split_liver_data() {
  local shuffled=liver.data.shuf
  local total test_rows

  # Check if the CUTOFF variable is set
  if [ -z "${CUTOFF:-}" ]; then
    head -n 120 "$shuffled" > liver.data.test
    head -n 240 "$shuffled" | tail -n 120 > liver.data.dev
    tail -n +241 "$shuffled" > liver.data.train
    return 0
  fi

  # Reject anything that is not purely digits before it reaches $(( )).
  case "$CUTOFF" in
    *[!0-9]*)
      printf 'error: CUTOFF must be a non-negative integer, got: %s\n' "$CUTOFF" >&2
      return 1
      ;;
  esac

  # Drop leading zeros so the shell does not read e.g. "080" as octal.
  total=$CUTOFF
  while [ "${#total}" -gt 1 ] && [ "${total#0}" != "$total" ]; do
    total=${total#0}
  done

  cut_train=$(( 60 * total / 100 ))
  cut_test=$(( 80 * total / 100 ))
  cut_dev=$(( total - cut_test ))
  # The test part is what lies between the 60% and 80% marks in size; using
  # cut_test - cut_dev here would take 60% of CUTOFF and overrun the budget.
  test_rows=$(( cut_test - cut_train ))

  head -n "$cut_train" "$shuffled" > liver.data.train
  head -n "$(( cut_train + cut_dev ))" "$shuffled" \
    | tail -n "$cut_dev" > liver.data.dev
  tail -n "+$(( cut_train + cut_dev + 1 ))" "$shuffled" \
    | head -n "$test_rows" > liver.data.test
  return 0
}

# Abort on an invalid CUTOFF so later steps do not run on a failed split.
split_liver_data || exit 1
# 2023-04-20 23:30:23 +02:00
# Remove the intermediate files (raw download and shuffled copy); only the
# liver.data.train / liver.data.dev / liver.data.test outputs are kept.
rm -- liver.data liver.data.shuf