This commit is contained in:
Natalia Szymczyk 2023-04-21 09:52:12 +02:00
parent 5215d1d959
commit 840b9526b3

View File

@ -1,12 +1,7 @@
# Download the Indian Liver Patient Dataset (ILPD) CSV into liver.data.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as if it were data; --location follows redirects.
# Abort here on failure so no splits are built from a missing or bogus file.
curl --fail --location -o liver.data 'https://archive.ics.uci.edu/ml/machine-learning-databases/00225/Indian%20Liver%20Patient%20Dataset%20(ILPD).csv' || exit 1
# Write a randomly ordered copy of the rows to liver.data.shuf.
# NOTE(review): GNU `sort -R` orders by a random hash of each line, so
# identical lines end up adjacent; `shuf` gives a true permutation if that
# matters for this dataset — confirm before switching.
sort -R liver.data > liver.data.shuf
# Branch on CUTOFF: the literal value 0 selects the fixed-size split below;
# any other value (including unset/empty) falls through to the else branch.
if [ "$CUTOFF" == '0' ]; then
# First 120 shuffled lines -> test set.
head -n 120 liver.data.shuf > liver.data.test
# Last 120 of the first 240 lines (lines 121-240 when the file has at least
# 240 lines) -> dev set.
head -n 240 liver.data.shuf | tail -n 120 > liver.data.dev
# Line 241 through end of file -> training set.
tail -n +241 liver.data.shuf > liver.data.train
else
# Any CUTOFF other than the string "0" selects the sized split, whose three
# parts total CUTOFF lines.
# NOTE(review): an unset or empty CUTOFF also satisfies this test; bash
# arithmetic then evaluates it as 0 (unless `set -u` is active elsewhere), so
# all three sizes would be 0 — confirm callers always set it.
if [[ "$CUTOFF" != "0" ]]; then
# 60% of CUTOFF for training (integer division truncates).
cut_train=$(( 60 * CUTOFF / 100 ))
# 20% of CUTOFF for testing (integer division truncates).
cut_test=$(( 20 * CUTOFF / 100 ))
# Dev takes the remainder, so the three sizes always sum to CUTOFF.
cut_dev=$(( CUTOFF - cut_train - cut_test ))
@ -14,6 +9,10 @@ else
# First cut_train shuffled lines -> training set.
head -n $cut_train liver.data.shuf > liver.data.train
# Take the first cut_train + cut_dev lines and keep the last cut_dev of them,
# i.e. the lines immediately after the training slice -> dev set.
head -n $(( cut_train + cut_dev )) liver.data.shuf | tail -n $cut_dev > liver.data.dev
# Start right after the dev slice and keep cut_test lines -> test set.
tail -n +$(( cut_train + cut_dev + 1 )) liver.data.shuf | head -n $cut_test > liver.data.test
else
# Fixed-size split: first 120 shuffled lines -> test set.
head -n 120 liver.data.shuf > liver.data.test
# Last 120 of the first 240 lines (lines 121-240 when the file has at least
# 240 lines) -> dev set.
head -n 240 liver.data.shuf | tail -n 120 > liver.data.dev
# Line 241 through end of file -> training set.
tail -n +241 liver.data.shuf > liver.data.train
fi
# Delete the downloaded file and its shuffled copy.
rm -- liver.data liver.data.shuf