inzynieria-uczenia-maszynowego/create-dataset/main.sh

21 lines
723 B
Bash
Raw Normal View History

2023-04-17 14:46:30 +02:00
#!/bin/bash
# Download the UCI Spambase dataset into spambase.data and write a randomly
# shuffled copy of its rows to spambase.data.shuf.
#
# --fail:     exit non-zero on an HTTP error instead of saving the server's
#             error page as if it were the dataset.
# --location: follow redirects, so a moved URL does not yield an empty file.
curl --fail --location -o spambase.data https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data || exit 1
# shuf emits a uniform random permutation of the lines. `sort -R` orders by a
# random hash of each line, which keeps identical rows next to each other.
shuf spambase.data > spambase.data.shuf || exit 1
2023-04-17 15:32:56 +02:00
2023-04-20 23:55:36 +02:00
#######################################
# Compute the cumulative row boundaries of the 60/20/20 split.
# Globals:
#   CUTOFF (read)    - optional cap on the number of rows to use; unset,
#                      empty or 0 means "use every row".
#   train (written)  - row at which the training partition ends.
#   val (written)    - row at which the validation partition ends.
#   test (written)   - row at which the test partition ends.
# Reads:   line count of spambase.data.shuf in the current directory.
# Outputs: an error message on stderr when CUTOFF is invalid.
# Returns: 0 on success, 1 if CUTOFF is not a non-negative integer.
#######################################
compute_split_bounds() {
  local cutoff=${CUTOFF:-0}
  local total rows

  # Only plain digits may reach arithmetic expansion: anything else would be
  # evaluated as an expression rather than treated as a number.
  if [[ ! "$cutoff" =~ ^[0-9]+$ ]]; then
    printf 'CUTOFF must be a non-negative integer, got: %s\n' "$cutoff" >&2
    return 1
  fi
  # Force base 10 so a value such as 010 is not read as octal.
  cutoff=$(( 10#$cutoff ))

  # Command substitution $(...), not arithmetic expansion $((...)).
  total=$(wc -l < spambase.data.shuf)
  # Normalise padding from wc; an empty value (unreadable file) becomes 0.
  total=$(( total ))

  # A cutoff larger than the file would make the partitions overlap, so it is
  # capped at the number of rows actually available.
  rows=$total
  if (( cutoff != 0 && cutoff < total )); then
    rows=$cutoff
  fi

  train=$(( rows * 60 / 100 ))
  val=$(( rows * 80 / 100 ))
  test=$rows
}

compute_split_bounds || exit 1
2023-04-17 15:47:31 +02:00
2023-04-20 23:55:36 +02:00
#######################################
# Cut spambase.data.shuf into three consecutive, non-overlapping partitions.
# Globals:
#   train, val, test (read) - cumulative end row of the training, validation
#                             and test partitions respectively.
# Outputs: writes spambase.data.train (rows 1..train),
#          spambase.data.val (rows train+1..val) and
#          spambase.data.test (rows val+1..test) in the current directory.
#######################################
write_splits() {
  local -r shuffled=spambase.data.shuf

  head -n "$train" "$shuffled" > spambase.data.train
  # Take the first <end> rows, then keep only the tail that lies past the
  # previous partition's boundary.
  head -n "$val" "$shuffled" | tail -n "$(( val - train ))" > spambase.data.val
  head -n "$test" "$shuffled" | tail -n "$(( test - val ))" > spambase.data.test
}

write_splits
2023-04-17 14:46:30 +02:00
# Delete the downloaded file and its shuffled copy.
intermediates=(spambase.data spambase.data.shuf)
rm "${intermediates[@]}"