inzynieria-uczenia-maszynowego/create-dataset/main.sh
2023-04-20 23:55:36 +02:00

21 lines
723 B
Bash

#!/bin/bash
#
# Download the UCI Spambase dataset, shuffle its rows and split them
# 60% / 20% / 20% into three files in the current directory:
#   spambase.data.train, spambase.data.val, spambase.data.test
#
# Environment:
#   CUTOFF  Total number of shuffled rows to distribute across the three
#           splits. 0, empty or unset means "use every row". Values larger
#           than the dataset are clamped to the dataset size so the splits
#           never overlap.
set -euo pipefail

readonly DATA_URL='https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data'

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

cutoff=${CUTOFF:-0}
[[ "$cutoff" =~ ^[0-9]+$ ]] \
  || die "CUTOFF must be a non-negative integer, got: ${cutoff}"
# Force base 10 so a value with leading zeros is not read as octal.
cutoff=$((10#$cutoff))

# Keep intermediates in a private directory so an unrelated ./spambase.data
# is never overwritten or deleted, and so they are removed on every exit path.
workdir=$(mktemp -d) || die "mktemp failed"
cleanup() { rm -rf -- "${workdir:?}"; }
trap cleanup EXIT

raw="${workdir}/spambase.data"
shuffled="${workdir}/spambase.data.shuf"

# --fail: treat HTTP errors as failures instead of saving the error page.
curl --fail --location -o "$raw" "$DATA_URL" \
  || die "download failed: ${DATA_URL}"

# NOTE: sort -R orders lines by a random hash of each line, so byte-identical
# rows end up adjacent. Use `shuf` instead if a uniform permutation is needed.
sort -R "$raw" > "$shuffled"

rows=$(wc -l < "$shuffled")
rows=$((rows)) # normalise any whitespace padding emitted by wc
((rows > 0)) || die "downloaded dataset is empty"

# Use the whole dataset unless a smaller positive cutoff was requested.
total=$rows
if ((cutoff != 0 && cutoff < rows)); then
  total=$cutoff
fi

# Cumulative row indices marking the end of each split.
train_end=$((total * 60 / 100))
val_end=$((total * 80 / 100))
test_end=$total

head -n "$train_end" "$shuffled" > spambase.data.train
head -n "$val_end" "$shuffled" \
  | tail -n "$((val_end - train_end))" > spambase.data.val
head -n "$test_end" "$shuffled" \
  | tail -n "$((test_end - val_end))" > spambase.data.test