# Code-viewer metadata (extraction residue): 21 lines, 723 B, Bash
#!/bin/bash
#
# Download the UCI Spambase dataset, shuffle its rows, and split them into
# train / validation / test files using a 60% / 20% / 20% split.
#
# Environment:
#   CUTOFF  Total number of shuffled rows to use. Unset, empty, or 0 means
#           "use every row". Values larger than the dataset are clamped to
#           the dataset size.
#
# Outputs (written to the current directory):
#   spambase.data.train  rows [1, 60% of cutoff]
#   spambase.data.val    rows (60% of cutoff, 80% of cutoff]
#   spambase.data.test   rows (80% of cutoff, cutoff]
#
# The intermediate files spambase.data and spambase.data.shuf are removed on
# exit, whether the script succeeds or fails.

set -euo pipefail

readonly data_url='https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data'
readonly raw_file='spambase.data'
readonly shuffled_file='spambase.data.shuf'

# Clean up intermediates on every exit path, not only on success.
trap 'rm -f -- "$raw_file" "$shuffled_file"' EXIT

# --fail: treat HTTP errors as failures instead of saving the error page as
# the dataset. --location: follow redirects.
curl --fail --location -o "$raw_file" "$data_url"

# Prefer shuf: `sort -R` orders lines by a random hash of their content, so
# identical rows always end up adjacent, which is not a uniform shuffle.
if command -v shuf >/dev/null 2>&1; then
  shuf -- "$raw_file" > "$shuffled_file"
else
  sort -R -- "$raw_file" > "$shuffled_file"
fi

# Count rows with a command substitution. Wrapping it in $(( )) also strips
# the leading whitespace some wc implementations print.
total_rows=$(( $(wc -l < "$shuffled_file") ))

# Unset or empty CUTOFF behaves like 0, i.e. "use all rows".
cutoff=${CUTOFF:-0}
if [[ ! "$cutoff" =~ ^[0-9]+$ ]]; then
  printf 'error: CUTOFF must be a non-negative integer, got: %s\n' "$cutoff" >&2
  exit 1
fi
# Force base 10 so a value such as "010" is not read as octal.
cutoff=$(( 10#$cutoff ))

# Clamp to the dataset size. Otherwise the head|tail windows below would
# overlap and the same rows would appear in more than one split.
if (( cutoff == 0 || cutoff > total_rows )); then
  cutoff=$total_rows
fi

# Cumulative row boundaries of the three splits.
train_end=$(( cutoff * 60 / 100 ))
val_end=$(( cutoff * 80 / 100 ))
test_end=$cutoff

head -n "$train_end" "$shuffled_file" > spambase.data.train
head -n "$val_end" "$shuffled_file" \
  | tail -n "$(( val_end - train_end ))" > spambase.data.val
head -n "$test_end" "$shuffled_file" \
  | tail -n "$(( test_end - val_end ))" > spambase.data.test