ium_434784/preparations.sh

26 lines
727 B
Bash
Raw Normal View History

2021-03-29 01:58:55 +02:00
# Download the raw WHO suicide statistics CSV into the current directory.
# -f: exit non-zero on an HTTP error instead of saving the server's error page
#     under the CSV's name; -O: keep the remote file name; -L: follow redirects.
curl -fOL "https://git.wmi.amu.edu.pl/s434784/ium_434784/raw/branch/master/who_suicide_statistics.csv" || {
  # Every later step reads this file, so stop here rather than split nothing.
  echo "Failed to download who_suicide_statistics.csv" >&2
  exit 1
}
2021-03-29 00:57:26 +02:00
2021-03-29 01:55:42 +02:00
# Save the header row (the column names) to its own file.
# sed prints the first line and quits right after it.
sed 1q who_suicide_statistics.csv > names.csv
# Shuffle the data set.
2021-03-29 02:14:15 +02:00
# Start reading at line 2 so the header row is not shuffled in with the data,
# then write the data rows in random order to data.shuf.
tail -n +2 who_suicide_statistics.csv | shuf > data.shuf
2021-03-29 01:55:42 +02:00
#cp who_suicide_statistics.csv data.shuf
# Split into 3 subsets
# Set the proportions to 6:2:2
2021-03-29 01:58:55 +02:00
# Derive the split sizes from the number of shuffled data rows.
NUMROWS=$(wc -l < data.shuf)
# wc prints nothing when data.shuf cannot be opened; treat that as zero rows.
NUMROWS=${NUMROWS:-0}
# Integer tenth of the row count; any remainder rows end up in the train set.
tenth=$((NUMROWS / 10))
# TEST: size of the test set (2/10 of the rows).
TEST=$((tenth * 2))
# DEV: cumulative offset (4/10), not a size. The dev set is cut out as the
# last TEST rows of the first DEV rows, so it also holds 2/10 of the rows.
DEV=$((tenth * 4))
# TRAIN: everything past the first DEV rows (the remaining ~6/10).
TRAIN=$((NUMROWS - DEV))
2021-03-29 01:55:42 +02:00
# Cut the shuffled rows into the three subsets. All expansions are quoted so
# an empty or malformed value fails loudly instead of being word-split.
# Test set: the first TEST rows.
head -n "${TEST}" data.shuf > data.test
# Dev set: the last TEST rows of the first DEV rows.
head -n "${DEV}" data.shuf | tail -n "${TEST}" > data.dev
# Train set: the last TRAIN rows. The former `head -n $NUMROWS` stage passed
# the whole file through unchanged, so tail can read the file directly.
tail -n "${TRAIN}" data.shuf > data.train
printf 'Test rows %s\n' "${TEST}"
2021-03-29 02:14:15 +02:00
# DEV is the cumulative end offset of the dev slice (test rows + dev rows),
# so the number of rows actually written to data.dev is DEV - TEST.
echo "Dev rows $((DEV - TEST))"
2021-03-29 01:55:42 +02:00
# Report the number of rows assigned to the train set.
printf 'Train rows %s\n' "${TRAIN}"
2021-03-29 02:14:15 +02:00
# Report the total number of shuffled data rows (the header is not counted).
printf 'All number of rows %s\n' "${NUMROWS}"
2021-03-29 02:17:32 +02:00
# Print the script's first positional argument (an empty line when none is
# given). printf is used because echo would treat a value such as "-n" or
# "-e" as an option and swallow it.
printf '%s\n' "${1-}"