#!/usr/bin/env bash
#
# preparations.sh - shuffle a CSV dataset and split it into test / dev / train
# subsets in a 2:2:6 proportion.
#
# Usage: ./preparations.sh [input.csv]
#   input.csv defaults to who_suicide_statistics.csv in the current directory.
#
# Outputs (written to the current directory):
#   names.csv   - the first line (column names) of the input
#   data.shuf   - all remaining rows of the input, in random order
#   data.test   - the first 2/10 of data.shuf
#   data.dev    - the next 2/10 of data.shuf
#   data.train  - every row of data.shuf after the dev subset
#
# The dataset is not downloaded by this script; it can be fetched with:
#   curl -OL https://git.wmi.amu.edu.pl/s434784/ium_434784/raw/branch/master/who_suicide_statistics.csv

set -euo pipefail

INPUT=${1:-who_suicide_statistics.csv}

# Fail early with a clear message instead of producing empty output files.
if [[ ! -r "$INPUT" ]]; then
  printf 'error: cannot read input file: %s\n' "$INPUT" >&2
  exit 1
fi

# Save the column names separately.
head -n 1 -- "$INPUT" > names.csv

# Shuffle the data rows only. The header line is skipped so that it cannot end
# up at a random position inside one of the subsets.
tail -n +2 -- "$INPUT" | shuf > data.shuf

# Split into 3 subsets in a 6:2:2 (train:dev:test) proportion.
# Integer division rounds the test and dev sizes down; the rows left over
# all go to the train subset.
NUMROWS=$(wc -l < data.shuf)
NUMROWS=$((NUMROWS))              # normalise any padding emitted by wc
TEST=$((NUMROWS / 10 * 2))
DEV=$((NUMROWS / 10 * 2))
DEV_END=$((TEST + DEV))           # index of the last row belonging to dev
TRAIN=$((NUMROWS - DEV_END))

head -n "$TEST" data.shuf > data.test
# Rows TEST+1 .. DEV_END: take the first DEV_END rows, keep the last DEV.
head -n "$DEV_END" data.shuf | tail -n "$DEV" > data.dev
tail -n "$TRAIN" data.shuf > data.train

echo "Test rows ${TEST}"
echo "Dev rows ${DEV}"
echo "Train rows ${TRAIN}"
echo "All number of rows ${NUMROWS}"