#!/usr/bin/env bash
#
# Downloads the WHO suicide statistics CSV, shuffles its data rows, drops a
# requested number of rows and splits the remainder into test/dev/train
# subsets in a 2:2:6 proportion.
#
# Usage: bash <this-script> [CUTOFF]
#   CUTOFF  number of shuffled rows to drop before splitting (default: 0)
#
# Files written to the current directory:
#   who_suicide_statistics.csv         downloaded source file
#   names.csv                          header line (column names)
#   data.shuf                          shuffled data rows, header removed
#   data.test, data.dev, data.train    the three subsets (no header line)

set -euo pipefail

readonly DATA_URL='https://git.wmi.amu.edu.pl/s434784/ium_434784/raw/branch/master/who_suicide_statistics.csv'
readonly DATA_FILE='who_suicide_statistics.csv'

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# An omitted argument means "cut nothing".
CUTOFF=${1:-0}
[[ "$CUTOFF" =~ ^[0-9]+$ ]] \
  || die "CUTOFF must be a non-negative integer, got '${CUTOFF}'"
# Force base 10 so a value such as 010 is not interpreted as octal.
CUTOFF=$((10#$CUTOFF))

# The intermediate file is removed on every exit path, including failures.
trap 'rm -f -- data.shuf.cut' EXIT

# --fail: abort on HTTP errors instead of saving an error page as the CSV.
curl -fOL "$DATA_URL"

# Extract the column names.
head -n 1 "$DATA_FILE" > names.csv

# Shuffle the data set (everything except the header line).
sed 1d "$DATA_FILE" | shuf > data.shuf

# Number of rows.
NUMROWS=$(wc -l < data.shuf)
echo "Initial number of rows ${NUMROWS}"

# A negative count handed to tail would silently keep the wrong rows.
((CUTOFF <= NUMROWS)) \
  || die "CUTOFF (${CUTOFF}) exceeds the number of data rows (${NUMROWS})"
DELETE=$((NUMROWS - CUTOFF))
echo "Cutting ${CUTOFF} rows"
tail -n "$DELETE" data.shuf > data.shuf.cut

# Split into 3 subsets in a 6:2:2 proportion (train:dev:test).
NUMROWS=$(wc -l < data.shuf.cut)
TEST=$((NUMROWS / 10 * 2))
# DEV is the line number where the dev subset ends, not the dev row count.
DEV=$((NUMROWS / 10 * 4))
# Train takes every remaining row, including the integer-division remainder.
TRAIN=$((NUMROWS - DEV))

head -n "$TEST" data.shuf.cut > data.test
head -n "$DEV" data.shuf.cut | tail -n "$TEST" > data.dev
tail -n "$TRAIN" data.shuf.cut > data.train

echo "Test rows ${TEST}"
# The dev subset has the same number of rows as the test subset.
echo "Dev rows ${TEST}"
echo "Train rows ${TRAIN}"
echo "All number of rows ${NUMROWS}"