script can now create 8_1_1 datasets
This commit is contained in:
parent
5b20395ba3
commit
6de4db7a83
1586
TEST_winequality-red.csv
Normal file
1586
TEST_winequality-red.csv
Normal file
File diff suppressed because it is too large
Load Diff
12968
TRAIN_winequality-red.csv
Normal file
12968
TRAIN_winequality-red.csv
Normal file
File diff suppressed because it is too large
Load Diff
1696
VAL_winequality-red.csv
Normal file
1696
VAL_winequality-red.csv
Normal file
File diff suppressed because it is too large
Load Diff
58
script.sh
58
script.sh
@ -1,4 +1,56 @@
|
||||
#!/bin/sh
#
# Split a CSV dataset into train / test / validation subsets (80/10/10),
# stratified by the value of one column: for every distinct value of that
# column, the matching rows (in file order) are divided between the three
# subsets in the configured proportions.
#
# Outputs, written to the current directory:
#   TRAIN_<file>  TEST_<file>  VAL_<file>
# Each output starts with the CSV header line of the input.
#
# The input can be fetched with:
#kaggle datasets download -d uciml/red-wine-quality-cortez-et-al-2009
#unzip -o red-wine-quality-cortez-et-al-2009.zip > data.txt

FILE=winequality-red.csv

TRAIN_SET_PERCENT=80
TEST_SET_PERCENT=10
# The validation set receives whatever remains (100 - train - test).

TRAIN_DATA=TRAIN_$FILE
TEST_DATA=TEST_$FILE
VAL_DATA=VAL_$FILE

# Print lines $1..$2 (1-based, inclusive) of stdin.
# Prints nothing when $2 < $1, so empty ranges need no special casing.
slice_lines() {
  awk -v lo="$1" -v hi="$2" 'NR >= lo && NR <= hi'
}

#######################################
# Split a CSV file into three stratified subsets.
# Arguments:
#   $1 - input CSV (first line is the header)
#   $2 - train output file
#   $3 - test output file
#   $4 - validation output file
#   $5 - train percentage (integer)
#   $6 - test percentage (integer; train + test must be <= 100)
#   $7 - 1-based index of the column to stratify by (default: 1)
# Outputs:
#   Progress report on stdout, diagnostics on stderr.
# Returns:
#   0 on success, 1 if the input is unreadable, 2 on invalid arguments.
#   Output files are only touched after all arguments were validated.
#######################################
# The body is a subshell "( ... )" so the helper variables stay private
# without relying on the non-POSIX 'local'; 'exit' leaves only the function.
split_dataset() (
  src=$1
  train_out=$2
  test_out=$3
  val_out=$4
  train_pct=$5
  test_pct=$6
  key_col=${7:-1}

  if [ ! -f "$src" ] || [ ! -r "$src" ]; then
    printf 'error: cannot read input file: %s\n' "$src" >&2
    exit 1
  fi

  for number in "$train_pct" "$test_pct" "$key_col"; do
    case $number in
      # Reject empty / non-digit values, and leading zeros (parsed as octal).
      '' | *[!0-9]* | 0?*)
        printf 'error: expected a non-negative integer, got: %s\n' \
          "$number" >&2
        exit 2
        ;;
    esac
  done
  if [ "$key_col" -lt 1 ] || [ $(( train_pct + test_pct )) -gt 100 ]; then
    printf 'error: need key column >= 1 and train%% + test%% <= 100\n' >&2
    exit 2
  fi
  val_pct=$(( 100 - train_pct - test_pct ))

  # Start every subset with the header so each file is a valid CSV by itself.
  head -n 1 "$src" > "$train_out"
  head -n 1 "$src" > "$test_out"
  head -n 1 "$src" > "$val_out"

  # Distinct key values, header and blank lines excluded. LC_ALL=C makes the
  # ordering and the notion of "distinct" byte-exact on every machine.
  awk -F',' -v col="$key_col" 'NR > 1 && NF { print $col }' "$src" |
    LC_ALL=C sort -u |
    while IFS= read -r key; do
      # Rows whose key column equals $key exactly. The key travels through
      # the environment (no escape processing) and the field is forced to a
      # string, so "7" neither matches "7.4" nor compares equal to "7.0".
      rows=$(SPLIT_KEY=$key awk -F',' -v col="$key_col" \
        'NR > 1 && NF && ($col "") == ENVIRON["SPLIT_KEY"]' "$src")
      total=$(printf '%s\n' "$rows" | wc -l)
      total=$(( total )) # strip the padding some wc implementations add
      printf '%s: %s\n' "$key" "$total"

      # Integer division rounds down; the validation set absorbs the rest,
      # so every row lands in exactly one subset.
      train_num=$(( total * train_pct / 100 ))
      test_num=$(( total * test_pct / 100 ))
      val_num=$(( total - train_num - test_num ))

      # Train set
      from=1
      to=$(( from + train_num - 1 ))
      printf 'Train set: %s%% %s from=%s to=%s\n' \
        "$train_pct" "$train_num" "$from" "$to"
      printf '%s\n' "$rows" | slice_lines "$from" "$to" >> "$train_out"

      # Test set
      from=$(( to + 1 ))
      to=$(( from + test_num - 1 ))
      printf 'Test set: %s%% %s from=%s to=%s\n' \
        "$test_pct" "$test_num" "$from" "$to"
      printf '%s\n' "$rows" | slice_lines "$from" "$to" >> "$test_out"

      # Validate set
      from=$(( to + 1 ))
      to=$total
      printf 'Validate set: %s%% %s from=%s to=%s\n' \
        "$val_pct" "$val_num" "$from" "$to"
      printf '%s\n' "$rows" | slice_lines "$from" "$to" >> "$val_out"

      printf '\n'
    done
)

# Last command of the script: its status becomes the script's exit status.
split_dataset "$FILE" "$TRAIN_DATA" "$TEST_DATA" "$VAL_DATA" \
  "$TRAIN_SET_PERCENT" "$TEST_SET_PERCENT"
|
||||
|
||||
|
||||
|
||||
|
||||
|
1600
winequality-red.csv
Normal file
1600
winequality-red.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user