21 lines
664 B
Bash
Executable File
21 lines
664 B
Bash
Executable File
#!/bin/bash
#
# Fetch the Google Play Store apps dataset from Kaggle, shuffle its rows,
# keep the first $CUTOFF shuffled rows, and carve that sample into
# train / test / validate CSV files sized 60% / 20% / 20% (rounded).
#
# Required environment:
#   CUTOFF  - line count handed to `head -n` to size the shuffled sample.
#
# Files written to the current directory:
#   googleplaystore.csv  - extracted CSV with its first line removed
#                          (presumably the column header)
#   apps_shuf_.csv       - all remaining rows in random order
#   apps_shuf.csv        - the sample; the train and test rows are removed
#                          from it in place, so it ends up holding whatever
#                          was left after those two splits
#   apps_train.csv       - first 60% of the sample
#   apps_test.csv        - next 20% of the sample
#   apps_validate.csv    - up to 20% of the sample, taken from what is left
#
# Requires the `kaggle` CLI, `unzip`, `shuf`, and a `sed` that supports `-i`
# without a suffix argument (GNU sed).

# Abort on the first failing step so a broken download or extraction never
# yields silently empty or stale split files.
set -euo pipefail

# Without CUTOFF, `head -n` would treat the file name as the line count.
: "${CUTOFF:?CUTOFF must be set to the number of shuffled rows to keep}"

if ! command -v kaggle >/dev/null; then
  printf 'error: the kaggle CLI is required but was not found in PATH\n' >&2
  exit 1
fi

#######################################
# Delete the first N lines of a file in place.
# Arguments: $1 - number of leading lines to drop
#            $2 - file to edit
#######################################
drop_leading_lines() {
  local count=$1
  local file=$2
  # GNU sed rejects the address range "1,0", so a zero count is a no-op.
  if (( count > 0 )); then
    sed -i "1,${count}d" "$file"
  fi
}

kaggle datasets download -d lava18/google-play-store-apps
unzip -o google-play-store-apps.zip

# Drop the first line so it cannot be shuffled into the data rows.
sed -i '1d' googleplaystore.csv
shuf googleplaystore.csv > apps_shuf_.csv
head -n "$CUTOFF" apps_shuf_.csv > apps_shuf.csv

# Split sizes use integer arithmetic; the +50 rounds to the nearest row.
total=$(wc -l < apps_shuf.csv)
train_rows=$(( (total * 60 + 50) / 100 ))
holdout_rows=$(( (total * 20 + 50) / 100 ))

# Each split is the current head of apps_shuf.csv, which is then trimmed so
# the next split starts where the previous one ended.
head -n "$train_rows" apps_shuf.csv > apps_train.csv
drop_leading_lines "$train_rows" apps_shuf.csv

head -n "$holdout_rows" apps_shuf.csv > apps_test.csv
drop_leading_lines "$holdout_rows" apps_shuf.csv

# NOTE(review): because both sizes are rounded independently, the rows left
# here can be one more or one fewer than holdout_rows; a surplus row stays
# unused in apps_shuf.csv and a shortfall makes this file one row smaller.
head -n "$holdout_rows" apps_shuf.csv > apps_validate.csv