34 lines
1.1 KiB
Bash
34 lines
1.1 KiB
Bash
#!/bin/bash
#
# Downloads the nasa/meteorite-landings dataset with the kaggle CLI, shuffles
# its lines, optionally keeps only a limited number of shuffled rows, splits
# the result into train / test / validation CSV files and moves all produced
# files into ./artifacts.
#
# Usage: <this script> [ROWS]
#   ROWS  maximum number of shuffled rows to keep (optional; every row is
#         kept when the argument is omitted or empty).
#
# Requires: pip, unzip, shuf, bc, awk, head, tail, wc.

# Abort on the first failing command, on unset variables and on failures
# inside pipelines, so a failed download never produces empty splits.
set -euo pipefail

### Variables ###
readonly TRAIN_RATIO=0.8      # share of all rows assigned to the training set
readonly TEST_VAL_RATIO=0.5   # share of the remaining rows assigned to the test set
readonly SOURCE_CSV=meteorite-landings.csv
readonly SHUFFLED_CSV=shuffled-meteorite-landings.csv

#######################################
# Multiplies an integer by a decimal ratio and rounds the product half up.
# bc runs with its default scale of 0, so "/ 1" truncates the fraction;
# adding 0.5 beforehand turns that truncation into rounding.
# Arguments: $1 - integer count, $2 - decimal ratio
# Outputs:   the rounded product on stdout
#######################################
rounded_share() {
  printf '(%s * %s + 0.5) / 1\n' "$1" "$2" | bc
}

#######################################
# Runs the whole download / shuffle / split pipeline.
# Arguments: $1 - optional maximum number of shuffled rows to keep
#######################################
main() {
  local row_limit=${1:-}
  local total_lines train_lines test_lines validation_lines

  pip install kaggle --upgrade
  kaggle datasets download -d nasa/meteorite-landings
  unzip -o meteorite-landings.zip

  ## File processing ##
  # NOTE(review): shuf treats every line alike, so a CSV header row (if the
  # file has one) lands at a random position in one of the splits - confirm
  # whether downstream consumers expect that.
  if [[ -n "$row_limit" ]]; then
    ## Keep at most ROWS rows ##
    # shuf -n emits the first ROWS lines of a random permutation. This
    # replaces "head -n ROWS file > file", which truncated the file to zero
    # bytes before head could read it.
    shuf -n "$row_limit" -o "$SHUFFLED_CSV" "$SOURCE_CSV"
  else
    shuf -o "$SHUFFLED_CSV" "$SOURCE_CSV"
  fi

  total_lines=$(wc -l < "$SHUFFLED_CSV")
  train_lines=$(rounded_share "$total_lines" "$TRAIN_RATIO")
  test_lines=$(rounded_share "$((total_lines - train_lines))" "$TEST_VAL_RATIO")
  validation_lines=$((total_lines - train_lines - test_lines))

  # Train = leading rows, test = the rows right after them, validation = the
  # trailing rows. The middle slice is taken with awk rather than
  # "tail | head": under pipefail, head exiting early can kill tail with
  # SIGPIPE and fail the whole script.
  head -n "$train_lines" "$SHUFFLED_CSV" > meteorite_train.csv
  awk -v first="$((train_lines + 1))" -v last="$((train_lines + test_lines))" \
    'NR >= first && NR <= last' "$SHUFFLED_CSV" > meteorite_test.csv
  tail -n "$validation_lines" "$SHUFFLED_CSV" > meteorite_validation.csv

  mkdir -p artifacts
  mv "$SOURCE_CSV" "$SHUFFLED_CSV" meteorite_test.csv meteorite_train.csv \
    meteorite_validation.csv artifacts/
}

main "$@"