18 lines
726 B
Bash
18 lines
726 B
Bash
|
#!/bin/bash
#
# Download the OpenPowerlifting dataset from Kaggle, keep its first NUM_LINES
# lines, shuffle the data rows and split them into train/dev/test CSV files
# written to ./data.
#
# Usage:
#   <script> NUM_LINES
#
# Arguments:
#   NUM_LINES  positive integer; number of lines (CSV header included) to
#              keep from the raw dataset before splitting.
#
# Optional environment:
#   TRAIN_PCT  percentage of data rows for the training set (default: 80)
#   DEV_PCT    percentage of data rows for the dev set      (default: 10)
#   The test set receives every remaining row.
#
# Outputs:
#   cutoff_openpowerlifting.csv  truncated copy of the dataset (with header)
#   shuffled.csv                 shuffled data rows (no header)
#   data/{train,dev,test}.csv    the splits, each starting with the CSV header
#
# Requires: pip, unzip, shuf (GNU coreutils), awk, and configured Kaggle API
# credentials.

set -euo pipefail

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Validate the single positional argument before doing any expensive work.
if [[ $# -ne 1 || ! ${1-} =~ ^[1-9][0-9]*$ ]]; then
  printf 'Usage: %s NUM_LINES\n' "${0##*/}" >&2
  exit 2
fi

readonly DATASET_FILE="openpowerlifting.csv"
readonly CUTOFF_FILE="cutoff_${DATASET_FILE}"
readonly SHUFFLED_FILE="shuffled.csv"
readonly OUT_DIR="data"

readonly num_lines=$1
readonly train_pct=${TRAIN_PCT:-80}
readonly dev_pct=${DEV_PCT:-10}

# Reject non-numeric values and leading zeros (which arithmetic expansion
# would otherwise interpret as octal).
[[ $train_pct =~ ^(0|[1-9][0-9]*)$ && $dev_pct =~ ^(0|[1-9][0-9]*)$ ]] \
  || die "TRAIN_PCT and DEV_PCT must be non-negative integers"
(( train_pct + dev_pct <= 100 )) \
  || die "TRAIN_PCT + DEV_PCT must not exceed 100"

#######################################
# Write one split: the CSV header followed by a contiguous range of the
# shuffled data rows.
# Globals:   CUTOFF_FILE (read), SHUFFLED_FILE (read)
# Arguments: $1 - output path
#            $2 - 1-based index of the first row to take
#            $3 - number of rows to take (may be 0)
#######################################
write_split() {
  local out=$1
  local first=$2
  local count=$3

  head -n 1 "$CUTOFF_FILE" > "$out"
  # awk reads straight from the file (no pipe), so stopping early cannot
  # raise SIGPIPE and trip `pipefail`.
  awk -v first="$first" -v count="$count" '
    NR >= first + count { exit }
    NR >= first
  ' "$SHUFFLED_FILE" >> "$out"
}

pip install kaggle
kaggle datasets download -d open-powerlifting/powerlifting-database
unzip -o powerlifting-database.zip

echo "Obcięte wiersze: ${num_lines}"
head -n "$num_lines" "$DATASET_FILE" > "$CUTOFF_FILE"

echo "Podział i wymieszanie"
# Shuffle the data rows only: the header must stay out of the shuffle, else
# it lands at a random position inside one of the splits and the row count
# used for the split sizes is off by one.
tail -n +2 "$CUTOFF_FILE" | shuf -o "$SHUFFLED_FILE"

total_lines=$(wc -l < "$SHUFFLED_FILE")
train_lines=$(( total_lines * train_pct / 100 ))
dev_lines=$(( total_lines * dev_pct / 100 ))
# The test set takes the remainder so that no row is lost to integer rounding.
test_lines=$(( total_lines - train_lines - dev_lines ))

mkdir -p "$OUT_DIR"
write_split "${OUT_DIR}/train.csv" 1 "$train_lines"
write_split "${OUT_DIR}/dev.csv" "$(( train_lines + 1 ))" "$dev_lines"
write_split "${OUT_DIR}/test.csv" "$(( train_lines + dev_lines + 1 ))" "$test_lines"
|