changed script
This commit is contained in:
parent
9dca2d4283
commit
4da4b2deed
10
script1.sh
10
script1.sh
@@ -3,9 +3,10 @@ pip install kaggle
|
|||||||
kaggle datasets download -d open-powerlifting/powerlifting-database
|
kaggle datasets download -d open-powerlifting/powerlifting-database
|
||||||
unzip -o powerlifting-database.zip
|
unzip -o powerlifting-database.zip
|
||||||
DATASET_FILE="openpowerlifting.csv"
|
DATASET_FILE="openpowerlifting.csv"
|
||||||
echo "Obcięte wiersze: ${1}"
|
column_names=$(head -n 1 $DATASET_FILE)
|
||||||
|
echo "Truncated rows: ${1}"
|
||||||
head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
|
head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
|
||||||
echo "Podział i wymieszanie"
|
echo "$column_names" > temp && cat cutoff_$DATASET_FILE >> temp && mv temp cutoff_$DATASET_FILE
|
||||||
total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
|
total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
|
||||||
train_lines=$((total_lines * 90 / 100))
|
train_lines=$((total_lines * 90 / 100))
|
||||||
dev_lines=$((total_lines * 10 / 100))
|
dev_lines=$((total_lines * 10 / 100))
|
||||||
@@ -15,4 +16,7 @@ head -n $train_lines shuffled.csv > train.csv
|
|||||||
tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
|
tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
|
||||||
tail -n $test_lines shuffled.csv > test.csv
|
tail -n $test_lines shuffled.csv > test.csv
|
||||||
mkdir -p data
|
mkdir -p data
|
||||||
mv train.csv dev.csv test.csv data/
|
echo "$column_names" | cat - train.csv > temp && mv temp train.csv
|
||||||
|
echo "$column_names" | cat - dev.csv > temp && mv temp dev.csv
|
||||||
|
echo "$column_names" | cat - test.csv > temp && mv temp test.csv
|
||||||
|
mv train.csv dev.csv test.csv data/
|
||||||
|
Loading…
Reference in New Issue
Block a user