This commit is contained in:
Szymon Bartanowicz 2024-05-14 22:39:09 +02:00
parent 8942ab2122
commit ba6c8d5dde
2 changed files with 27 additions and 4 deletions

View File

@@ -8,7 +8,7 @@ from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
data = pd.read_csv('./openpowerlifting.csv')
data = pd.read_csv('./data/train.csv')
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()

View File

@@ -1,11 +1,31 @@
##!/bin/bash
#pip install kaggle
#kaggle datasets download -d open-powerlifting/powerlifting-database
#unzip -o powerlifting-database.zip
#DATASET_FILE="openpowerlifting.csv"
#echo "Obcięte wiersze: ${1}"
#head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
#echo "Podział i wymieszanie"
#total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
#train_lines=$((total_lines * 90 / 100))
#dev_lines=$((total_lines * 10 / 100))
#test_lines=$((total_lines - train_lines - dev_lines))
#shuf cutoff_$DATASET_FILE -o shuffled.csv
#head -n $train_lines shuffled.csv > train.csv
#tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
#tail -n $test_lines shuffled.csv > test.csv
#mkdir -p data
#mv train.csv dev.csv test.csv data/
#!/bin/bash
pip install kaggle
kaggle datasets download -d open-powerlifting/powerlifting-database
unzip -o powerlifting-database.zip
DATASET_FILE="openpowerlifting.csv"
echo "Obcięte wiersze: ${1}"
column_names=$(head -n 1 $DATASET_FILE)
echo "Truncated rows: ${1}"
head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
echo "Podział i wymieszanie"
echo "$column_names" > temp && cat cutoff_$DATASET_FILE >> temp && mv temp cutoff_$DATASET_FILE
total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
train_lines=$((total_lines * 90 / 100))
dev_lines=$((total_lines * 10 / 100))
@@ -15,4 +35,7 @@ head -n $train_lines shuffled.csv > train.csv
tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
tail -n $test_lines shuffled.csv > test.csv
mkdir -p data
echo "$column_names" | cat - train.csv > temp && mv temp train.csv
echo "$column_names" | cat - dev.csv > temp && mv temp dev.csv
echo "$column_names" | cat - test.csv > temp && mv temp test.csv
mv train.csv dev.csv test.csv data/