This commit is contained in:
Szymon Bartanowicz 2024-04-23 22:10:38 +02:00
parent 9b1f8c68ca
commit bfc6feba86
6 changed files with 150 additions and 1 deletions

1
.gitignore vendored
View File

@ -4,3 +4,4 @@ openpowerlifting.csv
openpowerlifting-2024-01-06-4c732975.csv
.idea
.ipynb_checkpoints
powerlifting_test_predictions.csv

81
05.ipynb Normal file
View File

@ -0,0 +1,81 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Powerlifting total regression\n",
    "\n",
    "Trains a small dense network that predicts `TotalKg` from the best squat, bench press and deadlift results in `openpowerlifting.csv`, evaluates it on a held-out split and saves the trained model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives here.\n",
    "SEED = 42\n",
    "DATA_PATH = 'openpowerlifting.csv'\n",
    "MODEL_PATH = 'powerlifting_model.h5'\n",
    "\n",
    "# Column names as used by the OpenPowerlifting CSV export; the earlier\n",
    "# placeholder names ('squat', 'bench_press', 'deadlift', 'total') raise a KeyError.\n",
    "# TODO: confirm against the CSV header if the export format changes.\n",
    "FEATURE_COLUMNS = ['Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg']\n",
    "TARGET_COLUMN = 'TotalKg'\n",
    "\n",
    "# Seed TensorFlow so weight initialisation and shuffling are repeatable.\n",
    "tf.random.set_seed(SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read only the columns the model needs and drop incomplete rows:\n",
    "# missing values would otherwise turn the training loss into NaN.\n",
    "lifts_raw = pd.read_csv(DATA_PATH, usecols=FEATURE_COLUMNS + [TARGET_COLUMN])\n",
    "lifts_clean = lifts_raw.dropna()\n",
    "\n",
    "features = lifts_clean[FEATURE_COLUMNS]\n",
    "target = lifts_clean[TARGET_COLUMN]\n",
    "lifts_clean.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split and scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the rows for testing.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    features, target, test_size=0.2, random_state=SEED\n",
    ")\n",
    "\n",
    "# Fit the scaler on the training rows only and reuse it for the test rows.\n",
    "scaler = StandardScaler()\n",
    "X_train_scaled = scaler.fit_transform(X_train)\n",
    "X_test_scaled = scaler.transform(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build and train the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential([\n",
    "    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),\n",
    "    Dense(64, activation='relu'),\n",
    "    Dense(1)\n",
    "])\n",
    "model.compile(optimizer='adam', loss='mse', metrics=['mae'])\n",
    "\n",
    "# validation_split is used instead of a separate validation set.\n",
    "model.fit(X_train_scaled, y_train, epochs=10, validation_split=0.1);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate and save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loss (MSE) and MAE on the held-out test split.\n",
    "model.evaluate(X_test_scaled, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the model\n",
    "model.save(MODEL_PATH)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

View File

@ -2,7 +2,7 @@ FROM ubuntu:latest
RUN apt-get update && apt-get install -y python3-pip unzip coreutils
RUN pip install --user kaggle pandas
RUN pip install --user kaggle pandas scikit-learn tensorflow
WORKDIR /app

39
model.py Normal file
View File

@ -0,0 +1,39 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
# Train a small dense regression network that predicts TotalKg from
# Sex, Age and BodyweightKg, and save it to 'powerlifting_model.h5'.

# Load the dataset and keep only rows where every modelled column is present.
data = pd.read_csv('openpowerlifting.csv')
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
features = data[['Sex', 'Age', 'BodyweightKg']]
target = data['TotalKg']

# Fixed random_state keeps the train/test partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Scale the numeric columns and one-hot encode 'Sex'.
# sparse_threshold=0 forces a dense array, which Keras needs for validation_split.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Age', 'BodyweightKg']),
        ('cat', OneHotEncoder(), ['Sex'])
    ],
    sparse_threshold=0
)
X_train_transformed = preprocessor.fit_transform(X_train)

# The network input width must equal the preprocessor's output width. That width
# depends on how many distinct 'Sex' values occur in the training rows, so it is
# derived from the transformed data instead of being hard-coded.
n_features = X_train_transformed.shape[1]

model = Sequential([
    Dense(64, activation='relu', input_dim=n_features),
    Dense(64, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# 10% of the training rows are held out by Keras for validation during training.
model.fit(X_train_transformed, y_train, epochs=10, validation_split=0.1)
model.save('powerlifting_model.h5')

BIN
powerlifting_model.h5 Normal file

Binary file not shown.

28
predict.py Normal file
View File

@ -0,0 +1,28 @@
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
# Predict TotalKg for the held-out test rows with the saved model and write
# the predictions to 'powerlifting_test_predictions.csv'.

# Only inference is performed, so the optimizer/loss state is not restored.
loaded_model = tf.keras.models.load_model('powerlifting_model.h5', compile=False)

data = pd.read_csv('openpowerlifting.csv')
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()  # drop rows with missing data
features = data[['Sex', 'Age', 'BodyweightKg']]
target = data['TotalKg']

# Same split parameters as the training script, so X_train here is the data the
# model's preprocessing was fitted on and X_test is the unseen remainder.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# handle_unknown='ignore' encodes a category absent from the training rows as all
# zeros instead of raising; sparse_threshold=0 forces a dense array for Keras.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Age', 'BodyweightKg']),
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['Sex'])
    ],
    sparse_threshold=0
)

# Fit on the training rows only. Fitting on X_test would give the test rows their
# own scaling statistics and one-hot columns, which do not match what the model
# saw during training.
preprocessor.fit(X_train)
X_test_transformed = preprocessor.transform(X_test)

predictions = loaded_model.predict(X_test_transformed)
predictions_df = pd.DataFrame(predictions, columns=['predicted_TotalKg'])
predictions_df.to_csv('powerlifting_test_predictions.csv', index=False)