Compare commits

...

30 Commits

Author SHA1 Message Date
Szymon Bartanowicz
5651fb92d1 fix 2024-05-14 23:43:14 +02:00
Szymon Bartanowicz
a597ba4d2a fix 2024-05-14 23:40:55 +02:00
Szymon Bartanowicz
e1616fd376 fix 2024-05-14 23:39:54 +02:00
Szymon Bartanowicz
97ca282402 fix 2024-05-14 23:38:30 +02:00
Szymon Bartanowicz
a188435a1b fix 2024-05-14 23:35:39 +02:00
Szymon Bartanowicz
fb48e4e5f5 fix 2024-05-14 23:30:16 +02:00
Szymon Bartanowicz
a860f43d29 fix 2024-05-14 23:25:25 +02:00
Szymon Bartanowicz
a3b003c422 fix 2024-05-14 23:19:35 +02:00
Szymon Bartanowicz
a3da4f5c76 fix 2024-05-14 23:12:11 +02:00
Szymon Bartanowicz
da08a94eb5 fix 2024-05-14 23:07:05 +02:00
Szymon Bartanowicz
c94695e20b fix 2024-05-14 23:01:38 +02:00
Szymon Bartanowicz
27eb66aaed fix 2024-05-14 22:59:17 +02:00
Szymon Bartanowicz
99173104ae changed script 2024-05-14 22:55:10 +02:00
Szymon Bartanowicz
1e044d743f changed script 2024-05-14 22:52:03 +02:00
Szymon Bartanowicz
ba6c8d5dde fix 2024-05-14 22:39:09 +02:00
Szymon Bartanowicz
8942ab2122 fix 2024-05-14 22:32:48 +02:00
Szymon Bartanowicz
7e735543b9 fix 2024-05-14 22:29:02 +02:00
Szymon Bartanowicz
14fc6d1120 fix 2024-05-14 22:26:47 +02:00
Szymon Bartanowicz
d02162bc7a fix 2024-05-14 22:11:46 +02:00
Szymon Bartanowicz
c200a8f364 fix 2024-05-14 22:10:14 +02:00
Szymon Bartanowicz
36199513fa fix 2024-05-14 22:08:33 +02:00
Szymon Bartanowicz
8c6a6c593a fix 2024-05-14 22:03:36 +02:00
Szymon Bartanowicz
844157b757 fix 2024-05-14 21:55:06 +02:00
Szymon Bartanowicz
199980fdb9 fix 2024-05-14 21:50:46 +02:00
Szymon Bartanowicz
8fd726c5a7 fix 2024-05-14 21:46:15 +02:00
Szymon Bartanowicz
9c578e17b3 fix 2024-05-14 21:41:21 +02:00
Szymon Bartanowicz
40f48555c8 fix 2024-05-14 19:58:32 +02:00
Szymon Bartanowicz
ac790babc3 fix 2024-05-14 19:46:25 +02:00
Szymon Bartanowicz
6ecb5ee56a missing csv 2024-05-14 19:25:45 +02:00
Szymon Bartanowicz
af6518f064 06-training 2024-05-14 19:21:17 +02:00
4 changed files with 58 additions and 28 deletions

View File

@@ -2,7 +2,7 @@ FROM ubuntu:latest
RUN apt-get update && apt-get install -y python3-pip unzip coreutils
RUN pip install --user kaggle pandas scikit-learn tensorflow
RUN pip install --no-cache-dir wheel kaggle pandas scikit-learn tensorflow
WORKDIR /app

43
Jenkinsfile vendored
View File

@@ -1,36 +1,37 @@
pipeline {
agent any
triggers {
upstream(upstreamProjects: 'z-s464937-create-dataset', threshold: hudson.model.Result.SUCCESS)
}
parameters {
string(name: 'CUTOFF', defaultValue: '100', description: 'Ilość wierszy do odcięcia')
string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
string(name: 'EPOCHS', defaultValue: '10', description: 'Epochs')
buildSelector(defaultSelector: lastSuccessful(), description: 'Build no', name: 'BUILD_SELECTOR')
}
stages {
stage('Clone repo') {
stage('Clone Repository') {
steps {
git branch: "main", url: "https://git.wmi.amu.edu.pl/s464937/ium_464937"
git branch: 'training', url: "https://git.wmi.amu.edu.pl/s464937/ium_464937.git"
}
}
stage('Download and preprocess') {
environment {
KAGGLE_USERNAME = "szymonbartanowicz"
KAGGLE_KEY = "4692239eb65f20ec79f9a59ef30e67eb"
stage('Copy Artifacts') {
steps {
copyArtifacts filter: 'data/dev.csv,data/test.csv,data/train.csv', projectName: 'z-s464937-create-dataset', selector: buildParameter('BUILD_SELECTOR')
}
}
stage("Run") {
agent {
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
steps {
withEnv([
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
"KAGGLE_KEY=${env.KAGGLE_KEY}"
]) {
sh "bash ./script1.sh ${params.CUTOFF}"
}
}
}
stage('Archive') {
steps {
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
sh "chmod +x ./model.py"
sh "python3 ./model.py ${params.EPOCHS}"
archiveArtifacts artifacts: 'powerlifting_model.h5', onlyIfSuccessful: true
}
}
}

View File

@@ -1,3 +1,4 @@
import sys
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -7,10 +8,12 @@ from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
data = pd.read_csv('openpowerlifting.csv')
data = pd.read_csv('./data/train.csv')
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
data['Age'] = pd.to_numeric(data['Age'], errors='coerce')
data['BodyweightKg'] = pd.to_numeric(data['BodyweightKg'], errors='coerce')
data['TotalKg'] = pd.to_numeric(data['TotalKg'], errors='coerce')
features = data[['Sex', 'Age', 'BodyweightKg']]
target = data['TotalKg']
@@ -20,13 +23,13 @@ preprocessor = ColumnTransformer(
transformers=[
('num', StandardScaler(), ['Age', 'BodyweightKg']),
('cat', OneHotEncoder(), ['Sex'])
]
],
)
pipeline = Pipeline(steps=[
('preprocessor', preprocessor),
('model', Sequential([
Dense(64, activation='relu', input_dim=4),
Dense(64, activation='relu', input_dim=5),
Dense(64, activation='relu'),
Dense(1)
]))
@@ -34,6 +37,9 @@ pipeline = Pipeline(steps=[
pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
pipeline.fit(X_train, y_train, model__epochs=10, model__validation_split=0.1)
X_train_excluded = X_train.iloc[1:]
y_train_excluded = y_train.iloc[1:]
pipeline.fit(X_train_excluded, y_train_excluded, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
pipeline['model'].save('powerlifting_model.h5')

View File

@@ -1,11 +1,31 @@
##!/bin/bash
#pip install kaggle
#kaggle datasets download -d open-powerlifting/powerlifting-database
#unzip -o powerlifting-database.zip
#DATASET_FILE="openpowerlifting.csv"
#echo "Obcięte wiersze: ${1}"
#head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
#echo "Podział i wymieszanie"
#total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
#train_lines=$((total_lines * 90 / 100))
#dev_lines=$((total_lines * 10 / 100))
#test_lines=$((total_lines - train_lines - dev_lines))
#shuf cutoff_$DATASET_FILE -o shuffled.csv
#head -n $train_lines shuffled.csv > train.csv
#tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
#tail -n $test_lines shuffled.csv > test.csv
#mkdir -p data
#mv train.csv dev.csv test.csv data/
#!/bin/bash
pip install kaggle
kaggle datasets download -d open-powerlifting/powerlifting-database
unzip -o powerlifting-database.zip
DATASET_FILE="openpowerlifting.csv"
echo "Obcięte wiersze: ${1}"
column_names=$(head -n 1 $DATASET_FILE)
echo "Truncated rows: ${1}"
head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
echo "Podział i wymieszanie"
echo "$column_names" > temp && cat cutoff_$DATASET_FILE >> temp && mv temp cutoff_$DATASET_FILE
total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
train_lines=$((total_lines * 90 / 100))
dev_lines=$((total_lines * 10 / 100))
@@ -15,4 +35,7 @@ head -n $train_lines shuffled.csv > train.csv
tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
tail -n $test_lines shuffled.csv > test.csv
mkdir -p data
echo "$column_names" | cat - train.csv > temp && mv temp train.csv
echo "$column_names" | cat - dev.csv > temp && mv temp dev.csv
echo "$column_names" | cat - test.csv > temp && mv temp test.csv
mv train.csv dev.csv test.csv data/