Compare commits
30 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 5651fb92d1 | |
| | a597ba4d2a | |
| | e1616fd376 | |
| | 97ca282402 | |
| | a188435a1b | |
| | fb48e4e5f5 | |
| | a860f43d29 | |
| | a3b003c422 | |
| | a3da4f5c76 | |
| | da08a94eb5 | |
| | c94695e20b | |
| | 27eb66aaed | |
| | 99173104ae | |
| | 1e044d743f | |
| | ba6c8d5dde | |
| | 8942ab2122 | |
| | 7e735543b9 | |
| | 14fc6d1120 | |
| | d02162bc7a | |
| | c200a8f364 | |
| | 36199513fa | |
| | 8c6a6c593a | |
| | 844157b757 | |
| | 199980fdb9 | |
| | 8fd726c5a7 | |
| | 9c578e17b3 | |
| | 40f48555c8 | |
| | ac790babc3 | |
| | 6ecb5ee56a | |
| | af6518f064 | |
Dockerfile

@@ -2,7 +2,7 @@ FROM ubuntu:latest
 
 RUN apt-get update && apt-get install -y python3-pip unzip coreutils
 
-RUN pip install --user kaggle pandas scikit-learn tensorflow
+RUN pip install --no-cache-dir wheel kaggle pandas scikit-learn tensorflow
 
 WORKDIR /app
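A quick, throwaway check of the updated `pip install` layer (an assumption on my part, not part of the repository): run inside the built image, it confirms that the packages installed in the Dockerfile resolve. `find_spec` is used instead of plain imports because the `kaggle` package tries to authenticate as soon as it is imported.

```python
# Hypothetical sanity check for the image built from this Dockerfile (not in the repo).
# find_spec only locates each package; it does not import it, so no Kaggle credentials are needed.
from importlib.util import find_spec

for pkg in ("kaggle", "pandas", "sklearn", "tensorflow"):
    status = "ok" if find_spec(pkg) is not None else "MISSING"
    print(f"{pkg}: {status}")
```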
Jenkinsfile (vendored), 39 changed lines
@@ -1,36 +1,37 @@
 pipeline {
     agent any
 
+    triggers {
+        upstream(upstreamProjects: 'z-s464937-create-dataset', threshold: hudson.model.Result.SUCCESS)
+    }
     parameters {
-        string(name: 'CUTOFF', defaultValue: '100', description: 'Ilość wierszy do odcięcia')
-        string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
-        password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
+        string(name: 'EPOCHS', defaultValue: '10', description: 'Epochs')
+        buildSelector(defaultSelector: lastSuccessful(), description: 'Build no', name: 'BUILD_SELECTOR')
     }
 
     stages {
-        stage('Clone repo') {
+        stage('Clone Repository') {
             steps {
-                git branch: "main", url: "https://git.wmi.amu.edu.pl/s464937/ium_464937"
+                git branch: 'training', url: "https://git.wmi.amu.edu.pl/s464937/ium_464937.git"
             }
         }
-        stage('Download and preprocess') {
-            environment {
-                KAGGLE_USERNAME = "szymonbartanowicz"
-                KAGGLE_KEY = "4692239eb65f20ec79f9a59ef30e67eb"
-            }
+        stage('Copy Artifacts') {
             steps {
-                withEnv([
-                    "KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
-                    "KAGGLE_KEY=${env.KAGGLE_KEY}"
-                ]) {
-                    sh "bash ./script1.sh ${params.CUTOFF}"
-                }
+                copyArtifacts filter: 'data/dev.csv,data/test.csv,data/train.csv', projectName: 'z-s464937-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }
-        stage('Archive') {
+        stage("Run") {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    reuseNode true
+                }
+            }
             steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                sh "chmod +x ./model.py"
+                sh "python3 ./model.py ${params.EPOCHS}"
+                archiveArtifacts artifacts: 'powerlifting_model.h5', onlyIfSuccessful: true
             }
         }
     }
 }
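The rewritten pipeline now pulls `data/train.csv`, `data/dev.csv`, and `data/test.csv` from the upstream `z-s464937-create-dataset` job (via `copyArtifacts` and the `BUILD_SELECTOR` parameter) and trains inside the image built from the Dockerfile above. As a minimal sketch, and not something present in the repository, a pre-training check like the following could be run in the "Run" stage to fail fast if the copied artifacts are missing or empty:

```python
# Hypothetical pre-training check (not in the repository): verify the artifacts
# copied by the 'Copy Artifacts' stage before model.py is started.
from pathlib import Path

for name in ("train.csv", "dev.csv", "test.csv"):
    path = Path("data") / name
    if not path.is_file() or path.stat().st_size == 0:
        raise SystemExit(f"missing or empty artifact: {path}")
    print(f"{path}: {path.stat().st_size} bytes")
```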
model.py, 16 changed lines
@@ -1,3 +1,4 @@
+import sys
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -7,10 +8,12 @@ from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense
 import tensorflow as tf
 
-data = pd.read_csv('openpowerlifting.csv')
+data = pd.read_csv('./data/train.csv')
 
 data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
-
+data['Age'] = pd.to_numeric(data['Age'], errors='coerce')
+data['BodyweightKg'] = pd.to_numeric(data['BodyweightKg'], errors='coerce')
+data['TotalKg'] = pd.to_numeric(data['TotalKg'], errors='coerce')
 features = data[['Sex', 'Age', 'BodyweightKg']]
 target = data['TotalKg']
 
@@ -20,13 +23,13 @@ preprocessor = ColumnTransformer(
     transformers=[
         ('num', StandardScaler(), ['Age', 'BodyweightKg']),
         ('cat', OneHotEncoder(), ['Sex'])
-    ]
+    ],
 )
 
 pipeline = Pipeline(steps=[
     ('preprocessor', preprocessor),
     ('model', Sequential([
-        Dense(64, activation='relu', input_dim=4),
+        Dense(64, activation='relu', input_dim=5),
         Dense(64, activation='relu'),
         Dense(1)
     ]))
@@ -34,6 +37,9 @@ pipeline = Pipeline(steps=[
 
 pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
 
-pipeline.fit(X_train, y_train, model__epochs=10, model__validation_split=0.1)
+X_train_excluded = X_train.iloc[1:]
+y_train_excluded = y_train.iloc[1:]
+
+pipeline.fit(X_train_excluded, y_train_excluded, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
 
 pipeline['model'].save('powerlifting_model.h5')
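Note that `pipeline['model'].save('powerlifting_model.h5')` writes only the Keras network; the scikit-learn preprocessor (scaler plus one-hot encoder) is not stored in the `.h5` file. A minimal loading sketch, assuming new rows have already been scaled and encoded the same way as during training (two standardized numeric columns plus the one-hot `Sex` columns, five inputs in total to match `input_dim=5`):

```python
# Sketch of loading the archived model for inference (assumes the input row has
# already been preprocessed exactly as in training; the preprocessor is not in the .h5).
import numpy as np
from tensorflow.keras.models import load_model

model = load_model("powerlifting_model.h5")

# One illustrative, already-preprocessed row: [Age_scaled, BodyweightKg_scaled, Sex one-hot...]
row = np.array([[0.1, -0.3, 1.0, 0.0, 0.0]])
print(model.predict(row))  # predicted TotalKg
```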
script1.sh, 27 changed lines
@@ -1,11 +1,31 @@
+##!/bin/bash
+#pip install kaggle
+#kaggle datasets download -d open-powerlifting/powerlifting-database
+#unzip -o powerlifting-database.zip
+#DATASET_FILE="openpowerlifting.csv"
+#echo "Obcięte wiersze: ${1}"
+#head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
+#echo "Podział i wymieszanie"
+#total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
+#train_lines=$((total_lines * 90 / 100))
+#dev_lines=$((total_lines * 10 / 100))
+#test_lines=$((total_lines - train_lines - dev_lines))
+#shuf cutoff_$DATASET_FILE -o shuffled.csv
+#head -n $train_lines shuffled.csv > train.csv
+#tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
+#tail -n $test_lines shuffled.csv > test.csv
+#mkdir -p data
+#mv train.csv dev.csv test.csv data/
+
 #!/bin/bash
 pip install kaggle
 kaggle datasets download -d open-powerlifting/powerlifting-database
 unzip -o powerlifting-database.zip
 DATASET_FILE="openpowerlifting.csv"
-echo "Obcięte wiersze: ${1}"
+column_names=$(head -n 1 $DATASET_FILE)
+echo "Truncated rows: ${1}"
 head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
-echo "Podział i wymieszanie"
+echo "$column_names" > temp && cat cutoff_$DATASET_FILE >> temp && mv temp cutoff_$DATASET_FILE
 total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
 train_lines=$((total_lines * 90 / 100))
 dev_lines=$((total_lines * 10 / 100))
@@ -15,4 +35,7 @@ head -n $train_lines shuffled.csv > train.csv
 tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
 tail -n $test_lines shuffled.csv > test.csv
 mkdir -p data
+echo "$column_names" | cat - train.csv > temp && mv temp train.csv
+echo "$column_names" | cat - dev.csv > temp && mv temp dev.csv
+echo "$column_names" | cat - test.csv > temp && mv temp test.csv
 mv train.csv dev.csv test.csv data/
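For reference, a rough pandas equivalent of the new header-preserving shuffle-and-split (a sketch under assumed file names, not part of the repository). `to_csv` writes the column header to every output file automatically, which is what the added `column_names` / `cat -` lines accomplish in bash:

```python
# Sketch: same 90/10/remainder split as script1.sh, done with pandas (hypothetical, not in the repo).
from pathlib import Path
import pandas as pd

df = pd.read_csv("cutoff_openpowerlifting.csv")   # truncated dataset, header row included
df = df.sample(frac=1, random_state=0)            # shuffle, like shuf

n = len(df)
train_end = n * 90 // 100
dev_end = train_end + n * 10 // 100

Path("data").mkdir(exist_ok=True)
df.iloc[:train_end].to_csv("data/train.csv", index=False)
df.iloc[train_end:dev_end].to_csv("data/dev.csv", index=False)
df.iloc[dev_end:].to_csv("data/test.csv", index=False)
```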