Compare commits

30 commits (SHA1):

5651fb92d1
a597ba4d2a
e1616fd376
97ca282402
a188435a1b
fb48e4e5f5
a860f43d29
a3b003c422
a3da4f5c76
da08a94eb5
c94695e20b
27eb66aaed
99173104ae
1e044d743f
ba6c8d5dde
8942ab2122
7e735543b9
14fc6d1120
d02162bc7a
c200a8f364
36199513fa
8c6a6c593a
844157b757
199980fdb9
8fd726c5a7
9c578e17b3
40f48555c8
ac790babc3
6ecb5ee56a
af6518f064
Dockerfile

@@ -2,7 +2,7 @@ FROM ubuntu:latest

 RUN apt-get update && apt-get install -y python3-pip unzip coreutils

-RUN pip install --user kaggle pandas scikit-learn tensorflow
+RUN pip install --no-cache-dir wheel kaggle pandas scikit-learn tensorflow

 WORKDIR /app
Jenkinsfile (vendored, 43 changes)
@@ -1,36 +1,37 @@
 pipeline {
     agent any

     triggers {
         upstream(upstreamProjects: 'z-s464937-create-dataset', threshold: hudson.model.Result.SUCCESS)
     }

     parameters {
         string(name: 'CUTOFF', defaultValue: '100', description: 'Ilość wierszy do odcięcia')
         string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
         password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
         string(name: 'EPOCHS', defaultValue: '10', description: 'Epochs')
         buildSelector(defaultSelector: lastSuccessful(), description: 'Build no', name: 'BUILD_SELECTOR')
     }

     stages {
-        stage('Clone repo') {
+        stage('Clone Repository') {
             steps {
-                git branch: "main", url: "https://git.wmi.amu.edu.pl/s464937/ium_464937"
+                git branch: 'training', url: "https://git.wmi.amu.edu.pl/s464937/ium_464937.git"
             }
         }

-        stage('Download and preprocess') {
-            environment {
-                KAGGLE_USERNAME = "szymonbartanowicz"
-                KAGGLE_KEY = "4692239eb65f20ec79f9a59ef30e67eb"
+        stage('Copy Artifacts') {
             steps {
+                copyArtifacts filter: 'data/dev.csv,data/test.csv,data/train.csv', projectName: 'z-s464937-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }
         stage("Run") {
             agent {
                 dockerfile {
                     filename 'Dockerfile'
                     reuseNode true
                 }
             }
             steps {
                 withEnv([
                     "KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
                     "KAGGLE_KEY=${env.KAGGLE_KEY}"
                 ]) {
                     sh "bash ./script1.sh ${params.CUTOFF}"
                 }
             }
         }
         stage('Archive') {
             steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                sh "chmod +x ./model.py"
+                sh "python3 ./model.py ${params.EPOCHS}"
+                archiveArtifacts artifacts: 'powerlifting_model.h5', onlyIfSuccessful: true
             }
         }
     }
model.py (16 changes)
@@ -1,3 +1,4 @@
+import sys
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -7,10 +8,12 @@ from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense
 import tensorflow as tf

-data = pd.read_csv('openpowerlifting.csv')
+data = pd.read_csv('./data/train.csv')

 data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()

+data['Age'] = pd.to_numeric(data['Age'], errors='coerce')
+data['BodyweightKg'] = pd.to_numeric(data['BodyweightKg'], errors='coerce')
+data['TotalKg'] = pd.to_numeric(data['TotalKg'], errors='coerce')
 features = data[['Sex', 'Age', 'BodyweightKg']]
 target = data['TotalKg']

@@ -20,13 +23,13 @@ preprocessor = ColumnTransformer(
     transformers=[
         ('num', StandardScaler(), ['Age', 'BodyweightKg']),
         ('cat', OneHotEncoder(), ['Sex'])
-    ]
+    ],
 )

 pipeline = Pipeline(steps=[
     ('preprocessor', preprocessor),
     ('model', Sequential([
-        Dense(64, activation='relu', input_dim=4),
+        Dense(64, activation='relu', input_dim=5),
         Dense(64, activation='relu'),
         Dense(1)
     ]))
@@ -34,6 +37,9 @@ pipeline = Pipeline(steps=[

 pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])

-pipeline.fit(X_train, y_train, model__epochs=10, model__validation_split=0.1)
+X_train_excluded = X_train.iloc[1:]
+y_train_excluded = y_train.iloc[1:]
+
+pipeline.fit(X_train_excluded, y_train_excluded, model__epochs=int(sys.argv[1]), model__validation_split=0.1)

 pipeline['model'].save('powerlifting_model.h5')
script1.sh (27 changes)
@@ -1,11 +1,31 @@
+##!/bin/bash
+#pip install kaggle
+#kaggle datasets download -d open-powerlifting/powerlifting-database
+#unzip -o powerlifting-database.zip
+#DATASET_FILE="openpowerlifting.csv"
+#echo "Obcięte wiersze: ${1}"
+#head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
+#echo "Podział i wymieszanie"
+#total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
+#train_lines=$((total_lines * 90 / 100))
+#dev_lines=$((total_lines * 10 / 100))
+#test_lines=$((total_lines - train_lines - dev_lines))
+#shuf cutoff_$DATASET_FILE -o shuffled.csv
+#head -n $train_lines shuffled.csv > train.csv
+#tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
+#tail -n $test_lines shuffled.csv > test.csv
+#mkdir -p data
+#mv train.csv dev.csv test.csv data/
+
 #!/bin/bash
 pip install kaggle
 kaggle datasets download -d open-powerlifting/powerlifting-database
 unzip -o powerlifting-database.zip
 DATASET_FILE="openpowerlifting.csv"
 echo "Obcięte wiersze: ${1}"
+column_names=$(head -n 1 $DATASET_FILE)
+echo "Truncated rows: ${1}"
 head -n $1 $DATASET_FILE > cutoff_$DATASET_FILE
 echo "Podział i wymieszanie"
+echo "$column_names" > temp && cat cutoff_$DATASET_FILE >> temp && mv temp cutoff_$DATASET_FILE
 total_lines=$(tail -n +2 cutoff_$DATASET_FILE | wc -l)
 train_lines=$((total_lines * 90 / 100))
 dev_lines=$((total_lines * 10 / 100))
@@ -15,4 +35,7 @@ head -n $train_lines shuffled.csv > train.csv
 tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
 tail -n $test_lines shuffled.csv > test.csv
 mkdir -p data
+echo "$column_names" | cat - train.csv > temp && mv temp train.csv
+echo "$column_names" | cat - dev.csv > temp && mv temp dev.csv
+echo "$column_names" | cat - test.csv > temp && mv temp test.csv
 mv train.csv dev.csv test.csv data/
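For reference, a minimal sketch of running the same two steps locally, outside Jenkins. This mirrors what the 'Run' and 'Archive' stages do; the exported credential values are placeholders, not real ones, and are assumed to be provided the way withEnv does in the Jenkinsfile.

# Hypothetical local walkthrough; not part of the diff above.
export KAGGLE_USERNAME=your_username   # placeholder Kaggle account name
export KAGGLE_KEY=your_api_key         # placeholder Kaggle API key
bash ./script1.sh 100                  # download dataset, truncate to 100 rows, split into data/{train,dev,test}.csv
python3 ./model.py 10                  # train for 10 epochs and save powerlifting_model.h5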