Compare commits
9 Commits
main
...
evaluation
Author | SHA1 | Date | |
---|---|---|---|
|
7c8fe37562 | ||
|
4a7fe811f5 | ||
|
8d92919488 | ||
|
17be57bcd3 | ||
|
adf3b77091 | ||
|
cb364fee5f | ||
|
dc3284677a | ||
|
d5306f5b06 | ||
|
a6f8a4fe78 |
@ -2,7 +2,7 @@ FROM ubuntu:latest
|
|||||||
|
|
||||||
RUN apt-get update && apt-get install -y python3-pip unzip coreutils
|
RUN apt-get update && apt-get install -y python3-pip unzip coreutils
|
||||||
|
|
||||||
RUN pip install --user kaggle pandas scikit-learn tensorflow
|
RUN pip install --no-cache-dir wheel kaggle pandas scikit-learn tensorflow
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
77
Jenkinsfile
vendored
77
Jenkinsfile
vendored
@ -1,37 +1,62 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent {
|
||||||
|
dockerfile true
|
||||||
parameters {
|
|
||||||
string(name: 'CUTOFF', defaultValue: '100', description: 'Ilość wierszy do odcięcia')
|
|
||||||
string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
|
|
||||||
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
triggers {
|
||||||
|
upstream(upstreamProjects: 's464937-training/training', threshold: hudson.model.Result.SUCCESS)
|
||||||
|
}
|
||||||
|
|
||||||
|
parameters {
|
||||||
|
buildSelector(defaultSelector: lastSuccessful(), description: 'Which build to use for copying artifacts', name: 'BUILD_SELECTOR')
|
||||||
|
gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
|
||||||
|
}
|
||||||
|
|
||||||
stages {
|
stages {
|
||||||
stage('Clone repo') {
|
stage('Clone Repository') {
|
||||||
steps {
|
steps {
|
||||||
git branch: "main", url: "https://git.wmi.amu.edu.pl/s464937/ium_464937"
|
git branch: 'evaluation', url: "https://git.wmi.amu.edu.pl/s464937/ium_464937"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
stage('Copy Dataset Artifacts') {
|
||||||
|
steps {
|
||||||
|
copyArtifacts filter: 'data/dev.csv,data/test.csv,data/train.csv', projectName: 'z-s464937-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Copy Training Artifacts') {
|
||||||
|
steps {
|
||||||
|
copyArtifacts filter: 'powerlifting_model.h5', projectName: 's464937-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Copy Evaluation Artifacts') {
|
||||||
|
steps {
|
||||||
|
copyArtifacts filter: 'metrics.txt', projectName: 's464937-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage("Run predictions") {
|
||||||
|
steps {
|
||||||
|
sh "chmod +x ./predict.py"
|
||||||
|
sh "python3 ./predict.py"
|
||||||
|
archiveArtifacts artifacts: 'powerlifting_test_predictions.csv', onlyIfSuccessful: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Run metrics') {
|
||||||
|
steps {
|
||||||
|
sh 'chmod +x ./metrics.py'
|
||||||
|
sh "python3 ./metrics.py ${currentBuild.number}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
stage('Download and preprocess') {
|
stage('Run plot') {
|
||||||
environment {
|
steps {
|
||||||
KAGGLE_USERNAME = "szymonbartanowicz"
|
sh 'chmod +x ./plot.py'
|
||||||
KAGGLE_KEY = "4692239eb65f20ec79f9a59ef30e67eb"
|
sh 'python3 ./plot.py'
|
||||||
}
|
}
|
||||||
steps {
|
|
||||||
withEnv([
|
|
||||||
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
|
|
||||||
"KAGGLE_KEY=${env.KAGGLE_KEY}"
|
|
||||||
]) {
|
|
||||||
sh "bash ./script1.sh ${params.CUTOFF}"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
stage('Archive') {
|
stage('Archive Artifacts') {
|
||||||
steps {
|
steps {
|
||||||
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
|
archiveArtifacts artifacts: '*', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
14
metrics.py
Normal file
14
metrics.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
|
||||||
|
from math import sqrt
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Load the predictions produced by predict.py; the file holds the model's
# output next to the ground-truth value for every test row.
results = pd.read_csv('powerlifting_test_predictions.csv')
predicted = results['predicted_TotalKg']
actual = results['actual_TotalKg']

# The build number is passed as the first command-line argument.
build_number = sys.argv[1]

# Root of the mean squared error between ground truth and prediction.
mse = mean_squared_error(actual, predicted)
rmse = sqrt(mse)

# Append (never overwrite) one "<build_number>,<rmse>" record per run so the
# file accumulates a history across builds.
record = f"{build_number},{rmse}\n"
with open("metrics.txt", "a") as metrics_file:
    metrics_file.write(record)
|
0
metrics.txt
Normal file
0
metrics.txt
Normal file
20
plot.py
Normal file
20
plot.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
def main():
    """Plot the RMSE recorded for each build and save it as ``plot.png``.

    Reads ``metrics.txt`` from the current directory. Each non-empty line is
    expected to look like ``<build_number>,<rmse>``, which is the record
    format metrics.py appends. Blank lines are ignored.

    Raises:
        FileNotFoundError: if ``metrics.txt`` does not exist.
        ValueError: if a line's build number is not an integer or its RMSE
            is not a float.
        IndexError: if a non-empty line has fewer than two comma-separated
            fields.
    """
    build_numbers = []
    rmse_values = []

    with open("metrics.txt") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing on them.
                continue
            fields = line.split(",")
            # Column order is build number first, RMSE second; reading them
            # the other way round makes int() fail on a fractional RMSE.
            build_numbers.append(int(fields[0]))
            rmse_values.append(float(fields[1]))

    plt.plot(build_numbers, rmse_values)
    plt.xlabel("Build Number")
    plt.ylabel("RMSE")
    plt.title("RMSE of the model over time")
    if build_numbers:
        # min()/max() raise ValueError on an empty sequence, so only set
        # one tick per build when at least one record was read.
        plt.xticks(range(min(build_numbers), max(build_numbers) + 1))
    plt.savefig("plot.png")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
10
predict.py
10
predict.py
@ -4,11 +4,18 @@ from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
|||||||
from sklearn.compose import ColumnTransformer
|
from sklearn.compose import ColumnTransformer
|
||||||
from sklearn.pipeline import Pipeline
|
from sklearn.pipeline import Pipeline
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
from keras.metrics import MeanSquaredError
|
||||||
|
|
||||||
loaded_model = tf.keras.models.load_model('powerlifting_model.h5')
|
loaded_model = tf.keras.models.load_model('powerlifting_model.h5')
|
||||||
|
|
||||||
data = pd.read_csv('openpowerlifting.csv')
|
data = pd.read_csv('openpowerlifting.csv')
|
||||||
|
|
||||||
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
|
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
|
||||||
|
|
||||||
|
data['Age'] = pd.to_numeric(data['Age'], errors='coerce')
|
||||||
|
data['BodyweightKg'] = pd.to_numeric(data['BodyweightKg'], errors='coerce')
|
||||||
|
data['TotalKg'] = pd.to_numeric(data['TotalKg'], errors='coerce')
|
||||||
|
|
||||||
features = data[['Sex', 'Age', 'BodyweightKg']]
|
features = data[['Sex', 'Age', 'BodyweightKg']]
|
||||||
target = data['TotalKg']
|
target = data['TotalKg']
|
||||||
|
|
||||||
@ -20,8 +27,9 @@ preprocessor = ColumnTransformer(
|
|||||||
('cat', OneHotEncoder(), ['Sex'])
|
('cat', OneHotEncoder(), ['Sex'])
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
X_test_transformed = preprocessor.fit_transform(X_test)
|
|
||||||
|
|
||||||
|
X_test_transformed = preprocessor.fit_transform(X_test)
|
||||||
predictions = loaded_model.predict(X_test_transformed)
|
predictions = loaded_model.predict(X_test_transformed)
|
||||||
predictions_df = pd.DataFrame(predictions, columns=['predicted_TotalKg'])
|
predictions_df = pd.DataFrame(predictions, columns=['predicted_TotalKg'])
|
||||||
|
predictions_df['actual_TotalKg'] = y_test.reset_index(drop=True)
|
||||||
predictions_df.to_csv('powerlifting_test_predictions.csv', index=False)
|
predictions_df.to_csv('powerlifting_test_predictions.csv', index=False)
|
||||||
|
Loading…
Reference in New Issue
Block a user