Compare commits

...

9 Commits

SHA1 Message Date
96e8535023 Update Dockerfile 2024-06-01 17:54:53 +02:00
df42bfcee0 IUM_06 2024-05-04 16:39:48 +02:00
3f95fa102c IUM_06 2024-05-04 16:23:32 +02:00
0920a59d1f IUM_06 2024-05-04 16:19:51 +02:00
b1a03b41b0 IUM_06 2024-05-04 15:59:35 +02:00
9d6ffe8205 IUM_06 2024-05-04 15:54:55 +02:00
a8cf8d2829 IUM_06 2024-05-04 15:42:16 +02:00
dace057c96 IUM_06 2024-05-04 15:30:49 +02:00
ee4c1adab2 IUM_06 2024-05-04 15:25:54 +02:00
6 changed files with 93 additions and 39 deletions

.gitignore

@@ -3,3 +3,4 @@ creditcard.csv
 data
 model/model.keras
 stats_data
+evaluation

Dockerfile

@@ -2,4 +2,4 @@ FROM ubuntu:latest
 RUN apt update && apt install -y python3-pip
-RUN pip install pandas numpy scikit-learn tensorflow
+RUN pip install pandas numpy scikit-learn tensorflow matplotlib --break-system-packages

Jenkinsfile

@@ -1,54 +1,70 @@
 pipeline {
-    agent any
+    agent {
+        dockerfile true
+    }
+    triggers {
+        upstream(upstreamProjects: 's464913-training/training', threshold: hudson.model.Result.SUCCESS)
+    }
     parameters {
-        string (
-            defaultValue: 'vskyper',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password (
-            defaultValue: '',
-            description: 'Kaggle API key',
-            name: 'KAGGLE_KEY',
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
         )
+        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
     }
     stages {
         stage('Clone Repository') {
             steps {
-                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
+                git branch: 'evaluation', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
             }
         }
-        stage('Download dataset') {
+        stage('Copy Artifacts from dataset job') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'pip install kaggle'
-                    sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
-                    sh 'unzip -o creditcardfraud.zip'
-                    sh 'rm creditcardfraud.zip'
-                }
+                copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }
-        stage('Run create-dataset script') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
+        stage('Copy Artifacts from training job') {
             steps {
-                sh 'chmod +x create-dataset.py'
-                sh 'python3 ./create-dataset.py'
+                copyArtifacts filter: 'model/*', projectName: 's464913-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
+            }
+        }
+        stage('Copy Artifacts from evaluation job') {
+            steps {
+                copyArtifacts filter: 'evaluation/*', projectName: 's464913-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
+            }
+        }
+        stage('Run predict script') {
+            steps {
+                sh 'chmod +x predict.py'
+                sh 'python3 ./predict.py'
+            }
+        }
+        stage('Run metrics script') {
+            steps {
+                sh 'chmod +x metrics.py'
+                sh "python3 ./metrics.py ${currentBuild.number}"
+            }
+        }
+        stage('Run plot script') {
+            steps {
+                sh 'chmod +x plot.py'
+                sh 'python3 ./plot.py'
             }
         }
         stage('Archive Artifacts') {
             steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'evaluation/*', onlyIfSuccessful: true
             }
         }
     }
 }
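With this change the pipeline runs inside the project's Docker image, is triggered by a successful s464913-training/training build, and collects its inputs with copyArtifacts instead of downloading the Kaggle dataset; the three script stages then produce everything archived under evaluation/. A rough local dry run of those stages (a sketch only, assuming the data/ and model/ artifacts are already in place, with 42 as a stand-in for ${currentBuild.number}):

import subprocess

build_number = "42"  # stand-in for Jenkins' ${currentBuild.number}

subprocess.run(["python3", "./predict.py"], check=True)                # writes evaluation/y_pred.csv
subprocess.run(["python3", "./metrics.py", build_number], check=True)  # appends a "<metric>,<build>" line to evaluation/metrics.txt
subprocess.run(["python3", "./plot.py"], check=True)                   # writes evaluation/accuracy.png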

metrics.py (new file)

@@ -0,0 +1,19 @@
from sklearn.metrics import confusion_matrix
import pandas as pd
import sys


def main():
    y_test = pd.read_csv("data/y_test.csv")
    y_pred = pd.read_csv("evaluation/y_pred.csv", header=None)
    build_number = sys.argv[1]

    cm = confusion_matrix(y_test, y_pred)
    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    with open(r"evaluation/metrics.txt", "a") as f:
        f.write(f"{accuracy},{build_number}\n")


if __name__ == "__main__":
    main()
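Note that the value written here, cm[1, 1] / (cm[1, 0] + cm[1, 1]), is the recall (true-positive rate) of the positive class rather than overall accuracy; it is the same "Recall metric" the old predict.py printed, only the variable name says accuracy. A minimal sketch with made-up labels showing the difference:

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 1, 1, 1])  # hypothetical ground truth
y_pred = np.array([0, 1, 1, 0, 1])  # hypothetical predictions

cm = confusion_matrix(y_true, y_pred)
recall = cm[1, 1] / (cm[1, 0] + cm[1, 1])  # what metrics.py writes: 2 / 3
accuracy = np.trace(cm) / cm.sum()         # overall accuracy: 3 / 5
print(recall, accuracy)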

plot.py (new file)

@@ -0,0 +1,24 @@
import matplotlib.pyplot as plt


def main():
    accuracy = []
    build_numbers = []

    with open("evaluation/metrics.txt") as f:
        for line in f:
            accuracy.append(float(line.split(",")[0]))
            build_numbers.append(int(line.split(",")[1]))

    plt.plot(build_numbers, accuracy)
    plt.xlabel("Build Number")
    plt.ylabel("Accuracy")
    plt.title("Accuracy of the model over time")
    plt.xticks(range(min(build_numbers), max(build_numbers) + 1))

    plt.show()
    plt.savefig("evaluation/accuracy.png")


if __name__ == "__main__":
    main()
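One caveat with plot.py as committed: plt.show() runs before plt.savefig(). On a headless Jenkins agent with the Agg backend show() is effectively a no-op, so the PNG is still written, but with an interactive backend the figure may already be closed by the time savefig() runs, yielding an empty image. A safer ordering (a sketch only, with made-up data points):

import os
import matplotlib
matplotlib.use("Agg")  # explicit non-interactive backend for CI
import matplotlib.pyplot as plt

os.makedirs("evaluation", exist_ok=True)
plt.plot([1, 2, 3], [0.90, 0.92, 0.95])  # hypothetical build numbers vs. metric values
plt.xlabel("Build Number")
plt.ylabel("Accuracy")
plt.savefig("evaluation/accuracy.png")  # save first, then show (or skip show entirely on CI)
plt.close()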

predict.py

@@ -4,24 +4,18 @@ os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
 from keras.models import load_model
 import pandas as pd
-from sklearn.metrics import confusion_matrix
 import numpy as np
 
 
 def main():
     model = load_model("model/model.keras")
     X_test = pd.read_csv("data/X_test.csv")
-    y_test = pd.read_csv("data/y_test.csv")
 
     y_pred = model.predict(X_test)
     y_pred = y_pred >= 0.5
-    np.savetxt("data/y_pred.csv", y_pred, delimiter=",")
 
-    cm = confusion_matrix(y_test, y_pred)
-    print(
-        "Recall metric in the testing dataset: ",
-        cm[1, 1] / (cm[1, 0] + cm[1, 1]),
-    )
+    os.makedirs("evaluation", exist_ok=True)
+    np.savetxt("evaluation/y_pred.csv", y_pred, delimiter=",")
 
 
 if __name__ == "__main__":