Update Dockerfile

IUM_06
2024-06-01 17:54:53 +02:00 · 2024-05-04 16:39:48 +02:00 · 2024-05-04 16:23:32 +02:00 · 2024-05-04 16:19:51 +02:00 · 2024-05-04 15:59:35 +02:00 · 2024-05-04 15:54:55 +02:00
6 changed files with 93 additions and 39 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,4 +2,5 @@ creditcardfraud.zip
 creditcard.csv
 data
 model/model.keras
-stats_data
+stats_data
+evaluation
--- a/Dockerfile
+++ b/Dockerfile
@ -2,4 +2,4 @@ FROM ubuntu:latest

 RUN apt update && apt install -y python3-pip

-RUN pip install pandas numpy scikit-learn tensorflow
+# --break-system-packages lets pip install into the distribution-managed
+# Python (the "externally managed environment" guard described in PEP 668).
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir --break-system-packages \
+      matplotlib \
+      numpy \
+      pandas \
+      scikit-learn \
+      tensorflow
--- a/Jenkinsfile
+++ b/Jenkinsfile
@ -1,54 +1,70 @@
 pipeline {
-  agent any
+  agent { 
+    dockerfile true 
+  }
+
+  triggers {
+    upstream(upstreamProjects: 's464913-training/training', threshold: hudson.model.Result.SUCCESS)
+  }
  
  parameters {
-    string (
-      defaultValue: 'vskyper',
-      description: 'Kaggle username',
-      name: 'KAGGLE_USERNAME',
-      trim: false
-    )
-    password (
-      defaultValue: '',
-      description: 'Kaggle API key',
-      name: 'KAGGLE_KEY',
+    buildSelector(
+      defaultSelector: lastSuccessful(),
+      description: 'Which build to use for copying artifacts',
+      name: 'BUILD_SELECTOR'
    )
+    gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
  }

  stages {
    stage('Clone Repository') {
      steps {
-        git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
+        git branch: 'evaluation', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
      }
    }

-    stage('Download dataset') {
+    stage('Copy Artifacts from dataset job') {
      steps {
-        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-          sh 'pip install kaggle'
-          sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
-          sh 'unzip -o creditcardfraud.zip'
-          sh 'rm creditcardfraud.zip'
-        }
+        copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
      }
    }

-    stage('Run create-dataset script') {
-        agent {
-          dockerfile {
-            reuseNode true
-        }
-      }
-
+    stage('Copy Artifacts from training job') {
      steps {
-        sh 'chmod +x create-dataset.py'
-        sh 'python3 ./create-dataset.py'
+        copyArtifacts filter: 'model/*', projectName: 's464913-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
+      }
+    }
+
+    // Copy evaluation/* artifacts from the 's464913-evaluation/evaluation'
+    // project, using the build chosen by the BUILD_SELECTOR parameter.
+    // `optional: true` keeps the build from failing when there is nothing
+    // to copy.
+    stage('Copy Artifacts from evaluation job') {
+      steps {
+        copyArtifacts filter: 'evaluation/*', projectName: 's464913-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
+      }
+    }
+
+    // Run predict.py, which writes the model's predictions to
+    // evaluation/y_pred.csv.
+    // NOTE(review): the chmod is not required when the script is started
+    // through `python3`.
+    stage('Run predict script') {
+      steps {
+        sh 'chmod +x predict.py'
+        sh 'python3 ./predict.py'
+      }
+    }
+
+    // Run metrics.py with the current build number as its only argument;
+    // the script appends one "<value>,<build number>" line to
+    // evaluation/metrics.txt.
+    stage('Run metrics script') {
+      steps {
+        sh 'chmod +x metrics.py'
+        // Double quotes: Groovy must interpolate the build number before
+        // the command is handed to the shell.
+        sh "python3 ./metrics.py ${currentBuild.number}"
+      }
+    }
+
+    // Run plot.py, which reads evaluation/metrics.txt and saves the chart
+    // to evaluation/accuracy.png.
+    stage('Run plot script') {
+      steps {
+        sh 'chmod +x plot.py'
+        sh 'python3 ./plot.py'
+      }
+    }

    stage('Archive Artifacts') {
      steps {
-        archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+        archiveArtifacts artifacts: 'evaluation/*', onlyIfSuccessful: true
      }
    }
  }
--- a/metrics.py
+++ b/metrics.py
@ -0,0 +1,19 @@
+from sklearn.metrics import confusion_matrix
+import pandas as pd
+import sys
+
+
+def main():
+    """Append this build's test-set metric to ``evaluation/metrics.txt``.
+
+    Reads the true labels from ``data/y_test.csv`` and the predictions from
+    ``evaluation/y_pred.csv`` (which has no header row), then appends one
+    ``<value>,<build number>`` line to the metrics file.
+
+    The build number comes from the first command-line argument. It is
+    validated as an integer up front: the metrics file accumulates across
+    builds and ``plot.py`` parses that field with ``int()``, so one bad
+    record would break every later plot.
+
+    Raises:
+        SystemExit: if the build number is missing or is not an integer.
+    """
+    if len(sys.argv) < 2:
+        sys.exit("usage: metrics.py BUILD_NUMBER")
+    try:
+        build_number = int(sys.argv[1])
+    except ValueError:
+        sys.exit(f"BUILD_NUMBER must be an integer, got {sys.argv[1]!r}")
+
+    y_test = pd.read_csv("data/y_test.csv")
+    y_pred = pd.read_csv("evaluation/y_pred.csv", header=None)
+
+    cm = confusion_matrix(y_test, y_pred)
+    # Row 1 of the confusion matrix holds the samples whose true label is the
+    # second class; cm[1, 1] are the ones also predicted as that class. The
+    # ratio is therefore the recall of that class, not overall accuracy.
+    actual_positives = cm[1, 0] + cm[1, 1]
+    # Avoid 0/0 (which would be written as "nan") when the class is absent.
+    recall = cm[1, 1] / actual_positives if actual_positives else 0.0
+
+    with open("evaluation/metrics.txt", "a") as f:
+        f.write(f"{recall},{build_number}\n")
+
+
+if __name__ == "__main__":
+    main()
--- a/plot.py
+++ b/plot.py
@ -0,0 +1,24 @@
+import matplotlib.pyplot as plt
+
+
+def main():
+    """Plot the metric history stored in ``evaluation/metrics.txt``.
+
+    Each non-blank line of the file is ``<value>,<build number>``. The values
+    are plotted against the build numbers and the chart is written to
+    ``evaluation/accuracy.png``.
+
+    Raises:
+        ValueError: if the metrics file contains no records, or a record
+            cannot be parsed.
+    """
+    accuracy = []
+    build_numbers = []
+
+    with open("evaluation/metrics.txt") as f:
+        for line in f:
+            if not line.strip():
+                continue  # tolerate blank lines
+            value, build = line.split(",")[:2]
+            accuracy.append(float(value))
+            build_numbers.append(int(build))
+
+    if not build_numbers:
+        # Fail with a clear message instead of min()/max() on an empty list.
+        raise ValueError("evaluation/metrics.txt contains no records")
+
+    plt.plot(build_numbers, accuracy)
+    plt.xlabel("Build Number")
+    plt.ylabel("Accuracy")
+    plt.title("Accuracy of the model over time")
+    plt.xticks(range(min(build_numbers), max(build_numbers) + 1))
+
+    # Save before show(): with an interactive backend the figure is gone once
+    # the window opened by show() is closed, so a later savefig() would write
+    # an empty image.
+    plt.savefig("evaluation/accuracy.png")
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
--- a/predict.py
+++ b/predict.py
@ -4,24 +4,18 @@ os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

 from keras.models import load_model
 import pandas as pd
-from sklearn.metrics import confusion_matrix
 import numpy as np


 def main():
    model = load_model("model/model.keras")
    X_test = pd.read_csv("data/X_test.csv")
-    y_test = pd.read_csv("data/y_test.csv")

    y_pred = model.predict(X_test)
    y_pred = y_pred >= 0.5
-    np.savetxt("data/y_pred.csv", y_pred, delimiter=",")

-    cm = confusion_matrix(y_test, y_pred)
-    print(
-        "Recall metric in the testing dataset: ",
-        cm[1, 1] / (cm[1, 0] + cm[1, 1]),
-    )
+    os.makedirs("evaluation", exist_ok=True)
+    np.savetxt("evaluation/y_pred.csv", y_pred, delimiter=",")


 if __name__ == "__main__":
Author	SHA1	Message	Date
s464913	96e8535023	Update Dockerfile	2024-06-01 17:54:53 +02:00
Mateusz	df42bfcee0	IUM_06	2024-05-04 16:39:48 +02:00
Mateusz	3f95fa102c	IUM_06	2024-05-04 16:23:32 +02:00
Mateusz	0920a59d1f	IUM_06	2024-05-04 16:19:51 +02:00
Mateusz	b1a03b41b0	IUM_06	2024-05-04 15:59:35 +02:00
Mateusz	9d6ffe8205	IUM_06	2024-05-04 15:54:55 +02:00
Mateusz	a8cf8d2829	IUM_06	2024-05-04 15:42:16 +02:00
Mateusz	dace057c96	IUM_06	2024-05-04 15:30:49 +02:00
Mateusz	ee4c1adab2	IUM_06	2024-05-04 15:25:54 +02:00