Compare commits: main...evaluation (9 commits)
Commits: 96e8535023, df42bfcee0, 3f95fa102c, 0920a59d1f, b1a03b41b0, 9d6ffe8205, a8cf8d2829, dace057c96, ee4c1adab2
.dvc/.gitignore (vendored, deleted) | 3
@@ -1,3 +0,0 @@
-/config.local
-/tmp
-/cache
.dvc/config (deleted) | 4

@@ -1,4 +0,0 @@
-[core]
-    remote = ium_ssh_remote
-['remote "ium_ssh_remote"']
-    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
.dvcignore (deleted) | 3

@@ -1,3 +0,0 @@
-# Add patterns of files dvc should ignore, which could improve
-# the performance. Learn more at
-# https://dvc.org/doc/user-guide/dvcignore
.gitignore (vendored) | 5
@@ -1,5 +1,6 @@
-creditcardfraud.zip
-creditcard.csv
 data
 model/model.keras
 stats_data
+/creditcard.csv
+/creditcardfraud.zip
+evaluation
Dockerfile

@@ -1,5 +1,5 @@
 FROM ubuntu:latest
 
-RUN apt update && apt install -y python3-pip git
+RUN apt update && apt install -y python3-pip
 
-RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
+RUN pip install pandas numpy scikit-learn tensorflow matplotlib --break-system-packages
IUM_12.pptx | BIN
Binary file not shown.
Jenkinsfile (vendored) | 79
@@ -1,73 +1,70 @@
 pipeline {
-    agent any
+    agent {
+        dockerfile true
+    }
+
+    triggers {
+        upstream(upstreamProjects: 's464913-training/training', threshold: hudson.model.Result.SUCCESS)
+    }
 
     parameters {
-        string (
-            defaultValue: 'vskyper',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password (
-            defaultValue: '',
-            description: 'Kaggle API key',
-            name: 'KAGGLE_KEY',
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
         )
+        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
     }
 
     stages {
         stage('Clone Repository') {
             steps {
-                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
+                git branch: 'evaluation', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
             }
         }
 
-        stage('Download dataset') {
+        stage('Copy Artifacts from dataset job') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'pip install kaggle'
-                    sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
-                    sh 'unzip -o creditcardfraud.zip'
-                    sh 'rm creditcardfraud.zip'
-                }
+                copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }
 
-        stage('Run create-dataset script') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
-
+        stage('Copy Artifacts from training job') {
             steps {
-                sh 'chmod +x create-dataset.py'
-                sh 'python3 ./create-dataset.py'
+                copyArtifacts filter: 'model/*', projectName: 's464913-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
             }
         }
 
-        stage('Archive Artifacts from create-dataset') {
+        stage('Copy Artifacts from evaluation job') {
             steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                copyArtifacts filter: 'evaluation/*', projectName: 's464913-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
             }
         }
 
-        stage('Experiments') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
-
+        stage('Run predict script') {
             steps {
-                sh 'chmod +x sacred/sacred_train_evaluation.py'
-                sh 'python3 sacred/sacred_train_evaluation.py'
+                sh 'chmod +x predict.py'
+                sh 'python3 ./predict.py'
             }
         }
 
-        stage('Archive Artifacts from Experiments') {
+        stage('Run metrics script') {
             steps {
-                archiveArtifacts artifacts: 'experiments/**/*.*', onlyIfSuccessful: true
+                sh 'chmod +x metrics.py'
+                sh "python3 ./metrics.py ${currentBuild.number}"
             }
         }
+
+        stage('Run plot script') {
+            steps {
+                sh 'chmod +x plot.py'
+                sh 'python3 ./plot.py'
+            }
+        }
+
+        stage('Archive Artifacts') {
+            steps {
+                archiveArtifacts artifacts: 'evaluation/*', onlyIfSuccessful: true
+            }
+        }
     }
 }
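The new pipeline assumes data/ and model/ are populated by copyArtifacts before any script runs. For debugging outside Jenkins, a minimal local stand-in for the three script stages (a sketch; the hard-coded build number replaces Jenkins' ${currentBuild.number}):

# replay_evaluation.py - hypothetical local stand-in for the Jenkins stages;
# assumes data/ and model/ already contain the copied artifacts.
import subprocess
import sys

BUILD_NUMBER = "1"  # Jenkins passes ${currentBuild.number}; hard-coded here

steps = [
    [sys.executable, "predict.py"],                # writes evaluation/y_pred.csv
    [sys.executable, "metrics.py", BUILD_NUMBER],  # appends to evaluation/metrics.txt
    [sys.executable, "plot.py"],                   # renders evaluation/accuracy.png
]

for step in steps:
    subprocess.run(step, check=True)  # fail fast, like a failing Jenkins stage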
create-dataset.sh (new file) | 42
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Install the Kaggle API
+pip install kaggle
+# Download the dataset from Kaggle
+kaggle datasets download -d mlg-ulb/creditcardfraud
+
+# Unzip the dataset
+unzip -o creditcardfraud.zip
+# Remove the zip file
+rm creditcardfraud.zip
+
+# Create a header file
+head -n 1 creditcard.csv > creditcard_header.csv
+# Remove the header from the dataset
+tail -n +2 creditcard.csv > creditcard_no_header.csv
+# Remove the original dataset
+rm creditcard.csv
+
+# Shuffle the dataset
+shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
+# Remove the unshuffled dataset
+rm creditcard_no_header.csv
+
+# Add the header back to the shuffled dataset
+cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
+
+# Split the dataset into training and testing
+tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
+head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
+
+# Add the header back to the training and testing datasets
+cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
+cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
+
+# Remove the intermediate files
+rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
+
+# Create a directory for the data
+mkdir -p data
+# Move the datasets to the data directory
+mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
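The same shuffle-and-split logic can be expressed in a few lines of pandas; a sketch, not part of the commit (the 10 000-row test slice matches the head/tail split above, and the seed is illustrative):

# create_dataset_pandas.py - hypothetical pandas equivalent of create-dataset.sh
import os
import pandas as pd

os.makedirs("data", exist_ok=True)

df = pd.read_csv("creditcard.csv")
shuffled = df.sample(frac=1, random_state=42)  # shuffle all rows; seed is illustrative

shuffled.to_csv("data/creditcard_shuf.csv", index=False)
shuffled.iloc[:10000].to_csv("data/creditcard_test.csv", index=False)   # first 10 000 rows -> test
shuffled.iloc[10000:].to_csv("data/creditcard_train.csv", index=False)  # remainder -> train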
creditcard.csv.dvc (deleted) | 5

@@ -1,5 +0,0 @@
-outs:
-- md5: e90efcb83d69faf99fcab8b0255024de
-  size: 150828752
-  hash: md5
-  path: creditcard.csv
creditcardfraud.zip.dvc (deleted) | 5

@@ -1,5 +0,0 @@
-outs:
-- md5: bf8e9842731ab6f9b8ab51e1a6741f8b
-  size: 69155672
-  hash: md5
-  path: creditcardfraud.zip
dataset-stats.sh (new file) | 12
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Count the number of lines in the original dataset
+wc -l < data/creditcard_shuf.csv > stats.txt
+# Count the number of lines in the training and testing datasets
+wc -l < data/creditcard_train.csv > stats_train.txt
+wc -l < data/creditcard_test.csv > stats_test.txt
+
+# Create a directory for the statistics
+mkdir -p stats_data
+# Move the statistics to the stats directory
+mv stats.txt stats_train.txt stats_test.txt stats_data/
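A Python counterpart for the same counts (a sketch; the minus one discounts the CSV header line that wc -l includes):

# dataset_stats.py - hypothetical Python version of dataset-stats.sh
for name in ("creditcard_shuf", "creditcard_train", "creditcard_test"):
    with open(f"data/{name}.csv") as f:
        n_lines = sum(1 for _ in f)
    print(f"{name}: {n_lines - 1} data rows")  # minus the header line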
dvc.lock (deleted) | 94
@@ -1,94 +0,0 @@
-schema: '2.0'
-stages:
-  prepare_data:
-    cmd: python ./create-dataset.py
-    deps:
-    - path: create-dataset.py
-      hash: md5
-      md5: 0903460139f5b57b9759f4de37b2d5e4
-      size: 1531
-    - path: creditcard.csv
-      hash: md5
-      md5: e90efcb83d69faf99fcab8b0255024de
-      size: 150828752
-    outs:
-    - path: data/X_test.csv
-      hash: md5
-      md5: 46ff52696af9a4c06f6b25639525dda6
-      size: 30947960
-    - path: data/X_train.csv
-      hash: md5
-      md5: 7505524c54858300bbd92094092a6c39
-      size: 92838653
-    - path: data/X_val.csv
-      hash: md5
-      md5: 4d078882cc1898640ddaf4ad9117f543
-      size: 30946540
-    - path: data/creditcard.csv
-      hash: md5
-      md5: 4b81435690147d1e624a8b06c5520629
-      size: 155302541
-    - path: data/y_test.csv
-      hash: md5
-      md5: a6bc4827feae19934c4021d1f10f5963
-      size: 170893
-    - path: data/y_train.csv
-      hash: md5
-      md5: 8112a5cf4faac882c421bcb7e3d42044
-      size: 512656
-    - path: data/y_val.csv
-      hash: md5
-      md5: 1155f648650986d8866eba603b86560c
-      size: 170893
-  train_model:
-    cmd: python ./train_model.py
-    deps:
-    - path: data/X_train.csv
-      hash: md5
-      md5: 7505524c54858300bbd92094092a6c39
-      size: 92838653
-    - path: data/X_val.csv
-      hash: md5
-      md5: 4d078882cc1898640ddaf4ad9117f543
-      size: 30946540
-    - path: data/y_train.csv
-      hash: md5
-      md5: 8112a5cf4faac882c421bcb7e3d42044
-      size: 512656
-    - path: data/y_val.csv
-      hash: md5
-      md5: 1155f648650986d8866eba603b86560c
-      size: 170893
-    - path: train_model.py
-      hash: md5
-      md5: 00b8bac043f4d7a56dec95f2f1bb1b49
-      size: 1540
-    outs:
-    - path: model/model.keras
-      hash: md5
-      md5: 1d1df55ad26a8c0689efa4a86a86c217
-      size: 1476738
-  evaluate_model:
-    cmd: python ./predict.py
-    deps:
-    - path: data/X_test.csv
-      hash: md5
-      md5: 46ff52696af9a4c06f6b25639525dda6
-      size: 30947960
-    - path: data/y_test.csv
-      hash: md5
-      md5: a6bc4827feae19934c4021d1f10f5963
-      size: 170893
-    - path: model/model.keras
-      hash: md5
-      md5: 1d1df55ad26a8c0689efa4a86a86c217
-      size: 1476738
-    - path: predict.py
-      hash: md5
-      md5: a61388aabf381779b38e2f32a4d0df7b
-      size: 660
-    outs:
-    - path: data/y_pred.csv
-      hash: md5
-      md5: be150c2fbf1914102b479edbe0a4cf43
-      size: 1481012
dvc.yaml (deleted) | 35
@@ -1,35 +0,0 @@
-stages:
-  prepare_data:
-    cmd: python ./create-dataset.py
-    deps:
-      - create-dataset.py
-      - creditcard.csv
-    outs:
-      - data/creditcard.csv
-      - data/X_train.csv
-      - data/X_val.csv
-      - data/X_test.csv
-      - data/y_train.csv
-      - data/y_val.csv
-      - data/y_test.csv
-
-  train_model:
-    cmd: python ./train_model.py
-    deps:
-      - train_model.py
-      - data/X_train.csv
-      - data/X_val.csv
-      - data/y_train.csv
-      - data/y_val.csv
-    outs:
-      - model/model.keras
-
-  evaluate_model:
-    cmd: python ./predict.py
-    deps:
-      - predict.py
-      - model/model.keras
-      - data/X_test.csv
-      - data/y_test.csv
-    outs:
-      - data/y_pred.csv
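With dvc.yaml and dvc.lock removed, this branch drops the DVC pipeline entirely; before the change, the three stages would have been reproduced with the dvc CLI. A sketch of driving that from Python (assumes dvc is installed and on PATH):

# run_dvc.py - hypothetical wrapper around the dvc CLI
import subprocess

subprocess.run(["dvc", "repro"], check=True)   # re-run any stale stages defined in dvc.yaml
subprocess.run(["dvc", "status"], check=True)  # report which stages are out of date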
environment.yml | BIN
Binary file not shown.
@@ -1,5 +0,0 @@
-{
-  "epochs": 5,
-  "learning_rate": 0.001,
-  "seed": 7929899
-}
File diff suppressed because one or more lines are too long
@@ -1,8 +0,0 @@
-{
-  "metrics": [
-    {
-      "id": "665b3cd5c1ae3ab5cc15d3d9",
-      "name": "accuracy"
-    }
-  ]
-}
@@ -1,13 +0,0 @@
-{
-  "accuracy": {
-    "steps": [
-      0
-    ],
-    "timestamps": [
-      "2024-06-01T15:23:02.056704"
-    ],
-    "values": [
-      0.8217821782178217
-    ]
-  }
-}
Binary file not shown.
@@ -1,102 +0,0 @@
-{
-  "artifacts": [
-    "model.keras"
-  ],
-  "command": "main",
-  "experiment": {
-    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
-    "dependencies": [
-      "keras==3.1.1",
-      "numpy==1.26.3",
-      "sacred==0.8.5",
-      "scikit-learn==1.4.1.post1"
-    ],
-    "mainfile": "sacred_train_evaluation.py",
-    "name": "464913",
-    "repositories": [
-      {
-        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
-        "dirty": true,
-        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
-      }
-    ],
-    "sources": [
-      [
-        "sacred_train_evaluation.py",
-        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
-      ]
-    ]
-  },
-  "heartbeat": "2024-06-01T15:23:02.067455",
-  "host": {
-    "ENV": {},
-    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
-    "hostname": "Dell",
-    "os": [
-      "Windows",
-      "Windows-11-10.0.22631-SP0"
-    ],
-    "python_version": "3.12.3"
-  },
-  "meta": {
-    "command": "main",
-    "config_updates": {},
-    "named_configs": [],
-    "options": {
-      "--beat-interval": null,
-      "--capture": null,
-      "--comment": null,
-      "--debug": false,
-      "--enforce_clean": false,
-      "--file_storage": null,
-      "--force": false,
-      "--help": false,
-      "--id": null,
-      "--loglevel": null,
-      "--mongo_db": null,
-      "--name": null,
-      "--pdb": false,
-      "--print-config": false,
-      "--priority": null,
-      "--queue": false,
-      "--s3": null,
-      "--sql": null,
-      "--tiny_db": null,
-      "--unobserved": false,
-      "COMMAND": null,
-      "UPDATE": [],
-      "help": false,
-      "with": false
-    }
-  },
-  "resources": [
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
-      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
-      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
-      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
-      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
-      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
-      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
-    ]
-  ],
-  "result": null,
-  "start_time": "2024-06-01T15:20:05.925811",
-  "status": "COMPLETED",
-  "stop_time": "2024-06-01T15:23:02.065167"
-}
6 file diffs suppressed because they are too large.
@@ -1,100 +0,0 @@
-import os
-
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-
-from keras.models import Sequential
-from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
-from keras.optimizers import Adam
-import pandas as pd
-from sklearn.metrics import confusion_matrix
-from sacred import Experiment
-from sacred.observers import FileStorageObserver, MongoObserver
-
-ex = Experiment("464913")
-
-ex.observers.append(
-    MongoObserver.create(
-        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
-        db_name="sacred",
-    )
-)
-ex.observers.append(FileStorageObserver("experiments"))
-
-
-@ex.config
-def my_config():
-    learning_rate = 0.001
-    epochs = 5
-
-
-@ex.capture
-def train_and_evaluate(_run, learning_rate, epochs):
-
-    X_train = _run.open_resource("data/X_train.csv")
-    X_val = _run.open_resource("data/X_val.csv")
-    y_train = _run.open_resource("data/y_train.csv")
-    y_val = _run.open_resource("data/y_val.csv")
-
-    X_train = pd.read_csv(X_train)
-    X_val = pd.read_csv(X_val)
-    y_train = pd.read_csv(y_train)
-    y_val = pd.read_csv(y_val)
-
-    X_train = X_train.to_numpy()
-    X_val = X_val.to_numpy()
-    y_train = y_train.to_numpy()
-    y_val = y_val.to_numpy()
-
-    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
-    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
-
-    model = Sequential(
-        [
-            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
-            BatchNormalization(),
-            Dropout(0.2),
-            Conv1D(64, 2, activation="relu"),
-            BatchNormalization(),
-            Dropout(0.5),
-            Flatten(),
-            Dense(64, activation="relu"),
-            Dropout(0.5),
-            Dense(1, activation="sigmoid"),
-        ]
-    )
-
-    model.compile(
-        optimizer=Adam(learning_rate=learning_rate),
-        loss="binary_crossentropy",
-        metrics=["accuracy"],
-    )
-
-    model.fit(
-        X_train,
-        y_train,
-        validation_data=(X_val, y_val),
-        epochs=epochs,
-        verbose=1,
-    )
-
-    model.save("sacred/model.keras")
-    _run.add_artifact("sacred/model.keras")
-
-    X_test = _run.open_resource("data/X_test.csv")
-    y_test = _run.open_resource("data/y_test.csv")
-
-    X_test = pd.read_csv(X_test)
-    y_test = pd.read_csv(y_test)
-
-    y_pred = model.predict(X_test)
-    y_pred = y_pred >= 0.5
-
-    cm = confusion_matrix(y_test, y_pred)
-    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
-
-    _run.log_scalar("accuracy", accuracy)
-
-
-@ex.automain
-def main(learning_rate, epochs):
-    train_and_evaluate()
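For reference, the deleted experiment could be driven from the CLI with Sacred's standard override syntax (python sacred_train_evaluation.py with learning_rate=0.01 epochs=10) or programmatically; a sketch of the latter, with illustrative override values:

# run_sacred.py - hypothetical programmatic driver for the removed experiment
from sacred_train_evaluation import ex  # assumes the deleted script were still importable

run = ex.run(config_updates={"learning_rate": 0.01, "epochs": 10})
print(run.status)  # "COMPLETED" on success; observers record config, metrics, artifacts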
metrics.py (new file) | 19
@@ -0,0 +1,19 @@
+from sklearn.metrics import confusion_matrix
+import pandas as pd
+import sys
+
+
+def main():
+    y_test = pd.read_csv("data/y_test.csv")
+    y_pred = pd.read_csv("evaluation/y_pred.csv", header=None)
+    build_number = sys.argv[1]
+
+    cm = confusion_matrix(y_test, y_pred)
+    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+
+    with open(r"evaluation/metrics.txt", "a") as f:
+        f.write(f"{accuracy},{build_number}\n")
+
+
+if __name__ == "__main__":
+    main()
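Note that cm[1, 1] / (cm[1, 0] + cm[1, 1]) is the recall on the positive (fraud) class, TP / (TP + FN), not overall accuracy, even though the pipeline stores it under that name. A sketch of the equivalent computation with scikit-learn's named metric:

# metric_check.py - hypothetical cross-check of the value metrics.py appends
import pandas as pd
from sklearn.metrics import recall_score

y_test = pd.read_csv("data/y_test.csv")
y_pred = pd.read_csv("evaluation/y_pred.csv", header=None)

print(recall_score(y_test, y_pred))  # equals cm[1, 1] / (cm[1, 0] + cm[1, 1])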
MLproject (deleted) | 10

@@ -1,10 +0,0 @@
-name: Credit card fraud MLFlow - s464913
-
-conda_env: conda.yaml
-
-entry_points:
-  main:
-    parameters:
-      learning_rate: { type: float, default: 0.001 }
-      epochs: { type: int, default: 5 }
-    command: 'python mlflow_train_evaluation.py {learning_rate} {epochs}'
conda.yaml (deleted) | 11

@@ -1,11 +0,0 @@
-name: Credit card fraud MLFlow - s464913
-channels:
-  - defaults
-dependencies:
-  - python=3.12
-  - pip
-  - pip:
-      - mlflow
-      - tensorflow
-      - pandas
-      - scikit-learn
mlflow_train_evaluation.py (deleted) | 82

@@ -1,82 +0,0 @@
-import os
-
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-
-from keras.models import Sequential
-from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
-from keras.optimizers import Adam
-import pandas as pd
-import sys
-import mlflow
-from sklearn.metrics import confusion_matrix
-
-mlflow.set_tracking_uri("http://localhost:5000")
-
-
-def main():
-    X_train = pd.read_csv("../data/X_train.csv")
-    X_val = pd.read_csv("../data/X_val.csv")
-    y_train = pd.read_csv("../data/y_train.csv")
-    y_val = pd.read_csv("../data/y_val.csv")
-
-    X_train = X_train.to_numpy()
-    X_val = X_val.to_numpy()
-    y_train = y_train.to_numpy()
-    y_val = y_val.to_numpy()
-
-    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
-    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
-
-    learning_rate = float(sys.argv[1])
-    epochs = int(sys.argv[2])
-
-    with mlflow.start_run() as run:
-        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
-        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
-
-        model = Sequential(
-            [
-                Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
-                BatchNormalization(),
-                Dropout(0.2),
-                Conv1D(64, 2, activation="relu"),
-                BatchNormalization(),
-                Dropout(0.5),
-                Flatten(),
-                Dense(64, activation="relu"),
-                Dropout(0.5),
-                Dense(1, activation="sigmoid"),
-            ]
-        )
-
-        model.compile(
-            optimizer=Adam(learning_rate=learning_rate),
-            loss="binary_crossentropy",
-            metrics=["accuracy"],
-        )
-
-        model.fit(
-            X_train,
-            y_train,
-            validation_data=(X_val, y_val),
-            epochs=epochs,
-            verbose=1,
-        )
-
-        mlflow.log_param("learning_rate", learning_rate)
-        mlflow.log_param("epochs", epochs)
-
-        X_test = pd.read_csv("../data/X_test.csv")
-        y_test = pd.read_csv("../data/y_test.csv")
-
-        y_pred = model.predict(X_test)
-        y_pred = y_pred >= 0.5
-
-        cm = confusion_matrix(y_test, y_pred)
-        accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
-
-        mlflow.log_metric("accuracy", accuracy)
-
-
-if __name__ == "__main__":
-    main()
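The removed MLflow project would have been launched with mlflow run . -P learning_rate=0.001 -P epochs=5 (the defaults from the MLproject file); the programmatic equivalent, as a sketch:

# run_mlflow.py - hypothetical launcher for the removed MLflow project
import mlflow

mlflow.projects.run(
    uri=".",  # directory that contained the MLproject file
    parameters={"learning_rate": 0.001, "epochs": 5},
)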
@@ -1,15 +0,0 @@
-artifact_uri: mlflow-artifacts:/0/3c46f6c4b15743faa0119c4b9b804825/artifacts
-end_time: 1715508788768
-entry_point_name: ''
-experiment_id: '0'
-lifecycle_stage: active
-run_id: 3c46f6c4b15743faa0119c4b9b804825
-run_name: dapper-hog-137
-run_uuid: 3c46f6c4b15743faa0119c4b9b804825
-source_name: ''
-source_type: 4
-source_version: ''
-start_time: 1715508594003
-status: 3
-tags: []
-user_id: skype
@@ -1 +0,0 @@
-1715508787882 0.8217821782178217 0

@@ -1 +0,0 @@
-5

@@ -1 +0,0 @@
-0.001

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-local

@@ -1 +0,0 @@
-main

@@ -1 +0,0 @@
-conda

@@ -1 +0,0 @@
-dapper-hog-137

@@ -1 +0,0 @@
-a6be9a729562db8c47bc5fec88ad8f5216af0cf3

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

@@ -1 +0,0 @@
-PROJECT

@@ -1 +0,0 @@
-skype
@@ -1,15 +0,0 @@
-artifact_uri: mlflow-artifacts:/0/706dcf453a0842aaa48647e15521bb7b/artifacts
-end_time: 1715508573447
-entry_point_name: ''
-experiment_id: '0'
-lifecycle_stage: active
-run_id: 706dcf453a0842aaa48647e15521bb7b
-run_name: loud-whale-40
-run_uuid: 706dcf453a0842aaa48647e15521bb7b
-source_name: ''
-source_type: 4
-source_version: ''
-start_time: 1715508159092
-status: 3
-tags: []
-user_id: skype

@@ -1 +0,0 @@
-1715508572612 0.7524752475247525 0

@@ -1 +0,0 @@
-7

@@ -1 +0,0 @@
-0.001

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-local

@@ -1 +0,0 @@
-main

@@ -1 +0,0 @@
-conda

@@ -1 +0,0 @@
-loud-whale-40

@@ -1 +0,0 @@
-a6be9a729562db8c47bc5fec88ad8f5216af0cf3

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

@@ -1 +0,0 @@
-PROJECT

@@ -1 +0,0 @@
-skype
@@ -1,6 +0,0 @@
-artifact_location: mlflow-artifacts:/0
-creation_time: 1715508147231
-experiment_id: '0'
-last_update_time: 1715508147231
-lifecycle_stage: active
-name: Default
plot.py (new file) | 24
@@ -0,0 +1,24 @@
+import matplotlib.pyplot as plt
+
+
+def main():
+    accuracy = []
+    build_numbers = []
+
+    with open("evaluation/metrics.txt") as f:
+        for line in f:
+            accuracy.append(float(line.split(",")[0]))
+            build_numbers.append(int(line.split(",")[1]))
+
+    plt.plot(build_numbers, accuracy)
+    plt.xlabel("Build Number")
+    plt.ylabel("Accuracy")
+    plt.title("Accuracy of the model over time")
+    plt.xticks(range(min(build_numbers), max(build_numbers) + 1))
+    plt.show()
+
+    plt.savefig("evaluation/accuracy.png")
+
+
+if __name__ == "__main__":
+    main()
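One caveat in the added script: plt.show() runs before plt.savefig(), and with interactive backends show() can leave an empty figure behind (it also blocks on a headless CI agent), so evaluation/accuracy.png may come out blank. A headless-safe ordering, as a sketch with illustrative data:

# plot_ci.py - hypothetical CI-safe variant of the plotting tail
import matplotlib
matplotlib.use("Agg")  # non-interactive backend for build agents
import matplotlib.pyplot as plt

build_numbers, accuracy = [1, 2, 3], [0.75, 0.80, 0.82]  # illustrative values

plt.plot(build_numbers, accuracy)
plt.xlabel("Build Number")
plt.ylabel("Accuracy")
plt.savefig("evaluation/accuracy.png")  # save first; omit plt.show() on CI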
predict.py | 10
@@ -4,24 +4,18 @@ os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
 
 from keras.models import load_model
 import pandas as pd
-from sklearn.metrics import confusion_matrix
 import numpy as np
 
 
 def main():
     model = load_model("model/model.keras")
     X_test = pd.read_csv("data/X_test.csv")
     y_test = pd.read_csv("data/y_test.csv")
 
     y_pred = model.predict(X_test)
     y_pred = y_pred >= 0.5
-    np.savetxt("data/y_pred.csv", y_pred, delimiter=",")
-
-    cm = confusion_matrix(y_test, y_pred)
-    print(
-        "Recall metric in the testing dataset: ",
-        cm[1, 1] / (cm[1, 0] + cm[1, 1]),
-    )
+    os.makedirs("evaluation", exist_ok=True)
+    np.savetxt("evaluation/y_pred.csv", y_pred, delimiter=",")
 
 
 if __name__ == "__main__":
     main()
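Downstream, metrics.py reads the predictions back from evaluation/y_pred.csv; a quick sanity check of the file predict.py now writes (a sketch):

# read_preds.py - hypothetical check of predict.py's output
import numpy as np

y_pred = np.loadtxt("evaluation/y_pred.csv", delimiter=",")
print(y_pred.shape, y_pred[:5])  # one 0.0/1.0 value per test row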
Binary file not shown.
@@ -1,100 +0,0 @@
(contents identical to the 100-line Sacred training script shown above; the diff removes both sacred/sacred_train_evaluation.py and the _sources snapshot referenced in run.json)