Update Dockerfile

IUM_06
2024-06-01 17:54:53 +02:00 · 2024-05-04 16:39:48 +02:00 · 2024-05-04 16:23:32 +02:00 · 2024-05-04 16:19:51 +02:00 · 2024-05-04 15:59:35 +02:00 · 2024-05-04 15:54:55 +02:00
64 changed files with 142 additions and 570329 deletions
--- a/.dvc/.gitignore
+++ b/.dvc/.gitignore
@ -1,3 +0,0 @@
 /config.local
 /tmp
 /cache
--- a/.dvc/config
+++ b/.dvc/config
@ -1,4 +0,0 @@
 [core]
    remote = ium_ssh_remote
 ['remote "ium_ssh_remote"']
    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.dvcignore
+++ b/.dvcignore
@ -1,3 +0,0 @@
 # Add patterns of files dvc should ignore, which could improve
 # the performance. Learn more at
 # https://dvc.org/doc/user-guide/dvcignore
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,6 @@
 creditcardfraud.zip
 creditcard.csv
 data
 model/model.keras
 stats_data
-/creditcard.csv
+evaluation
 /creditcardfraud.zip
--- a/4
+++ b/4
@ -1,5 +1,5 @@
 FROM ubuntu:24.04
-RUN apt update && apt install -y python3-pip git
+# Base image is pinned to a release tag instead of :latest so rebuilds are reproducible;
+# 24.04 ships a pip that understands --break-system-packages used below.
+# apt-get (stable scripting CLI) replaces apt; update + install share one layer and the
+# package lists are deleted in that same layer so they never reach the image.
+RUN apt-get update && apt-get install -y --no-install-recommends python3-pip && rm -rf /var/lib/apt/lists/*
-RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir pandas numpy scikit-learn tensorflow matplotlib --break-system-packages
--- a/IUM_12.pptx
+++ b/IUM_12.pptx
--- a/79
+++ b/79
@ -1,73 +1,70 @@
 pipeline {
-  agent any
+  agent { 
    dockerfile true 
  }
  triggers {
    upstream(upstreamProjects: 's464913-training/training', threshold: hudson.model.Result.SUCCESS)
  }
  parameters {
-    string (
+    buildSelector(
-      defaultValue: 'vskyper',
+      defaultSelector: lastSuccessful(),
-      description: 'Kaggle username',
+      description: 'Which build to use for copying artifacts',
-      name: 'KAGGLE_USERNAME',
+      name: 'BUILD_SELECTOR'
      trim: false
    )
    password (
      defaultValue: '',
      description: 'Kaggle API key',
      name: 'KAGGLE_KEY',
    )
    gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
  }
  stages {
    stage('Clone Repository') {
      steps {
-        git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
+        git branch: 'evaluation', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
      }
    }
-    stage('Download dataset') {
+    stage('Copy Artifacts from dataset job') {
      steps {
-        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+        copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
          sh 'pip install kaggle'
          sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
          sh 'unzip -o creditcardfraud.zip'
          sh 'rm creditcardfraud.zip'
        }
      }
    }
-    stage('Run create-dataset script') {
+    stage('Copy Artifacts from training job') {
      agent {
        dockerfile {
          reuseNode true
        }
      } 
      steps {
-        sh 'chmod +x create-dataset.py'
+        copyArtifacts filter: 'model/*', projectName: 's464913-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
        sh 'python3 ./create-dataset.py'
      }
    }
-    stage('Archive Artifacts from create-dataset') {
+    stage('Copy Artifacts from evaluation job') {
      steps {
-        archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+        copyArtifacts filter: 'evaluation/*', projectName: 's464913-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
      }
    }
-    stage('Experiments') {
+    stage('Run predict script') {
      agent {
        dockerfile {
          reuseNode true
        }
      } 
      steps {
-        sh 'chmod +x sacred/sacred_train_evaluation.py'
+        sh 'chmod +x predict.py'
-        sh 'python3 sacred/sacred_train_evaluation.py'
+        sh 'python3 ./predict.py'
      }
    }
-    stage('Archive Artifacts from Experiments') {
+    stage('Run metrics script') {
      steps {
-        archiveArtifacts artifacts: 'experiments/**/*.*', onlyIfSuccessful: true
+        sh 'chmod +x metrics.py'
        sh "python3 ./metrics.py ${currentBuild.number}"
      }
    }
    stage('Run plot script') {
      steps {
        sh 'chmod +x plot.py'
        sh 'python3 ./plot.py'
      }
    }
    stage('Archive Artifacts') {
      steps {
        archiveArtifacts artifacts: 'evaluation/*', onlyIfSuccessful: true
      }
    }
  }
--- a/create-dataset.sh
+++ b/create-dataset.sh
@ -0,0 +1,42 @@
 #!/bin/bash
 # Download the mlg-ulb/creditcardfraud dataset with the Kaggle CLI, shuffle its rows,
 # and write three files into data/: the full shuffled set, a test set made of the
 # first 10000 shuffled rows, and a training set made of the remaining rows.
 # Every output file keeps the original CSV header line.
 #
 # Stop at the first failing command (and on unset variables / failed pipeline stages).
 # Without this, a failed download or unzip still lets the '>' redirections below create
 # empty files, and the script would finish "successfully" with unusable data.
 set -euo pipefail
 # Install the Kaggle API
 pip install kaggle
 # Download the dataset from Kaggle
 kaggle datasets download -d mlg-ulb/creditcardfraud
 # Unzip the dataset
 unzip -o creditcardfraud.zip
 # Remove the zip file
 rm creditcardfraud.zip
 # Create a header file
 head -n 1 creditcard.csv > creditcard_header.csv
 # Remove the header from the dataset
 tail -n +2 creditcard.csv > creditcard_no_header.csv
 # Remove the original dataset
 rm creditcard.csv
 # Shuffle the dataset
 shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
 # Remove the unshuffled dataset
 rm creditcard_no_header.csv
 # Add the header back to the shuffled dataset
 cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
 # Split the dataset into training and testing
 # (rows 10001.. of the shuffled data go to training, rows 1..10000 to testing)
 tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
 head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
 # Add the header back to the training and testing datasets
 cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
 cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
 # Remove the intermediate files
 rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
 # Create a directory for the data
 mkdir -p data
 # Move the datasets to the data directory
 mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
--- a/creditcard.csv.dvc
+++ b/creditcard.csv.dvc
@ -1,5 +0,0 @@
 outs:
 - md5: e90efcb83d69faf99fcab8b0255024de
  size: 150828752
  hash: md5
  path: creditcard.csv
--- a/creditcardfraud.zip.dvc
+++ b/creditcardfraud.zip.dvc
@ -1,5 +0,0 @@
 outs:
 - md5: bf8e9842731ab6f9b8ab51e1a6741f8b
  size: 69155672
  hash: md5
  path: creditcardfraud.zip
--- a/dataset-stats.sh
+++ b/dataset-stats.sh
@ -0,0 +1,12 @@
 #!/bin/bash
 # Record the line count of each dataset file (header line included) in its own
 # text file, then collect the three result files in stats_data/.

 # Full shuffled dataset
 wc -l < data/creditcard_shuf.csv > stats.txt
 # Training and testing splits share one naming pattern, so handle them in a loop
 for split in train test; do
   wc -l < "data/creditcard_${split}.csv" > "stats_${split}.txt"
 done
 # Make sure the target directory exists, then move the results into it
 mkdir -p stats_data
 mv stats.txt stats_train.txt stats_test.txt stats_data/
--- a/dvc.lock
+++ b/dvc.lock
@ -1,94 +0,0 @@
 schema: '2.0'
 stages:
  prepare_data:
    cmd: python ./create-dataset.py
    deps:
    - path: create-dataset.py
      hash: md5
      md5: 0903460139f5b57b9759f4de37b2d5e4
      size: 1531
    - path: creditcard.csv
      hash: md5
      md5: e90efcb83d69faf99fcab8b0255024de
      size: 150828752
    outs:
    - path: data/X_test.csv
      hash: md5
      md5: 46ff52696af9a4c06f6b25639525dda6
      size: 30947960
    - path: data/X_train.csv
      hash: md5
      md5: 7505524c54858300bbd92094092a6c39
      size: 92838653
    - path: data/X_val.csv
      hash: md5
      md5: 4d078882cc1898640ddaf4ad9117f543
      size: 30946540
    - path: data/creditcard.csv
      hash: md5
      md5: 4b81435690147d1e624a8b06c5520629
      size: 155302541
    - path: data/y_test.csv
      hash: md5
      md5: a6bc4827feae19934c4021d1f10f5963
      size: 170893
    - path: data/y_train.csv
      hash: md5
      md5: 8112a5cf4faac882c421bcb7e3d42044
      size: 512656
    - path: data/y_val.csv
      hash: md5
      md5: 1155f648650986d8866eba603b86560c
      size: 170893
  train_model:
    cmd: python ./train_model.py
    deps:
    - path: data/X_train.csv
      hash: md5
      md5: 7505524c54858300bbd92094092a6c39
      size: 92838653
    - path: data/X_val.csv
      hash: md5
      md5: 4d078882cc1898640ddaf4ad9117f543
      size: 30946540
    - path: data/y_train.csv
      hash: md5
      md5: 8112a5cf4faac882c421bcb7e3d42044
      size: 512656
    - path: data/y_val.csv
      hash: md5
      md5: 1155f648650986d8866eba603b86560c
      size: 170893
    - path: train_model.py
      hash: md5
      md5: 00b8bac043f4d7a56dec95f2f1bb1b49
      size: 1540
    outs:
    - path: model/model.keras
      hash: md5
      md5: 1d1df55ad26a8c0689efa4a86a86c217
      size: 1476738
  evaluate_model:
    cmd: python ./predict.py
    deps:
    - path: data/X_test.csv
      hash: md5
      md5: 46ff52696af9a4c06f6b25639525dda6
      size: 30947960
    - path: data/y_test.csv
      hash: md5
      md5: a6bc4827feae19934c4021d1f10f5963
      size: 170893
    - path: model/model.keras
      hash: md5
      md5: 1d1df55ad26a8c0689efa4a86a86c217
      size: 1476738
    - path: predict.py
      hash: md5
      md5: a61388aabf381779b38e2f32a4d0df7b
      size: 660
    outs:
    - path: data/y_pred.csv
      hash: md5
      md5: be150c2fbf1914102b479edbe0a4cf43
      size: 1481012
--- a/dvc.yaml
+++ b/dvc.yaml
@ -1,35 +0,0 @@
 stages:
  prepare_data:
    cmd: python ./create-dataset.py
    deps:
      - create-dataset.py
      - creditcard.csv
    outs:
      - data/creditcard.csv
      - data/X_train.csv
      - data/X_val.csv
      - data/X_test.csv
      - data/y_train.csv
      - data/y_val.csv
      - data/y_test.csv
  train_model:
    cmd: python ./train_model.py
    deps:
      - train_model.py
      - data/X_train.csv
      - data/X_val.csv
      - data/y_train.csv
      - data/y_val.csv
    outs:
      - model/model.keras
  evaluate_model:
    cmd: python ./predict.py
    deps:
      - predict.py
      - model/model.keras
      - data/X_test.csv
      - data/y_test.csv
    outs:
      - data/y_pred.csv
--- a/environment.yml
+++ b/environment.yml
--- a/experiments/708/config.json
+++ b/experiments/708/config.json
@ -1,5 +0,0 @@
 {
  "epochs": 5,
  "learning_rate": 0.001,
  "seed": 7929899
 }
--- a/experiments/708/cout.txt
+++ b/experiments/708/cout.txt
--- a/experiments/708/info.json
+++ b/experiments/708/info.json
@ -1,8 +0,0 @@
 {
  "metrics": [
    {
      "id": "665b3cd5c1ae3ab5cc15d3d9",
      "name": "accuracy"
    }
  ]
 }
--- a/experiments/708/metrics.json
+++ b/experiments/708/metrics.json
@ -1,13 +0,0 @@
 {
  "accuracy": {
    "steps": [
      0
    ],
    "timestamps": [
      "2024-06-01T15:23:02.056704"
    ],
    "values": [
      0.8217821782178217
    ]
  }
 }
--- a/experiments/708/model.keras
+++ b/experiments/708/model.keras
--- a/experiments/708/run.json
+++ b/experiments/708/run.json
@ -1,102 +0,0 @@
 {
  "artifacts": [
    "model.keras"
  ],
  "command": "main",
  "experiment": {
    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
    "dependencies": [
      "keras==3.1.1",
      "numpy==1.26.3",
      "sacred==0.8.5",
      "scikit-learn==1.4.1.post1"
    ],
    "mainfile": "sacred_train_evaluation.py",
    "name": "464913",
    "repositories": [
      {
        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
        "dirty": true,
        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
      }
    ],
    "sources": [
      [
        "sacred_train_evaluation.py",
        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
      ]
    ]
  },
  "heartbeat": "2024-06-01T15:23:02.067455",
  "host": {
    "ENV": {},
    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
    "hostname": "Dell",
    "os": [
      "Windows",
      "Windows-11-10.0.22631-SP0"
    ],
    "python_version": "3.12.3"
  },
  "meta": {
    "command": "main",
    "config_updates": {},
    "named_configs": [],
    "options": {
      "--beat-interval": null,
      "--capture": null,
      "--comment": null,
      "--debug": false,
      "--enforce_clean": false,
      "--file_storage": null,
      "--force": false,
      "--help": false,
      "--id": null,
      "--loglevel": null,
      "--mongo_db": null,
      "--name": null,
      "--pdb": false,
      "--print-config": false,
      "--priority": null,
      "--queue": false,
      "--s3": null,
      "--sql": null,
      "--tiny_db": null,
      "--unobserved": false,
      "COMMAND": null,
      "UPDATE": [],
      "help": false,
      "with": false
    }
  },
  "resources": [
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
    ]
  ],
  "result": null,
  "start_time": "2024-06-01T15:20:05.925811",
  "status": "COMPLETED",
  "stop_time": "2024-06-01T15:23:02.065167"
 }
--- a/experiments/_resources/X_test_46ff52696af9a4c06f6b25639525dda6.csv
+++ b/experiments/_resources/X_test_46ff52696af9a4c06f6b25639525dda6.csv
--- a/experiments/_resources/X_train_7505524c54858300bbd92094092a6c39.csv
+++ b/experiments/_resources/X_train_7505524c54858300bbd92094092a6c39.csv
--- a/experiments/_resources/X_val_4d078882cc1898640ddaf4ad9117f543.csv
+++ b/experiments/_resources/X_val_4d078882cc1898640ddaf4ad9117f543.csv
--- a/experiments/_resources/y_test_a6bc4827feae19934c4021d1f10f5963.csv
+++ b/experiments/_resources/y_test_a6bc4827feae19934c4021d1f10f5963.csv
--- a/experiments/_resources/y_train_8112a5cf4faac882c421bcb7e3d42044.csv
+++ b/experiments/_resources/y_train_8112a5cf4faac882c421bcb7e3d42044.csv
--- a/experiments/_resources/y_val_1155f648650986d8866eba603b86560c.csv
+++ b/experiments/_resources/y_val_1155f648650986d8866eba603b86560c.csv
--- a/experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py
+++ b/experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py
@ -1,100 +0,0 @@
 import os
 os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
 from keras.models import Sequential
 from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
 from keras.optimizers import Adam
 import pandas as pd
 from sklearn.metrics import confusion_matrix
 from sacred import Experiment
 from sacred.observers import FileStorageObserver, MongoObserver
 # Sacred experiment named "464913"; two observers are attached to it below.
 ex = Experiment("464913")
 # NOTE(review): the MongoDB connection URL below embeds a username and password
 # directly in source control — consider supplying it via an environment variable
 # or a CI secret instead.
 ex.observers.append(
    MongoObserver.create(
        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
        db_name="sacred",
    )
 )
 # Second observer stores run records under the local "experiments" directory.
 ex.observers.append(FileStorageObserver("experiments"))
# Configuration function registered with the experiment via ex.config; it defines the
# default learning_rate and epochs values that train_and_evaluate and main take as
# parameters.
@ex.config
 def my_config():
    learning_rate = 0.001
    epochs = 5
@ex.capture
 def train_and_evaluate(_run, learning_rate, epochs):
    """Train the Conv1D classifier, store it as a run artifact, and log a test metric.

    Args:
        _run: run object used here to open the CSV resources, attach the saved
            model as an artifact, and log the final scalar.
        learning_rate: learning rate passed to the Adam optimizer.
        epochs: number of epochs passed to ``model.fit``.

    The value logged under the name "accuracy" is ``cm[1, 1] / (cm[1, 0] + cm[1, 1])``.
    With scikit-learn's confusion-matrix layout (rows = true labels, columns =
    predictions) that is the recall of class 1, not overall accuracy.
    """
    # Open the training/validation CSVs through the run object.
    X_train = _run.open_resource("data/X_train.csv")
    X_val = _run.open_resource("data/X_val.csv")
    y_train = _run.open_resource("data/y_train.csv")
    y_val = _run.open_resource("data/y_val.csv")
    # Parse the opened files into DataFrames.
    X_train = pd.read_csv(X_train)
    X_val = pd.read_csv(X_val)
    y_train = pd.read_csv(y_train)
    y_val = pd.read_csv(y_val)
    # Convert to NumPy arrays.
    X_train = X_train.to_numpy()
    X_val = X_val.to_numpy()
    y_train = y_train.to_numpy()
    y_val = y_val.to_numpy()
    # Append a trailing axis of length 1 so each sample is 2-D, as Conv1D requires.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
    # Two Conv1D blocks followed by a dense head ending in a single sigmoid unit.
    model = Sequential(
        [
            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
            BatchNormalization(),
            Dropout(0.2),
            Conv1D(64, 2, activation="relu"),
            BatchNormalization(),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation="relu"),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        verbose=1,
    )
    # Persist the trained model and attach the saved file to the run.
    model.save("sacred/model.keras")
    _run.add_artifact("sacred/model.keras")
    # Evaluate on the test split.
    X_test = _run.open_resource("data/X_test.csv")
    y_test = _run.open_resource("data/y_test.csv")
    X_test = pd.read_csv(X_test)
    y_test = pd.read_csv(y_test)
    # NOTE(review): X_test is passed as a DataFrame without the to_numpy/reshape steps
    # applied to X_train and X_val above — confirm this is intended.
    y_pred = model.predict(X_test)
    # Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
    y_pred = y_pred >= 0.5
    cm = confusion_matrix(y_test, y_pred)
    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
    _run.log_scalar("accuracy", accuracy)
@ex.automain
 def main(learning_rate, epochs):
    """Entry point registered through ``ex.automain``.

    ``learning_rate`` and ``epochs`` are accepted but not used directly here;
    ``train_and_evaluate`` is called without explicit arguments (it is decorated
    with ``ex.capture``).
    """
    train_and_evaluate()
--- a/metrics.py
+++ b/metrics.py
@ -0,0 +1,19 @@
 from sklearn.metrics import confusion_matrix
 import pandas as pd
 import sys
 def main():
    """Append this build's score to ``evaluation/metrics.txt``.

    Reads the true labels from ``data/y_test.csv`` and the predictions from
    ``evaluation/y_pred.csv`` (which has no header row), takes the build number
    from the first command-line argument, and appends one
    ``<score>,<build_number>`` line to the metrics file.

    The score is ``cm[1, 1] / (cm[1, 0] + cm[1, 1])``; with scikit-learn's
    confusion-matrix layout (rows = true labels, columns = predictions) that is
    the recall of class 1.
    """
    expected = pd.read_csv("data/y_test.csv")
    predicted = pd.read_csv("evaluation/y_pred.csv", header=None)
    build_id = sys.argv[1]

    matrix = confusion_matrix(expected, predicted)
    missed = matrix[1, 0]
    caught = matrix[1, 1]
    score = caught / (missed + caught)

    # Build the record first, then append it in a single write.
    record = f"{score},{build_id}\n"
    with open(r"evaluation/metrics.txt", "a") as log:
        log.write(record)
 if __name__ == "__main__":
    main()
--- a/mlflow/MLproject
+++ b/mlflow/MLproject
@ -1,10 +0,0 @@
 name: Credit card fraud MLFlow - s464913
 conda_env: conda.yaml
 entry_points:
  main:
    parameters:
      learning_rate: { type: float, default: 0.001 }
      epochs: { type: int, default: 5 }
    command: 'python mlflow_train_evaluation.py {learning_rate} {epochs}'
--- a/mlflow/conda.yaml
+++ b/mlflow/conda.yaml
@ -1,11 +0,0 @@
 name: Credit card fraud MLFlow - s464913
 channels:
  - defaults
 dependencies:
  - python=3.12
  - pip
  - pip:
      - mlflow
      - tensorflow
      - pandas
      - scikit-learn
--- a/mlflow/mlflow_train_evaluation.py
+++ b/mlflow/mlflow_train_evaluation.py
@ -1,82 +0,0 @@
 import os
 os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
 from keras.models import Sequential
 from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
 from keras.optimizers import Adam
 import pandas as pd
 import sys
 import mlflow
 from sklearn.metrics import confusion_matrix
 mlflow.set_tracking_uri("http://localhost:5000")
 def main():
    """Train and evaluate the Conv1D classifier inside a single MLflow run.

    Command-line arguments:
        sys.argv[1]: learning rate (parsed as float).
        sys.argv[2]: number of epochs (parsed as int).

    Data is read from ``../data`` relative to the working directory. Both
    hyperparameters are logged as MLflow params and the final score is logged as
    the metric "accuracy". That score is ``cm[1, 1] / (cm[1, 0] + cm[1, 1])``;
    with scikit-learn's confusion-matrix layout (rows = true labels, columns =
    predictions) it is the recall of class 1, not overall accuracy.
    """
    # Load the training and validation splits.
    X_train = pd.read_csv("../data/X_train.csv")
    X_val = pd.read_csv("../data/X_val.csv")
    y_train = pd.read_csv("../data/y_train.csv")
    y_val = pd.read_csv("../data/y_val.csv")
    X_train = X_train.to_numpy()
    X_val = X_val.to_numpy()
    y_train = y_train.to_numpy()
    y_val = y_val.to_numpy()
    # Append a trailing axis of length 1 so each sample is 2-D, as Conv1D requires.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
    # Hyperparameters come from the command line.
    learning_rate = float(sys.argv[1])
    epochs = int(sys.argv[2])
    with mlflow.start_run() as run:
        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
        # Two Conv1D blocks followed by a dense head ending in a single sigmoid unit.
        model = Sequential(
            [
                Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
                BatchNormalization(),
                Dropout(0.2),
                Conv1D(64, 2, activation="relu"),
                BatchNormalization(),
                Dropout(0.5),
                Flatten(),
                Dense(64, activation="relu"),
                Dropout(0.5),
                Dense(1, activation="sigmoid"),
            ]
        )
        model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss="binary_crossentropy",
            metrics=["accuracy"],
        )
        model.fit(
            X_train,
            y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            verbose=1,
        )
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("epochs", epochs)
        # Evaluate on the test split.
        X_test = pd.read_csv("../data/X_test.csv")
        y_test = pd.read_csv("../data/y_test.csv")
        # NOTE(review): X_test is passed as a DataFrame without the to_numpy/reshape
        # steps applied to X_train and X_val above — confirm this is intended.
        y_pred = model.predict(X_test)
        # Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
        y_pred = y_pred >= 0.5
        cm = confusion_matrix(y_test, y_pred)
        accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
        mlflow.log_metric("accuracy", accuracy)
 if __name__ == "__main__":
    main()
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/meta.yaml
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/meta.yaml
@ -1,15 +0,0 @@
 artifact_uri: mlflow-artifacts:/0/3c46f6c4b15743faa0119c4b9b804825/artifacts
 end_time: 1715508788768
 entry_point_name: ''
 experiment_id: '0'
 lifecycle_stage: active
 run_id: 3c46f6c4b15743faa0119c4b9b804825
 run_name: dapper-hog-137
 run_uuid: 3c46f6c4b15743faa0119c4b9b804825
 source_name: ''
 source_type: 4
 source_version: ''
 start_time: 1715508594003
 status: 3
 tags: []
 user_id: skype
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/metrics/accuracy
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/metrics/accuracy
@ -1 +0,0 @@
 1715508787882 0.8217821782178217 0
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/params/epochs
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/params/epochs
@ -1 +0,0 @@
 5
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/params/learning_rate
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/params/learning_rate
@ -1 +0,0 @@
 0.001
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.gitRepoURL
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.gitRepoURL
@ -1 +0,0 @@
 https://git.wmi.amu.edu.pl/s464913/ium_464913.git
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.backend
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.backend
@ -1 +0,0 @@
 local
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.entryPoint
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.entryPoint
@ -1 +0,0 @@
 main
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.env
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.env
@ -1 +0,0 @@
 conda
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.runName
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.runName
@ -1 +0,0 @@
 dapper-hog-137
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.git.commit
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.git.commit
@ -1 +0,0 @@
 a6be9a729562db8c47bc5fec88ad8f5216af0cf3
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.git.repoURL
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.git.repoURL
@ -1 +0,0 @@
 https://git.wmi.amu.edu.pl/s464913/ium_464913.git
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.name
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.name
@ -1 +0,0 @@
 file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.type
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.type
@ -1 +0,0 @@
 PROJECT
--- a/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.user
+++ b/mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.user
@ -1 +0,0 @@
 skype
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/meta.yaml
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/meta.yaml
@ -1,15 +0,0 @@
 artifact_uri: mlflow-artifacts:/0/706dcf453a0842aaa48647e15521bb7b/artifacts
 end_time: 1715508573447
 entry_point_name: ''
 experiment_id: '0'
 lifecycle_stage: active
 run_id: 706dcf453a0842aaa48647e15521bb7b
 run_name: loud-whale-40
 run_uuid: 706dcf453a0842aaa48647e15521bb7b
 source_name: ''
 source_type: 4
 source_version: ''
 start_time: 1715508159092
 status: 3
 tags: []
 user_id: skype
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/metrics/accuracy
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/metrics/accuracy
@ -1 +0,0 @@
 1715508572612 0.7524752475247525 0
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/params/epochs
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/params/epochs
@ -1 +0,0 @@
 7
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/params/learning_rate
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/params/learning_rate
@ -1 +0,0 @@
 0.001
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.gitRepoURL
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.gitRepoURL
@ -1 +0,0 @@
 https://git.wmi.amu.edu.pl/s464913/ium_464913.git
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.backend
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.backend
@ -1 +0,0 @@
 local
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.entryPoint
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.entryPoint
@ -1 +0,0 @@
 main
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.env
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.env
@ -1 +0,0 @@
 conda
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.runName
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.runName
@ -1 +0,0 @@
 loud-whale-40
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.git.commit
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.git.commit
@ -1 +0,0 @@
 a6be9a729562db8c47bc5fec88ad8f5216af0cf3
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.git.repoURL
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.git.repoURL
@ -1 +0,0 @@
 https://git.wmi.amu.edu.pl/s464913/ium_464913.git
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.name
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.name
@ -1 +0,0 @@
 file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.type
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.type
@ -1 +0,0 @@
 PROJECT
--- a/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.user
+++ b/mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.user
@ -1 +0,0 @@
 skype
--- a/mlflow/mlruns/0/meta.yaml
+++ b/mlflow/mlruns/0/meta.yaml
@ -1,6 +0,0 @@
 artifact_location: mlflow-artifacts:/0
 creation_time: 1715508147231
 experiment_id: '0'
 last_update_time: 1715508147231
 lifecycle_stage: active
 name: Default
--- a/plot.py
+++ b/plot.py
@ -0,0 +1,24 @@
 import matplotlib.pyplot as plt
 def main():
    """Plot the per-build score history and save it as ``evaluation/accuracy.png``.

    Reads ``evaluation/metrics.txt``, where each non-empty line has the form
    ``<score>,<build_number>``, draws score against build number, writes the
    figure to disk and then displays it.

    Raises:
        FileNotFoundError: if ``evaluation/metrics.txt`` does not exist.
        ValueError / IndexError: if a non-empty line is not ``<float>,<int>``.
    """
    accuracy = []
    build_numbers = []
    with open("evaluation/metrics.txt") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing newline) instead of crashing on float("").
                continue
            fields = line.split(",")
            accuracy.append(float(fields[0]))
            build_numbers.append(int(fields[1]))
    plt.plot(build_numbers, accuracy)
    plt.xlabel("Build Number")
    plt.ylabel("Accuracy")
    plt.title("Accuracy of the model over time")
    if build_numbers:
        # min()/max() raise on an empty sequence, so only set ticks when there is data.
        plt.xticks(range(min(build_numbers), max(build_numbers) + 1))
    # Save BEFORE showing: with an interactive backend the figure is released once
    # show() returns, so saving afterwards would write an empty image.
    plt.savefig("evaluation/accuracy.png")
    plt.show()
 if __name__ == "__main__":
    main()
--- a/predict.py
+++ b/predict.py
@ -4,24 +4,18 @@ os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
 from keras.models import load_model
 import pandas as pd
 from sklearn.metrics import confusion_matrix
 import numpy as np
 def main():
    model = load_model("model/model.keras")
    X_test = pd.read_csv("data/X_test.csv")
    y_test = pd.read_csv("data/y_test.csv")
    y_pred = model.predict(X_test)
    y_pred = y_pred >= 0.5
    np.savetxt("data/y_pred.csv", y_pred, delimiter=",")
-    cm = confusion_matrix(y_test, y_pred)
+    os.makedirs("evaluation", exist_ok=True)
-    print(
+    np.savetxt("evaluation/y_pred.csv", y_pred, delimiter=",")
        "Recall metric in the testing dataset: ",
        cm[1, 1] / (cm[1, 0] + cm[1, 1]),
    )
 if __name__ == "__main__":
--- a/sacred/model.keras
+++ b/sacred/model.keras
--- a/sacred/sacred_train_evaluation.py
+++ b/sacred/sacred_train_evaluation.py
@ -1,100 +0,0 @@
 import os
 os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
 from keras.models import Sequential
 from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
 from keras.optimizers import Adam
 import pandas as pd
 from sklearn.metrics import confusion_matrix
 from sacred import Experiment
 from sacred.observers import FileStorageObserver, MongoObserver
 # Sacred experiment named "464913"; two observers are attached to it below.
 ex = Experiment("464913")
 # NOTE(review): the MongoDB connection URL below embeds a username and password
 # directly in source control — consider supplying it via an environment variable
 # or a CI secret instead.
 ex.observers.append(
    MongoObserver.create(
        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
        db_name="sacred",
    )
 )
 # Second observer stores run records under the local "experiments" directory.
 ex.observers.append(FileStorageObserver("experiments"))
# Configuration function registered with the experiment via ex.config; it defines the
# default learning_rate and epochs values that train_and_evaluate and main take as
# parameters.
@ex.config
 def my_config():
    learning_rate = 0.001
    epochs = 5
@ex.capture
 def train_and_evaluate(_run, learning_rate, epochs):
    """Train the Conv1D classifier, store it as a run artifact, and log a test metric.

    Args:
        _run: run object used here to open the CSV resources, attach the saved
            model as an artifact, and log the final scalar.
        learning_rate: learning rate passed to the Adam optimizer.
        epochs: number of epochs passed to ``model.fit``.

    The value logged under the name "accuracy" is ``cm[1, 1] / (cm[1, 0] + cm[1, 1])``.
    With scikit-learn's confusion-matrix layout (rows = true labels, columns =
    predictions) that is the recall of class 1, not overall accuracy.
    """
    # Open the training/validation CSVs through the run object.
    X_train = _run.open_resource("data/X_train.csv")
    X_val = _run.open_resource("data/X_val.csv")
    y_train = _run.open_resource("data/y_train.csv")
    y_val = _run.open_resource("data/y_val.csv")
    # Parse the opened files into DataFrames.
    X_train = pd.read_csv(X_train)
    X_val = pd.read_csv(X_val)
    y_train = pd.read_csv(y_train)
    y_val = pd.read_csv(y_val)
    # Convert to NumPy arrays.
    X_train = X_train.to_numpy()
    X_val = X_val.to_numpy()
    y_train = y_train.to_numpy()
    y_val = y_val.to_numpy()
    # Append a trailing axis of length 1 so each sample is 2-D, as Conv1D requires.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
    # Two Conv1D blocks followed by a dense head ending in a single sigmoid unit.
    model = Sequential(
        [
            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
            BatchNormalization(),
            Dropout(0.2),
            Conv1D(64, 2, activation="relu"),
            BatchNormalization(),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation="relu"),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        verbose=1,
    )
    # Persist the trained model and attach the saved file to the run.
    model.save("sacred/model.keras")
    _run.add_artifact("sacred/model.keras")
    # Evaluate on the test split.
    X_test = _run.open_resource("data/X_test.csv")
    y_test = _run.open_resource("data/y_test.csv")
    X_test = pd.read_csv(X_test)
    y_test = pd.read_csv(y_test)
    # NOTE(review): X_test is passed as a DataFrame without the to_numpy/reshape steps
    # applied to X_train and X_val above — confirm this is intended.
    y_pred = model.predict(X_test)
    # Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
    y_pred = y_pred >= 0.5
    cm = confusion_matrix(y_test, y_pred)
    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
    _run.log_scalar("accuracy", accuracy)
@ex.automain
 def main(learning_rate, epochs):
    """Entry point registered through ``ex.automain``.

    ``learning_rate`` and ``epochs`` are accepted but not used directly here;
    ``train_and_evaluate`` is called without explicit arguments (it is decorated
    with ``ex.capture``).
    """
    train_and_evaluate()
Author	SHA1	Message	Date
s464913	96e8535023	Update Dockerfile	2024-06-01 17:54:53 +02:00
Mateusz	df42bfcee0	IUM_06	2024-05-04 16:39:48 +02:00
Mateusz	3f95fa102c	IUM_06	2024-05-04 16:23:32 +02:00
Mateusz	0920a59d1f	IUM_06	2024-05-04 16:19:51 +02:00
Mateusz	b1a03b41b0	IUM_06	2024-05-04 15:59:35 +02:00
Mateusz	9d6ffe8205	IUM_06	2024-05-04 15:54:55 +02:00
Mateusz	a8cf8d2829	IUM_06	2024-05-04 15:42:16 +02:00
Mateusz	dace057c96	IUM_06	2024-05-04 15:30:49 +02:00
Mateusz	ee4c1adab2	IUM_06	2024-05-04 15:25:54 +02:00
		`@ -1 +0,0 @@`
			`https://git.wmi.amu.edu.pl/s464913/ium_464913.git`
		`@ -1 +0,0 @@`
			`file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow`