IUM_7

2024-06-01 17:24:14 +02:00 · 2024-06-01 17:24:14 +02:00 · cc56865cc1
commit cc56865cc1
parent cf648b6c12
19 changed files with 569977 additions and 56 deletions
--- a/2
+++ b/2
@ -2,4 +2,4 @@ FROM ubuntu:latest

 RUN apt update && apt install -y python3-pip

-RUN pip install pandas numpy scikit-learn tensorflow
+RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
--- a/15
+++ b/15
@ -46,10 +46,23 @@ pipeline {
      }
    }

-    stage('Archive Artifacts') {
+    stage('Archive Artifacts from create-dataset') {
      steps {
        archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
      }
    }
+
+    // Runs the Sacred training and evaluation script.
+    stage('Experiments') {
+      steps {
+        // Sets the executable bit on the script; the following step invokes it
+        // through python3 explicitly.
+        sh 'chmod +x sacred/sacred_train_evaluation.py'
+        sh 'python3 sacred/sacred_train_evaluation.py'
+      }
+    }
+
+    // Archives everything written under experiments/ by the Experiments stage.
+    stage('Archive Artifacts from Experiments') {
+      steps {
+        // '**' recurses into the per-run, _resources and _sources
+        // subdirectories; 'experiments/*' would only match files that sit
+        // directly inside experiments/.
+        archiveArtifacts artifacts: 'experiments/**', onlyIfSuccessful: true
+      }
+    }
  }
 }
--- a/create-dataset.sh
+++ b/create-dataset.sh
@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Install the Kaggle API
-pip install kaggle
-# Download the dataset from Kaggle
-kaggle datasets download -d mlg-ulb/creditcardfraud
-
-# Unzip the dataset
-unzip -o creditcardfraud.zip
-# Remove the zip file
-rm creditcardfraud.zip
-
-# Create a header file
-head -n 1 creditcard.csv > creditcard_header.csv
-# Remove the header from the dataset
-tail -n +2 creditcard.csv > creditcard_no_header.csv
-# Remove the original dataset
-rm creditcard.csv
-
-# Shuffle the dataset
-shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
-# Remove the unshuffled dataset
-rm creditcard_no_header.csv
-
-# Add the header back to the shuffled dataset
-cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
-
-# Split the dataset into training and testing
-tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
-head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
-
-# Add the header back to the training and testing datasets
-cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
-cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
-
-# Remove the intermediate files
-rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
-
-# Create a directory for the data
-mkdir -p data
-# Move the datasets to the data directory
-mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
--- a/dataset-stats.sh
+++ b/dataset-stats.sh
@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Count the number of lines in the original dataset
-wc -l < data/creditcard_shuf.csv > stats.txt
-# Count the number of lines in the training and testing datasets
-wc -l < data/creditcard_train.csv > stats_train.txt
-wc -l < data/creditcard_test.csv > stats_test.txt
-
-# Create a directory for the statistics
-mkdir -p stats_data
-# Move the statistics to the stats directory
-mv stats.txt stats_train.txt stats_test.txt stats_data/
--- a/experiments/708/config.json
+++ b/experiments/708/config.json
@ -0,0 +1,5 @@
+{
+  "epochs": 5,
+  "learning_rate": 0.001,
+  "seed": 7929899
+}
--- a/experiments/708/cout.txt
+++ b/experiments/708/cout.txt
--- a/experiments/708/info.json
+++ b/experiments/708/info.json
@ -0,0 +1,8 @@
+{
+  "metrics": [
+    {
+      "id": "665b3cd5c1ae3ab5cc15d3d9",
+      "name": "accuracy"
+    }
+  ]
+}
--- a/experiments/708/metrics.json
+++ b/experiments/708/metrics.json
@ -0,0 +1,13 @@
+{
+  "accuracy": {
+    "steps": [
+      0
+    ],
+    "timestamps": [
+      "2024-06-01T15:23:02.056704"
+    ],
+    "values": [
+      0.8217821782178217
+    ]
+  }
+}
--- a/experiments/708/model.keras
+++ b/experiments/708/model.keras
--- a/experiments/708/run.json
+++ b/experiments/708/run.json
@ -0,0 +1,102 @@
+{
+  "artifacts": [
+    "model.keras"
+  ],
+  "command": "main",
+  "experiment": {
+    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
+    "dependencies": [
+      "keras==3.1.1",
+      "numpy==1.26.3",
+      "sacred==0.8.5",
+      "scikit-learn==1.4.1.post1"
+    ],
+    "mainfile": "sacred_train_evaluation.py",
+    "name": "464913",
+    "repositories": [
+      {
+        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
+        "dirty": true,
+        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
+      }
+    ],
+    "sources": [
+      [
+        "sacred_train_evaluation.py",
+        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
+      ]
+    ]
+  },
+  "heartbeat": "2024-06-01T15:23:02.067455",
+  "host": {
+    "ENV": {},
+    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
+    "hostname": "Dell",
+    "os": [
+      "Windows",
+      "Windows-11-10.0.22631-SP0"
+    ],
+    "python_version": "3.12.3"
+  },
+  "meta": {
+    "command": "main",
+    "config_updates": {},
+    "named_configs": [],
+    "options": {
+      "--beat-interval": null,
+      "--capture": null,
+      "--comment": null,
+      "--debug": false,
+      "--enforce_clean": false,
+      "--file_storage": null,
+      "--force": false,
+      "--help": false,
+      "--id": null,
+      "--loglevel": null,
+      "--mongo_db": null,
+      "--name": null,
+      "--pdb": false,
+      "--print-config": false,
+      "--priority": null,
+      "--queue": false,
+      "--s3": null,
+      "--sql": null,
+      "--tiny_db": null,
+      "--unobserved": false,
+      "COMMAND": null,
+      "UPDATE": [],
+      "help": false,
+      "with": false
+    }
+  },
+  "resources": [
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
+      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
+      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
+      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
+      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
+      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
+      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
+    ]
+  ],
+  "result": null,
+  "start_time": "2024-06-01T15:20:05.925811",
+  "status": "COMPLETED",
+  "stop_time": "2024-06-01T15:23:02.065167"
+}
--- a/experiments/_resources/X_test_46ff52696af9a4c06f6b25639525dda6.csv
+++ b/experiments/_resources/X_test_46ff52696af9a4c06f6b25639525dda6.csv
--- a/experiments/_resources/X_train_7505524c54858300bbd92094092a6c39.csv
+++ b/experiments/_resources/X_train_7505524c54858300bbd92094092a6c39.csv
--- a/experiments/_resources/X_val_4d078882cc1898640ddaf4ad9117f543.csv
+++ b/experiments/_resources/X_val_4d078882cc1898640ddaf4ad9117f543.csv
--- a/experiments/_resources/y_test_a6bc4827feae19934c4021d1f10f5963.csv
+++ b/experiments/_resources/y_test_a6bc4827feae19934c4021d1f10f5963.csv
--- a/experiments/_resources/y_train_8112a5cf4faac882c421bcb7e3d42044.csv
+++ b/experiments/_resources/y_train_8112a5cf4faac882c421bcb7e3d42044.csv
--- a/experiments/_resources/y_val_1155f648650986d8866eba603b86560c.csv
+++ b/experiments/_resources/y_val_1155f648650986d8866eba603b86560c.csv
--- a/experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py
+++ b/experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py
@ -0,0 +1,100 @@
+import os
+
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+from keras.models import Sequential
+from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
+from keras.optimizers import Adam
+import pandas as pd
+from sklearn.metrics import confusion_matrix
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+ex = Experiment("464913")
+
+ex.observers.append(
+    MongoObserver.create(
+        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
+        db_name="sacred",
+    )
+)
+ex.observers.append(FileStorageObserver("experiments"))
+
+
+@ex.config
+def my_config():
+    learning_rate = 0.001
+    epochs = 5
+
+
+@ex.capture
+def train_and_evaluate(_run, learning_rate, epochs):
+
+    X_train = _run.open_resource("data/X_train.csv")
+    X_val = _run.open_resource("data/X_val.csv")
+    y_train = _run.open_resource("data/y_train.csv")
+    y_val = _run.open_resource("data/y_val.csv")
+
+    X_train = pd.read_csv(X_train)
+    X_val = pd.read_csv(X_val)
+    y_train = pd.read_csv(y_train)
+    y_val = pd.read_csv(y_val)
+
+    X_train = X_train.to_numpy()
+    X_val = X_val.to_numpy()
+    y_train = y_train.to_numpy()
+    y_val = y_val.to_numpy()
+
+    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
+
+    model = Sequential(
+        [
+            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
+            BatchNormalization(),
+            Dropout(0.2),
+            Conv1D(64, 2, activation="relu"),
+            BatchNormalization(),
+            Dropout(0.5),
+            Flatten(),
+            Dense(64, activation="relu"),
+            Dropout(0.5),
+            Dense(1, activation="sigmoid"),
+        ]
+    )
+
+    model.compile(
+        optimizer=Adam(learning_rate=learning_rate),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+
+    model.fit(
+        X_train,
+        y_train,
+        validation_data=(X_val, y_val),
+        epochs=epochs,
+        verbose=1,
+    )
+
+    model.save("sacred/model.keras")
+    _run.add_artifact("sacred/model.keras")
+
+    X_test = _run.open_resource("data/X_test.csv")
+    y_test = _run.open_resource("data/y_test.csv")
+
+    X_test = pd.read_csv(X_test)
+    y_test = pd.read_csv(y_test)
+
+    y_pred = model.predict(X_test)
+    y_pred = y_pred >= 0.5
+
+    cm = confusion_matrix(y_test, y_pred)
+    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+
+    _run.log_scalar("accuracy", accuracy)
+
+
+@ex.automain
+def main(learning_rate, epochs):
+    train_and_evaluate()
--- a/sacred/model.keras
+++ b/sacred/model.keras
--- a/sacred/sacred_train_evaluation.py
+++ b/sacred/sacred_train_evaluation.py
@ -0,0 +1,100 @@
+import os
+
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+from keras.models import Sequential
+from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
+from keras.optimizers import Adam
+import pandas as pd
+from sklearn.metrics import confusion_matrix
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+ex = Experiment("464913")
+
+ex.observers.append(
+    MongoObserver.create(
+        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
+        db_name="sacred",
+    )
+)
+ex.observers.append(FileStorageObserver("experiments"))
+
+
+# Default hyperparameters of the experiment. The local variables assigned in
+# this function are registered with the experiment through @ex.config.
+@ex.config
+def my_config():
+    learning_rate = 0.001  # handed to the Adam optimizer in train_and_evaluate
+    epochs = 5  # number of epochs handed to model.fit in train_and_evaluate
+
+
+@ex.capture
+def train_and_evaluate(_run, learning_rate, epochs):
+
+    X_train = _run.open_resource("data/X_train.csv")
+    X_val = _run.open_resource("data/X_val.csv")
+    y_train = _run.open_resource("data/y_train.csv")
+    y_val = _run.open_resource("data/y_val.csv")
+
+    X_train = pd.read_csv(X_train)
+    X_val = pd.read_csv(X_val)
+    y_train = pd.read_csv(y_train)
+    y_val = pd.read_csv(y_val)
+
+    X_train = X_train.to_numpy()
+    X_val = X_val.to_numpy()
+    y_train = y_train.to_numpy()
+    y_val = y_val.to_numpy()
+
+    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
+
+    model = Sequential(
+        [
+            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
+            BatchNormalization(),
+            Dropout(0.2),
+            Conv1D(64, 2, activation="relu"),
+            BatchNormalization(),
+            Dropout(0.5),
+            Flatten(),
+            Dense(64, activation="relu"),
+            Dropout(0.5),
+            Dense(1, activation="sigmoid"),
+        ]
+    )
+
+    model.compile(
+        optimizer=Adam(learning_rate=learning_rate),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+
+    model.fit(
+        X_train,
+        y_train,
+        validation_data=(X_val, y_val),
+        epochs=epochs,
+        verbose=1,
+    )
+
+    model.save("sacred/model.keras")
+    _run.add_artifact("sacred/model.keras")
+
+    X_test = _run.open_resource("data/X_test.csv")
+    y_test = _run.open_resource("data/y_test.csv")
+
+    X_test = pd.read_csv(X_test)
+    y_test = pd.read_csv(y_test)
+
+    y_pred = model.predict(X_test)
+    y_pred = y_pred >= 0.5
+
+    cm = confusion_matrix(y_test, y_pred)
+    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+
+    _run.log_scalar("accuracy", accuracy)
+
+
+@ex.automain
+def main(learning_rate, epochs):
+    """Experiment entry point: run training followed by evaluation.
+
+    ``learning_rate`` and ``epochs`` are accepted here but are not forwarded
+    explicitly; ``train_and_evaluate`` is decorated with ``@ex.capture`` and
+    is called without arguments.
+    """
+    train_and_evaluate()