From ea02f9dc7b9d25749956485972b3a3bbc600d485 Mon Sep 17 00:00:00 2001
From: Mateusz <matpiw1@st.amu.edu.pl>
Date: Sun, 14 Apr 2024 13:36:50 +0200
Subject: [PATCH] IUM_05

---
 .gitignore     |  4 +++-
 Dockerfile     |  2 +-
 predict.py     | 26 +++++++++++++++++++++++++
 train_model.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 predict.py
 create mode 100644 train_model.py

diff --git a/.gitignore b/.gitignore
index eb7b522..4e8128b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 creditcardfraud.zip
-creditcard.csv
\ No newline at end of file
+creditcard.csv
+data
+model/model.keras
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 8def072..ef6e85d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:latest
 
 RUN apt update && apt install -y python3-pip
 
-RUN pip install pandas numpy scikit-learn
\ No newline at end of file
+RUN pip install pandas numpy scikit-learn tensorflow
\ No newline at end of file
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..461db7c
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,26 @@
+import os
+
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"  # oneDNN opts off; set before keras is imported
+
+from keras.models import load_model
+import pandas as pd
+from sklearn.metrics import confusion_matrix
+
+
+def main():
+    """Print the recall of the saved model on the test CSVs."""
+    classifier = load_model("model/model.keras")
+    X_test = pd.read_csv("data/X_test.csv")
+    y_test = pd.read_csv("data/y_test.csv")
+    # Scores above 0.5 count as the positive class.
+    y_pred = classifier.predict(X_test) > 0.5
+
+    # cm[i, j] counts true class i predicted as j; assuming 0/1 labels, row 1
+    # holds the actual positives, so this is TP / (FN + TP).
+    cm = confusion_matrix(y_test, y_pred)
+    recall = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+    print("Recall metric in the testing dataset: ", recall)
+
+
+if __name__ == "__main__":  # evaluate only when run as a script, not on import
+    main()
diff --git a/train_model.py b/train_model.py
new file mode 100644
index 0000000..c77ecb5
--- /dev/null
+++ b/train_model.py
@@ -0,0 +1,53 @@
+import os
+
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"  # oneDNN opts off; set before keras is imported
+
+from keras.models import Sequential
+from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
+from keras.optimizers import Adam
+import pandas as pd
+
+
+def main():
+    """Fit a small 1-D CNN on the training CSVs and save it to model/model.keras."""
+    features = pd.read_csv("data/X_train.csv").to_numpy()
+    targets = pd.read_csv("data/y_train.csv").to_numpy()
+
+    # Append a channel axis of size 1: (rows, columns) -> (rows, columns, 1),
+    # so each CSV row is fed to Conv1D as a one-channel sequence.
+    features = features.reshape(*features.shape, 1)
+
+    # Two convolution stages, each followed by batch norm and dropout, then a
+    # dense head that ends in a single sigmoid unit.
+    stack = [
+        Conv1D(32, 2, activation="relu", input_shape=features[0].shape),
+        BatchNormalization(),
+        Dropout(0.2),
+        Conv1D(64, 2, activation="relu"),
+        BatchNormalization(),
+        Dropout(0.5),
+        Flatten(),
+        Dense(64, activation="relu"),
+        Dropout(0.5),
+        Dense(1, activation="sigmoid"),
+    ]
+    model = Sequential(stack)
+
+    model.compile(
+        optimizer=Adam(learning_rate=1e-3),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+
+    # Train for five epochs on the full training set (no validation split).
+    model.fit(features, targets, epochs=5, verbose=1)
+
+    # Make sure the output directory exists before saving; exist_ok lets the
+    # script be rerun without failing on the existing directory.
+    out_dir = "model"
+    os.makedirs(out_dir, exist_ok=True)
+    model.save("model/model.keras")
+
+
+if __name__ == "__main__":  # train only when run as a script, not on import
+    main()