From ea02f9dc7b9d25749956485972b3a3bbc600d485 Mon Sep 17 00:00:00 2001
From: Mateusz
Date: Sun, 14 Apr 2024 13:36:50 +0200
Subject: [PATCH] IUM_05

---
Review note: predict.py and train_model.py were edited after this patch was
generated, so the blob ids on their "index" lines are stale; regenerate the
patch with git format-patch if they are needed (e.g. for "git am -3").

 .gitignore     |  4 +++-
 Dockerfile     |  2 +-
 predict.py     | 37 ++++++++++++++++++++++++++++++
 train_model.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 predict.py
 create mode 100644 train_model.py

diff --git a/.gitignore b/.gitignore
index eb7b522..4e8128b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 creditcardfraud.zip
-creditcard.csv
\ No newline at end of file
+creditcard.csv
+data
+model/model.keras
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 8def072..ef6e85d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:latest
 
 RUN apt update && apt install -y python3-pip
 
-RUN pip install pandas numpy scikit-learn
\ No newline at end of file
+RUN pip install pandas numpy scikit-learn tensorflow
\ No newline at end of file
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..461db7c
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,37 @@
+"""Evaluate the saved Keras model on the test split and print its recall."""
+
+import os
+
+# Disable oneDNN custom ops. Kept ahead of the Keras import so the variable
+# is already set when TensorFlow loads.
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+from keras.models import load_model
+import pandas as pd
+from sklearn.metrics import confusion_matrix
+
+
+def main():
+    """Load the model and test CSVs, predict, and print recall for class 1."""
+    model = load_model("model/model.keras")
+    X_test = pd.read_csv("data/X_test.csv").to_numpy()
+    y_test = pd.read_csv("data/y_test.csv")
+
+    # train_model.py fits on (samples, features, 1); feed the same layout
+    # here rather than depending on implicit rank handling inside Keras.
+    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
+
+    # Threshold the sigmoid output at 0.5 and flatten to 1-D 0/1 labels.
+    y_pred = (model.predict(X_test) > 0.5).astype(int).ravel()
+
+    # labels=[0, 1] keeps the matrix 2x2 even when a class is absent, so the
+    # cm[1, ...] lookups below cannot go out of bounds.
+    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
+    positives = cm[1, 0] + cm[1, 1]
+    # Recall is undefined without positive samples: report NaN explicitly.
+    recall = cm[1, 1] / positives if positives else float("nan")
+    print("Recall metric in the testing dataset: ", recall)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/train_model.py b/train_model.py
new file mode 100644
index 0000000..c77ecb5
--- /dev/null
+++ b/train_model.py
@@ -0,0 +1,62 @@
+"""Train a small 1-D convolutional binary classifier and save it to disk."""
+
+import os
+
+# Disable oneDNN custom ops. Kept ahead of the Keras import so the variable
+# is already set when TensorFlow loads.
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+from keras.models import Sequential
+from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
+from keras.optimizers import Adam
+import pandas as pd
+
+
+def main():
+    """Fit the network on the training CSVs and write model/model.keras."""
+    X_train = pd.read_csv("data/X_train.csv")
+    y_train = pd.read_csv("data/y_train.csv")
+
+    X_train = X_train.to_numpy()
+    y_train = y_train.to_numpy()
+
+    # Conv1D consumes (samples, steps, channels): each feature column becomes
+    # one step carrying a single channel.
+    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+
+    model = Sequential(
+        [
+            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
+            BatchNormalization(),
+            Dropout(0.2),
+            Conv1D(64, 2, activation="relu"),
+            BatchNormalization(),
+            Dropout(0.5),
+            Flatten(),
+            Dense(64, activation="relu"),
+            Dropout(0.5),
+            # One sigmoid unit: a single score in (0, 1) per sample.
+            Dense(1, activation="sigmoid"),
+        ]
+    )
+
+    model.compile(
+        optimizer=Adam(learning_rate=1e-3),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+
+    model.fit(
+        X_train,
+        y_train,
+        epochs=5,
+        verbose=1,
+    )
+
+    # Create the output directory if missing; exist_ok keeps re-runs safe.
+    os.makedirs("model", exist_ok=True)
+    model.save("model/model.keras")
+
+
+if __name__ == "__main__":
+    main()