Compare commits

..

9 Commits

Author SHA1 Message Date
96e8535023 Update Dockerfile 2024-06-01 17:54:53 +02:00
df42bfcee0 IUM_06 2024-05-04 16:39:48 +02:00
3f95fa102c IUM_06 2024-05-04 16:23:32 +02:00
0920a59d1f IUM_06 2024-05-04 16:19:51 +02:00
b1a03b41b0 IUM_06 2024-05-04 15:59:35 +02:00
9d6ffe8205 IUM_06 2024-05-04 15:54:55 +02:00
a8cf8d2829 IUM_06 2024-05-04 15:42:16 +02:00
dace057c96 IUM_06 2024-05-04 15:30:49 +02:00
ee4c1adab2 IUM_06 2024-05-04 15:25:54 +02:00
64 changed files with 142 additions and 570329 deletions

3
.dvc/.gitignore vendored
View File

@ -1,3 +0,0 @@
/config.local
/tmp
/cache

View File

@ -1,4 +0,0 @@
[core]
remote = ium_ssh_remote
['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl

View File

@ -1,3 +0,0 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

5
.gitignore vendored
View File

@ -1,5 +1,6 @@
creditcardfraud.zip
creditcard.csv
data data
model/model.keras model/model.keras
stats_data stats_data
/creditcard.csv evaluation
/creditcardfraud.zip

View File

@ -1,5 +1,5 @@
FROM ubuntu:latest FROM ubuntu:latest
RUN apt update && apt install -y python3-pip git RUN apt update && apt install -y python3-pip
RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages RUN pip install pandas numpy scikit-learn tensorflow matplotlib --break-system-packages

Binary file not shown.

79
Jenkinsfile vendored
View File

@ -1,73 +1,70 @@
pipeline { pipeline {
agent any agent {
dockerfile true
}
triggers {
upstream(upstreamProjects: 's464913-training/training', threshold: hudson.model.Result.SUCCESS)
}
parameters { parameters {
string ( buildSelector(
defaultValue: 'vskyper', defaultSelector: lastSuccessful(),
description: 'Kaggle username', description: 'Which build to use for copying artifacts',
name: 'KAGGLE_USERNAME', name: 'BUILD_SELECTOR'
trim: false
)
password (
defaultValue: '',
description: 'Kaggle API key',
name: 'KAGGLE_KEY',
) )
gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
} }
stages { stages {
stage('Clone Repository') { stage('Clone Repository') {
steps { steps {
git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git' git branch: 'evaluation', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
} }
} }
stage('Download dataset') { stage('Copy Artifacts from dataset job') {
steps { steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
sh 'pip install kaggle'
sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
sh 'unzip -o creditcardfraud.zip'
sh 'rm creditcardfraud.zip'
}
} }
} }
stage('Run create-dataset script') { stage('Copy Artifacts from training job') {
agent {
dockerfile {
reuseNode true
}
}
steps { steps {
sh 'chmod +x create-dataset.py' copyArtifacts filter: 'model/*', projectName: 's464913-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
sh 'python3 ./create-dataset.py'
} }
} }
stage('Archive Artifacts from create-dataset') { stage('Copy Artifacts from evaluation job') {
steps { steps {
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true copyArtifacts filter: 'evaluation/*', projectName: 's464913-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
} }
} }
stage('Experiments') { stage('Run predict script') {
agent {
dockerfile {
reuseNode true
}
}
steps { steps {
sh 'chmod +x sacred/sacred_train_evaluation.py' sh 'chmod +x predict.py'
sh 'python3 sacred/sacred_train_evaluation.py' sh 'python3 ./predict.py'
} }
} }
stage('Archive Artifacts from Experiments') { stage('Run metrics script') {
steps { steps {
archiveArtifacts artifacts: 'experiments/**/*.*', onlyIfSuccessful: true sh 'chmod +x metrics.py'
sh "python3 ./metrics.py ${currentBuild.number}"
}
}
stage('Run plot script') {
steps {
sh 'chmod +x plot.py'
sh 'python3 ./plot.py'
}
}
stage('Archive Artifacts') {
steps {
archiveArtifacts artifacts: 'evaluation/*', onlyIfSuccessful: true
} }
} }
} }

42
create-dataset.sh Normal file
View File

@ -0,0 +1,42 @@
#!/bin/bash
# Download the Kaggle credit card fraud dataset, shuffle its rows and split
# them into a training and a testing CSV file stored under data/.
#
# Stop at the first failing command, unset variable or broken pipeline, so a
# failed download or unzip is never followed by the `rm` calls below and by
# the creation of empty dataset files.
set -euo pipefail

# Install the Kaggle API
pip install kaggle
# Download the dataset from Kaggle
kaggle datasets download -d mlg-ulb/creditcardfraud
# Unzip the dataset
unzip -o creditcardfraud.zip
# Remove the zip file
rm creditcardfraud.zip
# Create a header file
head -n 1 creditcard.csv > creditcard_header.csv
# Remove the header from the dataset
tail -n +2 creditcard.csv > creditcard_no_header.csv
# Remove the original dataset
rm creditcard.csv
# Shuffle the dataset
shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
# Remove the unshuffled dataset
rm creditcard_no_header.csv
# Add the header back to the shuffled dataset
cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
# Split the dataset into training and testing:
# the first 10000 shuffled rows form the test set, the remaining rows the training set
tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
# Add the header back to the training and testing datasets
cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
# Remove the intermediate files
rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
# Create a directory for the data
mkdir -p data
# Move the datasets to the data directory
mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/

View File

@ -1,5 +0,0 @@
outs:
- md5: e90efcb83d69faf99fcab8b0255024de
size: 150828752
hash: md5
path: creditcard.csv

View File

@ -1,5 +0,0 @@
outs:
- md5: bf8e9842731ab6f9b8ab51e1a6741f8b
size: 69155672
hash: md5
path: creditcardfraud.zip

12
dataset-stats.sh Normal file
View File

@ -0,0 +1,12 @@
#!/bin/bash
# Write the line counts of the shuffled, training and testing CSV files
# to text files under stats_data/.
#
# Stop at the first failing command: without this, a missing input file
# still leaves an empty stats file behind that is then moved to stats_data/.
set -euo pipefail

# Count the number of lines in the original dataset
# NOTE: `wc -l` counts every line, so a header row (if present) is included
wc -l < data/creditcard_shuf.csv > stats.txt
# Count the number of lines in the training and testing datasets
wc -l < data/creditcard_train.csv > stats_train.txt
wc -l < data/creditcard_test.csv > stats_test.txt
# Create a directory for the statistics
mkdir -p stats_data
# Move the statistics to the stats directory
mv stats.txt stats_train.txt stats_test.txt stats_data/

View File

@ -1,94 +0,0 @@
schema: '2.0'
stages:
prepare_data:
cmd: python ./create-dataset.py
deps:
- path: create-dataset.py
hash: md5
md5: 0903460139f5b57b9759f4de37b2d5e4
size: 1531
- path: creditcard.csv
hash: md5
md5: e90efcb83d69faf99fcab8b0255024de
size: 150828752
outs:
- path: data/X_test.csv
hash: md5
md5: 46ff52696af9a4c06f6b25639525dda6
size: 30947960
- path: data/X_train.csv
hash: md5
md5: 7505524c54858300bbd92094092a6c39
size: 92838653
- path: data/X_val.csv
hash: md5
md5: 4d078882cc1898640ddaf4ad9117f543
size: 30946540
- path: data/creditcard.csv
hash: md5
md5: 4b81435690147d1e624a8b06c5520629
size: 155302541
- path: data/y_test.csv
hash: md5
md5: a6bc4827feae19934c4021d1f10f5963
size: 170893
- path: data/y_train.csv
hash: md5
md5: 8112a5cf4faac882c421bcb7e3d42044
size: 512656
- path: data/y_val.csv
hash: md5
md5: 1155f648650986d8866eba603b86560c
size: 170893
train_model:
cmd: python ./train_model.py
deps:
- path: data/X_train.csv
hash: md5
md5: 7505524c54858300bbd92094092a6c39
size: 92838653
- path: data/X_val.csv
hash: md5
md5: 4d078882cc1898640ddaf4ad9117f543
size: 30946540
- path: data/y_train.csv
hash: md5
md5: 8112a5cf4faac882c421bcb7e3d42044
size: 512656
- path: data/y_val.csv
hash: md5
md5: 1155f648650986d8866eba603b86560c
size: 170893
- path: train_model.py
hash: md5
md5: 00b8bac043f4d7a56dec95f2f1bb1b49
size: 1540
outs:
- path: model/model.keras
hash: md5
md5: 1d1df55ad26a8c0689efa4a86a86c217
size: 1476738
evaluate_model:
cmd: python ./predict.py
deps:
- path: data/X_test.csv
hash: md5
md5: 46ff52696af9a4c06f6b25639525dda6
size: 30947960
- path: data/y_test.csv
hash: md5
md5: a6bc4827feae19934c4021d1f10f5963
size: 170893
- path: model/model.keras
hash: md5
md5: 1d1df55ad26a8c0689efa4a86a86c217
size: 1476738
- path: predict.py
hash: md5
md5: a61388aabf381779b38e2f32a4d0df7b
size: 660
outs:
- path: data/y_pred.csv
hash: md5
md5: be150c2fbf1914102b479edbe0a4cf43
size: 1481012

View File

@ -1,35 +0,0 @@
stages:
prepare_data:
cmd: python ./create-dataset.py
deps:
- create-dataset.py
- creditcard.csv
outs:
- data/creditcard.csv
- data/X_train.csv
- data/X_val.csv
- data/X_test.csv
- data/y_train.csv
- data/y_val.csv
- data/y_test.csv
train_model:
cmd: python ./train_model.py
deps:
- train_model.py
- data/X_train.csv
- data/X_val.csv
- data/y_train.csv
- data/y_val.csv
outs:
- model/model.keras
evaluate_model:
cmd: python ./predict.py
deps:
- predict.py
- model/model.keras
- data/X_test.csv
- data/y_test.csv
outs:
- data/y_pred.csv

Binary file not shown.

View File

@ -1,5 +0,0 @@
{
"epochs": 5,
"learning_rate": 0.001,
"seed": 7929899
}

File diff suppressed because one or more lines are too long

View File

@ -1,8 +0,0 @@
{
"metrics": [
{
"id": "665b3cd5c1ae3ab5cc15d3d9",
"name": "accuracy"
}
]
}

View File

@ -1,13 +0,0 @@
{
"accuracy": {
"steps": [
0
],
"timestamps": [
"2024-06-01T15:23:02.056704"
],
"values": [
0.8217821782178217
]
}
}

Binary file not shown.

View File

@ -1,102 +0,0 @@
{
"artifacts": [
"model.keras"
],
"command": "main",
"experiment": {
"base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
"dependencies": [
"keras==3.1.1",
"numpy==1.26.3",
"sacred==0.8.5",
"scikit-learn==1.4.1.post1"
],
"mainfile": "sacred_train_evaluation.py",
"name": "464913",
"repositories": [
{
"commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
"dirty": true,
"url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
}
],
"sources": [
[
"sacred_train_evaluation.py",
"_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
]
]
},
"heartbeat": "2024-06-01T15:23:02.067455",
"host": {
"ENV": {},
"cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
"hostname": "Dell",
"os": [
"Windows",
"Windows-11-10.0.22631-SP0"
],
"python_version": "3.12.3"
},
"meta": {
"command": "main",
"config_updates": {},
"named_configs": [],
"options": {
"--beat-interval": null,
"--capture": null,
"--comment": null,
"--debug": false,
"--enforce_clean": false,
"--file_storage": null,
"--force": false,
"--help": false,
"--id": null,
"--loglevel": null,
"--mongo_db": null,
"--name": null,
"--pdb": false,
"--print-config": false,
"--priority": null,
"--queue": false,
"--s3": null,
"--sql": null,
"--tiny_db": null,
"--unobserved": false,
"COMMAND": null,
"UPDATE": [],
"help": false,
"with": false
}
},
"resources": [
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
"experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
"experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
"experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
"experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
"experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
"experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
]
],
"result": null,
"start_time": "2024-06-01T15:20:05.925811",
"status": "COMPLETED",
"stop_time": "2024-06-01T15:23:02.065167"
}

View File

@ -1,100 +0,0 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
from sklearn.metrics import confusion_matrix
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
# Sacred experiment object; the decorators further down register the
# config, the captured function and the main function on it.
ex = Experiment("464913")

# Store runs in MongoDB. The observer is built with its constructor because
# MongoObserver.create(...) is deprecated in favour of MongoObserver(...).
# NOTE(review): the URL embeds a username and password in source control;
# consider supplying it from the environment instead.
ex.observers.append(
    MongoObserver(
        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
        db_name="sacred",
    )
)

# Also keep a copy of every run on disk in the "experiments" directory.
ex.observers.append(FileStorageObserver("experiments"))
# Default hyperparameters of the experiment. The local names below are the
# same names that train_and_evaluate() and main() take as parameters.
@ex.config
def my_config():
    learning_rate = 0.001  # handed to Adam(learning_rate=...)
    epochs = 5  # handed to model.fit(epochs=...)
def _read_resource_csv(_run, path):
    """Read a CSV opened via ``_run.open_resource`` and close the handle afterwards."""
    with _run.open_resource(path) as handle:
        return pd.read_csv(handle)


@ex.capture
def train_and_evaluate(_run, learning_rate, epochs):
    """Train the Conv1D classifier, save it as a run artifact and log its test score.

    Args:
        _run: Run object; used here for ``open_resource``, ``add_artifact``
            and ``log_scalar``.
        learning_rate: Learning rate handed to the Adam optimizer.
        epochs: Number of epochs handed to ``model.fit``.
    """
    # Resources are opened in the same order as before (train/val, then test);
    # each handle is now closed as soon as its CSV has been parsed.
    X_train = _read_resource_csv(_run, "data/X_train.csv").to_numpy()
    X_val = _read_resource_csv(_run, "data/X_val.csv").to_numpy()
    y_train = _read_resource_csv(_run, "data/y_train.csv").to_numpy()
    y_val = _read_resource_csv(_run, "data/y_val.csv").to_numpy()

    # Add a trailing axis of length 1 so that every sample has the shape that
    # is passed to the first Conv1D layer as input_shape.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    model = Sequential(
        [
            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
            BatchNormalization(),
            Dropout(0.2),
            Conv1D(64, 2, activation="relu"),
            BatchNormalization(),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation="relu"),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
    )

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )

    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        verbose=1,
    )

    model.save("sacred/model.keras")
    _run.add_artifact("sacred/model.keras")

    # NOTE(review): X_test is passed to predict() as a DataFrame without the
    # trailing axis that was added to the training data - confirm this is
    # accepted by the installed Keras version.
    X_test = _read_resource_csv(_run, "data/X_test.csv")
    y_test = _read_resource_csv(_run, "data/y_test.csv")

    # Threshold the sigmoid output at 0.5 to obtain boolean predictions.
    y_pred = model.predict(X_test)
    y_pred = y_pred >= 0.5

    cm = confusion_matrix(y_test, y_pred)
    # Row 1 of the confusion matrix holds the samples whose true label is 1,
    # so this ratio is TP / (FN + TP), i.e. the recall of class 1. It is
    # logged under the name "accuracy", which is kept unchanged.
    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
    _run.log_scalar("accuracy", accuracy)
@ex.automain
def main(learning_rate, epochs):
    """Run the experiment by delegating to train_and_evaluate().

    ``learning_rate`` and ``epochs`` are accepted but not used in this body;
    train_and_evaluate() is called without explicit arguments.
    """
    train_and_evaluate()

19
metrics.py Normal file
View File

@ -0,0 +1,19 @@
from sklearn.metrics import confusion_matrix
import pandas as pd
import sys
def main():
    """Append the score of the stored predictions to ``evaluation/metrics.txt``.

    The build number is taken from ``sys.argv[1]`` and written next to the
    score as one ``score,build_number`` line.

    Raises:
        SystemExit: If no build number was passed on the command line.
    """
    # Check the argument first so a missing build number is reported with a
    # usage message before any data is loaded, not as a bare IndexError.
    if len(sys.argv) < 2:
        raise SystemExit("usage: metrics.py BUILD_NUMBER")
    build_number = sys.argv[1]

    y_test = pd.read_csv("data/y_test.csv")
    # The predictions file is read without a header row.
    y_pred = pd.read_csv("evaluation/y_pred.csv", header=None)

    cm = confusion_matrix(y_test, y_pred)
    # Row 1 of the confusion matrix holds the samples whose true label is 1,
    # so this ratio is TP / (FN + TP), i.e. the recall of class 1, even though
    # the variable is called "accuracy".
    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    # Append, so the file accumulates one line per build.
    with open(r"evaluation/metrics.txt", "a") as f:
        f.write(f"{accuracy},{build_number}\n")


if __name__ == "__main__":
    main()

View File

@ -1,10 +0,0 @@
name: Credit card fraud MLFlow - s464913
conda_env: conda.yaml
entry_points:
main:
parameters:
learning_rate: { type: float, default: 0.001 }
epochs: { type: int, default: 5 }
command: 'python mlflow_train_evaluation.py {learning_rate} {epochs}'

View File

@ -1,11 +0,0 @@
name: Credit card fraud MLFlow - s464913
channels:
- defaults
dependencies:
- python=3.12
- pip
- pip:
- mlflow
- tensorflow
- pandas
- scikit-learn

View File

@ -1,82 +0,0 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
import sys
import mlflow
from sklearn.metrics import confusion_matrix
# Point MLflow at the tracking server on localhost:5000; this runs when the
# module is loaded, before main() is called.
mlflow.set_tracking_uri("http://localhost:5000")
def main():
    """Train the Conv1D classifier and record its parameters and test score in MLflow.

    The learning rate is read from ``sys.argv[1]`` and the number of epochs
    from ``sys.argv[2]``.
    """
    # Load the training and validation splits as NumPy arrays.
    train_x = pd.read_csv("../data/X_train.csv").to_numpy()
    val_x = pd.read_csv("../data/X_val.csv").to_numpy()
    train_y = pd.read_csv("../data/y_train.csv").to_numpy()
    val_y = pd.read_csv("../data/y_val.csv").to_numpy()

    # (rows, columns) -> (rows, columns, 1): the per-sample shape is what the
    # first Conv1D layer receives as input_shape.
    train_x = train_x.reshape(train_x.shape + (1,))
    val_x = val_x.reshape(val_x.shape + (1,))

    learning_rate = float(sys.argv[1])
    epochs = int(sys.argv[2])

    with mlflow.start_run() as run:
        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

        layers = [
            Conv1D(32, 2, activation="relu", input_shape=train_x.shape[1:]),
            BatchNormalization(),
            Dropout(0.2),
            Conv1D(64, 2, activation="relu"),
            BatchNormalization(),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation="relu"),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
        model = Sequential(layers)

        optimizer = Adam(learning_rate=learning_rate)
        model.compile(
            optimizer=optimizer,
            loss="binary_crossentropy",
            metrics=["accuracy"],
        )

        model.fit(
            train_x,
            train_y,
            validation_data=(val_x, val_y),
            epochs=epochs,
            verbose=1,
        )

        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("epochs", epochs)

        test_x = pd.read_csv("../data/X_test.csv")
        test_y = pd.read_csv("../data/y_test.csv")

        # Boolean predictions: sigmoid output thresholded at 0.5.
        predicted = model.predict(test_x) >= 0.5

        cm = confusion_matrix(test_y, predicted)
        # Share of true-label-1 samples predicted as 1; logged as "accuracy".
        positives_found = cm[1, 1]
        positives_total = cm[1, 0] + cm[1, 1]
        mlflow.log_metric("accuracy", positives_found / positives_total)


if __name__ == "__main__":
    main()

View File

@ -1,15 +0,0 @@
artifact_uri: mlflow-artifacts:/0/3c46f6c4b15743faa0119c4b9b804825/artifacts
end_time: 1715508788768
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 3c46f6c4b15743faa0119c4b9b804825
run_name: dapper-hog-137
run_uuid: 3c46f6c4b15743faa0119c4b9b804825
source_name: ''
source_type: 4
source_version: ''
start_time: 1715508594003
status: 3
tags: []
user_id: skype

View File

@ -1 +0,0 @@
1715508787882 0.8217821782178217 0

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
a6be9a729562db8c47bc5fec88ad8f5216af0cf3

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

View File

@ -1,15 +0,0 @@
artifact_uri: mlflow-artifacts:/0/706dcf453a0842aaa48647e15521bb7b/artifacts
end_time: 1715508573447
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 706dcf453a0842aaa48647e15521bb7b
run_name: loud-whale-40
run_uuid: 706dcf453a0842aaa48647e15521bb7b
source_name: ''
source_type: 4
source_version: ''
start_time: 1715508159092
status: 3
tags: []
user_id: skype

View File

@ -1 +0,0 @@
1715508572612 0.7524752475247525 0

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
a6be9a729562db8c47bc5fec88ad8f5216af0cf3

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

View File

@ -1,6 +0,0 @@
artifact_location: mlflow-artifacts:/0
creation_time: 1715508147231
experiment_id: '0'
last_update_time: 1715508147231
lifecycle_stage: active
name: Default

24
plot.py Normal file
View File

@ -0,0 +1,24 @@
import matplotlib.pyplot as plt
def _read_metrics(path):
    """Parse ``score,build_number`` lines from *path* into (build_numbers, scores)."""
    build_numbers = []
    scores = []
    with open(path) as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline
            fields = line.split(",")
            scores.append(float(fields[0]))
            build_numbers.append(int(fields[1]))
    return build_numbers, scores


def main():
    """Plot the scores in ``evaluation/metrics.txt`` against their build numbers.

    The chart is written to ``evaluation/accuracy.png``. Nothing is plotted
    when the metrics file contains no entries.
    """
    build_numbers, accuracy = _read_metrics("evaluation/metrics.txt")

    if not build_numbers:
        # min()/max() below would fail on an empty list.
        print("No metrics found in evaluation/metrics.txt; nothing to plot.")
        return

    plt.plot(build_numbers, accuracy)
    plt.xlabel("Build Number")
    plt.ylabel("Accuracy")
    plt.title("Accuracy of the model over time")
    # One tick per build number between the first and the last build.
    plt.xticks(range(min(build_numbers), max(build_numbers) + 1))

    # Save before showing: with an interactive backend the figure is gone
    # once the window opened by show() is closed, and savefig() would then
    # write an empty image.
    plt.savefig("evaluation/accuracy.png")
    plt.show()


if __name__ == "__main__":
    main()

View File

@ -4,24 +4,18 @@ os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import load_model from keras.models import load_model
import pandas as pd import pandas as pd
from sklearn.metrics import confusion_matrix
import numpy as np import numpy as np
def main(): def main():
model = load_model("model/model.keras") model = load_model("model/model.keras")
X_test = pd.read_csv("data/X_test.csv") X_test = pd.read_csv("data/X_test.csv")
y_test = pd.read_csv("data/y_test.csv")
y_pred = model.predict(X_test) y_pred = model.predict(X_test)
y_pred = y_pred >= 0.5 y_pred = y_pred >= 0.5
np.savetxt("data/y_pred.csv", y_pred, delimiter=",")
cm = confusion_matrix(y_test, y_pred) os.makedirs("evaluation", exist_ok=True)
print( np.savetxt("evaluation/y_pred.csv", y_pred, delimiter=",")
"Recall metric in the testing dataset: ",
cm[1, 1] / (cm[1, 0] + cm[1, 1]),
)
if __name__ == "__main__": if __name__ == "__main__":

Binary file not shown.

View File

@ -1,100 +0,0 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
from sklearn.metrics import confusion_matrix
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
# Sacred experiment object; the decorators further down register the
# config, the captured function and the main function on it.
ex = Experiment("464913")

# Store runs in MongoDB. The observer is built with its constructor because
# MongoObserver.create(...) is deprecated in favour of MongoObserver(...).
# NOTE(review): the URL embeds a username and password in source control;
# consider supplying it from the environment instead.
ex.observers.append(
    MongoObserver(
        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
        db_name="sacred",
    )
)

# Also keep a copy of every run on disk in the "experiments" directory.
ex.observers.append(FileStorageObserver("experiments"))
# Default hyperparameters of the experiment. The local names below are the
# same names that train_and_evaluate() and main() take as parameters.
@ex.config
def my_config():
    learning_rate = 0.001  # handed to Adam(learning_rate=...)
    epochs = 5  # handed to model.fit(epochs=...)
def _read_resource_csv(_run, path):
    """Read a CSV opened via ``_run.open_resource`` and close the handle afterwards."""
    with _run.open_resource(path) as handle:
        return pd.read_csv(handle)


@ex.capture
def train_and_evaluate(_run, learning_rate, epochs):
    """Train the Conv1D classifier, save it as a run artifact and log its test score.

    Args:
        _run: Run object; used here for ``open_resource``, ``add_artifact``
            and ``log_scalar``.
        learning_rate: Learning rate handed to the Adam optimizer.
        epochs: Number of epochs handed to ``model.fit``.
    """
    # Resources are opened in the same order as before (train/val, then test);
    # each handle is now closed as soon as its CSV has been parsed.
    X_train = _read_resource_csv(_run, "data/X_train.csv").to_numpy()
    X_val = _read_resource_csv(_run, "data/X_val.csv").to_numpy()
    y_train = _read_resource_csv(_run, "data/y_train.csv").to_numpy()
    y_val = _read_resource_csv(_run, "data/y_val.csv").to_numpy()

    # Add a trailing axis of length 1 so that every sample has the shape that
    # is passed to the first Conv1D layer as input_shape.
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    model = Sequential(
        [
            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
            BatchNormalization(),
            Dropout(0.2),
            Conv1D(64, 2, activation="relu"),
            BatchNormalization(),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation="relu"),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
    )

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )

    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        verbose=1,
    )

    model.save("sacred/model.keras")
    _run.add_artifact("sacred/model.keras")

    # NOTE(review): X_test is passed to predict() as a DataFrame without the
    # trailing axis that was added to the training data - confirm this is
    # accepted by the installed Keras version.
    X_test = _read_resource_csv(_run, "data/X_test.csv")
    y_test = _read_resource_csv(_run, "data/y_test.csv")

    # Threshold the sigmoid output at 0.5 to obtain boolean predictions.
    y_pred = model.predict(X_test)
    y_pred = y_pred >= 0.5

    cm = confusion_matrix(y_test, y_pred)
    # Row 1 of the confusion matrix holds the samples whose true label is 1,
    # so this ratio is TP / (FN + TP), i.e. the recall of class 1. It is
    # logged under the name "accuracy", which is kept unchanged.
    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
    _run.log_scalar("accuracy", accuracy)
@ex.automain
def main(learning_rate, epochs):
    """Run the experiment by delegating to train_and_evaluate().

    ``learning_rate`` and ``epochs`` are accepted but not used in this body;
    train_and_evaluate() is called without explicit arguments.
    """
    train_and_evaluate()