Compare commits: main...evaluation (9 commits)
Commits: 96e8535023, df42bfcee0, 3f95fa102c, 0920a59d1f, b1a03b41b0, 9d6ffe8205, a8cf8d2829, dace057c96, ee4c1adab2
.dvc/.gitignore (vendored, deleted) | 3
@@ -1,3 +0,0 @@
-/config.local
-/tmp
-/cache
.dvc/config (deleted) | 4

@@ -1,4 +0,0 @@
-[core]
-    remote = ium_ssh_remote
-['remote "ium_ssh_remote"']
-    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
.dvcignore (deleted) | 3

@@ -1,3 +0,0 @@
-# Add patterns of files dvc should ignore, which could improve
-# the performance. Learn more at
-# https://dvc.org/doc/user-guide/dvcignore
.gitignore (vendored) | 5
@@ -1,5 +1,6 @@
-creditcardfraud.zip
-creditcard.csv
 data
 model/model.keras
 stats_data
+/creditcard.csv
+/creditcardfraud.zip
+evaluation
Dockerfile

@@ -1,5 +1,5 @@
 FROM ubuntu:latest
 
-RUN apt update && apt install -y python3-pip git
+RUN apt update && apt install -y python3-pip
 
-RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
+RUN pip install pandas numpy scikit-learn tensorflow matplotlib --break-system-packages
IUM_12.pptx | BIN
Binary file not shown.
Jenkinsfile (vendored) | 79
@@ -1,73 +1,70 @@
 pipeline {
-    agent any
+    agent {
+        dockerfile true
+    }
+
+    triggers {
+        upstream(upstreamProjects: 's464913-training/training', threshold: hudson.model.Result.SUCCESS)
+    }
 
     parameters {
-        string (
-            defaultValue: 'vskyper',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password (
-            defaultValue: '',
-            description: 'Kaggle API key',
-            name: 'KAGGLE_KEY',
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
         )
+        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
     }
 
     stages {
         stage('Clone Repository') {
             steps {
-                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
+                git branch: 'evaluation', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
             }
         }
 
-        stage('Download dataset') {
+        stage('Copy Artifacts from dataset job') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'pip install kaggle'
-                    sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
-                    sh 'unzip -o creditcardfraud.zip'
-                    sh 'rm creditcardfraud.zip'
-                }
+                copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }
 
-        stage('Run create-dataset script') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
-
+        stage('Copy Artifacts from training job') {
             steps {
-                sh 'chmod +x create-dataset.py'
-                sh 'python3 ./create-dataset.py'
+                copyArtifacts filter: 'model/*', projectName: 's464913-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
             }
         }
 
-        stage('Archive Artifacts from create-dataset') {
+        stage('Copy Artifacts from evaluation job') {
             steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                copyArtifacts filter: 'evaluation/*', projectName: 's464913-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
             }
         }
 
-        stage('Experiments') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
-
+        stage('Run predict script') {
             steps {
-                sh 'chmod +x sacred/sacred_train_evaluation.py'
-                sh 'python3 sacred/sacred_train_evaluation.py'
+                sh 'chmod +x predict.py'
+                sh 'python3 ./predict.py'
             }
         }
 
-        stage('Archive Artifacts from Experiments') {
+        stage('Run metrics script') {
             steps {
-                archiveArtifacts artifacts: 'experiments/**/*.*', onlyIfSuccessful: true
+                sh 'chmod +x metrics.py'
+                sh "python3 ./metrics.py ${currentBuild.number}"
             }
         }
+
+        stage('Run plot script') {
+            steps {
+                sh 'chmod +x plot.py'
+                sh 'python3 ./plot.py'
+            }
+        }
+
+        stage('Archive Artifacts') {
+            steps {
+                archiveArtifacts artifacts: 'evaluation/*', onlyIfSuccessful: true
+            }
+        }
     }
 }
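The new pipeline assumes data/ and model/ are populated by copyArtifacts before any script runs. For debugging outside Jenkins, a minimal local stand-in for the three script stages (a sketch; the hard-coded build number replaces Jenkins' ${currentBuild.number}):

# replay_evaluation.py - hypothetical local stand-in for the Jenkins stages;
# assumes data/ and model/ already contain the copied artifacts.
import subprocess
import sys

BUILD_NUMBER = "1"  # Jenkins passes ${currentBuild.number}; hard-coded here

steps = [
    [sys.executable, "predict.py"],                # writes evaluation/y_pred.csv
    [sys.executable, "metrics.py", BUILD_NUMBER],  # appends to evaluation/metrics.txt
    [sys.executable, "plot.py"],                   # renders evaluation/accuracy.png
]

for step in steps:
    subprocess.run(step, check=True)  # fail fast, like a failing Jenkins stage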
create-dataset.sh (new file) | 42
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Install the Kaggle API
+pip install kaggle
+# Download the dataset from Kaggle
+kaggle datasets download -d mlg-ulb/creditcardfraud
+
+# Unzip the dataset
+unzip -o creditcardfraud.zip
+# Remove the zip file
+rm creditcardfraud.zip
+
+# Create a header file
+head -n 1 creditcard.csv > creditcard_header.csv
+# Remove the header from the dataset
+tail -n +2 creditcard.csv > creditcard_no_header.csv
+# Remove the original dataset
+rm creditcard.csv
+
+# Shuffle the dataset
+shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
+# Remove the unshuffled dataset
+rm creditcard_no_header.csv
+
+# Add the header back to the shuffled dataset
+cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
+
+# Split the dataset into training and testing
+tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
+head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
+
+# Add the header back to the training and testing datasets
+cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
+cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
+
+# Remove the intermediate files
+rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
+
+# Create a directory for the data
+mkdir -p data
+# Move the datasets to the data directory
+mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
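The same shuffle-and-split logic can be expressed in a few lines of pandas; a sketch, not part of the commit (the 10 000-row test slice matches the head/tail split above, and the seed is illustrative):

# create_dataset_pandas.py - hypothetical pandas equivalent of create-dataset.sh
import os
import pandas as pd

os.makedirs("data", exist_ok=True)

df = pd.read_csv("creditcard.csv")
shuffled = df.sample(frac=1, random_state=42)  # shuffle all rows; seed is illustrative

shuffled.to_csv("data/creditcard_shuf.csv", index=False)
shuffled.iloc[:10000].to_csv("data/creditcard_test.csv", index=False)   # first 10 000 rows -> test
shuffled.iloc[10000:].to_csv("data/creditcard_train.csv", index=False)  # remainder -> train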
creditcard.csv.dvc (deleted) | 5

@@ -1,5 +0,0 @@
-outs:
-- md5: e90efcb83d69faf99fcab8b0255024de
-  size: 150828752
-  hash: md5
-  path: creditcard.csv
creditcardfraud.zip.dvc (deleted) | 5

@@ -1,5 +0,0 @@
-outs:
-- md5: bf8e9842731ab6f9b8ab51e1a6741f8b
-  size: 69155672
-  hash: md5
-  path: creditcardfraud.zip
dataset-stats.sh (new file) | 12
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Count the number of lines in the original dataset
+wc -l < data/creditcard_shuf.csv > stats.txt
+# Count the number of lines in the training and testing datasets
+wc -l < data/creditcard_train.csv > stats_train.txt
+wc -l < data/creditcard_test.csv > stats_test.txt
+
+# Create a directory for the statistics
+mkdir -p stats_data
+# Move the statistics to the stats directory
+mv stats.txt stats_train.txt stats_test.txt stats_data/
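A Python counterpart for the same counts (a sketch; the minus one discounts the CSV header line that wc -l includes):

# dataset_stats.py - hypothetical Python version of dataset-stats.sh
for name in ("creditcard_shuf", "creditcard_train", "creditcard_test"):
    with open(f"data/{name}.csv") as f:
        n_lines = sum(1 for _ in f)
    print(f"{name}: {n_lines - 1} data rows")  # minus the header line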
dvc.lock (deleted) | 94
@@ -1,94 +0,0 @@
-schema: '2.0'
-stages:
-  prepare_data:
-    cmd: python ./create-dataset.py
-    deps:
-    - path: create-dataset.py
-      hash: md5
-      md5: 0903460139f5b57b9759f4de37b2d5e4
-      size: 1531
-    - path: creditcard.csv
-      hash: md5
-      md5: e90efcb83d69faf99fcab8b0255024de
-      size: 150828752
-    outs:
-    - path: data/X_test.csv
-      hash: md5
-      md5: 46ff52696af9a4c06f6b25639525dda6
-      size: 30947960
-    - path: data/X_train.csv
-      hash: md5
-      md5: 7505524c54858300bbd92094092a6c39
-      size: 92838653
-    - path: data/X_val.csv
-      hash: md5
-      md5: 4d078882cc1898640ddaf4ad9117f543
-      size: 30946540
-    - path: data/creditcard.csv
-      hash: md5
-      md5: 4b81435690147d1e624a8b06c5520629
-      size: 155302541
-    - path: data/y_test.csv
-      hash: md5
-      md5: a6bc4827feae19934c4021d1f10f5963
-      size: 170893
-    - path: data/y_train.csv
-      hash: md5
-      md5: 8112a5cf4faac882c421bcb7e3d42044
-      size: 512656
-    - path: data/y_val.csv
-      hash: md5
-      md5: 1155f648650986d8866eba603b86560c
-      size: 170893
-  train_model:
-    cmd: python ./train_model.py
-    deps:
-    - path: data/X_train.csv
-      hash: md5
-      md5: 7505524c54858300bbd92094092a6c39
-      size: 92838653
-    - path: data/X_val.csv
-      hash: md5
-      md5: 4d078882cc1898640ddaf4ad9117f543
-      size: 30946540
-    - path: data/y_train.csv
-      hash: md5
-      md5: 8112a5cf4faac882c421bcb7e3d42044
-      size: 512656
-    - path: data/y_val.csv
-      hash: md5
-      md5: 1155f648650986d8866eba603b86560c
-      size: 170893
-    - path: train_model.py
-      hash: md5
-      md5: 00b8bac043f4d7a56dec95f2f1bb1b49
-      size: 1540
-    outs:
-    - path: model/model.keras
-      hash: md5
-      md5: 1d1df55ad26a8c0689efa4a86a86c217
-      size: 1476738
-  evaluate_model:
-    cmd: python ./predict.py
-    deps:
-    - path: data/X_test.csv
-      hash: md5
-      md5: 46ff52696af9a4c06f6b25639525dda6
-      size: 30947960
-    - path: data/y_test.csv
-      hash: md5
-      md5: a6bc4827feae19934c4021d1f10f5963
-      size: 170893
-    - path: model/model.keras
-      hash: md5
-      md5: 1d1df55ad26a8c0689efa4a86a86c217
-      size: 1476738
-    - path: predict.py
-      hash: md5
-      md5: a61388aabf381779b38e2f32a4d0df7b
-      size: 660
-    outs:
-    - path: data/y_pred.csv
-      hash: md5
-      md5: be150c2fbf1914102b479edbe0a4cf43
-      size: 1481012
dvc.yaml (deleted) | 35
@@ -1,35 +0,0 @@
-stages:
-  prepare_data:
-    cmd: python ./create-dataset.py
-    deps:
-      - create-dataset.py
-      - creditcard.csv
-    outs:
-      - data/creditcard.csv
-      - data/X_train.csv
-      - data/X_val.csv
-      - data/X_test.csv
-      - data/y_train.csv
-      - data/y_val.csv
-      - data/y_test.csv
-
-  train_model:
-    cmd: python ./train_model.py
-    deps:
-      - train_model.py
-      - data/X_train.csv
-      - data/X_val.csv
-      - data/y_train.csv
-      - data/y_val.csv
-    outs:
-      - model/model.keras
-
-  evaluate_model:
-    cmd: python ./predict.py
-    deps:
-      - predict.py
-      - model/model.keras
-      - data/X_test.csv
-      - data/y_test.csv
-    outs:
-      - data/y_pred.csv
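With dvc.yaml and dvc.lock removed, this branch drops the DVC pipeline entirely; before the change, the three stages would have been reproduced with the dvc CLI. A sketch of driving that from Python (assumes dvc is installed and on PATH):

# run_dvc.py - hypothetical wrapper around the dvc CLI
import subprocess

subprocess.run(["dvc", "repro"], check=True)   # re-run any stale stages defined in dvc.yaml
subprocess.run(["dvc", "status"], check=True)  # report which stages are out of date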
environment.yml | BIN
Binary file not shown.
@@ -1,5 +0,0 @@
-{
-  "epochs": 5,
-  "learning_rate": 0.001,
-  "seed": 7929899
-}
File diff suppressed because one or more lines are too long
@@ -1,8 +0,0 @@
-{
-  "metrics": [
-    {
-      "id": "665b3cd5c1ae3ab5cc15d3d9",
-      "name": "accuracy"
-    }
-  ]
-}
@@ -1,13 +0,0 @@
-{
-  "accuracy": {
-    "steps": [
-      0
-    ],
-    "timestamps": [
-      "2024-06-01T15:23:02.056704"
-    ],
-    "values": [
-      0.8217821782178217
-    ]
-  }
-}
Binary file not shown.
@@ -1,102 +0,0 @@
-{
-  "artifacts": [
-    "model.keras"
-  ],
-  "command": "main",
-  "experiment": {
-    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
-    "dependencies": [
-      "keras==3.1.1",
-      "numpy==1.26.3",
-      "sacred==0.8.5",
-      "scikit-learn==1.4.1.post1"
-    ],
-    "mainfile": "sacred_train_evaluation.py",
-    "name": "464913",
-    "repositories": [
-      {
-        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
-        "dirty": true,
-        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
-      }
-    ],
-    "sources": [
-      [
-        "sacred_train_evaluation.py",
-        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
-      ]
-    ]
-  },
-  "heartbeat": "2024-06-01T15:23:02.067455",
-  "host": {
-    "ENV": {},
-    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
-    "hostname": "Dell",
-    "os": [
-      "Windows",
-      "Windows-11-10.0.22631-SP0"
-    ],
-    "python_version": "3.12.3"
-  },
-  "meta": {
-    "command": "main",
-    "config_updates": {},
-    "named_configs": [],
-    "options": {
-      "--beat-interval": null,
-      "--capture": null,
-      "--comment": null,
-      "--debug": false,
-      "--enforce_clean": false,
-      "--file_storage": null,
-      "--force": false,
-      "--help": false,
-      "--id": null,
-      "--loglevel": null,
-      "--mongo_db": null,
-      "--name": null,
-      "--pdb": false,
-      "--print-config": false,
-      "--priority": null,
-      "--queue": false,
-      "--s3": null,
-      "--sql": null,
-      "--tiny_db": null,
-      "--unobserved": false,
-      "COMMAND": null,
-      "UPDATE": [],
-      "help": false,
-      "with": false
-    }
-  },
-  "resources": [
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
-      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
-      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
-      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
-      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
-      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
-      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
-    ]
-  ],
-  "result": null,
-  "start_time": "2024-06-01T15:20:05.925811",
-  "status": "COMPLETED",
-  "stop_time": "2024-06-01T15:23:02.065167"
-}
6 file diffs suppressed because they are too large.
@@ -1,100 +0,0 @@
-import os
-
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-
-from keras.models import Sequential
-from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
-from keras.optimizers import Adam
-import pandas as pd
-from sklearn.metrics import confusion_matrix
-from sacred import Experiment
-from sacred.observers import FileStorageObserver, MongoObserver
-
-ex = Experiment("464913")
-
-ex.observers.append(
-    MongoObserver.create(
-        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
-        db_name="sacred",
-    )
-)
-ex.observers.append(FileStorageObserver("experiments"))
-
-
-@ex.config
-def my_config():
-    learning_rate = 0.001
-    epochs = 5
-
-
-@ex.capture
-def train_and_evaluate(_run, learning_rate, epochs):
-
-    X_train = _run.open_resource("data/X_train.csv")
-    X_val = _run.open_resource("data/X_val.csv")
-    y_train = _run.open_resource("data/y_train.csv")
-    y_val = _run.open_resource("data/y_val.csv")
-
-    X_train = pd.read_csv(X_train)
-    X_val = pd.read_csv(X_val)
-    y_train = pd.read_csv(y_train)
-    y_val = pd.read_csv(y_val)
-
-    X_train = X_train.to_numpy()
-    X_val = X_val.to_numpy()
-    y_train = y_train.to_numpy()
-    y_val = y_val.to_numpy()
-
-    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
-    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
-
-    model = Sequential(
-        [
-            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
-            BatchNormalization(),
-            Dropout(0.2),
-            Conv1D(64, 2, activation="relu"),
-            BatchNormalization(),
-            Dropout(0.5),
-            Flatten(),
-            Dense(64, activation="relu"),
-            Dropout(0.5),
-            Dense(1, activation="sigmoid"),
-        ]
-    )
-
-    model.compile(
-        optimizer=Adam(learning_rate=learning_rate),
-        loss="binary_crossentropy",
-        metrics=["accuracy"],
-    )
-
-    model.fit(
-        X_train,
-        y_train,
-        validation_data=(X_val, y_val),
-        epochs=epochs,
-        verbose=1,
-    )
-
-    model.save("sacred/model.keras")
-    _run.add_artifact("sacred/model.keras")
-
-    X_test = _run.open_resource("data/X_test.csv")
-    y_test = _run.open_resource("data/y_test.csv")
-
-    X_test = pd.read_csv(X_test)
-    y_test = pd.read_csv(y_test)
-
-    y_pred = model.predict(X_test)
-    y_pred = y_pred >= 0.5
-
-    cm = confusion_matrix(y_test, y_pred)
-    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
-
-    _run.log_scalar("accuracy", accuracy)
-
-
-@ex.automain
-def main(learning_rate, epochs):
-    train_and_evaluate()
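For reference, the deleted experiment could be driven from the CLI with Sacred's standard override syntax (python sacred_train_evaluation.py with learning_rate=0.01 epochs=10) or programmatically; a sketch of the latter, with illustrative override values:

# run_sacred.py - hypothetical programmatic driver for the removed experiment
from sacred_train_evaluation import ex  # assumes the deleted script were still importable

run = ex.run(config_updates={"learning_rate": 0.01, "epochs": 10})
print(run.status)  # "COMPLETED" on success; observers record config, metrics, artifacts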
metrics.py (new file) | 19
@@ -0,0 +1,19 @@
+from sklearn.metrics import confusion_matrix
+import pandas as pd
+import sys
+
+
+def main():
+    y_test = pd.read_csv("data/y_test.csv")
+    y_pred = pd.read_csv("evaluation/y_pred.csv", header=None)
+    build_number = sys.argv[1]
+
+    cm = confusion_matrix(y_test, y_pred)
+    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+
+    with open(r"evaluation/metrics.txt", "a") as f:
+        f.write(f"{accuracy},{build_number}\n")
+
+
+if __name__ == "__main__":
+    main()
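Note that cm[1, 1] / (cm[1, 0] + cm[1, 1]) is the recall on the positive (fraud) class, TP / (TP + FN), not overall accuracy, even though the pipeline stores it under that name. A sketch of the equivalent computation with scikit-learn's named metric:

# metric_check.py - hypothetical cross-check of the value metrics.py appends
import pandas as pd
from sklearn.metrics import recall_score

y_test = pd.read_csv("data/y_test.csv")
y_pred = pd.read_csv("evaluation/y_pred.csv", header=None)

print(recall_score(y_test, y_pred))  # equals cm[1, 1] / (cm[1, 0] + cm[1, 1])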
MLproject (deleted) | 10

@@ -1,10 +0,0 @@
-name: Credit card fraud MLFlow - s464913
-
-conda_env: conda.yaml
-
-entry_points:
-  main:
-    parameters:
-      learning_rate: { type: float, default: 0.001 }
-      epochs: { type: int, default: 5 }
-    command: 'python mlflow_train_evaluation.py {learning_rate} {epochs}'
conda.yaml (deleted) | 11

@@ -1,11 +0,0 @@
-name: Credit card fraud MLFlow - s464913
-channels:
-  - defaults
-dependencies:
-  - python=3.12
-  - pip
-  - pip:
-      - mlflow
-      - tensorflow
-      - pandas
-      - scikit-learn
mlflow_train_evaluation.py (deleted) | 82

@@ -1,82 +0,0 @@
-import os
-
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-
-from keras.models import Sequential
-from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
-from keras.optimizers import Adam
-import pandas as pd
-import sys
-import mlflow
-from sklearn.metrics import confusion_matrix
-
-mlflow.set_tracking_uri("http://localhost:5000")
-
-
-def main():
-    X_train = pd.read_csv("../data/X_train.csv")
-    X_val = pd.read_csv("../data/X_val.csv")
-    y_train = pd.read_csv("../data/y_train.csv")
-    y_val = pd.read_csv("../data/y_val.csv")
-
-    X_train = X_train.to_numpy()
-    X_val = X_val.to_numpy()
-    y_train = y_train.to_numpy()
-    y_val = y_val.to_numpy()
-
-    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
-    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
-
-    learning_rate = float(sys.argv[1])
-    epochs = int(sys.argv[2])
-
-    with mlflow.start_run() as run:
-        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
-        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
-
-        model = Sequential(
-            [
-                Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
-                BatchNormalization(),
-                Dropout(0.2),
-                Conv1D(64, 2, activation="relu"),
-                BatchNormalization(),
-                Dropout(0.5),
-                Flatten(),
-                Dense(64, activation="relu"),
-                Dropout(0.5),
-                Dense(1, activation="sigmoid"),
-            ]
-        )
-
-        model.compile(
-            optimizer=Adam(learning_rate=learning_rate),
-            loss="binary_crossentropy",
-            metrics=["accuracy"],
-        )
-
-        model.fit(
-            X_train,
-            y_train,
-            validation_data=(X_val, y_val),
-            epochs=epochs,
-            verbose=1,
-        )
-
-        mlflow.log_param("learning_rate", learning_rate)
-        mlflow.log_param("epochs", epochs)
-
-        X_test = pd.read_csv("../data/X_test.csv")
-        y_test = pd.read_csv("../data/y_test.csv")
-
-        y_pred = model.predict(X_test)
-        y_pred = y_pred >= 0.5
-
-        cm = confusion_matrix(y_test, y_pred)
-        accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
-
-        mlflow.log_metric("accuracy", accuracy)
-
-
-if __name__ == "__main__":
-    main()
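The removed MLflow project would have been launched with mlflow run . -P learning_rate=0.001 -P epochs=5 (the defaults from the MLproject file); the programmatic equivalent, as a sketch:

# run_mlflow.py - hypothetical launcher for the removed MLflow project
import mlflow

mlflow.projects.run(
    uri=".",  # directory that contained the MLproject file
    parameters={"learning_rate": 0.001, "epochs": 5},
)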
@@ -1,15 +0,0 @@
-artifact_uri: mlflow-artifacts:/0/3c46f6c4b15743faa0119c4b9b804825/artifacts
-end_time: 1715508788768
-entry_point_name: ''
-experiment_id: '0'
-lifecycle_stage: active
-run_id: 3c46f6c4b15743faa0119c4b9b804825
-run_name: dapper-hog-137
-run_uuid: 3c46f6c4b15743faa0119c4b9b804825
-source_name: ''
-source_type: 4
-source_version: ''
-start_time: 1715508594003
-status: 3
-tags: []
-user_id: skype
@@ -1 +0,0 @@
-1715508787882 0.8217821782178217 0

@@ -1 +0,0 @@
-5

@@ -1 +0,0 @@
-0.001

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-local

@@ -1 +0,0 @@
-main

@@ -1 +0,0 @@
-conda

@@ -1 +0,0 @@
-dapper-hog-137

@@ -1 +0,0 @@
-a6be9a729562db8c47bc5fec88ad8f5216af0cf3

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

@@ -1 +0,0 @@
-PROJECT

@@ -1 +0,0 @@
-skype
@@ -1,15 +0,0 @@
-artifact_uri: mlflow-artifacts:/0/706dcf453a0842aaa48647e15521bb7b/artifacts
-end_time: 1715508573447
-entry_point_name: ''
-experiment_id: '0'
-lifecycle_stage: active
-run_id: 706dcf453a0842aaa48647e15521bb7b
-run_name: loud-whale-40
-run_uuid: 706dcf453a0842aaa48647e15521bb7b
-source_name: ''
-source_type: 4
-source_version: ''
-start_time: 1715508159092
-status: 3
-tags: []
-user_id: skype

@@ -1 +0,0 @@
-1715508572612 0.7524752475247525 0

@@ -1 +0,0 @@
-7

@@ -1 +0,0 @@
-0.001

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-local

@@ -1 +0,0 @@
-main

@@ -1 +0,0 @@
-conda

@@ -1 +0,0 @@
-loud-whale-40

@@ -1 +0,0 @@
-a6be9a729562db8c47bc5fec88ad8f5216af0cf3

@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git

@@ -1 +0,0 @@
-file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

@@ -1 +0,0 @@
-PROJECT

@@ -1 +0,0 @@
-skype
@@ -1,6 +0,0 @@
-artifact_location: mlflow-artifacts:/0
-creation_time: 1715508147231
-experiment_id: '0'
-last_update_time: 1715508147231
-lifecycle_stage: active
-name: Default
plot.py (new file) | 24
@@ -0,0 +1,24 @@
+import matplotlib.pyplot as plt
+
+
+def main():
+    accuracy = []
+    build_numbers = []
+
+    with open("evaluation/metrics.txt") as f:
+        for line in f:
+            accuracy.append(float(line.split(",")[0]))
+            build_numbers.append(int(line.split(",")[1]))
+
+    plt.plot(build_numbers, accuracy)
+    plt.xlabel("Build Number")
+    plt.ylabel("Accuracy")
+    plt.title("Accuracy of the model over time")
+    plt.xticks(range(min(build_numbers), max(build_numbers) + 1))
+    plt.show()
+
+    plt.savefig("evaluation/accuracy.png")
+
+
+if __name__ == "__main__":
+    main()
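One caveat in the added script: plt.show() runs before plt.savefig(), and with interactive backends show() can leave an empty figure behind (it also blocks on a headless CI agent), so evaluation/accuracy.png may come out blank. A headless-safe ordering, as a sketch with illustrative data:

# plot_ci.py - hypothetical CI-safe variant of the plotting tail
import matplotlib
matplotlib.use("Agg")  # non-interactive backend for build agents
import matplotlib.pyplot as plt

build_numbers, accuracy = [1, 2, 3], [0.75, 0.80, 0.82]  # illustrative values

plt.plot(build_numbers, accuracy)
plt.xlabel("Build Number")
plt.ylabel("Accuracy")
plt.savefig("evaluation/accuracy.png")  # save first; omit plt.show() on CI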
predict.py | 10
@@ -4,24 +4,18 @@ os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
 
 from keras.models import load_model
 import pandas as pd
-from sklearn.metrics import confusion_matrix
 import numpy as np
 
 
 def main():
     model = load_model("model/model.keras")
     X_test = pd.read_csv("data/X_test.csv")
     y_test = pd.read_csv("data/y_test.csv")
 
     y_pred = model.predict(X_test)
     y_pred = y_pred >= 0.5
-    np.savetxt("data/y_pred.csv", y_pred, delimiter=",")
-
-    cm = confusion_matrix(y_test, y_pred)
-    print(
-        "Recall metric in the testing dataset: ",
-        cm[1, 1] / (cm[1, 0] + cm[1, 1]),
-    )
+    os.makedirs("evaluation", exist_ok=True)
+    np.savetxt("evaluation/y_pred.csv", y_pred, delimiter=",")
 
 
 if __name__ == "__main__":
     main()
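Downstream, metrics.py reads the predictions back from evaluation/y_pred.csv; a quick sanity check of the file predict.py now writes (a sketch):

# read_preds.py - hypothetical check of predict.py's output
import numpy as np

y_pred = np.loadtxt("evaluation/y_pred.csv", delimiter=",")
print(y_pred.shape, y_pred[:5])  # one 0.0/1.0 value per test row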
Binary file not shown.
@@ -1,100 +0,0 @@
(contents identical to the 100-line Sacred training script shown above; the diff removes both sacred/sacred_train_evaluation.py and the _sources snapshot referenced in run.json)