Compare commits
7 Commits
SHA1
29602e30a6
3d80335ede
f6c7f5981e
c0b07aaac4
979785f5b7
795b91c695
91508718a0
.dvc/.gitignore (vendored, 3 changes)
@@ -1,3 +0,0 @@
-/config.local
-/tmp
-/cache
.dvc/config
@@ -1,4 +0,0 @@
-[core]
-    remote = ium_ssh_remote
-['remote "ium_ssh_remote"']
-    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
.dvcignore
@@ -1,3 +0,0 @@
-# Add patterns of files dvc should ignore, which could improve
-# the performance. Learn more at
-# https://dvc.org/doc/user-guide/dvcignore
.gitignore (vendored, 6 changes)
@@ -1,5 +1,5 @@
-creditcardfraud.zip
-creditcard.csv
 data
 model/model.keras
-stats_data
+/creditcard.csv
+/creditcardfraud.zip
+stats_data
Dockerfile
@@ -1,5 +1,5 @@
 FROM ubuntu:latest

-RUN apt update && apt install -y python3-pip git
+RUN apt update && apt install -y python3-pip

-RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
+RUN pip install pandas numpy scikit-learn tensorflow --break-system-packages
IUM_12.pptx (BIN): Binary file not shown.
Jenkinsfile (vendored, 70 changes)
@@ -1,73 +1,45 @@
 pipeline {
-    agent any
+    agent {
+        dockerfile true
+    }
+
+    triggers {
+        upstream(upstreamProjects: 'z-s464913-create-dataset', threshold: hudson.model.Result.SUCCESS)
+    }

     parameters {
-        string (
-            defaultValue: 'vskyper',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password (
-            defaultValue: '',
-            description: 'Kaggle API key',
-            name: 'KAGGLE_KEY',
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
         )
+        string(name: 'LEARNING_RATE', defaultValue: '0.001', description: 'Learning rate')
+        string(name: 'EPOCHS', defaultValue: '5', description: 'Number of epochs')
     }

     stages {
         stage('Clone Repository') {
             steps {
-                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
+                git branch: 'training', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
             }
         }

-        stage('Download dataset') {
+        stage('Copy Artifacts') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'pip install kaggle'
-                    sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
-                    sh 'unzip -o creditcardfraud.zip'
-                    sh 'rm creditcardfraud.zip'
-                }
+                copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }

-        stage('Run create-dataset script') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
-
+        stage('Run train_model script') {
             steps {
-                sh 'chmod +x create-dataset.py'
-                sh 'python3 ./create-dataset.py'
+                sh 'chmod +x train_model.py'
+                sh "python3 ./train_model.py ${params.LEARNING_RATE} ${params.EPOCHS}"
             }
         }

-        stage('Archive Artifacts from create-dataset') {
+        stage('Archive Artifacts') {
             steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
-            }
-        }
-
-        stage('Experiments') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
-
-            steps {
-                sh 'chmod +x sacred/sacred_train_evaluation.py'
-                sh 'python3 sacred/sacred_train_evaluation.py'
-            }
-        }
-
-        stage('Archive Artifacts from Experiments') {
-            steps {
-                archiveArtifacts artifacts: 'experiments/**/*.*', onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'model/*', onlyIfSuccessful: true
             }
         }
     }
 }
create-dataset.sh (new file, 42 lines)
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Install the Kaggle API
+pip install kaggle
+# Download the dataset from Kaggle
+kaggle datasets download -d mlg-ulb/creditcardfraud
+
+# Unzip the dataset
+unzip -o creditcardfraud.zip
+# Remove the zip file
+rm creditcardfraud.zip
+
+# Create a header file
+head -n 1 creditcard.csv > creditcard_header.csv
+# Remove the header from the dataset
+tail -n +2 creditcard.csv > creditcard_no_header.csv
+# Remove the original dataset
+rm creditcard.csv
+
+# Shuffle the dataset
+shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
+# Remove the unshuffled dataset
+rm creditcard_no_header.csv
+
+# Add the header back to the shuffled dataset
+cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
+
+# Split the dataset into training and testing
+tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
+head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
+
+# Add the header back to the training and testing datasets
+cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
+cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
+
+# Remove the intermediate files
+rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
+
+# Create a directory for the data
+mkdir -p data
+# Move the datasets to the data directory
+mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
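For context, this is roughly how the script is invoked. The Kaggle CLI it calls reads credentials from the environment, so the exported variables below are an assumption carried over from the old Jenkinsfile parameters, not something the script itself documents:

    export KAGGLE_USERNAME=vskyper   # placeholder, the old pipeline's default
    export KAGGLE_KEY=<api-key>      # never commit a real key
    bash create-dataset.sh
    ls data/   # creditcard_shuf.csv  creditcard_train.csv  creditcard_test.csv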
creditcard.csv.dvc
@@ -1,5 +0,0 @@
-outs:
-- md5: e90efcb83d69faf99fcab8b0255024de
-  size: 150828752
-  hash: md5
-  path: creditcard.csv
creditcardfraud.zip.dvc
@@ -1,5 +0,0 @@
-outs:
-- md5: bf8e9842731ab6f9b8ab51e1a6741f8b
-  size: 69155672
-  hash: md5
-  path: creditcardfraud.zip
dataset-stats.sh (new file, 12 lines)
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# Count the number of lines in the original dataset
+wc -l < data/creditcard_shuf.csv > stats.txt
+# Count the number of lines in the training and testing datasets
+wc -l < data/creditcard_train.csv > stats_train.txt
+wc -l < data/creditcard_test.csv > stats_test.txt
+
+# Create a directory for the statistics
+mkdir -p stats_data
+# Move the statistics to the stats directory
+mv stats.txt stats_train.txt stats_test.txt stats_data/
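A sketch of running it after create-dataset.sh has populated data/; each output file holds a single line count:

    bash dataset-stats.sh
    cat stats_data/stats.txt stats_data/stats_train.txt stats_data/stats_test.txt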
dvc.lock (94 changes)
@@ -1,94 +0,0 @@
-schema: '2.0'
-stages:
-  prepare_data:
-    cmd: python ./create-dataset.py
-    deps:
-    - path: create-dataset.py
-      hash: md5
-      md5: 0903460139f5b57b9759f4de37b2d5e4
-      size: 1531
-    - path: creditcard.csv
-      hash: md5
-      md5: e90efcb83d69faf99fcab8b0255024de
-      size: 150828752
-    outs:
-    - path: data/X_test.csv
-      hash: md5
-      md5: 46ff52696af9a4c06f6b25639525dda6
-      size: 30947960
-    - path: data/X_train.csv
-      hash: md5
-      md5: 7505524c54858300bbd92094092a6c39
-      size: 92838653
-    - path: data/X_val.csv
-      hash: md5
-      md5: 4d078882cc1898640ddaf4ad9117f543
-      size: 30946540
-    - path: data/creditcard.csv
-      hash: md5
-      md5: 4b81435690147d1e624a8b06c5520629
-      size: 155302541
-    - path: data/y_test.csv
-      hash: md5
-      md5: a6bc4827feae19934c4021d1f10f5963
-      size: 170893
-    - path: data/y_train.csv
-      hash: md5
-      md5: 8112a5cf4faac882c421bcb7e3d42044
-      size: 512656
-    - path: data/y_val.csv
-      hash: md5
-      md5: 1155f648650986d8866eba603b86560c
-      size: 170893
-  train_model:
-    cmd: python ./train_model.py
-    deps:
-    - path: data/X_train.csv
-      hash: md5
-      md5: 7505524c54858300bbd92094092a6c39
-      size: 92838653
-    - path: data/X_val.csv
-      hash: md5
-      md5: 4d078882cc1898640ddaf4ad9117f543
-      size: 30946540
-    - path: data/y_train.csv
-      hash: md5
-      md5: 8112a5cf4faac882c421bcb7e3d42044
-      size: 512656
-    - path: data/y_val.csv
-      hash: md5
-      md5: 1155f648650986d8866eba603b86560c
-      size: 170893
-    - path: train_model.py
-      hash: md5
-      md5: 00b8bac043f4d7a56dec95f2f1bb1b49
-      size: 1540
-    outs:
-    - path: model/model.keras
-      hash: md5
-      md5: 1d1df55ad26a8c0689efa4a86a86c217
-      size: 1476738
-  evaluate_model:
-    cmd: python ./predict.py
-    deps:
-    - path: data/X_test.csv
-      hash: md5
-      md5: 46ff52696af9a4c06f6b25639525dda6
-      size: 30947960
-    - path: data/y_test.csv
-      hash: md5
-      md5: a6bc4827feae19934c4021d1f10f5963
-      size: 170893
-    - path: model/model.keras
-      hash: md5
-      md5: 1d1df55ad26a8c0689efa4a86a86c217
-      size: 1476738
-    - path: predict.py
-      hash: md5
-      md5: a61388aabf381779b38e2f32a4d0df7b
-      size: 660
-    outs:
-    - path: data/y_pred.csv
-      hash: md5
-      md5: be150c2fbf1914102b479edbe0a4cf43
-      size: 1481012
dvc.yaml (35 changes)
|
||||
stages:
|
||||
prepare_data:
|
||||
cmd: python ./create-dataset.py
|
||||
deps:
|
||||
- create-dataset.py
|
||||
- creditcard.csv
|
||||
outs:
|
||||
- data/creditcard.csv
|
||||
- data/X_train.csv
|
||||
- data/X_val.csv
|
||||
- data/X_test.csv
|
||||
- data/y_train.csv
|
||||
- data/y_val.csv
|
||||
- data/y_test.csv
|
||||
|
||||
train_model:
|
||||
cmd: python ./train_model.py
|
||||
deps:
|
||||
- train_model.py
|
||||
- data/X_train.csv
|
||||
- data/X_val.csv
|
||||
- data/y_train.csv
|
||||
- data/y_val.csv
|
||||
outs:
|
||||
- model/model.keras
|
||||
|
||||
evaluate_model:
|
||||
cmd: python ./predict.py
|
||||
deps:
|
||||
- predict.py
|
||||
- model/model.keras
|
||||
- data/X_test.csv
|
||||
- data/y_test.csv
|
||||
outs:
|
||||
- data/y_pred.csv
|
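A pipeline declared this way would have been reproduced and pushed with the standard DVC commands (a sketch; ium_ssh_remote is the remote named in the deleted .dvc/config above):

    dvc repro                    # runs prepare_data -> train_model -> evaluate_model as needed
    dvc push -r ium_ssh_remote   # uploads cached outputs to the configured SSH remote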
environment.yml (BIN): Binary file not shown.
@@ -1,5 +0,0 @@
-{
-  "epochs": 5,
-  "learning_rate": 0.001,
-  "seed": 7929899
-}
File diff suppressed because one or more lines are too long
@@ -1,8 +0,0 @@
-{
-  "metrics": [
-    {
-      "id": "665b3cd5c1ae3ab5cc15d3d9",
-      "name": "accuracy"
-    }
-  ]
-}
@@ -1,13 +0,0 @@
-{
-  "accuracy": {
-    "steps": [
-      0
-    ],
-    "timestamps": [
-      "2024-06-01T15:23:02.056704"
-    ],
-    "values": [
-      0.8217821782178217
-    ]
-  }
-}
Binary file not shown.
@@ -1,102 +0,0 @@
-{
-  "artifacts": [
-    "model.keras"
-  ],
-  "command": "main",
-  "experiment": {
-    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
-    "dependencies": [
-      "keras==3.1.1",
-      "numpy==1.26.3",
-      "sacred==0.8.5",
-      "scikit-learn==1.4.1.post1"
-    ],
-    "mainfile": "sacred_train_evaluation.py",
-    "name": "464913",
-    "repositories": [
-      {
-        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
-        "dirty": true,
-        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
-      }
-    ],
-    "sources": [
-      [
-        "sacred_train_evaluation.py",
-        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
-      ]
-    ]
-  },
-  "heartbeat": "2024-06-01T15:23:02.067455",
-  "host": {
-    "ENV": {},
-    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
-    "hostname": "Dell",
-    "os": [
-      "Windows",
-      "Windows-11-10.0.22631-SP0"
-    ],
-    "python_version": "3.12.3"
-  },
-  "meta": {
-    "command": "main",
-    "config_updates": {},
-    "named_configs": [],
-    "options": {
-      "--beat-interval": null,
-      "--capture": null,
-      "--comment": null,
-      "--debug": false,
-      "--enforce_clean": false,
-      "--file_storage": null,
-      "--force": false,
-      "--help": false,
-      "--id": null,
-      "--loglevel": null,
-      "--mongo_db": null,
-      "--name": null,
-      "--pdb": false,
-      "--print-config": false,
-      "--priority": null,
-      "--queue": false,
-      "--s3": null,
-      "--sql": null,
-      "--tiny_db": null,
-      "--unobserved": false,
-      "COMMAND": null,
-      "UPDATE": [],
-      "help": false,
-      "with": false
-    }
-  },
-  "resources": [
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
-      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
-      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
-      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
-      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
-      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
-    ],
-    [
-      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
-      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
-    ]
-  ],
-  "result": null,
-  "start_time": "2024-06-01T15:20:05.925811",
-  "status": "COMPLETED",
-  "stop_time": "2024-06-01T15:23:02.065167"
-}
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py
@@ -1,100 +0,0 @@
-import os
-
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-
-from keras.models import Sequential
-from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
-from keras.optimizers import Adam
-import pandas as pd
-from sklearn.metrics import confusion_matrix
-from sacred import Experiment
-from sacred.observers import FileStorageObserver, MongoObserver
-
-ex = Experiment("464913")
-
-ex.observers.append(
-    MongoObserver.create(
-        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
-        db_name="sacred",
-    )
-)
-ex.observers.append(FileStorageObserver("experiments"))
-
-
-@ex.config
-def my_config():
-    learning_rate = 0.001
-    epochs = 5
-
-
-@ex.capture
-def train_and_evaluate(_run, learning_rate, epochs):
-
-    X_train = _run.open_resource("data/X_train.csv")
-    X_val = _run.open_resource("data/X_val.csv")
-    y_train = _run.open_resource("data/y_train.csv")
-    y_val = _run.open_resource("data/y_val.csv")
-
-    X_train = pd.read_csv(X_train)
-    X_val = pd.read_csv(X_val)
-    y_train = pd.read_csv(y_train)
-    y_val = pd.read_csv(y_val)
-
-    X_train = X_train.to_numpy()
-    X_val = X_val.to_numpy()
-    y_train = y_train.to_numpy()
-    y_val = y_val.to_numpy()
-
-    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
-    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
-
-    model = Sequential(
-        [
-            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
-            BatchNormalization(),
-            Dropout(0.2),
-            Conv1D(64, 2, activation="relu"),
-            BatchNormalization(),
-            Dropout(0.5),
-            Flatten(),
-            Dense(64, activation="relu"),
-            Dropout(0.5),
-            Dense(1, activation="sigmoid"),
-        ]
-    )
-
-    model.compile(
-        optimizer=Adam(learning_rate=learning_rate),
-        loss="binary_crossentropy",
-        metrics=["accuracy"],
-    )
-
-    model.fit(
-        X_train,
-        y_train,
-        validation_data=(X_val, y_val),
-        epochs=epochs,
-        verbose=1,
-    )
-
-    model.save("sacred/model.keras")
-    _run.add_artifact("sacred/model.keras")
-
-    X_test = _run.open_resource("data/X_test.csv")
-    y_test = _run.open_resource("data/y_test.csv")
-
-    X_test = pd.read_csv(X_test)
-    y_test = pd.read_csv(y_test)
-
-    y_pred = model.predict(X_test)
-    y_pred = y_pred >= 0.5
-
-    cm = confusion_matrix(y_test, y_pred)
-    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
-
-    _run.log_scalar("accuracy", accuracy)
-
-
-@ex.automain
-def main(learning_rate, epochs):
-    train_and_evaluate()
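A Sacred experiment like this is started from the command line, and the config values declared in my_config can be overridden with Sacred's `with` syntax (a sketch, assuming the MongoDB observer hard-coded above is reachable):

    python3 sacred/sacred_train_evaluation.py                                    # defaults: learning_rate=0.001, epochs=5
    python3 sacred/sacred_train_evaluation.py with learning_rate=0.01 epochs=7   # override config values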
mlflow/MLproject
@@ -1,10 +0,0 @@
-name: Credit card fraud MLFlow - s464913
-
-conda_env: conda.yaml
-
-entry_points:
-  main:
-    parameters:
-      learning_rate: { type: float, default: 0.001 }
-      epochs: { type: int, default: 5 }
-    command: 'python mlflow_train_evaluation.py {learning_rate} {epochs}'
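An MLproject with this entry point is launched through the MLflow CLI, which resolves the conda environment and substitutes the parameters into the command template (a sketch, run from the directory containing the MLproject file):

    mlflow run . -P learning_rate=0.001 -P epochs=5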
mlflow/conda.yaml
@@ -1,11 +0,0 @@
-name: Credit card fraud MLFlow - s464913
-channels:
-  - defaults
-dependencies:
-  - python=3.12
-  - pip
-  - pip:
-      - mlflow
-      - tensorflow
-      - pandas
-      - scikit-learn
mlflow/mlflow_train_evaluation.py
@@ -1,82 +0,0 @@
-import os
-
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-
-from keras.models import Sequential
-from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
-from keras.optimizers import Adam
-import pandas as pd
-import sys
-import mlflow
-from sklearn.metrics import confusion_matrix
-
-mlflow.set_tracking_uri("http://localhost:5000")
-
-
-def main():
-    X_train = pd.read_csv("../data/X_train.csv")
-    X_val = pd.read_csv("../data/X_val.csv")
-    y_train = pd.read_csv("../data/y_train.csv")
-    y_val = pd.read_csv("../data/y_val.csv")
-
-    X_train = X_train.to_numpy()
-    X_val = X_val.to_numpy()
-    y_train = y_train.to_numpy()
-    y_val = y_val.to_numpy()
-
-    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
-    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
-
-    learning_rate = float(sys.argv[1])
-    epochs = int(sys.argv[2])
-
-    with mlflow.start_run() as run:
-        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
-        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
-
-        model = Sequential(
-            [
-                Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
-                BatchNormalization(),
-                Dropout(0.2),
-                Conv1D(64, 2, activation="relu"),
-                BatchNormalization(),
-                Dropout(0.5),
-                Flatten(),
-                Dense(64, activation="relu"),
-                Dropout(0.5),
-                Dense(1, activation="sigmoid"),
-            ]
-        )
-
-        model.compile(
-            optimizer=Adam(learning_rate=learning_rate),
-            loss="binary_crossentropy",
-            metrics=["accuracy"],
-        )
-
-        model.fit(
-            X_train,
-            y_train,
-            validation_data=(X_val, y_val),
-            epochs=epochs,
-            verbose=1,
-        )
-
-        mlflow.log_param("learning_rate", learning_rate)
-        mlflow.log_param("epochs", epochs)
-
-        X_test = pd.read_csv("../data/X_test.csv")
-        y_test = pd.read_csv("../data/y_test.csv")
-
-        y_pred = model.predict(X_test)
-        y_pred = y_pred >= 0.5
-
-        cm = confusion_matrix(y_test, y_pred)
-        accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
-
-        mlflow.log_metric("accuracy", accuracy)
-
-
-if __name__ == "__main__":
-    main()
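The script reads its two hyperparameters positionally from sys.argv and logs to the tracking URI hard-coded above, so a local run looks roughly like this (a sketch; it assumes a tracking server is already listening on port 5000):

    mlflow server --host 127.0.0.1 --port 5000 &   # tracking server the script expects
    python3 mlflow_train_evaluation.py 0.001 5     # argv[1]=learning_rate, argv[2]=epochs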
@@ -1,15 +0,0 @@
-artifact_uri: mlflow-artifacts:/0/3c46f6c4b15743faa0119c4b9b804825/artifacts
-end_time: 1715508788768
-entry_point_name: ''
-experiment_id: '0'
-lifecycle_stage: active
-run_id: 3c46f6c4b15743faa0119c4b9b804825
-run_name: dapper-hog-137
-run_uuid: 3c46f6c4b15743faa0119c4b9b804825
-source_name: ''
-source_type: 4
-source_version: ''
-start_time: 1715508594003
-status: 3
-tags: []
-user_id: skype
@@ -1 +0,0 @@
-1715508787882 0.8217821782178217 0
@@ -1 +0,0 @@
-5
@@ -1 +0,0 @@
-0.001
@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git
@@ -1 +0,0 @@
-local
@@ -1 +0,0 @@
-main
@@ -1 +0,0 @@
-conda
@@ -1 +0,0 @@
-dapper-hog-137
@@ -1 +0,0 @@
-a6be9a729562db8c47bc5fec88ad8f5216af0cf3
@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git
@@ -1 +0,0 @@
-file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow
@@ -1 +0,0 @@
-PROJECT
@@ -1 +0,0 @@
-skype
@@ -1,15 +0,0 @@
-artifact_uri: mlflow-artifacts:/0/706dcf453a0842aaa48647e15521bb7b/artifacts
-end_time: 1715508573447
-entry_point_name: ''
-experiment_id: '0'
-lifecycle_stage: active
-run_id: 706dcf453a0842aaa48647e15521bb7b
-run_name: loud-whale-40
-run_uuid: 706dcf453a0842aaa48647e15521bb7b
-source_name: ''
-source_type: 4
-source_version: ''
-start_time: 1715508159092
-status: 3
-tags: []
-user_id: skype
@@ -1 +0,0 @@
-1715508572612 0.7524752475247525 0
@@ -1 +0,0 @@
-7
@@ -1 +0,0 @@
-0.001
@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git
@@ -1 +0,0 @@
-local
@@ -1 +0,0 @@
-main
@@ -1 +0,0 @@
-conda
@@ -1 +0,0 @@
-loud-whale-40
@@ -1 +0,0 @@
-a6be9a729562db8c47bc5fec88ad8f5216af0cf3
@@ -1 +0,0 @@
-https://git.wmi.amu.edu.pl/s464913/ium_464913.git
@@ -1 +0,0 @@
-file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow
@@ -1 +0,0 @@
-PROJECT
@@ -1 +0,0 @@
-skype
@@ -1,6 +0,0 @@
-artifact_location: mlflow-artifacts:/0
-creation_time: 1715508147231
-experiment_id: '0'
-last_update_time: 1715508147231
-lifecycle_stage: active
-name: Default
Binary file not shown.
sacred/sacred_train_evaluation.py
@@ -1,100 +0,0 @@
-import os
-
-os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
-
-from keras.models import Sequential
-from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
-from keras.optimizers import Adam
-import pandas as pd
-from sklearn.metrics import confusion_matrix
-from sacred import Experiment
-from sacred.observers import FileStorageObserver, MongoObserver
-
-ex = Experiment("464913")
-
-ex.observers.append(
-    MongoObserver.create(
-        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
-        db_name="sacred",
-    )
-)
-ex.observers.append(FileStorageObserver("experiments"))
-
-
-@ex.config
-def my_config():
-    learning_rate = 0.001
-    epochs = 5
-
-
-@ex.capture
-def train_and_evaluate(_run, learning_rate, epochs):
-
-    X_train = _run.open_resource("data/X_train.csv")
-    X_val = _run.open_resource("data/X_val.csv")
-    y_train = _run.open_resource("data/y_train.csv")
-    y_val = _run.open_resource("data/y_val.csv")
-
-    X_train = pd.read_csv(X_train)
-    X_val = pd.read_csv(X_val)
-    y_train = pd.read_csv(y_train)
-    y_val = pd.read_csv(y_val)
-
-    X_train = X_train.to_numpy()
-    X_val = X_val.to_numpy()
-    y_train = y_train.to_numpy()
-    y_val = y_val.to_numpy()
-
-    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
-    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
-
-    model = Sequential(
-        [
-            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
-            BatchNormalization(),
-            Dropout(0.2),
-            Conv1D(64, 2, activation="relu"),
-            BatchNormalization(),
-            Dropout(0.5),
-            Flatten(),
-            Dense(64, activation="relu"),
-            Dropout(0.5),
-            Dense(1, activation="sigmoid"),
-        ]
-    )
-
-    model.compile(
-        optimizer=Adam(learning_rate=learning_rate),
-        loss="binary_crossentropy",
-        metrics=["accuracy"],
-    )
-
-    model.fit(
-        X_train,
-        y_train,
-        validation_data=(X_val, y_val),
-        epochs=epochs,
-        verbose=1,
-    )
-
-    model.save("sacred/model.keras")
-    _run.add_artifact("sacred/model.keras")
-
-    X_test = _run.open_resource("data/X_test.csv")
-    y_test = _run.open_resource("data/y_test.csv")
-
-    X_test = pd.read_csv(X_test)
-    y_test = pd.read_csv(y_test)
-
-    y_pred = model.predict(X_test)
-    y_pred = y_pred >= 0.5
-
-    cm = confusion_matrix(y_test, y_pred)
-    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
-
-    _run.log_scalar("accuracy", accuracy)
-
-
-@ex.automain
-def main(learning_rate, epochs):
-    train_and_evaluate()
train_model.py
@@ -6,6 +6,7 @@ from keras.models import Sequential
 from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
 from keras.optimizers import Adam
 import pandas as pd
+import sys


 def main():
@@ -22,6 +23,9 @@ def main():
     X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
     X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

+    learning_rate = float(sys.argv[1])
+    epochs = int(sys.argv[2])
+
     model = Sequential(
         [
             Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
@@ -38,7 +42,7 @@ def main():
     )

     model.compile(
-        optimizer=Adam(learning_rate=1e-3),
+        optimizer=Adam(learning_rate=learning_rate),
         loss="binary_crossentropy",
         metrics=["accuracy"],
     )
@@ -47,7 +51,7 @@ def main():
         X_train,
         y_train,
         validation_data=(X_val, y_val),
-        epochs=5,
+        epochs=epochs,
         verbose=1,
     )

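With this change train_model.py takes the learning rate and epoch count as positional arguments, which is exactly how the new Jenkinsfile stage calls it:

    python3 ./train_model.py 0.001 5   # argv[1]=learning_rate, argv[2]=epochs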
|
Loading…
Reference in New Issue
Block a user