Compare commits

..

7 Commits

SHA1 Message Date
29602e30a6 Update Dockerfile 2024-06-01 17:53:51 +02:00
3d80335ede IUM_06 2024-05-04 11:48:12 +02:00
f6c7f5981e IUM_06 2024-05-04 11:41:45 +02:00
c0b07aaac4 IUM_06 2024-05-04 11:31:39 +02:00
979785f5b7 IUM_06 2024-05-04 11:30:47 +02:00
795b91c695 IUM_06 2024-05-04 11:26:17 +02:00
91508718a0 IUM_06 2024-05-04 10:40:09 +02:00
62 changed files with 86 additions and 570332 deletions

.dvc/.gitignore vendored (3 lines changed)
View File

@ -1,3 +0,0 @@
/config.local
/tmp
/cache

.dvc/config
View File

@ -1,4 +0,0 @@
[core]
remote = ium_ssh_remote
['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
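Note: this is the configuration DVC itself writes when a default remote is registered; a minimal sketch of the equivalent commands (remote name and URL taken from the file above, credentials kept out of git in .dvc/config.local, which is ignored via .dvc/.gitignore):

    dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
    # secrets go into .dvc/config.local instead of the tracked config:
    dvc remote modify --local ium_ssh_remote password <secret>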

.dvcignore
View File

@ -1,3 +0,0 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

.gitignore vendored (6 lines changed)
View File

@ -1,5 +1,5 @@
-creditcardfraud.zip
-creditcard.csv
data
model/model.keras
stats_data
+/creditcard.csv
+/creditcardfraud.zip

Dockerfile
View File

@ -1,5 +1,5 @@
FROM ubuntu:latest
-RUN apt update && apt install -y python3-pip git
-RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
+RUN apt update && apt install -y python3-pip
+RUN pip install pandas numpy scikit-learn tensorflow --break-system-packages
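Note: the Jenkins pipelines below run their stages in an image built from this Dockerfile via agent { dockerfile ... }. A rough local equivalent, with an illustrative image tag, would be:

    docker build -t ium-ci .   # tag name is only an example
    docker run --rm ium-ci python3 -c "import tensorflow, pandas, sklearn"   # sanity-check the installed deps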

Binary file not shown.

Jenkinsfile vendored (70 lines changed)
View File

@ -1,73 +1,45 @@
pipeline {
-    agent any
+    agent {
+        dockerfile true
+    }
+    triggers {
+        upstream(upstreamProjects: 'z-s464913-create-dataset', threshold: hudson.model.Result.SUCCESS)
+    }
    parameters {
-        string (
-            defaultValue: 'vskyper',
-            description: 'Kaggle username',
-            name: 'KAGGLE_USERNAME',
-            trim: false
-        )
-        password (
-            defaultValue: '',
-            description: 'Kaggle API key',
-            name: 'KAGGLE_KEY',
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
        )
+        string(name: 'LEARNING_RATE', defaultValue: '0.001', description: 'Learning rate')
+        string(name: 'EPOCHS', defaultValue: '5', description: 'Number of epochs')
    }
    stages {
        stage('Clone Repository') {
            steps {
-                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
+                git branch: 'training', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
            }
        }
-        stage('Download dataset') {
+        stage('Copy Artifacts') {
            steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'pip install kaggle'
-                    sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
-                    sh 'unzip -o creditcardfraud.zip'
-                    sh 'rm creditcardfraud.zip'
-                }
+                copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
-        stage('Run create-dataset script') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
+        stage('Run train_model script') {
            steps {
-                sh 'chmod +x create-dataset.py'
-                sh 'python3 ./create-dataset.py'
+                sh 'chmod +x train_model.py'
+                sh "python3 ./train_model.py ${params.LEARNING_RATE} ${params.EPOCHS}"
            }
        }
-        stage('Archive Artifacts from create-dataset') {
+        stage('Archive Artifacts') {
            steps {
-                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'model/*', onlyIfSuccessful: true
            }
        }
-        stage('Experiments') {
-            agent {
-                dockerfile {
-                    reuseNode true
-                }
-            }
-            steps {
-                sh 'chmod +x sacred/sacred_train_evaluation.py'
-                sh 'python3 sacred/sacred_train_evaluation.py'
-            }
-        }
-        stage('Archive Artifacts from Experiments') {
-            steps {
-                archiveArtifacts artifacts: 'experiments/**/*.*', onlyIfSuccessful: true
-            }
-        }
    }
}
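Note: the head version of the pipeline no longer downloads data from Kaggle; it is triggered by the upstream z-s464913-create-dataset job, copies that job's archived data/* artifacts, and passes LEARNING_RATE and EPOCHS as positional arguments. With the default values the training stage reduces to:

    # equivalent of the 'Run train_model script' stage with default parameters
    python3 ./train_model.py 0.001 5   # argv[1] = learning rate, argv[2] = number of epochs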

create-dataset.sh (new file, 42 lines)
View File

@ -0,0 +1,42 @@
#!/bin/bash
# Install the Kaggle API
pip install kaggle
# Download the dataset from Kaggle
kaggle datasets download -d mlg-ulb/creditcardfraud
# Unzip the dataset
unzip -o creditcardfraud.zip
# Remove the zip file
rm creditcardfraud.zip
# Create a header file
head -n 1 creditcard.csv > creditcard_header.csv
# Remove the header from the dataset
tail -n +2 creditcard.csv > creditcard_no_header.csv
# Remove the original dataset
rm creditcard.csv
# Shuffle the dataset
shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
# Remove the unshuffled dataset
rm creditcard_no_header.csv
# Add the header back to the shuffled dataset
cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
# Split the dataset into training and testing
tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
# Add the header back to the training and testing datasets
cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
# Remove the intermediate files
rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
# Create a directory for the data
mkdir -p data
# Move the datasets to the data directory
mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
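Note: assuming Kaggle API credentials are available (KAGGLE_USERNAME/KAGGLE_KEY or ~/.kaggle/kaggle.json), the script downloads and shuffles the dataset, takes the first 10,000 shuffled rows as the test split and the remaining rows as the training split, and leaves three CSVs in data/:

    ./create-dataset.sh
    ls data/
    # creditcard_shuf.csv  creditcard_test.csv  creditcard_train.csv
    head -n 1 data/creditcard_train.csv   # the header row is re-attached to every split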

creditcard.csv.dvc
View File

@ -1,5 +0,0 @@
outs:
- md5: e90efcb83d69faf99fcab8b0255024de
size: 150828752
hash: md5
path: creditcard.csv

creditcardfraud.zip.dvc
View File

@ -1,5 +0,0 @@
outs:
- md5: bf8e9842731ab6f9b8ab51e1a6741f8b
size: 69155672
hash: md5
path: creditcardfraud.zip
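Note: the two .dvc pointer files above are what dvc add leaves under version control in place of the raw data; with the ium_ssh_remote configured, the actual files move with push/pull. A sketch of the standard workflow:

    dvc add creditcard.csv creditcardfraud.zip   # (re)creates the .dvc pointer files
    dvc push                                     # upload cached copies to the default remote
    dvc pull                                     # restore the data on a fresh clone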

dataset-stats.sh (new file, 12 lines)
View File

@ -0,0 +1,12 @@
#!/bin/bash
# Count the number of lines in the original dataset
wc -l < data/creditcard_shuf.csv > stats.txt
# Count the number of lines in the training and testing datasets
wc -l < data/creditcard_train.csv > stats_train.txt
wc -l < data/creditcard_test.csv > stats_test.txt
# Create a directory for the statistics
mkdir -p stats_data
# Move the statistics to the stats directory
mv stats.txt stats_train.txt stats_test.txt stats_data/
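Note: a quick usage sketch; each output file holds a single number, the wc -l line count of the corresponding CSV:

    ./dataset-stats.sh
    cat stats_data/stats.txt stats_data/stats_train.txt stats_data/stats_test.txt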

dvc.lock
View File

@ -1,94 +0,0 @@
schema: '2.0'
stages:
prepare_data:
cmd: python ./create-dataset.py
deps:
- path: create-dataset.py
hash: md5
md5: 0903460139f5b57b9759f4de37b2d5e4
size: 1531
- path: creditcard.csv
hash: md5
md5: e90efcb83d69faf99fcab8b0255024de
size: 150828752
outs:
- path: data/X_test.csv
hash: md5
md5: 46ff52696af9a4c06f6b25639525dda6
size: 30947960
- path: data/X_train.csv
hash: md5
md5: 7505524c54858300bbd92094092a6c39
size: 92838653
- path: data/X_val.csv
hash: md5
md5: 4d078882cc1898640ddaf4ad9117f543
size: 30946540
- path: data/creditcard.csv
hash: md5
md5: 4b81435690147d1e624a8b06c5520629
size: 155302541
- path: data/y_test.csv
hash: md5
md5: a6bc4827feae19934c4021d1f10f5963
size: 170893
- path: data/y_train.csv
hash: md5
md5: 8112a5cf4faac882c421bcb7e3d42044
size: 512656
- path: data/y_val.csv
hash: md5
md5: 1155f648650986d8866eba603b86560c
size: 170893
train_model:
cmd: python ./train_model.py
deps:
- path: data/X_train.csv
hash: md5
md5: 7505524c54858300bbd92094092a6c39
size: 92838653
- path: data/X_val.csv
hash: md5
md5: 4d078882cc1898640ddaf4ad9117f543
size: 30946540
- path: data/y_train.csv
hash: md5
md5: 8112a5cf4faac882c421bcb7e3d42044
size: 512656
- path: data/y_val.csv
hash: md5
md5: 1155f648650986d8866eba603b86560c
size: 170893
- path: train_model.py
hash: md5
md5: 00b8bac043f4d7a56dec95f2f1bb1b49
size: 1540
outs:
- path: model/model.keras
hash: md5
md5: 1d1df55ad26a8c0689efa4a86a86c217
size: 1476738
evaluate_model:
cmd: python ./predict.py
deps:
- path: data/X_test.csv
hash: md5
md5: 46ff52696af9a4c06f6b25639525dda6
size: 30947960
- path: data/y_test.csv
hash: md5
md5: a6bc4827feae19934c4021d1f10f5963
size: 170893
- path: model/model.keras
hash: md5
md5: 1d1df55ad26a8c0689efa4a86a86c217
size: 1476738
- path: predict.py
hash: md5
md5: a61388aabf381779b38e2f32a4d0df7b
size: 660
outs:
- path: data/y_pred.csv
hash: md5
md5: be150c2fbf1914102b479edbe0a4cf43
size: 1481012

dvc.yaml
View File

@ -1,35 +0,0 @@
stages:
prepare_data:
cmd: python ./create-dataset.py
deps:
- create-dataset.py
- creditcard.csv
outs:
- data/creditcard.csv
- data/X_train.csv
- data/X_val.csv
- data/X_test.csv
- data/y_train.csv
- data/y_val.csv
- data/y_test.csv
train_model:
cmd: python ./train_model.py
deps:
- train_model.py
- data/X_train.csv
- data/X_val.csv
- data/y_train.csv
- data/y_val.csv
outs:
- model/model.keras
evaluate_model:
cmd: python ./predict.py
deps:
- predict.py
- model/model.keras
- data/X_test.csv
- data/y_test.csv
outs:
- data/y_pred.csv
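Note: dvc.yaml declares the prepare_data -> train_model -> evaluate_model stage graph, and dvc.lock above pins the exact dependency and output hashes; reproducing the pipeline is standard DVC usage, for example:

    dvc dag     # print the stage graph defined in dvc.yaml
    dvc repro   # re-run only the stages whose dependencies changed, updating dvc.lock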

Binary file not shown.

View File

@ -1,5 +0,0 @@
{
"epochs": 5,
"learning_rate": 0.001,
"seed": 7929899
}

File diff suppressed because one or more lines are too long

View File

@ -1,8 +0,0 @@
{
"metrics": [
{
"id": "665b3cd5c1ae3ab5cc15d3d9",
"name": "accuracy"
}
]
}

View File

@ -1,13 +0,0 @@
{
"accuracy": {
"steps": [
0
],
"timestamps": [
"2024-06-01T15:23:02.056704"
],
"values": [
0.8217821782178217
]
}
}

Binary file not shown.

View File

@ -1,102 +0,0 @@
{
"artifacts": [
"model.keras"
],
"command": "main",
"experiment": {
"base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
"dependencies": [
"keras==3.1.1",
"numpy==1.26.3",
"sacred==0.8.5",
"scikit-learn==1.4.1.post1"
],
"mainfile": "sacred_train_evaluation.py",
"name": "464913",
"repositories": [
{
"commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
"dirty": true,
"url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
}
],
"sources": [
[
"sacred_train_evaluation.py",
"_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
]
]
},
"heartbeat": "2024-06-01T15:23:02.067455",
"host": {
"ENV": {},
"cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
"hostname": "Dell",
"os": [
"Windows",
"Windows-11-10.0.22631-SP0"
],
"python_version": "3.12.3"
},
"meta": {
"command": "main",
"config_updates": {},
"named_configs": [],
"options": {
"--beat-interval": null,
"--capture": null,
"--comment": null,
"--debug": false,
"--enforce_clean": false,
"--file_storage": null,
"--force": false,
"--help": false,
"--id": null,
"--loglevel": null,
"--mongo_db": null,
"--name": null,
"--pdb": false,
"--print-config": false,
"--priority": null,
"--queue": false,
"--s3": null,
"--sql": null,
"--tiny_db": null,
"--unobserved": false,
"COMMAND": null,
"UPDATE": [],
"help": false,
"with": false
}
},
"resources": [
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
"experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
"experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
"experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
"experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
"experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
"experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
]
],
"result": null,
"start_time": "2024-06-01T15:20:05.925811",
"status": "COMPLETED",
"stop_time": "2024-06-01T15:23:02.065167"
}

sacred/sacred_train_evaluation.py
View File

@ -1,100 +0,0 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
from sklearn.metrics import confusion_matrix
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
ex = Experiment("464913")
ex.observers.append(
MongoObserver.create(
url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
db_name="sacred",
)
)
ex.observers.append(FileStorageObserver("experiments"))
@ex.config
def my_config():
learning_rate = 0.001
epochs = 5
@ex.capture
def train_and_evaluate(_run, learning_rate, epochs):
X_train = _run.open_resource("data/X_train.csv")
X_val = _run.open_resource("data/X_val.csv")
y_train = _run.open_resource("data/y_train.csv")
y_val = _run.open_resource("data/y_val.csv")
X_train = pd.read_csv(X_train)
X_val = pd.read_csv(X_val)
y_train = pd.read_csv(y_train)
y_val = pd.read_csv(y_val)
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
y_train = y_train.to_numpy()
y_val = y_val.to_numpy()
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
model = Sequential(
[
Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
BatchNormalization(),
Dropout(0.2),
Conv1D(64, 2, activation="relu"),
BatchNormalization(),
Dropout(0.5),
Flatten(),
Dense(64, activation="relu"),
Dropout(0.5),
Dense(1, activation="sigmoid"),
]
)
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=["accuracy"],
)
model.fit(
X_train,
y_train,
validation_data=(X_val, y_val),
epochs=epochs,
verbose=1,
)
model.save("sacred/model.keras")
_run.add_artifact("sacred/model.keras")
X_test = _run.open_resource("data/X_test.csv")
y_test = _run.open_resource("data/y_test.csv")
X_test = pd.read_csv(X_test)
y_test = pd.read_csv(y_test)
y_pred = model.predict(X_test)
y_pred = y_pred >= 0.5
cm = confusion_matrix(y_test, y_pred)
accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
_run.log_scalar("accuracy", accuracy)
@ex.automain
def main(learning_rate, epochs):
train_and_evaluate()
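Note: the value logged as "accuracy" is cm[1, 1] / (cm[1, 0] + cm[1, 1]), i.e. recall on the fraud class rather than overall accuracy. Because the script uses Sacred's automain with learning_rate and epochs in its config, hyperparameters can be overridden from the command line using Sacred's with syntax (values below are illustrative):

    python3 sacred/sacred_train_evaluation.py with learning_rate=0.01 epochs=10
    python3 sacred/sacred_train_evaluation.py print_config   # inspect the resolved config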

MLproject
View File

@ -1,10 +0,0 @@
name: Credit card fraud MLFlow - s464913
conda_env: conda.yaml
entry_points:
main:
parameters:
learning_rate: { type: float, default: 0.001 }
epochs: { type: int, default: 5 }
command: 'python mlflow_train_evaluation.py {learning_rate} {epochs}'
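Note: with this MLproject file the entry point is normally launched through the MLflow CLI, which substitutes the parameters into the command template above; run it from the directory containing MLproject (the env-manager flag is optional and version-dependent):

    mlflow run . -P learning_rate=0.001 -P epochs=5
    mlflow run . -P learning_rate=0.001 -P epochs=5 --env-manager=local   # skip conda env creation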

conda.yaml
View File

@ -1,11 +0,0 @@
name: Credit card fraud MLFlow - s464913
channels:
- defaults
dependencies:
- python=3.12
- pip
- pip:
- mlflow
- tensorflow
- pandas
- scikit-learn

mlflow_train_evaluation.py
View File

@ -1,82 +0,0 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
import sys
import mlflow
from sklearn.metrics import confusion_matrix
mlflow.set_tracking_uri("http://localhost:5000")
def main():
X_train = pd.read_csv("../data/X_train.csv")
X_val = pd.read_csv("../data/X_val.csv")
y_train = pd.read_csv("../data/y_train.csv")
y_val = pd.read_csv("../data/y_val.csv")
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
y_train = y_train.to_numpy()
y_val = y_val.to_numpy()
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
learning_rate = float(sys.argv[1])
epochs = int(sys.argv[2])
with mlflow.start_run() as run:
print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
model = Sequential(
[
Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
BatchNormalization(),
Dropout(0.2),
Conv1D(64, 2, activation="relu"),
BatchNormalization(),
Dropout(0.5),
Flatten(),
Dense(64, activation="relu"),
Dropout(0.5),
Dense(1, activation="sigmoid"),
]
)
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=["accuracy"],
)
model.fit(
X_train,
y_train,
validation_data=(X_val, y_val),
epochs=epochs,
verbose=1,
)
mlflow.log_param("learning_rate", learning_rate)
mlflow.log_param("epochs", epochs)
X_test = pd.read_csv("../data/X_test.csv")
y_test = pd.read_csv("../data/y_test.csv")
y_pred = model.predict(X_test)
y_pred = y_pred >= 0.5
cm = confusion_matrix(y_test, y_pred)
accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
mlflow.log_metric("accuracy", accuracy)
if __name__ == "__main__":
main()
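Note: the script sets its tracking URI to http://localhost:5000, so an MLflow tracking server must already be listening there; a minimal sketch (run the script from its own directory, since it reads ../data/*.csv):

    mlflow server --host 127.0.0.1 --port 5000   # or: mlflow ui --port 5000
    python3 mlflow_train_evaluation.py 0.001 5   # learning_rate and epochs as positional args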

View File

@ -1,15 +0,0 @@
artifact_uri: mlflow-artifacts:/0/3c46f6c4b15743faa0119c4b9b804825/artifacts
end_time: 1715508788768
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 3c46f6c4b15743faa0119c4b9b804825
run_name: dapper-hog-137
run_uuid: 3c46f6c4b15743faa0119c4b9b804825
source_name: ''
source_type: 4
source_version: ''
start_time: 1715508594003
status: 3
tags: []
user_id: skype

View File

@ -1 +0,0 @@
1715508787882 0.8217821782178217 0

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
a6be9a729562db8c47bc5fec88ad8f5216af0cf3

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

View File

@ -1,15 +0,0 @@
artifact_uri: mlflow-artifacts:/0/706dcf453a0842aaa48647e15521bb7b/artifacts
end_time: 1715508573447
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 706dcf453a0842aaa48647e15521bb7b
run_name: loud-whale-40
run_uuid: 706dcf453a0842aaa48647e15521bb7b
source_name: ''
source_type: 4
source_version: ''
start_time: 1715508159092
status: 3
tags: []
user_id: skype

View File

@ -1 +0,0 @@
1715508572612 0.7524752475247525 0

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
a6be9a729562db8c47bc5fec88ad8f5216af0cf3

View File

@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464913/ium_464913.git

View File

@ -1 +0,0 @@
file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

View File

@ -1,6 +0,0 @@
artifact_location: mlflow-artifacts:/0
creation_time: 1715508147231
experiment_id: '0'
last_update_time: 1715508147231
lifecycle_stage: active
name: Default

Binary file not shown.

experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py
View File

@ -1,100 +0,0 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
from sklearn.metrics import confusion_matrix
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
ex = Experiment("464913")
ex.observers.append(
MongoObserver.create(
url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
db_name="sacred",
)
)
ex.observers.append(FileStorageObserver("experiments"))
@ex.config
def my_config():
learning_rate = 0.001
epochs = 5
@ex.capture
def train_and_evaluate(_run, learning_rate, epochs):
X_train = _run.open_resource("data/X_train.csv")
X_val = _run.open_resource("data/X_val.csv")
y_train = _run.open_resource("data/y_train.csv")
y_val = _run.open_resource("data/y_val.csv")
X_train = pd.read_csv(X_train)
X_val = pd.read_csv(X_val)
y_train = pd.read_csv(y_train)
y_val = pd.read_csv(y_val)
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
y_train = y_train.to_numpy()
y_val = y_val.to_numpy()
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
model = Sequential(
[
Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
BatchNormalization(),
Dropout(0.2),
Conv1D(64, 2, activation="relu"),
BatchNormalization(),
Dropout(0.5),
Flatten(),
Dense(64, activation="relu"),
Dropout(0.5),
Dense(1, activation="sigmoid"),
]
)
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=["accuracy"],
)
model.fit(
X_train,
y_train,
validation_data=(X_val, y_val),
epochs=epochs,
verbose=1,
)
model.save("sacred/model.keras")
_run.add_artifact("sacred/model.keras")
X_test = _run.open_resource("data/X_test.csv")
y_test = _run.open_resource("data/y_test.csv")
X_test = pd.read_csv(X_test)
y_test = pd.read_csv(y_test)
y_pred = model.predict(X_test)
y_pred = y_pred >= 0.5
cm = confusion_matrix(y_test, y_pred)
accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
_run.log_scalar("accuracy", accuracy)
@ex.automain
def main(learning_rate, epochs):
train_and_evaluate()

train_model.py
View File

@ -6,6 +6,7 @@ from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
+import sys
def main():
@ -22,6 +23,9 @@ def main():
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
+    learning_rate = float(sys.argv[1])
+    epochs = int(sys.argv[2])
    model = Sequential(
        [
            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
@ -38,7 +42,7 @@ def main():
    )
    model.compile(
-        optimizer=Adam(learning_rate=1e-3),
+        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
@ -47,7 +51,7 @@ def main():
        X_train,
        y_train,
        validation_data=(X_val, y_val),
-        epochs=5,
+        epochs=epochs,
        verbose=1,
    )