Compare commits: evaluation...main (10 commits)

| SHA1 |
|---|
| e2ce3a6b9f |
| 4c143f2574 |
| feef756ed0 |
| 0ff7d1c06f |
| cc56865cc1 |
| cf648b6c12 |
| cb74efc384 |
| dc7777ef23 |
| e9194b950d |
| f7b13459a3 |

.dvc/.gitignore (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache

.dvc/config (new file, 4 lines)
@@ -0,0 +1,4 @@
+[core]
+    remote = ium_ssh_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl

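A config like this is normally written by the DVC CLI rather than by hand. A minimal sketch that would reproduce it (the remote name and URL come from the file above; the user/keyfile settings are assumptions and land in the untracked .dvc/config.local, which the .dvc/.gitignore above excludes from git):

    dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
    # keep credentials out of the committed config via --local
    dvc remote modify --local ium_ssh_remote user ium-sftp
    dvc remote modify --local ium_ssh_remote keyfile ~/.ssh/id_rsa
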
.dvcignore (new file, 3 lines)
@@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore

.gitignore (vendored)
@@ -1,5 +1,5 @@
-creditcardfraud.zip
-creditcard.csv
 data
 model/model.keras
-stats_data
+stats_data
+/creditcard.csv
+/creditcardfraud.zip

Dockerfile
@@ -1,5 +1,5 @@
 FROM ubuntu:latest
 
-RUN apt update && apt install -y python3-pip
+RUN apt update && apt install -y python3-pip git
 
-RUN pip install pandas numpy scikit-learn tensorflow
+RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages

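The Jenkinsfile below runs its stages inside this image via the dockerfile agent; outside Jenkins, roughly the same environment can be reproduced by hand (the image tag is illustrative):

    docker build -t ium-464913 .
    docker run --rm -it -v "$PWD":/app -w /app ium-464913 python3 sacred/sacred_train_evaluation.py
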
IUM_12.pptx (binary, new file)
Binary file not shown.

Jenkinsfile (vendored)
@@ -34,11 +34,11 @@ pipeline {
         }
 
         stage('Run create-dataset script') {
-            agent {
-                dockerfile {
-                    reuseNode true
+            agent {
+                dockerfile {
+                    reuseNode true
                 }
             }
 
             steps {
                 sh 'chmod +x create-dataset.py'
@@ -46,10 +46,29 @@ pipeline {
             }
         }
 
-        stage('Archive Artifacts') {
+        stage('Archive Artifacts from create-dataset') {
             steps {
                 archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
             }
         }
+
+        stage('Experiments') {
+            agent {
+                dockerfile {
+                    reuseNode true
+                }
+            }
+
+            steps {
+                sh 'chmod +x sacred/sacred_train_evaluation.py'
+                sh 'python3 sacred/sacred_train_evaluation.py'
+            }
+        }
+
+        stage('Archive Artifacts from Experiments') {
+            steps {
+                archiveArtifacts artifacts: 'experiments/**/*.*', onlyIfSuccessful: true
+            }
+        }
     }
 }

(deleted file)
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Install the Kaggle API
-pip install kaggle
-# Download the dataset from Kaggle
-kaggle datasets download -d mlg-ulb/creditcardfraud
-
-# Unzip the dataset
-unzip -o creditcardfraud.zip
-# Remove the zip file
-rm creditcardfraud.zip
-
-# Create a header file
-head -n 1 creditcard.csv > creditcard_header.csv
-# Remove the header from the dataset
-tail -n +2 creditcard.csv > creditcard_no_header.csv
-# Remove the original dataset
-rm creditcard.csv
-
-# Shuffle the dataset
-shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
-# Remove the unshuffled dataset
-rm creditcard_no_header.csv
-
-# Add the header back to the shuffled dataset
-cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
-
-# Split the dataset into training and testing
-tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
-head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
-
-# Add the header back to the training and testing datasets
-cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
-cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
-
-# Remove the intermediate files
-rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
-
-# Create a directory for the data
-mkdir -p data
-# Move the datasets to the data directory
-mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/

creditcard.csv.dvc (new file, 5 lines)
@@ -0,0 +1,5 @@
+outs:
+- md5: e90efcb83d69faf99fcab8b0255024de
+  size: 150828752
+  hash: md5
+  path: creditcard.csv

creditcardfraud.zip.dvc (new file, 5 lines)
@@ -0,0 +1,5 @@
+outs:
+- md5: bf8e9842731ab6f9b8ab51e1a6741f8b
+  size: 69155672
+  hash: md5
+  path: creditcardfraud.zip

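These .dvc pointer files are what `dvc add` leaves in git in place of the large data files themselves. A sketch of the round trip (assuming the ium_ssh_remote above is configured and authorized):

    dvc add creditcard.csv            # writes creditcard.csv.dvc, moves the data into .dvc/cache
    git add creditcard.csv.dvc .gitignore
    dvc push                          # uploads the cached file to ium_ssh_remote
    dvc pull creditcard.csv.dvc       # restores the file on another machine
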
(deleted file)
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Count the number of lines in the original dataset
-wc -l < data/creditcard_shuf.csv > stats.txt
-# Count the number of lines in the training and testing datasets
-wc -l < data/creditcard_train.csv > stats_train.txt
-wc -l < data/creditcard_test.csv > stats_test.txt
-
-# Create a directory for the statistics
-mkdir -p stats_data
-# Move the statistics to the stats directory
-mv stats.txt stats_train.txt stats_test.txt stats_data/

dvc.lock (new file, 94 lines)
@@ -0,0 +1,94 @@
+schema: '2.0'
+stages:
+  prepare_data:
+    cmd: python ./create-dataset.py
+    deps:
+    - path: create-dataset.py
+      hash: md5
+      md5: 0903460139f5b57b9759f4de37b2d5e4
+      size: 1531
+    - path: creditcard.csv
+      hash: md5
+      md5: e90efcb83d69faf99fcab8b0255024de
+      size: 150828752
+    outs:
+    - path: data/X_test.csv
+      hash: md5
+      md5: 46ff52696af9a4c06f6b25639525dda6
+      size: 30947960
+    - path: data/X_train.csv
+      hash: md5
+      md5: 7505524c54858300bbd92094092a6c39
+      size: 92838653
+    - path: data/X_val.csv
+      hash: md5
+      md5: 4d078882cc1898640ddaf4ad9117f543
+      size: 30946540
+    - path: data/creditcard.csv
+      hash: md5
+      md5: 4b81435690147d1e624a8b06c5520629
+      size: 155302541
+    - path: data/y_test.csv
+      hash: md5
+      md5: a6bc4827feae19934c4021d1f10f5963
+      size: 170893
+    - path: data/y_train.csv
+      hash: md5
+      md5: 8112a5cf4faac882c421bcb7e3d42044
+      size: 512656
+    - path: data/y_val.csv
+      hash: md5
+      md5: 1155f648650986d8866eba603b86560c
+      size: 170893
+  train_model:
+    cmd: python ./train_model.py
+    deps:
+    - path: data/X_train.csv
+      hash: md5
+      md5: 7505524c54858300bbd92094092a6c39
+      size: 92838653
+    - path: data/X_val.csv
+      hash: md5
+      md5: 4d078882cc1898640ddaf4ad9117f543
+      size: 30946540
+    - path: data/y_train.csv
+      hash: md5
+      md5: 8112a5cf4faac882c421bcb7e3d42044
+      size: 512656
+    - path: data/y_val.csv
+      hash: md5
+      md5: 1155f648650986d8866eba603b86560c
+      size: 170893
+    - path: train_model.py
+      hash: md5
+      md5: 00b8bac043f4d7a56dec95f2f1bb1b49
+      size: 1540
+    outs:
+    - path: model/model.keras
+      hash: md5
+      md5: 1d1df55ad26a8c0689efa4a86a86c217
+      size: 1476738
+  evaluate_model:
+    cmd: python ./predict.py
+    deps:
+    - path: data/X_test.csv
+      hash: md5
+      md5: 46ff52696af9a4c06f6b25639525dda6
+      size: 30947960
+    - path: data/y_test.csv
+      hash: md5
+      md5: a6bc4827feae19934c4021d1f10f5963
+      size: 170893
+    - path: model/model.keras
+      hash: md5
+      md5: 1d1df55ad26a8c0689efa4a86a86c217
+      size: 1476738
+    - path: predict.py
+      hash: md5
+      md5: a61388aabf381779b38e2f32a4d0df7b
+      size: 660
+    outs:
+    - path: data/y_pred.csv
+      hash: md5
+      md5: be150c2fbf1914102b479edbe0a4cf43
+      size: 1481012

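dvc.lock is generated and updated by `dvc repro`; it pins the exact MD5 and size of every dependency and output, so it should be committed but never edited by hand. Checking whether the workspace still matches it:

    dvc status        # compares deps/outs against dvc.lock
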
dvc.yaml (new file, 35 lines)
@@ -0,0 +1,35 @@
+stages:
+  prepare_data:
+    cmd: python ./create-dataset.py
+    deps:
+    - create-dataset.py
+    - creditcard.csv
+    outs:
+    - data/creditcard.csv
+    - data/X_train.csv
+    - data/X_val.csv
+    - data/X_test.csv
+    - data/y_train.csv
+    - data/y_val.csv
+    - data/y_test.csv
+
+  train_model:
+    cmd: python ./train_model.py
+    deps:
+    - train_model.py
+    - data/X_train.csv
+    - data/X_val.csv
+    - data/y_train.csv
+    - data/y_val.csv
+    outs:
+    - model/model.keras
+
+  evaluate_model:
+    cmd: python ./predict.py
+    deps:
+    - predict.py
+    - model/model.keras
+    - data/X_test.csv
+    - data/y_test.csv
+    outs:
+    - data/y_pred.csv

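With these stage definitions, the whole prepare → train → evaluate pipeline is driven by DVC. A typical invocation (assumes creditcard.csv has been pulled from the remote first):

    dvc repro                 # runs only the stages whose deps changed
    dvc repro train_model     # or target a single stage
    dvc push                  # then publish the new outputs to the remote
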
environment.yml (binary, new file)
Binary file not shown.

experiments/708/config.json (new file, 5 lines)
@@ -0,0 +1,5 @@
+{
+  "epochs": 5,
+  "learning_rate": 0.001,
+  "seed": 7929899
+}

experiments/708/cout.txt (new file, 14 lines)
File diff suppressed because one or more lines are too long

experiments/708/info.json (new file, 8 lines)
@@ -0,0 +1,8 @@
+{
+  "metrics": [
+    {
+      "id": "665b3cd5c1ae3ab5cc15d3d9",
+      "name": "accuracy"
+    }
+  ]
+}

experiments/708/metrics.json (new file, 13 lines)
@@ -0,0 +1,13 @@
+{
+  "accuracy": {
+    "steps": [
+      0
+    ],
+    "timestamps": [
+      "2024-06-01T15:23:02.056704"
+    ],
+    "values": [
+      0.8217821782178217
+    ]
+  }
+}

experiments/708/model.keras (binary, new file)
Binary file not shown.

experiments/708/run.json (new file, 102 lines)
@@ -0,0 +1,102 @@
+{
+  "artifacts": [
+    "model.keras"
+  ],
+  "command": "main",
+  "experiment": {
+    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
+    "dependencies": [
+      "keras==3.1.1",
+      "numpy==1.26.3",
+      "sacred==0.8.5",
+      "scikit-learn==1.4.1.post1"
+    ],
+    "mainfile": "sacred_train_evaluation.py",
+    "name": "464913",
+    "repositories": [
+      {
+        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
+        "dirty": true,
+        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
+      }
+    ],
+    "sources": [
+      [
+        "sacred_train_evaluation.py",
+        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
+      ]
+    ]
+  },
+  "heartbeat": "2024-06-01T15:23:02.067455",
+  "host": {
+    "ENV": {},
+    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
+    "hostname": "Dell",
+    "os": [
+      "Windows",
+      "Windows-11-10.0.22631-SP0"
+    ],
+    "python_version": "3.12.3"
+  },
+  "meta": {
+    "command": "main",
+    "config_updates": {},
+    "named_configs": [],
+    "options": {
+      "--beat-interval": null,
+      "--capture": null,
+      "--comment": null,
+      "--debug": false,
+      "--enforce_clean": false,
+      "--file_storage": null,
+      "--force": false,
+      "--help": false,
+      "--id": null,
+      "--loglevel": null,
+      "--mongo_db": null,
+      "--name": null,
+      "--pdb": false,
+      "--print-config": false,
+      "--priority": null,
+      "--queue": false,
+      "--s3": null,
+      "--sql": null,
+      "--tiny_db": null,
+      "--unobserved": false,
+      "COMMAND": null,
+      "UPDATE": [],
+      "help": false,
+      "with": false
+    }
+  },
+  "resources": [
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
+      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
+      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
+      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
+      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
+      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
+      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
+    ]
+  ],
+  "result": null,
+  "start_time": "2024-06-01T15:20:05.925811",
+  "status": "COMPLETED",
+  "stop_time": "2024-06-01T15:23:02.065167"
+}

experiments/_resources/X_test_46ff52696af9a4c06f6b25639525dda6.csv (new file, 56963 lines)
File diff suppressed because it is too large

experiments/_resources/X_train_7505524c54858300bbd92094092a6c39.csv (new file, 170884 lines)
File diff suppressed because it is too large

experiments/_resources/X_val_4d078882cc1898640ddaf4ad9117f543.csv (new file, 56963 lines)
File diff suppressed because it is too large

experiments/_resources/y_test_a6bc4827feae19934c4021d1f10f5963.csv (new file, 56963 lines)
File diff suppressed because it is too large

experiments/_resources/y_train_8112a5cf4faac882c421bcb7e3d42044.csv (new file, 170884 lines)
File diff suppressed because it is too large

experiments/_resources/y_val_1155f648650986d8866eba603b86560c.csv (new file, 56963 lines)
File diff suppressed because it is too large

experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py (new file, 100 lines)
@@ -0,0 +1,100 @@
(Sacred's FileStorageObserver archive of the main script, recorded under "sources" in run.json above; its content is a verbatim copy of sacred/sacred_train_evaluation.py, shown in full below.)

mlflow/MLproject (new file, 10 lines)
@@ -0,0 +1,10 @@
+name: Credit card fraud MLFlow - s464913
+
+conda_env: conda.yaml
+
+entry_points:
+  main:
+    parameters:
+      learning_rate: { type: float, default: 0.001 }
+      epochs: { type: int, default: 5 }
+    command: 'python mlflow_train_evaluation.py {learning_rate} {epochs}'

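Given this entry point, the project is launched through the MLflow CLI, which builds the environment from conda.yaml (defined in the next file) and forwards the parameters to the script. A sketch:

    mlflow run . -P learning_rate=0.001 -P epochs=5
    # or straight from the repo, pointing at the mlflow/ subdirectory:
    mlflow run https://git.wmi.amu.edu.pl/s464913/ium_464913.git#mlflow -P epochs=7
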
mlflow/conda.yaml (new file, 11 lines)
@@ -0,0 +1,11 @@
+name: Credit card fraud MLFlow - s464913
+channels:
+  - defaults
+dependencies:
+  - python=3.12
+  - pip
+  - pip:
+      - mlflow
+      - tensorflow
+      - pandas
+      - scikit-learn

mlflow/mlflow_train_evaluation.py (new file, 82 lines)
@@ -0,0 +1,82 @@
+import os
+
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+from keras.models import Sequential
+from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
+from keras.optimizers import Adam
+import pandas as pd
+import sys
+import mlflow
+from sklearn.metrics import confusion_matrix
+
+mlflow.set_tracking_uri("http://localhost:5000")
+
+
+def main():
+    X_train = pd.read_csv("../data/X_train.csv")
+    X_val = pd.read_csv("../data/X_val.csv")
+    y_train = pd.read_csv("../data/y_train.csv")
+    y_val = pd.read_csv("../data/y_val.csv")
+
+    X_train = X_train.to_numpy()
+    X_val = X_val.to_numpy()
+    y_train = y_train.to_numpy()
+    y_val = y_val.to_numpy()
+
+    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
+
+    learning_rate = float(sys.argv[1])
+    epochs = int(sys.argv[2])
+
+    with mlflow.start_run() as run:
+        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
+
+        model = Sequential(
+            [
+                Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
+                BatchNormalization(),
+                Dropout(0.2),
+                Conv1D(64, 2, activation="relu"),
+                BatchNormalization(),
+                Dropout(0.5),
+                Flatten(),
+                Dense(64, activation="relu"),
+                Dropout(0.5),
+                Dense(1, activation="sigmoid"),
+            ]
+        )
+
+        model.compile(
+            optimizer=Adam(learning_rate=learning_rate),
+            loss="binary_crossentropy",
+            metrics=["accuracy"],
+        )
+
+        model.fit(
+            X_train,
+            y_train,
+            validation_data=(X_val, y_val),
+            epochs=epochs,
+            verbose=1,
+        )
+
+        mlflow.log_param("learning_rate", learning_rate)
+        mlflow.log_param("epochs", epochs)
+
+        X_test = pd.read_csv("../data/X_test.csv")
+        y_test = pd.read_csv("../data/y_test.csv")
+
+        y_pred = model.predict(X_test)
+        y_pred = y_pred >= 0.5
+
+        cm = confusion_matrix(y_test, y_pred)
+        accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+
+        mlflow.log_metric("accuracy", accuracy)
+
+
+if __name__ == "__main__":
+    main()

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/meta.yaml (new file, 15 lines)
@@ -0,0 +1,15 @@
+artifact_uri: mlflow-artifacts:/0/3c46f6c4b15743faa0119c4b9b804825/artifacts
+end_time: 1715508788768
+entry_point_name: ''
+experiment_id: '0'
+lifecycle_stage: active
+run_id: 3c46f6c4b15743faa0119c4b9b804825
+run_name: dapper-hog-137
+run_uuid: 3c46f6c4b15743faa0119c4b9b804825
+source_name: ''
+source_type: 4
+source_version: ''
+start_time: 1715508594003
+status: 3
+tags: []
+user_id: skype

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/metrics/accuracy (new file, 1 line)
@@ -0,0 +1 @@
+1715508787882 0.8217821782178217 0

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/params/epochs (new file, 1 line)
@@ -0,0 +1 @@
+5

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/params/learning_rate (new file, 1 line)
@@ -0,0 +1 @@
+0.001

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.gitRepoURL (new file, 1 line)
@@ -0,0 +1 @@
+https://git.wmi.amu.edu.pl/s464913/ium_464913.git

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.backend (new file, 1 line)
@@ -0,0 +1 @@
+local

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.entryPoint (new file, 1 line)
@@ -0,0 +1 @@
+main

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.project.env (new file, 1 line)
@@ -0,0 +1 @@
+conda

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.runName (new file, 1 line)
@@ -0,0 +1 @@
+dapper-hog-137

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.git.commit (new file, 1 line)
@@ -0,0 +1 @@
+a6be9a729562db8c47bc5fec88ad8f5216af0cf3

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.git.repoURL (new file, 1 line)
@@ -0,0 +1 @@
+https://git.wmi.amu.edu.pl/s464913/ium_464913.git

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.name (new file, 1 line)
@@ -0,0 +1 @@
+file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.source.type (new file, 1 line)
@@ -0,0 +1 @@
+PROJECT

mlflow/mlruns/0/3c46f6c4b15743faa0119c4b9b804825/tags/mlflow.user (new file, 1 line)
@@ -0,0 +1 @@
+skype

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/meta.yaml (new file, 15 lines)
@@ -0,0 +1,15 @@
+artifact_uri: mlflow-artifacts:/0/706dcf453a0842aaa48647e15521bb7b/artifacts
+end_time: 1715508573447
+entry_point_name: ''
+experiment_id: '0'
+lifecycle_stage: active
+run_id: 706dcf453a0842aaa48647e15521bb7b
+run_name: loud-whale-40
+run_uuid: 706dcf453a0842aaa48647e15521bb7b
+source_name: ''
+source_type: 4
+source_version: ''
+start_time: 1715508159092
+status: 3
+tags: []
+user_id: skype

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/metrics/accuracy (new file, 1 line)
@@ -0,0 +1 @@
+1715508572612 0.7524752475247525 0

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/params/epochs (new file, 1 line)
@@ -0,0 +1 @@
+7

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/params/learning_rate (new file, 1 line)
@@ -0,0 +1 @@
+0.001

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.gitRepoURL (new file, 1 line)
@@ -0,0 +1 @@
+https://git.wmi.amu.edu.pl/s464913/ium_464913.git

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.backend (new file, 1 line)
@@ -0,0 +1 @@
+local

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.entryPoint (new file, 1 line)
@@ -0,0 +1 @@
+main

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.project.env (new file, 1 line)
@@ -0,0 +1 @@
+conda

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.runName (new file, 1 line)
@@ -0,0 +1 @@
+loud-whale-40

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.git.commit (new file, 1 line)
@@ -0,0 +1 @@
+a6be9a729562db8c47bc5fec88ad8f5216af0cf3

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.git.repoURL (new file, 1 line)
@@ -0,0 +1 @@
+https://git.wmi.amu.edu.pl/s464913/ium_464913.git

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.name (new file, 1 line)
@@ -0,0 +1 @@
+file://C:\Users\skype\source\repos\Inżynieria Uczenia Maszynowego#\mlflow

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.source.type (new file, 1 line)
@@ -0,0 +1 @@
+PROJECT

mlflow/mlruns/0/706dcf453a0842aaa48647e15521bb7b/tags/mlflow.user (new file, 1 line)
@@ -0,0 +1 @@
+skype

mlflow/mlruns/0/meta.yaml (new file, 6 lines)
@@ -0,0 +1,6 @@
+artifact_location: mlflow-artifacts:/0
+creation_time: 1715508147231
+experiment_id: '0'
+last_update_time: 1715508147231
+lifecycle_stage: active
+name: Default

sacred/model.keras (binary, new file)
Binary file not shown.

sacred/sacred_train_evaluation.py (new file, 100 lines)
@@ -0,0 +1,100 @@
+import os
+
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+from keras.models import Sequential
+from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
+from keras.optimizers import Adam
+import pandas as pd
+from sklearn.metrics import confusion_matrix
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+ex = Experiment("464913")
+
+ex.observers.append(
+    MongoObserver.create(
+        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
+        db_name="sacred",
+    )
+)
+ex.observers.append(FileStorageObserver("experiments"))
+
+
+@ex.config
+def my_config():
+    learning_rate = 0.001
+    epochs = 5
+
+
+@ex.capture
+def train_and_evaluate(_run, learning_rate, epochs):
+
+    X_train = _run.open_resource("data/X_train.csv")
+    X_val = _run.open_resource("data/X_val.csv")
+    y_train = _run.open_resource("data/y_train.csv")
+    y_val = _run.open_resource("data/y_val.csv")
+
+    X_train = pd.read_csv(X_train)
+    X_val = pd.read_csv(X_val)
+    y_train = pd.read_csv(y_train)
+    y_val = pd.read_csv(y_val)
+
+    X_train = X_train.to_numpy()
+    X_val = X_val.to_numpy()
+    y_train = y_train.to_numpy()
+    y_val = y_val.to_numpy()
+
+    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
+
+    model = Sequential(
+        [
+            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
+            BatchNormalization(),
+            Dropout(0.2),
+            Conv1D(64, 2, activation="relu"),
+            BatchNormalization(),
+            Dropout(0.5),
+            Flatten(),
+            Dense(64, activation="relu"),
+            Dropout(0.5),
+            Dense(1, activation="sigmoid"),
+        ]
+    )
+
+    model.compile(
+        optimizer=Adam(learning_rate=learning_rate),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+
+    model.fit(
+        X_train,
+        y_train,
+        validation_data=(X_val, y_val),
+        epochs=epochs,
+        verbose=1,
+    )
+
+    model.save("sacred/model.keras")
+    _run.add_artifact("sacred/model.keras")
+
+    X_test = _run.open_resource("data/X_test.csv")
+    y_test = _run.open_resource("data/y_test.csv")
+
+    X_test = pd.read_csv(X_test)
+    y_test = pd.read_csv(y_test)
+
+    y_pred = model.predict(X_test)
+    y_pred = y_pred >= 0.5
+
+    cm = confusion_matrix(y_test, y_pred)
+    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+
+    _run.log_scalar("accuracy", accuracy)
+
+
+@ex.automain
+def main(learning_rate, epochs):
+    train_and_evaluate()

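Two details are worth noting when running this script. First, Sacred's command line lets the config be overridden per run without code changes:

    python3 sacred/sacred_train_evaluation.py with learning_rate=0.01 epochs=10
    python3 sacred/sacred_train_evaluation.py print_config

Second, the logged "accuracy" is computed as cm[1, 1] / (cm[1, 0] + cm[1, 1]), which is the recall (true positive rate) of the fraud class, not overall accuracy; the same line appears in mlflow_train_evaluation.py. For this heavily imbalanced dataset that is arguably the more informative number, but the metric name is misleading.
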