This commit is contained in:
Mateusz 2024-06-01 17:24:14 +02:00
parent cf648b6c12
commit cc56865cc1
19 changed files with 569977 additions and 56 deletions

View File

@ -2,4 +2,4 @@ FROM ubuntu:latest
RUN apt update && apt install -y python3-pip
RUN pip install pandas numpy scikit-learn tensorflow
RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages

15
Jenkinsfile vendored
View File

@ -46,10 +46,23 @@ pipeline {
}
}
stage('Archive Artifacts') {
stage('Archive Artifacts from create-dataset') {
steps {
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
}
}
stage('Experiments') {
steps {
sh 'chmod +x sacred/sacred_train_evaluation.py'
sh 'python3 sacred/sacred_train_evaluation.py'
}
}
stage('Archive Artifacts from Experiments') {
steps {
archiveArtifacts artifacts: 'experiments/*', onlyIfSuccessful: true
}
}
}
}

View File

@ -1,42 +0,0 @@
#!/bin/bash
# Install the Kaggle API
pip install kaggle
# Download the dataset from Kaggle
kaggle datasets download -d mlg-ulb/creditcardfraud
# Unzip the dataset
unzip -o creditcardfraud.zip
# Remove the zip file
rm creditcardfraud.zip
# Create a header file
head -n 1 creditcard.csv > creditcard_header.csv
# Remove the header from the dataset
tail -n +2 creditcard.csv > creditcard_no_header.csv
# Remove the original dataset
rm creditcard.csv
# Shuffle the dataset
shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
# Remove the unshuffled dataset
rm creditcard_no_header.csv
# Add the header back to the shuffled dataset
cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
# Split the dataset into training and testing
tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
# Add the header back to the training and testing datasets
cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
# Remove the intermediate files
rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
# Create a directory for the data
mkdir -p data
# Move the datasets to the data directory
mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/

View File

@ -1,12 +0,0 @@
#!/bin/bash
# Count the number of lines in the original dataset
wc -l < data/creditcard_shuf.csv > stats.txt
# Count the number of lines in the training and testing datasets
wc -l < data/creditcard_train.csv > stats_train.txt
wc -l < data/creditcard_test.csv > stats_test.txt
# Create a directory for the statistics
mkdir -p stats_data
# Move the statistics to the stats directory
mv stats.txt stats_train.txt stats_test.txt stats_data/

View File

@ -0,0 +1,5 @@
{
"epochs": 5,
"learning_rate": 0.001,
"seed": 7929899
}

14
experiments/708/cout.txt Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,8 @@
{
"metrics": [
{
"id": "665b3cd5c1ae3ab5cc15d3d9",
"name": "accuracy"
}
]
}

View File

@ -0,0 +1,13 @@
{
"accuracy": {
"steps": [
0
],
"timestamps": [
"2024-06-01T15:23:02.056704"
],
"values": [
0.8217821782178217
]
}
}

BIN
experiments/708/model.keras Normal file

Binary file not shown.

102
experiments/708/run.json Normal file
View File

@ -0,0 +1,102 @@
{
"artifacts": [
"model.keras"
],
"command": "main",
"experiment": {
"base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
"dependencies": [
"keras==3.1.1",
"numpy==1.26.3",
"sacred==0.8.5",
"scikit-learn==1.4.1.post1"
],
"mainfile": "sacred_train_evaluation.py",
"name": "464913",
"repositories": [
{
"commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
"dirty": true,
"url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
}
],
"sources": [
[
"sacred_train_evaluation.py",
"_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
]
]
},
"heartbeat": "2024-06-01T15:23:02.067455",
"host": {
"ENV": {},
"cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
"hostname": "Dell",
"os": [
"Windows",
"Windows-11-10.0.22631-SP0"
],
"python_version": "3.12.3"
},
"meta": {
"command": "main",
"config_updates": {},
"named_configs": [],
"options": {
"--beat-interval": null,
"--capture": null,
"--comment": null,
"--debug": false,
"--enforce_clean": false,
"--file_storage": null,
"--force": false,
"--help": false,
"--id": null,
"--loglevel": null,
"--mongo_db": null,
"--name": null,
"--pdb": false,
"--print-config": false,
"--priority": null,
"--queue": false,
"--s3": null,
"--sql": null,
"--tiny_db": null,
"--unobserved": false,
"COMMAND": null,
"UPDATE": [],
"help": false,
"with": false
}
},
"resources": [
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
"experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
"experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
"experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
"experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
"experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
],
[
"C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
"experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
]
],
"result": null,
"start_time": "2024-06-01T15:20:05.925811",
"status": "COMPLETED",
"stop_time": "2024-06-01T15:23:02.065167"
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,100 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
from sklearn.metrics import confusion_matrix
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
ex = Experiment("464913")
ex.observers.append(
MongoObserver.create(
url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
db_name="sacred",
)
)
ex.observers.append(FileStorageObserver("experiments"))
@ex.config
def my_config():
learning_rate = 0.001
epochs = 5
@ex.capture
def train_and_evaluate(_run, learning_rate, epochs):
X_train = _run.open_resource("data/X_train.csv")
X_val = _run.open_resource("data/X_val.csv")
y_train = _run.open_resource("data/y_train.csv")
y_val = _run.open_resource("data/y_val.csv")
X_train = pd.read_csv(X_train)
X_val = pd.read_csv(X_val)
y_train = pd.read_csv(y_train)
y_val = pd.read_csv(y_val)
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
y_train = y_train.to_numpy()
y_val = y_val.to_numpy()
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
model = Sequential(
[
Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
BatchNormalization(),
Dropout(0.2),
Conv1D(64, 2, activation="relu"),
BatchNormalization(),
Dropout(0.5),
Flatten(),
Dense(64, activation="relu"),
Dropout(0.5),
Dense(1, activation="sigmoid"),
]
)
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=["accuracy"],
)
model.fit(
X_train,
y_train,
validation_data=(X_val, y_val),
epochs=epochs,
verbose=1,
)
model.save("sacred/model.keras")
_run.add_artifact("sacred/model.keras")
X_test = _run.open_resource("data/X_test.csv")
y_test = _run.open_resource("data/y_test.csv")
X_test = pd.read_csv(X_test)
y_test = pd.read_csv(y_test)
y_pred = model.predict(X_test)
y_pred = y_pred >= 0.5
cm = confusion_matrix(y_test, y_pred)
accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
_run.log_scalar("accuracy", accuracy)
@ex.automain
def main(learning_rate, epochs):
train_and_evaluate()

BIN
sacred/model.keras Normal file

Binary file not shown.

View File

@ -0,0 +1,100 @@
import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
from sklearn.metrics import confusion_matrix
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
ex = Experiment("464913")
ex.observers.append(
MongoObserver.create(
url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
db_name="sacred",
)
)
ex.observers.append(FileStorageObserver("experiments"))
@ex.config
def my_config():
learning_rate = 0.001
epochs = 5
@ex.capture
def train_and_evaluate(_run, learning_rate, epochs):
X_train = _run.open_resource("data/X_train.csv")
X_val = _run.open_resource("data/X_val.csv")
y_train = _run.open_resource("data/y_train.csv")
y_val = _run.open_resource("data/y_val.csv")
X_train = pd.read_csv(X_train)
X_val = pd.read_csv(X_val)
y_train = pd.read_csv(y_train)
y_val = pd.read_csv(y_val)
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
y_train = y_train.to_numpy()
y_val = y_val.to_numpy()
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
model = Sequential(
[
Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
BatchNormalization(),
Dropout(0.2),
Conv1D(64, 2, activation="relu"),
BatchNormalization(),
Dropout(0.5),
Flatten(),
Dense(64, activation="relu"),
Dropout(0.5),
Dense(1, activation="sigmoid"),
]
)
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=["accuracy"],
)
model.fit(
X_train,
y_train,
validation_data=(X_val, y_val),
epochs=epochs,
verbose=1,
)
model.save("sacred/model.keras")
_run.add_artifact("sacred/model.keras")
X_test = _run.open_resource("data/X_test.csv")
y_test = _run.open_resource("data/y_test.csv")
X_test = pd.read_csv(X_test)
y_test = pd.read_csv(y_test)
y_pred = model.predict(X_test)
y_pred = y_pred >= 0.5
cm = confusion_matrix(y_test, y_pred)
accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
_run.log_scalar("accuracy", accuracy)
@ex.automain
def main(learning_rate, epochs):
train_and_evaluate()