IUM_7

parent cf648b6c12
commit cc56865cc1
@@ -2,4 +2,4 @@ FROM ubuntu:latest
 
 RUN apt update && apt install -y python3-pip
 
-RUN pip install pandas numpy scikit-learn tensorflow
+RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
@@ -46,10 +46,23 @@ pipeline {
             }
         }
 
-        stage('Archive Artifacts') {
+        stage('Archive Artifacts from create-dataset') {
             steps {
                 archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
             }
         }
+
+        stage('Experiments') {
+            steps {
+                sh 'chmod +x sacred/sacred_train_evaluation.py'
+                sh 'python3 sacred/sacred_train_evaluation.py'
+            }
+        }
+
+        stage('Archive Artifacts from Experiments') {
+            steps {
+                archiveArtifacts artifacts: 'experiments/*', onlyIfSuccessful: true
+            }
+        }
     }
 }
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Install the Kaggle API
-pip install kaggle
-# Download the dataset from Kaggle
-kaggle datasets download -d mlg-ulb/creditcardfraud
-
-# Unzip the dataset
-unzip -o creditcardfraud.zip
-# Remove the zip file
-rm creditcardfraud.zip
-
-# Create a header file
-head -n 1 creditcard.csv > creditcard_header.csv
-# Remove the header from the dataset
-tail -n +2 creditcard.csv > creditcard_no_header.csv
-# Remove the original dataset
-rm creditcard.csv
-
-# Shuffle the dataset
-shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
-# Remove the unshuffled dataset
-rm creditcard_no_header.csv
-
-# Add the header back to the shuffled dataset
-cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
-
-# Split the dataset into training and testing
-tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
-head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
-
-# Add the header back to the training and testing datasets
-cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
-cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
-
-# Remove the intermediate files
-rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
-
-# Create a directory for the data
-mkdir -p data
-# Move the datasets to the data directory
-mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
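Note: this commit deletes the shell-based download-and-split script above. For reference, a minimal pandas sketch of the same shuffle-and-split step (illustrative only, not part of the commit; assumes creditcard.csv is in the working directory):

import os
import pandas as pd

# Re-implementation of the deleted script's core logic in pandas.
df = pd.read_csv("creditcard.csv")
df = df.sample(frac=1, random_state=0).reset_index(drop=True)  # shuf equivalent

test = df.iloc[:10000]    # head -n 10000  -> test set
train = df.iloc[10000:]   # tail -n +10001 -> training set

os.makedirs("data", exist_ok=True)
df.to_csv("data/creditcard_shuf.csv", index=False)
train.to_csv("data/creditcard_train.csv", index=False)
test.to_csv("data/creditcard_test.csv", index=False)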
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Count the number of lines in the original dataset
-wc -l < data/creditcard_shuf.csv > stats.txt
-# Count the number of lines in the training and testing datasets
-wc -l < data/creditcard_train.csv > stats_train.txt
-wc -l < data/creditcard_test.csv > stats_test.txt
-
-# Create a directory for the statistics
-mkdir -p stats_data
-# Move the statistics to the stats directory
-mv stats.txt stats_train.txt stats_test.txt stats_data/
@@ -0,0 +1,5 @@
+{
+  "epochs": 5,
+  "learning_rate": 0.001,
+  "seed": 7929899
+}
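Note: my_config in sacred_train_evaluation.py defines only learning_rate and epochs; the "seed" entry is added by Sacred itself, which injects a random seed into every run's config. A captured function can read it through the special _seed argument; a minimal sketch (hypothetical demo experiment, not part of the commit):

from sacred import Experiment

ex = Experiment("seed-demo")

@ex.automain
def main(_seed):
    # Sacred fills _seed with the auto-generated (or user-supplied) seed
    print("this run's seed:", _seed)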
File diff suppressed because one or more lines are too long
@@ -0,0 +1,8 @@
+{
+  "metrics": [
+    {
+      "id": "665b3cd5c1ae3ab5cc15d3d9",
+      "name": "accuracy"
+    }
+  ]
+}
@@ -0,0 +1,13 @@
+{
+  "accuracy": {
+    "steps": [
+      0
+    ],
+    "timestamps": [
+      "2024-06-01T15:23:02.056704"
+    ],
+    "values": [
+      0.8217821782178217
+    ]
+  }
+}
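Note: metrics.json holds a single step because the training script calls _run.log_scalar("accuracy", ...) once, after evaluation. log_scalar also accepts an explicit step, so a per-epoch curve could be recorded as in this sketch (epoch_accuracies is a hypothetical list produced by the training loop):

@ex.capture
def log_history(_run, epoch_accuracies):
    for epoch, acc in enumerate(epoch_accuracies):
        # explicit step -> one entry per epoch in steps/timestamps/values
        _run.log_scalar("accuracy", acc, epoch)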
Binary file not shown.
@@ -0,0 +1,102 @@
+{
+  "artifacts": [
+    "model.keras"
+  ],
+  "command": "main",
+  "experiment": {
+    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
+    "dependencies": [
+      "keras==3.1.1",
+      "numpy==1.26.3",
+      "sacred==0.8.5",
+      "scikit-learn==1.4.1.post1"
+    ],
+    "mainfile": "sacred_train_evaluation.py",
+    "name": "464913",
+    "repositories": [
+      {
+        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
+        "dirty": true,
+        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
+      }
+    ],
+    "sources": [
+      [
+        "sacred_train_evaluation.py",
+        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
+      ]
+    ]
+  },
+  "heartbeat": "2024-06-01T15:23:02.067455",
+  "host": {
+    "ENV": {},
+    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
+    "hostname": "Dell",
+    "os": [
+      "Windows",
+      "Windows-11-10.0.22631-SP0"
+    ],
+    "python_version": "3.12.3"
+  },
+  "meta": {
+    "command": "main",
+    "config_updates": {},
+    "named_configs": [],
+    "options": {
+      "--beat-interval": null,
+      "--capture": null,
+      "--comment": null,
+      "--debug": false,
+      "--enforce_clean": false,
+      "--file_storage": null,
+      "--force": false,
+      "--help": false,
+      "--id": null,
+      "--loglevel": null,
+      "--mongo_db": null,
+      "--name": null,
+      "--pdb": false,
+      "--print-config": false,
+      "--priority": null,
+      "--queue": false,
+      "--s3": null,
+      "--sql": null,
+      "--tiny_db": null,
+      "--unobserved": false,
+      "COMMAND": null,
+      "UPDATE": [],
+      "help": false,
+      "with": false
+    }
+  },
+  "resources": [
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
+      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
+      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
+      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
+      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
+      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
+    ],
+    [
+      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
+      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
+    ]
+  ],
+  "result": null,
+  "start_time": "2024-06-01T15:20:05.925811",
+  "status": "COMPLETED",
+  "stop_time": "2024-06-01T15:23:02.065167"
+}
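Note: "dirty": true records that the working tree had uncommitted changes when the run started. "result" is null because the experiment's main function returns nothing; Sacred stores the main function's return value as the run result. A minimal variant that would populate it (assuming train_and_evaluate were changed to return its computed metric):

@ex.automain
def main(learning_rate, epochs):
    # the returned value ends up as "result" in run.json
    return train_and_evaluate()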
6 file diffs suppressed because they are too large.
@@ -0,0 +1,100 @@
+import os
+
+os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
+
+from keras.models import Sequential
+from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
+from keras.optimizers import Adam
+import pandas as pd
+from sklearn.metrics import confusion_matrix
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+ex = Experiment("464913")
+
+ex.observers.append(
+    MongoObserver.create(
+        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
+        db_name="sacred",
+    )
+)
+ex.observers.append(FileStorageObserver("experiments"))
+
+
+@ex.config
+def my_config():
+    learning_rate = 0.001
+    epochs = 5
+
+
+@ex.capture
+def train_and_evaluate(_run, learning_rate, epochs):
+
+    X_train = _run.open_resource("data/X_train.csv")
+    X_val = _run.open_resource("data/X_val.csv")
+    y_train = _run.open_resource("data/y_train.csv")
+    y_val = _run.open_resource("data/y_val.csv")
+
+    X_train = pd.read_csv(X_train)
+    X_val = pd.read_csv(X_val)
+    y_train = pd.read_csv(y_train)
+    y_val = pd.read_csv(y_val)
+
+    X_train = X_train.to_numpy()
+    X_val = X_val.to_numpy()
+    y_train = y_train.to_numpy()
+    y_val = y_val.to_numpy()
+
+    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
+
+    model = Sequential(
+        [
+            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
+            BatchNormalization(),
+            Dropout(0.2),
+            Conv1D(64, 2, activation="relu"),
+            BatchNormalization(),
+            Dropout(0.5),
+            Flatten(),
+            Dense(64, activation="relu"),
+            Dropout(0.5),
+            Dense(1, activation="sigmoid"),
+        ]
+    )
+
+    model.compile(
+        optimizer=Adam(learning_rate=learning_rate),
+        loss="binary_crossentropy",
+        metrics=["accuracy"],
+    )
+
+    model.fit(
+        X_train,
+        y_train,
+        validation_data=(X_val, y_val),
+        epochs=epochs,
+        verbose=1,
+    )
+
+    model.save("sacred/model.keras")
+    _run.add_artifact("sacred/model.keras")
+
+    X_test = _run.open_resource("data/X_test.csv")
+    y_test = _run.open_resource("data/y_test.csv")
+
+    X_test = pd.read_csv(X_test)
+    y_test = pd.read_csv(y_test)
+
+    y_pred = model.predict(X_test)
+    y_pred = y_pred >= 0.5
+
+    cm = confusion_matrix(y_test, y_pred)
+    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
+
+    _run.log_scalar("accuracy", accuracy)
+
+
+@ex.automain
+def main(learning_rate, epochs):
+    train_and_evaluate()
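Note: two details of the evaluation block above: X_test is never reshaped to the (samples, features, 1) layout used for X_train and X_val, and the value logged as "accuracy", cm[1, 1] / (cm[1, 0] + cm[1, 1]), is TP / (TP + FN), i.e. recall on the positive (fraud) class rather than overall accuracy. A standalone sketch distinguishing the two (toy data, for illustration only):

import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 1, 1, 1])
y_pred = np.array([0, 1, 1, 0, 1])

cm = confusion_matrix(y_true, y_pred)        # rows = true class, cols = predicted
recall = cm[1, 1] / (cm[1, 0] + cm[1, 1])    # TP / (TP + FN): what the script logs
accuracy = (cm[0, 0] + cm[1, 1]) / cm.sum()  # (TN + TP) / all samples
print(recall, accuracy)                      # 0.666..., 0.6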
Binary file not shown.
@@ -0,0 +1,100 @@
(contents identical to sacred/sacred_train_evaluation.py above; this is the snapshot Sacred copied to experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py, as listed under "sources" in run.json)