IUM_7
This commit is contained in:
parent cf648b6c12
commit cc56865cc1
Dockerfile
@@ -2,4 +2,4 @@ FROM ubuntu:latest
 
 RUN apt update && apt install -y python3-pip
 
-RUN pip install pandas numpy scikit-learn tensorflow
+RUN pip install pandas numpy scikit-learn tensorflow sacred pymongo --break-system-packages
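On recent ubuntu:latest images the Debian-packaged pip marks the system interpreter as externally managed (PEP 668), so a bare pip install fails with an externally-managed-environment error; --break-system-packages overrides that check. A virtual environment would be the cleaner alternative, but forcing the system-wide install is a common shortcut in a single-purpose CI image.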
15 Jenkinsfile vendored
@@ -46,10 +46,23 @@ pipeline {
             }
         }
 
-        stage('Archive Artifacts') {
+        stage('Archive Artifacts from create-dataset') {
             steps {
                 archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
             }
         }
+
+        stage('Experiments') {
+            steps {
+                sh 'chmod +x sacred/sacred_train_evaluation.py'
+                sh 'python3 sacred/sacred_train_evaluation.py'
+            }
+        }
+
+        stage('Archive Artifacts from Experiments') {
+            steps {
+                archiveArtifacts artifacts: 'experiments/*', onlyIfSuccessful: true
+            }
+        }
     }
 }
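Renaming the stage ties the archived data/* to the create-dataset step, while the two new stages run the Sacred experiment and then archive everything it writes under experiments/ (config, metrics, captured stdout, and the model). Note that the chmod +x is redundant as written: the script is run through the python3 interpreter, not executed directly.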
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Install the Kaggle API
-pip install kaggle
-# Download the dataset from Kaggle
-kaggle datasets download -d mlg-ulb/creditcardfraud
-
-# Unzip the dataset
-unzip -o creditcardfraud.zip
-# Remove the zip file
-rm creditcardfraud.zip
-
-# Create a header file
-head -n 1 creditcard.csv > creditcard_header.csv
-# Remove the header from the dataset
-tail -n +2 creditcard.csv > creditcard_no_header.csv
-# Remove the original dataset
-rm creditcard.csv
-
-# Shuffle the dataset
-shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
-# Remove the unshuffled dataset
-rm creditcard_no_header.csv
-
-# Add the header back to the shuffled dataset
-cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
-
-# Split the dataset into training and testing
-tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
-head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
-
-# Add the header back to the training and testing datasets
-cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
-cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
-
-# Remove the intermediate files
-rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
-
-# Create a directory for the data
-mkdir -p data
-# Move the datasets to the data directory
-mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
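For reference, the removed shell pipeline amounts to a shuffle followed by a 10 000-row test split, with the CSV header preserved on every output. A minimal pandas sketch of the same steps (hypothetical, assuming creditcardfraud has already been downloaded and unzipped; shuf has no fixed seed, so random_state is an addition):

# Hypothetical pandas equivalent of the removed shell pipeline:
# shuffle creditcard.csv, hold out the first 10 000 rows as a test
# set, and write the results under data/.
import os
import pandas as pd

df = pd.read_csv("creditcard.csv")
df = df.sample(frac=1, random_state=0)  # shuffle; shuf had no fixed seed

os.makedirs("data", exist_ok=True)
df.to_csv("data/creditcard_shuf.csv", index=False)
df.iloc[:10000].to_csv("data/creditcard_test.csv", index=False)
df.iloc[10000:].to_csv("data/creditcard_train.csv", index=False)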
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-# Count the number of lines in the original dataset
-wc -l < data/creditcard_shuf.csv > stats.txt
-# Count the number of lines in the training and testing datasets
-wc -l < data/creditcard_train.csv > stats_train.txt
-wc -l < data/creditcard_test.csv > stats_test.txt
-
-# Create a directory for the statistics
-mkdir -p stats_data
-# Move the statistics to the stats directory
-mv stats.txt stats_train.txt stats_test.txt stats_data/
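Note that each wc -l count includes the CSV header row, so the recorded totals are one higher than the number of data rows.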
5 experiments/708/config.json Normal file
@@ -0,0 +1,5 @@
{
  "epochs": 5,
  "learning_rate": 0.001,
  "seed": 7929899
}
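epochs and learning_rate mirror the defaults in my_config in the script below; the seed is generated and stored by Sacred itself so the run can be reproduced. Any of these can be overridden per run, either on the command line (python3 sacred/sacred_train_evaluation.py with epochs=10) or programmatically. A hypothetical sketch, assuming it is executed from the sacred/ directory:

# Hypothetical driver script: import the experiment object and
# override captured config values for a single run.
from sacred_train_evaluation import ex

run = ex.run(config_updates={"epochs": 10, "learning_rate": 0.01})
print(run.config["seed"])  # Sacred records the seed it used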
14 experiments/708/cout.txt Normal file
File diff suppressed because one or more lines are too long
8 experiments/708/info.json Normal file
@@ -0,0 +1,8 @@
{
  "metrics": [
    {
      "id": "665b3cd5c1ae3ab5cc15d3d9",
      "name": "accuracy"
    }
  ]
}
13 experiments/708/metrics.json Normal file
@@ -0,0 +1,13 @@
{
  "accuracy": {
    "steps": [
      0
    ],
    "timestamps": [
      "2024-06-01T15:23:02.056704"
    ],
    "values": [
      0.8217821782178217
    ]
  }
}
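The single metric entry comes from the _run.log_scalar("accuracy", ...) call at the end of the run; Sacred numbers steps from 0. As the script below shows, the value is cm[1, 1] / (cm[1, 0] + cm[1, 1]), i.e. recall on the fraud class rather than overall accuracy.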
BIN experiments/708/model.keras Normal file
Binary file not shown.
102 experiments/708/run.json Normal file
@@ -0,0 +1,102 @@
{
  "artifacts": [
    "model.keras"
  ],
  "command": "main",
  "experiment": {
    "base_dir": "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\sacred",
    "dependencies": [
      "keras==3.1.1",
      "numpy==1.26.3",
      "sacred==0.8.5",
      "scikit-learn==1.4.1.post1"
    ],
    "mainfile": "sacred_train_evaluation.py",
    "name": "464913",
    "repositories": [
      {
        "commit": "cf648b6c128aae353730cdad0c6972df3438c4cd",
        "dirty": true,
        "url": "https://git.wmi.amu.edu.pl/s464913/ium_464913.git"
      }
    ],
    "sources": [
      [
        "sacred_train_evaluation.py",
        "_sources\\sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py"
      ]
    ]
  },
  "heartbeat": "2024-06-01T15:23:02.067455",
  "host": {
    "ENV": {},
    "cpu": "AMD Ryzen 5 5500U with Radeon Graphics",
    "hostname": "Dell",
    "os": [
      "Windows",
      "Windows-11-10.0.22631-SP0"
    ],
    "python_version": "3.12.3"
  },
  "meta": {
    "command": "main",
    "config_updates": {},
    "named_configs": [],
    "options": {
      "--beat-interval": null,
      "--capture": null,
      "--comment": null,
      "--debug": false,
      "--enforce_clean": false,
      "--file_storage": null,
      "--force": false,
      "--help": false,
      "--id": null,
      "--loglevel": null,
      "--mongo_db": null,
      "--name": null,
      "--pdb": false,
      "--print-config": false,
      "--priority": null,
      "--queue": false,
      "--s3": null,
      "--sql": null,
      "--tiny_db": null,
      "--unobserved": false,
      "COMMAND": null,
      "UPDATE": [],
      "help": false,
      "with": false
    }
  },
  "resources": [
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_train.csv",
      "experiments\\_resources\\X_train_7505524c54858300bbd92094092a6c39.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_val.csv",
      "experiments\\_resources\\X_val_4d078882cc1898640ddaf4ad9117f543.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_train.csv",
      "experiments\\_resources\\y_train_8112a5cf4faac882c421bcb7e3d42044.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_val.csv",
      "experiments\\_resources\\y_val_1155f648650986d8866eba603b86560c.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\X_test.csv",
      "experiments\\_resources\\X_test_46ff52696af9a4c06f6b25639525dda6.csv"
    ],
    [
      "C:\\Users\\skype\\source\\repos\\In\u017cynieria Uczenia Maszynowego\\data\\y_test.csv",
      "experiments\\_resources\\y_test_a6bc4827feae19934c4021d1f10f5963.csv"
    ]
  ],
  "result": null,
  "start_time": "2024-06-01T15:20:05.925811",
  "status": "COMPLETED",
  "stop_time": "2024-06-01T15:23:02.065167"
}
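run.json is the FileStorageObserver's full record of the run: the resources array pairs each file opened through _run.open_resource with its content-hashed copy under experiments/_resources/, artifacts lists files registered via _run.add_artifact, and "dirty": true flags uncommitted changes in the repository at run time. "result" is null because main() returns nothing; returning the metric from main would populate it.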
56963 experiments/_resources/X_test_46ff52696af9a4c06f6b25639525dda6.csv Normal file
File diff suppressed because it is too large
170884 experiments/_resources/X_train_7505524c54858300bbd92094092a6c39.csv Normal file
File diff suppressed because it is too large
56963 experiments/_resources/X_val_4d078882cc1898640ddaf4ad9117f543.csv Normal file
File diff suppressed because it is too large
56963 experiments/_resources/y_test_a6bc4827feae19934c4021d1f10f5963.csv Normal file
File diff suppressed because it is too large
170884 experiments/_resources/y_train_8112a5cf4faac882c421bcb7e3d42044.csv Normal file
File diff suppressed because it is too large
56963 experiments/_resources/y_val_1155f648650986d8866eba603b86560c.csv Normal file
File diff suppressed because it is too large
100 experiments/_sources/sacred_train_evaluation_69085ae4bcdbd49594dbaeed1ddb2e93.py Normal file
@@ -0,0 +1,100 @@
(Sacred's automatic source snapshot of the run; its contents are identical to sacred/sacred_train_evaluation.py, shown in full below.)
BIN sacred/model.keras Normal file
Binary file not shown.
100 sacred/sacred_train_evaluation.py Normal file
@@ -0,0 +1,100 @@
import os

os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
from sklearn.metrics import confusion_matrix
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver

ex = Experiment("464913")

# Each run is recorded twice: in MongoDB and under experiments/
# (the FileStorageObserver is what produced the experiments/708/ files above).
ex.observers.append(
    MongoObserver.create(
        url="mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017",
        db_name="sacred",
    )
)
ex.observers.append(FileStorageObserver("experiments"))


@ex.config
def my_config():
    learning_rate = 0.001
    epochs = 5


@ex.capture
def train_and_evaluate(_run, learning_rate, epochs):
    # open_resource both opens the file and registers it as a run resource
    # (hash-named copies land in experiments/_resources/).
    X_train = _run.open_resource("data/X_train.csv")
    X_val = _run.open_resource("data/X_val.csv")
    y_train = _run.open_resource("data/y_train.csv")
    y_val = _run.open_resource("data/y_val.csv")

    X_train = pd.read_csv(X_train)
    X_val = pd.read_csv(X_val)
    y_train = pd.read_csv(y_train)
    y_val = pd.read_csv(y_val)

    X_train = X_train.to_numpy()
    X_val = X_val.to_numpy()
    y_train = y_train.to_numpy()
    y_val = y_val.to_numpy()

    # Conv1D expects (samples, timesteps, channels).
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

    model = Sequential(
        [
            Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
            BatchNormalization(),
            Dropout(0.2),
            Conv1D(64, 2, activation="relu"),
            BatchNormalization(),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation="relu"),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
    )

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )

    model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        verbose=1,
    )

    model.save("sacred/model.keras")
    _run.add_artifact("sacred/model.keras")

    X_test = _run.open_resource("data/X_test.csv")
    y_test = _run.open_resource("data/y_test.csv")

    X_test = pd.read_csv(X_test)
    y_test = pd.read_csv(y_test)

    y_pred = model.predict(X_test)
    y_pred = y_pred >= 0.5

    cm = confusion_matrix(y_test, y_pred)
    # NB: cm[1, 1] / (cm[1, 0] + cm[1, 1]) is TP / (TP + FN), i.e. recall on
    # the positive (fraud) class, even though it is logged as "accuracy".
    accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])

    _run.log_scalar("accuracy", accuracy)


@ex.automain
def main(learning_rate, epochs):
    train_and_evaluate()
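Since the model and the test split are both archived, the logged number can be re-derived outside Sacred. A hypothetical sanity check, assuming the repository layout above and reshaping the test matrix the same way as the training data:

# Hypothetical check: reload the saved model and recompute the value
# logged as "accuracy" (positive-class recall) from the test split.
import keras
import pandas as pd
from sklearn.metrics import confusion_matrix

model = keras.models.load_model("sacred/model.keras")

X_test = pd.read_csv("data/X_test.csv").to_numpy()
y_test = pd.read_csv("data/y_test.csv").to_numpy().ravel()

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_pred = (model.predict(X_test) >= 0.5).ravel()

cm = confusion_matrix(y_test, y_pred)
print(float(cm[1, 1] / (cm[1, 0] + cm[1, 1])))  # ~0.8218 for run 708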