commit 18e26fed6b (parent 7dfbf0d939)
@@ -1,6 +1,6 @@
 node {
     checkout scm
-    def dockerImageIUM = docker.build("s444452/ium:1.3")
+    def dockerImageIUM = docker.build("s444452/ium:1.4")
     dockerImageIUM.inside {
         stage('Preparation') {
             properties([
@@ -1,7 +1,7 @@
 node {
     checkout scm
     try {
-        docker.image('s444452/ium:1.3').inside {
+        docker.image('s444452/ium:1.4').inside {
            stage('Preparation') {
                properties([
                    parameters([
@@ -1,5 +1,5 @@
 node {
-    docker.image('s444452/ium:1.3').inside {
+    docker.image('s444452/ium:1.4').inside {
        stage('Preparation') {
            properties([parameters([
                buildSelector(
@@ -1,7 +1,7 @@
 node {
     checkout scm
     try {
-        docker.image('s444452/ium:1.3').inside {
+        docker.image('s444452/ium:1.4').inside {
            stage('Preparation') {
                properties([
                    pipelineTriggers([upstream(threshold: hudson.model.Result.SUCCESS, upstreamProjects: "s444452-create-dataset")]),
MLproject (new file)
@@ -0,0 +1,23 @@
+name: s444452_fake_job_classification
+
+docker_env:
+  image: s444452/ium:1.4
+
+entry_points:
+  train:
+    parameters:
+      data_path: path
+      epochs: {type: float, default: 1}
+      num_words: {type: float, default: 20000}
+      batch_size: {type: float, default: 150}
+      pad_length: {type: float, default: 300}
+    command: "python3 Scripts/train_neural_network.py {data_path} {epochs} {num_words} {batch_size} {pad_length}"
+  evaluate:
+    parameters:
+      build_nr: {type: float, default: 0}
+      data_path: path
+      epochs: {type: float, default: 1}
+      num_words: {type: float, default: 20000}
+      batch_size: {type: float, default: 150}
+      pad_length: {type: float, default: 300}
+    command: "python3 Scripts/evaluate_neural_network.py {build_nr} {data_path} {epochs} {num_words} {batch_size} {pad_length}"
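For reference, an MLproject that declares a docker_env like the one above can be launched through MLflow's projects API (or the equivalent "mlflow run" CLI). A minimal sketch in Python; the data path and parameter values are illustrative placeholders, not values from the repository:

import mlflow

# Launches the "train" entry point defined in MLproject inside the
# s444452/ium:1.4 image; keys in `parameters` must match that entry
# point's "parameters" section. "Data" is a placeholder path.
submitted = mlflow.projects.run(
    uri=".",
    entry_point="train",
    parameters={"data_path": "Data", "epochs": 1},
)
print(submitted.get_status())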
@@ -11,6 +11,14 @@ import matplotlib.pyplot as plt
 from sacred.observers import MongoObserver
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
+import mlflow
+import logging
+
+logging.basicConfig(level=logging.WARN)
+logger = logging.getLogger(__name__)
+
+mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444452")
 
 ex = Experiment(name='s444452_fake_job_classification_evaluation', save_git_info=False)
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
@@ -51,8 +59,16 @@ def evaluate_and_save(model, x, y, abs_path, build_number, _run):
     y_predicted = (model.predict(x) >= 0.5).astype(int)
     evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
     with open(evaluation_file_path, 'a+') as f:
-        result = f'{build_number},{accuracy},{loss},{precision_score(y, y_predicted)},{recall_score(y, y_predicted)},{f1_score(y, y_predicted)}'
+        precision = precision_score(y, y_predicted)
+        recall = recall_score(y, y_predicted)
+        f1 = f1_score(y, y_predicted)
+        result = f'{build_number},{accuracy},{loss},{precision},{recall},{f1}'
         f.write(result + '\n')
+    mlflow.log_metric("accuracy", accuracy)
+    mlflow.log_metric("loss", loss)
+    mlflow.log_metric("precision", precision)
+    mlflow.log_metric("recall", recall)
+    mlflow.log_metric("f1_score", f1)
     # ex.log_scalar("loss", loss)
     _run.log_scalar("training.loss", loss)
     # ex.log_scalar("accuracy", accuracy)
@@ -102,15 +118,25 @@ def load_data(data_path, filename) -> pd.DataFrame:
 
 @ex.main
 def main(build_number, data_path, num_words, epochs, batch_size, pad_length, _run):
-    abs_data_path = os.path.abspath(data_path)
-    train_data = load_data(abs_data_path, 'train_data.csv')
-    test_data = load_data(abs_data_path, 'test_data.csv')
-    x_train, _ = split_data(train_data)
-    x_test, y_test = split_data(test_data)
-    x_test, _ = tokenize(pd.concat([x_train, x_test]), x_test)
-    model = load_trained_model()
-    evaluate_and_save(model, x_test, y_test, abs_data_path)
-    generate_and_save_comparison(abs_data_path)
+    with mlflow.start_run() as mlflow_run:
+        print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
+        mlflow.log_param("build_number", build_number)
+        mlflow.log_param("data_path", data_path)
+        mlflow.log_param("num_words", num_words)
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_param("pad_length", pad_length)
+
+        abs_data_path = os.path.abspath(data_path)
+        train_data = load_data(abs_data_path, 'train_data.csv')
+        test_data = load_data(abs_data_path, 'test_data.csv')
+        x_train, _ = split_data(train_data)
+        x_test, y_test = split_data(test_data)
+        x_test, _ = tokenize(pd.concat([x_train, x_test]), x_test)
+        model = load_trained_model()
+        evaluate_and_save(model, x_test, y_test, abs_data_path)
+        generate_and_save_comparison(abs_data_path)
 
 
 ex.run()
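Parameters and metrics logged this way can later be read back from the tracking server. A minimal sketch using the standard MlflowClient API; the run ID is a placeholder:

from mlflow.tracking import MlflowClient

client = MlflowClient(tracking_uri="http://172.17.0.1:5000")
run = client.get_run("<run_id>")  # placeholder: ID of a recorded run
print(run.data.params)   # build_number, data_path, epochs, ...
print(run.data.metrics)  # accuracy, loss, precision, recall, f1_score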
@@ -1,6 +1,9 @@
 #!/usr/bin/python
 import os
 import sys
+import warnings
+
+import numpy as np
 import pandas as pd
 from keras.models import Sequential
 from keras import layers
@@ -9,6 +12,15 @@ from keras.preprocessing.sequence import pad_sequences
 from sacred.observers import MongoObserver
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
+from mlflow.models.signature import infer_signature
+import mlflow
+import logging
+
+logging.basicConfig(level=logging.WARN)
+logger = logging.getLogger(__name__)
+
+mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444452")
 
 ex = Experiment(name='s444452_fake_job_classification_training', save_git_info=False)
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
@@ -80,15 +92,29 @@ def load_data(data_path, filename) -> pd.DataFrame:
 
 @ex.main
 def main(data_path, num_words, epochs, batch_size, pad_length, _run):
-    abs_data_path = os.path.abspath(data_path)
-    train_data = load_data(abs_data_path, 'train_data.csv')
-    test_data = load_data(abs_data_path, 'test_data.csv')
-    x_train, y_train = split_data(train_data)
-    x_test, _ = split_data(test_data)
-    x_train, vocab_size = tokenize(pd.concat([x_train, x_test]), x_train)
-    model = get_model(vocab_size)
-    train_model(model, x_train, y_train)
-    save_model(model)
+    with mlflow.start_run() as mlflow_run:
+        print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
+        mlflow.log_param("data_path", data_path)
+        mlflow.log_param("num_words", num_words)
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_param("pad_length", pad_length)
+
+        abs_data_path = os.path.abspath(data_path)
+        train_data = load_data(abs_data_path, 'train_data.csv')
+        test_data = load_data(abs_data_path, 'test_data.csv')
+        x_train, y_train = split_data(train_data)
+        x_test, _ = split_data(test_data)
+        x_train, vocab_size = tokenize(pd.concat([x_train, x_test]), x_train)
+        model = get_model(vocab_size)
+        train_model(model, x_train, y_train)
+        save_model(model)
+
+        signature = infer_signature(x_train, y_train)
+        input_example = np.array(x_test[:20])
+        mlflow.keras.log_model(model, "model", signature=signature, input_example=input_example)
 
 
+warnings.filterwarnings("ignore")
 ex.run()
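A Keras model logged with mlflow.keras.log_model, as above, can be reloaded from the tracking server by run ID. A minimal sketch; the run ID is a placeholder:

import mlflow

mlflow.set_tracking_uri("http://172.17.0.1:5000")
# "model" is the artifact path used in mlflow.keras.log_model above;
# <run_id> stands in for the ID of an actual training run.
model = mlflow.keras.load_model("runs:/<run_id>/model")
model.summary()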
@@ -1,12 +1,18 @@
 absl-py==1.0.0
+alembic==1.7.7
 astunparse==1.6.3
 cachetools==5.0.0
 certifi==2021.10.8
 charset-normalizer==2.0.12
+click==8.1.2
 cloudpickle==2.0.0
 colorama==0.4.4
 cycler==0.11.0
+databricks-cli==0.16.6
+docker==5.0.3
 docopt==0.6.2
+entrypoints==0.4
+Flask==2.1.2
 flatbuffers==2.0
 fonttools==4.33.3
 gast==0.5.3
@@ -15,10 +21,14 @@ GitPython==3.1.27
 google-auth==2.6.6
 google-auth-oauthlib==0.4.6
 google-pasta==0.2.0
+greenlet==1.1.2
 grpcio==1.44.0
+gunicorn==20.1.0
 h5py==3.6.0
 idna==3.3
 importlib-metadata==4.11.3
+itsdangerous==2.1.2
+Jinja2==3.1.2
 joblib==1.1.0
 jsonpickle==1.5.2
 kaggle==1.5.12
@@ -26,8 +36,11 @@ keras==2.8.0
 Keras-Preprocessing==1.1.2
 kiwisolver==1.4.2
 libclang==14.0.1
+Mako==1.2.0
 Markdown==3.3.6
+MarkupSafe==2.1.1
 matplotlib==3.5.2
+mlflow==1.25.1
 munch==2.5.0
 nltk==3.7
 numpy==1.22.3
@@ -36,15 +49,20 @@ opt-einsum==3.3.0
 packaging==21.3
 pandas==1.4.2
 Pillow==9.1.0
+prometheus-client==0.14.1
+prometheus-flask-exporter==0.20.1
 protobuf==3.20.1
 py-cpuinfo==8.0.0
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
+PyJWT==2.3.0
 pymongo==4.1.1
 pyparsing==3.0.8
 python-dateutil==2.8.2
 python-slugify==6.1.1
 pytz==2022.1
+PyYAML==6.0
+querystring-parser==1.2.4
 regex==2022.3.15
 requests==2.27.1
 requests-oauthlib==1.3.1
@@ -55,6 +73,9 @@ scipy==1.8.0
 six==1.16.0
 sklearn==0.0
 smmap==5.0.0
+SQLAlchemy==1.4.36
+sqlparse==0.4.2
+tabulate==0.8.9
 tensorboard==2.8.0
 tensorboard-data-server==0.6.1
 tensorboard-plugin-wit==1.8.1
@@ -67,6 +88,7 @@ threadpoolctl==3.1.0
 tqdm==4.64.0
 typing_extensions==4.2.0
 urllib3==1.26.9
+websocket-client==1.3.2
 Werkzeug==2.1.1
 wrapt==1.14.0
 zipp==3.8.0