mlflow
Some checks failed: s444452-training/pipeline/head (there was a failure building this commit)

AdamOsiowy123 2022-05-15 22:20:18 +02:00
parent 7dfbf0d939
commit 18e26fed6b
8 changed files with 120 additions and 23 deletions


@@ -1,6 +1,6 @@
 node {
     checkout scm
-    def dockerImageIUM = docker.build("s444452/ium:1.3")
+    def dockerImageIUM = docker.build("s444452/ium:1.4")
     dockerImageIUM.inside {
         stage('Preparation') {
             properties([


@@ -1,7 +1,7 @@
 node {
     checkout scm
     try {
-        docker.image('s444452/ium:1.3').inside {
+        docker.image('s444452/ium:1.4').inside {
             stage('Preparation') {
                 properties([
                     parameters([


@@ -1,5 +1,5 @@
 node {
-    docker.image('s444452/ium:1.3').inside {
+    docker.image('s444452/ium:1.4').inside {
         stage('Preparation') {
             properties([parameters([
                 buildSelector(


@@ -1,7 +1,7 @@
 node {
     checkout scm
     try {
-        docker.image('s444452/ium:1.3').inside {
+        docker.image('s444452/ium:1.4').inside {
             stage('Preparation') {
                 properties([
                     pipelineTriggers([upstream(threshold: hudson.model.Result.SUCCESS, upstreamProjects: "s444452-create-dataset")]),

MLproject Normal file

@@ -0,0 +1,23 @@
+name: s444452_fake_job_classification
+docker_env:
+  image: s444452/ium:1.4
+entry_points:
+  train:
+    parameters:
+      data_path: path
+      epochs: {type: float, default: 1}
+      num_words: {type: float, default: 20000}
+      batch_size: {type: float, default: 150}
+      pad_length: {type: float, default: 300}
+    command: "python3 Scripts/train_neural_network.py {data_path} {epochs} {num_words} {batch_size} {pad_length}"
+  evaluate:
+    parameters:
+      build_nr: {type: float, default: 0}
+      data_path: path
+      epochs: {type: float, default: 1}
+      num_words: {type: float, default: 20000}
+      batch_size: {type: float, default: 150}
+      pad_length: {type: float, default: 300}
+    command: "python3 Scripts/evaluate_neural_network.py {build_nr} {data_path} {epochs} {num_words} {batch_size} {pad_length}"

Scripts/evaluate_neural_network.py

@@ -11,6 +11,14 @@ import matplotlib.pyplot as plt
 from sacred.observers import MongoObserver
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
+import mlflow
+import logging
+
+logging.basicConfig(level=logging.WARN)
+logger = logging.getLogger(__name__)
+
+mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444452")
 
 ex = Experiment(name='s444452_fake_job_classification_evaluation', save_git_info=False)
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
@@ -51,8 +59,16 @@ def evaluate_and_save(model, x, y, abs_path, build_number, _run):
     y_predicted = (model.predict(x) >= 0.5).astype(int)
     evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
     with open(evaluation_file_path, 'a+') as f:
-        result = f'{build_number},{accuracy},{loss},{precision_score(y, y_predicted)},{recall_score(y, y_predicted)},{f1_score(y, y_predicted)}'
+        precision = precision_score(y, y_predicted)
+        recall = recall_score(y, y_predicted)
+        f1 = f1_score(y, y_predicted)
+        result = f'{build_number},{accuracy},{loss},{precision},{recall},{f1}'
         f.write(result + '\n')
+    mlflow.log_metric("accuracy", accuracy)
+    mlflow.log_metric("loss", loss)
+    mlflow.log_metric("precision", precision)
+    mlflow.log_metric("recall", recall)
+    mlflow.log_metric("f1_score", f1)
     # ex.log_scalar("loss", loss)
     _run.log_scalar("training.loss", loss)
     # ex.log_scalar("accuracy", accuracy)
@@ -102,15 +118,25 @@ def load_data(data_path, filename) -> pd.DataFrame:
 @ex.main
 def main(build_number, data_path, num_words, epochs, batch_size, pad_length, _run):
-    abs_data_path = os.path.abspath(data_path)
-    train_data = load_data(abs_data_path, 'train_data.csv')
-    test_data = load_data(abs_data_path, 'test_data.csv')
-    x_train, _ = split_data(train_data)
-    x_test, y_test = split_data(test_data)
-    x_test, _ = tokenize(pd.concat([x_train, x_test]), x_test)
-    model = load_trained_model()
-    evaluate_and_save(model, x_test, y_test, abs_data_path)
-    generate_and_save_comparison(abs_data_path)
+    with mlflow.start_run() as mlflow_run:
+        print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
+        mlflow.log_param("build_number", build_number)
+        mlflow.log_param("data_path", data_path)
+        mlflow.log_param("num_words", num_words)
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_param("pad_length", pad_length)
+        abs_data_path = os.path.abspath(data_path)
+        train_data = load_data(abs_data_path, 'train_data.csv')
+        test_data = load_data(abs_data_path, 'test_data.csv')
+        x_train, _ = split_data(train_data)
+        x_test, y_test = split_data(test_data)
+        x_test, _ = tokenize(pd.concat([x_train, x_test]), x_test)
+        model = load_trained_model()
+        evaluate_and_save(model, x_test, y_test, abs_data_path)
+        generate_and_save_comparison(abs_data_path)
 
 
 ex.run()

Scripts/train_neural_network.py

@@ -1,6 +1,9 @@
 #!/usr/bin/python
 import os
 import sys
+import warnings
+
+import numpy as np
 import pandas as pd
 from keras.models import Sequential
 from keras import layers
@@ -9,6 +12,15 @@ from keras.preprocessing.sequence import pad_sequences
 from sacred.observers import MongoObserver
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
+from mlflow.models.signature import infer_signature
+import mlflow
+import logging
+
+logging.basicConfig(level=logging.WARN)
+logger = logging.getLogger(__name__)
+
+mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444452")
 
 ex = Experiment(name='s444452_fake_job_classification_training', save_git_info=False)
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
@@ -80,15 +92,29 @@ def load_data(data_path, filename) -> pd.DataFrame:
 @ex.main
 def main(data_path, num_words, epochs, batch_size, pad_length, _run):
-    abs_data_path = os.path.abspath(data_path)
-    train_data = load_data(abs_data_path, 'train_data.csv')
-    test_data = load_data(abs_data_path, 'test_data.csv')
-    x_train, y_train = split_data(train_data)
-    x_test, _ = split_data(test_data)
-    x_train, vocab_size = tokenize(pd.concat([x_train, x_test]), x_train)
-    model = get_model(vocab_size)
-    train_model(model, x_train, y_train)
-    save_model(model)
+    with mlflow.start_run() as mlflow_run:
+        print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
+        mlflow.log_param("data_path", data_path)
+        mlflow.log_param("num_words", num_words)
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_param("pad_length", pad_length)
+        abs_data_path = os.path.abspath(data_path)
+        train_data = load_data(abs_data_path, 'train_data.csv')
+        test_data = load_data(abs_data_path, 'test_data.csv')
+        x_train, y_train = split_data(train_data)
+        x_test, _ = split_data(test_data)
+        x_train, vocab_size = tokenize(pd.concat([x_train, x_test]), x_train)
+        model = get_model(vocab_size)
+        train_model(model, x_train, y_train)
+        save_model(model)
+        signature = infer_signature(x_train, y_train)
+        input_example = np.array(x_test[:20])
+        mlflow.keras.log_model(model, "model", signature=signature, input_example=input_example)
 
 
+warnings.filterwarnings("ignore")
 ex.run()

requirements.txt

@@ -1,12 +1,18 @@
 absl-py==1.0.0
+alembic==1.7.7
 astunparse==1.6.3
 cachetools==5.0.0
 certifi==2021.10.8
 charset-normalizer==2.0.12
 click==8.1.2
+cloudpickle==2.0.0
 colorama==0.4.4
 cycler==0.11.0
+databricks-cli==0.16.6
+docker==5.0.3
 docopt==0.6.2
+entrypoints==0.4
+Flask==2.1.2
 flatbuffers==2.0
 fonttools==4.33.3
 gast==0.5.3
@@ -15,10 +21,14 @@ GitPython==3.1.27
 google-auth==2.6.6
 google-auth-oauthlib==0.4.6
 google-pasta==0.2.0
+greenlet==1.1.2
 grpcio==1.44.0
+gunicorn==20.1.0
 h5py==3.6.0
 idna==3.3
 importlib-metadata==4.11.3
+itsdangerous==2.1.2
+Jinja2==3.1.2
 joblib==1.1.0
 jsonpickle==1.5.2
 kaggle==1.5.12
@@ -26,8 +36,11 @@ keras==2.8.0
 Keras-Preprocessing==1.1.2
 kiwisolver==1.4.2
 libclang==14.0.1
+Mako==1.2.0
 Markdown==3.3.6
+MarkupSafe==2.1.1
 matplotlib==3.5.2
+mlflow==1.25.1
 munch==2.5.0
 nltk==3.7
 numpy==1.22.3
@@ -36,15 +49,20 @@ opt-einsum==3.3.0
 packaging==21.3
 pandas==1.4.2
 Pillow==9.1.0
+prometheus-client==0.14.1
+prometheus-flask-exporter==0.20.1
 protobuf==3.20.1
 py-cpuinfo==8.0.0
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
+PyJWT==2.3.0
 pymongo==4.1.1
 pyparsing==3.0.8
 python-dateutil==2.8.2
 python-slugify==6.1.1
 pytz==2022.1
+PyYAML==6.0
+querystring-parser==1.2.4
 regex==2022.3.15
 requests==2.27.1
 requests-oauthlib==1.3.1
@@ -55,6 +73,9 @@ scipy==1.8.0
 six==1.16.0
 sklearn==0.0
 smmap==5.0.0
+SQLAlchemy==1.4.36
+sqlparse==0.4.2
+tabulate==0.8.9
 tensorboard==2.8.0
 tensorboard-data-server==0.6.1
 tensorboard-plugin-wit==1.8.1
@@ -67,6 +88,7 @@ threadpoolctl==3.1.0
 tqdm==4.64.0
 typing_extensions==4.2.0
 urllib3==1.26.9
+websocket-client==1.3.2
 Werkzeug==2.1.1
 wrapt==1.14.0
 zipp==3.8.0