mlflow
Some checks failed: s444452-training/pipeline/head reported a failure building this commit.

AdamOsiowy123 2022-05-15 22:20:18 +02:00
parent 7dfbf0d939
commit 18e26fed6b
8 changed files with 120 additions and 23 deletions


@@ -1,6 +1,6 @@
 node {
     checkout scm
-    def dockerImageIUM = docker.build("s444452/ium:1.3")
+    def dockerImageIUM = docker.build("s444452/ium:1.4")
     dockerImageIUM.inside {
         stage('Preparation') {
             properties([


@@ -1,7 +1,7 @@
 node {
     checkout scm
     try {
-        docker.image('s444452/ium:1.3').inside {
+        docker.image('s444452/ium:1.4').inside {
             stage('Preparation') {
                 properties([
                     parameters([


@@ -1,5 +1,5 @@
 node {
-    docker.image('s444452/ium:1.3').inside {
+    docker.image('s444452/ium:1.4').inside {
         stage('Preparation') {
             properties([parameters([
                 buildSelector(


@@ -1,7 +1,7 @@
 node {
     checkout scm
     try {
-        docker.image('s444452/ium:1.3').inside {
+        docker.image('s444452/ium:1.4').inside {
             stage('Preparation') {
                 properties([
                     pipelineTriggers([upstream(threshold: hudson.model.Result.SUCCESS, upstreamProjects: "s444452-create-dataset")]),

MLproject (new file, 23 lines)

@@ -0,0 +1,23 @@
+name: s444452_fake_job_classification
+docker_env:
+  image: s444452/ium:1.4
+entry_points:
+  train:
+    parameters:
+      data_path: path
+      epochs: {type: float, default: 1}
+      num_words: {type: float, default: 20000}
+      batch_size: {type: float, default: 150}
+      pad_length: {type: float, default: 300}
+    command: "python3 Scripts/train_neural_network.py {data_path} {epochs} {num_words} {batch_size} {pad_length}"
+  evaluate:
+    parameters:
+      build_nr: {type: float, default: 0}
+      data_path: path
+      epochs: {type: float, default: 1}
+      num_words: {type: float, default: 20000}
+      batch_size: {type: float, default: 150}
+      pad_length: {type: float, default: 300}
+    command: "python3 Scripts/evaluate_neural_network.py {build_nr} {data_path} {epochs} {num_words} {batch_size} {pad_length}"


@@ -11,6 +11,14 @@ import matplotlib.pyplot as plt
 from sacred.observers import MongoObserver
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
+import mlflow
+import logging
+logging.basicConfig(level=logging.WARN)
+logger = logging.getLogger(__name__)
+mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444452")
 ex = Experiment(name='s444452_fake_job_classification_evaluation', save_git_info=False)
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
@@ -51,8 +59,16 @@ def evaluate_and_save(model, x, y, abs_path, build_number, _run):
     y_predicted = (model.predict(x) >= 0.5).astype(int)
     evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
     with open(evaluation_file_path, 'a+') as f:
-        result = f'{build_number},{accuracy},{loss},{precision_score(y, y_predicted)},{recall_score(y, y_predicted)},{f1_score(y, y_predicted)}'
+        precision = precision_score(y, y_predicted)
+        recall = recall_score(y, y_predicted)
+        f1 = f1_score(y, y_predicted)
+        result = f'{build_number},{accuracy},{loss},{precision},{recall},{f1}'
         f.write(result + '\n')
+    mlflow.log_metric("accuracy", accuracy)
+    mlflow.log_metric("loss", loss)
+    mlflow.log_metric("precision", precision)
+    mlflow.log_metric("recall", recall)
+    mlflow.log_metric("f1_score", f1)
     # ex.log_scalar("loss", loss)
     _run.log_scalar("training.loss", loss)
     # ex.log_scalar("accuracy", accuracy)
@@ -102,6 +118,16 @@ def load_data(data_path, filename) -> pd.DataFrame:
 @ex.main
 def main(build_number, data_path, num_words, epochs, batch_size, pad_length, _run):
+    with mlflow.start_run() as mlflow_run:
+        print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
+        mlflow.log_param("build_number", build_number)
+        mlflow.log_param("data_path", data_path)
+        mlflow.log_param("num_words", num_words)
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_param("pad_length", pad_length)
     abs_data_path = os.path.abspath(data_path)
     train_data = load_data(abs_data_path, 'train_data.csv')
     test_data = load_data(abs_data_path, 'test_data.csv')
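Because the evaluation script now points at the tracking server above and logs both parameters and metrics, the results could later be queried back from that server; a minimal sketch using MlflowClient (illustrative, not part of this commit):

from mlflow.tracking import MlflowClient

client = MlflowClient(tracking_uri="http://172.17.0.1:5000")
experiment = client.get_experiment_by_name("s444452")

# Fetch the most recent run in the experiment and inspect what was logged.
latest = client.search_runs(
    [experiment.experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=1,
)[0]
print(latest.data.params)   # build_number, data_path, epochs, ...
print(latest.data.metrics)  # accuracy, loss, precision, recall, f1_score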


@@ -1,6 +1,9 @@
 #!/usr/bin/python
 import os
 import sys
+import warnings
+import numpy as np
 import pandas as pd
 from keras.models import Sequential
 from keras import layers
@@ -9,6 +12,15 @@ from keras.preprocessing.sequence import pad_sequences
 from sacred.observers import MongoObserver
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
+from mlflow.models.signature import infer_signature
+import mlflow
+import logging
+logging.basicConfig(level=logging.WARN)
+logger = logging.getLogger(__name__)
+mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444452")
 ex = Experiment(name='s444452_fake_job_classification_training', save_git_info=False)
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
@@ -80,6 +92,15 @@ def load_data(data_path, filename) -> pd.DataFrame:
 @ex.main
 def main(data_path, num_words, epochs, batch_size, pad_length, _run):
+    with mlflow.start_run() as mlflow_run:
+        print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
+        mlflow.log_param("data_path", data_path)
+        mlflow.log_param("num_words", num_words)
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_param("pad_length", pad_length)
     abs_data_path = os.path.abspath(data_path)
     train_data = load_data(abs_data_path, 'train_data.csv')
     test_data = load_data(abs_data_path, 'test_data.csv')
@@ -90,5 +111,10 @@ def main(data_path, num_words, epochs, batch_size, pad_length, _run):
     train_model(model, x_train, y_train)
     save_model(model)
+    signature = infer_signature(x_train, y_train)
+    input_example = np.array(x_test[:20])
+    mlflow.keras.log_model(model, "model", signature=signature, input_example=input_example)
 
+warnings.filterwarnings("ignore")
 ex.run()
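With mlflow.keras.log_model storing the network together with its signature and input example, the artifact could later be reloaded from the tracking server; a minimal sketch (the run ID is a placeholder):

import mlflow
import mlflow.keras
import numpy as np

mlflow.set_tracking_uri("http://172.17.0.1:5000")

# "<run_id>" is a placeholder; take a real run ID from the "s444452" experiment.
model = mlflow.keras.load_model("runs:/<run_id>/model")

# Dummy batch shaped like the logged input_example: 20 padded sequences of pad_length 300.
dummy_batch = np.zeros((20, 300), dtype=np.int32)
print(model.predict(dummy_batch))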


@@ -1,12 +1,18 @@
 absl-py==1.0.0
+alembic==1.7.7
 astunparse==1.6.3
 cachetools==5.0.0
 certifi==2021.10.8
 charset-normalizer==2.0.12
 click==8.1.2
+cloudpickle==2.0.0
 colorama==0.4.4
 cycler==0.11.0
+databricks-cli==0.16.6
+docker==5.0.3
 docopt==0.6.2
+entrypoints==0.4
+Flask==2.1.2
 flatbuffers==2.0
 fonttools==4.33.3
 gast==0.5.3
@@ -15,10 +21,14 @@ GitPython==3.1.27
 google-auth==2.6.6
 google-auth-oauthlib==0.4.6
 google-pasta==0.2.0
+greenlet==1.1.2
 grpcio==1.44.0
+gunicorn==20.1.0
 h5py==3.6.0
 idna==3.3
 importlib-metadata==4.11.3
+itsdangerous==2.1.2
+Jinja2==3.1.2
 joblib==1.1.0
 jsonpickle==1.5.2
 kaggle==1.5.12
@@ -26,8 +36,11 @@ keras==2.8.0
 Keras-Preprocessing==1.1.2
 kiwisolver==1.4.2
 libclang==14.0.1
+Mako==1.2.0
 Markdown==3.3.6
+MarkupSafe==2.1.1
 matplotlib==3.5.2
+mlflow==1.25.1
 munch==2.5.0
 nltk==3.7
 numpy==1.22.3
@@ -36,15 +49,20 @@ opt-einsum==3.3.0
 packaging==21.3
 pandas==1.4.2
 Pillow==9.1.0
+prometheus-client==0.14.1
+prometheus-flask-exporter==0.20.1
 protobuf==3.20.1
 py-cpuinfo==8.0.0
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
+PyJWT==2.3.0
 pymongo==4.1.1
 pyparsing==3.0.8
 python-dateutil==2.8.2
 python-slugify==6.1.1
 pytz==2022.1
+PyYAML==6.0
+querystring-parser==1.2.4
 regex==2022.3.15
 requests==2.27.1
 requests-oauthlib==1.3.1
@@ -55,6 +73,9 @@ scipy==1.8.0
 six==1.16.0
 sklearn==0.0
 smmap==5.0.0
+SQLAlchemy==1.4.36
+sqlparse==0.4.2
+tabulate==0.8.9
 tensorboard==2.8.0
 tensorboard-data-server==0.6.1
 tensorboard-plugin-wit==1.8.1
@@ -67,6 +88,7 @@ threadpoolctl==3.1.0
 tqdm==4.64.0
 typing_extensions==4.2.0
 urllib3==1.26.9
+websocket-client==1.3.2
 Werkzeug==2.1.1
 wrapt==1.14.0
 zipp==3.8.0