This commit is contained in:
parent
7dfbf0d939
commit
18e26fed6b
@ -1,6 +1,6 @@
|
|||||||
node {
|
node {
|
||||||
checkout scm
|
checkout scm
|
||||||
def dockerImageIUM = docker.build("s444452/ium:1.3")
|
def dockerImageIUM = docker.build("s444452/ium:1.4")
|
||||||
dockerImageIUM.inside {
|
dockerImageIUM.inside {
|
||||||
stage('Preparation') {
|
stage('Preparation') {
|
||||||
properties([
|
properties([
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
node {
|
node {
|
||||||
checkout scm
|
checkout scm
|
||||||
try {
|
try {
|
||||||
docker.image('s444452/ium:1.3').inside {
|
docker.image('s444452/ium:1.4').inside {
|
||||||
stage('Preparation') {
|
stage('Preparation') {
|
||||||
properties([
|
properties([
|
||||||
parameters([
|
parameters([
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
node {
|
node {
|
||||||
docker.image('s444452/ium:1.3').inside {
|
docker.image('s444452/ium:1.4').inside {
|
||||||
stage('Preparation') {
|
stage('Preparation') {
|
||||||
properties([parameters([
|
properties([parameters([
|
||||||
buildSelector(
|
buildSelector(
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
node {
|
node {
|
||||||
checkout scm
|
checkout scm
|
||||||
try {
|
try {
|
||||||
docker.image('s444452/ium:1.3').inside {
|
docker.image('s444452/ium:1.4').inside {
|
||||||
stage('Preparation') {
|
stage('Preparation') {
|
||||||
properties([
|
properties([
|
||||||
pipelineTriggers([upstream(threshold: hudson.model.Result.SUCCESS, upstreamProjects: "s444452-create-dataset")]),
|
pipelineTriggers([upstream(threshold: hudson.model.Result.SUCCESS, upstreamProjects: "s444452-create-dataset")]),
|
||||||
|
23
MLproject
Normal file
23
MLproject
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# MLflow project file: one Docker environment, two entry points.
name: s444452_fake_job_classification

# Docker image used as the project environment.
docker_env:
  image: s444452/ium:1.4

entry_points:
  # Training entry point; values are substituted into `command` positionally.
  train:
    parameters:
      data_path: path
      epochs:
        type: float
        default: 1
      num_words:
        type: float
        default: 20000
      batch_size:
        type: float
        default: 150
      pad_length:
        type: float
        default: 300
    command: "python3 Scripts/train_neural_network.py {data_path} {epochs} {num_words} {batch_size} {pad_length}"

  # Evaluation entry point; same parameters as `train`, preceded by a build number.
  evaluate:
    parameters:
      build_nr:
        type: float
        default: 0
      data_path: path
      epochs:
        type: float
        default: 1
      num_words:
        type: float
        default: 20000
      batch_size:
        type: float
        default: 150
      pad_length:
        type: float
        default: 300
    command: "python3 Scripts/evaluate_neural_network.py {build_nr} {data_path} {epochs} {num_words} {batch_size} {pad_length}"
|
@ -11,6 +11,14 @@ import matplotlib.pyplot as plt
|
|||||||
from sacred.observers import MongoObserver
|
from sacred.observers import MongoObserver
|
||||||
from sacred.observers import FileStorageObserver
|
from sacred.observers import FileStorageObserver
|
||||||
from sacred import Experiment
|
from sacred import Experiment
|
||||||
|
import mlflow
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.WARN)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
mlflow.set_tracking_uri("http://172.17.0.1:5000")
|
||||||
|
mlflow.set_experiment("s444452")
|
||||||
|
|
||||||
ex = Experiment(name='s444452_fake_job_classification_evaluation', save_git_info=False)
|
ex = Experiment(name='s444452_fake_job_classification_evaluation', save_git_info=False)
|
||||||
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
|
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
|
||||||
@ -51,8 +59,16 @@ def evaluate_and_save(model, x, y, abs_path, build_number, _run):
|
|||||||
y_predicted = (model.predict(x) >= 0.5).astype(int)
|
y_predicted = (model.predict(x) >= 0.5).astype(int)
|
||||||
evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
|
evaluation_file_path = os.path.join(abs_path, 'neural_network_evaluation.csv')
|
||||||
with open(evaluation_file_path, 'a+') as f:
|
with open(evaluation_file_path, 'a+') as f:
|
||||||
result = f'{build_number},{accuracy},{loss},{precision_score(y, y_predicted)},{recall_score(y, y_predicted)},{f1_score(y, y_predicted)}'
|
precision = precision_score(y, y_predicted)
|
||||||
|
recall = recall_score(y, y_predicted)
|
||||||
|
f1 = f1_score(y, y_predicted)
|
||||||
|
result = f'{build_number},{accuracy},{loss},{precision},{recall},{f1}'
|
||||||
f.write(result + '\n')
|
f.write(result + '\n')
|
||||||
|
mlflow.log_metric("accuracy", accuracy)
|
||||||
|
mlflow.log_metric("loss", loss)
|
||||||
|
mlflow.log_metric("precision", precision)
|
||||||
|
mlflow.log_metric("recall", recall)
|
||||||
|
mlflow.log_metric("f1_score", f1)
|
||||||
# ex.log_scalar("loss", loss)
|
# ex.log_scalar("loss", loss)
|
||||||
_run.log_scalar("training.loss", loss)
|
_run.log_scalar("training.loss", loss)
|
||||||
# ex.log_scalar("accuracy", accuracy)
|
# ex.log_scalar("accuracy", accuracy)
|
||||||
@ -102,6 +118,16 @@ def load_data(data_path, filename) -> pd.DataFrame:
|
|||||||
|
|
||||||
@ex.main
|
@ex.main
|
||||||
def main(build_number, data_path, num_words, epochs, batch_size, pad_length, _run):
|
def main(build_number, data_path, num_words, epochs, batch_size, pad_length, _run):
|
||||||
|
with mlflow.start_run() as mlflow_run:
|
||||||
|
print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
|
||||||
|
print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
|
||||||
|
mlflow.log_param("build_number", build_number)
|
||||||
|
mlflow.log_param("data_path", data_path)
|
||||||
|
mlflow.log_param("num_words", num_words)
|
||||||
|
mlflow.log_param("epochs", epochs)
|
||||||
|
mlflow.log_param("batch_size", batch_size)
|
||||||
|
mlflow.log_param("pad_length", pad_length)
|
||||||
|
|
||||||
abs_data_path = os.path.abspath(data_path)
|
abs_data_path = os.path.abspath(data_path)
|
||||||
train_data = load_data(abs_data_path, 'train_data.csv')
|
train_data = load_data(abs_data_path, 'train_data.csv')
|
||||||
test_data = load_data(abs_data_path, 'test_data.csv')
|
test_data = load_data(abs_data_path, 'test_data.csv')
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
from keras import layers
|
from keras import layers
|
||||||
@ -9,6 +12,15 @@ from keras.preprocessing.sequence import pad_sequences
|
|||||||
from sacred.observers import MongoObserver
|
from sacred.observers import MongoObserver
|
||||||
from sacred.observers import FileStorageObserver
|
from sacred.observers import FileStorageObserver
|
||||||
from sacred import Experiment
|
from sacred import Experiment
|
||||||
|
from mlflow.models.signature import infer_signature
|
||||||
|
import mlflow
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.WARN)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
mlflow.set_tracking_uri("http://172.17.0.1:5000")
|
||||||
|
mlflow.set_experiment("s444452")
|
||||||
|
|
||||||
ex = Experiment(name='s444452_fake_job_classification_training', save_git_info=False)
|
ex = Experiment(name='s444452_fake_job_classification_training', save_git_info=False)
|
||||||
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
|
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
|
||||||
@ -80,6 +92,15 @@ def load_data(data_path, filename) -> pd.DataFrame:
|
|||||||
|
|
||||||
@ex.main
|
@ex.main
|
||||||
def main(data_path, num_words, epochs, batch_size, pad_length, _run):
|
def main(data_path, num_words, epochs, batch_size, pad_length, _run):
|
||||||
|
with mlflow.start_run() as mlflow_run:
|
||||||
|
print("MLflow run experiment_id: {0}".format(mlflow_run.info.experiment_id))
|
||||||
|
print("MLflow run artifact_uri: {0}".format(mlflow_run.info.artifact_uri))
|
||||||
|
mlflow.log_param("data_path", data_path)
|
||||||
|
mlflow.log_param("num_words", num_words)
|
||||||
|
mlflow.log_param("epochs", epochs)
|
||||||
|
mlflow.log_param("batch_size", batch_size)
|
||||||
|
mlflow.log_param("pad_length", pad_length)
|
||||||
|
|
||||||
abs_data_path = os.path.abspath(data_path)
|
abs_data_path = os.path.abspath(data_path)
|
||||||
train_data = load_data(abs_data_path, 'train_data.csv')
|
train_data = load_data(abs_data_path, 'train_data.csv')
|
||||||
test_data = load_data(abs_data_path, 'test_data.csv')
|
test_data = load_data(abs_data_path, 'test_data.csv')
|
||||||
@ -90,5 +111,10 @@ def main(data_path, num_words, epochs, batch_size, pad_length, _run):
|
|||||||
train_model(model, x_train, y_train)
|
train_model(model, x_train, y_train)
|
||||||
save_model(model)
|
save_model(model)
|
||||||
|
|
||||||
|
signature = infer_signature(x_train, y_train)
|
||||||
|
input_example = np.array(x_test[:20])
|
||||||
|
mlflow.keras.log_model(model, "model", signature=signature, input_example=input_example)
|
||||||
|
|
||||||
|
|
||||||
|
warnings.filterwarnings("ignore")
|
||||||
ex.run()
|
ex.run()
|
||||||
|
@ -1,12 +1,18 @@
|
|||||||
absl-py==1.0.0
|
absl-py==1.0.0
|
||||||
|
alembic==1.7.7
|
||||||
astunparse==1.6.3
|
astunparse==1.6.3
|
||||||
cachetools==5.0.0
|
cachetools==5.0.0
|
||||||
certifi==2021.10.8
|
certifi==2021.10.8
|
||||||
charset-normalizer==2.0.12
|
charset-normalizer==2.0.12
|
||||||
click==8.1.2
|
click==8.1.2
|
||||||
|
cloudpickle==2.0.0
|
||||||
colorama==0.4.4
|
colorama==0.4.4
|
||||||
cycler==0.11.0
|
cycler==0.11.0
|
||||||
|
databricks-cli==0.16.6
|
||||||
|
docker==5.0.3
|
||||||
docopt==0.6.2
|
docopt==0.6.2
|
||||||
|
entrypoints==0.4
|
||||||
|
Flask==2.1.2
|
||||||
flatbuffers==2.0
|
flatbuffers==2.0
|
||||||
fonttools==4.33.3
|
fonttools==4.33.3
|
||||||
gast==0.5.3
|
gast==0.5.3
|
||||||
@ -15,10 +21,14 @@ GitPython==3.1.27
|
|||||||
google-auth==2.6.6
|
google-auth==2.6.6
|
||||||
google-auth-oauthlib==0.4.6
|
google-auth-oauthlib==0.4.6
|
||||||
google-pasta==0.2.0
|
google-pasta==0.2.0
|
||||||
|
greenlet==1.1.2
|
||||||
grpcio==1.44.0
|
grpcio==1.44.0
|
||||||
|
gunicorn==20.1.0
|
||||||
h5py==3.6.0
|
h5py==3.6.0
|
||||||
idna==3.3
|
idna==3.3
|
||||||
importlib-metadata==4.11.3
|
importlib-metadata==4.11.3
|
||||||
|
itsdangerous==2.1.2
|
||||||
|
Jinja2==3.1.2
|
||||||
joblib==1.1.0
|
joblib==1.1.0
|
||||||
jsonpickle==1.5.2
|
jsonpickle==1.5.2
|
||||||
kaggle==1.5.12
|
kaggle==1.5.12
|
||||||
@ -26,8 +36,11 @@ keras==2.8.0
|
|||||||
Keras-Preprocessing==1.1.2
|
Keras-Preprocessing==1.1.2
|
||||||
kiwisolver==1.4.2
|
kiwisolver==1.4.2
|
||||||
libclang==14.0.1
|
libclang==14.0.1
|
||||||
|
Mako==1.2.0
|
||||||
Markdown==3.3.6
|
Markdown==3.3.6
|
||||||
|
MarkupSafe==2.1.1
|
||||||
matplotlib==3.5.2
|
matplotlib==3.5.2
|
||||||
|
mlflow==1.25.1
|
||||||
munch==2.5.0
|
munch==2.5.0
|
||||||
nltk==3.7
|
nltk==3.7
|
||||||
numpy==1.22.3
|
numpy==1.22.3
|
||||||
@ -36,15 +49,20 @@ opt-einsum==3.3.0
|
|||||||
packaging==21.3
|
packaging==21.3
|
||||||
pandas==1.4.2
|
pandas==1.4.2
|
||||||
Pillow==9.1.0
|
Pillow==9.1.0
|
||||||
|
prometheus-client==0.14.1
|
||||||
|
prometheus-flask-exporter==0.20.1
|
||||||
protobuf==3.20.1
|
protobuf==3.20.1
|
||||||
py-cpuinfo==8.0.0
|
py-cpuinfo==8.0.0
|
||||||
pyasn1==0.4.8
|
pyasn1==0.4.8
|
||||||
pyasn1-modules==0.2.8
|
pyasn1-modules==0.2.8
|
||||||
|
PyJWT==2.3.0
|
||||||
pymongo==4.1.1
|
pymongo==4.1.1
|
||||||
pyparsing==3.0.8
|
pyparsing==3.0.8
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
python-slugify==6.1.1
|
python-slugify==6.1.1
|
||||||
pytz==2022.1
|
pytz==2022.1
|
||||||
|
PyYAML==6.0
|
||||||
|
querystring-parser==1.2.4
|
||||||
regex==2022.3.15
|
regex==2022.3.15
|
||||||
requests==2.27.1
|
requests==2.27.1
|
||||||
requests-oauthlib==1.3.1
|
requests-oauthlib==1.3.1
|
||||||
@ -55,6 +73,9 @@ scipy==1.8.0
|
|||||||
six==1.16.0
|
six==1.16.0
|
||||||
sklearn==0.0
|
sklearn==0.0
|
||||||
smmap==5.0.0
|
smmap==5.0.0
|
||||||
|
SQLAlchemy==1.4.36
|
||||||
|
sqlparse==0.4.2
|
||||||
|
tabulate==0.8.9
|
||||||
tensorboard==2.8.0
|
tensorboard==2.8.0
|
||||||
tensorboard-data-server==0.6.1
|
tensorboard-data-server==0.6.1
|
||||||
tensorboard-plugin-wit==1.8.1
|
tensorboard-plugin-wit==1.8.1
|
||||||
@ -67,6 +88,7 @@ threadpoolctl==3.1.0
|
|||||||
tqdm==4.64.0
|
tqdm==4.64.0
|
||||||
typing_extensions==4.2.0
|
typing_extensions==4.2.0
|
||||||
urllib3==1.26.9
|
urllib3==1.26.9
|
||||||
|
websocket-client==1.3.2
|
||||||
Werkzeug==2.1.1
|
Werkzeug==2.1.1
|
||||||
wrapt==1.14.0
|
wrapt==1.14.0
|
||||||
zipp==3.8.0
|
zipp==3.8.0
|
||||||
|
Loading…
Reference in New Issue
Block a user