add mlflow

s444417 2022-05-14 11:50:56 +02:00
parent c9c0c36ba2
commit 9f2f81c67e
7 changed files with 143 additions and 5 deletions

@@ -6,3 +6,4 @@ Participants_Data_HPP
 my_runs
 saved_model
+mlruns

.gitignore (vendored, 2 changed lines)

@@ -224,3 +224,5 @@ Participants_Data_HPP/
 my_runs
 saved_model
+mlruns

@@ -15,6 +15,7 @@ RUN pip3 install matplotlib
 RUN pip3 install sacred
 RUN pip3 install sacred
 RUN pip3 install pymongo
+RUN pip3 install mlflow
 # RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
 WORKDIR /app

lab8/MLproject (new file, 13 lines)

@@ -0,0 +1,13 @@
name: tutorial
# conda_env: conda.yaml  # path to the conda.yaml file defining the environment
docker_env:
  image: mikolajk/ium:mlflow
entry_points:
  main:
    parameters:
      numberOfEpochParam: {type: float, default: 3}
      learning_rate: {type: float, default: 0.1}
    command: "python ./lab8/trainScript.py {numberOfEpochParam} {learning_rate}"

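The MLproject above declares numberOfEpochParam and learning_rate as parameters of the main entry point and executes the training command inside the mikolajk/ium:mlflow Docker image. As a rough sketch that is not part of this commit (the project path "." and the parameter values are assumptions), such a project could be launched programmatically with mlflow.projects.run:

# Hypothetical launcher for the MLproject defined above; illustration only.
# Assumes it is executed from the directory that contains the MLproject file.
import mlflow

submitted = mlflow.projects.run(
    uri=".",                      # directory containing the MLproject file
    entry_point="main",
    parameters={
        "numberOfEpochParam": 5,  # substituted into {numberOfEpochParam}
        "learning_rate": 0.01,    # substituted into {learning_rate}
    },
)
print("run_id:", submitted.run_id)

The CLI equivalent would be mlflow run . -P numberOfEpochParam=5 -P learning_rate=0.01; because the project uses docker_env, the mikolajk/ium:mlflow base image has to be available to Docker locally (or pullable).
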
lab8/trainScript.py (new file, 121 lines)

@@ -0,0 +1,121 @@
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import mlflow
import mlflow.keras
import mlflow.tensorflow
from urllib.parse import urlparse

# mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment('s444417')

# train params
numberOfEpochParam = 0
learning_rate = 0
try:
    numberOfEpochParam = int(sys.argv[1])
    learning_rate = float(sys.argv[2])
except (IndexError, ValueError):
    # default values
    numberOfEpochParam = 3
    learning_rate = 0.1


def flatten(t):
    return [item for sublist in t for item in sublist]


def train():
    with mlflow.start_run():
        mlflow.tensorflow.autolog()

        cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
        pathTrain = cwd + "/../Participants_Data_HPP/Train.csv"
        pathTest = cwd + "/../Participants_Data_HPP/Test.csv"
        features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE",
                    "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]

        # get train dataset
        house_price_train = pd.read_csv(pathTrain)[features]
        # get test dataset
        house_price_test = pd.read_csv(pathTest)[features]

        house_price_features = house_price_train.copy()
        # pop the label column
        house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')

        # normalize the input features
        normalize = layers.Normalization()
        normalize.adapt(house_price_features)

        feature_test_sample = house_price_test.sample(10)
        labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')

        house_price_test_features = house_price_test.copy()
        # pop the label column
        house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')

        house_price_features = np.array(house_price_features)

        # load the model if it exists, otherwise create a new one
        modelPath = 'saved_model/MyModel_tf'
        try:
            linear_model = tf.keras.models.load_model(modelPath)
            print("open existing model")
        except Exception as exception:
            print(exception)
            linear_model = tf.keras.Sequential([
                normalize,
                layers.Dense(1)
            ])
            linear_model.compile(loss=tf.losses.MeanSquaredError(),
                                 optimizer=tf.optimizers.Adam(learning_rate=learning_rate))
            print("creating new model")

        # train model
        history = linear_model.fit(
            house_price_features,
            house_price_labels,
            epochs=int(numberOfEpochParam),
            validation_split=0.33,
            verbose=1)

        # save model
        linear_model.save(modelPath, save_format='tf')
        # save model as artifact

        # final loss
        hist = pd.DataFrame(history.history)
        hist['epoch'] = history.epoch

        test_results = {}
        test_results['linear_model'] = linear_model.evaluate(
            house_price_test_features, house_price_test_expected, verbose=0)

        pred = np.array(linear_model.predict(feature_test_sample))
        flatten_pred = flatten(pred)

        with open(cwd + "/../result.txt", "w+") as resultFile:
            resultFile.write("predictions: " + str(flatten_pred) + '\n')
            resultFile.write("expected: " + str(labels_test_sample.to_numpy()))

        mlflow.log_param('epochs number', numberOfEpochParam)
        mlflow.log_param('learning rate', learning_rate)
        mlflow.log_metric('val loss', min(hist["val_loss"]))

        # signature = mlflow.models.signature.infer_signature(house_price_features, linear_model.predict(house_price_features))
        #
        # tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        #
        # if tracking_url_type_store != "file":
        #     mlflow.keras.log_model(linear_model, "linear-model", registered_model_name="HousePriceLinear", signature=signature)
        # else:
        #     mlflow.keras.log_model(linear_model, "model", signature=signature)


if __name__ == '__main__':
    train()

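trainScript.py logs 'epochs number' and 'learning rate' as parameters and 'val loss' as a metric to the s444417 experiment, in addition to whatever mlflow.tensorflow.autolog() captures. A minimal sketch of reading those values back through the tracking client, assuming the default local ./mlruns store that this commit adds to .gitignore:

# Illustration only: inspect the most recent run of the 's444417' experiment.
# Assumes the default local file store (./mlruns); pass tracking_uri to MlflowClient otherwise.
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name("s444417")
if experiment is not None:
    runs = client.search_runs(experiment_ids=[experiment.experiment_id], max_results=1)
    for run in runs:
        print("params :", run.data.params)    # 'epochs number', 'learning rate', ...
        print("metrics:", run.data.metrics)   # 'val loss' plus autologged metrics
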

@@ -1,2 +1,2 @@
-predictions: [185.41609, 41.248466, -66.347305, 112.55022, 106.2057, 11.261917, 75.81361, 184.90059, -3.6325989, 85.295105]
-expected: [ 96. 51. 8. 63. 25. 11. 80. 110. 85. 41.]
+predictions: [157.08437, 4.671051, 190.45694, 126.68617, 96.37216, 134.32784, 154.44032, -19.104736, 80.28882, 100.09464]
+expected: [110. 25.2 60. 70. 100. 110. 520. 18. 77. 32. ]