add mlflow

2022-05-14 11:50:56 +02:00 · 2022-05-14 11:50:56 +02:00 · 9f2f81c67e
commit 9f2f81c67e
parent c9c0c36ba2
7 changed files with 143 additions and 5 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -5,4 +5,5 @@ venv
 Participants_Data_HPP

 my_runs
-saved_model
+saved_model
+mlruns
--- a/.gitignore
+++ b/.gitignore
@ -223,4 +223,6 @@ training_1
 Participants_Data_HPP/

 my_runs
-saved_model
+saved_model
+
+mlruns
--- a/1
+++ b/1
@ -15,6 +15,7 @@ RUN pip3 install matplotlib
 RUN pip3 install sacred
 RUN pip3 install sacred
 RUN pip3 install pymongo
+RUN pip3 install mlflow
 # RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle

 WORKDIR /app
--- a/README.md
+++ b/README.md
@ -21,4 +21,4 @@ Zadanie 2
 5. projekt odpala się po zakończeniu trenowania jenkinsfile3 build job oraz kopiuje sobie model copyArtifacts z uwzględnieniem brancha master
 6. copyArtifacts z s444417-create-dataset
 7. parametr BRANCH do wyboru konkretnej gałęzi, buildselector do wybrania builda w Jenkins.eval
-8. powiadomenie mail wraz z metryką loss wysyłane w pliku Jenkinsfile.eval post emailext
+8. powiadomienie mail wraz z metryką loss wysyłane w pliku Jenkinsfile.eval post emailext
--- a/lab8/MLproject
+++ b/lab8/MLproject
@ -0,0 +1,13 @@
+name: tutorial
+
+# conda_env: conda.yaml # path to the conda.yaml file that defines the environment
+    
+docker_env:
+  image: mikolajk/ium:mlflow
+
+entry_points:
+  main:
+    parameters:
+      numberOfEpochParam: {type: float, default: 3}
+      learning_rate: {type: float, default: 0.1}
+    command: "python ./lab8/trainScript.py {numberOfEpochParam} {learning_rate}"
--- a/lab8/trainScript.py
+++ b/lab8/trainScript.py
@ -0,0 +1,121 @@
+import os
+import sys
+import pandas as pd
+import numpy as np
+
+import tensorflow as tf
+from tensorflow.keras import layers
+import mlflow
+import mlflow.keras
+from urllib.parse import urlparse
+
+
+# mlflow.set_tracking_uri('http://localhost:5000')
+mlflow.set_experiment('s444417')  # runs started below are recorded under this experiment
+
+# train params
+def _parse_train_params(argv):
+  """Return (epochs, learning_rate) read from argv[1] and argv[2], or (3, 0.1) if missing/invalid."""
+  try:
+    # MLproject declares both parameters as float, so epochs may arrive as "3.0"
+    return int(float(argv[1])), float(argv[2])
+  except (IndexError, ValueError, OverflowError):
+    # default val: any missing or malformed argument falls back to both defaults
+    return 3, 0.1
+
+numberOfEpochParam, learning_rate = _parse_train_params(sys.argv)
+
+def flatten(t):
+      return [value for row in t for value in row]  # one flat list, rows kept in order
+
+def train():  # train the house-price model inside one MLflow run and log params/metrics
+  with mlflow.start_run():
+    mlflow.tensorflow.autolog()
+    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))  # directory of the launched script
+
+    pathTrain = cwd + "/../Participants_Data_HPP/Train.csv"
+    pathTest = cwd + "/../Participants_Data_HPP/Test.csv"
+
+    features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]
+
+    # load the training set, keeping only the selected columns
+    house_price_train = pd.read_csv(pathTrain)[features]
+
+    # load the test set with the same columns
+    house_price_test = pd.read_csv(pathTest)[features]
+
+
+    house_price_features = house_price_train.copy()
+    # split the target column off as the training labels
+    house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')
+
+    # fit the normalization layer on the training features
+    normalize = layers.Normalization()
+    normalize.adapt(house_price_features)
+    # 10 random test rows, used only for the predictions written to result.txt
+    feature_test_sample = house_price_test.sample(10)
+    labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')
+
+    house_price_test_features = house_price_test.copy()
+    # split the target column off as the expected test values
+    house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')
+
+    house_price_features = np.array(house_price_features)
+  
+    # reuse a previously saved model if one loads, otherwise build a fresh one
+    modelPath = 'saved_model/MyModel_tf'
+    try: 
+      linear_model = tf.keras.models.load_model(modelPath)
+      print("open existing model")
+    except Exception as exception:
+      print(exception)
+      linear_model = tf.keras.Sequential([
+        normalize,
+        layers.Dense(1)
+      ])
+      linear_model.compile(loss = tf.losses.MeanSquaredError(),
+                            optimizer = tf.optimizers.Adam(learning_rate=learning_rate))
+      print("creating new model")
+
+    # train model, holding out 33% of the data for validation
+    history = linear_model.fit(
+      house_price_features, 
+      house_price_labels, 
+      epochs=int(numberOfEpochParam), 
+      validation_split=0.33,
+      verbose=1,)
+
+    # save model
+    linear_model.save(modelPath, save_format='tf')
+    # NOTE(review): no explicit log_model call here; it is commented out after the run block
+
+    # per-epoch training history as a DataFrame
+    hist = pd.DataFrame(history.history)
+    hist['epoch'] = history.epoch
+    # evaluate on the full test set (the result is stored but not read afterwards)
+    test_results = {}
+    test_results['linear_model'] = linear_model.evaluate(
+        house_price_test_features, house_price_test_expected, verbose=0)
+
+    pred = np.array(linear_model.predict(feature_test_sample))
+    flatten_pred = flatten(pred)
+    # write the sample predictions next to their expected values
+    with open(cwd + "/../result.txt", "w+") as resultFile:
+      resultFile.write("predictions: " + str(flatten_pred) + '\n')
+      resultFile.write("expected: " + str(labels_test_sample.to_numpy()))
+
+    mlflow.log_param('epochs number', numberOfEpochParam)
+    mlflow.log_param('learning rate', learning_rate)
+    mlflow.log_metric('val loss', min(hist["val_loss"]))
+
+  # signature = mlflow.models.signature.infer_signature(house_price_features, linear_model.predict(house_price_features))
+  #
+  # tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
+  #
+  # if tracking_url_type_store != "file":
+  #     mlflow.keras.log_model(linear_model, "linear-model", registered_model_name="HousePriceLinear", signature=signature)
+  # else:
+  #     mlflow.keras.log_model(linear_model, "model", signature=signature)
+
+if __name__ == '__main__':  # run training only when executed as a script
+    train()
--- a/result.txt
+++ b/result.txt
@ -1,2 +1,2 @@
-predictions: [185.41609, 41.248466, -66.347305, 112.55022, 106.2057, 11.261917, 75.81361, 184.90059, -3.6325989, 85.295105]
-expected: [ 96.  51.   8.  63.  25.  11.  80. 110.  85.  41.]
+predictions: [157.08437, 4.671051, 190.45694, 126.68617, 96.37216, 134.32784, 154.44032, -19.104736, 80.28882, 100.09464]
+expected: [110.   25.2  60.   70.  100.  110.  520.   18.   77.   32. ]