add mlflow

s444417 2022-05-14 11:50:56 +02:00
parent c9c0c36ba2
commit 9f2f81c67e
7 changed files with 143 additions and 5 deletions

@@ -6,3 +6,4 @@ Participants_Data_HPP
 my_runs
 saved_model
+mlruns

.gitignore (vendored, 2 changed lines)

@@ -224,3 +224,5 @@ Participants_Data_HPP/
 my_runs
 saved_model
+mlruns

@@ -15,6 +15,7 @@ RUN pip3 install matplotlib
 RUN pip3 install sacred
 RUN pip3 install sacred
 RUN pip3 install pymongo
+RUN pip3 install mlflow
 # RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
 WORKDIR /app

lab8/MLproject (new file, 13 lines)

@@ -0,0 +1,13 @@
name: tutorial
# conda_env: conda.yaml  # path to the conda.yaml file defining the environment
docker_env:
  image: mikolajk/ium:mlflow
entry_points:
  main:
    parameters:
      numberOfEpochParam: {type: float, default: 3}
      learning_rate: {type: float, default: 0.1}
    command: "python ./lab8/trainScript.py {numberOfEpochParam} {learning_rate}"

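The MLproject above declares numberOfEpochParam and learning_rate as parameters of the main entry point and executes the training command inside the mikolajk/ium:mlflow Docker image. As a rough sketch that is not part of this commit (the project path "." and the parameter values are assumptions), such a project could be launched programmatically with mlflow.projects.run:

# Hypothetical launcher for the MLproject defined above; illustration only.
# Assumes it is executed from the directory that contains the MLproject file.
import mlflow

submitted = mlflow.projects.run(
    uri=".",                      # directory containing the MLproject file
    entry_point="main",
    parameters={
        "numberOfEpochParam": 5,  # substituted into {numberOfEpochParam}
        "learning_rate": 0.01,    # substituted into {learning_rate}
    },
)
print("run_id:", submitted.run_id)

The CLI equivalent would be mlflow run . -P numberOfEpochParam=5 -P learning_rate=0.01; because the project uses docker_env, the mikolajk/ium:mlflow base image has to be available to Docker locally (or pullable).
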
lab8/trainScript.py (new file, 121 lines)

@@ -0,0 +1,121 @@
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import mlflow
import mlflow.keras
import mlflow.tensorflow
from urllib.parse import urlparse

# mlflow.set_tracking_uri('http://localhost:5000')
mlflow.set_experiment('s444417')

# train params
numberOfEpochParam = 0
learning_rate = 0
try:
    numberOfEpochParam = int(sys.argv[1])
    learning_rate = float(sys.argv[2])
except (IndexError, ValueError):
    # default values
    numberOfEpochParam = 3
    learning_rate = 0.1


def flatten(t):
    return [item for sublist in t for item in sublist]


def train():
    with mlflow.start_run():
        mlflow.tensorflow.autolog()

        cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
        pathTrain = cwd + "/../Participants_Data_HPP/Train.csv"
        pathTest = cwd + "/../Participants_Data_HPP/Test.csv"
        features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE",
                    "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]

        # get train dataset
        house_price_train = pd.read_csv(pathTrain)[features]
        # get test dataset
        house_price_test = pd.read_csv(pathTest)[features]

        house_price_features = house_price_train.copy()
        # pop the label column
        house_price_labels = house_price_features.pop('TARGET(PRICE_IN_LACS)')

        # normalize the input features
        normalize = layers.Normalization()
        normalize.adapt(house_price_features)

        feature_test_sample = house_price_test.sample(10)
        labels_test_sample = feature_test_sample.pop('TARGET(PRICE_IN_LACS)')

        house_price_test_features = house_price_test.copy()
        # pop the label column
        house_price_test_expected = house_price_test_features.pop('TARGET(PRICE_IN_LACS)')

        house_price_features = np.array(house_price_features)

        # load the model if it exists, otherwise create a new one
        modelPath = 'saved_model/MyModel_tf'
        try:
            linear_model = tf.keras.models.load_model(modelPath)
            print("open existing model")
        except Exception as exception:
            print(exception)
            linear_model = tf.keras.Sequential([
                normalize,
                layers.Dense(1)
            ])
            linear_model.compile(loss=tf.losses.MeanSquaredError(),
                                 optimizer=tf.optimizers.Adam(learning_rate=learning_rate))
            print("creating new model")

        # train model
        history = linear_model.fit(
            house_price_features,
            house_price_labels,
            epochs=int(numberOfEpochParam),
            validation_split=0.33,
            verbose=1)

        # save model
        linear_model.save(modelPath, save_format='tf')
        # save model as artifact

        # final loss
        hist = pd.DataFrame(history.history)
        hist['epoch'] = history.epoch

        test_results = {}
        test_results['linear_model'] = linear_model.evaluate(
            house_price_test_features, house_price_test_expected, verbose=0)

        pred = np.array(linear_model.predict(feature_test_sample))
        flatten_pred = flatten(pred)

        with open(cwd + "/../result.txt", "w+") as resultFile:
            resultFile.write("predictions: " + str(flatten_pred) + '\n')
            resultFile.write("expected: " + str(labels_test_sample.to_numpy()))

        mlflow.log_param('epochs number', numberOfEpochParam)
        mlflow.log_param('learning rate', learning_rate)
        mlflow.log_metric('val loss', min(hist["val_loss"]))

        # signature = mlflow.models.signature.infer_signature(house_price_features, linear_model.predict(house_price_features))
        #
        # tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        #
        # if tracking_url_type_store != "file":
        #     mlflow.keras.log_model(linear_model, "linear-model", registered_model_name="HousePriceLinear", signature=signature)
        # else:
        #     mlflow.keras.log_model(linear_model, "model", signature=signature)


if __name__ == '__main__':
    train()

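trainScript.py logs 'epochs number' and 'learning rate' as parameters and 'val loss' as a metric to the s444417 experiment, in addition to whatever mlflow.tensorflow.autolog() captures. A minimal sketch of reading those values back through the tracking client, assuming the default local ./mlruns store that this commit adds to .gitignore:

# Illustration only: inspect the most recent run of the 's444417' experiment.
# Assumes the default local file store (./mlruns); pass tracking_uri to MlflowClient otherwise.
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment = client.get_experiment_by_name("s444417")
if experiment is not None:
    runs = client.search_runs(experiment_ids=[experiment.experiment_id], max_results=1)
    for run in runs:
        print("params :", run.data.params)    # 'epochs number', 'learning rate', ...
        print("metrics:", run.data.metrics)   # 'val loss' plus autologged metrics
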

@@ -1,2 +1,2 @@
-predictions: [185.41609, 41.248466, -66.347305, 112.55022, 106.2057, 11.261917, 75.81361, 184.90059, -3.6325989, 85.295105]
-expected: [ 96. 51. 8. 63. 25. 11. 80. 110. 85. 41.]
+predictions: [157.08437, 4.671051, 190.45694, 126.68617, 96.37216, 134.32784, 154.44032, -19.104736, 80.28882, 100.09464]
+expected: [110. 25.2 60. 70. 100. 110. 520. 18. 77. 32. ]