"""Train a linear house-price regression model and log the run to MLflow.

Usage: ``script.py [epochs] [learning_rate]``

Both arguments are optional. ``epochs`` is parsed as an int and
``learning_rate`` as a float; an argument that is missing or malformed falls
back to its own default without affecting the other one.
"""
import os
import sys
from urllib.parse import urlparse

import mlflow
import mlflow.keras
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

mlflow.set_tracking_uri("http://127.0.0.1:5000")
# Alternative tracking server address:
# mlflow.set_tracking_uri("http://172.17.0.1:5000")
mlflow.set_experiment('s444417')


def _cli_arg(position, cast, default):
    """Return ``cast(sys.argv[position])``, or *default* if absent or malformed.

    A missing argument falls back silently; a malformed one is reported on
    stderr so a typo does not go unnoticed.
    """
    try:
        raw = sys.argv[position]
    except IndexError:
        return default
    try:
        return cast(raw)
    except ValueError:
        print(
            f"ignoring invalid argument {raw!r}; using default {default!r}",
            file=sys.stderr,
        )
        return default


# Train params. Each one is parsed independently so that a bad or missing
# learning rate does not discard a valid epoch count (and vice versa).
# The learning rate must be parsed as a float: values such as 0.01 are not
# valid integer literals.
numberOfEpochParam = _cli_arg(1, int, 3)
learning_rate = _cli_arg(2, float, 0.1)


def flatten(t):
    """Flatten one level of nesting: ``[[1, 2], [3]]`` -> ``[1, 2, 3]``.

    Not called by ``train()`` in this file; kept as a module-level helper.
    """
    return [item for sublist in t for item in sublist]


def train():
    """Fit (or continue fitting) the linear model and log the run to MLflow.

    Reads ``Train.csv`` and ``Test.csv`` from ``../Participants_Data_HPP``
    relative to the script's directory, loads the model saved under
    ``saved_model/MyModel_tf`` if it can be loaded (otherwise builds a new
    ``Normalization -> Dense(1)`` model), trains it for ``numberOfEpochParam``
    epochs, saves it back to the same path, and logs the parameters, the best
    validation loss and the model itself to the active MLflow run.
    """
    with mlflow.start_run():
        cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
        data_dir = os.path.join(cwd, "..", "Participants_Data_HPP")
        path_train = os.path.join(data_dir, "Train.csv")
        path_test = os.path.join(data_dir, "Test.csv")

        target = 'TARGET(PRICE_IN_LACS)'
        features = [
            "UNDER_CONSTRUCTION",
            "RERA",
            "BHK_NO.",
            "SQUARE_FT",
            "READY_TO_MOVE",
            "RESALE",
            "LONGITUDE",
            "LATITUDE",
            target,
        ]

        # Load the train and test datasets, restricted to the columns used.
        house_price_train = pd.read_csv(path_train)[features]
        house_price_test = pd.read_csv(path_test)[features]

        # Split the training frame into inputs and the target column.
        house_price_features = house_price_train.copy()
        house_price_labels = house_price_features.pop(target)

        # The normalization layer learns its statistics from the training
        # inputs only.
        normalize = layers.Normalization()
        normalize.adapt(house_price_features)

        # Split the test frame the same way.
        house_price_test_features = house_price_test.copy()
        house_price_test_expected = house_price_test_features.pop(target)

        house_price_features = np.array(house_price_features)

        # Load the previously saved model if possible, otherwise create one.
        # Loading is best-effort: any failure is printed and a fresh model is
        # built instead.
        model_path = 'saved_model/MyModel_tf'
        try:
            linear_model = tf.keras.models.load_model(model_path)
            print("open existing model")
        except Exception as exception:
            print(exception)
            linear_model = tf.keras.Sequential([
                normalize,
                layers.Dense(1),
            ])
            linear_model.compile(
                loss=tf.losses.MeanSquaredError(),
                optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
            )
            print("creating new model")

        # Train the model.
        history = linear_model.fit(
            house_price_features,
            house_price_labels,
            epochs=int(numberOfEpochParam),
            validation_split=0.33,
            verbose=1,
        )

        # Save the model so the next run can continue from it.
        linear_model.save(model_path, save_format='tf')

        # Per-epoch training history.
        hist = pd.DataFrame(history.history)
        hist['epoch'] = history.epoch

        # Evaluate on the test set. NOTE(review): the result is computed but
        # not logged anywhere in this file.
        test_results = {}
        test_results['linear_model'] = linear_model.evaluate(
            house_price_test_features,
            house_price_test_expected,
            verbose=0,
        )

        mlflow.log_param('epochs', numberOfEpochParam)
        mlflow.log_param('learning_rate', learning_rate)
        # 'final_loss' is the lowest validation loss over all epochs. With
        # zero epochs the history is empty, so there is nothing to log.
        if "val_loss" in hist:
            mlflow.log_metric('final_loss', min(hist["val_loss"]))

        signature = mlflow.models.signature.infer_signature(
            house_price_features,
            linear_model.predict(house_price_features),
        )
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        # expected value is 49.7
        sample_input = [
            0.0, 0.0, 2.0, 904.129525, 1.000000, 1.000000, 20.098413, 79.107860,
        ]

        # A registered model name is only passed when the tracking URI is not
        # a plain file store.
        if tracking_url_type_store != "file":
            mlflow.keras.log_model(
                linear_model,
                "linear-model",
                registered_model_name="HousePriceLinear",
                signature=signature,
            )
        else:
            mlflow.keras.log_model(
                linear_model,
                "model",
                signature=signature,
                input_example=np.array(sample_input),
            )


if __name__ == '__main__':
    train()