import os
import sys
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import layers
import mlflow
import mlflow.keras
from urllib.parse import urlparse

# MLflow tracking configuration: where runs are sent and which experiment
# groups them.
# Alternative for a tracking server on the local machine:
# mlflow.set_tracking_uri("http://127.0.0.1:5000")
# NOTE(review): 172.17.0.1 is presumably the Docker bridge address of the
# host running the tracking server - confirm against the deployment.
TRACKING_URI = "http://172.17.0.1:5000"
EXPERIMENT_NAME = 's444417'

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

# train params
def read_cli_param(argv, position, convert, default):
  """Return ``convert(argv[position])``, or ``default`` if that is not possible.

  Args:
    argv: Sequence of command-line argument strings (e.g. ``sys.argv``).
    position: Index of the argument to read.
    convert: Callable turning the argument string into a value (``int``,
      ``float``, ...).
    default: Value returned when the argument is missing (``IndexError``)
      or cannot be converted (``ValueError``).
  """
  try:
    return convert(argv[position])
  except (IndexError, ValueError):
    return default


# Each argument falls back to its own default, so a missing or malformed
# learning rate no longer discards a valid epoch count.
numberOfEpochParam = read_cli_param(sys.argv, 1, int, 3)
# The learning rate is fractional, so it is parsed as float: int("0.01")
# raises ValueError, which previously forced both parameters to defaults.
learning_rate = read_cli_param(sys.argv, 2, float, 0.1)

def flatten(t):
  """Return one list with the items of every sub-iterable of ``t``, in order.

  Only a single level of nesting is removed.
  """
  flat = []
  for chunk in t:
    flat.extend(chunk)
  return flat

def train():
  """Fit the linear house-price model and record everything in one MLflow run.

  Steps:
    1. Read ``Train.csv`` and ``Test.csv`` from ``../Participants_Data_HPP``
       relative to the directory of the running script.
    2. Load the model stored under ``saved_model/MyModel_tf``; if loading
       fails, build a new ``Normalization`` + ``Dense(1)`` model compiled with
       mean squared error and Adam.
    3. Fit for ``numberOfEpochParam`` epochs (33% validation split), save the
       model back to the same path and evaluate it on the test set.
    4. Log parameters, metrics and the model to MLflow.

  Reads the module-level ``numberOfEpochParam`` and ``learning_rate``.
  ``learning_rate`` is only applied when a new model is created; a loaded
  model is used as it was saved.

  Returns:
    None.
  """
  target = 'TARGET(PRICE_IN_LACS)'
  features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", target]

  with mlflow.start_run():
    # Data lives next to the script's parent directory, not the current
    # working directory of the process.
    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
    data_dir = os.path.join(cwd, "..", "Participants_Data_HPP")

    # get train and test datasets, restricted to the columns used here
    house_price_train = pd.read_csv(os.path.join(data_dir, "Train.csv"))[features]
    house_price_test = pd.read_csv(os.path.join(data_dir, "Test.csv"))[features]

    # split the target column off the training inputs
    house_price_features = house_price_train.copy()
    house_price_labels = house_price_features.pop(target)

    # normalization statistics come from the training inputs only
    normalize = layers.Normalization()
    normalize.adapt(house_price_features)

    # split the target column off the test inputs
    house_price_test_features = house_price_test.copy()
    house_price_test_expected = house_price_test_features.pop(target)

    house_price_features = np.array(house_price_features)

    # load model if exists or create new
    modelPath = 'saved_model/MyModel_tf'
    try:
      linear_model = tf.keras.models.load_model(modelPath)
      print("open existing model")
    except Exception as exception:
      # Best effort by design: any load failure (typically no saved model
      # yet) is reported and training starts from a fresh model.
      print(exception)
      linear_model = tf.keras.Sequential([
        normalize,
        layers.Dense(1)
      ])
      linear_model.compile(loss = tf.losses.MeanSquaredError(),
                            optimizer = tf.optimizers.Adam(learning_rate=learning_rate))
      print("creating new model")

    # train model
    history = linear_model.fit(
      house_price_features,
      house_price_labels,
      epochs=int(numberOfEpochParam),
      validation_split=0.33,
      verbose=1,)

    # save model so the next invocation continues from it
    linear_model.save(modelPath, save_format='tf')

    # loss on the held-out test set
    test_loss = linear_model.evaluate(
        house_price_test_features, house_price_test_expected, verbose=0)

    mlflow.log_param('epochs', numberOfEpochParam)
    mlflow.log_param('learning_rate', learning_rate)

    # 'final_loss' is the lowest validation loss over all epochs. No value is
    # recorded when zero epochs were run, so that case is skipped instead of
    # failing on a missing "val_loss" entry.
    val_losses = history.history.get("val_loss", [])
    if val_losses:
      mlflow.log_metric('final_loss', min(val_losses))
    # evaluate() yields a scalar, or a list starting with the loss when the
    # model was compiled with extra metrics; handle both.
    mlflow.log_metric('test_loss', float(np.atleast_1d(test_loss)[0]))

    # The model is logged while the run is still active so that it lands in
    # the same run as the parameters and metrics above. Outside the `with`
    # block MLflow would start a separate run for it.
    signature = mlflow.models.signature.infer_signature(house_price_features, linear_model.predict(house_price_features))

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    # One row in the column order of `features` (without the target);
    # expected value is 49.7
    sampleInp = [0.0, 0.0, 2.0, 904.129525, 1.000000, 1.000000, 20.098413, 79.107860]

    if tracking_url_type_store != "file":
      # A non-file tracking store is used here to also register the model.
      mlflow.keras.log_model(linear_model, "linear-model", registered_model_name="HousePriceLinear", signature=signature)
    else:
      # The example is a batch of one row (shape (1, 8)) to match the
      # 2-D input the signature was inferred from.
      mlflow.keras.log_model(linear_model, "model", signature=signature, input_example=np.array([sampleInp]))

# Start training only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    train()