ium_444417/lab8/trainScript.py
2022-05-14 11:50:56 +02:00

121 lines
3.6 KiB
Python

import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import mlflow
import mlflow.keras
from urllib.parse import urlparse
# Disabled override that would point MLflow at a tracking server on
# localhost:5000; while it stays commented out this script sets no tracking
# URI of its own.
# mlflow.set_tracking_uri('http://localhost:5000')
# Use the MLflow experiment named 's444417' for the run started in train().
mlflow.set_experiment('s444417')
# Training hyper-parameters, taken from the command line:
#     trainScript.py <number_of_epochs> <learning_rate>
DEFAULT_EPOCHS = 3
DEFAULT_LEARNING_RATE = 0.1


def parse_train_params(argv):
    """Return ``(epochs, learning_rate)`` parsed from *argv*.

    ``argv[1]`` is read as an integer number of epochs and ``argv[2]`` as a
    floating-point learning rate.

    If either argument is missing or cannot be parsed, BOTH values fall back
    to the defaults (3 epochs, learning rate 0.1), which is the fallback the
    script has always used.
    """
    try:
        # The learning rate is normally fractional, so it has to go through
        # float(): int("0.1") raises ValueError, which used to throw away
        # both command-line arguments without any notice.
        return int(argv[1]), float(argv[2])
    except (IndexError, ValueError):
        # IndexError: argument not supplied; ValueError: not a number.
        return DEFAULT_EPOCHS, DEFAULT_LEARNING_RATE


# Module-level names read by train().
numberOfEpochParam, learning_rate = parse_train_params(sys.argv)
def flatten(t):
    """Return a flat list holding the items of every sub-iterable of *t*.

    Only one level of nesting is removed; the items keep their original
    order.
    """
    flat = []
    for sublist in t:
        flat.extend(sublist)
    return flat
def train():
    """Fit the linear house-price model inside a single MLflow run.

    Steps, in order:

    1. Read ``Train.csv`` and ``Test.csv`` from ``../Participants_Data_HPP``
       relative to the directory of the running script, keeping only the
       selected feature columns plus the target column.
    2. Adapt a ``Normalization`` layer to the training features.
    3. Load the model stored under ``saved_model/MyModel_tf``; if loading
       raises, print the error and build a new ``Normalization`` +
       ``Dense(1)`` model compiled with MSE loss and Adam.
    4. Fit with a 0.33 validation split and save the model back to the same
       path.
    5. Evaluate on the test set, predict ten sampled test rows and write the
       predictions and expected values to ``../result.txt``.
    6. Log the epoch count, the learning rate and the smallest validation
       loss to MLflow.

    Reads the module-level ``numberOfEpochParam`` and ``learning_rate``.
    """
    target_column = 'TARGET(PRICE_IN_LACS)'
    selected_columns = [
        "UNDER_CONSTRUCTION",
        "RERA",
        "BHK_NO.",
        "SQUARE_FT",
        "READY_TO_MOVE",
        "RESALE",
        "LONGITUDE",
        "LATITUDE",
        target_column,
    ]
    model_dir = 'saved_model/MyModel_tf'

    with mlflow.start_run():
        mlflow.tensorflow.autolog()

        # Data files are located relative to the script, not the process CWD.
        script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
        train_csv = f"{script_dir}/../Participants_Data_HPP/Train.csv"
        test_csv = f"{script_dir}/../Participants_Data_HPP/Test.csv"

        train_frame = pd.read_csv(train_csv)[selected_columns]
        test_frame = pd.read_csv(test_csv)[selected_columns]

        # Split the training frame into features and labels.
        train_features = train_frame.copy()
        train_labels = train_features.pop(target_column)

        # The normalisation layer is adapted to the training features.
        normalizer = layers.Normalization()
        normalizer.adapt(train_features)

        # Ten sampled test rows, used only for the result.txt report.
        sample_features = test_frame.sample(10)
        sample_labels = sample_features.pop(target_column)

        # Full test set, split into features and expected prices.
        test_features = test_frame.copy()
        test_labels = test_features.pop(target_column)

        train_features = np.array(train_features)

        # Continue from a previously saved model when one can be loaded,
        # otherwise start from a fresh linear model.
        try:
            model = tf.keras.models.load_model(model_dir)
            print("open existing model")
        except Exception as load_error:
            print(load_error)
            model = tf.keras.Sequential([normalizer, layers.Dense(1)])
            mse_loss = tf.losses.MeanSquaredError()
            adam = tf.optimizers.Adam(learning_rate=learning_rate)
            model.compile(loss=mse_loss, optimizer=adam)
            print("creating new model")

        fit_history = model.fit(
            train_features,
            train_labels,
            epochs=int(numberOfEpochParam),
            validation_split=0.33,
            verbose=1,
        )

        model.save(model_dir, save_format='tf')

        # Per-epoch training history, used below for the final loss.
        history_frame = pd.DataFrame(fit_history.history)
        history_frame['epoch'] = fit_history.epoch

        test_results = {
            'linear_model': model.evaluate(
                test_features, test_labels, verbose=0),
        }

        sample_predictions = flatten(np.array(model.predict(sample_features)))

        with open(script_dir + "/../result.txt", "w+") as result_file:
            result_file.writelines([
                "predictions: " + str(sample_predictions) + '\n',
                "expected: " + str(sample_labels.to_numpy()),
            ])

        mlflow.log_param('epochs number', numberOfEpochParam)
        mlflow.log_param('learning rate', learning_rate)
        mlflow.log_metric('val loss', min(history_frame["val_loss"]))

        # Disabled: logging the model itself as an MLflow artifact.
        # signature = mlflow.models.signature.infer_signature(house_price_features, linear_model.predict(house_price_features))
        #
        # tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        #
        # if tracking_url_type_store != "file":
        #     mlflow.keras.log_model(linear_model, "linear-model", registered_model_name="HousePriceLinear", signature=signature)
        # else:
        #     mlflow.keras.log_model(linear_model, "model", signature=signature)
# Start training only when this file is executed as a script.
if __name__ == '__main__':
    train()