# Source: ium_444417/lab8/trainScript.py
# (Repository viewer metadata: 121 lines, 3.6 KiB, Python;
#  revision timestamp 2022-05-14 11:50:56 +02:00.)
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import mlflow
import mlflow.keras
from urllib.parse import urlparse
# (revision timestamp from repository view: 2022-05-14 12:12:20 +02:00)
# Point MLflow at the tracking server and select the experiment that every
# run started by this script is recorded under.
# NOTE(review): the tracking address is hard-coded (172.17.0.1 is presumably
# the Docker bridge gateway of the CI host -- confirm before reusing elsewhere).
mlflow.set_tracking_uri("http://172.17.0.1:5000")
mlflow.set_experiment('s444417')
# train params
DEFAULT_EPOCHS = 3
DEFAULT_LEARNING_RATE = 0.1


def parse_train_params(argv):
    """Extract the training parameters from a command-line argument list.

    ``argv[1]`` is the number of epochs (integer) and ``argv[2]`` is the
    learning rate (float).  Each value is parsed independently: a missing or
    malformed argument falls back to its own default without discarding the
    other, valid argument.

    Args:
        argv: Argument list in ``sys.argv`` layout (script name first).

    Returns:
        A ``(epochs, learning_rate)`` tuple of ``(int, float)``.
    """
    try:
        epochs = int(argv[1])
    except (IndexError, ValueError):
        # default val
        epochs = DEFAULT_EPOCHS

    try:
        # The learning rate is fractional (e.g. 0.01), so it must be parsed
        # as a float; parsing it as an int rejects every such value.
        rate = float(argv[2])
    except (IndexError, ValueError):
        # default val
        rate = DEFAULT_LEARNING_RATE

    return epochs, rate


# Module-level names read by train().
numberOfEpochParam, learning_rate = parse_train_params(sys.argv)
def flatten(t) -> list:
    """Flatten one level of nesting.

    Args:
        t: An iterable whose elements are themselves iterable
            (e.g. a list of lists or a 2-D array).

    Returns:
        A new list holding the items of every inner iterable, in order.
    """
    return [item for sublist in t for item in sublist]
def train():
    """Train the house-price linear model and record the run in MLflow.

    Everything runs inside a single MLflow run:

    1. Read ``Train.csv`` and ``Test.csv`` from ``../Participants_Data_HPP``
       (relative to the directory of the running script) and keep only the
       selected feature columns plus the target column.
    2. Load the model saved under ``saved_model/MyModel_tf`` if that
       succeeds; otherwise build ``Normalization -> Dense(1)`` compiled with
       mean-squared-error loss and Adam using the module-level
       ``learning_rate``.
    3. Fit for ``numberOfEpochParam`` epochs with a 33% validation split and
       save the model back to the same path.
    4. Evaluate on the test set, predict up to 10 randomly sampled test rows
       and write predictions and expected values to ``../result.txt``.
    5. Log the parameters, the minimum validation loss and the model itself
       to MLflow (registering it as ``HousePriceLinear`` unless the tracking
       store is a plain file store).

    Reads the module-level ``numberOfEpochParam`` and ``learning_rate``.
    Returns ``None``.
    """
    target = 'TARGET(PRICE_IN_LACS)'
    features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT", "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", "TARGET(PRICE_IN_LACS)"]
    modelPath = 'saved_model/MyModel_tf'

    with mlflow.start_run():
        mlflow.tensorflow.autolog()

        # Data and result files are located relative to the script itself,
        # not to the current working directory.
        cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
        data_dir = os.path.join(cwd, "..", "Participants_Data_HPP")

        # get dataset
        house_price_train = pd.read_csv(os.path.join(data_dir, "Train.csv"))[features]
        # get test dataset
        house_price_test = pd.read_csv(os.path.join(data_dir, "Test.csv"))[features]

        # Split the training frame into features / labels (pop removes the
        # target column from the copy, leaving the loaded frame untouched).
        train_frame = house_price_train.copy()
        house_price_labels = train_frame.pop(target)
        house_price_features = np.array(train_frame)

        # A small random sample of the test set used for the human-readable
        # result file.  Capped at the number of available rows so that a
        # short test file does not make DataFrame.sample raise.
        sample_size = min(10, len(house_price_test))
        feature_test_sample = house_price_test.sample(sample_size)
        labels_test_sample = feature_test_sample.pop(target)

        # Full test set split into features / expected values.
        house_price_test_features = house_price_test.copy()
        house_price_test_expected = house_price_test_features.pop(target)

        # load model if exists or create new
        try:
            linear_model = tf.keras.models.load_model(modelPath)
            print("open existing model")
        except Exception as exception:
            # Any load failure (typically: nothing saved yet) falls back to
            # a freshly built model; the reason is printed for diagnosis.
            print(exception)
            # The normalization layer is only needed for a new model, so its
            # statistics are computed here rather than on every run.
            normalize = layers.Normalization()
            normalize.adapt(house_price_features)
            linear_model = tf.keras.Sequential([
                normalize,
                layers.Dense(1)
            ])
            linear_model.compile(loss=tf.losses.MeanSquaredError(),
                                 optimizer=tf.optimizers.Adam(learning_rate=learning_rate))
            print("creating new model")

        # train model
        history = linear_model.fit(
            house_price_features,
            house_price_labels,
            epochs=int(numberOfEpochParam),
            validation_split=0.33,
            verbose=1,
        )

        # save model
        linear_model.save(modelPath, save_format='tf')

        # Loss on the full test set.  The value is computed but not persisted
        # anywhere; kept so the evaluation step still runs as before.
        test_results = {}
        test_results['linear_model'] = linear_model.evaluate(
            house_price_test_features, house_price_test_expected, verbose=0)

        # Predictions for the sampled rows; predict returns one row per
        # sample, so flatten to a plain list for printing.
        pred = np.array(linear_model.predict(feature_test_sample))
        flatten_pred = flatten(pred)

        with open(os.path.join(cwd, "..", "result.txt"), "w") as resultFile:
            resultFile.write("predictions: " + str(flatten_pred) + '\n')
            resultFile.write("expected: " + str(labels_test_sample.to_numpy()))

        mlflow.log_param('epochs number', numberOfEpochParam)
        mlflow.log_param('learning rate', learning_rate)
        # final loss: best (lowest) validation loss seen across the epochs
        mlflow.log_metric('val loss', min(history.history["val_loss"]))

        # save model as artifact, with an input/output signature inferred
        # from the training features and the model's predictions on them
        signature = mlflow.models.signature.infer_signature(
            house_price_features, linear_model.predict(house_price_features))

        # The model registry is not available with a plain file store, so
        # only register the model when tracking against a server.
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        if tracking_url_type_store != "file":
            mlflow.keras.log_model(linear_model, "linear-model", registered_model_name="HousePriceLinear", signature=signature)
        else:
            mlflow.keras.log_model(linear_model, "model", signature=signature)
# (revision timestamp from repository view: 2022-05-14 11:50:56 +02:00)
if __name__ == '__main__':
    # Run the training only when executed as a script, not when imported.
    train()