"""Train a small Keras binary classifier on a heart-disease CSV and log it to MLflow.

Command line (at most one optional argument):
    <no argument>   train for DEFAULT_EPOCHS epochs
    <integer>       train for that many epochs
    test            train for DEFAULT_EPOCHS epochs and additionally log
                    the "eval_accuracy" metric computed with scikit-learn
"""

import sys

import mlflow
import numpy as np
import pandas as pd
import tensorflow as tf
from mlflow.models.signature import infer_signature
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

mlflow.set_experiment("s444465")

DEFAULT_EPOCHS = 10
DATASET_PATH = 'heart_2020_cleaned.csv'
FEATURE_NAMES = [
    "BMI",
    "SleepTime",
    "Sex",
    "Diabetic",
    "PhysicalActivity",
    "Smoking",
    "AlcoholDrinking",
]
TARGET_NAME = "HeartDisease"
# Columns whose raw values are compared against the exact string "Yes".
YES_NO_COLUMNS = ("HeartDisease", "PhysicalActivity", "Smoking", "AlcoholDrinking")


def evaluate_model(model, test_x, test_y):
    """Evaluate ``model`` on the test data.

    ``model.evaluate`` must yield exactly three values, unpacked here as
    loss, accuracy and recall (the order produced by the ``compile`` call
    in this script: loss first, then the metrics as listed).

    Args:
        model: Compiled model exposing ``evaluate(x, y, verbose=...)``.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        Tuple ``(accuracy, loss, recall)`` -- note accuracy comes first.
    """
    test_loss, test_acc, test_rec = model.evaluate(test_x, test_y, verbose=1)
    return test_acc, test_loss, test_rec


def _parse_args(argv):
    """Return ``(epochs, is_testing)`` derived from the raw ``argv`` list."""
    arg = argv[1] if len(argv) == 2 else None
    is_testing = arg == "test"
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse,
    # so inputs such as "²" fall back to the default instead of raising.
    if arg is not None and arg.isdecimal():
        return int(arg), is_testing
    return DEFAULT_EPOCHS, is_testing


def _load_dataset(path):
    """Read the CSV at ``path``, drop incomplete rows and encode categoricals."""
    dataset = pd.read_csv(path)
    dataset = dataset.dropna()
    # "Diabetic" is matched by substring: any value containing "Yes" counts.
    dataset["Diabetic"] = dataset["Diabetic"].str.contains("Yes", regex=False)
    for column in YES_NO_COLUMNS:
        dataset[column] = dataset[column] == "Yes"
    dataset["Sex"] = (dataset["Sex"] == "Female").astype(int)
    return dataset


def _build_model():
    """Create and compile the 16-8-4-1 dense network used for classification."""
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        # `learning_rate` replaces the deprecated `lr` alias.
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        metrics=["accuracy", tf.keras.metrics.Recall(name='recall')],
    )
    return model


def main():
    """Train the model, save it, and log parameters, metrics and model to MLflow.

    Reads ``sys.argv`` (see module docstring), loads ``DATASET_PATH``, holds
    out 10% of the rows for testing, trains, writes the model to
    ``trained_model`` and logs "epochs", "accuracy", "loss" and -- in test
    mode -- "eval_accuracy".
    """
    no_of_epochs, is_testing = _parse_args(sys.argv)
    mlflow.log_param("epochs", no_of_epochs)

    dataset = _load_dataset(DATASET_PATH)
    dataset_train, dataset_test = train_test_split(
        dataset, test_size=.1, train_size=.9, random_state=1
    )
    print(dataset_test.shape)

    model = _build_model()

    train_X = dataset_train[FEATURE_NAMES].astype(np.float32)
    train_Y = dataset_train[TARGET_NAME].astype(np.float32)
    test_X = dataset_test[FEATURE_NAMES].astype(np.float32)
    test_Y = dataset_test[TARGET_NAME].astype(np.float32)

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split; refitting on test data would scale it inconsistently
    # with what the model saw during training.
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)

    print(train_Y.value_counts())

    train_X = tf.convert_to_tensor(train_X)
    train_Y = tf.convert_to_tensor(train_Y)
    test_X = tf.convert_to_tensor(test_X)
    test_Y = tf.convert_to_tensor(test_Y)

    model.fit(train_X, train_Y, epochs=no_of_epochs)
    model.save("trained_model")

    # Recall is computed by evaluate_model but is not logged.
    acc, loss, _rec = evaluate_model(model, test_X, test_Y)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("loss", loss)

    signature = infer_signature(np.array(train_X), np.array(train_Y))
    # NOTE(review): this logs a Keras model through the scikit-learn flavor;
    # a TensorFlow/Keras flavor is probably intended -- confirm against the
    # installed MLflow version before changing.
    mlflow.sklearn.log_model(
        model,
        "mlflow_model",
        signature=signature,
        input_example=np.array(test_X[0]),
    )

    if is_testing:
        predictions = model.predict(np.array(test_X))
        # Threshold the sigmoid outputs in one vectorised step.
        predictions = (np.asarray(predictions).ravel() > 0.5).astype(int)
        accuracy = accuracy_score(np.array(test_Y), predictions)
        mlflow.log_metric("eval_accuracy", accuracy)


if __name__ == "__main__":
    main()