import sys

import numpy as np
import pandas as pd
import sacred
import tensorflow as tf
from sacred.observers import FileStorageObserver, MongoObserver
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Input columns fed to the network, in the order they are passed to the model.
FEATURE_NAMES = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity",
                 "Smoking", "AlcoholDrinking"]
# Label column predicted by the network.
TARGET_COLUMN = "HeartDisease"
# Columns converted to booleans by an exact comparison with "Yes".
YES_NO_COLUMNS = ["HeartDisease", "PhysicalActivity", "Smoking",
                  "AlcoholDrinking"]

ex = sacred.Experiment("Training model")
ex.observers.append(FileStorageObserver('training_experiment'))
# Optional MongoDB observer (disabled). Supply the connection URL through
# configuration or the environment instead of hard-coding credentials here.
# ex.observers.append(MongoObserver(url='mongodb://<user>:<password>@172.17.0.1:27017',
#                                   db_name='sacred'))


@ex.config
def get_config():
    """Define the experiment config; argv[1], when it is the only argument, sets the epoch count."""
    no_of_epochs = 10
    if len(sys.argv) == 2:
        no_of_epochs = int(sys.argv[1])


@ex.capture
def evaluate_model(model, test_x, test_y):
    """Evaluate *model* on the test data and return a one-line summary.

    Args:
        model: Compiled model whose ``evaluate`` returns, in order, the loss,
            the accuracy and the recall.
        test_x: Test features.
        test_y: Test labels.

    Returns:
        A string of the form ``"Accuracy: ..., Loss: ..., Recall: ..."``.
    """
    test_loss, test_acc, test_rec = model.evaluate(test_x, test_y, verbose=1)
    return f"Accuracy: {test_acc}, Loss: {test_loss}, Recall: {test_rec}"


def _load_dataset(path):
    """Read the CSV at *path*, drop incomplete rows and encode the categorical columns."""
    dataset = pd.read_csv(path).dropna()
    # "Diabetic" is matched by substring, so any value containing "Yes"
    # counts as positive; the other yes/no columns require an exact match.
    dataset["Diabetic"] = dataset["Diabetic"].str.contains("Yes", regex=False)
    for column in YES_NO_COLUMNS:
        dataset[column] = dataset[column] == "Yes"
    dataset["Sex"] = (dataset["Sex"] == "Female").astype(int)
    return dataset


def _build_model():
    """Create and compile the dense binary classifier."""
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        # `learning_rate` replaces the deprecated `lr` alias.
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        metrics=["accuracy", tf.keras.metrics.Recall(name='recall')],
    )
    return model


@ex.main
def main(no_of_epochs, _run):
    """Train the classifier, save it, and record the evaluation and artifacts.

    Args:
        no_of_epochs: Number of training epochs (injected from the config).
        _run: The current Sacred run, used to log the evaluation result.
    """
    dataset = _load_dataset('heart_2020_cleaned.csv')

    dataset_train, dataset_test = train_test_split(
        dataset, test_size=.1, train_size=.9, random_state=1)
    print(dataset_test.shape)

    model = _build_model()

    train_X = dataset_train[FEATURE_NAMES].astype(np.float32)
    train_Y = dataset_train[TARGET_COLUMN].astype(np.float32)
    test_X = dataset_test[FEATURE_NAMES].astype(np.float32)
    test_Y = dataset_test[TARGET_COLUMN].astype(np.float32)

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split; re-fitting on the test data would scale the two splits
    # differently and leak test-set statistics into the evaluation.
    scaler = StandardScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)

    print(train_Y.value_counts())

    train_X = tf.convert_to_tensor(train_X)
    train_Y = tf.convert_to_tensor(train_Y)
    test_X = tf.convert_to_tensor(test_X)
    test_Y = tf.convert_to_tensor(test_Y)

    model.fit(train_X, train_Y, epochs=no_of_epochs)
    model.save("trained_model")

    metrics = evaluate_model(model, test_X, test_Y)
    # NOTE(review): `metrics` is a formatted string, not a number; kept as-is
    # so the recorded "model.eval" entry stays compatible with earlier runs.
    _run.log_scalar("model.eval", metrics)

    ex.add_artifact("trained_model/saved_model.pb")
    ex.add_artifact("trained_model/keras_metadata.pb")


# Intentionally unguarded: the experiment starts whenever this module is
# executed or imported, as in the original script.
ex.run()