diff --git a/Dockerfile b/Dockerfile index 3661c9e..39f8147 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,8 +12,8 @@ WORKDIR app ARG EPOCHS ENV EPOCHS=${EPOCHS} -COPY ml_training.py ./ +COPY mlflow_training.py ./ COPY heart_2020_cleaned.csv ./ -CMD ["python3", "./ml_training.py $EPOCHS"] +CMD ["python3", "./mlflow_training.py $EPOCHS"] diff --git a/MLProject b/MLProject new file mode 100644 index 0000000..e16e63f --- /dev/null +++ b/MLProject @@ -0,0 +1,12 @@ +name: s444465 + +docker_env: + image: s444465/ium:mlflow + +entry_points: + main: + parameters: + epochs: {type: float, default: 10} + command: "python mlflow_training.py {epochs}" + test: + command: "python mlflow_training.py test" diff --git a/mlflow_training.py b/mlflow_training.py new file mode 100644 index 0000000..52bfca3 --- /dev/null +++ b/mlflow_training.py @@ -0,0 +1,94 @@ +import mlflow +import numpy as np +import pandas as pd +import tensorflow as tf +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split +from mlflow.models.signature import infer_signature +from sklearn.preprocessing import StandardScaler +import sys + +mlflow.set_experiment("s444465") + + +def evaluate_model(model, test_x, test_y): + test_loss, test_acc, test_rec = model.evaluate(test_x, test_y, verbose=1) + # print("Accuracy:", test_acc) + # print("Loss:", test_loss) + # print("Recall:", test_rec) + return test_acc, test_loss, test_rec + + +def main(): + no_of_epochs = int(sys.argv[1]) if (len(sys.argv) == 2 and sys.argv[1].isdigit()) else 10 + is_testing = (len(sys.argv) == 2) and not sys.argv[1].isdigit() and sys.argv[1] == "test" + + mlflow.log_param("epochs", no_of_epochs) + scaler = StandardScaler() + feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking"] + + dataset = pd.read_csv('heart_2020_cleaned.csv') + dataset = dataset.dropna() + + dataset["Diabetic"] = dataset["Diabetic"].apply(lambda x: True if "Yes" in x else False) + dataset["HeartDisease"] = dataset["HeartDisease"].apply(lambda x: True if x == "Yes" else False) + dataset["PhysicalActivity"] = dataset["PhysicalActivity"].apply(lambda x: True if x == "Yes" else False) + dataset["Smoking"] = dataset["Smoking"].apply(lambda x: True if x == "Yes" else False) + dataset["AlcoholDrinking"] = dataset["AlcoholDrinking"].apply(lambda x: True if x == "Yes" else False) + dataset["Sex"] = dataset["Sex"].apply(lambda x: 1 if x == "Female" else 0) + + dataset_train, dataset_test = train_test_split(dataset, test_size=.1, train_size=.9, random_state=1) + + print(dataset_test.shape) + + model = tf.keras.Sequential([ + tf.keras.layers.Dense(16, activation='relu'), + tf.keras.layers.Dense(8, activation='relu'), + tf.keras.layers.Dense(4, activation='relu'), + tf.keras.layers.Dense(1, activation='sigmoid') + ]) + + model.compile( + loss=tf.keras.losses.binary_crossentropy, + optimizer=tf.keras.optimizers.Adam(lr=0.01), + metrics=["accuracy", tf.keras.metrics.Recall(name='recall')] + ) + + train_X = dataset_train[feature_names].astype(np.float32) + train_Y = dataset_train["HeartDisease"].astype(np.float32) + test_X = dataset_test[feature_names].astype(np.float32) + test_Y = dataset_test["HeartDisease"].astype(np.float32) + + train_X = scaler.fit_transform(train_X) + # train_Y = scaler.fit_transform(train_Y) + test_X = scaler.fit_transform(test_X) + # test_Y = scaler.fit_transform(test_Y) + + + print(train_Y.value_counts()) + + train_X = tf.convert_to_tensor(train_X) + train_Y = tf.convert_to_tensor(train_Y) + + test_X = tf.convert_to_tensor(test_X) + test_Y = tf.convert_to_tensor(test_Y) + + model.fit(train_X, train_Y, epochs=no_of_epochs) + model.save("trained_model") + + acc, loss, rec = evaluate_model(model, test_X, test_Y) + + mlflow.log_metric("accuracy", acc) + mlflow.log_metric("loss", loss) + + signature = infer_signature(np.array(train_X), np.array(train_Y)) + + mlflow.sklearn.log_model(model, "mlflow_model", signature=signature, input_example=np.array(test_X[0])) + if is_testing: + predictions = model.predict(np.array(test_X)) + predictions = [int(i > 0.5) for i in predictions] + accuracy = accuracy_score(np.array(test_Y), predictions) + mlflow.log_metric("eval_accuracy", accuracy) + + +main() \ No newline at end of file diff --git a/trained_model/keras_metadata.pb b/trained_model/keras_metadata.pb index 57e18d2..98103d0 100644 --- a/trained_model/keras_metadata.pb +++ b/trained_model/keras_metadata.pb @@ -1,5 +1,5 @@ -+root"_tf_keras_sequential*+{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 7]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "dense_input"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 4, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "shared_object_id": 13, "build_input_shape": {"class_name": "TensorShape", "items": [null, 7]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 7]}, "float32", "dense_input"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 7]}, "float32", "dense_input"]}, "keras_version": "2.8.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 7]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "dense_input"}, "shared_object_id": 0}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 3}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 5}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 6}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 4, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 7}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 8}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 9}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 10}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 11}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 12}]}}, "training_config": {"loss": "binary_crossentropy", "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "accuracy", "dtype": "float32", "fn": "binary_accuracy"}, "shared_object_id": 14}, {"class_name": "Recall", "config": {"name": "recall", "dtype": "float32", "thresholds": null, "top_k": null, "class_id": null}, "shared_object_id": 15}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.10000000149011612, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2 ++root"_tf_keras_sequential*+{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 7]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "dense_input"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 4, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}]}, "shared_object_id": 13, "build_input_shape": {"class_name": "TensorShape", "items": [null, 7]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 7]}, "float32", "dense_input"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 7]}, "float32", "dense_input"]}, "keras_version": "2.8.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 7]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "dense_input"}, "shared_object_id": 0}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 3}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 5}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 6}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 4, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 7}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 8}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 9}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "dtype": "float32", "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 10}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 11}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 12}]}}, "training_config": {"loss": "binary_crossentropy", "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "accuracy", "dtype": "float32", "fn": "binary_accuracy"}, "shared_object_id": 14}, {"class_name": "Recall", "config": {"name": "recall", "dtype": "float32", "thresholds": null, "top_k": null, "class_id": null}, "shared_object_id": 15}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.009999999776482582, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2 root.layer_with_weights-0"_tf_keras_layer*{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 7}}, "shared_object_id": 16}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 7]}}2 root.layer_with_weights-1"_tf_keras_layer*{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 8, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 5}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 6, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 16}}, "shared_object_id": 17}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16]}}2 root.layer_with_weights-2"_tf_keras_layer*{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 4, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 7}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 8}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 9, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 8}}, "shared_object_id": 18}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 8]}}2 diff --git a/trained_model/variables/variables.data-00000-of-00001 b/trained_model/variables/variables.data-00000-of-00001 index 8ba3cd2..6cb590b 100644 Binary files a/trained_model/variables/variables.data-00000-of-00001 and b/trained_model/variables/variables.data-00000-of-00001 differ diff --git a/trained_model/variables/variables.index b/trained_model/variables/variables.index index 11480c1..fd27c1b 100644 Binary files a/trained_model/variables/variables.index and b/trained_model/variables/variables.index differ