MLFlow
This commit is contained in:
parent
a648c1f6d1
commit
535a165c6d
@ -12,8 +12,8 @@ WORKDIR app
|
||||
ARG EPOCHS
|
||||
ENV EPOCHS=${EPOCHS}
|
||||
|
||||
COPY ml_training.py ./
|
||||
COPY mlflow_training.py ./
|
||||
COPY heart_2020_cleaned.csv ./
|
||||
|
||||
|
||||
CMD ["python3", "./ml_training.py $EPOCHS"]
|
||||
# Shell form on purpose: the exec (JSON array) form performs no variable
# substitution and would hand python3 the single literal argument
# "./mlflow_training.py $EPOCHS". Here the shell expands $EPOCHS and passes it
# as a separate argument (or passes nothing when EPOCHS is empty).
CMD python3 ./mlflow_training.py $EPOCHS
|
||||
|
12
MLProject
Normal file
12
MLProject
Normal file
@ -0,0 +1,12 @@
|
||||
# MLflow project definition.
name: s444465

# Docker image the entry points are executed in.
docker_env:
  image: s444465/ium:mlflow

entry_points:
  # Training run; the epochs value is substituted into the command line.
  main:
    parameters:
      epochs:
        type: float
        default: 10
    command: "python mlflow_training.py {epochs}"
  # Same script, invoked with the literal argument "test".
  test:
    command: "python mlflow_training.py test"
|
94
mlflow_training.py
Normal file
94
mlflow_training.py
Normal file
@ -0,0 +1,94 @@
|
||||
import mlflow
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import tensorflow as tf
|
||||
from sklearn.metrics import accuracy_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
from mlflow.models.signature import infer_signature
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import sys
|
||||
|
||||
# Make "s444465" the active MLflow experiment; this runs at module load,
# before main() logs any parameters or metrics.
mlflow.set_experiment("s444465")
|
||||
|
||||
|
||||
def evaluate_model(model, test_x, test_y):
    """Evaluate ``model`` on the given data and return ``(accuracy, loss, recall)``.

    ``model.evaluate`` must return exactly three values, which are read in the
    order loss, accuracy, recall; they are handed back with accuracy first.
    """
    results = model.evaluate(test_x, test_y, verbose=1)
    loss_value, accuracy_value, recall_value = results
    return accuracy_value, loss_value, recall_value
|
||||
|
||||
|
||||
def _parse_cli_args(argv):
    """Return ``(epochs, is_testing)`` derived from the command-line arguments.

    Exactly one extra argument is recognised:

    * ``"test"`` switches on the additional evaluation step (epochs stay at 10);
    * a non-negative number sets the epoch count. Float spellings such as
      ``"10.0"`` are accepted because the MLProject file declares the
      parameter with type ``float``.

    Anything else (no argument, several arguments, unparsable or negative
    values) falls back to 10 epochs and no test step.
    """
    default_epochs = 10
    if len(argv) != 2:
        return default_epochs, False

    argument = argv[1]
    if argument == "test":
        return default_epochs, True

    try:
        epochs = int(float(argument))
    except (ValueError, OverflowError):
        # Not a finite number: keep the default instead of crashing.
        return default_epochs, False
    if epochs < 0:
        return default_epochs, False
    return epochs, False


def main():
    """Train the heart-disease classifier and record the run in MLflow.

    Steps: read ``heart_2020_cleaned.csv``, encode the categorical columns,
    split 90/10 into train/test, standardise the features, train a small dense
    network, save it to ``trained_model``, and log the parameters, metrics and
    model to MLflow. When the script is called with the argument ``test`` an
    extra ``eval_accuracy`` metric is computed with scikit-learn and logged.
    """
    no_of_epochs, is_testing = _parse_cli_args(sys.argv)

    mlflow.log_param("epochs", no_of_epochs)
    scaler = StandardScaler()
    feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking"]

    dataset = pd.read_csv('heart_2020_cleaned.csv')
    dataset = dataset.dropna()

    # "Diabetic" is matched by substring, every other yes/no column by equality.
    dataset["Diabetic"] = dataset["Diabetic"].apply(lambda value: "Yes" in value)
    for column in ("HeartDisease", "PhysicalActivity", "Smoking", "AlcoholDrinking"):
        dataset[column] = dataset[column] == "Yes"
    dataset["Sex"] = (dataset["Sex"] == "Female").astype(int)

    dataset_train, dataset_test = train_test_split(dataset, test_size=.1, train_size=.9, random_state=1)

    print(dataset_test.shape)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        # `learning_rate` replaces the deprecated `lr` keyword.
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        metrics=["accuracy", tf.keras.metrics.Recall(name='recall')],
    )

    train_features = dataset_train[feature_names].astype(np.float32)
    train_labels = dataset_train["HeartDisease"].astype(np.float32)
    test_features = dataset_test[feature_names].astype(np.float32)
    test_labels = dataset_test["HeartDisease"].astype(np.float32)

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so both are standardised with the same mean/variance.
    train_features = scaler.fit_transform(train_features)
    test_features = scaler.transform(test_features)

    print(train_labels.value_counts())

    # NumPy copies are kept for MLflow / scikit-learn; tensors feed Keras.
    train_labels = train_labels.to_numpy()
    test_labels = test_labels.to_numpy()

    train_X = tf.convert_to_tensor(train_features)
    train_Y = tf.convert_to_tensor(train_labels)
    test_X = tf.convert_to_tensor(test_features)
    test_Y = tf.convert_to_tensor(test_labels)

    model.fit(train_X, train_Y, epochs=no_of_epochs)
    model.save("trained_model")

    acc, loss, _ = evaluate_model(model, test_X, test_Y)

    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("loss", loss)

    signature = infer_signature(train_features, train_labels)

    # The model is a Keras model, so it is logged with the Keras flavor rather
    # than the scikit-learn one. The input example keeps its batch dimension
    # (shape (1, n_features)) so it matches what the model accepts.
    # NOTE(review): if the installed MLflow has no `mlflow.keras` module, use
    # `mlflow.tensorflow.log_model` instead - confirm the pinned version.
    mlflow.keras.log_model(
        model,
        "mlflow_model",
        signature=signature,
        input_example=test_features[:1],
    )

    if is_testing:
        probabilities = model.predict(test_features)
        # Flatten the (n, 1) sigmoid output and threshold at 0.5.
        predictions = (np.asarray(probabilities).ravel() > 0.5).astype(int)
        accuracy = accuracy_score(test_labels, predictions)
        mlflow.log_metric("eval_accuracy", accuracy)
|
||||
|
||||
# Start training only when the file is executed as a script, so importing the
# module does not trigger a full training run.
if __name__ == "__main__":
    main()
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user