ium_464913/mlflow/mlflow_train_evaluation.py

83 lines
2.3 KiB
Python

import os
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
from keras.models import Sequential
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
from keras.optimizers import Adam
import pandas as pd
import sys
import mlflow
from sklearn.metrics import confusion_matrix
mlflow.set_tracking_uri("http://localhost:5000")
def main():
X_train = pd.read_csv("../data/X_train.csv")
X_val = pd.read_csv("../data/X_val.csv")
y_train = pd.read_csv("../data/y_train.csv")
y_val = pd.read_csv("../data/y_val.csv")
X_train = X_train.to_numpy()
X_val = X_val.to_numpy()
y_train = y_train.to_numpy()
y_val = y_val.to_numpy()
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)
learning_rate = float(sys.argv[1])
epochs = int(sys.argv[2])
with mlflow.start_run() as run:
print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
model = Sequential(
[
Conv1D(32, 2, activation="relu", input_shape=X_train[0].shape),
BatchNormalization(),
Dropout(0.2),
Conv1D(64, 2, activation="relu"),
BatchNormalization(),
Dropout(0.5),
Flatten(),
Dense(64, activation="relu"),
Dropout(0.5),
Dense(1, activation="sigmoid"),
]
)
model.compile(
optimizer=Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=["accuracy"],
)
model.fit(
X_train,
y_train,
validation_data=(X_val, y_val),
epochs=epochs,
verbose=1,
)
mlflow.log_param("learning_rate", learning_rate)
mlflow.log_param("epochs", epochs)
X_test = pd.read_csv("../data/X_test.csv")
y_test = pd.read_csv("../data/y_test.csv")
y_pred = model.predict(X_test)
y_pred = y_pred >= 0.5
cm = confusion_matrix(y_test, y_pred)
accuracy = cm[1, 1] / (cm[1, 0] + cm[1, 1])
mlflow.log_metric("accuracy", accuracy)
if __name__ == "__main__":
main()