83 lines
2.3 KiB
Python
83 lines
2.3 KiB
Python
import os
|
|
|
|
# Must stay above the Keras imports below: the variable is placed in the
# environment before Keras (and its backend) is imported.
# NOTE(review): presumably this turns off TensorFlow's oneDNN optimizations
# and is read at import time -- confirm against the TensorFlow docs.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
|
|
|
|
from keras.models import Sequential
|
|
from keras.layers import BatchNormalization, Dropout, Dense, Flatten, Conv1D
|
|
from keras.optimizers import Adam
|
|
import pandas as pd
|
|
import sys
|
|
import mlflow
|
|
from sklearn.metrics import confusion_matrix
|
|
|
|
# Module-level side effect: point MLflow at the tracking server expected on
# localhost:5000, so every run started by this script is recorded there.
mlflow.set_tracking_uri("http://localhost:5000")
|
|
|
|
|
|
def _load_features(path):
    """Read a feature CSV and return it shaped (samples, features, 1).

    The trailing axis of size 1 is the channel axis consumed by the
    Conv1D layers of the model.
    """
    values = pd.read_csv(path).to_numpy()
    return values.reshape(values.shape[0], values.shape[1], 1)


def _load_labels(path):
    """Read a label CSV and return its values as a NumPy array."""
    return pd.read_csv(path).to_numpy()


def _build_model(input_shape, learning_rate):
    """Build and compile the Conv1D binary classifier.

    Args:
        input_shape: shape of a single sample, i.e. (features, 1).
        learning_rate: step size handed to the Adam optimizer.

    Returns:
        A compiled Sequential model with a single sigmoid output unit.
    """
    model = Sequential(
        [
            Conv1D(32, 2, activation="relu", input_shape=input_shape),
            BatchNormalization(),
            Dropout(0.2),
            Conv1D(64, 2, activation="relu"),
            BatchNormalization(),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation="relu"),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
    )
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model


def main():
    """Train the Conv1D classifier and record the run in MLflow.

    Command line:
        sys.argv[1]: learning rate (float)
        sys.argv[2]: number of training epochs (int)

    Reads the train/val/test CSV splits from ``../data`` (relative to the
    current working directory), trains the model, then logs the
    hyperparameters and the test-set metrics to the active MLflow run.

    Raises:
        SystemExit: if the two command-line arguments are missing.
        ValueError: if an argument cannot be parsed as float / int.
    """
    # Validate the CLI first so a bad invocation fails before any data is
    # loaded, and with a usage hint rather than a bare IndexError.
    if len(sys.argv) < 3:
        raise SystemExit(
            "usage: {0} <learning_rate> <epochs>".format(sys.argv[0])
        )
    learning_rate = float(sys.argv[1])
    epochs = int(sys.argv[2])

    X_train = _load_features("../data/X_train.csv")
    X_val = _load_features("../data/X_val.csv")
    y_train = _load_labels("../data/y_train.csv")
    y_val = _load_labels("../data/y_val.csv")

    with mlflow.start_run() as run:
        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

        # Log hyperparameters up front so they are recorded even when
        # training fails part-way through.
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("epochs", epochs)

        model = _build_model(X_train[0].shape, learning_rate)
        model.fit(
            X_train,
            y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            verbose=1,
        )

        # The test features get the same (samples, features, 1) layout as
        # the training data; the original passed the raw DataFrame.
        X_test = _load_features("../data/X_test.csv")
        y_test = _load_labels("../data/y_test.csv").ravel()

        # Threshold the sigmoid output at 0.5 and flatten to 1-D 0/1 labels.
        y_pred = (model.predict(X_test) >= 0.5).astype(int).ravel()

        # labels=[0, 1] keeps the matrix 2x2 even if one class is absent
        # from the test set or the predictions.
        # NOTE(review): assumes the label CSVs encode classes as 0/1, which
        # is what the sigmoid + binary_crossentropy setup implies -- confirm.
        cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

        # Accuracy = correct predictions / all predictions. The previous
        # formula, TP / (TP + FN), is recall on the positive class.
        accuracy = float(cm.trace() / cm.sum())
        mlflow.log_metric("accuracy", accuracy)

        # Keep the previously computed quantity under its proper name;
        # skipped when the test set has no positive samples (0/0).
        positives = cm[1, 0] + cm[1, 1]
        if positives:
            mlflow.log_metric("recall", float(cm[1, 1] / positives))
|
|
|
|
|
|
# Script entry point: run the training pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|