mlflow first try

This commit is contained in:
Kamila 2022-05-11 14:31:24 +02:00
parent 0106677f2e
commit e84532b331
3 changed files with 99 additions and 0 deletions

View File

@ -17,6 +17,7 @@ RUN pip3 install keras
RUN pip3 install sklearn
RUN pip3 install pymongo
RUN pip3 install sacred
RUN pip3 install mlflow
CMD python3 data_expl.py
CMD python3 nn_train.py

10
MLProject Normal file
View File

@ -0,0 +1,10 @@
# MLflow project definition: runs nn_train_mlflow.py inside a Docker image.
name: s444517_train

docker_env:
  image: kambobdocker420/ium:mlflow

entry_points:
  main:
    parameters:
      # NOTE(review): MLflow documents the parameter types string, float,
      # path and uri; "int" is not among them and is presumably passed
      # through unvalidated. The script converts this value with int().
      epochs: {type: int, default: 200}
      first_activation_funct: {type: string, default: "relu"}
      second_activation_funct: {type: string, default: "softmax"}
    # The three values are substituted positionally and arrive in the script
    # as sys.argv[1], sys.argv[2] and sys.argv[3].
    command: "python nn_train_mlflow.py {epochs} {first_activation_funct} {second_activation_funct}"

88
nn_train_mlflow.py Normal file
View File

@ -0,0 +1,88 @@
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from keras.utils import np_utils
from tensorflow import keras
import mlflow
import sys
# Select the MLflow experiment under which the run started below is recorded.
mlflow.set_experiment("s444517")
# reading data
def read_data():
    """Load the three dataset splits from CSV files.

    Reads ``apps_train.csv``, ``apps_test.csv`` and ``apps_validate.csv``
    (paths are relative, so they resolve against the current working
    directory), treating the first row of each file as the header.

    Returns:
        A list of three DataFrames in the order train, test, validate.
    """
    return [
        pd.read_csv(f'apps_{split}.csv', header=0)
        for split in ('train', 'test', 'validate')
    ]
def data_prep():
    """Load the dataset splits and build model inputs and targets.

    The ``Category`` column is the classification target; the columns listed
    in ``numeric_columns`` are the features. A single ``LabelEncoder`` is
    fitted on the training labels and reused for the validation labels so
    that a given class index means the same category in both splits, and the
    one-hot width is pinned to the number of training classes.

    Returns:
        A 7-tuple ``(x_train_set, dummy_y, x_validate_set, dummy_yv,
        x_test_set, y_test_set, y_class_names)`` where ``dummy_y`` and
        ``dummy_yv`` are one-hot encoded targets, ``y_test_set`` holds the
        raw (un-encoded) test labels and ``y_class_names`` holds the unique
        training categories in order of first appearance.

    Raises:
        KeyError: if a split lacks the ``"Unnamed: 0"`` column, the
            ``"Category"`` column or one of the feature columns.
        ValueError: if the validation split contains a category that never
            occurs in the training split (raised by ``encoder.transform``).
    """
    train_set, test_set, validate_set = read_data()

    # Drop the leftover index column written into the CSV files.
    train_set = train_set.drop(columns=["Unnamed: 0"])
    test_set = test_set.drop(columns=["Unnamed: 0"])
    validate_set = validate_set.drop(columns=["Unnamed: 0"])

    numeric_columns = ["Rating", "Reviews", "Installs", "Price", "Genres_numeric_value"]

    # Train set: the encoder fitted here defines the class-index mapping.
    x_train_set = train_set[numeric_columns]
    y_train_set = train_set["Category"]
    encoder = LabelEncoder()
    encoder.fit(y_train_set)
    class_count = len(encoder.classes_)
    dummy_y = np_utils.to_categorical(
        encoder.transform(y_train_set), num_classes=class_count
    )

    # Validation set: reuse the training encoder instead of fitting a new one,
    # otherwise a category missing from this split would shift every index
    # after it and change the one-hot width.
    x_validate_set = validate_set[numeric_columns]
    y_validate_set = validate_set["Category"]
    dummy_yv = np_utils.to_categorical(
        encoder.transform(y_validate_set), num_classes=class_count
    )

    # Test set: labels are returned raw; the caller compares them against
    # predicted category names.
    x_test_set = test_set[numeric_columns]
    y_test_set = test_set["Category"]
    y_class_names = train_set["Category"].unique()

    return x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names
with mlflow.start_run():
    # Command line: nn_train_mlflow.py [epochs] [first_activation] [second_activation]
    # Activation names are strings such as "relu"; converting them with int()
    # raises ValueError, so only the epoch count is converted.
    epoch = int(sys.argv[1]) if len(sys.argv) > 1 else 200
    first_activation_funct = sys.argv[2] if len(sys.argv) > 2 else "relu"
    second_activation_funct = sys.argv[3] if len(sys.argv) > 3 else "softmax"

    x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names = data_prep()

    number_of_classes = 33
    number_of_features = 5

    # The input dimension belongs on the first layer of a Sequential model.
    model = Sequential()
    model.add(Dense(number_of_classes, activation=first_activation_funct, input_dim=number_of_features))
    model.add(Dense(number_of_classes, activation=second_activation_funct))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
    model.fit(x_train_set, dummy_y, epochs=epoch, validation_data=(x_validate_set, dummy_yv))
    model.save("my_model/")

    # Model predictions: map each arg-max class index back to a category name.
    # LabelEncoder assigns indices in sorted label order, so the sorted class
    # names line up with the output units. Sorting is done once, not per row.
    yhat = model.predict(x_test_set)
    sorted_class_names = np.array(sorted(y_class_names))
    y_pred = sorted_class_names[np.argmax(yhat, axis=1)]
    # Positional view of the true labels, independent of the Series index.
    y_true = np.asarray(y_test_set)

    mlflow.log_param("epoch", epoch)
    mlflow.log_param("1st_activation_funct", first_activation_funct)
    mlflow.log_param("2nd_activation_funct", second_activation_funct)
    # Log the metric before persisting the model so that a failure while
    # saving does not lose the evaluation result.
    mlflow.log_metric("accuracy", accuracy_score(y_true, y_pred))
    mlflow.keras.log_model(model, 'my_model')
    # NOTE(review): "my_model" was already created by model.save() above;
    # mlflow.keras.save_model presumably rejects an existing non-empty
    # target path - confirm, and use a separate directory if so.
    mlflow.keras.save_model(model, "my_model")