mlflow first try
This commit is contained in:
parent
0106677f2e
commit
e84532b331
@ -17,6 +17,7 @@ RUN pip3 install keras
|
|||||||
RUN pip3 install sklearn
|
RUN pip3 install sklearn
|
||||||
RUN pip3 install pymongo
|
RUN pip3 install pymongo
|
||||||
RUN pip3 install sacred
|
RUN pip3 install sacred
|
||||||
|
RUN pip3 install mlflow
|
||||||
|
|
||||||
CMD python3 data_expl.py
|
CMD python3 data_expl.py
|
||||||
CMD python3 nn_train.py
|
CMD python3 nn_train.py
|
10
MLProject
Normal file
10
MLProject
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# MLflow project definition for the s444517 training job.
name: s444517_train

# Container image the project entry points are executed in.
docker_env:
  image: kambobdocker420/ium:mlflow

entry_points:
  main:
    # Parameters are substituted into the command below as positional arguments.
    parameters:
      epochs:
        type: int
        default: 200
      first_activation_funct:
        type: str
        default: "relu"
      second_activation_funct:
        type: str
        default: "softmax"
    command: "python nn_train_mlflow.py {epochs} {first_activation_funct} {second_activation_funct}"
|
88
nn_train_mlflow.py
Normal file
88
nn_train_mlflow.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from tensorflow.keras.models import Sequential
|
||||||
|
from tensorflow.keras.layers import Dense
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
from keras.utils import np_utils
|
||||||
|
from tensorflow import keras
|
||||||
|
import mlflow
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Select the MLflow experiment "s444517" as the active one before any run is started.
mlflow.set_experiment("s444517")
|
||||||
|
|
||||||
|
# reading data
|
||||||
|
def read_data():
    """Load the three dataset splits from CSV files in the working directory.

    Returns:
        A list of three DataFrames read from ``apps_train.csv``,
        ``apps_test.csv`` and ``apps_validate.csv``, in that order.
    """
    split_names = ('train', 'test', 'validate')
    return [pd.read_csv(f'apps_{split}.csv', header=0) for split in split_names]
|
||||||
|
|
||||||
|
def data_prep():
    """Build model inputs and targets from the train/validate/test splits.

    The splits come from ``read_data()``. The ``"Unnamed: 0"`` column is
    dropped from each, the numeric feature columns are selected as inputs and
    ``"Category"`` is used as the target.

    A single ``LabelEncoder`` fitted on the training labels is used for both
    the training and the validation targets, and both one-hot matrices are
    built with the same number of columns. Fitting a separate encoder per
    split (as was done before) gives each split its own label-to-index
    mapping whenever a split does not contain every category.

    Returns:
        A tuple ``(x_train_set, dummy_y, x_validate_set, dummy_yv,
        x_test_set, y_test_set, y_class_names)`` where ``dummy_y`` and
        ``dummy_yv`` are one-hot encoded targets, ``y_test_set`` holds the raw
        test ``"Category"`` values and ``y_class_names`` the unique training
        categories.

    Raises:
        ValueError: if the validation split contains a category that does not
            occur in the training split (raised by ``LabelEncoder.transform``).
    """
    numeric_columns = ["Rating", "Reviews", "Installs", "Price", "Genres_numeric_value"]
    train_set, test_set, validate_set = (
        frame.drop(columns=["Unnamed: 0"]) for frame in read_data()
    )

    # train set set-up
    x_train_set = train_set[numeric_columns]
    y_train_set = train_set["Category"]
    encoder = LabelEncoder()
    encoded_Y = encoder.fit_transform(y_train_set)
    num_classes = len(encoder.classes_)
    dummy_y = np_utils.to_categorical(encoded_Y, num_classes=num_classes)

    # validation set set-up: reuse the training encoder so that class indices
    # and the one-hot width match the training targets
    x_validate_set = validate_set[numeric_columns]
    encoded_Yv = encoder.transform(validate_set["Category"])
    dummy_yv = np_utils.to_categorical(encoded_Yv, num_classes=num_classes)

    # test set set-up: labels are returned raw, they are compared to decoded
    # predictions by the caller
    x_test_set = test_set[numeric_columns]
    y_test_set = test_set["Category"]
    y_class_names = train_set["Category"].unique()

    return x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names
|
||||||
|
|
||||||
|
|
||||||
|
# Train the classifier inside a single MLflow run and log its parameters,
# model and test accuracy.
with mlflow.start_run():
    # Positional CLI arguments: <epochs> <first activation> <second activation>.
    # Activation names are strings (e.g. "relu"); only the epoch count is an int.
    epoch = int(sys.argv[1]) if len(sys.argv) > 1 else 200
    first_activation_funct = sys.argv[2] if len(sys.argv) > 2 else "relu"
    second_activation_funct = sys.argv[3] if len(sys.argv) > 3 else "softmax"

    x_train_set, dummy_y, x_validate_set, dummy_yv, x_test_set, y_test_set, y_class_names = data_prep()

    number_of_classes = 33
    number_of_features = 5

    model = Sequential()
    # The input dimension belongs on the first layer, which receives the features.
    model.add(Dense(number_of_classes, activation=first_activation_funct, input_dim=number_of_features))
    model.add(Dense(number_of_classes, activation=second_activation_funct))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'categorical_accuracy'])
    model.fit(x_train_set, dummy_y, epochs=epoch, validation_data=(x_validate_set, dummy_yv))

    model.save("my_model/")

    # model predictions
    yhat = model.predict(x_test_set)
    # Output index i corresponds to the i-th category in sorted order, which is
    # how LabelEncoder numbers the classes in data_prep().
    class_names = sorted(y_class_names)
    y_pred = [class_names[class_index] for class_index in np.argmax(yhat, axis=1)]
    # Positional order of the test labels, independent of the Series index.
    y_true = list(y_test_set)

    mlflow.log_param("epoch", epoch)
    mlflow.log_param("1st_activation_funct", first_activation_funct)
    mlflow.log_param("2nd_activation_funct", second_activation_funct)
    mlflow.keras.log_model(model, 'my_model')
    # NOTE(review): "my_model" was already created by model.save() above;
    # mlflow.keras.save_model may refuse to write into an existing path —
    # confirm, and drop one of the two saves or use a different directory.
    mlflow.keras.save_model(model, "my_model")
    mlflow.log_metric("accuracy", accuracy_score(y_true, y_pred))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user