Add solution to lab08 MLFlow

This commit is contained in:
s430705 2021-05-12 09:37:42 +02:00
parent 11c1e224a4
commit 72c11b869f
3 changed files with 123 additions and 81 deletions

13
MLproject Normal file
View File

@ -0,0 +1,13 @@
name: Lab08-ium

# NOTE(review): the original had `docker_env: my_env.yaml` (a conda_env-style
# file reference) AND an `image:` line; MLproject's docker_env must be a
# mapping with an `image` key, so the stray file reference was dropped.
docker_env:
  image: s430705/ium:3

entry_points:
  main:
    # Parameters use MLproject mapping syntax (`name: {type, default}`),
    # not `name = value`, which is invalid YAML here.
    parameters:
      train_size_param: {type: float, default: 0.8}
      test_size_param: {type: float, default: 0.2}
      epochs: {type: int, default: 400}
      batch_size: {type: int, default: 128}
    command: "python lab07_08_sacred_mfl.py -r {train_size_param} {test_size_param} {epochs} {batch_size}"

110
lab07_08_sacred_mfl.py Normal file
View File

@ -0,0 +1,110 @@
import sys
import mlflow
import pandas as pd
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
'''
This script is currently set up for MLflow. To run it as a Sacred
experiment instead, uncomment the Sacred lines below.
'''
# ex = Experiment("file_observer", interactive=False, save_git_info=False)
# ex.observers.append(FileStorageObserver('lab07/my_runs'))
# @ex.config
# def my_config():
# train_size_param = 0.8
# test_size_param = 0.2
# epochs = 400
# batch_size = 128
# @ex.capture
def prepare_model(train_size_param, test_size_param, epochs, batch_size):
    """Train a small dense regression network on the movie-rating data.

    Reads ``train.csv`` / ``test.csv`` from the working directory, trains a
    Keras model with early stopping, saves it to ``model_movies`` and
    returns it together with the RMSE on the held-out test file.

    Parameters
    ----------
    train_size_param : float
        Intended training fraction. NOTE(review): only logged by the caller;
        the split itself is driven by ``test_size_param``.
    test_size_param : float
        Fraction of ``train.csv`` held out as a validation set.
    epochs : int
        Maximum number of training epochs (early stopping may end sooner).
    batch_size : int
        Mini-batch size for ``model.fit``.

    Returns
    -------
    tuple
        ``(model, rmse)`` — the trained Keras model and the test RMSE.
    """
    movies_data = pd.read_csv("train.csv", error_bad_lines=False)
    # First CSV column is a saved index, not a feature — drop it.
    movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
    movies_data.dropna(inplace=True)
    X = movies_data.drop("rating", axis=1)
    Y = movies_data["rating"]
    print(X, Y.values)
    # Hold out a validation split (default 8:2 ratio).
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size_param, random_state=42
    )
    test_df = pd.read_csv("test.csv")
    test_df.drop(test_df.columns[0], axis=1, inplace=True)
    x_test = test_df.drop("rating", axis=1)
    y_test = test_df["rating"]
    # Small fully-connected regression network.
    model = Sequential()
    model.add(Dense(8, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.compile(optimizer="adam", loss="mse")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
    model.fit(
        x=X_train.values,
        y=Y_train.values,
        validation_data=(X_test, Y_test.values),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stop],
    )
    y_pred = model.predict(x_test.values)
    # BUG FIX: mean_squared_error returns the MSE; take the square root so
    # the value logged by the caller as "RMSE" really is the root mean
    # squared error.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    model.save("model_movies")
    return model, rmse
# @ex.automain
# def my_main(train_size_param, test_size_param, epochs, batch_size):
# print(prepare_model())
# r = ex.run()
# ex.add_artifact("model_movies/saved_model.pb")
# CLI: python lab07_08_sacred_mfl.py [-r] TRAIN_SIZE TEST_SIZE EPOCHS BATCH_SIZE
# The MLproject `command` prepends a literal "-r" flag, which would make
# float(sys.argv[1]) crash — strip it before positional parsing.
argv = [a for a in sys.argv[1:] if a != "-r"]
# BUG FIX: each positional argument gets its own bound check; the original
# tested `len(sys.argv) > 1` for argv[2..4] and raised IndexError whenever
# fewer than four values were supplied.
train_size_param = float(argv[0]) if len(argv) > 0 else 0.8
test_size_param = float(argv[1]) if len(argv) > 1 else 0.2
epochs = int(argv[2]) if len(argv) > 2 else 400
batch_size = int(argv[3]) if len(argv) > 3 else 128

with mlflow.start_run():
    # Record the hyper-parameters, train, then log the metric and the model.
    mlflow.log_param("train size", train_size_param)
    mlflow.log_param("test size", test_size_param)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch size", batch_size)
    model, rmse = prepare_model(
        train_size_param=train_size_param,
        test_size_param=test_size_param,
        epochs=epochs,
        batch_size=batch_size,
    )
    mlflow.log_metric("RMSE", rmse)
    mlflow.keras.log_model(model, "movies_imdb")

View File

@ -1,81 +0,0 @@
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping
from datetime import datetime
from sacred.observers import FileStorageObserver
from sacred import Experiment
# Sacred experiment recording runs to a local FileStorageObserver under
# lab07/my_runs.
ex = Experiment("file_observer", interactive=False, save_git_info=False)
ex.observers.append(FileStorageObserver('lab07/my_runs'))


@ex.config
def my_config():
    """Default hyper-parameters; Sacred injects these into captured functions."""
    # Intended training fraction (complement of the test split).
    train_size_param = 0.8
    # Fraction held out by train_test_split as a validation set.
    test_size_param = 0.2
    # Maximum training epochs; EarlyStopping may end training sooner.
    epochs = 400
    batch_size = 128
@ex.capture
def prepare_model(train_size_param, test_size_param, epochs, batch_size, _run):
    """Train a small dense regression network on the movie-rating data.

    Reads ``train.csv`` / ``test.csv`` from the working directory, trains a
    Keras model with early stopping, saves it to ``model_movies`` and
    returns the RMSE on the held-out test file. Hyper-parameters are
    injected by Sacred from ``my_config``; ``_run`` is Sacred's run object.
    """
    _run.info["prepare_model_ts"] = str(datetime.now())
    movies_data = pd.read_csv('train.csv', error_bad_lines=False)
    # First CSV column is a saved index, not a feature — drop it.
    movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
    movies_data.dropna(inplace=True)
    X = movies_data.drop("rating", axis=1)
    Y = movies_data["rating"]
    print(X, Y.values)
    # Hold out a validation split (default 8:2 ratio).
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size_param, random_state=42
    )
    test_df = pd.read_csv('test.csv')
    test_df.drop(test_df.columns[0], axis=1, inplace=True)
    x_test = test_df.drop("rating", axis=1)
    y_test = test_df["rating"]
    # Small fully-connected regression network.
    model = Sequential()
    model.add(Dense(8, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.compile(optimizer="adam", loss="mse")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
    model.fit(
        x=X_train.values,
        y=Y_train.values,
        validation_data=(X_test, Y_test.values),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stop],
    )
    y_pred = model.predict(x_test.values)
    # BUG FIX: mean_squared_error returns the MSE; take the square root so
    # the value recorded in _run.info really is the root mean squared error.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    _run.info["Final Results: "] = rmse
    model.save('model_movies')
    return rmse
@ex.automain
def my_main(train_size_param, test_size_param, epochs, batch_size):
    """Entry point: train the model and attach the saved artifact.

    Sacred's @ex.automain runs this when the script is executed directly.
    """
    # prepare_model is @ex.capture'd, so Sacred fills in its arguments.
    print(prepare_model())
    # NOTE(review): ex.run() inside the automain re-executes the experiment —
    # looks like a leftover from running without automain; confirm intent.
    # (Indentation here is ambiguous in the diff view; presumably these lines
    # are inside my_main, since module code after @ex.automain never runs.)
    r = ex.run()
    ex.add_artifact("model_movies/saved_model.pb")