From b7556dedb02b58eb5e625e463aecb797c50d6328 Mon Sep 17 00:00:00 2001
From: s430705
Date: Wed, 12 May 2021 09:42:24 +0200
Subject: [PATCH] Split sacred and MLflow to separate files

---
 lab07_sacred.py                        | 101 +++++++++++++++++++++++++
 lab07_08_sacred_mfl.py => lab08_mfl.py |  34 +--------
 2 files changed, 102 insertions(+), 33 deletions(-)
 create mode 100644 lab07_sacred.py
 rename lab07_08_sacred_mfl.py => lab08_mfl.py (73%)

diff --git a/lab07_sacred.py b/lab07_sacred.py
new file mode 100644
index 0000000..8caffe7
--- /dev/null
+++ b/lab07_sacred.py
@@ -0,0 +1,101 @@
+"""Train the movie-rating regression model as a Sacred experiment.
+
+Each run is recorded by a FileStorageObserver under ``lab07/my_runs``.
+"""
+
+from datetime import datetime
+
+import pandas as pd
+from sacred import Experiment
+from sacred.observers import FileStorageObserver
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.layers import Dense, Dropout
+from tensorflow.keras.models import Sequential
+
+ex = Experiment("file_observer", interactive=False, save_git_info=False)
+ex.observers.append(FileStorageObserver("lab07/my_runs"))
+
+
+@ex.config
+def my_config():
+    # Default hyper-parameters; Sacred turns these locals into config entries.
+    train_size_param = 0.8
+    test_size_param = 0.2
+    epochs = 400
+    batch_size = 128
+
+
+@ex.capture
+def prepare_model(train_size_param, test_size_param, epochs, batch_size, _run):
+    """Train the rating regressor and score it on ``test.csv``.
+
+    Sacred injects every argument: the first four from the experiment
+    config and ``_run`` as the active run. ``train_size_param`` is accepted
+    but unused; the validation split is driven by ``test_size_param`` alone.
+
+    Side effects: stores a start timestamp and the final score in
+    ``_run.info``, saves the model to ``model_movies`` and attaches
+    ``model_movies/saved_model.pb`` to the run as an artifact.
+
+    Returns:
+        Root mean squared error of the predictions for ``test.csv``.
+    """
+    _run.info["prepare_model_ts"] = str(datetime.now())
+    movies_data = pd.read_csv("train.csv", error_bad_lines=False)
+    movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
+    movies_data.dropna(inplace=True)
+    X = movies_data.drop("rating", axis=1)
+    Y = movies_data["rating"]
+
+    print(X, Y.values)
+    # Hold out part of train.csv as the validation set for early stopping
+    X_train, X_test, Y_train, Y_test = train_test_split(
+        X, Y, test_size=test_size_param, random_state=42
+    )
+
+    test_df = pd.read_csv("test.csv")
+    test_df.drop(test_df.columns[0], axis=1, inplace=True)
+    x_test = test_df.drop("rating", axis=1)
+    y_test = test_df["rating"]
+
+    # Set up model
+    model = Sequential()
+    model.add(Dense(8, activation="relu"))
+    model.add(Dropout(0.5))
+    model.add(Dense(3, activation="relu"))
+    model.add(Dropout(0.5))
+    model.add(Dense(1))
+    model.compile(optimizer="adam", loss="mse")
+
+    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
+
+    model.fit(
+        x=X_train.values,
+        y=Y_train.values,
+        validation_data=(X_test.values, Y_test.values),
+        batch_size=batch_size,
+        epochs=epochs,
+        callbacks=[early_stop],
+    )
+    y_pred = model.predict(x_test.values)
+
+    # mean_squared_error returns the MSE; take the root to report a real RMSE
+    rmse = mean_squared_error(y_test, y_pred) ** 0.5
+
+    _run.info["Final Results: "] = rmse
+
+    model.save("model_movies")
+    # Attach the artifact while the run is active so it is stored with it
+    _run.add_artifact("model_movies/saved_model.pb")
+
+    return rmse
+
+
+# Keep automain last: when the file is run as a script the experiment starts
+# right here, so a later ex.run() would train the model a second time.
+@ex.automain
+def my_main(train_size_param, test_size_param, epochs, batch_size):
+    """Entry point of the experiment: train the model and print its RMSE."""
+    print(prepare_model())
diff --git a/lab07_08_sacred_mfl.py b/lab08_mfl.py
similarity index 73%
rename from lab07_08_sacred_mfl.py
rename to lab08_mfl.py
index 2ed778a..72ab479 100644
--- a/lab07_08_sacred_mfl.py
+++ b/lab08_mfl.py
@@ -2,41 +2,20 @@ import sys
 
 import mlflow
 import pandas as pd
-from sacred import Experiment
-from sacred.observers import FileStorageObserver
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 from tensorflow.keras.callbacks import EarlyStopping
 from tensorflow.keras.layers import Dense, Dropout
 from tensorflow.keras.models import Sequential
 
-'''
-If you want to use it as sacred, you have to
-uncomment lines, for now it's set up for MLFlow
-'''
 
-# ex = Experiment("file_observer", interactive=False, save_git_info=False)
-# ex.observers.append(FileStorageObserver('lab07/my_runs'))
-
-# @ex.config
-# def my_config():
-#     train_size_param = 0.8
-#     test_size_param = 0.2
-#     epochs = 400
-#     batch_size = 128
-
-
-# @ex.capture
-def prepare_model(train_size_param, test_size_param, epochs, batch_size):  # _run):
-    # _run.info["prepare_model_ts"] = str(datetime.now())
+def prepare_model(train_size_param, test_size_param, epochs, batch_size):
     movies_data = pd.read_csv("train.csv", error_bad_lines=False)
     movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
     movies_data.dropna(inplace=True)
     X = movies_data.drop("rating", axis=1)
     Y = movies_data["rating"]
 
-    print(X, Y.values)
-    # Split set to train/test 8:2 ratio
     X_train, X_test, Y_train, Y_test = train_test_split(
         X, Y, test_size=test_size_param, random_state=42
     )
@@ -69,22 +48,11 @@ def prepare_model(train_size_param, test_size_param, epochs, batch_size):  # _ru
 
     rmse = mean_squared_error(y_test, y_pred)
 
-    # _run.info["Final Results: "] = rmse
-
     model.save("model_movies")
 
     return model, rmse
 
 
-# @ex.automain
-# def my_main(train_size_param, test_size_param, epochs, batch_size):
-#     print(prepare_model())
-
-
-# r = ex.run()
-# ex.add_artifact("model_movies/saved_model.pb")
-
-
 train_size_param = float(sys.argv[1]) if len(sys.argv) > 1 else 0.8
 test_size_param = float(sys.argv[2]) if len(sys.argv) > 1 else 0.2
 epochs = int(sys.argv[3]) if len(sys.argv) > 1 else 400