diff --git a/MLproject b/MLproject
new file mode 100644
index 0000000..a8e5fc6
--- /dev/null
+++ b/MLproject
@@ -0,0 +1,14 @@
+name: Lab08-ium
+
+# docker_env must be a mapping with an `image` key (a bare file name is invalid here)
+docker_env:
+  image: s430705/ium:3
+
+entry_points:
+  main:
+    parameters:
+      train_size_param: {type: float, default: 0.8}
+      test_size_param: {type: float, default: 0.2}
+      epochs: {type: float, default: 400}
+      batch_size: {type: float, default: 128}
+    command: "python lab07_08_sacred_mfl.py {train_size_param} {test_size_param} {epochs} {batch_size}"
diff --git a/lab07_08_sacred_mfl.py b/lab07_08_sacred_mfl.py
new file mode 100644
index 0000000..2ed778a
--- /dev/null
+++ b/lab07_08_sacred_mfl.py
@@ -0,0 +1,110 @@
+import sys
+import mlflow
+import pandas as pd
+
+from sacred import Experiment
+from sacred.observers import FileStorageObserver
+from sklearn.metrics import mean_squared_error
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.layers import Dense, Dropout
+from tensorflow.keras.models import Sequential
+
+'''
+If you want to use it as sacred, you have to
+uncomment lines, for now it's set up for MLFlow
+'''
+# ex = Experiment("file_observer", interactive=False, save_git_info=False)
+# ex.observers.append(FileStorageObserver('lab07/my_runs'))
+
+
+# @ex.config
+# def my_config():
+#     train_size_param = 0.8
+#     test_size_param = 0.2
+#     epochs = 400
+#     batch_size = 128
+
+
+# @ex.capture
+def prepare_model(train_size_param, test_size_param, epochs, batch_size):  # _run):
+    # _run.info["prepare_model_ts"] = str(datetime.now())
+    movies_data = pd.read_csv("train.csv", error_bad_lines=False)  # NOTE(review): error_bad_lines removed in pandas>=2.0
+    movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
+    movies_data.dropna(inplace=True)
+    X = movies_data.drop("rating", axis=1)
+    Y = movies_data["rating"]
+
+    print(X, Y.values)
+    # Split set to train/test 8:2 ratio
+    X_train, X_test, Y_train, Y_test = train_test_split(
+        X, Y, test_size=test_size_param, random_state=42
+    )
+
+    test_df = pd.read_csv("test.csv")
+    test_df.drop(test_df.columns[0], axis=1, inplace=True)
+    x_test = test_df.drop("rating", axis=1)
+    y_test = test_df["rating"]
+
+    # Set up model
+    model = Sequential()
+    model.add(Dense(8, activation="relu"))
+    model.add(Dropout(0.5))
+    model.add(Dense(3, activation="relu"))
+    model.add(Dropout(0.5))
+    model.add(Dense(1))
+    model.compile(optimizer="adam", loss="mse")
+
+    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
+
+    model.fit(
+        x=X_train.values,
+        y=Y_train.values,
+        validation_data=(X_test, Y_test.values),
+        batch_size=batch_size,
+        epochs=epochs,
+        callbacks=[early_stop],
+    )
+    y_pred = model.predict(x_test.values)
+
+    rmse = mean_squared_error(y_test, y_pred) ** 0.5  # sqrt of MSE -> true RMSE
+
+    # _run.info["Final Results: "] = rmse
+
+    model.save("model_movies")
+
+    return model, rmse
+
+
+# @ex.automain
+# def my_main(train_size_param, test_size_param, epochs, batch_size):
+#     print(prepare_model())
+
+
+# r = ex.run()
+# ex.add_artifact("model_movies/saved_model.pb")
+
+
+train_size_param = float(sys.argv[1]) if len(sys.argv) > 1 else 0.8
+test_size_param = float(sys.argv[2]) if len(sys.argv) > 2 else 0.2
+epochs = int(float(sys.argv[3])) if len(sys.argv) > 3 else 400
+batch_size = int(float(sys.argv[4])) if len(sys.argv) > 4 else 128
+
+
+with mlflow.start_run():
+
+    mlflow.log_param("train size", train_size_param)
+    mlflow.log_param("test size", test_size_param)
+    mlflow.log_param("epochs", epochs)
+    mlflow.log_param("batch size", batch_size)
+
+    model, rmse = prepare_model(
+        train_size_param=train_size_param,
+        test_size_param=test_size_param,
+        epochs=epochs,
+        batch_size=batch_size,
+    )
+
+    mlflow.log_metric("RMSE", rmse)
+
+    mlflow.keras.log_model(model, "movies_imdb")
diff --git a/lab07_sacred.py b/lab07_sacred.py
deleted file mode 100644
index e355d5d..0000000
--- a/lab07_sacred.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import pandas as pd
-from sklearn.metrics import mean_squared_error
-from sklearn.model_selection import train_test_split
-
-from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import Dense
-from tensorflow.keras.layers import Dropout
-from tensorflow.keras.callbacks import EarlyStopping
-from datetime import datetime
-from sacred.observers import FileStorageObserver
-from sacred import Experiment
-
-ex = Experiment("file_observer", interactive=False, save_git_info=False)
-ex.observers.append(FileStorageObserver('lab07/my_runs'))
-
-
-@ex.config
-def my_config():
-    train_size_param = 0.8
-    test_size_param = 0.2
-    epochs = 400
-    batch_size = 128
-
-
-@ex.capture
-def prepare_model(train_size_param, test_size_param, epochs, batch_size, _run):
-    _run.info["prepare_model_ts"] = str(datetime.now())
-    movies_data = pd.read_csv('train.csv', error_bad_lines=False)
-    movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
-    movies_data.dropna(inplace=True)
-    X = movies_data.drop("rating", axis=1)
-    Y = movies_data["rating"]
-
-    print(X, Y.values)
-    # Split set to train/test 8:2 ratio
-    X_train, X_test, Y_train, Y_test = train_test_split(
-        X, Y, test_size=test_size_param, random_state=42
-    )
-
-    test_df = pd.read_csv('test.csv')
-    test_df.drop(test_df.columns[0], axis=1, inplace=True)
-    x_test = test_df.drop("rating", axis=1)
-    y_test = test_df["rating"]
-
-    # Set up model
-    model = Sequential()
-    model.add(Dense(8, activation="relu"))
-    model.add(Dropout(0.5))
-    model.add(Dense(3, activation="relu"))
-    model.add(Dropout(0.5))
-    model.add(Dense(1))
-    model.compile(optimizer="adam", loss="mse")
-
-    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
-
-    model.fit(
-        x=X_train.values,
-        y=Y_train.values,
-        validation_data=(X_test, Y_test.values),
-        batch_size=batch_size,
-        epochs=epochs,
-        callbacks=[early_stop],
-    )
-    y_pred = model.predict(x_test.values)
-
-    rmse = mean_squared_error(y_test, y_pred)
-
-    _run.info["Final Results: "] = rmse
-
-    model.save('model_movies')
-
-    return rmse
-
-
-@ex.automain
-def my_main(train_size_param, test_size_param, epochs, batch_size):
-    print(prepare_model())
-
-
-r = ex.run()
-ex.add_artifact("model_movies/saved_model.pb")