Split sacred and MLflow to separate files

s430705 2021-05-12 09:42:24 +02:00
parent 72c11b869f
commit b7556dedb0
2 changed files with 81 additions and 33 deletions

lab07_sacred.py (new file, 80 additions)

@@ -0,0 +1,80 @@
from datetime import datetime

import pandas as pd
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

ex = Experiment("file_observer", interactive=False, save_git_info=False)
ex.observers.append(FileStorageObserver("lab07/my_runs"))

@ex.config
def my_config():
    train_size_param = 0.8
    test_size_param = 0.2
    epochs = 400
    batch_size = 128

@ex.capture
def prepare_model(train_size_param, test_size_param, epochs, batch_size, _run):
    _run.info["prepare_model_ts"] = str(datetime.now())

    movies_data = pd.read_csv("train.csv", error_bad_lines=False)
    movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
    movies_data.dropna(inplace=True)
    X = movies_data.drop("rating", axis=1)
    Y = movies_data["rating"]
    print(X, Y.values)  # quick sanity check of features and labels

    # Split set to train/test 8:2 ratio
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size_param, random_state=42
    )

    test_df = pd.read_csv("test.csv")
    test_df.drop(test_df.columns[0], axis=1, inplace=True)
    x_test = test_df.drop("rating", axis=1)
    y_test = test_df["rating"]

    # Set up model
    model = Sequential()
    model.add(Dense(8, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.compile(optimizer="adam", loss="mse")

    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
    model.fit(
        x=X_train.values,
        y=Y_train.values,
        validation_data=(X_test.values, Y_test.values),
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[early_stop],
    )

    y_pred = model.predict(x_test.values)
    # take the square root so the reported value is actually RMSE, not MSE
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    _run.info["Final Results: "] = rmse
    model.save("model_movies")
    return rmse

@ex.automain
def my_main(train_size_param, test_size_param, epochs, batch_size):
    print(prepare_model())
    # @ex.automain already starts the run, so calling ex.run() here again is
    # not needed; just attach the saved model as an artifact of this run.
    ex.add_artifact("model_movies/saved_model.pb")
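
For context (this is not part of the changed files): the values declared in my_config can be overridden without editing the script, either on the command line (python lab07_sacred.py with epochs=10 batch_size=64) or programmatically. A minimal sketch, assuming the file above is importable as lab07_sacred; the override values below are arbitrary examples:

# Sketch only: drive the Sacred experiment from another Python session.
# Importing is safe here because automain only triggers a run when the
# module itself is executed as __main__.
from lab07_sacred import ex

run = ex.run(config_updates={"epochs": 10, "batch_size": 64})
print(run.config)  # the merged configuration Sacred actually used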

(second changed file, name not shown: 1 addition, 33 deletions)

@@ -2,41 +2,20 @@ import sys
 import mlflow
 import pandas as pd
-from sacred import Experiment
-from sacred.observers import FileStorageObserver
 from sklearn.metrics import mean_squared_error
 from sklearn.model_selection import train_test_split
 from tensorflow.keras.callbacks import EarlyStopping
 from tensorflow.keras.layers import Dense, Dropout
 from tensorflow.keras.models import Sequential
-'''
-If you want to use it as sacred, you have to
-uncomment lines, for now it's set up for MLFlow
-'''
-# ex = Experiment("file_observer", interactive=False, save_git_info=False)
-# ex.observers.append(FileStorageObserver('lab07/my_runs'))
-# @ex.config
-# def my_config():
-# train_size_param = 0.8
-# test_size_param = 0.2
-# epochs = 400
-# batch_size = 128
-# @ex.capture
-def prepare_model(train_size_param, test_size_param, epochs, batch_size):  # _run):
-    # _run.info["prepare_model_ts"] = str(datetime.now())
+def prepare_model(train_size_param, test_size_param, epochs, batch_size):
     movies_data = pd.read_csv("train.csv", error_bad_lines=False)
     movies_data.drop(movies_data.columns[0], axis=1, inplace=True)
     movies_data.dropna(inplace=True)
     X = movies_data.drop("rating", axis=1)
     Y = movies_data["rating"]
-    print(X, Y.values)
-    # Split set to train/test 8:2 ratio
     X_train, X_test, Y_train, Y_test = train_test_split(
         X, Y, test_size=test_size_param, random_state=42
     )
@@ -69,22 +48,11 @@ def prepare_model(train_size_param, test_size_param, epochs, batch_size): # _ru
     rmse = mean_squared_error(y_test, y_pred)
-    # _run.info["Final Results: "] = rmse
     model.save("model_movies")
     return model, rmse
-# @ex.automain
-# def my_main(train_size_param, test_size_param, epochs, batch_size):
-# print(prepare_model())
-# r = ex.run()
-# ex.add_artifact("model_movies/saved_model.pb")
 train_size_param = float(sys.argv[1]) if len(sys.argv) > 1 else 0.8
 test_size_param = float(sys.argv[2]) if len(sys.argv) > 1 else 0.2
 epochs = int(sys.argv[3]) if len(sys.argv) > 1 else 400
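
The MLflow half of the split is not visible past this point: the hunk ends at the argument parsing, before any tracking calls. For orientation only, a minimal hedged sketch of how such a script typically wraps the training call with MLflow, reusing the parameter names and the (model, rmse) return value shown above; the actual calls in the repository may differ.

# Hypothetical continuation, not the file's actual code: log the run
# parameters, the final metric and the trained model around prepare_model().
import mlflow
import mlflow.keras

with mlflow.start_run():
    mlflow.log_param("train_size_param", train_size_param)
    mlflow.log_param("test_size_param", test_size_param)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", batch_size)

    model, rmse = prepare_model(train_size_param, test_size_param, epochs, batch_size)

    mlflow.log_metric("rmse", rmse)
    mlflow.keras.log_model(model, "model")  # optionally store the Keras model as an artifact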