diff --git a/MLProject b/MLProject
new file mode 100644
index 0000000..d57ee8c
--- /dev/null
+++ b/MLProject
@@ -0,0 +1,11 @@
+name: 437622-mlflow
+
+docker_env:
+  image: jpogodzinski/ium:1
+
+entry_points:
+  main:
+    parameters:
+      epochs: {type: int, default: 15}
+      batch_size: {type: int, default: 16}
+    command: "python3 zad8-mlflow.py {epochs} {batch_size}"
\ No newline at end of file
diff --git a/evaluation.png b/evaluation.png
index 06e618f..2d9fdaf 100644
Binary files a/evaluation.png and b/evaluation.png differ
diff --git a/evaluation.txt b/evaluation.txt
index d156ae7..84f7024 100644
--- a/evaluation.txt
+++ b/evaluation.txt
@@ -1,3 +1,4 @@
 0.5406397482957525
 0.5406397482957525
 0.5406397482957525
+0.5406397482957525
diff --git a/model/saved_model.pb b/model/saved_model.pb
index 3a7eaeb..9d9b1b3 100644
Binary files a/model/saved_model.pb and b/model/saved_model.pb differ
diff --git a/model/variables/variables.data-00000-of-00001 b/model/variables/variables.data-00000-of-00001
index 334c0ff..cb62869 100644
Binary files a/model/variables/variables.data-00000-of-00001 and b/model/variables/variables.data-00000-of-00001 differ
diff --git a/model/variables/variables.index b/model/variables/variables.index
index d483ba2..a7d1009 100644
Binary files a/model/variables/variables.index and b/model/variables/variables.index differ
diff --git a/zad5.py b/zad5.py
index d565397..b681ed6 100644
--- a/zad5.py
+++ b/zad5.py
@@ -29,7 +29,7 @@ model.add(layers.Dense(5, activation="relu", name="layer4"))
 model.add(layers.Dense(1, activation="relu", name="output"))
 
 model.compile(
-    optimizer=keras.optimizers.RMSprop(),
+    optimizer=keras.optimizers.Adam(),
     loss=keras.losses.MeanSquaredError(),
 )
 
diff --git a/zad7-sacred-mongo.py b/zad7-sacred-mongo.py
new file mode 100644
--- /dev/null
+++ b/zad7-sacred-mongo.py
@@ -0,0 +1,88 @@
+"""Sacred experiment: train the "winner" model and score it on test.csv."""
+import shutil
+
+from sacred import Experiment
+from sacred.observers import MongoObserver
+import pandas as pd
+import numpy as np
+from tensorflow import keras
+from sklearn.metrics import accuracy_score
+from tensorflow.keras import layers
+
+
+ex = Experiment("437622-mongo", interactive=False, save_git_info=False)
+# NOTE(review): the observer below is disabled and its URL embeds credentials;
+# consider supplying them from the environment before enabling it.
+#ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))
+
+FEATURE_COLS = [0, 2, 3]
+LABEL_COL = 1
+# Rows whose label column holds this value are dropped from train and test data.
+EXCLUDED_LABEL = 2
+
+
+# Default hyperparameters of the experiment.
+@ex.config
+def my_config():
+    epochs = 15
+    batch_size = 16
+
+
+def load_dataset(path):
+    """Read the CSV at *path* and return (float32 feature matrix, label vector)."""
+    frame = pd.read_csv(path, header=None, skiprows=1)
+    frame = frame[frame[LABEL_COL] != EXCLUDED_LABEL]
+    features = frame[FEATURE_COLS].to_numpy().astype('float32')
+    labels = frame[LABEL_COL].to_numpy()
+    return features, labels
+
+
+@ex.capture
+def prepare_model(epochs, batch_size, _run):
+    """Train the model, attach it to the run and return its test accuracy.
+
+    epochs and batch_size are forwarded to model.fit; _run is the Sacred run
+    that receives the saved model as an artifact.
+    """
+    model_name = "model"
+    X, y = load_dataset('train.csv')
+
+    model = keras.Sequential(name="winner")
+    # shape must be a tuple: (3) is just the integer 3, (3,) is a 1-tuple.
+    model.add(keras.Input(shape=(3,), name="game_info"))
+    model.add(layers.Dense(4, activation="relu", name="layer1"))
+    model.add(layers.Dense(8, activation="relu", name="layer2"))
+    model.add(layers.Dense(8, activation="relu", name="layer3"))
+    model.add(layers.Dense(5, activation="relu", name="layer4"))
+    model.add(layers.Dense(1, activation="relu", name="output"))
+
+    model.compile(
+        optimizer=keras.optimizers.RMSprop(),
+        loss=keras.losses.MeanSquaredError(),
+    )
+
+    model.fit(X, y, batch_size=batch_size, epochs=epochs)
+
+    model.save(model_name)
+    # Artifacts are attached as single files and only while the run is active,
+    # so archive the saved model here (assumes model.save wrote a directory).
+    archive_path = shutil.make_archive(model_name, 'zip', root_dir=model_name)
+    _run.add_artifact(archive_path)
+
+    X_test, y_test = load_dataset('test.csv')
+    predictions = model.predict(X_test)
+    pd.DataFrame(predictions).to_csv('results.csv', sep='\t', index=False, header=False)
+
+    # accuracy_score rejects continuous predictions, so turn the regression
+    # output into class labels first (assumes labels are 0/1 - TODO confirm).
+    predicted_labels = (np.ravel(predictions) >= 0.5).astype(int)
+    acc = accuracy_score(y_test, predicted_labels)
+    print('Accuracy: ', acc)
+    return acc
+
+
+# automain runs the experiment at this point when the file is executed as a
+# script, so it stays last and needs no extra ex.run() call.
+@ex.automain
+def my_main(epochs, batch_size):
+    print(prepare_model())
diff --git a/zad8-mlflow.py b/zad8-mlflow.py
new file mode 100644
--- /dev/null
+++ b/zad8-mlflow.py
@@ -0,0 +1,76 @@
+"""Train the "winner" model and track the run with MLflow.
+
+Usage: python3 zad8-mlflow.py [epochs] [batch_size]
+"""
+import sys
+import pandas as pd
+import numpy as np
+from tensorflow import keras
+from sklearn.metrics import accuracy_score
+from tensorflow.keras import layers
+import mlflow
+
+FEATURE_COLS = [0, 2, 3]
+LABEL_COL = 1
+# Rows whose label column holds this value are dropped from train and test data.
+EXCLUDED_LABEL = 2
+
+
+def load_dataset(path):
+    """Read the CSV at *path* and return (float32 feature matrix, label vector)."""
+    frame = pd.read_csv(path, header=None, skiprows=1)
+    frame = frame[frame[LABEL_COL] != EXCLUDED_LABEL]
+    features = frame[FEATURE_COLS].to_numpy().astype('float32')
+    labels = frame[LABEL_COL].to_numpy()
+    return features, labels
+
+
+def train(epochs, batch_size):
+    """Train the model, save it, and return (test accuracy, fitted model).
+
+    epochs and batch_size are forwarded to model.fit.
+    """
+    model_name = "model"
+    X, y = load_dataset('train.csv')
+
+    model = keras.Sequential(name="winner")
+    # shape must be a tuple: (3) is just the integer 3, (3,) is a 1-tuple.
+    model.add(keras.Input(shape=(3,), name="game_info"))
+    model.add(layers.Dense(4, activation="relu", name="layer1"))
+    model.add(layers.Dense(8, activation="relu", name="layer2"))
+    model.add(layers.Dense(8, activation="relu", name="layer3"))
+    model.add(layers.Dense(5, activation="relu", name="layer4"))
+    model.add(layers.Dense(1, activation="relu", name="output"))
+
+    model.compile(
+        optimizer=keras.optimizers.Adam(),
+        loss=keras.losses.MeanSquaredError(),
+    )
+
+    model.fit(X, y, batch_size=batch_size, epochs=epochs)
+
+    model.save(model_name)
+
+    X_test, y_test = load_dataset('test.csv')
+    predictions = model.predict(X_test)
+    pd.DataFrame(predictions).to_csv('results.csv', sep='\t', index=False, header=False)
+
+    # accuracy_score rejects continuous predictions, so turn the regression
+    # output into class labels first (assumes labels are 0/1 - TODO confirm).
+    predicted_labels = (np.ravel(predictions) >= 0.5).astype(int)
+    acc = accuracy_score(y_test, predicted_labels)
+    print('Accuracy: ', acc)
+    return acc, model
+
+
+if __name__ == "__main__":
+    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 15
+    batch_size = int(sys.argv[2]) if len(sys.argv) > 2 else 16
+
+    with mlflow.start_run():
+        # Log the parameters first so they are recorded even if training fails.
+        mlflow.log_param("epochs", epochs)
+        mlflow.log_param("batch_size", batch_size)
+        acc, model = train(epochs, batch_size)
+        mlflow.log_metric("accuracy", acc)
+        mlflow.keras.log_model(model, 'model')