This commit is contained in:
Jakub Pogodziński 2021-05-16 23:02:13 +02:00
parent b511261142
commit 6244a25e5d
9 changed files with 152 additions and 1 deletions

11
MLProject Normal file
View File

@ -0,0 +1,11 @@
# MLflow project definition for the zad8-mlflow.py training script.
name: 437622-mlflow
# The project runs inside this Docker image.
docker_env:
  image: jpogodzinski/ium:1
entry_points:
  # Single entry point; both parameters are substituted into the command below.
  main:
    parameters:
      epochs: {type: int, default: 15}
      batch_size: {type: int, default: 16}
    command: "python3 zad8-mlflow.py {epochs} {batch_size}"

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -1,3 +1,4 @@
0.5406397482957525
0.5406397482957525
0.5406397482957525
0.5406397482957525

Binary file not shown.

Binary file not shown.

View File

@ -29,7 +29,7 @@ model.add(layers.Dense(5, activation="relu", name="layer4"))
model.add(layers.Dense(1, activation="relu", name="output"))
model.compile(
optimizer=keras.optimizers.RMSprop(),
optimizer=keras.optimizers.Adam(),
loss=keras.losses.MeanSquaredError(),
)

75
zad7-sacred-mongo.py Normal file
View File

@ -0,0 +1,75 @@
from sacred import Experiment
from sacred.observers import MongoObserver
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers
# Sacred experiment object used by the decorators and calls below.
ex = Experiment("437622-mongo", interactive=False, save_git_info=False)
# The MongoDB observer below is disabled, so no observer is attached here.
# NOTE(review): the disabled URL embeds a user name and password in plain
# text — consider reading them from the environment before re-enabling it.
#ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))
# Default hyperparameters of the experiment, registered via @ex.config.
# prepare_model() passes `epochs` and `batch_size` on to model.fit().
@ex.config
def my_config():
    epochs = 15
    batch_size = 16
def _load_xy(csv_path):
    """Load a data CSV and return ``(features, labels)`` as NumPy arrays.

    The first line of the file is skipped, rows whose column 1 equals 2 are
    discarded, columns 0, 2 and 3 become the float32 feature matrix and
    column 1 is returned unchanged as the label vector.
    """
    frame = pd.read_csv(csv_path, header=None, skiprows=1)
    frame = frame[frame[1] != 2]
    features = frame[[0, 2, 3]].to_numpy().astype('float32')
    labels = frame[1].to_numpy()
    return features, labels


def _as_class_labels(predictions):
    """Round raw network outputs to the nearest integer label, flattened to 1-D."""
    return np.rint(predictions).astype(int).ravel()


@ex.capture
def prepare_model(epochs, batch_size, _run):
    """Train the "winner" network on train.csv and score it on test.csv.

    Side effects: saves the fitted model under the path ``model``, writes the
    raw predictions for the test set to ``results.csv`` (tab-separated, no
    header) and prints the accuracy.

    Args:
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.
        _run: Run object requested from Sacred; currently unused.

    Returns:
        The accuracy of the rounded predictions against the test labels.
    """
    model_name = "model"
    X, y = _load_xy('train.csv')

    model = keras.Sequential(name="winner")
    # The shape has to be a tuple; "(3)" without the comma is the integer 3.
    model.add(keras.Input(shape=(3,), name="game_info"))
    model.add(layers.Dense(4, activation="relu", name="layer1"))
    model.add(layers.Dense(8, activation="relu", name="layer2"))
    model.add(layers.Dense(8, activation="relu", name="layer3"))
    model.add(layers.Dense(5, activation="relu", name="layer4"))
    model.add(layers.Dense(1, activation="relu", name="output"))
    model.compile(
        optimizer=keras.optimizers.RMSprop(),
        loss=keras.losses.MeanSquaredError(),
    )
    model.fit(X, y, batch_size=batch_size, epochs=epochs)
    model.save(model_name)

    X_test, y_test = _load_xy('test.csv')
    predictions = model.predict(X_test)
    # results.csv keeps the raw (unrounded) network outputs.
    pd.DataFrame(predictions).to_csv('results.csv', sep='\t', index=False, header=False)

    # accuracy_score compares class labels; feeding it the continuous network
    # outputs raises unless every output happens to be integer-valued, so the
    # outputs are rounded to labels first.
    # NOTE(review): assumes the labels in column 1 are integer classes — confirm.
    acc = accuracy_score(y_test, _as_class_labels(predictions))
    print('Accuracy: ', acc)
    return acc
@ex.automain
def my_main(epochs, batch_size):
    """Experiment entry point: run prepare_model() and print what it returns.

    ``epochs`` and ``batch_size`` are accepted here but not forwarded;
    prepare_model() is called without arguments.
    """
    accuracy = prepare_model()
    print(accuracy)
# NOTE(review): these statements come after the @ex.automain function, so
# ex.run() looks like it starts an additional run of the experiment —
# confirm that running it again here is intended.
ex.run()
# Registers the path 'model' (the name prepare_model() saves the network
# under) as an artifact of the experiment.
# NOTE(review): verify that add_artifact behaves as expected when called
# here, after ex.run() has already returned.
ex.add_artifact('model')

64
zad8-mlflow.py Normal file
View File

@ -0,0 +1,64 @@
import sys
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers
import mlflow
def _load_xy(csv_path):
    """Load a data CSV and return ``(features, labels)`` as NumPy arrays.

    The first line of the file is skipped, rows whose column 1 equals 2 are
    discarded, columns 0, 2 and 3 become the float32 feature matrix and
    column 1 is returned unchanged as the label vector.
    """
    frame = pd.read_csv(csv_path, header=None, skiprows=1)
    frame = frame[frame[1] != 2]
    features = frame[[0, 2, 3]].to_numpy().astype('float32')
    labels = frame[1].to_numpy()
    return features, labels


def _as_class_labels(predictions):
    """Round raw network outputs to the nearest integer label, flattened to 1-D."""
    return np.rint(predictions).astype(int).ravel()


def train(epochs, batch_size):
    """Train the "winner" network on train.csv and score it on test.csv.

    Side effects: saves the fitted model under the path ``model``, writes the
    raw predictions for the test set to ``results.csv`` (tab-separated, no
    header) and prints the accuracy.

    Args:
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.

    Returns:
        A tuple ``(acc, model)``: the accuracy of the rounded predictions
        against the test labels, and the fitted Keras model.
    """
    model_name = "model"
    X, y = _load_xy('train.csv')

    model = keras.Sequential(name="winner")
    # The shape has to be a tuple; "(3)" without the comma is the integer 3.
    model.add(keras.Input(shape=(3,), name="game_info"))
    model.add(layers.Dense(4, activation="relu", name="layer1"))
    model.add(layers.Dense(8, activation="relu", name="layer2"))
    model.add(layers.Dense(8, activation="relu", name="layer3"))
    model.add(layers.Dense(5, activation="relu", name="layer4"))
    model.add(layers.Dense(1, activation="relu", name="output"))
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.MeanSquaredError(),
    )
    model.fit(X, y, batch_size=batch_size, epochs=epochs)
    model.save(model_name)

    X_test, y_test = _load_xy('test.csv')
    predictions = model.predict(X_test)
    # results.csv keeps the raw (unrounded) network outputs.
    pd.DataFrame(predictions).to_csv('results.csv', sep='\t', index=False, header=False)

    # accuracy_score compares class labels; feeding it the continuous network
    # outputs raises unless every output happens to be integer-valued, so the
    # outputs are rounded to labels first.
    # NOTE(review): assumes the labels in column 1 are integer classes — confirm.
    acc = accuracy_score(y_test, _as_class_labels(predictions))
    print('Accuracy: ', acc)
    return acc, model
# Command line: zad8-mlflow.py [epochs] [batch_size]; a missing argument
# falls back to 15 epochs / a batch size of 16.
epochs = 15 if len(sys.argv) <= 1 else int(sys.argv[1])
batch_size = 16 if len(sys.argv) <= 2 else int(sys.argv[2])

# Train inside one MLflow run, then record the hyperparameters, the accuracy
# and the fitted model under that run.
with mlflow.start_run():
    acc, model = train(epochs, batch_size)

    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_metric("accuracy", acc)

    mlflow.keras.log_model(model, 'model')