ium_s437622/zad8-mlflow.py

64 lines
2.0 KiB
Python
Raw Permalink Normal View History

2021-05-16 23:02:13 +02:00
import sys
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers
import mlflow
def _load_xy(csv_path):
    """Read *csv_path* and return ``(features, target)`` as NumPy arrays.

    The first row of the file is skipped and columns are addressed by
    position: column 1 is the target, columns 0, 2 and 3 are the features
    (cast to float32). Rows whose target equals 2 are discarded.
    """
    frame = pd.read_csv(csv_path, header=None, skiprows=1)
    frame = frame[frame[1] != 2]
    features = frame[[0, 2, 3]].to_numpy().astype('float32')
    target = frame[1].to_numpy()
    return features, target


def train(epochs, batch_size):
    """Train the "winner" network on train.csv and score it on test.csv.

    Both CSV files are read from the current working directory. Side
    effects: the fitted model is saved under the name "model" and the raw
    network outputs for the test set are written to results.csv
    (tab-separated, no header, no index).

    Args:
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Mini-batch size passed to ``model.fit``.

    Returns:
        A tuple ``(acc, model)``: the accuracy on the test set and the
        fitted Keras model.
    """
    model_name = "model"

    X, y = _load_xy('train.csv')

    model = keras.Sequential(name="winner")
    # shape must be a tuple; "(3)" is just the integer 3.
    model.add(keras.Input(shape=(3,), name="game_info"))
    model.add(layers.Dense(4, activation="relu", name="layer1"))
    model.add(layers.Dense(8, activation="relu", name="layer2"))
    model.add(layers.Dense(8, activation="relu", name="layer3"))
    model.add(layers.Dense(5, activation="relu", name="layer4"))
    model.add(layers.Dense(1, activation="relu", name="output"))

    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.MeanSquaredError(),
    )

    model.fit(
        X,
        y,
        batch_size=batch_size,
        epochs=epochs,
    )

    # NOTE(review): an extension-less path relies on the SavedModel format;
    # newer Keras releases may require ".keras"/".h5" - confirm target version.
    model.save(model_name)

    X_test, y_test = _load_xy('test.csv')

    predictions = model.predict(X_test)
    # results.csv keeps the raw (continuous) network outputs.
    pd.DataFrame(predictions).to_csv('results.csv', sep='\t', index=False, header=False)

    # accuracy_score compares class labels, so the continuous outputs must be
    # turned into labels first; passing them directly raises a ValueError.
    # Assumes the remaining targets are 0/1 once class 2 is dropped - TODO confirm.
    predicted_labels = (np.asarray(predictions).ravel() >= 0.5).astype(int)
    acc = accuracy_score(y_test, predicted_labels)
    print('Accuracy: ', acc)

    return acc, model
# Optional positional CLI arguments: <epochs> <batch_size>.
# When an argument is absent the defaults 15 and 16 are used.
cli_args = sys.argv[1:]
epochs = int(cli_args[0]) if cli_args else 15
batch_size = int(cli_args[1]) if len(cli_args) > 1 else 16

# Train inside a single MLflow run, then record the hyperparameters,
# the resulting accuracy and the model itself on that run.
with mlflow.start_run():
    acc, model = train(epochs, batch_size)

    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", batch_size)

    mlflow.log_metric("accuracy", acc)
    mlflow.keras.log_model(model, 'model')