ium_434804/mlflow_model.py

55 lines
2.4 KiB
Python
Raw Normal View History

2021-05-15 20:56:45 +02:00
import sys
import pandas as pd
2021-05-23 13:39:05 +02:00
import mlflow as mlf
2021-05-15 20:56:45 +02:00
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as rmse
def create_model(test_size, epochs, batch_size):
    """Train a dense regression network on per-country vaccination totals.

    Reads ``country_vaccinations.csv`` from the working directory, drops
    rows containing missing values, sums the numeric columns per country
    and fits a Keras model that predicts ``daily_vaccinations`` from the
    remaining aggregated columns. The trained model is also written to
    ``vaccines_model`` via ``model.save``.

    Args:
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.

    Returns:
        A tuple ``(model, rmse_result, signature, input_example)`` where
        ``model`` is the fitted Keras model, ``rmse_result`` is the root
        mean squared error on the held-out split, ``signature`` is the
        MLflow signature inferred from the training inputs/predictions and
        ``input_example`` is a single feature row taken from the test split.
    """
    df = pd.read_csv('country_vaccinations.csv').dropna()

    # One row per country. ``numeric_only=True`` keeps any non-numeric
    # columns out of the feature matrix; older pandas dropped them silently,
    # pandas >= 2.0 would otherwise try to sum (concatenate) them.
    dataset = df.groupby(by=["country"], dropna=True).sum(numeric_only=True)

    is_target = dataset.columns == "daily_vaccinations"
    X = dataset.loc[:, ~is_target]
    y = dataset.loc[:, is_target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=6
    )

    model = keras.Sequential([
        keras.layers.Dense(512, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(512, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(128, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(1, kernel_initializer='normal', activation='linear'),
    ])
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
    model.fit(X_train, y_train, epochs=epochs, validation_split=0.3, batch_size=batch_size)

    signature = mlf.models.signature.infer_signature(
        X_train.values, model.predict(X_train.values)
    )
    # The original always took row 10, which raises IndexError whenever the
    # test split has 10 or fewer rows; clamp to the last available row.
    example_row = min(10, len(X_test) - 1)
    input_example = X_test.values[example_row]

    prediction = model.predict(X_test)
    # ``rmse`` is sklearn's mean_squared_error under an alias. Taking the
    # square root here avoids the ``squared=False`` keyword, which newer
    # scikit-learn releases deprecate; for a single target column the value
    # is identical.
    rmse_result = rmse(y_test, prediction) ** 0.5

    model.save('vaccines_model')
    return model, rmse_result, signature, input_example
2021-05-15 20:56:45 +02:00
def parse_cli_args(argv):
    """Return ``(test_size, epochs, batch_size)`` parsed from *argv*.

    ``argv`` follows the ``sys.argv`` layout: index 0 is the program name,
    followed by up to three optional positional values. Each missing value
    falls back to its default (0.2, 100 and 32 respectively).

    Raises:
        ValueError: If a supplied value cannot be converted to float/int.
    """
    # Each argument is guarded by its own position; the original checked
    # ``len(argv) > 1`` for all three, so passing only one or two arguments
    # raised IndexError instead of using the defaults.
    test_size = float(argv[1]) if len(argv) > 1 else 0.2
    epochs = int(argv[2]) if len(argv) > 2 else 100
    batch_size = int(argv[3]) if len(argv) > 3 else 32
    return test_size, epochs, batch_size


if __name__ == "__main__":
    test_size, epochs, batch_size = parse_cli_args(sys.argv)

    with mlf.start_run():
        # Record the hyper-parameters of this run before training starts.
        mlf.log_param("Test size", test_size)
        mlf.log_param("Epochs", epochs)
        mlf.log_param("Batch size", batch_size)

        model, rmse_result, signature, input_example = create_model(
            test_size=test_size,
            epochs=epochs,
            batch_size=batch_size,
        )
        mlf.log_metric("RMSE", rmse_result)

        # mlf.keras.log_model(model, "country_vaccination")
        mlf.keras.save_model(model, "country_vaccination", input_example=input_example, signature=signature)