ium_434804/mlflow_model.py

64 lines
2.9 KiB
Python
Raw Normal View History

2021-05-15 20:56:45 +02:00
import sys
import pandas as pd
2021-05-23 13:39:05 +02:00
import mlflow as mlf
2021-05-15 20:56:45 +02:00
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as rmse
2021-05-23 17:38:53 +02:00
from urllib.parse import urlparse
2021-05-15 20:56:45 +02:00
def create_model(test_size, epochs, batch_size):
    """Train a dense regression network on per-country vaccination totals.

    Reads ``country_vaccinations.csv`` from the working directory, drops rows
    containing missing values, sums the numeric columns per country and fits
    a Keras model predicting ``daily_vaccinations`` from the other columns.
    The fitted model is also saved to ``vaccines_model``.

    Args:
        test_size: Hold-out share forwarded to ``train_test_split``.
        epochs: Number of training epochs.
        batch_size: Batch size forwarded to ``model.fit``.

    Returns:
        A tuple ``(model, rmse_result, signature, input_example)``: the
        fitted Keras model, the root mean squared error on the hold-out
        split, the MLflow signature inferred from the training inputs and
        the model's predictions on them, and one hold-out row to use as an
        input example.
    """
    df = pd.read_csv('country_vaccinations.csv').dropna()

    # Aggregate numeric columns only. Older pandas dropped text columns from
    # the sum implicitly; newer releases would concatenate the strings and
    # hand non-numeric features to the network.
    dataset = df.groupby(by=["country"], dropna=True).sum(numeric_only=True)

    is_target = dataset.columns == "daily_vaccinations"
    X = dataset.loc[:, ~is_target]
    y = dataset.loc[:, is_target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=6
    )

    model = keras.Sequential([
        keras.layers.Dense(512, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(512, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(128, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(1, kernel_initializer='normal', activation='linear'),
    ])
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
    model.fit(X_train, y_train, epochs=epochs, validation_split=0.3, batch_size=batch_size)

    signature = mlf.models.signature.infer_signature(X_train.values, model.predict(X_train.values))

    # Row 10 is used when available; clamp the index so a small hold-out
    # split does not raise IndexError.
    example_row = min(10, len(X_test) - 1)
    input_example = X_test.values[example_row]

    prediction = model.predict(X_test)
    # `rmse` is sklearn's mean_squared_error under an alias. Taking the
    # square root here avoids the `squared=False` keyword, which newer
    # scikit-learn releases no longer accept; with a single target column
    # the result is the same.
    rmse_result = rmse(y_test, prediction) ** 0.5

    model.save('vaccines_model')
    return model, rmse_result, signature, input_example
2021-05-15 20:56:45 +02:00
def parse_cli_args(argv):
    """Extract ``(test_size, epochs, batch_size)`` from a ``sys.argv``-style list.

    Every positional argument is optional. A missing one falls back to its
    default: 0.2 for the test size, 100 epochs and a batch size of 32.

    Args:
        argv: Argument list whose first element is the program name.

    Returns:
        Tuple ``(test_size, epochs, batch_size)`` as ``(float, int, int)``.

    Raises:
        ValueError: If a supplied argument cannot be converted to a number.
    """
    # Each argument gets its own length check so that supplying only the
    # first one or two does not index past the end of the list.
    test_size = float(argv[1]) if len(argv) > 1 else 0.2
    epochs = int(argv[2]) if len(argv) > 2 else 100
    batch_size = int(argv[3]) if len(argv) > 3 else 32
    return test_size, epochs, batch_size


if __name__ == "__main__":
    mlf.set_tracking_uri("http://172.17.0.1:5000")
    # Select the experiment before the run is opened; choosing it while a
    # run is already active does not change where that run is recorded.
    mlf.set_experiment("s434804")

    test_size, epochs, batch_size = parse_cli_args(sys.argv)

    with mlf.start_run():
        mlf.log_param("Test size", test_size)
        mlf.log_param("Epochs", epochs)
        mlf.log_param("Batch size", batch_size)

        model, rmse_result, signature, input_example = create_model(
            test_size=test_size,
            epochs=epochs,
            batch_size=batch_size,
        )
        mlf.log_metric("RMSE", rmse_result)

        # Register the model only when the tracking URI is not a plain
        # file store.
        tracking_url_type_store = urlparse(mlf.get_tracking_uri()).scheme
        if tracking_url_type_store != "file":
            mlf.keras.log_model(
                model,
                "country_vaccination2",
                registered_model_name="s434804",
                signature=signature,
                input_example=input_example,
            )
        else:
            mlf.keras.log_model(model, "vaccines_model", signature=signature, input_example=input_example)

        mlf.keras.save_model(model, "country_vaccination", signature=signature, input_example=input_example)