2021-05-15 20:56:45 +02:00
|
|
|
import sys
|
|
|
|
import pandas as pd
|
2021-05-23 13:39:05 +02:00
|
|
|
import mlflow as mlf
|
2021-05-15 20:56:45 +02:00
|
|
|
from tensorflow import keras
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
from sklearn.metrics import mean_squared_error as rmse
|
|
|
|
|
|
|
|
|
|
|
|
def create_model(test_size, epochs, batch_size):
    """Train a dense regression network on per-country vaccination totals.

    Reads ``country_vaccinations.csv`` from the working directory, drops rows
    with missing values, sums the numeric columns per country and fits a
    Keras model that predicts ``daily_vaccinations`` from the remaining
    columns. The fitted model is also saved to ``vaccines_model``.

    Args:
        test_size: Passed to ``train_test_split`` as the hold-out share.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``model.fit``.

    Returns:
        A tuple ``(model, rmse_result, signature, input_example)``: the
        fitted Keras model, the root mean squared error on the test split,
        the MLflow signature inferred from the training features and the
        model's predictions on them, and a single feature row taken from
        the test split.
    """
    df = pd.read_csv('country_vaccinations.csv').dropna()

    # numeric_only=True keeps text columns out of the aggregated frame on
    # every pandas version; newer releases no longer drop them by default.
    dataset = df.groupby(by=["country"], dropna=True).sum(numeric_only=True)

    # y stays a one-column DataFrame (2-D), matching the network's output.
    X = dataset.loc[:, dataset.columns != "daily_vaccinations"]
    y = dataset.loc[:, dataset.columns == "daily_vaccinations"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=6
    )

    model = keras.Sequential([
        keras.layers.Dense(512, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(512, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(128, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(1, kernel_initializer='normal', activation='linear'),
    ])

    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])

    model.fit(X_train, y_train, epochs=epochs, validation_split=0.3, batch_size=batch_size)

    signature = mlf.models.signature.infer_signature(
        X_train.values, model.predict(X_train.values)
    )

    # Use the row at position 10 when it exists; fall back to the last row
    # so that a small test split does not raise IndexError.
    example_row = min(10, len(X_test) - 1)
    input_example = X_test.values[example_row]

    prediction = model.predict(X_test)

    # `rmse` is imported as an alias of mean_squared_error, so take the
    # square root here instead of relying on the `squared=False` keyword,
    # which recent scikit-learn releases no longer accept.
    rmse_result = rmse(y_test, prediction) ** 0.5

    model.save('vaccines_model')

    return model, rmse_result, signature, input_example
|
2021-05-15 20:56:45 +02:00
|
|
|
|
|
|
|
def parse_cli_args(argv):
    """Extract ``(test_size, epochs, batch_size)`` from a ``sys.argv``-style list.

    The three values are positional and optional. Any value that is not
    supplied falls back to its default: 0.2, 100 and 32 respectively.

    Args:
        argv: Argument list whose first element is the program name.

    Returns:
        A tuple ``(test_size, epochs, batch_size)`` as ``(float, int, int)``.

    Raises:
        ValueError: If a supplied argument cannot be converted to a number.
    """
    # Each position is checked against its own index so that passing only
    # the first one or two arguments does not raise IndexError.
    test_size = float(argv[1]) if len(argv) > 1 else 0.2
    epochs = int(argv[2]) if len(argv) > 2 else 100
    batch_size = int(argv[3]) if len(argv) > 3 else 32
    return test_size, epochs, batch_size


if __name__ == "__main__":
    test_size, epochs, batch_size = parse_cli_args(sys.argv)

    with mlf.start_run():
        mlf.log_param("Test size", test_size)
        mlf.log_param("Epochs", epochs)
        mlf.log_param("Batch size", batch_size)

        model, rmse_result, signature, input_example = create_model(
            test_size=test_size,
            epochs=epochs,
            batch_size=batch_size,
        )

        mlf.log_metric("RMSE", rmse_result)

        # NOTE: the model is written to the local "country_vaccination" path
        # with save_model; mlf.keras.log_model would be the call to use for
        # attaching it to the active run as an artifact instead.
        mlf.keras.save_model(model, "country_vaccination", input_example=input_example, signature=signature)
|