From 8a900b0cbdc5d3325bd2d4b643288f243f4fac12 Mon Sep 17 00:00:00 2001
From: Filip Izydorczyk
Date: Fri, 4 Jun 2021 15:58:29 +0200
Subject: [PATCH] mlflow test

---
Review notes (text in this section is ignored by git am):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. The indentation of the Jenkinsfile context lines is
  assumed; apply with --ignore-whitespace if that hunk is rejected.
- Post-image blob ids on the "index" lines were not recomputed after the
  content changes made in review.
- Jenkinsfile: an "export" in its own sh step does not reach the next sh
  step, so the variable is now passed with withEnv.
- ml-mlflow.py: uses the mlflow.pytorch flavor (the model is a torch module),
  builds the signature from real ndarray predictions, selects the experiment
  before the run starts, and no longer saves the MLflow model on top of the
  state-dict file.

 learning/Jenkinsfile  |   3 +
 learning/ml-mlflow.py | 108 +++++++++++++++++++++++++++++++++++++++++++
 learning/ml.py        |   1 -
 requirements.txt      |   3 +-
 4 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 learning/ml-mlflow.py

diff --git a/learning/Jenkinsfile b/learning/Jenkinsfile
index 3a0856d..43b5ef5 100644
--- a/learning/Jenkinsfile
+++ b/learning/Jenkinsfile
@@ -21,6 +21,9 @@ pipeline {
         stage('learning') {
             steps {
                 sh 'python ./learning/ml.py'
+                withEnv(['MLFLOW_TRACKING_URI=http://172.17.0.1:5000']) {
+                    sh 'python ./learning/ml-mlflow.py'
+                }
             }
         }
         stage('archiveArtifacts') {
diff --git a/learning/ml-mlflow.py b/learning/ml-mlflow.py
new file mode 100644
index 0000000..e880fca
--- /dev/null
+++ b/learning/ml-mlflow.py
@@ -0,0 +1,108 @@
+"""Train a one-feature linear regression and log the run to MLflow.
+
+The tracking server is read from the MLFLOW_TRACKING_URI environment
+variable; DEFAULT_TRACKING_URI is used when the variable is not set.
+"""
+import datetime
+import os
+import shutil
+from urllib.parse import urlparse
+
+import matplotlib.pyplot as plt
+import mlflow
+import mlflow.pytorch
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from mlflow.models.signature import infer_signature
+from torch.autograd import Variable
+
+
+INPUT_DIM = 1
+OUTPUT_DIM = 1
+LEARNING_RATE = 0.01
+EPOCHS = 100
+EXPERIMENT_NAME = "s434700"
+REGISTERED_MODEL_NAME = "s434700"
+DEFAULT_TRACKING_URI = "http://172.17.0.1:5000"
+STATE_DICT_PATH = "model.pt"
+MLFLOW_MODEL_DIR = "model_mlflow"
+
+
+class LinearRegression(nn.Module):
+    """Single linear layer mapping inputSize features to outputSize."""
+
+    def __init__(self, inputSize, outputSize):
+        super().__init__()
+        self.linear = nn.Linear(inputSize, outputSize)
+
+    def forward(self, x):
+        return self.linear(x)
+
+
+mlflow.set_tracking_uri(
+    os.environ.get("MLFLOW_TRACKING_URI", DEFAULT_TRACKING_URI))
+
+dataset = pd.read_csv('datasets/train_set.csv')
+testset = pd.read_csv('datasets/test_set.csv')
+
+# Feature: month number parsed from the 'date' column (YYYY-MM-DD).
+x_values = [datetime.datetime.strptime(item, "%Y-%m-%d").month
+            for item in dataset['date'].values]
+x_train = np.array(x_values, dtype=np.float32).reshape(-1, 1)
+
+# Target: the smaller of result_1 / result_2 and result_2 / result_1.
+result_1 = dataset['result_1'].to_numpy(dtype=np.float64)
+result_2 = dataset['result_2'].to_numpy(dtype=np.float64)
+y_train = np.minimum(result_1 / result_2, result_2 / result_1)
+y_train = y_train.astype(np.float32).reshape(-1, 1)
+
+model = LinearRegression(INPUT_DIM, OUTPUT_DIM)
+criterion = nn.MSELoss()
+optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
+
+# The tensors do not change between epochs, so build them once.
+inputs = torch.from_numpy(x_train)
+labels = torch.from_numpy(y_train)
+
+for epoch in range(EPOCHS):
+    optimizer.zero_grad()
+    loss = criterion(model(inputs), labels)
+    loss.backward()
+    optimizer.step()
+    print('epoch {}, loss {}'.format(epoch, loss.item()))
+
+torch.save(model.state_dict(), STATE_DICT_PATH)
+
+# Predictions as a plain ndarray so MLflow can infer the output schema.
+model.eval()
+with torch.no_grad():
+    predictions = model(inputs).numpy()
+signature = infer_signature(x_train, predictions)
+# One row with the same (n, 1) layout the model was trained on.
+input_example = x_train[:1]
+
+# The experiment has to be selected before the run starts; calling
+# set_experiment inside an active run does not move that run.
+mlflow.set_experiment(EXPERIMENT_NAME)
+
+with mlflow.start_run():
+    # NOTE(review): DataFrame.size is rows * columns, not the row count.
+    mlflow.log_param("train size", dataset.size)
+    mlflow.log_param("test size", testset.size)
+    mlflow.log_param("epochs", EPOCHS)
+
+    log_kwargs = {"signature": signature, "input_example": input_example}
+    # A file-based store has no model registry, so only register the model
+    # when the tracking URI points at a server.
+    if urlparse(mlflow.get_tracking_uri()).scheme != "file":
+        log_kwargs["registered_model_name"] = REGISTERED_MODEL_NAME
+    mlflow.pytorch.log_model(model, "model.pt", **log_kwargs)
+
+# save_model refuses an existing path and STATE_DICT_PATH is already taken
+# by torch.save above, so write to a separate, freshly cleared directory.
+shutil.rmtree(MLFLOW_MODEL_DIR, ignore_errors=True)
+mlflow.pytorch.save_model(
+    model, MLFLOW_MODEL_DIR, signature=signature,
+    input_example=input_example)
diff --git a/learning/ml.py b/learning/ml.py
index ecfaae4..5e17d69 100644
--- a/learning/ml.py
+++ b/learning/ml.py
@@ -3,7 +3,6 @@ import torch.nn as nn
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
-import seaborn as sns
 import torch
 import datetime
 from torch.autograd import Variable
diff --git a/requirements.txt b/requirements.txt
index cb07960..4a84621 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ numpy
 matplotlib
 seaborn
 sacred
-pymongo
\ No newline at end of file
+pymongo
+mlflow