diff --git a/Jenkinsfile_train b/Jenkinsfile_train
index b2c7664..5d0efe2 100644
--- a/Jenkinsfile_train
+++ b/Jenkinsfile_train
@@ -35,10 +35,10 @@ pipeline {
         sh 'chmod +x dlgssdpytorch.py'
         sh 'python3 ./dlgssdpytorch.py $PARAMETRY'
         sh 'chmod +x train_mlflow.py'
-        //sh 'chmod +x generate_MLmodel.py'
-        //sh 'python3 ./generate_MLmodel.py'
         sh 'python3 ./train_mlflow.py -e 5'
         //sh 'mlflow run --experiment-name s426206 .'
         //The project being run must not be located in a directory whose path contains uppercase letters.
+        sh 'chmod +x generate_MLmodel.py'
+        sh 'python3 ./generate_MLmodel.py -e 5'
       }
     }
   }
diff --git a/MLproject b/MLproject
index 53c13e3..4a6ed6a 100644
--- a/MLproject
+++ b/MLproject
@@ -4,7 +4,6 @@ name: 426206mlflow
 docker_env:
   image: rokoch/ium:01
-  volumes: ["/etc/passwd:/etc/passwd"]
 
 entry_points:
   main:
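The Jenkins stage now runs generate_MLmodel.py with the same `-e 5` epoch budget as train_mlflow.py, and the MLproject no longer mounts /etc/passwd into the rokoch/ium:01 container. For the commented-out `mlflow run` route, a rough Python equivalent is sketched below; it is hypothetical and assumes the MLproject above sits in the current directory, the Docker image is available locally, and the main entry point declares an `epochs` parameter:

```python
import mlflow

# Hypothetical stand-in for the commented-out Jenkins step:
#   sh 'mlflow run --experiment-name s426206 .'
# Note: with docker_env, the project path must not contain uppercase letters.
submitted = mlflow.projects.run(
    uri=".",                    # project root containing the MLproject file
    entry_point="main",
    experiment_name="s426206",
    parameters={"epochs": 5},   # only if the entry point declares this parameter
)
print(submitted.run_id)
```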
diff --git a/generate_MLmodel.py b/generate_MLmodel.py
index b327e7c..8678258 100644
--- a/generate_MLmodel.py
+++ b/generate_MLmodel.py
@@ -1,12 +1,13 @@
 import torch
 import numpy as np
+
 import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import Dataset, TensorDataset, DataLoader
+import argparse
 import mlflow
 import mlflow.pytorch
 from urllib.parse import urlparse
-
 from mlflow.models.signature import infer_signature
 
 class LayerLinearRegression(nn.Module):
@@ -14,30 +15,130 @@ class LayerLinearRegression(nn.Module):
         super().__init__()
         # Instead of our custom parameters, we use a Linear layer with single input and single output
         self.linear = nn.Linear(1, 1)
-
+
     def forward(self, x):
         # Now it only takes a call to the layer to make predictions
         return self.linear(x)
 
-checkpoint = torch.load('model.pt')
+if __name__ == "__main__":
 
-model = LayerLinearRegression()
-#optimizer = optim.SGD(model.parameters(), lr=checkpoint['loss'])
+    parser = argparse.ArgumentParser(description='Model training program')
+    parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Learning rate (lr)", required=False)
+    parser.add_argument('-e', '--epochs', type=int, default=100, help="Number of epochs", required=False)
+    args = parser.parse_args()
 
-model.load_state_dict(checkpoint['model_state_dict'])
+    lr = args.lr
+    n_epochs = args.epochs
+    mlflow.set_experiment("s426206")
+    with mlflow.start_run():
+        mlflow.log_param("lr", lr)
+        mlflow.log_param("epochs", n_epochs)
 
-train_dataset = torch.load('train_dataset.pt')
-x_train = np.array(train_dataset)[:,0] #(Sales Sum row)
-input_example = np.reshape(x_train, (-1,1))
+        train_dataset = torch.load('train_dataset.pt')
+        #val_dataset = torch.load('val_dataset.pt')
 
-with torch.no_grad():
-    model.eval()
-    siganture = infer_signature(x_train, model(torch.tensor(np.reshape(x_train, (-1,1))).float()).numpy())
+        train_loader = DataLoader(dataset=train_dataset)
+        #val_loader = DataLoader(dataset=val_dataset)
+
+        model = LayerLinearRegression()
+        # Checks model's parameters
+        #print(model.state_dict())
+
+        loss_fn = nn.MSELoss(reduction='mean')
+        optimizer = optim.SGD(model.parameters(), lr=lr)
+
+        def make_train_step(model, loss_fn, optimizer):
+            # Builds function that performs a step in the train loop
+            def train_step(x, y):
+                # Sets model to TRAIN mode
+                model.train()
+                # Makes predictions
+                yhat = model(x)
+                # Computes loss
+                loss = loss_fn(y, yhat)
+                # Computes gradients
+                loss.backward()
+                # Updates parameters and zeroes gradients
+                optimizer.step()
+                optimizer.zero_grad()
+                # Returns the loss
+                return loss.item()
+
+            # Returns the function that will be called inside the train loop
+            return train_step
+
+        # Creates the train_step function for our model, loss function and optimizer
+        train_step = make_train_step(model, loss_fn, optimizer)
+        training_losses = []
+        validation_losses = []
+        #print(model.state_dict())
+        # For each epoch...
+        for epoch in range(n_epochs):
+
+            losses = []
+            # Uses loader to fetch one mini-batch for training
+            for x_batch, y_batch in train_loader:
+                # NOW, sends the mini-batch data to the device
+                # so it matches location of the MODEL
+                # x_batch = x_batch.to(device)
+                # y_batch = y_batch.to(device)
+                # One step of training
+                loss = train_step(x_batch, y_batch)
+                losses.append(loss)
+            training_loss = np.mean(losses)
+            training_losses.append(training_loss)
+
+            mlflow.log_metric("MSE", training_loss)
+
+            # After finishing training steps for all mini-batches,
+            # it is time for evaluation!
+            # Evaluation is no longer needed here, since it is done in evaluation.py. A per-epoch evaluation preview can be enabled instead, if desired.
+            # # We tell PyTorch to NOT use autograd...
+            # # Do you remember why?
+            # with torch.no_grad():
+            #     val_losses = []
+            #     # Uses loader to fetch one mini-batch for validation
+            #     for x_val, y_val in val_loader:
+            #         # Again, sends data to same device as model
+            #         # x_val = x_val.to(device)
+            #         # y_val = y_val.to(device)
+
+            #         model.eval()
+            #         # Makes predictions
+            #         yhat = model(x_val)
+            #         # Computes validation loss
+            #         val_loss = loss_fn(y_val, yhat)
+            #         val_losses.append(val_loss.item())
+            #     validation_loss = np.mean(val_losses)
+            #     validation_losses.append(validation_loss)
+
+            # print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
+            print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")
+
+        torch.save({
+            'model_state_dict': model.state_dict(),
+            'optimizer_state_dict': optimizer.state_dict(),
+            'loss': lr,
+        }, 'model.pt')
+
+        x_train = np.array(train_dataset)[:,0] #(Sales Sum row)
+        input_example = np.reshape(x_train, (-1,1))
+
+        with torch.no_grad():
+            model.eval()
+            siganture = infer_signature(x_train, model(torch.tensor(np.reshape(x_train, (-1,1))).float()).numpy())
+
+        #mlflow.set_experiment("s426206")
         mlflow.set_tracking_uri("http://172.17.0.1:5000")
         tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
         # print(tracking_url_type_store)
         # Model registry does not work with file store
+
         if tracking_url_type_store != "file":
-            mlflow.sklearn.log_model(model, "model", registered_model_name="s426206", signature=siganture, input_example=input_example)
+            mlflow.pytorch.log_model(model, "model", registered_model_name="s426206", signature=siganture, input_example=input_example)
         else:
-            mlflow.sklearn.log_model(model, "model", signature=siganture, input_example=input_example)
\ No newline at end of file
+            mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example)
+        mlflow.pytorch.save_model(model, "my_model", signature=siganture, input_example=input_example)
+
+        #export MLFLOW_CONDA_HOME=/home/jan/miniconda3/
+        #mlflow models serve -m my_model/
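After this rewrite, generate_MLmodel.py trains the model itself and writes the model.pt checkpoint, rather than loading one produced elsewhere. For reference, a minimal sketch of consuming that checkpoint afterwards, roughly what the removed top-level code used to do:

```python
import torch
import torch.nn as nn

# Same single-input/single-output model as in generate_MLmodel.py above
class LayerLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)

# Restore the weights saved by the training loop above
checkpoint = torch.load('model.pt')
model = LayerLinearRegression()
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

with torch.no_grad():
    # One sample with a single feature, as nn.Linear(1, 1) expects
    print(model(torch.tensor([[1.0]])))
```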
"my_model", signature=siganture, input_example=input_example) + + #export MLFLOW_CONDA_HOME=/home/jan/miniconda3/ + #mlflow models serve -m my_model/