diff --git a/generate_MLmodel.py b/generate_MLmodel.py
new file mode 100644
index 0000000..6dffd23
--- /dev/null
+++ b/generate_MLmodel.py
@@ -0,0 +1,144 @@
+import torch
+import numpy as np
+
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, TensorDataset, DataLoader
+import argparse
+import mlflow
+import mlflow.pytorch
+from urllib.parse import urlparse
+
+from mlflow.models.signature import infer_signature
+
+
+class LayerLinearRegression(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # Instead of custom parameters, we use a Linear layer with a single input and a single output
+        self.linear = nn.Linear(1, 1)
+
+    def forward(self, x):
+        # Now it only takes a call to the layer to make predictions
+        return self.linear(x)
+
+
+parser = argparse.ArgumentParser(description='Model training program')
+parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Learning rate (lr)", required=False)
+parser.add_argument('-e', '--epochs', type=int, default=100, help="Number of epochs", required=False)
+args = parser.parse_args()
+
+if __name__ == "__main__":
+    lr = args.lr
+    n_epochs = args.epochs
+    with mlflow.start_run(run_name="s426206"):
+        mlflow.log_param("lr", lr)
+        mlflow.log_param("epochs", n_epochs)
+
+        train_dataset = torch.load('train_dataset.pt')
+        # val_dataset = torch.load('val_dataset.pt')
+
+        train_loader = DataLoader(dataset=train_dataset)
+        # val_loader = DataLoader(dataset=val_dataset)
+
+        model = LayerLinearRegression()
+        # Check the model's parameters
+        # print(model.state_dict())
+
+        loss_fn = nn.MSELoss(reduction='mean')
+        optimizer = optim.SGD(model.parameters(), lr=lr)
+
+        def make_train_step(model, loss_fn, optimizer):
+            # Builds a function that performs a step in the train loop
+            def train_step(x, y):
+                # Sets model to TRAIN mode
+                model.train()
+                # Makes predictions
+                yhat = model(x)
+                # Computes loss
+                loss = loss_fn(y, yhat)
+                # Computes gradients
+                loss.backward()
+                # Updates parameters and zeroes gradients
+                optimizer.step()
+                optimizer.zero_grad()
+                # Returns the loss
+                return loss.item()
+
+            # Returns the function that will be called inside the train loop
+            return train_step
+
+        # Creates the train_step function for our model, loss function and optimizer
+        train_step = make_train_step(model, loss_fn, optimizer)
+        training_losses = []
+        validation_losses = []
+        # print(model.state_dict())
+        # For each epoch...
+        for epoch in range(n_epochs):
+            losses = []
+            # Uses the loader to fetch one mini-batch for training
+            for x_batch, y_batch in train_loader:
+                # Send the mini-batch data to the device
+                # so it matches the location of the MODEL
+                # x_batch = x_batch.to(device)
+                # y_batch = y_batch.to(device)
+                # One step of training
+                loss = train_step(x_batch, y_batch)
+                losses.append(loss)
+            training_loss = np.mean(losses)
+            training_losses.append(training_loss)
+
+            mlflow.log_metric("MSE", training_loss)
+
+            # After finishing training steps for all mini-batches,
+            # it is time for evaluation!
+            # Evaluation is no longer needed here, since it happens in evaluation.py;
+            # the block below can be re-enabled to preview validation loss per epoch.
+            # # We tell PyTorch to NOT use autograd...
+            # # Do you remember why?
+            # with torch.no_grad():
+            #     val_losses = []
+            #     # Uses the loader to fetch one mini-batch for validation
+            #     for x_val, y_val in val_loader:
+            #         # Again, sends data to the same device as the model
+            #         # x_val = x_val.to(device)
+            #         # y_val = y_val.to(device)
+
+            #         model.eval()
+            #         # Makes predictions
+            #         yhat = model(x_val)
+            #         # Computes validation loss
+            #         val_loss = loss_fn(y_val, yhat)
+            #         val_losses.append(val_loss.item())
+            #     validation_loss = np.mean(val_losses)
+            #     validation_losses.append(validation_loss)
+
+            # print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
+            print(f"[{epoch+1}] Training loss: {training_loss:.3f}")
+
+        torch.save({
+            'model_state_dict': model.state_dict(),
+            'optimizer_state_dict': optimizer.state_dict(),
+            'loss': training_loss,
+        }, 'model.pt')
+
+        mlflow.pytorch.log_state_dict(model.state_dict(), artifact_path="model")
+
+        x_train = np.array(train_dataset)[:, 0]  # first column (Sales Sum)
+        input_example = np.reshape(x_train, (-1, 1))
+        with torch.no_grad():
+            # print(model(torch.tensor(np.reshape(x_train, (-1, 1))).float()))
+            signature = infer_signature(x_train, model(torch.tensor(np.reshape(x_train, (-1, 1))).float()).numpy())
+        # mlflow.set_tracking_uri("http://172.17.0.1:5000")
+        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
+        print(tracking_url_type_store)
+        # The model registry does not work with a file store
+        if tracking_url_type_store != "file":
+            # Register the model
+            # There are other ways to use the Model Registry, depending on the use case;
+            # please refer to the docs for more information:
+            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
+            mlflow.pytorch.log_model(model, "model", registered_model_name="s426206")
+        else:
+            mlflow.pytorch.log_model(model, "model", signature=signature, input_example=input_example)
\ No newline at end of file
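
The script expects a serialized dataset in train_dataset.pt, but the preparation step is not part of this diff. Below is a minimal sketch of a compatible producer, assuming a TensorDataset with one input feature (standing in for the "Sales Sum" column referenced above) and one target per sample; the synthetic values are illustrative only.

# Hypothetical producer of train_dataset.pt; the real data-prep script is not in this diff.
import numpy as np
import torch
from torch.utils.data import TensorDataset

# Illustrative single-feature data standing in for the real "Sales Sum" column.
x = np.random.rand(100, 1).astype(np.float32)
y = (2.0 * x + 1.0 + 0.1 * np.random.randn(100, 1)).astype(np.float32)

# Each item is an (input, target) pair, matching what train_loader unpacks above.
torch.save(TensorDataset(torch.from_numpy(x), torch.from_numpy(y)), 'train_dataset.pt')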
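
For completeness, here is a sketch of how the checkpoint written to model.pt could be reloaded elsewhere, e.g. in evaluation.py; that file is not included in this diff, so the consumer side below is an assumption. The class is redefined locally to keep the sketch self-contained.

# Hypothetical consumer of model.pt; evaluation.py itself is not part of this diff.
import torch
import torch.nn as nn

class LayerLinearRegression(nn.Module):  # same architecture as in generate_MLmodel.py
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)

checkpoint = torch.load('model.pt')
model = LayerLinearRegression()
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

with torch.no_grad():
    # Input shape (N, 1) matches nn.Linear(1, 1); the value is arbitrary.
    print(model(torch.tensor([[1.0]])).item())

A typical invocation of the trainer itself would be `python generate_MLmodel.py --lr 0.001 --epochs 50` (values are examples, not project defaults); setting the MLFLOW_TRACKING_URI environment variable to a remote server makes the script take the model-registry branch instead of the plain file-store logging.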