From b5bcb5229390917bbc7195f91765ecf9610b9d5b Mon Sep 17 00:00:00 2001
From: MatOgr
Date: Wed, 11 May 2022 19:03:30 +0200
Subject: [PATCH] Sacred training

---
 jenkins/training.Jenkinsfile |   1 +
 scripts/sacred_train.py      | 206 +++++++++++++++++++++++++++++++++++
 2 files changed, 207 insertions(+)
 create mode 100644 scripts/sacred_train.py

diff --git a/jenkins/training.Jenkinsfile b/jenkins/training.Jenkinsfile
index 72ce2b6..71f4556 100644
--- a/jenkins/training.Jenkinsfile
+++ b/jenkins/training.Jenkinsfile
@@ -37,6 +37,7 @@ pipeline {
             steps {
                 archiveArtifacts artifacts: '*data/predictions.csv', onlyIfSuccessful: true
                 archiveArtifacts artifacts: '*data/model_scripted*', onlyIfSuccessful: true
+                archiveArtifacts artifacts: '*data/training_runs/*', onlyIfSuccessful: true
             }
         }
     }
diff --git a/scripts/sacred_train.py b/scripts/sacred_train.py
new file mode 100644
index 0000000..d2c25b5
--- /dev/null
+++ b/scripts/sacred_train.py
@@ -0,0 +1,206 @@
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+import argparse
+import pandas as pd
+import numpy as np
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+
+import torch
+from torch import nn
+from torch.utils import data as t_u_data
+
+
+ex = Experiment("478841 sacred_scopes", interactive=True, save_git_info=False)
+ex.observers.append(MongoObserver(
+    url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))
+ex.observers.append(FileStorageObserver('./data/training_runs'))
+
+
+@ex.config
+def my_config():
+    parser = argparse.ArgumentParser(description="Script performing regression model training",
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        "-e", "--epochs", default=100, help="Number of epochs the model will be trained for")
+    parser.add_argument(
+        "-s", "--step", default=10, help="Interval (in epochs) between metric logging")
+    parser.add_argument("--save", action="store_true",
+                        help="Save the trained model to './data/model_scripted.pt'")
+
+    args = vars(parser.parse_args())
+
+    epochs = int(args['epochs'])
+    save_model = args['save']
+    log_step = int(args['step'])
+
+
+# * Customized Dataset class (base provided by PyTorch)
+class AvocadoDataset(t_u_data.Dataset):
+    def __init__(self, path: str, target: str = 'AveragePrice'):
+        data = pd.read_csv(path)
+        y = data[target].values.astype('float32')
+        self.y = y.reshape((len(y), 1))
+        self.x_data = data.drop(
+            [target], axis=1).values.astype('float32')
+        self.x_shape = data.drop([target], axis=1).shape
+        # print("Data shape is: ", self.x_data.shape)
+
+    def __len__(self):
+        return len(self.x_data)
+
+    def __getitem__(self, idx):
+        return [self.x_data[idx], self.y[idx]]
+
+    def get_shape(self):
+        return self.x_shape
+
+    def get_splits(self, n_test=0.33):
+        test_size = round(n_test * len(self.x_data))
+        train_size = len(self.x_data) - test_size
+        return t_u_data.random_split(self, [train_size, test_size])
+
+
+class AvocadoRegressor(nn.Module):
+    def __init__(self, input_dim):
+        super(AvocadoRegressor, self).__init__()
+        self.hidden1 = nn.Linear(input_dim, 32)
+        nn.init.xavier_uniform_(self.hidden1.weight)
+        self.act1 = nn.ReLU()
+        self.hidden2 = nn.Linear(32, 8)
+        nn.init.xavier_uniform_(self.hidden2.weight)
+        self.act2 = nn.ReLU()
+        self.hidden3 = nn.Linear(8, 1)
+        nn.init.xavier_uniform_(self.hidden3.weight)
+
+    def forward(self, x):
+        x = self.hidden1(x)
+        x = self.act1(x)
+        x = self.hidden2(x)
+        x = self.act2(x)
+        x = self.hidden3(x)
+        return x
+
+
+def prepare_data(paths):
+    train_dl = t_u_data.DataLoader(AvocadoDataset(
+        paths[0]), batch_size=32, shuffle=True)
+    validate_dl = t_u_data.DataLoader(AvocadoDataset(
+        paths[1]), batch_size=128, shuffle=True)
+    test_dl = t_u_data.DataLoader(AvocadoDataset(
+        paths[2]), batch_size=1, shuffle=False)
+    return train_dl, validate_dl, test_dl
+
+
+@ex.capture
+def train_model(train_dl, model, epochs, log_step, _run):
+    criterion = nn.MSELoss()
+    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
+    to_compare = None
+
+    for epoch in range(1, epochs+1):
+        for _, (inputs, targets) in enumerate(train_dl):
+            optimizer.zero_grad()
+            yhat = model(inputs)
+            # * For loss value inspection
+            to_compare = (yhat, targets)
+            loss = criterion(yhat, targets)
+
+            loss.backward()
+            optimizer.step()
+
+        if epoch == 1 or epoch % log_step == 0:
+            result = to_compare[0].detach().numpy()
+            target = to_compare[1].detach().numpy()
+            mse = mean_squared_error(target, result)
+            mae = mean_absolute_error(target, result)
+            _run.log_scalar("training.RMSE", np.sqrt(mse), epoch)
+            _run.log_scalar("training.MAE", mae, epoch)
+            _run.log_scalar('training.MSE', mse, epoch)
+
+            print(
+                f"Epoch {epoch}\t→\tMSE: {mse},\tRMSE: {np.sqrt(mse)},\tMAE: {mae}")
+
+
+def evaluate_model(test_dl, model):
+    predictions, actuals = list(), list()
+    for _, (inputs, targets) in enumerate(test_dl):
+        yhat = model(inputs)
+        # * retrieve numpy array
+        yhat = yhat.detach().numpy()
+        actual = targets.numpy()
+        actual = actual.reshape((len(actual), 1))
+        # * store predictions
+        predictions.append(yhat)
+        actuals.append(actual)
+    predictions, actuals = np.vstack(predictions), np.vstack(actuals)
+    # * return MSE, RMSE and MAE values
+    mse = mean_squared_error(actuals, predictions)
+    rmse = mean_squared_error(actuals, predictions, squared=False)
+    mae = mean_absolute_error(actuals, predictions)
+    return mse, rmse, mae
+
+
+def predict(row, model):
+    row = row[0].flatten()
+    yhat = model(row)
+    yhat = yhat.detach().numpy()
+    return yhat
+
+
+@ex.main
+def main(epochs, save_model, log_step, _run):
+    print(
+        f"Your model will be trained for {epochs} epochs. Trained model will {'' if save_model else 'not '}be saved.")
+
+    # * Paths to data
+    avocado_data = ['./data/avocado.data.train',
+                    './data/avocado.data.valid',
+                    './data/avocado.data.test']
+
+    # * Data preparation
+    train_dl, validate_dl, test_dl = prepare_data(paths=avocado_data)
+    print(f"""
+    Train set size: {len(train_dl.dataset)}
+    Validate set size: {len(validate_dl.dataset)}
+    Test set size: {len(test_dl.dataset)}
+    """)
+
+    # * Model definition
+    # ! 66 - in case only regions and type are used (among all the categorical vals)
+    model = AvocadoRegressor(235)
+
+    # * Train model
+    print("Let's start the training, mate!")
+    train_model(train_dl=train_dl, model=model,
+                epochs=epochs, log_step=log_step)
+
+    # * Evaluate model
+    mse, rmse, mae = evaluate_model(validate_dl, model)
+    print(
+        f"\nEvaluation on validation set\t→\tMSE: {mse}, RMSE: {rmse}, MAE: {mae}")
+
+    _run.log_scalar("validation.RMSE", rmse, epochs+1)
+    _run.log_scalar("validation.MAE", mae, epochs+1)
+    _run.log_scalar('validation.MSE', mse, epochs+1)
+
+    # * Prediction
+    predictions = [(predict(row, model)[0], row[1].item()) for row in test_dl]
+    preds_df = pd.DataFrame(predictions, columns=["Prediction", "Target"])
+    test_loss = evaluate_model(test_dl, model)
+
+    print("\nNow predictions - hey ho, let's go!\n", preds_df.head(),
+          f"\nLoss values for test data: \t→\tMSE: {test_loss[0]}, RMSE: {test_loss[1]}, MAE: {test_loss[2]}")
+    print("\n...let's save them\ndum...\ndum...\ndum dum dum...\n\tDUM\n")
+
+    preds_df.to_csv("./data/predictions.csv", index=False)
+
+    # * Save the trained model
+    if save_model:
+        scripted_model = torch.jit.script(model)
+        scripted_model.save('./data/model_scripted.pt')
+        ex.add_artifact('./data/model_scripted.pt')
+        print("Your model has been saved - have a nice day!")
+
+
+ex.run()
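
Each run recorded by the FileStorageObserver above ends up in its own numbered directory under ./data/training_runs/ (the path the Jenkinsfile stage now archives), and the scalars passed to _run.log_scalar are collected there in metrics.json. A minimal sketch of reading those metrics back after a run, assuming Sacred's usual metrics.json layout of per-metric "steps"/"values"/"timestamps" lists and a hypothetical run id of 1:

import json
from pathlib import Path

# Hypothetical run directory; the FileStorageObserver numbers runs 1, 2, ...
run_dir = Path("./data/training_runs/1")

# metrics.json maps each metric name logged with _run.log_scalar
# (e.g. "training.RMSE") to lists of steps, values and timestamps.
with open(run_dir / "metrics.json") as f:
    metrics = json.load(f)

for name, series in metrics.items():
    print(f"{name}: step {series['steps'][-1]} -> {series['values'][-1]:.4f}")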
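
The TorchScript artifact written by the --save path can likewise be reloaded for inference without importing AvocadoRegressor. A sketch under the same assumptions as the script (archived ./data/model_scripted.pt, 235 input features); the training itself would presumably be launched along the lines of python scripts/sacred_train.py --epochs 100 --step 10 --save:

import torch

# Load the scripted model produced by torch.jit.script(...).save(...) above.
model = torch.jit.load("./data/model_scripted.pt")
model.eval()

# Placeholder batch with the 235 features the training script assumes;
# real rows would come from the preprocessed avocado CSVs.
dummy = torch.zeros(1, 235)
with torch.no_grad():
    predicted_price = model(dummy)
print(predicted_price.item())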