"""Train and evaluate a logistic-regression diabetes classifier under Sacred.

The script reads ``diabetes2.csv``, fits a single-layer logistic regression
with full-batch SGD, stores the weights in ``diabetes.pth`` and reports the
training loss plus accuracy / F1 / RMSE on a held-out split to the Sacred run.
"""
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sacred import Experiment
from sacred.observers import MongoObserver
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset, random_split

np.set_printoptions(suppress=False)

ex = Experiment("ium_s440058", interactive=False, save_git_info=False)
# NOTE(review): the MongoDB credentials are hard-coded in the URL below;
# consider supplying them through the environment or a secrets store.
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))


# Default hyper-parameters of the experiment; Sacred injects them into
# captured functions by name.
@ex.config
def my_config():
    num_epochs = 10
    batch_size = 20


class LogisticRegressionModel(torch.nn.Module):
    """Single linear layer followed by a sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Create the linear layer mapping ``input_dim`` to ``output_dim``."""
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear projection of ``x``."""
        out = self.linear(x)
        return self.sigmoid(out)


def _to_tensors(frame, feature_columns, target_column):
    """Convert a data frame into float32 (features, target) tensors."""
    features = frame[feature_columns].astype(np.float32)
    target = frame[target_column].astype(np.float32)
    return torch.from_numpy(features.values), torch.from_numpy(target.values)


@ex.capture
def script(num_epochs, batch_size, _run):
    """Train the model, save its weights and log metrics to the Sacred run.

    Args:
        num_epochs: Number of full-batch gradient steps to perform.
        batch_size: Only logged; training uses the whole training split in
            every step.
        _run: Sacred run object used for metric logging.
    """
    # dropna() returns a new frame; the result has to be kept.
    results = pd.read_csv('diabetes2.csv').dropna()

    # Shuffle, then use the first 60% for training and the last 20% for
    # testing. The middle 20% (validation share) is currently unused.
    shuffled = results.sample(frac=1)
    train_end = int(.6 * len(shuffled))
    valid_end = int(.8 * len(shuffled))
    data_train = shuffled.iloc[:train_end]
    data_test = shuffled.iloc[valid_end:]

    columns_to_train = ['Glucose', 'BloodPressure', 'Insulin', 'Age']
    fTrain, tTrain = _to_tensors(data_train, columns_to_train, 'Outcome')
    fTest, tTest = _to_tensors(data_test, columns_to_train, 'Outcome')
    # BCELoss needs the target to match the (N, 1) model output; derive N
    # from the data instead of hard-coding the row count.
    tTrain = tTrain.reshape(-1, 1)

    # Log plain numbers so the metrics remain numeric in the observer.
    _run.log_scalar("Batch", batch_size)
    _run.log_scalar("epoch", num_epochs)

    learning_rate = 0.005
    input_dim = len(columns_to_train)
    output_dim = 1

    model = LogisticRegressionModel(input_dim, output_dim)
    criterion = torch.nn.BCELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        print("Epoch - ", epoch)
        model.train()
        optimizer.zero_grad()
        # Forward pass
        y_pred = model(fTrain)
        # Compute Loss
        loss = criterion(y_pred, tTrain)
        print(loss.item())
        # Backward pass
        loss.backward()
        optimizer.step()
        _run.log_scalar("Lost", loss.item(), epoch)

    torch.save(model.state_dict(), 'diabetes.pth')

    model.eval()
    with torch.no_grad():
        probabilities = model(fTest).numpy().ravel()
    y_true = tTest.numpy().astype(int)
    # The model emits one probability per row, so the class comes from a 0.5
    # threshold; argmax over a single column would always yield class 0.
    y_hat = (probabilities >= 0.5).astype(int)

    accuracy = float(accuracy_score(y_true, y_hat))
    # Binary F1 of the positive class: a single float suitable for log_scalar.
    f1 = float(f1_score(y_true, y_hat))
    # Root of the mean squared error between labels and probabilities.
    rmse = float(np.sqrt(mean_squared_error(y_true, probabilities)))

    _run.log_scalar("accuracy", accuracy)
    _run.log_scalar("f1", f1)
    _run.log_scalar("rmse", rmse)


@ex.automain
def my_main(num_epochs, batch_size, _run):
    """Sacred entry point; delegates to the captured ``script`` function."""
    script()


# NOTE(review): ``@ex.automain`` may already launch a run when this file is
# executed as a script, in which case the call below trains a second time;
# confirm that this is intended.
ex.run()
# NOTE(review): ``script`` writes the weights to 'diabetes.pth', while the
# artifact is read from 'diabetes_model/diabetes.pth'; presumably an external
# step places the file there. Verify the path.
ex.add_artifact('diabetes_model/diabetes.pth')