"""Train a logistic-regression stroke classifier and log the run to MLflow.

Usage::

    python <this_script> [batch_size] [num_epochs]

Reads ``data_train.csv``, ``data_test.csv`` and ``data_val.csv`` from the
current working directory, trains a single-layer logistic regression with
full-batch SGD, saves the weights to ``stroke.pth`` and logs the test RMSE
together with the run parameters to MLflow.
"""
import torch
import sys
import mlflow
import torch.nn.functional as F
from torch import nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
import numpy as np
import pandas as pd

np.set_printoptions(suppress=False)


class LogisticRegressionModel(nn.Module):
    """Single linear layer followed by a sigmoid.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of outputs per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        out = self.linear(x)
        return self.sigmoid(out)


with mlflow.start_run():
    data_train = pd.read_csv("data_train.csv")
    data_test = pd.read_csv("data_test.csv")
    # NOTE(review): the validation split is loaded but not used anywhere below.
    data_val = pd.read_csv("data_val.csv")

    FEATURES = ['age', 'hypertension', 'heart_disease', 'ever_married',
                'avg_glucose_level', 'bmi']

    x_train = data_train[FEATURES].astype(np.float32)
    y_train = data_train['stroke'].astype(np.float32)

    x_test = data_test[FEATURES].astype(np.float32)
    y_test = data_test['stroke'].astype(np.float32)

    fTrain = torch.from_numpy(x_train.values)
    # reshape(-1, 1) yields the (n_samples, 1) column BCELoss needs to match
    # the model output, without hard-coding the training-set row count.
    tTrain = torch.from_numpy(y_train.values.reshape(-1, 1))

    fTest = torch.from_numpy(x_test.values)
    tTest = torch.from_numpy(y_test.values)

    # NOTE(review): batch_size is parsed and logged, but the loop below feeds
    # the whole training set in one step per epoch (no mini-batching).
    batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 16
    num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
    learning_rate = 0.001
    # Derived from FEATURES so the layer width tracks the feature list.
    input_dim = len(FEATURES)
    output_dim = 1

    model = LogisticRegressionModel(input_dim, output_dim)

    criterion = torch.nn.BCELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Stays None when num_epochs == 0 so the logging below can skip it
    # instead of failing on an unbound name.
    loss = None
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        # Forward pass
        y_pred = model(fTrain)
        # Compute loss
        loss = criterion(y_pred, tTrain)
        # Backward pass
        loss.backward()
        optimizer.step()

    # Evaluation only needs the forward pass, so skip autograd bookkeeping.
    model.eval()
    with torch.no_grad():
        y_pred = model(fTest)

    torch.save(model.state_dict(), 'stroke.pth')

    # mean_squared_error returns the MSE; take the square root so the value
    # logged under "rmse" really is a root-mean-squared error.
    mse = mean_squared_error(tTest.numpy(), y_pred.numpy().ravel())
    rmse = float(np.sqrt(mse))

    mlflow.log_metric("rmse", rmse)
    if loss is not None:
        mlflow.log_param("Last loss", loss.item())
    mlflow.log_param("epochs", num_epochs)
    mlflow.log_param("batch size", batch_size)