#!/usr/bin/env python
# coding: utf-8
"""Train a small binary classifier with PyTorch and record the run in MLflow.

Run as a script with the number of training epochs as the only argument.
The script reads ``X_train.csv``, ``y_train.csv`` and ``X_test.csv`` from the
current working directory, writes the trained model to ``model.pth`` and logs
the ``epochs`` parameter, the final training loss and the model to MLflow.
"""

import sys
from urllib.parse import urlparse

import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from torch import nn, optim


def prepare_data():
    """Load the training set from ``X_train.csv`` / ``y_train.csv``.

    Returns:
        A ``(X_train, y_train)`` pair of float tensors. ``y_train`` has its
        size-1 dimensions squeezed away.
    """
    X_train = pd.read_csv('X_train.csv')
    y_train = pd.read_csv('y_train.csv')

    X_train = torch.from_numpy(np.array(X_train)).float()
    y_train = torch.squeeze(torch.from_numpy(y_train.values).float())

    return X_train, y_train


class Net(nn.Module):
    """Feed-forward network: ``n_features -> 5 -> 3 -> 1`` with a sigmoid output."""

    def __init__(self, n_features):
        """Create the three linear layers for inputs with ``n_features`` columns."""
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return the sigmoid of the last layer after two ReLU hidden layers."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))


def calculate_accuracy(y_true, y_pred):
    """Return the fraction of entries where ``y_pred >= 0.5`` equals ``y_true``.

    Args:
        y_true: Tensor of reference labels.
        y_pred: Tensor of model outputs; it is thresholded at 0.5 and flattened.

    Returns:
        A float tensor holding the accuracy.
    """
    predicted = y_pred.ge(.5).view(-1)
    return (y_true == predicted).sum().float() / len(y_true)


def round_tensor(t, decimal_places=3):
    """Return the Python scalar held by tensor ``t`` rounded to ``decimal_places``."""
    return round(t.item(), decimal_places)


def train_model(X_train, y_train, device, epochs):
    """Train a ``Net`` on the whole training set for ``epochs`` iterations.

    Each iteration is one full-batch step of Adam (lr=0.001) on the binary
    cross-entropy loss. Loss and accuracy are printed every 100 epochs.

    Args:
        X_train: Feature tensor; its second dimension sets the input width.
        y_train: Label tensor passed to ``nn.BCELoss`` as the target.
        device: ``torch.device`` on which training runs.
        epochs: Number of optimisation steps; must be at least 1.

    Returns:
        ``(net, loss)`` where ``loss`` is the rounded loss computed in the
        last iteration (before that iteration's optimiser step).

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    # Without at least one iteration there is no loss to return; fail with a
    # clear message instead of an UnboundLocalError at the return statement.
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    net = Net(X_train.shape[1])

    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    X_train = X_train.to(device)
    y_train = y_train.to(device)

    net = net.to(device)
    criterion = criterion.to(device)

    for epoch in range(epochs):
        y_pred = net(X_train)
        y_pred = torch.squeeze(y_pred)
        train_loss = criterion(y_pred, y_train)

        if epoch % 100 == 0:
            train_acc = calculate_accuracy(y_train, y_pred)
            print(
                f'''epoch {epoch} Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)} ''')

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return net, round_tensor(train_loss)


def my_main(epochs):
    """Train the model, save it to ``model.pth`` and log everything to MLflow.

    Must be called inside an active MLflow run. Logs the ``epochs`` parameter,
    the ``loss`` metric and the model itself (PyTorch flavor) under the
    artifact path ``s444421``. The model is also registered under that name
    when the tracking URI scheme is not ``file``.

    Args:
        epochs: Number of training epochs forwarded to ``train_model``.
    """
    X_train, y_train = prepare_data()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model, loss = train_model(X_train, y_train, device, epochs)

    torch.save(model, 'model.pth')

    mlflow.log_param("epochs", epochs)
    mlflow.log_metric("loss", loss)

    X_test = pd.read_csv('X_test.csv')
    X_test = torch.from_numpy(np.array(X_test)).float()

    # Take the NumPy example while the tensor is still on the host:
    # Tensor.numpy() raises for tensors that live on a CUDA device.
    input_example = X_test.numpy()[:5]

    # Inference only - no autograd graph is needed.
    with torch.no_grad():
        probabilities = model(X_test.to(device)).cpu().numpy()

    # The model is logged with the PyTorch flavor, so the signature describes
    # the raw output of Net.forward rather than thresholded labels.
    signature = mlflow.models.signature.infer_signature(
        X_train.numpy(), probabilities)

    log_kwargs = {"signature": signature, "input_example": input_example}

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    if tracking_url_type_store != "file":
        # The original only registers the model for non-"file" tracking URIs.
        log_kwargs["registered_model_name"] = "s444421"

    # Net is a torch.nn.Module, so it is logged with mlflow.pytorch; the
    # sklearn flavor expects an estimator exposing predict().
    mlflow.pytorch.log_model(model, "s444421", **log_kwargs)


if __name__ == "__main__":
    mlflow.set_experiment("s444421")

    epochs = int(sys.argv[1])

    with mlflow.start_run() as run:
        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
        my_main(epochs)