"""Train the project's ``NeuralNetwork`` classifier and track the run in MLflow.

Usage::

    python <this script> [learning_rate] [weight_decay] [num_epochs]

All three positional arguments are optional and default to ``0.001``,
``0.001`` and ``1000`` respectively.

The script:

1. reads ``datasets/train.csv`` and ``datasets/test.csv`` from the parent
   directory of the directory containing this file,
2. trains the model full-batch with Adam and binary cross-entropy,
3. logs hyperparameters and test-set accuracy / precision / recall / F1 to the
   MLflow experiment ``s464863`` on ``http://localhost:5000``,
4. saves the trained model to ``./models/model.pth`` (relative to the current
   working directory).
"""
import inspect
import os
import sys

import mlflow
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# The NeuralNetwork module lives one directory above this script, so that
# directory has to be on sys.path before the import below.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

from NeuralNetwork import NeuralNetwork  # noqa: E402  (needs the sys.path tweak above)

EXPERIMENT_NAME = "s464863"

# Learning parameters (optional positional CLI arguments). They are parsed
# before any MLflow call so that a malformed argument fails fast instead of
# leaving an empty run behind on the tracking server.
learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000

# MLflow tracking URI
mlflow.set_tracking_uri("http://localhost:5000")

# Create the MLflow experiment if it does not exist yet.
# get_experiment_by_name() returns None for an unknown experiment, so on the
# very first execution the ID must come from create_experiment() rather than
# from the (None) lookup result.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id

# Set active MLflow experiment
mlflow.set_experiment(EXPERIMENT_NAME)

# Create the run through the client, then attach the fluent API to it.
client = mlflow.tracking.MlflowClient()
created_run = client.create_run(experiment_id=experiment_id)

# Using the active run as a context manager guarantees the run is terminated
# even when loading, training or evaluation raises.
with mlflow.start_run(run_id=created_run.info.run_id) as run:
    # Seed for reproducibility
    torch.manual_seed(1234)

    # Dataset locations, resolved relative to the parent of this script's directory
    train_path = os.path.join(parentdir, 'datasets/train.csv')
    test_path = os.path.join(parentdir, 'datasets/test.csv')

    # Load data
    train_data = pd.read_csv(train_path)
    test_data = pd.read_csv(test_path)

    # Split data: every column except 'id' and 'diagnosis' is a feature,
    # 'diagnosis' is the target.
    # NOTE(review): BCELoss below requires targets in [0, 1], so 'diagnosis'
    # is presumably already encoded as 0/1 in the CSV files - confirm.
    X_train = train_data.drop(columns=['id', 'diagnosis']).values
    y_train = train_data['diagnosis'].values

    X_test = test_data.drop(columns=['id', 'diagnosis']).values
    y_test = test_data['diagnosis'].values

    # Convert data to PyTorch tensors; targets become column vectors so their
    # shape can line up with the model output passed to BCELoss.
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train).view(-1, 1)

    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test).view(-1, 1)

    # Parameters
    input_size = X_train.shape[1]
    hidden_size = 128

    # Log parameters to mlflow
    mlflow.log_params({
        "hidden_size": hidden_size,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "num_epochs": num_epochs,
    })

    # Model initialization
    model = NeuralNetwork(input_size, hidden_size)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop (full batch: the whole training set is used in every step)
    model.train()

    for epoch in range(num_epochs):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_train)

        # Compute loss
        loss = criterion(outputs, y_train)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Print loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

    # Test the model
    model.eval()

    with torch.no_grad():
        # Make predictions
        test_outputs = model(X_test)

    # Threshold the model outputs at 0.5 to obtain hard 0/1 labels. Both label
    # arrays are handed to scikit-learn as flat NumPy arrays instead of
    # relying on implicit tensor conversion.
    y_pred = np.where(test_outputs.numpy().ravel() > 0.5, 1, 0)
    y_true = y_test.numpy().ravel()

    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Log metrics to mlflow
    mlflow.log_metrics({
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    })

    # Make sure the output directory exists (no error if it already does)
    os.makedirs('./models', exist_ok=True)

    # Save the whole model object (not just the state_dict); loading it back
    # therefore requires the NeuralNetwork class to be importable.
    torch.save(model, './models/model.pth')