import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import os
import sys

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from NeuralNetwork import NeuralNetwork

from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver

# Create a new sacred experiment
ex = Experiment("s464863")

# Set up observers: local file storage plus the shared MongoDB instance
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017', db_name='sacred'))

@ex.config
def config():
    # Default model parameters
    hidden_size = 128

    # Default learning parameters
    learning_rate = 0.001
    weight_decay = 0.001
    num_epochs = 1000

    # Learning parameters from sys.argv, expected as:
    # <num_epochs> <learning_rate> <weight_decay>
    # (checking for all three avoids an IndexError when fewer are given)
    if len(sys.argv) >= 4:
        num_epochs = int(sys.argv[1])
        learning_rate = float(sys.argv[2])
        weight_decay = float(sys.argv[3])

@ex.automain
def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
    # Seed for reproducibility
    torch.manual_seed(1234)

    # Open data files through sacred so they are tracked as run resources
    train_data = ex.open_resource('./datasets/train.csv', "r")
    test_data = ex.open_resource('./datasets/test.csv', "r")

    # Convert to pandas dataframes
    train = pd.read_csv(train_data)
    test = pd.read_csv(test_data)

    # Split into features and labels
    X_train = train.drop(columns=['id', 'diagnosis']).values
    y_train = train['diagnosis'].values

    X_test = test.drop(columns=['id', 'diagnosis']).values
    y_test = test['diagnosis'].values

    # Convert data to PyTorch tensors; labels become column vectors
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train).view(-1, 1)

    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test).view(-1, 1)

    # Parameters
    input_size = X_train.shape[1]

    # Model initialization
    model = NeuralNetwork(input_size, hidden_size)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop (full-batch gradient descent over the training set)
    model.train()
    for epoch in range(num_epochs):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_train)

        # Compute loss
        loss = criterion(outputs, y_train)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Print loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

    # Test the model
    model.eval()
    with torch.no_grad():
        # Make predictions and threshold the probabilities at 0.5
        y_pred = model(X_test)
        y_pred = np.where(y_pred > 0.5, 1, 0)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Log metrics to sacred
    _run.log_scalar("accuracy", accuracy)
    _run.log_scalar("precision", precision)
    _run.log_scalar("recall", recall)
    _run.log_scalar("f1", f1)

    # If the models directory does not exist, create it
    if not os.path.exists('./models'):
        os.makedirs('./models')

    # Save the model
    torch.save(model, './models/model.pth')

    # Attach the saved model as an artifact of the sacred experiment
    ex.add_artifact('./models/model.pth', content_type="application/x-pytorch")

    # Save the id of the run
    with open("experiment_id.txt", "w") as f:
        f.write(str(_run._id))

    # Save source and resource paths as recorded by the MongoObserver
    # (observers[1] is the MongoObserver appended above)
    with open("sources.txt", "w") as f:
        for source in _run.observers[1].run_entry["experiment"]["sources"]:
            f.write(source[1] + "\n")

    with open("resources.txt", "w") as f:
        for resource in _run.observers[1].run_entry["resources"]:
            f.write(resource[1] + "\n")
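
# ---------------------------------------------------------------------------
# For reference: the NeuralNetwork class is imported from NeuralNetwork.py and
# its definition is not part of this file. The commented sketch below is an
# assumption about its shape, not the project's actual code. Since the script
# uses nn.BCELoss (which expects probabilities in [0, 1]), the model must end
# in a sigmoid, and its constructor must accept (input_size, hidden_size):
#
#     class NeuralNetwork(nn.Module):
#         def __init__(self, input_size, hidden_size):
#             super().__init__()
#             self.fc1 = nn.Linear(input_size, hidden_size)
#             self.relu = nn.ReLU()
#             self.fc2 = nn.Linear(hidden_size, 1)
#             self.sigmoid = nn.Sigmoid()
#
#         def forward(self, x):
#             return self.sigmoid(self.fc2(self.relu(self.fc1(x))))
#
# Intended invocation per the sys.argv parsing in config() (the script
# filename here is hypothetical):
#
#     python main.py <num_epochs> <learning_rate> <weight_decay>
# ---------------------------------------------------------------------------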