ium_464863/create_model.py

135 lines
3.7 KiB
Python

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from NeuralNetwork import NeuralNetwork
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
# Create new sacred experiment
ex = Experiment("s464863", save_git_info=False)

# Connection settings for the MongoDB observer. They can be overridden through
# the environment so that credentials do not have to live in the source file.
# NOTE(review): the default URL still embeds a username and password; it is
# kept only so that existing invocations behave exactly as before. Consider
# removing the default once every caller sets SACRED_MONGO_URL.
MONGO_URL = (
    os.environ.get('SACRED_MONGO_URL')
    or 'mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'
)
MONGO_DB_NAME = os.environ.get('SACRED_MONGO_DB') or 'sacred'

# Setup observers: one writes runs to the local 'my_runs' directory, the other
# reports them to MongoDB.
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url=MONGO_URL, db_name=MONGO_DB_NAME))
@ex.config
def config():
    """Default hyperparameters of the experiment."""
    hidden_size = 128  # second argument passed to NeuralNetwork
    learning_rate = 1e-3  # Adam learning rate
    weight_decay = 1e-3  # Adam weight decay
    num_epochs = 1_000  # number of training iterations
@ex.automain
def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
    """Train the model, evaluate it on the test set and record the run.

    The function reads ``./datasets/train.csv`` and ``./datasets/test.csv``
    through sacred, trains a ``NeuralNetwork`` on the whole training set with
    binary cross-entropy loss, logs accuracy / precision / recall / F1 on the
    test set, saves the model to ``./models/model.pth`` and writes the run id
    and the source / resource paths to text files in the working directory.

    Args:
        hidden_size: Second argument passed to ``NeuralNetwork``.
        learning_rate: Learning rate passed to the Adam optimizer.
        weight_decay: Weight decay passed to the Adam optimizer.
        num_epochs: Number of optimisation steps; every step uses the full
            training set.
        _run: The sacred run object, used for metric logging, the run id and
            access to the observers.
    """
    # Seed for reproducibility
    torch.manual_seed(1234)

    # Load data with sacred so that both files are registered as resources.
    # The handles are closed as soon as pandas has consumed them (the
    # original left them open for the lifetime of the process).
    with ex.open_resource('./datasets/train.csv', "r") as train_data:
        train = pd.read_csv(train_data)
    with ex.open_resource('./datasets/test.csv', "r") as test_data:
        test = pd.read_csv(test_data)

    # Split data: every column except 'id' and 'diagnosis' is a feature,
    # 'diagnosis' is the target.
    X_train = train.drop(columns=['id', 'diagnosis']).values
    y_train = train['diagnosis'].values
    X_test = test.drop(columns=['id', 'diagnosis']).values
    y_test = test['diagnosis'].values

    # Convert data to PyTorch tensors; targets become column vectors.
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train).view(-1, 1)
    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test).view(-1, 1)

    # Parameters
    input_size = X_train.shape[1]

    # Model initialization
    model = NeuralNetwork(input_size, hidden_size)

    # Loss function and optimizer.
    # NOTE(review): BCELoss expects probabilities, so the model presumably
    # ends with a sigmoid -- confirm in NeuralNetwork.
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop (full-batch: the whole training set on every step)
    model.train()
    for epoch in range(num_epochs):
        # Zero the gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(X_train)
        # Compute loss
        loss = criterion(outputs, y_train)
        # Backward pass
        loss.backward()
        # Update weights
        optimizer.step()
        # Print loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

    # Test the model
    model.eval()
    with torch.no_grad():
        # Make predictions and threshold them at 0.5 to get class labels
        y_pred = model(X_test)
        y_pred = np.where(y_pred > 0.5, 1, 0)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Save metrics to sacred
    _run.log_scalar("accuracy", accuracy)
    _run.log_scalar("precision", precision)
    _run.log_scalar("recall", recall)
    _run.log_scalar("f1", f1)

    # Create the models directory if needed; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs('./models', exist_ok=True)

    # Save the model
    torch.save(model, './models/model.pth')

    # Add artifact to sacred experiment.
    # NOTE(review): "x-pythorch" looks like a typo for "x-pytorch"; left
    # unchanged because consumers may already match on this exact string.
    ex.add_artifact('./models/model.pth', content_type="application/x-pythorch")

    # Save id of the run
    with open("experiment_id.txt", "w") as f:
        f.write(str(_run._id))

    # Save sources and resources paths.
    # NOTE(review): this depends on which observer sits at index 1 of
    # _run.observers and on its entries being (name, string) pairs -- confirm
    # the ordering sacred applies to the observers registered on `ex`.
    with open("sources.txt", "w") as f:
        for source in _run.observers[1].run_entry["experiment"]["sources"]:
            f.write(source[1] + "\n")
    with open("resources.txt", "w") as f:
        for resource in _run.observers[1].run_entry["resources"]:
            f.write(resource[1] + "\n")