# ium_464863/create_model.py
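"""Train a binary classification neural network and track the run with sacred.

Expected invocation (inferred from the sys.argv handling in config() below):
    python create_model.py <num_epochs> <learning_rate> <weight_decay>
"""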

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import pathlib
import os
import sys
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from NeuralNetwork import NeuralNetwork
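# NeuralNetwork is a local module (not shown here); given the BCELoss used
# below, it is presumably an nn.Module constructed as
# NeuralNetwork(input_size, hidden_size) whose forward pass ends in a sigmoid.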
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
# Create new sacred experiment
ex = Experiment("s464863")
# Setup observers
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017', db_name='sacred'))
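# FileStorageObserver keeps a local copy of each run under ./my_runs, while
# MongoObserver mirrors the same run metadata to the course MongoDB instance.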
@ex.config
def config():
    # Default model parameters
    hidden_size = 128

    # Default learning parameters
    learning_rate = 0.001
    weight_decay = 0.001
    num_epochs = 1000

    # Learning parameters from sys.argv (all three must be provided)
    if len(sys.argv) > 3:
        num_epochs = int(sys.argv[1])
        learning_rate = float(sys.argv[2])
        weight_decay = float(sys.argv[3])
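
# Note: with @ex.automain, sacred parses the command line itself, so bare
# positional arguments may clash with sacred's own CLI; the idiomatic sacred
# override would be `python create_model.py with num_epochs=500`.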
@ex.automain
def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
    # Seed for reproducibility
    torch.manual_seed(1234)

    # Load data with sacred (tracked as experiment resources)
    train_data = ex.open_resource('./datasets/train.csv', "r")
    test_data = ex.open_resource('./datasets/test.csv', "r")

    # Convert to pandas dataframes
    train = pd.read_csv(train_data)
    test = pd.read_csv(test_data)

    # Split data into features and labels
    X_train = train.drop(columns=['id', 'diagnosis']).values
    y_train = train['diagnosis'].values
    X_test = test.drop(columns=['id', 'diagnosis']).values
    y_test = test['diagnosis'].values

    # Convert data to PyTorch tensors
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train).view(-1, 1)
    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test).view(-1, 1)
    # Parameters
    input_size = X_train.shape[1]

    # Model initialization
    model = NeuralNetwork(input_size, hidden_size)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop (full-batch: the whole training set in every step)
    model.train()
    for epoch in range(num_epochs):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_train)

        # Compute loss
        loss = criterion(outputs, y_train)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Print loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
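
    # A per-epoch loss curve could also be recorded in sacred here, e.g.:
    #   _run.log_scalar("loss", loss.item(), epoch)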
    # Test the model
    model.eval()
    with torch.no_grad():
        # Make predictions, thresholding probabilities at 0.5
        y_pred = model(X_test)
        y_pred = np.where(y_pred > 0.5, 1, 0)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Save metrics to sacred
        _run.log_scalar("accuracy", accuracy)
        _run.log_scalar("precision", precision)
        _run.log_scalar("recall", recall)
        _run.log_scalar("f1", f1)
    # Create the models directory if it does not exist
    if not os.path.exists('./models'):
        os.makedirs('./models')

    # Save the model
    torch.save(model, './models/model.pth')

    # Add artifact to sacred experiment
    ex.add_artifact('./models/model.pth', content_type="application/x-pytorch")
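
    # Note: torch.save(model, ...) pickles the entire module, so loading it
    # later requires the NeuralNetwork class to be importable; saving
    # model.state_dict() instead would be the more portable alternative.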
    # Save id of the run
    with open("experiment_id.txt", "w") as f:
        f.write(str(_run._id))

    # Save source and resource paths (observers[1] is the MongoObserver)
    with open("sources.txt", "w") as f:
        for source in _run.observers[1].run_entry["experiment"]["sources"]:
            f.write(source[1] + "\n")

    with open("resources.txt", "w") as f:
        for resource in _run.observers[1].run_entry["resources"]:
            f.write(resource[1] + "\n")