IUM_07 - wrap create_model.py with sacred experiment

Paweł Łączkowski 2024-04-24 20:22:04 +02:00
parent 56cc31edd3
commit 2fca60f167
3 changed files with 134 additions and 64 deletions

create_model.py

@@ -4,90 +4,141 @@
import torch
import torch.nn as nn
import torch.optim as optim
import pathlib
import os
import sys

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from NeuralNetwork import NeuralNetwork

from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver

# Create new sacred experiment
ex = Experiment("s464863")

# Setup observers
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017', db_name='sacred'))

@ex.config
def config():
    # Default parameters
    hidden_size = 128

    # Default learning parameters
    learning_rate = 0.001
    weight_decay = 0.001
    num_epochs = 1000

    # Learning parameters from sys.argv
    if len(sys.argv) > 1:
        num_epochs = int(sys.argv[1])
        learning_rate = float(sys.argv[2])
        weight_decay = float(sys.argv[3])

@ex.automain
def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
    # Seed for reproducibility
    torch.manual_seed(1234)

    # Load data with sacred
    train_data = ex.open_resource('./datasets/train.csv', "r")
    test_data = ex.open_resource('./datasets/test.csv', "r")

    # Convert to pandas dataframe
    train = pd.read_csv(train_data)
    test = pd.read_csv(test_data)

    # Split data
    X_train = train.drop(columns=['id', 'diagnosis']).values
    y_train = train['diagnosis'].values

    X_test = test.drop(columns=['id', 'diagnosis']).values
    y_test = test['diagnosis'].values

    # Convert data to PyTorch tensors
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train).view(-1, 1)

    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test).view(-1, 1)

    # Parameters
    input_size = X_train.shape[1]

    # Model initialization
    model = NeuralNetwork(input_size, hidden_size)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop
    model.train()

    for epoch in range(num_epochs):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_train)

        # Compute loss
        loss = criterion(outputs, y_train)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Print loss
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

    # Test the model
    model.eval()

    with torch.no_grad():
        # Make predictions
        y_pred = model(X_test)
        y_pred = np.where(y_pred > 0.5, 1, 0)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Save metrics to sacred
        _run.log_scalar("accuracy", accuracy)
        _run.log_scalar("precision", precision)
        _run.log_scalar("recall", recall)
        _run.log_scalar("f1", f1)

    # If directory models does not exist, create it
    if not os.path.exists('./models'):
        os.makedirs('./models')

    # Save the model
    torch.save(model, './models/model.pth')

    # Add artifact to sacred experiment
    ex.add_artifact('./models/model.pth', content_type="application/x-pytorch")

    # Save id of the run
    with open("experiment_id.txt", "w") as f:
        f.write(str(_run._id))

    # Save sources and resources paths
    with open("sources.txt", "w") as f:
        for source in _run.observers[1].run_entry["experiment"]["sources"]:
            f.write(source[1] + "\n")

    with open("resources.txt", "w") as f:
        for resource in _run.observers[1].run_entry["resources"]:
            f.write(resource[1] + "\n")
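
Once a run completes, the FileStorageObserver configured above writes the resolved config and the logged scalars as JSON files under my_runs/<run_id>/. A minimal sketch for inspecting them afterwards, assuming the script was launched from the repository root so that experiment_id.txt and my_runs/ sit next to it:

import json
import pathlib

# Run id recorded by create_model.py at the end of the experiment
run_id = pathlib.Path('experiment_id.txt').read_text().strip()
run_dir = pathlib.Path('my_runs') / run_id

# FileStorageObserver stores the resolved config and the logged
# metrics as config.json and metrics.json inside the run directory
config = json.loads((run_dir / 'config.json').read_text())
metrics = json.loads((run_dir / 'metrics.json').read_text())

print('hidden_size:', config['hidden_size'])
for name in ('accuracy', 'precision', 'recall', 'f1'):
    # Each metric entry holds parallel 'steps' and 'values' lists; every
    # metric here was logged once, so the last value is the final score
    print(name, metrics[name]['values'][-1])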

models/Jenkinsfile

@@ -48,10 +48,29 @@ pipeline {
        }
        steps {
            script {
                sh "chmod +x ./create_model.py"
                sh "python3 ./create_model.py ${params.epochs} ${params.learning_rate} ${params.weight_decay}"

                def experiment_id = readFile('experiment_id.txt').trim()
                archiveArtifacts artifacts: "my_runs/${experiment_id}/*", onlyIfSuccessful: true
                archiveArtifacts artifacts: 'models/model.pth', onlyIfSuccessful: true

                def sources = readFile('sources.txt').split('\n')
                for (def source in sources) {
                    archiveArtifacts artifacts: "my_runs/${source}", onlyIfSuccessful: true
                }

                def resources = readFile('resources.txt').split('\n')
                for (def resource in resources) {
                    archiveArtifacts artifacts: "${resource}", onlyIfSuccessful: true
                }

                build job: 's464863-evaluation/main', wait: false
            }
        }
    }
}

Binary file not shown.
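
The MongoObserver mirrors the same run into the 'sacred' database, which downstream jobs such as s464863-evaluation could query instead of unpacking the archived files. A short pymongo sketch for fetching the run by the id saved in experiment_id.txt, assuming pymongo is installed and the credentials from the observer above are still valid:

from pymongo import MongoClient

# Connection string copied from the MongoObserver configured in create_model.py
client = MongoClient('mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017')
runs = client['sacred']['runs']  # Sacred's default collection for run entries

# Sacred assigns integer run ids; create_model.py wrote this one to a file
with open('experiment_id.txt') as f:
    run_id = int(f.read().strip())

run = runs.find_one({'_id': run_id})
print(run['experiment']['name'])  # 's464863'
print(run['config'])              # resolved hyperparameters
print(run['status'])              # e.g. 'COMPLETED'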