From 2fca60f167c234a997719b8206112423c10ee2ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20=C5=81=C4=85czkowski?=
Date: Wed, 24 Apr 2024 20:22:04 +0200
Subject: [PATCH] IUM_07 - wrap create_model.py with sacred experiment

---
 create_model.py    | 171 +++++++++++++++++++++++++++++----------------
 models/Jenkinsfile |  27 +++++--
 requirements.txt   | Bin 128 -> 144 bytes
 3 files changed, 134 insertions(+), 64 deletions(-)

diff --git a/create_model.py b/create_model.py
index 869c992..d2e26df 100644
--- a/create_model.py
+++ b/create_model.py
@@ -4,90 +4,141 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
+import pathlib
+
 import os
 import sys
-from sklearn.metrics import classification_report
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 from NeuralNetwork import NeuralNetwork
 
-# Seed for reproducibility
-torch.manual_seed(1234)
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
 
-# Load data
-train = pd.read_csv('./datasets/train.csv')
-test = pd.read_csv('./datasets/test.csv')
+# Create new sacred experiment
+ex = Experiment("s464863")
 
-# Split data
-X_train = train.drop(columns=['id', 'diagnosis']).values
-y_train = train['diagnosis'].values
+# Setup observers
+ex.observers.append(FileStorageObserver('my_runs'))
+ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017', db_name='sacred'))
 
-X_test = test.drop(columns=['id', 'diagnosis']).values
-y_test = test['diagnosis'].values
+@ex.config
+def config():
+    # Default parameters
+    hidden_size = 128
 
-# Convert data to PyTorch tensors
-X_train = torch.FloatTensor(X_train)
-y_train = torch.FloatTensor(y_train).view(-1, 1)
+    # Default learning parameters
+    learning_rate = 0.001
+    weight_decay = 0.001
+    num_epochs = 1000
 
-X_test = torch.FloatTensor(X_test)
-y_test = torch.FloatTensor(y_test).view(-1, 1)
+    # Learning parameters from sys.argv
+    if len(sys.argv) > 1:
+        num_epochs = int(sys.argv[1])
+        learning_rate = float(sys.argv[2])
+        weight_decay = float(sys.argv[3])
 
-# Parameters
-input_size = X_train.shape[1]
-hidden_size = 128
+@ex.automain
+def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
+    # Seed for reproducibility
+    torch.manual_seed(1234)
 
-# Default parameters
-learning_rate = 0.001
-weight_decay = 0.001
-num_epochs = 1000
+    # Load data with sacred
+    train_data = ex.open_resource('./datasets/train.csv', "r")
+    test_data = ex.open_resource('./datasets/test.csv', "r")
 
-# Parameters from sys.argv
-if len(sys.argv) > 1:
-    num_epochs = int(sys.argv[1])
-    learning_rate = float(sys.argv[2])
-    weight_decay = float(sys.argv[3])
+    # Convert to pandas dataframe
+    train = pd.read_csv(train_data)
+    test = pd.read_csv(test_data)
 
-# Model initialization
-model = NeuralNetwork(input_size, hidden_size)
+    # Split data
+    X_train = train.drop(columns=['id', 'diagnosis']).values
+    y_train = train['diagnosis'].values
 
-# Loss function and optimizer
-criterion = nn.BCELoss()
-optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
+    X_test = test.drop(columns=['id', 'diagnosis']).values
+    y_test = test['diagnosis'].values
 
-# Training loop
-model.train()
+    # Convert data to PyTorch tensors
+    X_train = torch.FloatTensor(X_train)
+    y_train = torch.FloatTensor(y_train).view(-1, 1)
 
-for epoch in range(num_epochs):
-    # Zero the gradients
-    optimizer.zero_grad()
+    X_test = torch.FloatTensor(X_test)
+    y_test = torch.FloatTensor(y_test).view(-1, 1)
 
-    # Forward pass
-    outputs = model(X_train)
+    # Parameters
+    input_size = X_train.shape[1]
 
-    # Compute loss
-    loss = criterion(outputs, y_train)
+    # Model initialization
+    model = NeuralNetwork(input_size, hidden_size)
 
-    # Backward pass
-    loss.backward()
+    # Loss function and optimizer
+    criterion = nn.BCELoss()
+    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
 
-    # Update weights
-    optimizer.step()
+    # Training loop
+    model.train()
 
-    # Print loss
-    if (epoch + 1) % 100 == 0:
-        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
+    for epoch in range(num_epochs):
+        # Zero the gradients
+        optimizer.zero_grad()
 
-# Test the model
-model.eval()
+        # Forward pass
+        outputs = model(X_train)
 
-with torch.no_grad():
-    y_pred = model(X_test)
-    y_pred = np.where(y_pred > 0.5, 1, 0)
-    print(classification_report(y_test, y_pred, target_names=['B', 'M']))
+        # Compute loss
+        loss = criterion(outputs, y_train)
 
-# If directory models does not exist, create it
-if not os.path.exists('./models'):
-    os.makedirs('./models')
+        # Backward pass
+        loss.backward()
 
-# Save the model
-torch.save(model, './models/model.pth')
\ No newline at end of file
+        # Update weights
+        optimizer.step()
+
+        # Print loss
+        if (epoch + 1) % 100 == 0:
+            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
+
+    # Test the model
+    model.eval()
+
+    with torch.no_grad():
+
+        # Make predictions
+        y_pred = model(X_test)
+        y_pred = np.where(y_pred > 0.5, 1, 0)
+
+        # Calculate metrics
+        accuracy = accuracy_score(y_test, y_pred)
+        precision = precision_score(y_test, y_pred)
+        recall = recall_score(y_test, y_pred)
+        f1 = f1_score(y_test, y_pred)
+
+        # Save metrics to sacred
+        _run.log_scalar("accuracy", accuracy)
+        _run.log_scalar("precision", precision)
+        _run.log_scalar("recall", recall)
+        _run.log_scalar("f1", f1)
+
+    # If directory models does not exist, create it
+    if not os.path.exists('./models'):
+        os.makedirs('./models')
+
+    # Save the model
+    torch.save(model, './models/model.pth')
+
+    # Add artifact to sacred experiment
+    ex.add_artifact('./models/model.pth', content_type="application/x-pytorch")
+
+    # Save id of the run
+    with open("experiment_id.txt", "w") as f:
+        f.write(str(_run._id))
+
+    # Save sources and resources paths
+    with open("sources.txt", "w") as f:
+        for source in _run.observers[1].run_entry["experiment"]["sources"]:
+            f.write(source[1] + "\n")
+
+    with open("resources.txt", "w") as f:
+        for resource in _run.observers[1].run_entry["resources"]:
+            f.write(resource[1] + "\n")
\ No newline at end of file
diff --git a/models/Jenkinsfile b/models/Jenkinsfile
index 67aca6d..deff1b2 100644
--- a/models/Jenkinsfile
+++ b/models/Jenkinsfile
@@ -48,10 +48,29 @@ pipeline {
         }
 
         steps {
-            sh "chmod +x ./create_model.py"
-            sh "python3 ./create_model.py ${params.epochs} ${params.learning_rate} ${params.weight_decay}"
-            archiveArtifacts artifacts: 'models/model.pth', onlyIfSuccessful: true
-            build job: 's464863-evaluation/main', wait: false
+            script {
+                sh "chmod +x ./create_model.py"
+                sh "python3 ./create_model.py ${params.epochs} ${params.learning_rate} ${params.weight_decay}"
+
+                def experiment_id = readFile('experiment_id.txt').trim()
+                archiveArtifacts artifacts: "my_runs/${experiment_id}/*", onlyIfSuccessful: true
+
+                archiveArtifacts artifacts: 'models/model.pth', onlyIfSuccessful: true
+
+                def sources = readFile('sources.txt').split('\n')
+
+                for (def source in sources) {
+                    archiveArtifacts artifacts: "my_runs/${source}", onlyIfSuccessful: true
+                }
+
+                def resources = readFile('resources.txt').split('\n')
+
+                for (def resource in resources) {
+                    archiveArtifacts artifacts: "${resource}", onlyIfSuccessful: true
+                }
+
+                build job: 's464863-evaluation/main', wait: false
+            }
         }
     }
 }
diff --git a/requirements.txt b/requirements.txt
index c6d415ebf698fc4bfc0b4cf9115093d3ab164f77..6820f8e3e328e625553a5283e569e87d1bb8ad44 100644
GIT binary patch
delta 23
ecmZo*oWMAtL4cQmi=mhyks+C(h#{3Bg#iFQ`vklI

delta 6
NcmbQh*uXfU0RRUM0wVwb
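
--
Editor's note, not part of the patch: a minimal sketch of how a run recorded by
this experiment could be inspected after training. It assumes Sacred's
FileStorageObserver layout under my_runs/ (each run directory contains
config.json and metrics.json) and the experiment_id.txt file that
create_model.py writes; the script name inspect_run.py is hypothetical.

    # inspect_run.py - read back what the sacred experiment logged
    import json
    import pathlib

    # experiment_id.txt is written by create_model.py at the end of a run
    run_id = pathlib.Path("experiment_id.txt").read_text().strip()
    run_dir = pathlib.Path("my_runs") / run_id

    # config.json holds the captured hyperparameters
    # (hidden_size, learning_rate, weight_decay, num_epochs)
    config = json.loads((run_dir / "config.json").read_text())
    print("config:", config)

    # metrics.json maps each metric logged via _run.log_scalar
    # to its recorded steps and values
    metrics = json.loads((run_dir / "metrics.json").read_text())
    for name, series in metrics.items():
        print(f"{name}: {series['values']}")

The same metrics are also sent to the MongoObserver configured above, so they
could equally be queried from the sacred database on the MongoDB host.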