IUM_07 - wrap create_model.py with sacred experiment

Paweł Łączkowski 2024-04-24 20:22:04 +02:00
parent 56cc31edd3
commit 2fca60f167
3 changed files with 134 additions and 64 deletions

create_model.py

@@ -4,90 +4,141 @@
import torch
import torch.nn as nn
import torch.optim as optim
import pathlib
import os
import sys

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from NeuralNetwork import NeuralNetwork

from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver

# Create new sacred experiment
ex = Experiment("s464863")

# Setup observers
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017', db_name='sacred'))

@ex.config
def config():
    # Default parameters
    hidden_size = 128

    # Default learning parameters
    learning_rate = 0.001
    weight_decay = 0.001
    num_epochs = 1000

    # Learning parameters from sys.argv
    if len(sys.argv) > 1:
        num_epochs = int(sys.argv[1])
        learning_rate = float(sys.argv[2])
        weight_decay = float(sys.argv[3])

@ex.automain
def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
    # Seed for reproducibility
    torch.manual_seed(1234)

    # Load data with sacred
    train_data = ex.open_resource('./datasets/train.csv', "r")
    test_data = ex.open_resource('./datasets/test.csv', "r")

    # Convert to pandas dataframe
    train = pd.read_csv(train_data)
    test = pd.read_csv(test_data)

    # Split data
    X_train = train.drop(columns=['id', 'diagnosis']).values
    y_train = train['diagnosis'].values

    X_test = test.drop(columns=['id', 'diagnosis']).values
    y_test = test['diagnosis'].values

    # Convert data to PyTorch tensors
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train).view(-1, 1)

    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test).view(-1, 1)

    # Parameters
    input_size = X_train.shape[1]

    # Model initialization
    model = NeuralNetwork(input_size, hidden_size)

    # Loss function and optimizer
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Training loop
    model.train()

    for epoch in range(num_epochs):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_train)

        # Compute loss
        loss = criterion(outputs, y_train)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Print loss
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

    # Test the model
    model.eval()

    with torch.no_grad():
        # Make predictions
        y_pred = model(X_test)
        y_pred = np.where(y_pred > 0.5, 1, 0)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        # Save metrics to sacred
        _run.log_scalar("accuracy", accuracy)
        _run.log_scalar("precision", precision)
        _run.log_scalar("recall", recall)
        _run.log_scalar("f1", f1)

    # If directory models does not exist, create it
    if not os.path.exists('./models'):
        os.makedirs('./models')

    # Save the model
    torch.save(model, './models/model.pth')

    # Add artifact to sacred experiment
    ex.add_artifact('./models/model.pth', content_type="application/x-pytorch")

    # Save id of the run
    with open("experiment_id.txt", "w") as f:
        f.write(str(_run._id))

    # Save sources and resources paths
    with open("sources.txt", "w") as f:
        for source in _run.observers[1].run_entry["experiment"]["sources"]:
            f.write(source[1] + "\n")

    with open("resources.txt", "w") as f:
        for resource in _run.observers[1].run_entry["resources"]:
            f.write(resource[1] + "\n")
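
Once a run completes, the FileStorageObserver configured above writes the resolved config and the logged scalars as JSON files under my_runs/<run_id>/. A minimal sketch for inspecting them afterwards, assuming the script was launched from the repository root so that experiment_id.txt and my_runs/ sit next to it:

import json
import pathlib

# Run id recorded by create_model.py at the end of the experiment
run_id = pathlib.Path('experiment_id.txt').read_text().strip()
run_dir = pathlib.Path('my_runs') / run_id

# FileStorageObserver stores the resolved config and the logged
# metrics as config.json and metrics.json inside the run directory
config = json.loads((run_dir / 'config.json').read_text())
metrics = json.loads((run_dir / 'metrics.json').read_text())

print('hidden_size:', config['hidden_size'])
for name in ('accuracy', 'precision', 'recall', 'f1'):
    # Each metric entry holds parallel 'steps' and 'values' lists; every
    # metric here was logged once, so the last value is the final score
    print(name, metrics[name]['values'][-1])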

models/Jenkinsfile

@@ -48,10 +48,29 @@ pipeline {
        }
        steps {
            script {
                sh "chmod +x ./create_model.py"
                sh "python3 ./create_model.py ${params.epochs} ${params.learning_rate} ${params.weight_decay}"

                def experiment_id = readFile('experiment_id.txt').trim()
                archiveArtifacts artifacts: "my_runs/${experiment_id}/*", onlyIfSuccessful: true
                archiveArtifacts artifacts: 'models/model.pth', onlyIfSuccessful: true

                def sources = readFile('sources.txt').split('\n')
                for (def source in sources) {
                    archiveArtifacts artifacts: "my_runs/${source}", onlyIfSuccessful: true
                }

                def resources = readFile('resources.txt').split('\n')
                for (def resource in resources) {
                    archiveArtifacts artifacts: "${resource}", onlyIfSuccessful: true
                }

                build job: 's464863-evaluation/main', wait: false
            }
        }
    }
}

Binary file not shown.
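
The MongoObserver mirrors the same run into the 'sacred' database, which downstream jobs such as s464863-evaluation could query instead of unpacking the archived files. A short pymongo sketch for fetching the run by the id saved in experiment_id.txt, assuming pymongo is installed and the credentials from the observer above are still valid:

from pymongo import MongoClient

# Connection string copied from the MongoObserver configured in create_model.py
client = MongoClient('mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017')
runs = client['sacred']['runs']  # Sacred's default collection for run entries

# Sacred assigns integer run ids; create_model.py wrote this one to a file
with open('experiment_id.txt') as f:
    run_id = int(f.read().strip())

run = runs.find_one({'_id': run_id})
print(run['experiment']['name'])  # 's464863'
print(run['config'])              # resolved hyperparameters
print(run['status'])              # e.g. 'COMPLETED'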