IUM_07 - wrap create_model.py with sacred experiment

Paweł Łączkowski 2024-04-24 20:22:04 +02:00
parent 56cc31edd3
commit 2fca60f167
3 changed files with 134 additions and 64 deletions

create_model.py

@@ -4,19 +4,53 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
+import pathlib
 import os
 import sys
-from sklearn.metrics import classification_report
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 from NeuralNetwork import NeuralNetwork
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+# Create new sacred experiment
+ex = Experiment("s464863")
+
+# Setup observers
+ex.observers.append(FileStorageObserver('my_runs'))
+ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017', db_name='sacred'))
+
+@ex.config
+def config():
+    # Default parameters
+    hidden_size = 128
+
+    # Default learning parameters
+    learning_rate = 0.001
+    weight_decay = 0.001
+    num_epochs = 1000
+
+    # Learning parameters from sys.argv
+    if len(sys.argv) > 1:
+        num_epochs = int(sys.argv[1])
+        learning_rate = float(sys.argv[2])
+        weight_decay = float(sys.argv[3])
+
+@ex.automain
+def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
     # Seed for reproducibility
     torch.manual_seed(1234)
 
-# Load data
-train = pd.read_csv('./datasets/train.csv')
-test = pd.read_csv('./datasets/test.csv')
+    # Load data with sacred
+    train_data = ex.open_resource('./datasets/train.csv', "r")
+    test_data = ex.open_resource('./datasets/test.csv', "r")
+
+    # Convert to pandas dataframe
+    train = pd.read_csv(train_data)
+    test = pd.read_csv(test_data)
 
     # Split data
     X_train = train.drop(columns=['id', 'diagnosis']).values
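
Note: the @ex.config / @ex.automain pair above is Sacred's standard capture mechanism; names bound inside the config function become configuration entries that Sacred injects into the decorated main function by matching parameter names. A minimal sketch of the same pattern, with placeholder names and values rather than this repository's:

    from sacred import Experiment
    from sacred.observers import FileStorageObserver

    ex = Experiment("demo")                                # placeholder name
    ex.observers.append(FileStorageObserver("demo_runs"))  # placeholder directory

    @ex.config
    def config():
        # Bindings made here are captured as configuration entries
        learning_rate = 0.001
        num_epochs = 10

    @ex.automain
    def main(learning_rate, num_epochs, _run):
        # Sacred injects config values by parameter name; _run is the active run
        for epoch in range(num_epochs):
            _run.log_scalar("loss", 1.0 / (epoch + 1))

Sacred's own CLI override syntax would be "python create_model.py with num_epochs=500"; the commit instead reads positional sys.argv values inside config(), which matches the existing Jenkins invocation shown further below.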
@@ -34,18 +68,6 @@ y_test = torch.FloatTensor(y_test).view(-1, 1)
     # Parameters
     input_size = X_train.shape[1]
-hidden_size = 128
-
-# Default parameters
-learning_rate = 0.001
-weight_decay = 0.001
-num_epochs = 1000
-
-# Parameters from sys.argv
-if len(sys.argv) > 1:
-    num_epochs = int(sys.argv[1])
-    learning_rate = float(sys.argv[2])
-    weight_decay = float(sys.argv[3])
 
     # Model initialization
     model = NeuralNetwork(input_size, hidden_size)
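
Moving the defaults into config() also makes the experiment drivable programmatically, without the CLI. A hedged sketch using Sacred's public ex.run API (the override values are placeholders):

    # Hedged sketch: run the experiment from Python with config overrides,
    # the programmatic equivalent of CLI "with" updates.
    from create_model import ex

    run = ex.run(config_updates={"num_epochs": 100, "learning_rate": 0.01})
    print(run.config["num_epochs"])  # 100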
@@ -81,9 +103,22 @@ for epoch in range(num_epochs):
     model.eval()
     with torch.no_grad():
+        # Make predictions
         y_pred = model(X_test)
         y_pred = np.where(y_pred > 0.5, 1, 0)
 
-print(classification_report(y_test, y_pred, target_names=['B', 'M']))
+        # Calculate metrics
+        accuracy = accuracy_score(y_test, y_pred)
+        precision = precision_score(y_test, y_pred)
+        recall = recall_score(y_test, y_pred)
+        f1 = f1_score(y_test, y_pred)
+
+        # Save metrics to sacred
+        _run.log_scalar("accuracy", accuracy)
+        _run.log_scalar("precision", precision)
+        _run.log_scalar("recall", recall)
+        _run.log_scalar("f1", f1)
 
     # If directory models does not exist, create it
     if not os.path.exists('./models'):
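
The _run.log_scalar calls above are fanned out to every attached observer; with the FileStorageObserver this should end up in my_runs/<run_id>/metrics.json. A sketch for reading the logged series back, assuming that layout (the schema is an assumption about Sacred's file observer, not something this commit guarantees):

    # Assumed FileStorageObserver layout: my_runs/<id>/metrics.json maps each
    # metric name to parallel "steps", "timestamps" and "values" lists.
    import json

    def read_metrics(run_dir):
        with open(f"{run_dir}/metrics.json") as f:
            metrics = json.load(f)
        return {name: series["values"] for name, series in metrics.items()}

    print(read_metrics("my_runs/1"))  # hypothetical run id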
@@ -91,3 +126,19 @@ if not os.path.exists('./models'):
     # Save the model
     torch.save(model, './models/model.pth')
+
+    # Add artifact to sacred experiment
+    ex.add_artifact('./models/model.pth', content_type="application/x-pytorch")
+
+    # Save id of the run
+    with open("experiment_id.txt", "w") as f:
+        f.write(str(_run._id))
+
+    # Save sources and resources paths
+    with open("sources.txt", "w") as f:
+        for source in _run.observers[1].run_entry["experiment"]["sources"]:
+            f.write(source[1] + "\n")
+
+    with open("resources.txt", "w") as f:
+        for resource in _run.observers[1].run_entry["resources"]:
+            f.write(resource[1] + "\n")
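
_run.observers[1] works only while the MongoObserver stays the second observer registered; reordering the ex.observers.append calls would silently read from the wrong observer. A hypothetical helper that selects it by type instead:

    # Hypothetical helper: pick the MongoObserver by type, not by position.
    from sacred.observers import MongoObserver

    def mongo_run_entry(run):
        for obs in run.observers:
            if isinstance(obs, MongoObserver):
                return obs.run_entry  # same structure indexed above
        raise RuntimeError("no MongoObserver attached to this run")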

models/Jenkinsfile

@@ -48,11 +48,30 @@ pipeline {
             }
             steps {
+                script {
                     sh "chmod +x ./create_model.py"
                     sh "python3 ./create_model.py ${params.epochs} ${params.learning_rate} ${params.weight_decay}"
 
+                    def experiment_id = readFile('experiment_id.txt').trim()
+                    archiveArtifacts artifacts: "my_runs/${experiment_id}/*", onlyIfSuccessful: true
+
                     archiveArtifacts artifacts: 'models/model.pth', onlyIfSuccessful: true
+
+                    def sources = readFile('sources.txt').split('\n')
+                    for (def source in sources) {
+                        archiveArtifacts artifacts: "my_runs/${source}", onlyIfSuccessful: true
+                    }
+
+                    def resources = readFile('resources.txt').split('\n')
+                    for (def resource in resources) {
+                        archiveArtifacts artifacts: "${resource}", onlyIfSuccessful: true
+                    }
 
                     build job: 's464863-evaluation/main', wait: false
+                }
             }
         }
     }
 }
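
The new Groovy lines sit inside a script { } block because def and for are Scripted Pipeline constructs that a Declarative steps section does not allow directly. For debugging outside Jenkins, roughly the same flow as plain Python (paths are taken from the Jenkinsfile; the parameter values are placeholders, and archiving plus the downstream s464863-evaluation trigger are left out):

    # Hedged sketch: reproduce the stage locally.
    import subprocess

    subprocess.run(["python3", "./create_model.py", "1000", "0.001", "0.001"], check=True)
    experiment_id = open("experiment_id.txt").read().strip()
    print(f"artifacts to collect: my_runs/{experiment_id}/*, models/model.pth")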

Binary file not shown.