From 2fca60f167c234a997719b8206112423c10ee2ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20=C5=81=C4=85czkowski?=
Date: Wed, 24 Apr 2024 20:22:04 +0200
Subject: [PATCH] IUM_07 - wrap create_model.py with sacred experiment

---
 create_model.py    | 171 +++++++++++++++++++++++++++++----------------
 models/Jenkinsfile |  27 +++++--
 requirements.txt   | Bin 128 -> 144 bytes
 3 files changed, 134 insertions(+), 64 deletions(-)

diff --git a/create_model.py b/create_model.py
index 869c992..d2e26df 100644
--- a/create_model.py
+++ b/create_model.py
@@ -4,90 +4,141 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
+import pathlib
+
 import os
 import sys
-from sklearn.metrics import classification_report
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 from NeuralNetwork import NeuralNetwork
 
-# Seed for reproducibility
-torch.manual_seed(1234)
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
 
-# Load data
-train = pd.read_csv('./datasets/train.csv')
-test = pd.read_csv('./datasets/test.csv')
+# Create new sacred experiment
+ex = Experiment("s464863")
 
-# Split data
-X_train = train.drop(columns=['id', 'diagnosis']).values
-y_train = train['diagnosis'].values
+# Setup observers
+ex.observers.append(FileStorageObserver('my_runs'))
+ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017', db_name='sacred'))
 
-X_test = test.drop(columns=['id', 'diagnosis']).values
-y_test = test['diagnosis'].values
+@ex.config
+def config():
+    # Default parameters
+    hidden_size = 128
 
-# Convert data to PyTorch tensors
-X_train = torch.FloatTensor(X_train)
-y_train = torch.FloatTensor(y_train).view(-1, 1)
+    # Default learning parameters
+    learning_rate = 0.001
+    weight_decay = 0.001
+    num_epochs = 1000
 
-X_test = torch.FloatTensor(X_test)
-y_test = torch.FloatTensor(y_test).view(-1, 1)
+    # Learning parameters from sys.argv
+    if len(sys.argv) > 1:
+        num_epochs = int(sys.argv[1])
+        learning_rate = float(sys.argv[2])
+        weight_decay = float(sys.argv[3])
 
-# Parameters
-input_size = X_train.shape[1]
-hidden_size = 128
+@ex.automain
+def experiment(hidden_size, learning_rate, weight_decay, num_epochs, _run):
+    # Seed for reproducibility
+    torch.manual_seed(1234)
 
-# Default parameters
-learning_rate = 0.001
-weight_decay = 0.001
-num_epochs = 1000
+    # Load data with sacred
+    train_data = ex.open_resource('./datasets/train.csv', "r")
+    test_data = ex.open_resource('./datasets/test.csv', "r")
 
-# Parameters from sys.argv
-if len(sys.argv) > 1:
-    num_epochs = int(sys.argv[1])
-    learning_rate = float(sys.argv[2])
-    weight_decay = float(sys.argv[3])
+    # Convert to pandas dataframe
+    train = pd.read_csv(train_data)
+    test = pd.read_csv(test_data)
 
-# Model initialization
-model = NeuralNetwork(input_size, hidden_size)
+    # Split data
+    X_train = train.drop(columns=['id', 'diagnosis']).values
+    y_train = train['diagnosis'].values
 
-# Loss function and optimizer
-criterion = nn.BCELoss()
-optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
+    X_test = test.drop(columns=['id', 'diagnosis']).values
+    y_test = test['diagnosis'].values
 
-# Training loop
-model.train()
+    # Convert data to PyTorch tensors
+    X_train = torch.FloatTensor(X_train)
+    y_train = torch.FloatTensor(y_train).view(-1, 1)
 
-for epoch in range(num_epochs):
-    # Zero the gradients
-    optimizer.zero_grad()
+    X_test = torch.FloatTensor(X_test)
+    y_test = torch.FloatTensor(y_test).view(-1, 1)
 
-    # Forward pass
-    outputs = model(X_train)
+    # Parameters
+    input_size = X_train.shape[1]
 
-    # Compute loss
-    loss = criterion(outputs, y_train)
+    # Model initialization
+    model = NeuralNetwork(input_size, hidden_size)
 
-    # Backward pass
-    loss.backward()
+    # Loss function and optimizer
+    criterion = nn.BCELoss()
+    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
 
-    # Update weights
-    optimizer.step()
+    # Training loop
+    model.train()
 
-    # Print loss
-    if (epoch + 1) % 100 == 0:
-        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
+    for epoch in range(num_epochs):
+        # Zero the gradients
+        optimizer.zero_grad()
 
-# Test the model
-model.eval()
+        # Forward pass
+        outputs = model(X_train)
 
-with torch.no_grad():
-    y_pred = model(X_test)
-    y_pred = np.where(y_pred > 0.5, 1, 0)
-    print(classification_report(y_test, y_pred, target_names=['B', 'M']))
+        # Compute loss
+        loss = criterion(outputs, y_train)
 
-# If directory models does not exist, create it
-if not os.path.exists('./models'):
-    os.makedirs('./models')
+        # Backward pass
+        loss.backward()
 
-# Save the model
-torch.save(model, './models/model.pth')
\ No newline at end of file
+        # Update weights
+        optimizer.step()
+
+        # Print loss
+        if (epoch + 1) % 100 == 0:
+            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
+
+    # Test the model
+    model.eval()
+
+    with torch.no_grad():
+
+        # Make predictions
+        y_pred = model(X_test)
+        y_pred = np.where(y_pred > 0.5, 1, 0)
+
+        # Calculate metrics
+        accuracy = accuracy_score(y_test, y_pred)
+        precision = precision_score(y_test, y_pred)
+        recall = recall_score(y_test, y_pred)
+        f1 = f1_score(y_test, y_pred)
+
+        # Save metrics to sacred
+        _run.log_scalar("accuracy", accuracy)
+        _run.log_scalar("precision", precision)
+        _run.log_scalar("recall", recall)
+        _run.log_scalar("f1", f1)
+
+    # If directory models does not exist, create it
+    if not os.path.exists('./models'):
+        os.makedirs('./models')
+
+    # Save the model
+    torch.save(model, './models/model.pth')
+
+    # Add artifact to sacred experiment
+    ex.add_artifact('./models/model.pth', content_type="application/x-pytorch")
+
+    # Save id of the run
+    with open("experiment_id.txt", "w") as f:
+        f.write(str(_run._id))
+
+    # Save sources and resources paths
+    with open("sources.txt", "w") as f:
+        for source in _run.observers[1].run_entry["experiment"]["sources"]:
+            f.write(source[1] + "\n")
+
+    with open("resources.txt", "w") as f:
+        for resource in _run.observers[1].run_entry["resources"]:
+            f.write(resource[1] + "\n")
\ No newline at end of file
diff --git a/models/Jenkinsfile b/models/Jenkinsfile
index 67aca6d..deff1b2 100644
--- a/models/Jenkinsfile
+++ b/models/Jenkinsfile
@@ -48,10 +48,29 @@ pipeline {
         }
 
         steps {
-            sh "chmod +x ./create_model.py"
-            sh "python3 ./create_model.py ${params.epochs} ${params.learning_rate} ${params.weight_decay}"
-            archiveArtifacts artifacts: 'models/model.pth', onlyIfSuccessful: true
-            build job: 's464863-evaluation/main', wait: false
+            script {
+                sh "chmod +x ./create_model.py"
+                sh "python3 ./create_model.py ${params.epochs} ${params.learning_rate} ${params.weight_decay}"
+
+                def experiment_id = readFile('experiment_id.txt').trim()
+                archiveArtifacts artifacts: "my_runs/${experiment_id}/*", onlyIfSuccessful: true
+
+                archiveArtifacts artifacts: 'models/model.pth', onlyIfSuccessful: true
+
+                def sources = readFile('sources.txt').split('\n')
+
+                for (def source in sources) {
+                    archiveArtifacts artifacts: "my_runs/${source}", onlyIfSuccessful: true
+                }
+
+                def resources = readFile('resources.txt').split('\n')
+
+                for (def resource in resources) {
+                    archiveArtifacts artifacts: "${resource}", onlyIfSuccessful: true
+                }
+
+                build job: 's464863-evaluation/main', wait: false
+            }
         }
     }
 }
diff --git a/requirements.txt b/requirements.txt
index c6d415ebf698fc4bfc0b4cf9115093d3ab164f77..6820f8e3e328e625553a5283e569e87d1bb8ad44 100644
GIT binary patch
delta 23
ecmZo*oWMAtL4cQmi=mhyks+C(h#{3Bg#iFQ`vklI

delta 6
NcmbQh*uXfU0RRUM0wVwb
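
--
Editor's note, not part of the patch: a minimal sketch of how a run recorded by
this experiment could be inspected after training. It assumes Sacred's
FileStorageObserver layout under my_runs/ (each run directory contains
config.json and metrics.json) and the experiment_id.txt file that
create_model.py writes; the script name inspect_run.py is hypothetical.

    # inspect_run.py - read back what the sacred experiment logged
    import json
    import pathlib

    # experiment_id.txt is written by create_model.py at the end of a run
    run_id = pathlib.Path("experiment_id.txt").read_text().strip()
    run_dir = pathlib.Path("my_runs") / run_id

    # config.json holds the captured hyperparameters
    # (hidden_size, learning_rate, weight_decay, num_epochs)
    config = json.loads((run_dir / "config.json").read_text())
    print("config:", config)

    # metrics.json maps each metric logged via _run.log_scalar
    # to its recorded steps and values
    metrics = json.loads((run_dir / "metrics.json").read_text())
    for name, series in metrics.items():
        print(f"{name}: {series['values']}")

The same metrics are also sent to the MongoObserver configured above, so they
could equally be queried from the sacred database on the MongoDB host.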