From 5aa6a770d196248657a69e56ecdb132428f090a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20=C5=81=C4=85czkowski?=
Date: Fri, 26 Apr 2024 09:24:56 +0200
Subject: [PATCH] IUM_08 - add scripts for MLflow tracking params and metrics, add MLproject file with train_test_evaluate command, add conda.yaml, update requirements.txt, fix minor issues

---
 Reviewer note: line breaks and indentation were reconstructed from a
 whitespace-collapsed copy of this patch. One added line in
 mlflow/create_model.py was changed so that `experiment` is rebound to the
 value returned by mlflow.set_experiment(); previously it stayed None when
 the experiment had just been created, and experiment.experiment_id raised
 AttributeError. Line counts are unchanged; the blob id on that file's
 index line was not recomputed.

 create_model.py                         |   3 -
 mlflow/breast_cancer_pytorch/MLproject  |  11 ++
 mlflow/breast_cancer_pytorch/conda.yaml |  14 ++
 mlflow/create_model.py                  | 235 ++++++++++++++++++++++++
 requirements.txt                        | Bin 184 -> 200 bytes
 5 files changed, 260 insertions(+), 3 deletions(-)
 create mode 100644 mlflow/breast_cancer_pytorch/MLproject
 create mode 100644 mlflow/breast_cancer_pytorch/conda.yaml
 create mode 100644 mlflow/create_model.py

diff --git a/create_model.py b/create_model.py
index 80bdce1..55f181c 100644
--- a/create_model.py
+++ b/create_model.py
@@ -4,10 +4,7 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
 
-import pathlib
-
 import os
-import sys
 
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 
diff --git a/mlflow/breast_cancer_pytorch/MLproject b/mlflow/breast_cancer_pytorch/MLproject
new file mode 100644
index 0000000..c9ba7f5
--- /dev/null
+++ b/mlflow/breast_cancer_pytorch/MLproject
@@ -0,0 +1,11 @@
+name: s464863
+
+conda_env: conda.yaml
+
+entry_points:
+  main:
+    parameters:
+      learning_rate: {type: float, default: 0.001}
+      weight_decay: {type: float, default: 0.001}
+      num_epochs: {type: int, default: 1000}
+    command: "python ../create_model.py {learning_rate} {weight_decay} {num_epochs}"
\ No newline at end of file
diff --git a/mlflow/breast_cancer_pytorch/conda.yaml b/mlflow/breast_cancer_pytorch/conda.yaml
new file mode 100644
index 0000000..e76cbeb
--- /dev/null
+++ b/mlflow/breast_cancer_pytorch/conda.yaml
@@ -0,0 +1,14 @@
+name: breast_cancer_pytorch
+channels:
+  - defaults
+dependencies:
+  - python=3.10
+  - pip
+  - pip:
+    - mlflow
+    - torch
+    - pandas
+    - numpy
+    - scikit-learn
+    - matplotlib
+    - seaborn
\ No newline at end of file
diff --git a/mlflow/create_model.py b/mlflow/create_model.py
new file mode 100644
index 0000000..c329b6a
--- /dev/null
+++ b/mlflow/create_model.py
@@ -0,0 +1,235 @@
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+import mlflow
+
+import os
+import sys
+import inspect
+
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(currentdir)
+sys.path.insert(0, parentdir)
+
+from NeuralNetwork import NeuralNetwork
+
+# MLflow tracking URI
+mlflow.set_tracking_uri("http://localhost:5000")
+
+# Create mlflow experiment if not exists
+experiment = mlflow.get_experiment_by_name("s464863")
+
+if experiment is None:
+    mlflow.create_experiment("s464863")
+
+# Set active mlflow experiment; rebind so `experiment` is not None when it was just created
+experiment = mlflow.set_experiment("s464863")
+
+# MLflow experiment
+client = mlflow.tracking.MlflowClient()
+run = client.create_run(experiment_id=experiment.experiment_id)
+run = mlflow.start_run(run_id=run.info.run_id)
+
+# Seed for reproducibility
+torch.manual_seed(1234)
+
+# Get absolute path
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(currentdir)
+train_path = os.path.join(parentdir, 'datasets/train.csv')
+test_path = os.path.join(parentdir, 'datasets/test.csv')
+
+# Load data
+train_data = pd.read_csv(train_path)
+test_data = pd.read_csv(test_path)
+
+# Split data
+X_train = train_data.drop(columns=['id', 'diagnosis']).values
+y_train = train_data['diagnosis'].values
+
+X_test = test_data.drop(columns=['id', 'diagnosis']).values
+y_test = test_data['diagnosis'].values
+
+# Convert data to PyTorch tensors
+X_train = torch.FloatTensor(X_train)
+y_train = torch.FloatTensor(y_train).view(-1, 1)
+
+X_test = torch.FloatTensor(X_test)
+y_test = torch.FloatTensor(y_test).view(-1, 1)
+
+# Parameters
+input_size = X_train.shape[1]
+hidden_size = 128
+
+# Learning parameters
+learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
+weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
+num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
+
+# Log parameters to mlflow
+mlflow.log_param("hidden_size", hidden_size)
+mlflow.log_param("learning_rate", learning_rate)
+mlflow.log_param("weight_decay", weight_decay)
+mlflow.log_param("num_epochs", num_epochs)
+
+# Model initialization
+model = NeuralNetwork(input_size, hidden_size)
+
+# Loss function and optimizer
+criterion = nn.BCELoss()
+optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
+
+# Training loop
+model.train()
+
+for epoch in range(num_epochs):
+    # Zero the gradients
+    optimizer.zero_grad()
+
+    # Forward pass
+    outputs = model(X_train)
+
+    # Compute loss
+    loss = criterion(outputs, y_train)
+
+    # Backward pass
+    loss.backward()
+
+    # Update weights
+    optimizer.step()
+
+    # Print loss
+    if (epoch + 1) % 100 == 0:
+        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
+
+# Test the model
+model.eval()
+
+with torch.no_grad():
+
+    # Make predictions
+    y_pred = model(X_test)
+    y_pred = np.where(y_pred > 0.5, 1, 0)
+
+    # Calculate metrics
+    accuracy = accuracy_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred)
+    recall = recall_score(y_test, y_pred)
+    f1 = f1_score(y_test, y_pred)
+
+    # Log metrics to mlflow
+    mlflow.log_metric("accuracy", accuracy)
+    mlflow.log_metric("precision", precision)
+    mlflow.log_metric("recall", recall)
+    mlflow.log_metric("f1", f1)
+
+# If directory models does not exist, create it
+if not os.path.exists('./models'):
+    os.makedirs('./models')
+
+# Save the model
+torch.save(model, './models/model.pth')
+
+# End mlflow run
+mlflow.end_run()
+
+# # MLflow experiment
+# with mlflow.start_run() as run:
+#     # Seed for reproducibility
+#     torch.manual_seed(1234)
+#
+#     # Load data
+#     train_data = pd.read_csv('../datasets/train.csv')
+#     test_data = pd.read_csv('../datasets/test.csv')
+#
+#     # Split data
+#     X_train = train_data.drop(columns=['id', 'diagnosis']).values
+#     y_train = train_data['diagnosis'].values
+#
+#     X_test = test_data.drop(columns=['id', 'diagnosis']).values
+#     y_test = test_data['diagnosis'].values
+#
+#     # Convert data to PyTorch tensors
+#     X_train = torch.FloatTensor(X_train)
+#     y_train = torch.FloatTensor(y_train).view(-1, 1)
+#
+#     X_test = torch.FloatTensor(X_test)
+#     y_test = torch.FloatTensor(y_test).view(-1, 1)
+#
+#     # Parameters
+#     input_size = X_train.shape[1]
+#     hidden_size = 128
+#
+#     # Learning parameters
+#     learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
+#     weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
+#     num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
+#
+#     # Log parameters to mlflow
+#     mlflow.log_param("hidden_size", hidden_size)
+#     mlflow.log_param("learning_rate", learning_rate)
+#     mlflow.log_param("weight_decay", weight_decay)
+#     mlflow.log_param("num_epochs", num_epochs)
+#
+#     # Model initialization
+#     model = NeuralNetwork(input_size, hidden_size)
+#
+#     # Loss function and optimizer
+#     criterion = nn.BCELoss()
+#     optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
+#
+#     # Training loop
+#     model.train()
+#
+#     for epoch in range(num_epochs):
+#         # Zero the gradients
+#         optimizer.zero_grad()
+#
+#         # Forward pass
+#         outputs = model(X_train)
+#
+#         # Compute loss
+#         loss = criterion(outputs, y_train)
+#
+#         # Backward pass
+#         loss.backward()
+#
+#         # Update weights
+#         optimizer.step()
+#
+#         # Print loss
+#         if (epoch + 1) % 100 == 0:
+#             print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
+#
+#     # Test the model
+#     model.eval()
+#
+#     with torch.no_grad():
+#
+#         # Make predictions
+#         y_pred = model(X_test)
+#         y_pred = np.where(y_pred > 0.5, 1, 0)
+#
+#         # Calculate metrics
+#         accuracy = accuracy_score(y_test, y_pred)
+#         precision = precision_score(y_test, y_pred)
+#         recall = recall_score(y_test, y_pred)
+#         f1 = f1_score(y_test, y_pred)
+#
+#         # Log metrics to mlflow
+#         mlflow.log_metric("accuracy", accuracy)
+#         mlflow.log_metric("precision", precision)
+#         mlflow.log_metric("recall", recall)
+#         mlflow.log_metric("f1", f1)
+#
+#     # If directory models does not exist, create it
+#     if not os.path.exists('./models'):
+#         os.makedirs('./models')
+#
+#     # Save the model
+#     torch.save(model, './models/model.pth')
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d694e9ace8bec3c0de50cbdfa26e5b51dfc3afd1..3d1e596b6cb4937704a754dd3bd4e251de671a23 100644
GIT binary patch
delta 23
dcmdnNc!F`l4gp>UE{0r&9ELO?&SxlR002z$1%m(p

delta 6
NcmX@XxPx)R4gd*u0?PmZ
