IUM_08 - add scripts for MLflow tracking params and metrics, add MLproject file with train_test_evaluate command, add conda.yaml, update requirements.txt, fix minor issues

This commit is contained in:
Paweł Łączkowski 2024-04-26 09:24:56 +02:00
parent a209ef3e7c
commit 5aa6a770d1
5 changed files with 260 additions and 3 deletions

View File

@ -4,10 +4,7 @@ import torch
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
import pathlib
import os import os
import sys
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

View File

@ -0,0 +1,11 @@
# MLflow project definition.
name: s464863

# Conda environment used to run the entry points below.
conda_env: conda.yaml

entry_points:
  # Single entry point: forwards the three hyperparameters positionally to
  # create_model.py, which reads them from sys.argv[1..3].
  main:
    parameters:
      learning_rate: {type: float, default: 0.001}
      weight_decay: {type: float, default: 0.001}
      num_epochs: {type: int, default: 1000}
    # NOTE(review): the script path is relative to the project directory —
    # confirm that "../create_model.py" resolves from where this file lives.
    command: "python ../create_model.py {learning_rate} {weight_decay} {num_epochs}"

View File

@ -0,0 +1,14 @@
# Conda environment for the MLflow project.
name: breast_cancer_pytorch
channels:
  - defaults
dependencies:
  - python=3.10
  - pip
  # Everything below is installed with pip inside the conda environment.
  - pip:
    - mlflow
    - torch
    - pandas
    - numpy
    - scikit-learn
    - matplotlib
    - seaborn

235
mlflow/create_model.py Normal file
View File

@ -0,0 +1,235 @@
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import mlflow
import os
import sys
import inspect
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from NeuralNetwork import NeuralNetwork
# Train, evaluate and save the model while tracking parameters and metrics
# in MLflow.
#
# Usage: python create_model.py [learning_rate] [weight_decay] [num_epochs]
# Missing arguments fall back to 0.001, 0.001 and 1000 respectively.

EXPERIMENT_NAME = "s464863"

# MLflow tracking URI
mlflow.set_tracking_uri("http://localhost:5000")

# Resolve the experiment id, creating the experiment on first use.
# create_experiment() returns the new experiment's id; the previous code kept
# using the stale `None` lookup result and crashed with AttributeError the
# first time the script ran against a fresh tracking server.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id

# Set active mlflow experiment
mlflow.set_experiment(EXPERIMENT_NAME)

# The run is created explicitly through the client and then resumed by id.
# NOTE(review): presumably this avoids clashing with a run pre-created by
# `mlflow run` in a different experiment — confirm before simplifying.
client = mlflow.tracking.MlflowClient()
run = client.create_run(experiment_id=experiment_id)

# The context manager ends the run even when training or evaluation raises,
# so no run is left open on the tracking server.
with mlflow.start_run(run_id=run.info.run_id) as run:
    # Seed for reproducibility
    torch.manual_seed(1234)

    # Dataset paths; `parentdir` (the directory above this script) is
    # computed next to the imports at the top of the file.
    train_path = os.path.join(parentdir, 'datasets/train.csv')
    test_path = os.path.join(parentdir, 'datasets/test.csv')

    # Load data
    train_data = pd.read_csv(train_path)
    test_data = pd.read_csv(test_path)

    # Split data: every column except 'id' and 'diagnosis' is a feature,
    # 'diagnosis' is the target.
    X_train = train_data.drop(columns=['id', 'diagnosis']).values
    y_train = train_data['diagnosis'].values
    X_test = test_data.drop(columns=['id', 'diagnosis']).values
    y_test = test_data['diagnosis'].values

    # Convert data to PyTorch tensors; targets become column vectors so
    # they line up with the (n, 1) model output used by the loss.
    X_train = torch.FloatTensor(X_train)
    y_train = torch.FloatTensor(y_train).view(-1, 1)
    X_test = torch.FloatTensor(X_test)
    y_test = torch.FloatTensor(y_test).view(-1, 1)

    # Parameters
    input_size = X_train.shape[1]
    hidden_size = 128

    # Learning parameters (positional command-line arguments with defaults)
    learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
    weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
    num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000

    # Log parameters to mlflow
    mlflow.log_param("hidden_size", hidden_size)
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("weight_decay", weight_decay)
    mlflow.log_param("num_epochs", num_epochs)

    # Model initialization
    model = NeuralNetwork(input_size, hidden_size)

    # Loss function and optimizer
    # NOTE(review): BCELoss needs outputs in [0, 1]; presumably
    # NeuralNetwork ends in a sigmoid — confirm.
    criterion = nn.BCELoss()
    optimizer = optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )

    # Training loop: full-batch gradient descent, one step per epoch.
    model.train()
    for epoch in range(num_epochs):
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(X_train)

        # Compute loss
        loss = criterion(outputs, y_train)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Print loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

    # Test the model
    model.eval()
    with torch.no_grad():
        # Make predictions and threshold them at 0.5 into class labels
        y_pred = model(X_test)
        y_pred = np.where(y_pred > 0.5, 1, 0)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Log metrics to mlflow
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)

    # Make sure the output directory exists (relative to the working
    # directory), then save the whole model object.
    os.makedirs('./models', exist_ok=True)
    torch.save(model, './models/model.pth')
# # MLflow experiment
# with mlflow.start_run() as run:
# # Seed for reproducibility
# torch.manual_seed(1234)
#
# # Load data
# train_data = pd.read_csv('../datasets/train.csv')
# test_data = pd.read_csv('../datasets/test.csv')
#
# # Split data
# X_train = train_data.drop(columns=['id', 'diagnosis']).values
# y_train = train_data['diagnosis'].values
#
# X_test = test_data.drop(columns=['id', 'diagnosis']).values
# y_test = test_data['diagnosis'].values
#
# # Convert data to PyTorch tensors
# X_train = torch.FloatTensor(X_train)
# y_train = torch.FloatTensor(y_train).view(-1, 1)
#
# X_test = torch.FloatTensor(X_test)
# y_test = torch.FloatTensor(y_test).view(-1, 1)
#
# # Parameters
# input_size = X_train.shape[1]
# hidden_size = 128
#
# # Learning parameters
# learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
# weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
# num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
#
# # Log parameters to mlflow
# mlflow.log_param("hidden_size", hidden_size)
# mlflow.log_param("learning_rate", learning_rate)
# mlflow.log_param("weight_decay", weight_decay)
# mlflow.log_param("num_epochs", num_epochs)
#
# # Model initialization
# model = NeuralNetwork(input_size, hidden_size)
#
# # Loss function and optimizer
# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
#
# # Training loop
# model.train()
#
# for epoch in range(num_epochs):
# # Zero the gradients
# optimizer.zero_grad()
#
# # Forward pass
# outputs = model(X_train)
#
# # Compute loss
# loss = criterion(outputs, y_train)
#
# # Backward pass
# loss.backward()
#
# # Update weights
# optimizer.step()
#
# # Print loss
# if (epoch + 1) % 100 == 0:
# print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
#
# # Test the model
# model.eval()
#
# with torch.no_grad():
#
# # Make predictions
# y_pred = model(X_test)
# y_pred = np.where(y_pred > 0.5, 1, 0)
#
# # Calculate metrics
# accuracy = accuracy_score(y_test, y_pred)
# precision = precision_score(y_test, y_pred)
# recall = recall_score(y_test, y_pred)
# f1 = f1_score(y_test, y_pred)
#
# # Log metrics to mlflow
# mlflow.log_metric("accuracy", accuracy)
# mlflow.log_metric("precision", precision)
# mlflow.log_metric("recall", recall)
# mlflow.log_metric("f1", f1)
#
# # If directory models does not exist, create it
# if not os.path.exists('./models'):
# os.makedirs('./models')
#
# # Save the model
# torch.save(model, './models/model.pth')

Binary file not shown.