IUM_08 - add scripts for MLflow tracking params and metrics, add MLproject file with train_test_evaluate command, add conda.yaml, update requirements.txt, fix minor issues
This commit is contained in:
parent
a209ef3e7c
commit
5aa6a770d1
@ -4,10 +4,7 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.optim as optim
|
import torch.optim as optim
|
||||||
|
|
||||||
import pathlib
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
|
|
||||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||||
|
|
||||||
|
11
mlflow/breast_cancer_pytorch/MLproject
Normal file
11
mlflow/breast_cancer_pytorch/MLproject
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
# MLflow project definition for the breast-cancer PyTorch experiment.
name: s464863

# Conda environment file (resolved relative to this MLproject file).
conda_env: conda.yaml

entry_points:
  main:
    # Hyperparameters forwarded positionally to the training script;
    # the defaults mirror the fallbacks hard-coded in create_model.py.
    parameters:
      learning_rate: {type: float, default: 0.001}
      weight_decay: {type: float, default: 0.001}
      num_epochs: {type: int, default: 1000}
    # The script lives one directory above this project directory.
    command: "python ../create_model.py {learning_rate} {weight_decay} {num_epochs}"
|
14
mlflow/breast_cancer_pytorch/conda.yaml
Normal file
14
mlflow/breast_cancer_pytorch/conda.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# Conda environment used by the MLproject entry point.
name: breast_cancer_pytorch
channels:
  - defaults
dependencies:
  - python=3.10
  - pip
  # Everything below is installed with pip inside the conda environment.
  - pip:
    - mlflow
    - torch
    - pandas
    - numpy
    - scikit-learn
    - matplotlib
    - seaborn
|
235
mlflow/create_model.py
Normal file
235
mlflow/create_model.py
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
|
||||||
|
import mlflow
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||||
|
|
||||||
|
# Put the parent of this script's directory at the front of sys.path so the
# NeuralNetwork module located there can be imported below.
_this_file = os.path.abspath(inspect.getfile(inspect.currentframe()))
currentdir = os.path.dirname(_this_file)
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

# Must come after the sys.path tweak above, otherwise the module is not found.
from NeuralNetwork import NeuralNetwork
|
||||||
|
|
||||||
|
# MLflow tracking URI
mlflow.set_tracking_uri("http://localhost:5000")

# Look up the experiment, creating it on first use.
# create_experiment() returns the new experiment's ID, so the ID is available
# in both branches. Previously `experiment` stayed None after creation and
# `experiment.experiment_id` raised AttributeError on the very first run.
experiment = mlflow.get_experiment_by_name("s464863")

if experiment is None:
    experiment_id = mlflow.create_experiment("s464863")
else:
    experiment_id = experiment.experiment_id

# Set active mlflow experiment
mlflow.set_experiment("s464863")

# Create the run explicitly through the client, then attach the fluent API to
# it so the later mlflow.log_param / mlflow.log_metric calls target this run.
client = mlflow.tracking.MlflowClient()
run = client.create_run(experiment_id=experiment_id)
run = mlflow.start_run(run_id=run.info.run_id)
|
||||||
|
|
||||||
|
# Fix the PyTorch RNG seed so repeated runs are reproducible.
torch.manual_seed(1234)

# Resolve the dataset locations relative to this script rather than the
# current working directory.
_script_path = os.path.abspath(inspect.getfile(inspect.currentframe()))
currentdir = os.path.dirname(_script_path)
parentdir = os.path.dirname(currentdir)
train_path = os.path.join(parentdir, 'datasets/train.csv')
test_path = os.path.join(parentdir, 'datasets/test.csv')


def _to_tensors(frame):
    """Split a data frame into a (features, labels) pair of float tensors.

    The 'id' and 'diagnosis' columns are dropped from the features; the
    'diagnosis' column becomes the label tensor, reshaped to a single column.
    NOTE(review): assumes 'diagnosis' is already numeric - confirm upstream.
    """
    features = frame.drop(columns=['id', 'diagnosis']).values
    labels = frame['diagnosis'].values
    return torch.FloatTensor(features), torch.FloatTensor(labels).view(-1, 1)


# Load both splits and convert them to PyTorch tensors.
train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

X_train, y_train = _to_tensors(train_data)
X_test, y_test = _to_tensors(test_data)
|
||||||
|
|
||||||
|
def _cli_arg(position, cast, default):
    """Return sys.argv[position] converted with `cast`, or `default` if absent."""
    if len(sys.argv) > position:
        return cast(sys.argv[position])
    return default


# Network dimensions: one input per feature column, fixed hidden width.
input_size = X_train.shape[1]
hidden_size = 128

# Optional positional CLI arguments: learning_rate weight_decay num_epochs.
learning_rate = _cli_arg(1, float, 0.001)
weight_decay = _cli_arg(2, float, 0.001)
num_epochs = _cli_arg(3, int, 1000)

# Record the hyperparameters on the active MLflow run.
for _name, _value in (
    ("hidden_size", hidden_size),
    ("learning_rate", learning_rate),
    ("weight_decay", weight_decay),
    ("num_epochs", num_epochs),
):
    mlflow.log_param(_name, _value)

# Model, binary cross-entropy loss, and Adam optimizer.
model = NeuralNetwork(input_size, hidden_size)
criterion = nn.BCELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
|
||||||
|
|
||||||
|
# Train on the whole training tensor once per epoch.
model.train()

for epoch in range(num_epochs):
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass and loss on the full training set.
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backpropagate and apply the parameter update.
    loss.backward()
    optimizer.step()

    # Report progress only on every 100th epoch.
    if (epoch + 1) % 100 != 0:
        continue
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
|
||||||
|
|
||||||
|
# Evaluate on the held-out test set.
model.eval()

with torch.no_grad():
    # Raw model outputs, thresholded at 0.5 into hard 0/1 predictions.
    # NOTE(review): presumably the network ends in a sigmoid (BCELoss is used
    # for training) - confirm in NeuralNetwork.
    raw_outputs = model(X_test)
    y_pred = np.where(raw_outputs > 0.5, 1, 0)

    # Classification metrics against the true labels.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Record the metrics on the active MLflow run.
    for _metric, _score in (
        ("accuracy", accuracy),
        ("precision", precision),
        ("recall", recall),
        ("f1", f1),
    ):
        mlflow.log_metric(_metric, _score)
|
||||||
|
|
||||||
|
# Ensure the output directory exists. exist_ok=True replaces the separate
# exists() check, which could race with another process creating the directory.
# The path is relative to the current working directory, not to this script.
os.makedirs('./models', exist_ok=True)

# Save the model.
# NOTE(review): this pickles the whole module object, so loading it later
# requires the NeuralNetwork class to be importable; saving
# model.state_dict() would be more portable if consumers can be updated.
torch.save(model, './models/model.pth')

# End mlflow run
mlflow.end_run()
|
||||||
|
|
||||||
|
# # MLflow experiment
|
||||||
|
# with mlflow.start_run() as run:
|
||||||
|
# # Seed for reproducibility
|
||||||
|
# torch.manual_seed(1234)
|
||||||
|
#
|
||||||
|
# # Load data
|
||||||
|
# train_data = pd.read_csv('../datasets/train.csv')
|
||||||
|
# test_data = pd.read_csv('../datasets/test.csv')
|
||||||
|
#
|
||||||
|
# # Split data
|
||||||
|
# X_train = train_data.drop(columns=['id', 'diagnosis']).values
|
||||||
|
# y_train = train_data['diagnosis'].values
|
||||||
|
#
|
||||||
|
# X_test = test_data.drop(columns=['id', 'diagnosis']).values
|
||||||
|
# y_test = test_data['diagnosis'].values
|
||||||
|
#
|
||||||
|
# # Convert data to PyTorch tensors
|
||||||
|
# X_train = torch.FloatTensor(X_train)
|
||||||
|
# y_train = torch.FloatTensor(y_train).view(-1, 1)
|
||||||
|
#
|
||||||
|
# X_test = torch.FloatTensor(X_test)
|
||||||
|
# y_test = torch.FloatTensor(y_test).view(-1, 1)
|
||||||
|
#
|
||||||
|
# # Parameters
|
||||||
|
# input_size = X_train.shape[1]
|
||||||
|
# hidden_size = 128
|
||||||
|
#
|
||||||
|
# # Learning parameters
|
||||||
|
# learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
|
||||||
|
# weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
|
||||||
|
# num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
|
||||||
|
#
|
||||||
|
# # Log parameters to mlflow
|
||||||
|
# mlflow.log_param("hidden_size", hidden_size)
|
||||||
|
# mlflow.log_param("learning_rate", learning_rate)
|
||||||
|
# mlflow.log_param("weight_decay", weight_decay)
|
||||||
|
# mlflow.log_param("num_epochs", num_epochs)
|
||||||
|
#
|
||||||
|
# # Model initialization
|
||||||
|
# model = NeuralNetwork(input_size, hidden_size)
|
||||||
|
#
|
||||||
|
# # Loss function and optimizer
|
||||||
|
# criterion = nn.BCELoss()
|
||||||
|
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
|
||||||
|
#
|
||||||
|
# # Training loop
|
||||||
|
# model.train()
|
||||||
|
#
|
||||||
|
# for epoch in range(num_epochs):
|
||||||
|
# # Zero the gradients
|
||||||
|
# optimizer.zero_grad()
|
||||||
|
#
|
||||||
|
# # Forward pass
|
||||||
|
# outputs = model(X_train)
|
||||||
|
#
|
||||||
|
# # Compute loss
|
||||||
|
# loss = criterion(outputs, y_train)
|
||||||
|
#
|
||||||
|
# # Backward pass
|
||||||
|
# loss.backward()
|
||||||
|
#
|
||||||
|
# # Update weights
|
||||||
|
# optimizer.step()
|
||||||
|
#
|
||||||
|
# # Print loss
|
||||||
|
# if (epoch + 1) % 100 == 0:
|
||||||
|
# print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
|
||||||
|
#
|
||||||
|
# # Test the model
|
||||||
|
# model.eval()
|
||||||
|
#
|
||||||
|
# with torch.no_grad():
|
||||||
|
#
|
||||||
|
# # Make predictions
|
||||||
|
# y_pred = model(X_test)
|
||||||
|
# y_pred = np.where(y_pred > 0.5, 1, 0)
|
||||||
|
#
|
||||||
|
# # Calculate metrics
|
||||||
|
# accuracy = accuracy_score(y_test, y_pred)
|
||||||
|
# precision = precision_score(y_test, y_pred)
|
||||||
|
# recall = recall_score(y_test, y_pred)
|
||||||
|
# f1 = f1_score(y_test, y_pred)
|
||||||
|
#
|
||||||
|
# # Log metrics to mlflow
|
||||||
|
# mlflow.log_metric("accuracy", accuracy)
|
||||||
|
# mlflow.log_metric("precision", precision)
|
||||||
|
# mlflow.log_metric("recall", recall)
|
||||||
|
# mlflow.log_metric("f1", f1)
|
||||||
|
#
|
||||||
|
# # If directory models does not exist, create it
|
||||||
|
# if not os.path.exists('./models'):
|
||||||
|
# os.makedirs('./models')
|
||||||
|
#
|
||||||
|
# # Save the model
|
||||||
|
# torch.save(model, './models/model.pth')
|
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Loading…
Reference in New Issue
Block a user