IUM_08 - add scripts for MLflow tracking params and metrics, add MLproject file with train_test_evaluate command, add conda.yaml, update requirements.txt, fix minor issues
This commit is contained in:
parent
a209ef3e7c
commit
5aa6a770d1
@ -4,10 +4,7 @@ import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
|
||||
import pathlib
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||
|
||||
|
11
mlflow/breast_cancer_pytorch/MLproject
Normal file
11
mlflow/breast_cancer_pytorch/MLproject
Normal file
@ -0,0 +1,11 @@
|
||||
# MLflow project definition
name: s464863

# Conda environment file used to build the run environment
conda_env: conda.yaml

entry_points:
  main:
    # Each parameter is substituted into the matching {placeholder} of the
    # command below; the default applies when no value is supplied.
    parameters:
      learning_rate:
        type: float
        default: 0.001
      weight_decay:
        type: float
        default: 0.001
      num_epochs:
        type: int
        default: 1000
    command: "python ../create_model.py {learning_rate} {weight_decay} {num_epochs}"
|
14
mlflow/breast_cancer_pytorch/conda.yaml
Normal file
14
mlflow/breast_cancer_pytorch/conda.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# Conda environment for the breast_cancer_pytorch MLflow project
name: breast_cancer_pytorch

channels:
  - defaults

dependencies:
  - python=3.10
  - pip
  # Everything listed under "pip:" is installed with pip inside the environment
  - pip:
      - mlflow
      - torch
      - pandas
      - numpy
      - scikit-learn
      - matplotlib
      - seaborn
|
235
mlflow/create_model.py
Normal file
235
mlflow/create_model.py
Normal file
@ -0,0 +1,235 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
|
||||
import mlflow
|
||||
|
||||
import os
|
||||
import sys
|
||||
import inspect
|
||||
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
||||
|
||||
# Put the parent of this script's directory at the front of the import path so
# modules located there can be imported (presumably NeuralNetwork, which is
# imported right after this block).
script_file = inspect.getfile(inspect.currentframe())
currentdir = os.path.dirname(os.path.abspath(script_file))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
|
||||
|
||||
from NeuralNetwork import NeuralNetwork
|
||||
|
||||
# MLflow tracking URI
mlflow.set_tracking_uri("http://localhost:5000")

# Look up the experiment, creating it on first use. create_experiment()
# returns only the new experiment's ID, so fetch the Experiment object for it;
# otherwise `experiment` would still be None when its ID is read below and the
# very first run against a fresh tracking server would crash.
experiment = mlflow.get_experiment_by_name("s464863")

if experiment is None:
    experiment = mlflow.get_experiment(mlflow.create_experiment("s464863"))

# Set active mlflow experiment
mlflow.set_experiment("s464863")

# Create the run explicitly through the client, then make it the active run
client = mlflow.tracking.MlflowClient()
run = client.create_run(experiment_id=experiment.experiment_id)
run = mlflow.start_run(run_id=run.info.run_id)
|
||||
|
||||
# Fix the PyTorch RNG seed so repeated runs start from the same state
torch.manual_seed(1234)

# Build absolute dataset paths under the parent of this script's directory
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
train_path, test_path = (
    os.path.join(parentdir, csv_name)
    for csv_name in ('datasets/train.csv', 'datasets/test.csv')
)

# Read both splits
train_data, test_data = pd.read_csv(train_path), pd.read_csv(test_path)

# Features are every column except 'id' and 'diagnosis'; 'diagnosis' is the target
X_train = train_data.drop(['id', 'diagnosis'], axis=1).to_numpy()
y_train = train_data['diagnosis'].to_numpy()

X_test = test_data.drop(['id', 'diagnosis'], axis=1).to_numpy()
y_test = test_data['diagnosis'].to_numpy()

# Convert to float tensors; targets become a single column so their shape
# lines up with the (n, 1) layout used for the loss below
X_train, X_test = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train).reshape(-1, 1)
y_test = torch.FloatTensor(y_test).reshape(-1, 1)
|
||||
|
||||
# Network dimensions: one input per feature column, fixed hidden width
input_size = X_train.shape[1]
hidden_size = 128

# Learning parameters are read positionally from the command line
# (learning_rate, weight_decay, num_epochs); a missing one uses its default
cli_args = sys.argv[1:]
learning_rate = float(cli_args[0]) if len(cli_args) >= 1 else 0.001
weight_decay = float(cli_args[1]) if len(cli_args) >= 2 else 0.001
num_epochs = int(cli_args[2]) if len(cli_args) >= 3 else 1000

# Log parameters to mlflow, one call per parameter
for param_name, param_value in (
    ("hidden_size", hidden_size),
    ("learning_rate", learning_rate),
    ("weight_decay", weight_decay),
    ("num_epochs", num_epochs),
):
    mlflow.log_param(param_name, param_value)

# Model initialization
model = NeuralNetwork(input_size, hidden_size)

# Loss function, and Adam optimizer using the learning rate and weight decay above
criterion = nn.BCELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
|
||||
|
||||
# Training loop: every epoch performs one update computed on the whole training set
model.train()

for epoch in range(num_epochs):
    # Forward pass and loss
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Clear gradients left from the previous epoch, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the loss once every 100 epochs
    if not (epoch + 1) % 100:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
|
||||
|
||||
# Switch the model to evaluation mode before scoring the test split
model.eval()

with torch.no_grad():
    # Model outputs above 0.5 are labelled 1, everything else 0
    scores = model(X_test)
    y_pred = (scores.numpy() > 0.5).astype(int)

# Calculate metrics
accuracy, precision, recall, f1 = (
    score_fn(y_test, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

# Log metrics to mlflow
for metric_name, metric_value in (
    ("accuracy", accuracy),
    ("precision", precision),
    ("recall", recall),
    ("f1", f1),
):
    mlflow.log_metric(metric_name, metric_value)
|
||||
|
||||
# Make sure the output directory exists. exist_ok=True replaces the separate
# existence check, so there is no window between checking and creating.
os.makedirs('./models', exist_ok=True)

# Save the model (the module object itself is passed to torch.save,
# not just its state_dict)
torch.save(model, './models/model.pth')

# End mlflow run
mlflow.end_run()
|
||||
|
||||
# # MLflow experiment
|
||||
# with mlflow.start_run() as run:
|
||||
# # Seed for reproducibility
|
||||
# torch.manual_seed(1234)
|
||||
#
|
||||
# # Load data
|
||||
# train_data = pd.read_csv('../datasets/train.csv')
|
||||
# test_data = pd.read_csv('../datasets/test.csv')
|
||||
#
|
||||
# # Split data
|
||||
# X_train = train_data.drop(columns=['id', 'diagnosis']).values
|
||||
# y_train = train_data['diagnosis'].values
|
||||
#
|
||||
# X_test = test_data.drop(columns=['id', 'diagnosis']).values
|
||||
# y_test = test_data['diagnosis'].values
|
||||
#
|
||||
# # Convert data to PyTorch tensors
|
||||
# X_train = torch.FloatTensor(X_train)
|
||||
# y_train = torch.FloatTensor(y_train).view(-1, 1)
|
||||
#
|
||||
# X_test = torch.FloatTensor(X_test)
|
||||
# y_test = torch.FloatTensor(y_test).view(-1, 1)
|
||||
#
|
||||
# # Parameters
|
||||
# input_size = X_train.shape[1]
|
||||
# hidden_size = 128
|
||||
#
|
||||
# # Learning parameters
|
||||
# learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
|
||||
# weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
|
||||
# num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
|
||||
#
|
||||
# # Log parameters to mlflow
|
||||
# mlflow.log_param("hidden_size", hidden_size)
|
||||
# mlflow.log_param("learning_rate", learning_rate)
|
||||
# mlflow.log_param("weight_decay", weight_decay)
|
||||
# mlflow.log_param("num_epochs", num_epochs)
|
||||
#
|
||||
# # Model initialization
|
||||
# model = NeuralNetwork(input_size, hidden_size)
|
||||
#
|
||||
# # Loss function and optimizer
|
||||
# criterion = nn.BCELoss()
|
||||
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
|
||||
#
|
||||
# # Training loop
|
||||
# model.train()
|
||||
#
|
||||
# for epoch in range(num_epochs):
|
||||
# # Zero the gradients
|
||||
# optimizer.zero_grad()
|
||||
#
|
||||
# # Forward pass
|
||||
# outputs = model(X_train)
|
||||
#
|
||||
# # Compute loss
|
||||
# loss = criterion(outputs, y_train)
|
||||
#
|
||||
# # Backward pass
|
||||
# loss.backward()
|
||||
#
|
||||
# # Update weights
|
||||
# optimizer.step()
|
||||
#
|
||||
# # Print loss
|
||||
# if (epoch + 1) % 100 == 0:
|
||||
# print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
|
||||
#
|
||||
# # Test the model
|
||||
# model.eval()
|
||||
#
|
||||
# with torch.no_grad():
|
||||
#
|
||||
# # Make predictions
|
||||
# y_pred = model(X_test)
|
||||
# y_pred = np.where(y_pred > 0.5, 1, 0)
|
||||
#
|
||||
# # Calculate metrics
|
||||
# accuracy = accuracy_score(y_test, y_pred)
|
||||
# precision = precision_score(y_test, y_pred)
|
||||
# recall = recall_score(y_test, y_pred)
|
||||
# f1 = f1_score(y_test, y_pred)
|
||||
#
|
||||
# # Log metrics to mlflow
|
||||
# mlflow.log_metric("accuracy", accuracy)
|
||||
# mlflow.log_metric("precision", precision)
|
||||
# mlflow.log_metric("recall", recall)
|
||||
# mlflow.log_metric("f1", f1)
|
||||
#
|
||||
# # If directory models does not exist, create it
|
||||
# if not os.path.exists('./models'):
|
||||
# os.makedirs('./models')
|
||||
#
|
||||
# # Save the model
|
||||
# torch.save(model, './models/model.pth')
|
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Loading…
Reference in New Issue
Block a user