# ium_464863/mlflow/create_model.py
# Third-party numerical / ML stack and experiment tracking
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import mlflow
import os
import sys
import inspect
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Make the parent directory importable so the project-local NeuralNetwork
# module (expected one level above this script) can be imported below.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from NeuralNetwork import NeuralNetwork
# MLflow tracking URI (assumes a tracking server is running locally on port 5000)
mlflow.set_tracking_uri("http://localhost:5000")

# Create the MLflow experiment if it does not exist yet.
# BUGFIX: `mlflow.create_experiment` returns the new experiment's id directly.
# The previous code dereferenced `experiment.experiment_id` unconditionally,
# which raised AttributeError on the very first run (when `experiment` is None
# and the experiment had just been created).
experiment = mlflow.get_experiment_by_name("s464863")
if experiment is None:
    experiment_id = mlflow.create_experiment("s464863")
else:
    experiment_id = experiment.experiment_id

# Set active mlflow experiment
mlflow.set_experiment("s464863")

# Create a run in the experiment and attach it as the active run
client = mlflow.tracking.MlflowClient()
run = client.create_run(experiment_id=experiment_id)
run = mlflow.start_run(run_id=run.info.run_id)

# Seed for reproducibility
torch.manual_seed(1234)

# Resolve dataset paths relative to this script's parent directory
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
train_path = os.path.join(parentdir, 'datasets/train.csv')
test_path = os.path.join(parentdir, 'datasets/test.csv')

# Load data
train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

# Split into features and target; 'id' is a row identifier, 'diagnosis' is the label
X_train = train_data.drop(columns=['id', 'diagnosis']).values
y_train = train_data['diagnosis'].values
X_test = test_data.drop(columns=['id', 'diagnosis']).values
y_test = test_data['diagnosis'].values

# Convert to PyTorch tensors; labels reshaped to (N, 1) to match BCELoss input
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train).view(-1, 1)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test).view(-1, 1)

# Model hyperparameters
input_size = X_train.shape[1]
hidden_size = 128

# Learning parameters, optionally overridden from the command line:
#   argv[1]=learning_rate, argv[2]=weight_decay, argv[3]=num_epochs
learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000

# Log parameters to mlflow
mlflow.log_param("hidden_size", hidden_size)
mlflow.log_param("learning_rate", learning_rate)
mlflow.log_param("weight_decay", weight_decay)
mlflow.log_param("num_epochs", num_epochs)

# Model initialization (project-local architecture)
model = NeuralNetwork(input_size, hidden_size)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Training loop (full-batch gradient descent over the whole training set)
model.train()
for epoch in range(num_epochs):
    # Zero the gradients
    optimizer.zero_grad()
    # Forward pass
    outputs = model(X_train)
    # Compute loss
    loss = criterion(outputs, y_train)
    # Backward pass and weight update
    loss.backward()
    optimizer.step()
    # Periodic progress report
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')

# Evaluate on the held-out test set
model.eval()
with torch.no_grad():
    # Threshold the model's (presumably sigmoid) outputs at 0.5 to get class labels
    y_pred = model(X_test)
    y_pred = np.where(y_pred > 0.5, 1, 0)

    # Classification metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

# Log metrics to mlflow
mlflow.log_metric("accuracy", accuracy)
mlflow.log_metric("precision", precision)
mlflow.log_metric("recall", recall)
mlflow.log_metric("f1", f1)

# Ensure the output directory exists, then save the full model object
os.makedirs('./models', exist_ok=True)
torch.save(model, './models/model.pth')

# End mlflow run
mlflow.end_run()
# # MLflow experiment
# with mlflow.start_run() as run:
# # Seed for reproducibility
# torch.manual_seed(1234)
#
# # Load data
# train_data = pd.read_csv('../datasets/train.csv')
# test_data = pd.read_csv('../datasets/test.csv')
#
# # Split data
# X_train = train_data.drop(columns=['id', 'diagnosis']).values
# y_train = train_data['diagnosis'].values
#
# X_test = test_data.drop(columns=['id', 'diagnosis']).values
# y_test = test_data['diagnosis'].values
#
# # Convert data to PyTorch tensors
# X_train = torch.FloatTensor(X_train)
# y_train = torch.FloatTensor(y_train).view(-1, 1)
#
# X_test = torch.FloatTensor(X_test)
# y_test = torch.FloatTensor(y_test).view(-1, 1)
#
# # Parameters
# input_size = X_train.shape[1]
# hidden_size = 128
#
# # Learning parameters
# learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
# weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
# num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
#
# # Log parameters to mlflow
# mlflow.log_param("hidden_size", hidden_size)
# mlflow.log_param("learning_rate", learning_rate)
# mlflow.log_param("weight_decay", weight_decay)
# mlflow.log_param("num_epochs", num_epochs)
#
# # Model initialization
# model = NeuralNetwork(input_size, hidden_size)
#
# # Loss function and optimizer
# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
#
# # Training loop
# model.train()
#
# for epoch in range(num_epochs):
# # Zero the gradients
# optimizer.zero_grad()
#
# # Forward pass
# outputs = model(X_train)
#
# # Compute loss
# loss = criterion(outputs, y_train)
#
# # Backward pass
# loss.backward()
#
# # Update weights
# optimizer.step()
#
# # Print loss
# if (epoch + 1) % 100 == 0:
# print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
#
# # Test the model
# model.eval()
#
# with torch.no_grad():
#
# # Make predictions
# y_pred = model(X_test)
# y_pred = np.where(y_pred > 0.5, 1, 0)
#
# # Calculate metrics
# accuracy = accuracy_score(y_test, y_pred)
# precision = precision_score(y_test, y_pred)
# recall = recall_score(y_test, y_pred)
# f1 = f1_score(y_test, y_pred)
#
# # Log metrics to mlflow
# mlflow.log_metric("accuracy", accuracy)
# mlflow.log_metric("precision", precision)
# mlflow.log_metric("recall", recall)
# mlflow.log_metric("f1", f1)
#
# # If directory models does not exist, create it
# if not os.path.exists('./models'):
# os.makedirs('./models')
#
# # Save the model
# torch.save(model, './models/model.pth')