# Train a binary classifier with PyTorch and log parameters/metrics to MLflow.
# Standard library
import inspect
import os
import sys

# Third-party
import mlflow
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
|
parentdir = os.path.dirname(currentdir)
|
|
sys.path.insert(0, parentdir)
|
|
|
|
from NeuralNetwork import NeuralNetwork
|
|
|
|
# MLflow tracking URI
|
|
mlflow.set_tracking_uri("http://localhost:5000")
|
|
|
|
# Create mlflow experiment if not exists
|
|
experiment = mlflow.get_experiment_by_name("s464863")
|
|
|
|
if experiment is None:
|
|
mlflow.create_experiment("s464863")
|
|
|
|
# Set active mlflow experiment
|
|
mlflow.set_experiment("s464863")
|
|
|
|
# MLflow experiment
|
|
client = mlflow.tracking.MlflowClient()
|
|
run = client.create_run(experiment_id=experiment.experiment_id)
|
|
run = mlflow.start_run(run_id=run.info.run_id)
|
|
|
|
# Seed for reproducibility
|
|
torch.manual_seed(1234)
|
|
|
|
# Get absolute path
|
|
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
|
|
parentdir = os.path.dirname(currentdir)
|
|
train_path = os.path.join(parentdir, 'datasets/train.csv')
|
|
test_path = os.path.join(parentdir, 'datasets/test.csv')
|
|
|
|
# Load data
|
|
train_data = pd.read_csv(train_path)
|
|
test_data = pd.read_csv(test_path)
|
|
|
|
# Split data
|
|
X_train = train_data.drop(columns=['id', 'diagnosis']).values
|
|
y_train = train_data['diagnosis'].values
|
|
|
|
X_test = test_data.drop(columns=['id', 'diagnosis']).values
|
|
y_test = test_data['diagnosis'].values
|
|
|
|
# Convert data to PyTorch tensors
|
|
X_train = torch.FloatTensor(X_train)
|
|
y_train = torch.FloatTensor(y_train).view(-1, 1)
|
|
|
|
X_test = torch.FloatTensor(X_test)
|
|
y_test = torch.FloatTensor(y_test).view(-1, 1)
|
|
|
|
# Parameters
|
|
input_size = X_train.shape[1]
|
|
hidden_size = 128
|
|
|
|
# Learning parameters
|
|
learning_rate = float(sys.argv[1]) if len(sys.argv) > 1 else 0.001
|
|
weight_decay = float(sys.argv[2]) if len(sys.argv) > 2 else 0.001
|
|
num_epochs = int(sys.argv[3]) if len(sys.argv) > 3 else 1000
|
|
|
|
# Log parameters to mlflow
|
|
mlflow.log_param("hidden_size", hidden_size)
|
|
mlflow.log_param("learning_rate", learning_rate)
|
|
mlflow.log_param("weight_decay", weight_decay)
|
|
mlflow.log_param("num_epochs", num_epochs)
|
|
|
|
# Model initialization
|
|
model = NeuralNetwork(input_size, hidden_size)
|
|
|
|
# Loss function and optimizer
|
|
criterion = nn.BCELoss()
|
|
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
|
|
|
|
# Training loop
|
|
model.train()
|
|
|
|
for epoch in range(num_epochs):
|
|
# Zero the gradients
|
|
optimizer.zero_grad()
|
|
|
|
# Forward pass
|
|
outputs = model(X_train)
|
|
|
|
# Compute loss
|
|
loss = criterion(outputs, y_train)
|
|
|
|
# Backward pass
|
|
loss.backward()
|
|
|
|
# Update weights
|
|
optimizer.step()
|
|
|
|
# Print loss
|
|
if (epoch + 1) % 100 == 0:
|
|
print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}')
|
|
|
|
# Test the model
|
|
model.eval()
|
|
|
|
with torch.no_grad():
|
|
|
|
# Make predictions
|
|
y_pred = model(X_test)
|
|
y_pred = np.where(y_pred > 0.5, 1, 0)
|
|
|
|
# Calculate metrics
|
|
accuracy = accuracy_score(y_test, y_pred)
|
|
precision = precision_score(y_test, y_pred)
|
|
recall = recall_score(y_test, y_pred)
|
|
f1 = f1_score(y_test, y_pred)
|
|
|
|
# Log metrics to mlflow
|
|
mlflow.log_metric("accuracy", accuracy)
|
|
mlflow.log_metric("precision", precision)
|
|
mlflow.log_metric("recall", recall)
|
|
mlflow.log_metric("f1", f1)
|
|
|
|
# If directory models does not exist, create it
|
|
if not os.path.exists('./models'):
|
|
os.makedirs('./models')
|
|
|
|
# Save the model
|
|
torch.save(model, './models/model.pth')
|
|
|
|
# End mlflow run
|
|
mlflow.end_run()
|
|
|
|
# NOTE: a commented-out duplicate of this entire script (the same steps
# wrapped in a single `with mlflow.start_run():` block) previously lived
# here.  It was dead code identical to the live version above and was
# removed; recover it from version control history if ever needed.