mlflow
This commit is contained in:
parent
8ab682be76
commit
ed9927d7a1
13
mlflow/MLProject
Normal file
13
mlflow/MLProject
Normal file
@ -0,0 +1,13 @@
|
||||
# MLflow project definition: environment plus the runnable entry points.
name: mlflow_464914

conda_env: conda.yaml  # path to the conda.yaml file that defines the environment
# docker_env:
#   image: mlflow-docker-example-environment

entry_points:
  # Training entry point; `epochs` is passed as the first command-line argument.
  main:
    parameters:
      epochs: {type: int, default: 10}
    command: "python mlflow_model.py {epochs}"
  # Evaluation entry point; takes no parameters.
  test:
    command: "python mlflow_prediction.py"
|
14
mlflow/conda.yaml
Normal file
14
mlflow/conda.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# Conda environment referenced by the MLflow project file (conda_env).
name: mlflow_464914
channels:
  - defaults
dependencies:
  - python=3.6  # these dependencies are installed with `conda install`
  - pip
  - pip:  # ...while these are installed with `pip install`
      - scikit-learn==0.23.2
      - mlflow>=1.0
      - kaggle
      - pandas
      - numpy
      - torch
|
||||
|
120
mlflow/mlflow_model.py
Normal file
120
mlflow/mlflow_model.py
Normal file
@ -0,0 +1,120 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import DataLoader, Dataset
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
import torch.nn.functional as F
|
||||
import mlflow
|
||||
import mlflow.sklearn
|
||||
import sys
|
||||
|
||||
# MLflow tracking configuration, applied once at import time.
_TRACKING_URI = "http://localhost:5000"
_EXPERIMENT_NAME = "s464914"

mlflow.set_tracking_uri(_TRACKING_URI)
mlflow.set_experiment(_EXPERIMENT_NAME)
|
||||
|
||||
|
||||
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
class Model(nn.Module):
    """Network mapping ``input_features`` inputs to ``output_features`` scores.

    Only ``fc1`` takes part in the forward pass. ``bn1``, ``fc2``, ``bn2`` and
    ``out`` are still constructed, so they appear in the state dict, but they
    are never applied to the data.
    """

    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
        super(Model, self).__init__()
        # Layers are created in a fixed order so seeded initialisation stays
        # reproducible and the state-dict layout does not change.
        self.fc1 = nn.Linear(in_features=input_features, out_features=output_features)
        self.bn1 = nn.BatchNorm1d(num_features=hidden_layer1)
        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
        self.bn2 = nn.BatchNorm1d(num_features=hidden_layer2)
        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)

    def forward(self, x):
        """Return ``relu(fc1(x))``; every score is therefore non-negative."""
        # NOTE(review): the ReLU clamps negative class scores to zero before
        # they reach the loss - confirm this is intended.
        scores = self.fc1(x)
        return torch.relu(scores)
|
||||
|
||||
def main():
    """Train ``Model`` on the forest data and record the run in MLflow.

    Reads ``forest_train.csv`` and ``forest_val.csv`` from the working
    directory (both need a ``Cover_Type`` label column), trains for the number
    of epochs given as the first command-line argument, prints the per-epoch
    training loss, validation loss and validation accuracy, and saves the
    weights to ``model.pth``. The epoch count and the per-epoch metrics are
    logged to the MLflow run.

    Raises:
        ValueError: if the epochs argument is not an integer, or if either
            CSV file contains no rows.
    """
    # Function-scope import: the file-level import only brings in
    # ``DataLoader`` and ``Dataset``.
    from torch.utils.data import TensorDataset

    # Fall back to 10 epochs (the default declared in the project file) so the
    # script can also be started without arguments.
    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 10

    forest_train = pd.read_csv('forest_train.csv')
    forest_val = pd.read_csv('forest_val.csv')

    print(forest_train.head())

    def as_dataset(frame):
        # Split a frame into float features and integer class labels on `device`.
        features = torch.tensor(frame.drop(columns=['Cover_Type']).values, dtype=torch.float32)
        labels = torch.tensor(frame['Cover_Type'].values, dtype=torch.long)
        return TensorDataset(features.to(device), labels.to(device))

    train_set = as_dataset(forest_train)
    val_set = as_dataset(forest_val)
    if len(train_set) == 0 or len(val_set) == 0:
        # Fail early with a clear message instead of a later division by zero.
        raise ValueError(
            "forest_train.csv and forest_val.csv must both contain at least one row"
        )

    # Initialize model, loss function, and optimizer
    model = Model().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # TensorDataset indexes the tensors directly rather than materialising one
    # Python tuple per row.
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=64)

    with mlflow.start_run():
        # Logged up front so the parameter is recorded even if training fails.
        mlflow.log_param("epochs", epochs)

        for epoch in range(epochs):
            # Training pass
            model.train()
            running_loss = 0.0
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                loss = criterion(model(inputs), labels)
                loss.backward()
                optimizer.step()
                # Weight by batch size so the epoch loss is a per-sample mean.
                running_loss += loss.item() * inputs.size(0)
            epoch_loss = running_loss / len(train_set)

            # Validation pass
            model.eval()
            val_running_loss = 0.0
            correct = 0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    outputs = model(inputs)
                    val_running_loss += criterion(outputs, labels).item() * inputs.size(0)
                    correct += (outputs.argmax(dim=1) == labels).sum().item()
            val_epoch_loss = val_running_loss / len(val_set)
            val_accuracy = correct / len(val_set)

            print(f"Epoch {epoch+1}/{epochs}, "
                  f"Train Loss: {epoch_loss:.4f}, "
                  f"Val Loss: {val_epoch_loss:.4f}, "
                  f"Val Accuracy: {val_accuracy:.4f}")

            mlflow.log_metric("train_loss", epoch_loss, step=epoch + 1)
            mlflow.log_metric("val_loss", val_epoch_loss, step=epoch + 1)
            mlflow.log_metric("val_accuracy", val_accuracy, step=epoch + 1)

        torch.save(model.state_dict(), 'model.pth')
|
||||
|
||||
|
||||
# Run training only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
95
mlflow/mlflow_prediction.py
Normal file
95
mlflow/mlflow_prediction.py
Normal file
@ -0,0 +1,95 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.utils.data import DataLoader, Dataset
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
import torch.nn.functional as F
|
||||
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
|
||||
import numpy as np
|
||||
import mlflow
|
||||
import mlflow.sklearn
|
||||
|
||||
# MLflow tracking configuration, applied once at import time.
_TRACKING_URI = "http://localhost:5000"
_EXPERIMENT_NAME = "s464914"

mlflow.set_tracking_uri(_TRACKING_URI)
mlflow.set_experiment(_EXPERIMENT_NAME)
|
||||
|
||||
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
class Model(nn.Module):
    """Network mapping ``input_features`` inputs to ``output_features`` scores.

    Only ``fc1`` is used by ``forward``. ``bn1``, ``fc2``, ``bn2`` and ``out``
    are still constructed, so they are part of the state dict, but they are
    never applied to the data.
    """

    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
        super(Model, self).__init__()
        # Keep the layer set and creation order unchanged: the state-dict
        # layout depends on it.
        self.fc1 = nn.Linear(in_features=input_features, out_features=output_features)
        self.bn1 = nn.BatchNorm1d(num_features=hidden_layer1)
        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
        self.bn2 = nn.BatchNorm1d(num_features=hidden_layer2)
        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)

    def forward(self, x):
        """Return ``relu(fc1(x))``; every score is therefore non-negative."""
        scores = self.fc1(x)
        return torch.relu(scores)
|
||||
|
||||
def load_model(model, model_path):
    """Load saved weights into ``model`` in place and switch it to eval mode.

    Args:
        model: ``nn.Module`` whose layers match the saved state dict.
        model_path: path to a file written with
            ``torch.save(model.state_dict(), ...)``.

    The checkpoint is mapped onto the device the model already lives on, so
    weights saved on a GPU machine can still be loaded on a CPU-only one.
    """
    first_param = next(model.parameters(), None)
    # A parameter-less module has no device of its own; default to the CPU.
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    state_dict = torch.load(model_path, map_location=target_device)
    model.load_state_dict(state_dict)
    model.eval()
|
||||
|
||||
def predict(model, input_data):
    """Return the index of the highest-scoring class for ``input_data``.

    Args:
        model: callable (normally an ``nn.Module``) that maps a float tensor
            of features to class scores along the last dimension.
        input_data: a single sample (1-D) or a batch of samples (2-D), given
            as a tensor or anything ``torch.as_tensor`` accepts.

    Returns:
        An ``int`` class index for a single sample, or a list of ``int`` with
        one index per row for a batch.
    """
    # Convert input data to a float tensor on the same device as the model.
    features = torch.as_tensor(input_data, dtype=torch.float32)
    parameters = getattr(model, "parameters", None)
    first_param = next(iter(parameters()), None) if callable(parameters) else None
    if first_param is not None:
        features = features.to(first_param.device)

    # Perform forward pass
    with torch.no_grad():
        output = model(features)

    # The class axis is the last one for both single samples and batches.
    predicted_class = output.argmax(dim=-1)
    if predicted_class.dim() == 0:
        return predicted_class.item()
    return predicted_class.tolist()
|
||||
|
||||
def main():
    """Evaluate the saved model on ``forest_test.csv`` and log metrics to MLflow.

    Reads ``forest_test.csv`` from the working directory (it needs a
    ``Cover_Type`` label column), loads the weights from ``model.pth``,
    predicts a class for every row, writes one
    ``predicted: <class> true_label: <class>`` line per row to
    ``predictions.txt`` and logs accuracy, micro-averaged
    precision/recall/F1 and RMSE to the MLflow run.
    """
    with mlflow.start_run():
        forest_test = pd.read_csv('forest_test.csv')

        X_test = forest_test.drop(columns=['Cover_Type']).values
        # Every true label is kept (not only the correctly predicted ones) so
        # the metric inputs line up one-to-one with the predictions.
        true_labels = forest_test['Cover_Type'].values.tolist()

        X_test = torch.tensor(X_test, dtype=torch.float32).to(device)

        model = Model().to(device)
        model_path = 'model.pth'  # Path to the saved model weights
        load_model(model, model_path)

        # One batched forward pass; argmax over the class axis yields one
        # predicted class id per row.
        with torch.no_grad():
            predicted_labels = model(X_test).argmax(dim=1).cpu().tolist()

        with open('predictions.txt', 'w') as fp:
            # Human-readable report, one line per test row.
            fp.writelines(
                f"predicted: {predicted} true_label: {target}\n"
                for predicted, target in zip(predicted_labels, true_labels)
            )

        # Metrics are computed on the numeric class ids, not on the report lines.
        accuracy = accuracy_score(true_labels, predicted_labels)
        precision_micro = precision_score(true_labels, predicted_labels, average='micro')
        recall_micro = recall_score(true_labels, predicted_labels, average='micro')
        f1_micro = f1_score(true_labels, predicted_labels, average='micro')
        # NOTE(review): RMSE treats class ids as numeric values - confirm it is
        # a meaningful metric for this task.
        rmse = np.sqrt(mean_squared_error(true_labels, predicted_labels))

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision_micro", precision_micro)
        mlflow.log_metric("recall_micro", recall_micro)
        mlflow.log_metric("f1_micro", f1_micro)
        mlflow.log_metric("rmse", rmse)
|
||||
|
||||
# Run the evaluation only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user