Add mlflow files

This commit is contained in:
Agata 2022-05-15 14:22:54 +02:00
parent 908d2a053a
commit f6f45ee75e
3 changed files with 145 additions and 4 deletions

View File

@ -6,12 +6,10 @@ RUN pip3 install seaborn
RUN pip3 install ipython
RUN pip3 install torch
RUN pip3 install numpy
RUN pip3 install sacred
RUN pip3 install GitPython
RUN pip3 install pymongo
RUN pip3 install mlflow
WORKDIR /app
COPY ./training.py ./
COPY ./training_sacred.py ./
COPY ./training_mlflow.py ./
COPY ./evaluation.py ./

12
MLproject Normal file
View File

@ -0,0 +1,12 @@
# MLflow project definition for the project named s444421.
name: s444421
# Runs are executed inside the agakul/ium:mlflow Docker image.
docker_env:
image: agakul/ium:mlflow
entry_points:
# "main" entry point: runs the training script, passing the epoch count
# as its single command-line argument.
main:
parameters:
# NOTE(review): declared as float, but training_mlflow.py converts this
# argument to an integer -- confirm only whole numbers are passed.
epochs: {type: float, default: 1000}
command: "python training_mlflow.py {epochs}"
# "test" entry point: runs the evaluation script; it declares no parameters.
test:
command: "python evaluation.py"

131
training_mlflow.py Normal file
View File

@ -0,0 +1,131 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
import torch
from torch import nn, optim
import torch.nn.functional as F
import sys
import mlflow
from urllib.parse import urlparse
# In[ ]:
# Record every run started by this script under the "s444421" experiment.
mlflow.set_experiment("s444421")
# In[ ]:
# The epoch count is the first command-line argument. The MLproject entry
# point declares it as a float, so values such as "1000.0" or "1e3" are
# legitimate; parse as float first and then truncate to an int so those
# spellings no longer raise ValueError.
epochs = int(float(sys.argv[1]))
# In[ ]:
def prepare_data():
    """Load the training set from the current working directory.

    Reads ``X_train.csv`` and ``y_train.csv`` and converts both to float
    tensors. Size-1 dimensions of the label tensor are squeezed away.

    Returns:
        A ``(features, labels)`` tuple of ``torch.Tensor`` objects.
    """
    feature_frame = pd.read_csv('X_train.csv')
    label_frame = pd.read_csv('y_train.csv')

    features = torch.from_numpy(np.array(feature_frame)).float()
    labels = torch.from_numpy(label_frame.values).float()
    return features, torch.squeeze(labels)
# In[ ]:
class Net(nn.Module):
    """Small fully connected network: n_features -> 5 -> 3 -> 1.

    The two hidden layers use ReLU; the single output unit is passed
    through a sigmoid, so every output lies in [0, 1].
    """

    def __init__(self, n_features):
        """Create the three linear layers for ``n_features`` inputs."""
        super().__init__()
        self.fc1 = nn.Linear(n_features, 5)
        self.fc2 = nn.Linear(5, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return the sigmoid-activated output of the network for ``x``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        output = self.fc3(hidden)
        return output.sigmoid()
# In[ ]:
def calculate_accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    ``y_pred`` is thresholded at 0.5 (values >= 0.5 count as positive) and
    flattened before being compared element-wise with ``y_true``.

    Returns:
        A scalar float tensor: number of matches divided by ``len(y_true)``.
    """
    predicted_labels = (y_pred >= .5).view(-1)
    matches = y_true == predicted_labels
    n_correct = matches.sum().float()
    return n_correct / len(y_true)
# In[ ]:
def round_tensor(t, decimal_places=3):
    """Return the scalar held by ``t`` as a Python number, rounded.

    Args:
        t: Object exposing ``.item()`` (a single-element tensor here).
        decimal_places: Number of decimal digits to keep.
    """
    scalar = t.item()
    return round(scalar, decimal_places)
# In[ ]:
def train_model(X_train, y_train, device, epochs):
    """Train a ``Net`` on the whole training set with full-batch Adam.

    Args:
        X_train: Feature tensor; its second dimension sets the input width.
        y_train: Target tensor compared against the squeezed network output.
        device: ``torch.device`` that data, model and loss are moved to.
        epochs: Number of optimisation steps; must be at least 1.

    Returns:
        ``(net, loss)`` where ``loss`` is the rounded BCE loss computed in
        the last epoch's forward pass, i.e. before the final optimizer step.

    Raises:
        ValueError: If ``epochs`` is smaller than 1. Previously the loop was
            skipped and the function failed with an UnboundLocalError when
            returning the never-computed loss.
    """
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    net = Net(X_train.shape[1])
    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    X_train = X_train.to(device)
    y_train = y_train.to(device)
    net = net.to(device)
    criterion = criterion.to(device)

    for epoch in range(epochs):
        # Squeeze the trailing size-1 dimension so predictions line up with
        # the targets for BCELoss.
        y_pred = torch.squeeze(net(X_train))
        train_loss = criterion(y_pred, y_train)

        # Report progress every 100 epochs.
        if epoch % 100 == 0:
            train_acc = calculate_accuracy(y_train, y_pred)
            print(
                f'epoch {epoch}\n'
                f'Train set - loss: {round_tensor(train_loss)}, '
                f'accuracy: {round_tensor(train_acc)}\n'
            )

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    return net, round_tensor(train_loss)
# In[ ]:
def my_main(epochs):
    """Train the model, save it, and record the run in MLflow.

    Steps: load the training data, train for ``epochs`` epochs, save the
    whole model to ``model.pth``, log the ``epochs`` parameter and the
    ``loss`` metric, predict on ``X_test.csv`` to infer a model signature,
    and log the model to MLflow under the name ``s444421``.

    Args:
        epochs: Number of training epochs, forwarded to ``train_model``.
    """
    # Imported first so the name ``mlflow`` is bound for the whole function;
    # this makes sure the PyTorch flavor is loaded before it is used.
    import mlflow.pytorch

    X_train, y_train = prepare_data()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model, loss = train_model(X_train, y_train, device, epochs)
    torch.save(model, 'model.pth')

    mlflow.log_param("epochs", epochs)
    mlflow.log_metric("loss", loss)

    X_test = pd.read_csv('X_test.csv')
    X_test = torch.from_numpy(np.array(X_test)).float().to(device)
    # Inference only: no gradients are needed for the signature example.
    with torch.no_grad():
        y_pred = model(X_test)
    y_pred = y_pred.ge(.5).view(-1).cpu()
    signature = mlflow.models.signature.infer_signature(X_train.numpy(), np.array(y_pred))

    # Registering a model is skipped for the local "file" tracking store.
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    registered_name = "s444421" if tracking_url_type_store != "file" else None

    # The model is a torch.nn.Module, so log it with the PyTorch flavor
    # (it was previously logged with the scikit-learn flavor).
    mlflow.pytorch.log_model(
        model,
        "s444421",
        registered_model_name=registered_name,
        signature=signature,
    )
# In[ ]:
# Script entry: open an MLflow run, report where it is stored, then train.
with mlflow.start_run() as run:
    print(f"MLflow run experiment_id: {run.info.experiment_id}")
    print(f"MLflow run artifact_uri: {run.info.artifact_uri}")
    my_main(epochs)