save model
This commit is contained in:
parent
518f414734
commit
7a446f1753
2
.gitignore
vendored
2
.gitignore
vendored
@ -154,4 +154,4 @@ fabric.properties
|
||||
kaggle.json
|
||||
Car_Prices_Poland_Kaggle*
|
||||
CarPrices*
|
||||
|
||||
IUM08/*
|
||||
|
@ -13,13 +13,18 @@ pipeline {
|
||||
}
|
||||
stage('Train model with sacred') {
    steps {
        // Sacred-based training run; the epoch count comes from the `epoch` build parameter.
        sh "python3 lab07_sacred.py with 'epochs=$epoch'"
        // MLflow-based training run.
        // Fixed: the command used to end with a stray single quote (`$epoch'`),
        // which left the shell with an unterminated quoted string.
        sh "python3 lab08_deepLearining_mlflow.py $epoch"
        // NOTE(review): neither script visible here writes games_model.pkl —
        // confirm this pattern matches a real file.
        archiveArtifacts artifacts: 'games_model.pkl'
        archiveArtifacts artifacts: 'mlruns/**'
        archiveArtifacts artifacts: 'my_model/**'
        // Remove the MLflow output directories once they have been archived.
        sh 'rm -r mlruns'
        sh 'rm -r my_model'
    }
}
|
||||
}
|
||||
post {
|
||||
success {
|
||||
archiveArtifacts artifacts: 'prediction_results.csv, CarPrices_pytorch_model.pkl, s444507_sacred_FileObserver/**/*.*', followSymlinks: false
|
||||
archiveArtifacts artifacts: 'CarPrices_pytorch_model.pkl, mlruns/**, my_model/**', followSymlinks: false
|
||||
}
|
||||
always {
|
||||
emailext body: "${currentBuild.currentResult}", subject: 's444507-training', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||
|
28
Jenkinsfile_training_sacred_old
Normal file
28
Jenkinsfile_training_sacred_old
Normal file
@ -0,0 +1,28 @@
|
||||
// Training pipeline kept under the "_old" Jenkinsfile name: copies the dataset
// artifacts, runs the Sacred-based training script and archives its outputs.
pipeline {
    // All stages run inside this Docker image.
    agent {
        docker { image 's444507_create_dataset_image:latest' }
    }
    parameters {
        // Forwarded to the training script as the `epochs` value.
        string(name: 'epoch', defaultValue: '1000', description: 'Number of epochs to train model.')
    }
    stages {
        stage('Get arifacts') {
            steps {
                // Copy artifacts from the last successful build of the
                // 's444507-create-dataset' project.
                copyArtifacts fingerprintArtifacts: true, projectName: 's444507-create-dataset', selector: lastSuccessful()
            }
        }
        stage('Train model with sacred') {
            steps {
                sh "python3 lab07_sacred.py with 'epochs=$epoch'"
            }
        }
    }
    post {
        success {
            // Archive the predictions, the pickled model and the Sacred
            // file-observer directory.
            archiveArtifacts artifacts: 'prediction_results.csv, CarPrices_pytorch_model.pkl, s444507_sacred_FileObserver/**/*.*', followSymlinks: false
        }
        always {
            // Mail the build result whatever the outcome.
            emailext body: "${currentBuild.currentResult}", subject: 's444507-training', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
        }
    }
}
|
14
MLproject
14
MLproject
@ -1,13 +1,11 @@
|
||||
name: tutorial
|
||||
name: s444507
|
||||
|
||||
conda_env: conda.yaml #ścieżka do pliku conda.yaml z definicją środowiska
|
||||
|
||||
#docker_env:
|
||||
# image: mlflow-docker-example-environment
|
||||
docker_env:
|
||||
image: adamwojdyla
|
||||
volumes: ["/mlflow/tmp/mlruns"]
|
||||
|
||||
entry_points:
|
||||
main:
|
||||
parameters:
|
||||
epochs: {type: float, default: 0.5}
|
||||
l1_ratio: {type: float, default: 0.1}
|
||||
command: "python train.py {epochs}"
|
||||
epochs: {type: float, default: 1}
|
||||
command: "python3 lab08_deepLearining_mlflow.py {epochs}"
|
||||
|
@ -14,6 +14,7 @@ import os
|
||||
import matplotlib.pyplot as plt
|
||||
import json
|
||||
|
||||
|
||||
class Model(nn.Module):
|
||||
def __init__(self, input_dim):
|
||||
super(Model, self).__init__()
|
||||
@ -27,6 +28,7 @@ class Model(nn.Module):
|
||||
x = F.softmax(self.layer3(x)) # To check with the loss function
|
||||
return x
|
||||
|
||||
|
||||
def prepare_labels_features(dataset):
|
||||
""" Label make column"""
|
||||
le = preprocessing.LabelEncoder()
|
||||
|
@ -0,0 +1,164 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
from urllib.parse import urlparse
|
||||
import mlflow
|
||||
import numpy as np
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.autograd import Variable
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics import accuracy_score, f1_score
|
||||
import torch.nn.functional as F
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import sys
|
||||
import logging
|
||||
import mlflow
|
||||
import mlflow.sklearn
|
||||
|
||||
# Root logger only lets warnings and more severe records through.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# The tracking-URI override is disabled, so MLflow uses whatever it is
# configured with. Uncomment to log to a local tracking server instead:
# mlflow.set_tracking_uri("http://localhost:5000/")
mlflow.set_experiment("s444507")
|
||||
|
||||
|
||||
class Model(nn.Module):
    """Fully connected classifier with two hidden layers and 5 outputs.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 100)
        self.layer2 = nn.Linear(100, 60)
        self.layer3 = nn.Linear(60, 5)

    def forward(self, x):
        """Return class probabilities, normalised over the last axis of ``x``.

        Args:
            x: Float tensor whose last axis has ``input_dim`` entries.

        Returns:
            Tensor with the same leading axes as ``x`` and a last axis of 5
            probabilities that sum to 1.
        """
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # ``dim`` is given explicitly: omitting it is deprecated and makes
        # PyTorch guess the axis (axis 0 for 3-D input). ``-1`` matches the
        # previous implicit choice for the 1-D and 2-D inputs used in this file.
        # NOTE(review): the output is already softmax-normalised, so a loss
        # that expects raw logits (e.g. nn.CrossEntropyLoss) must account for
        # that.
        return F.softmax(self.layer3(x), dim=-1)
|
||||
|
||||
|
||||
def load_dataset_raw(path='./Car_Prices_Poland_Kaggle.csv'):
    """Load the selected columns of the raw car-prices CSV.

    Args:
        path: CSV file to read. Defaults to the location that used to be
            hard-coded, so existing no-argument calls behave as before.

    Returns:
        DataFrame with the columns at zero-based positions 1, 4, 5, 6 and 10;
        the first row of the file is used as the header.
    """
    return pd.read_csv(path, usecols=[1, 4, 5, 6, 10], sep=',')
|
||||
|
||||
|
||||
def load_dataset_files(dev_path='./Car_Prices_Poland_Kaggle_dev.csv',
                       train_path='./Car_Prices_Poland_Kaggle_train.csv'):
    """Load the pre-split dev and train CSV files.

    Both files are read without a header row: the columns at zero-based
    positions 1, 4, 5, 6 and 10 are kept and renamed '0' .. '4'.

    Args:
        dev_path: CSV file with the dev split.
        train_path: CSV file with the train split.

    Returns:
        Tuple ``(cars_dev, cars_train)`` of DataFrames.
    """
    column_names = [str(i) for i in range(5)]

    def _read_split(path):
        # One place for the read options so both splits are parsed identically.
        return pd.read_csv(path, usecols=[1, 4, 5, 6, 10], sep=',', names=column_names)

    return _read_split(dev_path), _read_split(train_path)
|
||||
|
||||
|
||||
def remove_rows(data_dev, data_train, brands=('audi', 'bmw', 'ford', 'opel', 'volkswagen')):
    """Keep only the rows whose column '0' is one of ``brands``.

    Despite the name, the function returns the rows that are *kept*; every
    other row is dropped. Original index labels are preserved.

    Args:
        data_dev: Dev-split DataFrame with a column named '0'.
        data_train: Train-split DataFrame with a column named '0'.
        brands: Values of column '0' to keep. Defaults to the five values that
            used to be hard-coded.

    Returns:
        Tuple ``(dev_rows, train_rows)`` of filtered DataFrames.
    """
    allowed = list(brands)

    def _keep(frame):
        # isin() replaces the chain of `==` comparisons joined with `|`.
        return frame.loc[frame['0'].isin(allowed)]

    return _keep(data_dev), _keep(data_train)
|
||||
|
||||
|
||||
def prepare_labels_features(dataset):
    """Split a dataset into encoded labels and min-max scaled features.

    Column '0' is turned into integer labels with a LabelEncoder (the fitted
    class list is printed); all remaining columns are passed through a
    MinMaxScaler with default settings. A fresh encoder and scaler are fitted
    on every call.

    Args:
        dataset: DataFrame with the label in column '0'.

    Returns:
        Tuple ``(lab, feat)``: the encoded label array and the scaled feature
        array.
    """
    label_values = np.array(dataset['0'])

    encoder = preprocessing.LabelEncoder()
    encoder.fit(label_values)
    print(list(encoder.classes_))
    encoded_labels = encoder.transform(label_values)

    raw_features = dataset.drop(['0'], axis=1).to_numpy()
    scaled_features = preprocessing.MinMaxScaler().fit_transform(raw_features)

    return encoded_labels, scaled_features
|
||||
|
||||
|
||||
def my_main(epoch):
    """Train the classifier, log it to MLflow and write the result files.

    Steps: load the dev/train CSV files, keep the supported brands, encode and
    scale them, train ``Model`` with Adam, evaluate on the dev split, log the
    parameter, metrics and model to the active MLflow run, then write
    ``my_model/``, ``CarPrices_pytorch_model.pkl`` and
    ``prediction_results.csv``.

    Args:
        epoch: Requested number of training epochs. A value that ``int()``
            cannot convert falls back to 10.
    """
    print("Loading dataset...")
    dev, train = load_dataset_files()
    print("Dataset loaded")

    print("Preparing dataset...")
    dev, train = remove_rows(dev, train)
    # NOTE(review): train and dev are encoded and scaled by two independent
    # calls, so each split gets its own encoder and scaler.
    labels_train, features_train = prepare_labels_features(train)
    labels_test, features_test = prepare_labels_features(dev)
    print("Dataset prepared")

    # Training
    model = Model(features_train.shape[1])
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # Model.forward already returns softmax probabilities. nn.CrossEntropyLoss
    # expects raw logits and would apply softmax a second time, so the loss is
    # computed as NLLLoss on the log of the probabilities instead.
    loss_fn = nn.NLLLoss()

    # number of epochs is parametrized
    try:
        epochs_n = int(epoch)
    except (TypeError, ValueError, OverflowError) as e:
        print(e)
        print("Setting default epochs value to 10.")
        epochs_n = 10

    print(f"Number of epochs: {epochs_n}")
    mlflow.log_param("epochs", epochs_n)

    print("Starting model training...")
    x_train = torch.from_numpy(features_train).float()
    y_train = torch.from_numpy(labels_train).long()
    # A separate loop name keeps the `epoch` argument from being overwritten.
    for epoch_idx in range(1, epochs_n + 1):
        print("Epoch #", epoch_idx)
        y_pred = model(x_train)
        # Clamp before the log so a probability that underflowed to 0 cannot
        # produce -inf.
        loss = loss_fn(torch.log(torch.clamp(y_pred, min=1e-12)), y_train)
        print(f"The loss calculated: {loss}")

        optimizer.zero_grad()
        loss.backward()  # Gradients
        optimizer.step()  # Update
    print("Model training finished")

    x_test = torch.from_numpy(features_test).float()
    with torch.no_grad():
        pred = model(x_test).numpy()
    # Computed once and reused for both metrics.
    predicted_labels = np.argmax(pred, axis=1)

    accuracy = accuracy_score(labels_test, predicted_labels)
    f1 = f1_score(labels_test, predicted_labels, average='weighted')
    print(f"The accuracy metric is: {accuracy}")

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1", f1)

    # Infer model signature to log it
    # NOTE(review): the output side is inferred from the integer labels, while
    # the model returns 5 probabilities per row — confirm which is intended.
    signature = mlflow.models.signature.infer_signature(features_train, labels_train)
    # A handful of rows is enough as an input example; passing the whole
    # training matrix would store all of it next to the model.
    input_example = features_train[:5]
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    if tracking_url_type_store != "file":
        # Only non-file tracking stores get a registered model name here.
        mlflow.pytorch.log_model(model, "model", registered_model_name="s444507", signature=signature,
                                 input_example=input_example)
    else:
        mlflow.pytorch.log_model(model, "model", signature=signature, input_example=input_example)
        mlflow.pytorch.save_model(model, "my_model", signature=signature, input_example=input_example)

    print("Saving model to file...")
    torch.save(model, "CarPrices_pytorch_model.pkl")
    print("Model saved with name: CarPrices_pytorch_model.pkl")

    # Smoke check: reload the pickled model and print its class for one row.
    saved_model = torch.load("CarPrices_pytorch_model.pkl")
    print(np.argmax(saved_model(x_test[0]).detach().numpy(), axis=0))

    pd_predictions = pd.DataFrame(pred)
    pd_predictions.to_csv("./prediction_results.csv")
|
||||
|
||||
|
||||
# Epoch count used when the first command-line argument is missing or is not
# an integer.
DEFAULT_EPOCHS = 100

try:
    epochs = int(sys.argv[1])
except (IndexError, ValueError) as e:
    print(e)
    # Fixed: the message used to announce 1000 while the fallback was 100.
    print(f"Setting default epochs value to {DEFAULT_EPOCHS}.")
    epochs = DEFAULT_EPOCHS

# Everything my_main logs is attached to this run.
with mlflow.start_run() as run:
    print(f"MLflow run experiment_id: {run.info.experiment_id}")
    print(f"MLflow run artifact_uri: {run.info.artifact_uri}")
    my_main(epochs)
|
||||
|
Loading…
Reference in New Issue
Block a user