Model evaluation.

This commit is contained in:
parent 048e489250
commit 451c85755d
43 MLEvaluate/Jenkinsfile vendored Normal file
@@ -0,0 +1,43 @@
pipeline {
    agent any

    parameters {
        buildSelector(
            name: 'BUILD_SELECTOR',
            defaultSelector: lastSuccessful(),
            description: 'A build to take the artifacts from'
        )
        string(
            name: 'EPOCHS',
            description: 'Number of epochs',
            defaultValue: '10'
        )
    }

    stages {
        stage('Copy artifacts') {
            steps {
                script {
                    copyArtifacts(
                        projectName: 'z-s487179-training',
                        selector: buildParameter('BUILD_SELECTOR'),
                        target: './MLEvaluate'
                    )
                }
            }
        }
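        // The training job archives the trained weights; the copy above is assumed
        // to bring model.pt into ./MLEvaluate so the stage below can evaluate it
        // inside the project's Docker image.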
        stage('Run evaluation and save metrics') {
            steps {
                script {
                    sh 'ls -l'
                    docker.image('docker-image').inside {
                        dir('./MLEvaluate') {
                            sh 'ls -l'
                            sh 'python3 ./model_test.py'
                            archiveArtifacts 'plot.png'
                            archiveArtifacts 'metrics.csv'
                        }
                    }
                }
            }
        }
    }
}
82 MLEvaluate/model_test.py Normal file
@@ -0,0 +1,82 @@
import torch
import sys

# model_train.py lives in the sibling MLTrain directory.
sys.path.append("../MLTrain/")
from model_train import MyNeuralNetwork, load_data
from torch.utils.data import DataLoader
import csv
import os
import matplotlib.pyplot as plt
from typing import Tuple, List

def evaluate_model() -> Tuple[List[torch.Tensor], float]:
    model: MyNeuralNetwork = MyNeuralNetwork()
    model.load_state_dict(torch.load('model.pt'))
    model.eval()
    test_dataset = load_data("home_loan_test.csv")
    batch_size: int = 32
    test_dataloader: DataLoader = DataLoader(test_dataset, batch_size=batch_size)
    predictions = []
    labels = []
    # Binarise a sigmoid probability at the 0.5 decision threshold.
    get_label = lambda pred: 1 if pred >= 0.5 else 0
    total = 0
    correct = 0
    with torch.no_grad():
        for batch_data, batch_labels in test_dataloader:
            batch_predictions = model(batch_data)
            predicted_batch_labels = [get_label(prediction) for prediction in batch_predictions]
            total += len(predicted_batch_labels)
            batch_labels_list = list(map(int, batch_labels.tolist()))
            correct += sum(x == y for x, y in zip(predicted_batch_labels, batch_labels_list))
            predictions.extend(batch_predictions)
            labels.extend(batch_labels)
    accuracy = correct / total
    return predictions, accuracy

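# For reference, a vectorised sketch of the per-item accuracy loop above (assuming
# the model emits one probability per row, which the (batch, 1) output satisfies):
#
#   preds = (model(batch_data) >= 0.5).float().squeeze(1)
#   correct += (preds == batch_labels).sum().item()
#   total += batch_labels.size(0)
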
def save_predictions(predictions: List[torch.Tensor]) -> None:
    filename = "results.csv"
    column_name = "predict"
    with open(filename, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow([column_name])
        for result in predictions:
            # Use the same 0.5 threshold as evaluate_model.
            loan_decision = 1 if result.item() >= 0.5 else 0
            writer.writerow([loan_decision])

def save_accuracy(accuracy: float) -> None:
    filename = 'metrics.csv'
    # Append to the running metrics file, writing the header only on first use.
    write_header = not os.path.exists(filename)
    with open(filename, 'a', newline='') as file:
        writer = csv.writer(file)
        if write_header:
            writer.writerow(['accuracy'])
        writer.writerow([accuracy])

def plot_accuracy() -> None:
    filename = 'metrics.csv'
    accuracy_results = []
    if os.path.exists(filename):
        with open(filename, 'r') as file:
            reader = csv.reader(file)
            for idx, row in enumerate(reader):
                # Skip the 'accuracy' header row.
                if idx == 0:
                    continue
                accuracy_results.append(float(row[0]))
    # String x-ticks give one discrete tick per build instead of a continuous axis.
    iterations = list(map(str, range(1, len(accuracy_results) + 1)))
    plt.plot(iterations, accuracy_results)
    plt.xlabel('build')
    plt.ylabel('accuracy')
    plt.title("Accuracies over builds.")
    plt.savefig("plot.png")

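# Note: plot.png and metrics.csv written above are exactly the files the
# MLEvaluate Jenkinsfile later archives with archiveArtifacts.
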
def main() -> None:
    predictions, accuracy = evaluate_model()
    save_predictions(predictions)
    save_accuracy(accuracy)
    plot_accuracy()


if __name__ == "__main__":
    main()
47 MLTrain/Jenkinsfile vendored Normal file
@@ -0,0 +1,47 @@
pipeline {
    agent any

    parameters {
        buildSelector(
            name: 'BUILD_SELECTOR',
            defaultSelector: lastSuccessful(),
            description: 'A build to take the artifacts from'
        )
        string(
            name: 'EPOCHS',
            description: 'Number of epochs',
            defaultValue: '10'
        )
    }

    stages {
        stage('Copy artifacts') {
            steps {
                script {
                    copyArtifacts(
                        projectName: 'z-s487179-create-dataset',
                        selector: buildParameter('BUILD_SELECTOR'),
                        target: './MLTrain'
                    )
                }
            }
        }
        stage('Run training and save model') {
            steps {
                script {
                    sh 'ls -l'
                    docker.image('docker-image').inside {
                        dir('./MLTrain') {
                            sh 'ls -l'
                            // Pass the EPOCHS parameter through; without it,
                            // model_train.py falls back to its default of 10.
                            sh "python3 ./model_train.py --epochs ${params.EPOCHS}"
                            archiveArtifacts 'model.pt'
                        }
                    }
                }
            }
        }
    }
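    // On success, hand off to the evaluation job; propagate/wait are disabled so a
    // failed or queued evaluation never blocks or fails this training build.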
    post {
        success {
            build job: 'z-s487179-evaluation.eg/main', propagate: false, wait: false
        }
    }
}
BIN MLTrain/__pycache__/model_train.cpython-311.pyc Normal file
Binary file not shown.
113 MLTrain/model_train.py Normal file
@@ -0,0 +1,113 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import argparse


class MyNeuralNetwork(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super(MyNeuralNetwork, self).__init__(*args, **kwargs)
        # 12 input features -> 64 hidden units -> 1 sigmoid probability.
        self.fc1 = nn.Linear(12, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

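# A minimal shape check for the network above (a sketch; the random input and the
# batch size of 4 are illustrative only):
#
#   net = MyNeuralNetwork()
#   out = net(torch.randn(4, 12))  # -> shape (4, 1), values in (0, 1)
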
def prepare_df_for_nn(df: pd.DataFrame):
    # Drop the first ID-like column, if any.
    id_column_name_list: list[str] = [column for column in df.columns.to_list() if 'id' in column.lower()]
    if id_column_name_list:
        df.drop(id_column_name_list[0], inplace=True, axis=1)
    encoder: LabelBinarizer = LabelBinarizer()
    df.reset_index(inplace=True)
    for column in df.columns:
        if str(df[column].dtype).lower() == 'object':
            # Note: LabelBinarizer yields an (n, k) indicator matrix for k > 2
            # classes, so the flatten() below assumes binary categorical columns.
            encoded_column: np.ndarray = encoder.fit_transform(df[column])
            df[column] = pd.Series(encoded_column.flatten(), dtype=pd.Int16Dtype())
    return df

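# Illustrative example (hypothetical data): a binary column such as
# ['Male', 'Female', 'Male'] comes out of the LabelBinarizer pass above as the
# integer series [1, 0, 1].
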
def load_data(path: str):
    df: pd.DataFrame = pd.read_csv(path)
    train_dataset: pd.DataFrame = prepare_df_for_nn(df)
    # All columns except the last are features; the last column is the label.
    x: np.ndarray = train_dataset.iloc[:, :-1].values.astype(float)
    y: np.ndarray = train_dataset.iloc[:, -1].values.astype(float)
    x_tensor: torch.Tensor = torch.tensor(x, dtype=torch.float32)
    y_tensor: torch.Tensor = torch.tensor(y, dtype=torch.float32)
    dataset: TensorDataset = TensorDataset(x_tensor, y_tensor)
    return dataset

def train(epochs: int, dataloader_train: DataLoader, dataloader_val: DataLoader):
    model: MyNeuralNetwork = MyNeuralNetwork()
    criterion: nn.BCELoss = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    for epoch in range(epochs):
        total_correct_train = 0
        total_samples_train = 0
        total_correct_val = 0
        total_samples_val = 0

        for inputs, labels in dataloader_train:
            outputs = model(inputs)
            # BCELoss expects targets shaped like the (batch, 1) outputs.
            labels = labels.reshape((labels.shape[0], 1))
            loss = criterion(outputs, labels)
            predicted_labels = (outputs > 0.5).float()
            total_correct_train += (predicted_labels == labels).sum().item()
            total_samples_train += labels.size(0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            for inputs, labels in dataloader_val:
                outputs_val = model(inputs)
                predicted_labels_val = (outputs_val > 0.5).float()
                labels = labels.reshape((labels.shape[0], 1))
                total_correct_val += (predicted_labels_val == labels).sum().item()
                total_samples_val += labels.size(0)

        accuracy_val = total_correct_val / total_samples_val
        accuracy_train = total_correct_train / total_samples_train
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Accuracy train: {accuracy_train:.4f}, Accuracy val: {accuracy_val:.4f}")

    return model

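# Design note: the network ends in Sigmoid, so BCELoss is the matching criterion.
# A common alternative (not used in this commit) is to drop the Sigmoid and train
# with BCEWithLogitsLoss, which is more numerically stable.
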
def main() -> None:
    parser = argparse.ArgumentParser(description='Train the home loan model.')
    parser.add_argument("--epochs", help="Number of training epochs.", default='10')
    args = parser.parse_args()
    config = vars(args)
    epochs = int(config["epochs"])

    train_dataset = load_data("home_loan_train.csv")
    val_dataset = load_data("home_loan_val.csv")

    batch_size: int = 32
    dataloader_train = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    dataloader_val = DataLoader(val_dataset, batch_size=batch_size)

    model = train(epochs, dataloader_train, dataloader_val)
    torch.save(model.state_dict(), 'model.pt')


if __name__ == "__main__":
    main()