This commit is contained in:
wojciechbatruszewicz 2023-06-26 19:38:13 +02:00
parent 4ea2460a49
commit afa27941e0
2 changed files with 127 additions and 0 deletions

45
JenkinsfileTrain Normal file
View File

@ -0,0 +1,45 @@
pipeline {
agent any
parameters {
buildSelector(
name: 'BUILD_SELECTOR',
defaultSelector: lastSuccessful(),
description: 'A build to take the artifacts from'
)
string(
name: 'EPOCHS',
description: 'Number of epochs',
defaultValue: '10'
)
}
stages {
stage('Copy artifacts') {
steps {
script {
copyArtifacts(
projectName: 'x1-create-dataset',
selector: buildParameter('BUILD_SELECTOR'),
target: './'
)
}
}
}
stage('Run training and save model') {
steps {
script {
sh 'ls -l'
docker.image('docker-image').inside {
sh 'ls -l'
sh 'python3 ./model_train.py'
archiveArtifacts 'model.pt'
}
}
}
}
}
// post {
// success {
// build job: 'x1-evaluation.eg/main', wait: false
// }
// }
}

82
train.py Normal file
View File

@ -0,0 +1,82 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
import numpy as np
import argparse
class MyNeuralNetwork(nn.Module):
def __init__(self, *args, **kwargs) -> None:
super(MyNeuralNetwork, self).__init__(*args, **kwargs)
self.fc1 = nn.Linear(12, 64)
self.relu = nn.ReLU()
self.fc1 = nn.Linear(12, 64)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(64, 1)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.sigmoid(x)
return x
def prepare_df_for_nn(df):
id_column_name_list = [column for column in df.columns.to_list() if 'id' in column.lower()]
if len(id_column_name_list) == 0:
pass
else:
df.drop(id_column_name_list[0], inplace=True, axis=1)
encoder = LabelBinarizer()
df.reset_index(inplace=True)
for column in df.columns:
if str(df[column].dtype).lower() == 'object':
encoded_column = encoder.fit_transform(df[column])
df[column] = pd.Series(encoded_column.flatten(), dtype=pd.Int16Dtype)
return df
def load_data(path):
df = pd.read_csv(path)
train_dataset = prepare_df_for_nn(df)
x = train_dataset.iloc[:, :-1].values.astype(float)
y = train_dataset.iloc[:, -1].values.astype(float)
x_tensor = torch.tensor(x, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)
dataset = TensorDataset(x_tensor, y_tensor)
return dataset
def train(epochs, dataloader_train):
model: MyNeuralNetwork = MyNeuralNetwork()
criterion: nn.BCELoss = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(epochs):
for inputs, labels in dataloader_train:
outputs = model(inputs)
labels = labels.reshape((labels.shape[0], 1))
loss = criterion(outputs, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")
return model
def main():
parser = argparse.ArgumentParser(description='A test program.')
parser.add_argument("--epochs", help="Prints the supplied argument.", default='10')
args = parser.parse_args()
config = vars(args)
epochs = int(config["epochs"])
train_dataset = load_data("gender_classification_train.csv")
batch_size = 32
dataloader_train = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)
model = train(epochs, dataloader_train)
torch.save(model.state_dict(), 'model.pt')
if __name__ == "__main__":
main()