add training script and get predictions

This commit is contained in:
Sheaza 2024-04-03 09:39:37 +02:00
parent 008f9ef6cf
commit 2c31d1eb26
4 changed files with 82 additions and 1 deletions

View File

@ -11,5 +11,6 @@ WORKDIR ./app
# Copy the pipeline scripts into the image; the destination `./` is relative
# to the image's current working directory.
COPY ./get_dataset.py ./
COPY ./get_stats.py ./
COPY ./train.py ./
# Prediction script added alongside the training script.
COPY predict.py ./
# Default command is a bash shell; no script is run automatically on start.
CMD bash

16
Jenkinsfile vendored
View File

@ -49,5 +49,21 @@ pipeline {
archiveArtifacts artifacts: 'dataset.csv,df_train.csv,df_test.csv', onlyIfSuccessful: true
}
}
// Trains the model and then writes predictions, running both scripts inside
// a container built from the repository's Dockerfile.
stage("Prediction") {
agent {
dockerfile {
filename 'Dockerfile'
// NOTE(review): per the Jenkins docs, reuseNode runs the container on the
// node/workspace already in use, which is presumably how the CSV files
// from earlier stages stay visible here — confirm against the top-level agent.
reuseNode true
}
}
steps {
// NOTE(review): the scripts are invoked through `python`, so the executable
// bit set by these chmod calls does not appear to be required.
sh "chmod +x ./train.py"
sh "chmod +x ./predict.py"
// Order matters: train.py saves model.keras, which predict.py then loads.
sh "python ./train.py"
sh "python ./predict.py"
// Keep the trained model and the prediction dump, but only for successful builds.
archiveArtifacts artifacts: 'model.keras,predictions.txt', onlyIfSuccessful: true
}
}
}
}

15
predict.py Normal file
View File

@ -0,0 +1,15 @@
import pandas as pd
from tensorflow import keras
import numpy as np
def main():
    """Predict on the test split with the saved model and dump the result.

    Reads ``df_test.csv`` and ``model.keras`` from the current directory and
    writes the string form of the prediction array to ``predictions.txt``.
    """
    # Disable NumPy's summarisation so str() renders every prediction
    # instead of eliding the middle of a long array with "...".
    np.set_printoptions(threshold=np.inf)

    test_frame = pd.read_csv("df_test.csv")
    # The target column is not a model input; only the features are fed in.
    features = test_frame.drop("Performance Index", axis=1)

    model = keras.models.load_model("model.keras")
    predictions = model.predict(features)

    with open("predictions.txt", "w", encoding="utf-8") as f:
        f.write(str(predictions))


# Run only when executed as a script so importing the module has no side effects.
if __name__ == "__main__":
    main()

49
train.py Normal file
View File

@ -0,0 +1,49 @@
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
class RegressionModel:
    """Small feed-forward Keras regressor for the "Performance Index" target.

    The network consists of an input layer, one 32-unit ReLU hidden layer and
    a single linear output unit.

    Args:
        optimizer: Optimizer handed to ``compile`` when training starts.
        loss: Loss handed to ``compile`` when training starts.
        input_dim: Number of feature columns the network expects.
    """

    # Name of the CSV column holding the regression target.
    TARGET_COLUMN = "Performance Index"

    def __init__(self, optimizer="adam", loss="mean_squared_error", input_dim=5):
        self.model = keras.Sequential([
            layers.Input(shape=(input_dim,)),
            layers.Dense(32, activation="relu"),
            layers.Dense(1),  # single unit, no activation: regression output
        ])
        self.optimizer = optimizer
        self.loss = loss
        # Populated by load_data(); None means "not loaded yet".
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    @classmethod
    def _split_features_target(cls, frame):
        """Return ``(features, target)`` split off the target column."""
        return frame.drop(cls.TARGET_COLUMN, axis=1), frame[cls.TARGET_COLUMN]

    def load_data(self, train_path, test_path):
        """Read the train and test CSV files and split features from target.

        Args:
            train_path: Path of the training CSV.
            test_path: Path of the test CSV.

        Raises:
            KeyError: If either file lacks the target column.
        """
        self.X_train, self.y_train = self._split_features_target(
            pd.read_csv(train_path)
        )
        self.X_test, self.y_test = self._split_features_target(
            pd.read_csv(test_path)
        )

    def _require_data(self):
        """Fail early with a clear message when load_data() was not called."""
        loaded = (self.X_train, self.y_train, self.X_test, self.y_test)
        if any(part is None for part in loaded):
            raise RuntimeError("No data loaded; call load_data() first.")

    def train(self, epochs=30, batch_size=32):
        """Compile the network and fit it on the training split.

        The test split is passed as validation data, so its loss is reported
        after every epoch.

        Args:
            epochs: Number of passes over the training data.
            batch_size: Number of samples per gradient update.

        Raises:
            RuntimeError: If no data has been loaded.
        """
        self._require_data()
        self.model.compile(optimizer=self.optimizer, loss=self.loss)
        self.model.fit(
            self.X_train,
            self.y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(self.X_test, self.y_test),
        )

    def predict(self, data):
        """Return the model's predictions for ``data``."""
        return self.model.predict(data)

    def evaluate(self):
        """Print and return the loss on the test split.

        Raises:
            RuntimeError: If no data has been loaded.
        """
        self._require_data()
        test_loss = self.model.evaluate(self.X_test, self.y_test)
        print(f"Test Loss: {test_loss:.4f}")
        return test_loss

    def save_model(self, path="model.keras"):
        """Save the model to ``path`` (defaults to ``model.keras``)."""
        self.model.save(path)
# Run the pipeline only when executed as a script, so that importing this
# module (e.g. to reuse RegressionModel) does not start a training run.
if __name__ == "__main__":
    model = RegressionModel()
    model.load_data("df_train.csv", "df_test.csv")
    model.train()
    model.evaluate()
    # Saved last so the file on disk is the fully trained model.
    model.save_model()