From 2c31d1eb264044fb3a404f5bd0b54cd12e97a22d Mon Sep 17 00:00:00 2001
From: Sheaza
Date: Wed, 3 Apr 2024 09:39:37 +0200
Subject: [PATCH] add training script and get predictions

---
Review notes (this section is not part of the commit message):
 * The line structure of this patch was restored from a whitespace-collapsed
   copy. Context-line indentation in the Dockerfile/Jenkinsfile hunks is
   reconstructed (TODO confirm against the tree), and the post-image blob
   ids on the "index" lines were not regenerated after the edits below.
 * Jenkinsfile: dropped the two "chmod +x" steps; both scripts are run
   through "python", so the executable bit is never used.
 * Dockerfile: COPY source for predict.py now uses the same "./" form as
   the neighbouring COPY lines.
 * predict.py / train.py: added module/class/function docstrings and
   __main__ guards; train() and evaluate() raise RuntimeError when
   load_data() was not called; RegressionModel takes an optional input_dim
   and save_model() an optional path (defaults keep the old behaviour);
   evaluate() also returns the loss; predictions are written with
   np.array2string instead of changing NumPy's global print options.

 Dockerfile  |  3 ++-
 Jenkinsfile | 14 ++++++++++
 predict.py  | 27 +++++++++++++++++++
 train.py    | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 predict.py
 create mode 100644 train.py

diff --git a/Dockerfile b/Dockerfile
index 5221cf9..18b24f0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,5 +11,6 @@ WORKDIR ./app
 
 COPY ./get_dataset.py ./
 COPY ./get_stats.py ./
-
+COPY ./train.py ./
+COPY ./predict.py ./
 CMD bash
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 58031b7..fff2b4a 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -49,5 +49,19 @@ pipeline {
                 archiveArtifacts artifacts: 'dataset.csv,df_train.csv,df_test.csv', onlyIfSuccessful: true
             }
         }
+        stage("Prediction") {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    reuseNode true
+                }
+            }
+
+            steps {
+                sh "python ./train.py"
+                sh "python ./predict.py"
+                archiveArtifacts artifacts: 'model.keras,predictions.txt', onlyIfSuccessful: true
+            }
+        }
     }
 }
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..7ac8c47
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,27 @@
+"""Write the trained model's predictions for the test split."""
+
+import sys
+
+import numpy as np
+import pandas as pd
+from tensorflow import keras
+
+TARGET_COLUMN = "Performance Index"
+
+
+def main():
+    """Predict on df_test.csv with model.keras and write predictions.txt."""
+    data = pd.read_csv("df_test.csv")
+    X_test = data.drop(TARGET_COLUMN, axis=1)
+
+    model = keras.models.load_model("model.keras")
+    predictions = model.predict(X_test)
+
+    # threshold=sys.maxsize keeps long arrays from being abbreviated with
+    # "..." without changing NumPy's process-wide print options.
+    with open("predictions.txt", "w") as f:
+        f.write(np.array2string(predictions, threshold=sys.maxsize))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..f23ee93
--- /dev/null
+++ b/train.py
@@ -0,0 +1,87 @@
+"""Train a small dense regression model and save it as model.keras."""
+
+import pandas as pd
+from tensorflow import keras
+from tensorflow.keras import layers
+
+TARGET_COLUMN = "Performance Index"
+
+
+class RegressionModel:
+    """Small dense Keras regressor for the Performance Index column."""
+
+    def __init__(self, optimizer="adam", loss="mean_squared_error", input_dim=5):
+        """Build the network.
+
+        Args:
+            optimizer: Optimizer passed to compile().
+            loss: Loss passed to compile().
+            input_dim: Number of feature columns fed to the network.
+        """
+        self.model = keras.Sequential([
+            layers.Input(shape=(input_dim,)),
+            layers.Dense(32, activation="relu"),
+            layers.Dense(1),  # single output unit for regression
+        ])
+        self.optimizer = optimizer
+        self.loss = loss
+        self.X_train = None
+        self.X_test = None
+        self.y_train = None
+        self.y_test = None
+
+    def load_data(self, train_path, test_path):
+        """Read both CSV files and split features from the target column."""
+        data_train = pd.read_csv(train_path)
+        data_test = pd.read_csv(test_path)
+        self.X_train = data_train.drop(TARGET_COLUMN, axis=1)
+        self.y_train = data_train[TARGET_COLUMN]
+        self.X_test = data_test.drop(TARGET_COLUMN, axis=1)
+        self.y_test = data_test[TARGET_COLUMN]
+
+    def _require_data(self):
+        """Raise a clear error when load_data() has not been called."""
+        if self.X_train is None or self.X_test is None:
+            raise RuntimeError("call load_data() before train()/evaluate()")
+
+    def train(self, epochs=30):
+        """Compile the model and fit it for the given number of epochs."""
+        self._require_data()
+        self.model.compile(optimizer=self.optimizer, loss=self.loss)
+        # NOTE: the test split is also the validation set here, so val_loss
+        # is not an independent hold-out estimate.
+        self.model.fit(
+            self.X_train,
+            self.y_train,
+            epochs=epochs,
+            batch_size=32,
+            validation_data=(self.X_test, self.y_test),
+        )
+
+    def predict(self, data):
+        """Return the model's predictions for data."""
+        return self.model.predict(data)
+
+    def evaluate(self):
+        """Print and return the loss on the test split."""
+        self._require_data()
+        test_loss = self.model.evaluate(self.X_test, self.y_test)
+        print(f"Test Loss: {test_loss:.4f}")
+        return test_loss
+
+    def save_model(self, path="model.keras"):
+        """Save the model to path (defaults to model.keras)."""
+        self.model.save(path)
+
+
+def main():
+    """Train on df_train.csv, evaluate on df_test.csv, save the model."""
+    model = RegressionModel()
+    model.load_data("df_train.csv", "df_test.csv")
+    model.train()
+    model.evaluate()
+    model.save_model()
+
+
+if __name__ == "__main__":
+    main()