diff --git a/MLFLOW/MLproject b/MLFLOW/MLproject
new file mode 100644
index 0000000..296bba0
--- /dev/null
+++ b/MLFLOW/MLproject
@@ -0,0 +1,10 @@
+name: s464980
+
+docker_env:
+  image: s464980-mlflow
+
+entry_points:
+  main:
+    parameters:
+      epochs: float
+    command: "python train.py --epochs {epochs}"
\ No newline at end of file
diff --git a/MLFLOW/train.py b/MLFLOW/train.py
new file mode 100644
index 0000000..9a2a74f
--- /dev/null
+++ b/MLFLOW/train.py
@@ -0,0 +1,71 @@
+import pandas as pd
+from tensorflow import keras
+from tensorflow.keras import layers
+import argparse
+import mlflow
+
+
+class RegressionModel:
+    """Small Keras feed-forward regressor for the "Performance Index" column."""
+
+    def __init__(self, optimizer="adam", loss="mean_squared_error"):
+        """Build the network; the data attributes stay None until load_data()."""
+        self.model = keras.Sequential([
+            layers.Input(shape=(5,)),  # Input layer
+            layers.Dense(32, activation='relu'),  # Hidden layer with 32 neurons and ReLU activation
+            layers.Dense(1)  # Output layer with a single neuron (for regression)
+        ])
+        self.optimizer = optimizer
+        self.loss = loss
+        self.X_train = None
+        self.X_test = None
+        self.y_train = None
+        self.y_test = None
+
+    def load_data(self, train_path, test_path):
+        """Read both CSV files and split off the "Performance Index" target."""
+        data_train = pd.read_csv(train_path)
+        data_test = pd.read_csv(test_path)
+        self.X_train = data_train.drop("Performance Index", axis=1)
+        self.y_train = data_train["Performance Index"]
+        self.X_test = data_test.drop("Performance Index", axis=1)
+        self.y_test = data_test["Performance Index"]
+
+    def train(self, epochs=30):
+        """Compile the model and fit it, validating on the test split."""
+        self.model.compile(optimizer=self.optimizer, loss=self.loss)
+        self.model.fit(self.X_train, self.y_train, epochs=epochs, batch_size=32, validation_data=(self.X_test, self.y_test))
+
+    def predict(self, data):
+        """Return the model's predictions for ``data``."""
+        prediction = self.model.predict(data)
+        return prediction
+
+    def evaluate(self):
+        """Print and return the loss measured on the test split."""
+        test_loss = self.model.evaluate(self.X_test, self.y_test)
+        print(f"Test Loss: {test_loss:.4f}")
+        return test_loss
+
+    def save_model(self):
+        """Save the model to ``model.keras``."""
+        self.model.save("model.keras")
+
+
+parser = argparse.ArgumentParser()
+# MLproject declares epochs as float, so also accept values such as "30.0".
+parser.add_argument('--epochs', type=float, default=30)
+
+args = parser.parse_args()
+epochs = int(args.epochs)
+mlflow.set_tracking_uri("http://localhost:5000")
+model = RegressionModel()
+# The model starts without data; fit() would otherwise be handed None.
+model.load_data("df_train.csv", "df_test.csv")
+with mlflow.start_run() as run:
+    model.train(epochs=epochs)
+    # evaluate() returns the mean-squared-error loss; take the root for RMSE.
+    rmse = model.evaluate() ** 0.5
+    mlflow.log_param("epoch", epochs)
+    mlflow.log_metric("rmse", rmse)
+model.save_model()
diff --git a/MLproject b/MLproject
deleted file mode 100644
index 64ccc44..0000000
--- a/MLproject
+++ /dev/null
@@ -1,9 +0,0 @@
-name: s464980
-
-conda_env: conda.yaml
-
-entry_points:
-  optimal_parameters:
-    parameters:
-      epochs: { type: int, default: 20 }
-    command: 'python train.py {epochs}'
\ No newline at end of file
diff --git a/conda.yaml b/conda.yaml
deleted file mode 100644
index f4dfd5b..0000000
--- a/conda.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: s464980
-channels:
-  - defaults
-dependencies:
-  - python=3.11
-  - pip
-  - pip:
-    - mlflow
-    - tensorflow
-    - pandas
-    - scikit-learn
-    - numpy
-    - matplotlib
\ No newline at end of file
diff --git a/train.py b/train.py
index 9a2a74f..72450b5 100644
--- a/train.py
+++ b/train.py
@@ -2,8 +2,6 @@ import pandas as pd
 from tensorflow import keras
 from tensorflow.keras import layers
 import argparse
-import mlflow
-
 
 class RegressionModel:
     def __init__(self, optimizer="adam", loss="mean_squared_error"):
@@ -48,11 +46,7 @@ parser = argparse.ArgumentParser()
 parser.add_argument('--epochs')
 
 args = parser.parse_args()
-mlflow.set_tracking_uri("http://localhost:5000")
 model = RegressionModel()
-with mlflow.start_run() as run:
-    model.train(epochs=int(args.epochs))
-    rmse = model.evaluate()
-    mlflow.log_param("epoch", int(args.epochs))
-    mlflow.log_metric("rmse", rmse)
+model.load_data("df_train.csv", "df_test.csv")
+model.train(epochs=int(args.epochs))
 model.save_model()