From 2b853809be411313839af723f16dacdc36c7b5f4 Mon Sep 17 00:00:00 2001
From: Sheaza
Date: Wed, 15 May 2024 00:30:21 +0200
Subject: [PATCH] fix mlflow

---
Reviewer notes (this area is ignored by `git am`):
- Line structure was rebuilt from a whitespace-collapsed copy. Blank context
  lines and Python indentation inside hunks were placed to satisfy the
  declared hunk line counts; verify against the original commit.
- NOTE(review): `COPY ../requirements.txt /tmp` names a path above the build
  context; confirm which directory the image is built from.
- NOTE(review): `--epochs` has no default and is not required, so
  `int(args.epochs)` raises TypeError when the flag is omitted.

 MLFLOW/Dockerfile       | 13 +++++++++++++
 MLFLOW/requirements.txt |  3 +++
 MLFLOW/train.py         |  8 +++-----
 get_stats.py            |  6 +++---
 predict.py              |  2 +-
 5 files changed, 23 insertions(+), 9 deletions(-)
 create mode 100644 MLFLOW/Dockerfile
 create mode 100644 MLFLOW/requirements.txt

diff --git a/MLFLOW/Dockerfile b/MLFLOW/Dockerfile
new file mode 100644
index 0000000..fb2d6d7
--- /dev/null
+++ b/MLFLOW/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3.11
+
+RUN apt-get update && apt-get -y upgrade
+RUN apt-get install -y build-essential
+
+RUN python -m pip install --upgrade pip
+COPY ../requirements.txt /tmp
+RUN python -m pip install -r /tmp/requirements.txt
+
+WORKDIR ./app
+COPY train.py ./
+
+CMD bash
\ No newline at end of file
diff --git a/MLFLOW/requirements.txt b/MLFLOW/requirements.txt
new file mode 100644
index 0000000..2cb54be
--- /dev/null
+++ b/MLFLOW/requirements.txt
@@ -0,0 +1,3 @@
+pandas
+tensorflow
+mlflow
\ No newline at end of file
diff --git a/MLFLOW/train.py b/MLFLOW/train.py
index 9a2a74f..fb7b6e5 100644
--- a/MLFLOW/train.py
+++ b/MLFLOW/train.py
@@ -3,8 +3,6 @@ from tensorflow import keras
 from tensorflow.keras import layers
 import argparse
 import mlflow
-
-
 class RegressionModel:
     def __init__(self, optimizer="adam", loss="mean_squared_error"):
         self.model = keras.Sequential([
@@ -48,11 +46,11 @@ parser = argparse.ArgumentParser()
 parser.add_argument('--epochs')
 args = parser.parse_args()
 
-mlflow.set_tracking_uri("http://localhost:5000")
-model = RegressionModel()
 with mlflow.start_run() as run:
+    model = RegressionModel()
+    model.load_data("df_train.csv", "df_test.csv")
     model.train(epochs=int(args.epochs))
+    mlflow.log_param("epochs", int(args.epochs))
     rmse = model.evaluate()
-    mlflow.log_param("epoch", int(args.epochs))
     mlflow.log_metric("rmse", rmse)
     model.save_model()
diff --git a/get_stats.py b/get_stats.py
index 43fba39..2ca2b9d 100644
--- a/get_stats.py
+++ b/get_stats.py
@@ -5,9 +5,9 @@ pd.set_option('display.max_columns', None)
 pd.set_option('display.max_rows', None)
 
 
-df = pd.read_csv('./dataset.csv')
-df_train = pd.read_csv('./df_train.csv')
-df_test = pd.read_csv('./df_test.csv')
+df = pd.read_csv('MLFLOW/dataset.csv')
+df_train = pd.read_csv('MLFLOW/df_train.csv')
+df_test = pd.read_csv('MLFLOW/df_test.csv')
 
 
 with open('stats.txt', 'w') as f:
diff --git a/predict.py b/predict.py
index ff2c2c5..e48d7c0 100644
--- a/predict.py
+++ b/predict.py
@@ -6,7 +6,7 @@ import matplotlib.pyplot as plt
 
 np.set_printoptions(threshold=np.inf)
 
-data = pd.read_csv("df_test.csv")
+data = pd.read_csv("MLFLOW/df_test.csv")
 X_test = data.drop("Performance Index", axis=1)
 y_test = data["Performance Index"]
 