From 75a3a6e6c7876ebcf0a4daf6c6190e22e54125c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Raczy=C5=84ski?=
Date: Tue, 21 May 2024 19:47:48 +0200
Subject: [PATCH] IUM_08

---
 mlflow/MLProject         | 14 ++++++++++++++
 mlflow/conda.yaml        | 13 +++++++++++++
 mlflow/mlflow_model.py   | 43 +++++++++++++++++++++++++++++++++++++++++++
 mlflow/mlflow_predict.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+)
 create mode 100644 mlflow/MLProject
 create mode 100644 mlflow/conda.yaml
 create mode 100644 mlflow/mlflow_model.py
 create mode 100644 mlflow/mlflow_predict.py

diff --git a/mlflow/MLProject b/mlflow/MLProject
new file mode 100644
index 0000000..34a123e
--- /dev/null
+++ b/mlflow/MLProject
@@ -0,0 +1,14 @@
+name: Car Price Prediction
+
+conda_env: conda.yaml
+
+entry_points:
+  main:
+    parameters:
+      epochs: {type: int, default: 20}
+      batch_size: {type: int, default: 32}
+    command: "python mlflow_model.py {epochs} {batch_size}"
+  predict:
+    parameters:
+      model_uri: {type: string, default: "model"}
+    command: "python mlflow_predict.py {model_uri}"
diff --git a/mlflow/conda.yaml b/mlflow/conda.yaml
new file mode 100644
index 0000000..90d4ded
--- /dev/null
+++ b/mlflow/conda.yaml
@@ -0,0 +1,13 @@
+name: car_price_env
+channels:
+  - defaults
+dependencies:
+  - python=3.8
+  - pip
+  - pip:
+    - pandas
+    - numpy
+    - scikit-learn
+    - tensorflow
+    - mlflow
+    - h5py
diff --git a/mlflow/mlflow_model.py b/mlflow/mlflow_model.py
new file mode 100644
index 0000000..eb434fa
--- /dev/null
+++ b/mlflow/mlflow_model.py
@@ -0,0 +1,43 @@
+import sys
+
+import mlflow
+import mlflow.keras
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+from tensorflow.keras import Sequential
+from tensorflow.keras.layers import Dense
+
+# Parameters from the command line; the fallbacks mirror the MLProject
+# defaults so the script can also be started without arguments.
+epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 20
+batch_size = int(sys.argv[2]) if len(sys.argv) > 2 else 32
+
+train_data = pd.read_csv('./data/car_prices_train.csv')
+train_data.dropna(inplace=True)
+y_train = train_data['sellingprice'].astype(np.float32)
+# Copy the selection so the column assignment below writes to an independent
+# frame instead of a slice of train_data (pandas SettingWithCopyWarning).
+X_train = train_data[['year', 'condition', 'transmission']].copy()
+
+scaler_x = MinMaxScaler()
+X_train['condition'] = scaler_x.fit_transform(X_train[['condition']])
+
+scaler_y = MinMaxScaler()
+y_train = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
+# Cast to float32: newer pandas emits boolean dummy columns, and a frame that
+# mixes bool and numeric columns converts to an object array Keras rejects.
+X_train = pd.get_dummies(X_train, columns=['transmission']).astype(np.float32)
+
+model = Sequential([Dense(64, activation='relu'), Dense(32, activation='relu'), Dense(1)])
+model.compile(optimizer='adam', loss='mean_squared_error')
+
+# The context manager closes the run even if training raises.
+with mlflow.start_run():
+    mlflow.log_param("epochs", epochs)
+    mlflow.log_param("batch_size", batch_size)
+
+    # Training the model with MLflow tracking
+    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
+
+    mlflow.keras.log_model(model, "model")
diff --git a/mlflow/mlflow_predict.py b/mlflow/mlflow_predict.py
new file mode 100644
index 0000000..29f9c33
--- /dev/null
+++ b/mlflow/mlflow_predict.py
@@ -0,0 +1,44 @@
+import sys
+
+import mlflow.keras
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+
+FEATURES = ['year', 'condition', 'transmission']
+
+# Model location; the default keeps the previous behaviour, while a URI such
+# as "runs:/RUN_ID/model" loads the model logged by the training script.
+model_uri = sys.argv[1] if len(sys.argv) > 1 else "model"
+model = mlflow.keras.load_model(model_uri)
+
+# Predictions are only meaningful with the preprocessing used in training, so
+# the scalers and one-hot columns are re-derived from the training data
+# instead of being fitted on the test set.
+train_data = pd.read_csv('./data/car_prices_train.csv')
+train_data.dropna(inplace=True)
+y_train = train_data['sellingprice'].astype(np.float32)
+X_train = train_data[FEATURES].copy()
+
+scaler_X = MinMaxScaler()
+scaler_X.fit(X_train[['condition']])
+
+scaler_y = MinMaxScaler()
+scaler_y.fit(y_train.values.reshape(-1, 1))
+
+train_columns = pd.get_dummies(X_train, columns=['transmission']).columns
+
+test_data = pd.read_csv('./data/car_prices_test.csv')
+test_data.dropna(inplace=True)
+X_test = test_data[FEATURES].copy()
+
+X_test['condition'] = scaler_X.transform(X_test[['condition']])
+X_test = pd.get_dummies(X_test, columns=['transmission'])
+# Align with the training layout: categories absent from the test set become
+# all-zero columns and categories unseen in training are dropped.
+X_test = X_test.reindex(columns=train_columns, fill_value=0).astype(np.float32)
+
+y_pred_scaled = model.predict(X_test)
+y_pred = scaler_y.inverse_transform(y_pred_scaled)
+y_pred_df = pd.DataFrame(y_pred, columns=['PredictedSellingPrice'])
+y_pred_df.to_csv('predicted_selling_prices.csv', index=False)