fix github workflow

2024-06-06 03:16:53 +02:00 · 2024-06-06 03:04:30 +02:00 · 2024-06-06 02:56:14 +02:00 · 2024-06-06 02:47:38 +02:00 · 2024-06-06 02:37:26 +02:00 · 2024-06-06 02:29:50 +02:00
7 changed files with 50195 additions and 0 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -0,0 +1,84 @@
+# Manually triggered pipeline: trains the housing price model, then evaluates
+# it in a second job that receives the trained model as an artifact.
+name: Housing Price Workflow
+
+on:
+  workflow_dispatch:
+    # Dispatch inputs are delivered as strings, so the defaults are quoted.
+    inputs:
+      epochs:
+        description: 'Number of epochs'
+        required: true
+        default: '20'
+      learning_rate:
+        description: 'Learning rate'
+        required: true
+        default: '0.001'
+      batch_size:
+        description: 'Batch size'
+        required: true
+        default: '32'
+
+jobs:
+  train:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.8'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pandas scikit-learn tensorflow matplotlib mlflow
+
+    - name: Train Model
+      # User-supplied inputs are passed through the environment and quoted
+      # instead of being expanded directly into the shell command, so a
+      # crafted input value cannot inject extra shell commands.
+      env:
+        EPOCHS: ${{ github.event.inputs.epochs }}
+        LEARNING_RATE: ${{ github.event.inputs.learning_rate }}
+        BATCH_SIZE: ${{ github.event.inputs.batch_size }}
+      run: python ./github_project/create_model.py "$EPOCHS" "$LEARNING_RATE" "$BATCH_SIZE"
+
+    - name: Upload Artefacts
+      # Upload and download actions must use the same major version.
+      uses: actions/upload-artifact@v4
+      with:
+        name: model
+        path: |
+          ./github_project
+
+  evaluate:
+    needs: train
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.8'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pandas scikit-learn tensorflow matplotlib mlflow
+
+    - name: Download Artifact
+      # Restores the trained model (and the rest of ./github_project) produced
+      # by the train job on top of the fresh checkout.
+      uses: actions/download-artifact@v4
+      with:
+        name: model
+        path: ./github_project
+
+    - name: Evaluate Model
+      # The run number is used by evaluate.py as the build number.
+      run: python ./github_project/evaluate.py ${{ github.run_number }}
+
+    - name: Upload Artefacts
+      uses: actions/upload-artifact@v4
+      with:
+        name: evaluation-results
+        path: |
+          ./github_project/hp_test_predictions.csv
+          ./github_project/hp_test_metrics.csv
+          plot_mae.png
+          plot_r2.png
+          plot_rmse.png
+
--- a/github_project/create_model.py
+++ b/github_project/create_model.py
@ -0,0 +1,38 @@
+import pandas as pd
+import sys
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.optimizers import Adam
+from keras import regularizers
+import mlflow
+
+from helper import prepare_tensors
+
+epochs = int(sys.argv[1])
+learning_rate = float(sys.argv[2])
+batch_size = int(sys.argv[3])
+
+hp_train = pd.read_csv('./github_project/hp_train.csv')
+hp_dev = pd.read_csv('./github_project/hp_dev.csv')
+
+X_train, Y_train = prepare_tensors(hp_train)
+X_dev, Y_dev = prepare_tensors(hp_dev)
+
+model = Sequential()
+model.add(Dense(64, input_dim=7, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
+model.add(Dense(1, activation='linear'))
+
+adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-7)
+model.compile(optimizer=adam, loss='mean_squared_error')
+
+model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_dev, Y_dev))
+
+model.save('./github_project/hp_model.h5')
+
+with mlflow.start_run() as run:
+    mlflow.log_param("epochs", epochs)
+    mlflow.log_param("learning_rate", learning_rate)
+    mlflow.log_param("batch_size", batch_size)
--- a/github_project/evaluate.py
+++ b/github_project/evaluate.py
@ -0,0 +1,61 @@
+import pandas as pd
+import numpy as np
+import sys
+import os
+
+import mlflow
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+from keras.models import load_model
+from helper import prepare_tensors
+import matplotlib.pyplot as plt
+
+if len(sys.argv) > 1:
+    build_number = int(sys.argv[1])
+else:
+    build_number = 0
+
+hp_test = pd.read_csv('./github_project/hp_test.csv')
+X_test, Y_test = prepare_tensors(hp_test)
+
+model = load_model('/home/runner/work/ium/ium/github_project/hp_model.h5')
+
+test_predictions = model.predict(X_test)
+
+predictions_df = pd.DataFrame(test_predictions, columns=["Predicted_Price"])
+predictions_df.to_csv('./github_project/hp_test_predictions.csv', index=False)
+
+rmse = np.sqrt(mean_squared_error(Y_test, test_predictions))
+mae = mean_absolute_error(Y_test, test_predictions)
+r2 = r2_score(Y_test, test_predictions)
+
+metrics_df = pd.DataFrame({
+    'Build_Number': [build_number],
+    'RMSE': [rmse],
+    'MAE': [mae],
+    'R2': [r2]
+})
+
+metrics_file = './github_project/hp_test_metrics.csv'
+if os.path.isfile(metrics_file):
+    existing_metrics_df = pd.read_csv(metrics_file)
+    updated_metrics_df = pd.concat([existing_metrics_df, metrics_df], ignore_index=True)
+else:
+    updated_metrics_df = metrics_df
+
+updated_metrics_df.to_csv(metrics_file, index=False)
+
+metrics = ['RMSE', 'MAE', 'R2']
+for metric in metrics:
+    plt.plot(updated_metrics_df['Build_Number'], updated_metrics_df[metric], marker='o')
+    plt.title(f'{metric} vs Builds')
+    plt.xlabel('Build Number')
+    plt.ylabel(metric)
+    plt.grid(True)
+    plot_file = f'plot_{metric.lower()}.png'
+    plt.savefig(plot_file)
+    plt.close()
+
+with mlflow.start_run() as run:
+    mlflow.log_metric('RMSE', rmse)
+    mlflow.log_metric('MAE', mae)
+    mlflow.log_metric('R2', r2)
--- a/github_project/helper.py
+++ b/github_project/helper.py
@ -0,0 +1,9 @@
+import tensorflow as tf
+
+
+def prepare_tensors(df):
+    Y = df["Price"]
+    X = df.drop("Price", axis=1)
+    X_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
+    Y_tensor = tf.convert_to_tensor(Y, dtype=tf.float32)
+    return X_tensor, Y_tensor
--- a/github_project/hp_dev.csv
+++ b/github_project/hp_dev.csv
--- a/github_project/hp_test.csv
+++ b/github_project/hp_test.csv
--- a/github_project/hp_train.csv
+++ b/github_project/hp_train.csv
Author	SHA1	Message	Date
PawelDopierala	1245979730	fix github workflow	2024-06-06 03:16:53 +02:00
PawelDopierala	554ed4e9cd	fix github workflow	2024-06-06 03:04:30 +02:00
PawelDopierala	ae55583129	fix github workflow	2024-06-06 02:56:14 +02:00
PawelDopierala	08323a3396	fix github workflow	2024-06-06 02:47:38 +02:00
PawelDopierala	bfc8f16904	fix github workflow	2024-06-06 02:37:26 +02:00
PawelDopierala	6b77f00b66	fix github workflow	2024-06-06 02:29:50 +02:00
PawelDopierala	0dc2c077bc	fix github workflow	2024-06-06 02:23:23 +02:00
PawelDopierala	dc5131136e	fix github workflow	2024-06-06 02:18:13 +02:00
PawelDopierala	58c0867cd8	fix github workflow	2024-06-06 02:12:40 +02:00
PawelDopierala	81aa2cbbec	fix github workflow	2024-06-06 02:08:25 +02:00
PawelDopierala	578976bbe4	fix github workflow	2024-06-06 02:01:37 +02:00
PawelDopierala	855fc593d8	fix github workflow	2024-06-06 01:59:07 +02:00
PawelDopierala	d051818515	Add github workflow	2024-06-06 01:47:20 +02:00