Compare commits

...

13 Commits

Author SHA1 Message Date
PawelDopierala
1245979730 fix github workflow 2024-06-06 03:16:53 +02:00
PawelDopierala
554ed4e9cd fix github workflow 2024-06-06 03:04:30 +02:00
PawelDopierala
ae55583129 fix github workflow 2024-06-06 02:56:14 +02:00
PawelDopierala
08323a3396 fix github workflow 2024-06-06 02:47:38 +02:00
PawelDopierala
bfc8f16904 fix github workflow 2024-06-06 02:37:26 +02:00
PawelDopierala
6b77f00b66 fix github workflow 2024-06-06 02:29:50 +02:00
PawelDopierala
0dc2c077bc fix github workflow 2024-06-06 02:23:23 +02:00
PawelDopierala
dc5131136e fix github workflow 2024-06-06 02:18:13 +02:00
PawelDopierala
58c0867cd8 fix github workflow 2024-06-06 02:12:40 +02:00
PawelDopierala
81aa2cbbec fix github workflow 2024-06-06 02:08:25 +02:00
PawelDopierala
578976bbe4 fix github workflow 2024-06-06 02:01:37 +02:00
PawelDopierala
855fc593d8 fix github workflow 2024-06-06 01:59:07 +02:00
PawelDopierala
d051818515 Add github workflow 2024-06-06 01:47:20 +02:00
7 changed files with 50195 additions and 0 deletions

84
.github/workflows/main.yml vendored Normal file
View File

@ -0,0 +1,84 @@
# Manually triggered pipeline: train the housing-price model, then evaluate it
# in a second job that receives the trained model through a build artifact.
name: Housing Price Workflow

on:
  workflow_dispatch:
    inputs:
      epochs:
        description: 'Number of epochs'
        required: true
        default: 20
      learning_rate:
        description: 'Learning rate'
        required: true
        default: 0.001
      batch_size:
        description: 'Batch size'
        required: true
        default: 32

jobs:
  train:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pandas scikit-learn tensorflow matplotlib mlflow
      - name: Train Model
        run: python ./github_project/create_model.py ${{ github.event.inputs.epochs }} ${{ github.event.inputs.learning_rate }} ${{ github.event.inputs.batch_size }}
      # upload-artifact/download-artifact v1-v3 are retired; v4 must be used
      # on both the upload and the download side.
      - name: Upload Artefacts
        uses: actions/upload-artifact@v4
        with:
          name: model
          path: |
            ./github_project

  evaluate:
    needs: train
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pandas scikit-learn tensorflow matplotlib mlflow
      # Restores the train job's ./github_project contents (including the
      # saved model) on top of the fresh checkout.
      - name: Download Artifact
        uses: actions/download-artifact@v4
        with:
          name: model
          path: ./github_project
      - name: Evaluate Model
        run: python ./github_project/evaluate.py ${{ github.run_number }}
      - name: Upload Artefacts
        uses: actions/upload-artifact@v4
        with:
          name: evaluation-results
          path: |
            ./github_project/hp_test_predictions.csv
            ./github_project/hp_test_metrics.csv
            plot_mae.png
            plot_r2.png
            plot_rmse.png

View File

@ -0,0 +1,38 @@
import pandas as pd
import sys
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import regularizers
import mlflow
from helper import prepare_tensors
# Train the housing-price regression network and save it to disk.
#
# Usage: python create_model.py [epochs] [learning_rate] [batch_size]
# Any omitted argument falls back to the same default the workflow declares
# for its inputs (20, 0.001, 32), so the script can also be run by hand.
cli_args = sys.argv[1:]
epochs = int(cli_args[0]) if len(cli_args) > 0 else 20
learning_rate = float(cli_args[1]) if len(cli_args) > 1 else 0.001
batch_size = int(cli_args[2]) if len(cli_args) > 2 else 32

# Paths are relative to the working directory (the repository root in CI).
hp_train = pd.read_csv('./github_project/hp_train.csv')
hp_dev = pd.read_csv('./github_project/hp_dev.csv')

X_train, Y_train = prepare_tensors(hp_train)
X_dev, Y_dev = prepare_tensors(hp_dev)

# Size the input layer from the data instead of hard-coding the feature count.
n_features = int(X_train.shape[1])

# Fully connected regressor: four L2-regularised ReLU layers of decreasing
# width followed by a single linear output unit.
model = Sequential()
model.add(Dense(64, input_dim=n_features, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1, activation='linear'))

adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-7)
model.compile(optimizer=adam, loss='mean_squared_error')

# The dev split is passed as validation data during fitting.
model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_dev, Y_dev))

model.save('./github_project/hp_model.h5')

# Record the hyperparameters of this training run in MLflow.
with mlflow.start_run() as run:
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("batch_size", batch_size)

View File

@ -0,0 +1,61 @@
import pandas as pd
import numpy as np
import sys
import os
import mlflow
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import load_model
from helper import prepare_tensors
import matplotlib.pyplot as plt
# Evaluate the saved housing-price model on the test split, write predictions
# and metrics to CSV, plot each metric against the build number, and log the
# metrics to MLflow.
#
# Usage: python evaluate.py [build_number]
# The build number defaults to 0 when no argument is given.
if len(sys.argv) > 1:
    build_number = int(sys.argv[1])
else:
    build_number = 0

hp_test = pd.read_csv('./github_project/hp_test.csv')
X_test, Y_test = prepare_tensors(hp_test)

# Load the model from the same working-directory-relative location the
# training script saves it to, rather than a runner-specific absolute path,
# so the script also works outside that one CI checkout.
model = load_model('./github_project/hp_model.h5')

test_predictions = model.predict(X_test)
predictions_df = pd.DataFrame(test_predictions, columns=["Predicted_Price"])
predictions_df.to_csv('./github_project/hp_test_predictions.csv', index=False)

rmse = np.sqrt(mean_squared_error(Y_test, test_predictions))
mae = mean_absolute_error(Y_test, test_predictions)
r2 = r2_score(Y_test, test_predictions)

metrics_df = pd.DataFrame({
    'Build_Number': [build_number],
    'RMSE': [rmse],
    'MAE': [mae],
    'R2': [r2]
})

# Append this build's row to the metrics history when the file already exists;
# otherwise start a new history with just this build.
metrics_file = './github_project/hp_test_metrics.csv'
if os.path.isfile(metrics_file):
    existing_metrics_df = pd.read_csv(metrics_file)
    updated_metrics_df = pd.concat([existing_metrics_df, metrics_df], ignore_index=True)
else:
    updated_metrics_df = metrics_df
updated_metrics_df.to_csv(metrics_file, index=False)

# One PNG per metric, written to the current working directory.
metrics = ['RMSE', 'MAE', 'R2']
for metric in metrics:
    plt.plot(updated_metrics_df['Build_Number'], updated_metrics_df[metric], marker='o')
    plt.title(f'{metric} vs Builds')
    plt.xlabel('Build Number')
    plt.ylabel(metric)
    plt.grid(True)
    plot_file = f'plot_{metric.lower()}.png'
    plt.savefig(plot_file)
    # Close the figure so the next metric starts on a clean canvas.
    plt.close()

# Record this build's metrics in MLflow.
with mlflow.start_run() as run:
    mlflow.log_metric('RMSE', rmse)
    mlflow.log_metric('MAE', mae)
    mlflow.log_metric('R2', r2)

9
github_project/helper.py Normal file
View File

@ -0,0 +1,9 @@
import tensorflow as tf
def prepare_tensors(df):
    """Split a DataFrame into float32 feature and target tensors.

    The "Price" column is used as the target; every other column is treated
    as a feature.

    Returns:
        A ``(features, target)`` pair of ``tf.float32`` tensors.
    """
    target = df["Price"]
    features = df.drop("Price", axis=1)
    return (
        tf.convert_to_tensor(features, dtype=tf.float32),
        tf.convert_to_tensor(target, dtype=tf.float32),
    )

5001
github_project/hp_dev.csv Normal file

File diff suppressed because it is too large Load Diff

1001
github_project/hp_test.csv Normal file

File diff suppressed because it is too large Load Diff

44001
github_project/hp_train.csv Normal file

File diff suppressed because it is too large Load Diff