Compare commits


No commits in common. "12459797302394fd8d6cefa6438cff8fe81d6804" and "b586cbdbc494045c56555f7ac7c51bb9fb6b4bd3" have entirely different histories.

7 changed files with 0 additions and 50195 deletions


@@ -1,84 +0,0 @@
name: Housing Price Workflow
on:
  workflow_dispatch:
    inputs:
      epochs:
        description: 'Number of epochs'
        required: true
        default: 20
      learning_rate:
        description: 'Learning rate'
        required: true
        default: 0.001
      batch_size:
        description: 'Batch size'
        required: true
        default: 32
jobs:
  train:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pandas scikit-learn tensorflow matplotlib mlflow
      - name: Train Model
        run: python ./github_project/create_model.py ${{ github.event.inputs.epochs }} ${{ github.event.inputs.learning_rate }} ${{ github.event.inputs.batch_size }}
      - name: Upload Artefacts
        uses: actions/upload-artifact@v2
        with:
          name: model
          path: |
            ./github_project
  evaluate:
    needs: train
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pandas scikit-learn tensorflow matplotlib mlflow
      - name: Download Artifact
        uses: actions/download-artifact@v2
        with:
          name: model
          path: ./github_project
      - name: Evaluate Model
        run: python ./github_project/evaluate.py ${{ github.run_number }}
      - name: Upload Artefacts
        uses: actions/upload-artifact@v2
        with:
          name: evaluation-results
          path: |
            ./github_project/hp_test_predictions.csv
            ./github_project/hp_test_metrics.csv
            plot_mae.png
            plot_r2.png
            plot_rmse.png
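
The train job forwards the workflow_dispatch inputs to create_model.py as positional arguments, and the evaluate job passes the run number to evaluate.py. A minimal local sketch of the two commands those jobs execute, using the workflow's default input values and a placeholder build number (note that evaluate.py loads the model from a runner-specific absolute path, so this only mirrors the commands rather than guaranteeing a clean local run):

import subprocess

# Mirror the "Train Model" step: epochs, learning_rate, batch_size as positional arguments
subprocess.run(
    ["python", "./github_project/create_model.py", "20", "0.001", "32"],
    check=True,
)

# Mirror the "Evaluate Model" step: a single build-number argument ("1" is a placeholder)
subprocess.run(
    ["python", "./github_project/evaluate.py", "1"],
    check=True,
)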


@@ -1,38 +0,0 @@
import pandas as pd
import sys
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import regularizers
import mlflow
from helper import prepare_tensors

# Hyperparameters passed as positional CLI arguments by the workflow
epochs = int(sys.argv[1])
learning_rate = float(sys.argv[2])
batch_size = int(sys.argv[3])

# Load the training and validation splits and convert them to tensors
hp_train = pd.read_csv('./github_project/hp_train.csv')
hp_dev = pd.read_csv('./github_project/hp_dev.csv')
X_train, Y_train = prepare_tensors(hp_train)
X_dev, Y_dev = prepare_tensors(hp_dev)

# Fully connected regression network with L2 regularization on the hidden layers
model = Sequential()
model.add(Dense(64, input_dim=7, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1, activation='linear'))

adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-7)
model.compile(optimizer=adam, loss='mean_squared_error')
model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_dev, Y_dev))
model.save('./github_project/hp_model.h5')

# Log the training hyperparameters to MLflow
with mlflow.start_run() as run:
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("batch_size", batch_size)


@@ -1,61 +0,0 @@
import pandas as pd
import numpy as np
import sys
import os
import mlflow
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import load_model
from helper import prepare_tensors
import matplotlib.pyplot as plt

# The workflow passes the CI run number so metrics can be tracked across builds
if len(sys.argv) > 1:
    build_number = int(sys.argv[1])
else:
    build_number = 0

# Load the test split and the model trained in the previous job
hp_test = pd.read_csv('./github_project/hp_test.csv')
X_test, Y_test = prepare_tensors(hp_test)
model = load_model('/home/runner/work/ium/ium/github_project/hp_model.h5')  # absolute path inside the GitHub Actions runner workspace

# Predict on the test set and save the predictions
test_predictions = model.predict(X_test)
predictions_df = pd.DataFrame(test_predictions, columns=["Predicted_Price"])
predictions_df.to_csv('./github_project/hp_test_predictions.csv', index=False)

# Compute regression metrics
rmse = np.sqrt(mean_squared_error(Y_test, test_predictions))
mae = mean_absolute_error(Y_test, test_predictions)
r2 = r2_score(Y_test, test_predictions)

metrics_df = pd.DataFrame({
    'Build_Number': [build_number],
    'RMSE': [rmse],
    'MAE': [mae],
    'R2': [r2]
})

# Append this build's metrics to the accumulated metrics file
metrics_file = './github_project/hp_test_metrics.csv'
if os.path.isfile(metrics_file):
    existing_metrics_df = pd.read_csv(metrics_file)
    updated_metrics_df = pd.concat([existing_metrics_df, metrics_df], ignore_index=True)
else:
    updated_metrics_df = metrics_df
updated_metrics_df.to_csv(metrics_file, index=False)

# Plot each metric against the build number
metrics = ['RMSE', 'MAE', 'R2']
for metric in metrics:
    plt.plot(updated_metrics_df['Build_Number'], updated_metrics_df[metric], marker='o')
    plt.title(f'{metric} vs Builds')
    plt.xlabel('Build Number')
    plt.ylabel(metric)
    plt.grid(True)
    plot_file = f'plot_{metric.lower()}.png'
    plt.savefig(plot_file)
    plt.close()

# Log the metrics to MLflow
with mlflow.start_run() as run:
    mlflow.log_metric('RMSE', rmse)
    mlflow.log_metric('MAE', mae)
    mlflow.log_metric('R2', r2)
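
Since hp_test_metrics.csv accumulates one row per build, the history can be inspected directly. A minimal sketch of reading it back and printing the most recent build's metrics:

import pandas as pd

# hp_test_metrics.csv grows by one row per workflow run (Build_Number, RMSE, MAE, R2)
history = pd.read_csv('./github_project/hp_test_metrics.csv')
latest = history.sort_values('Build_Number').iloc[-1]
print(f"Build {int(latest['Build_Number'])}: RMSE={latest['RMSE']:.3f}, MAE={latest['MAE']:.3f}, R2={latest['R2']:.3f}")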


@@ -1,9 +0,0 @@
import tensorflow as tf

def prepare_tensors(df):
    # Split the "Price" target off from the features and convert both to float32 tensors
    Y = df["Price"]
    X = df.drop("Price", axis=1)
    X_tensor = tf.convert_to_tensor(X, dtype=tf.float32)
    Y_tensor = tf.convert_to_tensor(Y, dtype=tf.float32)
    return X_tensor, Y_tensor
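
A quick usage sketch of prepare_tensors, run from inside github_project so the module resolves; the two feature columns and their values are made up purely for illustration (the real dataset has seven features):

import pandas as pd
from helper import prepare_tensors

# Hypothetical toy dataframe: numeric feature columns plus the "Price" target
df = pd.DataFrame({"Rooms": [3, 4], "Distance": [2.5, 8.0], "Price": [850000.0, 620000.0]})
X, Y = prepare_tensors(df)
print(X.shape, Y.shape)  # (2, 2) and (2,)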

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large