fix github workflow
This commit is contained in:
parent
d051818515
commit
855fc593d8
4
.github/workflows/main.yml
vendored
4
.github/workflows/main.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
pip install pandas scikit-learn tensorflow matplotlib mlflow
|
||||
|
||||
- name: Train Model
|
||||
run: python create_model.py ${{ github.event.inputs.epochs }} ${{ github.event.inputs.learning_rate }} ${{ github.event.inputs.batch_size }}
|
||||
run: python ./github_project/create_model.py ${{ github.event.inputs.epochs }} ${{ github.event.inputs.learning_rate }} ${{ github.event.inputs.batch_size }}
|
||||
|
||||
- name: Evaluate Model
|
||||
run: python evaluate.py ${{ github.run_number }}
|
||||
run: python ./github_project/evaluate.py ${{ github.run_number }}
|
38
github_project/create_model.py
Normal file
38
github_project/create_model.py
Normal file
@@ -0,0 +1,38 @@
|
||||
"""Train a dense regression network and record its hyperparameters in MLflow.

Usage:
    python create_model.py EPOCHS LEARNING_RATE BATCH_SIZE

Reads ``hp_train.csv`` and ``hp_dev.csv``, fits the network, writes the
trained model to ``hp_model.h5`` in the current working directory and logs
the three hyperparameters to an MLflow run.
"""
import sys
from pathlib import Path

import mlflow
import pandas as pd
from keras import regularizers
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam

from helper import prepare_tensors

USAGE = "usage: python create_model.py EPOCHS LEARNING_RATE BATCH_SIZE"


def _data_path(filename: str) -> Path:
    """Return the location *filename* should be read from.

    A file in the current working directory wins, which is where the script
    has always looked.  When it is not there, fall back to the directory
    holding this script, so the script still finds its data when launched
    from elsewhere (e.g. ``python ./github_project/create_model.py`` from
    the repository root).
    """
    in_cwd = Path(filename)
    if in_cwd.is_file():
        return in_cwd
    return Path(__file__).resolve().parent / filename


def _parse_hyperparameters(argv):
    """Return ``(epochs, learning_rate, batch_size)`` parsed from *argv*.

    Exits with a usage message when fewer than three values are supplied,
    instead of failing with a bare ``IndexError``.  Malformed numbers still
    raise ``ValueError`` from ``int``/``float``.  Extra arguments are ignored.
    """
    if len(argv) < 4:
        sys.exit(USAGE)
    return int(argv[1]), float(argv[2]), int(argv[3])


def _build_model(learning_rate: float) -> Sequential:
    """Build and compile the 64-32-16-8-1 fully connected network."""
    model = Sequential()
    # NOTE(review): input_dim=7 presumably matches the feature count produced
    # by helper.prepare_tensors -- confirm against that helper.
    model.add(Dense(64, input_dim=7, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    for units in (32, 16, 8):
        model.add(Dense(units, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    # Single linear unit: the network outputs one unbounded real value.
    model.add(Dense(1, activation='linear'))

    adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-7)
    model.compile(optimizer=adam, loss='mean_squared_error')
    return model


def main(argv) -> None:
    """Train on the train split, validate on the dev split, save and log."""
    epochs, learning_rate, batch_size = _parse_hyperparameters(argv)

    hp_train = pd.read_csv(_data_path('hp_train.csv'))
    hp_dev = pd.read_csv(_data_path('hp_dev.csv'))

    X_train, Y_train = prepare_tensors(hp_train)
    X_dev, Y_dev = prepare_tensors(hp_dev)

    model = _build_model(learning_rate)
    model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_dev, Y_dev))

    # Saved relative to the working directory; evaluate.py loads it by the
    # same relative name.
    model.save('hp_model.h5')

    with mlflow.start_run():
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("batch_size", batch_size)


if __name__ == "__main__":
    main(sys.argv)
|
61
github_project/evaluate.py
Normal file
61
github_project/evaluate.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""Evaluate the saved model on the test split and track metrics per build.

Usage:
    python evaluate.py [BUILD_NUMBER]

Loads ``hp_model.h5``, predicts on ``hp_test.csv``, writes the predictions to
``hp_test_predictions.csv``, appends RMSE/MAE/R2 for this build to
``hp_test_metrics.csv``, redraws one PNG trend plot per metric and logs the
three metrics to an MLflow run.
"""
import os
import sys

import matplotlib.pyplot as plt
import mlflow
import numpy as np
import pandas as pd
from keras.models import load_model
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from helper import prepare_tensors

METRICS_FILE = 'hp_test_metrics.csv'
METRIC_NAMES = ['RMSE', 'MAE', 'R2']


def _data_path(filename: str) -> str:
    """Return the location *filename* should be read from.

    A file in the current working directory wins, which is where the script
    has always looked.  When it is not there, fall back to the directory
    holding this script, so the script still finds its data when launched
    from elsewhere (e.g. ``python ./github_project/evaluate.py`` from the
    repository root).
    """
    if os.path.isfile(filename):
        return filename
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)


def _append_metrics(metrics_df: pd.DataFrame) -> pd.DataFrame:
    """Append *metrics_df* to the metrics history file and return the history."""
    if os.path.isfile(METRICS_FILE):
        existing_metrics_df = pd.read_csv(METRICS_FILE)
        updated_metrics_df = pd.concat([existing_metrics_df, metrics_df], ignore_index=True)
    else:
        # First recorded build: the history is just this row.
        updated_metrics_df = metrics_df
    updated_metrics_df.to_csv(METRICS_FILE, index=False)
    return updated_metrics_df


def _plot_history(history_df: pd.DataFrame) -> None:
    """Write ``plot_<metric>.png`` showing each metric against build number."""
    for metric in METRIC_NAMES:
        plt.plot(history_df['Build_Number'], history_df[metric], marker='o')
        plt.title(f'{metric} vs Builds')
        plt.xlabel('Build Number')
        plt.ylabel(metric)
        plt.grid(True)
        plot_file = f'plot_{metric.lower()}.png'
        plt.savefig(plot_file)
        # Close the figure so the next metric starts on a clean canvas.
        plt.close()


def main(argv) -> None:
    """Run the evaluation for the build number given in *argv* (default 0)."""
    build_number = int(argv[1]) if len(argv) > 1 else 0

    hp_test = pd.read_csv(_data_path('hp_test.csv'))
    X_test, Y_test = prepare_tensors(hp_test)

    # Loaded relative to the working directory, matching where
    # create_model.py saves it.
    model = load_model('hp_model.h5')
    test_predictions = model.predict(X_test)

    predictions_df = pd.DataFrame(test_predictions, columns=["Predicted_Price"])
    predictions_df.to_csv('hp_test_predictions.csv', index=False)

    rmse = np.sqrt(mean_squared_error(Y_test, test_predictions))
    mae = mean_absolute_error(Y_test, test_predictions)
    r2 = r2_score(Y_test, test_predictions)

    metrics_df = pd.DataFrame({
        'Build_Number': [build_number],
        'RMSE': [rmse],
        'MAE': [mae],
        'R2': [r2],
    })
    _plot_history(_append_metrics(metrics_df))

    with mlflow.start_run():
        mlflow.log_metric('RMSE', rmse)
        mlflow.log_metric('MAE', mae)
        mlflow.log_metric('R2', r2)


if __name__ == "__main__":
    main(sys.argv)
|
5001
github_project/hp_dev.csv
Normal file
5001
github_project/hp_dev.csv
Normal file
File diff suppressed because it is too large
Load Diff
1001
github_project/hp_test.csv
Normal file
1001
github_project/hp_test.csv
Normal file
File diff suppressed because it is too large
Load Diff
44001
github_project/hp_train.csv
Normal file
44001
github_project/hp_train.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user