diff --git a/.gitignore b/.gitignore
index 851e9c3..afd9ba7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,10 @@
-/housing_price_dataset.csv
+/data/housing_price_dataset.csv
+/hp_model.h5
+/hp_test_predictions.csv
+/hp_test_metrics.csv
+/plot_rmse.png
+/plot_mae.png
+/plot_r2.png
+/hp_train.csv
+/hp_dev.csv
+/hp_test.csv
diff --git a/data_processing_dvc.py b/data_processing_dvc.py
new file mode 100644
index 0000000..1d6810b
--- /dev/null
+++ b/data_processing_dvc.py
@@ -0,0 +1,39 @@
+"""Split the housing price dataset into train, dev and test CSV files.
+
+Usage: python data_processing_dvc.py <path-to-dataset.csv>
+"""
+import sys
+
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+# Fixed seed: without it every run of the DVC stage draws a different split,
+# so cached outputs cannot be reproduced and test rows can leak between runs.
+RANDOM_STATE = 42
+DEV_FRACTION = 0.1  # share of all rows held out as the dev set
+TEST_ROWS = 1000  # absolute number of rows held out as the test set
+
+
+def split_dataset(dataset, random_state=RANDOM_STATE):
+    """Return (train, dev, test) frames with 'Neighborhood' one-hot encoded."""
+    encoded = pd.get_dummies(dataset, columns=['Neighborhood'])
+    # Carve out the dev set first, then take the test rows from the remainder.
+    train_test, dev = train_test_split(
+        encoded, test_size=DEV_FRACTION, random_state=random_state)
+    train, test = train_test_split(
+        train_test, test_size=TEST_ROWS, random_state=random_state)
+    return train, dev, test
+
+
+def main(argv):
+    """Read the CSV named by argv[1] and write the three split files."""
+    if len(argv) < 2:
+        sys.exit('usage: python data_processing_dvc.py <dataset.csv>')
+    hp_train, hp_dev, hp_test = split_dataset(pd.read_csv(argv[1]))
+    hp_train.to_csv('hp_train.csv', index=False)
+    hp_dev.to_csv('hp_dev.csv', index=False)
+    hp_test.to_csv('hp_test.csv', index=False)
+
+
+if __name__ == '__main__':
+    main(sys.argv)
diff --git a/dvc.yaml b/dvc.yaml
new file mode 100644
index 0000000..a520cad
--- /dev/null
+++ b/dvc.yaml
@@ -0,0 +1,38 @@
+stages:
+  data_processing:
+    cmd: python data_processing_dvc.py data/housing_price_dataset.csv
+    deps:
+      - data_processing_dvc.py
+      - data/housing_price_dataset.csv
+    outs:
+      - hp_train.csv
+      - hp_dev.csv
+      - hp_test.csv
+  create_model:
+    cmd: python create_model.py ${create_model.epochs} ${create_model.learning_rate} ${create_model.batch_size}
+    deps:
+      - create_model.py
+      - helper.py
+      - hp_train.csv
+      - hp_dev.csv
+    outs:
+      - hp_model.h5
+    params:
+      - create_model.epochs
+      - create_model.learning_rate
+      - create_model.batch_size
+  evaluate:
+    cmd: python evaluate.py ${evaluate.build_number}
+    deps:
+      - evaluate.py
+      - helper.py
+      - hp_model.h5
+      - hp_test.csv
+    outs:
+      - hp_test_predictions.csv
+      - hp_test_metrics.csv
+      - plot_rmse.png
+      - plot_mae.png
+      - plot_r2.png
+    params:
+      - evaluate.build_number
diff --git a/params.yaml b/params.yaml
new file mode 100644
index 0000000..189557d
--- /dev/null
+++ b/params.yaml
@@ -0,0 +1,6 @@
+create_model:
+  epochs: 20
+  learning_rate: 0.001
+  batch_size: 32
+evaluate:
+  build_number: 0