Add DVC pipeline

This commit is contained in:
PawelDopierala 2024-05-29 21:22:02 +02:00
parent 8c64afdb05
commit b586cbdbc4
4 changed files with 68 additions and 1 deletions

11
.gitignore vendored
View File

@ -1 +1,10 @@
# Raw dataset: one pattern per line. A single line holding both paths separated
# by a space is treated as one pattern containing a literal space and matches
# neither file.
/housing_price_dataset.csv
/data/housing_price_dataset.csv
# Files declared as stage outputs in dvc.yaml.
/hp_model.h5
/hp_test_predictions.csv
/hp_test_metrics.csv
/plot_rmse.png
/plot_mae.png
/plot_r2.png
/hp_train.csv
/hp_dev.csv
/hp_test.csv

14
data_processing_dvc.py Normal file
View File

@ -0,0 +1,14 @@
from sklearn.model_selection import train_test_split
import pandas as pd
import sys
# Fixed seed so that repeated pipeline runs yield the same train/dev/test
# partitions; an unseeded split reshuffles the rows on every run.
SPLIT_SEED = 42


def main(argv):
    """Split the housing-price CSV into train, dev and test CSV files.

    Args:
        argv: Command-line argument list; ``argv[1]`` is the path of the
            input CSV file.

    Side effects:
        Writes ``hp_train.csv``, ``hp_dev.csv`` and ``hp_test.csv`` (without
        the index column) into the current working directory.
    """
    if len(argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('usage: python data_processing_dvc.py <input_csv>')

    housing_price_dataset = pd.read_csv(argv[1])
    # Replace the 'Neighborhood' column with one indicator column per value.
    housing_price_dataset = pd.get_dummies(housing_price_dataset, columns=['Neighborhood'])

    # First hold out 10% of all rows as the dev set ...
    hp_train_test, hp_dev = train_test_split(
        housing_price_dataset, test_size=0.1, random_state=SPLIT_SEED)
    # ... then take exactly 1000 of the remaining rows as the test set.
    hp_train, hp_test = train_test_split(
        hp_train_test, test_size=1000, random_state=SPLIT_SEED)

    hp_train.to_csv('hp_train.csv', index=False)
    hp_dev.to_csv('hp_dev.csv', index=False)
    hp_test.to_csv('hp_test.csv', index=False)


if __name__ == '__main__':
    main(sys.argv)

38
dvc.yaml Normal file
View File

@ -0,0 +1,38 @@
# Pipeline definition: three stages chained through their deps/outs.
# Each stage must be nested under `stages`, and its fields under the stage
# name; without that nesting `cmd`, `deps`, `outs` and `params` collapse into
# duplicate top-level keys.
stages:
  # Runs data_processing_dvc.py on the raw dataset and declares the three
  # split CSV files as outputs.
  data_processing:
    cmd: python data_processing_dvc.py data/housing_price_dataset.csv
    deps:
      - data_processing_dvc.py
      - data/housing_price_dataset.csv
    outs:
      - hp_train.csv
      - hp_dev.csv
      - hp_test.csv

  # Runs create_model.py with the three create_model.* parameters passed as
  # positional arguments; consumes the train/dev splits, emits hp_model.h5.
  create_model:
    cmd: python create_model.py ${create_model.epochs} ${create_model.learning_rate} ${create_model.batch_size}
    deps:
      - create_model.py
      - helper.py
      - hp_train.csv
      - hp_dev.csv
    outs:
      - hp_model.h5
    params:
      - create_model.epochs
      - create_model.learning_rate
      - create_model.batch_size

  # Runs evaluate.py with evaluate.build_number as its argument; consumes the
  # model and the test split, emits predictions, metrics and three plots.
  evaluate:
    cmd: python evaluate.py ${evaluate.build_number}
    deps:
      - evaluate.py
      - helper.py
      - hp_model.h5
      - hp_test.csv
    outs:
      - hp_test_predictions.csv
      - hp_test_metrics.csv
      - plot_rmse.png
      - plot_mae.png
      - plot_r2.png
    params:
      - evaluate.build_number

6
params.yaml Normal file
View File

@ -0,0 +1,6 @@
# Values referenced from dvc.yaml as `create_model.<name>`; they must be nested
# under `create_model` for those dotted lookups to resolve.
create_model:
  epochs: 20
  learning_rate: 0.001
  batch_size: 32

# Value referenced from dvc.yaml as `evaluate.build_number`.
evaluate:
  build_number: 0