Added dvc.yaml with reproducible stages

This commit is contained in:
Kamil Guttmann 2022-06-05 15:52:59 +02:00
parent 87f12f6afc
commit 02a596a86b
3 changed files with 54 additions and 0 deletions

3
.gitignore vendored
View File

@ -1 +1,4 @@
/crime_conv.csv /crime_conv.csv
/crime_test.csv
/crime_dev.csv
/crime_train.csv

34
dvc.lock Normal file
View File

@ -0,0 +1,34 @@
# DVC lock file. For every stage it records the command that was run plus
# the md5 checksum and size of each dependency and output.
# NOTE(review): DVC normally writes this file itself — presumably it should
# not be edited by hand; confirm against the DVC documentation.
schema: '2.0'
stages:
  # Stage that turns crime_conv.csv into the dev/test/train splits.
  prepare_data:
    cmd: python3 clean_and_split_data.py
    deps:
    - path: crime_conv.csv
      md5: 6074436a3903679501a4d230053f7acf
      size: 57978034
    outs:
    - path: crime_dev.csv
      md5: bf45f519bab1a91415db273e5eb99dc6
      size: 4553128
    - path: crime_test.csv
      md5: c7ed98f4ee8b0ccc176e91030cc5949f
      size: 4552734
    - path: crime_train.csv
      md5: 6676b883d6553adecf567c502f6c766d
      size: 36177141
  # Stage that consumes the three splits; the md5/size values of its deps
  # are the same as the corresponding prepare_data outputs above.
  train:
    cmd: python3 train_model.py
    deps:
    - path: crime_dev.csv
      md5: bf45f519bab1a91415db273e5eb99dc6
      size: 4553128
    - path: crime_test.csv
      md5: c7ed98f4ee8b0ccc176e91030cc5949f
      size: 4552734
    - path: crime_train.csv
      md5: 6676b883d6553adecf567c502f6c766d
      size: 36177141
    outs:
    - path: model/saved_model.pb
      md5: e4e20efbab0ef424d1d18e54b65a25ad
      size: 106610

17
dvc.yaml Normal file
View File

@ -0,0 +1,17 @@
# DVC pipeline definition: two stages, run in dependency order.
# Each stage lists the script it executes as a dependency so that a change
# to the code (not only to the data) invalidates the stage and makes DVC
# re-run it.
stages:
  # Reads crime_conv.csv and writes the dev/test/train CSV splits.
  prepare_data:
    cmd: python3 clean_and_split_data.py
    deps:
      - clean_and_split_data.py
      - crime_conv.csv
    outs:
      - crime_dev.csv
      - crime_test.csv
      - crime_train.csv

  # Consumes the three splits produced by prepare_data and writes the model.
  train:
    cmd: python3 train_model.py
    deps:
      - train_model.py
      - crime_dev.csv
      - crime_test.csv
      - crime_train.csv
    outs:
      # NOTE(review): only this single file is tracked; if train_model.py
      # writes other files into model/, consider tracking the directory.
      - model/saved_model.pb