From 02a596a86b04ec9ba7c104aab4b3f6251cb0ad87 Mon Sep 17 00:00:00 2001 From: Kamil Guttmann Date: Sun, 5 Jun 2022 15:52:59 +0200 Subject: [PATCH] Added dvc.yaml with reproducible stages --- .gitignore | 3 +++ dvc.lock | 34 ++++++++++++++++++++++++++++++++++ dvc.yaml | 17 +++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 dvc.lock create mode 100644 dvc.yaml diff --git a/.gitignore b/.gitignore index 14b6e76..5ae0a7a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /crime_conv.csv +/crime_test.csv +/crime_dev.csv +/crime_train.csv diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 0000000..6f7aa21 --- /dev/null +++ b/dvc.lock @@ -0,0 +1,34 @@ +schema: '2.0' +stages: + prepare_data: + cmd: python3 clean_and_split_data.py + deps: + - path: crime_conv.csv + md5: 6074436a3903679501a4d230053f7acf + size: 57978034 + outs: + - path: crime_dev.csv + md5: bf45f519bab1a91415db273e5eb99dc6 + size: 4553128 + - path: crime_test.csv + md5: c7ed98f4ee8b0ccc176e91030cc5949f + size: 4552734 + - path: crime_train.csv + md5: 6676b883d6553adecf567c502f6c766d + size: 36177141 + train: + cmd: python3 train_model.py + deps: + - path: crime_dev.csv + md5: bf45f519bab1a91415db273e5eb99dc6 + size: 4553128 + - path: crime_test.csv + md5: c7ed98f4ee8b0ccc176e91030cc5949f + size: 4552734 + - path: crime_train.csv + md5: 6676b883d6553adecf567c502f6c766d + size: 36177141 + outs: + - path: model/saved_model.pb + md5: e4e20efbab0ef424d1d18e54b65a25ad + size: 106610 diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 0000000..be2050b --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,17 @@ +stages: + prepare_data: + cmd: python3 clean_and_split_data.py + deps: + - crime_conv.csv + outs: + - crime_dev.csv + - crime_test.csv + - crime_train.csv + train: + cmd: python3 train_model.py + deps: + - crime_dev.csv + - crime_test.csv + - crime_train.csv + outs: + - model/saved_model.pb