diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 0000000..50bb0a4 --- /dev/null +++ b/dvc.lock @@ -0,0 +1,94 @@ +schema: '2.0' +stages: + prepare_data: + cmd: python ./create-dataset.py + deps: + - path: create-dataset.py + hash: md5 + md5: 0903460139f5b57b9759f4de37b2d5e4 + size: 1531 + - path: creditcard.csv + hash: md5 + md5: e90efcb83d69faf99fcab8b0255024de + size: 150828752 + outs: + - path: data/X_test.csv + hash: md5 + md5: 46ff52696af9a4c06f6b25639525dda6 + size: 30947960 + - path: data/X_train.csv + hash: md5 + md5: 7505524c54858300bbd92094092a6c39 + size: 92838653 + - path: data/X_val.csv + hash: md5 + md5: 4d078882cc1898640ddaf4ad9117f543 + size: 30946540 + - path: data/creditcard.csv + hash: md5 + md5: 4b81435690147d1e624a8b06c5520629 + size: 155302541 + - path: data/y_test.csv + hash: md5 + md5: a6bc4827feae19934c4021d1f10f5963 + size: 170893 + - path: data/y_train.csv + hash: md5 + md5: 8112a5cf4faac882c421bcb7e3d42044 + size: 512656 + - path: data/y_val.csv + hash: md5 + md5: 1155f648650986d8866eba603b86560c + size: 170893 + train_model: + cmd: python ./train_model.py + deps: + - path: data/X_train.csv + hash: md5 + md5: 7505524c54858300bbd92094092a6c39 + size: 92838653 + - path: data/X_val.csv + hash: md5 + md5: 4d078882cc1898640ddaf4ad9117f543 + size: 30946540 + - path: data/y_train.csv + hash: md5 + md5: 8112a5cf4faac882c421bcb7e3d42044 + size: 512656 + - path: data/y_val.csv + hash: md5 + md5: 1155f648650986d8866eba603b86560c + size: 170893 + - path: train_model.py + hash: md5 + md5: 00b8bac043f4d7a56dec95f2f1bb1b49 + size: 1540 + outs: + - path: model/model.keras + hash: md5 + md5: 1d1df55ad26a8c0689efa4a86a86c217 + size: 1476738 + evaluate_model: + cmd: python ./predict.py + deps: + - path: data/X_test.csv + hash: md5 + md5: 46ff52696af9a4c06f6b25639525dda6 + size: 30947960 + - path: data/y_test.csv + hash: md5 + md5: a6bc4827feae19934c4021d1f10f5963 + size: 170893 + - path: model/model.keras + hash: md5 + md5: 1d1df55ad26a8c0689efa4a86a86c217 + size: 1476738 + - path: predict.py + hash: md5 + md5: a61388aabf381779b38e2f32a4d0df7b + size: 660 + outs: + - path: data/y_pred.csv + hash: md5 + md5: be150c2fbf1914102b479edbe0a4cf43 + size: 1481012 diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 0000000..0d21d6c --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,35 @@ +stages: + prepare_data: + cmd: python ./create-dataset.py + deps: + - create-dataset.py + - creditcard.csv + outs: + - data/creditcard.csv + - data/X_train.csv + - data/X_val.csv + - data/X_test.csv + - data/y_train.csv + - data/y_val.csv + - data/y_test.csv + + train_model: + cmd: python ./train_model.py + deps: + - train_model.py + - data/X_train.csv + - data/X_val.csv + - data/y_train.csv + - data/y_val.csv + outs: + - model/model.keras + + evaluate_model: + cmd: python ./predict.py + deps: + - predict.py + - model/model.keras + - data/X_test.csv + - data/y_test.csv + outs: + - data/y_pred.csv