diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000..528f30c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000..ce375a8 --- /dev/null +++ b/.dvc/config @@ -0,0 +1,4 @@ +[core] + remote = ium_ssh_remote +['remote "ium_ssh_remote"'] + url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000..5197305 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.gitignore b/.gitignore index 7cf8443..351b829 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ ipython_config.py __pycache__/ +/train +/test diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 0000000..114869e --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,23 @@ +stages: + prepare_data: + cmd: ./download.sh + deps: + - download.sh + outs: + - data/raw + + train: + cmd: python train.py + deps: + - train.py + - data/raw + outs: + - model.pth + + test: + cmd: python test.py + deps: + - test.py + - model.pth + outs: + - predictions.csv diff --git a/test.dvc b/test.dvc new file mode 100644 index 0000000..5cf7b6f --- /dev/null +++ b/test.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 31361b67b19276a528020b964be5a880.dir + size: 11777108 + nfiles: 2000 + hash: md5 + path: test diff --git a/train.dvc b/train.dvc new file mode 100644 index 0000000..1a2a87b --- /dev/null +++ b/train.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 27bf2e3326fa79daa24727ffd1a9889f.dir + size: 71490454 + nfiles: 11879 + hash: md5 + path: train