diff --git a/.dvc/.gitignore b/.dvc/.gitignore
new file mode 100644
index 0000000..528f30c
--- /dev/null
+++ b/.dvc/.gitignore
@@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
diff --git a/.dvc/config b/.dvc/config
index e69de29..3a7ae37 100644
--- a/.dvc/config
+++ b/.dvc/config
@@ -0,0 +1,4 @@
+[core]
+    remote = ium_ssh_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
\ No newline at end of file
diff --git a/dvc.yaml b/dvc.yaml
new file mode 100644
--- /dev/null
+++ b/dvc.yaml
@@ -0,0 +1,20 @@
+# DVC only discovers pipelines in a file named dvc.yaml.
+stages:
+  download:
+    cmd: scripts/load_data.sh
+
+  prepare:
+    cmd: python3 scripts/grab_avocado.py
+
+  train:
+    cmd: python3 scripts/model.py
+    deps:
+      - scripts/data/avocado.data.train
+    outs:
+      # Tracked as a stage output, so no separate predictions.csv.dvc file is added.
+      - scripts/data/predictions.csv
+    params:
+      # Parameters are read from model_params.yml, not the default params.yaml.
+      - model_params.yml:
+          - step
+          - epochs
diff --git a/jenkins/dvc.Jenkinsfile b/jenkins/dvc.Jenkinsfile
new file mode 100644
--- /dev/null
+++ b/jenkins/dvc.Jenkinsfile
@@ -0,0 +1,48 @@
+pipeline {
+    parameters {
+        string(
+            defaultValue: 'mateuszogrodowczyk',
+            description: 'Kaggle username',
+            name: 'KAGGLE_USERNAME',
+            trim: false
+        )
+        password(
+            defaultValue: '',
+            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
+            name: 'KAGGLE_KEY'
+        )
+        password(
+            defaultValue: '',
+            description: 'Password for the ium_ssh_remote DVC remote (leave empty to authenticate with the SSH key only)',
+            name: 'IUM_SFTP_PASSWORD'
+        )
+    }
+
+    agent {
+        dockerfile {
+            // NOTE(review): --build-arg values can be read back from the image history; consider a build secret for KAGGLE_KEY.
+            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
+        }
+    }
+
+    stages {
+        stage("Run DVC") {
+            steps{
+                withCredentials([
+                    sshUserPrivateKey(
+                        credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18',
+                        keyFileVariable: 'IUM_SFTP_KEY',
+                        passphraseVariable: '',
+                        usernameVariable: 'USER'
+                    )
+                ]) {
+                    sh 'dvc remote modify --local ium_ssh_remote keyfile "$IUM_SFTP_KEY"'
+                    // The remote password is supplied as a masked build parameter instead of being committed to the repository.
+                    sh 'if [ -n "$IUM_SFTP_PASSWORD" ]; then dvc remote modify --local ium_ssh_remote password "$IUM_SFTP_PASSWORD"; fi'
+                    sh 'dvc pull'
+                    sh 'dvc repro'
+                }
+            }
+        }
+    }
+}
diff --git a/model_params.yml b/model_params.yml
new file mode 100644
index 0000000..a08a404
--- /dev/null
+++ b/model_params.yml
@@ -0,0 +1,2 @@
+step: 10
+epochs: 15
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index d49070c..3fc7cc2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,7 @@ torch
 matplotlib
 sacred
 pymongo
-mlflow
\ No newline at end of file
+mlflow
+dvc
+dvc-ssh
+paramiko
\ No newline at end of file
diff --git a/scripts/data/.gitignore b/scripts/data/.gitignore
new file mode 100644
index 0000000..76d549a
--- /dev/null
+++ b/scripts/data/.gitignore
@@ -0,0 +1 @@
+/plots.png
diff --git a/scripts/data/plots.png.dvc b/scripts/data/plots.png.dvc
new file mode 100644
index 0000000..e17c626
--- /dev/null
+++ b/scripts/data/plots.png.dvc
@@ -0,0 +1,4 @@
+outs:
+- md5: 860c8fe454e7e4683620393359c90e58
+  size: 25040
+  path: plots.png