Compare commits
28 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
6ad202ae8e | ||
|
17379f6457 | ||
|
83a7308d1c | ||
|
97bdf3be9b | ||
|
84e4b8bccc | ||
|
aa48cb857b | ||
|
622ff2f7c7 | ||
|
a4f8146f48 | ||
|
b5bcb52293 | ||
|
38c5d047c0 | ||
|
3516f6a158 | ||
|
d7e13f676e | ||
|
12859f19f1 | ||
|
ba78a548b6 | ||
|
6e984faea3 | ||
|
bd8e6a897a | ||
|
3be030ad58 | ||
|
4cad8f26ee | ||
|
d591b36357 | ||
|
2a4f1b1496 | ||
|
16e4c0b97d | ||
|
66a6fd501d | ||
|
153c75df2a | ||
|
4c4b6fc7c4 | ||
|
1c07500ab4 | ||
|
b829da974b | ||
|
993b2ce5bc | ||
|
d2a813c9c6 |
3
.dvc/.gitignore
vendored
3
.dvc/.gitignore
vendored
@ -1,3 +0,0 @@
|
|||||||
/config.local
|
|
||||||
/tmp
|
|
||||||
/cache
|
|
@ -1,4 +0,0 @@
|
|||||||
[core]
|
|
||||||
remote = ium_ssh_remote
|
|
||||||
['remote "ium_ssh_remote"']
|
|
||||||
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
|
|
@ -1,3 +0,0 @@
|
|||||||
# Add patterns of files dvc should ignore, which could improve
|
|
||||||
# the performance. Learn more at
|
|
||||||
# https://dvc.org/doc/user-guide/dvcignore
|
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -1,5 +0,0 @@
|
|||||||
.vscode/settings.json
|
|
||||||
mlruns
|
|
||||||
*.csv
|
|
||||||
avocado*
|
|
||||||
*cpython*
|
|
@ -23,8 +23,4 @@ RUN chmod +x /load_data.sh
|
|||||||
RUN /load_data.sh
|
RUN /load_data.sh
|
||||||
|
|
||||||
RUN chmod +x /grab_avocado.py
|
RUN chmod +x /grab_avocado.py
|
||||||
RUN python3 /grab_avocado.py
|
RUN python3 /grab_avocado.py
|
||||||
|
|
||||||
# Run the model and train it
|
|
||||||
# RUN chmod +x /model.py
|
|
||||||
# RUN python3 /model.py
|
|
12
MLProject
Normal file
12
MLProject
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
name: s478841 regression model
|
||||||
|
|
||||||
|
docker_env:
|
||||||
|
image: s478841-image:latest
|
||||||
|
|
||||||
|
entry_points:
|
||||||
|
main:
|
||||||
|
parameters:
|
||||||
|
epochs: { type: string, default: "140" }
|
||||||
|
steps: { type: string, default: "10" }
|
||||||
|
save_model: { type: string, default: "--save" }
|
||||||
|
command: "python3 scripts/mlflow_train.py -e {epochs} -s {steps} {save_model}"
|
16
dvc.yml
16
dvc.yml
@ -1,16 +0,0 @@
|
|||||||
stages:
|
|
||||||
download:
|
|
||||||
cmd: scripts/load_data.sh
|
|
||||||
|
|
||||||
prepare:
|
|
||||||
cmd: python3 scripts/grab_avocado.py
|
|
||||||
|
|
||||||
train:
|
|
||||||
cmd: python3 scripts/model.py
|
|
||||||
deps:
|
|
||||||
- scripts/data/avocado.data.train
|
|
||||||
outs:
|
|
||||||
- scripts/data/predictions.csv
|
|
||||||
params:
|
|
||||||
- step
|
|
||||||
- epochs
|
|
@ -15,7 +15,4 @@ node {
|
|||||||
stage('Archive arifacts') {
|
stage('Archive arifacts') {
|
||||||
archiveArtifacts artifacts: '*data/avocado.data*', onlyIfSuccessful: true
|
archiveArtifacts artifacts: '*data/avocado.data*', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
stage('Delegate the model training') {
|
|
||||||
build job: 's478841-training/develop'
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -1,41 +0,0 @@
|
|||||||
pipeline {
|
|
||||||
parameters {
|
|
||||||
string(
|
|
||||||
defaultValue: 'mateuszogrodowczyk',
|
|
||||||
description: 'Kaggle username',
|
|
||||||
name: 'KAGGLE_USERNAME',
|
|
||||||
trim: false
|
|
||||||
)
|
|
||||||
password(
|
|
||||||
defaultValue: '',
|
|
||||||
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
|
|
||||||
name: 'KAGGLE_KEY'
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
agent {
|
|
||||||
dockerfile {
|
|
||||||
additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s478841-create-dataset"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stages {
|
|
||||||
stage("Run DVC") {
|
|
||||||
steps{
|
|
||||||
withCredentials([
|
|
||||||
sshUserPrivateKey(
|
|
||||||
credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18',
|
|
||||||
keyFileVariable: 'IUM_SFTP_KEY',
|
|
||||||
passphraseVariable: '',
|
|
||||||
usernameVariable: 'USER'
|
|
||||||
)
|
|
||||||
]) {
|
|
||||||
sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
|
|
||||||
sh 'dvc remote modify --local ium_ssh_remote password IUM@2021'
|
|
||||||
sh 'dvc pull'
|
|
||||||
sh 'dvc repro'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
51
jenkins/evaluate.Jenkinsfile
Normal file
51
jenkins/evaluate.Jenkinsfile
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
docker { image 's478841-image:latest' }
|
||||||
|
}
|
||||||
|
parameters {
|
||||||
|
gitParameter branchFilter: 'origin/(.*)', defaultValue: 'develop', name: 'BRANCH_NAME', type:'PT_BRANCH'
|
||||||
|
buildSelector(
|
||||||
|
defaultSelector: upstream(),
|
||||||
|
description: 'Build used for artifacts copying',
|
||||||
|
name:'BUILD_SELECTOR')
|
||||||
|
}
|
||||||
|
|
||||||
|
stages {
|
||||||
|
stage('Copy artifacts') {
|
||||||
|
steps {
|
||||||
|
git branch: "${params.BRANCH_NAME}", url: 'https://git.wmi.amu.edu.pl/s478841/ium_478841.git'
|
||||||
|
|
||||||
|
copyArtifacts filter: 'data/*test*', fingerprintArtifacts: true, projectName: 's478841-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
copyArtifacts filter: 'data/*model*', fingerprintArtifacts: true, projectName: "s478841-training/${BRANCH_NAME}/", selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
copyArtifacts filter: 'evaluation_results.csv', projectName: "s478841-evaluation/${BRANCH_NAME}/", optional: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Evaluate model') {
|
||||||
|
steps {
|
||||||
|
// sh 'chmod +x -R ${env.WORKSPACE}'
|
||||||
|
sh 'python3 scripts/evaluate.py'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Archive artifacts') {
|
||||||
|
steps {
|
||||||
|
archiveArtifacts artifacts: '*data/evaluation_results.csv', onlyIfSuccessful: true
|
||||||
|
archiveArtifacts artifacts: '*data/plots.png', onlyIfSuccessful: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
success {
|
||||||
|
emailext body: 'SUCCESS', subject: 's478841-evaluation', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
failure {
|
||||||
|
emailext body: 'FAILURE', subject: 's478841-evaluation', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
unstable {
|
||||||
|
emailext body: 'UNSTABLE', subject: 's478841-evaluation', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
changed {
|
||||||
|
emailext body: 'CHANGED', subject: 's478841-evaluation', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,34 +0,0 @@
|
|||||||
pipeline {
|
|
||||||
agent {
|
|
||||||
docker {
|
|
||||||
image 's478841-image:latest'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
parameters {
|
|
||||||
string(
|
|
||||||
defaultValue: '{\\"inputs\\": [[0.76, 0.71], [0.6, 0.73], [0.75, 0.75], [0.91, 0.85]]}',
|
|
||||||
description: 'Input data',
|
|
||||||
name: 'INPUT',
|
|
||||||
trim: true
|
|
||||||
)
|
|
||||||
buildSelector(
|
|
||||||
defaultSelector: lastSuccessful(),
|
|
||||||
description: 'Build used for artifacts copying',
|
|
||||||
name: 'BUILD_SELECTOR'
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
stages {
|
|
||||||
stage('Load artifacts') {
|
|
||||||
steps {
|
|
||||||
copyArtifacts projectName: 's444356-training/master', selector: buildParameter('BUILD_SELECTOR')
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stage('Predict using artifact') {
|
|
||||||
steps {
|
|
||||||
sh "echo ${params.INPUT} > scripts/input_example.json"
|
|
||||||
sh 'python3 scripts/predict_s444356.py'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,16 +0,0 @@
|
|||||||
pipeline {
|
|
||||||
agent {
|
|
||||||
docker {
|
|
||||||
image 's478841-image:latest'
|
|
||||||
args '-v /mlruns:/mlruns'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stages {
|
|
||||||
stage('Predict using artifacts') {
|
|
||||||
steps {
|
|
||||||
sh 'python3 scripts/predict_s444356_registry.py'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
75
jenkins/training.Jenkinsfile
Normal file
75
jenkins/training.Jenkinsfile
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
docker {
|
||||||
|
image 's478841-image:latest'
|
||||||
|
args '-v /mlruns:/mlruns'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parameters {
|
||||||
|
string(
|
||||||
|
defaultValue: '140',
|
||||||
|
description: 'epochs number',
|
||||||
|
name: 'epochs'
|
||||||
|
)
|
||||||
|
string(
|
||||||
|
defaultValue: '10',
|
||||||
|
description: 'Number of training steps between loss values logging',
|
||||||
|
name: 'step'
|
||||||
|
)
|
||||||
|
string (
|
||||||
|
defaultValue: '--save',
|
||||||
|
description: 'save model after training',
|
||||||
|
name: 'save_model'
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
stages {
|
||||||
|
stage('Checkout') {
|
||||||
|
steps {
|
||||||
|
checkout([$class: 'GitSCM', branches: [[name: '*/develop']], extensions: [], userRemoteConfigs: [
|
||||||
|
[url: 'https://git.wmi.amu.edu.pl/s478841/ium_478841.git']]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Copy Artifacts') {
|
||||||
|
steps {
|
||||||
|
copyArtifacts filter: 'data/avocado.data*', fingerprintArtifacts: true, projectName: 's478841-create-dataset', selector: lastSuccessful()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Model training') {
|
||||||
|
steps {
|
||||||
|
sh "chmod +x -R ${env.WORKSPACE}"
|
||||||
|
sh 'python3 scripts/sacred_train.py -e $epochs -s $step $save_model'
|
||||||
|
sh 'python3 scripts/mlflow_train.py -e $epochs -s $step $save_model'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Archive artifacts') {
|
||||||
|
steps {
|
||||||
|
archiveArtifacts artifacts: 'mlruns/**', onlyIfSuccessful: true
|
||||||
|
archiveArtifacts artifacts: '*data/predictions.csv', onlyIfSuccessful: true
|
||||||
|
archiveArtifacts artifacts: '*data/model_scripted*', onlyIfSuccessful: true
|
||||||
|
dir('data/training_runs') {
|
||||||
|
archiveArtifacts artifacts: '**/**', onlyIfSuccessful: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
post {
|
||||||
|
success {
|
||||||
|
emailext body: 'SUCCESS', subject: "${env.JOB_NAME}", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
build job: 's478841-evaluation/develop'
|
||||||
|
}
|
||||||
|
|
||||||
|
failure {
|
||||||
|
emailext body: 'FAILURE', subject: "${env.JOB_NAME}", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
|
||||||
|
unstable {
|
||||||
|
emailext body: 'UNSTABLE', subject: "${env.JOB_NAME}", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
|
||||||
|
changed {
|
||||||
|
emailext body: 'CHANGED', subject: "${env.JOB_NAME}", to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,2 +0,0 @@
|
|||||||
step: 10
|
|
||||||
epochs: 15
|
|
@ -2,11 +2,4 @@ kaggle
|
|||||||
pandas
|
pandas
|
||||||
numpy
|
numpy
|
||||||
sklearn
|
sklearn
|
||||||
torch
|
torch
|
||||||
matplotlib
|
|
||||||
sacred
|
|
||||||
pymongo
|
|
||||||
mlflow
|
|
||||||
dvc
|
|
||||||
dvc-ssh
|
|
||||||
paramiko
|
|
1
scripts/data/.gitignore
vendored
1
scripts/data/.gitignore
vendored
@ -1 +0,0 @@
|
|||||||
/plots.png
|
|
@ -1,4 +0,0 @@
|
|||||||
outs:
|
|
||||||
- md5: a9b426d018a0e02b44bbb85c62e3e012
|
|
||||||
size: 1989197
|
|
||||||
path: avocado.csv
|
|
@ -1,4 +0,0 @@
|
|||||||
outs:
|
|
||||||
- md5: 860c8fe454e7e4683620393359c90e58
|
|
||||||
size: 25040
|
|
||||||
path: plots.png
|
|
@ -1,4 +0,0 @@
|
|||||||
outs:
|
|
||||||
- md5: 4482cb25938b104e670ad748014354dc
|
|
||||||
size: 56401
|
|
||||||
path: predictions.csv
|
|
@ -1,107 +0,0 @@
|
|||||||
name: base
|
|
||||||
channels:
|
|
||||||
- conda-forge
|
|
||||||
- defaults
|
|
||||||
dependencies:
|
|
||||||
- _libgcc_mutex=0.1=main
|
|
||||||
- _openmp_mutex=4.5=1_gnu
|
|
||||||
- alembic=1.7.7=pyhd8ed1ab_0
|
|
||||||
- appdirs=1.4.4=pyh9f0ad1d_0
|
|
||||||
- asn1crypto=1.5.1=pyhd8ed1ab_0
|
|
||||||
- blas=1.0=openblas
|
|
||||||
- bottleneck=1.3.4=py38hce1f21e_0
|
|
||||||
- brotlipy=0.7.0=py38h27cfd23_1003
|
|
||||||
- ca-certificates=2022.5.18.1=ha878542_0
|
|
||||||
- certifi=2022.5.18.1=py38h578d9bd_0
|
|
||||||
- cffi=1.15.0=py38hd667e15_1
|
|
||||||
- charset-normalizer=2.0.4=pyhd3eb1b0_0
|
|
||||||
- click=8.1.3=py38h578d9bd_0
|
|
||||||
- cloudpickle=2.1.0=pyhd8ed1ab_0
|
|
||||||
- colorama=0.4.4=pyhd3eb1b0_0
|
|
||||||
- conda=4.12.0=py38h578d9bd_0
|
|
||||||
- conda-content-trust=0.1.1=pyhd3eb1b0_0
|
|
||||||
- conda-package-handling=1.8.1=py38h7f8727e_0
|
|
||||||
- configparser=5.2.0=pyhd8ed1ab_0
|
|
||||||
- cryptography=36.0.0=py38h9ce1e76_0
|
|
||||||
- databricks-cli=0.12.1=pyhd8ed1ab_0
|
|
||||||
- docker-py=5.0.3=py38h578d9bd_2
|
|
||||||
- docker-pycreds=0.4.0=py_0
|
|
||||||
- entrypoints=0.4=pyhd8ed1ab_0
|
|
||||||
- flask=2.1.2=pyhd8ed1ab_1
|
|
||||||
- gitdb=4.0.9=pyhd8ed1ab_0
|
|
||||||
- gitpython=3.1.27=pyhd8ed1ab_0
|
|
||||||
- greenlet=1.1.1=py38h295c915_0
|
|
||||||
- gunicorn=20.1.0=py38h578d9bd_2
|
|
||||||
- idna=3.3=pyhd3eb1b0_0
|
|
||||||
- importlib-metadata=4.11.3=py38h578d9bd_1
|
|
||||||
- importlib_resources=5.7.1=pyhd8ed1ab_1
|
|
||||||
- itsdangerous=2.1.2=pyhd8ed1ab_0
|
|
||||||
- jinja2=3.1.2=pyhd8ed1ab_0
|
|
||||||
- kaggle=1.5.12=pyhd8ed1ab_4
|
|
||||||
- ld_impl_linux-64=2.35.1=h7274673_9
|
|
||||||
- libblas=3.9.0=11_linux64_openblas
|
|
||||||
- libcblas=3.9.0=11_linux64_openblas
|
|
||||||
- libffi=3.3=he6710b0_2
|
|
||||||
- libgcc-ng=9.3.0=h5101ec6_17
|
|
||||||
- libgfortran-ng=12.1.0=h69a702a_16
|
|
||||||
- libgfortran5=12.1.0=hdcd56e2_16
|
|
||||||
- libgomp=9.3.0=h5101ec6_17
|
|
||||||
- liblapack=3.9.0=11_linux64_openblas
|
|
||||||
- libopenblas=0.3.17=pthreads_h8fe5266_1
|
|
||||||
- libprotobuf=3.15.8=h780b84a_0
|
|
||||||
- libstdcxx-ng=9.3.0=hd4cf53a_17
|
|
||||||
- mako=1.2.0=pyhd8ed1ab_1
|
|
||||||
- markupsafe=2.0.1=py38h497a2fe_0
|
|
||||||
- mlflow=1.26.0=py38he918c71_0
|
|
||||||
- ncurses=6.3=h7f8727e_2
|
|
||||||
- numexpr=2.8.1=py38hecfb737_0
|
|
||||||
- numpy=1.20.3=py38h9894fe3_1
|
|
||||||
- openssl=1.1.1o=h7f8727e_0
|
|
||||||
- packaging=21.3=pyhd8ed1ab_0
|
|
||||||
- pandas=1.4.2=py38h295c915_0
|
|
||||||
- pip=21.2.4=py38h06a4308_0
|
|
||||||
- prometheus_client=0.14.1=pyhd8ed1ab_0
|
|
||||||
- prometheus_flask_exporter=0.20.1=pyhd8ed1ab_0
|
|
||||||
- protobuf=3.15.8=py38h709712a_0
|
|
||||||
- pycosat=0.6.3=py38h7b6447c_1
|
|
||||||
- pycparser=2.21=pyhd3eb1b0_0
|
|
||||||
- pymongo=3.10.1=py38h950e882_2
|
|
||||||
- pyopenssl=22.0.0=pyhd3eb1b0_0
|
|
||||||
- pyparsing=3.0.9=pyhd8ed1ab_0
|
|
||||||
- pysocks=1.7.1=py38h06a4308_0
|
|
||||||
- python=3.8.13=h12debd9_0
|
|
||||||
- python-dateutil=2.8.2=pyhd8ed1ab_0
|
|
||||||
- python-slugify=6.1.2=pyhd8ed1ab_0
|
|
||||||
- python_abi=3.8=2_cp38
|
|
||||||
- pytz=2022.1=pyhd8ed1ab_0
|
|
||||||
- querystring_parser=1.2.4=py_0
|
|
||||||
- readline=8.1.2=h7f8727e_1
|
|
||||||
- requests=2.27.1=pyhd3eb1b0_0
|
|
||||||
- ruamel_yaml=0.15.100=py38h27cfd23_0
|
|
||||||
- scipy=1.5.3=py38hb2138dd_0
|
|
||||||
- setuptools=61.2.0=py38h06a4308_0
|
|
||||||
- six=1.16.0=pyhd3eb1b0_1
|
|
||||||
- smmap=3.0.5=pyh44b312d_0
|
|
||||||
- sqlalchemy=1.4.13=py38h497a2fe_0
|
|
||||||
- sqlite=3.38.2=hc218d9a_0
|
|
||||||
- sqlparse=0.4.2=pyhd8ed1ab_0
|
|
||||||
- tabulate=0.8.9=pyhd8ed1ab_0
|
|
||||||
- tenacity=8.0.1=pyhd8ed1ab_0
|
|
||||||
- text-unidecode=1.3=py_0
|
|
||||||
- tk=8.6.11=h1ccaba5_0
|
|
||||||
- tqdm=4.63.0=pyhd3eb1b0_0
|
|
||||||
- typing_extensions=4.2.0=pyha770c72_1
|
|
||||||
- unidecode=1.3.4=pyhd8ed1ab_0
|
|
||||||
- urllib3=1.26.8=pyhd3eb1b0_0
|
|
||||||
- websocket-client=1.3.2=pyhd8ed1ab_0
|
|
||||||
- werkzeug=2.1.2=pyhd8ed1ab_1
|
|
||||||
- wheel=0.37.1=pyhd3eb1b0_0
|
|
||||||
- xz=5.2.5=h7b6447c_0
|
|
||||||
- yaml=0.2.5=h7b6447c_0
|
|
||||||
- zipp=3.8.0=pyhd8ed1ab_0
|
|
||||||
- zlib=1.2.12=h7f8727e_1
|
|
||||||
- pip:
|
|
||||||
- docopt==0.6.2
|
|
||||||
- oauthlib==3.2.0
|
|
||||||
- pyjwt==2.4.0
|
|
||||||
- pyyaml==6.0
|
|
42
scripts/evaluate.py
Normal file
42
scripts/evaluate.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
from model import AvocadoDataset, evaluate_model
|
||||||
|
from torch.utils.data import DataLoader
|
||||||
|
from torch.jit import load as load_model
|
||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.style.use('ggplot')
|
||||||
|
|
||||||
|
|
||||||
|
# * Load the test data
|
||||||
|
test_data = DataLoader(AvocadoDataset(
|
||||||
|
'./data/avocado.data.test'), batch_size=1, shuffle=False)
|
||||||
|
|
||||||
|
# * Load the model
|
||||||
|
model = load_model('./data/model_scripted.pt')
|
||||||
|
model.eval()
|
||||||
|
|
||||||
|
# * Append new inference data
|
||||||
|
with open('./data/evaluation_results.csv', 'a+') as f:
|
||||||
|
f.write("{0},{1},{2}\n".format(*evaluate_model(test_data, model)))
|
||||||
|
|
||||||
|
# * Load all inference data gathered (till the current one)
|
||||||
|
results = pd.read_csv('./data/evaluation_results.csv',
|
||||||
|
names=['MSE', 'RMSE', 'MAE'])
|
||||||
|
|
||||||
|
# * Plot the results
|
||||||
|
plt.plot(range(1, len(results)+1), results['MSE'], color='green')
|
||||||
|
plt.scatter(range(1, len(results)+1),
|
||||||
|
results['MSE'], label='MSE', color='green', marker='.')
|
||||||
|
plt.plot(range(1, len(results)+1), results['RMSE'], color='darkred')
|
||||||
|
plt.scatter(range(1, len(results)+1),
|
||||||
|
results['RMSE'], label='RMSE', color='darkorange', marker='.')
|
||||||
|
plt.plot(range(1, len(results)+1), results['MAE'], color='blue')
|
||||||
|
plt.scatter(range(1, len(results)+1),
|
||||||
|
results['MAE'], label='MAE', color='blue', marker='.')
|
||||||
|
plt.xticks(range(1, len(results)+1))
|
||||||
|
plt.ylabel('Metric value')
|
||||||
|
plt.xlabel('Build number')
|
||||||
|
plt.legend()
|
||||||
|
|
||||||
|
# * Save figure
|
||||||
|
plt.savefig('data/plots.png')
|
@ -6,10 +6,7 @@ cols = list(pd.read_csv("data/avocado.csv", nrows=1))
|
|||||||
# print("###\n", cols, "\n###")
|
# print("###\n", cols, "\n###")
|
||||||
avocados = pd.read_csv(
|
avocados = pd.read_csv(
|
||||||
"data/avocado.csv").rename(columns={"Unnamed: 0": 'Week'})
|
"data/avocado.csv").rename(columns={"Unnamed: 0": 'Week'})
|
||||||
print(avocados.describe(include="all"))
|
avocados.describe(include="all")
|
||||||
avg_prices = avocados['AveragePrice']
|
|
||||||
avocados.drop(['AveragePrice'], axis=1, inplace=True)
|
|
||||||
|
|
||||||
|
|
||||||
# * Retrieve the target column
|
# * Retrieve the target column
|
||||||
# y = avocados.AveragePrice
|
# y = avocados.AveragePrice
|
||||||
@ -46,8 +43,7 @@ print(all_cols)
|
|||||||
# avocados = pd.concat([avocados, ohe_df], axis=1)
|
# avocados = pd.concat([avocados, ohe_df], axis=1)
|
||||||
# * Time for normalization
|
# * Time for normalization
|
||||||
mM = MinMaxScaler()
|
mM = MinMaxScaler()
|
||||||
avocados_normed = pd.concat([avg_prices, pd.DataFrame(
|
avocados_normed = pd.DataFrame(mM.fit_transform(avocados.values), columns=all_cols)
|
||||||
mM.fit_transform(avocados.values), columns=all_cols)], axis=1)
|
|
||||||
|
|
||||||
print(avocados_normed.head())
|
print(avocados_normed.head())
|
||||||
|
|
||||||
|
212
scripts/mlflow_train.py
Normal file
212
scripts/mlflow_train.py
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
from urllib.parse import urlparse
|
||||||
|
import mlflow
|
||||||
|
import mlflow.pytorch as model_logger
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from torch import nn
|
||||||
|
from torch.utils import data as t_u_data
|
||||||
|
|
||||||
|
|
||||||
|
# mlflow.set_tracking_uri("http://localhost:5000")
|
||||||
|
mlflow.set_tracking_uri("http://172.17.0.1:5000")
|
||||||
|
mlflow.set_experiment("s478841")
|
||||||
|
|
||||||
|
|
||||||
|
# * Customized Dataset class (base provided by PyTorch)
|
||||||
|
class AvocadoDataset(t_u_data.Dataset):
|
||||||
|
def __init__(self, path: str, target: str = 'AveragePrice'):
|
||||||
|
data = pd.read_csv(path)
|
||||||
|
y = data[target].values.astype('float32')
|
||||||
|
self.y = y.reshape((len(y), 1))
|
||||||
|
self.x_data = data.drop(
|
||||||
|
[target], axis=1).values.astype('float32')
|
||||||
|
self.x_shape = data.drop([target], axis=1).shape
|
||||||
|
# print("Data shape is: ", self.x_data.shape)
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.x_data)
|
||||||
|
|
||||||
|
def __getitem__(self, idx):
|
||||||
|
return [self.x_data[idx], self.y[idx]]
|
||||||
|
|
||||||
|
def get_shape(self):
|
||||||
|
return self.x_shape
|
||||||
|
|
||||||
|
def get_splits(self, n_test=0.33):
|
||||||
|
test_size = round(n_test * len(self.x_data))
|
||||||
|
train_size = len(self.x_data) - test_size
|
||||||
|
return t_u_data.random_split(self, [train_size, test_size])
|
||||||
|
|
||||||
|
|
||||||
|
class AvocadoRegressor(nn.Module):
|
||||||
|
def __init__(self, input_dim):
|
||||||
|
super(AvocadoRegressor, self).__init__()
|
||||||
|
self.hidden1 = nn.Linear(input_dim, 32)
|
||||||
|
nn.init.xavier_uniform_(self.hidden1.weight)
|
||||||
|
self.act1 = nn.ReLU()
|
||||||
|
self.hidden2 = nn.Linear(32, 8)
|
||||||
|
nn.init.xavier_uniform_(self.hidden2.weight)
|
||||||
|
self.act2 = nn.ReLU()
|
||||||
|
self.hidden3 = nn.Linear(8, 1)
|
||||||
|
nn.init.xavier_uniform_(self.hidden3.weight)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
x = self.hidden1(x)
|
||||||
|
x = self.act1(x)
|
||||||
|
x = self.hidden2(x)
|
||||||
|
x = self.act2(x)
|
||||||
|
x = self.hidden3(x)
|
||||||
|
return x
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_data(paths):
|
||||||
|
train_dl = t_u_data.DataLoader(AvocadoDataset(
|
||||||
|
paths[0]), batch_size=32, shuffle=True)
|
||||||
|
validate_dl = t_u_data.DataLoader(AvocadoDataset(
|
||||||
|
paths[1]), batch_size=128, shuffle=True)
|
||||||
|
test_dl = t_u_data.DataLoader(AvocadoDataset(
|
||||||
|
paths[2]), batch_size=1, shuffle=False)
|
||||||
|
return train_dl, validate_dl, test_dl
|
||||||
|
|
||||||
|
|
||||||
|
def train_model(train_dl, model, epochs, log_step):
|
||||||
|
criterion = nn.MSELoss()
|
||||||
|
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
|
||||||
|
to_compare = None
|
||||||
|
metrics = None
|
||||||
|
for epoch in range(1, epochs+1):
|
||||||
|
for _, (inputs, targets) in enumerate(train_dl):
|
||||||
|
optimizer.zero_grad()
|
||||||
|
yhat = model(inputs)
|
||||||
|
# * For loss value inspection
|
||||||
|
to_compare = (yhat, targets)
|
||||||
|
loss = criterion(yhat, targets)
|
||||||
|
loss.backward()
|
||||||
|
optimizer.step()
|
||||||
|
if epoch == 1 or (epoch) % log_step == 0:
|
||||||
|
result, target = to_compare[0].detach(
|
||||||
|
).numpy(), to_compare[1].detach().numpy()
|
||||||
|
metrics = {'train.mse': mean_squared_error(target, result),
|
||||||
|
'train.mae': mean_absolute_error(target, result),
|
||||||
|
'train.rmse': mean_squared_error(target, result, squared=False)}
|
||||||
|
# _run.log_scalar("training.RMSE", np.sqrt(mse), epoch)
|
||||||
|
# _run.log_scalar("training.MAE", mae, epoch)
|
||||||
|
# _run.log_scalar('training.MSE', mse, epoch)
|
||||||
|
print(
|
||||||
|
f"Epoch {epoch}\t→\tMSE: {metrics['train.mse']},\tRMSE: {metrics['train.rmse']},\tMAE: {metrics['train.mae']}")
|
||||||
|
return metrics
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_model(test_dl, model):
|
||||||
|
predictions, actuals = list(), list()
|
||||||
|
for _, (inputs, targets) in enumerate(test_dl):
|
||||||
|
yhat = model(inputs)
|
||||||
|
# * retrieve numpy array
|
||||||
|
yhat = yhat.detach().numpy()
|
||||||
|
actual = targets.numpy()
|
||||||
|
actual = actual.reshape((len(actual), 1))
|
||||||
|
# * store predictions
|
||||||
|
predictions.append(yhat)
|
||||||
|
actuals.append(actual)
|
||||||
|
predictions, actuals = np.vstack(predictions), np.vstack(actuals)
|
||||||
|
# * return MSE value
|
||||||
|
mse = mean_squared_error(actuals, predictions)
|
||||||
|
rmse = mean_squared_error(actuals, predictions, squared=False)
|
||||||
|
mae = mean_absolute_error(actuals, predictions)
|
||||||
|
return mse, rmse, mae
|
||||||
|
|
||||||
|
|
||||||
|
def predict(row, model):
|
||||||
|
row = row[0].flatten()
|
||||||
|
yhat = model(row)
|
||||||
|
yhat = yhat.detach().numpy()
|
||||||
|
return yhat
|
||||||
|
|
||||||
|
|
||||||
|
def main(epochs, save_model, log_step):
|
||||||
|
print(
|
||||||
|
f"Your model will be trained for {epochs} epochs, logging every {log_step} steps. Trained model will {'not ' if save_model else ''}be saved.")
|
||||||
|
|
||||||
|
# * Paths to data
|
||||||
|
avocado_data = ['./data/avocado.data.train',
|
||||||
|
'./data/avocado.data.valid',
|
||||||
|
'./data/avocado.data.test']
|
||||||
|
|
||||||
|
# * Data preparation
|
||||||
|
train_dl, validate_dl, test_dl = prepare_data(paths=avocado_data)
|
||||||
|
print(f"""
|
||||||
|
Train set size: {len(train_dl.dataset)},
|
||||||
|
Validate set size: {len(validate_dl.dataset)}
|
||||||
|
Test set size: {len(test_dl.dataset)}
|
||||||
|
""")
|
||||||
|
|
||||||
|
# * Model definition
|
||||||
|
# ! 66 - in case only regions and type are used (among all the categorical vals)
|
||||||
|
model = AvocadoRegressor(235)
|
||||||
|
|
||||||
|
# * Train model
|
||||||
|
print("Let's start the training, mate!")
|
||||||
|
with mlflow.start_run() as run:
|
||||||
|
print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
|
||||||
|
print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
|
||||||
|
metrics = train_model(train_dl=train_dl, model=model,
|
||||||
|
epochs=epochs, log_step=log_step)
|
||||||
|
mlflow.log_param('epochs', epochs)
|
||||||
|
mlflow.log_metrics(metrics)
|
||||||
|
|
||||||
|
# * Evaluate model
|
||||||
|
val_metrics = {key: val for key, val in zip(
|
||||||
|
['validate.mse', 'validate.rmse', 'validate.mae'], evaluate_model(validate_dl, model))}
|
||||||
|
print(
|
||||||
|
f"\nEvaluation on VALIDATION set\t→\tMSE: {val_metrics['validate.mse']}, RMSE: {val_metrics['validate.rmse']}, MAE: {val_metrics['validate.mae']}")
|
||||||
|
mlflow.log_metrics(val_metrics)
|
||||||
|
|
||||||
|
test_loss = {key: val for key, val in zip(
|
||||||
|
['test.mse', 'test.rmse', 'test.mae'], evaluate_model(test_dl, model))}
|
||||||
|
print(
|
||||||
|
f"\nEvaluation on TEST set\t→\tMSE: {test_loss['test.mse']}, RMSE: {test_loss['test.rmse']}, MAE: {test_loss['test.mae']}")
|
||||||
|
mlflow.log_metrics(test_loss)
|
||||||
|
|
||||||
|
# tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
|
||||||
|
|
||||||
|
# if tracking_url_type_store != 'file':
|
||||||
|
# print('First option')
|
||||||
|
# model_logger.log_model(
|
||||||
|
# model, "avocados-model", registered_model_name="AvocadoModel_478841")
|
||||||
|
# else:
|
||||||
|
# print('Second option')
|
||||||
|
# model_logger.log_model(model, "model")
|
||||||
|
|
||||||
|
|
||||||
|
# * Save the trained model
|
||||||
|
if save_model:
|
||||||
|
print("Your model has been saved - have a nice day!")
|
||||||
|
scripted_model = torch.jit.script(model)
|
||||||
|
scripted_model.save('./data/model_scripted.pt')
|
||||||
|
# ex.add_artifact('./data/model_scripted.pt')
|
||||||
|
|
||||||
|
|
||||||
|
# ex.run()
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# * Model parameters
|
||||||
|
parser = argparse.ArgumentParser(description="Script performing logistic regression model training",
|
||||||
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
|
parser.add_argument(
|
||||||
|
"-e", "--epochs", default=100, help="Number of epochs the model will be trained for")
|
||||||
|
parser.add_argument(
|
||||||
|
"-s", "--step", default=10, help="Number of steps to repeat logging loss values on")
|
||||||
|
parser.add_argument("--save", action="store_true",
|
||||||
|
help="Save trained model to file 'trained_model.h5'")
|
||||||
|
|
||||||
|
args = vars(parser.parse_args())
|
||||||
|
|
||||||
|
epochs = int(args['epochs'])
|
||||||
|
save_model = args['save']
|
||||||
|
log_step = int(args['step'])
|
||||||
|
|
||||||
|
main(epochs, save_model, log_step)
|
@ -1,6 +1,8 @@
|
|||||||
|
import argparse
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
@ -99,7 +101,10 @@ def evaluate_model(test_dl, model):
|
|||||||
actuals.append(actual)
|
actuals.append(actual)
|
||||||
predictions, actuals = np.vstack(predictions), np.vstack(actuals)
|
predictions, actuals = np.vstack(predictions), np.vstack(actuals)
|
||||||
# * return MSE value
|
# * return MSE value
|
||||||
return mean_squared_error(actuals, predictions)
|
mse = mean_squared_error(actuals, predictions)
|
||||||
|
rmse = mean_squared_error(actuals, predictions, squared=False)
|
||||||
|
mae = mean_absolute_error(actuals, predictions)
|
||||||
|
return mse, rmse, mae
|
||||||
|
|
||||||
|
|
||||||
def predict(row, model):
|
def predict(row, model):
|
||||||
@ -111,6 +116,21 @@ def predict(row, model):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
# * Model parameters
|
||||||
|
parser = argparse.ArgumentParser(description="Script performing logistic regression model training",
|
||||||
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
|
parser.add_argument(
|
||||||
|
"-e", "--epochs", default=100, help="Number of epochs the model will be trained for")
|
||||||
|
parser.add_argument("--save", action="store_true",
|
||||||
|
help="Save trained model to file 'trained_model.h5'")
|
||||||
|
|
||||||
|
args = vars(parser.parse_args())
|
||||||
|
|
||||||
|
epochs = args['epochs']
|
||||||
|
save_model = args['save']
|
||||||
|
print(
|
||||||
|
f"Your model will be trained for {epochs} epochs. Trained model will {'not ' if save_model else ''}be saved.")
|
||||||
|
|
||||||
# * Paths to data
|
# * Paths to data
|
||||||
avocado_train = './data/avocado.data.train'
|
avocado_train = './data/avocado.data.train'
|
||||||
avocado_valid = './data/avocado.data.valid'
|
avocado_valid = './data/avocado.data.valid'
|
||||||
@ -135,14 +155,21 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# * Train model
|
# * Train model
|
||||||
print("Let's start the training, mate!")
|
print("Let's start the training, mate!")
|
||||||
train_model(train_dl, model)
|
train_model(train_dl, model, int(epochs))
|
||||||
|
|
||||||
# * Evaluate model
|
# * Evaluate model
|
||||||
mse = evaluate_model(validate_dl, model)
|
mse, rmse, mae = evaluate_model(validate_dl, model)
|
||||||
print(f"\nEvaluation\t→\tMSE: {mse}, RMSE: {np.sqrt(mse)}")
|
print(f"\nEvaluation\t→\tMSE: {mse}, RMSE: {rmse}, MAE: {mae}")
|
||||||
|
|
||||||
# * Prediction
|
# * Prediction
|
||||||
predictions = [(predict(row, model)[0], row[1].item()) for row in test_dl]
|
predictions = [(predict(row, model)[0], row[1].item()) for row in test_dl]
|
||||||
preds_df = pd.DataFrame(predictions, columns=["Prediction", "Target"])
|
preds_df = pd.DataFrame(predictions, columns=["Prediction", "Target"])
|
||||||
print("\nNow predictions - hey ho, let's go!\n", preds_df.head())
|
print("\nNow predictions - hey ho, let's go!\n",
|
||||||
|
preds_df.head(), "\n\n...let's save them\ndum...\ndum...\ndum dum dum...\n\tDUM\n")
|
||||||
preds_df.to_csv("./data/predictions.csv", index=False)
|
preds_df.to_csv("./data/predictions.csv", index=False)
|
||||||
|
|
||||||
|
# * Save the trained model
|
||||||
|
if save_model:
|
||||||
|
print("Your model has been saved - have a nice day!")
|
||||||
|
scripted_model = torch.jit.script(model)
|
||||||
|
scripted_model.save('./data/model_scripted.pt')
|
||||||
|
@ -1,10 +0,0 @@
|
|||||||
import mlflow
|
|
||||||
import numpy as np
|
|
||||||
import json
|
|
||||||
|
|
||||||
|
|
||||||
# * Load the logged model from a fixed MLflow run directory
model = mlflow.pyfunc.load_model(
    model_uri='mlruns/1/4b83e774512444188fb587288818c298/artifacts/model')

# * Only the first entry of "inputs" from the example file is used
with open('scripts/input_example.json') as f:
    first_input = json.load(f)['inputs'][0]

# * The sample is reshaped into rows of two values before prediction
data = np.array([first_input], dtype=np.float64)
print(f"Predicted values: {model.predict(data.reshape(-1, 2))}")
|
|
@ -1,12 +0,0 @@
|
|||||||
import mlflow
|
|
||||||
import numpy as np
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
|
|
||||||
|
|
||||||
# * Debug listing of the scripts directory
scripts_listing = os.listdir('scripts/')
print('################\n\n', scripts_listing, "\n\n###############")

# * Load the logged model from a fixed MLflow run directory
model = mlflow.pyfunc.load_model(
    model_uri='/mlruns/13/da5c6167bb45403fa35569849a1fbc13/artifacts/model')

# * Only the first entry of "inputs" from the logged input example is used
with open('/mlruns/13/da5c6167bb45403fa35569849a1fbc13/artifacts/model/input_example.json') as f:
    first_input = json.load(f)['inputs'][0]

# * The sample is reshaped into rows of two values before prediction
data = np.array([first_input], dtype=np.float64)
print(f"Predicted values: {model.predict(data.reshape(-1, 2))}")
|
|
206
scripts/sacred_train.py
Normal file
206
scripts/sacred_train.py
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
from sacred import Experiment
|
||||||
|
from sacred.observers import FileStorageObserver, MongoObserver
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from torch import nn
|
||||||
|
from torch.utils import data as t_u_data
|
||||||
|
|
||||||
|
|
||||||
|
# * Sacred experiment object used by the @ex.config / @ex.capture / @ex.main
# * functions of this script
ex = Experiment(
    "478841 sacred_scopes",
    interactive=True,
    save_git_info=False,
)

# * Attach two observers: a MongoDB one and a file-storage one
# ! NOTE(review): the MongoDB credentials are embedded in the URL below -
# ! consider supplying them from outside the source file
ex.observers.extend([
    MongoObserver(
        url='mongodb://admin:IUM_2021@172.17.0.1:27017',
        db_name='sacred',
    ),
    FileStorageObserver('./data/training_runs'),
])
|
||||||
|
|
||||||
|
|
||||||
|
@ex.config
def my_config():
    """Build the experiment configuration from command-line arguments."""
    # * The signature is deliberately left bare (no annotations): Sacred
    # * processes the source of config functions itself.
    parser = argparse.ArgumentParser(
        description="Script performing regression model training",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # * type=int lets argparse reject non-numeric values with a usage message
    parser.add_argument(
        "-e", "--epochs", type=int, default=100,
        help="Number of epochs the model will be trained for")
    parser.add_argument(
        "-s", "--step", type=int, default=10,
        help="Log loss values every this many epochs")
    parser.add_argument(
        "--save", action="store_true",
        help="Save trained model to file './data/model_scripted.pt'")

    args = vars(parser.parse_args())

    # * Number of training epochs
    epochs = int(args['epochs'])
    # * Whether the trained model is written to disk
    save_model = args['save']
    # * Interval (in epochs) between logged training metrics
    log_step = int(args['step'])
|
||||||
|
|
||||||
|
|
||||||
|
# * Customized Dataset class (base provided by PyTorch)
|
||||||
|
class AvocadoDataset(t_u_data.Dataset):
    """Avocado table exposed as a PyTorch dataset.

    The CSV at ``path`` is read with pandas. The ``target`` column becomes
    the label and every remaining column becomes a feature; both are stored
    as ``float32`` arrays.
    """

    def __init__(self, path: str, target: str = 'AveragePrice'):
        data = pd.read_csv(path)

        # * Labels are kept as a column vector: one row per sample
        y = data[target].values.astype('float32')
        self.y = y.reshape((len(y), 1))

        # * Drop the target once and reuse the result for values and shape
        features = data.drop([target], axis=1)
        self.x_data = features.values.astype('float32')
        self.x_shape = features.shape

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return ``[features, label]`` for the sample at ``idx``."""
        return [self.x_data[idx], self.y[idx]]

    def get_shape(self):
        """Return the ``(rows, columns)`` shape of the feature table."""
        return self.x_shape

    def get_splits(self, n_test=0.33):
        """Randomly split the dataset into train and test subsets.

        ``n_test`` is the fraction of samples assigned to the test subset;
        the returned pair is ``(train_subset, test_subset)``.
        """
        test_size = round(n_test * len(self.x_data))
        train_size = len(self.x_data) - test_size
        return t_u_data.random_split(self, [train_size, test_size])
|
||||||
|
|
||||||
|
|
||||||
|
class AvocadoRegressor(nn.Module):
    """Feed-forward regressor: two ReLU hidden layers and one linear output.

    Args:
        input_dim: number of input features.
        hidden1_dim: width of the first hidden layer (default 32).
        hidden2_dim: width of the second hidden layer (default 8).
    """

    def __init__(self, input_dim, hidden1_dim=32, hidden2_dim=8):
        super().__init__()
        # * Each layer's weights are re-initialised right after the layer is
        # * created, so the order of random draws stays the same as before
        self.hidden1 = nn.Linear(input_dim, hidden1_dim)
        nn.init.xavier_uniform_(self.hidden1.weight)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(hidden1_dim, hidden2_dim)
        nn.init.xavier_uniform_(self.hidden2.weight)
        self.act2 = nn.ReLU()
        self.hidden3 = nn.Linear(hidden2_dim, 1)
        nn.init.xavier_uniform_(self.hidden3.weight)

    def forward(self, x):
        """Map a batch of feature rows to one predicted value per row."""
        x = self.act1(self.hidden1(x))
        x = self.act2(self.hidden2(x))
        return self.hidden3(x)
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_data(paths):
    """Create the train, validation and test data loaders.

    ``paths`` is indexed as ``[train, validation, test]``. The loaders use
    batch sizes 32, 128 and 1 respectively; only the test loader keeps the
    original sample order.
    """
    # * (csv path, batch size, shuffle) for each of the three loaders
    loader_settings = (
        (paths[0], 32, True),
        (paths[1], 128, True),
        (paths[2], 1, False),
    )
    train_dl, validate_dl, test_dl = (
        t_u_data.DataLoader(
            AvocadoDataset(csv_path), batch_size=size, shuffle=reshuffle)
        for csv_path, size, reshuffle in loader_settings
    )
    return train_dl, validate_dl, test_dl
|
||||||
|
|
||||||
|
|
||||||
|
@ex.capture
def train_model(train_dl, model, epochs, log_step, _run):
    """Train ``model`` with SGD on an MSE loss and log metrics to the run.

    Metrics (MSE, RMSE, MAE) are computed on the last batch of the epoch
    only, and are logged on the first epoch and on every epoch that is a
    multiple of ``log_step``.

    Args:
        train_dl: iterable of ``(inputs, targets)`` batches.
        model: module being optimised in place.
        epochs: number of passes over ``train_dl``.
        log_step: logging interval in epochs; ``0`` logs the first epoch only.
        _run: Sacred run object receiving the ``training.*`` scalars (the
            caller in this file does not pass it explicitly).

    Raises:
        ValueError: if metrics are due but ``train_dl`` produced no batches.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    last_batch = None

    for epoch in range(1, epochs + 1):
        for inputs, targets in train_dl:
            optimizer.zero_grad()
            yhat = model(inputs)
            # * Kept for loss value inspection after the epoch
            last_batch = (yhat, targets)
            loss = criterion(yhat, targets)

            loss.backward()
            optimizer.step()

        # * A zero interval used to crash with ZeroDivisionError on epoch 2
        should_log = epoch == 1 or (log_step != 0 and epoch % log_step == 0)
        if not should_log:
            continue

        if last_batch is None:
            raise ValueError(
                "train_dl produced no batches, so there is nothing to train on")

        result = last_batch[0].detach().numpy()
        target = last_batch[1].detach().numpy()
        mse = mean_squared_error(target, result)
        mae = mean_absolute_error(target, result)
        rmse = np.sqrt(mse)
        _run.log_scalar("training.RMSE", rmse, epoch)
        _run.log_scalar("training.MAE", mae, epoch)
        _run.log_scalar('training.MSE', mse, epoch)

        print(
            f"Epoch {epoch}\t→\tMSE: {mse},\tRMSE: {rmse},\tMAE: {mae}")
|
||||||
|
|
||||||
|
|
||||||
|
def evaluate_model(test_dl, model):
    """Run ``model`` over ``test_dl`` and return ``(mse, rmse, mae)``.

    Args:
        test_dl: iterable of ``(inputs, targets)`` batches.
        model: module used for prediction.

    Returns:
        Tuple ``(mse, rmse, mae)`` computed over all batches together.
    """
    predictions, actuals = [], []
    # * No gradients are needed for evaluation
    with torch.no_grad():
        for inputs, targets in test_dl:
            # * retrieve numpy arrays
            yhat = model(inputs).detach().numpy()
            actual = targets.numpy()
            # * store predictions, targets as column vectors
            predictions.append(yhat)
            actuals.append(actual.reshape((len(actual), 1)))
    predictions, actuals = np.vstack(predictions), np.vstack(actuals)

    mse = mean_squared_error(actuals, predictions)
    # * RMSE is derived from MSE instead of the `squared=False` argument,
    # * which newer scikit-learn releases no longer accept. The two agree
    # * here because the targets are a single column.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actuals, predictions)
    return mse, rmse, mae
|
||||||
|
|
||||||
|
|
||||||
|
def predict(row, model):
    """Return the model output for a single sample as a numpy array.

    Args:
        row: sequence whose first element is the feature tensor; it is
            flattened to one dimension before being passed to ``model``.
        model: module used for prediction.
    """
    features = row[0].flatten()
    # * Inference only - skip building the autograd graph
    with torch.no_grad():
        yhat = model(features)
    return yhat.detach().numpy()
|
||||||
|
|
||||||
|
|
||||||
|
@ex.main
def main(epochs, save_model, log_step, _run):
    """Train, validate and test the avocado regressor within a Sacred run.

    Args:
        epochs: number of training epochs.
        save_model: when true, the trained model is scripted, written to
            './data/model_scripted.pt' and added to the run as an artifact.
        log_step: logging interval forwarded to ``train_model``.
        _run: Sacred run object receiving the ``validation.*`` scalars.
    """
    # * "not " must appear only when the model is NOT going to be saved
    print(
        f"Your model will be trained for {epochs} epochs. Trained model will {'' if save_model else 'not '}be saved.")

    # * Paths to data
    avocado_data = ['./data/avocado.data.train',
                    './data/avocado.data.valid',
                    './data/avocado.data.test']

    # * Data preparation
    train_dl, validate_dl, test_dl = prepare_data(paths=avocado_data)
    print(f"""
    Train set size: {len(train_dl.dataset)},
    Validate set size: {len(validate_dl.dataset)}
    Test set size: {len(test_dl.dataset)}
    """)

    # * Model definition
    # * Input width is read from the training data instead of being fixed
    # * (previously 235; 66 when only regions and type are used among the
    # * categorical values)
    model = AvocadoRegressor(train_dl.dataset.get_shape()[1])

    # * Train model
    print("Let's start the training, mate!")
    train_model(train_dl=train_dl, model=model,
                epochs=epochs, log_step=log_step)

    # * Evaluate model
    mse, rmse, mae = evaluate_model(validate_dl, model)
    print(
        f"\nEvaluation on validation set\t→\tMSE: {mse}, RMSE: {rmse}, MAE: {mae}")

    # * Validation metrics are logged one step after the last training epoch
    _run.log_scalar("validation.RMSE", rmse, epochs + 1)
    _run.log_scalar("validation.MAE", mae, epochs + 1)
    _run.log_scalar('validation.MSE', mse, epochs + 1)

    # * Prediction
    predictions = [(predict(row, model)[0], row[1].item()) for row in test_dl]
    preds_df = pd.DataFrame(predictions, columns=["Prediction", "Target"])
    test_loss = evaluate_model(test_dl, model)

    print("\nNow predictions - hey ho, let's go!\n", preds_df.head(),
          f"\nLoss values for test data: \t→\tMSE: {test_loss[0]}, RMSE: {test_loss[1]}, MAE: {test_loss[2]}")
    print("\n...let's save them\ndum...\ndum...\ndum dum dum...\n\tDUM\n")

    preds_df.to_csv("./data/predictions.csv", index=False)

    # * Save the trained model
    if save_model:
        model_path = './data/model_scripted.pt'
        scripted_model = torch.jit.script(model)
        scripted_model.save(model_path)
        ex.add_artifact(model_path)
        # * Announce success only once the file has actually been written
        print("Your model has been saved - have a nice day!")
|
||||||
|
|
||||||
|
|
||||||
|
# * Start the Sacred experiment defined above (the function registered with
# * @ex.main); this runs whenever the module is executed or imported
ex.run()
|
Loading…
Reference in New Issue
Block a user