forked from s464914/ium_464914
Compare commits
12 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
40d0c3e849 | ||
|
0f254aa5fa | ||
|
abb213675e | ||
|
3cbfc6aca1 | ||
|
80ebb3c0da | ||
|
281c3c6a86 | ||
|
ae632b1ea3 | ||
|
7309d49e67 | ||
|
c4ce89938c | ||
|
ed9927d7a1 | ||
|
8ab682be76 | ||
|
7ff2f9711e |
3
.dvc/.gitignore
vendored
Normal file
3
.dvc/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
/config.local
|
||||||
|
/tmp
|
||||||
|
/cache
|
4
.dvc/config
Normal file
4
.dvc/config
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
[core]
|
||||||
|
remote = ium_ssh_remote
|
||||||
|
['remote "ium_ssh_remote"']
|
||||||
|
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
|
3
.dvcignore
Normal file
3
.dvcignore
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Add patterns of files dvc should ignore, which could improve
|
||||||
|
# the performance. Learn more at
|
||||||
|
# https://dvc.org/doc/user-guide/dvcignore
|
5
.env
Normal file
5
.env
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
MONGO_INITDB_ROOT_USERNAME=admin
|
||||||
|
MONGO_INITDB_ROOT_PASSWORD=IUM_2021
|
||||||
|
ME_CONFIG_BASICAUTH_USERNAME=mongo_express_user
|
||||||
|
ME_CONFIG_BASICAUTH_PASSWORD=mongo_express_pw
|
||||||
|
MONGO_DATABASE=sacred
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
/covtype.csv
|
@ -1,95 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%pip install --user kaggle \n",
|
|
||||||
"%pip install --user pandas"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 8,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"ERROR: Could not find a version that satisfies the requirement git (from versions: none)\n",
|
|
||||||
"ERROR: No matching distribution found for git\n",
|
|
||||||
"\n",
|
|
||||||
"[notice] A new release of pip is available: 23.1.2 -> 24.0\n",
|
|
||||||
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%pip install git"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Download data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"!kaggle datasets download -d nasa/meteorite-landings"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 10,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"!tar -xf meteorite-landings.zip"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
@ -4,7 +4,7 @@ RUN apt update && apt install -y python3-pip
|
|||||||
RUN apt install unzip
|
RUN apt install unzip
|
||||||
RUN apt install bc
|
RUN apt install bc
|
||||||
|
|
||||||
RUN pip3 install kaggle pandas scikit-learn torch
|
RUN pip3 install kaggle pandas scikit-learn torch sacred pymongo
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
52
Jenkinsfile
vendored
52
Jenkinsfile
vendored
@ -1,15 +1,9 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent any
|
||||||
triggers {
|
|
||||||
upstream(upstreamProjects: 'z-s464914-create-dataset', threshold: hudson.model.Result.SUCCESS)
|
|
||||||
}
|
|
||||||
parameters {
|
parameters {
|
||||||
buildSelector (
|
string(name: 'KAGGLE_USERNAME', defaultValue: 'alicjaszulecka', description: 'Kaggle username')
|
||||||
defaultSelector: lastSuccessful(),
|
password(name: 'KAGGLE_KEY', defaultValue:'', description: 'Kaggle Key')
|
||||||
description: 'Build for copying artifacts',
|
string(name: 'CUTOFF', defaultValue: '100', description: 'cut off number')
|
||||||
name: 'BUILD_SELECTOR'
|
|
||||||
)
|
|
||||||
string(name: 'EPOCHS', defaultValue: '10', description: 'epochs')
|
|
||||||
}
|
}
|
||||||
stages {
|
stages {
|
||||||
stage('Git Checkout') {
|
stage('Git Checkout') {
|
||||||
@ -17,21 +11,53 @@ pipeline {
|
|||||||
checkout scm
|
checkout scm
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('Copy Artifacts') {
|
stage('Download dataset') {
|
||||||
steps {
|
steps {
|
||||||
copyArtifacts fingerprintArtifacts: true, projectName: 'z-s464914-create-dataset', selector: buildParameter('BUILD_SELECTOR')
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
||||||
|
sh 'pip install kaggle'
|
||||||
|
sh 'kaggle datasets download -d uciml/forest-cover-type-dataset'
|
||||||
|
sh 'unzip -o forest-cover-type-dataset.zip'
|
||||||
|
sh 'rm forest-cover-type-dataset.zip'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
stage('Train') {
|
}
|
||||||
|
stage('Build') {
|
||||||
|
steps {
|
||||||
|
script {
|
||||||
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
||||||
|
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
|
||||||
|
def customImage = docker.build("custom-image")
|
||||||
|
customImage.inside {
|
||||||
|
sh 'python3 ./IUM_2.py'
|
||||||
|
archiveArtifacts artifacts: 'covtype.csv, forest_train.csv, forest_test.csv, forest_val.csv', onlyIfSuccessful: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stage('Train and Predict') {
|
||||||
steps {
|
steps {
|
||||||
script {
|
script {
|
||||||
def customImage = docker.build("custom-image")
|
def customImage = docker.build("custom-image")
|
||||||
customImage.inside {
|
customImage.inside {
|
||||||
sh 'python3 ./model.py ' + params.EPOCHS
|
sh 'python3 ./model.py'
|
||||||
|
sh 'python3 ./prediction.py'
|
||||||
archiveArtifacts artifacts: 'model.pth, predictions.txt', onlyIfSuccessful: true
|
archiveArtifacts artifacts: 'model.pth, predictions.txt', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
stage('Experiments') {
|
||||||
|
steps {
|
||||||
|
script {
|
||||||
|
def customImage = docker.build("custom-image")
|
||||||
|
customImage.inside {
|
||||||
|
sh 'python3 ./sacred_model.py'
|
||||||
|
archiveArtifacts artifacts: 'experiments', onlyIfSuccessful: true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
581013
covtype.csv
581013
covtype.csv
File diff suppressed because it is too large
Load Diff
5
covtype.csv.dvc
Normal file
5
covtype.csv.dvc
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
outs:
|
||||||
|
- md5: e88c3c209db2e8982e07c43462d67c87
|
||||||
|
size: 75170064
|
||||||
|
hash: md5
|
||||||
|
path: covtype.csv
|
29
dvc.yaml
Normal file
29
dvc.yaml
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
stages:
|
||||||
|
prepare_data:
|
||||||
|
cmd: python ./IUM_2.py
|
||||||
|
deps:
|
||||||
|
- create-dataset.py
|
||||||
|
- covtype.csv
|
||||||
|
outs:
|
||||||
|
- forest_train.csv
|
||||||
|
- forest_test.csv
|
||||||
|
- forest_val.csv
|
||||||
|
|
||||||
|
train_model:
|
||||||
|
cmd: python ./model.py
|
||||||
|
deps:
|
||||||
|
- model.py
|
||||||
|
- forest_train.csv
|
||||||
|
- forest_test.csv
|
||||||
|
- forest_val.csv
|
||||||
|
outs:
|
||||||
|
- model.pth
|
||||||
|
|
||||||
|
evaluate_model:
|
||||||
|
cmd: python ./prediction.py
|
||||||
|
deps:
|
||||||
|
- prediction.py
|
||||||
|
- model.pth
|
||||||
|
- forest_test.csv
|
||||||
|
outs:
|
||||||
|
- predictions.txt
|
189
environment.yml
Normal file
189
environment.yml
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
name: IUM
|
||||||
|
channels:
|
||||||
|
- defaults
|
||||||
|
dependencies:
|
||||||
|
- _tflow_select=2.3.0=mkl
|
||||||
|
- abseil-cpp=20211102.0=hd77b12b_0
|
||||||
|
- absl-py=2.1.0=py310haa95532_0
|
||||||
|
- aiohttp=3.9.5=py310h2bbff1b_0
|
||||||
|
- aiosignal=1.2.0=pyhd3eb1b0_0
|
||||||
|
- alembic=1.8.1=py310haa95532_0
|
||||||
|
- aniso8601=9.0.1=pyhd3eb1b0_0
|
||||||
|
- arrow-cpp=11.0.0=h2c9b28c_2
|
||||||
|
- astunparse=1.6.3=py_0
|
||||||
|
- async-timeout=4.0.3=py310haa95532_0
|
||||||
|
- attrs=23.1.0=py310haa95532_0
|
||||||
|
- aws-c-common=0.4.57=ha925a31_1
|
||||||
|
- aws-c-event-stream=0.1.6=hd77b12b_5
|
||||||
|
- aws-checksums=0.1.9=ha925a31_0
|
||||||
|
- aws-sdk-cpp=1.8.185=hd77b12b_0
|
||||||
|
- bcrypt=3.2.0=py310h2bbff1b_1
|
||||||
|
- blas=1.0=mkl
|
||||||
|
- blinker=1.6.2=py310haa95532_0
|
||||||
|
- boost-cpp=1.82.0=h59b6b97_2
|
||||||
|
- bottleneck=1.3.7=py310h9128911_0
|
||||||
|
- brotli=1.0.9=h2bbff1b_8
|
||||||
|
- brotli-bin=1.0.9=h2bbff1b_8
|
||||||
|
- brotli-python=1.0.9=py310hd77b12b_8
|
||||||
|
- bzip2=1.0.8=h2bbff1b_6
|
||||||
|
- c-ares=1.19.1=h2bbff1b_0
|
||||||
|
- ca-certificates=2024.3.11=haa95532_0
|
||||||
|
- cachetools=5.3.3=py310haa95532_0
|
||||||
|
- certifi=2024.2.2=py310haa95532_0
|
||||||
|
- cffi=1.16.0=py310h2bbff1b_1
|
||||||
|
- charset-normalizer=2.0.4=pyhd3eb1b0_0
|
||||||
|
- click=8.1.7=py310haa95532_0
|
||||||
|
- cloudpickle=2.2.1=py310haa95532_0
|
||||||
|
- colorama=0.4.6=py310haa95532_0
|
||||||
|
- contourpy=1.2.0=py310h59b6b97_0
|
||||||
|
- cryptography=41.0.3=py310h3438e0d_0
|
||||||
|
- cycler=0.11.0=pyhd3eb1b0_0
|
||||||
|
- docker-py=7.0.0=py310haa95532_0
|
||||||
|
- entrypoints=0.4=py310haa95532_0
|
||||||
|
- flask=2.2.5=py310haa95532_0
|
||||||
|
- flatbuffers=2.0.0=h6c2663c_0
|
||||||
|
- fonttools=4.51.0=py310h2bbff1b_0
|
||||||
|
- freetype=2.12.1=ha860e81_0
|
||||||
|
- frozenlist=1.4.0=py310h2bbff1b_0
|
||||||
|
- gast=0.4.0=pyhd3eb1b0_0
|
||||||
|
- gflags=2.2.2=hd77b12b_1
|
||||||
|
- giflib=5.2.1=h8cc25b3_3
|
||||||
|
- gitdb=4.0.7=pyhd3eb1b0_0
|
||||||
|
- gitpython=3.1.37=py310haa95532_0
|
||||||
|
- glog=0.5.0=hd77b12b_1
|
||||||
|
- google-auth=2.29.0=py310haa95532_0
|
||||||
|
- google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
|
||||||
|
- google-pasta=0.2.0=pyhd3eb1b0_0
|
||||||
|
- graphene=3.3=py310haa95532_0
|
||||||
|
- graphql-core=3.2.3=py310haa95532_1
|
||||||
|
- graphql-relay=3.2.0=py310haa95532_0
|
||||||
|
- greenlet=3.0.1=py310hd77b12b_0
|
||||||
|
- grpc-cpp=1.48.2=hf108199_0
|
||||||
|
- grpcio=1.48.2=py310hf108199_0
|
||||||
|
- h5py=3.11.0=py310hed405ee_0
|
||||||
|
- hdf5=1.12.1=h51c971a_3
|
||||||
|
- icc_rt=2022.1.0=h6049295_2
|
||||||
|
- icu=58.2=ha925a31_3
|
||||||
|
- idna=3.7=py310haa95532_0
|
||||||
|
- importlib-metadata=7.0.1=py310haa95532_0
|
||||||
|
- intel-openmp=2023.1.0=h59b6b97_46320
|
||||||
|
- itsdangerous=2.0.1=pyhd3eb1b0_0
|
||||||
|
- jinja2=3.1.3=py310haa95532_0
|
||||||
|
- joblib=1.4.0=py310haa95532_0
|
||||||
|
- jpeg=9e=h2bbff1b_1
|
||||||
|
- keras=2.10.0=py310haa95532_0
|
||||||
|
- keras-preprocessing=1.1.2=pyhd3eb1b0_0
|
||||||
|
- kiwisolver=1.4.4=py310hd77b12b_0
|
||||||
|
- krb5=1.20.1=h5b6d351_1
|
||||||
|
- lcms2=2.12=h83e58a3_0
|
||||||
|
- lerc=3.0=hd77b12b_0
|
||||||
|
- libboost=1.82.0=h3399ecb_2
|
||||||
|
- libbrotlicommon=1.0.9=h2bbff1b_8
|
||||||
|
- libbrotlidec=1.0.9=h2bbff1b_8
|
||||||
|
- libbrotlienc=1.0.9=h2bbff1b_8
|
||||||
|
- libclang=14.0.6=default_hb5a9fac_1
|
||||||
|
- libclang13=14.0.6=default_h8e68704_1
|
||||||
|
- libcurl=8.7.1=h86230a5_0
|
||||||
|
- libdeflate=1.17=h2bbff1b_1
|
||||||
|
- libevent=2.1.12=hcc03200_0
|
||||||
|
- libffi=3.4.4=hd77b12b_1
|
||||||
|
- libpng=1.6.39=h8cc25b3_0
|
||||||
|
- libpq=12.15=hb652d5d_1
|
||||||
|
- libprotobuf=3.20.3=h23ce68f_0
|
||||||
|
- libssh2=1.10.0=hcd4344a_2
|
||||||
|
- libthrift=0.15.0=he49ee6e_2
|
||||||
|
- libtiff=4.5.1=hd77b12b_0
|
||||||
|
- libwebp-base=1.3.2=h2bbff1b_0
|
||||||
|
- lz4-c=1.9.4=h2bbff1b_1
|
||||||
|
- mako=1.2.3=py310haa95532_0
|
||||||
|
- markdown=3.4.1=py310haa95532_0
|
||||||
|
- markupsafe=2.1.3=py310h2bbff1b_0
|
||||||
|
- matplotlib=3.8.4=py310haa95532_0
|
||||||
|
- matplotlib-base=3.8.4=py310h4ed8f06_0
|
||||||
|
- mkl=2023.1.0=h6b88ed4_46358
|
||||||
|
- mkl-service=2.4.0=py310h2bbff1b_1
|
||||||
|
- mkl_fft=1.3.8=py310h2bbff1b_0
|
||||||
|
- mkl_random=1.2.4=py310h59b6b97_0
|
||||||
|
- mlflow=2.12.2=py310hd1fac3c_0
|
||||||
|
- multidict=6.0.4=py310h2bbff1b_0
|
||||||
|
- numexpr=2.8.7=py310h2cd9be0_0
|
||||||
|
- numpy=1.26.4=py310h055cbcc_0
|
||||||
|
- numpy-base=1.26.4=py310h65a83cf_0
|
||||||
|
- oauthlib=3.2.2=py310haa95532_0
|
||||||
|
- openjpeg=2.4.0=h4fc8c34_0
|
||||||
|
- openssl=1.1.1w=h2bbff1b_0
|
||||||
|
- opt_einsum=3.3.0=pyhd3eb1b0_1
|
||||||
|
- orc=1.7.4=h623e30f_1
|
||||||
|
- packaging=23.2=py310haa95532_0
|
||||||
|
- pandas=2.2.1=py310h5da7b33_0
|
||||||
|
- paramiko=2.8.1=pyhd3eb1b0_0
|
||||||
|
- pillow=10.3.0=py310h2bbff1b_0
|
||||||
|
- pip=24.0=py310haa95532_0
|
||||||
|
- ply=3.11=py310haa95532_0
|
||||||
|
- protobuf=3.20.3=py310hd77b12b_0
|
||||||
|
- pyarrow=11.0.0=py310h790e06d_1
|
||||||
|
- pyasn1=0.4.8=pyhd3eb1b0_0
|
||||||
|
- pyasn1-modules=0.2.8=py_0
|
||||||
|
- pybind11-abi=5=hd3eb1b0_0
|
||||||
|
- pycparser=2.21=pyhd3eb1b0_0
|
||||||
|
- pyjwt=2.8.0=py310haa95532_0
|
||||||
|
- pynacl=1.5.0=py310h8cc25b3_0
|
||||||
|
- pyopenssl=23.2.0=py310haa95532_0
|
||||||
|
- pyqt=5.15.10=py310hd77b12b_0
|
||||||
|
- pyqt5-sip=12.13.0=py310h2bbff1b_0
|
||||||
|
- pysocks=1.7.1=py310haa95532_0
|
||||||
|
- python=3.10.13=h966fe2a_0
|
||||||
|
- python-dateutil=2.9.0post0=py310haa95532_0
|
||||||
|
- python-flatbuffers=2.0=pyhd3eb1b0_0
|
||||||
|
- python-tzdata=2023.3=pyhd3eb1b0_0
|
||||||
|
- pytz=2024.1=py310haa95532_0
|
||||||
|
- pywin32=305=py310h2bbff1b_0
|
||||||
|
- pyyaml=6.0.1=py310h2bbff1b_0
|
||||||
|
- qt-main=5.15.2=h6072711_9
|
||||||
|
- querystring_parser=1.2.4=py310haa95532_0
|
||||||
|
- re2=2022.04.01=hd77b12b_0
|
||||||
|
- requests=2.31.0=py310haa95532_1
|
||||||
|
- requests-oauthlib=1.3.0=py_0
|
||||||
|
- rsa=4.7.2=pyhd3eb1b0_1
|
||||||
|
- scikit-learn=1.4.2=py310h4ed8f06_1
|
||||||
|
- scipy=1.13.0=py310h8640f81_0
|
||||||
|
- setuptools=69.5.1=py310haa95532_0
|
||||||
|
- sip=6.7.12=py310hd77b12b_0
|
||||||
|
- six=1.16.0=pyhd3eb1b0_1
|
||||||
|
- smmap=4.0.0=pyhd3eb1b0_0
|
||||||
|
- snappy=1.1.10=h6c2663c_1
|
||||||
|
- sqlalchemy=2.0.25=py310h2bbff1b_0
|
||||||
|
- sqlite=3.45.3=h2bbff1b_0
|
||||||
|
- sqlparse=0.4.4=py310haa95532_0
|
||||||
|
- tbb=2021.8.0=h59b6b97_0
|
||||||
|
- tensorboard=2.10.0=py310haa95532_0
|
||||||
|
- tensorboard-data-server=0.6.1=py310haa95532_0
|
||||||
|
- tensorboard-plugin-wit=1.8.1=py310haa95532_0
|
||||||
|
- tensorflow=2.10.0=mkl_py310hd99672f_0
|
||||||
|
- tensorflow-base=2.10.0=mkl_py310h6a7f48e_0
|
||||||
|
- tensorflow-estimator=2.10.0=py310haa95532_0
|
||||||
|
- termcolor=2.1.0=py310haa95532_0
|
||||||
|
- threadpoolctl=2.2.0=pyh0d69192_0
|
||||||
|
- tk=8.6.14=h0416ee5_0
|
||||||
|
- tornado=6.3.3=py310h2bbff1b_0
|
||||||
|
- typing-extensions=4.11.0=py310haa95532_0
|
||||||
|
- typing_extensions=4.11.0=py310haa95532_0
|
||||||
|
- tzdata=2024a=h04d1e81_0
|
||||||
|
- unicodedata2=15.1.0=py310h2bbff1b_0
|
||||||
|
- urllib3=2.2.1=py310haa95532_0
|
||||||
|
- utf8proc=2.6.1=h2bbff1b_1
|
||||||
|
- vc=14.2=h2eaa2aa_1
|
||||||
|
- vs2015_runtime=14.29.30133=h43f2093_3
|
||||||
|
- waitress=2.0.0=pyhd3eb1b0_0
|
||||||
|
- websocket-client=1.8.0=py310haa95532_0
|
||||||
|
- werkzeug=2.3.8=py310haa95532_0
|
||||||
|
- wheel=0.43.0=py310haa95532_0
|
||||||
|
- win_inet_pton=1.1.0=py310haa95532_0
|
||||||
|
- wrapt=1.14.1=py310h2bbff1b_0
|
||||||
|
- xz=5.4.6=h8cc25b3_1
|
||||||
|
- yaml=0.2.5=he774522_0
|
||||||
|
- yarl=1.9.3=py310h2bbff1b_0
|
||||||
|
- zipp=3.17.0=py310haa95532_0
|
||||||
|
- zlib=1.2.13=h8cc25b3_1
|
||||||
|
- zstd=1.5.5=hd43e919_2
|
||||||
|
prefix: C:\Users\Genos\miniconda3\envs\IUM
|
15
mlflow/Dockerfile
Normal file
15
mlflow/Dockerfile
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
FROM python:3.10
|
||||||
|
|
||||||
|
RUN pip install --upgrade pip
|
||||||
|
|
||||||
|
RUN pip3 install mlflow
|
||||||
|
RUN pip3 install scikit-learn
|
||||||
|
RUN pip3 install pandas
|
||||||
|
RUN pip3 install numpy
|
||||||
|
RUN pip3 install torch
|
||||||
|
|
||||||
|
COPY mlflow_model.py .
|
||||||
|
COPY mlflow_prediction.py .
|
||||||
|
COPY forest_test.csv .
|
||||||
|
COPY forest_train.csv .
|
||||||
|
COPY forest_val.csv .
|
13
mlflow/MLProject
Normal file
13
mlflow/MLProject
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
name: mlflow_464914
|
||||||
|
|
||||||
|
# conda_env: conda.yaml  # path to the conda.yaml file that defines the environment
|
||||||
|
docker_env:
|
||||||
|
image: mlflow_image
|
||||||
|
|
||||||
|
entry_points:
|
||||||
|
main:
|
||||||
|
parameters:
|
||||||
|
epochs: {type: int, default: 10}
|
||||||
|
command: "python mlflow_model.py {epochs}"
|
||||||
|
test:
|
||||||
|
command: "python mlflow_prediction.py"
|
120
mlflow/mlflow_model.py
Normal file
120
mlflow/mlflow_model.py
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
from torch.utils.data import DataLoader, Dataset
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import mlflow
|
||||||
|
import mlflow.sklearn
|
||||||
|
import sys
|
||||||
|
|
||||||
|
mlflow.set_tracking_uri("http://localhost:5000")
|
||||||
|
mlflow.set_experiment("s464914")
|
||||||
|
|
||||||
|
|
||||||
|
device = (
|
||||||
|
"cuda"
|
||||||
|
if torch.cuda.is_available()
|
||||||
|
else "cpu"
|
||||||
|
)
|
||||||
|
|
||||||
|
class Model(nn.Module):
    """Cover-type classifier: one linear layer followed by ReLU.

    ``forward`` only uses ``fc1``. The remaining layers (``bn1``, ``fc2``,
    ``bn2``, ``out``) are constructed but never called; they still contribute
    entries to ``state_dict()``, so removing them would change the layout of
    saved checkpoints.
    """

    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
        super().__init__()
        # Layer used by forward(): maps the input features straight to the outputs.
        self.fc1 = nn.Linear(input_features, output_features)
        # Layers below are registered but not used by forward().
        self.bn1 = nn.BatchNorm1d(hidden_layer1)
        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.bn2 = nn.BatchNorm1d(hidden_layer2)
        self.out = nn.Linear(hidden_layer2, output_features)

    def forward(self, x):
        """Return ``relu(fc1(x))``; no batch normalization is applied."""
        return F.relu(self.fc1(x))
|
||||||
|
|
||||||
|
def main():
    """Train the classifier and record the run in MLflow.

    The number of epochs is read from the first command-line argument and
    falls back to 10 when no argument is given. The model is trained on
    ``forest_train.csv``; after every epoch the training loss and the
    validation loss/accuracy on ``forest_val.csv`` are printed. The trained
    weights are written to ``model.pth`` and the ``epochs`` parameter is
    logged to the MLflow run.

    Raises:
        ValueError: if the command-line argument is not an integer.
    """
    # Local import: the file-level imports only bring in DataLoader and Dataset.
    from torch.utils.data import TensorDataset

    # Do not crash with IndexError when the script is started without arguments.
    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 10

    forest_train = pd.read_csv('forest_train.csv')
    forest_val = pd.read_csv('forest_val.csv')

    print(forest_train.head())

    X_train = forest_train.drop(columns=['Cover_Type']).values
    y_train = forest_train['Cover_Type'].values

    X_val = forest_val.drop(columns=['Cover_Type']).values
    y_val = forest_val['Cover_Type'].values

    # Initialize model, loss function, and optimizer
    model = Model().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Convert to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)

    # TensorDataset indexes the tensors directly instead of materializing a
    # Python list with one (features, label) tuple per row.
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=64)

    with mlflow.start_run():
        # Training loop
        for epoch in range(epochs):
            model.train()  # Set model to training mode
            running_loss = 0.0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # criterion returns the batch mean, so weight it by batch size.
                running_loss += loss.item() * inputs.size(0)

            # Calculate training loss
            epoch_loss = running_loss / len(train_loader.dataset)

            # Validation
            model.eval()  # Set model to evaluation mode
            val_running_loss = 0.0
            correct = 0
            total = 0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)

                    outputs = model(inputs)
                    val_loss = criterion(outputs, labels)
                    val_running_loss += val_loss.item() * inputs.size(0)

                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            # Calculate validation loss and accuracy
            val_epoch_loss = val_running_loss / len(val_loader.dataset)
            val_accuracy = correct / total

            print(f"Epoch {epoch+1}/{epochs}, "
                  f"Train Loss: {epoch_loss:.4f}, "
                  f"Val Loss: {val_epoch_loss:.4f}, "
                  f"Val Accuracy: {val_accuracy:.4f}")

        torch.save(model.state_dict(), 'model.pth')
        mlflow.log_param("epochs", epochs)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
95
mlflow/mlflow_prediction.py
Normal file
95
mlflow/mlflow_prediction.py
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
from torch.utils.data import DataLoader, Dataset
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
|
||||||
|
import numpy as np
|
||||||
|
import mlflow
|
||||||
|
import mlflow.sklearn
|
||||||
|
|
||||||
|
mlflow.set_tracking_uri("http://localhost:5000")
|
||||||
|
mlflow.set_experiment("s464914")
|
||||||
|
|
||||||
|
device = (
|
||||||
|
"cuda"
|
||||||
|
if torch.cuda.is_available()
|
||||||
|
else "cpu"
|
||||||
|
)
|
||||||
|
|
||||||
|
class Model(nn.Module):
    """Network definition used to load the trained checkpoint for prediction.

    Only ``fc1`` takes part in ``forward``. ``bn1``, ``fc2``, ``bn2`` and
    ``out`` are constructed but never called; they still appear in
    ``state_dict()``, so dropping them would change which checkpoint keys
    ``load_state_dict`` expects.
    """

    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
        super().__init__()
        # The only layer evaluated in forward().
        self.fc1 = nn.Linear(input_features, output_features)
        # Registered but unused by forward().
        self.bn1 = nn.BatchNorm1d(hidden_layer1)
        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.bn2 = nn.BatchNorm1d(hidden_layer2)
        self.out = nn.Linear(hidden_layer2, output_features)

    def forward(self, x):
        """Return ``relu(fc1(x))``."""
        return F.relu(self.fc1(x))
|
||||||
|
|
||||||
|
def load_model(model, model_path):
    """Load saved weights into ``model`` and switch it to evaluation mode.

    Args:
        model: the ``nn.Module`` whose parameters are overwritten in place.
        model_path: path of a file written with ``torch.save(state_dict, ...)``.

    The checkpoint is mapped onto the device the model currently lives on
    (CPU when the model has no parameters), so a file saved on a GPU machine
    can still be loaded on a CPU-only host.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    state_dict = torch.load(model_path, map_location=target_device)
    model.load_state_dict(state_dict)
    model.eval()
|
||||||
|
|
||||||
|
def predict(model, input_data):
    """Return the predicted class index (or indices) for ``input_data``.

    Args:
        model: a callable ``nn.Module`` producing one score per class along
            the last dimension.
        input_data: a tensor or anything ``torch.as_tensor`` accepts (list,
            NumPy array). A single sample yields a single prediction; a batch
            of samples yields one prediction per row.

    Returns:
        An ``int`` for a single sample, or a list of ``int`` for a batch.
    """
    # Convert input data to a float32 tensor on the model's device
    # (left on its current device when the model has no parameters).
    features = torch.as_tensor(input_data, dtype=torch.float32)
    first_param = next(model.parameters(), None)
    if first_param is not None:
        features = features.to(first_param.device)

    # Perform forward pass without tracking gradients
    with torch.no_grad():
        output = model(features)

    # The class axis is the last one for both single samples and batches.
    predicted_class = output.argmax(dim=-1)
    if predicted_class.dim() == 0:
        return predicted_class.item()
    return predicted_class.tolist()
|
||||||
|
|
||||||
|
def main():
    """Evaluate the saved model on the test split and log metrics to MLflow.

    Reads ``forest_test.csv``, loads the weights from ``model.pth``, writes
    one ``predicted: <p> true_label: <t>`` line per test row to
    ``predictions.txt`` and logs accuracy, micro-averaged precision/recall/F1
    and RMSE (computed on the class indices) to the MLflow run.
    """
    with mlflow.start_run():
        forest_test = pd.read_csv('forest_test.csv')

        X_test = forest_test.drop(columns=['Cover_Type']).values
        y_test = forest_test['Cover_Type'].values

        X_test = torch.tensor(X_test, dtype=torch.float32).to(device)

        model = Model().to(device)
        model_path = 'model.pth'  # Path to the saved model file
        load_model(model, model_path)

        # Predict all rows in one forward pass; keep the numeric class indices
        # so the metrics compare labels with labels, not with formatted text.
        with torch.no_grad():
            predicted_labels = model(X_test).argmax(dim=1).cpu().numpy()

        with open(r'predictions.txt', 'w') as fp:
            # write each item on a new line
            fp.writelines(
                f"predicted: {predicted} true_label: {target}\n"
                for predicted, target in zip(predicted_labels, y_test)
            )

        accuracy = accuracy_score(y_test, predicted_labels)
        precision_micro = precision_score(y_test, predicted_labels, average='micro')
        recall_micro = recall_score(y_test, predicted_labels, average='micro')
        f1_micro = f1_score(y_test, predicted_labels, average='micro')
        rmse = np.sqrt(mean_squared_error(y_test, predicted_labels))

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision_micro", precision_micro)
        mlflow.log_metric("recall_micro", recall_micro)
        mlflow.log_metric("f1_micro", f1_micro)
        mlflow.log_metric("rmse", rmse)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
5
model.py
5
model.py
@ -6,7 +6,6 @@ import pandas as pd
|
|||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.preprocessing import LabelEncoder
|
from sklearn.preprocessing import LabelEncoder
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
device = (
|
device = (
|
||||||
@ -31,9 +30,6 @@ class Model(nn.Module):
|
|||||||
return x
|
return x
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
epochs = int(sys.argv[1])
|
|
||||||
print(epochs)
|
|
||||||
|
|
||||||
forest_train = pd.read_csv('forest_train.csv')
|
forest_train = pd.read_csv('forest_train.csv')
|
||||||
forest_val = pd.read_csv('forest_val.csv')
|
forest_val = pd.read_csv('forest_val.csv')
|
||||||
|
|
||||||
@ -63,6 +59,7 @@ def main():
|
|||||||
val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
|
val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
|
||||||
|
|
||||||
# Training loop
|
# Training loop
|
||||||
|
epochs = 10
|
||||||
for epoch in range(epochs):
|
for epoch in range(epochs):
|
||||||
model.train() # Set model to training mode
|
model.train() # Set model to training mode
|
||||||
running_loss = 0.0
|
running_loss = 0.0
|
||||||
|
126
sacred_model.py
Normal file
126
sacred_model.py
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
|
from torch.utils.data import DataLoader, Dataset
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.preprocessing import LabelEncoder
|
||||||
|
import torch.nn.functional as F
|
||||||
|
from sacred import Experiment
|
||||||
|
from sacred.observers import FileStorageObserver, MongoObserver
|
||||||
|
|
||||||
|
|
||||||
|
device = (
|
||||||
|
"cuda"
|
||||||
|
if torch.cuda.is_available()
|
||||||
|
else "cpu"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Sacred experiment "464914"; interactive mode is enabled and git metadata
# collection is switched off.
ex = Experiment("464914", interactive=True, save_git_info=False)
# Record every run on disk under the "experiments" directory ...
ex.observers.append(FileStorageObserver('experiments'))
# ... and in the "sacred" database of the remote MongoDB server.
# NOTE(review): the MongoDB user name and password are hard-coded in the URL
# below and therefore end up in version control. Consider reading them from
# the environment instead — confirm the deployment setup before changing.
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017',
                                  db_name='sacred'))
|
||||||
|
|
||||||
|
class Model(nn.Module):
    """Classifier trained by the Sacred experiment: linear layer plus ReLU.

    ``forward`` evaluates ``fc1`` only. ``bn1``, ``fc2``, ``bn2`` and ``out``
    are created but not called; they are still part of ``state_dict()``, so
    removing them would alter the keys stored in ``model.pth``.
    """

    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
        super().__init__()
        # Layer actually used: input features -> output scores.
        self.fc1 = nn.Linear(input_features, output_features)
        # Constructed but never used in forward().
        self.bn1 = nn.BatchNorm1d(hidden_layer1)
        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.bn2 = nn.BatchNorm1d(hidden_layer2)
        self.out = nn.Linear(hidden_layer2, output_features)

    def forward(self, x):
        """Return ``relu(fc1(x))``; batch normalization is not applied."""
        return F.relu(self.fc1(x))
|
||||||
|
|
||||||
|
@ex.capture
def capture_params(epochs):
    """Print the number of epochs as ``epochs: <value>``."""
    message = f"epochs: {epochs}"
    print(message)
|
||||||
|
|
||||||
|
@ex.main
def main(_run):
    """Train the classifier for 10 epochs and record the run with Sacred.

    The training and validation CSV files are opened through
    ``ex.open_resource`` so they are registered as resources of the run, and
    the returned handles are read and closed here. After every epoch the
    training and validation losses are logged as the scalars ``train loss``
    and ``val loss``. The final weights are saved to ``model.pth`` and
    attached to the run as an artifact.

    Args:
        _run: the Sacred run object injected by ``@ex.main``.
    """
    # Local import: the file-level imports only bring in DataLoader and Dataset.
    from torch.utils.data import TensorDataset

    # Read from the handles returned by open_resource instead of leaving them
    # open and unused while opening each file a second time.
    with ex.open_resource('forest_train.csv') as train_file:
        forest_train = pd.read_csv(train_file)
    with ex.open_resource('forest_val.csv') as val_file:
        forest_val = pd.read_csv(val_file)

    X_train = forest_train.drop(columns=['Cover_Type']).values
    y_train = forest_train['Cover_Type'].values

    X_val = forest_val.drop(columns=['Cover_Type']).values
    y_val = forest_val['Cover_Type'].values

    # Initialize model, loss function, and optimizer
    model = Model().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Convert to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val = torch.tensor(y_val, dtype=torch.long).to(device)

    # TensorDataset indexes the tensors directly instead of materializing a
    # Python list with one (features, label) tuple per row.
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=64)

    # Training loop
    epochs = 10
    for epoch in range(epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean, so weight it by batch size.
            running_loss += loss.item() * inputs.size(0)

        # Calculate training loss
        epoch_loss = running_loss / len(train_loader.dataset)

        # Validation
        model.eval()  # Set model to evaluation mode
        val_running_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                val_loss = criterion(outputs, labels)
                val_running_loss += val_loss.item() * inputs.size(0)

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Calculate validation loss and accuracy
        val_epoch_loss = val_running_loss / len(val_loader.dataset)
        val_accuracy = correct / total

        print(f"Epoch {epoch+1}/{epochs}, "
              f"Train Loss: {epoch_loss:.4f}, "
              f"Val Loss: {val_epoch_loss:.4f}, "
              f"Val Accuracy: {val_accuracy:.4f}")
        _run.log_scalar("train loss", epoch_loss)
        _run.log_scalar("val loss", val_epoch_loss)

    capture_params(epochs)
    torch.save(model.state_dict(), 'model.pth')
    ex.add_artifact("model.pth")
|
||||||
|
|
||||||
|
ex.run()
|
5
sacredboard/Dockerfile
Normal file
5
sacredboard/Dockerfile
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
FROM python:3.6-jessie
|
||||||
|
|
||||||
|
RUN pip install https://github.com/chovanecm/sacredboard/archive/develop.zip
|
||||||
|
|
||||||
|
ENTRYPOINT sacredboard -mu mongodb://$MONGO_INITDB_ROOT_USERNAME:$MONGO_INITDB_ROOT_PASSWORD@mongo:27017/?authMechanism=SCRAM-SHA-1 $MONGO_DATABASE
|
Loading…
Reference in New Issue
Block a user