Compare commits

..

17 Commits

Author SHA1 Message Date
AWieczarek
e4a90fd187 IUM_12 2024-06-11 23:09:14 +02:00
AWieczarek
490b8cf773 IUM_12 2024-06-11 22:50:11 +02:00
AWieczarek
42f90164aa IUM_7 2024-06-11 20:01:27 +02:00
AWieczarek
d7be18b9f6 IUM_7 2024-06-11 19:55:29 +02:00
AWieczarek
3e896933bc IUM_7 2024-06-11 19:50:12 +02:00
AWieczarek
999c18f3d9 IUM_7 2024-06-11 19:45:02 +02:00
AWieczarek
25619cb914 IUM_7 2024-06-11 19:44:30 +02:00
AWieczarek
0916bfc576 IUM_7 2024-06-11 19:35:20 +02:00
AWieczarek
bedf46af4b IUM_7 2024-06-11 19:34:37 +02:00
AWieczarek
88ab8d9d4d IUM_07 2024-06-11 19:30:58 +02:00
AWieczarek
e9f53be954 IUM_10 2024-05-28 18:36:32 +02:00
AWieczarek
b8ecd36d1a IUM_10 2024-05-28 18:06:00 +02:00
AWieczarek
7773a934ce IUM_09 2024-05-20 18:33:07 +02:00
AWieczarek
d20076a6b5 IUM_08 2024-05-20 18:20:56 +02:00
AWieczarek
d739f275e8 IUM_08 2024-05-13 20:25:54 +02:00
0dbf6f1959 Update IUM_02.py 2024-05-06 19:41:47 +02:00
AWieczarek
59f3e55786 IUM_06 2024-05-05 19:23:41 +02:00
80 changed files with 2755527 additions and 22 deletions

3
.dvc/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/config.local
/tmp
/cache

4
.dvc/config Normal file
View File

@ -0,0 +1,4 @@
[core]
remote = ium_ssh_remote
['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl

3
.dvcignore Normal file
View File

@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
/beer_reviews_train.csv
/beer_reviews_test.csv
/beer_review_sentiment_model.h5
/beer_review_sentiment_predictions.csv

View File

@ -8,9 +8,9 @@ ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC
RUN apt update && \
apt install -y python3 python3-pip unzip
apt install -y python3 python3-pip unzip git
RUN pip install kaggle pandas seaborn scikit-learn tensorflow
RUN pip install kaggle pandas seaborn scikit-learn tensorflow sacred pymongo --break-system-packages
WORKDIR /app

View File

@ -22,6 +22,7 @@ api.dataset_download_files('thedevastator/1-5-million-beer-reviews-from-beer-adv
#
# get_ipython().system('kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate')
#
#Change
#
# # In[ ]:
#

View File

@ -1,18 +1,18 @@
import pandas as pd
import numpy as np
import tensorflow as tf
test_data = pd.read_csv('./beer_reviews_test.csv')
X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
y_test = test_data['review_overall']
model = tf.keras.models.load_model('beer_review_sentiment_model.h5')
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
predictions = model.predict(X_test)
print(f'Predictions shape: {predictions.shape}')
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_test_pad = tf.keras.preprocessing.sequence.pad_sequences(X_test_seq, maxlen=100)
if len(predictions.shape) > 1:
predictions = predictions[:, 0]
predictions = model.predict(X_test_pad)
np.savetxt('beer_review_sentiment_predictions.csv', predictions, delimiter=',', fmt='%.10f')
results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
results.to_csv('beer_review_sentiment_predictions.csv', index=False)

View File

@ -1,7 +1,7 @@
import pandas as pd
from sklearn.model_selection import train_test_split
data = pd.read_csv('./beer_reviews.csv')
data = pd.read_csv('beer_reviews.csv')
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

56
Jenkinsfile vendored
View File

@ -1,25 +1,36 @@
pipeline {
agent any
triggers {
upstream(upstreamProjects: 'z-s464979-create-dataset', threshold: hudson.model.Result.SUCCESS)
}
parameters {
string(name: 'EPOCHS', defaultValue: '40', description: 'Number of epochs')
string(name: 'BATCH_SIZE', defaultValue: '32', description: 'Batch size')
buildSelector(defaultSelector: lastSuccessful(), description: 'Which build to use for copying artifacts', name: 'BUILD_SELECTOR')
string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
}
stages {
stage('Clone Repository') {
steps {
git branch: 'training', url: "https://git.wmi.amu.edu.pl/s464979/ium_464979.git"
git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
}
}
stage('Copy Artifacts') {
stage('Download dataset') {
steps {
copyArtifacts filter: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', projectName: 'z-s464979-create-dataset', selector: buildParameter('BUILD_SELECTOR')
withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
}
}
}
stage('Process and Split Dataset') {
agent {
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
steps {
sh "chmod +x ./IUM_05-split.py"
sh "python3 ./IUM_05-split.py"
archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
}
}
stage("Run") {
@ -31,9 +42,30 @@ pipeline {
}
steps {
sh "chmod +x ./IUM_05-model.py"
sh "python3 ./IUM_05-model.py ${params.EPOCHS} ${params.BATCH_SIZE}"
archiveArtifacts artifacts: 'beer_review_sentiment_model.h5', onlyIfSuccessful: true
sh "chmod +x ./IUM_05-predict.py"
sh "python3 ./IUM_05-model.py 10 32"
sh "python3 ./IUM_05-predict.py"
archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
}
}
stage('Sacred') {
agent {
dockerfile {
filename 'Dockerfile'
reuseNode true
}
}
steps {
sh 'chmod +x sacred/sacred_training_model.py'
sh 'python3 sacred/sacred_training_model.py'
}
}
stage('Archive Artifacts from Experiments') {
steps {
archiveArtifacts artifacts: 'sacred_runs/**/*.*', onlyIfSuccessful: true
}
}
}
}

Binary file not shown.

1
data/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/beer_reviews.csv

View File

@ -0,0 +1,5 @@
outs:
- md5: 50f6eec0d0fe78bc0f10e35edd271998
size: 201644905
hash: md5
path: beer_reviews.csv

46
dvc.lock Normal file
View File

@ -0,0 +1,46 @@
schema: '2.0'
stages:
split_data:
cmd: python IUM_05-split.py
deps:
- path: data/beer_reviews.csv
hash: md5
md5: 50f6eec0d0fe78bc0f10e35edd271998
size: 201644905
outs:
- path: beer_reviews_test.csv
hash: md5
md5: edbd0a7f05c59a0c0e936917f60e9b96
size: 40632354
- path: beer_reviews_train.csv
hash: md5
md5: 8c6877a26fef1542369bfae6b39d163c
size: 162599343
train_model:
cmd: python IUM_05-model.py 10 32
deps:
- path: beer_reviews_train.csv
hash: md5
md5: 8c6877a26fef1542369bfae6b39d163c
size: 162599343
outs:
- path: beer_review_sentiment_model.h5
hash: md5
md5: c126bd5d332a905262c66894585450e3
size: 1950856
predict:
cmd: python IUM_05-predict.py
deps:
- path: beer_review_sentiment_model.h5
hash: md5
md5: c126bd5d332a905262c66894585450e3
size: 1950856
- path: beer_reviews_test.csv
hash: md5
md5: edbd0a7f05c59a0c0e936917f60e9b96
size: 40632354
outs:
- path: beer_review_sentiment_predictions.csv
hash: md5
md5: 12a66fafb7f4d7d19eb0c4a90cc7d3ad
size: 4814242

23
dvc.yaml Normal file
View File

@ -0,0 +1,23 @@
stages:
split_data:
cmd: python IUM_05-split.py
deps:
- data/beer_reviews.csv
outs:
- beer_reviews_train.csv
- beer_reviews_test.csv
train_model:
cmd: python IUM_05-model.py 10 32
deps:
- beer_reviews_train.csv
outs:
- beer_review_sentiment_model.h5
predict:
cmd: python IUM_05-predict.py
deps:
- beer_review_sentiment_model.h5
- beer_reviews_test.csv
outs:
- beer_review_sentiment_predictions.csv

338
environment.yml Normal file
View File

@ -0,0 +1,338 @@
name: uczenie_glebokie
channels:
- conda-forge
- defaults
dependencies:
- _tflow_select=2.3.0=mkl
- abseil-cpp=20211102.0=h36ffca9_3
- absl-py=2.1.0=pyhd8ed1ab_0
- aiohttp=3.9.3=py310h8d17308_1
- aiosignal=1.3.1=pyhd8ed1ab_0
- alembic=1.13.1=pyhd8ed1ab_1
- aniso8601=9.0.1=pyhd8ed1ab_0
- anyio=4.3.0=pyhd8ed1ab_0
- aom=3.6.0=hd77b12b_0
- argon2-cffi=23.1.0=pyhd8ed1ab_0
- argon2-cffi-bindings=21.2.0=py310h8d17308_4
- arrow=1.3.0=pyhd8ed1ab_0
- arrow-cpp=11.0.0=h2c9b28c_2
- asttokens=2.4.1=pyhd8ed1ab_0
- astunparse=1.6.3=pyhd8ed1ab_0
- async-lru=2.0.4=pyhd8ed1ab_0
- async-timeout=4.0.3=pyhd8ed1ab_0
- attrs=23.2.0=pyh71513ae_0
- aws-c-common=0.4.57=ha925a31_1
- aws-c-event-stream=0.1.6=h7915e17_3
- aws-checksums=0.1.9=hb01e4cc_0
- aws-sdk-cpp=1.8.185=hd77b12b_0
- babel=2.14.0=pyhd8ed1ab_0
- bcrypt=4.1.3=py310hc226416_0
- beautifulsoup4=4.12.3=pyha770c72_0
- blas=1.0=mkl
- bleach=6.1.0=pyhd8ed1ab_0
- blinker=1.7.0=pyhd8ed1ab_0
- blosc=1.21.5=hdccc3a2_0
- boost-cpp=1.84.0=h6f18f0d_2
- bottleneck=1.3.8=py310h3e78b6c_0
- brotli=1.0.9=h2bbff1b_7
- brotli-bin=1.0.9=h2bbff1b_7
- brotli-python=1.0.9=py310h00ffb61_8
- bzip2=1.0.8=hcfcfb64_5
- c-ares=1.28.1=hcfcfb64_0
- ca-certificates=2024.2.2=h56e8100_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- cachetools=5.3.3=pyhd8ed1ab_0
- certifi=2024.2.2=pyhd8ed1ab_0
- cffi=1.16.0=py310h8d17308_0
- cfitsio=3.470=h2bbff1b_7
- charls=2.2.0=h6c2663c_0
- charset-normalizer=3.3.2=pyhd8ed1ab_0
- click=8.1.7=win_pyh7428d3b_0
- cloudpickle=3.0.0=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_0
- comm=0.2.2=pyhd8ed1ab_0
- contourpy=1.2.1=py310h232114e_0
- cryptography=41.0.3=py310h3438e0d_0
- cycler=0.12.1=pyhd8ed1ab_0
- dav1d=1.2.1=hcfcfb64_0
- debugpy=1.8.1=py310h00ffb61_0
- decorator=5.1.1=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- docker-py=7.0.0=pyhd8ed1ab_0
- eigen=3.4.0=h91493d7_0
- entrypoints=0.4=pyhd8ed1ab_0
- exceptiongroup=1.2.0=pyhd8ed1ab_2
- executing=2.0.1=pyhd8ed1ab_0
- ffmpeg=4.2.3=ha925a31_0
- flask=3.0.3=pyhd8ed1ab_0
- flatbuffers=24.3.25=h63175ca_0
- fonttools=4.51.0=py310h8d17308_0
- fqdn=1.5.1=pyhd8ed1ab_0
- freetype=2.12.1=hdaf720e_2
- frozenlist=1.4.1=py310h8d17308_0
- gast=0.4.0=pyh9f0ad1d_0
- gflags=2.2.2=ha925a31_1004
- giflib=5.2.1=h64bf75a_3
- gitdb=4.0.11=pyhd8ed1ab_0
- gitpython=3.1.43=pyhd8ed1ab_0
- glib=2.80.0=h39d0aa6_3
- glib-tools=2.80.0=h0a98069_3
- glog=0.5.0=h4797de2_0
- google-auth=2.29.0=pyhca7485f_0
- google-auth-oauthlib=0.4.1=py_2
- google-pasta=0.2.0=pyh8c360ce_0
- graphene=3.3=pyhd8ed1ab_0
- graphql-core=3.2.3=pyhd8ed1ab_0
- graphql-relay=3.2.0=pyhd8ed1ab_0
- greenlet=3.0.3=py310h00ffb61_0
- grpc-cpp=1.48.2=hf108199_0
- grpcio=1.42.0=py310hc60d5dd_0
- gst-plugins-base=1.18.5=h9e645db_0
- gstreamer=1.18.5=hd78058f_0
- h11=0.14.0=pyhd8ed1ab_0
- h2=4.1.0=pyhd8ed1ab_0
- h5py=3.7.0=nompi_py310h00cbb18_100
- hdf5=1.12.1=nompi_h2a0e4a3_104
- hpack=4.0.0=pyh9f0ad1d_0
- httpcore=1.0.5=pyhd8ed1ab_0
- httpx=0.27.0=pyhd8ed1ab_0
- hyperframe=6.0.1=pyhd8ed1ab_0
- icu=58.2=ha925a31_3
- idna=3.6=pyhd8ed1ab_0
- imagecodecs=2023.1.23=py310h6c6a46e_0
- imageio=2.34.0=pyh4b66e23_0
- importlib-metadata=7.1.0=pyha770c72_0
- importlib_metadata=7.1.0=hd8ed1ab_0
- importlib_resources=6.4.0=pyhd8ed1ab_0
- intel-openmp=2023.1.0=h59b6b97_46320
- ipykernel=6.29.3=pyha63f2e9_0
- ipython=8.22.2=pyh7428d3b_0
- ipywidgets=8.1.2=pyhd8ed1ab_0
- isoduration=20.11.0=pyhd8ed1ab_0
- itsdangerous=2.2.0=pyhd8ed1ab_0
- jedi=0.19.1=pyhd8ed1ab_0
- jinja2=3.1.3=pyhd8ed1ab_0
- joblib=1.3.2=pyhd8ed1ab_0
- jpeg=9e=hcfcfb64_3
- json5=0.9.24=pyhd8ed1ab_0
- jsonpointer=2.4=py310h5588dad_3
- jsonschema=4.21.1=pyhd8ed1ab_0
- jsonschema-specifications=2023.12.1=pyhd8ed1ab_0
- jsonschema-with-format-nongpl=4.21.1=pyhd8ed1ab_0
- jupyter=1.0.0=py310haa95532_9
- jupyter-lsp=2.2.4=pyhd8ed1ab_0
- jupyter_client=8.6.1=pyhd8ed1ab_0
- jupyter_console=6.6.3=pyhd8ed1ab_0
- jupyter_core=5.7.2=py310h5588dad_0
- jupyter_events=0.10.0=pyhd8ed1ab_0
- jupyter_server=2.13.0=pyhd8ed1ab_0
- jupyter_server_terminals=0.5.3=pyhd8ed1ab_0
- jupyterlab=4.1.5=pyhd8ed1ab_0
- jupyterlab_pygments=0.3.0=pyhd8ed1ab_1
- jupyterlab_server=2.25.4=pyhd8ed1ab_0
- jupyterlab_widgets=3.0.10=pyhd8ed1ab_0
- keras=2.10.0=py310haa95532_0
- keras-preprocessing=1.1.2=pyhd8ed1ab_0
- kiwisolver=1.4.5=py310h232114e_1
- lazy_loader=0.4=pyhd8ed1ab_0
- lcms2=2.12=h83e58a3_0
- lerc=3.0=hd77b12b_0
- libabseil-static=20211102.0=cxx11_h58a5ce6_3
- libaec=1.1.3=h63175ca_0
- libavif=0.11.1=h2bbff1b_0
- libblas=3.9.0=20_win64_mkl
- libboost=1.84.0=hcc118f5_2
- libboost-devel=1.84.0=h91493d7_2
- libboost-headers=1.84.0=h57928b3_2
- libbrotlicommon=1.0.9=h2bbff1b_7
- libbrotlidec=1.0.9=h2bbff1b_7
- libbrotlienc=1.0.9=h2bbff1b_7
- libcblas=3.9.0=20_win64_mkl
- libclang=12.0.0=default_h627e005_2
- libcurl=8.5.0=h86230a5_0
- libdeflate=1.17=h2bbff1b_1
- libffi=3.4.2=h8ffe710_5
- libglib=2.80.0=h39d0aa6_3
- libiconv=1.17=hcfcfb64_2
- libintl=0.22.5=h5728263_2
- libintl-devel=0.22.5=h5728263_2
- liblapack=3.9.0=20_win64_mkl
- libogg=1.3.4=h8ffe710_1
- libopencv=4.6.0=haa95532_5
- libpng=1.6.43=h19919ed_0
- libprotobuf=3.20.3=h12be248_0
- libsodium=1.0.18=h8d14728_1
- libsqlite=3.45.2=hcfcfb64_0
- libssh2=1.10.0=hcd4344a_2
- libthrift=0.15.0=h636ae23_1
- libtiff=4.5.1=hd77b12b_0
- libvorbis=1.3.7=h0e60522_0
- libwebp=1.3.2=hcfcfb64_1
- libwebp-base=1.3.2=hcfcfb64_0
- libxml2=2.10.4=h0ad7f3c_1
- libxslt=1.1.37=h2bbff1b_1
- libzlib=1.2.13=hcfcfb64_5
- libzopfli=1.0.3=h0e60522_0
- lz4-c=1.9.4=hcfcfb64_0
- mako=1.3.5=pyhd8ed1ab_0
- markdown=3.6=pyhd8ed1ab_0
- markupsafe=2.1.5=py310h8d17308_0
- matplotlib-base=3.8.3=py310hc9baf74_0
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
- mistune=3.0.2=pyhd8ed1ab_0
- mkl=2023.2.0=h6a75c08_50497
- mkl-service=2.4.1=py310h49a50da_0
- mkl_fft=1.3.8=py310h042f14a_1
- mkl_random=1.2.5=py310hd199dba_1
- mlflow=2.12.2=h5588dad_0
- mlflow-skinny=2.12.2=py310h5588dad_0
- mlflow-ui=2.12.2=py310h5588dad_0
- multidict=6.0.5=py310h8d17308_0
- munkres=1.1.4=pyh9f0ad1d_0
- nbclient=0.10.0=pyhd8ed1ab_0
- nbconvert=7.16.3=hd8ed1ab_0
- nbconvert-core=7.16.3=pyhd8ed1ab_0
- nbconvert-pandoc=7.16.3=hd8ed1ab_0
- nbformat=5.10.4=pyhd8ed1ab_0
- nest-asyncio=1.6.0=pyhd8ed1ab_0
- networkx=3.3=pyhd8ed1ab_1
- notebook=7.1.2=pyhd8ed1ab_0
- notebook-shim=0.2.4=pyhd8ed1ab_0
- numexpr=2.9.0=mkl_py310hc26a618_0
- numpy=1.24.3=py310h055cbcc_1
- numpy-base=1.24.3=py310h65a83cf_1
- oauthlib=3.2.2=pyhd8ed1ab_0
- opencv=4.6.0=py310ha36de5b_5
- openjpeg=2.4.0=h4fc8c34_0
- openssl=1.1.1w=hcfcfb64_0
- opt_einsum=3.3.0=pyhc1e730c_2
- orc=1.7.4=h623e30f_1
- overrides=7.7.0=pyhd8ed1ab_0
- packaging=24.0=pyhd8ed1ab_0
- pandas=2.2.1=py310h5da7b33_0
- pandoc=3.1.13=h57928b3_0
- pandocfilters=1.5.0=pyhd8ed1ab_0
- paramiko=3.4.0=pyhd8ed1ab_0
- parso=0.8.4=pyhd8ed1ab_0
- pcre2=10.43=h17e33f8_0
- pickleshare=0.7.5=py_1003
- pillow=10.2.0=py310h2bbff1b_0
- pip=24.0=pyhd8ed1ab_0
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
- platformdirs=4.2.0=pyhd8ed1ab_0
- ply=3.11=pyhd8ed1ab_2
- prometheus_client=0.20.0=pyhd8ed1ab_0
- prometheus_flask_exporter=0.23.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.42=pyha770c72_0
- prompt_toolkit=3.0.42=hd8ed1ab_0
- protobuf=3.20.3=py310h5588dad_1
- psutil=5.9.8=py310h8d17308_0
- pure_eval=0.2.2=pyhd8ed1ab_0
- py-opencv=4.6.0=haa95532_5
- pyarrow=11.0.0=py310h790e06d_1
- pyasn1=0.5.1=pyhd8ed1ab_0
- pyasn1-modules=0.3.0=pyhd8ed1ab_0
- pycparser=2.22=pyhd8ed1ab_0
- pygments=2.17.2=pyhd8ed1ab_0
- pyjwt=2.8.0=pyhd8ed1ab_1
- pynacl=1.5.0=py310h635b8f1_3
- pyopenssl=23.2.0=pyhd8ed1ab_1
- pyparsing=3.1.2=pyhd8ed1ab_0
- pyqt=5.15.10=py310hd77b12b_0
- pyqt5-sip=12.13.0=py310h2bbff1b_0
- pysocks=1.7.1=pyh0701188_6
- python=3.10.13=h966fe2a_0
- python-dateutil=2.9.0=pyhd8ed1ab_0
- python-fastjsonschema=2.19.1=pyhd8ed1ab_0
- python-flatbuffers=24.3.25=pyh59ac667_0
- python-json-logger=2.0.7=pyhd8ed1ab_0
- python-tzdata=2024.1=pyhd8ed1ab_0
- python_abi=3.10=2_cp310
- pytz=2024.1=pyhd8ed1ab_0
- pyu2f=0.1.5=pyhd8ed1ab_0
- pywin32=306=py310h00ffb61_2
- pywin32-on-windows=0.1.0=pyh07e9846_2
- pywinpty=2.0.13=py310h00ffb61_0
- pyyaml=6.0.1=py310h8d17308_1
- pyzmq=25.1.2=py310h2849c00_0
- qt-main=5.15.2=he8e5bd7_7
- qt-webengine=5.15.9=h5bd16bc_7
- qtconsole=5.5.1=pyhd8ed1ab_0
- qtconsole-base=5.5.1=pyha770c72_0
- qtpy=2.4.1=pyhd8ed1ab_0
- qtwebkit=5.212=h2bbfb41_5
- querystring_parser=1.2.4=py_0
- re2=2022.04.01=h0e60522_0
- referencing=0.34.0=pyhd8ed1ab_0
- requests=2.31.0=pyhd8ed1ab_0
- requests-oauthlib=2.0.0=pyhd8ed1ab_0
- rfc3339-validator=0.1.4=pyhd8ed1ab_0
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
- rpds-py=0.18.0=py310h87d50f1_0
- rsa=4.9=pyhd8ed1ab_0
- scikit-image=0.22.0=py310h25bd2df_0
- scikit-learn=1.3.0=py310h4ed8f06_1
- scipy=1.13.0=py310hf667824_0
- seaborn=0.12.2=py310haa95532_0
- send2trash=1.8.2=pyh08f2357_0
- setuptools=69.2.0=pyhd8ed1ab_0
- sip=6.7.12=py310h00ffb61_0
- six=1.16.0=pyh6c4a22f_0
- smmap=5.0.0=pyhd8ed1ab_0
- snappy=1.1.10=hfb803bf_0
- sniffio=1.3.1=pyhd8ed1ab_0
- soupsieve=2.5=pyhd8ed1ab_1
- sqlalchemy=2.0.30=py310ha8f682b_0
- sqlite=3.45.2=hcfcfb64_0
- sqlparse=0.5.0=pyhd8ed1ab_0
- stack_data=0.6.2=pyhd8ed1ab_0
- tbb=2021.8.0=h59b6b97_0
- tensorboard=2.10.0=py310haa95532_0
- tensorboard-data-server=0.6.1=py310haa95532_0
- tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
- tensorflow=2.10.0=mkl_py310hd99672f_0
- tensorflow-base=2.10.0=mkl_py310h6a7f48e_0
- tensorflow-estimator=2.10.0=py310haa95532_0
- termcolor=2.4.0=pyhd8ed1ab_0
- terminado=0.18.1=pyh5737063_0
- threadpoolctl=3.4.0=pyhc1e730c_0
- tifffile=2023.2.28=pyhd8ed1ab_0
- tinycss2=1.2.1=pyhd8ed1ab_0
- tk=8.6.13=h5226925_1
- tomli=2.0.1=pyhd8ed1ab_0
- tornado=6.4=py310h8d17308_0
- traitlets=5.14.2=pyhd8ed1ab_0
- types-python-dateutil=2.9.0.20240316=pyhd8ed1ab_0
- typing-extensions=4.11.0=hd8ed1ab_0
- typing_extensions=4.11.0=pyha770c72_0
- typing_utils=0.1.0=pyhd8ed1ab_0
- tzdata=2024a=h0c530f3_0
- ucrt=10.0.22621.0=h57928b3_0
- unicodedata2=15.1.0=py310h8d17308_0
- uri-template=1.3.0=pyhd8ed1ab_0
- urllib3=2.2.1=pyhd8ed1ab_0
- utf8proc=2.6.1=h2bbff1b_1
- vc=14.3=hcf57466_18
- vc14_runtime=14.38.33130=h82b7239_18
- vs2015_runtime=14.38.33130=hcb4865c_18
- waitress=2.1.2=pyhd8ed1ab_0
- wcwidth=0.2.13=pyhd8ed1ab_0
- webcolors=1.13=pyhd8ed1ab_0
- webencodings=0.5.1=pyhd8ed1ab_2
- websocket-client=1.7.0=pyhd8ed1ab_0
- werkzeug=3.0.2=pyhd8ed1ab_0
- wheel=0.43.0=pyhd8ed1ab_1
- widgetsnbextension=4.0.10=pyhd8ed1ab_0
- win_inet_pton=1.1.0=pyhd8ed1ab_6
- winpty=0.4.3=4
- wrapt=1.16.0=py310h8d17308_0
- xz=5.4.6=h8cc25b3_0
- yaml=0.2.5=h8ffe710_2
- yarl=1.9.4=py310h8d17308_0
- zeromq=4.3.5=h63175ca_1
- zfp=1.0.1=h63175ca_0
- zipp=3.17.0=pyhd8ed1ab_0
- zlib=1.2.13=hcfcfb64_5
- zstd=1.5.5=h12be248_0
prefix: C:\Users\adamw\.conda\envs\uczenie_glebokie

10
mlflow_project/MLproject Normal file
View File

@ -0,0 +1,10 @@
name: MLflow_s464979
conda_env: conda.yaml
entry_points:
optimal_parameters:
parameters:
epochs: { type: int, default: 20 }
batch_size: { type: int, default: 32 }
command: 'python mlflow_training_model.py {epochs} {batch_size}'

File diff suppressed because it is too large Load Diff

11
mlflow_project/conda.yaml Normal file
View File

@ -0,0 +1,11 @@
name: MLflow_s464979
channels:
- defaults
dependencies:
- python=3.10
- pip
- pip:
- mlflow
- tensorflow
- pandas
- scikit-learn

View File

@ -0,0 +1,53 @@
import pandas as pd
import tensorflow as tf
import sys
import mlflow
from sklearn.metrics import accuracy_score
mlflow.set_tracking_uri("http://localhost:5000")
def main():
    """Train the beer-review classifier and record the run in MLflow.

    Command-line arguments:
        sys.argv[1]: number of training epochs (parsed with ``int``).
        sys.argv[2]: training batch size (parsed with ``int``).

    Reads ``./beer_reviews_train.csv`` and ``./beer_reviews_test.csv`` from
    the working directory. Logs ``epochs`` and ``batch_size`` as MLflow
    parameters and the test-set ``accuracy`` as an MLflow metric.

    Raises:
        IndexError: if either command-line argument is missing.
        ValueError: if an argument cannot be parsed as an integer.
    """
    feature_columns = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']

    # Parse the hyper-parameters once, before any expensive work starts.
    epochs = int(sys.argv[1])
    batch_size = int(sys.argv[2])

    def rows_to_text(frame):
        # Iterating a DataFrame yields its column labels, not its rows, so
        # the tokenizer must be given one joined string per review instead
        # of the frame itself.
        return frame[feature_columns].astype(str).agg(' '.join, axis=1)

    def encode(fitted_tokenizer, texts):
        # Same encoding for train and test: word indices, padded to 100.
        sequences = fitted_tokenizer.texts_to_sequences(texts)
        return tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=100)

    train_data = pd.read_csv('./beer_reviews_train.csv')
    train_text = rows_to_text(train_data)
    # The network ends in a sigmoid trained with binary cross-entropy, so the
    # target has to be 0/1. Use the same ">= 3" threshold as the evaluation
    # below so training and scoring agree.
    y_train = (train_data['review_overall'] >= 3).astype(int)

    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(train_text)
    X_train_pad = encode(tokenizer, train_text)

    with mlflow.start_run() as run:
        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

        model = tf.keras.Sequential([
            tf.keras.layers.Embedding(input_dim=10000, output_dim=16, input_length=100),
            tf.keras.layers.GlobalAveragePooling1D(),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])

        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        print(sys.argv[1])
        print(sys.argv[2])

        model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

        mlflow.log_param("epochs", epochs)
        mlflow.log_param("batch_size", batch_size)

        test_data = pd.read_csv('./beer_reviews_test.csv')
        y_test = test_data['review_overall']

        # Encode the test rows with the tokenizer fitted on the training
        # data; the model must see the same input representation it was
        # trained on rather than the raw feature frame.
        X_test_pad = encode(tokenizer, rows_to_text(test_data))
        predictions = model.predict(X_test_pad).flatten()

        y_test_binary = (y_test >= 3).astype(int)
        accuracy = accuracy_score(y_test_binary, predictions.round())

        mlflow.log_metric("accuracy", accuracy)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,15 @@
artifact_uri: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0/2824b8df5d6d414faf28b318d8b870b9/artifacts
end_time: 1715624675138
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 2824b8df5d6d414faf28b318d8b870b9
run_name: auspicious-pig-388
run_uuid: 2824b8df5d6d414faf28b318d8b870b9
source_name: ''
source_type: 4
source_version: ''
start_time: 1715624648948
status: 3
tags: []
user_id: adamw

View File

@ -0,0 +1 @@
1715624674602 0.9242538359967604 0

View File

@ -0,0 +1 @@
64

View File

@ -0,0 +1 @@
20

View File

@ -0,0 +1 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

View File

@ -0,0 +1 @@
local

View File

@ -0,0 +1 @@
optimal_parameters

View File

@ -0,0 +1 @@
conda

View File

@ -0,0 +1 @@
auspicious-pig-388

View File

@ -0,0 +1 @@
0dbf6f1959cb042149cd568c8b11684f23c68024

View File

@ -0,0 +1 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

View File

@ -0,0 +1 @@
file://C:\Users\adamw\REPOS\ium_464979#\mlflow_project

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
adamw

View File

@ -0,0 +1,15 @@
artifact_uri: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0/5cbc975a93e94b8eb27d7dca17d65191/artifacts
end_time: 1715624631264
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 5cbc975a93e94b8eb27d7dca17d65191
run_name: unruly-lamb-469
run_uuid: 5cbc975a93e94b8eb27d7dca17d65191
source_name: ''
source_type: 4
source_version: ''
start_time: 1715624604038
status: 3
tags: []
user_id: adamw

View File

@ -0,0 +1 @@
1715624630726 0.9242538359967604 0

View File

@ -0,0 +1 @@
32

View File

@ -0,0 +1 @@
40

View File

@ -0,0 +1 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

View File

@ -0,0 +1 @@
local

View File

@ -0,0 +1 @@
optimal_parameters

View File

@ -0,0 +1 @@
conda

View File

@ -0,0 +1 @@
unruly-lamb-469

View File

@ -0,0 +1 @@
0dbf6f1959cb042149cd568c8b11684f23c68024

View File

@ -0,0 +1 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

View File

@ -0,0 +1 @@
file://C:\Users\adamw\REPOS\ium_464979#\mlflow_project

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
adamw

View File

@ -0,0 +1,15 @@
artifact_uri: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0/5df6baa9d7c143d7a16314c7721ef48e/artifacts
end_time: 1715624529496
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 5df6baa9d7c143d7a16314c7721ef48e
run_name: silent-hound-874
run_uuid: 5df6baa9d7c143d7a16314c7721ef48e
source_name: ''
source_type: 4
source_version: ''
start_time: 1715624503432
status: 3
tags: []
user_id: adamw

View File

@ -0,0 +1 @@
1715624528800 0.9242538359967604 0

View File

@ -0,0 +1 @@
32

View File

@ -0,0 +1 @@
20

View File

@ -0,0 +1 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

View File

@ -0,0 +1 @@
local

View File

@ -0,0 +1 @@
optimal_parameters

View File

@ -0,0 +1 @@
conda

View File

@ -0,0 +1 @@
silent-hound-874

View File

@ -0,0 +1 @@
0dbf6f1959cb042149cd568c8b11684f23c68024

View File

@ -0,0 +1 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

View File

@ -0,0 +1 @@
file://C:\Users\adamw\REPOS\ium_464979#\mlflow_project

View File

@ -0,0 +1 @@
PROJECT

View File

@ -0,0 +1 @@
adamw

6
mlruns/0/meta.yaml Normal file
View File

@ -0,0 +1,6 @@
artifact_location: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0
creation_time: 1715624503432
experiment_id: '0'
last_update_time: 1715624503432
lifecycle_stage: active
name: Default

812346
sacred/beer_reviews.csv Normal file

File diff suppressed because it is too large Load Diff

162470
sacred/beer_reviews_test.csv Normal file

File diff suppressed because it is too large Load Diff

649877
sacred/beer_reviews_train.csv Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"batch_size": 32,
"epochs": 10,
"seed": 373303958
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1,100 @@
{
"artifacts": [
"beer_review_model.h5"
],
"command": "run_experiment",
"experiment": {
"base_dir": "C:\\Users\\adamw\\REPOS\\ium_464979\\sacred",
"dependencies": [
"keras==2.12.0",
"numpy==1.23.5",
"sacred==0.8.5",
"scikit-learn==1.2.2"
],
"mainfile": "sacred_training_model.py",
"name": "464979",
"repositories": [
{
"commit": "e9f53be95453a8da8811653ba3c4a6e75895cd33",
"dirty": true,
"url": "https://git.wmi.amu.edu.pl/s464979/ium_464979.git"
}
],
"sources": [
[
"sacred_training_model.py",
"_sources\\sacred_training_model_2a1e89d7c820c7a00319e1e22827c7f9.py"
]
]
},
"heartbeat": "2024-06-11T17:21:14.840246",
"host": {
"ENV": {},
"cpu": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz",
"gpus": {
"driver_version": "555.85",
"gpus": [
{
"model": "NVIDIA GeForce GTX 1660 Ti",
"persistence_mode": false,
"total_memory": 6144
}
]
},
"hostname": "DESKTOP-9SEHQM2",
"os": [
"Windows",
"Windows-10-10.0.19045-SP0"
],
"python_version": "3.11.7"
},
"meta": {
"command": "run_experiment",
"config_updates": {},
"named_configs": [],
"options": {
"--beat-interval": null,
"--capture": null,
"--comment": null,
"--debug": false,
"--enforce_clean": false,
"--file_storage": null,
"--force": false,
"--help": false,
"--id": null,
"--loglevel": null,
"--mongo_db": null,
"--name": null,
"--pdb": false,
"--print-config": false,
"--priority": null,
"--queue": false,
"--s3": null,
"--sql": null,
"--tiny_db": null,
"--unobserved": false,
"COMMAND": null,
"UPDATE": [],
"help": false,
"with": false
}
},
"resources": [
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_train.csv",
"sacred_runs\\_resources\\beer_reviews_train_e8dab75a0ec202f56510a0e1f9926ad7.csv"
],
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_test.csv",
"sacred_runs\\_resources\\beer_reviews_test_56070f83bef3ee1d17d1a632aa55b798.csv"
]
],
"result": {
"dtype": "float64",
"py/object": "numpy.float64",
"value": 0.9237146778770103
},
"start_time": "2024-06-11T17:21:03.851734",
"status": "COMPLETED",
"stop_time": "2024-06-11T17:21:14.839247"
}

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"batch_size": 32,
"epochs": 10,
"seed": 541882551
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1,100 @@
{
"artifacts": [
"beer_review_sentiment_model.keras"
],
"command": "run_experiment",
"experiment": {
"base_dir": "C:\\Users\\adamw\\REPOS\\ium_464979\\sacred",
"dependencies": [
"keras==2.12.0",
"numpy==1.23.5",
"sacred==0.8.5",
"scikit-learn==1.2.2"
],
"mainfile": "sacred_training_model.py",
"name": "464979",
"repositories": [
{
"commit": "490b8cf77306ea482543e03ba29e37b07f689ae1",
"dirty": true,
"url": "https://git.wmi.amu.edu.pl/s464979/ium_464979.git"
}
],
"sources": [
[
"sacred_training_model.py",
"_sources\\sacred_training_model_ccb7ce6317e0e291ec9a10a9f4fffffe.py"
]
]
},
"heartbeat": "2024-06-11T21:08:46.548013",
"host": {
"ENV": {},
"cpu": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz",
"gpus": {
"driver_version": "555.85",
"gpus": [
{
"model": "NVIDIA GeForce GTX 1660 Ti",
"persistence_mode": false,
"total_memory": 6144
}
]
},
"hostname": "DESKTOP-9SEHQM2",
"os": [
"Windows",
"Windows-10-10.0.19045-SP0"
],
"python_version": "3.11.7"
},
"meta": {
"command": "run_experiment",
"config_updates": {},
"named_configs": [],
"options": {
"--beat-interval": null,
"--capture": null,
"--comment": null,
"--debug": false,
"--enforce_clean": false,
"--file_storage": null,
"--force": false,
"--help": false,
"--id": null,
"--loglevel": null,
"--mongo_db": null,
"--name": null,
"--pdb": false,
"--print-config": false,
"--priority": null,
"--queue": false,
"--s3": null,
"--sql": null,
"--tiny_db": null,
"--unobserved": false,
"COMMAND": null,
"UPDATE": [],
"help": false,
"with": false
}
},
"resources": [
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_train.csv",
"sacred_runs\\_resources\\beer_reviews_train_e8dab75a0ec202f56510a0e1f9926ad7.csv"
],
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_test.csv",
"sacred_runs\\_resources\\beer_reviews_test_56070f83bef3ee1d17d1a632aa55b798.csv"
]
],
"result": {
"dtype": "float64",
"py/object": "numpy.float64",
"value": 0.9237146778770103
},
"start_time": "2024-06-11T21:08:35.823687",
"status": "COMPLETED",
"stop_time": "2024-06-11T21:08:46.547012"
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from math import sqrt
ex = Experiment('464979')
# ex.observers.append(MongoObserver(url='mongodb://<user>:<password>@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))  # supply credentials from the environment; do not commit them
ex.observers.append(FileStorageObserver('sacred_runs'))
@ex.config
def my_config():
# Default hyper-parameters for the experiment. run_experiment receives
# values under these same names (epochs, batch_size) as its arguments.
epochs = 10  # number of epochs passed to model.fit
batch_size = 32  # batch size passed to model.fit
@ex.automain
def run_experiment(epochs, batch_size, _run):
    """Train, save and evaluate the beer-review classifier for one Sacred run.

    Args:
        epochs: number of training epochs passed to ``model.fit``.
        batch_size: mini-batch size passed to ``model.fit``.
        _run: the Sacred run object, used to register the saved model as an
            artifact and the two CSV inputs as resources.

    Returns:
        Accuracy of the rounded predictions against the ``review_overall >= 3``
        labels of ``beer_reviews_test.csv``.

    Side effects:
        Writes ``beer_review_sentiment_model.keras`` and
        ``beer_review_sentiment_predictions.csv`` to the working directory
        and prints the computed metrics.
    """
    feature_columns = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
    model_path = 'beer_review_sentiment_model.keras'

    def rows_to_text(frame):
        # Iterating a DataFrame yields its column labels, not its rows, so
        # the tokenizer must be given one joined string per review.
        return frame[feature_columns].astype(str).agg(' '.join, axis=1)

    train_data = pd.read_csv('beer_reviews_train.csv')
    train_text = rows_to_text(train_data)
    # Sigmoid + binary cross-entropy needs a 0/1 target; use the same ">= 3"
    # threshold that the evaluation below applies to the test labels.
    y_train = (train_data['review_overall'] >= 3).astype(int)

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(train_text)
    X_train_pad = pad_sequences(tokenizer.texts_to_sequences(train_text), maxlen=100)

    model = Sequential([
        Embedding(input_dim=10000, output_dim=16, input_length=100),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)

    model.save(model_path)
    # Register the file that was just written, not a differently named one.
    _run.add_artifact(model_path)

    test_data = pd.read_csv('beer_reviews_test.csv')
    y_test = test_data['review_overall']

    # Reuse the tokenizer fitted on the training data: a tokenizer re-fitted
    # on the test set would assign different word indices than the ones the
    # embedding layer was trained with.
    test_text = rows_to_text(test_data)
    X_test_pad = pad_sequences(tokenizer.texts_to_sequences(test_text), maxlen=100)

    predictions = model.predict(X_test_pad)

    # model.predict returns a 2-D array for a single-unit output layer;
    # keep only the first column so it fits into one DataFrame column.
    if len(predictions.shape) > 1:
        predictions = predictions[:, 0]

    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_test = results['Actual']

    y_test_binary = (y_test >= 3).astype(int)

    accuracy = accuracy_score(y_test_binary, y_pred.round())
    precision, recall, f1, _ = precision_recall_fscore_support(y_test_binary, y_pred.round(), average='micro')
    # NOTE(review): RMSE compares the raw review_overall rating with a
    # sigmoid output in [0, 1]; kept as in the original, but confirm that
    # this is the intended quantity.
    rmse = sqrt(mean_squared_error(y_test, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')

    return accuracy

View File

@ -0,0 +1,84 @@
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from math import sqrt
# Sacred experiment object; '464979' is the name passed to Experiment.
ex = Experiment('464979')
# The MongoDB observer is disabled in this variant of the script.
# NOTE(review): the disabled line below still embeds a user/password pair in
# the URL — consider removing it from the source entirely.
# ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
# Record runs on disk; 'sacred_runs' is the path handed to FileStorageObserver.
ex.observers.append(FileStorageObserver('sacred_runs'))
# Default hyperparameters, registered with the experiment through @ex.config.
# run_experiment declares parameters with the same names and forwards them
# to model.fit.
@ex.config
def my_config():
    epochs = 10  # forwarded to model.fit(epochs=...)
    batch_size = 32  # forwarded to model.fit(batch_size=...)
def _ratings_as_text(frame):
    """Render the four rating columns of every row as one space-joined string.

    Iterating a DataFrame yields its column labels, not its rows, so the
    frame must not be handed to the Keras ``Tokenizer`` directly. Each row is
    first turned into a small document such as ``"4.0 4.5 3.5 4.0"``.
    """
    rating_columns = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
    return frame[rating_columns].astype(str).agg(' '.join, axis=1)


def _as_binary_label(ratings):
    """Map overall ratings to 0/1 labels (1 when the rating is >= 3)."""
    return (ratings >= 3).astype(int)


# NOTE: Sacred requires the @ex.automain function to be the last definition
# in the file, so the helpers above must stay in front of it.
@ex.automain
def run_experiment(epochs, batch_size, _run):
    """Train the sentiment model, evaluate it on the test split, return accuracy.

    Args:
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Mini-batch size passed to ``model.fit``.
        _run: Sacred run object used to register artifacts and resources.

    Returns:
        Accuracy of the rounded predictions against the binarised
        ``review_overall`` ratings of the test split.

    Side effects:
        Reads ``beer_reviews_train.csv`` and ``beer_reviews_test.csv``, writes
        ``beer_review_sentiment_model.keras`` and
        ``beer_review_sentiment_predictions.csv``, and prints the metrics.
    """
    # --- training data -----------------------------------------------------
    train_data = pd.read_csv('beer_reviews_train.csv')
    train_text = _ratings_as_text(train_data)
    # The network ends in a sigmoid trained with binary cross-entropy, so the
    # targets must be 0/1 — the same threshold is used for evaluation below.
    y_train = _as_binary_label(train_data['review_overall'])

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(train_text)
    X_train_pad = pad_sequences(tokenizer.texts_to_sequences(train_text), maxlen=100)

    # --- model ---------------------------------------------------------------
    model = Sequential([
        Embedding(input_dim=10000, output_dim=16, input_length=100),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train_pad, y_train.to_numpy(), epochs=epochs,
              batch_size=batch_size, validation_split=0.1)
    model.save('beer_review_sentiment_model.keras')
    _run.add_artifact('beer_review_sentiment_model.keras')

    # --- evaluation ----------------------------------------------------------
    test_data = pd.read_csv('beer_reviews_test.csv')
    y_test = test_data['review_overall']
    # Reuse the tokenizer fitted on the training text: a freshly fitted one
    # would assign different word indices than the embedding was trained on.
    test_text = _ratings_as_text(test_data)
    X_test_pad = pad_sequences(tokenizer.texts_to_sequences(test_text), maxlen=100)

    predictions = model.predict(X_test_pad)
    if len(predictions.shape) > 1:
        # Keras returns shape (n, 1) for a single output unit; flatten it.
        predictions = predictions[:, 0]

    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_pred_label = y_pred.round().astype(int)
    y_test_binary = _as_binary_label(results['Actual'])

    accuracy = accuracy_score(y_test_binary, y_pred_label)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test_binary, y_pred_label, average='micro')
    # Predictions are probabilities in [0, 1]; compare them with the 0/1
    # labels rather than with the raw 1-5 ratings.
    rmse = sqrt(mean_squared_error(y_test_binary, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')
    return accuracy

View File

@ -0,0 +1,84 @@
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from math import sqrt
import os

# Sacred experiment object; '464979' is the name passed to Experiment.
ex = Experiment('464979')
# The MongoDB connection string can be supplied through the SACRED_MONGO_URL
# environment variable so credentials do not have to live in the source.
# NOTE(review): the fallback keeps the previously hard-coded URL (credentials
# included) so existing setups keep working; drop it once the variable is
# configured wherever this script runs.
ex.observers.append(MongoObserver(url=os.environ.get(
    'SACRED_MONGO_URL',
    'mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017',
)))
# Also record runs on disk; 'sacred_runs' is the path handed to FileStorageObserver.
ex.observers.append(FileStorageObserver('sacred_runs'))
# Default hyperparameters, registered with the experiment through @ex.config.
# run_experiment declares parameters with the same names and forwards them
# to model.fit.
@ex.config
def my_config():
    epochs = 10  # forwarded to model.fit(epochs=...)
    batch_size = 32  # forwarded to model.fit(batch_size=...)
def _ratings_as_text(frame):
    """Render the four rating columns of every row as one space-joined string.

    Iterating a DataFrame yields its column labels, not its rows, so the
    frame must not be handed to the Keras ``Tokenizer`` directly. Each row is
    first turned into a small document such as ``"4.0 4.5 3.5 4.0"``.
    """
    rating_columns = ['review_aroma', 'review_appearance', 'review_palate', 'review_taste']
    return frame[rating_columns].astype(str).agg(' '.join, axis=1)


def _as_binary_label(ratings):
    """Map overall ratings to 0/1 labels (1 when the rating is >= 3)."""
    return (ratings >= 3).astype(int)


# NOTE: Sacred requires the @ex.automain function to be the last definition
# in the file, so the helpers above must stay in front of it.
@ex.automain
def run_experiment(epochs, batch_size, _run):
    """Train the sentiment model, evaluate it on the test split, return accuracy.

    Args:
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Mini-batch size passed to ``model.fit``.
        _run: Sacred run object used to register artifacts and resources.

    Returns:
        Accuracy of the rounded predictions against the binarised
        ``review_overall`` ratings of the test split.

    Side effects:
        Reads ``beer_reviews_train.csv`` and ``beer_reviews_test.csv``, writes
        ``beer_review_sentiment_model.keras`` and
        ``beer_review_sentiment_predictions.csv``, and prints the metrics.
    """
    # --- training data -----------------------------------------------------
    train_data = pd.read_csv('beer_reviews_train.csv')
    train_text = _ratings_as_text(train_data)
    # The network ends in a sigmoid trained with binary cross-entropy, so the
    # targets must be 0/1 — the same threshold is used for evaluation below.
    y_train = _as_binary_label(train_data['review_overall'])

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(train_text)
    X_train_pad = pad_sequences(tokenizer.texts_to_sequences(train_text), maxlen=100)

    # --- model ---------------------------------------------------------------
    model = Sequential([
        Embedding(input_dim=10000, output_dim=16, input_length=100),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train_pad, y_train.to_numpy(), epochs=epochs,
              batch_size=batch_size, validation_split=0.1)
    model.save('beer_review_sentiment_model.keras')
    _run.add_artifact('beer_review_sentiment_model.keras')

    # --- evaluation ----------------------------------------------------------
    test_data = pd.read_csv('beer_reviews_test.csv')
    y_test = test_data['review_overall']
    # Reuse the tokenizer fitted on the training text: a freshly fitted one
    # would assign different word indices than the embedding was trained on.
    test_text = _ratings_as_text(test_data)
    X_test_pad = pad_sequences(tokenizer.texts_to_sequences(test_text), maxlen=100)

    predictions = model.predict(X_test_pad)
    if len(predictions.shape) > 1:
        # Keras returns shape (n, 1) for a single output unit; flatten it.
        predictions = predictions[:, 0]

    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_pred_label = y_pred.round().astype(int)
    y_test_binary = _as_binary_label(results['Actual'])

    accuracy = accuracy_score(y_test_binary, y_pred_label)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test_binary, y_pred_label, average='micro')
    # Predictions are probabilities in [0, 1]; compare them with the 0/1
    # labels rather than with the raw 1-5 ratings.
    rmse = sqrt(mean_squared_error(y_test_binary, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')
    return accuracy