Compare commits


1 commit

Author · SHA1 · Message · Date
AWieczarek · 850263b3b8 · IUM_06 · 2024-05-06 21:14:18 +02:00
83 changed files with 98 additions and 2755534 deletions

.dvc/.gitignore vendored (3 changed lines)

@@ -1,3 +0,0 @@
/config.local
/tmp
/cache

@@ -1,4 +0,0 @@
[core]
    remote = ium_ssh_remote
['remote "ium_ssh_remote"']
    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl

@@ -1,3 +0,0 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

.gitignore vendored (4 changed lines)

@@ -1,4 +0,0 @@
/beer_reviews_train.csv
/beer_reviews_test.csv
/beer_review_sentiment_model.h5
/beer_review_sentiment_predictions.csv

@@ -8,9 +8,9 @@ ARG DEBIAN_FRONTEND=noninteractive
 ENV TZ=Etc/UTC
 RUN apt update && \
-    apt install -y python3 python3-pip unzip git
+    apt install -y python3 python3-pip unzip
-RUN pip install kaggle pandas seaborn scikit-learn tensorflow sacred pymongo --break-system-packages
+RUN pip install kaggle pandas seaborn scikit-learn tensorflow
 WORKDIR /app

@@ -22,7 +22,6 @@ api.dataset_download_files('thedevastator/1-5-million-beer-reviews-from-beer-advocate
 #
 # get_ipython().system('kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate')
 #
-#Change
 #
 # # In[ ]:
 #

@@ -1,6 +1,5 @@
 import pandas as pd
 import tensorflow as tf
-import sys
 
 train_data = pd.read_csv('./beer_reviews_train.csv')
 X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
@@ -23,6 +22,6 @@ model.compile(optimizer='adam',
               loss='binary_crossentropy',
               metrics=['accuracy'])
-model.fit(X_train_pad, y_train, epochs=int(sys.argv[1]), batch_size=int(sys.argv[2]), validation_split=0.1)
+model.fit(X_train_pad, y_train, epochs=40, batch_size=32, validation_split=0.1)
 model.save('beer_review_sentiment_model.h5')

@@ -1,7 +1,7 @@
 import pandas as pd
 from sklearn.model_selection import train_test_split
 
-data = pd.read_csv('beer_reviews.csv')
+data = pd.read_csv('./beer_reviews.csv')
 
 train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

IUM_06-metrics.py (new file, 24 lines)

@@ -0,0 +1,24 @@
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from math import sqrt
import sys

data = pd.read_csv('beer_review_sentiment_predictions.csv')
y_pred = data['Predictions']
y_test = data['Actual']
y_test_binary = (y_test >= 3).astype(int)

build_number = sys.argv[1]

accuracy = accuracy_score(y_test_binary, y_pred.round())
precision, recall, f1, _ = precision_recall_fscore_support(y_test_binary, y_pred.round(), average='micro')
rmse = sqrt(mean_squared_error(y_test, y_pred))

print(f'Accuracy: {accuracy}')
print(f'Micro-avg Precision: {precision}')
print(f'Micro-avg Recall: {recall}')
print(f'F1 Score: {f1}')
print(f'RMSE: {rmse}')

with open(r"beer_metrics.txt", "a") as f:
    f.write(f"{accuracy},{build_number}\n")
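Each run of this script appends a single accuracy,build_number row, so the beer_metrics.txt artifact accumulates evaluation history across Jenkins builds (the build number arrives as sys.argv[1] from the pipeline's 'Run metrics' stage below). A hypothetical beer_metrics.txt after three builds, with illustrative values:

0.9242538359967604,1
0.9251103428517902,2
0.9237146778770103,3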

IUM_06-plot.py (new file, 24 lines)

@@ -0,0 +1,24 @@
import matplotlib.pyplot as plt


def main():
    accuracy = []
    build_numbers = []

    with open("beer_metrics.txt") as f:
        for line in f:
            accuracy.append(float(line.split(",")[0]))
            build_numbers.append(int(line.split(",")[1]))

    plt.plot(build_numbers, accuracy)
    plt.xlabel("Build Number")
    plt.ylabel("Accuracy")
    plt.title("Accuracy of the model over time")
    plt.xticks(range(min(build_numbers), max(build_numbers) + 1))
    plt.show()
    plt.savefig("acc.png")


if __name__ == "__main__":
    main()
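One caveat, since the pipeline runs this script headlessly: with interactive Matplotlib backends, calling plt.savefig() after plt.show() can write out an empty figure, because show() may release the figure once its window closes. A minimal sketch of the safer ordering (illustrative values, not from the repo):

import matplotlib
matplotlib.use("Agg")  # headless backend, as inside a Docker-based Jenkins agent
import matplotlib.pyplot as plt

plt.plot([1, 2, 3], [0.92, 0.93, 0.91])  # illustrative accuracy history
plt.xlabel("Build Number")
plt.ylabel("Accuracy")
plt.savefig("acc.png")  # save first...
plt.show()              # ...then show (a no-op under Agg)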

Jenkinsfile vendored (101 changed lines)

@@ -1,70 +1,61 @@
 pipeline {
-    agent any
-    parameters {
-        string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
-        string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
-        password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
+    agent {
+        dockerfile true
+    }
+    triggers {
+        upstream(upstreamProjects: 's464979-training/training', threshold: hudson.model.Result.SUCCESS)
+    }
+    parameters {
+        buildSelector(defaultSelector: lastSuccessful(), description: 'Which build to use for copying artifacts', name: 'BUILD_SELECTOR')
+        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training', name: 'BRANCH', type: 'PT_BRANCH'
     }
     stages {
         stage('Clone Repository') {
             steps {
-                git url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
+                git branch: 'evaluation', url: "https://git.wmi.amu.edu.pl/s464979/ium_464979"
             }
         }
-        stage('Download dataset') {
+        stage('Copy Dataset Artifacts') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${env.KAGGLE_USERNAME}", "KAGGLE_KEY=${env.KAGGLE_KEY}"]) {
-                    sh "kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate --unzip"
-                }
+                copyArtifacts filter: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', projectName: 'z-s464979-create-dataset', selector: buildParameter('BUILD_SELECTOR')
             }
         }
-        stage('Process and Split Dataset') {
-            agent {
-                dockerfile {
-                    filename 'Dockerfile'
-                    reuseNode true
-                }
-            }
-            steps {
-                sh "chmod +x ./IUM_05-split.py"
-                sh "python3 ./IUM_05-split.py"
-                archiveArtifacts artifacts: 'beer_reviews.csv,beer_reviews_train.csv,beer_reviews_test.csv', onlyIfSuccessful: true
-            }
-        }
-        stage("Run") {
-            agent {
-                dockerfile {
-                    filename 'Dockerfile'
-                    reuseNode true
-                }
-            }
-            steps {
-                sh "chmod +x ./IUM_05-model.py"
-                sh "chmod +x ./IUM_05-predict.py"
-                sh "python3 ./IUM_05-model.py 10 32"
-                sh "python3 ./IUM_05-predict.py"
-                archiveArtifacts artifacts: 'beer_review_sentiment_model.h5,beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
-            }
-        }
-        stage('Sacred') {
-            agent {
-                dockerfile {
-                    filename 'Dockerfile'
-                    reuseNode true
-                }
-            }
-            steps {
-                sh 'chmod +x sacred/sacred_training_model.py'
-                sh 'python3 sacred/sacred_training_model.py'
-            }
-        }
-        stage('Archive Artifacts from Experiments') {
+        stage('Copy Training Artifacts') {
             steps {
-                archiveArtifacts artifacts: 'sacred_runs/**/*.*', onlyIfSuccessful: true
+                copyArtifacts filter: 'beer_review_sentiment_model.h5', projectName: 's464979-training/' + params.BRANCH, selector: buildParameter('BUILD_SELECTOR')
             }
         }
+        stage('Copy Evaluation Artifacts') {
+            steps {
+                copyArtifacts filter: 'beer_metrics.txt', projectName: '_s464979-evaluation/evaluation', selector: buildParameter('BUILD_SELECTOR'), optional: true
+            }
+        }
+        stage("Run predictions") {
+            steps {
+                sh "chmod +x ./IUM_05-predict.py"
+                sh "python3 ./IUM_05-predict.py"
+                archiveArtifacts artifacts: 'beer_review_sentiment_predictions.csv', onlyIfSuccessful: true
+            }
+        }
+        stage('Run metrics') {
+            steps {
+                sh 'chmod +x ./IUM_06-metrics.py'
+                sh "python3 ./IUM_06-metrics.py ${currentBuild.number}"
+            }
+        }
+        stage('Run plot') {
+            steps {
+                sh 'chmod +x ./IUM_06-plot.py'
+                sh 'python3 ./IUM_06-plot.py'
+            }
+        }
+        stage('Archive Artifacts') {
+            steps {
+                archiveArtifacts artifacts: '*', onlyIfSuccessful: true
+            }
+        }
     }
 }
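The optional: true on 'Copy Evaluation Artifacts' matters on the very first evaluation build, when no previous beer_metrics.txt artifact exists to copy; IUM_06-metrics.py then creates the file itself by opening it in append mode. A minimal sketch of the same tolerant-read pattern on the consumer side (hypothetical helper, not part of the repo):

from pathlib import Path


def read_history(path="beer_metrics.txt"):
    """Return (accuracy, build_number) pairs, tolerating a missing history file."""
    p = Path(path)
    if not p.exists():
        return []  # first build: nothing copied by copyArtifacts yet
    pairs = []
    for line in p.read_text().splitlines():
        if line.strip():
            acc, build = line.split(",")
            pairs.append((float(acc), int(build)))
    return pairs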

@@ -1 +0,0 @@
asdasd

data/.gitignore vendored (1 changed line)

@@ -1 +0,0 @@
/beer_reviews.csv

@@ -1,5 +0,0 @@
outs:
- md5: 50f6eec0d0fe78bc0f10e35edd271998
  size: 201644905
  hash: md5
  path: beer_reviews.csv

@@ -1,46 +0,0 @@
schema: '2.0'
stages:
  split_data:
    cmd: python IUM_05-split.py
    deps:
    - path: data/beer_reviews.csv
      hash: md5
      md5: 50f6eec0d0fe78bc0f10e35edd271998
      size: 201644905
    outs:
    - path: beer_reviews_test.csv
      hash: md5
      md5: edbd0a7f05c59a0c0e936917f60e9b96
      size: 40632354
    - path: beer_reviews_train.csv
      hash: md5
      md5: 8c6877a26fef1542369bfae6b39d163c
      size: 162599343
  train_model:
    cmd: python IUM_05-model.py 10 32
    deps:
    - path: beer_reviews_train.csv
      hash: md5
      md5: 8c6877a26fef1542369bfae6b39d163c
      size: 162599343
    outs:
    - path: beer_review_sentiment_model.h5
      hash: md5
      md5: c126bd5d332a905262c66894585450e3
      size: 1950856
  predict:
    cmd: python IUM_05-predict.py
    deps:
    - path: beer_review_sentiment_model.h5
      hash: md5
      md5: c126bd5d332a905262c66894585450e3
      size: 1950856
    - path: beer_reviews_test.csv
      hash: md5
      md5: edbd0a7f05c59a0c0e936917f60e9b96
      size: 40632354
    outs:
    - path: beer_review_sentiment_predictions.csv
      hash: md5
      md5: 12a66fafb7f4d7d19eb0c4a90cc7d3ad
      size: 4814242

@@ -1,23 +0,0 @@
stages:
  split_data:
    cmd: python IUM_05-split.py
    deps:
    - data/beer_reviews.csv
    outs:
    - beer_reviews_train.csv
    - beer_reviews_test.csv
  train_model:
    cmd: python IUM_05-model.py 10 32
    deps:
    - beer_reviews_train.csv
    outs:
    - beer_review_sentiment_model.h5
  predict:
    cmd: python IUM_05-predict.py
    deps:
    - beer_review_sentiment_model.h5
    - beer_reviews_test.csv
    outs:
    - beer_review_sentiment_predictions.csv
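This is the three-stage pipeline (split_data, then train_model, then predict) that the dvc.lock above pins with md5 hashes; until this commit removed it, the pipeline would be re-run with dvc repro, which re-executes only the stages whose dependencies changed. A minimal sketch using DVC's Python API, assuming dvc is installed and the repository root is the working directory:

from dvc.repo import Repo

# Re-run any stages from dvc.yaml whose dependencies no longer match
# dvc.lock; the programmatic equivalent of `dvc repro`.
Repo(".").reproduce()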

@@ -1,338 +0,0 @@
name: uczenie_glebokie
channels:
- conda-forge
- defaults
dependencies:
- _tflow_select=2.3.0=mkl
- abseil-cpp=20211102.0=h36ffca9_3
- absl-py=2.1.0=pyhd8ed1ab_0
- aiohttp=3.9.3=py310h8d17308_1
- aiosignal=1.3.1=pyhd8ed1ab_0
- alembic=1.13.1=pyhd8ed1ab_1
- aniso8601=9.0.1=pyhd8ed1ab_0
- anyio=4.3.0=pyhd8ed1ab_0
- aom=3.6.0=hd77b12b_0
- argon2-cffi=23.1.0=pyhd8ed1ab_0
- argon2-cffi-bindings=21.2.0=py310h8d17308_4
- arrow=1.3.0=pyhd8ed1ab_0
- arrow-cpp=11.0.0=h2c9b28c_2
- asttokens=2.4.1=pyhd8ed1ab_0
- astunparse=1.6.3=pyhd8ed1ab_0
- async-lru=2.0.4=pyhd8ed1ab_0
- async-timeout=4.0.3=pyhd8ed1ab_0
- attrs=23.2.0=pyh71513ae_0
- aws-c-common=0.4.57=ha925a31_1
- aws-c-event-stream=0.1.6=h7915e17_3
- aws-checksums=0.1.9=hb01e4cc_0
- aws-sdk-cpp=1.8.185=hd77b12b_0
- babel=2.14.0=pyhd8ed1ab_0
- bcrypt=4.1.3=py310hc226416_0
- beautifulsoup4=4.12.3=pyha770c72_0
- blas=1.0=mkl
- bleach=6.1.0=pyhd8ed1ab_0
- blinker=1.7.0=pyhd8ed1ab_0
- blosc=1.21.5=hdccc3a2_0
- boost-cpp=1.84.0=h6f18f0d_2
- bottleneck=1.3.8=py310h3e78b6c_0
- brotli=1.0.9=h2bbff1b_7
- brotli-bin=1.0.9=h2bbff1b_7
- brotli-python=1.0.9=py310h00ffb61_8
- bzip2=1.0.8=hcfcfb64_5
- c-ares=1.28.1=hcfcfb64_0
- ca-certificates=2024.2.2=h56e8100_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- cachetools=5.3.3=pyhd8ed1ab_0
- certifi=2024.2.2=pyhd8ed1ab_0
- cffi=1.16.0=py310h8d17308_0
- cfitsio=3.470=h2bbff1b_7
- charls=2.2.0=h6c2663c_0
- charset-normalizer=3.3.2=pyhd8ed1ab_0
- click=8.1.7=win_pyh7428d3b_0
- cloudpickle=3.0.0=pyhd8ed1ab_0
- colorama=0.4.6=pyhd8ed1ab_0
- comm=0.2.2=pyhd8ed1ab_0
- contourpy=1.2.1=py310h232114e_0
- cryptography=41.0.3=py310h3438e0d_0
- cycler=0.12.1=pyhd8ed1ab_0
- dav1d=1.2.1=hcfcfb64_0
- debugpy=1.8.1=py310h00ffb61_0
- decorator=5.1.1=pyhd8ed1ab_0
- defusedxml=0.7.1=pyhd8ed1ab_0
- docker-py=7.0.0=pyhd8ed1ab_0
- eigen=3.4.0=h91493d7_0
- entrypoints=0.4=pyhd8ed1ab_0
- exceptiongroup=1.2.0=pyhd8ed1ab_2
- executing=2.0.1=pyhd8ed1ab_0
- ffmpeg=4.2.3=ha925a31_0
- flask=3.0.3=pyhd8ed1ab_0
- flatbuffers=24.3.25=h63175ca_0
- fonttools=4.51.0=py310h8d17308_0
- fqdn=1.5.1=pyhd8ed1ab_0
- freetype=2.12.1=hdaf720e_2
- frozenlist=1.4.1=py310h8d17308_0
- gast=0.4.0=pyh9f0ad1d_0
- gflags=2.2.2=ha925a31_1004
- giflib=5.2.1=h64bf75a_3
- gitdb=4.0.11=pyhd8ed1ab_0
- gitpython=3.1.43=pyhd8ed1ab_0
- glib=2.80.0=h39d0aa6_3
- glib-tools=2.80.0=h0a98069_3
- glog=0.5.0=h4797de2_0
- google-auth=2.29.0=pyhca7485f_0
- google-auth-oauthlib=0.4.1=py_2
- google-pasta=0.2.0=pyh8c360ce_0
- graphene=3.3=pyhd8ed1ab_0
- graphql-core=3.2.3=pyhd8ed1ab_0
- graphql-relay=3.2.0=pyhd8ed1ab_0
- greenlet=3.0.3=py310h00ffb61_0
- grpc-cpp=1.48.2=hf108199_0
- grpcio=1.42.0=py310hc60d5dd_0
- gst-plugins-base=1.18.5=h9e645db_0
- gstreamer=1.18.5=hd78058f_0
- h11=0.14.0=pyhd8ed1ab_0
- h2=4.1.0=pyhd8ed1ab_0
- h5py=3.7.0=nompi_py310h00cbb18_100
- hdf5=1.12.1=nompi_h2a0e4a3_104
- hpack=4.0.0=pyh9f0ad1d_0
- httpcore=1.0.5=pyhd8ed1ab_0
- httpx=0.27.0=pyhd8ed1ab_0
- hyperframe=6.0.1=pyhd8ed1ab_0
- icu=58.2=ha925a31_3
- idna=3.6=pyhd8ed1ab_0
- imagecodecs=2023.1.23=py310h6c6a46e_0
- imageio=2.34.0=pyh4b66e23_0
- importlib-metadata=7.1.0=pyha770c72_0
- importlib_metadata=7.1.0=hd8ed1ab_0
- importlib_resources=6.4.0=pyhd8ed1ab_0
- intel-openmp=2023.1.0=h59b6b97_46320
- ipykernel=6.29.3=pyha63f2e9_0
- ipython=8.22.2=pyh7428d3b_0
- ipywidgets=8.1.2=pyhd8ed1ab_0
- isoduration=20.11.0=pyhd8ed1ab_0
- itsdangerous=2.2.0=pyhd8ed1ab_0
- jedi=0.19.1=pyhd8ed1ab_0
- jinja2=3.1.3=pyhd8ed1ab_0
- joblib=1.3.2=pyhd8ed1ab_0
- jpeg=9e=hcfcfb64_3
- json5=0.9.24=pyhd8ed1ab_0
- jsonpointer=2.4=py310h5588dad_3
- jsonschema=4.21.1=pyhd8ed1ab_0
- jsonschema-specifications=2023.12.1=pyhd8ed1ab_0
- jsonschema-with-format-nongpl=4.21.1=pyhd8ed1ab_0
- jupyter=1.0.0=py310haa95532_9
- jupyter-lsp=2.2.4=pyhd8ed1ab_0
- jupyter_client=8.6.1=pyhd8ed1ab_0
- jupyter_console=6.6.3=pyhd8ed1ab_0
- jupyter_core=5.7.2=py310h5588dad_0
- jupyter_events=0.10.0=pyhd8ed1ab_0
- jupyter_server=2.13.0=pyhd8ed1ab_0
- jupyter_server_terminals=0.5.3=pyhd8ed1ab_0
- jupyterlab=4.1.5=pyhd8ed1ab_0
- jupyterlab_pygments=0.3.0=pyhd8ed1ab_1
- jupyterlab_server=2.25.4=pyhd8ed1ab_0
- jupyterlab_widgets=3.0.10=pyhd8ed1ab_0
- keras=2.10.0=py310haa95532_0
- keras-preprocessing=1.1.2=pyhd8ed1ab_0
- kiwisolver=1.4.5=py310h232114e_1
- lazy_loader=0.4=pyhd8ed1ab_0
- lcms2=2.12=h83e58a3_0
- lerc=3.0=hd77b12b_0
- libabseil-static=20211102.0=cxx11_h58a5ce6_3
- libaec=1.1.3=h63175ca_0
- libavif=0.11.1=h2bbff1b_0
- libblas=3.9.0=20_win64_mkl
- libboost=1.84.0=hcc118f5_2
- libboost-devel=1.84.0=h91493d7_2
- libboost-headers=1.84.0=h57928b3_2
- libbrotlicommon=1.0.9=h2bbff1b_7
- libbrotlidec=1.0.9=h2bbff1b_7
- libbrotlienc=1.0.9=h2bbff1b_7
- libcblas=3.9.0=20_win64_mkl
- libclang=12.0.0=default_h627e005_2
- libcurl=8.5.0=h86230a5_0
- libdeflate=1.17=h2bbff1b_1
- libffi=3.4.2=h8ffe710_5
- libglib=2.80.0=h39d0aa6_3
- libiconv=1.17=hcfcfb64_2
- libintl=0.22.5=h5728263_2
- libintl-devel=0.22.5=h5728263_2
- liblapack=3.9.0=20_win64_mkl
- libogg=1.3.4=h8ffe710_1
- libopencv=4.6.0=haa95532_5
- libpng=1.6.43=h19919ed_0
- libprotobuf=3.20.3=h12be248_0
- libsodium=1.0.18=h8d14728_1
- libsqlite=3.45.2=hcfcfb64_0
- libssh2=1.10.0=hcd4344a_2
- libthrift=0.15.0=h636ae23_1
- libtiff=4.5.1=hd77b12b_0
- libvorbis=1.3.7=h0e60522_0
- libwebp=1.3.2=hcfcfb64_1
- libwebp-base=1.3.2=hcfcfb64_0
- libxml2=2.10.4=h0ad7f3c_1
- libxslt=1.1.37=h2bbff1b_1
- libzlib=1.2.13=hcfcfb64_5
- libzopfli=1.0.3=h0e60522_0
- lz4-c=1.9.4=hcfcfb64_0
- mako=1.3.5=pyhd8ed1ab_0
- markdown=3.6=pyhd8ed1ab_0
- markupsafe=2.1.5=py310h8d17308_0
- matplotlib-base=3.8.3=py310hc9baf74_0
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
- mistune=3.0.2=pyhd8ed1ab_0
- mkl=2023.2.0=h6a75c08_50497
- mkl-service=2.4.1=py310h49a50da_0
- mkl_fft=1.3.8=py310h042f14a_1
- mkl_random=1.2.5=py310hd199dba_1
- mlflow=2.12.2=h5588dad_0
- mlflow-skinny=2.12.2=py310h5588dad_0
- mlflow-ui=2.12.2=py310h5588dad_0
- multidict=6.0.5=py310h8d17308_0
- munkres=1.1.4=pyh9f0ad1d_0
- nbclient=0.10.0=pyhd8ed1ab_0
- nbconvert=7.16.3=hd8ed1ab_0
- nbconvert-core=7.16.3=pyhd8ed1ab_0
- nbconvert-pandoc=7.16.3=hd8ed1ab_0
- nbformat=5.10.4=pyhd8ed1ab_0
- nest-asyncio=1.6.0=pyhd8ed1ab_0
- networkx=3.3=pyhd8ed1ab_1
- notebook=7.1.2=pyhd8ed1ab_0
- notebook-shim=0.2.4=pyhd8ed1ab_0
- numexpr=2.9.0=mkl_py310hc26a618_0
- numpy=1.24.3=py310h055cbcc_1
- numpy-base=1.24.3=py310h65a83cf_1
- oauthlib=3.2.2=pyhd8ed1ab_0
- opencv=4.6.0=py310ha36de5b_5
- openjpeg=2.4.0=h4fc8c34_0
- openssl=1.1.1w=hcfcfb64_0
- opt_einsum=3.3.0=pyhc1e730c_2
- orc=1.7.4=h623e30f_1
- overrides=7.7.0=pyhd8ed1ab_0
- packaging=24.0=pyhd8ed1ab_0
- pandas=2.2.1=py310h5da7b33_0
- pandoc=3.1.13=h57928b3_0
- pandocfilters=1.5.0=pyhd8ed1ab_0
- paramiko=3.4.0=pyhd8ed1ab_0
- parso=0.8.4=pyhd8ed1ab_0
- pcre2=10.43=h17e33f8_0
- pickleshare=0.7.5=py_1003
- pillow=10.2.0=py310h2bbff1b_0
- pip=24.0=pyhd8ed1ab_0
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_1
- platformdirs=4.2.0=pyhd8ed1ab_0
- ply=3.11=pyhd8ed1ab_2
- prometheus_client=0.20.0=pyhd8ed1ab_0
- prometheus_flask_exporter=0.23.0=pyhd8ed1ab_0
- prompt-toolkit=3.0.42=pyha770c72_0
- prompt_toolkit=3.0.42=hd8ed1ab_0
- protobuf=3.20.3=py310h5588dad_1
- psutil=5.9.8=py310h8d17308_0
- pure_eval=0.2.2=pyhd8ed1ab_0
- py-opencv=4.6.0=haa95532_5
- pyarrow=11.0.0=py310h790e06d_1
- pyasn1=0.5.1=pyhd8ed1ab_0
- pyasn1-modules=0.3.0=pyhd8ed1ab_0
- pycparser=2.22=pyhd8ed1ab_0
- pygments=2.17.2=pyhd8ed1ab_0
- pyjwt=2.8.0=pyhd8ed1ab_1
- pynacl=1.5.0=py310h635b8f1_3
- pyopenssl=23.2.0=pyhd8ed1ab_1
- pyparsing=3.1.2=pyhd8ed1ab_0
- pyqt=5.15.10=py310hd77b12b_0
- pyqt5-sip=12.13.0=py310h2bbff1b_0
- pysocks=1.7.1=pyh0701188_6
- python=3.10.13=h966fe2a_0
- python-dateutil=2.9.0=pyhd8ed1ab_0
- python-fastjsonschema=2.19.1=pyhd8ed1ab_0
- python-flatbuffers=24.3.25=pyh59ac667_0
- python-json-logger=2.0.7=pyhd8ed1ab_0
- python-tzdata=2024.1=pyhd8ed1ab_0
- python_abi=3.10=2_cp310
- pytz=2024.1=pyhd8ed1ab_0
- pyu2f=0.1.5=pyhd8ed1ab_0
- pywin32=306=py310h00ffb61_2
- pywin32-on-windows=0.1.0=pyh07e9846_2
- pywinpty=2.0.13=py310h00ffb61_0
- pyyaml=6.0.1=py310h8d17308_1
- pyzmq=25.1.2=py310h2849c00_0
- qt-main=5.15.2=he8e5bd7_7
- qt-webengine=5.15.9=h5bd16bc_7
- qtconsole=5.5.1=pyhd8ed1ab_0
- qtconsole-base=5.5.1=pyha770c72_0
- qtpy=2.4.1=pyhd8ed1ab_0
- qtwebkit=5.212=h2bbfb41_5
- querystring_parser=1.2.4=py_0
- re2=2022.04.01=h0e60522_0
- referencing=0.34.0=pyhd8ed1ab_0
- requests=2.31.0=pyhd8ed1ab_0
- requests-oauthlib=2.0.0=pyhd8ed1ab_0
- rfc3339-validator=0.1.4=pyhd8ed1ab_0
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
- rpds-py=0.18.0=py310h87d50f1_0
- rsa=4.9=pyhd8ed1ab_0
- scikit-image=0.22.0=py310h25bd2df_0
- scikit-learn=1.3.0=py310h4ed8f06_1
- scipy=1.13.0=py310hf667824_0
- seaborn=0.12.2=py310haa95532_0
- send2trash=1.8.2=pyh08f2357_0
- setuptools=69.2.0=pyhd8ed1ab_0
- sip=6.7.12=py310h00ffb61_0
- six=1.16.0=pyh6c4a22f_0
- smmap=5.0.0=pyhd8ed1ab_0
- snappy=1.1.10=hfb803bf_0
- sniffio=1.3.1=pyhd8ed1ab_0
- soupsieve=2.5=pyhd8ed1ab_1
- sqlalchemy=2.0.30=py310ha8f682b_0
- sqlite=3.45.2=hcfcfb64_0
- sqlparse=0.5.0=pyhd8ed1ab_0
- stack_data=0.6.2=pyhd8ed1ab_0
- tbb=2021.8.0=h59b6b97_0
- tensorboard=2.10.0=py310haa95532_0
- tensorboard-data-server=0.6.1=py310haa95532_0
- tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
- tensorflow=2.10.0=mkl_py310hd99672f_0
- tensorflow-base=2.10.0=mkl_py310h6a7f48e_0
- tensorflow-estimator=2.10.0=py310haa95532_0
- termcolor=2.4.0=pyhd8ed1ab_0
- terminado=0.18.1=pyh5737063_0
- threadpoolctl=3.4.0=pyhc1e730c_0
- tifffile=2023.2.28=pyhd8ed1ab_0
- tinycss2=1.2.1=pyhd8ed1ab_0
- tk=8.6.13=h5226925_1
- tomli=2.0.1=pyhd8ed1ab_0
- tornado=6.4=py310h8d17308_0
- traitlets=5.14.2=pyhd8ed1ab_0
- types-python-dateutil=2.9.0.20240316=pyhd8ed1ab_0
- typing-extensions=4.11.0=hd8ed1ab_0
- typing_extensions=4.11.0=pyha770c72_0
- typing_utils=0.1.0=pyhd8ed1ab_0
- tzdata=2024a=h0c530f3_0
- ucrt=10.0.22621.0=h57928b3_0
- unicodedata2=15.1.0=py310h8d17308_0
- uri-template=1.3.0=pyhd8ed1ab_0
- urllib3=2.2.1=pyhd8ed1ab_0
- utf8proc=2.6.1=h2bbff1b_1
- vc=14.3=hcf57466_18
- vc14_runtime=14.38.33130=h82b7239_18
- vs2015_runtime=14.38.33130=hcb4865c_18
- waitress=2.1.2=pyhd8ed1ab_0
- wcwidth=0.2.13=pyhd8ed1ab_0
- webcolors=1.13=pyhd8ed1ab_0
- webencodings=0.5.1=pyhd8ed1ab_2
- websocket-client=1.7.0=pyhd8ed1ab_0
- werkzeug=3.0.2=pyhd8ed1ab_0
- wheel=0.43.0=pyhd8ed1ab_1
- widgetsnbextension=4.0.10=pyhd8ed1ab_0
- win_inet_pton=1.1.0=pyhd8ed1ab_6
- winpty=0.4.3=4
- wrapt=1.16.0=py310h8d17308_0
- xz=5.4.6=h8cc25b3_0
- yaml=0.2.5=h8ffe710_2
- yarl=1.9.4=py310h8d17308_0
- zeromq=4.3.5=h63175ca_1
- zfp=1.0.1=h63175ca_0
- zipp=3.17.0=pyhd8ed1ab_0
- zlib=1.2.13=hcfcfb64_5
- zstd=1.5.5=h12be248_0
prefix: C:\Users\adamw\.conda\envs\uczenie_glebokie

@@ -1,10 +0,0 @@
name: MLflow_s464979
conda_env: conda.yaml

entry_points:
  optimal_parameters:
    parameters:
      epochs: { type: int, default: 20 }
      batch_size: { type: int, default: 32 }
    command: 'python mlflow_training_model.py {epochs} {batch_size}'
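For context, an entry point declared in MLproject is launched with mlflow run . -e optimal_parameters, which resolves the conda_env and substitutes the parameters into the command template. A minimal sketch of the programmatic equivalent, assuming MLflow is installed and this is executed from the project root:

import mlflow

# Launch the 'optimal_parameters' entry point from the (now removed) MLproject,
# overriding its declared defaults of epochs=20, batch_size=32.
mlflow.projects.run(
    uri=".",
    entry_point="optimal_parameters",
    parameters={"epochs": 10, "batch_size": 32},
)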

File diff suppressed because it is too large

@@ -1,11 +0,0 @@
name: MLflow_s464979
channels:
  - defaults
dependencies:
  - python=3.10
  - pip
  - pip:
      - mlflow
      - tensorflow
      - pandas
      - scikit-learn

@@ -1,53 +0,0 @@
import pandas as pd
import tensorflow as tf
import sys
import mlflow
from sklearn.metrics import accuracy_score

mlflow.set_tracking_uri("http://localhost:5000")


def main():
    train_data = pd.read_csv('./beer_reviews_train.csv')
    X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
    y_train = train_data['review_overall']

    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_train)
    X_train_seq = tokenizer.texts_to_sequences(X_train)
    X_train_pad = tf.keras.preprocessing.sequence.pad_sequences(X_train_seq, maxlen=100)

    with mlflow.start_run() as run:
        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

        model = tf.keras.Sequential([
            tf.keras.layers.Embedding(input_dim=10000, output_dim=16, input_length=100),
            tf.keras.layers.GlobalAveragePooling1D(),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        print(sys.argv[1])
        print(sys.argv[2])
        model.fit(X_train_pad, y_train, epochs=int(sys.argv[1]), batch_size=int(sys.argv[2]), validation_split=0.1)
        mlflow.log_param("epochs", int(sys.argv[1]))
        mlflow.log_param("batch_size", int(sys.argv[2]))

        test_data = pd.read_csv('./beer_reviews_test.csv')
        X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
        y_test = test_data['review_overall']
        predictions = model.predict(X_test).flatten()
        y_test_binary = (y_test >= 3).astype(int)
        accuracy = accuracy_score(y_test_binary, predictions.round())
        mlflow.log_metric("accuracy", accuracy)


if __name__ == '__main__':
    main()
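Because the script logs against a tracking server at http://localhost:5000, the recorded parameters and accuracy can be read back with the MLflow client; a minimal sketch under that same assumption (experiment id '0' matches the mlruns metadata below):

import mlflow
from mlflow.tracking import MlflowClient

mlflow.set_tracking_uri("http://localhost:5000")
client = MlflowClient()

# Most recent run in the default experiment.
run = client.search_runs(experiment_ids=["0"], max_results=1)[0]
print(run.data.params)   # e.g. {'epochs': '10', 'batch_size': '32'}
print(run.data.metrics)  # e.g. {'accuracy': 0.924...}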

@@ -1,15 +0,0 @@
artifact_uri: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0/2824b8df5d6d414faf28b318d8b870b9/artifacts
end_time: 1715624675138
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 2824b8df5d6d414faf28b318d8b870b9
run_name: auspicious-pig-388
run_uuid: 2824b8df5d6d414faf28b318d8b870b9
source_name: ''
source_type: 4
source_version: ''
start_time: 1715624648948
status: 3
tags: []
user_id: adamw

@@ -1 +0,0 @@
1715624674602 0.9242538359967604 0

@@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

@@ -1 +0,0 @@
auspicious-pig-388

@@ -1 +0,0 @@
0dbf6f1959cb042149cd568c8b11684f23c68024

@@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

@@ -1 +0,0 @@
file://C:\Users\adamw\REPOS\ium_464979#\mlflow_project

@@ -1,15 +0,0 @@
artifact_uri: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0/5cbc975a93e94b8eb27d7dca17d65191/artifacts
end_time: 1715624631264
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 5cbc975a93e94b8eb27d7dca17d65191
run_name: unruly-lamb-469
run_uuid: 5cbc975a93e94b8eb27d7dca17d65191
source_name: ''
source_type: 4
source_version: ''
start_time: 1715624604038
status: 3
tags: []
user_id: adamw

@@ -1 +0,0 @@
1715624630726 0.9242538359967604 0

@@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

@@ -1 +0,0 @@
unruly-lamb-469

@@ -1 +0,0 @@
0dbf6f1959cb042149cd568c8b11684f23c68024

@@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

@@ -1 +0,0 @@
file://C:\Users\adamw\REPOS\ium_464979#\mlflow_project

@@ -1,15 +0,0 @@
artifact_uri: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0/5df6baa9d7c143d7a16314c7721ef48e/artifacts
end_time: 1715624529496
entry_point_name: ''
experiment_id: '0'
lifecycle_stage: active
run_id: 5df6baa9d7c143d7a16314c7721ef48e
run_name: silent-hound-874
run_uuid: 5df6baa9d7c143d7a16314c7721ef48e
source_name: ''
source_type: 4
source_version: ''
start_time: 1715624503432
status: 3
tags: []
user_id: adamw

@@ -1 +0,0 @@
1715624528800 0.9242538359967604 0

@@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

@@ -1 +0,0 @@
silent-hound-874

@@ -1 +0,0 @@
0dbf6f1959cb042149cd568c8b11684f23c68024

@@ -1 +0,0 @@
https://git.wmi.amu.edu.pl/s464979/ium_464979.git

@@ -1 +0,0 @@
file://C:\Users\adamw\REPOS\ium_464979#\mlflow_project

@@ -1,6 +0,0 @@
artifact_location: file:///C:/Users/adamw/REPOS/ium_464979/mlruns/0
creation_time: 1715624503432
experiment_id: '0'
last_update_time: 1715624503432
lifecycle_stage: active
name: Default

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -1,5 +0,0 @@
{
  "batch_size": 32,
  "epochs": 10,
  "seed": 373303958
}

File diff suppressed because one or more lines are too long

@@ -1 +0,0 @@
{}

@@ -1,100 +0,0 @@
{
"artifacts": [
"beer_review_model.h5"
],
"command": "run_experiment",
"experiment": {
"base_dir": "C:\\Users\\adamw\\REPOS\\ium_464979\\sacred",
"dependencies": [
"keras==2.12.0",
"numpy==1.23.5",
"sacred==0.8.5",
"scikit-learn==1.2.2"
],
"mainfile": "sacred_training_model.py",
"name": "464979",
"repositories": [
{
"commit": "e9f53be95453a8da8811653ba3c4a6e75895cd33",
"dirty": true,
"url": "https://git.wmi.amu.edu.pl/s464979/ium_464979.git"
}
],
"sources": [
[
"sacred_training_model.py",
"_sources\\sacred_training_model_2a1e89d7c820c7a00319e1e22827c7f9.py"
]
]
},
"heartbeat": "2024-06-11T17:21:14.840246",
"host": {
"ENV": {},
"cpu": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz",
"gpus": {
"driver_version": "555.85",
"gpus": [
{
"model": "NVIDIA GeForce GTX 1660 Ti",
"persistence_mode": false,
"total_memory": 6144
}
]
},
"hostname": "DESKTOP-9SEHQM2",
"os": [
"Windows",
"Windows-10-10.0.19045-SP0"
],
"python_version": "3.11.7"
},
"meta": {
"command": "run_experiment",
"config_updates": {},
"named_configs": [],
"options": {
"--beat-interval": null,
"--capture": null,
"--comment": null,
"--debug": false,
"--enforce_clean": false,
"--file_storage": null,
"--force": false,
"--help": false,
"--id": null,
"--loglevel": null,
"--mongo_db": null,
"--name": null,
"--pdb": false,
"--print-config": false,
"--priority": null,
"--queue": false,
"--s3": null,
"--sql": null,
"--tiny_db": null,
"--unobserved": false,
"COMMAND": null,
"UPDATE": [],
"help": false,
"with": false
}
},
"resources": [
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_train.csv",
"sacred_runs\\_resources\\beer_reviews_train_e8dab75a0ec202f56510a0e1f9926ad7.csv"
],
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_test.csv",
"sacred_runs\\_resources\\beer_reviews_test_56070f83bef3ee1d17d1a632aa55b798.csv"
]
],
"result": {
"dtype": "float64",
"py/object": "numpy.float64",
"value": 0.9237146778770103
},
"start_time": "2024-06-11T17:21:03.851734",
"status": "COMPLETED",
"stop_time": "2024-06-11T17:21:14.839247"
}

@@ -1,5 +0,0 @@
{
  "batch_size": 32,
  "epochs": 10,
  "seed": 541882551
}

File diff suppressed because one or more lines are too long

@@ -1 +0,0 @@
{}

@@ -1,100 +0,0 @@
{
"artifacts": [
"beer_review_sentiment_model.keras"
],
"command": "run_experiment",
"experiment": {
"base_dir": "C:\\Users\\adamw\\REPOS\\ium_464979\\sacred",
"dependencies": [
"keras==2.12.0",
"numpy==1.23.5",
"sacred==0.8.5",
"scikit-learn==1.2.2"
],
"mainfile": "sacred_training_model.py",
"name": "464979",
"repositories": [
{
"commit": "490b8cf77306ea482543e03ba29e37b07f689ae1",
"dirty": true,
"url": "https://git.wmi.amu.edu.pl/s464979/ium_464979.git"
}
],
"sources": [
[
"sacred_training_model.py",
"_sources\\sacred_training_model_ccb7ce6317e0e291ec9a10a9f4fffffe.py"
]
]
},
"heartbeat": "2024-06-11T21:08:46.548013",
"host": {
"ENV": {},
"cpu": "Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz",
"gpus": {
"driver_version": "555.85",
"gpus": [
{
"model": "NVIDIA GeForce GTX 1660 Ti",
"persistence_mode": false,
"total_memory": 6144
}
]
},
"hostname": "DESKTOP-9SEHQM2",
"os": [
"Windows",
"Windows-10-10.0.19045-SP0"
],
"python_version": "3.11.7"
},
"meta": {
"command": "run_experiment",
"config_updates": {},
"named_configs": [],
"options": {
"--beat-interval": null,
"--capture": null,
"--comment": null,
"--debug": false,
"--enforce_clean": false,
"--file_storage": null,
"--force": false,
"--help": false,
"--id": null,
"--loglevel": null,
"--mongo_db": null,
"--name": null,
"--pdb": false,
"--print-config": false,
"--priority": null,
"--queue": false,
"--s3": null,
"--sql": null,
"--tiny_db": null,
"--unobserved": false,
"COMMAND": null,
"UPDATE": [],
"help": false,
"with": false
}
},
"resources": [
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_train.csv",
"sacred_runs\\_resources\\beer_reviews_train_e8dab75a0ec202f56510a0e1f9926ad7.csv"
],
[
"C:\\Users\\adamw\\REPOS\\ium_464979\\sacred\\beer_reviews_test.csv",
"sacred_runs\\_resources\\beer_reviews_test_56070f83bef3ee1d17d1a632aa55b798.csv"
]
],
"result": {
"dtype": "float64",
"py/object": "numpy.float64",
"value": 0.9237146778770103
},
"start_time": "2024-06-11T21:08:35.823687",
"status": "COMPLETED",
"stop_time": "2024-06-11T21:08:46.547012"
}

@@ -1,85 +0,0 @@
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from math import sqrt

ex = Experiment('464979')
# ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
ex.observers.append(FileStorageObserver('sacred_runs'))


@ex.config
def my_config():
    epochs = 10
    batch_size = 32


@ex.automain
def run_experiment(epochs, batch_size, _run):
    train_data = pd.read_csv('beer_reviews_train.csv')
    X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
    y_train = train_data['review_overall']

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_train)
    X_train_seq = tokenizer.texts_to_sequences(X_train)
    X_train_pad = pad_sequences(X_train_seq, maxlen=100)

    model = Sequential([
        Embedding(input_dim=10000, output_dim=16, input_length=100),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)
    model.save('beer_review_sentiment_model.keras')
    _run.add_artifact('beer_review_model.h5')

    test_data = pd.read_csv('beer_reviews_test.csv')
    X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
    y_test = test_data['review_overall']

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_test)
    X_test_text = X_test.astype(str).agg(' '.join, axis=1)
    X_test_seq = tokenizer.texts_to_sequences(X_test_text)
    X_test_pad = pad_sequences(X_test_seq, maxlen=100)

    predictions = model.predict(X_test_pad)
    if len(predictions.shape) > 1:
        predictions = predictions[:, 0]
    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_test = results['Actual']
    y_test_binary = (y_test >= 3).astype(int)
    accuracy = accuracy_score(y_test_binary, y_pred.round())
    precision, recall, f1, _ = precision_recall_fscore_support(y_test_binary, y_pred.round(), average='micro')
    rmse = sqrt(mean_squared_error(y_test, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')
    return accuracy

@@ -1,84 +0,0 @@
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from math import sqrt

ex = Experiment('464979')
# ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
ex.observers.append(FileStorageObserver('sacred_runs'))


@ex.config
def my_config():
    epochs = 10
    batch_size = 32


@ex.automain
def run_experiment(epochs, batch_size, _run):
    train_data = pd.read_csv('beer_reviews_train.csv')
    X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
    y_train = train_data['review_overall']

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_train)
    X_train_seq = tokenizer.texts_to_sequences(X_train)
    X_train_pad = pad_sequences(X_train_seq, maxlen=100)

    model = Sequential([
        Embedding(input_dim=10000, output_dim=16, input_length=100),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)
    model.save('beer_review_sentiment_model.keras')
    _run.add_artifact('beer_review_sentiment_model.keras')

    test_data = pd.read_csv('beer_reviews_test.csv')
    X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
    y_test = test_data['review_overall']

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_test)
    X_test_text = X_test.astype(str).agg(' '.join, axis=1)
    X_test_seq = tokenizer.texts_to_sequences(X_test_text)
    X_test_pad = pad_sequences(X_test_seq, maxlen=100)

    predictions = model.predict(X_test_pad)
    if len(predictions.shape) > 1:
        predictions = predictions[:, 0]
    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_test = results['Actual']
    y_test_binary = (y_test >= 3).astype(int)
    accuracy = accuracy_score(y_test_binary, y_pred.round())
    precision, recall, f1, _ = precision_recall_fscore_support(y_test_binary, y_pred.round(), average='micro')
    rmse = sqrt(mean_squared_error(y_test, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')
    return accuracy

@@ -1,84 +0,0 @@
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
from math import sqrt

ex = Experiment('464979')
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
ex.observers.append(FileStorageObserver('sacred_runs'))


@ex.config
def my_config():
    epochs = 10
    batch_size = 32


@ex.automain
def run_experiment(epochs, batch_size, _run):
    train_data = pd.read_csv('beer_reviews_train.csv')
    X_train = train_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
    y_train = train_data['review_overall']

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_train)
    X_train_seq = tokenizer.texts_to_sequences(X_train)
    X_train_pad = pad_sequences(X_train_seq, maxlen=100)

    model = Sequential([
        Embedding(input_dim=10000, output_dim=16, input_length=100),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=0.1)
    model.save('beer_review_sentiment_model.keras')
    _run.add_artifact('beer_review_sentiment_model.keras')

    test_data = pd.read_csv('beer_reviews_test.csv')
    X_test = test_data[['review_aroma', 'review_appearance', 'review_palate', 'review_taste']]
    y_test = test_data['review_overall']

    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(X_test)
    X_test_text = X_test.astype(str).agg(' '.join, axis=1)
    X_test_seq = tokenizer.texts_to_sequences(X_test_text)
    X_test_pad = pad_sequences(X_test_seq, maxlen=100)

    predictions = model.predict(X_test_pad)
    if len(predictions.shape) > 1:
        predictions = predictions[:, 0]
    results = pd.DataFrame({'Predictions': predictions, 'Actual': y_test})
    results.to_csv('beer_review_sentiment_predictions.csv', index=False)

    y_pred = results['Predictions']
    y_test = results['Actual']
    y_test_binary = (y_test >= 3).astype(int)
    accuracy = accuracy_score(y_test_binary, y_pred.round())
    precision, recall, f1, _ = precision_recall_fscore_support(y_test_binary, y_pred.round(), average='micro')
    rmse = sqrt(mean_squared_error(y_test, y_pred))

    print(f'Accuracy: {accuracy}')
    print(f'Micro-avg Precision: {precision}')
    print(f'Micro-avg Recall: {recall}')
    print(f'F1 Score: {f1}')
    print(f'RMSE: {rmse}')

    _run.add_resource('./beer_reviews_train.csv')
    _run.add_resource('./beer_reviews_test.csv')
    return accuracy
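Experiments declared with @ex.automain take config overrides on the command line (python sacred_training_model.py with epochs=20 batch_size=64) and can also be driven programmatically; a minimal sketch, assuming the script above is importable:

from sacred_training_model import ex  # the Experiment('464979') defined above

# Override the my_config() defaults for one run; the configured observers still record it.
run = ex.run(config_updates={"epochs": 20, "batch_size": 64})
print(run.result)  # the accuracy returned by run_experiment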