Compare commits

...

16 Commits

Author SHA1 Message Date
fe6319e61b Upload files to "/" 2024-05-21 16:20:49 +02:00
4e1008cbca Update Jenkinsfile 2024-05-07 16:07:19 +02:00
670957ba8f Update evaluate.py 2024-05-07 15:58:57 +02:00
c4b9b4ca46 Update evaluate.py 2024-05-06 12:15:43 +02:00
e52c554336 Update Jenkinsfile 2024-05-04 16:59:28 +02:00
b0e6b6642f Update Jenkinsfile 2024-05-04 16:58:27 +02:00
9047dc0d98 Update Jenkinsfile 2024-05-04 16:56:22 +02:00
30de175f0a Update Jenkinsfile 2024-05-04 16:54:33 +02:00
e4d9d63f70 Update Jenkinsfile 2024-05-04 16:48:01 +02:00
0352e5bccc Update Jenkinsfile 2024-05-04 16:46:48 +02:00
322f486b91 Update Jenkinsfile 2024-05-04 16:43:16 +02:00
6b9b324ae3 Add evaluate.sh 2024-05-04 16:41:57 +02:00
9a13f3db0a Upload files to "/" 2024-05-04 16:36:19 +02:00
0ad38f5210 Upload files to "/" 2024-05-04 16:09:36 +02:00
4605c55152 Update requirements.txt 2024-05-04 15:57:47 +02:00
99063a59de Update Jenkinsfile 2024-05-04 15:29:58 +02:00
5 changed files with 344 additions and 11 deletions

24
Jenkinsfile vendored
View File

@ -1,21 +1,31 @@
// Declarative Jenkins pipeline for the model-evaluation job: it pulls artifacts from the
// dataset and training jobs, runs the evaluation script and archives the resulting files.
// NOTE(review): this listing appears to interleave removed and added lines of a diff
// (two consecutive `stage(...)` openers, two script invocations, two groups of
// archiveArtifacts) and its braces do not balance — confirm against the real file.
pipeline {
// The build agent is described by `dockerfile true`.
agent { dockerfile true }
options {
// Allow any project ('*') to copy this job's artifacts.
copyArtifactPermission('*');
}
triggers {
// Run after the upstream training job finishes with result SUCCESS.
upstream(upstreamProjects: "s452487-training/train", threshold: hudson.model.Result.SUCCESS)
}
stages {
stage('Dataset download and stats') {
stage('evaluateDataset') {
steps {
// NOTE(review): this export runs in its own `sh` step, so the variable is presumably
// not visible to the later `sh` steps — verify, and consider an `environment {}` block.
sh "export KAGGLE_CONFIG_DIR='${env.WORKSPACE}/kaggle_config/'"
// Fetch every artifact ('*') produced by the dataset job and by the training job.
copyArtifacts filter: '*', fingerprintArtifacts: true, projectName: 'z-s452487-create-dataset'
copyArtifacts filter: '*', fingerprintArtifacts: true, projectName: 's452487-training/train'
// Also fetch artifacts of the evaluation job itself; `optional: true` keeps the build
// going when there are none to copy.
copyArtifacts filter: '*', fingerprintArtifacts: true, projectName: 's452487-evaluation/evaluate', optional: true
// Recursively mark everything in the workspace as executable so the scripts below can run.
sh "chmod +x -R ${env.WORKSPACE}"
sh './dataset_download_and_run.sh'
sh './evaluate.sh'
}
}
stage('archiveArtifacts') {
steps {
// The message below is Polish for "Saving artifacts...".
echo "Zapisywanie artefaktów..."
archiveArtifacts 'test.csv'
archiveArtifacts 'train.csv'
archiveArtifacts 'valid.csv'
archiveArtifacts 'extracted_dataset/2022/processed.csv'
// Evaluation outputs: metrics plot, metrics history and the two prediction dumps.
archiveArtifacts 'metrics.jpg'
archiveArtifacts 'metrics.json'
archiveArtifacts 'predictions.txt'
archiveArtifacts 'predictions_two_digits.txt'
}
}
}

236
environment.yml Normal file
View File

@ -0,0 +1,236 @@
# Conda environment specification for the environment named "ML".
# Every dependency below is pinned in the form name=version=build.
# NOTE(review): the list contains Windows-specific packages (e.g. pywin32, pywinpty,
# vs2015_runtime, win_inet_pton) and the file ends with a Windows `prefix:` path, so this
# looks like an export from a Windows machine — the pinned builds may not resolve on
# another platform; confirm before relying on this file in CI.
name: ML
channels:
- defaults
- conda-forge
dependencies:
- _tflow_select=2.3.0=mkl
- absl-py=2.1.0=py39haa95532_0
- aiohttp=3.9.5=py39h2bbff1b_0
- aiosignal=1.2.0=pyhd3eb1b0_0
- anyio=4.2.0=py39haa95532_0
- argon2-cffi=21.3.0=pyhd3eb1b0_0
- argon2-cffi-bindings=21.2.0=py39h2bbff1b_0
- asttokens=2.0.5=pyhd3eb1b0_0
- astunparse=1.6.3=py_0
- async-lru=2.0.4=py39haa95532_0
- async-timeout=4.0.3=py39haa95532_0
- attrs=23.1.0=py39haa95532_0
- babel=2.11.0=py39haa95532_0
- backcall=0.2.0=pyhd3eb1b0_0
- beautifulsoup4=4.12.2=py39haa95532_0
- blas=1.0=mkl
- bleach=4.1.0=pyhd3eb1b0_0
- blinker=1.6.2=py39haa95532_0
- bottleneck=1.3.7=py39h9128911_0
- brotli=1.0.9=h2bbff1b_8
- brotli-bin=1.0.9=h2bbff1b_8
- brotli-python=1.0.9=py39hd77b12b_8
- bzip2=1.0.8=h2bbff1b_6
- ca-certificates=2024.3.11=haa95532_0
- cachetools=5.3.3=py39haa95532_0
- certifi=2024.2.2=py39haa95532_0
- cffi=1.16.0=py39h2bbff1b_1
- charset-normalizer=2.0.4=pyhd3eb1b0_0
- click=8.1.7=py39haa95532_0
- colorama=0.4.6=py39haa95532_0
- comm=0.2.1=py39haa95532_0
- contourpy=1.2.0=py39h59b6b97_0
- cryptography=41.0.3=py39h3438e0d_0
- cycler=0.11.0=pyhd3eb1b0_0
- debugpy=1.6.7=py39hd77b12b_0
- decorator=5.1.1=pyhd3eb1b0_0
- defusedxml=0.7.1=pyhd3eb1b0_0
- exceptiongroup=1.2.0=py39haa95532_0
- executing=0.8.3=pyhd3eb1b0_0
- flatbuffers=2.0.0=h6c2663c_0
- fonttools=4.51.0=py39h2bbff1b_0
- freetype=2.12.1=ha860e81_0
- frozenlist=1.4.0=py39h2bbff1b_0
- gast=0.4.0=pyhd3eb1b0_0
- giflib=5.2.1=h8cc25b3_3
- glib=2.78.4=hd77b12b_0
- glib-tools=2.78.4=hd77b12b_0
- google-auth=2.29.0=py39haa95532_0
- google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
- google-pasta=0.2.0=pyhd3eb1b0_0
- grpcio=1.42.0=py39hc60d5dd_0
- gst-plugins-base=1.18.5=h9e645db_0
- gstreamer=1.18.5=hd78058f_0
- h5py=3.11.0=py39hed405ee_0
- hdf5=1.12.1=h51c971a_3
- icc_rt=2022.1.0=h6049295_2
- icu=58.2=ha925a31_3
- idna=3.7=py39haa95532_0
- importlib-metadata=7.0.1=py39haa95532_0
- importlib_metadata=7.0.1=hd3eb1b0_0
- importlib_resources=6.1.1=py39haa95532_1
- intel-openmp=2023.1.0=h59b6b97_46320
- ipykernel=6.28.0=py39haa95532_0
- ipython=8.15.0=py39haa95532_0
- ipywidgets=8.1.2=py39haa95532_0
- jedi=0.18.1=py39haa95532_1
- jinja2=3.1.3=py39haa95532_0
- joblib=1.4.0=py39haa95532_0
- jpeg=9e=h2bbff1b_1
- json5=0.9.6=pyhd3eb1b0_0
- jsonschema=4.19.2=py39haa95532_0
- jsonschema-specifications=2023.7.1=py39haa95532_0
- jupyter=1.0.0=py39haa95532_9
- jupyter-lsp=2.2.0=py39haa95532_0
- jupyter_client=8.6.0=py39haa95532_0
- jupyter_console=6.6.3=py39haa95532_0
- jupyter_core=5.5.0=py39haa95532_0
- jupyter_events=0.8.0=py39haa95532_0
- jupyter_server=2.10.0=py39haa95532_0
- jupyter_server_terminals=0.4.4=py39haa95532_1
- jupyterlab=4.0.11=py39haa95532_0
- jupyterlab_pygments=0.1.2=py_0
- jupyterlab_server=2.25.1=py39haa95532_0
- jupyterlab_widgets=3.0.10=py39haa95532_0
- keras=2.10.0=py39haa95532_0
- keras-preprocessing=1.1.2=pyhd3eb1b0_0
- kiwisolver=1.4.4=py39hd77b12b_0
- krb5=1.19.4=h5b6d351_0
- lcms2=2.12=h83e58a3_0
- lerc=3.0=hd77b12b_0
- libbrotlicommon=1.0.9=h2bbff1b_8
- libbrotlidec=1.0.9=h2bbff1b_8
- libbrotlienc=1.0.9=h2bbff1b_8
- libclang=14.0.6=default_hb5a9fac_1
- libclang13=14.0.6=default_h8e68704_1
- libcurl=8.7.1=h86230a5_0
- libdeflate=1.17=h2bbff1b_1
- libffi=3.4.4=hd77b12b_1
- libglib=2.78.4=ha17d25a_0
- libiconv=1.16=h2bbff1b_3
- libogg=1.3.5=h2bbff1b_1
- libpng=1.6.39=h8cc25b3_0
- libprotobuf=3.20.3=h23ce68f_0
- libsodium=1.0.18=h62dcd97_0
- libssh2=1.10.0=hcd4344a_2
- libtiff=4.5.1=hd77b12b_0
- libvorbis=1.3.7=he774522_0
- libwebp-base=1.3.2=h2bbff1b_0
- lz4-c=1.9.4=h2bbff1b_1
- markdown=3.4.1=py39haa95532_0
- markupsafe=2.1.3=py39h2bbff1b_0
- matplotlib-base=3.8.4=py39h4ed8f06_0
- matplotlib-inline=0.1.6=py39haa95532_0
- mistune=2.0.4=py39haa95532_0
- mkl=2023.1.0=h6b88ed4_46358
- mkl-service=2.4.0=py39h2bbff1b_1
- mkl_fft=1.3.8=py39h2bbff1b_0
- mkl_random=1.2.4=py39h59b6b97_0
- multidict=6.0.4=py39h2bbff1b_0
- nbclient=0.8.0=py39haa95532_0
- nbconvert=7.10.0=py39haa95532_0
- nbformat=5.9.2=py39haa95532_0
- nest-asyncio=1.6.0=py39haa95532_0
- notebook=7.0.8=py39haa95532_0
- notebook-shim=0.2.3=py39haa95532_0
- numexpr=2.8.7=py39h2cd9be0_0
- numpy=1.26.4=py39h055cbcc_0
- numpy-base=1.26.4=py39h65a83cf_0
- oauthlib=3.2.2=py39haa95532_0
- openjpeg=2.4.0=h4fc8c34_0
- openssl=1.1.1w=h2bbff1b_0
- opt_einsum=3.3.0=pyhd3eb1b0_1
- overrides=7.4.0=py39haa95532_0
- packaging=23.2=py39haa95532_0
- pandas=2.2.1=py39h5da7b33_0
- pandocfilters=1.5.0=pyhd3eb1b0_0
- parso=0.8.3=pyhd3eb1b0_0
- pcre2=10.42=h0ff8eda_1
- pickleshare=0.7.5=pyhd3eb1b0_1003
- pillow=10.3.0=py39h2bbff1b_0
- pip=24.0=py39haa95532_0
- platformdirs=3.10.0=py39haa95532_0
- ply=3.11=py39haa95532_0
- prometheus_client=0.14.1=py39haa95532_0
- prompt-toolkit=3.0.43=py39haa95532_0
- prompt_toolkit=3.0.43=hd3eb1b0_0
- protobuf=3.20.3=py39hd77b12b_0
- psutil=5.9.0=py39h2bbff1b_0
- pure_eval=0.2.2=pyhd3eb1b0_0
- pyasn1=0.4.8=pyhd3eb1b0_0
- pyasn1-modules=0.2.8=py_0
- pybind11-abi=5=hd3eb1b0_0
- pycparser=2.21=pyhd3eb1b0_0
- pygments=2.15.1=py39haa95532_1
- pyjwt=2.8.0=py39haa95532_0
- pyopenssl=23.2.0=py39haa95532_0
- pyparsing=3.0.9=py39haa95532_0
- pyqt=5.15.10=py39hd77b12b_0
- pyqt5-sip=12.13.0=py39h2bbff1b_0
- pysocks=1.7.1=py39haa95532_0
- python=3.9.18=h6244533_0
- python-dateutil=2.9.0post0=py39haa95532_0
- python-fastjsonschema=2.16.2=py39haa95532_0
- python-flatbuffers=2.0=pyhd3eb1b0_0
- python-json-logger=2.0.7=py39haa95532_0
- python-tzdata=2023.3=pyhd3eb1b0_0
- pytz=2024.1=py39haa95532_0
- pywin32=305=py39h2bbff1b_0
- pywinpty=2.0.10=py39h5da7b33_0
- pyyaml=6.0.1=py39h2bbff1b_0
- pyzmq=25.1.2=py39hd77b12b_0
- qt-main=5.15.2=he8e5bd7_8
- qtconsole=5.5.1=py39haa95532_0
- qtpy=2.4.1=py39haa95532_0
- referencing=0.30.2=py39haa95532_0
- requests=2.31.0=py39haa95532_1
- requests-oauthlib=1.3.0=py_0
- rfc3339-validator=0.1.4=py39haa95532_0
- rfc3986-validator=0.1.1=py39haa95532_0
- rpds-py=0.10.6=py39h062c2fa_0
- rsa=4.7.2=pyhd3eb1b0_1
- scikit-learn=1.4.2=py39h4ed8f06_1
- scipy=1.13.0=py39h8640f81_0
- seaborn=0.12.2=py39haa95532_0
- send2trash=1.8.2=py39haa95532_0
- setuptools=69.5.1=py39haa95532_0
- sip=6.7.12=py39hd77b12b_0
- six=1.16.0=pyhd3eb1b0_1
- snappy=1.1.10=h6c2663c_1
- sniffio=1.3.0=py39haa95532_0
- soupsieve=2.5=py39haa95532_0
- sqlite=3.45.3=h2bbff1b_0
- stack_data=0.2.0=pyhd3eb1b0_0
- tbb=2021.8.0=h59b6b97_0
- tensorboard=2.10.0=py39haa95532_0
- tensorboard-data-server=0.6.1=py39haa95532_0
- tensorboard-plugin-wit=1.8.1=py39haa95532_0
- tensorflow=2.10.0=mkl_py39ha510bab_0
- tensorflow-base=2.10.0=mkl_py39h6a7f48e_0
- tensorflow-estimator=2.10.0=py39haa95532_0
- termcolor=2.1.0=py39haa95532_0
- terminado=0.17.1=py39haa95532_0
- threadpoolctl=2.2.0=pyh0d69192_0
- tinycss2=1.2.1=py39haa95532_0
- tomli=2.0.1=py39haa95532_0
- tornado=6.3.3=py39h2bbff1b_0
- traitlets=5.7.1=py39haa95532_0
- typing-extensions=4.11.0=py39haa95532_0
- typing_extensions=4.11.0=py39haa95532_0
- tzdata=2024a=h04d1e81_0
- unicodedata2=15.1.0=py39h2bbff1b_0
- urllib3=2.2.1=py39haa95532_0
- vc=14.2=h2eaa2aa_1
- vs2015_runtime=14.29.30133=h43f2093_3
- wcwidth=0.2.5=pyhd3eb1b0_0
- webencodings=0.5.1=py39haa95532_1
- websocket-client=1.8.0=py39haa95532_0
- werkzeug=2.3.8=py39haa95532_0
- wheel=0.43.0=py39haa95532_0
- widgetsnbextension=4.0.10=py39haa95532_0
- win_inet_pton=1.1.0=py39haa95532_0
- winpty=0.4.3=4
- wrapt=1.14.1=py39h2bbff1b_0
- xz=5.4.6=h8cc25b3_1
- yaml=0.2.5=he774522_0
- yarl=1.9.3=py39h2bbff1b_0
- zeromq=4.3.5=hd77b12b_0
- zipp=3.17.0=py39haa95532_0
- zlib=1.2.13=h8cc25b3_1
- zstd=1.5.5=hd43e919_2
# NOTE(review): absolute path into one user's local miniconda install; presumably only
# meaningful on the machine that exported this file — verify whether it should be kept.
prefix: C:\Users\Adrian\miniconda3\envs\ML

82
evaluate.py Normal file
View File

@ -0,0 +1,82 @@
"""Evaluate the trained Keras model on the validation split.

Inputs (read from the current working directory):
    valid.csv     -- validation data containing the feature columns and the label column.
    model.keras   -- the trained model.
    metrics.json  -- optional; list of metric entries written by earlier runs.

Outputs (written to the current working directory):
    predictions.txt             -- raw model outputs, one per validation row.
    predictions_two_digits.txt  -- the same outputs formatted with two decimals.
    metrics.json                -- previous entries plus the entry for this run.
    metrics.jpg                 -- line plot of the four confusion-matrix counts per entry.
"""
import datetime
import json
from datetime import timezone

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tensorflow import keras

x_columns = ['Male', 'GeneralHealth', 'PhysicalHealthDays', 'MentalHealthDays',
             'PhysicalActivities', 'SleepHours', 'RemovedTeeth',
             'HadAngina', 'HadStroke', 'HadAsthma', 'HadSkinCancer', 'HadCOPD',
             'HadDepressiveDisorder', 'HadKidneyDisease', 'HadArthritis',
             'HadDiabetes', 'DeafOrHardOfHearing', 'BlindOrVisionDifficulty',
             'DifficultyConcentrating', 'DifficultyWalking',
             'DifficultyDressingBathing', 'DifficultyErrands', 'SmokerStatus',
             'ECigaretteUsage', 'ChestScan', 'HeightInMeters', 'WeightInKilograms',
             'BMI', 'AlcoholDrinkers', 'HIVTesting', 'FluVaxLast12', 'PneumoVaxEver',
             'TetanusLast10Tdap', 'HighRiskLastYear', 'CovidPos']
y_column = 'HadHeartAttack'

valid = pd.read_csv("valid.csv")
valid_x = valid[x_columns]
valid_y = valid[y_column]

model = keras.models.load_model('model.keras')

# Single inference pass; only the first output column is used as the prediction.
predictions = model.predict(valid_x)[:, 0]
true_answers = valid_y.to_numpy()
# Round each output to the nearest integer to obtain the predicted label.
predicted_labels = np.rint(predictions)

validation_accuracy = np.sum(predicted_labels == true_answers) / len(true_answers)
print(f"Poprawność na zbiorze walidacyjnym: {validation_accuracy:.2%}")

np.savetxt("predictions.txt", predictions)
np.savetxt("predictions_two_digits.txt", predictions, fmt='%1.2f')

# Confusion-matrix counts are derived from the predictions computed above instead of
# running the model again on the positive and negative subsets: this avoids two extra
# inference passes and also works when one of the classes is absent from valid.csv.
is_positive = true_answers == 1
is_negative = true_answers == 0
true_positives = int(np.sum(predicted_labels[is_positive] == 1))
true_negatives = int(np.sum(predicted_labels[is_negative] == 0))
false_positives = int(np.sum(is_negative)) - true_negatives
false_negatives = int(np.sum(is_positive)) - true_positives

current_datetime = datetime.datetime.now(timezone.utc)
metrics = {
    "true_positives": true_positives,
    "true_negatives": true_negatives,
    "false_positives": false_positives,
    "false_negatives": false_negatives,
    "datetime_utc": str(current_datetime),
}

# Extend the history stored by earlier runs; a missing file simply means this is the
# first recorded entry.
history = []
try:
    with open("metrics.json", "r") as f:
        history = json.load(f)
except FileNotFoundError:
    print('No historical metrics found')
history.append(metrics)
with open("metrics.json", "w") as f:
    json.dump(history, f)

# One line per metric, plotted against the entry's position in the history list.
plotted_series = [
    ("true_positives", "-go", "True positives"),
    ("true_negatives", "-bo", "True negatives"),
    ("false_positives", "-ro", "False positives"),
    ("false_negatives", "-mo", "False negatives"),
]
for metric_key, line_format, _ in plotted_series:
    plt.plot([entry[metric_key] for entry in history], line_format)
plt.legend([label for _, _, label in plotted_series])
plt.xlabel("Build number")
plt.ylabel("Metric value")
plt.title("Model evaluation history")
plt.savefig("metrics.jpg")

1
evaluate.sh Normal file
View File

@ -0,0 +1 @@
#!/bin/sh
# Runs the model evaluation script from the current working directory.
# The shebang makes the interpreter explicit, since this file is executed directly.
# `set -e` aborts with a non-zero status as soon as a command fails.
set -e
python ./evaluate.py

View File

@ -2,3 +2,7 @@ kaggle
# NOTE(review): none of the packages below pins a version, so installs are not
# reproducible across builds — consider pinning.
# NOTE(review): evaluate.py imports keras via `from tensorflow import keras`; presumably the
# standalone `keras` entry is not required and could resolve to a mismatching release — verify.
pandas
scikit-learn
seaborn
tensorflow
keras
numpy
matplotlib