Update Jenkinsfile

Update plot.py
Update Jenkinsfile
2024-05-04 16:48:31 +02:00 · 2024-04-30 19:32:52 +02:00 · 2024-04-30 19:30:32 +02:00 · 2024-04-30 19:25:40 +02:00 · 2024-04-30 19:09:17 +02:00 · 2024-04-30 19:02:47 +02:00
23 changed files with 697409 additions and 655 deletions
--- a/.dvc/.gitignore
+++ b/.dvc/.gitignore
@ -1,3 +0,0 @@
-/config.local
-/tmp
-/cache
--- a/.dvc/config
+++ b/.dvc/config
@ -1,4 +0,0 @@
-[core]
-    remote = ium_ssh_remote
-['remote "ium_ssh_remote"']
-    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.dvcignore
+++ b/.dvcignore
@ -1,3 +0,0 @@
-# Add patterns of files dvc should ignore, which could improve
-# the performance. Learn more at
-# https://dvc.org/doc/user-guide/dvcignore
--- a/.env
+++ b/.env
@ -1,5 +0,0 @@
-MONGO_INITDB_ROOT_USERNAME=admin
-MONGO_INITDB_ROOT_PASSWORD=IUM_2021
-ME_CONFIG_BASICAUTH_USERNAME=mongo_express_user
-ME_CONFIG_BASICAUTH_PASSWORD=mongo_express_pw
-MONGO_DATABASE=sacred
--- a/.gitignore
+++ b/.gitignore
@ -1 +0,0 @@
-/covtype.csv
--- a/.ipynb_checkpoints/IUM_2-checkpoint.ipynb
+++ b/.ipynb_checkpoints/IUM_2-checkpoint.ipynb
@ -0,0 +1,95 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install --user kaggle \n",
+    "%pip install --user pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ERROR: Could not find a version that satisfies the requirement git (from versions: none)\n",
+      "ERROR: No matching distribution found for git\n",
+      "\n",
+      "[notice] A new release of pip is available: 23.1.2 -> 24.0\n",
+      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install git"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Download data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!kaggle datasets download -d nasa/meteorite-landings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!tar -xf  meteorite-landings.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/2
+++ b/2
@ -4,7 +4,7 @@ RUN apt update && apt install -y python3-pip
 RUN apt install unzip
 RUN apt install bc

-RUN pip3 install kaggle pandas scikit-learn torch sacred pymongo 
+RUN pip3 install kaggle pandas scikit-learn torch matplotlib

 WORKDIR /app

--- a/67
+++ b/67
@ -1,60 +1,59 @@
 pipeline {
    agent any
    parameters {
-    string(name: 'KAGGLE_USERNAME', defaultValue: 'alicjaszulecka', description: 'Kaggle username')
-    password(name: 'KAGGLE_KEY', defaultValue:'', description: 'Kaggle Key')
-    string(name: 'CUTOFF', defaultValue: '100', description: 'cut off number')
-    }
+            buildSelector (
+                defaultSelector: lastSuccessful(),
+                description: 'Build for copying artifacts',
+                name: 'BUILD_SELECTOR'
+            )
+            gitParameter branchFilter: 'origin/(.*)', defaultValue: 'model', name: 'BRANCH', type: 'PT_BRANCH'
+        }
+        triggers {
+            upstream(upstreamProjects: 's464914-training/' + params.BRANCH + '/', threshold: hudson.model.Result.SUCCESS)
+        }
    stages {
        stage('Git Checkout') {
            steps {
               checkout scm
            }
        }
-         stage('Download dataset') {
-      steps {
-        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-          sh 'pip install kaggle'
-          sh 'kaggle datasets download -d uciml/forest-cover-type-dataset'
-          sh 'unzip -o forest-cover-type-dataset.zip'
-          sh 'rm forest-cover-type-dataset.zip'
+     stage('Copy Artifacts') {
+            steps {
+               copyArtifacts fingerprintArtifacts: true, projectName: 'z-s464914-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+               copyArtifacts filter: '*', projectName: 's464914-training/' + params.BRANCH + '/', selector: buildParameter('BUILD_SELECTOR')
+               copyArtifacts filter: '*', projectName: 's464914-evaluation/evaluation/', selector: buildParameter('BUILD_SELECTOR'), optional: true
+            }
        }
-      }
-    }
-     stage('Build') {
-         steps {
-            script {
-                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                          "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
-                    def customImage = docker.build("custom-image")
-                    customImage.inside {
-                        sh 'python3 ./IUM_2.py'
-                        archiveArtifacts artifacts: 'covtype.csv, forest_train.csv, forest_test.csv, forest_val.csv', onlyIfSuccessful: true
-                    }
-                 }
-             }
-         }
-         }
-
-         stage('Train and Predict') {
+         stage('Prediction') {
            steps {
              script {
               def customImage = docker.build("custom-image")
                    customImage.inside {
-                        sh 'python3 ./model.py'
                        sh 'python3 ./prediction.py'
-                        archiveArtifacts artifacts: 'model.pth, predictions.txt', onlyIfSuccessful: true
+                        archiveArtifacts artifacts: 'predictions.txt', onlyIfSuccessful: true
                    }
              }
            }
        }
-        stage('Experiments') {
+        stage('Metrics') {
            steps {
              script {
               def customImage = docker.build("custom-image")
                    customImage.inside {
-                        sh 'python3 ./sacred_model.py'
-                        archiveArtifacts artifacts: 'experiments', onlyIfSuccessful: true
+                        sh 'python3 ./metrics.py'
+                        archiveArtifacts artifacts: 'metrics.txt', onlyIfSuccessful: true
+                    }
+              }
+            }
+        }
+
+        stage('Plot Accuracy') {
+            steps {
+              script {
+               def customImage = docker.build("custom-image")
+                    customImage.inside {
+                        sh 'python3 ./plot.py'
+                        archiveArtifacts artifacts: 'accuracy.png', onlyIfSuccessful: true
                    }
              }
            }
--- a/covtype.csv
+++ b/covtype.csv
--- a/covtype.csv.dvc
+++ b/covtype.csv.dvc
@ -1,5 +0,0 @@
-outs:
- md5: e88c3c209db2e8982e07c43462d67c87
-  size: 75170064
-  hash: md5
-  path: covtype.csv
--- a/dvc.yaml
+++ b/dvc.yaml
@ -1,29 +0,0 @@
-stages:
-  prepare_data:
-    cmd: python ./IUM_2.py
-    deps:
-      - create-dataset.py
-      - covtype.csv
-    outs:
-      - forest_train.csv
-      - forest_test.csv
-      - forest_val.csv
-
-  train_model:
-    cmd: python ./model.py
-    deps:
-      - model.py
-      - forest_train.csv
-      - forest_test.csv
-      - forest_val.csv
-    outs:
-      - model.pth
-
-  evaluate_model:
-    cmd: python ./prediction.py
-    deps:
-      - prediction.py
-      - model.pth
-      - forest_test.csv
-    outs:
-      - predictions.txt
--- a/environment.yml
+++ b/environment.yml
@ -1,189 +0,0 @@
-name: IUM
-channels:
-  - defaults
-dependencies:
-  - _tflow_select=2.3.0=mkl
-  - abseil-cpp=20211102.0=hd77b12b_0
-  - absl-py=2.1.0=py310haa95532_0
-  - aiohttp=3.9.5=py310h2bbff1b_0
-  - aiosignal=1.2.0=pyhd3eb1b0_0
-  - alembic=1.8.1=py310haa95532_0
-  - aniso8601=9.0.1=pyhd3eb1b0_0
-  - arrow-cpp=11.0.0=h2c9b28c_2
-  - astunparse=1.6.3=py_0
-  - async-timeout=4.0.3=py310haa95532_0
-  - attrs=23.1.0=py310haa95532_0
-  - aws-c-common=0.4.57=ha925a31_1
-  - aws-c-event-stream=0.1.6=hd77b12b_5
-  - aws-checksums=0.1.9=ha925a31_0
-  - aws-sdk-cpp=1.8.185=hd77b12b_0
-  - bcrypt=3.2.0=py310h2bbff1b_1
-  - blas=1.0=mkl
-  - blinker=1.6.2=py310haa95532_0
-  - boost-cpp=1.82.0=h59b6b97_2
-  - bottleneck=1.3.7=py310h9128911_0
-  - brotli=1.0.9=h2bbff1b_8
-  - brotli-bin=1.0.9=h2bbff1b_8
-  - brotli-python=1.0.9=py310hd77b12b_8
-  - bzip2=1.0.8=h2bbff1b_6
-  - c-ares=1.19.1=h2bbff1b_0
-  - ca-certificates=2024.3.11=haa95532_0
-  - cachetools=5.3.3=py310haa95532_0
-  - certifi=2024.2.2=py310haa95532_0
-  - cffi=1.16.0=py310h2bbff1b_1
-  - charset-normalizer=2.0.4=pyhd3eb1b0_0
-  - click=8.1.7=py310haa95532_0
-  - cloudpickle=2.2.1=py310haa95532_0
-  - colorama=0.4.6=py310haa95532_0
-  - contourpy=1.2.0=py310h59b6b97_0
-  - cryptography=41.0.3=py310h3438e0d_0
-  - cycler=0.11.0=pyhd3eb1b0_0
-  - docker-py=7.0.0=py310haa95532_0
-  - entrypoints=0.4=py310haa95532_0
-  - flask=2.2.5=py310haa95532_0
-  - flatbuffers=2.0.0=h6c2663c_0
-  - fonttools=4.51.0=py310h2bbff1b_0
-  - freetype=2.12.1=ha860e81_0
-  - frozenlist=1.4.0=py310h2bbff1b_0
-  - gast=0.4.0=pyhd3eb1b0_0
-  - gflags=2.2.2=hd77b12b_1
-  - giflib=5.2.1=h8cc25b3_3
-  - gitdb=4.0.7=pyhd3eb1b0_0
-  - gitpython=3.1.37=py310haa95532_0
-  - glog=0.5.0=hd77b12b_1
-  - google-auth=2.29.0=py310haa95532_0
-  - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
-  - google-pasta=0.2.0=pyhd3eb1b0_0
-  - graphene=3.3=py310haa95532_0
-  - graphql-core=3.2.3=py310haa95532_1
-  - graphql-relay=3.2.0=py310haa95532_0
-  - greenlet=3.0.1=py310hd77b12b_0
-  - grpc-cpp=1.48.2=hf108199_0
-  - grpcio=1.48.2=py310hf108199_0
-  - h5py=3.11.0=py310hed405ee_0
-  - hdf5=1.12.1=h51c971a_3
-  - icc_rt=2022.1.0=h6049295_2
-  - icu=58.2=ha925a31_3
-  - idna=3.7=py310haa95532_0
-  - importlib-metadata=7.0.1=py310haa95532_0
-  - intel-openmp=2023.1.0=h59b6b97_46320
-  - itsdangerous=2.0.1=pyhd3eb1b0_0
-  - jinja2=3.1.3=py310haa95532_0
-  - joblib=1.4.0=py310haa95532_0
-  - jpeg=9e=h2bbff1b_1
-  - keras=2.10.0=py310haa95532_0
-  - keras-preprocessing=1.1.2=pyhd3eb1b0_0
-  - kiwisolver=1.4.4=py310hd77b12b_0
-  - krb5=1.20.1=h5b6d351_1
-  - lcms2=2.12=h83e58a3_0
-  - lerc=3.0=hd77b12b_0
-  - libboost=1.82.0=h3399ecb_2
-  - libbrotlicommon=1.0.9=h2bbff1b_8
-  - libbrotlidec=1.0.9=h2bbff1b_8
-  - libbrotlienc=1.0.9=h2bbff1b_8
-  - libclang=14.0.6=default_hb5a9fac_1
-  - libclang13=14.0.6=default_h8e68704_1
-  - libcurl=8.7.1=h86230a5_0
-  - libdeflate=1.17=h2bbff1b_1
-  - libevent=2.1.12=hcc03200_0
-  - libffi=3.4.4=hd77b12b_1
-  - libpng=1.6.39=h8cc25b3_0
-  - libpq=12.15=hb652d5d_1
-  - libprotobuf=3.20.3=h23ce68f_0
-  - libssh2=1.10.0=hcd4344a_2
-  - libthrift=0.15.0=he49ee6e_2
-  - libtiff=4.5.1=hd77b12b_0
-  - libwebp-base=1.3.2=h2bbff1b_0
-  - lz4-c=1.9.4=h2bbff1b_1
-  - mako=1.2.3=py310haa95532_0
-  - markdown=3.4.1=py310haa95532_0
-  - markupsafe=2.1.3=py310h2bbff1b_0
-  - matplotlib=3.8.4=py310haa95532_0
-  - matplotlib-base=3.8.4=py310h4ed8f06_0
-  - mkl=2023.1.0=h6b88ed4_46358
-  - mkl-service=2.4.0=py310h2bbff1b_1
-  - mkl_fft=1.3.8=py310h2bbff1b_0
-  - mkl_random=1.2.4=py310h59b6b97_0
-  - mlflow=2.12.2=py310hd1fac3c_0
-  - multidict=6.0.4=py310h2bbff1b_0
-  - numexpr=2.8.7=py310h2cd9be0_0
-  - numpy=1.26.4=py310h055cbcc_0
-  - numpy-base=1.26.4=py310h65a83cf_0
-  - oauthlib=3.2.2=py310haa95532_0
-  - openjpeg=2.4.0=h4fc8c34_0
-  - openssl=1.1.1w=h2bbff1b_0
-  - opt_einsum=3.3.0=pyhd3eb1b0_1
-  - orc=1.7.4=h623e30f_1
-  - packaging=23.2=py310haa95532_0
-  - pandas=2.2.1=py310h5da7b33_0
-  - paramiko=2.8.1=pyhd3eb1b0_0
-  - pillow=10.3.0=py310h2bbff1b_0
-  - pip=24.0=py310haa95532_0
-  - ply=3.11=py310haa95532_0
-  - protobuf=3.20.3=py310hd77b12b_0
-  - pyarrow=11.0.0=py310h790e06d_1
-  - pyasn1=0.4.8=pyhd3eb1b0_0
-  - pyasn1-modules=0.2.8=py_0
-  - pybind11-abi=5=hd3eb1b0_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyjwt=2.8.0=py310haa95532_0
-  - pynacl=1.5.0=py310h8cc25b3_0
-  - pyopenssl=23.2.0=py310haa95532_0
-  - pyqt=5.15.10=py310hd77b12b_0
-  - pyqt5-sip=12.13.0=py310h2bbff1b_0
-  - pysocks=1.7.1=py310haa95532_0
-  - python=3.10.13=h966fe2a_0
-  - python-dateutil=2.9.0post0=py310haa95532_0
-  - python-flatbuffers=2.0=pyhd3eb1b0_0
-  - python-tzdata=2023.3=pyhd3eb1b0_0
-  - pytz=2024.1=py310haa95532_0
-  - pywin32=305=py310h2bbff1b_0
-  - pyyaml=6.0.1=py310h2bbff1b_0
-  - qt-main=5.15.2=h6072711_9
-  - querystring_parser=1.2.4=py310haa95532_0
-  - re2=2022.04.01=hd77b12b_0
-  - requests=2.31.0=py310haa95532_1
-  - requests-oauthlib=1.3.0=py_0
-  - rsa=4.7.2=pyhd3eb1b0_1
-  - scikit-learn=1.4.2=py310h4ed8f06_1
-  - scipy=1.13.0=py310h8640f81_0
-  - setuptools=69.5.1=py310haa95532_0
-  - sip=6.7.12=py310hd77b12b_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - smmap=4.0.0=pyhd3eb1b0_0
-  - snappy=1.1.10=h6c2663c_1
-  - sqlalchemy=2.0.25=py310h2bbff1b_0
-  - sqlite=3.45.3=h2bbff1b_0
-  - sqlparse=0.4.4=py310haa95532_0
-  - tbb=2021.8.0=h59b6b97_0
-  - tensorboard=2.10.0=py310haa95532_0
-  - tensorboard-data-server=0.6.1=py310haa95532_0
-  - tensorboard-plugin-wit=1.8.1=py310haa95532_0
-  - tensorflow=2.10.0=mkl_py310hd99672f_0
-  - tensorflow-base=2.10.0=mkl_py310h6a7f48e_0
-  - tensorflow-estimator=2.10.0=py310haa95532_0
-  - termcolor=2.1.0=py310haa95532_0
-  - threadpoolctl=2.2.0=pyh0d69192_0
-  - tk=8.6.14=h0416ee5_0
-  - tornado=6.3.3=py310h2bbff1b_0
-  - typing-extensions=4.11.0=py310haa95532_0
-  - typing_extensions=4.11.0=py310haa95532_0
-  - tzdata=2024a=h04d1e81_0
-  - unicodedata2=15.1.0=py310h2bbff1b_0
-  - urllib3=2.2.1=py310haa95532_0
-  - utf8proc=2.6.1=h2bbff1b_1
-  - vc=14.2=h2eaa2aa_1
-  - vs2015_runtime=14.29.30133=h43f2093_3
-  - waitress=2.0.0=pyhd3eb1b0_0
-  - websocket-client=1.8.0=py310haa95532_0
-  - werkzeug=2.3.8=py310haa95532_0
-  - wheel=0.43.0=py310haa95532_0
-  - win_inet_pton=1.1.0=py310haa95532_0
-  - wrapt=1.14.1=py310h2bbff1b_0
-  - xz=5.4.6=h8cc25b3_1
-  - yaml=0.2.5=he774522_0
-  - yarl=1.9.3=py310h2bbff1b_0
-  - zipp=3.17.0=py310haa95532_0
-  - zlib=1.2.13=h8cc25b3_1
-  - zstd=1.5.5=hd43e919_2
-prefix: C:\Users\Genos\miniconda3\envs\IUM
--- a/metrics.py
+++ b/metrics.py
@ -0,0 +1,34 @
+"""Compute classification metrics from predictions.txt and append them to metrics.txt."""
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
+import numpy as np
+
+true_labels = []
+predicted_labels = []
+
+# Each input line has the form "predicted: <int> true_label: <int>" (the format
+# written by prediction.py), so token 1 is the prediction and token 3 the label.
+# The context manager closes the file even if a line fails to parse.
+with open("predictions.txt", "r") as f:
+    for line in f:
+        parts = line.split()
+        if not parts:
+            continue  # skip blank lines instead of failing with IndexError
+        true_labels.append(int(parts[3]))
+        predicted_labels.append(int(parts[1]))
+
+accuracy = accuracy_score(true_labels, predicted_labels)
+# average='micro' computes each score globally over all samples.
+precision_micro = precision_score(true_labels, predicted_labels, average='micro')
+recall_micro = recall_score(true_labels, predicted_labels, average='micro')
+f1_micro = f1_score(true_labels, predicted_labels, average='micro')
+# Note: RMSE here treats the integer class ids as numeric values.
+rmse = np.sqrt(mean_squared_error(true_labels, predicted_labels))
+
+# Append rather than overwrite: plot.py reads every "Accuracy:" line found in
+# metrics.txt and plots them as a series.
+with open('metrics.txt', 'a') as fp:
+    fp.write(f"Accuracy: {accuracy}\n")
+    fp.write(f"Precision: {precision_micro}\n")
+    fp.write(f"Recall: {recall_micro}\n")
+    fp.write(f"F1-score: {f1_micro}\n")
+    fp.write(f"RMSE: {rmse}\n")
--- a/mlflow/Dockerfile
+++ b/mlflow/Dockerfile
@ -1,15 +0,0 @@
-FROM python:3.10
-
-RUN pip install --upgrade pip
-
-RUN pip3 install mlflow
-RUN pip3 install scikit-learn
-RUN pip3 install pandas
-RUN pip3 install numpy 
-RUN pip3 install torch
-
-COPY mlflow_model.py .
-COPY mlflow_prediction.py .
-COPY forest_test.csv .
-COPY forest_train.csv .
-COPY forest_val.csv .
--- a/mlflow/MLProject
+++ b/mlflow/MLProject
@ -1,13 +0,0 @@
-name: mlflow_464914
-
-# conda_env: conda.yaml #ścieżka do pliku conda.yaml z definicją środowisk
-docker_env:
- image: mlflow_image
-
-entry_points:
-  main:
-    parameters:
-      epochs: {type: int, default: 10}
-    command: "python mlflow_model.py {epochs}"
-  test:
-    command: "python mlflow_prediction.py"
--- a/mlflow/mlflow_model.py
+++ b/mlflow/mlflow_model.py
@ -1,120 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import DataLoader, Dataset
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import torch.nn.functional as F
-import mlflow
-import mlflow.sklearn
-import sys
-
-mlflow.set_tracking_uri("http://localhost:5000")
-mlflow.set_experiment("s464914")
- 
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "cpu"
-)
-
-class Model(nn.Module):
-    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
-        super().__init__()
-        self.fc1 = nn.Linear(input_features,output_features)
-        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # Add batch normalization
-        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
-        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # Add batch normalization
-        self.out = nn.Linear(hidden_layer2, output_features)
-        
-    def forward(self, x):
-        x = F.relu(self.fc1(x))  # Apply batch normalization after first linear layer
-        #x = F.relu(self.bn2(self.fc2(x)))  # Apply batch normalization after second linear layer
-        #x = self.out(x)
-        return x
-
-def main():
-    epochs = int(sys.argv[1])
-    forest_train = pd.read_csv('forest_train.csv')
-    forest_val = pd.read_csv('forest_val.csv')
-
-    print(forest_train.head())
-
-
-    X_train = forest_train.drop(columns=['Cover_Type']).values
-    y_train = forest_train['Cover_Type'].values
-
-    X_val = forest_val.drop(columns=['Cover_Type']).values
-    y_val = forest_val['Cover_Type'].values
-
-
-    # Initialize model, loss function, and optimizer
-    model = Model().to(device)
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=0.001)
-
-    # Convert to PyTorch tensors
-    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
-    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
-    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
-    y_val = torch.tensor(y_val, dtype=torch.long).to(device)
-
-    # Create DataLoader
-    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
-    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
-
-    with mlflow.start_run() as run:
-        # Training loop
-        for epoch in range(epochs):
-            model.train()  # Set model to training mode
-            running_loss = 0.0
-            for inputs, labels in train_loader:
-                inputs, labels = inputs.to(device), labels.to(device)
-
-                optimizer.zero_grad()
-
-                outputs = model(inputs)
-                loss = criterion(outputs, labels)
-                loss.backward()
-                optimizer.step()
-
-                running_loss += loss.item() * inputs.size(0)
-
-            # Calculate training loss
-            epoch_loss = running_loss / len(train_loader.dataset)
-
-            # Validation
-            model.eval()  # Set model to evaluation mode
-            val_running_loss = 0.0
-            correct = 0
-            total = 0
-            with torch.no_grad():
-                for inputs, labels in val_loader:
-                    inputs, labels = inputs.to(device), labels.to(device)
-
-                    outputs = model(inputs)
-                    val_loss = criterion(outputs, labels)
-                    val_running_loss += val_loss.item() * inputs.size(0)
-
-                    _, predicted = torch.max(outputs, 1)
-                    total += labels.size(0)
-                    correct += (predicted == labels).sum().item()
-
-            # Calculate validation loss and accuracy
-            val_epoch_loss = val_running_loss / len(val_loader.dataset)
-            val_accuracy = correct / total
-
-            print(f"Epoch {epoch+1}/{epochs}, "
-                f"Train Loss: {epoch_loss:.4f}, "
-                f"Val Loss: {val_epoch_loss:.4f}, "
-                f"Val Accuracy: {val_accuracy:.4f}")
-            
-
-        torch.save(model.state_dict(), 'model.pth')
-        mlflow.log_param("epochs", epochs)
-
-
-if __name__ == "__main__":
-    main()
--- a/mlflow/mlflow_prediction.py
+++ b/mlflow/mlflow_prediction.py
@ -1,95 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import DataLoader, Dataset
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import torch.nn.functional as F
-from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
-import numpy as np
-import mlflow
-import mlflow.sklearn
-
-mlflow.set_tracking_uri("http://localhost:5000")
-mlflow.set_experiment("s464914")
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "cpu"
-)
-
-class Model(nn.Module):
-    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
-        super().__init__()
-        self.fc1 = nn.Linear(input_features,output_features)
-        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # Add batch normalization
-        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
-        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # Add batch normalization
-        self.out = nn.Linear(hidden_layer2, output_features)
-        
-    def forward(self, x):
-        x = F.relu(self.fc1(x)) 
-        return x
-
-def load_model(model, model_path):
-    model.load_state_dict(torch.load(model_path))
-    model.eval()
-
-def predict(model, input_data):
-    # Convert input data to PyTorch tensor
-    
-    # Perform forward pass
-    with torch.no_grad():
-        output = model(input_data)
-
-    _, predicted_class = torch.max(output, 0)
-    
-    return predicted_class.item()  # Return the predicted class label
-
-def main():
-    with mlflow.start_run() as run:
-        forest_test = pd.read_csv('forest_test.csv')
-
-        X_test = forest_test.drop(columns=['Cover_Type']).values
-        y_test = forest_test['Cover_Type'].values
-
-        X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
-
-        model = Model().to(device)
-        model_path = 'model.pth'  # Path to your saved model file
-        load_model(model, model_path)
-
-        predictions = []
-        true_labels = []
-        with torch.no_grad():
-            for input_data, target in zip(X_test, y_test):
-                output = model(input_data)
-                _, predicted_class = torch.max(output, 0)
-                prediction_entry = f"predicted: {predicted_class.item()} true_label: {target}"
-                predictions.append(prediction_entry)
-                true_labels.append()
-                if predicted_class.item() == target:
-                    true_labels.append(target)
-
-
-        with open(r'predictions.txt', 'w') as fp:
-            for item in predictions:
-                # write each item on a new line
-                fp.write("%s\n" % item)
-
-        accuracy = accuracy_score(true_labels, predictions)
-        precision_micro = precision_score(true_labels, predictions, average='micro')
-        recall_micro = recall_score(true_labels, predictions, average='micro')
-        f1_micro = f1_score(true_labels, predictions, average='micro')
-        rmse = np.sqrt(mean_squared_error(true_labels, predictions))
-
-        mlflow.log_metric("accuracy", accuracy)
-        mlflow.log_metric("precision_micro", precision_micro)
-        mlflow.log_metric("recall_micro", recall_micro)
-        mlflow.log_metric("f1_micro", f1_micro)
-        mlflow.log_metric("rmse", rmse)
-
-if __name__ == "__main__":
-    main()
--- a/model.py
+++ b/model.py
@ -6,6 +6,7 @@ import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelEncoder
 import torch.nn.functional as F
+import sys
 

 device = (
@ -30,6 +31,9 @@ class Model(nn.Module):
        return x

 def main():
+    epochs = int(sys.argv[1])
+    print(epochs)
+
    forest_train = pd.read_csv('forest_train.csv')
    forest_val = pd.read_csv('forest_val.csv')

@ -59,7 +63,6 @@ def main():
    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)

    # Training loop
-    epochs = 10
    for epoch in range(epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
--- a/plot.py
+++ b/plot.py
@ -0,0 +1,26 @
+"""Plot the accuracy values recorded in metrics.txt and save the chart as accuracy.png."""
+import matplotlib.pyplot as plt
+import numpy as np
+
+accuracy = []
+
+# metrics.txt holds lines such as "Accuracy: <float>"; only those lines are used.
+with open("metrics.txt", "r") as f:
+    for line in f:
+        parts = line.split()
+        if len(parts) >= 2 and parts[0] == 'Accuracy:':
+            accuracy.append(float(parts[1]))
+
+# One point per recorded accuracy value, numbered from 1 in file order.
+build_numbers = np.arange(1, len(accuracy) + 1)
+
+plt.plot(build_numbers, accuracy,  marker='o', linestyle='-', color='b')
+plt.xlabel('Build Number')
+plt.ylabel('Accuracy')
+plt.title('Accuracy Plot')
+plt.grid(True)
+
+# Save before show(): with an interactive backend show() blocks until the window
+# is closed, after which savefig() would write an empty figure.
+plt.savefig('accuracy.png')
+plt.show()
--- a/prediction.py
+++ b/prediction.py
@ -6,6 +6,8 @@ import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelEncoder
 import torch.nn.functional as F
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
+import numpy as np

 device = (
    "cuda"
@ -41,7 +43,6 @@ def predict(model, input_data):
    
    return predicted_class.item()  # Return the predicted class label

-
 def main():
    forest_test = pd.read_csv('forest_test.csv')

@ -55,15 +56,23 @@ def main():
    load_model(model, model_path)

    predictions = []
-    for input_data in X_test:
-        predicted_class = predict(model, input_data)
-        predictions.append(predicted_class)
-    
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for input_data, target in zip(X_test, y_test):
+            output = model(input_data)
+            _, predicted_class = torch.max(output, 0)
+            prediction_entry = f"predicted: {predicted_class.item()} true_label: {target}"
+            predictions.append(prediction_entry)
+            total += 1
+            if predicted_class.item() == target:
+                correct += 1
+
+
    with open(r'predictions.txt', 'w') as fp:
        for item in predictions:
            # write each item on a new line
            fp.write("%s\n" % item)
-   

 if __name__ == "__main__":
    main()
--- a/predictions.txt
+++ b/predictions.txt
--- a/sacred_model.py
+++ b/sacred_model.py
@ -1,126 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import DataLoader, Dataset
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import torch.nn.functional as F
-from sacred import Experiment
-from sacred.observers import FileStorageObserver, MongoObserver
- 
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "cpu"
-)
-
-ex = Experiment("464914", interactive=True, save_git_info=False)
-ex.observers.append(FileStorageObserver('experiments'))
-ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017',
-                                  db_name='sacred')) 
-
-class Model(nn.Module):
-    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
-        super().__init__()
-        self.fc1 = nn.Linear(input_features,output_features)
-        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # Add batch normalization
-        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
-        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # Add batch normalization
-        self.out = nn.Linear(hidden_layer2, output_features)
-        
-    def forward(self, x):
-        x = F.relu(self.fc1(x))  # Apply batch normalization after first linear layer
-        #x = F.relu(self.bn2(self.fc2(x)))  # Apply batch normalization after second linear layer
-        #x = self.out(x)
-        return x
-    
-@ex.capture
-def capture_params(epochs):
-    print(f"epochs: {epochs}")
-
-@ex.main
-def main(_run):
-    forest_train_ex = ex.open_resource('forest_train.csv')
-    forest_val_ex = ex.open_resource('forest_val.csv')
-
-    forest_val = pd.read_csv('forest_val.csv')
-    forest_train = pd.read_csv('forest_train.csv')
-
-    X_train = forest_train.drop(columns=['Cover_Type']).values
-    y_train = forest_train['Cover_Type'].values
-
-    X_val = forest_val.drop(columns=['Cover_Type']).values
-    y_val = forest_val['Cover_Type'].values
-
-
-    # Initialize model, loss function, and optimizer
-    model = Model().to(device)
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=0.001)
-
-    # Convert to PyTorch tensors
-    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
-    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
-    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
-    y_val = torch.tensor(y_val, dtype=torch.long).to(device)
-
-    # Create DataLoader
-    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
-    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
-
-    # Training loop
-    epochs = 10
-    for epoch in range(epochs):
-        model.train()  # Set model to training mode
-        running_loss = 0.0
-        for inputs, labels in train_loader:
-            inputs, labels = inputs.to(device), labels.to(device)
-
-            optimizer.zero_grad()
-
-            outputs = model(inputs)
-            loss = criterion(outputs, labels)
-            loss.backward()
-            optimizer.step()
-
-            running_loss += loss.item() * inputs.size(0)
-
-        # Calculate training loss
-        epoch_loss = running_loss / len(train_loader.dataset)
-
-        # Validation
-        model.eval()  # Set model to evaluation mode
-        val_running_loss = 0.0
-        correct = 0
-        total = 0
-        with torch.no_grad():
-            for inputs, labels in val_loader:
-                inputs, labels = inputs.to(device), labels.to(device)
-
-                outputs = model(inputs)
-                val_loss = criterion(outputs, labels)
-                val_running_loss += val_loss.item() * inputs.size(0)
-
-                _, predicted = torch.max(outputs, 1)
-                total += labels.size(0)
-                correct += (predicted == labels).sum().item()
-
-        # Calculate validation loss and accuracy
-        val_epoch_loss = val_running_loss / len(val_loader.dataset)
-        val_accuracy = correct / total
-
-        print(f"Epoch {epoch+1}/{epochs}, "
-              f"Train Loss: {epoch_loss:.4f}, "
-              f"Val Loss: {val_epoch_loss:.4f}, "
-              f"Val Accuracy: {val_accuracy:.4f}")
-        _run.log_scalar("train loss", epoch_loss)
-        _run.log_scalar("val loss", val_epoch_loss)
-        
-
-    capture_params(epochs)
-    torch.save(model.state_dict(), 'model.pth')
-    ex.add_artifact("model.pth")
-
-ex.run()
--- a/sacredboard/Dockerfile
+++ b/sacredboard/Dockerfile
@ -1,5 +0,0 @@
-FROM python:3.6-jessie
-
-RUN pip install https://github.com/chovanecm/sacredboard/archive/develop.zip
-
-ENTRYPOINT sacredboard -mu mongodb://$MONGO_INITDB_ROOT_USERNAME:$MONGO_INITDB_ROOT_PASSWORD@mongo:27017/?authMechanism=SCRAM-SHA-1 $MONGO_DATABASE
Author	SHA1	Message	Date
Alicja Szulecka	52aa376edb	Update Jenkinsfile	2024-05-04 16:48:31 +02:00
Alicja Szulecka	c84935dd0f	Update plot.py	2024-04-30 19:32:52 +02:00
Alicja Szulecka	6e7d740463	Update Jenkinsfile	2024-04-30 19:30:32 +02:00
Alicja Szulecka	f866ef4bf7	Update Jenkinsfile	2024-04-30 19:25:40 +02:00
Alicja Szulecka	773d932415	Update Jenkinsfile	2024-04-30 19:09:17 +02:00
Alicja Szulecka	cfbf877ac2	Update Jenkinsfile	2024-04-30 19:02:47 +02:00
Alicja Szulecka	42408c00ea	Update Dockerfile	2024-04-30 16:29:48 +02:00
Alicja Szulecka	99b9b9c70b	Update plot.py	2024-04-30 16:29:27 +02:00
Alicja Szulecka	520206ef22	plot	2024-04-30 16:25:37 +02:00
Alicja Szulecka	65bf01c425	Update Jenkinsfile	2024-04-30 16:11:33 +02:00
Alicja Szulecka	e6d4c07a7a	Update Jenkinsfile	2024-04-30 16:08:38 +02:00
Alicja Szulecka	5dfd11b904	jenkins evaluation	2024-04-30 16:03:02 +02:00
Alicja Szulecka	6a0b357945	Update model.py	2024-04-29 21:47:03 +02:00
Alicja Szulecka	b45d036d42	Update Jenkinsfile	2024-04-29 21:45:09 +02:00
Alicja Szulecka	45beb68c25	Update Jenkinsfile	2024-04-29 21:43:25 +02:00
Alicja Szulecka	03f4d0b47a	Update model.py	2024-04-29 21:27:45 +02:00
Alicja Szulecka	ca24c39ada	Update Jenkinsfile	2024-04-29 21:22:42 +02:00
Alicja Szulecka	f883cd5e17	add parameter	2024-04-29 21:21:21 +02:00
Alicja Szulecka	ac93029123	Update Jenkinsfile	2024-04-29 21:09:13 +02:00
Alicja Szulecka	5ff6e66c4f	Update Jenkinsfile	2024-04-29 21:08:45 +02:00
Alicja Szulecka	66d15ac8f4	Update Jenkinsfile	2024-04-29 21:02:47 +02:00