Update Jenkinsfile

Update model.py
Update Jenkinsfile
2024-05-04 15:55:49 +02:00 · 2024-04-29 21:47:03 +02:00 · 2024-04-29 21:45:09 +02:00 · 2024-04-29 21:43:25 +02:00 · 2024-04-29 21:27:45 +02:00 · 2024-04-29 21:22:42 +02:00
19 changed files with 581128 additions and 656 deletions
--- a/.dvc/.gitignore
+++ b/.dvc/.gitignore
@@ -1,3 +0,0 @@
-/config.local
-/tmp
-/cache
--- a/.dvc/config
+++ b/.dvc/config
@@ -1,4 +0,0 @@
-[core]
-    remote = ium_ssh_remote
-['remote "ium_ssh_remote"']
-    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.dvcignore
+++ b/.dvcignore
@@ -1,3 +0,0 @@
-# Add patterns of files dvc should ignore, which could improve
-# the performance. Learn more at
-# https://dvc.org/doc/user-guide/dvcignore
--- a/.env
+++ b/.env
@@ -1,5 +0,0 @@
-MONGO_INITDB_ROOT_USERNAME=admin
-MONGO_INITDB_ROOT_PASSWORD=IUM_2021
-ME_CONFIG_BASICAUTH_USERNAME=mongo_express_user
-ME_CONFIG_BASICAUTH_PASSWORD=mongo_express_pw
-MONGO_DATABASE=sacred
--- a/.gitignore
+++ b/.gitignore
@@ -1 +0,0 @@
-/covtype.csv
--- a/.ipynb_checkpoints/IUM_2-checkpoint.ipynb
+++ b/.ipynb_checkpoints/IUM_2-checkpoint.ipynb
@@ -0,0 +1,95 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install --user kaggle \n",
+    "%pip install --user pandas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ERROR: Could not find a version that satisfies the requirement git (from versions: none)\n",
+      "ERROR: No matching distribution found for git\n",
+      "\n",
+      "[notice] A new release of pip is available: 23.1.2 -> 24.0\n",
+      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install git"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Download data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!kaggle datasets download -d nasa/meteorite-landings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!tar -xf  meteorite-landings.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,7 +4,7 @@ RUN apt update && apt install -y python3-pip
 RUN apt install unzip
 RUN apt install bc

-RUN pip3 install kaggle pandas scikit-learn torch sacred pymongo 
+RUN pip3 install kaggle pandas scikit-learn torch

 WORKDIR /app

--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,60 +1,34 @@
 pipeline {
    agent any
-    parameters {
-    string(name: 'KAGGLE_USERNAME', defaultValue: 'alicjaszulecka', description: 'Kaggle username')
-    password(name: 'KAGGLE_KEY', defaultValue:'', description: 'Kaggle Key')
-    string(name: 'CUTOFF', defaultValue: '100', description: 'cut off number')
+    triggers {
+        upstream(upstreamProjects: 'z-s464914-create-dataset', threshold: hudson.model.Result.SUCCESS)
    }
+    parameters {
+            buildSelector (
+                defaultSelector: lastSuccessful(),
+                description: 'Build for copying artifacts',
+                name: 'BUILD_SELECTOR'
+            )
+            string(name: 'EPOCHS', defaultValue: '10', description: 'epochs')
+        }
    stages {
        stage('Git Checkout') {
            steps {
               checkout scm
            }
        }
-         stage('Download dataset') {
-      steps {
-        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-          sh 'pip install kaggle'
-          sh 'kaggle datasets download -d uciml/forest-cover-type-dataset'
-          sh 'unzip -o forest-cover-type-dataset.zip'
-          sh 'rm forest-cover-type-dataset.zip'
-        }
-      }
-    }
-     stage('Build') {
-         steps {
-            script {
-                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                          "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
-                    def customImage = docker.build("custom-image")
-                    customImage.inside {
-                        sh 'python3 ./IUM_2.py'
-                        archiveArtifacts artifacts: 'covtype.csv, forest_train.csv, forest_test.csv, forest_val.csv', onlyIfSuccessful: true
-                    }
-                 }
-             }
-         }
-         }
-
-         stage('Train and Predict') {
+     stage('Copy Artifacts') {
            steps {
-              script {
-               def customImage = docker.build("custom-image")
-                    customImage.inside {
-                        sh 'python3 ./model.py'
-                        sh 'python3 ./prediction.py'
-                        archiveArtifacts artifacts: 'model.pth, predictions.txt', onlyIfSuccessful: true
-                    }
-              }
+               copyArtifacts fingerprintArtifacts: true, projectName: 'z-s464914-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
-        stage('Experiments') {
+         stage('Train') {
            steps {
              script {
               def customImage = docker.build("custom-image")
                    customImage.inside {
-                        sh 'python3 ./sacred_model.py'
-                        archiveArtifacts artifacts: 'experiments', onlyIfSuccessful: true
+                        sh 'python3 ./model.py ' + params.EPOCHS
+                        archiveArtifacts artifacts: 'model.pth, predictions.txt', onlyIfSuccessful: true
                    }
              }
            }
--- a/covtype.csv
+++ b/covtype.csv
--- a/covtype.csv.dvc
+++ b/covtype.csv.dvc
@@ -1,5 +0,0 @@
-outs:
-- md5: e88c3c209db2e8982e07c43462d67c87
-  size: 75170064
-  hash: md5
-  path: covtype.csv
--- a/dvc.yaml
+++ b/dvc.yaml
@@ -1,29 +0,0 @@
-stages:
-  prepare_data:
-    cmd: python ./IUM_2.py
-    deps:
-      - create-dataset.py
-      - covtype.csv
-    outs:
-      - forest_train.csv
-      - forest_test.csv
-      - forest_val.csv
-
-  train_model:
-    cmd: python ./model.py
-    deps:
-      - model.py
-      - forest_train.csv
-      - forest_test.csv
-      - forest_val.csv
-    outs:
-      - model.pth
-
-  evaluate_model:
-    cmd: python ./prediction.py
-    deps:
-      - prediction.py
-      - model.pth
-      - forest_test.csv
-    outs:
-      - predictions.txt
--- a/environment.yml
+++ b/environment.yml
@@ -1,189 +0,0 @@
-name: IUM
-channels:
-  - defaults
-dependencies:
-  - _tflow_select=2.3.0=mkl
-  - abseil-cpp=20211102.0=hd77b12b_0
-  - absl-py=2.1.0=py310haa95532_0
-  - aiohttp=3.9.5=py310h2bbff1b_0
-  - aiosignal=1.2.0=pyhd3eb1b0_0
-  - alembic=1.8.1=py310haa95532_0
-  - aniso8601=9.0.1=pyhd3eb1b0_0
-  - arrow-cpp=11.0.0=h2c9b28c_2
-  - astunparse=1.6.3=py_0
-  - async-timeout=4.0.3=py310haa95532_0
-  - attrs=23.1.0=py310haa95532_0
-  - aws-c-common=0.4.57=ha925a31_1
-  - aws-c-event-stream=0.1.6=hd77b12b_5
-  - aws-checksums=0.1.9=ha925a31_0
-  - aws-sdk-cpp=1.8.185=hd77b12b_0
-  - bcrypt=3.2.0=py310h2bbff1b_1
-  - blas=1.0=mkl
-  - blinker=1.6.2=py310haa95532_0
-  - boost-cpp=1.82.0=h59b6b97_2
-  - bottleneck=1.3.7=py310h9128911_0
-  - brotli=1.0.9=h2bbff1b_8
-  - brotli-bin=1.0.9=h2bbff1b_8
-  - brotli-python=1.0.9=py310hd77b12b_8
-  - bzip2=1.0.8=h2bbff1b_6
-  - c-ares=1.19.1=h2bbff1b_0
-  - ca-certificates=2024.3.11=haa95532_0
-  - cachetools=5.3.3=py310haa95532_0
-  - certifi=2024.2.2=py310haa95532_0
-  - cffi=1.16.0=py310h2bbff1b_1
-  - charset-normalizer=2.0.4=pyhd3eb1b0_0
-  - click=8.1.7=py310haa95532_0
-  - cloudpickle=2.2.1=py310haa95532_0
-  - colorama=0.4.6=py310haa95532_0
-  - contourpy=1.2.0=py310h59b6b97_0
-  - cryptography=41.0.3=py310h3438e0d_0
-  - cycler=0.11.0=pyhd3eb1b0_0
-  - docker-py=7.0.0=py310haa95532_0
-  - entrypoints=0.4=py310haa95532_0
-  - flask=2.2.5=py310haa95532_0
-  - flatbuffers=2.0.0=h6c2663c_0
-  - fonttools=4.51.0=py310h2bbff1b_0
-  - freetype=2.12.1=ha860e81_0
-  - frozenlist=1.4.0=py310h2bbff1b_0
-  - gast=0.4.0=pyhd3eb1b0_0
-  - gflags=2.2.2=hd77b12b_1
-  - giflib=5.2.1=h8cc25b3_3
-  - gitdb=4.0.7=pyhd3eb1b0_0
-  - gitpython=3.1.37=py310haa95532_0
-  - glog=0.5.0=hd77b12b_1
-  - google-auth=2.29.0=py310haa95532_0
-  - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
-  - google-pasta=0.2.0=pyhd3eb1b0_0
-  - graphene=3.3=py310haa95532_0
-  - graphql-core=3.2.3=py310haa95532_1
-  - graphql-relay=3.2.0=py310haa95532_0
-  - greenlet=3.0.1=py310hd77b12b_0
-  - grpc-cpp=1.48.2=hf108199_0
-  - grpcio=1.48.2=py310hf108199_0
-  - h5py=3.11.0=py310hed405ee_0
-  - hdf5=1.12.1=h51c971a_3
-  - icc_rt=2022.1.0=h6049295_2
-  - icu=58.2=ha925a31_3
-  - idna=3.7=py310haa95532_0
-  - importlib-metadata=7.0.1=py310haa95532_0
-  - intel-openmp=2023.1.0=h59b6b97_46320
-  - itsdangerous=2.0.1=pyhd3eb1b0_0
-  - jinja2=3.1.3=py310haa95532_0
-  - joblib=1.4.0=py310haa95532_0
-  - jpeg=9e=h2bbff1b_1
-  - keras=2.10.0=py310haa95532_0
-  - keras-preprocessing=1.1.2=pyhd3eb1b0_0
-  - kiwisolver=1.4.4=py310hd77b12b_0
-  - krb5=1.20.1=h5b6d351_1
-  - lcms2=2.12=h83e58a3_0
-  - lerc=3.0=hd77b12b_0
-  - libboost=1.82.0=h3399ecb_2
-  - libbrotlicommon=1.0.9=h2bbff1b_8
-  - libbrotlidec=1.0.9=h2bbff1b_8
-  - libbrotlienc=1.0.9=h2bbff1b_8
-  - libclang=14.0.6=default_hb5a9fac_1
-  - libclang13=14.0.6=default_h8e68704_1
-  - libcurl=8.7.1=h86230a5_0
-  - libdeflate=1.17=h2bbff1b_1
-  - libevent=2.1.12=hcc03200_0
-  - libffi=3.4.4=hd77b12b_1
-  - libpng=1.6.39=h8cc25b3_0
-  - libpq=12.15=hb652d5d_1
-  - libprotobuf=3.20.3=h23ce68f_0
-  - libssh2=1.10.0=hcd4344a_2
-  - libthrift=0.15.0=he49ee6e_2
-  - libtiff=4.5.1=hd77b12b_0
-  - libwebp-base=1.3.2=h2bbff1b_0
-  - lz4-c=1.9.4=h2bbff1b_1
-  - mako=1.2.3=py310haa95532_0
-  - markdown=3.4.1=py310haa95532_0
-  - markupsafe=2.1.3=py310h2bbff1b_0
-  - matplotlib=3.8.4=py310haa95532_0
-  - matplotlib-base=3.8.4=py310h4ed8f06_0
-  - mkl=2023.1.0=h6b88ed4_46358
-  - mkl-service=2.4.0=py310h2bbff1b_1
-  - mkl_fft=1.3.8=py310h2bbff1b_0
-  - mkl_random=1.2.4=py310h59b6b97_0
-  - mlflow=2.12.2=py310hd1fac3c_0
-  - multidict=6.0.4=py310h2bbff1b_0
-  - numexpr=2.8.7=py310h2cd9be0_0
-  - numpy=1.26.4=py310h055cbcc_0
-  - numpy-base=1.26.4=py310h65a83cf_0
-  - oauthlib=3.2.2=py310haa95532_0
-  - openjpeg=2.4.0=h4fc8c34_0
-  - openssl=1.1.1w=h2bbff1b_0
-  - opt_einsum=3.3.0=pyhd3eb1b0_1
-  - orc=1.7.4=h623e30f_1
-  - packaging=23.2=py310haa95532_0
-  - pandas=2.2.1=py310h5da7b33_0
-  - paramiko=2.8.1=pyhd3eb1b0_0
-  - pillow=10.3.0=py310h2bbff1b_0
-  - pip=24.0=py310haa95532_0
-  - ply=3.11=py310haa95532_0
-  - protobuf=3.20.3=py310hd77b12b_0
-  - pyarrow=11.0.0=py310h790e06d_1
-  - pyasn1=0.4.8=pyhd3eb1b0_0
-  - pyasn1-modules=0.2.8=py_0
-  - pybind11-abi=5=hd3eb1b0_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyjwt=2.8.0=py310haa95532_0
-  - pynacl=1.5.0=py310h8cc25b3_0
-  - pyopenssl=23.2.0=py310haa95532_0
-  - pyqt=5.15.10=py310hd77b12b_0
-  - pyqt5-sip=12.13.0=py310h2bbff1b_0
-  - pysocks=1.7.1=py310haa95532_0
-  - python=3.10.13=h966fe2a_0
-  - python-dateutil=2.9.0post0=py310haa95532_0
-  - python-flatbuffers=2.0=pyhd3eb1b0_0
-  - python-tzdata=2023.3=pyhd3eb1b0_0
-  - pytz=2024.1=py310haa95532_0
-  - pywin32=305=py310h2bbff1b_0
-  - pyyaml=6.0.1=py310h2bbff1b_0
-  - qt-main=5.15.2=h6072711_9
-  - querystring_parser=1.2.4=py310haa95532_0
-  - re2=2022.04.01=hd77b12b_0
-  - requests=2.31.0=py310haa95532_1
-  - requests-oauthlib=1.3.0=py_0
-  - rsa=4.7.2=pyhd3eb1b0_1
-  - scikit-learn=1.4.2=py310h4ed8f06_1
-  - scipy=1.13.0=py310h8640f81_0
-  - setuptools=69.5.1=py310haa95532_0
-  - sip=6.7.12=py310hd77b12b_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - smmap=4.0.0=pyhd3eb1b0_0
-  - snappy=1.1.10=h6c2663c_1
-  - sqlalchemy=2.0.25=py310h2bbff1b_0
-  - sqlite=3.45.3=h2bbff1b_0
-  - sqlparse=0.4.4=py310haa95532_0
-  - tbb=2021.8.0=h59b6b97_0
-  - tensorboard=2.10.0=py310haa95532_0
-  - tensorboard-data-server=0.6.1=py310haa95532_0
-  - tensorboard-plugin-wit=1.8.1=py310haa95532_0
-  - tensorflow=2.10.0=mkl_py310hd99672f_0
-  - tensorflow-base=2.10.0=mkl_py310h6a7f48e_0
-  - tensorflow-estimator=2.10.0=py310haa95532_0
-  - termcolor=2.1.0=py310haa95532_0
-  - threadpoolctl=2.2.0=pyh0d69192_0
-  - tk=8.6.14=h0416ee5_0
-  - tornado=6.3.3=py310h2bbff1b_0
-  - typing-extensions=4.11.0=py310haa95532_0
-  - typing_extensions=4.11.0=py310haa95532_0
-  - tzdata=2024a=h04d1e81_0
-  - unicodedata2=15.1.0=py310h2bbff1b_0
-  - urllib3=2.2.1=py310haa95532_0
-  - utf8proc=2.6.1=h2bbff1b_1
-  - vc=14.2=h2eaa2aa_1
-  - vs2015_runtime=14.29.30133=h43f2093_3
-  - waitress=2.0.0=pyhd3eb1b0_0
-  - websocket-client=1.8.0=py310haa95532_0
-  - werkzeug=2.3.8=py310haa95532_0
-  - wheel=0.43.0=py310haa95532_0
-  - win_inet_pton=1.1.0=py310haa95532_0
-  - wrapt=1.14.1=py310h2bbff1b_0
-  - xz=5.4.6=h8cc25b3_1
-  - yaml=0.2.5=he774522_0
-  - yarl=1.9.3=py310h2bbff1b_0
-  - zipp=3.17.0=py310haa95532_0
-  - zlib=1.2.13=h8cc25b3_1
-  - zstd=1.5.5=hd43e919_2
-prefix: C:\Users\Genos\miniconda3\envs\IUM
--- a/mlflow/Dockerfile
+++ b/mlflow/Dockerfile
@@ -1,15 +0,0 @@
-FROM python:3.10
-
-RUN pip install --upgrade pip
-
-RUN pip3 install mlflow
-RUN pip3 install scikit-learn
-RUN pip3 install pandas
-RUN pip3 install numpy 
-RUN pip3 install torch
-
-COPY mlflow_model.py .
-COPY mlflow_prediction.py .
-COPY forest_test.csv .
-COPY forest_train.csv .
-COPY forest_val.csv .
--- a/mlflow/MLProject
+++ b/mlflow/MLProject
@@ -1,13 +0,0 @@
-name: mlflow_464914
-
-# conda_env: conda.yaml #ścieżka do pliku conda.yaml z definicją środowisk
-docker_env:
- image: mlflow_image
-
-entry_points:
-  main:
-    parameters:
-      epochs: {type: int, default: 10}
-    command: "python mlflow_model.py {epochs}"
-  test:
-    command: "python mlflow_prediction.py"
--- a/mlflow/mlflow_model.py
+++ b/mlflow/mlflow_model.py
@@ -1,120 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import DataLoader, Dataset
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import torch.nn.functional as F
-import mlflow
-import mlflow.sklearn
-import sys
-
-mlflow.set_tracking_uri("http://localhost:5000")
-mlflow.set_experiment("s464914")
- 
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "cpu"
-)
-
-class Model(nn.Module):
-    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
-        super().__init__()
-        self.fc1 = nn.Linear(input_features,output_features)
-        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # Add batch normalization
-        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
-        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # Add batch normalization
-        self.out = nn.Linear(hidden_layer2, output_features)
-        
-    def forward(self, x):
-        x = F.relu(self.fc1(x))  # Apply batch normalization after first linear layer
-        #x = F.relu(self.bn2(self.fc2(x)))  # Apply batch normalization after second linear layer
-        #x = self.out(x)
-        return x
-
-def main():
-    epochs = int(sys.argv[1])
-    forest_train = pd.read_csv('forest_train.csv')
-    forest_val = pd.read_csv('forest_val.csv')
-
-    print(forest_train.head())
-
-
-    X_train = forest_train.drop(columns=['Cover_Type']).values
-    y_train = forest_train['Cover_Type'].values
-
-    X_val = forest_val.drop(columns=['Cover_Type']).values
-    y_val = forest_val['Cover_Type'].values
-
-
-    # Initialize model, loss function, and optimizer
-    model = Model().to(device)
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=0.001)
-
-    # Convert to PyTorch tensors
-    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
-    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
-    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
-    y_val = torch.tensor(y_val, dtype=torch.long).to(device)
-
-    # Create DataLoader
-    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
-    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
-
-    with mlflow.start_run() as run:
-        # Training loop
-        for epoch in range(epochs):
-            model.train()  # Set model to training mode
-            running_loss = 0.0
-            for inputs, labels in train_loader:
-                inputs, labels = inputs.to(device), labels.to(device)
-
-                optimizer.zero_grad()
-
-                outputs = model(inputs)
-                loss = criterion(outputs, labels)
-                loss.backward()
-                optimizer.step()
-
-                running_loss += loss.item() * inputs.size(0)
-
-            # Calculate training loss
-            epoch_loss = running_loss / len(train_loader.dataset)
-
-            # Validation
-            model.eval()  # Set model to evaluation mode
-            val_running_loss = 0.0
-            correct = 0
-            total = 0
-            with torch.no_grad():
-                for inputs, labels in val_loader:
-                    inputs, labels = inputs.to(device), labels.to(device)
-
-                    outputs = model(inputs)
-                    val_loss = criterion(outputs, labels)
-                    val_running_loss += val_loss.item() * inputs.size(0)
-
-                    _, predicted = torch.max(outputs, 1)
-                    total += labels.size(0)
-                    correct += (predicted == labels).sum().item()
-
-            # Calculate validation loss and accuracy
-            val_epoch_loss = val_running_loss / len(val_loader.dataset)
-            val_accuracy = correct / total
-
-            print(f"Epoch {epoch+1}/{epochs}, "
-                f"Train Loss: {epoch_loss:.4f}, "
-                f"Val Loss: {val_epoch_loss:.4f}, "
-                f"Val Accuracy: {val_accuracy:.4f}")
-            
-
-        torch.save(model.state_dict(), 'model.pth')
-        mlflow.log_param("epochs", epochs)
-
-
-if __name__ == "__main__":
-    main()
--- a/mlflow/mlflow_prediction.py
+++ b/mlflow/mlflow_prediction.py
@@ -1,95 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import DataLoader, Dataset
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import torch.nn.functional as F
-from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
-import numpy as np
-import mlflow
-import mlflow.sklearn
-
-mlflow.set_tracking_uri("http://localhost:5000")
-mlflow.set_experiment("s464914")
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "cpu"
-)
-
-class Model(nn.Module):
-    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
-        super().__init__()
-        self.fc1 = nn.Linear(input_features,output_features)
-        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # Add batch normalization
-        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
-        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # Add batch normalization
-        self.out = nn.Linear(hidden_layer2, output_features)
-        
-    def forward(self, x):
-        x = F.relu(self.fc1(x)) 
-        return x
-
-def load_model(model, model_path):
-    model.load_state_dict(torch.load(model_path))
-    model.eval()
-
-def predict(model, input_data):
-    # Convert input data to PyTorch tensor
-    
-    # Perform forward pass
-    with torch.no_grad():
-        output = model(input_data)
-
-    _, predicted_class = torch.max(output, 0)
-    
-    return predicted_class.item()  # Return the predicted class label
-
-def main():
-    with mlflow.start_run() as run:
-        forest_test = pd.read_csv('forest_test.csv')
-
-        X_test = forest_test.drop(columns=['Cover_Type']).values
-        y_test = forest_test['Cover_Type'].values
-
-        X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
-
-        model = Model().to(device)
-        model_path = 'model.pth'  # Path to your saved model file
-        load_model(model, model_path)
-
-        predictions = []
-        true_labels = []
-        with torch.no_grad():
-            for input_data, target in zip(X_test, y_test):
-                output = model(input_data)
-                _, predicted_class = torch.max(output, 0)
-                prediction_entry = f"predicted: {predicted_class.item()} true_label: {target}"
-                predictions.append(prediction_entry)
-                true_labels.append()
-                if predicted_class.item() == target:
-                    true_labels.append(target)
-
-
-        with open(r'predictions.txt', 'w') as fp:
-            for item in predictions:
-                # write each item on a new line
-                fp.write("%s\n" % item)
-
-        accuracy = accuracy_score(true_labels, predictions)
-        precision_micro = precision_score(true_labels, predictions, average='micro')
-        recall_micro = recall_score(true_labels, predictions, average='micro')
-        f1_micro = f1_score(true_labels, predictions, average='micro')
-        rmse = np.sqrt(mean_squared_error(true_labels, predictions))
-
-        mlflow.log_metric("accuracy", accuracy)
-        mlflow.log_metric("precision_micro", precision_micro)
-        mlflow.log_metric("recall_micro", recall_micro)
-        mlflow.log_metric("f1_micro", f1_micro)
-        mlflow.log_metric("rmse", rmse)
-
-if __name__ == "__main__":
-    main()
--- a/model.py
+++ b/model.py
@@ -6,6 +6,7 @@ import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelEncoder
 import torch.nn.functional as F
+import sys
 

 device = (
@@ -30,6 +31,9 @@ class Model(nn.Module):
        return x

 def main():
+    epochs = int(sys.argv[1])
+    print(epochs)
+
    forest_train = pd.read_csv('forest_train.csv')
    forest_val = pd.read_csv('forest_val.csv')

@@ -59,7 +63,6 @@ def main():
    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)

    # Training loop
-    epochs = 10
    for epoch in range(epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
--- a/sacred_model.py
+++ b/sacred_model.py
@@ -1,126 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.optim as optim
-from torch.utils.data import DataLoader, Dataset
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-import torch.nn.functional as F
-from sacred import Experiment
-from sacred.observers import FileStorageObserver, MongoObserver
- 
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "cpu"
-)
-
-ex = Experiment("464914", interactive=True, save_git_info=False)
-ex.observers.append(FileStorageObserver('experiments'))
-ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017',
-                                  db_name='sacred')) 
-
-class Model(nn.Module):
-    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
-        super().__init__()
-        self.fc1 = nn.Linear(input_features,output_features)
-        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # Add batch normalization
-        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
-        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # Add batch normalization
-        self.out = nn.Linear(hidden_layer2, output_features)
-        
-    def forward(self, x):
-        x = F.relu(self.fc1(x))  # Apply batch normalization after first linear layer
-        #x = F.relu(self.bn2(self.fc2(x)))  # Apply batch normalization after second linear layer
-        #x = self.out(x)
-        return x
-    
-@ex.capture
-def capture_params(epochs):
-    print(f"epochs: {epochs}")
-
-@ex.main
-def main(_run):
-    forest_train_ex = ex.open_resource('forest_train.csv')
-    forest_val_ex = ex.open_resource('forest_val.csv')
-
-    forest_val = pd.read_csv('forest_val.csv')
-    forest_train = pd.read_csv('forest_train.csv')
-
-    X_train = forest_train.drop(columns=['Cover_Type']).values
-    y_train = forest_train['Cover_Type'].values
-
-    X_val = forest_val.drop(columns=['Cover_Type']).values
-    y_val = forest_val['Cover_Type'].values
-
-
-    # Initialize model, loss function, and optimizer
-    model = Model().to(device)
-    criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=0.001)
-
-    # Convert to PyTorch tensors
-    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
-    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
-    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
-    y_val = torch.tensor(y_val, dtype=torch.long).to(device)
-
-    # Create DataLoader
-    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
-    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
-
-    # Training loop
-    epochs = 10
-    for epoch in range(epochs):
-        model.train()  # Set model to training mode
-        running_loss = 0.0
-        for inputs, labels in train_loader:
-            inputs, labels = inputs.to(device), labels.to(device)
-
-            optimizer.zero_grad()
-
-            outputs = model(inputs)
-            loss = criterion(outputs, labels)
-            loss.backward()
-            optimizer.step()
-
-            running_loss += loss.item() * inputs.size(0)
-
-        # Calculate training loss
-        epoch_loss = running_loss / len(train_loader.dataset)
-
-        # Validation
-        model.eval()  # Set model to evaluation mode
-        val_running_loss = 0.0
-        correct = 0
-        total = 0
-        with torch.no_grad():
-            for inputs, labels in val_loader:
-                inputs, labels = inputs.to(device), labels.to(device)
-
-                outputs = model(inputs)
-                val_loss = criterion(outputs, labels)
-                val_running_loss += val_loss.item() * inputs.size(0)
-
-                _, predicted = torch.max(outputs, 1)
-                total += labels.size(0)
-                correct += (predicted == labels).sum().item()
-
-        # Calculate validation loss and accuracy
-        val_epoch_loss = val_running_loss / len(val_loader.dataset)
-        val_accuracy = correct / total
-
-        print(f"Epoch {epoch+1}/{epochs}, "
-              f"Train Loss: {epoch_loss:.4f}, "
-              f"Val Loss: {val_epoch_loss:.4f}, "
-              f"Val Accuracy: {val_accuracy:.4f}")
-        _run.log_scalar("train loss", epoch_loss)
-        _run.log_scalar("val loss", val_epoch_loss)
-        
-
-    capture_params(epochs)
-    torch.save(model.state_dict(), 'model.pth')
-    ex.add_artifact("model.pth")
-
-ex.run()
--- a/sacredboard/Dockerfile
+++ b/sacredboard/Dockerfile
@@ -1,5 +0,0 @@
-FROM python:3.6-jessie
-
-RUN pip install https://github.com/chovanecm/sacredboard/archive/develop.zip
-
-ENTRYPOINT sacredboard -mu mongodb://$MONGO_INITDB_ROOT_USERNAME:$MONGO_INITDB_ROOT_PASSWORD@mongo:27017/?authMechanism=SCRAM-SHA-1 $MONGO_DATABASE
Author	SHA1	Message	Date
Alicja Szulecka	36b1428694	Update Jenkinsfile	2024-05-04 15:55:49 +02:00
Alicja Szulecka	6a0b357945	Update model.py	2024-04-29 21:47:03 +02:00
Alicja Szulecka	b45d036d42	Update Jenkinsfile	2024-04-29 21:45:09 +02:00
Alicja Szulecka	45beb68c25	Update Jenkinsfile	2024-04-29 21:43:25 +02:00
Alicja Szulecka	03f4d0b47a	Update model.py	2024-04-29 21:27:45 +02:00
Alicja Szulecka	ca24c39ada	Update Jenkinsfile	2024-04-29 21:22:42 +02:00
Alicja Szulecka	f883cd5e17	add parameter	2024-04-29 21:21:21 +02:00
Alicja Szulecka	ac93029123	Update Jenkinsfile	2024-04-29 21:09:13 +02:00
Alicja Szulecka	5ff6e66c4f	Update Jenkinsfile	2024-04-29 21:08:45 +02:00
Alicja Szulecka	66d15ac8f4	Update Jenkinsfile	2024-04-29 21:02:47 +02:00