IUM_9

Create dvc.yaml
Update config
2024-05-28 21:52:31 +02:00 · 2024-05-28 21:43:31 +02:00 · 2024-05-28 21:34:33 +02:00 · 2024-05-28 21:26:27 +02:00 · 2024-05-28 21:22:43 +02:00 · 2024-05-28 21:22:26 +02:00
19 changed files with 657 additions and 581129 deletions
--- a/.dvc/.gitignore
+++ b/.dvc/.gitignore
@ -0,0 +1,3 @@
+/config.local
+/tmp
+/cache
--- a/.dvc/config
+++ b/.dvc/config
@ -0,0 +1,4 @@
+[core]
+    remote = ium_ssh_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.dvcignore
+++ b/.dvcignore
@ -0,0 +1,3 @@
+# Add patterns of files dvc should ignore, which could improve
+# the performance. Learn more at
+# https://dvc.org/doc/user-guide/dvcignore
--- a/.env
+++ b/.env
@ -0,0 +1,5 @@
+MONGO_INITDB_ROOT_USERNAME=admin
+MONGO_INITDB_ROOT_PASSWORD=IUM_2021
+ME_CONFIG_BASICAUTH_USERNAME=mongo_express_user
+ME_CONFIG_BASICAUTH_PASSWORD=mongo_express_pw
+MONGO_DATABASE=sacred
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+/covtype.csv
--- a/.ipynb_checkpoints/IUM_2-checkpoint.ipynb
+++ b/.ipynb_checkpoints/IUM_2-checkpoint.ipynb
@ -1,95 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%pip install --user kaggle \n",
-    "%pip install --user pandas"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: you may need to restart the kernel to use updated packages.\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "ERROR: Could not find a version that satisfies the requirement git (from versions: none)\n",
-      "ERROR: No matching distribution found for git\n",
-      "\n",
-      "[notice] A new release of pip is available: 23.1.2 -> 24.0\n",
-      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
-     ]
-    }
-   ],
-   "source": [
-    "%pip install git"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Download data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!kaggle datasets download -d nasa/meteorite-landings"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!tar -xf  meteorite-landings.zip"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
--- a/2
+++ b/2
@ -4,7 +4,7 @@ RUN apt update && apt install -y python3-pip
 RUN apt install unzip
 RUN apt install bc

-RUN pip3 install kaggle pandas scikit-learn torch
+RUN pip3 install kaggle pandas scikit-learn torch sacred pymongo 

 WORKDIR /app

--- a/58
+++ b/58
@ -1,37 +1,63 @@
 pipeline {
    agent any
-    triggers {
-        upstream(upstreamProjects: 'z-s464914-create-dataset', threshold: hudson.model.Result.SUCCESS)
-    }
    parameters {
-            buildSelector (
-                defaultSelector: lastSuccessful(),
-                description: 'Build for copying artifacts',
-                name: 'BUILD_SELECTOR'
-            )
-            string(name: 'EPOCHS', defaultValue: '10', description: 'epochs')
-        }
+    string(name: 'KAGGLE_USERNAME', defaultValue: 'alicjaszulecka', description: 'Kaggle username')
+    password(name: 'KAGGLE_KEY', defaultValue:'', description: 'Kaggle Key')
+    string(name: 'CUTOFF', defaultValue: '100', description: 'cut off number')
+    }
    stages {
        stage('Git Checkout') {
            steps {
               checkout scm
            }
        }
-     stage('Copy Artifacts') {
-            steps {
-               copyArtifacts fingerprintArtifacts: true, projectName: 'z-s464914-create-dataset', selector: buildParameter('BUILD_SELECTOR')
-            }
+         stage('Download dataset') {
+      steps {
+        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+          sh 'pip install kaggle'
+          sh 'kaggle datasets download -d uciml/forest-cover-type-dataset'
+          sh 'unzip -o forest-cover-type-dataset.zip'
+          sh 'rm forest-cover-type-dataset.zip'
        }
-         stage('Train') {
+      }
+    }
+     stage('Build') {
+         steps {
+            script {
+                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
+                          "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
+                    def customImage = docker.build("custom-image")
+                    customImage.inside {
+                        sh 'python3 ./IUM_2.py'
+                        archiveArtifacts artifacts: 'covtype.csv, forest_train.csv, forest_test.csv, forest_val.csv', onlyIfSuccessful: true
+                    }
+                 }
+             }
+         }
+         }
+
+         stage('Train and Predict') {
            steps {
              script {
               def customImage = docker.build("custom-image")
                    customImage.inside {
-                        sh 'python3 ./model.py ' + params.EPOCHS
+                        sh 'python3 ./model.py'
+                        sh 'python3 ./prediction.py'
                        archiveArtifacts artifacts: 'model.pth, predictions.txt', onlyIfSuccessful: true
                    }
              }
            }
        }
+        // Runs the Sacred experiment inside the project image and archives
+        // everything the FileStorageObserver wrote under experiments/.
+        stage('Experiments') {
+            steps {
+              script {
+               def customImage = docker.build("custom-image")
+                    customImage.inside {
+                        sh 'python3 ./sacred_model.py'
+                        // 'experiments' alone is an Ant pattern for a single path entry and
+                        // matches no files; '/**' selects the directory's contents.
+                        archiveArtifacts artifacts: 'experiments/**', onlyIfSuccessful: true
+                    }
+              }
+            }
+        }
     }
 }
--- a/covtype.csv
+++ b/covtype.csv
--- a/covtype.csv.dvc
+++ b/covtype.csv.dvc
@ -0,0 +1,5 @@
+outs:
+- md5: e88c3c209db2e8982e07c43462d67c87
+  size: 75170064
+  hash: md5
+  path: covtype.csv
--- a/dvc.yaml
+++ b/dvc.yaml
@ -0,0 +1,29 @@
+stages:
+  # Splits covtype.csv into the train/test/validation CSVs.
+  prepare_data:
+    cmd: python ./IUM_2.py
+    deps:
+      # Track the script that cmd actually runs so edits to it re-trigger the stage
+      - IUM_2.py
+      - covtype.csv
+    outs:
+      - forest_train.csv
+      - forest_test.csv
+      - forest_val.csv
+
+  train_model:
+    cmd: python ./model.py
+    deps:
+      - model.py
+      - forest_train.csv
+      - forest_test.csv
+      - forest_val.csv
+    outs:
+      - model.pth
+
+  evaluate_model:
+    cmd: python ./prediction.py
+    deps:
+      - prediction.py
+      - model.pth
+      - forest_test.csv
+    outs:
+      - predictions.txt
--- a/environment.yml
+++ b/environment.yml
@ -0,0 +1,189 @@
+name: IUM
+channels:
+  - defaults
+dependencies:
+  - _tflow_select=2.3.0=mkl
+  - abseil-cpp=20211102.0=hd77b12b_0
+  - absl-py=2.1.0=py310haa95532_0
+  - aiohttp=3.9.5=py310h2bbff1b_0
+  - aiosignal=1.2.0=pyhd3eb1b0_0
+  - alembic=1.8.1=py310haa95532_0
+  - aniso8601=9.0.1=pyhd3eb1b0_0
+  - arrow-cpp=11.0.0=h2c9b28c_2
+  - astunparse=1.6.3=py_0
+  - async-timeout=4.0.3=py310haa95532_0
+  - attrs=23.1.0=py310haa95532_0
+  - aws-c-common=0.4.57=ha925a31_1
+  - aws-c-event-stream=0.1.6=hd77b12b_5
+  - aws-checksums=0.1.9=ha925a31_0
+  - aws-sdk-cpp=1.8.185=hd77b12b_0
+  - bcrypt=3.2.0=py310h2bbff1b_1
+  - blas=1.0=mkl
+  - blinker=1.6.2=py310haa95532_0
+  - boost-cpp=1.82.0=h59b6b97_2
+  - bottleneck=1.3.7=py310h9128911_0
+  - brotli=1.0.9=h2bbff1b_8
+  - brotli-bin=1.0.9=h2bbff1b_8
+  - brotli-python=1.0.9=py310hd77b12b_8
+  - bzip2=1.0.8=h2bbff1b_6
+  - c-ares=1.19.1=h2bbff1b_0
+  - ca-certificates=2024.3.11=haa95532_0
+  - cachetools=5.3.3=py310haa95532_0
+  - certifi=2024.2.2=py310haa95532_0
+  - cffi=1.16.0=py310h2bbff1b_1
+  - charset-normalizer=2.0.4=pyhd3eb1b0_0
+  - click=8.1.7=py310haa95532_0
+  - cloudpickle=2.2.1=py310haa95532_0
+  - colorama=0.4.6=py310haa95532_0
+  - contourpy=1.2.0=py310h59b6b97_0
+  - cryptography=41.0.3=py310h3438e0d_0
+  - cycler=0.11.0=pyhd3eb1b0_0
+  - docker-py=7.0.0=py310haa95532_0
+  - entrypoints=0.4=py310haa95532_0
+  - flask=2.2.5=py310haa95532_0
+  - flatbuffers=2.0.0=h6c2663c_0
+  - fonttools=4.51.0=py310h2bbff1b_0
+  - freetype=2.12.1=ha860e81_0
+  - frozenlist=1.4.0=py310h2bbff1b_0
+  - gast=0.4.0=pyhd3eb1b0_0
+  - gflags=2.2.2=hd77b12b_1
+  - giflib=5.2.1=h8cc25b3_3
+  - gitdb=4.0.7=pyhd3eb1b0_0
+  - gitpython=3.1.37=py310haa95532_0
+  - glog=0.5.0=hd77b12b_1
+  - google-auth=2.29.0=py310haa95532_0
+  - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0
+  - google-pasta=0.2.0=pyhd3eb1b0_0
+  - graphene=3.3=py310haa95532_0
+  - graphql-core=3.2.3=py310haa95532_1
+  - graphql-relay=3.2.0=py310haa95532_0
+  - greenlet=3.0.1=py310hd77b12b_0
+  - grpc-cpp=1.48.2=hf108199_0
+  - grpcio=1.48.2=py310hf108199_0
+  - h5py=3.11.0=py310hed405ee_0
+  - hdf5=1.12.1=h51c971a_3
+  - icc_rt=2022.1.0=h6049295_2
+  - icu=58.2=ha925a31_3
+  - idna=3.7=py310haa95532_0
+  - importlib-metadata=7.0.1=py310haa95532_0
+  - intel-openmp=2023.1.0=h59b6b97_46320
+  - itsdangerous=2.0.1=pyhd3eb1b0_0
+  - jinja2=3.1.3=py310haa95532_0
+  - joblib=1.4.0=py310haa95532_0
+  - jpeg=9e=h2bbff1b_1
+  - keras=2.10.0=py310haa95532_0
+  - keras-preprocessing=1.1.2=pyhd3eb1b0_0
+  - kiwisolver=1.4.4=py310hd77b12b_0
+  - krb5=1.20.1=h5b6d351_1
+  - lcms2=2.12=h83e58a3_0
+  - lerc=3.0=hd77b12b_0
+  - libboost=1.82.0=h3399ecb_2
+  - libbrotlicommon=1.0.9=h2bbff1b_8
+  - libbrotlidec=1.0.9=h2bbff1b_8
+  - libbrotlienc=1.0.9=h2bbff1b_8
+  - libclang=14.0.6=default_hb5a9fac_1
+  - libclang13=14.0.6=default_h8e68704_1
+  - libcurl=8.7.1=h86230a5_0
+  - libdeflate=1.17=h2bbff1b_1
+  - libevent=2.1.12=hcc03200_0
+  - libffi=3.4.4=hd77b12b_1
+  - libpng=1.6.39=h8cc25b3_0
+  - libpq=12.15=hb652d5d_1
+  - libprotobuf=3.20.3=h23ce68f_0
+  - libssh2=1.10.0=hcd4344a_2
+  - libthrift=0.15.0=he49ee6e_2
+  - libtiff=4.5.1=hd77b12b_0
+  - libwebp-base=1.3.2=h2bbff1b_0
+  - lz4-c=1.9.4=h2bbff1b_1
+  - mako=1.2.3=py310haa95532_0
+  - markdown=3.4.1=py310haa95532_0
+  - markupsafe=2.1.3=py310h2bbff1b_0
+  - matplotlib=3.8.4=py310haa95532_0
+  - matplotlib-base=3.8.4=py310h4ed8f06_0
+  - mkl=2023.1.0=h6b88ed4_46358
+  - mkl-service=2.4.0=py310h2bbff1b_1
+  - mkl_fft=1.3.8=py310h2bbff1b_0
+  - mkl_random=1.2.4=py310h59b6b97_0
+  - mlflow=2.12.2=py310hd1fac3c_0
+  - multidict=6.0.4=py310h2bbff1b_0
+  - numexpr=2.8.7=py310h2cd9be0_0
+  - numpy=1.26.4=py310h055cbcc_0
+  - numpy-base=1.26.4=py310h65a83cf_0
+  - oauthlib=3.2.2=py310haa95532_0
+  - openjpeg=2.4.0=h4fc8c34_0
+  - openssl=1.1.1w=h2bbff1b_0
+  - opt_einsum=3.3.0=pyhd3eb1b0_1
+  - orc=1.7.4=h623e30f_1
+  - packaging=23.2=py310haa95532_0
+  - pandas=2.2.1=py310h5da7b33_0
+  - paramiko=2.8.1=pyhd3eb1b0_0
+  - pillow=10.3.0=py310h2bbff1b_0
+  - pip=24.0=py310haa95532_0
+  - ply=3.11=py310haa95532_0
+  - protobuf=3.20.3=py310hd77b12b_0
+  - pyarrow=11.0.0=py310h790e06d_1
+  - pyasn1=0.4.8=pyhd3eb1b0_0
+  - pyasn1-modules=0.2.8=py_0
+  - pybind11-abi=5=hd3eb1b0_0
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pyjwt=2.8.0=py310haa95532_0
+  - pynacl=1.5.0=py310h8cc25b3_0
+  - pyopenssl=23.2.0=py310haa95532_0
+  - pyqt=5.15.10=py310hd77b12b_0
+  - pyqt5-sip=12.13.0=py310h2bbff1b_0
+  - pysocks=1.7.1=py310haa95532_0
+  - python=3.10.13=h966fe2a_0
+  - python-dateutil=2.9.0post0=py310haa95532_0
+  - python-flatbuffers=2.0=pyhd3eb1b0_0
+  - python-tzdata=2023.3=pyhd3eb1b0_0
+  - pytz=2024.1=py310haa95532_0
+  - pywin32=305=py310h2bbff1b_0
+  - pyyaml=6.0.1=py310h2bbff1b_0
+  - qt-main=5.15.2=h6072711_9
+  - querystring_parser=1.2.4=py310haa95532_0
+  - re2=2022.04.01=hd77b12b_0
+  - requests=2.31.0=py310haa95532_1
+  - requests-oauthlib=1.3.0=py_0
+  - rsa=4.7.2=pyhd3eb1b0_1
+  - scikit-learn=1.4.2=py310h4ed8f06_1
+  - scipy=1.13.0=py310h8640f81_0
+  - setuptools=69.5.1=py310haa95532_0
+  - sip=6.7.12=py310hd77b12b_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - smmap=4.0.0=pyhd3eb1b0_0
+  - snappy=1.1.10=h6c2663c_1
+  - sqlalchemy=2.0.25=py310h2bbff1b_0
+  - sqlite=3.45.3=h2bbff1b_0
+  - sqlparse=0.4.4=py310haa95532_0
+  - tbb=2021.8.0=h59b6b97_0
+  - tensorboard=2.10.0=py310haa95532_0
+  - tensorboard-data-server=0.6.1=py310haa95532_0
+  - tensorboard-plugin-wit=1.8.1=py310haa95532_0
+  - tensorflow=2.10.0=mkl_py310hd99672f_0
+  - tensorflow-base=2.10.0=mkl_py310h6a7f48e_0
+  - tensorflow-estimator=2.10.0=py310haa95532_0
+  - termcolor=2.1.0=py310haa95532_0
+  - threadpoolctl=2.2.0=pyh0d69192_0
+  - tk=8.6.14=h0416ee5_0
+  - tornado=6.3.3=py310h2bbff1b_0
+  - typing-extensions=4.11.0=py310haa95532_0
+  - typing_extensions=4.11.0=py310haa95532_0
+  - tzdata=2024a=h04d1e81_0
+  - unicodedata2=15.1.0=py310h2bbff1b_0
+  - urllib3=2.2.1=py310haa95532_0
+  - utf8proc=2.6.1=h2bbff1b_1
+  - vc=14.2=h2eaa2aa_1
+  - vs2015_runtime=14.29.30133=h43f2093_3
+  - waitress=2.0.0=pyhd3eb1b0_0
+  - websocket-client=1.8.0=py310haa95532_0
+  - werkzeug=2.3.8=py310haa95532_0
+  - wheel=0.43.0=py310haa95532_0
+  - win_inet_pton=1.1.0=py310haa95532_0
+  - wrapt=1.14.1=py310h2bbff1b_0
+  - xz=5.4.6=h8cc25b3_1
+  - yaml=0.2.5=he774522_0
+  - yarl=1.9.3=py310h2bbff1b_0
+  - zipp=3.17.0=py310haa95532_0
+  - zlib=1.2.13=h8cc25b3_1
+  - zstd=1.5.5=hd43e919_2
+prefix: C:\Users\Genos\miniconda3\envs\IUM
--- a/mlflow/Dockerfile
+++ b/mlflow/Dockerfile
@ -0,0 +1,15 @@
+FROM python:3.10
+
+RUN pip install --upgrade pip
+
+RUN pip3 install mlflow
+RUN pip3 install scikit-learn
+RUN pip3 install pandas
+RUN pip3 install numpy 
+RUN pip3 install torch
+
+COPY mlflow_model.py .
+COPY mlflow_prediction.py .
+COPY forest_test.csv .
+COPY forest_train.csv .
+COPY forest_val.csv .
--- a/mlflow/MLProject
+++ b/mlflow/MLProject
@ -0,0 +1,13 @@
+name: mlflow_464914
+
+# conda_env: conda.yaml #ścieżka do pliku conda.yaml z definicją środowisk
+docker_env:
+ image: mlflow_image
+
+entry_points:
+  main:
+    parameters:
+      epochs: {type: int, default: 10}
+    command: "python mlflow_model.py {epochs}"
+  test:
+    command: "python mlflow_prediction.py"
--- a/mlflow/mlflow_model.py
+++ b/mlflow/mlflow_model.py
@ -0,0 +1,120 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+import mlflow
+import mlflow.sklearn
+import sys
+
+mlflow.set_tracking_uri("http://localhost:5000")
+mlflow.set_experiment("s464914")
+ 
+
+device = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "cpu"
+)
+
+class Model(nn.Module):
+    """Classifier whose forward pass is one linear layer followed by ReLU.
+
+    bn1, fc2, bn2 and out are constructed but never used by forward(); they
+    still contribute entries to state_dict().
+    """
+    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
+        super().__init__()
+        # fc1 maps the inputs straight to output_features, not to hidden_layer1
+        self.fc1 = nn.Linear(input_features,output_features)
+        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # unused in forward()
+        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)  # unused in forward()
+        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # unused in forward()
+        self.out = nn.Linear(hidden_layer2, output_features)  # unused in forward()
+        
+    def forward(self, x):
+        """Return relu(fc1(x)); no batch normalization is applied."""
+        # NOTE(review): ReLU is applied to the scores that main() passes to
+        # CrossEntropyLoss - confirm this is intended.
+        x = F.relu(self.fc1(x))
+        # Disabled deeper path, kept for reference:
+        #x = F.relu(self.bn2(self.fc2(x)))  # Apply batch normalization after second linear layer
+        #x = self.out(x)
+        return x
+
+def main():
+    """Train the model and record the run in MLflow.
+
+    Reads forest_train.csv and forest_val.csv from the working directory,
+    trains for the number of epochs given as the first command-line argument
+    (10 when the argument is omitted), prints and logs the per-epoch losses
+    and validation accuracy, and writes the weights to model.pth.
+    """
+    # Fall back to 10 epochs instead of raising IndexError when the script is
+    # started without an argument.
+    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 10
+    forest_train = pd.read_csv('forest_train.csv')
+    forest_val = pd.read_csv('forest_val.csv')
+
+    print(forest_train.head())
+
+    X_train = forest_train.drop(columns=['Cover_Type']).values
+    y_train = forest_train['Cover_Type'].values
+
+    X_val = forest_val.drop(columns=['Cover_Type']).values
+    y_val = forest_val['Cover_Type'].values
+
+    # Initialize model, loss function, and optimizer
+    model = Model().to(device)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+    # Convert to PyTorch tensors
+    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
+    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
+    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
+    y_val = torch.tensor(y_val, dtype=torch.long).to(device)
+
+    # Create DataLoader
+    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
+    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
+
+    with mlflow.start_run():
+        # Log the parameter first so it is recorded even if training fails
+        mlflow.log_param("epochs", epochs)
+
+        # Training loop
+        for epoch in range(epochs):
+            model.train()  # Set model to training mode
+            running_loss = 0.0
+            for inputs, labels in train_loader:
+                inputs, labels = inputs.to(device), labels.to(device)
+
+                optimizer.zero_grad()
+
+                outputs = model(inputs)
+                loss = criterion(outputs, labels)
+                loss.backward()
+                optimizer.step()
+
+                # Weight by batch size so the epoch figure is a per-sample mean
+                running_loss += loss.item() * inputs.size(0)
+
+            # Calculate training loss
+            epoch_loss = running_loss / len(train_loader.dataset)
+
+            # Validation
+            model.eval()  # Set model to evaluation mode
+            val_running_loss = 0.0
+            correct = 0
+            total = 0
+            with torch.no_grad():
+                for inputs, labels in val_loader:
+                    inputs, labels = inputs.to(device), labels.to(device)
+
+                    outputs = model(inputs)
+                    val_loss = criterion(outputs, labels)
+                    val_running_loss += val_loss.item() * inputs.size(0)
+
+                    _, predicted = torch.max(outputs, 1)
+                    total += labels.size(0)
+                    correct += (predicted == labels).sum().item()
+
+            # Calculate validation loss and accuracy
+            val_epoch_loss = val_running_loss / len(val_loader.dataset)
+            val_accuracy = correct / total
+
+            print(f"Epoch {epoch+1}/{epochs}, "
+                f"Train Loss: {epoch_loss:.4f}, "
+                f"Val Loss: {val_epoch_loss:.4f}, "
+                f"Val Accuracy: {val_accuracy:.4f}")
+
+            # Record the same figures in the MLflow run, one point per epoch
+            mlflow.log_metric("train_loss", epoch_loss, step=epoch)
+            mlflow.log_metric("val_loss", val_epoch_loss, step=epoch)
+            mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)
+
+        torch.save(model.state_dict(), 'model.pth')
+
+
+if __name__ == "__main__":
+    main()
--- a/mlflow/mlflow_prediction.py
+++ b/mlflow/mlflow_prediction.py
@ -0,0 +1,95 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
+import numpy as np
+import mlflow
+import mlflow.sklearn
+
+mlflow.set_tracking_uri("http://localhost:5000")
+mlflow.set_experiment("s464914")
+
+device = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "cpu"
+)
+
+class Model(nn.Module):
+    """Classifier whose forward pass is one linear layer followed by ReLU.
+
+    bn1, fc2, bn2 and out are constructed but never used by forward(); they
+    still contribute entries to the state dict that load_state_dict() expects.
+    """
+    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
+        super().__init__()
+        # fc1 maps the inputs straight to output_features, not to hidden_layer1
+        self.fc1 = nn.Linear(input_features,output_features)
+        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # unused in forward()
+        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)  # unused in forward()
+        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # unused in forward()
+        self.out = nn.Linear(hidden_layer2, output_features)  # unused in forward()
+        
+    def forward(self, x):
+        """Return relu(fc1(x)); no batch normalization is applied."""
+        x = F.relu(self.fc1(x)) 
+        return x
+
+def load_model(model, model_path):
+    """Load the weights stored at model_path into model and switch it to eval mode.
+
+    The model is modified in place; nothing is returned.
+    """
+    # map_location lets a checkpoint saved on a CUDA machine load on a
+    # CPU-only host (and the other way round).
+    model.load_state_dict(torch.load(model_path, map_location=device))
+    model.eval()
+
+def predict(model, input_data):
+    """Return the index of the highest score the model produces for input_data.
+
+    input_data is passed to the model as-is; no conversion happens here.
+    """
+    # assumes input_data is a single 1-D sample tensor, since the maximum is
+    # taken along dim 0 - TODO confirm against callers
+    
+    # Forward pass without gradient tracking
+    with torch.no_grad():
+        output = model(input_data)
+
+    _, predicted_class = torch.max(output, 0)
+    
+    return predicted_class.item()  # index of the largest score as a Python number
+
+def main():
+    """Evaluate the saved model on forest_test.csv and log metrics to MLflow.
+
+    Loads model.pth, predicts a class for every test row, writes one
+    "predicted: P true_label: T" line per row to predictions.txt, and logs
+    accuracy, micro precision/recall/F1 and RMSE to the active MLflow run.
+    """
+    with mlflow.start_run():
+        forest_test = pd.read_csv('forest_test.csv')
+
+        X_test = forest_test.drop(columns=['Cover_Type']).values
+        y_test = forest_test['Cover_Type'].values
+
+        X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
+
+        model = Model().to(device)
+        model_path = 'model.pth'  # Path to your saved model file
+        load_model(model, model_path)
+
+        # Score the whole test set in one forward pass and take the
+        # best-scoring class of every row.
+        with torch.no_grad():
+            outputs = model(X_test)
+            _, predicted = torch.max(outputs, 1)
+
+        # Keep numeric class ids for the metrics: every true label is recorded
+        # (not only the correctly predicted ones) and predictions stay integers.
+        predicted_labels = [int(p) for p in predicted.cpu()]
+        true_labels = [int(t) for t in y_test]
+
+        with open(r'predictions.txt', 'w') as fp:
+            for pred, target in zip(predicted_labels, true_labels):
+                # write each item on a new line
+                fp.write(f"predicted: {pred} true_label: {target}\n")
+
+        accuracy = accuracy_score(true_labels, predicted_labels)
+        precision_micro = precision_score(true_labels, predicted_labels, average='micro')
+        recall_micro = recall_score(true_labels, predicted_labels, average='micro')
+        f1_micro = f1_score(true_labels, predicted_labels, average='micro')
+        # NOTE(review): RMSE treats the class ids as ordinal values - confirm it is wanted
+        rmse = np.sqrt(mean_squared_error(true_labels, predicted_labels))
+
+        mlflow.log_metric("accuracy", accuracy)
+        mlflow.log_metric("precision_micro", precision_micro)
+        mlflow.log_metric("recall_micro", recall_micro)
+        mlflow.log_metric("f1_micro", f1_micro)
+        mlflow.log_metric("rmse", rmse)
+
+if __name__ == "__main__":
+    main()
--- a/model.py
+++ b/model.py
@ -6,7 +6,6 @@ import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelEncoder
 import torch.nn.functional as F
-import sys
 

 device = (
@ -31,9 +30,6 @@ class Model(nn.Module):
        return x

 def main():
-    epochs = int(sys.argv[1])
-    print(epochs)
-
    forest_train = pd.read_csv('forest_train.csv')
    forest_val = pd.read_csv('forest_val.csv')

@ -63,6 +59,7 @@ def main():
    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)

    # Training loop
+    epochs = 10
    for epoch in range(epochs):
        model.train()  # Set model to training mode
        running_loss = 0.0
--- a/sacred_model.py
+++ b/sacred_model.py
@ -0,0 +1,126 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+ 
+
+device = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "cpu"
+)
+
+# Sacred experiment with two observers: a FileStorageObserver writing under
+# ./experiments and a MongoObserver pointed at a remote MongoDB.
+ex = Experiment("464914", interactive=True, save_git_info=False)
+ex.observers.append(FileStorageObserver('experiments'))
+# NOTE(review): the MongoDB user name and password are embedded in this URL and
+# therefore committed to the repository - consider supplying them from the
+# environment instead.
+ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017',
+                                  db_name='sacred')) 
+
+class Model(nn.Module):
+    """Classifier whose forward pass is one linear layer followed by ReLU.
+
+    bn1, fc2, bn2 and out are constructed but never used by forward(); they
+    still contribute entries to state_dict().
+    """
+    def __init__(self, input_features=54, hidden_layer1=25, hidden_layer2=30, output_features=8):
+        super().__init__()
+        # fc1 maps the inputs straight to output_features, not to hidden_layer1
+        self.fc1 = nn.Linear(input_features,output_features)
+        self.bn1 = nn.BatchNorm1d(hidden_layer1)  # unused in forward()
+        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)  # unused in forward()
+        self.bn2 = nn.BatchNorm1d(hidden_layer2)  # unused in forward()
+        self.out = nn.Linear(hidden_layer2, output_features)  # unused in forward()
+        
+    def forward(self, x):
+        """Return relu(fc1(x)); no batch normalization is applied."""
+        x = F.relu(self.fc1(x))
+        # Disabled deeper path, kept for reference:
+        #x = F.relu(self.bn2(self.fc2(x)))  # Apply batch normalization after second linear layer
+        #x = self.out(x)
+        return x
+    
+# Captured function that prints the epoch count; main() calls it with the
+# value passed explicitly.
+@ex.capture
+def capture_params(epochs):
+    print(f"epochs: {epochs}")
+
+@ex.main
+def main(_run):
+    """Train the classifier as a Sacred run.
+
+    Reads forest_train.csv and forest_val.csv through ex.open_resource,
+    trains for 10 epochs, logs per-epoch train loss, validation loss and
+    validation accuracy on the run, saves the weights to model.pth and
+    attaches that file to the experiment as an artifact.
+    """
+    # Read each CSV through the handle open_resource returns and close it
+    # afterwards, instead of leaking the handle and reopening the file by path.
+    with ex.open_resource('forest_train.csv') as train_file:
+        forest_train = pd.read_csv(train_file)
+    with ex.open_resource('forest_val.csv') as val_file:
+        forest_val = pd.read_csv(val_file)
+
+    X_train = forest_train.drop(columns=['Cover_Type']).values
+    y_train = forest_train['Cover_Type'].values
+
+    X_val = forest_val.drop(columns=['Cover_Type']).values
+    y_val = forest_val['Cover_Type'].values
+
+    # Initialize model, loss function, and optimizer
+    model = Model().to(device)
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+
+    # Convert to PyTorch tensors
+    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
+    y_train = torch.tensor(y_train, dtype=torch.long).to(device)
+    X_val = torch.tensor(X_val, dtype=torch.float32).to(device)
+    y_val = torch.tensor(y_val, dtype=torch.long).to(device)
+
+    # Create DataLoader
+    train_loader = DataLoader(list(zip(X_train, y_train)), batch_size=64, shuffle=True)
+    val_loader = DataLoader(list(zip(X_val, y_val)), batch_size=64)
+
+    # Training loop
+    epochs = 10
+    for epoch in range(epochs):
+        model.train()  # Set model to training mode
+        running_loss = 0.0
+        for inputs, labels in train_loader:
+            inputs, labels = inputs.to(device), labels.to(device)
+
+            optimizer.zero_grad()
+
+            outputs = model(inputs)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+            # Weight by batch size so the epoch figure is a per-sample mean
+            running_loss += loss.item() * inputs.size(0)
+
+        # Calculate training loss
+        epoch_loss = running_loss / len(train_loader.dataset)
+
+        # Validation
+        model.eval()  # Set model to evaluation mode
+        val_running_loss = 0.0
+        correct = 0
+        total = 0
+        with torch.no_grad():
+            for inputs, labels in val_loader:
+                inputs, labels = inputs.to(device), labels.to(device)
+
+                outputs = model(inputs)
+                val_loss = criterion(outputs, labels)
+                val_running_loss += val_loss.item() * inputs.size(0)
+
+                _, predicted = torch.max(outputs, 1)
+                total += labels.size(0)
+                correct += (predicted == labels).sum().item()
+
+        # Calculate validation loss and accuracy
+        val_epoch_loss = val_running_loss / len(val_loader.dataset)
+        val_accuracy = correct / total
+
+        print(f"Epoch {epoch+1}/{epochs}, "
+              f"Train Loss: {epoch_loss:.4f}, "
+              f"Val Loss: {val_epoch_loss:.4f}, "
+              f"Val Accuracy: {val_accuracy:.4f}")
+        _run.log_scalar("train loss", epoch_loss)
+        _run.log_scalar("val loss", val_epoch_loss)
+        # Accuracy was computed and printed but never logged on the run
+        _run.log_scalar("val accuracy", val_accuracy)
+
+    capture_params(epochs)
+    torch.save(model.state_dict(), 'model.pth')
+    ex.add_artifact("model.pth")
+
+ex.run()
--- a/sacredboard/Dockerfile
+++ b/sacredboard/Dockerfile
@ -0,0 +1,5 @@
+FROM python:3.6-jessie
+
+RUN pip install https://github.com/chovanecm/sacredboard/archive/develop.zip
+
+ENTRYPOINT sacredboard -mu mongodb://$MONGO_INITDB_ROOT_USERNAME:$MONGO_INITDB_ROOT_PASSWORD@mongo:27017/?authMechanism=SCRAM-SHA-1 $MONGO_DATABASE
Author	SHA1	Message	Date
Alicja Szulecka	40d0c3e849	IUM_9	2024-05-28 21:52:31 +02:00
Alicja Szulecka	0f254aa5fa	Create dvc.yaml	2024-05-28 21:43:31 +02:00
Alicja Szulecka	abb213675e	Update config	2024-05-28 21:34:33 +02:00
Alicja Szulecka	3cbfc6aca1	Delete IUM_2-checkpoint.ipynb	2024-05-28 21:26:27 +02:00
Alicja Szulecka	80ebb3c0da	dvc	2024-05-28 21:22:43 +02:00
Alicja Szulecka	281c3c6a86	sacredboard	2024-05-28 21:22:26 +02:00
Alicja Szulecka	ae632b1ea3	stop tracking covtype.csv	2024-05-28 21:20:04 +02:00
Alicja Szulecka	7309d49e67	Delete conda,yaml	2024-05-06 22:08:57 +02:00
Alicja Szulecka	c4ce89938c	mlflow	2024-05-06 22:08:05 +02:00
Alicja Szulecka	ed9927d7a1	mlflow	2024-05-06 17:27:28 +02:00
Alicja Szulecka	8ab682be76	Update Dockerfile	2024-05-05 14:12:55 +02:00
Alicja Szulecka	7ff2f9711e	sacred	2024-05-05 14:07:27 +02:00