Compare commits

...

4 Commits

15 changed files with 242 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
data
archive.zip
.ipynb_checkpoints
__pycache__

55
DataManager.py Normal file
View File

@@ -0,0 +1,55 @@
import glob
import shutil
import cv2
from zipfile import ZipFile
import os
import wget

mainPath = "data/"
pathToTrainAndValidData = mainPath + "%s/**/*.*"
pathToTestDataset = mainPath + "/test"
originalDatasetName = "original dataset"

class DataManager:
    def downloadData(self):
        # The URL is pre-signed (X-Goog-Expires=259200, i.e. 3 days), so it has to be
        # refreshed from Kaggle once it expires.
        if not os.path.isfile("archive.zip"):
            wget.download("https://storage.googleapis.com/kaggle-data-sets/78313/182633/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240502%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240502T181500Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=87d0661313e358206b6e10d44f135d41e23501d601e58b1e8236ca28a82ccc434534564b45baa84c4d829dd1995ff384d51fe5dba3f543d00eb0763169fd712c6c8f91bb4f298db38a19b31b2d489798a9723a271aa4108d7b93345c5a64a7ef00b9b8f27d1d5f728e373c870f0287eb89bc747941f0aeeb4703c288059e2e07b7ece3a83114a9607276874a90d4ec96dde06fddb94a0d3af72848565661b1404e3ea248eeebf46374daada7df1f37db7d62b21b4ac90706ea64cc74200a58f35bfe379703e7691aeda9e39635b02f58a9f8399fa64b031b1a9bccd7f109d256c6f4886ef94fcdc11034d6da13c0f1d4d8b97cabdd295862a5107b587824ebe8")

    def unzipData(self, fileName, pathToExtract):
        # Extract the archive and flatten the doubled directory levels the Kaggle zip creates.
        if not os.path.exists(mainPath):
            os.makedirs("data")
        ZipFile(fileName).extractall(mainPath + pathToExtract)
        shutil.move("data/original dataset/test/test", "data", copy_function=shutil.copytree)
        shutil.move("data/original dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/train", "data/original dataset/train", copy_function=shutil.copytree)
        shutil.move("data/original dataset/New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)/valid", "data/original dataset/valid", copy_function=shutil.copytree)
        shutil.rmtree("data/original dataset/New Plant Diseases Dataset(Augmented)")
        shutil.rmtree("data/Detection-of-plant-diseases/data/original dataset/test")

    def writeImageToGivenPath(self, image, path):
        os.makedirs(path.rsplit('/', 1)[0], exist_ok=True)
        cv2.imwrite(path, image)

    def resizeDataset(self, sourceDatasetName, width, height):
        # Skip the whole pass if the resized dataset has already been generated.
        if not os.path.exists(mainPath + "resized dataset"):
            for file in glob.glob(pathToTrainAndValidData % sourceDatasetName, recursive=True):
                pathToFile = file.replace("\\", "/")
                image = cv2.imread(pathToFile)
                image = cv2.resize(image, (width, height))
                newPath = pathToFile.replace(sourceDatasetName, "resized dataset")
                self.writeImageToGivenPath(image, newPath)

    def sobelx(self, sourceDatasetName):
        # Horizontal Sobel edge filter (dx=1, dy=0) with a 5x5 kernel.
        if not os.path.exists(mainPath + "sobel dataset"):
            for file in glob.glob(pathToTrainAndValidData % sourceDatasetName, recursive=True):
                pathToFile = file.replace("\\", "/")
                image = cv2.imread(pathToFile)
                sobel = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
                newPath = pathToFile.replace(sourceDatasetName, "sobel dataset")
                self.writeImageToGivenPath(sobel, newPath)

dataManager = DataManager()
dataManager.downloadData()
dataManager.unzipData("archive.zip", "original dataset")
dataManager.resizeDataset("original dataset", 64, 64)
dataManager.sobelx("resized dataset")

10
launch_settings.yaml Normal file
View File

@@ -0,0 +1,10 @@
max_jobs: 1
entity: uczenie-maszynowe-projekt
queues:
  - GPU queue 1
  - GPU queue 2
builder:
  type: docker

1
src/.python-version Normal file
View File

@@ -0,0 +1 @@
3.10.12

41
src/Dockerfile Normal file
View File

@@ -0,0 +1,41 @@
FROM ubuntu:22.04

# Packages
RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    curl liblzma-dev python-tk python3-tk tk-dev libssl-dev libffi-dev libncurses5-dev zlib1g zlib1g-dev \
    libreadline-dev libbz2-dev libsqlite3-dev make gcc git-all wget python3-openssl gnupg2

# Setup CUDA
RUN apt-key del 7fa2af80 && \
    wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin && \
    mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
    wget https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda-repo-wsl-ubuntu-12-2-local_12.2.2-1_amd64.deb && \
    dpkg -i cuda-repo-wsl-ubuntu-12-2-local_12.2.2-1_amd64.deb && \
    cp /var/cuda-repo-wsl-ubuntu-12-2-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
    apt-get update && \
    apt-get -y install cuda-toolkit-12-2

# Pyenv
ENV PYENV_ROOT="$HOME/.pyenv"
ENV PATH="$PYENV_ROOT/bin:$PYENV_ROOT/versions/3.10.12/bin:$PATH"
RUN curl https://pyenv.run | bash
RUN pyenv install 3.10.12 && \
    pyenv global 3.10.12 && \
    echo 'eval "$(pyenv init --path)"' >> ~/.bashrc && \
    echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc
SHELL ["/bin/bash", "-c"]

WORKDIR /app
ADD ./requirements.txt /app/requirements.txt
RUN pip install -r requirements.txt

ENV CUDNN_PATH="/.pyenv/versions/3.10.12/lib/python3.10/site-packages/nvidia/cudnn/"
ENV LD_LIBRARY_PATH="$CUDNN_PATH/lib":"/usr/local/cuda-12.2/lib64"
ENV PATH="$PATH":"/usr/local/cuda-12.2/bin"

COPY . .

# The key is passed at build time so the image logs in to W&B non-interactively.
ARG api_key
RUN wandb login $api_key

12
src/README.md Normal file
View File

@@ -0,0 +1,12 @@
# Setup
1. Install Docker on your local system.
2. Get your API key from https://wandb.ai/settings#api; the image will automatically log in to W&B at build time.
3. Build the Docker image and run the shell:
```bash
docker build -t gpu --build-arg api_key="<wandb_api_key>" .
docker run --rm -it --gpus all --entrypoint /bin/bash gpu
```
4. To double-check that TensorFlow is configured properly, run `python3 gpu_check.py`.
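5. Once inside the container you can launch the example training run (a minimal sketch; it assumes the image was built from `src/`, so the scripts sit in the `/app` working directory):
```bash
# Verify the GPU is visible, then start training.
python3 gpu_check.py
python3 main.py
```
Metrics are streamed to the "Detection of plant diseases" W&B project configured in `wandb_utils/config.py`.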

0
src/__init__.py Normal file
View File

18
src/gpu_check.py Normal file
View File

@@ -0,0 +1,18 @@
try:
    import tensorflow
except ImportError:
    print("TensorFlow is not installed, install required packages from requirements.txt")
    exit(1)

print("If you see the tensor result below, TensorFlow is working.")
rs = tensorflow.reduce_sum(tensorflow.random.normal([1000, 1000]))
print(rs)

gpus = tensorflow.config.list_physical_devices('GPU')
if len(gpus) == 0:
    print("No GPU available.")
else:
    print(f"GPUs available: {len(gpus)}")
    print(gpus)

7
src/main.py Normal file
View File

@@ -0,0 +1,7 @@
from model.test_model import TestModel

if __name__ == "__main__":
    model = TestModel()
    history = model.fit()
    model.save()

0
src/model/__init__.py Normal file
View File

65
src/model/test_model.py Normal file
View File

@@ -0,0 +1,65 @@
import random

import tensorflow as tf
from wandb_utils.config import Config
from wandb.keras import WandbMetricsLogger

class TestModel:
    def __init__(self):
        self.config = Config(epoch=8, batch_size=256).config()
        self.config.learning_rate = 0.01
        # Define model-specific configuration below; it will be visible in the W&B interface.
        # Start of config
        self.config.layer_1 = 512
        self.config.activation_1 = "relu"
        self.config.dropout = random.uniform(0.01, 0.80)
        self.config.layer_2 = 10
        self.config.activation_2 = "softmax"
        self.config.optimizer = "sgd"
        self.config.loss = "sparse_categorical_crossentropy"
        self.config.metrics = ["accuracy"]
        # End
        self.model = self.__build_model()
        self.__compile()
        self.__load_dataset()

    def __build_model(self):
        return tf.keras.models.Sequential([
            tf.keras.layers.Input(shape=(28, 28)),
            # Flatten the 28x28 images before the dense layers; without this the
            # Dense layers are applied per image row and the loss shapes don't match.
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(self.config.layer_1, activation=self.config.activation_1),
            tf.keras.layers.Dropout(self.config.dropout),
            tf.keras.layers.Dense(self.config.layer_2, activation=self.config.activation_2)
        ])

    def __compile(self):
        self.model.compile(
            optimizer=self.config.optimizer,
            loss=self.config.loss,
            metrics=self.config.metrics,
        )

    def __load_dataset(self):
        mnist = tf.keras.datasets.mnist
        (self.x_train, self.y_train), (self.x_test, self.y_test) = mnist.load_data()
        self.x_train, self.x_test = self.x_train / 255.0, self.x_test / 255.0
        # Subsample to keep the test run short: every 5th training sample,
        # every 20th test sample.
        self.x_train, self.y_train = self.x_train[::5], self.y_train[::5]
        self.x_test, self.y_test = self.x_test[::20], self.y_test[::20]

    def fit(self):
        wandb_callbacks = [
            WandbMetricsLogger(log_freq=5),
            # Not supported with Keras >= 3.0.0
            # WandbModelCheckpoint(filepath="models"),
        ]
        return self.model.fit(
            x=self.x_train,
            y=self.y_train,
            epochs=self.config.epoch,
            batch_size=self.config.batch_size,
            validation_data=(self.x_test, self.y_test),
            callbacks=wandb_callbacks
        )

    def save(self):
        self.model.save("test_model/final_model.keras")

6
src/requirements.txt Normal file
View File

@@ -0,0 +1,6 @@
tensorflow[and-cuda]==2.16.1
tensorflow-io==0.37.0
numpy==1.26.4
opencv-python==4.9.0.80
wget==3.2
wandb==0.16.6

0
src/tests/__init__.py Normal file
View File

22
src/wandb_utils/config.py Normal file
View File

@@ -0,0 +1,22 @@
import wandb

class Config:
    def __init__(self, epoch, batch_size):
        self.epoch = epoch
        self.batch_size = batch_size
        self.run = wandb.init(
            project="Detection of plant diseases",
            config={
                "epoch": epoch,
                "batch_size": batch_size,
            }
        )

    def config(self):
        return self.run.config

    def finish(self):
        # Close the W&B run (finish() lives on the run, not on its config).
        self.run.finish()