Compare commits

...

22 Commits

Author SHA1 Message Date
s495728 ca58640f00 Add progress bars 2024-05-13 09:45:43 +02:00
s495727 476673ad9d secrets.txt added to .gitignore 2024-05-11 20:03:38 +02:00
s495727 8f11d15c12 Merge pull request 'feature/basic-model-setup' (#3) from feature/basic-model-setup into main
Reviewed-on: #3
2024-05-11 20:00:06 +02:00
s495727 c082a3982a Updating project structure, dockerfile + volume setup 2024-05-11 19:53:35 +02:00
s495727 7fb9902340 Merge branch 'feature/basic-model-setup' of https://git.wmi.amu.edu.pl/s495728/Detection-of-plant-diseases into feature/basic-model-setup 2024-05-11 16:15:51 +02:00
mszmyd e9fffa0539 Merge branch 'main' of git.wmi.amu.edu.pl:s495728/Detection-of-plant-diseases into feature/basic-model-setup 2024-05-06 00:10:40 +02:00
mszmyd 7849d9ad51 create test ds 2024-05-06 00:07:09 +02:00
s495727 81dc0f8771 Merge branch 'main' into feature/basic-model-setup 2024-05-05 19:46:13 +02:00
s495733 c70553ec7c Merge pull request 'feature/load-dataset' (#2) from feature/load-dataset into main
Reviewed-on: #2
Reviewed-by: s495727 <krzboj@st.amu.edu.pl>
2024-05-05 19:42:12 +02:00
mszmyd c6f6ae28ca add batch 2024-05-05 19:28:40 +02:00
s495727 040b1d014f Merge pull request 'feature/data_manager' (#1) from feature/data_manager into main
Reviewed-on: #1
Reviewed-by: s495727 <krzboj@st.amu.edu.pl>
2024-05-05 19:22:14 +02:00
mszmyd 1cfb74db6a priv 2024-05-05 19:03:15 +02:00
s495727 09070879ac Moving out notes from README to external docx 2024-05-05 18:51:58 +02:00
s495727 2093f84c5f Small fixes, model wasnt building 2024-05-05 18:36:53 +02:00
s495727 e75075c141 Run example with Dockerfile to run the code 2024-05-05 18:23:13 +02:00
s495727 e48d6cd31a Merge branch 'main' into basic-model-setup 2024-05-05 18:21:24 +02:00
s495727 a855567ca9 Launch settings define queue setup for agents 2024-05-05 18:17:51 +02:00
mszmyd 8e6318b1fe del tfio from req 2024-05-05 13:41:12 +02:00
mszmyd d4b6a714bb add onehot and getattr 2024-05-05 13:25:53 +02:00
mszmyd b7ca0fae45 div by float 2024-05-05 12:44:54 +02:00
mszmyd de27695d53 move funcs to class 2024-05-05 12:41:54 +02:00
mszmyd e3002d5ef8 add loader 2024-05-05 01:20:04 +02:00
22 changed files with 481 additions and 461 deletions

3
.gitignore vendored
View File

@ -1,6 +1,7 @@
secrets.txt
.ipynb_checkpoints
data/
*.zip
# https://github.com/microsoft/vscode-python/blob/main/.gitignore
.DS_Store
.huskyrc.json

36
Dockerfile Normal file
View File

@ -0,0 +1,36 @@
FROM ubuntu:22.04

# System packages needed to build Python via pyenv and run the project.
# `-y` on upgrade: the build is non-interactive and would otherwise stall on the prompt.
# `curl` was listed twice in the original install line; deduplicated here.
RUN apt-get update && apt-get upgrade -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    curl liblzma-dev python-tk python3-tk tk-dev libssl-dev libffi-dev libncurses5-dev zlib1g zlib1g-dev \
    libreadline-dev libbz2-dev libsqlite3-dev make gcc git-all wget python3-openssl gnupg2 && \
    rm -rf /var/lib/apt/lists/*

# Setup CUDA 12.2 from NVIDIA's WSL-Ubuntu local repo.
RUN apt-key del 7fa2af80 && \
    wget https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/cuda-wsl-ubuntu.pin && \
    mv cuda-wsl-ubuntu.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
    wget https://developer.download.nvidia.com/compute/cuda/12.2.2/local_installers/cuda-repo-wsl-ubuntu-12-2-local_12.2.2-1_amd64.deb && \
    dpkg -i cuda-repo-wsl-ubuntu-12-2-local_12.2.2-1_amd64.deb && \
    cp /var/cuda-repo-wsl-ubuntu-12-2-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
    apt-get update && \
    apt-get -y install cuda-toolkit-12-2

# Pyenv-managed Python 3.10.12; put its bin dirs first on PATH.
ENV PYENV_ROOT="$HOME/.pyenv"
ENV PATH="$PYENV_ROOT/bin:$PYENV_ROOT/versions/3.10.12/bin:$PATH"
RUN curl https://pyenv.run | bash
RUN pyenv install 3.10.12 && \
    pyenv global 3.10.12 && \
    echo 'eval "$(pyenv init --path)"' >> ~/.bashrc && \
    echo 'eval "$(pyenv virtualenv-init -)"' >> ~/.bashrc
SHELL ["/bin/bash", "-c"]

WORKDIR /app
# COPY (not ADD): plain local file, no archive extraction or URL fetch needed.
COPY ./requirements.txt /app/requirements.txt
RUN pip install -r requirements.txt

# Expose the pip-installed cuDNN and the CUDA toolkit to the dynamic linker at runtime.
ENV CUDNN_PATH="/.pyenv/versions/3.10.12/lib/python3.10/site-packages/nvidia/cudnn/"
ENV LD_LIBRARY_PATH="$CUDNN_PATH/lib":"/usr/local/cuda-12.2/lib64"
ENV PATH="$PATH":"/usr/local/cuda-12.2/bin"

View File

@ -1,7 +1,24 @@
.PHONY: download-dataset sobel-dataset
.PHONY: download-dataset resize-dataset sobel-dataset
# Use inside docker container
download-dataset:
python3 ./file_manager/data_manager.py --download
resize-dataset:
python3 ./file_manager/data_manager.py --resize --shape 64 64 --source "original_dataset"
sobel-dataset:
python3 ./file_manager/data_manager.py --sobel
python3 ./file_manager/data_manager.py --sobel --source "resized_dataset"
login:
wandb login $$(cat "$$API_KEY_SECRET")
# Outside docker
docker-run:
docker-compose run --entrypoint=/bin/bash gpu
docker-build:
docker-compose build
check-gpu:
python3 ./gpu_check.py

View File

@ -12,85 +12,26 @@
| 15.06.2024 | Prezentacja działania systemu
| | Prezentacja wyników i skuteczności wybranego modelu
# Szczegółowy harmonogram
# Dokumentacja
Spotkania dot. progresu prac - każda niedziela, godzina 18:00-20:00.
Poniżej, kolumna "działanie" jest w formacie `<osoba/osoby> (<numer_zadania>)`.
Brak osoby oznacza, że zadanie nie zostało jeszcze przypisane.
[Link do dokumentacji](https://uam-my.sharepoint.com/personal/krzboj_st_amu_edu_pl/_layouts/15/doc.aspx?sourcedoc={dc695bbe-68d1-4947-8c29-1d008f252a3b}&action=edit)
| Data | Działanie
|----------------------------:|:------------------------------------------------------------|
| 05.05.2024 | Sergiusz (1), Mateusz (3), Krzysztof (2)
| 12.05.2024 | Wszyscy (5), (4), (6), (7.1)
| 19.05.2024 | Wszyscy (5), (7.2)
| 26.05.2024 | Wszyscy (5), (7.3), (9)
| 02.06.2024 | (8)
| 09.06.2024 | Feedback, ewentualne poprawki
| 15.06.2024 | Finalna prezentacja
Szczegóły działań:
1) Przygotowanie danych i modułu do ich przetwarzania
- Napisanie skryptu, który pobiera dane oraz rozpakowuje je lokalnie.
- Napisanie szablonu skryptu do przetwarzania danych. Skrypt powinien tworzyć katalogi (struktura katalogowa) z danymi po transformacji. Każda transformacja na oryginalnych danych będzie commit'owana do repozytorium, tak aby reszta zespołu mogła ją uruchomić.
- Napisać jedną przykładową transformację, np. resize i kontury, korzystając z szablonu.
- Utworzyć README.md z instrukcją tworzenia nowego modułu do przetwarzania.
2) Modele do przygotowania:
- Przygotować wstępnie 3 modele w formacie WanDB, np. MobileNet, ResNet, ew. custom CNN z klasyfikacją wielozadaniową.
- Uruchomić modele na WanDB żeby zobaczyć czy się uruchamiają i generują poprawne wykresy.
- Utworzyć README.md z instrukcją tworzenia nowych modeli.
3) Moduł do ładowania plików
- Napisać moduł, który ładuje dane po transformacji. Dane będą wykorzystywane do uczenia i walidacji modelu.
- Moduł powinien dokonywać podziału zbioru danych na 3 czesci - train, valid, test.
- Powinno być możliwe zdefiniowanie rozmiaru batch'a, rozmiaru validation set'a, scieżki skąd załadować dane.
- Dodać możliwość definiowania seed'a, tak aby każdy mógł uzyskać podobne rezultaty w razie potrzeby. Seed powinien być przekierowany na stdout podczas uruchamiania skryptu.
- Dodać możliwość wyboru rozkładu danych.
- Dane wyjściowe powinny być w formacie pozwalającym na załadowanie ich bezpośrednio do modelu (binarne, tf record, lub inne).
- README.md opisujący w jaki sposób parametryzować moduł.
4) Moduł do obsługi i uruchamiania WanDB Job's
- Napisać skrypt do ściągania danych z kolejki aby obejść problem uruchamiania agenta na Colab/Kaggle.
- Napisać skrypt, który uruchamia job'y i wysyła go na kolejkę. Powinien obsługiwać przyjmowanie hiperparametrów, oraz nazwę kolejki, do której zostanie job przesłany.
- Napisać skrypt, który uruchamia agenta na danej maszynie.
- Napisać skrypt do tworzenia jobów - powinna być sprecyzowana struktura katalogowa, pozwalająca na zarządzanie nimi i obsługę różnych modeli. Ewentualnie synchronizacja job'ów, między WanDB i środowiskiem lokalnym.
- README.md opisujący powyższe.
5) Eksperymenty, dobieranie hiperparametrów, rozkładu danych, testowanie różnych strategii. Jeżeli konieczne, dodanie nowych modeli.
6) Dodać Heatmap'ę do modelu (CAM).
7) Przygotowanie frontu do projektu (https://www.gradio.app/)
1. Uruchomienie lokalne Frontu do testów.
2. Obsługa wyświetlania Heatmap.
3. Deploy frontu na środowisko (lokalne/zdalne, do wyboru).
8) Wybór najlepszego modelu.
9) Modul do obslugi Sweeps - automatycznego dobierania hiperparametrów (opcjonalnie).
# Źródło danych
https://www.kaggle.com/datasets/vipoooool/new-plant-diseases-dataset
# Technologie
## WanDB
WanDB built-in features:
- Experiments Tracking
- Predictions Visualization
- Scheduling runs through queues & connected agents
- Model Registry
- Hyperparameter optimization via Sweeps
## Moc obliczeniowa
- Radeon 7800XT
- GeForce RTX 3060TI
- GeForce RTX 3070
- GeForce RTX 4050M
- [zasoby uczelniane](https://laboratoria.wmi.amu.edu.pl/uslugi/zasoby-dla-projektow/maszyna-gpu/)
# Setup
1. Install Docker on your local system.
2. To build docker image and run the shell, use Makefile
```bash
make docker-build
make docker-run
```
3. Get your API key from https://wandb.ai/settings#api, and add the key to **secrets.txt** file.
4. After running the container
```bash
make login # to login to WanDB.
make check-gpu # to verify if GPU works
```
5. If needed, to manually run containers, run:
```bash
docker build -t gpu api_key="<wandb_api_key>" .
docker run --rm -it --gpus all --entrypoint /bin/bash gpu
```

23
compose.yaml Normal file
View File

@ -0,0 +1,23 @@
services:
  gpu:
    image: gpu
    volumes:
      - .:/app                # mount the project root into the container
    command: nvidia-smi       # default command: sanity-check GPU visibility
    build:
      context: .
      dockerfile: Dockerfile
    environment:
      # Path (inside the container) of the mounted W&B API key secret.
      API_KEY_SECRET: /run/secrets/api_key_secret
    secrets:
      - api_key_secret
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this service.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
secrets:
  api_key_secret:
    # Local file holding the W&B API key (listed in .gitignore).
    file: ./secrets.txt

0
dataset/__init__.py Normal file
View File

40
dataset/consts.py Normal file
View File

@ -0,0 +1,40 @@
# Plant species labels, as spelled in the dataset's directory names.
# NOTE: order is significant — one-hot label vectors are matched by position
# (see dataset.Dataset.__get_labels), so do not reorder.
PLANT_CLASSES = [
    "Tomato",
    "Potato",
    "Corn_(maize)",
    "Apple",
    "Blueberry",
    "Soybean",
    "Cherry_(including_sour)",
    "Squash",
    "Strawberry",
    "Pepper,_bell",
    "Peach",
    "Grape",
    "Orange",
    "Raspberry",
]
# Disease labels, keeping the dataset's original (inconsistent) casing,
# underscores, and embedded spaces verbatim. Order-sensitive as above.
DISEASE_CLASSES = [
    "healthy",
    "Northern_Leaf_Blight",
    "Tomato_mosaic_virus",
    "Early_blight",
    "Leaf_scorch",
    "Tomato_Yellow_Leaf_Curl_Virus",
    "Cedar_apple_rust",
    "Late_blight",
    "Spider_mites Two-spotted_spider_mite",
    "Black_rot",
    "Bacterial_spot",
    "Apple_scab",
    "Powdery_mildew",
    "Esca_(Black_Measles)",
    "Haunglongbing_(Citrus_greening)",
    "Leaf_Mold",
    "Common_rust_",
    "Target_Spot",
    "Leaf_blight_(Isariopsis_Leaf_Spot)",
    "Septoria_leaf_spot",
    "Cercospora_leaf_spot Gray_leaf_spot",
]

75
dataset/dataset.py Normal file
View File

@ -0,0 +1,75 @@
import os
from pathlib import Path
import tensorflow as tf
from .consts import DISEASE_CLASSES, PLANT_CLASSES
class Dataset:
    """Load and preprocess an image dataset with tf.data.

    Loads image files from `data_dir` (expected layout:
    `<data_dir>/<Plant>___<Disease>/<image>.jpg`) into a shuffled,
    repeated, batched, prefetched tf.data.Dataset. Unknown attribute
    access is delegated to the wrapped tf.data.Dataset, so instances can
    be used like one (e.g. `.take(1)`).

    Args:
        data_dir (Path): Path to the dataset split directory.
        seed (int): Seed for shuffling the dataset (reproducibility).
        repeat (int): Number of times to repeat the dataset.
        shuffle_buffer_size (int): Size of the shuffle buffer.
        batch_size (int): Batch size; incomplete final batches are dropped.
    """

    def __init__(self,
                 data_dir: Path,
                 seed: int = 42,
                 repeat: int = 1,
                 shuffle_buffer_size: int = 10_000,
                 batch_size: int = 64) -> None:
        self.data_dir = data_dir
        self.seed = seed
        self.repeat = repeat
        self.shuffle_buffer_size = shuffle_buffer_size
        self.batch_size = batch_size
        self.dataset = self.__load_dataset()\
            .shuffle(self.shuffle_buffer_size, seed=self.seed)\
            .repeat(self.repeat)\
            .batch(self.batch_size, drop_remainder=True)\
            .prefetch(tf.data.AUTOTUNE)

    def __load_dataset(self) -> tf.data.Dataset:
        dataset = tf.data.Dataset.list_files(str(self.data_dir / '*/*'))
        if 'test' in str(self.data_dir).lower():
            # Test-set directory names use a different (camel-case) naming
            # scheme than train/valid, so labels cannot be parsed here yet.
            # TODO: normalize test-set names and drop this special case.
            pass
        else:
            dataset = dataset.map(
                self.__preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        return dataset

    def __get_labels(self, image_path):
        # The parent directory name encodes both labels: "<Plant>___<Disease>".
        dir_name = tf.strings.split(image_path, os.path.sep)[-2]
        parts = tf.strings.split(dir_name, '___')  # split once, reuse both fields
        one_hot_plant = parts[0] == PLANT_CLASSES
        one_hot_disease = parts[1] == DISEASE_CLASSES
        return (tf.cast(one_hot_plant, dtype=tf.uint8),
                tf.cast(one_hot_disease, dtype=tf.uint8))

    def __get_image(self, image_path):
        img = tf.io.read_file(image_path)
        img = tf.io.decode_jpeg(img, channels=3)
        # Normalize pixel values to [0, 1] floats for model input.
        return tf.cast(img, dtype=tf.float32) / 255.

    def __preprocess(self, image_path):
        labels = self.__get_labels(image_path)
        image = self.__get_image(image_path)
        # Returns X, Y1 (plant one-hot), Y2 (disease one-hot).
        return image, labels[0], labels[1]

    def __getattr__(self, attr):
        # Delegate unknown attributes (take, map, ...) to the wrapped dataset.
        return getattr(self.dataset, attr)

View File

@ -1,38 +1,51 @@
import glob
import shutil
import cv2
from zipfile import ZipFile
import os
import wget
import argparse
import glob
import os
import shutil
from pathlib import Path
import zipfile
from tqdm import tqdm
import cv2
import wget
main_path = Path("data/")
path_to_train_and_valid = main_path / "%s/**/*.*"
path_to_test_dataset = main_path / "test"
original_dataset_name = "original_dataset"
parser = argparse.ArgumentParser()
parser.add_argument("--download", action="store_true",
help="Download the data")
parser.add_argument("--resize", action="store_true",
help="Resize the dataset")
parser.add_argument("--shape", type=int, nargs="+", default=(64, 64),
help="Shape of the resized images. Applied only for resize option. Default: (64, 64)")
parser.add_argument("--sobel", action="store_true",
help="Apply Sobel filter to the dataset")
parser.add_argument("--source", type=str, default="original_dataset",
help="Name of the source dataset. Applied for all arguments except download. Default: original_dataset")
args = parser.parse_args()
class DataManager:
def download_data(self):
print("Downloading")
if not os.path.isfile("archive.zip"):
wget.download("https://storage.googleapis.com/kaggle-data-sets/78313/182633/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240502%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240502T181500Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=87d0661313e358206b6e10d44f135d41e23501d601e58b1e8236ca28a82ccc434534564b45baa84c4d829dd1995ff384d51fe5dba3f543d00eb0763169fd712c6c8f91bb4f298db38a19b31b2d489798a9723a271aa4108d7b93345c5a64a7ef00b9b8f27d1d5f728e373c870f0287eb89bc747941f0aeeb4703c288059e2e07b7ece3a83114a9607276874a90d4ec96dde06fddb94a0d3af72848565661b1404e3ea248eeebf46374daada7df1f37db7d62b21b4ac90706ea64cc74200a58f35bfe379703e7691aeda9e39635b02f58a9f8399fa64b031b1a9bccd7f109d256c6f4886ef94fcdc11034d6da13c0f1d4d8b97cabdd295862a5107b587824ebe8")
wget.download("https://storage.googleapis.com/kaggle-data-sets/78313/182633/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240512%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240512T222712Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=48a59c070b4f57e2746696d7ce6c77a7efd7e2d421a7d1f66411ab3fb21f688c0a0c81907ef4b788d99767cfd2e72200e6ee42e41fc4548d7874c128beabc2ff12d38aa47dfae1c10a10659e81b8d34c23515c1d6682bcf1f3eefc4e75a1ddba65dec5a03b03eec674272e772279b723f3a2739ca9099b185cf110cc5fb98e96b92622070c8cdd521f6ea0d676e6ba5dc37b23faf919e5dbc8b631e5be8e25f8da5bc74fbb94ff72393702c7348b3adf8140e80269d571ff00dd6aa065c43492d66131f62b9e59c503e1490851748c683680dbf3f929602239c6de297d329c9f3c4b12e389007783c88526f38064afbad73dba9897e408d8e1856b013fadc480")
def unzip_data(self, file_name, path_to_extract):
full_path_to_extract = main_path / path_to_extract
old_path = "New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)"
if not os.path.exists(main_path):
os.makedirs(main_path)
ZipFile(file_name).extractall(full_path_to_extract)
with zipfile.ZipFile(file_name) as zf:
for member in tqdm(zf.infolist(), desc='Extracting'):
try:
zf.extract(member, full_path_to_extract)
except zipfile.error as e:
pass
# shutil.move("data/test/test",
# full_path_to_extract, copy_function=shutil.copytree)
shutil.move(full_path_to_extract / old_path / "train",
@ -45,32 +58,61 @@ class DataManager:
shutil.rmtree(
full_path_to_extract / "new plant diseases dataset(augmented)"
)
shutil.rmtree(full_path_to_extract / "test")
self.get_test_ds_from_validation()
def write_image(self, image, path):
os.makedirs(path.rsplit('/', 1)[0], exist_ok=True)
cv2.imwrite(path, image)
def resize_dataset(self, source_dataset_name, width, height):
def get_test_ds_from_validation(self, files_per_category: int = 2):
path_to_extract = main_path / original_dataset_name
valid_ds = glob.glob(str(path_to_extract / "valid/*/*"))
category_dirs = set([category_dir.split("/")[-2]
for category_dir in valid_ds])
category_lists = {category: [] for category in category_dirs}
for file_path in valid_ds:
category = file_path.split("/")[-2]
category_lists[category].append(file_path)
test_dir = path_to_extract / "test"
if not os.path.exists(test_dir):
os.makedirs(test_dir, exist_ok=True)
for category, files in category_lists.items():
os.makedirs(test_dir / category, exist_ok=True)
files.sort()
for file in files[:files_per_category]:
shutil.move(file, test_dir / category)
def resize_dataset(self, source_dataset_name, shape):
dataset_name = "resized_dataset"
if not os.path.exists(main_path / dataset_name):
counter=0
for file in glob.glob(str(path_to_train_and_valid) % source_dataset_name, recursive=True):
counter+=1
path_to_file = file.replace("\\", "/")
image = cv2.imread(path_to_file)
image = cv2.resize(image, (width, height))
image = cv2.resize(image, shape)
new_path = path_to_file.replace(
source_dataset_name, dataset_name)
self.write_image(image, new_path)
print("Resized %s files" % (counter), end='\r')
def sobelx(self, source_dataset_name):
dataset_name = "sobel_dataset"
if not os.path.exists(main_path / dataset_name):
counter=0
for file in glob.glob(str(path_to_train_and_valid) % source_dataset_name, recursive=True):
counter+=1
path_to_file = file.replace("\\", "/")
image = cv2.imread(path_to_file)
sobel = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=5)
new_path = path_to_file.replace(
source_dataset_name, dataset_name)
self.write_image(sobel, new_path)
print("Sobel processed %s files" % (counter), end='\r')
if __name__ == "__main__":
@ -78,6 +120,7 @@ if __name__ == "__main__":
if args.download:
data_manager.download_data()
data_manager.unzip_data("archive.zip", original_dataset_name)
data_manager.resize_dataset(original_dataset_name, 64, 64)
if args.resize:
data_manager.resize_dataset(args.source, tuple(args.shape))
if args.sobel:
data_manager.sobelx("resized_dataset")
data_manager.sobelx(args.source)

View File

@ -0,0 +1,19 @@
from pathlib import Path
# TODO: split the files into smaller dirs and make list of them
class FileSharder:
    """Split dataset split directories into fixed-size shards.

    Currently a placeholder: construction records the directory locations
    and shard size, then invokes `shard`, which is a no-op stub.
    """

    def __init__(self,
                 train_dir: Path = Path('./data/resized_dataset/train'),
                 valid_dir: Path = Path('./data/resized_dataset/valid'),
                 test_dir: Path = Path('./data/resized_dataset/test'),
                 shard_size=5_000) -> None:
        # Remember where each split lives and how large a shard should be,
        # then shard immediately on construction.
        self.train_dir = train_dir
        self.valid_dir = valid_dir
        self.test_dir = test_dir
        self.shard_size = shard_size
        self.shard()

    def shard(self):
        """Perform the sharding (not implemented yet)."""
        pass

18
gpu_check.py Normal file
View File

@ -0,0 +1,18 @@
# Sanity-check script: verifies that TensorFlow imports, runs a tiny
# computation, and reports whether any GPUs are visible to TensorFlow.
import sys

try:
    import tensorflow
except ImportError:
    # Typo fixed ("requied" -> "required"); sys.exit instead of the
    # interactive-only exit() builtin.
    print("Tensorflow is not installed, install required packages from requirements.txt")
    sys.exit(1)

# A small reduction proves TensorFlow actually executes, not just imports.
print("If you see the tensor result, then the Tensorflow is available.")
rs = tensorflow.reduce_sum(tensorflow.random.normal([1000, 1000]))
print(rs)

gpus = tensorflow.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU available.")
else:
    print(f"GPUs available: {len(gpus)}")
    print(gpus)

10
launch_settings.yaml Normal file
View File

@ -0,0 +1,10 @@
# W&B Launch agent settings.
max_jobs: 1                        # run at most one job at a time per agent
entity: uczenie-maszynowe-projekt  # W&B entity (team) that owns the queues
queues:                            # launch queues this agent polls
  - GPU queue 1
  - GPU queue 2
builder:
  type: docker                     # build launch jobs as Docker images

15
main.py Normal file
View File

@ -0,0 +1,15 @@
from model.test_model import TestModel
from pathlib import Path
from dataset.dataset import Dataset
if __name__ == "__main__":
    # Loading dataset
    train_dataset = Dataset(Path('data/resized_dataset/train'))
    valid_dataset = Dataset(Path('data/resized_dataset/valid'))
    # Peek at a single batch to verify the input pipeline yields data.
    for i in train_dataset.take(1):
        print(i)
    # Training model
    # NOTE(review): valid_dataset is constructed but never passed to fit() —
    # TestModel trains on its own internal dataset; confirm this is intended.
    model = TestModel()
    history = model.fit()
    # NOTE(review): saves under ./src/model/ — verify this directory exists
    # relative to the working directory at run time.
    model.save("./src/model/test_model_final.keras")

0
model/__init__.py Normal file
View File

55
model/resnet_50_model.py Normal file
View File

@ -0,0 +1,55 @@
import tensorflow as tf
from wandb_utils.config import Config
from wandb.keras import WandbMetricsLogger
class Resnet50Model:
    """ResNet-50 wrapper that trains on CIFAR-10 and logs metrics to W&B.

    Builds a ResNet50 backbone with ImageNet weights and no classification
    head, compiles it from the shared Config, and loads CIFAR-10.
    """

    def __init__(self):
        # Shared W&B run configuration; fields assigned below appear in the
        # W&B interface.
        self.config = Config(epoch=8, batch_size=64).config()
        self.config.learning_rate = 0.01
        # Define specific configuration below, they will be visible in the W&B interface
        # Start of config
        self.config.optimizer = "sgd"
        self.config.loss = "sparse_categorical_crossentropy"
        self.config.metrics = ["accuracy"]
        # End
        self.model = self.__build_model()
        self.__compile()
        self.__load_dataset()

    def __build_model(self):
        # NOTE(review): include_top=False yields 4-D feature maps rather than
        # class logits, while the loss is sparse_categorical_crossentropy on
        # integer labels — a pooling + Dense head is likely needed; confirm.
        return tf.keras.applications.ResNet50(
            input_shape=(224, 224, 3), include_top=False, weights='imagenet'
            # output - unfreeze the last layer
            # check whether this works by default: are inner layers frozen
            # and the last one trainable?
        )

    def __compile(self):
        # NOTE(review): config.learning_rate is recorded but not applied —
        # the "sgd" string identifier uses Keras' default learning rate.
        self.model.compile(
            optimizer=self.config.optimizer,
            loss=self.config.loss,
            metrics=self.config.metrics,
        )

    def __load_dataset(self):
        # CIFAR-10, scaled to [0, 1] floats.
        # NOTE(review): CIFAR-10 images are 32x32x3 but the model expects
        # 224x224x3 input — confirm resizing happens before fit().
        (self.x_train, self.y_train), (self.x_test, self.y_test) = tf.keras.datasets.cifar10.load_data()
        self.x_train = self.x_train.astype('float32') / 255.0
        self.x_test = self.x_test.astype('float32') / 255.0

    def fit(self):
        """Train the model, streaming metrics to W&B every 5 batches."""
        wandb_callbacks = [
            WandbMetricsLogger(log_freq=5),
            # Not supported with Keras >= 3.0.0
            # WandbModelCheckpoint(filepath="models"),
        ]
        return self.model.fit(
            x=self.x_train,
            y=self.y_train,
            epochs=self.config.epoch,
            batch_size=self.config.batch_size,
            callbacks=wandb_callbacks
        )

    def save(self, filepath):
        """Persist the trained model to `filepath`."""
        self.model.save(filepath)

65
model/test_model.py Normal file
View File

@ -0,0 +1,65 @@
import random
import tensorflow as tf
from wandb_utils.config import Config
from wandb.keras import WandbMetricsLogger
class TestModel:
    """Small MLP trained on a subsampled MNIST, logging metrics to W&B.

    Hyperparameters are stored on the shared Config object so they show up
    in the W&B interface; dropout is randomized per run.
    """

    def __init__(self):
        self.config = Config(epoch=8, batch_size=256).config()
        self.config.learning_rate = 0.01
        # Define specific configuration below, they will be visible in the W&B interface
        # Start of config
        self.config.layer_1 = 512
        self.config.activation_1 = "relu"
        self.config.dropout = random.uniform(0.01, 0.80)
        self.config.layer_2 = 10
        self.config.activation_2 = "softmax"
        self.config.optimizer = "sgd"
        self.config.loss = "sparse_categorical_crossentropy"
        self.config.metrics = ["accuracy"]
        # End
        self.model = self.__build_model()
        self.__compile()
        self.__load_dataset()

    def __build_model(self):
        # Flatten 28x28 input, one hidden layer, dropout, softmax output.
        cfg = self.config
        layers = tf.keras.layers
        return tf.keras.models.Sequential([
            layers.Flatten(input_shape=(28, 28)),
            layers.Dense(cfg.layer_1, activation=cfg.activation_1),
            layers.Dropout(cfg.dropout),
            layers.Dense(cfg.layer_2, activation=cfg.activation_2),
        ])

    def __compile(self):
        cfg = self.config
        self.model.compile(optimizer=cfg.optimizer,
                           loss=cfg.loss,
                           metrics=cfg.metrics)

    def __load_dataset(self):
        # Scale to [0, 1] and subsample (1/5 of train, 1/20 of test) to keep
        # the run fast.
        mnist = tf.keras.datasets.mnist
        (x_tr, y_tr), (x_te, y_te) = mnist.load_data()
        x_tr, x_te = x_tr / 255.0, x_te / 255.0
        self.x_train, self.y_train = x_tr[::5], y_tr[::5]
        self.x_test, self.y_test = x_te[::20], y_te[::20]

    def fit(self):
        """Train the model, streaming metrics to W&B every 5 batches."""
        wandb_callbacks = [
            WandbMetricsLogger(log_freq=5),
            # Not supported with Keras >= 3.0.0
            # WandbModelCheckpoint(filepath="models"),
        ]
        return self.model.fit(x=self.x_train,
                              y=self.y_train,
                              epochs=self.config.epoch,
                              batch_size=self.config.batch_size,
                              validation_data=(self.x_test, self.y_test),
                              callbacks=wandb_callbacks)

    def save(self, filepath):
        """Persist the trained model to `filepath`."""
        self.model.save(filepath)

Binary file not shown.

View File

@ -1,5 +1,7 @@
tensorflow==2.16.1
tensorflow[and-cuda]==2.16.1
tensorflow-io==0.37.0
numpy==1.26.4
opencv-python==4.9.0.80
numpy==1.26.4
wget==3.2
wandb==0.16.6

1
secrets.txt Normal file
View File

@ -0,0 +1 @@
FILL IN

View File

@ -1,363 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Tracking run with wandb version 0.16.6"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>/mnt/c/Users/krzys/OneDrive/Studia/inz-uczenia-maszynowego/Detection-of-plant-diseases/wandb/run-20240416_232247-bfji8amn</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases/runs/bfji8amn' target=\"_blank\">floral-energy-3</a></strong> to <a href='https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases' target=\"_blank\">https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases/runs/bfji8amn' target=\"_blank\">https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases/runs/bfji8amn</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/8\n",
"44/47 [===========================>..] - ETA: 0s - loss: 2.1872 - accuracy: 0.2224INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"47/47 [==============================] - 2s 32ms/step - loss: 2.1734 - accuracy: 0.2344 - val_loss: 1.9111 - val_accuracy: 0.5380\n",
"Epoch 2/8\n",
"40/47 [========================>.....] - ETA: 0s - loss: 1.7703 - accuracy: 0.5437INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"47/47 [==============================] - 1s 31ms/step - loss: 1.7483 - accuracy: 0.5527 - val_loss: 1.5486 - val_accuracy: 0.6880\n",
"Epoch 3/8\n",
"46/47 [============================>.] - ETA: 0s - loss: 1.4466 - accuracy: 0.6818INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"47/47 [==============================] - 2s 33ms/step - loss: 1.4444 - accuracy: 0.6829 - val_loss: 1.2824 - val_accuracy: 0.7460\n",
"Epoch 4/8\n",
"44/47 [===========================>..] - ETA: 0s - loss: 1.2232 - accuracy: 0.7362INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"47/47 [==============================] - 2s 32ms/step - loss: 1.2162 - accuracy: 0.7390 - val_loss: 1.0886 - val_accuracy: 0.7880\n",
"Epoch 5/8\n",
"44/47 [===========================>..] - ETA: 0s - loss: 1.0583 - accuracy: 0.7694INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"47/47 [==============================] - 1s 28ms/step - loss: 1.0519 - accuracy: 0.7711 - val_loss: 0.9497 - val_accuracy: 0.8020\n",
"Epoch 6/8\n",
"41/47 [=========================>....] - ETA: 0s - loss: 0.9382 - accuracy: 0.7897INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"47/47 [==============================] - 1s 28ms/step - loss: 0.9339 - accuracy: 0.7902 - val_loss: 0.8484 - val_accuracy: 0.8180\n",
"Epoch 7/8\n",
"47/47 [==============================] - ETA: 0s - loss: 0.8496 - accuracy: 0.8043INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"47/47 [==============================] - 1s 27ms/step - loss: 0.8496 - accuracy: 0.8043 - val_loss: 0.7735 - val_accuracy: 0.8220\n",
"Epoch 8/8\n",
"44/47 [===========================>..] - ETA: 0s - loss: 0.7790 - accuracy: 0.8180INFO:tensorflow:Assets written to: models/assets\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Assets written to: models/assets\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Adding directory to artifact (./models)... Done. 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"47/47 [==============================] - 1s 29ms/step - loss: 0.7779 - accuracy: 0.8183 - val_loss: 0.7165 - val_accuracy: 0.8260\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "316da49b179f47019f8cf5c9c72353fe"
}
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<style>\n",
" table.wandb td:nth-child(1) { padding: 0 10px; text-align: left ; width: auto;} td:nth-child(2) {text-align: left ; width: 100%}\n",
" .wandb-row { display: flex; flex-direction: row; flex-wrap: wrap; justify-content: flex-start; width: 100% }\n",
" .wandb-col { display: flex; flex-direction: column; flex-basis: 100%; flex: 1; padding: 10px; }\n",
" </style>\n",
"<div class=\"wandb-row\"><div class=\"wandb-col\"><h3>Run history:</h3><br/><table class=\"wandb\"><tr><td>batch/accuracy</td><td>▁▁▁▂▂▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████████</td></tr><tr><td>batch/batch_step</td><td>▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███</td></tr><tr><td>batch/learning_rate</td><td>▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>batch/loss</td><td>███▇▇▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁</td></tr><tr><td>epoch/accuracy</td><td>▁▅▆▇▇███</td></tr><tr><td>epoch/epoch</td><td>▁▂▃▄▅▆▇█</td></tr><tr><td>epoch/learning_rate</td><td>▁▁▁▁▁▁▁▁</td></tr><tr><td>epoch/loss</td><td>█▆▄▃▂▂▁▁</td></tr><tr><td>epoch/val_accuracy</td><td>▁▅▆▇▇███</td></tr><tr><td>epoch/val_loss</td><td>█▆▄▃▂▂▁▁</td></tr></table><br/></div><div class=\"wandb-col\"><h3>Run summary:</h3><br/><table class=\"wandb\"><tr><td>batch/accuracy</td><td>0.81726</td></tr><tr><td>batch/batch_step</td><td>395</td></tr><tr><td>batch/learning_rate</td><td>0.01</td></tr><tr><td>batch/loss</td><td>0.77969</td></tr><tr><td>epoch/accuracy</td><td>0.81825</td></tr><tr><td>epoch/epoch</td><td>7</td></tr><tr><td>epoch/learning_rate</td><td>0.01</td></tr><tr><td>epoch/loss</td><td>0.77791</td></tr><tr><td>epoch/val_accuracy</td><td>0.826</td></tr><tr><td>epoch/val_loss</td><td>0.71648</td></tr></table><br/></div></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run <strong style=\"color:#cdcd00\">floral-energy-3</strong> at: <a href='https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases/runs/bfji8amn' target=\"_blank\">https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases/runs/bfji8amn</a><br/> View project at: <a href='https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases' target=\"_blank\">https://wandb.ai/uczenie-maszynowe-projekt/Detection%20of%20plant%20diseases</a><br/>Synced 5 W&B file(s), 0 media file(s), 42 artifact file(s) and 0 other file(s)"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Find logs at: <code>./wandb/run-20240416_232247-bfji8amn/logs</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# This script needs these libraries to be installed:\n",
"# tensorflow, numpy\n",
"\n",
"import wandb\n",
"from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint\n",
"\n",
"import random\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"\n",
"\n",
"# Start a run, tracking hyperparameters\n",
"wandb.init(\n",
" # set the wandb project where this run will be logged\n",
" project=\"Detection of plant diseases\",\n",
"\n",
" # track hyperparameters and run metadata with wandb.config\n",
" config={\n",
" \"layer_1\": 512,\n",
" \"activation_1\": \"relu\",\n",
" \"dropout\": random.uniform(0.01, 0.80),\n",
" \"layer_2\": 10,\n",
" \"activation_2\": \"softmax\",\n",
" \"optimizer\": \"sgd\",\n",
" \"loss\": \"sparse_categorical_crossentropy\",\n",
" \"metric\": \"accuracy\",\n",
" \"epoch\": 8,\n",
" \"batch_size\": 256\n",
" }\n",
")\n",
"\n",
"# [optional] use wandb.config as your config\n",
"config = wandb.config\n",
"\n",
"# get the data\n",
"mnist = tf.keras.datasets.mnist\n",
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"x_train, x_test = x_train / 255.0, x_test / 255.0\n",
"x_train, y_train = x_train[::5], y_train[::5]\n",
"x_test, y_test = x_test[::20], y_test[::20]\n",
"labels = [str(digit) for digit in range(np.max(y_train) + 1)]\n",
"\n",
"# build a model\n",
"model = tf.keras.models.Sequential([\n",
" tf.keras.layers.Flatten(input_shape=(28, 28)),\n",
" tf.keras.layers.Dense(config.layer_1, activation=config.activation_1),\n",
" tf.keras.layers.Dropout(config.dropout),\n",
" tf.keras.layers.Dense(config.layer_2, activation=config.activation_2)\n",
" ])\n",
"\n",
"# compile the model\n",
"model.compile(optimizer=config.optimizer,\n",
" loss=config.loss,\n",
" metrics=[config.metric]\n",
" )\n",
"\n",
"# WandbMetricsLogger will log train and validation metrics to wandb\n",
"# WandbModelCheckpoint will upload model checkpoints to wandb\n",
"history = model.fit(x=x_train, y=y_train,\n",
" epochs=config.epoch,\n",
" batch_size=config.batch_size,\n",
" validation_data=(x_test, y_test),\n",
" callbacks=[\n",
" WandbMetricsLogger(log_freq=5),\n",
" WandbModelCheckpoint(\"models\")\n",
" ])\n",
"\n",
"# [optional] finish the wandb run, necessary in notebooks\n",
"wandb.finish()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0
wandb_utils/__init__.py Normal file
View File

22
wandb_utils/config.py Normal file
View File

@ -0,0 +1,22 @@
import wandb
class Config:
    """Thin wrapper around a Weights & Biases run that records the
    epoch count and batch size as tracked hyperparameters.
    """

    def __init__(self, epoch, batch_size):
        """Start a wandb run for the plant-disease project.

        Args:
            epoch: number of training epochs to log as a hyperparameter.
            batch_size: training batch size to log as a hyperparameter.
        """
        self.epoch = epoch
        self.batch_size = batch_size
        self.run = wandb.init(
            project="Detection of plant diseases",
            config={
                "epoch": epoch,
                "batch_size": batch_size,
            },
        )

    def config(self):
        """Return the wandb config object of the active run."""
        return self.run.config

    def finish(self):
        """Finish the wandb run and flush any pending data.

        Bug fix: ``finish()`` is a method of the *run* object, not of
        ``run.config`` — the original ``self.run.config.finish()`` raised
        ``AttributeError`` because wandb's Config has no ``finish``.
        """
        self.run.finish()