widzenie-komputerowe-projekt/training/data_load.py

import numpy as np
import os
from skimage.io import imread
import cv2 as cv
from pathlib import Path
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import os

def load_train_data(input_dir, image_size):
    """Load a training set stored as one sub-directory per category.

    Every regular file found directly inside a category folder, except a
    file named ``desktop.ini``, is read as an image, reduced to three
    channels, resized with ``cv.resize`` and divided by 255.

    Args:
        input_dir: Directory whose immediate sub-directories are the
            categories.
        image_size: Target size, passed unchanged to ``cv.resize`` as its
            ``dsize`` argument.

    Returns:
        A dict with the keys:
            ``"values"``: ``np.array`` built from the processed images,
            ``"categories_name"``: sub-directory names in scan order,
            ``"categories_count"``: number of images loaded from each
                sub-directory, in the same order as ``categories_name``,
            ``"labels"``: category name of every image, aligned with
                ``values``.
    """
    image_dir = Path(input_dir)

    # Scan the directory once so that names and counts are guaranteed to
    # describe the same folders in the same order.
    folders = [entry for entry in image_dir.iterdir() if entry.is_dir()]
    categories_name = [folder.name for folder in folders]

    train_img = []
    categories_count = []
    labels = []
    for folder in folders:
        count = 0
        for obj in folder.iterdir():
            # Skip sub-folders and the Explorer metadata file.
            if not obj.is_file() or obj.name == 'desktop.ini':
                continue
            img = imread(obj)  # ndarray: xSize x ySize x colorDepth
            if img.ndim == 2:
                # Grayscale images have no channel axis; replicate the
                # single channel so the slice below does not fail.
                img = np.stack((img,) * 3, axis=-1)
            img = img[:, :, :3]  # drop the alpha channel when present
            img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)
            train_img.append(img / 255)  # normalisation
            labels.append(folder.name)
            count += 1
        categories_count.append(count)

    return {
        "values": np.array(train_img),
        "categories_name": categories_name,
        "categories_count": categories_count,
        "labels": labels,
    }

def load_test_data(input_dir, image_size):
    """Load the test images listed in ``test_labels.json``.

    The label file is looked up next to ``input_dir`` (in its parent
    directory). It is parsed as a JSON list of objects; each object's
    ``'filename'`` is resolved relative to ``input_dir`` and its ``'value'``
    is used as the label. Images get the same processing as the training
    images: three channels, ``cv.resize``, division by 255.

    Args:
        input_dir: Directory containing the test images.
        image_size: Target size, passed unchanged to ``cv.resize`` as its
            ``dsize`` argument.

    Returns:
        A dict with the keys:
            ``"values"``: ``np.array`` built from the processed images,
            ``"categories_name"``: distinct labels in order of first
                appearance,
            ``"categories_count"``: number of entries per label, aligned
                with ``categories_name``,
            ``"labels"``: label of every image, aligned with ``values``.
        All four are empty when the label file contains an empty list.
    """
    image_path = Path(input_dir)
    labels_path = image_path.parent / 'test_labels.json'

    # Hand json the raw bytes so it detects the UTF encoding itself instead
    # of relying on the platform's locale encoding.
    objects = json.loads(labels_path.read_bytes())

    # Count per label rather than per run of consecutive equal labels, so
    # the counts stay aligned with the de-duplicated names even when the
    # file is not grouped by label. Dicts keep insertion order.
    # NOTE(review): assumes label values are hashable (e.g. strings).
    per_category = {}
    for entry in objects:
        label = entry['value']
        per_category[label] = per_category.get(label, 0) + 1
    categories_name = list(per_category)
    categories_count = list(per_category.values())

    test_img = []
    labels = []
    for entry in objects:
        img = imread(image_path / entry['filename'])  # ndarray: xSize x ySize x colorDepth
        if img.ndim == 2:
            # Grayscale images have no channel axis; replicate the single
            # channel so the slice below does not fail.
            img = np.stack((img,) * 3, axis=-1)
        img = img[:, :, :3]  # drop the alpha channel when present
        img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)
        test_img.append(img / 255)  # normalisation
        labels.append(entry['value'])

    return {
        "values": np.array(test_img),
        "categories_name": categories_name,
        "categories_count": categories_count,
        "labels": labels,
    }


def load_data(shape, path_train, path_test, batch_size=32,
              validation_size=0.2, random_state=42):
    """Build batched train, test and validation ``tf.data`` datasets.

    Training images are loaded with ``load_train_data`` and split into
    training and validation parts; test images come from ``load_test_data``.
    String labels are converted to integer ids with a single
    ``LabelEncoder`` so that the same class name maps to the same id in all
    three datasets.

    Args:
        shape: Image size forwarded to both loaders.
        path_train: Directory handed to ``load_train_data``.
        path_test: Directory handed to ``load_test_data``.
        batch_size: Number of samples per batch (default 32).
        validation_size: ``test_size`` given to ``train_test_split`` for
            the validation part (default 0.2).
        random_state: Seed given to ``train_test_split`` (default 42).

    Returns:
        Tuple ``(train_ds, test_ds, validation_ds)`` -- note that the test
        dataset comes second. Each dataset is shuffled and batched with
        ``drop_remainder=True``.

    Raises:
        ValueError: From ``LabelEncoder.transform`` when the test labels
            contain a class that does not occur in the training data.
    """
    data_train = load_train_data(path_train, shape)
    values_train = data_train['values']
    labels_train = data_train['labels']

    data_test = load_test_data(path_test, shape)
    X_test = data_test['values']
    y_test = data_test['labels']

    X_train, X_validate, y_train, y_validate = train_test_split(
        values_train, labels_train,
        test_size=validation_size, random_state=random_state)

    # Fit once on every training label (before the split) and only
    # transform afterwards. Re-fitting per split would assign different ids
    # to the same class whenever a split happens to miss a class.
    class_le = LabelEncoder()
    class_le.fit(labels_train)
    y_train_enc = class_le.transform(y_train)
    y_validate_enc = class_le.transform(y_validate)
    y_test_enc = class_le.transform(y_test)

    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))
    validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))
    test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))

    train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
    test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
    validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()
    print("Training data size:", train_ds_size)
    print("Test data size:", test_ds_size)
    print("Validation data size:", validation_ds_size)

    # Each dataset is shuffled with a buffer covering its own size.
    # NOTE(review): drop_remainder=True also discards the last partial
    # batch of the test and validation sets, so up to batch_size - 1
    # samples of each are not returned -- confirm this is intended.
    train_ds = (train_ds
                .shuffle(buffer_size=train_ds_size)
                .batch(batch_size=batch_size, drop_remainder=True))
    test_ds = (test_ds
               .shuffle(buffer_size=test_ds_size)
               .batch(batch_size=batch_size, drop_remainder=True))
    validation_ds = (validation_ds
                     .shuffle(buffer_size=validation_ds_size)
                     .batch(batch_size=batch_size, drop_remainder=True))

    return train_ds, test_ds, validation_ds
classes for models, training 2023-02-01 18:42:47 +01:00			`import numpy as np`
			`import os`
			`from skimage.io import imread`
			`import cv2 as cv`
			`from pathlib import Path`
			`import json`
			`from sklearn.model_selection import train_test_split`
			`from sklearn.preprocessing import LabelEncoder`
			`import tensorflow as tf`
			`import os`

			`def load_train_data(input_dir, image_size):`


			`image_dir = Path(input_dir)`
			`categories_name = []`
			`for file in os.listdir(image_dir):`
			`d = os.path.join(image_dir, file)`
			`if os.path.isdir(d):`
			`categories_name.append(file)`

			`folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]`

			`train_img = []`
			`categories_count=[]`
			`labels=[]`
			`for i, direc in enumerate(folders):`
			`count = 0`
			`for obj in direc.iterdir():`
			`if os.path.isfile(obj) and os.path.basename(os.path.normpath(obj)) != 'desktop.ini':`
			`labels.append(os.path.basename(os.path.normpath(direc)))`
			`count += 1`
			`img = imread(obj)#zwraca ndarry postaci xSize x ySize x colorDepth`
			`img = img[:, :, :3]`
			`img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)# zwraca ndarray`
			`img = img / 255 #normalizacja`
			`train_img.append(img)`
			`categories_count.append(count)`
			`X={}`
			`X["values"] = np.array(train_img)`
			`X["categories_name"] = categories_name`
			`X["categories_count"] = categories_count`
			`X["labels"]=labels`
			`return X`

			`def load_test_data(input_dir, image_size):`

			`image_path = Path(input_dir)`

			`labels_path = image_path.parents[0] / 'test_labels.json'`

			`jsonString = labels_path.read_text()`
			`objects = json.loads(jsonString)`

			`categories_name = []`
			`categories_count=[]`
			`count = 0`
			`c = objects[0]['value']`
			`for e in objects:`
			`if e['value'] != c:`
			`categories_count.append(count)`
			`c = e['value']`
			`count = 1`
			`else:`
			`count += 1`
			`if not e['value'] in categories_name:`
			`categories_name.append(e['value'])`

			`categories_count.append(count)`

			`test_img = []`

			`labels=[]`
			`for e in objects:`
			`p = image_path / e['filename']`
			`img = imread(p)#zwraca ndarry postaci xSize x ySize x colorDepth`
			`img = img[:, :, :3]`
			`img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)# zwraca ndarray`
			`img = img / 255#normalizacja`
			`test_img.append(img)`
			`labels.append(e['value'])`

			`X={}`
			`X["values"] = np.array(test_img)`
			`X["categories_name"] = categories_name`
			`X["categories_count"] = categories_count`
			`X["labels"]=labels`
			`return X`



			`def load_data(shape, path_train, path_test):`
			`data_train = load_train_data(path_train, shape)`
			`values_train = data_train['values']`
			`labels_train = data_train['labels']`

			`data_test = load_test_data(path_test, shape)`
			`X_test = data_test['values']`
			`y_test = data_test['labels']`

			`X_train, X_validate, y_train, y_validate = train_test_split(values_train, labels_train, test_size=0.2, random_state=42)`

			`class_le = LabelEncoder()`
			`y_train_enc = class_le.fit_transform(y_train)`
			`y_validate_enc = class_le.fit_transform(y_validate)`
			`y_test_enc = class_le.fit_transform(y_test)`

			`train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))`
			`validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))`
			`test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))`

			`train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()`
			`test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()`
			`validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()`
			`print("Training data size:", train_ds_size)`
			`print("Test data size:", test_ds_size)`
			`print("Validation data size:", validation_ds_size)`

			`train_ds = (train_ds`
			`.shuffle(buffer_size=train_ds_size)`
			`.batch(batch_size=32, drop_remainder=True))`
			`test_ds = (test_ds`
			`.shuffle(buffer_size=train_ds_size)`
			`.batch(batch_size=32, drop_remainder=True))`
			`validation_ds = (validation_ds`
			`.shuffle(buffer_size=train_ds_size)`
			`.batch(batch_size=32, drop_remainder=True))`

			`return train_ds, test_ds, validation_ds`