import json
from pathlib import Path

import cv2 as cv
import numpy as np
import tensorflow as tf
from skimage.io import imread
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


def load_train_data(input_dir, image_size):
    image_dir = Path(input_dir)
    # Each subdirectory of input_dir is one category.
    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
    categories_name = [directory.name for directory in folders]

    train_img = []
    categories_count = []
    labels = []
    for direc in folders:
        count = 0
        for obj in direc.iterdir():
            if obj.is_file() and obj.name != 'desktop.ini':
                labels.append(direc.name)
                count += 1
                img = imread(obj)  # returns an ndarray of shape (height, width, color_depth)
                img = img[:, :, :3]  # keep only the RGB channels (drops alpha if present)
                img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)
                img = img / 255  # normalize pixel values to [0, 1]
                train_img.append(img)
        categories_count.append(count)

    X = {}
    X["values"] = np.array(train_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"] = labels
    return X


def load_test_data(input_dir, image_size):
    image_path = Path(input_dir)
    labels_path = image_path.parents[0] / 'test_labels.json'
    objects = json.loads(labels_path.read_text())

    # Count consecutive runs of the same label; the JSON is expected to list
    # all images of one category before moving on to the next.
    categories_name = []
    categories_count = []
    count = 0
    current = objects[0]['value']
    for e in objects:
        if e['value'] != current:
            categories_count.append(count)
            current = e['value']
            count = 1
        else:
            count += 1
        if e['value'] not in categories_name:
            categories_name.append(e['value'])
    categories_count.append(count)

    test_img = []
    labels = []
    for e in objects:
        p = image_path / e['filename']
        img = imread(p)  # returns an ndarray of shape (height, width, color_depth)
        img = img[:, :, :3]  # keep only the RGB channels (drops alpha if present)
        img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)
        img = img / 255  # normalize pixel values to [0, 1]
        test_img.append(img)
        labels.append(e['value'])

    X = {}
    X["values"] = np.array(test_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"] = labels
    return X


def load_data(shape, path_train, path_test):
    data_train = load_train_data(path_train, shape)
    values_train = data_train['values']
    labels_train = data_train['labels']

    data_test = load_test_data(path_test, shape)
    X_test = data_test['values']
    y_test = data_test['labels']

    X_train, X_validate, y_train, y_validate = train_test_split(
        values_train, labels_train, test_size=0.2, random_state=42)

    # Fit the encoder on the training labels only, then reuse the same
    # mapping for the validation and test sets.
    class_le = LabelEncoder()
    y_train_enc = class_le.fit_transform(y_train)
    y_validate_enc = class_le.transform(y_validate)
    y_test_enc = class_le.transform(y_test)

    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))
    validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))
    test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))

    train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
    test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
    validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()
    print("Training data size:", train_ds_size)
    print("Test data size:", test_ds_size)
    print("Validation data size:", validation_ds_size)

    # Shuffle each dataset with a buffer covering the whole set, then batch.
    train_ds = (train_ds
                .shuffle(buffer_size=train_ds_size)
                .batch(batch_size=32, drop_remainder=True))
    test_ds = (test_ds
               .shuffle(buffer_size=test_ds_size)
               .batch(batch_size=32, drop_remainder=True))
    validation_ds = (validation_ds
                     .shuffle(buffer_size=validation_ds_size)
                     .batch(batch_size=32, drop_remainder=True))

    return train_ds, test_ds, validation_ds
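

# Minimal usage sketch. The paths, image size, and directory layout below are
# illustrative assumptions, not part of the module: load_train_data expects a
# folder with one subdirectory per class, and load_test_data expects a flat
# image folder with a test_labels.json file in its parent directory.
if __name__ == "__main__":
    train_ds, test_ds, validation_ds = load_data(
        shape=(227, 227),           # hypothetical (width, height) passed to cv.resize
        path_train="./data/train",  # hypothetical folder: one subdirectory per class
        path_test="./data/test",    # hypothetical folder of images; test_labels.json sits beside it
    )
    # Inspect one batch to confirm shapes, e.g. (32, 227, 227, 3) and (32,).
    for images, batch_labels in train_ds.take(1):
        print(images.shape, batch_labels.shape)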