129 lines
4.2 KiB
Python
129 lines
4.2 KiB
Python
import numpy as np
|
|
import os
|
|
from skimage.io import imread
|
|
import cv2 as cv
|
|
from pathlib import Path
|
|
import json
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.preprocessing import LabelEncoder
|
|
import tensorflow as tf
|
|
import os
|
|
|
|
def load_train_data(input_dir, image_size):
    """Load the training images stored in per-category sub-directories.

    Every immediate sub-directory of ``input_dir`` is treated as one
    category, and every regular file inside it (except ``desktop.ini``) is
    read as an image of that category.

    Args:
        input_dir: Directory whose sub-directories hold the images.
        image_size: Target size, passed unchanged to ``cv.resize``.

    Returns:
        A dict with the keys:
            ``"values"``: ``np.ndarray`` built from all resized images,
                divided by 255.
            ``"categories_name"``: sub-directory names, in scan order.
            ``"categories_count"``: number of images loaded per
                sub-directory, aligned with ``"categories_name"``.
            ``"labels"``: one sub-directory name per loaded image, aligned
                with ``"values"``.
    """
    image_dir = Path(input_dir)

    # Scan the directory once so that category names and category counts are
    # guaranteed to share the same order.
    folders = [entry for entry in image_dir.iterdir() if entry.is_dir()]
    categories_name = [folder.name for folder in folders]

    train_img = []
    categories_count = []
    labels = []
    for folder in folders:
        count = 0
        for obj in folder.iterdir():
            # Skip sub-directories and the Windows folder-settings file.
            if not obj.is_file() or obj.name == 'desktop.ini':
                continue

            # imread yields an ndarray (rows x cols x channels for colour).
            img = imread(obj)
            if img.ndim == 2:
                # Grayscale images have no channel axis; replicate the single
                # plane so the channel slice below does not raise.
                img = np.stack((img,) * 3, axis=-1)
            # Keep only the first three channels (drops an alpha channel).
            img = img[:, :, :3]
            img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)
            # Normalisation; assumes 8-bit channel values.
            img = img / 255

            train_img.append(img)
            labels.append(folder.name)
            count += 1
        categories_count.append(count)

    return {
        "values": np.array(train_img),
        "categories_name": categories_name,
        "categories_count": categories_count,
        "labels": labels,
    }
|
|
|
|
def load_test_data(input_dir, image_size):
    """Load the test images listed in ``test_labels.json``.

    The label file is expected next to ``input_dir`` (in its parent
    directory) and must contain a JSON list of objects, each with a
    ``"filename"`` (relative to ``input_dir``) and a ``"value"`` (the
    category label).

    Args:
        input_dir: Directory that holds the test images.
        image_size: Target size, passed unchanged to ``cv.resize``.

    Returns:
        A dict with the keys:
            ``"values"``: ``np.ndarray`` built from all resized images,
                divided by 255.
            ``"categories_name"``: distinct labels in first-seen order.
            ``"categories_count"``: number of entries per label, aligned
                with ``"categories_name"``.
            ``"labels"``: one label per image, aligned with ``"values"``.
    """
    image_path = Path(input_dir)
    labels_path = image_path.parent / 'test_labels.json'
    objects = json.loads(labels_path.read_text(encoding='utf-8'))

    # Tally entries per category. A dict keeps first-seen order, so names and
    # counts stay aligned even when entries of one category are not
    # contiguous in the file, and an empty list simply yields no categories.
    per_category = {}
    for entry in objects:
        value = entry['value']
        per_category[value] = per_category.get(value, 0) + 1
    categories_name = list(per_category)
    categories_count = list(per_category.values())

    test_img = []
    labels = []
    for entry in objects:
        # imread yields an ndarray (rows x cols x channels for colour).
        img = imread(image_path / entry['filename'])
        if img.ndim == 2:
            # Grayscale images have no channel axis; replicate the single
            # plane so the channel slice below does not raise.
            img = np.stack((img,) * 3, axis=-1)
        # Keep only the first three channels (drops an alpha channel).
        img = img[:, :, :3]
        img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)
        # Normalisation; assumes 8-bit channel values.
        img = img / 255

        test_img.append(img)
        labels.append(entry['value'])

    return {
        "values": np.array(test_img),
        "categories_name": categories_name,
        "categories_count": categories_count,
        "labels": labels,
    }
|
|
|
|
|
|
|
|
def load_data(shape, path_train, path_test, batch_size=32):
    """Build batched TensorFlow datasets for training, testing and validation.

    The training images are split 80/20 into training and validation
    subsets (fixed ``random_state=42``). String labels are converted to
    integers by a single ``LabelEncoder`` fitted on the training labels, so
    the same label maps to the same integer in all three datasets.

    Args:
        shape: Image size forwarded to ``load_train_data`` and
            ``load_test_data``.
        path_train: Directory passed to ``load_train_data``.
        path_test: Directory passed to ``load_test_data``.
        batch_size: Number of samples per batch (default 32).

    Returns:
        The tuple ``(train_ds, test_ds, validation_ds)`` -- note the order,
        the test dataset comes second.

    Raises:
        ValueError: If the test labels contain a category that does not
            occur in the training labels (raised by
            ``LabelEncoder.transform``).
    """
    data_train = load_train_data(path_train, shape)
    values_train = data_train['values']
    labels_train = data_train['labels']

    data_test = load_test_data(path_test, shape)
    X_test = data_test['values']
    y_test = data_test['labels']

    X_train, X_validate, y_train, y_validate = train_test_split(
        values_train, labels_train, test_size=0.2, random_state=42)

    # Fit once on all training labels and only *transform* the splits.
    # Re-fitting per split would assign different integers to the same label
    # whenever a split happens to miss one of the categories.
    class_le = LabelEncoder()
    class_le.fit(labels_train)
    y_train_enc = class_le.transform(y_train)
    y_validate_enc = class_le.transform(y_validate)
    y_test_enc = class_le.transform(y_test)

    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))
    validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))
    test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))

    train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
    test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
    validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()
    print("Training data size:", train_ds_size)
    print("Test data size:", test_ds_size)
    print("Validation data size:", validation_ds_size)

    # Each dataset is shuffled with a buffer covering its own size; the
    # buffer is clamped to 1 because shuffle() rejects a zero-sized buffer.
    # NOTE(review): drop_remainder=True also discards the last partial batch
    # of the test and validation sets -- confirm this is intended.
    train_ds = (train_ds
                .shuffle(buffer_size=max(train_ds_size, 1))
                .batch(batch_size=batch_size, drop_remainder=True))
    test_ds = (test_ds
               .shuffle(buffer_size=max(test_ds_size, 1))
               .batch(batch_size=batch_size, drop_remainder=True))
    validation_ds = (validation_ds
                     .shuffle(buffer_size=max(validation_ds_size, 1))
                     .batch(batch_size=batch_size, drop_remainder=True))

    return train_ds, test_ds, validation_ds