widzenie-komputerowe-projekt/training/data_load.py

129 lines
4.2 KiB
Python
Raw Normal View History

2023-02-01 18:42:47 +01:00
import numpy as np
import os
from skimage.io import imread
import cv2 as cv
from pathlib import Path
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import os
def load_train_data(input_dir, image_size):
image_dir = Path(input_dir)
categories_name = []
for file in os.listdir(image_dir):
d = os.path.join(image_dir, file)
if os.path.isdir(d):
categories_name.append(file)
folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]
train_img = []
categories_count=[]
labels=[]
for i, direc in enumerate(folders):
count = 0
for obj in direc.iterdir():
if os.path.isfile(obj) and os.path.basename(os.path.normpath(obj)) != 'desktop.ini':
labels.append(os.path.basename(os.path.normpath(direc)))
count += 1
img = imread(obj)#zwraca ndarry postaci xSize x ySize x colorDepth
img = img[:, :, :3]
img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)# zwraca ndarray
img = img / 255 #normalizacja
train_img.append(img)
categories_count.append(count)
X={}
X["values"] = np.array(train_img)
X["categories_name"] = categories_name
X["categories_count"] = categories_count
X["labels"]=labels
return X
def load_test_data(input_dir, image_size):
image_path = Path(input_dir)
labels_path = image_path.parents[0] / 'test_labels.json'
jsonString = labels_path.read_text()
objects = json.loads(jsonString)
categories_name = []
categories_count=[]
count = 0
c = objects[0]['value']
for e in objects:
if e['value'] != c:
categories_count.append(count)
c = e['value']
count = 1
else:
count += 1
if not e['value'] in categories_name:
categories_name.append(e['value'])
categories_count.append(count)
test_img = []
labels=[]
for e in objects:
p = image_path / e['filename']
img = imread(p)#zwraca ndarry postaci xSize x ySize x colorDepth
img = img[:, :, :3]
img = cv.resize(img, image_size, interpolation=cv.INTER_AREA)# zwraca ndarray
img = img / 255#normalizacja
test_img.append(img)
labels.append(e['value'])
X={}
X["values"] = np.array(test_img)
X["categories_name"] = categories_name
X["categories_count"] = categories_count
X["labels"]=labels
return X
def load_data(shape, path_train, path_test):
data_train = load_train_data(path_train, shape)
values_train = data_train['values']
labels_train = data_train['labels']
data_test = load_test_data(path_test, shape)
X_test = data_test['values']
y_test = data_test['labels']
X_train, X_validate, y_train, y_validate = train_test_split(values_train, labels_train, test_size=0.2, random_state=42)
class_le = LabelEncoder()
y_train_enc = class_le.fit_transform(y_train)
y_validate_enc = class_le.fit_transform(y_validate)
y_test_enc = class_le.fit_transform(y_test)
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train_enc))
validation_ds = tf.data.Dataset.from_tensor_slices((X_validate, y_validate_enc))
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test_enc))
train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
validation_ds_size = tf.data.experimental.cardinality(validation_ds).numpy()
print("Training data size:", train_ds_size)
print("Test data size:", test_ds_size)
print("Validation data size:", validation_ds_size)
train_ds = (train_ds
.shuffle(buffer_size=train_ds_size)
.batch(batch_size=32, drop_remainder=True))
test_ds = (test_ds
.shuffle(buffer_size=train_ds_size)
.batch(batch_size=32, drop_remainder=True))
validation_ds = (validation_ds
.shuffle(buffer_size=train_ds_size)
.batch(batch_size=32, drop_remainder=True))
return train_ds, test_ds, validation_ds