# wko-projekt/yolo_video.py


from PIL import Image
import cv2 as cv
from yolo import YOLO
import ocr
import numpy as np
import math
import base64
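
# Pipeline: the YOLO model locates the license plate, each detected box is
# cropped, deskewed, converted to grayscale, thresholded and cleaned up, then
# passed to the ocr module; the annotated result is written to final/ and
# returned base64-encoded together with the recognised texts.
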
def grayscale(image):
return cv.cvtColor(image, cv.COLOR_BGR2GRAY)
def noise_removal(image):
    # Light dilate/erode with a 1x1 kernel, a morphological closing and a
    # median blur to clean up speckle noise in the thresholded plate crop.
    kernel = np.ones((1, 1), np.uint8)
    image = cv.dilate(image, kernel, iterations=1)
    kernel = np.ones((1, 1), np.uint8)
    image = cv.erode(image, kernel, iterations=1)
    image = cv.morphologyEx(image, cv.MORPH_CLOSE, kernel)
    image = cv.medianBlur(image, 3)
    return image

def remove_borders(image):
    # Crop the image to the bounding box of its largest external contour.
    contours, hierarchy = cv.findContours(image, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    cnts_sorted = sorted(contours, key=lambda x: cv.contourArea(x))
    cnt = cnts_sorted[-1]
    x, y, w, h = cv.boundingRect(cnt)
    crop = image[y:y + h, x:x + w]
    return crop

def rotate_image(image, angle):
    image_center = tuple(np.array(image.shape[1::-1]) / 2)
    rot_mat = cv.getRotationMatrix2D(image_center, angle, 1.0)
    result = cv.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv.INTER_LINEAR)
    return result

def compute_skew(src_img):
    # Estimate the skew angle (in degrees) from the average angle of the
    # near-horizontal Hough line segments detected in the edge image.
    if len(src_img.shape) == 3:
        h, w, _ = src_img.shape
    elif len(src_img.shape) == 2:
        h, w = src_img.shape
    else:
        raise ValueError('unsupported image type')
    img = cv.medianBlur(src_img, 3)
    edges = cv.Canny(img, threshold1=30, threshold2=100, apertureSize=3, L2gradient=True)
    lines = cv.HoughLinesP(edges, 1, math.pi / 180, 30, minLineLength=w / 4.0, maxLineGap=h / 4.0)
    if lines is None:
        return 0.0
    nlines = lines.size
    # print(nlines)
    angle = 0.0
    cnt = 0
    for line in lines:
        x1, y1, x2, y2 = line[0]
        ang = np.arctan2(y2 - y1, x2 - x1)
        # print(ang)
        if math.fabs(ang) <= math.radians(30):  # excluding extreme rotations
            angle += ang
            cnt += 1
    if cnt == 0:
        return 0.0
    return (angle / cnt) * 180 / math.pi

def deskew(src_img):
    return rotate_image(src_img, compute_skew(src_img))

from keras import backend as K  # only needed for the commented-out K.clear_session() calls below

def detect_img(yolo, img_path, j):
    try:
        # processed_image = cv.imread(img_path)
        # final_image = cv.imread(img_path)
        # processed_image = cv.resize(processed_image, (3024, 3024))
        # img_path = './img_to_detect.jpeg'
        # cv.imwrite(img_path, processed_image)
        image = Image.open(img_path)
    except Exception:
        print('Image open error! Try again!')
        return None
    else:
        # Before prediction
        # K.clear_session()
        r_image, pred = yolo.detect_image(image)
        # After prediction
        # K.clear_session()
        r_image.save('detected.png')
        processed_image = cv.imread(img_path)
        if not pred:
            return None
        i = 0
        texts = []
        ## FIXME : better list mapping
        for prediction in pred:
            # Box corners come from the YOLO prediction; cast to int so they
            # can be used for array slicing even if the model returns floats.
            x1 = int(prediction[1][0])
            x2 = int(prediction[2][0])
            y1 = int(prediction[1][1])
            y2 = int(prediction[2][1])
            w = abs(x1 - x2)
            h = abs(y1 - y2)
            # print(pred)
            # print(f'x1: {x1}, x2: {x2}, y1: {y1}, y2: {y2}, w: {w}, h: {h}')
            img = processed_image[y1:y1 + h, x1:x1 + w]
            img = deskew(img)
            # gray_image = cv.cvtColor(robot_img, cv.COLOR_BGR2GRAY)
            # # gray_image = cv.bilateralFilter(gray_image, 11, 17, 17)
            # gaussian_blur = cv.GaussianBlur(gray_image, (9, 9), 0)
            # edged = cv.Canny(gaussian_blur, 255, 255)
            #
            # image_file = './img0.png'
            # img = cv.imread(image_file)
            gray_image = grayscale(img)
            thresh, im_bw = cv.threshold(gray_image, 125, 150, cv.THRESH_BINARY)  # the best = 120,150; 100, 150; 150, 210
            no_noise = noise_removal(im_bw)
            no_borders = remove_borders(no_noise)
            # blur = cv.GaussianBlur(gray_image, (3, 3), 0)
            # thresh = cv.threshold(blur, 0, 255, cv.THRESH_BINARY_INV + cv.THRESH_OTSU)[1]
            #
            # # Morph open to remove noise and invert image
            # kernel = cv.getStructuringElement(cv.MORPH_RECT, (3, 3))
            # opening = cv.morphologyEx(thresh, cv.MORPH_OPEN, kernel, iterations=1)
            # no_borders = 255 - no_borders
            cv.imwrite(f'img/img{j}{i}.png', no_borders)
            text = ocr.get_text_from_image(f'img/img{j}{i}.png')
            texts.append(text)
            # After the first crop, draw on the previously saved annotated
            # frame so all boxes and texts accumulate in one output image.
            if i > 0:
                processed_image = cv.imread(f'final/final{j}{i-1}.png')
            res = cv.rectangle(processed_image, (x1, y1), (x1 + w, y1 + h), (0, 0, 255), 15)
            res = cv.putText(res, text, (x1, y1 - 20), cv.FONT_HERSHEY_SIMPLEX, 4, (0, 0, 255), 15, cv.LINE_AA)
            cv.imwrite(f'final/final{j}{i}.png', res)
            my_string = 'ok'
            i += 1
        # with open("final.png", "rb") as img_file:
        #     my_string = base64.b64encode(img_file.read())
        # print(my_string)
        with open(f"final/final{j}{i-1}.png", "rb") as img_file:
            my_string = base64.b64encode(img_file.read())
        return my_string, texts
        # text_file = open("base64.txt", "w")
        # text_file.write(str(my_string))
        # text_file.close()
        # decoded data
        # decoded_data = base64.b64decode((my_string))
        # img_file = open('base64.png', 'wb')
        # img_file.write(decoded_data)
        # img_file.close()

def detect_license_plate(model, img_path, i):
    result = detect_img(model, img_path, i)
    # detect_img returns None on failure, so guard before unpacking.
    if not result:
        return None, [None]
    encoded_image, texts = result
    if not encoded_image or not texts:
        return None, [None]
    return encoded_image, texts


# yolo_model = YOLO()
# for i in range(18, 100):
#     image_path = rf'Images/New/IMG_25{i}.jpeg'  # 95; 3909, 2491
#     detect_license_plate(model=yolo_model, img_path=image_path, i=i)
# image_path = rf'./Images/New/IMG_5016.jpeg'  # 95; 3909, 2491
# detect_license_plate(model=yolo_model, img_path=image_path, i=0)
# print(ocr.get_text_from_image(f'img0.png'))
# print(ocr.keras_ocr_func())
# print(ocr.tesseract_ocr())
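

# Minimal usage sketch: assumes the YOLO weights configured in yolo.py are
# available and that the img/ and final/ output directories already exist.
# The image path is only an example taken from the commented-out runs above.
if __name__ == '__main__':
    yolo_model = YOLO()
    encoded_png, plate_texts = detect_license_plate(
        model=yolo_model, img_path='./Images/New/IMG_5016.jpeg', i=0)
    print(plate_texts)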