"""Print the item lines of a photographed receipt using OpenCV and Tesseract.

The script crops the input image to the bounding box of its largest contour,
runs Tesseract (language ``pol``) on the crop, and prints the OCR lines found
between a ``PARAGON ... FISKALNY`` header line and a ``SPRZEDA...`` footer
line (presumably a Polish fiscal receipt -- the markers are the only evidence).

Usage:
    python <script> -i path/to/receipt.jpg
"""
import argparse
import os
import re
import sys
import warnings

import cv2
import numpy as np
import pytesseract
from PIL import Image

# Line that precedes the item list in the OCR output.
RECEIPT_HEADER = re.compile('PARAGON.*FISKALNY.*')
# Line that follows the item list in the OCR output.
RECEIPT_FOOTER = re.compile('SPRZEDA.*')
# Number of lines directly above the footer line that are not printed.
# NOTE(review): the value 2 comes from the original slice bound; presumably
# it matches the layout of the receipts this was written for -- confirm.
FOOTER_MARGIN = 2

# TODO: split each item line into (name, price). An earlier draft used:
#   re.compile(r"([ A-Za-ząćęłśźż]+).*(\d{1,3},\d{2})[A-E]$")


def _fail(message):
    """Print *message* and terminate with the script's error status (-1)."""
    print(message)
    sys.exit(-1)


def _crop_largest_contour(img):
    """Return the grayscale crop of *img* around its largest contour.

    Edges are detected on a blurred grayscale copy; the crop itself is taken
    from the unblurred grayscale image.

    Args:
        img: BGR image as returned by ``cv2.imread``.

    Returns:
        A single-channel image, or ``None`` when no contour was found.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 75, 200)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
    # (image, contours, hierarchy) in 3.x; [-2] is the contours in both.
    contours = cv2.findContours(edged, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return None

    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
    return gray[y:y + h, x:x + w]


def _select_item_lines(text_lines):
    """Return the OCR lines located between the header and footer markers.

    When a marker matches several lines the last match wins. Without a header
    match the selection starts after the first line; without a footer match
    the last line is used as the footer position.

    Args:
        text_lines: OCR output split into lines.

    Returns:
        The list of lines after the header line and at least
        ``FOOTER_MARGIN`` lines before the footer line (possibly empty).
    """
    index_start = 0
    index_stop = len(text_lines) - 1
    for i, line in enumerate(text_lines):
        if RECEIPT_HEADER.match(line):
            index_start = i
        if RECEIPT_FOOTER.match(line):
            index_stop = i

    # Clamp at 0: a negative bound would be interpreted by the slice as an
    # offset from the end of the list and select unrelated lines.
    stop = max(index_stop - FOOTER_MARGIN, 0)
    return text_lines[index_start + 1:stop]


def main():
    """Parse the command line, OCR the receipt image and print its items."""
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True, help="Path to the image")
    args = vars(ap.parse_args())
    image_path = args["image"]

    if not os.path.isfile(image_path):
        _fail(f"Could not find an image '{image_path}'")

    # cv2.imread does not raise on undecodable files; it returns None.
    img = cv2.imread(image_path)
    if img is None:
        _fail(f"Could not read an image '{image_path}'")

    img_cut = _crop_largest_contour(img)
    if img_cut is None:
        _fail(f"Could not find any contour in '{image_path}'")

    # The crop is single-channel, so the conversion to RGB must start from
    # GRAY (a BGR source code is rejected for 1-channel input).
    img_out = cv2.cvtColor(img_cut, cv2.COLOR_GRAY2RGB)
    text = pytesseract.image_to_string(Image.fromarray(img_out),
                                       config="-l pol")

    for item_line in _select_item_lines(text.split('\n')):
        print(item_line)


if __name__ == "__main__":
    main()