src: add debug code

This commit is contained in:
Artur Tamborski 2021-01-20 17:56:54 +01:00
parent c00532ce74
commit e5a7f68814
4 changed files with 78 additions and 36 deletions

View File

@ -7,6 +7,7 @@ import Board from '../Board/Board';
import './App.scss'; import './App.scss';
import {findTextRegions} from "../../helpers/findTextRegions"; import {findTextRegions} from "../../helpers/findTextRegions";
import {recognizeTextOnImageGrid} from "../../helpers/recognizeTextOnImage";
export type Point = { export type Point = {
x: number; x: number;
@ -48,22 +49,13 @@ export default class App extends React.Component {
image.src = url; image.src = url;
image.onload = () => { image.onload = () => {
console.log("handleTakePhoto: finding text regions...") console.log("handleTakePhoto: finding text regions...")
const {grid, gridWidth, gridHeight} = findTextRegions(image); const r = findTextRegions(image);
console.log(grid, gridWidth, gridHeight); console.log(r);
image.src = "";
URL.revokeObjectURL(url); URL.revokeObjectURL(url);
if (!grid) { if (r !== null) {
return; recognizeTextOnImageGrid(r.grid).then(() => {});
} }
let g = grid[0][0];
let ot = document.createElement('canvas');
ot.width = g.w;
ot.height = g.h;
ot.getContext('2d')?.putImageData(g.data, 0, 0);
document.body.appendChild(ot);
} }
} }

View File

@ -7,7 +7,7 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
heightTolerance = heightTolerance ?? 8; heightTolerance = heightTolerance ?? 8;
if (!image.width) { if (!image.width) {
return {}; return null;
} }
let canvas = document.createElement('canvas'); let canvas = document.createElement('canvas');
@ -131,20 +131,20 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
let list = segments[y]; let list = segments[y];
for (let i in list) { for (let i in list) {
const x1 = list[i][0]; let x1 = list[i][0];
const y1 = list[i][1]; let y1 = list[i][1];
const x2 = list[i][2]; let x2 = list[i][2];
const y2 = list[i][3]; let y2 = list[i][3];
if (x1 !== -1 && y1 !== -1 && x2 !== -1 && y2 !== -1) { if (x1 !== -1 && y1 !== -1 && x2 !== -1 && y2 !== -1) {
const w = (x2 - x1) + 1; let w = (x2 - x1) + 1;
const h = (y2 - y1) + 1; let h = (y2 - y1) + 1;
if (w >= minTextWidth && h >= minTextWidth) { if (w >= minTextWidth && h >= minTextWidth) {
foundSegments.push({ foundSegments.push({
x: x1 - widthTolerance * 2, x: x1 - widthTolerance * 3,
y: y1 - heightTolerance, y: y1 - heightTolerance * 1,
w: w + widthTolerance, w: w + widthTolerance * 3,
h: h + heightTolerance * 2, h: h + heightTolerance * 2,
}); });
} }
@ -155,7 +155,7 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
// hopefully we found at least something // hopefully we found at least something
if (!foundSegments.length) { if (!foundSegments.length) {
console.error("findTextRegions(): Error: findTextRegions() did not found anything"); console.error("findTextRegions(): Error: findTextRegions() did not found anything");
return {}; return null;
} }
let gridWidth = 0; let gridWidth = 0;
@ -200,7 +200,11 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
for (let i = 0; i < gridWidth; i++) { for (let i = 0; i < gridWidth; i++) {
let s = foundSegments[segmentOffset + i]; let s = foundSegments[segmentOffset + i];
let croppedData = ctx.getImageData(s.x, s.y, s.w, s.h); let croppedData = ctx.getImageData(s.x, s.y, s.w, s.h);
grid[j][indexMap[s.x]] = {data: croppedData, ...s} let croppedCanvas = document.createElement('canvas');
croppedCanvas.width = s.w;
croppedCanvas.height = s.h;
croppedCanvas.getContext('2d')?.putImageData(croppedData, 0, 0);
grid[j][indexMap[s.x]] = {data: croppedData, canvas: croppedCanvas, ...s}
} }
} }

View File

@ -1,9 +1,9 @@
import {createWorker, ImageLike, PSM} from 'tesseract.js'; import {createWorker, Worker, ImageLike, PSM} from 'tesseract.js';
export function recognizeTextOnImage(image: ImageLike, isColumn: boolean): string { let TEXT_WORKER: Worker | null = null;
let out; let CHAR_WORKER: Worker | null = null;
(async () => { async function prepareWorker(isColumn: boolean = true): Promise<Worker> {
const worker = createWorker(); const worker = createWorker();
await worker.load(); await worker.load();
await worker.loadLanguage('pol'); await worker.loadLanguage('pol');
@ -15,10 +15,56 @@ export function recognizeTextOnImage(image: ImageLike, isColumn: boolean): strin
tessjs_create_box: '1', tessjs_create_box: '1',
}); });
const { data: { text } } = await worker.recognize(image); return worker;
out = text; }
await worker.terminate();
})(); async function recognize(worker: Worker, image: ImageLike) {
return (await worker.recognize(image)).data.text.trim() || "";
return out || ""; }
/**
 * Runs text recognition on a single image using the shared text worker.
 *
 * The worker is created through `prepareWorker(true)` the first time it is
 * needed and stored in the module-level `TEXT_WORKER` so later calls reuse it.
 *
 * @param image - Image handed to `recognize` together with the worker.
 * @returns The string that `recognize` resolves to for this image.
 */
export async function recognizeTextOnImage(image: ImageLike): Promise<string> {
  let worker = TEXT_WORKER;
  if (worker == null) {
    // First use: build the worker and remember it for subsequent calls.
    worker = await prepareWorker(true);
    TEXT_WORKER = worker;
  }
  return recognize(worker, image);
}
/**
 * Recognizes the text of every cell in `grid`, then appends a debug preview of
 * the results to `document.body`.
 *
 * Step 1: each cell's `text` property is set from
 * `recognize(CHAR_WORKER, cell.canvas)`. Cells are processed strictly one after
 * another, row by row. The worker is created through `prepareWorker(false)` on
 * first use and kept in the module-level `CHAR_WORKER`.
 *
 * Step 2: every row becomes a `<div>` appended to `document.body`; every cell
 * becomes a bordered 70x70 inline box containing the cell's canvas and a
 * caption with its text.
 *
 * @param grid - Rows of cells. Each cell must expose a `canvas` that can be
 *   passed to `recognize` and appended to the DOM. Cells are mutated in place
 *   (a `text` property is written).
 */
export async function recognizeTextOnImageGrid(grid: any[][]): Promise<void> {
  if (CHAR_WORKER == null) {
    CHAR_WORKER = await prepareWorker(false);
  }

  // Sequential on purpose for now; this could be parallelized with
  // Promise.all() later.
  for (const row of grid) {
    for (const cell of row) {
      cell.text = await recognize(CHAR_WORKER, cell.canvas);
      // cell.data / cell.canvas are intentionally kept: the canvas is reused
      // by the preview below.
    }
  }

  // Captions found in this list get a red border; everything else gets blue.
  const highlightedLetters = ['Ą', 'Ć', 'Ę', 'V', 'Ł', 'Ń', 'Ó', 'Ś', 'Ź', 'Ż'];

  // Builds the bordered preview box (canvas + caption) for one cell.
  const buildCellBox = (cell: any): HTMLDivElement => {
    const caption = document.createElement('p');
    Object.assign(caption.style, {
      paddingRight: '5px',
      paddingBottom: '5px',
      marginTop: '-5px',
      textAlign: 'right',
    });
    // An empty recognition result falls back to "V"; the original note says
    // this letter is rarely recognized.
    caption.innerText = cell.text || "V";

    const box = document.createElement('div');
    box.style.display = 'inline-block';
    const borderColor = highlightedLetters.includes(caption.innerText) ? 'red' : 'blue';
    Object.assign(box.style, {
      border: `3px solid ${borderColor}`,
      marginLeft: '3px',
      marginBottom: '3px',
      width: '70px',
      height: '70px',
    });
    box.appendChild(cell.canvas);
    box.appendChild(caption);
    return box;
  };

  for (const row of grid) {
    const rowElement = document.createElement('div');
    for (const cell of row) {
      rowElement.appendChild(buildCellBox(cell));
    }
    document.body.appendChild(rowElement);
  }
}

View File

@ -1,6 +1,6 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "es6", "target": "es2017",
"lib": [ "lib": [
"dom", "dom",
"dom.iterable", "dom.iterable",