src: add debug code

This commit is contained in:
Artur Tamborski 2021-01-20 17:56:54 +01:00
parent c00532ce74
commit e5a7f68814
4 changed files with 78 additions and 36 deletions

View File

@ -7,6 +7,7 @@ import Board from '../Board/Board';
import './App.scss';
import {findTextRegions} from "../../helpers/findTextRegions";
import {recognizeTextOnImageGrid} from "../../helpers/recognizeTextOnImage";
export type Point = {
x: number;
@ -48,22 +49,13 @@ export default class App extends React.Component {
image.src = url;
image.onload = () => {
console.log("handleTakePhoto: finding text regions...")
const {grid, gridWidth, gridHeight} = findTextRegions(image);
console.log(grid, gridWidth, gridHeight);
image.src = "";
const r = findTextRegions(image);
console.log(r);
URL.revokeObjectURL(url);
if (!grid) {
return;
if (r !== null) {
recognizeTextOnImageGrid(r.grid).then(() => {});
}
let g = grid[0][0];
let ot = document.createElement('canvas');
ot.width = g.w;
ot.height = g.h;
ot.getContext('2d')?.putImageData(g.data, 0, 0);
document.body.appendChild(ot);
}
}

View File

@ -7,7 +7,7 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
heightTolerance = heightTolerance ?? 8;
if (!image.width) {
return {};
return null;
}
let canvas = document.createElement('canvas');
@ -131,20 +131,20 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
let list = segments[y];
for (let i in list) {
const x1 = list[i][0];
const y1 = list[i][1];
const x2 = list[i][2];
const y2 = list[i][3];
let x1 = list[i][0];
let y1 = list[i][1];
let x2 = list[i][2];
let y2 = list[i][3];
if (x1 !== -1 && y1 !== -1 && x2 !== -1 && y2 !== -1) {
const w = (x2 - x1) + 1;
const h = (y2 - y1) + 1;
let w = (x2 - x1) + 1;
let h = (y2 - y1) + 1;
if (w >= minTextWidth && h >= minTextWidth) {
foundSegments.push({
x: x1 - widthTolerance * 2,
y: y1 - heightTolerance,
w: w + widthTolerance,
x: x1 - widthTolerance * 3,
y: y1 - heightTolerance * 1,
w: w + widthTolerance * 3,
h: h + heightTolerance * 2,
});
}
@ -155,7 +155,7 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
// hopefully we found at least something
if (!foundSegments.length) {
console.error("findTextRegions(): Error: findTextRegions() did not found anything");
return {};
return null;
}
let gridWidth = 0;
@ -200,7 +200,11 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
for (let i = 0; i < gridWidth; i++) {
let s = foundSegments[segmentOffset + i];
let croppedData = ctx.getImageData(s.x, s.y, s.w, s.h);
grid[j][indexMap[s.x]] = {data: croppedData, ...s}
let croppedCanvas = document.createElement('canvas');
croppedCanvas.width = s.w;
croppedCanvas.height = s.h;
croppedCanvas.getContext('2d')?.putImageData(croppedData, 0, 0);
grid[j][indexMap[s.x]] = {data: croppedData, canvas: croppedCanvas, ...s}
}
}

View File

@ -1,9 +1,9 @@
import {createWorker, ImageLike, PSM} from 'tesseract.js';
import {createWorker, Worker, ImageLike, PSM} from 'tesseract.js';
export function recognizeTextOnImage(image: ImageLike, isColumn: boolean): string {
let out;
let TEXT_WORKER: Worker | null = null;
let CHAR_WORKER: Worker | null = null;
(async () => {
async function prepareWorker(isColumn: boolean = true): Promise<Worker> {
const worker = createWorker();
await worker.load();
await worker.loadLanguage('pol');
@ -15,10 +15,56 @@ export function recognizeTextOnImage(image: ImageLike, isColumn: boolean): strin
tessjs_create_box: '1',
});
const { data: { text } } = await worker.recognize(image);
out = text;
await worker.terminate();
})();
return out || "";
return worker;
}
/**
 * Runs recognition on one image with the given worker and returns the text.
 *
 * @param worker - worker whose `recognize()` method is awaited
 * @param image - image handed to the worker unchanged
 * @returns the recognized text with leading/trailing whitespace removed;
 *          an empty string when nothing is left after trimming
 */
async function recognize(worker: Worker, image: ImageLike) {
  const { data } = await worker.recognize(image);
  const trimmed = data.text.trim();
  return trimmed || "";
}
/**
 * Recognizes the text on an image using the module-level `TEXT_WORKER`.
 *
 * When `TEXT_WORKER` is not set yet, it is created with `prepareWorker(true)`
 * and stored so that later calls reuse it.
 *
 * @param image - image passed on to `recognize()`
 * @returns the text produced by `recognize()` for this image
 */
export async function recognizeTextOnImage(image: ImageLike): Promise<string> {
  if (TEXT_WORKER == null) {
    TEXT_WORKER = await prepareWorker(true);
  }
  const text = await recognize(TEXT_WORKER, image);
  return text;
}
/**
 * Recognizes the text of every cell in a grid and renders a debug view of
 * the result at the end of `document.body`.
 *
 * Each cell is expected to carry a `canvas` property; the recognized text is
 * written back onto the cell as `text`. The module-level `CHAR_WORKER` is
 * created with `prepareWorker(false)` on first use and reused afterwards.
 *
 * @param grid - rows of cells; every cell is mutated (`text` is assigned) and
 *               its `canvas` is moved into the debug markup
 */
export async function recognizeTextOnImageGrid(grid: any[][]): Promise<void> {
  if (CHAR_WORKER == null) {
    CHAR_WORKER = await prepareWorker(false);
  }

  // Pass 1: recognize cells one after another (deliberately not parallel yet;
  // this could be turned into a Promise.all() later).
  for (const row of grid) {
    for (const cell of row) {
      cell.text = await recognize(CHAR_WORKER, cell.canvas);
      //cell.data = cell.canvas = null;
    }
  }

  // Labels in this list get a red frame, every other label a blue one.
  const redFramedLetters = ['Ą', 'Ć', 'Ę', 'V', 'Ł', 'Ń', 'Ó', 'Ś', 'Ź', 'Ż'];

  // Pass 2: build one <div> per grid row, holding a framed canvas + label
  // for each cell.
  for (const row of grid) {
    const rowElement = document.createElement('div');

    for (const cell of row) {
      const label = document.createElement('p');
      Object.assign(label.style, {
        paddingRight: '5px',
        paddingBottom: '5px',
        marginTop: '-5px',
        textAlign: 'right',
      });
      // Workaround: "V" is rarely recognized, so an empty result is shown as "V".
      label.innerText = cell.text || "V";

      const frame = document.createElement('div');
      const frameColor = redFramedLetters.includes(label.innerText) ? 'red' : 'blue';
      Object.assign(frame.style, {
        display: 'inline-block',
        border: `3px solid ${frameColor}`,
        marginLeft: '3px',
        marginBottom: '3px',
        width: '70px',
        height: '70px',
      });

      frame.appendChild(cell.canvas);
      frame.appendChild(label);
      rowElement.appendChild(frame);
    }

    document.body.appendChild(rowElement);
  }
}

View File

@ -1,6 +1,6 @@
{
"compilerOptions": {
"target": "es6",
"target": "es2017",
"lib": [
"dom",
"dom.iterable",