src: add debug code

2021-01-20 17:56:54 +01:00 · 2021-01-20 17:56:54 +01:00 · e5a7f68814
commit e5a7f68814
parent c00532ce74
4 changed files with 78 additions and 36 deletions
--- a/src/components/App/App.tsx
+++ b/src/components/App/App.tsx
@ -7,6 +7,7 @@ import Board from '../Board/Board';

 import './App.scss';
 import {findTextRegions} from "../../helpers/findTextRegions";
+import {recognizeTextOnImageGrid} from "../../helpers/recognizeTextOnImage";

 export type Point = {
  x: number;
@ -48,22 +49,13 @@ export default class App extends React.Component {
    image.src = url;
    image.onload = () => {
      console.log("handleTakePhoto: finding text regions...")
-      const {grid, gridWidth, gridHeight} = findTextRegions(image);
-      console.log(grid, gridWidth, gridHeight);
-
-      image.src = "";
+      const r = findTextRegions(image);
+      console.log(r);
      URL.revokeObjectURL(url);

-      if (!grid) {
-        return;
+      if (r !== null) {
+        recognizeTextOnImageGrid(r.grid).then(() => {});
      }
-
-      let g = grid[0][0];
-      let ot = document.createElement('canvas');
-      ot.width = g.w;
-      ot.height = g.h;
-      ot.getContext('2d')?.putImageData(g.data, 0, 0);
-      document.body.appendChild(ot);
    }
  }

--- a/src/helpers/findTextRegions.js
+++ b/src/helpers/findTextRegions.js
@ -7,7 +7,7 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
  heightTolerance = heightTolerance ?? 8;

  if (!image.width) {
-    return {};
+    return null;
  }

  let canvas = document.createElement('canvas');
@ -131,20 +131,20 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
    let list = segments[y];

    for (let i in list) {
-      const x1 = list[i][0];
-      const y1 = list[i][1];
-      const x2 = list[i][2];
-      const y2 = list[i][3];
+      let x1 = list[i][0];
+      let y1 = list[i][1];
+      let x2 = list[i][2];
+      let y2 = list[i][3];

      if (x1 !== -1 && y1 !== -1 && x2 !== -1 && y2 !== -1) {
-        const w = (x2 - x1) + 1;
-        const h = (y2 - y1) + 1;
+        let w = (x2 - x1) + 1;
+        let h = (y2 - y1) + 1;

        if (w >= minTextWidth && h >= minTextWidth) {
          foundSegments.push({
-            x: x1 - widthTolerance * 2,
-            y: y1 - heightTolerance,
-            w: w + widthTolerance,
+            x: x1 - widthTolerance * 3,
+            y: y1 - heightTolerance * 1,
+            w: w + widthTolerance * 3,
            h: h + heightTolerance * 2,
          });
        }
@ -155,7 +155,7 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
  // hopefully we found at least something
  if (!foundSegments.length) {
    console.error("findTextRegions(): Error: findTextRegions() did not found anything");
-    return {};
+    return null;
  }

  let gridWidth = 0;
@ -200,7 +200,11 @@ export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextW
    for (let i = 0; i < gridWidth; i++) {
      let s = foundSegments[segmentOffset + i];
      let croppedData = ctx.getImageData(s.x, s.y, s.w, s.h);
-      grid[j][indexMap[s.x]] = {data: croppedData, ...s}
+      let croppedCanvas = document.createElement('canvas');
+      croppedCanvas.width = s.w;
+      croppedCanvas.height = s.h;
+      croppedCanvas.getContext('2d')?.putImageData(croppedData, 0, 0);
+      grid[j][indexMap[s.x]] = {data: croppedData, canvas: croppedCanvas, ...s}
    }
  }

--- a/src/helpers/recognizeTextOnImage.ts
+++ b/src/helpers/recognizeTextOnImage.ts
@ -1,9 +1,9 @@
-import {createWorker, ImageLike, PSM} from 'tesseract.js';
+import {createWorker, Worker, ImageLike, PSM} from 'tesseract.js';

-export function recognizeTextOnImage(image: ImageLike, isColumn: boolean): string {
-  let out;
+let TEXT_WORKER: Worker | null = null;
+let CHAR_WORKER: Worker | null = null;

-  (async () => {
+async function prepareWorker(isColumn: boolean = true): Promise<Worker> {
    const worker = createWorker();
    await worker.load();
    await worker.loadLanguage('pol');
@ -15,10 +15,56 @@ export function recognizeTextOnImage(image: ImageLike, isColumn: boolean): strin
      tessjs_create_box: '1',
    });

-    const { data: { text } } = await worker.recognize(image);
-    out = text;
-    await worker.terminate();
-  })();
-
-  return out || "";
+    return worker;
+}
+
+/**
+ * Runs OCR on `image` using the supplied worker and resolves to the
+ * recognized text with leading/trailing whitespace stripped (an empty string
+ * when nothing is left after trimming).
+ */
+async function recognize(worker: Worker, image: ImageLike) {
+  const {data} = await worker.recognize(image);
+  const trimmed = data.text.trim();
+  return trimmed || "";
+}
+
+/**
+ * Recognizes the text on a single image.
+ *
+ * The module-level TEXT_WORKER is created through prepareWorker(true) the
+ * first time it is needed and reused on later calls.
+ *
+ * @param image image handed to the OCR worker
+ * @returns the recognized text, as produced by recognize()
+ */
+export async function recognizeTextOnImage(image: ImageLike): Promise<string> {
+  if (TEXT_WORKER == null) {
+    TEXT_WORKER = await prepareWorker(true);
+  }
+
+  const text = await recognize(TEXT_WORKER, image);
+  return text;
+}
+
+/**
+ * Debug helper: runs OCR over every cell of `grid` and appends a visual dump
+ * of the outcome to `document.body`.
+ *
+ * Each cell's `canvas` property is passed to the OCR worker and the result is
+ * stored on the cell as `text`. Cells are processed one at a time on the
+ * module-level CHAR_WORKER, which is created through prepareWorker(false) on
+ * first use. Afterwards one <div> per grid row is appended to the document
+ * body, containing a bordered box per cell with the cell's canvas and its
+ * recognized text.
+ *
+ * @param grid rows of cells to recognize and render
+ */
+export async function recognizeTextOnImageGrid(grid: any[][]): Promise<void> {
+  if (CHAR_WORKER == null) {
+    CHAR_WORKER = await prepareWorker(false);
+  }
+
+  // sequential on purpose for now; this could be parallelized later
+  for (const row of grid) {
+    for (const cell of row) {
+      // cell.data / cell.canvas are kept: the canvas is rendered further down
+      cell.text = await recognize(CHAR_WORKER, cell.canvas);
+    }
+  }
+
+  // NOTE(review): 'V' is not a Polish letter but is also the fallback text
+  // used below — presumably so unrecognized cells get highlighted; confirm.
+  const polishLetters = ['Ą', 'Ć', 'Ę', 'V', 'Ł', 'Ń', 'Ó', 'Ś', 'Ź', 'Ż'];
+
+  // Builds the bordered box (canvas + recognized text) for a single cell.
+  const buildCellBox = (cell: any): HTMLDivElement => {
+    const label = document.createElement('p');
+    label.style.paddingRight = '5px';
+    label.style.paddingBottom = '5px';
+    label.style.marginTop = '-5px';
+    label.style.textAlign = 'right';
+    // workaround: this letter is rarely recognized, so use it when OCR
+    // returned nothing
+    label.innerText = cell.text || "V";
+
+    const borderColor = polishLetters.includes(label.innerText) ? 'red' : 'blue';
+
+    const box = document.createElement('div');
+    box.style.display = 'inline-block';
+    box.style.border = `3px solid ${borderColor}`;
+    box.style.marginLeft = '3px';
+    box.style.marginBottom = '3px';
+    box.style.width = '70px';
+    box.style.height = '70px';
+    box.appendChild(cell.canvas);
+    box.appendChild(label);
+
+    return box;
+  };
+
+  for (const row of grid) {
+    const rowElement = document.createElement('div');
+
+    for (const cell of row) {
+      rowElement.appendChild(buildCellBox(cell));
+    }
+
+    document.body.appendChild(rowElement);
+  }
 }
--- a/tsconfig.json
+++ b/tsconfig.json
@ -1,6 +1,6 @@
 {
  "compilerOptions": {
-    "target": "es6",
+    "target": "es2017",
    "lib": [
      "dom",
      "dom.iterable",