// File: src/helpers/findTextRegions.js
// Added by commit 492913d ("src/helpers: add photo recognision functions").

/**
 * Thresholds every RGBA pixel in place to opaque pure black or pure white.
 * The gray value is the weighted sum 30% R + 59% G + 11% B, rounded up.
 */
function binarizeInPlace(pixels, width, height, grayScaleThreshold) {
  const pixelCount = width * height;

  for (let p = 0; p < pixelCount; p++) {
    const offset = p * 4;
    const gray = Math.ceil(
      pixels[offset] * 0.30 + pixels[offset + 1] * 0.59 + pixels[offset + 2] * 0.11,
    );
    const value = gray < grayScaleThreshold ? 0 : 255;

    pixels[offset] = value;
    pixels[offset + 1] = value;
    pixels[offset + 2] = value;
    pixels[offset + 3] = 255;
  }
}

/**
 * Scans each row of a binarized image and collects horizontal runs
 * ("patterns") that start at a black pixel and end once too many white or
 * too many black pixels follow each other, or at the last column.
 * Returns one array per row holding `[x1, y, x2, y]` segments.
 */
function scanRowSegments(pixels, width, height, maxWhiteSpace, maxFontLineWidth, minTextWidth) {
  const segments = [];
  let whiteRun = 0;
  let blackRun = 0;
  let patternLength = 0;
  let patternStartX = -1;

  for (let y = 0; y < height; y++) {
    const row = [];
    segments.push(row);

    for (let x = 0; x < width; x++) {
      // After binarization a pixel is either 0,0,0 or 255,255,255, so the
      // red channel alone tells black from white.
      const isBlack = pixels[(y * width + x) * 4] === 0;

      if (isBlack) {
        blackRun++;
        whiteRun = 0;

        if (patternStartX === -1) {
          patternStartX = x;
        }
      } else if (patternStartX !== -1) {
        // white pixels only count once a pattern has started
        whiteRun++;
        blackRun = 0;
      }

      // check white and black pattern maximum lengths
      if (whiteRun > maxWhiteSpace || blackRun > maxFontLineWidth || x === width - 1) {
        if (patternLength >= minTextWidth) {
          row.push([patternStartX, y, patternStartX + patternLength, y]);
        }

        whiteRun = 0;
        blackRun = 0;
        patternLength = 0;
        patternStartX = -1;
      }

      if (patternStartX !== -1) {
        patternLength++;
      }
    }
  }

  return segments;
}

/**
 * Merges horizontally overlapping segments of a row with segments one or two
 * rows below it. A merged segment is widened, gets the lower segment's end
 * row, and is moved into the lower row so it can keep growing downwards.
 * Mutates `segments` in place.
 */
function mergeAdjacentRows(segments, height) {
  for (let y = 0; y < height - 2; y++) {
    const upper = segments[y];

    for (let below = y + 1; below <= y + 2; below++) {
      const lower = segments[below];

      for (let i = 0; i < upper.length; i++) {
        const a = upper[i];

        for (let j = 0; j < lower.length; j++) {
          const b = lower[j];

          // horizontal intersection
          const overlaps =
            (a[0] <= b[0] && a[2] >= b[2]) ||
            (a[0] >= b[0] && a[0] <= b[2]) ||
            (a[2] >= b[0] && a[2] <= b[2]);

          if (!overlaps) {
            continue;
          }

          a[0] = Math.min(a[0], b[0]);
          a[2] = Math.max(a[2], b[2]);
          a[3] = b[3];

          // `i--` compensates for the removal so the next upper segment is not skipped
          upper.splice(i--, 1);
          lower.splice(j, 1);
          lower.push(a);

          break;
        }
      }
    }
  }
}

/**
 * Converts merged segments into padded `{x, y, w, h}` boxes, dropping those
 * narrower or shorter than `minTextWidth`.
 */
function toPaddedBoxes(segments, minTextWidth, widthTolerance, heightTolerance) {
  const boxes = [];

  for (const row of segments) {
    for (const [x1, y1, x2, y2] of row) {
      if (x1 === -1 || y1 === -1 || x2 === -1 || y2 === -1) {
        continue;
      }

      const w = (x2 - x1) + 1;
      const h = (y2 - y1) + 1;

      if (w >= minTextWidth && h >= minTextWidth) {
        // NOTE(review): x moves left by 2 * widthTolerance while w grows by
        // only 1 * widthTolerance, whereas y/h pad symmetrically. The two
        // factors look swapped; kept as-is because callers may rely on the
        // current coordinates - confirm.
        boxes.push({
          x: x1 - widthTolerance * 2,
          y: y1 - heightTolerance,
          w: w + widthTolerance,
          h: h + heightTolerance * 2,
        });
      }
    }
  }

  return boxes;
}

/**
 * Estimates grid dimensions: counts boxes whose y is close to the running
 * minimum y (columns) and boxes whose x is close to the running minimum x (rows).
 */
function measureGrid(boxes, minTextWidth) {
  let gridWidth = 0;
  let gridHeight = 0;
  let lastY = boxes[0].y;
  let lastX = boxes[0].x;

  for (const box of boxes) {
    if (Math.abs(lastY - box.y) < minTextWidth) {
      lastY = Math.min(lastY, box.y);
      gridWidth++;
    }

    if (Math.abs(lastX - box.x) < minTextWidth) {
      lastX = Math.min(lastX, box.x);
      gridHeight++;
    }
  }

  return { gridWidth, gridHeight };
}

/**
 * Finds text-like regions on an image and arranges them into a grid of
 * cropped cutouts.
 *
 * The image is drawn onto an off-screen canvas, binarized, scanned row by row
 * for black/white patterns, and the per-row patterns are merged vertically
 * into boxes. Requires a DOM `document` (a canvas element is created).
 *
 * @param {*} image - Source passed to `drawImage`; its `width`/`height` are read.
 * @param {number} [maxWhiteSpace=8] - Max consecutive white pixels inside a pattern.
 * @param {number} [maxFontLineWidth=maxWhiteSpace * 3] - Max consecutive black pixels inside a pattern.
 * @param {number} [minTextWidth=maxWhiteSpace] - Min pattern length and min box width/height.
 * @param {number} [grayScaleThreshold=100] - Gray values below this become black.
 * @param {number} [widthTolerance=3] - Horizontal padding applied to each box.
 * @param {number} [heightTolerance=8] - Vertical padding applied to each box.
 * @returns {{grid: Array<Array<{data: ImageData, x: number, y: number, w: number, h: number}>>, gridWidth: number, gridHeight: number} | {}}
 *   The grid with its dimensions, or an empty object when the image has no
 *   width or nothing was found. Grid cells may be left empty when the number
 *   of matches does not fill the estimated grid.
 */
export function findTextRegions(image, maxWhiteSpace, maxFontLineWidth, minTextWidth, grayScaleThreshold, widthTolerance, heightTolerance) {
  // `??` (not default parameters) so that explicit `null` also selects the default
  const whiteSpaceLimit = maxWhiteSpace ?? 8;
  const fontLineLimit = maxFontLineWidth ?? whiteSpaceLimit * 3;
  const minWidth = minTextWidth ?? whiteSpaceLimit;
  const threshold = grayScaleThreshold ?? 100;
  const padX = widthTolerance ?? 3;
  const padY = heightTolerance ?? 8;

  if (!image.width) {
    return {};
  }

  const canvas = document.createElement('canvas');
  canvas.width = image.width;
  canvas.height = image.height;

  const ctx = canvas.getContext('2d');
  ctx.drawImage(image, 0, 0);
  const imageData = ctx.getImageData(0, 0, image.width, image.height);

  // convert to black & white picture and redraw, so that the cutouts taken
  // below come from the binarized image
  binarizeInPlace(imageData.data, image.width, image.height, threshold);
  ctx.putImageData(imageData, 0, 0);

  const segments = scanRowSegments(
    imageData.data, image.width, image.height, whiteSpaceLimit, fontLineLimit, minWidth,
  );
  mergeAdjacentRows(segments, image.height);

  const foundSegments = toPaddedBoxes(segments, minWidth, padX, padY);

  // hopefully we found at least something
  if (!foundSegments.length) {
    // console.assert(message) never prints (the string is a truthy condition)
    console.error("Error: findTextRegions() did not found anything");
    return {};
  }

  const { gridWidth, gridHeight } = measureGrid(foundSegments, minWidth);

  // hopefully we matched every letter in the grid
  if (gridWidth * gridHeight !== foundSegments.length) {
    console.warn("Dimentions are not equal to the number of matches");
  }

  // prepare grid for letters
  const grid = Array.from({ length: gridHeight }, () => new Array(gridWidth));

  // fill grid with imageData objects (captured cutouts)
  for (let row = 0; row < gridHeight; row++) {
    // slicing (instead of indexing) keeps a short last row from throwing
    const rowSegments = foundSegments.slice(row * gridWidth, (row + 1) * gridWidth);

    // column of a box = rank of its x among the boxes of the same row
    const columnOfX = new Map();
    rowSegments
      .map((s) => s.x)
      .sort((a, b) => a - b)
      .forEach((x, column) => columnOfX.set(x, column));

    for (const s of rowSegments) {
      grid[row][columnOfX.get(s.x)] = {
        data: ctx.getImageData(s.x, s.y, s.w, s.h),
        ...s,
      };
    }
  }

  return {
    grid,
    gridWidth,
    gridHeight,
  };
}
// File: src/helpers/recognizeTextOnImage.ts

/**
 * Kicks off a Tesseract OCR run (language 'pol') and returns the recognised
 * text if it is already available.
 *
 * NOTE(review): the OCR call is asynchronous and is not awaited, so this
 * function returns before recognition can finish - as written it returns ""
 * and the text is only visible through the console log. Returning the text
 * requires changing the signature to `Promise<string>`, which would break
 * existing callers, so that is left for a follow-up.
 *
 * NOTE(review): `image` and `Tesseract` are not declared in the visible
 * code - presumably globals; confirm where they come from.
 *
 * @returns The recognised text, or "" when none is available yet.
 */
export function recognise(): string {
  let out: string | undefined;

  (async () => {
    try {
      const {data: {text}} = await Tesseract.recognize(image, 'pol', {
        workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js',
        langPath: 'https://tessdata.projectnaptha.com/4.0.0',
        corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js',
        cacheMethod: 'none',
      });
      out = text;
      console.log("recognised: ", text);
    } catch (err) {
      // without this handler any OCR/network failure is an unhandled rejection
      console.error("recognise() failed: ", err);
    }
  })();

  return out ?? "";
}