PCQRSCANER/venv/Lib/site-packages/textract/parsers/image.py

22 lines
554 B
Python
Raw Normal View History

2019-12-22 21:51:47 +01:00
"""
Process an image file using tesseract.
"""
import os
from .utils import ShellParser
class Parser(ShellParser):
    """Extract text from various image file formats using tesseract-ocr."""

    def extract(self, filename, **kwargs):
        """Run ``tesseract`` on ``filename`` and return its captured output.

        Args:
            filename: Path of the image file, passed to tesseract as its
                input argument.
            **kwargs: Optional settings. Only ``language`` is read here;
                a non-empty value is forwarded to tesseract through the
                ``-l`` option. A missing, ``None`` or empty ``language``
                leaves the option off so tesseract uses its default.

        Returns:
            The first element of the pair returned by ``self.run(args)``
            (presumably the process's standard output -- confirm against
            ``ShellParser.run``).
        """
        # 'stdout' is given as tesseract's output base so the recognised
        # text is printed instead of being written to a file.
        args = ['tesseract', filename, 'stdout']

        # Only add '-l' for a usable value: a bare key check would forward
        # language=None (or '') into the argument list and break the call.
        language = kwargs.get('language')
        if language:
            args += ['-l', language]

        stdout, _ = self.run(args)
        return stdout