PCQRSCANER/venv/Lib/site-packages/textract/parsers/pptx_parser.py

import pptx

from .utils import BaseParser


class Parser(BaseParser):
    """Parser for PowerPoint ``.pptx`` files, backed by python-pptx.
    """

    def extract(self, filename, **kwargs):
        """Return the text of every run in the presentation as one string.

        Slides are walked in order. On each slide only shapes that report
        a text frame are read; every run of every paragraph in that frame
        contributes its text. The collected pieces are joined with a blank
        line (two newlines) between them.

        ``filename`` is handed straight to ``pptx.Presentation``.
        ``kwargs`` is accepted but not used by this method.
        """

        def iter_run_text(deck):
            # Yield run text in document order: slide -> shape -> paragraph -> run.
            for slide in deck.slides:
                for shape in slide.shapes:
                    if shape.has_text_frame:
                        for paragraph in shape.text_frame.paragraphs:
                            for run in paragraph.runs:
                                yield run.text

        deck = pptx.Presentation(filename)
        return '\n\n'.join(iter_run_text(deck))
3 2019-12-22 21:51:47 +01:00			`import pptx`

			`from .utils import BaseParser`


			`class Parser(BaseParser):`
			`"""Extract text from pptx file using python-pptx`
			`"""`

			`def extract(self, filename, **kwargs):`
			`presentation = pptx.Presentation(filename)`
			`text_runs = []`
			`for slide in presentation.slides:`
			`for shape in slide.shapes:`
			`if not shape.has_text_frame:`
			`continue`
			`for paragraph in shape.text_frame.paragraphs:`
			`for run in paragraph.runs:`
			`text_runs.append(run.text)`
			`return '\n\n'.join(text_runs)`