PCQRSCANER/venv/Lib/site-packages/textract/parsers/pptx_parser.py

21 lines
603 B
Python
Raw Normal View History

2019-12-22 21:51:47 +01:00
import pptx
from .utils import BaseParser
class Parser(BaseParser):
    """Parser for pptx files, implemented on top of python-pptx."""

    def extract(self, filename, **kwargs):
        """Return the text of every run found in the presentation.

        ``filename`` is handed unchanged to ``pptx.Presentation``.  Slides,
        shapes, paragraphs and runs are visited in the order python-pptx
        yields them, and shapes whose ``has_text_frame`` is false are
        skipped.  The text of each run becomes one entry in the result, and
        the entries are joined with two newline characters.

        ``kwargs`` is accepted but not used by this method.
        """
        deck = pptx.Presentation(filename)
        # Flattened walk: slide -> shape (text frames only) -> paragraph -> run.
        fragments = [
            run.text
            for slide in deck.slides
            for shape in slide.shapes
            if shape.has_text_frame
            for paragraph in shape.text_frame.paragraphs
            for run in paragraph.runs
        ]
        return '\n\n'.join(fragments)