PCQRSCANER/venv/Lib/site-packages/pptx/opc/pkgreader.py

294 lines
9.6 KiB
Python
Raw Normal View History

2019-12-22 21:51:47 +01:00
# encoding: utf-8
"""
Provides a low-level, read-only API to a serialized Open Packaging Convention
(OPC) package.
"""
from __future__ import absolute_import
from .constants import RELATIONSHIP_TARGET_MODE as RTM
from .oxml import parse_xml
from .packuri import PACKAGE_URI, PackURI
from .phys_pkg import PhysPkgReader
from .shared import CaseInsensitiveDict
class PackageReader(object):
"""
Provides access to the contents of a zip-format OPC package via its
:attr:`serialized_parts` and :attr:`pkg_srels` attributes.
"""
def __init__(self, content_types, pkg_srels, sparts):
super(PackageReader, self).__init__()
self._pkg_srels = pkg_srels
self._sparts = sparts
@staticmethod
def from_file(pkg_file):
"""
Return a |PackageReader| instance loaded with contents of *pkg_file*.
"""
phys_reader = PhysPkgReader(pkg_file)
content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)
sparts = PackageReader._load_serialized_parts(
phys_reader, pkg_srels, content_types
)
phys_reader.close()
return PackageReader(content_types, pkg_srels, sparts)
def iter_sparts(self):
"""
Generate a 3-tuple `(partname, content_type, blob)` for each of the
serialized parts in the package.
"""
for spart in self._sparts:
yield (spart.partname, spart.content_type, spart.blob)
def iter_srels(self):
"""
Generate a 2-tuple `(source_uri, srel)` for each of the relationships
in the package.
"""
for srel in self._pkg_srels:
yield (PACKAGE_URI, srel)
for spart in self._sparts:
for srel in spart.srels:
yield (spart.partname, srel)
@staticmethod
def _load_serialized_parts(phys_reader, pkg_srels, content_types):
"""
Return a list of |_SerializedPart| instances corresponding to the
parts in *phys_reader* accessible by walking the relationship graph
starting with *pkg_srels*.
"""
sparts = []
part_walker = PackageReader._walk_phys_parts(phys_reader, pkg_srels)
for partname, blob, srels in part_walker:
content_type = content_types[partname]
spart = _SerializedPart(partname, content_type, blob, srels)
sparts.append(spart)
return tuple(sparts)
@staticmethod
def _srels_for(phys_reader, source_uri):
"""
Return |_SerializedRelationshipCollection| instance populated with
relationships for source identified by *source_uri*.
"""
rels_xml = phys_reader.rels_xml_for(source_uri)
return _SerializedRelationshipCollection.load_from_xml(
source_uri.baseURI, rels_xml
)
@staticmethod
def _walk_phys_parts(phys_reader, srels, visited_partnames=None):
"""
Generate a 3-tuple `(partname, blob, srels)` for each of the parts in
*phys_reader* by walking the relationship graph rooted at srels.
"""
if visited_partnames is None:
visited_partnames = []
for srel in srels:
if srel.is_external:
continue
partname = srel.target_partname
if partname in visited_partnames:
continue
visited_partnames.append(partname)
part_srels = PackageReader._srels_for(phys_reader, partname)
blob = phys_reader.blob_for(partname)
yield (partname, blob, part_srels)
for partname, blob, srels in PackageReader._walk_phys_parts(
phys_reader, part_srels, visited_partnames
):
yield (partname, blob, srels)
class _ContentTypeMap(object):
"""
Value type providing dictionary semantics for looking up content type by
part name, e.g. ``content_type = cti['/ppt/presentation.xml']``.
"""
def __init__(self):
super(_ContentTypeMap, self).__init__()
self._overrides = CaseInsensitiveDict()
self._defaults = CaseInsensitiveDict()
def __getitem__(self, partname):
"""
Return content type for part identified by *partname*.
"""
if not isinstance(partname, PackURI):
tmpl = "_ContentTypeMap key must be <type 'PackURI'>, got %s"
raise KeyError(tmpl % type(partname))
if partname in self._overrides:
return self._overrides[partname]
if partname.ext in self._defaults:
return self._defaults[partname.ext]
tmpl = "no content type for partname '%s' in [Content_Types].xml"
raise KeyError(tmpl % partname)
@staticmethod
def from_xml(content_types_xml):
"""
Return a new |_ContentTypeMap| instance populated with the contents
of *content_types_xml*.
"""
types_elm = parse_xml(content_types_xml)
ct_map = _ContentTypeMap()
for o in types_elm.override_lst:
ct_map._add_override(o.partName, o.contentType)
for d in types_elm.default_lst:
ct_map._add_default(d.extension, d.contentType)
return ct_map
def _add_default(self, extension, content_type):
"""
Add the default mapping of *extension* to *content_type* to this
content type mapping. *extension* does not include the leading
period.
"""
self._defaults[extension] = content_type
def _add_override(self, partname, content_type):
"""
Add the default mapping of *partname* to *content_type* to this
content type mapping.
"""
self._overrides[partname] = content_type
class _SerializedPart(object):
"""
Value object for an OPC package part. Provides access to the partname,
content type, blob, and serialized relationships for the part.
"""
def __init__(self, partname, content_type, blob, srels):
super(_SerializedPart, self).__init__()
self._partname = partname
self._content_type = content_type
self._blob = blob
self._srels = srels
@property
def partname(self):
return self._partname
@property
def content_type(self):
return self._content_type
@property
def blob(self):
return self._blob
@property
def srels(self):
return self._srels
class _SerializedRelationship(object):
"""
Value object representing a serialized relationship in an OPC package.
Serialized, in this case, means any target part is referred to via its
partname rather than a direct link to an in-memory |Part| object.
"""
def __init__(self, baseURI, rel_elm):
super(_SerializedRelationship, self).__init__()
self._baseURI = baseURI
self._rId = rel_elm.rId
self._reltype = rel_elm.reltype
self._target_mode = rel_elm.targetMode
self._target_ref = rel_elm.target_ref
@property
def is_external(self):
"""
True if target_mode is ``RTM.EXTERNAL``
"""
return self._target_mode == RTM.EXTERNAL
@property
def reltype(self):
"""Relationship type, like ``RT.OFFICE_DOCUMENT``"""
return self._reltype
@property
def rId(self):
"""
Relationship id, like 'rId9', corresponds to the ``Id`` attribute on
the ``CT_Relationship`` element.
"""
return self._rId
@property
def target_mode(self):
"""
String in ``TargetMode`` attribute of ``CT_Relationship`` element,
one of ``RTM.INTERNAL`` or ``RTM.EXTERNAL``.
"""
return self._target_mode
@property
def target_ref(self):
"""
String in ``Target`` attribute of ``CT_Relationship`` element, a
relative part reference for internal target mode or an arbitrary URI,
e.g. an HTTP URL, for external target mode.
"""
return self._target_ref
@property
def target_partname(self):
"""
|PackURI| instance containing partname targeted by this relationship.
Raises ``ValueError`` on reference if target_mode is ``'External'``.
Use :attr:`target_mode` to check before referencing.
"""
if self.is_external:
msg = (
"target_partname attribute on Relationship is undefined w"
'here TargetMode == "External"'
)
raise ValueError(msg)
# lazy-load _target_partname attribute
if not hasattr(self, "_target_partname"):
self._target_partname = PackURI.from_rel_ref(self._baseURI, self.target_ref)
return self._target_partname
class _SerializedRelationshipCollection(object):
"""
Read-only sequence of |_SerializedRelationship| instances corresponding
to the relationships item XML passed to constructor.
"""
def __init__(self):
super(_SerializedRelationshipCollection, self).__init__()
self._srels = []
def __iter__(self):
"""Support iteration, e.g. 'for x in srels:'"""
return self._srels.__iter__()
@staticmethod
def load_from_xml(baseURI, rels_item_xml):
"""
Return |_SerializedRelationshipCollection| instance loaded with the
relationships contained in *rels_item_xml*. Returns an empty
collection if *rels_item_xml* is |None|.
"""
srels = _SerializedRelationshipCollection()
if rels_item_xml is not None:
rels_elm = parse_xml(rels_item_xml)
for rel_elm in rels_elm.relationship_lst:
srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
return srels