# encoding: utf-8

"""
Provides a low-level, read-only API to a serialized Open Packaging Convention
(OPC) package.
"""

from __future__ import absolute_import

from .constants import RELATIONSHIP_TARGET_MODE as RTM
from .oxml import parse_xml
from .packuri import PACKAGE_URI, PackURI
from .phys_pkg import PhysPkgReader
from .shared import CaseInsensitiveDict


class PackageReader(object):
    """
    Provides access to the contents of a zip-format OPC package via its
    :meth:`iter_sparts` and :meth:`iter_srels` methods.
    """

    def __init__(self, content_types, pkg_srels, sparts):
        """
        *pkg_srels* is the collection of package-level serialized
        relationships and *sparts* is the sequence of |_SerializedPart|
        instances. *content_types* is accepted for interface compatibility
        but is not retained; each serialized part already carries its own
        content type.
        """
        super(PackageReader, self).__init__()
        self._pkg_srels = pkg_srels
        self._sparts = sparts

    @staticmethod
    def from_file(pkg_file):
        """
        Return a |PackageReader| instance loaded with contents of *pkg_file*.

        The physical package reader is closed before returning, including
        when loading raises an exception.
        """
        phys_reader = PhysPkgReader(pkg_file)
        try:
            content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
            pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)
            sparts = PackageReader._load_serialized_parts(
                phys_reader, pkg_srels, content_types
            )
        finally:
            # release the underlying package even if parsing fails part-way
            phys_reader.close()
        return PackageReader(content_types, pkg_srels, sparts)

    def iter_sparts(self):
        """
        Generate a 3-tuple `(partname, content_type, blob)` for each of the
        serialized parts in the package.
        """
        for spart in self._sparts:
            yield (spart.partname, spart.content_type, spart.blob)

    def iter_srels(self):
        """
        Generate a 2-tuple `(source_uri, srel)` for each of the relationships
        in the package. Package-level relationships come first, with
        ``PACKAGE_URI`` as their source, followed by the relationships of
        each serialized part, with the partname as their source.
        """
        for srel in self._pkg_srels:
            yield (PACKAGE_URI, srel)
        for spart in self._sparts:
            for srel in spart.srels:
                yield (spart.partname, srel)

    @staticmethod
    def _load_serialized_parts(phys_reader, pkg_srels, content_types):
        """
        Return a tuple of |_SerializedPart| instances corresponding to the
        parts in *phys_reader* accessible by walking the relationship graph
        starting with *pkg_srels*. The content type of each part is looked up
        in *content_types*; that lookup raises ``KeyError`` when no content
        type is defined for a partname.
        """
        sparts = []
        part_walker = PackageReader._walk_phys_parts(phys_reader, pkg_srels)
        for partname, blob, srels in part_walker:
            content_type = content_types[partname]
            sparts.append(_SerializedPart(partname, content_type, blob, srels))
        return tuple(sparts)

    @staticmethod
    def _srels_for(phys_reader, source_uri):
        """
        Return |_SerializedRelationshipCollection| instance populated with
        relationships for source identified by *source_uri*. The collection
        is empty when *phys_reader* provides no relationships XML (|None|)
        for that source.
        """
        rels_xml = phys_reader.rels_xml_for(source_uri)
        return _SerializedRelationshipCollection.load_from_xml(
            source_uri.baseURI, rels_xml
        )

    @staticmethod
    def _walk_phys_parts(phys_reader, srels, visited_partnames=None):
        """
        Generate a 3-tuple `(partname, blob, srels)` for each of the parts in
        *phys_reader* by walking the relationship graph rooted at *srels*.

        External relationships are skipped. *visited_partnames* is a list
        shared across the recursive calls and mutated in place, so each part
        is generated at most once even when the graph contains cycles or
        several relationships target the same part.
        """
        if visited_partnames is None:
            visited_partnames = []
        for srel in srels:
            if srel.is_external:
                continue
            partname = srel.target_partname
            if partname in visited_partnames:
                continue
            # mark before recursing so a cycle back to this part terminates
            visited_partnames.append(partname)
            part_srels = PackageReader._srels_for(phys_reader, partname)
            blob = phys_reader.blob_for(partname)
            yield (partname, blob, part_srels)
            for partname, blob, srels in PackageReader._walk_phys_parts(
                phys_reader, part_srels, visited_partnames
            ):
                yield (partname, blob, srels)


class _ContentTypeMap(object):
    """
    Value type providing dictionary semantics for looking up content type by
    part name, e.g. ``content_type = cti['/ppt/presentation.xml']``.
    """

    def __init__(self):
        super(_ContentTypeMap, self).__init__()
        self._overrides = CaseInsensitiveDict()
        self._defaults = CaseInsensitiveDict()

    def __getitem__(self, partname):
        """
        Return content type for part identified by *partname*.

        An override registered for the partname takes precedence; otherwise
        the default registered for the partname's extension is used. Raises
        ``KeyError`` if *partname* is not a |PackURI| instance or if neither
        an override nor a default matches.
        """
        if not isinstance(partname, PackURI):
            tmpl = "_ContentTypeMap key must be a PackURI instance, got %s"
            raise KeyError(tmpl % type(partname))
        if partname in self._overrides:
            return self._overrides[partname]
        if partname.ext in self._defaults:
            return self._defaults[partname.ext]
        tmpl = "no content type for partname '%s' in [Content_Types].xml"
        raise KeyError(tmpl % partname)

    @staticmethod
    def from_xml(content_types_xml):
        """
        Return a new |_ContentTypeMap| instance populated with the contents
        of *content_types_xml*.
        """
        types_elm = parse_xml(content_types_xml)
        ct_map = _ContentTypeMap()
        for o in types_elm.override_lst:
            ct_map._add_override(o.partName, o.contentType)
        for d in types_elm.default_lst:
            ct_map._add_default(d.extension, d.contentType)
        return ct_map

    def _add_default(self, extension, content_type):
        """
        Add the default mapping of *extension* to *content_type* to this
        content type mapping. *extension* does not include the leading
        period.
        """
        self._defaults[extension] = content_type

    def _add_override(self, partname, content_type):
        """
        Add the override mapping of *partname* to *content_type* to this
        content type mapping.
        """
        self._overrides[partname] = content_type


class _SerializedPart(object):
    """
    Value object for an OPC package part. Provides access to the partname,
    content type, blob, and serialized relationships for the part.
    """

    def __init__(self, partname, content_type, blob, srels):
        super(_SerializedPart, self).__init__()
        self._partname = partname
        self._content_type = content_type
        self._blob = blob
        self._srels = srels

    @property
    def partname(self):
        """Partname this part was constructed with."""
        return self._partname

    @property
    def content_type(self):
        """Content type this part was constructed with."""
        return self._content_type

    @property
    def blob(self):
        """Blob this part was constructed with."""
        return self._blob

    @property
    def srels(self):
        """Serialized relationships this part was constructed with."""
        return self._srels


class _SerializedRelationship(object):
    """
    Value object representing a serialized relationship in an OPC package.
    Serialized, in this case, means any target part is referred to via its
    partname rather than a direct link to an in-memory |Part| object.
    """

    def __init__(self, baseURI, rel_elm):
        super(_SerializedRelationship, self).__init__()
        self._baseURI = baseURI
        self._rId = rel_elm.rId
        self._reltype = rel_elm.reltype
        self._target_mode = rel_elm.targetMode
        self._target_ref = rel_elm.target_ref

    @property
    def is_external(self):
        """
        True if target_mode is ``RTM.EXTERNAL``
        """
        return self._target_mode == RTM.EXTERNAL

    @property
    def reltype(self):
        """Relationship type, like ``RT.OFFICE_DOCUMENT``"""
        return self._reltype

    @property
    def rId(self):
        """
        Relationship id, like 'rId9', corresponds to the ``Id`` attribute on
        the ``CT_Relationship`` element.
        """
        return self._rId

    @property
    def target_mode(self):
        """
        String in ``TargetMode`` attribute of ``CT_Relationship`` element,
        one of ``RTM.INTERNAL`` or ``RTM.EXTERNAL``.
        """
        return self._target_mode

    @property
    def target_ref(self):
        """
        String in ``Target`` attribute of ``CT_Relationship`` element, a
        relative part reference for internal target mode or an arbitrary URI,
        e.g. an HTTP URL, for external target mode.
        """
        return self._target_ref

    @property
    def target_partname(self):
        """
        |PackURI| instance containing partname targeted by this relationship.
        Raises ``ValueError`` on reference if target_mode is ``'External'``.
        Use :attr:`target_mode` to check before referencing.
        """
        if self.is_external:
            msg = (
                "target_partname attribute on Relationship is undefined w"
                'here TargetMode == "External"'
            )
            raise ValueError(msg)
        # lazy-load _target_partname attribute; computed once on first access
        if not hasattr(self, "_target_partname"):
            self._target_partname = PackURI.from_rel_ref(
                self._baseURI, self.target_ref
            )
        return self._target_partname


class _SerializedRelationshipCollection(object):
    """
    Read-only sequence of |_SerializedRelationship| instances corresponding
    to the relationships item XML passed to :meth:`load_from_xml`.
    """

    def __init__(self):
        super(_SerializedRelationshipCollection, self).__init__()
        self._srels = []

    def __iter__(self):
        """Support iteration, e.g. 'for x in srels:'"""
        return self._srels.__iter__()

    @staticmethod
    def load_from_xml(baseURI, rels_item_xml):
        """
        Return |_SerializedRelationshipCollection| instance loaded with the
        relationships contained in *rels_item_xml*. Returns an empty
        collection if *rels_item_xml* is |None|.
        """
        srels = _SerializedRelationshipCollection()
        if rels_item_xml is not None:
            rels_elm = parse_xml(rels_item_xml)
            for rel_elm in rels_elm.relationship_lst:
                srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
        return srels