296 lines
9.6 KiB
Python
296 lines
9.6 KiB
Python
|
# encoding: utf-8
|
||
|
|
||
|
"""
|
||
|
lxml custom element classes for core properties-related XML elements.
|
||
|
"""
|
||
|
|
||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||
|
|
||
|
import re
|
||
|
|
||
|
from datetime import datetime, timedelta
|
||
|
|
||
|
from . import parse_xml
|
||
|
from .ns import nsdecls, qn
|
||
|
from .xmlchemy import BaseOxmlElement, ZeroOrOne
|
||
|
|
||
|
|
||
|
class CT_CoreProperties(BaseOxmlElement):
|
||
|
"""
|
||
|
``<cp:coreProperties>`` element, the root element of the Core Properties
|
||
|
part stored as ``/docProps/core.xml``. Implements many of the Dublin Core
|
||
|
document metadata elements. String elements resolve to an empty string
|
||
|
('') if the element is not present in the XML. String elements are
|
||
|
limited in length to 255 unicode characters.
|
||
|
"""
|
||
|
|
||
|
category = ZeroOrOne("cp:category", successors=())
|
||
|
contentStatus = ZeroOrOne("cp:contentStatus", successors=())
|
||
|
created = ZeroOrOne("dcterms:created", successors=())
|
||
|
creator = ZeroOrOne("dc:creator", successors=())
|
||
|
description = ZeroOrOne("dc:description", successors=())
|
||
|
identifier = ZeroOrOne("dc:identifier", successors=())
|
||
|
keywords = ZeroOrOne("cp:keywords", successors=())
|
||
|
language = ZeroOrOne("dc:language", successors=())
|
||
|
lastModifiedBy = ZeroOrOne("cp:lastModifiedBy", successors=())
|
||
|
lastPrinted = ZeroOrOne("cp:lastPrinted", successors=())
|
||
|
modified = ZeroOrOne("dcterms:modified", successors=())
|
||
|
revision = ZeroOrOne("cp:revision", successors=())
|
||
|
subject = ZeroOrOne("dc:subject", successors=())
|
||
|
title = ZeroOrOne("dc:title", successors=())
|
||
|
version = ZeroOrOne("cp:version", successors=())
|
||
|
|
||
|
_coreProperties_tmpl = "<cp:coreProperties %s/>\n" % nsdecls("cp", "dc", "dcterms")
|
||
|
|
||
|
@staticmethod
|
||
|
def new_coreProperties():
|
||
|
"""Return a new ``<cp:coreProperties>`` element"""
|
||
|
xml = CT_CoreProperties._coreProperties_tmpl
|
||
|
coreProperties = parse_xml(xml)
|
||
|
return coreProperties
|
||
|
|
||
|
@property
|
||
|
def author_text(self):
|
||
|
return self._text_of_element("creator")
|
||
|
|
||
|
@author_text.setter
|
||
|
def author_text(self, value):
|
||
|
self._set_element_text("creator", value)
|
||
|
|
||
|
@property
|
||
|
def category_text(self):
|
||
|
return self._text_of_element("category")
|
||
|
|
||
|
@category_text.setter
|
||
|
def category_text(self, value):
|
||
|
self._set_element_text("category", value)
|
||
|
|
||
|
@property
|
||
|
def comments_text(self):
|
||
|
return self._text_of_element("description")
|
||
|
|
||
|
@comments_text.setter
|
||
|
def comments_text(self, value):
|
||
|
self._set_element_text("description", value)
|
||
|
|
||
|
@property
|
||
|
def contentStatus_text(self):
|
||
|
return self._text_of_element("contentStatus")
|
||
|
|
||
|
@contentStatus_text.setter
|
||
|
def contentStatus_text(self, value):
|
||
|
self._set_element_text("contentStatus", value)
|
||
|
|
||
|
@property
|
||
|
def created_datetime(self):
|
||
|
return self._datetime_of_element("created")
|
||
|
|
||
|
@created_datetime.setter
|
||
|
def created_datetime(self, value):
|
||
|
self._set_element_datetime("created", value)
|
||
|
|
||
|
@property
|
||
|
def identifier_text(self):
|
||
|
return self._text_of_element("identifier")
|
||
|
|
||
|
@identifier_text.setter
|
||
|
def identifier_text(self, value):
|
||
|
self._set_element_text("identifier", value)
|
||
|
|
||
|
@property
|
||
|
def keywords_text(self):
|
||
|
return self._text_of_element("keywords")
|
||
|
|
||
|
@keywords_text.setter
|
||
|
def keywords_text(self, value):
|
||
|
self._set_element_text("keywords", value)
|
||
|
|
||
|
@property
|
||
|
def language_text(self):
|
||
|
return self._text_of_element("language")
|
||
|
|
||
|
@language_text.setter
|
||
|
def language_text(self, value):
|
||
|
self._set_element_text("language", value)
|
||
|
|
||
|
@property
|
||
|
def lastModifiedBy_text(self):
|
||
|
return self._text_of_element("lastModifiedBy")
|
||
|
|
||
|
@lastModifiedBy_text.setter
|
||
|
def lastModifiedBy_text(self, value):
|
||
|
self._set_element_text("lastModifiedBy", value)
|
||
|
|
||
|
@property
|
||
|
def lastPrinted_datetime(self):
|
||
|
return self._datetime_of_element("lastPrinted")
|
||
|
|
||
|
@lastPrinted_datetime.setter
|
||
|
def lastPrinted_datetime(self, value):
|
||
|
self._set_element_datetime("lastPrinted", value)
|
||
|
|
||
|
@property
|
||
|
def modified_datetime(self):
|
||
|
return self._datetime_of_element("modified")
|
||
|
|
||
|
@modified_datetime.setter
|
||
|
def modified_datetime(self, value):
|
||
|
self._set_element_datetime("modified", value)
|
||
|
|
||
|
@property
|
||
|
def revision_number(self):
|
||
|
"""
|
||
|
Integer value of revision property.
|
||
|
"""
|
||
|
revision = self.revision
|
||
|
if revision is None:
|
||
|
return 0
|
||
|
revision_str = revision.text
|
||
|
try:
|
||
|
revision = int(revision_str)
|
||
|
except ValueError:
|
||
|
# non-integer revision strings also resolve to 0
|
||
|
revision = 0
|
||
|
# as do negative integers
|
||
|
if revision < 0:
|
||
|
revision = 0
|
||
|
return revision
|
||
|
|
||
|
@revision_number.setter
|
||
|
def revision_number(self, value):
|
||
|
"""
|
||
|
Set revision property to string value of integer *value*.
|
||
|
"""
|
||
|
if not isinstance(value, int) or value < 1:
|
||
|
tmpl = "revision property requires positive int, got '%s'"
|
||
|
raise ValueError(tmpl % value)
|
||
|
revision = self.get_or_add_revision()
|
||
|
revision.text = str(value)
|
||
|
|
||
|
@property
|
||
|
def subject_text(self):
|
||
|
return self._text_of_element("subject")
|
||
|
|
||
|
@subject_text.setter
|
||
|
def subject_text(self, value):
|
||
|
self._set_element_text("subject", value)
|
||
|
|
||
|
@property
|
||
|
def title_text(self):
|
||
|
return self._text_of_element("title")
|
||
|
|
||
|
@title_text.setter
|
||
|
def title_text(self, value):
|
||
|
self._set_element_text("title", value)
|
||
|
|
||
|
@property
|
||
|
def version_text(self):
|
||
|
return self._text_of_element("version")
|
||
|
|
||
|
@version_text.setter
|
||
|
def version_text(self, value):
|
||
|
self._set_element_text("version", value)
|
||
|
|
||
|
def _datetime_of_element(self, property_name):
|
||
|
element = getattr(self, property_name)
|
||
|
if element is None:
|
||
|
return None
|
||
|
datetime_str = element.text
|
||
|
try:
|
||
|
return self._parse_W3CDTF_to_datetime(datetime_str)
|
||
|
except ValueError:
|
||
|
# invalid datetime strings are ignored
|
||
|
return None
|
||
|
|
||
|
def _get_or_add(self, prop_name):
|
||
|
"""
|
||
|
Return element returned by 'get_or_add_' method for *prop_name*.
|
||
|
"""
|
||
|
get_or_add_method_name = "get_or_add_%s" % prop_name
|
||
|
get_or_add_method = getattr(self, get_or_add_method_name)
|
||
|
element = get_or_add_method()
|
||
|
return element
|
||
|
|
||
|
@classmethod
|
||
|
def _offset_dt(cls, dt, offset_str):
|
||
|
"""
|
||
|
Return a |datetime| instance that is offset from datetime *dt* by
|
||
|
the timezone offset specified in *offset_str*, a string like
|
||
|
``'-07:00'``.
|
||
|
"""
|
||
|
match = cls._offset_pattern.match(offset_str)
|
||
|
if match is None:
|
||
|
raise ValueError("'%s' is not a valid offset string" % offset_str)
|
||
|
sign, hours_str, minutes_str = match.groups()
|
||
|
sign_factor = -1 if sign == "+" else 1
|
||
|
hours = int(hours_str) * sign_factor
|
||
|
minutes = int(minutes_str) * sign_factor
|
||
|
td = timedelta(hours=hours, minutes=minutes)
|
||
|
return dt + td
|
||
|
|
||
|
_offset_pattern = re.compile(r"([+-])(\d\d):(\d\d)")
|
||
|
|
||
|
@classmethod
|
||
|
def _parse_W3CDTF_to_datetime(cls, w3cdtf_str):
|
||
|
# valid W3CDTF date cases:
|
||
|
# yyyy e.g. '2003'
|
||
|
# yyyy-mm e.g. '2003-12'
|
||
|
# yyyy-mm-dd e.g. '2003-12-31'
|
||
|
# UTC timezone e.g. '2003-12-31T10:14:55Z'
|
||
|
# numeric timezone e.g. '2003-12-31T10:14:55-08:00'
|
||
|
templates = ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d", "%Y-%m", "%Y")
|
||
|
# strptime isn't smart enough to parse literal timezone offsets like
|
||
|
# '-07:30', so we have to do it ourselves
|
||
|
parseable_part = w3cdtf_str[:19]
|
||
|
offset_str = w3cdtf_str[19:]
|
||
|
dt = None
|
||
|
for tmpl in templates:
|
||
|
try:
|
||
|
dt = datetime.strptime(parseable_part, tmpl)
|
||
|
except ValueError:
|
||
|
continue
|
||
|
if dt is None:
|
||
|
tmpl = "could not parse W3CDTF datetime string '%s'"
|
||
|
raise ValueError(tmpl % w3cdtf_str)
|
||
|
if len(offset_str) == 6:
|
||
|
return cls._offset_dt(dt, offset_str)
|
||
|
return dt
|
||
|
|
||
|
def _set_element_datetime(self, prop_name, value):
|
||
|
"""
|
||
|
Set date/time value of child element having *prop_name* to *value*.
|
||
|
"""
|
||
|
if not isinstance(value, datetime):
|
||
|
tmpl = "property requires <type 'datetime.datetime'> object, got %s"
|
||
|
raise ValueError(tmpl % type(value))
|
||
|
element = self._get_or_add(prop_name)
|
||
|
dt_str = value.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||
|
element.text = dt_str
|
||
|
if prop_name in ("created", "modified"):
|
||
|
# These two require an explicit 'xsi:type="dcterms:W3CDTF"'
|
||
|
# attribute. The first and last line are a hack required to add
|
||
|
# the xsi namespace to the root element rather than each child
|
||
|
# element in which it is referenced
|
||
|
self.set(qn("xsi:foo"), "bar")
|
||
|
element.set(qn("xsi:type"), "dcterms:W3CDTF")
|
||
|
del self.attrib[qn("xsi:foo")]
|
||
|
|
||
|
def _set_element_text(self, prop_name, value):
|
||
|
"""
|
||
|
Set string value of *name* property to *value*.
|
||
|
"""
|
||
|
value = str(value)
|
||
|
if len(value) > 255:
|
||
|
tmpl = "exceeded 255 char limit for property, got:\n\n'%s'"
|
||
|
raise ValueError(tmpl % value)
|
||
|
element = self._get_or_add(prop_name)
|
||
|
element.text = value
|
||
|
|
||
|
def _text_of_element(self, property_name):
|
||
|
element = getattr(self, property_name)
|
||
|
if element is None:
|
||
|
return ""
|
||
|
if element.text is None:
|
||
|
return ""
|
||
|
return element.text
|