Yet a lot more python files that somehow svn just refused to add
git-svn-id: http://google-refine.googlecode.com/svn/trunk@962 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
7767536292
commit
3f58d88922
380
extensions/jython/module/MOD-INF/lib/jython/xml/Uri.py
Normal file
380
extensions/jython/module/MOD-INF/lib/jython/xml/Uri.py
Normal file
@ -0,0 +1,380 @@
|
|||||||
|
# pylint: disable-msg=C0103
|
||||||
|
#
|
||||||
|
# backported code from 4Suite with slight modifications, started from r1.89 of
|
||||||
|
# Ft/Lib/Uri.py, by syt@logilab.fr on 2005-02-09
|
||||||
|
#
|
||||||
|
# part if not all of this code should probably move to urlparse (or be used
|
||||||
|
# to fix some existing functions in this module)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# Copyright 2004 Fourthought, Inc. (USA).
|
||||||
|
# Detailed license and copyright information: http://4suite.org/COPYRIGHT
|
||||||
|
# Project home, documentation, distributions: http://4suite.org/
|
||||||
|
import os.path
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import urlparse, urllib, urllib2
|
||||||
|
|
||||||
|
def UnsplitUriRef(uriRefSeq):
    """should replace urlparse.urlunsplit

    Assembles a URI reference string from a 5-item tuple or list of the
    form (scheme, authority, path, query, fragment), as produced by
    SplitUriRef().  A component that is None is left out together with
    its delimiter; the path is always emitted.

    Raises TypeError if the argument is not a tuple or list.
    """
    if not isinstance(uriRefSeq, (tuple, list)):
        raise TypeError("sequence expected, got %s" % type(uriRefSeq))
    (scheme, authority, path, query, fragment) = uriRefSeq
    # collect each present component with its delimiter, then glue once
    pieces = []
    if scheme is not None:
        pieces.append(scheme + ':')
    if authority is not None:
        pieces.append('//' + authority)
    pieces.append(path)
    if query is not None:
        pieces.append('?' + query)
    if fragment is not None:
        pieces.append('#' + fragment)
    return ''.join(pieces)
|
||||||
|
|
||||||
|
# re.DOTALL lets the fragment's '.*' span newlines; without it a fragment
# containing a newline makes the whole pattern fail to match.
SPLIT_URI_REF_PATTERN = re.compile(r"^(?:(?P<scheme>[^:/?#]+):)?(?://(?P<authority>[^/?#]*))?(?P<path>[^?#]*)(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$", re.DOTALL)

def SplitUriRef(uriref):
    """should replace urlparse.urlsplit

    Given a valid URI reference as a string, returns a tuple representing the
    generic URI components, as per RFC 2396 appendix B. The tuple's structure
    is (scheme, authority, path, query, fragment).

    All values will be strings (possibly empty) or None if undefined.
    The path is always a string, never None.

    Note that per rfc3986, there is no distinction between a path and
    an "opaque part", as there was in RFC 2396.
    """
    # With re.DOTALL the pattern matches every possible string, so it's
    # safe to assume the match object exists.
    match = SPLIT_URI_REF_PATTERN.match(uriref)
    return match.group('scheme', 'authority', 'path', 'query', 'fragment')
|
||||||
|
|
||||||
|
|
||||||
|
def Absolutize(uriRef, baseUri):
    """
    Resolves a URI reference to absolute form, effecting the result of RFC
    3986 section 5. The URI reference is considered to be relative to the
    given base URI.

    It is the caller's responsibility to ensure that the base URI matches
    the absolute-URI syntax rule of RFC 3986, and that its path component
    does not contain '.' or '..' segments if the scheme is hierarchical.
    Unexpected results may occur otherwise.

    This function only conducts a minimal sanity check in order to determine
    if relative resolution is possible: it raises a ValueError if the base
    URI is empty or does not have a scheme component. While it is true that
    the base URI is irrelevant if the URI reference has a scheme, an
    exception is raised in order to signal that the given string does not
    even come close to meeting the criteria to be usable as a base URI.

    It is the caller's responsibility to make a determination of whether the
    URI reference constitutes a "same-document reference", as defined in RFC
    2396 or RFC 3986. As per the spec, dereferencing a same-document
    reference "should not" involve retrieval of a new representation of the
    referenced resource. Note that the two specs have different definitions
    of same-document reference: RFC 2396 says it is *only* the cases where the
    reference is the empty string, or "#" followed by a fragment; RFC 3986
    requires making a comparison of the base URI to the absolute form of the
    reference (as is returned by the spec), minus its fragment component,
    if any.

    This function is similar to urlparse.urljoin() and urllib.basejoin().
    Those functions, however, are (as of Python 2.3) outdated, buggy, and/or
    designed to produce results acceptable for use with other core Python
    libraries, rather than being earnest implementations of the relevant
    specs. Their problems are most noticeable in their handling of
    same-document references and 'file:' URIs, both being situations that
    come up far too often to consider the functions reliable enough for
    general use.
    """
    # Reasons to avoid using urllib.basejoin() and urlparse.urljoin():
    # - Both are partial implementations of long-obsolete specs.
    # - Both accept relative URLs as the base, which no spec allows.
    # - urllib.basejoin() mishandles the '' and '..' references.
    # - If the base URL uses a non-hierarchical or relative path,
    #    or if the URL scheme is unrecognized, the result is not
    #    always as expected (partly due to issues in RFC 1808).
    # - If the authority component of a 'file' URI is empty,
    #    the authority component is removed altogether. If it was
    #    not present, an empty authority component is in the result.
    # - '.' and '..' segments are not always collapsed as well as they
    #    should be (partly due to issues in RFC 1808).
    # - Effective Python 2.4, urllib.basejoin() *is* urlparse.urljoin(),
    #    but urlparse.urljoin() is still based on RFC 1808.

    # This procedure is based on the pseudocode in RFC 3986 sec. 5.2.
    #
    # ensure base URI is absolute
    if not baseUri:
        raise ValueError('baseUri is required and must be a non empty string')
    if not IsAbsolute(baseUri):
        raise ValueError('%r is not an absolute URI' % baseUri)
    # shortcut for the simplest same-document reference cases
    if uriRef == '' or uriRef[0] == '#':
        return baseUri.split('#')[0] + uriRef
    # ensure a clean slate
    tScheme = tAuth = tPath = tQuery = None
    # parse the reference into its components
    (rScheme, rAuth, rPath, rQuery, rFrag) = SplitUriRef(uriRef)
    # if the reference is absolute, eliminate '.' and '..' path segments
    # and skip to the end
    if rScheme is not None:
        tScheme = rScheme
        tAuth = rAuth
        tPath = RemoveDotSegments(rPath)
        tQuery = rQuery
    else:
        # the base URI's scheme, and possibly more, will be inherited
        (bScheme, bAuth, bPath, bQuery, bFrag) = SplitUriRef(baseUri)
        # if the reference is a net-path, just eliminate '.' and '..' path
        # segments; no other changes needed.
        if rAuth is not None:
            tAuth = rAuth
            tPath = RemoveDotSegments(rPath)
            tQuery = rQuery
        # if it's not a net-path, we need to inherit pieces of the base URI
        else:
            # use base URI's path if the reference's path is empty
            if not rPath:
                tPath = bPath
                # use the reference's query if it is defined -- even when
                # it is the empty string, as in the reference "?" -- or
                # else the base URI's (RFC 3986 sec. 5.2.2).  An and/or
                # expression would wrongly treat '' as undefined here.
                if rQuery is not None:
                    tQuery = rQuery
                else:
                    tQuery = bQuery
            # the reference's path is not empty
            else:
                # just use the reference's path if it's absolute
                if rPath[0] == '/':
                    tPath = RemoveDotSegments(rPath)
                # merge the reference's relative path with the base URI's path
                else:
                    if bAuth is not None and not bPath:
                        tPath = '/' + rPath
                    else:
                        tPath = bPath[:bPath.rfind('/')+1] + rPath
                    tPath = RemoveDotSegments(tPath)
                # use the reference's query
                tQuery = rQuery
            # since the reference isn't a net-path,
            # use the authority from the base URI
            tAuth = bAuth
        # inherit the scheme from the base URI
        tScheme = bScheme
    # always use the reference's fragment (no separate variable needed)

    # now compose the target URI (RFC 3986 sec. 5.3)
    return UnsplitUriRef((tScheme, tAuth, tPath, tQuery, rFrag))
|
||||||
|
|
||||||
|
|
||||||
|
REG_NAME_HOST_PATTERN = re.compile(r"^(?:(?:[0-9A-Za-z\-_\.!~*'();&=+$,]|(?:%[0-9A-Fa-f]{2}))*)$")

def MakeUrllibSafe(uriRef):
    """
    Makes the given RFC 3986-conformant URI reference safe for passing
    to legacy urllib functions. The result may not be a valid URI.

    As of Python 2.3.3, urllib.urlopen() does not fully support
    internationalized domain names, it does not strip fragment components,
    and on Windows, it expects file URIs to use '|' instead of ':' in the
    path component corresponding to the drivespec. It also relies on
    urllib.unquote(), which mishandles unicode arguments. This function
    produces a URI reference that will work around these issues, although
    the IDN workaround is limited to Python 2.3 and up.

    The fragment component, if any, is dropped from the result.

    Raises ValueError if the URI reference is Unicode and erroneously
    contains non-ASCII characters.
    """
    # IDN support requires decoding any percent-encoded octets in the
    # host part (if it's a reg-name) of the authority component, and when
    # doing DNS lookups, applying IDNA encoding to that string first.
    # As of Python 2.3, there is an IDNA codec, and the socket and httplib
    # modules accept Unicode strings and apply IDNA encoding automatically
    # where necessary. However, urllib.urlopen() has not yet been updated
    # to do the same; it raises an exception if you give it a Unicode
    # string, and does no conversion on non-Unicode strings, meaning you
    # have to give it an IDNA string yourself. We will only support it on
    # Python 2.3 and up.
    if isinstance(uriRef, unicode):
        try:
            uriRef = uriRef.encode('us-ascii') # parts of urllib are not unicode safe
        except UnicodeError:
            raise ValueError("uri %r must consist of ASCII characters." % uriRef)
    (scheme, auth, path, query, frag) = urlparse.urlsplit(uriRef)

    # Separate userinfo from host[:port].  Cut at the LAST '@' so that a
    # stray '@' inside the userinfo cannot break the two-way unpacking.
    userinfo = None
    hostport = auth
    if auth:
        at = auth.rfind('@')
        if at > -1:
            userinfo = auth[:at]
            hostport = auth[at+1:]

    # Separate host from port.  An IP-literal such as "[::1]" contains
    # colons of its own, so only a ':' following the closing bracket
    # introduces the port.
    host = hostport
    port = None
    if hostport:
        if hostport[:1] == '[':
            close = hostport.find(']')
            if close > -1 and hostport[close+1:close+2] == ':':
                host = hostport[:close+1]
                port = hostport[close+2:]
        else:
            colon = hostport.rfind(':')
            if colon > -1:
                host = hostport[:colon]
                port = hostport[colon+1:]

    # see if host is a reg-name, as opposed to IPv4 or IPv6 addr.
    if host and REG_NAME_HOST_PATTERN.match(host):
        # percent-encoded hostnames will always fail DNS lookups
        host = urllib.unquote(host) #PercentDecode(host)
        # IDNA-encode if possible.
        # We shouldn't do this for schemes that don't need DNS lookup,
        # but are there any (that you'd be calling urlopen for)?
        if sys.version_info[0:2] >= (2, 3):
            if isinstance(host, str):
                host = host.decode('utf-8')
            host = host.encode('idna')
        # reassemble the authority with the new hostname
        # (percent-decoded, and possibly IDNA-encoded)
        auth = ''
        if userinfo:
            auth += userinfo + '@'
        auth += host
        if port:
            auth += ':' + port

    # On Windows, ensure that '|', not ':', is used in a drivespec.
    if os.name == 'nt' and scheme == 'file':
        path = path.replace(':', '|', 1)

    # Note that we drop fragment, if any. See RFC 3986 sec. 3.5.
    uri = urlparse.urlunsplit((scheme, auth, path, query, None))

    return uri
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def BaseJoin(base, uriRef):
    """
    Merges a base URI reference with another URI reference, returning a
    new URI reference.

    Works like Absolutize() with the arguments swapped, except that the
    base may be any URI reference, even a relative one.  A base without a
    scheme is resolved as though it had one, and that placeholder scheme
    is stripped from the result again unless uriRef supplied a scheme of
    its own.  So when neither argument has a scheme, neither does the
    result.

    The name reflects the resemblance to urllib.basejoin(); unlike that
    function, path merging, dot-segment removal and query/fragment
    inheritance follow rfc3986.

    WARNING: This function exists only to serve two needs inside 4Suite:
    (1) absolutizing against base URIs stored (inappropriately) as
    absolute paths in the subjects of RDF statements, and (2) resolving
    relative repo paths in a product setup.xml against an externally set
    path.  It will probably disappear along with those needs, so using it
    is not advisable.
    """
    # a proper absolute base needs no special treatment
    if IsAbsolute(base):
        return Absolutize(uriRef, base)

    # lend the base a throwaway scheme so that Absolutize() accepts it
    placeholder = 'basejoin'
    joined = Absolutize(uriRef, '%s:%s' % (placeholder, base))
    if IsAbsolute(uriRef):
        # the result carries uriRef's own scheme; nothing to strip
        return joined
    # no scheme in, no scheme out: cut off "<placeholder>:"
    return joined[len(placeholder)+1:]
|
||||||
|
|
||||||
|
|
||||||
|
def RemoveDotSegments(path):
    """
    Helper for Absolutize() implementing the remove_dot_segments function
    of RFC 3986 sec. 5.2.  Most '.' and '..' segments are collapsed out of
    the path; empty segments are kept.  It is meant for use during path
    merging and may give surprising results on its own.  Use
    NormalizePathSegments() or NormalizePathSegmentsInUri() if more
    general normalization is desired.

    Semi-private, not for general use.  Works on lists of segments rather
    than walking the string as the spec's pseudocode does.
    """
    # a path that is nothing but "." or ".." collapses to the empty string
    if path in ('.', '..'):
        return path[0:0] # preserves string type

    # strip every leading "./" and "../"
    while 1:
        if path[:2] == './':
            path = path[2:]
        elif path[:3] == '../':
            path = path[3:]
        else:
            break

    # Remember a leading slash and drop it, so that split() does not
    # produce an ambiguous empty first segment.
    rooted = path[:1] == '/'
    if rooted:
        path = path[1:]

    # a trailing "/." becomes just "/"
    if path[-2:] == '/.':
        path = path[:-1]

    # walk the segments left to right, collecting the ones to keep
    pieces = path.split('/')
    final = len(pieces) - 1
    kept = []
    for pos, piece in enumerate(pieces):
        if piece == '..':
            # '..' cancels the previously kept segment, if there is one;
            # with nothing to cancel it is kept only in a relative path
            if kept:
                kept.pop()
            elif not rooted:
                kept.append(piece)
            # a '..' in last position must leave a trailing '/'
            if pos == final:
                kept.append('')
        elif piece != '.':
            # '.' is dropped; everything else, even '', is kept
            kept.append(piece)

    # put the kept segments back together
    prefix = ''
    if rooted:
        prefix = '/'
    return prefix + '/'.join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
SCHEME_PATTERN = re.compile(r'([a-zA-Z][a-zA-Z0-9+\-.]*):')

def GetScheme(uriRef):
    """
    Extracts just the scheme from a URI reference, as cheaply as possible.

    Returns the scheme as a string (without the trailing ':'), or None
    when the reference does not start with a scheme.
    """
    # A regex was the fastest option measured.  Over 50,000 different
    # URIs on a 1.0-GHz PIII (FreeBSD 4.7, Python 2.2.1) this took 0.95s,
    # or 0.05s when there was no scheme to find, compared with
    #   urllib.splittype()[0]        1.5s always;
    #   Ft.Lib.Uri.SplitUriRef()[0]  2.5s always;
    #   urlparse.urlparse()[0]       3.5s always.
    found = SCHEME_PATTERN.match(uriRef)
    if found is not None:
        return found.group(1)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def IsAbsolute(identifier):
    """
    Tells whether a string believed to be a URI or URI reference is
    absolute (as per RFC 2396) rather than relative -- that is, whether
    it has a scheme.
    """
    # Reuse GetScheme() rather than compiling another massive regex.
    scheme = GetScheme(identifier)
    return scheme is not None
|
@ -0,0 +1 @@
|
|||||||
|
"Directory for SAX version 2 drivers."
|
@ -0,0 +1,333 @@
|
|||||||
|
"""
|
||||||
|
SAX driver for the Java SAX parsers. Can only be used in Jython.
|
||||||
|
|
||||||
|
$Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $
|
||||||
|
"""
|
||||||
|
|
||||||
|
# --- Initialization
|
||||||
|
|
||||||
|
version = "0.10"
|
||||||
|
revision = "$Revision: 1.5 $"
|
||||||
|
|
||||||
|
import string
|
||||||
|
from xml.sax import xmlreader, saxutils
|
||||||
|
from xml.sax.handler import feature_namespaces, feature_namespace_prefixes
|
||||||
|
from xml.sax import _exceptions
|
||||||
|
|
||||||
|
# we only work in jython
|
||||||
|
import sys
|
||||||
|
if sys.platform[:4] != "java":
|
||||||
|
raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None)
|
||||||
|
del sys
|
||||||
|
|
||||||
|
# get the necessary Java SAX classes
|
||||||
|
try:
|
||||||
|
from org.python.core import FilelikeInputStream
|
||||||
|
from org.xml.sax.helpers import XMLReaderFactory
|
||||||
|
from org.xml import sax as javasax
|
||||||
|
except ImportError:
|
||||||
|
raise _exceptions.SAXReaderNotAvailable("SAX is not on the classpath", None)
|
||||||
|
|
||||||
|
# get some JAXP stuff
|
||||||
|
try:
|
||||||
|
from javax.xml.parsers import SAXParserFactory, ParserConfigurationException
|
||||||
|
factory = SAXParserFactory.newInstance()
|
||||||
|
jaxp = 1
|
||||||
|
except ImportError:
|
||||||
|
jaxp = 0
|
||||||
|
|
||||||
|
from java.lang import String
|
||||||
|
|
||||||
|
|
||||||
|
def _wrap_sax_exception(e):
    """Build a Python SAXParseException from the parse exception `e`.

    The position attributes of `e` (columnNumber, lineNumber, publicId,
    systemId) are copied into a SimpleLocator that is attached to the
    new exception.
    """
    where = SimpleLocator(e.columnNumber,
                          e.lineNumber,
                          e.publicId,
                          e.systemId)
    return _exceptions.SAXParseException(e.message, e.exception, where)
|
||||||
|
|
||||||
|
class JyErrorHandlerWrapper(javasax.ErrorHandler):
    """Java-side ErrorHandler that forwards to a Python error handler.

    Each incoming exception is converted with _wrap_sax_exception() and
    passed to the method of the same name on the wrapped handler.
    """

    def __init__(self, err_handler):
        self._err_handler = err_handler

    def error(self, exc):
        wrapped = _wrap_sax_exception(exc)
        self._err_handler.error(wrapped)

    def fatalError(self, exc):
        wrapped = _wrap_sax_exception(exc)
        self._err_handler.fatalError(wrapped)

    def warning(self, exc):
        wrapped = _wrap_sax_exception(exc)
        self._err_handler.warning(wrapped)
|
||||||
|
|
||||||
|
class JyInputSourceWrapper(javasax.InputSource):
    """Java InputSource built from one of the Python-side input forms.

    `source` may be a string (byte or unicode) used as the system id, a
    file-like object (anything with a `read` attribute), or an object
    offering the xml.sax.xmlreader.InputSource accessors (getByteStream,
    getSystemId, getPublicId, getEncoding).
    """

    def __init__(self, source):
        # basestring rather than str: a unicode system id must take this
        # branch too, otherwise it falls through to the InputSource
        # branch and fails on getByteStream().
        if isinstance(source, basestring):
            javasax.InputSource.__init__(self, source)
        elif hasattr(source, "read"):#file like object
            f = source
            javasax.InputSource.__init__(self, FilelikeInputStream(f))
            if hasattr(f, "name"):
                self.setSystemId(f.name)
        else:#xml.sax.xmlreader.InputSource object
            #Use byte stream constructor if possible so that Xerces won't attempt to open
            #the url at systemId unless it's really there
            if source.getByteStream():
                javasax.InputSource.__init__(self,
                                             FilelikeInputStream(source.getByteStream()))
            else:
                javasax.InputSource.__init__(self)
            if source.getSystemId():
                self.setSystemId(source.getSystemId())
            self.setPublicId(source.getPublicId())
            self.setEncoding(source.getEncoding())
|
||||||
|
|
||||||
|
class JyEntityResolverWrapper(javasax.EntityResolver):
    """Java-side EntityResolver that delegates to a Python resolver."""

    def __init__(self, entityResolver):
        self._resolver = entityResolver

    def resolveEntity(self, pubId, sysId):
        # ask the Python resolver, then wrap its answer for the Java parser
        resolved = self._resolver.resolveEntity(pubId, sysId)
        return JyInputSourceWrapper(resolved)
|
||||||
|
|
||||||
|
class JyDTDHandlerWrapper(javasax.DTDHandler):
    """Java-side DTDHandler that passes each event to a Python handler."""

    def __init__(self, dtdHandler):
        self._handler = dtdHandler

    def notationDecl(self, name, publicId, systemId):
        target = self._handler
        target.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, notationName):
        target = self._handler
        target.unparsedEntityDecl(name, publicId, systemId, notationName)
|
||||||
|
|
||||||
|
class SimpleLocator(xmlreader.Locator):
    """Locator holding a fixed position supplied at construction time."""

    def __init__(self, colNum, lineNum, pubId, sysId):
        self.colNum, self.lineNum = colNum, lineNum
        self.pubId, self.sysId = pubId, sysId

    def getColumnNumber(self):
        "Return the stored column number."
        return self.colNum

    def getLineNumber(self):
        "Return the stored line number."
        return self.lineNum

    def getPublicId(self):
        "Return the stored public identifier."
        return self.pubId

    def getSystemId(self):
        "Return the stored system identifier."
        return self.sysId
|
||||||
|
|
||||||
|
# --- JavaSAXParser
|
||||||
|
class JavaSAXParser(xmlreader.XMLReader, javasax.ContentHandler):
    """SAX driver for the Java SAX parsers.

    Acts as the Python XMLReader and, at the same time, as the Java
    ContentHandler registered on the underlying parser: content events
    received from Java are forwarded to the Python content handler.
    """

    def __init__(self, jdriver = None):
        xmlreader.XMLReader.__init__(self)
        self._parser = create_java_parser(jdriver)
        self._parser.setFeature(feature_namespaces, 0)
        self._parser.setFeature(feature_namespace_prefixes, 0)
        self._parser.setContentHandler(self)
        # reusable attribute adapters; startElement() swaps the Java
        # attributes object into whichever one is in use
        self._nsattrs = AttributesNSImpl()
        self._attrs = AttributesImpl()
        # install wrappers around the handlers XMLReader.__init__ set up
        self.setEntityResolver(self.getEntityResolver())
        self.setErrorHandler(self.getErrorHandler())
        self.setDTDHandler(self.getDTDHandler())

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        self._parser.parse(JyInputSourceWrapper(source))

    def getFeature(self, name):
        return self._parser.getFeature(name)

    def setFeature(self, name, state):
        self._parser.setFeature(name, state)

    def getProperty(self, name):
        return self._parser.getProperty(name)

    def setProperty(self, name, value):
        self._parser.setProperty(name, value)

    def setEntityResolver(self, resolver):
        self._parser.entityResolver = JyEntityResolverWrapper(resolver)
        xmlreader.XMLReader.setEntityResolver(self, resolver)

    def setErrorHandler(self, err_handler):
        self._parser.errorHandler = JyErrorHandlerWrapper(err_handler)
        xmlreader.XMLReader.setErrorHandler(self, err_handler)

    def setDTDHandler(self, dtd_handler):
        self._parser.setDTDHandler(JyDTDHandlerWrapper(dtd_handler))
        xmlreader.XMLReader.setDTDHandler(self, dtd_handler)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()
        # sample the namespace feature once per document; the element
        # callbacks below dispatch on it
        self._namespaces = self._parser.getFeature(feature_namespaces)

    def startElement(self, uri, lname, qname, attrs):
        if self._namespaces:
            self._nsattrs._attrs = attrs
            self._cont_handler.startElementNS((uri or None, lname), qname,
                                              self._nsattrs)
        else:
            self._attrs._attrs = attrs
            self._cont_handler.startElement(qname, self._attrs)

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def characters(self, char, start, len):
        # unicode(), not str(): character data must reach the handler as
        # text, and str() cannot carry non-ASCII characters intact
        self._cont_handler.characters(unicode(String(char, start, len)))

    def ignorableWhitespace(self, char, start, len):
        # same conversion as characters()
        self._cont_handler.ignorableWhitespace(unicode(String(char, start, len)))

    def endElement(self, uri, lname, qname):
        if self._namespaces:
            self._cont_handler.endElementNS((uri or None, lname), qname)
        else:
            self._cont_handler.endElement(qname)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def endDocument(self):
        self._cont_handler.endDocument()

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)
|
||||||
|
|
||||||
|
class AttributesImpl:
    """Dictionary-like view over a Java SAX Attributes object.

    Names are qualified names.  The wrapped object is stored in `_attrs`
    and is expected to provide getLength, getType, getValue, getQName
    and getIndex; JavaSAXParser replaces `_attrs` for every element.
    """

    def __init__(self, attrs = None):
        self._attrs = attrs

    def getLength(self):
        return self._attrs.getLength()

    def getType(self, name):
        return self._attrs.getType(name)

    def getValue(self, name):
        "Return the value for `name`; raise KeyError if there is none."
        value = self._attrs.getValue(name)
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getQNames(self):
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getValueByQName(self, qname):
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return self._attrs.getValue(idx)

    def getNameByQName(self, qname):
        # name and qname coincide here; only existence is checked
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return qname

    def getQNameByName(self, name):
        # name and qname coincide here; only existence is checked
        idx = self._attrs.getIndex(name)
        if idx == -1:
            raise KeyError(name)
        return name

    def __len__(self):
        return self._attrs.getLength()

    def __getitem__(self, name):
        return self.getValue(name)

    def __contains__(self, name):
        # Without this, "name in attrs" would fall back to calling
        # __getitem__ with 0, 1, 2, ... instead of looking the name up.
        return self.has_key(name)

    def keys(self):
        return self.getNames()

    def copy(self):
        # shallow: the copy shares the wrapped attributes object
        return self.__class__(self._attrs)

    def items(self):
        return [(name, self[name]) for name in self.getNames()]

    def values(self):
        return [self.getValue(name) for name in self.getNames()]

    def get(self, name, alt=None):
        try:
            return self.getValue(name)
        except KeyError:
            return alt

    def has_key(self, name):
        try:
            self.getValue(name)
            return True
        except KeyError:
            return False
|
||||||
|
|
||||||
|
# --- AttributesNSImpl
|
||||||
|
|
||||||
|
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl.

    Names are (namespace URI, local name) pairs; the wrapped object is
    additionally expected to provide getURI and getLocalName.
    """

    def __init__(self, attrs=None):
        AttributesImpl.__init__(self, attrs)

    def getType(self, name):
        return self._attrs.getType(name[0], name[1])

    def getValue(self, name):
        "Return the value for the (uri, localname) pair; KeyError if none."
        value = self._attrs.getValue(name[0], name[1])
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        return [(self._attrs.getURI(idx), self._attrs.getLocalName(idx))
                for idx in range(len(self))]

    def getNameByQName(self, qname):
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return (self._attrs.getURI(idx), self._attrs.getLocalName(idx))

    def getQNameByName(self, name):
        idx = self._attrs.getIndex(name[0], name[1])
        if idx == -1:
            # Call form on purpose: "raise KeyError, name" would unpack
            # the (uri, localname) tuple into separate exception
            # arguments, unlike the KeyError raised by getValue().
            raise KeyError(name)
        return self._attrs.getQName(idx)

    def getQNames(self):
        return [self._attrs.getQName(idx) for idx in range(len(self))]
|
||||||
|
|
||||||
|
# ---
|
||||||
|
|
||||||
|
def create_java_parser(jdriver = None):
|
||||||
|
try:
|
||||||
|
if jdriver:
|
||||||
|
return XMLReaderFactory.createXMLReader(jdriver)
|
||||||
|
elif jaxp:
|
||||||
|
return factory.newSAXParser().getXMLReader()
|
||||||
|
else:
|
||||||
|
return XMLReaderFactory.createXMLReader()
|
||||||
|
except ParserConfigurationException, e:
|
||||||
|
raise _exceptions.SAXReaderNotAvailable(e.getMessage())
|
||||||
|
except javasax.SAXException, e:
|
||||||
|
raise _exceptions.SAXReaderNotAvailable(e.getMessage())
|
||||||
|
|
||||||
|
def create_parser(jdriver = None):
    "Return a new JavaSAXParser, passing `jdriver` through to it."
    parser = JavaSAXParser(jdriver)
    return parser
|
345
extensions/jython/module/MOD-INF/lib/jython/xml/sax/handler.py
Normal file
345
extensions/jython/module/MOD-INF/lib/jython/xml/sax/handler.py
Normal file
@ -0,0 +1,345 @@
|
|||||||
|
"""
|
||||||
|
This module contains the core classes of version 2.0 of SAX for Python.
|
||||||
|
This file provides only default classes with absolutely minimum
|
||||||
|
functionality, from which drivers and applications can be subclassed.
|
||||||
|
|
||||||
|
Many of these classes are empty and are included only as documentation
|
||||||
|
of the interfaces.
|
||||||
|
|
||||||
|
$Id: handler.py,v 1.5 2002/02/14 08:09:36 loewis Exp $
|
||||||
|
"""
|
||||||
|
|
||||||
|
version = '2.0beta'
|
||||||
|
|
||||||
|
#============================================================================
|
||||||
|
#
|
||||||
|
# HANDLER INTERFACES
|
||||||
|
#
|
||||||
|
#============================================================================
|
||||||
|
|
||||||
|
# ===== ERRORHANDLER =====
|
||||||
|
|
||||||
|
class ErrorHandler:
    """Basic interface for SAX error handlers.

    Register an object implementing this interface with your XMLReader
    and the parser will report every warning and error through it.
    Three severities exist: warnings, (possibly) recoverable errors and
    unrecoverable errors.  Each method receives a SAXParseException as
    its only parameter.
    """

    def error(self, exception):
        """Handle a recoverable error; the default re-raises it."""
        raise exception

    def fatalError(self, exception):
        """Handle a non-recoverable error; the default re-raises it."""
        raise exception

    def warning(self, exception):
        """Handle a warning; the default prints it to standard output."""
        # Parenthesised so the line works both as a Python 2 print
        # statement (printing the single value) and as a function call
        # on interpreters where print is a function.
        print(exception)
|
||||||
|
|
||||||
|
|
||||||
|
# ===== CONTENTHANDLER =====
|
||||||
|
|
||||||
|
class ContentHandler:
    """Callback interface for the logical content of a document.

    This is the principal SAX callback interface and the one that
    matters most to applications.  Events are delivered in the same
    order as the corresponding information appears in the document.
    """

    def __init__(self):
        # No locator is known until the parser supplies one.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Remember the locator the parser offers for document events.

        SAX parsers are strongly encouraged, though not strictly
        required, to provide a locator.  A parser that provides one must
        call this method before it invokes any other method of this
        interface.

        With a locator the application can find the end position of any
        document-related event even when the parser reports no error.
        Applications typically use it when reporting their own errors,
        for example character content that violates a business rule.
        The information is probably not sufficient for use with a
        search engine.

        The locator is only guaranteed to be accurate while one of the
        events of this interface is being delivered; do not consult it
        at any other time.
        """
        self._locator = locator

    def startDocument(self):
        """Receive notification that a document is beginning.

        Invoked exactly once by the SAX parser, ahead of every other
        method of this interface or of DTDHandler (setDocumentLocator
        excepted).
        """

    def endDocument(self):
        """Receive notification that a document has ended.

        Invoked exactly once, as the very last call of the parse.  The
        parser does not call it until it has either given up parsing
        because of an unrecoverable error or reached the end of input.
        """

    def startPrefixMapping(self, prefix, uri):
        """Open the scope of a prefix-to-URI Namespace mapping.

        Normal Namespace processing does not need this event: when the
        http://xml.org/sax/features/namespaces feature is true (the
        default) the SAX XML reader replaces prefixes in element and
        attribute names by itself.

        Prefixes may also occur in character data or attribute values,
        where automatic expansion is not safe.  The start/endPrefixMapping
        events hand the application what it needs to expand prefixes in
        those places itself.

        These events are not guaranteed to nest properly relative to
        each other.  Every startPrefixMapping event precedes the
        matching startElement event and every endPrefixMapping event
        follows the matching endElement event; beyond that the order is
        unspecified.
        """

    def endPrefixMapping(self, prefix):
        """Close the scope of a prefix-to-URI mapping.

        See startPrefixMapping.  The event always follows the matching
        endElement event; the relative order of endPrefixMapping events
        is otherwise unspecified.
        """

    def startElement(self, name, attrs):
        """Signal the start of an element in non-namespace mode.

        name  -- raw XML 1.0 name of the element type, as a string.
        attrs -- Attributes instance holding the element's attributes.
        """

    def endElement(self, name):
        """Signal the end of an element in non-namespace mode.

        name -- element type name, exactly as given to startElement.
        """

    def startElementNS(self, name, qname, attrs):
        """Signal the start of an element in namespace mode.

        name  -- element type name as a (uri, localname) tuple; the uri
                 part is None for elements without a namespace.
        qname -- raw XML 1.0 name used in the source document.
        attrs -- Attributes instance holding the element's attributes.
        """

    def endElementNS(self, name, qname):
        """Signal the end of an element in namespace mode.

        name -- element type name, exactly as given to startElementNS.
        """

    def characters(self, content):
        """Receive a chunk of character data.

        The parser calls this once per chunk.  Contiguous character data
        may arrive as a single chunk or split across several, but all
        characters of one event must stem from the same external entity
        so that the Locator stays meaningful.
        """

    def ignorableWhitespace(self, whitespace):
        """Receive a chunk of ignorable whitespace in element content.

        Validating parsers must report ignorable whitespace (W3C XML 1.0
        recommendation, section 2.10) through this method.
        Non-validating parsers may do so as well if they can parse and
        use content models.

        Contiguous whitespace may arrive as one chunk or as several, but
        all characters of one event must stem from the same external
        entity so that the Locator stays meaningful.

        The application must not attempt to read from the array outside
        of the specified range.
        """

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        Invoked once per processing instruction found; these may occur
        before or after the main document element.

        An XML declaration (XML 1.0, section 2.8) or a text declaration
        (XML 1.0, section 4.3.1) should never be reported here.
        """

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        Invoked once per skipped entity.  Non-validating processors may
        skip entities whose declarations they have not seen, for
        instance because the entity was declared in an external DTD
        subset.  Any processor may skip external entities, depending on
        the values of the
        http://xml.org/sax/features/external-general-entities and
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """
|
||||||
|
|
||||||
|
|
||||||
|
# ===== DTDHandler =====
|
||||||
|
|
||||||
|
class DTDHandler:
    """Receiver for DTD events.

    Only the DTD events needed for basic parsing are covered: unparsed
    entities and attributes.
    """

    def notationDecl(self, name, publicId, systemId):
        """Handle a notation declaration event."""

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Handle an unparsed entity declaration event."""
|
||||||
|
|
||||||
|
|
||||||
|
# ===== ENTITYRESOLVER =====
|
||||||
|
|
||||||
|
class EntityResolver:
    """Basic interface for resolving entities.

    Register an object implementing this interface with your Parser and
    the parser will call its method to resolve every external entity.
    DefaultHandler implements this interface with the default behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity.

        The result is either the system identifier to read from, as a
        string, or an InputSource to read from.  This default simply
        hands *systemId* back.
        """
        resolved = systemId
        return resolved
|
||||||
|
|
||||||
|
|
||||||
|
#============================================================================
|
||||||
|
#
|
||||||
|
# CORE FEATURES
|
||||||
|
#
|
||||||
|
#============================================================================
|
||||||
|
|
||||||
|
# Namespace processing.
#   true:   perform Namespace processing (default).
#   false:  optionally do not perform Namespace processing
#           (implies namespace-prefixes).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespaces = "http://xml.org/sax/features/namespaces"

# Reporting of prefixed names and Namespace declaration attributes.
#   true:   report the original prefixed names and the attributes used
#           for Namespace declarations.
#   false:  do not report attributes used for Namespace declarations,
#           and optionally do not report original prefixed names
#           (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"

# String interning.
#   true:   all element names, prefixes, attribute names, Namespace URIs
#           and local names are interned using the built-in intern
#           function.
#   false:  names are not necessarily interned, although they may be
#           (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_string_interning = "http://xml.org/sax/features/string-interning"

# Validation.
#   true:   report all validation errors (implies
#           external-general-entities and external-parameter-entities).
#   false:  do not report validation errors.
#   access: (parsing) read-only; (not parsing) read/write
feature_validation = "http://xml.org/sax/features/validation"

# External general entities.
#   true:   include all external general (text) entities.
#   false:  do not include external general entities.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_ges = "http://xml.org/sax/features/external-general-entities"

# External parameter entities.
#   true:   include all external parameter entities, including the
#           external DTD subset.
#   false:  do not include any external parameter entities, even the
#           external DTD subset.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"

# Every core feature identifier, in declaration order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
|
||||||
|
|
||||||
|
|
||||||
|
#============================================================================
|
||||||
|
#
|
||||||
|
# CORE PROPERTIES
|
||||||
|
#
|
||||||
|
#============================================================================
|
||||||
|
|
||||||
|
# Lexical handler.
#   data type:   xml.sax.sax2lib.LexicalHandler
#   description: an optional extension handler for lexical events like
#                comments.
#   access:      read/write
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"

# Declaration handler.
#   data type:   xml.sax.sax2lib.DeclHandler
#   description: an optional extension handler for DTD-related events
#                other than notations and unparsed entities.
#   access:      read/write
property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"

# DOM node.
#   data type:   org.w3c.dom.Node
#   description: when parsing, the current DOM node being visited if
#                this is a DOM iterator; when not parsing, the root DOM
#                node for iteration.
#   access:      (parsing) read-only; (not parsing) read/write
property_dom_node = "http://xml.org/sax/properties/dom-node"

# XML string.
#   data type:   String
#   description: the literal string of characters that was the source
#                for the current event.
#   access:      read-only
property_xml_string = "http://xml.org/sax/properties/xml-string"

# Input encoding.
#   data type:     String
#   description:   the name of the encoding to assume for input data.
#   access:        write: set the encoding, e.g. established by a
#                         higher-level protocol.  May change during
#                         parsing (e.g. after processing a META tag).
#                  read:  return the current encoding (possibly
#                         established through auto-detection).
#   initial value: UTF-8
property_encoding = "http://www.python.org/sax/properties/encoding"

# Interning dictionary.
#   data type:   Dictionary
#   description: the dictionary used to intern common strings in the
#                document.
#   access:      write: request that the parser uses a specific
#                       dictionary, to allow interning across different
#                       documents.
#                read:  return the current interning dictionary, or None.
property_interning_dict = "http://www.python.org/sax/properties/interning-dict"

# Every core property identifier.  The order is kept exactly as it was
# originally listed (dom-node precedes declaration-handler).
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]
|
430
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py
Normal file
430
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py
Normal file
@ -0,0 +1,430 @@
|
|||||||
|
"""
|
||||||
|
This module contains the core classes of version 2.0 of SAX for Python.
|
||||||
|
This file provides only default classes with absolutely minimum
|
||||||
|
functionality, from which drivers and applications can be subclassed.
|
||||||
|
|
||||||
|
Many of these classes are empty and are included only as documentation
|
||||||
|
of the interfaces.
|
||||||
|
|
||||||
|
$Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $
|
||||||
|
"""
|
||||||
|
|
||||||
|
version = '2.0beta'
|
||||||
|
|
||||||
|
# A number of interfaces used to live in saxlib, but are now in
|
||||||
|
# various other modules for Python 2 compatibility. If nobody uses
|
||||||
|
# them here any longer, the references can be removed
|
||||||
|
|
||||||
|
from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver
|
||||||
|
from xmlreader import XMLReader, InputSource, Locator, IncrementalParser
|
||||||
|
from _exceptions import *
|
||||||
|
|
||||||
|
from handler import \
|
||||||
|
feature_namespaces,\
|
||||||
|
feature_namespace_prefixes,\
|
||||||
|
feature_string_interning,\
|
||||||
|
feature_validation,\
|
||||||
|
feature_external_ges,\
|
||||||
|
feature_external_pes,\
|
||||||
|
all_features,\
|
||||||
|
property_lexical_handler,\
|
||||||
|
property_declaration_handler,\
|
||||||
|
property_dom_node,\
|
||||||
|
property_xml_string,\
|
||||||
|
all_properties
|
||||||
|
|
||||||
|
#============================================================================
|
||||||
|
#
|
||||||
|
# MAIN INTERFACES
|
||||||
|
#
|
||||||
|
#============================================================================
|
||||||
|
|
||||||
|
# ===== XMLFILTER =====
|
||||||
|
|
||||||
|
class XMLFilter(XMLReader):
    """Interface for a SAX2 parser filter.

    A filter is an XMLReader whose events come from another XMLReader
    (possibly itself a filter) instead of from a primary source such as
    a document or another non-SAX data source.  A filter may alter the
    event stream before handing it on to its handlers.
    """

    def __init__(self, parent = None):
        """Create the filter; the parent may be supplied right away."""
        XMLReader.__init__(self)
        self._parent = parent

    def setParent(self, parent):
        """Set the parent XMLReader of this filter.

        The argument may not be None.
        """
        self._parent = parent

    def getParent(self):
        """Return the parent of this filter."""
        return self._parent
|
||||||
|
|
||||||
|
# ===== ATTRIBUTES =====
|
||||||
|
|
||||||
|
class Attributes:
    """Interface for a list of XML attributes, accessible by name.

    Every method here raises NotImplementedError; implementations are
    expected to override all of them.
    """

    def getLength(self):
        """Return the number of attributes in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getType(self, name):
        """Return the type of the attribute called *name*."""
        raise NotImplementedError("This method must be implemented!")

    def getValue(self, name):
        """Return the value of the attribute called *name*."""
        raise NotImplementedError("This method must be implemented!")

    def getValueByQName(self, name):
        """Return the value of the attribute with the given raw (or
        qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNameByQName(self, name):
        """Return the namespace name of the attribute with the given raw
        (or qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNames(self):
        """Return a list of the names of all attributes in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getQNames(self):
        """Return a list of the raw qualified names of all attributes in
        the list."""
        raise NotImplementedError("This method must be implemented!")

    def __len__(self):
        """Alias for getLength."""
        raise NotImplementedError("This method must be implemented!")

    def __getitem__(self, name):
        """Alias for getValue."""
        raise NotImplementedError("This method must be implemented!")

    def keys(self):
        """Return a list of the attribute names in the list."""
        raise NotImplementedError("This method must be implemented!")

    def has_key(self, name):
        """True if the attribute is in the list, false otherwise."""
        raise NotImplementedError("This method must be implemented!")

    def get(self, name, alternative=None):
        """Return the value stored for attribute *name*, or
        *alternative* when it is not available."""
        raise NotImplementedError("This method must be implemented!")

    def copy(self):
        """Return a copy of the Attributes object."""
        raise NotImplementedError("This method must be implemented!")

    def items(self):
        """Return a list of (attribute_name, value) pairs."""
        raise NotImplementedError("This method must be implemented!")

    def values(self):
        """Return a list of all attribute values."""
        raise NotImplementedError("This method must be implemented!")
|
||||||
|
|
||||||
|
|
||||||
|
#============================================================================
|
||||||
|
#
|
||||||
|
# HANDLER INTERFACES
|
||||||
|
#
|
||||||
|
#============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ===== DECLHANDLER =====
|
||||||
|
|
||||||
|
class DeclHandler:
    """Optional SAX2 handler for DTD declaration events.

    Some DTD declarations are already reported through the DTDHandler
    interface.  Everything reported to this handler occurs between the
    startDTD and endDTD events of the LexicalHandler.

    To install a DeclHandler on an XMLReader, call its setProperty
    method with the declaration-handler property identifier (see
    property_declaration_handler, imported by this module).
    NOTE(review): earlier documentation cited the identifier
    http://xml.org/sax/handlers/DeclHandler -- confirm which one readers
    actually accept.
    """

    def attributeDecl(self, elem_name, attr_name, type, value_def, value):
        """Report an attribute type declaration.

        Only the first declaration is reported.

        elem_name -- the element type name.
        attr_name -- the attribute type name.
        type      -- the attribute type: one of the strings "CDATA",
                     "ID", "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS",
                     "ENTITY", "ENTITIES" or "NOTATION", or a list of
                     names for enumerated definitions.
        value_def -- the default declaration: '#IMPLIED', '#REQUIRED',
                     '#FIXED' or None.
        value     -- the attribute's default value as a string, or None
                     if there is none.
        """

    def elementDecl(self, elem_name, content_model):
        """Report an element type declaration.

        Only the first declaration is reported.

        content_model is the string 'EMPTY', the string 'ANY', or a
        (separator, tokens, modifier) tuple describing the content model
        structure: separator is the separator used in the token list
        ('|' or ','), tokens is the list of tokens (element type names,
        or tuples standing for parentheses) and modifier is the quantity
        modifier ('*', '?' or '+').
        """

    def internalEntityDecl(self, name, value):
        """Report an internal entity declaration.

        Only the first declaration of an entity is reported.

        name  -- the entity's name; it begins with '%' for a parameter
                 entity.
        value -- the replacement text of the entity.
        """

    def externalEntityDecl(self, name, public_id, system_id):
        """Report a parsed entity declaration.

        Unparsed entities are reported to the DTDHandler instead.  Only
        the first declaration for each entity is reported.

        name      -- the entity's name; it begins with '%' for a
                     parameter entity.
        public_id -- the public identifier of the entity, or None if
                     none was declared.
        system_id -- the system identifier of the entity.
        """
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# ===== LEXICALHANDLER =====
|
||||||
|
|
||||||
|
class LexicalHandler:
    """Optional SAX2 handler for lexical events.

    A lexical handler receives information about how a document was
    encoded, as opposed to what it contains (which goes to the
    ContentHandler): comments and the boundaries of CDATA marked
    sections, for example.

    To install a LexicalHandler on an XMLReader, call its setProperty
    method with the lexical-handler property identifier (see
    property_lexical_handler, imported by this module).  There is no
    guarantee that the XMLReader will support or recognize the property.
    NOTE(review): earlier documentation cited the identifier
    'http://xml.org/sax/handlers/LexicalHandler' -- confirm which one
    readers actually accept.
    """

    def comment(self, content):
        """Report a comment found anywhere in the document.

        This includes comments in the DTD and outside the document
        element.  content is a string holding the comment's contents.
        """

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if the document has
        an associated DTD.

        A startEntity event is reported before declaration events from
        the external DTD subset, which makes it possible to infer which
        subset a DTD declaration comes from.

        name      -- the name of the document element type.
        public_id -- the public identifier of the DTD, or None if none
                     was supplied.
        system_id -- the system identifier of the external subset, or
                     None if none was supplied.
        """

    def endDTD(self):
        """Signal the end of DTD declarations."""

    def startEntity(self, name):
        """Report the beginning of an entity.

        The start and end of the document entity is not reported.  The
        start and end of the external DTD subset is reported under the
        pseudo-name '[dtd]'.

        Skipped entities are reported through the skippedEntity event of
        the ContentHandler rather than through this event.

        name is the name of the entity; it begins with '%' for a
        parameter entity.
        """

    def endEntity(self, name):
        """Report the end of an entity.

        name is the name of the entity and follows the same conventions
        as for startEntity.
        """

    def startCDATA(self):
        """Report the beginning of a CDATA marked section.

        The contents of the section are reported through the characters
        event.
        """

    def endCDATA(self):
        """Report the end of a CDATA marked section."""
|
||||||
|
|
||||||
|
|
||||||
|
#============================================================================
|
||||||
|
#
|
||||||
|
# SAX 1.0 COMPATIBILITY CLASSES
|
||||||
|
# Note that these are all deprecated.
|
||||||
|
#
|
||||||
|
#============================================================================
|
||||||
|
|
||||||
|
# ===== ATTRIBUTELIST =====
|
||||||
|
|
||||||
|
class AttributeList:
    """Interface for an attribute list.

    Provides information about the attributes of one element; only
    specified or defaulted attributes are reported.  The information is
    valid only for the duration of the DocumentHandler.startElement
    callback, and attributes are not necessarily supplied in the order
    in which they were declared or specified.
    """

    def getLength(self):
        """Return the number of attributes in list."""

    def getName(self, i):
        """Return the name of an attribute in the list."""

    def getType(self, i):
        """Return the type of an attribute in the list.

        The parameter can be either an integer index or an attribute
        name.
        """

    def getValue(self, i):
        """Return the value of an attribute in the list.

        The parameter can be either an integer index or an attribute
        name.
        """

    def __len__(self):
        """Alias for getLength."""

    def __getitem__(self, key):
        """Alias for getName (integer key) and getValue (string key)."""

    def keys(self):
        """Return a list of the attribute names."""

    def has_key(self, key):
        """True if the attribute is in the list, false otherwise."""

    def get(self, key, alternative=None):
        """Return the value stored for attribute *key*, or *alternative*
        when it is not available."""

    def copy(self):
        """Return a copy of the AttributeList."""

    def items(self):
        """Return a list of (attribute_name, value) pairs."""

    def values(self):
        """Return a list of all attribute values."""
|
||||||
|
|
||||||
|
|
||||||
|
# ===== DOCUMENTHANDLER =====
|
||||||
|
|
||||||
|
class DocumentHandler:
    """Handler for general document events.

    This is the main client interface for SAX: it holds the callbacks
    for the most important document events, such as the start and end of
    elements.  Create an object implementing this interface and register
    it with the Parser.  If you do not want to implement the whole
    interface, derive a class from HandlerBase, which implements the
    default functionality.  The location of any document event can be
    found through the Locator interface supplied by
    setDocumentLocator().
    """

    def characters(self, ch, start, length):
        """Handle a character data event."""

    def endDocument(self):
        """Handle an event for the end of a document."""

    def endElement(self, name):
        """Handle an event for the end of an element."""

    def ignorableWhitespace(self, ch, start, length):
        """Handle an event for ignorable whitespace in element content."""

    def processingInstruction(self, target, data):
        """Handle a processing instruction event."""

    def setDocumentLocator(self, locator):
        """Receive an object for locating the origin of SAX document
        events."""

    def startDocument(self):
        """Handle an event for the beginning of a document."""

    def startElement(self, name, atts):
        """Handle an event for the beginning of an element."""
|
||||||
|
|
||||||
|
|
||||||
|
# ===== HANDLERBASE =====
|
||||||
|
|
||||||
|
class HandlerBase(EntityResolver, DTDHandler, DocumentHandler, ErrorHandler):
    """Default base class for handlers.

    Supplies the default behaviour of four SAX interfaces --
    EntityResolver, DTDHandler, DocumentHandler and ErrorHandler -- so
    that, instead of implementing each of them in full, you can extend
    this class and override only the methods you need.  Using this class
    is optional; you are free to implement the interfaces directly.
    """
|
||||||
|
|
||||||
|
|
||||||
|
# ===== PARSER =====
|
||||||
|
|
||||||
|
class Parser:
    """Basic interface implemented by every SAX (Simple API for XML) parser.

    It lets an application register handlers for the different kinds of
    events and start a parse from a URI, a character stream or a byte
    stream.  SAX parsers should also provide a zero-argument constructor.
    """

    def __init__(self):
        # Install the default handler objects so that events always have
        # a receiver, even before the application registers its own.
        self.doc_handler = DocumentHandler()
        self.dtd_handler = DTDHandler()
        self.ent_handler = EntityResolver()
        self.err_handler = ErrorHandler()

    def parse(self, systemId):
        """Parse an XML document from a system identifier."""

    def parseFile(self, fileobj):
        """Parse an XML document from a file-like object."""

    def setDocumentHandler(self, handler):
        """Register an object to receive basic document-related events."""
        self.doc_handler = handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self.dtd_handler = handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self.ent_handler = resolver

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self.err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to localise errors and warnings, but
        a parser that cannot support the requested locale must throw a
        SAX exception.  Applications may request a locale change in the
        middle of a parse.

        This base implementation always raises SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")
|
813
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py
Normal file
813
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py
Normal file
@ -0,0 +1,813 @@
|
|||||||
|
"""
|
||||||
|
A library of useful helper classes to the saxlib classes, for the
|
||||||
|
convenience of application and driver writers.
|
||||||
|
|
||||||
|
$Id: saxutils.py,v 1.37 2005/04/13 14:02:08 syt Exp $
|
||||||
|
"""
|
||||||
|
import os, urlparse, urllib2, types
|
||||||
|
import handler
|
||||||
|
import xmlreader
|
||||||
|
import sys, _exceptions, saxlib
|
||||||
|
|
||||||
|
from xml.Uri import Absolutize, MakeUrllibSafe,IsAbsolute
|
||||||
|
|
||||||
|
# String types accepted as a system identifier.  UnicodeType is missing
# on very old interpreters (1.5 compatibility), so it is added only
# when the types module provides it.
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
|
||||||
|
|
||||||
|
def __dict_replace(s, d):
    """Return s with every key of d replaced by its mapped value.

    The replacements are applied one after another in the dictionary's
    iteration order, so a later replacement operates on the result of
    the earlier ones.
    """
    result = s
    for old in d.keys():
        result = result.replace(old, d[old])
    return result
|
||||||
|
|
||||||
|
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped string; the input string is not modified.
    """
    # The ampersand must be escaped first, otherwise the ampersands
    # introduced by the following replacements would be escaped again.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
|
||||||
|
|
||||||
|
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the unescaped string; the input string is not modified.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must be done last, otherwise text such as "&amp;lt;"
    # would be unescaped twice and end up as "<".
    return data.replace("&amp;", "&")
|
||||||
|
|
||||||
|
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the value including its surrounding quote characters.
    """
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote kinds occur: delimit with double quotes and
            # escape the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters need no
            # escaping at all.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
|
||||||
|
|
||||||
|
# --- DefaultHandler
|
||||||
|
|
||||||
|
class DefaultHandler(
        handler.EntityResolver,
        handler.DTDHandler,
        handler.ContentHandler,
        handler.ErrorHandler):
    """Default base class for SAX2 event handlers.

    It inherits the callback methods of all four handler interfaces so
    that application implementors only override the ones they need.
    Replaces the deprecated SAX1 HandlerBase class.
    """
|
||||||
|
|
||||||
|
# --- Location
|
||||||
|
|
||||||
|
class Location:
    """Snapshot of a position in an XML entity.

    The constructor reads the current column, line, public identifier
    and system identifier from the locator it is given and stores them
    internally, so the values stay fixed even if the locator moves on.
    """

    def __init__(self, locator):
        self.__col = locator.getColumnNumber()
        self.__line = locator.getLineNumber()
        self.__pubid = locator.getPublicId()
        self.__sysid = locator.getSystemId()

    def getColumnNumber(self):
        """Return the stored column number."""
        return self.__col

    def getLineNumber(self):
        """Return the stored line number."""
        return self.__line

    def getPublicId(self):
        """Return the stored public identifier."""
        return self.__pubid

    def getSystemId(self):
        """Return the stored system identifier."""
        return self.__sysid

    def __str__(self):
        """Format the location as "<id>:<line>:<column>".

        The system identifier is preferred over the public identifier;
        "<unknown>" is used when neither is set, and "?" stands in for a
        line or column that is None.
        """
        line = self.__line
        if line is None:
            line = "?"
        col = self.__col
        if col is None:
            col = "?"
        ident = self.__sysid or self.__pubid or "<unknown>"
        return "%s:%s:%s" % (ident, line, col)
|
||||||
|
|
||||||
|
# --- ErrorPrinter
|
||||||
|
|
||||||
|
class ErrorPrinter:
    """A simple error handler that prints messages to a file object.

    Messages go to outfile, which defaults to sys.stderr.  The level
    selects which events are reported: warnings are printed when level
    is at most 0, errors when it is at most 1 and fatal errors when it
    is at most 2.
    """

    def __init__(self, level=0, outfile=sys.stderr):
        self._level = level
        self._outfile = outfile

    def warning(self, exception):
        """Report a warning if the level allows it."""
        if self._level > 0:
            return
        position = self.__getpos(exception)
        self._outfile.write("WARNING in %s: %s\n" %
                            (position, exception.getMessage()))

    def error(self, exception):
        """Report a recoverable error if the level allows it."""
        if self._level > 1:
            return
        position = self.__getpos(exception)
        self._outfile.write("ERROR in %s: %s\n" %
                            (position, exception.getMessage()))

    def fatalError(self, exception):
        """Report a fatal error if the level allows it."""
        if self._level > 2:
            return
        position = self.__getpos(exception)
        self._outfile.write("FATAL ERROR in %s: %s\n" %
                            (position, exception.getMessage()))

    def __getpos(self, exception):
        """Return "sysid:line:column" for parse exceptions, else "<unknown>"."""
        if not isinstance(exception, _exceptions.SAXParseException):
            return "<unknown>"
        return "%s:%s:%s" % (exception.getSystemId(),
                             exception.getLineNumber(),
                             exception.getColumnNumber())
|
||||||
|
|
||||||
|
# --- ErrorRaiser
|
||||||
|
|
||||||
|
class ErrorRaiser:
    """A simple error handler that raises the exceptions it is passed.

    The level selects which events are raised: warnings when level is
    at most 0, errors when it is at most 1 and fatal errors when it is
    at most 2.  Events above the threshold are silently ignored.
    """

    def __init__(self, level = 0):
        self._level = level

    def error(self, exception):
        """Raise the exception unless the level suppresses errors."""
        if self._level > 1:
            return
        raise exception

    def fatalError(self, exception):
        """Raise the exception unless the level suppresses fatal errors."""
        if self._level > 2:
            return
        raise exception

    def warning(self, exception):
        """Raise the exception unless the level suppresses warnings."""
        if self._level > 0:
            return
        raise exception
|
||||||
|
|
||||||
|
# --- AttributesImpl now lives in xmlreader
|
||||||
|
from xmlreader import AttributesImpl
|
||||||
|
|
||||||
|
# --- XMLGenerator is the SAX2 ContentHandler for writing back XML
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
def _outputwrapper(stream,encoding):
    """Wrap stream in the stream writer of the codec named by encoding.

    Raises LookupError if the encoding is not known to the codecs module.
    """
    make_writer = codecs.getwriter(encoding)
    return make_writer(stream)
|
||||||
|
|
||||||
|
if hasattr(codecs, "register_error"):
    def writetext(stream, text, entities={}):
        """Write escaped text to stream, using character references for
        characters the stream's encoding cannot represent.

        stream is expected to be a codecs stream writer: its error mode
        is switched to "xmlcharrefreplace" for the duration of the write
        and set back to "strict" afterwards, even if the write fails.
        """
        stream.errors = "xmlcharrefreplace"
        try:
            stream.write(escape(text, entities))
        finally:
            # Never leave the writer in the lenient mode: later writes of
            # markup must still fail loudly on unencodable characters.
            stream.errors = "strict"
else:
    def writetext(stream, text, entities={}):
        """Write escaped text to stream, using character references for
        characters the stream's encoding cannot represent.

        Fallback for interpreters without codecs.register_error: the
        text is first written in one piece; if that raises UnicodeError
        it is written character by character, replacing each character
        that cannot be encoded with a numeric character reference.
        """
        text = escape(text, entities)
        try:
            stream.write(text)
        except UnicodeError:
            for c in text:
                try:
                    stream.write(c)
                except UnicodeError:
                    stream.write("&#%d;" % ord(c))
|
||||||
|
|
||||||
|
def writeattr(stream, text):
    """Write text to stream as a quoted, escaped attribute value.

    The quote character is chosen to minimise escaping: double quotes
    are used unless the text contains more double quotes than single
    quotes, in which case single quotes are used.  Occurrences of the
    chosen quote character inside the text are written as an entity
    reference.
    """
    countdouble = text.count('"')
    if countdouble:
        countsingle = text.count("'")
        if countdouble <= countsingle:
            entities = {'"': "&quot;"}
            quote = '"'
        else:
            entities = {"'": "&apos;"}
            quote = "'"
    else:
        # No double quotes in the text: nothing extra to escape.
        entities = {}
        quote = '"'
    stream.write(quote)
    writetext(stream, text, entities)
    stream.write(quote)
|
||||||
|
|
||||||
|
|
||||||
|
class XMLGenerator(handler.ContentHandler):
    """SAX2 ContentHandler that writes the events it receives back out
    as XML text.

    Output goes to the file-like object out (sys.stdout when None),
    wrapped in a stream writer for the given encoding.
    """

    # Template for prefixes invented for namespaced attributes whose
    # namespace is only bound as the default namespace.
    GENERATED_PREFIX = "xml.sax.saxutils.prefix%s"

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = _outputwrapper(out,encoding)
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._generated_prefix_ctr = 0
        return

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration naming the output encoding."""
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new prefix binding; it is declared on the next
        element written by startElementNS."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._out.write(' %s=' % attr_name)
            writeattr(self._out, value)
        self._out.write('>')

    def endElement(self, name):
        """Write an end tag (non-namespace mode)."""
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag for the (uri, localname) pair name.

        Pending namespace declarations are emitted as xmlns attributes
        and attribute names are qualified with the prefix bound to their
        namespace.  The qname argument is not used; prefixes are taken
        from the recorded namespace context.
        """
        if name[0] is None:
            tag = name[1]
        elif self._current_context[name[0]] is None:
            # default namespace
            tag = name[1]
        else:
            tag = self._current_context[name[0]] + ":" + name[1]
        self._out.write('<' + tag)

        for k,v in self._undeclared_ns_maps:
            if k is None:
                self._out.write(' xmlns="%s"' % (v or ''))
            else:
                self._out.write(' xmlns:%s="%s"' % (k,v))
        self._undeclared_ns_maps = []

        for (attr, value) in attrs.items():
            # Split the key up front: the qualified name built below must
            # not be confused with the (uri, localname) pair it came from.
            uri, localname = attr[0], attr[1]
            if uri is None:
                attr_name = localname
            elif self._current_context[uri] is None:
                # default namespace
                #If an attribute has a nsuri but not a prefix, we must
                #create a prefix and add a nsdecl
                prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
                self._generated_prefix_ctr = self._generated_prefix_ctr + 1
                attr_name = prefix + ':' + localname
                self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(uri)))
                self._current_context[uri] = prefix
            else:
                attr_name = self._current_context[uri] + ":" + localname
            self._out.write(' %s=' % attr_name)
            writeattr(self._out, value)
        self._out.write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (uri, localname) pair name."""
        # XXX: if qname is not None, we better use it.
        # Python 2.0b2 requires us to use the recorded prefix for
        # name[0], though
        if name[0] is None:
            qname = name[1]
        elif self._current_context[name[0]] is None:
            qname = name[1]
        else:
            qname = self._current_context[name[0]] + ":" + name[1]
        self._out.write('</%s>' % qname)

    def characters(self, content):
        """Write character data, escaped as needed."""
        writetext(self._out, content)

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._out.write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._out.write('<?%s %s?>' % (target, data))
|
||||||
|
|
||||||
|
|
||||||
|
class LexicalXMLGenerator(XMLGenerator, saxlib.LexicalHandler):
    """A XMLGenerator that also supports the LexicalHandler interface"""

    def __init__(self, out=None, encoding="iso-8859-1"):
        XMLGenerator.__init__(self, out, encoding)
        # True while between startCDATA and endCDATA.
        self._in_cdata = 0

    def characters(self, content):
        """Write character data, verbatim inside a CDATA section and
        escaped everywhere else."""
        if self._in_cdata:
            # "]]>" cannot occur inside a CDATA section.  Close the
            # section, write the sequence as escaped text ("]]&gt;") and
            # reopen the section for the remaining content.
            self._out.write(content.replace(']]>', ']]>]]&gt;<![CDATA['))
        else:
            self._out.write(escape(content))

    # LexicalHandler methods
    # (we only support the most important ones and inherit the rest)

    def startDTD(self, name, public_id, system_id):
        """Write the opening of a DOCTYPE declaration.

        A PUBLIC identifier pair is written when public_id is set,
        otherwise a SYSTEM identifier when system_id is set.
        """
        self._out.write('<!DOCTYPE %s' % name)
        if public_id:
            self._out.write(' PUBLIC %s %s' % (
                quoteattr(public_id or ""), quoteattr(system_id or "")
                ))
        elif system_id:
            self._out.write(' SYSTEM %s' % quoteattr(system_id or ""))

    def endDTD(self):
        """Close the DOCTYPE declaration."""
        self._out.write('>')

    def comment(self, content):
        """Write a comment; the content is written unchanged."""
        self._out.write('<!--')
        self._out.write(content)
        self._out.write('-->')

    def startCDATA(self):
        """Open a CDATA section."""
        self._in_cdata = 1
        self._out.write('<![CDATA[')

    def endCDATA(self):
        """Close the current CDATA section."""
        self._in_cdata = 0
        self._out.write(']]>')
|
||||||
|
|
||||||
|
|
||||||
|
# --- ContentGenerator is the SAX1 DocumentHandler for writing back XML
|
||||||
|
class ContentGenerator(XMLGenerator):
    """XMLGenerator variant accepting the SAX1 characters() signature."""

    def characters(self, str, start, end):
        """Write the slice of str that begins at start.

        In SAX1, characters receives start and end; in SAX2, it receives
        a string.  The third argument is added to start to obtain the
        end of the slice.  For plain strings, we may want to use a
        buffer object.
        """
        stop = start + end
        return XMLGenerator.characters(self, str[start:stop])
|
||||||
|
|
||||||
|
# --- XMLFilterImpl
|
||||||
|
class XMLFilterBase(saxlib.XMLFilter):
    """Pass-through filter between an XMLReader and the application's
    event handlers.

    By default every request is forwarded unchanged to the parent reader
    and every event is forwarded unchanged to the registered handlers.
    Subclasses override individual methods to modify the event stream or
    the configuration requests as they pass through.
    """

    # ErrorHandler methods

    def error(self, exception):
        """Forward a recoverable error to the error handler."""
        self._err_handler.error(exception)

    def fatalError(self, exception):
        """Forward a fatal error to the error handler."""
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        """Forward a warning to the error handler."""
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Forward the document locator to the content handler."""
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the start-of-document event."""
        self._cont_handler.startDocument()

    def endDocument(self):
        """Forward the end-of-document event."""
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward a start-of-element event."""
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        """Forward an end-of-element event."""
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward a namespace-aware start-of-element event."""
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward a namespace-aware end-of-element event."""
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        """Forward character data."""
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward ignorable whitespace."""
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward a processing instruction."""
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward a skipped-entity event."""
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Forward a notation declaration to the DTD handler."""
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward an unparsed entity declaration to the DTD handler."""
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Ask the entity resolver to resolve an entity and return its answer."""
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Install this filter as every handler of the parent reader,
        then let the parent parse source."""
        parent = self._parent
        parent.setContentHandler(self)
        parent.setErrorHandler(self)
        parent.setEntityResolver(self)
        parent.setDTDHandler(self)
        parent.parse(source)

    def setLocale(self, locale):
        """Forward the locale request to the parent reader."""
        self._parent.setLocale(locale)

    def getFeature(self, name):
        """Return the named feature's state from the parent reader."""
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        """Set the named feature on the parent reader."""
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        """Return the named property's value from the parent reader."""
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        """Set the named property on the parent reader."""
        self._parent.setProperty(name, value)

# FIXME: remove this backward compatibility hack when not needed anymore
XMLFilterImpl = XMLFilterBase
|
||||||
|
|
||||||
|
# --- BaseIncrementalParser
|
||||||
|
|
||||||
|
class BaseIncrementalParser(xmlreader.IncrementalParser):
    """Implements XMLReader.parse on top of the IncrementalParser
    methods feed, close and reset, as a convenience to SAX 2.0 driver
    writers."""

    def parse(self, source):
        """Parse source by feeding its byte stream to the parser in
        16384-byte chunks, bracketed by startDocument and endDocument."""
        source = prepare_input_source(source)
        self.prepareParser(source)

        self._cont_handler.startDocument()

        # FIXME: what about char-stream?
        stream = source.getByteStream()
        while 1:
            chunk = stream.read(16384)
            if chunk == "":
                # An empty read marks the end of the stream.
                break
            self.feed(chunk)

        self.close()
        self.reset()

        self._cont_handler.endDocument()

    def prepareParser(self, source):
        """Hook called by parse so that the SAX 2.0 driver can prepare
        itself for parsing.  Must be overridden; this implementation
        raises NotImplementedError."""
        raise NotImplementedError("prepareParser must be overridden!")
|
||||||
|
|
||||||
|
# --- Utility functions
|
||||||
|
|
||||||
|
def prepare_input_source(source, base = ""):
    """Turn source into a fully resolved InputSource ready for reading.

    source may be a system identifier string, an object with a read
    method, or an InputSource.  When the resulting InputSource has no
    byte stream, its system identifier is made absolute against base
    and opened with urllib2.
    """

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            # Use the file's name as the system identifier.
            source.setSystemId(absolute_system_id(stream.name, base))

    if source.getByteStream() is None:
        sysid = absolute_system_id(source.getSystemId(), base)
        source.setSystemId(sysid)
        source.setByteStream(urllib2.urlopen(sysid))

    return source
|
||||||
|
|
||||||
|
|
||||||
|
def absolute_system_id(sysid, base=''):
    """Return sysid as an absolute URI.

    A sysid naming an existing local path becomes a "file:" URI of its
    absolute path; otherwise it is resolved against base when base is
    non-empty.  The result is asserted to be absolute and then passed
    through MakeUrllibSafe.
    """
    if os.path.exists(sysid):
        local_path = os.path.abspath(sysid)
        sysid = 'file:%s' % local_path
    elif base:
        sysid = Absolutize(sysid, base)
    assert IsAbsolute(sysid)
    return MakeUrllibSafe(sysid)
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# DEPRECATED SAX 1.0 CLASSES
|
||||||
|
#
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
# --- AttributeMap
|
||||||
|
|
||||||
|
class AttributeMap:
    """An implementation of AttributeList that takes an (attr,val) hash
    and uses it to implement the AttributeList interface.

    Positional access follows the iteration order of the wrapped
    mapping's keys.
    """

    def __init__(self, map):
        self.map = map

    def getLength(self):
        """Return the number of attributes."""
        return len(self.map)

    def getName(self, i):
        """Return the name of the i-th attribute, or None if i is out of
        range."""
        try:
            # list() keeps positional access working when keys() returns
            # a view rather than a list.
            return list(self.map.keys())[i]
        except IndexError:
            return None

    def getType(self, i):
        """Return the attribute type, which is always "CDATA"."""
        return "CDATA"

    def getValue(self, i):
        """Return an attribute value by integer position or by name, or
        None if there is no such attribute."""
        try:
            if isinstance(i, int):
                return self.map[self.getName(i)]
            else:
                return self.map[i]
        except KeyError:
            return None

    def __len__(self):
        return len(self.map)

    def __getitem__(self, key):
        """Return the attribute name for an integer key, or the
        attribute value for a name key."""
        if isinstance(key, int):
            return list(self.map.keys())[key]
        else:
            return self.map[key]

    def items(self):
        """Return the (name, value) pairs of the wrapped mapping."""
        return self.map.items()

    def keys(self):
        """Return the attribute names."""
        return self.map.keys()

    def has_key(self,key):
        """Return true if an attribute named key exists."""
        return key in self.map

    def get(self, key, alternative=None):
        """Return the value for key, or alternative if it is missing."""
        return self.map.get(key, alternative)

    def copy(self):
        """Return a new AttributeMap over a copy of the mapping."""
        return AttributeMap(self.map.copy())

    def values(self):
        """Return the attribute values."""
        return self.map.values()
|
||||||
|
|
||||||
|
# --- Event broadcasting object
|
||||||
|
|
||||||
|
class EventBroadcaster:
    """Takes a list of objects and forwards any method calls received
    to all objects in the list. The attribute list holds the list and
    can freely be modified by clients."""

    class Event:
        "Helper objects that represent event methods."

        def __init__(self,list,name):
            self.list = list
            self.name = name

        def __call__(self,*rest):
            # Look the method up on each receiver at call time and pass
            # the positional arguments through unchanged.
            for obj in self.list:
                getattr(obj, self.name)(*rest)

    def __init__(self,list):
        self.list = list

    def __getattr__(self,name):
        # Only reached for attributes that are not found normally: every
        # unknown name becomes a broadcastable event method.
        return self.Event(self.list,name)

    def __repr__(self):
        return "<EventBroadcaster instance at %d>" % id(self)
|
||||||
|
|
||||||
|
# --- ESIS document handler
|
||||||
|
import saxlib
|
||||||
|
class ESISDocHandler(saxlib.HandlerBase):
    """A SAX document handler that produces naive ESIS output.

    Each event is written to writer as one line whose first character
    identifies the kind of event.
    """

    def __init__(self,writer=sys.stdout):
        self.writer = writer

    def processingInstruction (self,target, remainder):
        """Write a processing instruction as a "?" line."""
        line = "?"+target+" "+remainder+"\n"
        self.writer.write(line)

    def startElement(self,name,amap):
        """Write the start of an element as a "(" line followed by one
        "A" line per attribute."""
        out = self.writer
        out.write("("+name+"\n")
        for a_name in amap.keys():
            a_value = amap[a_name]
            out.write("A"+a_name+" "+a_value+"\n")

    def endElement(self,name):
        """Write the end of an element as a ")" line."""
        line = ")"+name+"\n"
        self.writer.write(line)

    def characters(self,data,start_ix,length):
        """Write the given slice of character data as a "-" line."""
        stop_ix = start_ix+length
        self.writer.write("-"+data[start_ix:stop_ix]+"\n")
|
||||||
|
|
||||||
|
# --- XML canonizer
|
||||||
|
|
||||||
|
class Canonizer(saxlib.HandlerBase):
    """A SAX document handler that produces canonized XML output.

    Attributes are written in sorted name order, character data outside
    the document element is dropped, and markup-significant characters,
    tabs, line feeds and carriage returns are written as references.
    """

    def __init__(self,writer=sys.stdout):
        # Number of currently open elements; 0 means outside the
        # document element.
        self.elem_level = 0
        self.writer = writer

    def processingInstruction (self,target, remainder):
        """Write a processing instruction, except one whose target is
        "xml"."""
        if not target=="xml":
            self.writer.write("<?"+target+" "+remainder+"?>")

    def startElement(self,name,amap):
        """Write a start tag with its attributes in sorted name order."""
        self.writer.write("<"+name)

        for a_name in sorted(amap.keys()):
            self.writer.write(" "+a_name+"=\"")
            self.write_data(amap[a_name])
            self.writer.write("\"")
        self.writer.write(">")
        self.elem_level = self.elem_level+1

    def endElement(self,name):
        """Write an end tag."""
        self.writer.write("</"+name+">")
        self.elem_level = self.elem_level-1

    def ignorableWhitespace(self,data,start_ix,length):
        """Treat ignorable whitespace like ordinary character data."""
        self.characters(data,start_ix,length)

    def characters(self,data,start_ix,length):
        """Write the given slice of character data, but only inside the
        document element."""
        if self.elem_level>0:
            self.write_data(data[start_ix:start_ix+length])

    def write_data(self,data):
        "Writes datachars to writer."
        # The ampersand must be replaced first so that the references
        # produced by the later replacements are not escaped again.
        data = data.replace("&","&amp;")
        data = data.replace("<","&lt;")
        data = data.replace("\"","&quot;")
        data = data.replace(">","&gt;")
        data = data.replace(chr(9),"&#9;")
        data = data.replace(chr(10),"&#10;")
        data = data.replace(chr(13),"&#13;")
        self.writer.write(data)
|
||||||
|
|
||||||
|
# --- mllib
|
||||||
|
|
||||||
|
class mllib:
    """A re-implementation of the htmllib, sgmllib and xmllib interfaces as a
    SAX DocumentHandler.

    Subclasses define start_<tag> and end_<tag> methods and override the
    handle_* and unknown_* hooks below.
    """

    # Unsupported:
    # - setnomoretags
    # - setliteral
    # - translate_references
    # - handle_xml
    # - handle_doctype
    # - handle_charref
    # - handle_entityref
    # - handle_comment
    # - handle_cdata
    # - tag_attributes

    def __init__(self):
        self.reset()

    def reset(self):
        """Create a fresh parser and internal handler."""
        import saxexts # only used here
        self.parser = saxexts.XMLParserFactory.make_parser()
        self.handler = mllib.Handler(self.parser,self)
        self.handler.reset()

    def feed(self,data):
        """Pass data on to the parser."""
        self.parser.feed(data)

    def close(self):
        """Close the parser."""
        self.parser.close()

    def get_stack(self):
        """Return the internal handler's stack of open element names."""
        return self.handler.get_stack()

    # --- Handler methods (to be overridden)

    def handle_starttag(self,name,method,atts):
        """Call the start_<name> method with the attributes."""
        method(atts)

    def handle_endtag(self,name,method):
        """Call the end_<name> method."""
        method()

    def handle_data(self,data):
        """Receive character data; does nothing by default."""
        pass

    def handle_proc(self,target,data):
        """Receive a processing instruction; does nothing by default."""
        pass

    def unknown_starttag(self,name,atts):
        """Receive a start tag with no start_<name> method; does nothing
        by default."""
        pass

    def unknown_endtag(self,name):
        """Receive an end tag with no end_<name> method; does nothing by
        default."""
        pass

    def syntax_error(self,message):
        """Receive the message of a non-fatal error; does nothing by
        default."""
        pass

    # --- The internal handler class

    class Handler(saxlib.DocumentHandler,saxlib.ErrorHandler):
        """An internal class to handle SAX events and translate them to mllib
        events."""

        def __init__(self,driver,handler):
            self.driver = driver
            self.driver.setDocumentHandler(self)
            self.driver.setErrorHandler(self)
            self.handler = handler
            self.reset()

        def get_stack(self):
            """Return the list of currently open element names."""
            return self.stack

        def reset(self):
            """Forget all open elements."""
            self.stack = []

        # --- DocumentHandler methods

        def characters(self, ch, start, length):
            stop = start+length
            self.handler.handle_data(ch[start:stop])

        def endElement(self, name):
            method_name = "end_"+name
            if hasattr(self.handler,method_name):
                self.handler.handle_endtag(name,
                                           getattr(self.handler,method_name))
            else:
                self.handler.unknown_endtag(name)

            # The element is closed: drop it from the stack.
            del self.stack[-1]

        def ignorableWhitespace(self, ch, start, length):
            stop = start+length
            self.handler.handle_data(ch[start:stop])

        def processingInstruction(self, target, data):
            self.handler.handle_proc(target,data)

        def startElement(self, name, atts):
            self.stack.append(name)

            method_name = "start_"+name
            if hasattr(self.handler,method_name):
                self.handler.handle_starttag(name,
                                             getattr(self.handler,
                                                     method_name),
                                             atts)
            else:
                self.handler.unknown_starttag(name,atts)

        # --- ErrorHandler methods

        def error(self, exception):
            self.handler.syntax_error(str(exception))

        def fatalError(self, exception):
            raise RuntimeError(str(exception))
|
378
extensions/jython/module/MOD-INF/lib/jython/xml/sax/xmlreader.py
Normal file
378
extensions/jython/module/MOD-INF/lib/jython/xml/sax/xmlreader.py
Normal file
@ -0,0 +1,378 @@
|
|||||||
|
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
|
||||||
|
should be based on this code. """
|
||||||
|
|
||||||
|
import handler
|
||||||
|
|
||||||
|
from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
|
||||||
|
|
||||||
|
|
||||||
|
# ===== XMLREADER =====
|
||||||
|
|
||||||
|
class XMLReader:
    """Callback-based interface for reading an XML document.

    This is the interface an XML parser's SAX2 driver has to implement.
    Through it an application can query and change the parser's
    features and properties, register the event handlers used for
    document processing, and start a parse.

    Every SAX interface is assumed to be synchronous: a parse method
    must not return before parsing has finished, and a reader must wait
    for an event-handler callback to return before it reports the next
    event.
    """

    def __init__(self):
        # Each slot starts out holding a default object created from
        # the handler module.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an InputSource.

        Always raises NotImplementedError here; drivers must override it.
        """
        raise NotImplementedError("This method must be implemented!")

    # --- handler registration

    def getContentHandler(self):
        """Return the current ContentHandler."""
        return self._cont_handler

    def setContentHandler(self, handler):
        """Register a new object to receive document content events."""
        self._cont_handler = handler

    def getDTDHandler(self):
        """Return the current DTD handler."""
        return self._dtd_handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self._dtd_handler = handler

    def getEntityResolver(self):
        """Return the current EntityResolver."""
        return self._ent_handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self._ent_handler = resolver

    def getErrorHandler(self):
        """Return the current ErrorHandler."""
        return self._err_handler

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self._err_handler = handler

    # --- locale, features and properties

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers need not localize errors and warnings, but a parser
        that cannot support the requested locale must throw a SAX
        exception.  Applications may ask for a locale change in the
        middle of a parse.

        This base implementation always raises SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up and return the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This base implementation recognizes no features and always
        raises SAXNotRecognizedException.
        """
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        """Look up and return the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This base implementation recognizes no properties and always
        raises SAXNotRecognizedException.
        """
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
|
||||||
|
|
||||||
|
class IncrementalParser(XMLReader):
    """XMLReader extended with three methods for incremental parsing.

    Supporting this interface is optional, because not every underlying
    XML parser offers the functionality.

    A freshly instantiated parser is immediately ready to accept data
    through the feed method.  Once parsing has been finished with a
    call to close, the reset method must be called before the parser
    can accept new data, whether from feed or from the parse method.

    These methods must _not_ be called during parsing, that is, after
    parse has been called and before it returns.

    As a convenience to SAX 2.0 driver writers, the class implements
    the parse method of the XMLReader interface by default, using the
    feed, close and reset methods of this interface.
    """

    def __init__(self, bufsize=2**16):
        # bufsize is the amount requested from the stream per read
        # in parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse ``source`` by feeding its byte stream in chunks.

        The source is normalized with saxutils.prepare_input_source,
        handed to prepareParser, and its byte stream is then read
        ``self._bufsize`` at a time and passed to feed until a read
        returns the empty string.  close is called at the end.
        """
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        while True:
            chunk = stream.read(self._bufsize)
            if chunk == "":
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand raw XML data to the parser and have it parse that data.

        The corresponding events are emitted as the data is parsed.
        XML constructs may be split across several calls to feed.

        feed may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Let the SAX 2.0 driver prepare itself for parsing.

        Called by the parse implementation.  This base implementation
        always raises NotImplementedError.
        """
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Tell the parser that no more data will follow.

        Called once the entire XML document has been passed in through
        the feed method.  This lets the parser do its final checks on
        the document and empty its internal data buffer.

        The parser is not ready to parse another document until the
        reset method has been called.

        close may raise SAXException.  This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready to parse new documents.

        Called after close has been called.  The results of calling
        parse or feed after close without calling reset are undefined.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("This method must be implemented!")
|
||||||
|
|
||||||
|
# ===== LOCATOR =====
|
||||||
|
|
||||||
|
class Locator:
    """Interface that ties a SAX event to a location in the document.

    A locator object returns valid results only during calls to
    DocumentHandler methods; at any other time the results are
    unpredictable.

    The methods of this base class return fixed placeholder values
    (-1 for numbers, None for identifiers).
    """

    def getColumnNumber(self):
        """Return the column number where the current event ends."""
        return -1

    def getLineNumber(self):
        """Return the line number where the current event ends."""
        return -1

    def getPublicId(self):
        """Return the public identifier for the current event."""
        return None

    def getSystemId(self):
        """Return the system identifier for the current event."""
        return None
|
||||||
|
|
||||||
|
# ===== INPUTSOURCE =====
|
||||||
|
|
||||||
|
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry the public identifier, the system identifier,
    a byte stream (possibly with character encoding information)
    and/or the character stream of an entity.

    Applications create objects of this class to pass to
    XMLReader.parse and to return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those.
    """

    def __init__(self, system_id=None):
        # Only the system identifier can be given at construction time;
        # everything else starts unset and is filled in via the setters.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    # --- identifiers

    def setPublicId(self, public_id):
        """Set the public identifier of this InputSource."""
        self.__public_id = public_id

    def getPublicId(self):
        """Return the public identifier of this InputSource."""
        return self.__public_id

    def setSystemId(self, system_id):
        """Set the system identifier of this InputSource."""
        self.__system_id = system_id

    def getSystemId(self):
        """Return the system identifier of this InputSource."""
        return self.__system_id

    # --- encoding

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream.
        """
        self.__encoding = encoding

    def getEncoding(self):
        """Return the character encoding of this InputSource."""
        return self.__encoding

    # --- streams

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source.

        The stream is a Python file-like object which does not perform
        byte-to-character conversion.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method.
        """
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown.
        """
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.

        The stream must be a Python 2.0 Unicode-wrapped file-like that
        performs conversion to Unicode strings.

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier.
        """
        self.__charfile = charfile

    def getCharacterStream(self):
        """Return the character stream for this input source."""
        return self.__charfile
|
||||||
|
|
||||||
|
# ===== ATTRIBUTESIMPL =====
|
||||||
|
|
||||||
|
class AttributesImpl:
    """Non-namespace-aware attribute collection backed by a mapping.

    Attribute names and qualified names are the same thing here, so the
    name/QName conversion methods simply validate and echo their
    argument.  The object also offers a read-only, dict-like interface
    (len, indexing, ``in``, keys/items/values, get).
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}.  The mapping is
        stored as given, not copied.
        """
        self._attrs = attrs

    def getLength(self):
        """Return the number of attributes."""
        return len(self._attrs)

    def getType(self, name):
        """Return the attribute type, which is always "CDATA" here."""
        return "CDATA"

    def getValue(self, name):
        """Return the value of attribute ``name``; KeyError if absent."""
        return self._attrs[name]

    def getValueByQName(self, name):
        """Return the value for qualified name ``name``; KeyError if absent."""
        return self._attrs[name]

    def getNameByQName(self, name):
        """Return ``name`` unchanged if it is a known attribute.

        Raises KeyError when no attribute of that name exists.
        """
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        """Return ``name`` unchanged if it is a known attribute.

        Raises KeyError when no attribute of that name exists.
        """
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def getQNames(self):
        """Return a list of the attribute qualified names."""
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, ``name in attrs`` falls back to the sequence
        # protocol and calls __getitem__(0), which raises KeyError
        # instead of answering the membership question.
        return name in self._attrs

    def keys(self):
        """Return a list of the attribute names."""
        return list(self._attrs.keys())

    def has_key(self, name):
        """Return whether an attribute called ``name`` exists."""
        return name in self._attrs

    def get(self, name, alternative=None):
        """Return the value of ``name``, or ``alternative`` if absent."""
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class.

        The new object wraps the same underlying mapping; the mapping
        itself is not duplicated.
        """
        return self.__class__(self._attrs)

    def items(self):
        """Return a list of (name, value) pairs."""
        return list(self._attrs.items())

    def values(self):
        """Return a list of the attribute values."""
        return list(self._attrs.values())
|
||||||
|
|
||||||
|
# ===== ATTRIBUTESNSIMPL =====
|
||||||
|
|
||||||
|
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Values are keyed by (ns_uri, lname) pairs, and a second mapping
    records the qualified name that belongs to each pair.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}.
        Both mappings are stored as given, not copied.
        """
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is ``name``.

        Raises KeyError when no attribute has that qualified name.
        """
        # The qname mapping is keyed by (ns_uri, lname), so finding a
        # qualified name means scanning its values.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair whose qualified name is ``name``.

        Raises KeyError when no attribute has that qualified name.
        """
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        """Return the qualified name for the (ns_uri, lname) pair ``name``.

        Raises KeyError when the pair is unknown.
        """
        return self._qnames[name]

    def getQNames(self):
        """Return a list of the qualified names."""
        return list(self._qnames.values())

    def copy(self):
        """Return a new object of the same class.

        The new object wraps the same two underlying mappings; the
        mappings themselves are not duplicated.
        """
        return self.__class__(self._attrs, self._qnames)
|
||||||
|
|
||||||
|
|
||||||
|
def _test():
    """Smoke test: instantiate each no-argument interface class once."""
    for interface in (XMLReader, IncrementalParser, Locator):
        interface()


# Run the smoke test when this module is executed as a script.
if __name__ == "__main__":
    _test()
|
Loading…
Reference in New Issue
Block a user