Yet a lot more python files that somehow svn just refused to add
git-svn-id: http://google-refine.googlecode.com/svn/trunk@962 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
parent
7767536292
commit
3f58d88922
380
extensions/jython/module/MOD-INF/lib/jython/xml/Uri.py
Normal file
380
extensions/jython/module/MOD-INF/lib/jython/xml/Uri.py
Normal file
@ -0,0 +1,380 @@
|
||||
# pylint: disable-msg=C0103
|
||||
#
|
||||
# backported code from 4Suite with slight modifications, started from r1.89 of
|
||||
# Ft/Lib/Uri.py, by syt@logilab.fr on 2005-02-09
|
||||
#
|
||||
# part if not all of this code should probably move to urlparse (or be used
|
||||
# to fix some existing functions in this module)
|
||||
#
|
||||
#
|
||||
# Copyright 2004 Fourthought, Inc. (USA).
|
||||
# Detailed license and copyright information: http://4suite.org/COPYRIGHT
|
||||
# Project home, documentation, distributions: http://4suite.org/
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
import urlparse, urllib, urllib2
|
||||
|
||||
def UnsplitUriRef(uriRefSeq):
    """should replace urlparse.urlunsplit

    Builds a URI reference string out of the five-item sequence
    (scheme, authority, path, query, fragment), i.e. the shape that
    SplitUriRef() produces.

    A component that is None is left out together with its delimiter;
    an empty-string component keeps its delimiter. The path is always
    emitted. Raises TypeError if the argument is not a tuple or list.
    """
    if not isinstance(uriRefSeq, (tuple, list)):
        raise TypeError("sequence expected, got %s" % type(uriRefSeq))
    (scheme, authority, path, query, fragment) = uriRefSeq
    # collect the pieces in order, then glue them together in one go
    pieces = []
    if scheme is not None:
        pieces.append(scheme + ':')
    if authority is not None:
        pieces.append('//' + authority)
    pieces.append(path)
    if query is not None:
        pieces.append('?' + query)
    if fragment is not None:
        pieces.append('#' + fragment)
    return ''.join(pieces)
|
||||
|
||||
SPLIT_URI_REF_PATTERN = re.compile(r"^(?:(?P<scheme>[^:/?#]+):)?(?://(?P<authority>[^/?#]*))?(?P<path>[^?#]*)(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")


def SplitUriRef(uriref):
    """should replace urlparse.urlsplit

    Breaks a valid URI reference string into its generic components and
    returns them as the tuple (scheme, authority, path, query, fragment),
    following the regular expression of RFC 2396 appendix B.

    Each item is a string (possibly empty) when the component is present,
    or None when it is absent. The path is always a string.

    Per rfc3986 there is no separate "opaque part" as there was in
    RFC 2396; whatever follows the scheme/authority is simply the path.
    """
    # Every component of the pattern is optional, so a valid URI reference
    # always matches and the match object is used without a None check.
    match = SPLIT_URI_REF_PATTERN.match(uriref)
    return match.group('scheme', 'authority', 'path', 'query', 'fragment')
|
||||
|
||||
|
||||
def Absolutize(uriRef, baseUri):
    """
    Resolves a URI reference to absolute form, effecting the result of RFC
    3986 section 5. The URI reference is considered to be relative to the
    given base URI.

    It is the caller's responsibility to ensure that the base URI matches
    the absolute-URI syntax rule of RFC 3986, and that its path component
    does not contain '.' or '..' segments if the scheme is hierarchical.
    Unexpected results may occur otherwise.

    This function only conducts a minimal sanity check in order to determine
    if relative resolution is possible: it raises a ValueError if the base
    URI is empty or does not have a scheme component. While it is true that
    the base URI is irrelevant if the URI reference has a scheme, an
    exception is raised in order to signal that the given string does not
    even come close to meeting the criteria to be usable as a base URI.

    It is the caller's responsibility to make a determination of whether the
    URI reference constitutes a "same-document reference", as defined in RFC
    2396 or RFC 3986. As per the spec, dereferencing a same-document
    reference "should not" involve retrieval of a new representation of the
    referenced resource. Note that the two specs have different definitions
    of same-document reference: RFC 2396 says it is *only* the cases where the
    reference is the empty string, or "#" followed by a fragment; RFC 3986
    requires making a comparison of the base URI to the absolute form of the
    reference (as is returned by the spec), minus its fragment component,
    if any.

    This function is similar to urlparse.urljoin() and urllib.basejoin().
    Those functions, however, are (as of Python 2.3) outdated, buggy, and/or
    designed to produce results acceptable for use with other core Python
    libraries, rather than being earnest implementations of the relevant
    specs. Their problems are most noticeable in their handling of
    same-document references and 'file:' URIs, both being situations that
    come up far too often to consider the functions reliable enough for
    general use.
    """
    # Reasons to avoid using urllib.basejoin() and urlparse.urljoin():
    # - Both are partial implementations of long-obsolete specs.
    # - Both accept relative URLs as the base, which no spec allows.
    # - urllib.basejoin() mishandles the '' and '..' references.
    # - If the base URL uses a non-hierarchical or relative path,
    #   or if the URL scheme is unrecognized, the result is not
    #   always as expected (partly due to issues in RFC 1808).
    # - If the authority component of a 'file' URI is empty,
    #   the authority component is removed altogether. If it was
    #   not present, an empty authority component is in the result.
    # - '.' and '..' segments are not always collapsed as well as they
    #   should be (partly due to issues in RFC 1808).
    # - Effective Python 2.4, urllib.basejoin() *is* urlparse.urljoin(),
    #   but urlparse.urljoin() is still based on RFC 1808.

    # This procedure is based on the pseudocode in RFC 3986 sec. 5.2.
    #
    # ensure base URI is absolute
    if not baseUri:
        raise ValueError('baseUri is required and must be a non empty string')
    if not IsAbsolute(baseUri):
        raise ValueError('%r is not an absolute URI' % baseUri)
    # shortcut for the simplest same-document reference cases
    if uriRef == '' or uriRef[0] == '#':
        return baseUri.split('#')[0] + uriRef
    # ensure a clean slate
    tScheme = tAuth = tPath = tQuery = None
    # parse the reference into its components
    (rScheme, rAuth, rPath, rQuery, rFrag) = SplitUriRef(uriRef)
    # if the reference is absolute, eliminate '.' and '..' path segments
    # and skip to the end
    if rScheme is not None:
        tScheme = rScheme
        tAuth = rAuth
        tPath = RemoveDotSegments(rPath)
        tQuery = rQuery
    else:
        # the base URI's scheme, and possibly more, will be inherited;
        # the base fragment is never used
        (bScheme, bAuth, bPath, bQuery, _bFrag) = SplitUriRef(baseUri)
        # if the reference is a net-path, just eliminate '.' and '..' path
        # segments; no other changes needed.
        if rAuth is not None:
            tAuth = rAuth
            tPath = RemoveDotSegments(rPath)
            tQuery = rQuery
        # if it's not a net-path, we need to inherit pieces of the base URI
        else:
            # use base URI's path if the reference's path is empty
            if not rPath:
                tPath = bPath
                # Use the reference's query whenever it is *defined*, even
                # if it is the empty string (a bare "?"); only an absent
                # query inherits the base URI's (RFC 3986 sec. 5.2.2).
                if rQuery is not None:
                    tQuery = rQuery
                else:
                    tQuery = bQuery
            # the reference's path is not empty
            else:
                # just use the reference's path if it's absolute
                if rPath[0] == '/':
                    tPath = RemoveDotSegments(rPath)
                # merge the reference's relative path with the base URI's path
                else:
                    if bAuth is not None and not bPath:
                        tPath = '/' + rPath
                    else:
                        tPath = bPath[:bPath.rfind('/')+1] + rPath
                    tPath = RemoveDotSegments(tPath)
                # use the reference's query
                tQuery = rQuery
            # since the reference isn't a net-path,
            # use the authority from the base URI
            tAuth = bAuth
        # inherit the scheme from the base URI
        tScheme = bScheme
    # always use the reference's fragment (no separate variable needed)

    # now compose the target URI (RFC 3986 sec. 5.3)
    return UnsplitUriRef((tScheme, tAuth, tPath, tQuery, rFrag))
|
||||
|
||||
|
||||
REG_NAME_HOST_PATTERN = re.compile(r"^(?:(?:[0-9A-Za-z\-_\.!~*'();&=+$,]|(?:%[0-9A-Fa-f]{2}))*)$")
|
||||
|
||||
def MakeUrllibSafe(uriRef):
    """
    Makes the given RFC 3986-conformant URI reference safe for passing
    to legacy urllib functions. The result may not be a valid URI.

    As of Python 2.3.3, urllib.urlopen() does not fully support
    internationalized domain names, it does not strip fragment components,
    and on Windows, it expects file URIs to use '|' instead of ':' in the
    path component corresponding to the drivespec. It also relies on
    urllib.unquote(), which mishandles unicode arguments. This function
    produces a URI reference that will work around these issues, although
    the IDN workaround is limited to Python 2.3 and up.

    Raises ValueError if the URI reference is Unicode and erroneously
    contains non-ASCII characters.
    """
    # IDN support requires decoding any percent-encoded octets in the
    # host part (if it's a reg-name) of the authority component, and when
    # doing DNS lookups, applying IDNA encoding to that string first.
    # As of Python 2.3, there is an IDNA codec, and the socket and httplib
    # modules accept Unicode strings and apply IDNA encoding automatically
    # where necessary. However, urllib.urlopen() has not yet been updated
    # to do the same; it raises an exception if you give it a Unicode
    # string, and does no conversion on non-Unicode strings, meaning you
    # have to give it an IDNA string yourself. We will only support it on
    # Python 2.3 and up.
    if isinstance(uriRef, unicode):
        try:
            uriRef = uriRef.encode('us-ascii') # parts of urllib are not unicode safe
        except UnicodeError:
            raise ValueError("uri %r must consist of ASCII characters." % uriRef)
    # the fragment is parsed but deliberately never used again (see below)
    (scheme, auth, path, query, frag) = urlparse.urlsplit(uriRef)

    # Split userinfo from host:port at the last '@'. Slicing is used
    # instead of a two-name unpack of split() so that an authority with
    # more than one '@' cannot make the unpacking fail.
    userinfo = None
    hostport = auth
    if auth:
        at = auth.rfind('@')
        if at > -1:
            userinfo = auth[:at]
            hostport = auth[at+1:]

    # Split host from port at the last ':'. An IP-literal ("[...]", e.g.
    # an IPv6 address) contains ':' itself and is never a reg-name, so it
    # is left whole; it then fails the reg-name test below and the
    # authority is passed through unchanged.
    host = hostport
    port = None
    if hostport and hostport[:1] != '[':
        colon = hostport.rfind(':')
        if colon > -1:
            host = hostport[:colon]
            port = hostport[colon+1:]

    # see if host is a reg-name, as opposed to IPv4 or IPv6 addr.
    if host and REG_NAME_HOST_PATTERN.match(host):
        # percent-encoded hostnames will always fail DNS lookups
        host = urllib.unquote(host) #PercentDecode(host)
        # IDNA-encode if possible.
        # We shouldn't do this for schemes that don't need DNS lookup,
        # but are there any (that you'd be calling urlopen for)?
        if sys.version_info[0:2] >= (2, 3):
            if isinstance(host, str):
                host = host.decode('utf-8')
            host = host.encode('idna')
        # reassemble the authority with the new hostname
        # (percent-decoded, and possibly IDNA-encoded)
        auth = ''
        if userinfo:
            auth += userinfo + '@'
        auth += host
        if port:
            auth += ':' + port

    # On Windows, ensure that '|', not ':', is used in a drivespec.
    if os.name == 'nt' and scheme == 'file':
        path = path.replace(':', '|', 1)

    # Note that we drop fragment, if any. See RFC 3986 sec. 3.5.
    uri = urlparse.urlunsplit((scheme, auth, path, query, None))

    return uri
|
||||
|
||||
|
||||
|
||||
def BaseJoin(base, uriRef):
    """
    Merges a base URI reference with another URI reference, returning a
    new URI reference.

    Works like Absolutize() with the arguments swapped, except that the
    base may be any URI reference, including a relative one. A base
    without a scheme is resolved as though it had one (a placeholder
    scheme is prepended); the placeholder is then stripped from the
    result unless uriRef carried its own scheme. Consequently, when
    neither argument has a scheme, neither does the result.

    The name mirrors urllib.basejoin(), which this resembles, but path
    merging, dot-segment elimination, and query/fragment inheritance
    follow the current rfc3986 algorithms.

    WARNING: This function exists for 2 reasons: (1) because of a need
    within the 4Suite repository to perform URI reference absolutization
    using base URIs that are stored (inappropriately) as absolute paths
    in the subjects of statements in the RDF model, and (2) because of
    a similar need to interpret relative repo paths in a 4Suite product
    setup.xml file as being relative to a path that can be set outside
    the document. When these needs go away, this function probably will,
    too, so it is not advisable to use it.
    """
    # an absolute base needs no special treatment
    if IsAbsolute(base):
        return Absolutize(uriRef, base)

    # relative base: resolve against it under a placeholder scheme
    placeholder = 'basejoin'
    resolved = Absolutize(uriRef, '%s:%s' % (placeholder, base))
    if IsAbsolute(uriRef):
        # the result's scheme came from uriRef, so keep it
        return resolved
    # no scheme in, no scheme out: strip "<placeholder>:"
    return resolved[len(placeholder)+1:]
|
||||
|
||||
|
||||
def RemoveDotSegments(path):
    """
    Supports Absolutize() by implementing the remove_dot_segments function
    described in RFC 3986 sec. 5.2. Most '.' and '..' segments are
    collapsed out of the path; empty segments are preserved. It is meant
    for use during path merging and may not give expected results when
    used independently. Use NormalizePathSegments() or
    NormalizePathSegmentsInUri() if more general normalization is desired.

    Semi-private because it is not for general use. The path is processed
    as a list of segments with a stack of kept segments, rather than with
    the spec's explicit string-walking algorithm.
    """
    # a path that is only "." or ".." collapses to an empty string
    if path == '.' or path == '..':
        return path[0:0] # preserves string type

    # strip every leading "./" or "../"
    while True:
        if path[:2] == './':
            path = path[2:]
        elif path[:3] == '../':
            path = path[3:]
        else:
            break

    # Remember a leading slash and drop it, so that split() does not yield
    # an ambiguous empty first segment.
    is_rooted = path[:1] == '/'
    if is_rooted:
        path = path[1:]

    # a trailing "/." becomes just "/"
    if path[-2:] == '/.':
        path = path[:-1]

    # walk the segments left to right, stacking the ones to keep
    segments = path.split('/')
    final_pos = len(segments) - 1
    kept = []
    for pos, seg in enumerate(segments):
        if seg == '..':
            # '..' cancels the previously kept segment, if there is one;
            # otherwise it is retained only for relative paths
            if kept:
                kept.pop()
            elif not is_rooted:
                kept.append(seg)
            # a final '..' must leave the result ending in '/'
            if pos == final_pos:
                kept.append('')
        elif seg != '.':
            # '.' is dropped; everything else, even '', is kept
            kept.append(seg)

    # put the pieces back together
    joined = '/'.join(kept)
    if is_rooted:
        return '/' + joined
    return '' + joined
|
||||
|
||||
|
||||
SCHEME_PATTERN = re.compile(r'([a-zA-Z][a-zA-Z0-9+\-.]*):')

def GetScheme(uriRef):
    """
    Extracts only the scheme from a URI reference, as cheaply as possible.

    Returns the scheme as a string (without the trailing ':'), or None
    when the reference does not start with a scheme.
    """
    # A regex appears to be the fastest approach. Timed over 50,000 calls
    # on different URIs (1.0-GHz PIII, FreeBSD 4.7, Python 2.2.1), this
    # took 0.95s, or 0.05s when there was no scheme to find. For
    # comparison:
    #   urllib.splittype()[0] took 1.5s always;
    #   Ft.Lib.Uri.SplitUriRef()[0] took 2.5s always;
    #   urlparse.urlparse()[0] took 3.5s always.
    found = SCHEME_PATTERN.match(uriRef)
    if found is not None:
        return found.group(1)
    return None
|
||||
|
||||
|
||||
def IsAbsolute(identifier):
    """
    Tests whether a string believed to be a URI or URI reference is
    absolute (as per RFC 2396) rather than relative -- in other words,
    whether it begins with a scheme.
    """
    # Reuse GetScheme() rather than compiling another massive regex.
    scheme = GetScheme(identifier)
    return scheme is not None
|
@ -0,0 +1 @@
|
||||
"Directory for SAX version 2 drivers."
|
@ -0,0 +1,333 @@
|
||||
"""
|
||||
SAX driver for the Java SAX parsers. Can only be used in Jython.
|
||||
|
||||
$Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $
|
||||
"""
|
||||
|
||||
# --- Initialization
|
||||
|
||||
version = "0.10"
|
||||
revision = "$Revision: 1.5 $"
|
||||
|
||||
import string
|
||||
from xml.sax import xmlreader, saxutils
|
||||
from xml.sax.handler import feature_namespaces, feature_namespace_prefixes
|
||||
from xml.sax import _exceptions
|
||||
|
||||
# we only work in jython
|
||||
import sys
|
||||
if sys.platform[:4] != "java":
|
||||
raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None)
|
||||
del sys
|
||||
|
||||
# get the necessary Java SAX classes
|
||||
try:
|
||||
from org.python.core import FilelikeInputStream
|
||||
from org.xml.sax.helpers import XMLReaderFactory
|
||||
from org.xml import sax as javasax
|
||||
except ImportError:
|
||||
raise _exceptions.SAXReaderNotAvailable("SAX is not on the classpath", None)
|
||||
|
||||
# get some JAXP stuff
|
||||
try:
|
||||
from javax.xml.parsers import SAXParserFactory, ParserConfigurationException
|
||||
factory = SAXParserFactory.newInstance()
|
||||
jaxp = 1
|
||||
except ImportError:
|
||||
jaxp = 0
|
||||
|
||||
from java.lang import String
|
||||
|
||||
|
||||
def _wrap_sax_exception(e):
    """Convert a Java SAX parse exception into a Python SAXParseException.

    The message and nested exception are carried over, and the position
    information (column, line, public id, system id) is copied into a
    SimpleLocator.
    """
    message = e.message
    cause = e.exception
    position = SimpleLocator(e.columnNumber,
                             e.lineNumber,
                             e.publicId,
                             e.systemId)
    return _exceptions.SAXParseException(message, cause, position)
|
||||
|
||||
class JyErrorHandlerWrapper(javasax.ErrorHandler):
    """Java-side ErrorHandler that forwards to a Python SAX error handler.

    Each Java exception is converted with _wrap_sax_exception() before it
    is handed to the matching method of the wrapped handler.
    """

    def __init__(self, err_handler):
        self._err_handler = err_handler

    def error(self, exc):
        "Forward a recoverable error."
        target = self._err_handler.error
        target(_wrap_sax_exception(exc))

    def fatalError(self, exc):
        "Forward a non-recoverable error."
        target = self._err_handler.fatalError
        target(_wrap_sax_exception(exc))

    def warning(self, exc):
        "Forward a warning."
        target = self._err_handler.warning
        target(_wrap_sax_exception(exc))
|
||||
|
||||
class JyInputSourceWrapper(javasax.InputSource):
    """Java InputSource built from whatever the Python caller passed to parse().

    Accepted inputs:
      - a string (byte or unicode), passed to the Java InputSource
        constructor as-is;
      - a file-like object (anything with a "read" attribute), wrapped in a
        FilelikeInputStream; its "name" attribute, if any, becomes the
        system id;
      - any other object, which is assumed to offer the
        xml.sax.xmlreader.InputSource accessors (getByteStream,
        getSystemId, getPublicId, getEncoding).
    """

    def __init__(self, source):
        # basestring rather than str: a unicode system id must not fall
        # through to the InputSource branch, which would fail on the
        # missing getByteStream() method.
        if isinstance(source, basestring):
            javasax.InputSource.__init__(self, source)
        elif hasattr(source, "read"):
            # file-like object
            javasax.InputSource.__init__(self, FilelikeInputStream(source))
            if hasattr(source, "name"):
                self.setSystemId(source.name)
        else:
            # xml.sax.xmlreader.InputSource object
            # Use byte stream constructor if possible so that Xerces won't
            # attempt to open the url at systemId unless it's really there
            byte_stream = source.getByteStream()
            if byte_stream:
                javasax.InputSource.__init__(self,
                                             FilelikeInputStream(byte_stream))
            else:
                javasax.InputSource.__init__(self)
            system_id = source.getSystemId()
            if system_id:
                self.setSystemId(system_id)
            self.setPublicId(source.getPublicId())
            self.setEncoding(source.getEncoding())
|
||||
|
||||
class JyEntityResolverWrapper(javasax.EntityResolver):
    """Java-side EntityResolver that delegates to a Python entity resolver."""

    def __init__(self, entityResolver):
        self._resolver = entityResolver

    def resolveEntity(self, pubId, sysId):
        "Ask the wrapped resolver, then adapt its answer for the Java parser."
        resolved = self._resolver.resolveEntity(pubId, sysId)
        return JyInputSourceWrapper(resolved)
|
||||
|
||||
class JyDTDHandlerWrapper(javasax.DTDHandler):
    """Java-side DTDHandler that passes DTD events to a Python DTD handler."""

    def __init__(self, dtdHandler):
        self._handler = dtdHandler

    def notationDecl(self, name, publicId, systemId):
        "Forward a notation declaration unchanged."
        handler = self._handler
        handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, notationName):
        "Forward an unparsed entity declaration unchanged."
        handler = self._handler
        handler.unparsedEntityDecl(name, publicId, systemId, notationName)
|
||||
|
||||
class SimpleLocator(xmlreader.Locator):
    """Locator holding a fixed position: column, line, public id, system id.

    The four values are stored at construction time and returned unchanged
    by the accessor methods.
    """

    def __init__(self, colNum, lineNum, pubId, sysId):
        self.colNum, self.lineNum = colNum, lineNum
        self.pubId, self.sysId = pubId, sysId

    def getColumnNumber(self):
        "Return the stored column number."
        return self.colNum

    def getLineNumber(self):
        "Return the stored line number."
        return self.lineNum

    def getPublicId(self):
        "Return the stored public identifier."
        return self.pubId

    def getSystemId(self):
        "Return the stored system identifier."
        return self.sysId
|
||||
|
||||
# --- JavaSAXParser
|
||||
class JavaSAXParser(xmlreader.XMLReader, javasax.ContentHandler):
    """SAX driver for the Java SAX parsers.

    The object is both the Python XMLReader handed to applications and the
    Java ContentHandler registered with the underlying Java parser: Java
    content events arrive on the methods below and are passed on to the
    Python content handler (self._cont_handler).
    """

    def __init__(self, jdriver = None):
        xmlreader.XMLReader.__init__(self)
        self._parser = create_java_parser(jdriver)
        # start with namespace processing switched off; callers can turn
        # it on through setFeature()
        self._parser.setFeature(feature_namespaces, 0)
        self._parser.setFeature(feature_namespace_prefixes, 0)
        self._parser.setContentHandler(self)
        # reusable attribute views; their _attrs is replaced per element
        self._nsattrs = AttributesNSImpl()
        self._attrs = AttributesImpl()
        # install Java-side wrappers for the handlers that
        # XMLReader.__init__ has just set up
        self.setEntityResolver(self.getEntityResolver())
        self.setErrorHandler(self.getErrorHandler())
        self.setDTDHandler(self.getDTDHandler())

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        self._parser.parse(JyInputSourceWrapper(source))

    def getFeature(self, name):
        "Return the state of the named feature of the Java parser."
        return self._parser.getFeature(name)

    def setFeature(self, name, state):
        "Set the state of the named feature on the Java parser."
        self._parser.setFeature(name, state)

    def getProperty(self, name):
        "Return the value of the named property of the Java parser."
        return self._parser.getProperty(name)

    def setProperty(self, name, value):
        "Set the value of the named property on the Java parser."
        self._parser.setProperty(name, value)

    def setEntityResolver(self, resolver):
        "Register the resolver with both the Java parser and the XMLReader."
        self._parser.entityResolver = JyEntityResolverWrapper(resolver)
        xmlreader.XMLReader.setEntityResolver(self, resolver)

    def setErrorHandler(self, err_handler):
        "Register the error handler with both the Java parser and the XMLReader."
        self._parser.errorHandler = JyErrorHandlerWrapper(err_handler)
        xmlreader.XMLReader.setErrorHandler(self, err_handler)

    def setDTDHandler(self, dtd_handler):
        "Register the DTD handler with both the Java parser and the XMLReader."
        self._parser.setDTDHandler(JyDTDHandlerWrapper(dtd_handler))
        xmlreader.XMLReader.setDTDHandler(self, dtd_handler)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()
        # Sample the namespace feature once per document; it decides
        # whether element events are reported in NS or non-NS form.
        self._namespaces = self._parser.getFeature(feature_namespaces)

    def startElement(self, uri, lname, qname, attrs):
        if self._namespaces:
            self._nsattrs._attrs = attrs
            # an empty namespace URI is reported to Python as None
            self._cont_handler.startElementNS((uri or None, lname), qname,
                                              self._nsattrs)
        else:
            self._attrs._attrs = attrs
            self._cont_handler.startElement(qname, self._attrs)

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def characters(self, char, start, len):
        # Convert with unicode(), not str(): str() forces the text into a
        # byte string, which cannot represent non-ASCII document content.
        self._cont_handler.characters(unicode(String(char, start, len)))

    def ignorableWhitespace(self, char, start, len):
        # unicode() for the same reason as in characters()
        self._cont_handler.ignorableWhitespace(unicode(String(char, start, len)))

    def endElement(self, uri, lname, qname):
        if self._namespaces:
            self._cont_handler.endElementNS((uri or None, lname), qname)
        else:
            self._cont_handler.endElement(qname)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def endDocument(self):
        self._cont_handler.endDocument()

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)
|
||||
|
||||
class AttributesImpl:
    """Python SAX attributes view over a Java SAX Attributes object.

    The wrapped object (self._attrs) is consulted on every call; nothing is
    copied. Attributes are addressed by qualified name. Lookups of a name
    that is not present raise KeyError.
    """

    def __init__(self, attrs = None):
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return self._attrs.getLength()

    def getType(self, name):
        "Return the type reported by the wrapped object for the named attribute."
        return self._attrs.getType(name)

    def getValue(self, name):
        "Return the value of the named attribute; raise KeyError if absent."
        value = self._attrs.getValue(name)
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        "Return the qualified names of all attributes, in index order."
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getQNames(self):
        "Return the qualified names of all attributes, in index order."
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getValueByQName(self, qname):
        "Return the value for a qualified name; raise KeyError if absent."
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return self._attrs.getValue(idx)

    def getNameByQName(self, qname):
        "Return the name for a qualified name (the same string here)."
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return qname

    def getQNameByName(self, name):
        "Return the qualified name for a name (the same string here)."
        idx = self._attrs.getIndex(name)
        if idx == -1:
            raise KeyError(name)
        return name

    def __len__(self):
        return self._attrs.getLength()

    def __getitem__(self, name):
        return self.getValue(name)

    def __contains__(self, name):
        # Without this, "name in attrs" falls back to calling
        # __getitem__ with 0, 1, 2, ... instead of testing the name.
        return self.has_key(name)

    def keys(self):
        "Return the attribute names (same as getNames())."
        return self.getNames()

    def copy(self):
        "Return a new view of the same class over the same wrapped object."
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return [(name, self[name]) for name in self.getNames()]

    def values(self):
        "Return a list of the attribute values, in getNames() order."
        return [self.getValue(name) for name in self.getNames()]

    def get(self, name, alt=None):
        "Return the value of the named attribute, or alt if it is absent."
        try:
            return self.getValue(name)
        except KeyError:
            return alt

    def has_key(self, name):
        "Return True if the named attribute is present."
        try:
            self.getValue(name)
        except KeyError:
            return False
        return True
|
||||
|
||||
# --- AttributesNSImpl
|
||||
|
||||
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attributes view over a Java SAX Attributes object.

    Names are (namespace URI, local name) pairs. On the Python side an
    attribute without a namespace has None as its URI, matching what
    JavaSAXParser reports for element names; the wrapped Java object is
    given the empty string instead. Both None and '' are accepted in
    lookups.
    """

    def __init__(self, attrs=None):
        AttributesImpl.__init__(self, attrs)

    def getType(self, name):
        "Return the type reported for the attribute named by the pair."
        return self._attrs.getType(name[0] or '', name[1])

    def getValue(self, name):
        "Return the value for the (uri, localname) pair; raise KeyError if absent."
        value = self._attrs.getValue(name[0] or '', name[1])
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        "Return the (uri, localname) pairs of all attributes, in index order."
        attrs = self._attrs
        return [(attrs.getURI(idx) or None, attrs.getLocalName(idx))
                for idx in range(len(self))]

    def getNameByQName(self, qname):
        "Return the (uri, localname) pair for a qualified name."
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return (self._attrs.getURI(idx) or None, self._attrs.getLocalName(idx))

    def getQNameByName(self, name):
        "Return the qualified name for a (uri, localname) pair."
        idx = self._attrs.getIndex(name[0] or '', name[1])
        if idx == -1:
            raise KeyError(name)
        return self._attrs.getQName(idx)

    def getQNames(self):
        "Return the qualified names of all attributes, in index order."
        return [self._attrs.getQName(idx) for idx in range(len(self))]
|
||||
|
||||
# ---
|
||||
|
||||
def create_java_parser(jdriver = None):
|
||||
try:
|
||||
if jdriver:
|
||||
return XMLReaderFactory.createXMLReader(jdriver)
|
||||
elif jaxp:
|
||||
return factory.newSAXParser().getXMLReader()
|
||||
else:
|
||||
return XMLReaderFactory.createXMLReader()
|
||||
except ParserConfigurationException, e:
|
||||
raise _exceptions.SAXReaderNotAvailable(e.getMessage())
|
||||
except javasax.SAXException, e:
|
||||
raise _exceptions.SAXReaderNotAvailable(e.getMessage())
|
||||
|
||||
def create_parser(jdriver = None):
    "Return a new JavaSAXParser; jdriver is forwarded to its constructor."
    parser = JavaSAXParser(jdriver)
    return parser
|
345
extensions/jython/module/MOD-INF/lib/jython/xml/sax/handler.py
Normal file
345
extensions/jython/module/MOD-INF/lib/jython/xml/sax/handler.py
Normal file
@ -0,0 +1,345 @@
|
||||
"""
|
||||
This module contains the core classes of version 2.0 of SAX for Python.
|
||||
This file provides only default classes with absolutely minimum
|
||||
functionality, from which drivers and applications can be subclassed.
|
||||
|
||||
Many of these classes are empty and are included only as documentation
|
||||
of the interfaces.
|
||||
|
||||
$Id: handler.py,v 1.5 2002/02/14 08:09:36 loewis Exp $
|
||||
"""
|
||||
|
||||
version = '2.0beta'
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# HANDLER INTERFACES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
# ===== ERRORHANDLER =====
|
||||
|
||||
class ErrorHandler:
    """Basic interface for SAX error handlers.

    An object implementing this interface can be registered with an
    XMLReader; the parser then reports every warning and error by calling
    the methods below. Three severities exist: warnings, (possibly)
    recoverable errors, and unrecoverable errors. Each method receives a
    SAXParseException as its only parameter.

    The default behaviour is to raise the exception for both kinds of
    error and to print it for a warning.
    """

    def error(self, exception):
        "Handle a recoverable error."
        raise exception

    def fatalError(self, exception):
        "Handle a non-recoverable error."
        raise exception

    def warning(self, exception):
        "Handle a warning."
        print(exception)
|
||||
|
||||
|
||||
# ===== CONTENTHANDLER =====
|
||||
|
||||
class ContentHandler:
    """Callback interface through which logical document content is reported.

    Of all the SAX interfaces this is the central one, and normally the
    one that matters most to an application.  Events are delivered in
    the same order as the corresponding information appears in the
    document.

    Apart from setDocumentLocator, which stores its argument, every
    callback defined here does nothing; subclasses override the events
    they are interested in.
    """

    def __init__(self):
        # No locator is known until a parser supplies one.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Remember the locator the parser supplies for locating events.

        A SAX parser is strongly encouraged, though not strictly
        required, to provide a locator.  A parser that does so must
        hand it over through this method before it invokes any other
        method of the DocumentHandler interface.

        With a locator the application can find the end position of
        any document-related event, even when the parser is not
        reporting an error.  The usual use is reporting the
        application's own errors, for instance character content that
        violates one of its business rules.  The information is
        probably not sufficient for use with a search engine.

        The locator only returns correct information while one of the
        events of this interface is being delivered; the application
        should not consult it at any other time.
        """
        self._locator = locator

    def startDocument(self):
        """Receive notification that a document is beginning.

        The parser calls this exactly once, ahead of every other
        method of this interface and of DTDHandler (setDocumentLocator
        being the one exception).
        """

    def endDocument(self):
        """Receive notification that a document has ended.

        The parser calls this exactly once, as the very last method of
        the parse.  It is not called until the parser has either given
        up parsing because of an unrecoverable error or reached the
        end of the input.
        """

    def startPrefixMapping(self, prefix, uri):
        """Open the scope of a prefix-to-URI Namespace mapping.

        Ordinary Namespace processing does not need this event: when
        the http://xml.org/sax/features/namespaces feature is true
        (the default) the reader replaces prefixes in element and
        attribute names automatically.

        Some applications, however, use prefixes inside character data
        or attribute values, where automatic expansion is not safe.
        The start/endPrefixMapping events give such applications what
        they need to expand those prefixes themselves.

        These events are not guaranteed to nest properly relative to
        each other.  Every startPrefixMapping event precedes the
        matching startElement event and every endPrefixMapping event
        follows the matching endElement event, but no further ordering
        is promised.
        """

    def endPrefixMapping(self, prefix):
        """Close the scope of a prefix-to-URI mapping.

        Details are given under startPrefixMapping.  The event always
        follows the matching endElement event; beyond that the order
        of endPrefixMapping events is not guaranteed.
        """

    def startElement(self, name, attrs):
        """Report the start of an element (non-namespace mode).

        name is the raw XML 1.0 name of the element type as a string;
        attrs is an instance of the Attributes class holding the
        attributes of the element.
        """

    def endElement(self, name):
        """Report the end of an element (non-namespace mode).

        name is the name of the element type, exactly as it was passed
        to the startElement event.
        """

    def startElementNS(self, name, qname, attrs):
        """Report the start of an element (namespace mode).

        name is the element type name as a (uri, localname) tuple,
        qname is the raw XML 1.0 name used in the source document, and
        attrs is an instance of the Attributes class holding the
        attributes of the element.

        For an element without a namespace the uri member of the name
        tuple is None.
        """

    def endElementNS(self, name, qname):
        """Report the end of an element (namespace mode).

        name is the name of the element type, exactly as it was passed
        to the startElementNS event.
        """

    def characters(self, content):
        """Receive a chunk of character data.

        The parser calls this for each chunk of character data.  It
        may deliver all contiguous character data in one chunk or
        split it into several, but all characters of a single event
        must come from the same external entity so that the Locator
        provides useful information.
        """

    def ignorableWhitespace(self, whitespace):
        """Receive a chunk of ignorable whitespace in element content.

        Validating parsers must report each chunk of ignorable
        whitespace through this method (see the W3C XML 1.0
        recommendation, section 2.10).  Non-validating parsers may use
        it as well if they are able to parse and use content models.

        The parser may deliver all contiguous whitespace in one chunk
        or split it into several, but all characters of a single event
        must come from the same external entity so that the Locator
        provides useful information.

        The application must not attempt to read from the array
        outside of the specified range.
        """

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        The parser calls this once for every processing instruction
        it finds; such instructions may appear before or after the
        main document element.

        An XML declaration (XML 1.0, section 2.8) or a text
        declaration (XML 1.0, section 4.3.1) should never be reported
        through this method.
        """

    def skippedEntity(self, name):
        """Receive notification of an entity that was skipped.

        The parser calls this once for every skipped entity.  A
        non-validating processor may skip an entity whose declaration
        it has not seen, for example because the entity was declared
        in an external DTD subset.  Any processor may skip external
        entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """
|
||||
|
||||
|
||||
# ===== DTDHandler =====
|
||||
|
||||
class DTDHandler:
    """Receiver for DTD events.

    Only the DTD events needed for basic parsing are covered here:
    unparsed entities and attributes.  Both callbacks do nothing by
    default.
    """

    def notationDecl(self, name, publicId, systemId):
        """Handle a notation declaration event."""

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Handle an unparsed entity declaration event."""
|
||||
|
||||
|
||||
# ===== ENTITYRESOLVER =====
|
||||
|
||||
class EntityResolver:
    """Minimal interface for resolving entities.

    Create an object implementing this interface and register it with
    your Parser; the parser will then call the method on that object
    to resolve all external entities.  DefaultHandler implements this
    interface with the default behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity.

        The result is either the system identifier to read from, as a
        string, or an InputSource to read from.  This default
        implementation returns systemId unchanged and ignores
        publicId.
        """
        resolved = systemId
        return resolved
|
||||
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# CORE FEATURES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
# Namespace processing.
#   true:   Perform Namespace processing (default).
#   false:  Optionally do not perform Namespace processing
#           (implies namespace-prefixes).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespaces = "http://xml.org/sax/features/namespaces"

# Reporting of prefixed names and Namespace declaration attributes.
#   true:   Report the original prefixed names and attributes used for
#           Namespace declarations.
#   false:  Do not report attributes used for Namespace declarations,
#           and optionally do not report original prefixed names
#           (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"

# String interning.
#   true:   All element names, prefixes, attribute names, Namespace
#           URIs, and local names are interned using the built-in
#           intern function.
#   false:  Names are not necessarily interned, although they may be
#           (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_string_interning = "http://xml.org/sax/features/string-interning"

# Validation.
#   true:   Report all validation errors (implies
#           external-general-entities and external-parameter-entities).
#   false:  Do not report validation errors.
#   access: (parsing) read-only; (not parsing) read/write
feature_validation = "http://xml.org/sax/features/validation"

# External general entities.
#   true:   Include all external general (text) entities.
#   false:  Do not include external general entities.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_ges = "http://xml.org/sax/features/external-general-entities"

# External parameter entities.
#   true:   Include all external parameter entities, including the
#           external DTD subset.
#   false:  Do not include any external parameter entities, even the
#           external DTD subset.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"

# Every core feature identifier defined above.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
|
||||
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# CORE PROPERTIES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
# Lexical handler.
#   data type:   xml.sax.sax2lib.LexicalHandler
#   description: An optional extension handler for lexical events like
#                comments.
#   access:      read/write
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"

# Declaration handler.
#   data type:   xml.sax.sax2lib.DeclHandler
#   description: An optional extension handler for DTD-related events
#                other than notations and unparsed entities.
#   access:      read/write
property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"

# DOM node.
#   data type:   org.w3c.dom.Node
#   description: When parsing, the current DOM node being visited if
#                this is a DOM iterator; when not parsing, the root DOM
#                node for iteration.
#   access:      (parsing) read-only; (not parsing) read/write
property_dom_node = "http://xml.org/sax/properties/dom-node"

# XML string.
#   data type:   String
#   description: The literal string of characters that was the source
#                for the current event.
#   access:      read-only
property_xml_string = "http://xml.org/sax/properties/xml-string"

# Input encoding.
#   data type:     String
#   description:   The name of the encoding to assume for input data.
#   access:        write: set the encoding, e.g. established by a
#                         higher-level protocol.  May change during
#                         parsing (e.g. after processing a META tag).
#                  read:  return the current encoding (possibly
#                         established through auto-detection).
#   initial value: UTF-8
property_encoding = "http://www.python.org/sax/properties/encoding"

# Interning dictionary.
#   data type:   Dictionary
#   description: The dictionary used to intern common strings in the
#                document.
#   access:      write: Request that the parser uses a specific
#                       dictionary, to allow interning across different
#                       documents.
#                read:  return the current interning dictionary, or
#                       None.
property_interning_dict = "http://www.python.org/sax/properties/interning-dict"

# Every core property identifier defined above.
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]
|
430
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py
Normal file
430
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py
Normal file
@ -0,0 +1,430 @@
|
||||
"""
|
||||
This module contains the core classes of version 2.0 of SAX for Python.
|
||||
This file provides only default classes with absolutely minimum
|
||||
functionality, from which drivers and applications can be subclassed.
|
||||
|
||||
Many of these classes are empty and are included only as documentation
|
||||
of the interfaces.
|
||||
|
||||
$Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $
|
||||
"""
|
||||
|
||||
# Version identifier of this module; the module docstring describes its
# contents as the core classes of version 2.0 of SAX for Python.
version = '2.0beta'
|
||||
|
||||
# A number of interfaces used to live in saxlib, but are now in
|
||||
# various other modules for Python 2 compatibility. If nobody uses
|
||||
# them here any longer, the references can be removed
|
||||
|
||||
from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver
|
||||
from xmlreader import XMLReader, InputSource, Locator, IncrementalParser
|
||||
from _exceptions import *
|
||||
|
||||
from handler import \
|
||||
feature_namespaces,\
|
||||
feature_namespace_prefixes,\
|
||||
feature_string_interning,\
|
||||
feature_validation,\
|
||||
feature_external_ges,\
|
||||
feature_external_pes,\
|
||||
all_features,\
|
||||
property_lexical_handler,\
|
||||
property_declaration_handler,\
|
||||
property_dom_node,\
|
||||
property_xml_string,\
|
||||
all_properties
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# MAIN INTERFACES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
# ===== XMLFILTER =====
|
||||
|
||||
class XMLFilter(XMLReader):
    """Interface of a SAX2 parser filter.

    A filter is an XMLReader whose events come from another XMLReader
    (possibly itself a filter) instead of from a primary source such
    as a document or some other non-SAX data source.  A filter can
    modify the event stream before passing it on to its handlers.
    """

    def __init__(self, parent = None):
        """Create the filter; the parent reader may be given right away."""
        XMLReader.__init__(self)
        self._parent = parent

    def setParent(self, parent):
        """Set the parent XMLReader of this filter.

        The argument may not be None; this method does not itself
        check that.
        """
        self._parent = parent

    def getParent(self):
        """Return the parent of this filter."""
        return self._parent
|
||||
|
||||
# ===== ATTRIBUTES =====
|
||||
|
||||
class Attributes:
    """Interface of a collection of XML attributes, accessible by name.

    Every method here raises NotImplementedError; concrete attribute
    containers have to provide all of them.
    """

    def getLength(self):
        """Return the number of attributes in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getType(self, name):
        """Return the type of the attribute called name."""
        raise NotImplementedError("This method must be implemented!")

    def getValue(self, name):
        """Return the value of the attribute called name."""
        raise NotImplementedError("This method must be implemented!")

    def getValueByQName(self, name):
        """Return the value of the attribute with the given raw (or
        qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNameByQName(self, name):
        """Return the namespace name of the attribute with the given
        raw (or qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNames(self):
        """Return a list with the names of all attributes in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getQNames(self):
        """Return a list with the raw qualified names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def __len__(self):
        """Alias for getLength."""
        raise NotImplementedError("This method must be implemented!")

    def __getitem__(self, name):
        """Alias for getValue."""
        raise NotImplementedError("This method must be implemented!")

    def keys(self):
        """Return a list of the attribute names in the list."""
        raise NotImplementedError("This method must be implemented!")

    def has_key(self, name):
        """Tell whether the attribute is in the list: true if it is,
        false otherwise."""
        raise NotImplementedError("This method must be implemented!")

    def get(self, name, alternative=None):
        """Return the value associated with attribute name, or
        alternative if it is not available."""
        raise NotImplementedError("This method must be implemented!")

    def copy(self):
        """Return a copy of the Attributes object."""
        raise NotImplementedError("This method must be implemented!")

    def items(self):
        """Return a list of (attribute_name, value) pairs."""
        raise NotImplementedError("This method must be implemented!")

    def values(self):
        """Return a list of all attribute values."""
        raise NotImplementedError("This method must be implemented!")
|
||||
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# HANDLER INTERFACES
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
|
||||
# ===== DECLHANDLER =====
|
||||
|
||||
class DeclHandler:
    """Optional SAX2 handler that receives DTD declaration events.

    Some DTD declarations are already reported through the DTDHandler
    interface.  Everything reported to this handler occurs between the
    startDTD and endDTD events of the LexicalHandler.

    To set the DeclHandler for an XMLReader, use the setProperty
    method with the identifier http://xml.org/sax/handlers/DeclHandler.

    NOTE(review): the property_declaration_handler constant imported
    by this module is a different identifier (it ends in
    properties/declaration-handler) -- confirm which one drivers
    actually honour.

    All callbacks do nothing by default.
    """

    def attributeDecl(self, elem_name, attr_name, type, value_def, value):
        """Report an attribute type declaration.

        Only the first declaration is reported.

        elem_name is the element type name and attr_name the attribute
        type name.  type represents the attribute type: one of the
        strings "CDATA", "ID", "IDREF", "IDREFS", "NMTOKEN",
        "NMTOKENS", "ENTITY", "ENTITIES" or "NOTATION", or a list of
        names for an enumerated definition.  value_def represents the
        default declaration ('#IMPLIED', '#REQUIRED', '#FIXED' or
        None).  value is a string with the attribute's default value,
        or None if there is none.
        """

    def elementDecl(self, elem_name, content_model):
        """Report an element type declaration.

        Only the first declaration is reported.

        content_model is the string 'EMPTY', the string 'ANY', or the
        content model structure as a tuple (separator, tokens,
        modifier).  separator is the separator used in the token list
        ('|' or ','), tokens is the list of tokens (element type names,
        or tuples representing parentheses) and modifier is the
        quantity modifier ('*', '?' or '+').
        """

    def internalEntityDecl(self, name, value):
        """Report an internal entity declaration.

        Only the first declaration of an entity is reported.

        name is the name of the entity; for a parameter entity it
        begins with '%'.  value is the replacement text of the entity.
        """

    def externalEntityDecl(self, name, public_id, system_id):
        """Report a parsed entity declaration.

        Unparsed entities are reported to the DTDHandler instead.
        Only the first declaration of each entity is reported.

        name is the name of the entity; for a parameter entity it
        begins with '%'.  public_id and system_id are the public and
        system identifiers of the entity; public_id is None if none
        was declared.
        """
|
||||
|
||||
|
||||
|
||||
# ===== LEXICALHANDLER =====
|
||||
|
||||
class LexicalHandler:
    """Optional SAX2 handler that receives lexical events.

    Lexical information describes how the document was encoded, as
    opposed to what it contains (which is reported to the
    ContentHandler).  Comments and the boundaries of CDATA marked
    sections are examples.

    To set the LexicalHandler of an XMLReader, use the setProperty
    method with the property identifier
    'http://xml.org/sax/handlers/LexicalHandler'.  There is no
    guarantee that the XMLReader will support or recognize this
    property.

    NOTE(review): the property_lexical_handler constant imported by
    this module is a different identifier (it ends in
    properties/lexical-handler) -- confirm which one drivers actually
    honour.

    All callbacks do nothing by default.
    """

    def comment(self, content):
        """Report a comment found anywhere in the document.

        That includes comments inside the DTD and outside the document
        element.  content is a string holding the contents of the
        comment.
        """

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if the document
        has an associated DTD.

        A startEntity event is reported before declaration events from
        the external DTD subset are reported; this can be used to
        infer which subset a DTD declaration derives from.

        name is the name of the document element type, public_id the
        public identifier of the DTD (None if none was supplied) and
        system_id the system identifier of the external subset (None
        if none was supplied).
        """

    def endDTD(self):
        """Signal the end of the DTD declarations."""

    def startEntity(self, name):
        """Report the beginning of an entity.

        The start and end of the document entity are not reported.
        The start and end of the external DTD subset are reported
        under the pseudo-name '[dtd]'.

        Skipped entities are reported through the skippedEntity event
        of the ContentHandler rather than through this event.

        name is the name of the entity; for a parameter entity it
        begins with '%'.
        """

    def endEntity(self, name):
        """Report the end of an entity.

        name is the name of the entity and follows the same
        conventions as for startEntity.
        """

    def startCDATA(self):
        """Report the beginning of a CDATA marked section.

        The contents of the section itself are reported through the
        characters event.
        """

    def endCDATA(self):
        """Report the end of a CDATA marked section."""
|
||||
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# SAX 1.0 COMPATIBILITY CLASSES
|
||||
# Note that these are all deprecated.
|
||||
#
|
||||
#============================================================================
|
||||
|
||||
# ===== ATTRIBUTELIST =====
|
||||
|
||||
class AttributeList:
    """Interface of an attribute list (SAX 1.0).

    An attribute list provides information about the attributes of an
    element; only specified or defaulted attributes are reported.  The
    information returned by the object is valid only during the scope
    of the DocumentHandler.startElement callback, and the attributes
    are not necessarily provided in the order in which they were
    declared or specified.

    The methods of this interface class have no implementation; each
    one simply returns None.
    """

    def getLength(self):
        """Return the number of attributes in the list."""

    def getName(self, i):
        """Return the name of an attribute in the list."""

    def getType(self, i):
        """Return the type of an attribute in the list.

        The parameter can be either an integer index or an attribute
        name.
        """

    def getValue(self, i):
        """Return the value of an attribute in the list.

        The parameter can be either an integer index or an attribute
        name.
        """

    def __len__(self):
        """Alias for getLength."""

    def __getitem__(self, key):
        """Alias for getName (integer key) and getValue (string key)."""

    def keys(self):
        """Return a list of the attribute names."""

    def has_key(self, key):
        """Tell whether the attribute is in the list: true if it is,
        false otherwise."""

    def get(self, key, alternative=None):
        """Return the value associated with attribute name, or
        alternative if it is not available."""

    def copy(self):
        """Return a copy of the AttributeList."""

    def items(self):
        """Return a list of (attribute_name, value) pairs."""

    def values(self):
        """Return a list of all attribute values."""
|
||||
|
||||
|
||||
# ===== DOCUMENTHANDLER =====
|
||||
|
||||
class DocumentHandler:
    """Receiver for general document events (SAX 1.0).

    This is the main client interface for SAX.  It holds the callbacks
    for the most important document events, such as the start and end
    of elements.  Create an object implementing this interface and
    register it with the Parser.  If you do not want to implement the
    whole interface, derive a class from HandlerBase, which implements
    the default functionality.  The location of any document event can
    be found through the Locator interface supplied by
    setDocumentLocator().

    All callbacks do nothing by default.
    """

    def characters(self, ch, start, length):
        """Handle a character data event."""

    def endDocument(self):
        """Handle the event for the end of a document."""

    def endElement(self, name):
        """Handle the event for the end of an element."""

    def ignorableWhitespace(self, ch, start, length):
        """Handle an event for ignorable whitespace in element content."""

    def processingInstruction(self, target, data):
        """Handle a processing instruction event."""

    def setDocumentLocator(self, locator):
        """Receive an object for locating the origin of SAX document
        events."""

    def startDocument(self):
        """Handle the event for the beginning of a document."""

    def startElement(self, name, atts):
        """Handle the event for the beginning of an element."""
|
||||
|
||||
|
||||
# ===== HANDLERBASE =====
|
||||
|
||||
class HandlerBase(EntityResolver, DTDHandler, DocumentHandler,
                  ErrorHandler):
    """Default base class for handlers (SAX 1.0).

    The class supplies the default behaviour of four SAX interfaces:
    EntityResolver, DTDHandler, DocumentHandler and ErrorHandler.
    Instead of implementing those interfaces in full you can extend
    this class and override only the methods you need.  Using the
    class is optional; you are free to implement the interfaces
    directly if you wish.
    """
|
||||
|
||||
|
||||
# ===== PARSER =====
|
||||
|
||||
class Parser:
    """Basic interface of SAX (Simple API for XML) parsers (SAX 1.0).

    Every SAX parser must implement this interface.  It lets users
    register handlers for the different types of events and initiate
    a parse from a URI, a character stream, or a byte stream.  SAX
    parsers should also implement a zero-argument constructor.
    """

    def __init__(self):
        # Start with a default handler of each kind; the set* methods
        # below replace them.
        self.doc_handler = DocumentHandler()
        self.dtd_handler = DTDHandler()
        self.ent_handler = EntityResolver()
        self.err_handler = ErrorHandler()

    def parse(self, systemId):
        """Parse an XML document from a system identifier.

        This base implementation does nothing.
        """

    def parseFile(self, fileobj):
        """Parse an XML document from a file-like object.

        This base implementation does nothing.
        """

    def setDocumentHandler(self, handler):
        """Register the object that receives basic document-related
        events."""
        self.doc_handler = handler

    def setDTDHandler(self, handler):
        """Register the object that receives basic DTD-related events."""
        self.dtd_handler = handler

    def setEntityResolver(self, resolver):
        """Register the object that resolves external entities."""
        self.ent_handler = resolver

    def setErrorHandler(self, handler):
        """Register the object that receives error-message events."""
        self.err_handler = handler

    def setLocale(self, locale):
        """Let an application set the locale for errors and warnings.

        SAX parsers are not required to localise errors and warnings,
        but a parser that cannot support the requested locale must
        throw a SAX exception.  Applications may request a locale
        change in the middle of a parse.

        This base implementation always raises
        SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")
|
813
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py
Normal file
813
extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py
Normal file
@ -0,0 +1,813 @@
|
||||
"""
|
||||
A library of useful helper classes to the saxlib classes, for the
|
||||
convenience of application and driver writers.
|
||||
|
||||
$Id: saxutils.py,v 1.37 2005/04/13 14:02:08 syt Exp $
|
||||
"""
|
||||
import os, urlparse, urllib2, types
|
||||
import handler
|
||||
import xmlreader
|
||||
import sys, _exceptions, saxlib
|
||||
|
||||
from xml.Uri import Absolutize, MakeUrllibSafe,IsAbsolute
|
||||
|
||||
# List of the string types offered by the types module.  UnicodeType
# is only included when the interpreter defines it (1.5 compatibility:
# UnicodeType not defined).
_StringTypes = [types.StringType]
if hasattr(types, "UnicodeType"):
    _StringTypes.append(types.UnicodeType)
|
||||
|
||||
def __dict_replace(s, d):
    """Replace substrings of a string using a dictionary.

    Each key of d found in s is replaced by the corresponding value.
    The replacements are applied one after another in the iteration
    order of d.items(), so a later replacement operates on the result
    of the earlier ones.
    """
    result = s
    for needle, replacement in d.items():
        result = result.replace(needle, replacement)
    return result
|
||||
|
||||
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    The three characters are replaced by the entity references
    &amp;, &lt; and &gt; respectively.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    These extra replacements are applied after the three standard
    ones.

    Returns the escaped string; data itself is not modified.
    """
    # The default dictionary is shared between calls, which is safe
    # because it is only read, never modified.

    # The ampersand must be handled first: the other two replacements
    # introduce ampersands that must not be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
|
||||
|
||||
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    The three entity references are replaced by the characters &, <
    and > respectively.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.
    These extra replacements are applied after &lt; and &gt; but
    before &amp;.

    Returns the unescaped string; data itself is not modified.
    """
    # The default dictionary is shared between calls, which is safe
    # because it is only read, never modified.
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # must do ampersand last: otherwise "&amp;lt;" would first become
    # "&lt;" and then, wrongly, "<".
    return data.replace("&amp;", "&")
|
||||
|
||||
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    The quoting style depends on the quotes found in the escaped data:

    - no double quote: the value is wrapped in double quotes;
    - double quotes but no single quote: the value is wrapped in
      single quotes and left otherwise untouched;
    - both kinds of quote: the value is wrapped in double quotes and
      every double quote inside it is replaced by &quot;.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the quoted string, including the surrounding quotes.
    """
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur, so one of them has to be
            # written as an entity reference.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
|
||||
|
||||
# --- DefaultHandler
|
||||
|
||||
class DefaultHandler(handler.EntityResolver, handler.DTDHandler,
                     handler.ContentHandler, handler.ErrorHandler):
    """Default base class for SAX2 event handlers.

    The class adds nothing of its own: all callback methods come from
    the four handler base classes and can be overridden by application
    implementors.  It replaces the deprecated SAX1 HandlerBase class.
    """
|
||||
|
||||
# --- Location
|
||||
|
||||
class Location:
    """Snapshot of a position in an XML entity.

    The constructor reads column, line, public id and system id from the
    locator it is given and keeps private copies, so the values stay
    available after the locator itself has moved on."""

    def __init__(self, locator):
        self.__col = locator.getColumnNumber()
        self.__line = locator.getLineNumber()
        self.__pubid = locator.getPublicId()
        self.__sysid = locator.getSystemId()

    def getColumnNumber(self):
        "Return the column number captured at construction time."
        return self.__col

    def getLineNumber(self):
        "Return the line number captured at construction time."
        return self.__line

    def getPublicId(self):
        "Return the public identifier captured at construction time."
        return self.__pubid

    def getSystemId(self):
        "Return the system identifier captured at construction time."
        return self.__sysid

    def __str__(self):
        # Unknown line/column values (None) are rendered as "?".
        line = self.__line
        if line is None:
            line = "?"
        col = self.__col
        if col is None:
            col = "?"
        # Prefer the system id, then the public id, then a placeholder.
        entity = self.__sysid or self.__pubid or "<unknown>"
        return "%s:%s:%s" % (entity, line, col)
|
||||
|
||||
# --- ErrorPrinter
|
||||
|
||||
class ErrorPrinter:
    """Error handler that writes one line per reported problem.

    Messages go to outfile (sys.stderr by default).  The level argument
    filters what is reported: warnings are written when level <= 0,
    errors when level <= 1 and fatal errors when level <= 2."""

    def __init__(self, level=0, outfile=sys.stderr):
        self._level = level
        self._outfile = outfile

    def warning(self, exception):
        if self._level > 0:
            return
        where = self.__getpos(exception)
        self._outfile.write("WARNING in %s: %s\n" %
                            (where, exception.getMessage()))

    def error(self, exception):
        if self._level > 1:
            return
        where = self.__getpos(exception)
        self._outfile.write("ERROR in %s: %s\n" %
                            (where, exception.getMessage()))

    def fatalError(self, exception):
        if self._level > 2:
            return
        where = self.__getpos(exception)
        self._outfile.write("FATAL ERROR in %s: %s\n" %
                            (where, exception.getMessage()))

    def __getpos(self, exception):
        # Only parse exceptions carry position information.
        if not isinstance(exception, _exceptions.SAXParseException):
            return "<unknown>"
        return "%s:%s:%s" % (exception.getSystemId(),
                             exception.getLineNumber(),
                             exception.getColumnNumber())
|
||||
|
||||
# --- ErrorRaiser
|
||||
|
||||
class ErrorRaiser:
    """Error handler that re-raises the exceptions handed to it.

    The level argument filters what is raised: warnings when level <= 0,
    errors when level <= 1 and fatal errors when level <= 2.  Anything
    filtered out is silently ignored."""

    def __init__(self, level = 0):
        self._level = level

    def error(self, exception):
        if self._level > 1:
            return
        raise exception

    def fatalError(self, exception):
        if self._level > 2:
            return
        raise exception

    def warning(self, exception):
        if self._level > 0:
            return
        raise exception
|
||||
|
||||
# --- AttributesImpl now lives in xmlreader
|
||||
from xmlreader import AttributesImpl
|
||||
|
||||
# --- XMLGenerator is the SAX2 ContentHandler for writing back XML
|
||||
import codecs
|
||||
|
||||
def _outputwrapper(stream,encoding):
|
||||
writerclass = codecs.lookup(encoding)[3]
|
||||
return writerclass(stream)
|
||||
|
||||
if not hasattr(codecs, "register_error"):
    def writetext(stream, text, entities={}):
        """Write escaped text to stream, falling back to numeric
        character references for characters the stream cannot encode.

        Used when the codecs module has no error-handler registry: the
        whole text is tried first, and on UnicodeError it is re-written
        one character at a time."""
        escaped = escape(text, entities)
        try:
            stream.write(escaped)
        except UnicodeError:
            for char in escaped:
                try:
                    stream.write(char)
                except UnicodeError:
                    stream.write("&#%d;" % ord(char))
else:
    def writetext(stream, text, entities={}):
        """Write escaped text to stream, letting the codec replace
        unencodable characters with numeric character references.

        The stream's error mode is switched to "xmlcharrefreplace" for
        the write and set to "strict" afterwards."""
        stream.errors = "xmlcharrefreplace"
        stream.write(escape(text, entities))
        stream.errors = "strict"
|
||||
|
||||
def writeattr(stream, text):
    """Write text to stream as a quoted, escaped attribute value.

    The delimiter is chosen to minimise escaping: double quotes are used
    unless the text contains more double quotes than single quotes, in
    which case single quotes are used.  Occurrences of the chosen
    delimiter inside the text are written as an entity reference.
    """
    countdouble = text.count('"')
    if countdouble:
        countsingle = text.count("'")
        if countdouble <= countsingle:
            entities = {'"': "&quot;"}
            quote = '"'
        else:
            entities = {"'": "&apos;"}
            quote = "'"
    else:
        # No double quotes at all: nothing beyond the standard escapes.
        entities = {}
        quote = '"'
    stream.write(quote)
    writetext(stream, text, entities)
    stream.write(quote)
|
||||
|
||||
|
||||
class XMLGenerator(handler.ContentHandler):
    """SAX2 ContentHandler that serialises the events it receives back
    into XML text on an output stream.

    out is the stream to write to (sys.stdout when None); it is wrapped
    in a codec writer for the given encoding, which is also the encoding
    named in the XML declaration written by startDocument.
    """

    # Template for prefixes invented for namespaced attributes that have
    # no usable prefix in scope; filled with a running counter.
    GENERATED_PREFIX = "xml.sax.saxutils.prefix%s"

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = _outputwrapper(out,encoding)
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping that still
        # have to be written on the next start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._generated_prefix_ctr = 0

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # Save a copy of the context as it was, then extend the live one.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._out.write(' %s=' % attr_name)
            writeattr(self._out, value)
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        if name[0] is None:
            name = name[1]
        elif self._current_context[name[0]] is None:
            # default namespace
            name = name[1]
        else:
            name = self._current_context[name[0]] + ":" + name[1]
        self._out.write('<' + name)

        # Emit the namespace declarations collected since the last tag.
        for k,v in self._undeclared_ns_maps:
            if k is None:
                self._out.write(' xmlns="%s"' % (v or ''))
            else:
                self._out.write(' xmlns:%s="%s"' % (k,v))
        self._undeclared_ns_maps = []

        # Prefixes invented for this element only, keyed by namespace URI,
        # so several attributes in one namespace share one declaration.
        generated = {}
        for (attr_name, value) in attrs.items():
            uri = attr_name[0]
            localname = attr_name[1]
            if uri is None:
                attr_qname = localname
            elif self._current_context[uri] is None:
                # The namespace is only bound as the default namespace,
                # which does not apply to attributes: invent a prefix and
                # declare it on this element.  The URI must be taken from
                # the original (uri, localname) pair, not from the
                # already-prefixed name.
                prefix = generated.get(uri)
                if prefix is None:
                    prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
                    self._generated_prefix_ctr = self._generated_prefix_ctr + 1
                    self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(uri)))
                    generated[uri] = prefix
                # The context mapping for uri is deliberately left as the
                # default namespace; remapping it would make endElementNS
                # emit a prefixed end tag for an unprefixed start tag.
                attr_qname = prefix + ':' + localname
            else:
                attr_qname = self._current_context[uri] + ":" + localname
            self._out.write(' %s=' % attr_qname)
            writeattr(self._out, value)
        self._out.write('>')

    def endElementNS(self, name, qname):
        # XXX: if qname is not None, we better use it.
        # Python 2.0b2 requires us to use the recorded prefix for
        # name[0], though
        if name[0] is None:
            qname = name[1]
        elif self._current_context[name[0]] is None:
            qname = name[1]
        else:
            qname = self._current_context[name[0]] + ":" + name[1]
        self._out.write('</%s>' % qname)

    def characters(self, content):
        writetext(self._out, content)

    def ignorableWhitespace(self, content):
        # Written verbatim, without escaping.
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
|
||||
|
||||
|
||||
class LexicalXMLGenerator(XMLGenerator, saxlib.LexicalHandler):
    """A XMLGenerator that also supports the LexicalHandler interface"""

    def __init__(self, out=None, encoding="iso-8859-1"):
        XMLGenerator.__init__(self, out, encoding)
        # Non-zero while between startCDATA and endCDATA.
        self._in_cdata = 0

    def characters(self, content):
        if self._in_cdata:
            # "]]>" cannot appear inside a CDATA section: close the
            # section, emit the sequence with ">" escaped, and reopen.
            self._out.write(content.replace(']]>', ']]>]]&gt;<![CDATA['))
        else:
            self._out.write(escape(content))

    # LexicalHandler methods
    # (we only support the most important ones and inherit the rest)

    def startDTD(self, name, public_id, system_id):
        # Writes the opening of the DOCTYPE declaration; endDTD closes it.
        self._out.write('<!DOCTYPE %s' % name)
        if public_id:
            self._out.write(' PUBLIC %s %s' % (
                quoteattr(public_id or ""), quoteattr(system_id or "")
                ))
        elif system_id:
            self._out.write(' SYSTEM %s' % quoteattr(system_id or ""))

    def endDTD(self):
        self._out.write('>')

    def comment(self, content):
        # The comment text is written verbatim, without escaping.
        self._out.write('<!--')
        self._out.write(content)
        self._out.write('-->')

    def startCDATA(self):
        self._in_cdata = 1
        self._out.write('<![CDATA[')

    def endCDATA(self):
        self._in_cdata = 0
        self._out.write(']]>')
|
||||
|
||||
|
||||
# --- ContentGenerator is the SAX1 DocumentHandler for writing back XML
|
||||
class ContentGenerator(XMLGenerator):
    """XMLGenerator variant accepting the SAX1 characters() signature."""

    def characters(self, str, start, end):
        # SAX1 passes a buffer plus offsets where SAX2 passes a plain
        # string.  The third argument is added to start, i.e. it is used
        # as a length.  For plain strings, we may want to use a buffer
        # object.
        stop = start + end
        return XMLGenerator.characters(self, str[start:stop])
|
||||
|
||||
# --- XMLFilterImpl
|
||||
class XMLFilterBase(saxlib.XMLFilter):
    """Pass-through filter meant to sit between an XMLReader and the
    client application's event handlers.

    Every event is forwarded unchanged to the registered handler and
    every configuration request is forwarded unchanged to the parent
    reader.  Subclasses override individual methods to alter the event
    stream or the requests as they pass through.

    The _parent, _cont_handler, _err_handler, _dtd_handler and
    _ent_handler attributes used here are not set in this class;
    presumably the saxlib.XMLFilter base provides them."""

    # ErrorHandler methods

    def error(self, exception):
        handler_ = self._err_handler
        handler_.error(exception)

    def fatalError(self, exception):
        handler_ = self._err_handler
        handler_.fatalError(exception)

    def warning(self, exception):
        handler_ = self._err_handler
        handler_.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        target = self._cont_handler
        target.setDocumentLocator(locator)

    def startDocument(self):
        target = self._cont_handler
        target.startDocument()

    def endDocument(self):
        target = self._cont_handler
        target.endDocument()

    def startPrefixMapping(self, prefix, uri):
        target = self._cont_handler
        target.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        target = self._cont_handler
        target.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        target = self._cont_handler
        target.startElement(name, attrs)

    def endElement(self, name):
        target = self._cont_handler
        target.endElement(name)

    def startElementNS(self, name, qname, attrs):
        target = self._cont_handler
        target.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        target = self._cont_handler
        target.endElementNS(name, qname)

    def characters(self, content):
        target = self._cont_handler
        target.characters(content)

    def ignorableWhitespace(self, chars):
        target = self._cont_handler
        target.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        target = self._cont_handler
        target.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        dtd = self._dtd_handler
        dtd.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        dtd = self._dtd_handler
        dtd.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        resolver = self._ent_handler
        return resolver.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Install this filter as every handler of the parent reader so
        # all events pass through it, then let the parent do the parsing.
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        reader = self._parent
        reader.setLocale(locale)

    def getFeature(self, name):
        reader = self._parent
        return reader.getFeature(name)

    def setFeature(self, name, state):
        reader = self._parent
        reader.setFeature(name, state)

    def getProperty(self, name):
        reader = self._parent
        return reader.getProperty(name)

    def setProperty(self, name, value):
        reader = self._parent
        reader.setProperty(name, value)

# FIXME: remove this backward compatibility hack when not needed anymore
XMLFilterImpl = XMLFilterBase
|
||||
|
||||
# --- BaseIncrementalParser
|
||||
|
||||
class BaseIncrementalParser(xmlreader.IncrementalParser):
    """Convenience base for SAX 2.0 drivers: implements the XMLReader
    parse method in terms of the feed, close and reset methods of the
    IncrementalParser interface."""

    def parse(self, source):
        """Parse source by feeding its byte stream to the parser in
        16384-byte reads, bracketed by startDocument and endDocument
        events on the content handler."""
        source = prepare_input_source(source)
        self.prepareParser(source)

        self._cont_handler.startDocument()

        # FIXME: what about char-stream?
        stream = source.getByteStream()
        while 1:
            chunk = stream.read(16384)
            if chunk == "":
                break
            self.feed(chunk)

        self.close()
        self.reset()

        self._cont_handler.endDocument()

    def prepareParser(self, source):
        """Hook invoked by parse before any data is fed, so the SAX 2.0
        driver can set itself up.  Must be overridden."""
        raise NotImplementedError("prepareParser must be overridden!")
|
||||
|
||||
# --- Utility functions
|
||||
|
||||
def prepare_input_source(source, base = ""):
    """Return an InputSource that is ready for reading.

    source may be a string (used as the system identifier), an object
    with a read method (used as the byte stream), or an InputSource.
    If the result has no byte stream, its system identifier is made
    absolute against base and opened with urllib2."""

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        # File-like objects with a name get a system id derived from it.
        if hasattr(stream, "name"):
            source.setSystemId(absolute_system_id(stream.name, base))

    if source.getByteStream() is None:
        sysid = absolute_system_id(source.getSystemId(), base)
        source.setSystemId(sysid)
        opened = urllib2.urlopen(sysid)
        source.setByteStream(opened)

    return source
|
||||
|
||||
|
||||
def absolute_system_id(sysid, base=''):
    """Return sysid as an absolute URI passed through MakeUrllibSafe.

    A sysid naming an existing local path becomes a file: URI of its
    absolute path; otherwise, when base is given, sysid is resolved
    against it.  The result is asserted to be absolute."""
    if os.path.exists(sysid):
        local_path = os.path.abspath(sysid)
        sysid = 'file:%s' % local_path
    elif base:
        sysid = Absolutize(sysid, base)
    assert IsAbsolute(sysid)
    return MakeUrllibSafe(sysid)
|
||||
|
||||
# ===========================================================================
|
||||
#
|
||||
# DEPRECATED SAX 1.0 CLASSES
|
||||
#
|
||||
# ===========================================================================
|
||||
|
||||
# --- AttributeMap
|
||||
|
||||
class AttributeMap:
    """An implementation of AttributeList that takes an (attr,val) hash
    and uses it to implement the AttributeList interface.

    The mapping is stored by reference in the map attribute, not copied.
    Positional access follows the key order of that mapping."""

    def __init__(self, map):
        self.map=map

    def getLength(self):
        "Return the number of attributes."
        return len(self.map)

    def getName(self, i):
        "Return the name of the attribute at index i, or None if out of range."
        try:
            return list(self.map.keys())[i]
        except IndexError:
            return None

    def getType(self, i):
        "Return the attribute type, which is always reported as CDATA."
        return "CDATA"

    def getValue(self, i):
        """Return the value of an attribute given its index or its name,
        or None when there is no such attribute."""
        try:
            if isinstance(i, int):
                # An out-of-range index yields the name None, whose lookup
                # raises the KeyError handled below.
                return self.map[self.getName(i)]
            else:
                return self.map[i]
        except KeyError:
            return None

    def __len__(self):
        return len(self.map)

    def __getitem__(self, key):
        # An integer key returns the attribute NAME at that position;
        # any other key returns the attribute value.
        if isinstance(key, int):
            return list(self.map.keys())[key]
        else:
            return self.map[key]

    def items(self):
        return self.map.items()

    def keys(self):
        return self.map.keys()

    def has_key(self,key):
        return key in self.map

    def get(self, key, alternative=None):
        return self.map.get(key, alternative)

    def copy(self):
        "Return a new AttributeMap over a shallow copy of the mapping."
        return AttributeMap(self.map.copy())

    def values(self):
        return self.map.values()
|
||||
|
||||
# --- Event broadcasting object
|
||||
|
||||
class EventBroadcaster:
    """Takes a list of objects and forwards any method calls received
    to all objects in the list. The attribute list holds the list and
    can freely be modified by clients.

    Any attribute not found on the broadcaster itself is returned as a
    callable Event that invokes the method of that name on every object
    in the list, in list order.  Return values are discarded."""

    class Event:
        "Helper objects that represent event methods."

        def __init__(self,list,name):
            self.list=list
            self.name=name

        def __call__(self,*rest):
            for obj in self.list:
                # Argument unpacking replaces the deprecated apply().
                getattr(obj,self.name)(*rest)

    def __init__(self,list):
        self.list=list

    def __getattr__(self,name):
        return self.Event(self.list,name)

    def __repr__(self):
        return "<EventBroadcaster instance at %d>" % id(self)
|
||||
|
||||
# --- ESIS document handler
|
||||
import saxlib
|
||||
class ESISDocHandler(saxlib.HandlerBase):
    "SAX document handler that writes a naive ESIS rendering of the events."

    def __init__(self,writer=sys.stdout):
        self.writer=writer

    def processingInstruction (self,target, remainder):
        """Write a processing instruction as a '?' line holding the
        target and the remainder."""
        line = "?"+target+" "+remainder+"\n"
        self.writer.write(line)

    def startElement(self,name,amap):
        """Write a '(' line for the element followed by one 'A' line
        per attribute."""
        out = self.writer
        out.write("("+name+"\n")
        for attr in amap.keys():
            out.write("A"+attr+" "+amap[attr]+"\n")

    def endElement(self,name):
        "Write a ')' line for the end of an element."
        line = ")"+name+"\n"
        self.writer.write(line)

    def characters(self,data,start_ix,length):
        "Write the selected slice of character data as a '-' line."
        stop_ix = start_ix+length
        self.writer.write("-"+data[start_ix:stop_ix]+"\n")
|
||||
|
||||
# --- XML canonizer
|
||||
|
||||
class Canonizer(saxlib.HandlerBase):
    """A SAX document handler that produces canonized XML output.

    Attributes are written in sorted name order with double-quoted
    values, character data outside the document element is dropped, and
    markup characters, tabs and line breaks in data are written as
    entity or character references."""

    def __init__(self,writer=sys.stdout):
        # Nesting depth of the element currently being written.
        self.elem_level=0
        self.writer=writer

    def processingInstruction (self,target, remainder):
        # The XML declaration is not reproduced in the output.
        if target != "xml":
            self.writer.write("<?"+target+" "+remainder+"?>")

    def startElement(self,name,amap):
        self.writer.write("<"+name)

        for a_name in sorted(amap.keys()):
            self.writer.write(" "+a_name+"=\"")
            self.write_data(amap[a_name])
            self.writer.write("\"")
        self.writer.write(">")
        self.elem_level=self.elem_level+1

    def endElement(self,name):
        self.writer.write("</"+name+">")
        self.elem_level=self.elem_level-1

    def ignorableWhitespace(self,data,start_ix,length):
        # Treated exactly like ordinary character data.
        self.characters(data,start_ix,length)

    def characters(self,data,start_ix,length):
        # Data outside the document element is not written.
        if self.elem_level>0:
            self.write_data(data[start_ix:start_ix+length])

    def write_data(self,data):
        "Writes datachars to writer."
        # The ampersand goes first so that the ampersands introduced by
        # the later replacements are not escaped a second time.
        data=data.replace("&","&amp;")
        data=data.replace("<","&lt;")
        data=data.replace("\"","&quot;")
        data=data.replace(">","&gt;")
        data=data.replace(chr(9),"&#9;")
        data=data.replace(chr(10),"&#10;")
        data=data.replace(chr(13),"&#13;")
        self.writer.write(data)
|
||||
|
||||
# --- mllib
|
||||
|
||||
class mllib:
    """A re-implementation of the htmllib, sgmllib and xmllib interfaces as a
    SAX DocumentHandler.

    Data is pushed in with feed and close; the nested Handler class
    translates the parser's SAX events into calls to the handle_* and
    unknown_* methods below, which subclasses override."""

    # Unsupported:
    # - setnomoretags
    # - setliteral
    # - translate_references
    # - handle_xml
    # - handle_doctype
    # - handle_charref
    # - handle_entityref
    # - handle_comment
    # - handle_cdata
    # - tag_attributes

    def __init__(self):
        self.reset()

    def reset(self):
        "Create a fresh parser and a fresh internal event handler."
        import saxexts # only used here
        parser = saxexts.XMLParserFactory.make_parser()
        self.parser = parser
        self.handler = mllib.Handler(parser, self)
        self.handler.reset()

    def feed(self,data):
        "Pass data on to the parser."
        self.parser.feed(data)

    def close(self):
        "Tell the parser that no more data will follow."
        self.parser.close()

    def get_stack(self):
        "Return the list of names of the currently open elements."
        return self.handler.get_stack()

    # --- Handler methods (to be overridden)

    def handle_starttag(self,name,method,atts):
        "Called for a start tag that has a start_<name> method; calls it."
        method(atts)

    def handle_endtag(self,name,method):
        "Called for an end tag that has an end_<name> method; calls it."
        method()

    def handle_data(self,data):
        pass

    def handle_proc(self,target,data):
        pass

    def unknown_starttag(self,name,atts):
        pass

    def unknown_endtag(self,name):
        pass

    def syntax_error(self,message):
        pass

    # --- The internal handler class

    class Handler(saxlib.DocumentHandler,saxlib.ErrorHandler):
        """Internal adapter that receives SAX events from the driver and
        turns them into mllib events on the owning handler object."""

        def __init__(self,driver,handler):
            self.driver = driver
            # Register as both document handler and error handler.
            driver.setDocumentHandler(self)
            driver.setErrorHandler(self)
            self.handler = handler
            self.reset()

        def get_stack(self):
            return self.stack

        def reset(self):
            # Names of the elements that are currently open.
            self.stack = []

        # --- DocumentHandler methods

        def characters(self, ch, start, length):
            stop = start + length
            self.handler.handle_data(ch[start:stop])

        def endElement(self, name):
            hook = "end_" + name
            if not hasattr(self.handler, hook):
                self.handler.unknown_endtag(name)
            else:
                self.handler.handle_endtag(name, getattr(self.handler, hook))

            del self.stack[-1]

        def ignorableWhitespace(self, ch, start, length):
            # Reported to the handler as ordinary data.
            stop = start + length
            self.handler.handle_data(ch[start:stop])

        def processingInstruction(self, target, data):
            self.handler.handle_proc(target, data)

        def startElement(self, name, atts):
            self.stack.append(name)

            hook = "start_" + name
            if not hasattr(self.handler, hook):
                self.handler.unknown_starttag(name, atts)
            else:
                self.handler.handle_starttag(name,
                                             getattr(self.handler, hook),
                                             atts)

        # --- ErrorHandler methods

        def error(self, exception):
            # Recoverable errors are reported to the handler as text.
            self.handler.syntax_error(str(exception))

        def fatalError(self, exception):
            raise RuntimeError(str(exception))
|
378
extensions/jython/module/MOD-INF/lib/jython/xml/sax/xmlreader.py
Normal file
378
extensions/jython/module/MOD-INF/lib/jython/xml/sax/xmlreader.py
Normal file
@ -0,0 +1,378 @@
|
||||
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
|
||||
should be based on this code. """
|
||||
|
||||
import handler
|
||||
|
||||
from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
|
||||
|
||||
|
||||
# ===== XMLREADER =====
|
||||
|
||||
class XMLReader:
    """Callback-based interface for reading an XML document.

    This is the interface a SAX2 driver for an XML parser must
    implement.  Through it an application can set and query features
    and properties of the parser, register the handlers that receive
    document events, and start a parse.

    All SAX interfaces are assumed to be synchronous: the parse
    methods must not return until parsing is complete, and readers
    must wait for an event-handler callback to return before reporting
    the next event."""

    def __init__(self):
        # Start out with the default handler implementations.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        raise NotImplementedError("This method must be implemented!")

    def getContentHandler(self):
        "Return the ContentHandler currently registered."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Register the object that will receive document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Return the DTD handler currently registered."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register the object that will receive basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Return the EntityResolver currently registered."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register the object that will resolve external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Return the ErrorHandler currently registered."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register the object that will receive error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers are not required to localize errors and warnings,
        but a parser that cannot support the requested locale must
        throw a SAX exception.  Applications may request a locale
        change in the middle of a parse.

        This base implementation always raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        "Look up the state of a SAX2 feature; not recognized here."
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        "Set the state of a SAX2 feature; not recognized here."
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        "Look up the value of a SAX2 property; not recognized here."
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        "Set the value of a SAX2 property; not recognized here."
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
|
||||
|
||||
class IncrementalParser(XMLReader):
    """Optional extension of the XMLReader interface for parsers that
    can work incrementally; it adds the feed, close and reset methods.
    Not every underlying XML parser supports this.

    A freshly created parser accepts data through feed right away.
    Once parsing has been finished with close, reset must be called
    before the parser can take new data, whether through feed or
    through parse.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    As a convenience to SAX 2.0 driver writers, the class also provides
    a default parse method built on feed and close."""

    def __init__(self, bufsize=2**16):
        # Number of bytes requested from the byte stream per read.
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        while 1:
            chunk = stream.read(self._bufsize)
            if chunk == "":
                break
            self.feed(chunk)
        self.close()

    def feed(self, data):
        """Hand the raw XML in data to the parser, which parses it and
        emits the corresponding events.  XML constructs may be split
        across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """Hook invoked by parse before any data is fed, so the SAX 2.0
        driver can set itself up.  Must be overridden."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """Notify the parser that the whole document has been passed in
        through feed and no more data will follow, so it can run its
        final checks on the document and empty its internal buffer.

        The parser is not ready for another document until reset has
        been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """Make the parser ready for a new document; called after
        close.  The results of calling parse or feed after close
        without calling reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
|
||||
|
||||
# ===== LOCATOR =====
|
||||
|
||||
class Locator:
    """Interface that ties a SAX event to a location in the document.

    A locator gives valid results only during calls to DocumentHandler
    methods; at any other time the results are unpredictable.  This
    base implementation reports -1 for positions and None for
    identifiers."""

    def getColumnNumber(self):
        "Return the column number at which the current event ends."
        return -1

    def getLineNumber(self):
        "Return the line number at which the current event ends."
        return -1

    def getPublicId(self):
        "Return the public identifier of the current event."
        return None

    def getSystemId(self):
        "Return the system identifier of the current event."
        return None
|
||||
|
||||
# ===== INPUTSOURCE =====
|
||||
|
||||
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance can carry a public identifier, a system identifier, a
    byte stream (optionally with a character encoding) and/or a
    character stream.

    Applications create these objects to pass to XMLReader.parse and
    to return from EntityResolver.resolveEntity.

    An InputSource belongs to the application; the XMLReader must not
    modify InputSource objects passed to it, although it may copy them
    and modify the copies."""

    def __init__(self, system_id = None):
        # Only the system identifier can be given up front; everything
        # else starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Set the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Return the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Set the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Return the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Set the character encoding of this InputSource.

        The value must be a string that is acceptable in an XML
        encoding declaration (see section 4.3.3 of the XML
        recommendation).

        The encoding is ignored when the InputSource also holds a
        character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Return the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source: a Python
        file-like object that performs no byte-to-character conversion.

        The SAX parser ignores it when a character stream is also
        specified, but prefers it over opening a URI connection itself.

        An application that knows the character encoding of the byte
        stream should declare it with setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the byte stream of this input source.

        getEncoding gives the character encoding of this byte stream,
        or None if it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.  (It must be
        a Python 2.0 Unicode-wrapped file-like object that converts to
        Unicode strings.)

        When a character stream is specified the SAX parser ignores any
        byte stream and does not try to open a URI connection to the
        system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Return the character stream of this input source."
        return self.__charfile
|
||||
|
||||
# ===== ATTRIBUTESIMPL =====
|
||||
|
||||
class AttributesImpl:
    """Attribute collection for non-namespace-aware processing.

    The collection wraps a {name: value} mapping.  In this implementation
    a name and its qualified name are the same object, so the
    name <-> qname conversions only verify that the attribute exists.
    """

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type; this implementation always says CDATA."
        return "CDATA"

    def getValue(self, name):
        "Return the value stored for name; raises KeyError if absent."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value stored for the qualified name; KeyError if absent."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return name unchanged after checking it exists; KeyError if absent."
        # `in` / `raise Exc(arg)` instead of has_key / `raise Exc, arg`:
        # both spellings are valid on Python 2 and Python 3.
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return name unchanged after checking it exists; KeyError if absent."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def getQNames(self):
        "Return a list of the qualified names (same as the names here)."
        return list(self._attrs.keys())

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def __contains__(self, name):
        # Without this, `name in attrs` would fall back to calling
        # __getitem__ with 0, 1, 2, ... and fail with KeyError(0).
        return name in self._attrs

    def keys(self):
        "Return a list of the attribute names."
        return list(self._attrs.keys())

    def has_key(self, name):
        "Return true if an attribute called name is present."
        return name in self._attrs

    def get(self, name, alternative=None):
        "Return the value for name, or alternative when it is absent."
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new instance of the same class.

        The new object is built from the very same mapping object; the
        mapping itself is not duplicated."""
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return list(self._attrs.items())

    def values(self):
        "Return a list of the attribute values."
        return list(self._attrs.values())
|
||||
|
||||
# ===== ATTRIBUTESNSIMPL =====
|
||||
|
||||
class AttributesNSImpl(AttributesImpl):
    """Attribute collection for namespace-aware processing.

    Attributes are keyed by (ns_uri, lname) pairs; a second mapping
    records the qualified name that belongs to each pair.
    """

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        "Return the value of the attribute whose qualified name is name."
        # The qname table maps pair -> qname, so a reverse lookup has to
        # scan it; the first matching pair wins.
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        # Call form of raise: valid on both Python 2 and Python 3.
        raise KeyError(name)

    def getNameByQName(self, name):
        "Return the (ns_uri, lname) pair whose qualified name is name."
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name recorded for the (ns_uri, lname) pair."
        return self._qnames[name]

    def getQNames(self):
        "Return a list of all recorded qualified names."
        return list(self._qnames.values())

    def copy(self):
        """Return a new instance of the same class.

        The new object is built from the very same two mapping objects;
        the mappings themselves are not duplicated."""
        return self.__class__(self._attrs, self._qnames)
|
||||
|
||||
|
||||
def _test():
    """Smoke test: build one instance of each class, without arguments.

    The instances are discarded; the only thing checked is that the
    three constructors can be called."""
    for factory in (XMLReader, IncrementalParser, Locator):
        factory()


if __name__ == "__main__":
    _test()
|
Loading…
Reference in New Issue
Block a user