Yet a lot more python files that somehow svn just refused to add

git-svn-id: http://google-refine.googlecode.com/svn/trunk@962 7d457c2a-affb-35e4-300a-418c747d4874
This commit is contained in:
David Huynh 2010-06-14 21:59:17 +00:00
parent 7767536292
commit 3f58d88922
7 changed files with 2680 additions and 0 deletions

View File

@ -0,0 +1,380 @@
# pylint: disable-msg=C0103
#
# backported code from 4Suite with slight modifications, started from r1.89 of
# Ft/Lib/Uri.py, by syt@logilab.fr on 2005-02-09
#
# part if not all of this code should probably move to urlparse (or be used
# to fix some existing functions in this module)
#
#
# Copyright 2004 Fourthought, Inc. (USA).
# Detailed license and copyright information: http://4suite.org/COPYRIGHT
# Project home, documentation, distributions: http://4suite.org/
import os.path
import sys
import re
import urlparse, urllib, urllib2
def UnsplitUriRef(uriRefSeq):
    """Assemble a URI reference string from its five generic components.

    Meant as a replacement for urlparse.urlunsplit.  uriRefSeq must be a
    tuple or list of the form (scheme, authority, path, query, fragment),
    as produced by SplitUriRef().  The path is always emitted; each other
    component is emitted together with its delimiter unless it is None
    (an empty string still yields the delimiter).

    Raises TypeError if uriRefSeq is neither a tuple nor a list.
    """
    if not isinstance(uriRefSeq, (tuple, list)):
        raise TypeError("sequence expected, got %s" % type(uriRefSeq))
    scheme, authority, path, query, fragment = uriRefSeq

    # Collect the pieces in output order and glue them together at the end.
    pieces = []
    if scheme is not None:
        pieces.append(scheme + ':')
    if authority is not None:
        pieces.append('//' + authority)
    pieces.append(path)
    if query is not None:
        pieces.append('?' + query)
    if fragment is not None:
        pieces.append('#' + fragment)
    return ''.join(pieces)
# re.DOTALL lets the fragment's ".*" consume line breaks as well, so the
# pattern really does match every possible string (without it an input such
# as "#a\nb" produced no match at all).
SPLIT_URI_REF_PATTERN = re.compile(r"^(?:(?P<scheme>[^:/?#]+):)?(?://(?P<authority>[^/?#]*))?(?P<path>[^?#]*)(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$", re.DOTALL)


def SplitUriRef(uriref):
    """Split a URI reference into its generic components.

    Meant as a replacement for urlparse.urlsplit.  Given a URI reference as
    a string, returns the tuple (scheme, authority, path, query, fragment)
    following the regular expression of RFC 2396 appendix B.  The path is
    always a string (possibly empty); each other component is a string
    (possibly empty) when its delimiter is present and None when it is
    undefined.

    As per RFC 3986, no distinction is made between a path and the
    "opaque part" of RFC 2396.
    """
    # Every component is optional and the pattern is compiled with DOTALL,
    # so match() cannot return None here.
    match = SPLIT_URI_REF_PATTERN.match(uriref)
    return match.group('scheme', 'authority', 'path', 'query', 'fragment')
def Absolutize(uriRef, baseUri):
    """Resolve a URI reference against a base URI (RFC 3986 section 5).

    uriRef is the (possibly relative) URI reference and baseUri the
    absolute URI it is relative to.  Returns the resolved URI as a string.

    Raises ValueError if baseUri is empty or has no scheme component.  The
    base is checked even when uriRef carries its own scheme, to signal that
    the given string is not usable as a base URI at all.

    It is the caller's responsibility to ensure that the base URI matches
    the absolute-URI syntax rule of RFC 3986 and that its path contains no
    '.' or '..' segments if the scheme is hierarchical; unexpected results
    may occur otherwise.

    It is also the caller's responsibility to decide whether the reference
    is a "same-document reference".  RFC 2396 limits that to the empty
    string or "#" followed by a fragment; RFC 3986 requires comparing the
    base URI with the resolved reference minus its fragment.

    This function is similar to urlparse.urljoin() and urllib.basejoin(),
    which (as of Python 2.3) follow obsolete specs and are unreliable for
    same-document references and 'file:' URIs.
    """
    # Reasons to avoid using urllib.basejoin() and urlparse.urljoin():
    # - Both are partial implementations of long-obsolete specs.
    # - Both accept relative URLs as the base, which no spec allows.
    # - urllib.basejoin() mishandles the '' and '..' references.
    # - If the base URL uses a non-hierarchical or relative path,
    #   or if the URL scheme is unrecognized, the result is not
    #   always as expected (partly due to issues in RFC 1808).
    # - If the authority component of a 'file' URI is empty,
    #   the authority component is removed altogether. If it was
    #   not present, an empty authority component is in the result.
    # - '.' and '..' segments are not always collapsed as well as they
    #   should be (partly due to issues in RFC 1808).
    # - Effective Python 2.4, urllib.basejoin() *is* urlparse.urljoin(),
    #   but urlparse.urljoin() is still based on RFC 1808.

    # This procedure is based on the pseudocode in RFC 3986 sec. 5.2.
    #
    # ensure base URI is absolute
    if not baseUri:
        raise ValueError('baseUri is required and must be a non empty string')
    if not IsAbsolute(baseUri):
        raise ValueError('%r is not an absolute URI' % baseUri)

    # shortcut for the simplest same-document reference cases
    if uriRef == '' or uriRef[0] == '#':
        return baseUri.split('#')[0] + uriRef

    # ensure a clean slate
    tScheme = tAuth = tPath = tQuery = None

    # parse the reference into its components
    (rScheme, rAuth, rPath, rQuery, rFrag) = SplitUriRef(uriRef)

    if rScheme is not None:
        # the reference is absolute: only eliminate '.' and '..' segments
        tScheme = rScheme
        tAuth = rAuth
        tPath = RemoveDotSegments(rPath)
        tQuery = rQuery
    else:
        # the base URI's scheme, and possibly more, will be inherited;
        # the base fragment is never used
        (bScheme, bAuth, bPath, bQuery, _bFrag) = SplitUriRef(baseUri)
        if rAuth is not None:
            # net-path reference: just eliminate '.' and '..' segments
            tAuth = rAuth
            tPath = RemoveDotSegments(rPath)
            tQuery = rQuery
        else:
            # not a net-path: inherit pieces of the base URI
            if not rPath:
                # empty reference path: use the base URI's path
                tPath = bPath
                # Use the reference's query whenever it is *defined*, even
                # if empty ("?"); only an undefined query inherits the
                # base URI's (RFC 3986 sec. 5.2.2).
                if rQuery is not None:
                    tQuery = rQuery
                else:
                    tQuery = bQuery
            else:
                if rPath[0] == '/':
                    # absolute reference path: use it as is
                    tPath = RemoveDotSegments(rPath)
                else:
                    # merge the relative path with the base URI's path
                    if bAuth is not None and not bPath:
                        tPath = '/' + rPath
                    else:
                        tPath = bPath[:bPath.rfind('/')+1] + rPath
                    tPath = RemoveDotSegments(tPath)
                # use the reference's query
                tQuery = rQuery
            # since the reference isn't a net-path,
            # use the authority from the base URI
            tAuth = bAuth
        # inherit the scheme from the base URI
        tScheme = bScheme

    # always use the reference's fragment, then compose the target URI
    # (RFC 3986 sec. 5.3)
    return UnsplitUriRef((tScheme, tAuth, tPath, tQuery, rFrag))
REG_NAME_HOST_PATTERN = re.compile(r"^(?:(?:[0-9A-Za-z\-_\.!~*'();&=+$,]|(?:%[0-9A-Fa-f]{2}))*)$")


def MakeUrllibSafe(uriRef):
    """Make an RFC 3986 URI reference safe for legacy urllib functions.

    The result may not be a valid URI.  It works around these urllib
    limitations (as of Python 2.3.3):

    - no full support for internationalized domain names: a reg-name host
      is percent-decoded and, on Python 2.3 and later, IDNA-encoded;
    - fragment components are not stripped: the fragment is dropped here;
    - on Windows, file URIs must use '|' instead of ':' in the drivespec;
    - urllib.unquote() mishandles unicode: a unicode argument is encoded
      to an ASCII byte string first.

    Raises ValueError if uriRef is unicode and contains non-ASCII
    characters.
    """
    # IDN support requires decoding any percent-encoded octets in the
    # host part (if it's a reg-name) of the authority component, and when
    # doing DNS lookups, applying IDNA encoding to that string first.
    # urllib.urlopen() raises an exception if given a Unicode string and
    # does no conversion on non-Unicode strings, so the IDNA string has to
    # be supplied by the caller of urlopen().
    if isinstance(uriRef, unicode):
        try:
            uriRef = uriRef.encode('us-ascii') # parts of urllib are not unicode safe
        except UnicodeError:
            raise ValueError("uri %r must consist of ASCII characters." % uriRef)
    (scheme, auth, path, query, frag) = urlparse.urlsplit(uriRef)

    # Split off the userinfo at the LAST '@' so that a stray '@' inside the
    # userinfo cannot break the two-way unpacking.
    if auth and auth.find('@') > -1:
        userinfo, hostport = auth.rsplit('@', 1)
    else:
        userinfo = None
        hostport = auth

    # Only a ':' located after any closing ']' separates the port; the
    # colons inside an IPv6 literal such as "[::1]" belong to the host.
    host = hostport
    port = None
    if hostport:
        portsep = hostport.rfind(':')
        if portsep > hostport.rfind(']'):
            host = hostport[:portsep]
            port = hostport[portsep + 1:]

    # see if host is a reg-name, as opposed to IPv4 or IPv6 addr.
    if host and REG_NAME_HOST_PATTERN.match(host):
        # percent-encoded hostnames will always fail DNS lookups
        host = urllib.unquote(host) #PercentDecode(host)
        # IDNA-encode if possible.
        # We shouldn't do this for schemes that don't need DNS lookup,
        # but are there any (that you'd be calling urlopen for)?
        if sys.version_info[0:2] >= (2, 3):
            if isinstance(host, str):
                host = host.decode('utf-8')
            host = host.encode('idna')
        # reassemble the authority with the new hostname
        # (percent-decoded, and possibly IDNA-encoded)
        auth = ''
        if userinfo:
            auth += userinfo + '@'
        auth += host
        if port:
            auth += ':' + port

    # On Windows, ensure that '|', not ':', is used in a drivespec.
    if os.name == 'nt' and scheme == 'file':
        path = path.replace(':', '|', 1)

    # Note that we drop fragment, if any. See RFC 3986 sec. 3.5.
    uri = urlparse.urlunsplit((scheme, auth, path, query, None))
    return uri
def BaseJoin(base, uriRef):
    """Merge a base URI reference with another URI reference.

    Works like Absolutize() with the arguments reversed, except that the
    base may be any URI reference, even a relative one.  A base without a
    scheme is resolved as if it had one, and that scheme is then removed
    from the result unless uriRef supplied a scheme of its own.  So if
    neither argument has a scheme, neither does the result.

    The name reflects the similarity to urllib.basejoin(), but the RFC 3986
    rules for path merging, dot-segment removal and inheritance of query
    and fragment are followed.

    WARNING: this function exists only for two 4Suite needs: (1) resolving
    references against base URIs stored (inappropriately) as absolute paths
    in the subjects of RDF statements, and (2) interpreting relative repo
    paths in a product setup.xml file relative to an externally set path.
    It will probably go away with those needs, so relying on it is not
    advisable.
    """
    if IsAbsolute(base):
        return Absolutize(uriRef, base)

    # Lend the base a throw-away scheme so that Absolutize() accepts it.
    placeholder = 'basejoin'
    joined = Absolutize(uriRef, '%s:%s' % (placeholder, base))
    if IsAbsolute(uriRef):
        # the result carries uriRef's own scheme; nothing to strip
        return joined
    # no scheme in, no scheme out: cut the placeholder and its ':'
    return joined[len(placeholder) + 1:]
def RemoveDotSegments(path):
    """Collapse '.' and '..' segments as described in RFC 3986 sec. 5.2.

    Supports Absolutize().  Most '.' and '..' segments are removed from
    path while empty segments are preserved.  The function is meant for
    the path-merging step and may not give the expected result when used
    on its own; it is semi-private for that reason.

    The implementation works on lists of segments, as alluded to in the
    spec, instead of the explicit string-walking algorithm.
    """
    # a path that is nothing but "." or ".." collapses to an empty string
    # (sliced from the input so that the string type is preserved)
    if path in ('.', '..'):
        return path[0:0]

    # drop every "./" or "../" prefix
    while True:
        if path[:2] == './':
            path = path[2:]
        elif path[:3] == '../':
            path = path[3:]
        else:
            break

    # Remember a leading slash but take it off, so that split() does not
    # yield an ambiguous empty first segment.
    rooted = path[:1] == '/'
    if rooted:
        path = path[1:]

    # a trailing "/." becomes just "/"
    if path[-2:] == '/.':
        path = path[:-1]

    parts = path.split('/')
    final = len(parts) - 1
    kept = []
    for pos, part in enumerate(parts):
        if part == '..':
            # '..' removes the previously kept segment, if there is one;
            # with nothing to remove it survives only in a relative path.
            if kept:
                kept.pop()
            elif not rooted:
                kept.append(part)
            # a '..' in last position makes the result end with '/'
            if pos == final:
                kept.append('')
        elif part != '.':
            # '.' is ignored; anything else, even empty, is kept
            kept.append(part)

    if rooted:
        prefix = '/'
    else:
        prefix = ''
    return prefix + '/'.join(kept)
SCHEME_PATTERN = re.compile(r'([a-zA-Z][a-zA-Z0-9+\-.]*):')


def GetScheme(uriRef):
    """Return just the scheme of a URI reference, or None if it has none.

    A precompiled regular expression is used because it measured fastest:
    for 50,000 calls on a 1.0-GHz PIII (FreeBSD 4.7, Python 2.2.1) it took
    0.95s, or 0.05s when there was no scheme, against 1.5s for
    urllib.splittype()[0], 2.5s for Ft.Lib.Uri.SplitUriRef()[0] and 3.5s
    for urlparse.urlparse()[0].
    """
    found = SCHEME_PATTERN.match(uriRef)
    if found is not None:
        return found.group(1)
    return None
def IsAbsolute(identifier):
    """Tell whether a URI or URI reference is absolute, i.e. has a scheme.

    "Absolute" is meant in the sense of RFC 2396.  Returns True when
    GetScheme() finds a scheme in identifier and False otherwise.
    """
    # GetScheme() is reused to avoid compiling another massive regex.
    if GetScheme(identifier) is None:
        return False
    return True

View File

@ -0,0 +1 @@
"Directory for SAX version 2 drivers."

View File

@ -0,0 +1,333 @@
"""
SAX driver for the Java SAX parsers. Can only be used in Jython.
$Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $
"""
# --- Initialization
version = "0.10"
revision = "$Revision: 1.5 $"
import string
from xml.sax import xmlreader, saxutils
from xml.sax.handler import feature_namespaces, feature_namespace_prefixes
from xml.sax import _exceptions
# we only work in jython
import sys
if sys.platform[:4] != "java":
raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None)
del sys
# get the necessary Java SAX classes
try:
from org.python.core import FilelikeInputStream
from org.xml.sax.helpers import XMLReaderFactory
from org.xml import sax as javasax
except ImportError:
raise _exceptions.SAXReaderNotAvailable("SAX is not on the classpath", None)
# get some JAXP stuff
try:
from javax.xml.parsers import SAXParserFactory, ParserConfigurationException
factory = SAXParserFactory.newInstance()
jaxp = 1
except ImportError:
jaxp = 0
from java.lang import String
def _wrap_sax_exception(e):
    """Wrap a parse exception from the Java parser in a SAXParseException.

    e is the exception object passed to the Java error handler; it is
    presumably an org.xml.sax.SAXParseException (confirm against the
    caller).  Its position information is copied into a SimpleLocator.
    """
    position = SimpleLocator(e.columnNumber,
                             e.lineNumber,
                             e.publicId,
                             e.systemId)
    return _exceptions.SAXParseException(e.message, e.exception, position)
class JyErrorHandlerWrapper(javasax.ErrorHandler):
    """Java ErrorHandler that forwards to a Python SAX error handler.

    Every exception reported by the Java parser is converted with
    _wrap_sax_exception() before it is passed on.
    """

    def __init__(self, err_handler):
        # err_handler: the Python-side error handler receiving the events
        self._err_handler = err_handler

    def error(self, exc):
        "Forward a recoverable error."
        wrapped = _wrap_sax_exception(exc)
        self._err_handler.error(wrapped)

    def fatalError(self, exc):
        "Forward a non-recoverable error."
        wrapped = _wrap_sax_exception(exc)
        self._err_handler.fatalError(wrapped)

    def warning(self, exc):
        "Forward a warning."
        wrapped = _wrap_sax_exception(exc)
        self._err_handler.warning(wrapped)
class JyInputSourceWrapper(javasax.InputSource):
    """Java InputSource built from whatever the Python SAX API accepts.

    source may be:
    - a string (byte string or unicode), used as the system identifier;
    - a file-like object (anything with a "read" attribute), wrapped in a
      FilelikeInputStream; its "name" attribute, if present, becomes the
      system identifier;
    - otherwise an object with the xml.sax.xmlreader.InputSource accessors
      (getByteStream, getSystemId, getPublicId, getEncoding).
    """

    def __init__(self, source):
        # basestring rather than str: a unicode system id (for example one
        # handed back by an entity resolver) must take this branch too,
        # otherwise it would be treated as an InputSource object below.
        if isinstance(source, basestring):
            javasax.InputSource.__init__(self, source)
        elif hasattr(source, "read"):  # file like object
            f = source
            javasax.InputSource.__init__(self, FilelikeInputStream(f))
            if hasattr(f, "name"):
                self.setSystemId(f.name)
        else:  # xml.sax.xmlreader.InputSource object
            # Use byte stream constructor if possible so that Xerces won't
            # attempt to open the url at systemId unless it's really there
            if source.getByteStream():
                javasax.InputSource.__init__(self,
                                             FilelikeInputStream(source.getByteStream()))
            else:
                javasax.InputSource.__init__(self)
                if source.getSystemId():
                    self.setSystemId(source.getSystemId())
            self.setPublicId(source.getPublicId())
            self.setEncoding(source.getEncoding())
class JyEntityResolverWrapper(javasax.EntityResolver):
    """Java EntityResolver that delegates to a Python entity resolver."""

    def __init__(self, entityResolver):
        # entityResolver: the Python-side resolver that is consulted
        self._resolver = entityResolver

    def resolveEntity(self, pubId, sysId):
        """Ask the wrapped resolver and return its answer as an InputSource.

        Whatever the resolver returns is wrapped in a JyInputSourceWrapper.
        """
        resolved = self._resolver.resolveEntity(pubId, sysId)
        return JyInputSourceWrapper(resolved)
class JyDTDHandlerWrapper(javasax.DTDHandler):
    """Java DTDHandler that passes DTD events to a Python DTD handler."""

    def __init__(self, dtdHandler):
        # dtdHandler: the Python-side handler receiving the events
        self._handler = dtdHandler

    def notationDecl(self, name, publicId, systemId):
        "Pass a notation declaration on unchanged."
        target = self._handler
        target.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, notationName):
        "Pass an unparsed entity declaration on unchanged."
        target = self._handler
        target.unparsedEntityDecl(name, publicId, systemId, notationName)
class SimpleLocator(xmlreader.Locator):
    """Locator holding a fixed position supplied at construction time."""

    def __init__(self, colNum, lineNum, pubId, sysId):
        # Note the argument order: the column comes before the line.
        self.colNum, self.lineNum = colNum, lineNum
        self.pubId, self.sysId = pubId, sysId

    def getColumnNumber(self):
        "Return the stored column number."
        return self.colNum

    def getLineNumber(self):
        "Return the stored line number."
        return self.lineNum

    def getPublicId(self):
        "Return the stored public identifier."
        return self.pubId

    def getSystemId(self):
        "Return the stored system identifier."
        return self.sysId
# --- JavaSAXParser
class JavaSAXParser(xmlreader.XMLReader, javasax.ContentHandler):
    """SAX driver for the Java SAX parsers.

    The object is at the same time the Python XMLReader handed to the
    application and the Java ContentHandler registered with the underlying
    Java parser: content events from Java are translated and forwarded to
    the Python content handler.
    """

    def __init__(self, jdriver = None):
        """Create the Java parser and hook this object up to it.

        jdriver is passed to create_java_parser().  Both the namespaces
        and the namespace-prefixes features are switched off initially.
        """
        xmlreader.XMLReader.__init__(self)
        self._parser = create_java_parser(jdriver)
        self._parser.setFeature(feature_namespaces, 0)
        self._parser.setFeature(feature_namespace_prefixes, 0)
        self._parser.setContentHandler(self)
        # Reusable attribute views; the Java attribute list is swapped in
        # on every startElement event.
        self._nsattrs = AttributesNSImpl()
        self._attrs = AttributesImpl()
        # Re-register the default handlers so that the Java parser gets
        # its wrappers installed.
        self.setEntityResolver(self.getEntityResolver())
        self.setErrorHandler(self.getErrorHandler())
        self.setDTDHandler(self.getDTDHandler())

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        self._parser.parse(JyInputSourceWrapper(source))

    def getFeature(self, name):
        "Return the state of a feature of the Java parser."
        return self._parser.getFeature(name)

    def setFeature(self, name, state):
        "Set a feature of the Java parser."
        self._parser.setFeature(name, state)

    def getProperty(self, name):
        "Return a property of the Java parser."
        return self._parser.getProperty(name)

    def setProperty(self, name, value):
        "Set a property of the Java parser."
        self._parser.setProperty(name, value)

    def setEntityResolver(self, resolver):
        "Register resolver here and, wrapped, with the Java parser."
        self._parser.entityResolver = JyEntityResolverWrapper(resolver)
        xmlreader.XMLReader.setEntityResolver(self, resolver)

    def setErrorHandler(self, err_handler):
        "Register err_handler here and, wrapped, with the Java parser."
        self._parser.errorHandler = JyErrorHandlerWrapper(err_handler)
        xmlreader.XMLReader.setErrorHandler(self, err_handler)

    def setDTDHandler(self, dtd_handler):
        "Register dtd_handler here and, wrapped, with the Java parser."
        self._parser.setDTDHandler(JyDTDHandlerWrapper(dtd_handler))
        xmlreader.XMLReader.setDTDHandler(self, dtd_handler)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()
        # Sample the namespaces feature once per document; it selects the
        # flavour of element events that is emitted below.
        self._namespaces = self._parser.getFeature(feature_namespaces)

    def startElement(self, uri, lname, qname, attrs):
        if self._namespaces:
            self._nsattrs._attrs = attrs
            # an empty namespace URI is reported as None
            self._cont_handler.startElementNS((uri or None, lname), qname,
                                              self._nsattrs)
        else:
            self._attrs._attrs = attrs
            self._cont_handler.startElement(qname, self._attrs)

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def characters(self, char, start, len):
        # unicode(), not str(): character data is not restricted to ASCII.
        # (The parameter name "len" shadows the builtin but is kept so the
        # signature stays unchanged.)
        self._cont_handler.characters(unicode(String(char, start, len)))

    def ignorableWhitespace(self, char, start, len):
        # unicode() for the same reason as in characters()
        self._cont_handler.ignorableWhitespace(unicode(String(char, start, len)))

    def endElement(self, uri, lname, qname):
        if self._namespaces:
            self._cont_handler.endElementNS((uri or None, lname), qname)
        else:
            self._cont_handler.endElement(qname)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def endDocument(self):
        self._cont_handler.endDocument()

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)
class AttributesImpl:
    """Dictionary-like, read-only view of a parser-supplied attribute list.

    The wrapped object is the attribute list handed over by the Java
    parser (presumably an org.xml.sax.Attributes; confirm against the
    caller).  Attributes are addressed by qualified name.  Nothing is
    copied: every call is answered by the wrapped object.
    """

    def __init__(self, attrs = None):
        # attrs: the wrapped attribute list; may be assigned later
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return self._attrs.getLength()

    def getType(self, name):
        "Return the type of the attribute called name."
        return self._attrs.getType(name)

    def getValue(self, name):
        "Return the value of the attribute called name or raise KeyError."
        value = self._attrs.getValue(name)
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        "Return the attribute names (qualified names) as a list."
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getQNames(self):
        "Return the qualified attribute names as a list."
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getValueByQName(self, qname):
        "Return the value for a qualified name or raise KeyError."
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return self._attrs.getValue(idx)

    def getNameByQName(self, qname):
        "Return qname itself if such an attribute exists, else raise KeyError."
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return qname

    def getQNameByName(self, name):
        "Return name itself if such an attribute exists, else raise KeyError."
        idx = self._attrs.getIndex(name)
        if idx == -1:
            raise KeyError(name)
        return name

    def __len__(self):
        return self._attrs.getLength()

    def __getitem__(self, name):
        return self.getValue(name)

    def __contains__(self, name):
        # Without this method the "in" operator would fall back to calling
        # __getitem__ with the integers 0, 1, 2, ...
        return self.has_key(name)

    def keys(self):
        "Return the attribute names as a list."
        return self.getNames()

    def copy(self):
        """Return a new view of the same class on the same attribute list.

        NOTE(review): the underlying list is shared, not duplicated.
        """
        return self.__class__(self._attrs)

    def items(self):
        "Return a list of (name, value) pairs."
        return [(name, self[name]) for name in self.getNames()]

    def values(self):
        "Return the attribute values as a list."
        return [self.getValue(name) for name in self.getNames()]

    def get(self, name, alt=None):
        "Return the value of the attribute called name, or alt if absent."
        try:
            return self.getValue(name)
        except KeyError:
            return alt

    def has_key(self, name):
        "Return True if an attribute called name exists, else False."
        try:
            self.getValue(name)
            return True
        except KeyError:
            return False
# --- AttributesNSImpl
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl.

    Attribute names are (namespace URI, local name) tuples.  For lookups
    the namespace part may be None as well as the empty string: None is
    the form this driver uses for element names without a namespace, and
    it is translated to the empty string before the wrapped attribute
    list is queried.

    NOTE(review): getNames() and getNameByQName() return the namespace URI
    exactly as the wrapped list reports it, without converting an empty
    URI to None.
    """

    def __init__(self, attrs=None):
        AttributesImpl.__init__(self, attrs)

    def getType(self, name):
        "Return the type of the attribute named by the (uri, localname) tuple."
        return self._attrs.getType(name[0] or '', name[1])

    def getValue(self, name):
        "Return the value for a (uri, localname) tuple or raise KeyError."
        value = self._attrs.getValue(name[0] or '', name[1])
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        "Return the attribute names as a list of (uri, localname) tuples."
        names = []
        for idx in range(len(self)):
            names.append((self._attrs.getURI(idx),
                          self._attrs.getLocalName(idx)))
        return names

    def getNameByQName(self, qname):
        "Return the (uri, localname) tuple for a qualified name or raise KeyError."
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return (self._attrs.getURI(idx), self._attrs.getLocalName(idx))

    def getQNameByName(self, name):
        "Return the qualified name for a (uri, localname) tuple or raise KeyError."
        idx = self._attrs.getIndex(name[0] or '', name[1])
        if idx == -1:
            # the tuple is passed as one argument, as in getValue()
            raise KeyError(name)
        return self._attrs.getQName(idx)

    def getQNames(self):
        "Return the qualified attribute names as a list."
        return [self._attrs.getQName(idx) for idx in range(len(self))]
# ---
def create_java_parser(jdriver = None):
    """Create and return the underlying Java XMLReader.

    If jdriver is given it is passed to XMLReaderFactory.createXMLReader().
    Otherwise the JAXP factory is used when JAXP could be imported, and
    the default XMLReaderFactory reader when it could not.

    Raises SAXReaderNotAvailable if the Java side reports a
    ParserConfigurationException or a SAXException.
    """
    # sys is deleted from the module namespace after the platform check,
    # so it is imported locally.  sys.exc_info() is used instead of binding
    # the exception in the except clause, which keeps the syntax valid on
    # every Python/Jython version.
    import sys
    try:
        if jdriver:
            return XMLReaderFactory.createXMLReader(jdriver)
        elif jaxp:
            # ParserConfigurationException exists only when the JAXP import
            # succeeded, so it must not be named outside this branch: an
            # except clause mentioning an unbound name raises NameError as
            # soon as any exception reaches it.
            try:
                return factory.newSAXParser().getXMLReader()
            except ParserConfigurationException:
                raise _exceptions.SAXReaderNotAvailable(
                    sys.exc_info()[1].getMessage())
        else:
            return XMLReaderFactory.createXMLReader()
    except javasax.SAXException:
        raise _exceptions.SAXReaderNotAvailable(sys.exc_info()[1].getMessage())
def create_parser(jdriver = None):
    "Return a new JavaSAXParser; jdriver is handed to its constructor."
    parser = JavaSAXParser(jdriver)
    return parser

View File

@ -0,0 +1,345 @@
"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.
Many of these classes are empty and are included only as documentation
of the interfaces.
$Id: handler.py,v 1.5 2002/02/14 08:09:36 loewis Exp $
"""
version = '2.0beta'
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================
# ===== ERRORHANDLER =====
class ErrorHandler:
    """Basic interface for SAX error handlers.

    An object implementing this interface and registered with an XMLReader
    is told about all warnings and errors by the parser.  Three levels
    exist: warnings, (possibly) recoverable errors and unrecoverable
    errors.  Each method receives a SAXParseException as its only
    argument.
    """

    def error(self, exception):
        "Handle a recoverable error; the default is to raise it."
        raise exception

    def fatalError(self, exception):
        "Handle a non-recoverable error; the default is to raise it."
        raise exception

    def warning(self, exception):
        "Handle a warning; the default is to print it."
        print(exception)
# ===== CONTENTHANDLER =====
class ContentHandler:
    """Interface for receiving logical document content events.

    This is the main callback interface of SAX and the one that matters
    most to applications.  Events arrive in the same order as the
    corresponding information appears in the document.
    """

    def __init__(self):
        self._locator = None

    def setDocumentLocator(self, locator):
        """Receive a locator for finding the origin of document events.

        Called by the parser.  SAX parsers are strongly encouraged, but
        not strictly required, to supply a locator; a parser that does so
        must call this method before any other method of this interface.

        With the locator the application can determine the end position
        of any document-related event, even when the parser reports no
        error.  Typically it is used for the application's own error
        reports (such as character content that violates a business
        rule).  The information is probably not sufficient for use with a
        search engine.

        The locator returns correct information only while one of the
        events of this interface is being delivered; it should not be
        used at any other time.
        """
        self._locator = locator

    def startDocument(self):
        """Receive notification of the beginning of a document.

        Invoked by the SAX parser exactly once, before any other method
        of this interface or of DTDHandler (except setDocumentLocator).
        """

    def endDocument(self):
        """Receive notification of the end of a document.

        Invoked by the SAX parser exactly once, as the last method of the
        parse.  It is not invoked until the parser has either given up
        parsing (because of an unrecoverable error) or reached the end of
        the input.
        """

    def startPrefixMapping(self, prefix, uri):
        """Begin the scope of a prefix-URI Namespace mapping.

        Normal Namespace processing does not need this event: the SAX XML
        reader replaces prefixes of element and attribute names
        automatically when the http://xml.org/sax/features/namespaces
        feature is true (the default).

        Applications that use prefixes in character data or attribute
        values, where automatic expansion is not safe, can use the
        start/endPrefixMapping events to expand the prefixes themselves.

        start/endPrefixMapping events are not guaranteed to be properly
        nested relative to each other: every startPrefixMapping event
        occurs before the corresponding startElement event and every
        endPrefixMapping event after the corresponding endElement event,
        but their order is otherwise not guaranteed.
        """

    def endPrefixMapping(self, prefix):
        """End the scope of a prefix-URI mapping.

        See startPrefixMapping for details.  The event always occurs
        after the corresponding endElement event; the order among
        endPrefixMapping events is otherwise not guaranteed.
        """

    def startElement(self, name, attrs):
        """Signal the start of an element in non-namespace mode.

        name is the raw XML 1.0 name of the element type as a string;
        attrs is an instance of the Attributes class holding the
        attributes of the element.
        """

    def endElement(self, name):
        """Signal the end of an element in non-namespace mode.

        name is the name of the element type, as in the startElement
        event.
        """

    def startElementNS(self, name, qname, attrs):
        """Signal the start of an element in namespace mode.

        name is the element type as a (uri, localname) tuple, qname the
        raw XML 1.0 name used in the source document, and attrs an
        instance of the Attributes class holding the attributes of the
        element.

        The uri part of the tuple is None for elements that have no
        namespace.
        """

    def endElementNS(self, name, qname):
        """Signal the end of an element in namespace mode.

        name is the name of the element type, as in the startElementNS
        event.
        """

    def characters(self, content):
        """Receive notification of character data.

        The parser calls this method for each chunk of character data.
        Contiguous character data may be delivered as one chunk or split
        into several; all characters of a single event must however come
        from the same external entity, so that the Locator provides
        useful information.
        """

    def ignorableWhitespace(self, whitespace):
        """Receive notification of ignorable whitespace in element content.

        Validating parsers must report each chunk of ignorable whitespace
        through this method (see the W3C XML 1.0 recommendation, section
        2.10); non-validating parsers may do so as well if they are able
        to parse and use content models.

        Contiguous whitespace may be delivered as one chunk or split into
        several; all characters of a single event must however come from
        the same external entity, so that the Locator provides useful
        information.

        The application must not attempt to read from the array outside
        of the specified range.
        """

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        Invoked once per processing instruction found; processing
        instructions may occur before or after the main document element.

        A SAX parser should never report an XML declaration (XML 1.0,
        section 2.8) or a text declaration (XML 1.0, section 4.3.1)
        through this method.
        """

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        Invoked once per skipped entity.  Non-validating processors may
        skip entities whose declarations they have not seen (because, for
        example, the entity was declared in an external DTD subset).  All
        processors may skip external entities, depending on the values of
        the http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """
# ===== DTDHandler =====
class DTDHandler:
    """Handle DTD events.

    Only the DTD events required for basic parsing (unparsed entities and
    attributes) are part of this interface.  Both methods do nothing by
    default.
    """

    def notationDecl(self, name, publicId, systemId):
        "Handle a notation declaration event."
        return None

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        "Handle an unparsed entity declaration event."
        return None
# ===== ENTITYRESOLVER =====
class EntityResolver:
    """Basic interface for resolving entities.

    An object implementing this interface and registered with a parser is
    asked by the parser to resolve all external entities.  Note that
    DefaultHandler implements this interface with the default behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity.

        Returns either the system identifier to read from, as a string,
        or an InputSource to read from.  The default returns systemId
        unchanged.
        """
        resolved = systemId
        return resolved
#============================================================================
#
# CORE FEATURES
#
#============================================================================
# Namespace processing.
#   true:   Perform Namespace processing (default).
#   false:  Optionally do not perform Namespace processing
#           (implies namespace-prefixes).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespaces = "http://xml.org/sax/features/namespaces"

# Reporting of prefixed names and xmlns attributes.
#   true:   Report the original prefixed names and attributes used for
#           Namespace declarations.
#   false:  Do not report attributes used for Namespace declarations, and
#           optionally do not report original prefixed names (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"

# String interning.
#   true:   All element names, prefixes, attribute names, Namespace URIs,
#           and local names are interned using the built-in intern function.
#   false:  Names are not necessarily interned, although they may be
#           (default).
#   access: (parsing) read-only; (not parsing) read/write
feature_string_interning = "http://xml.org/sax/features/string-interning"

# Validation.
#   true:   Report all validation errors (implies external-general-entities
#           and external-parameter-entities).
#   false:  Do not report validation errors.
#   access: (parsing) read-only; (not parsing) read/write
feature_validation = "http://xml.org/sax/features/validation"

# External general entities.
#   true:   Include all external general (text) entities.
#   false:  Do not include external general entities.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_ges = "http://xml.org/sax/features/external-general-entities"

# External parameter entities.
#   true:   Include all external parameter entities, including the external
#           DTD subset.
#   false:  Do not include any external parameter entities, even the
#           external DTD subset.
#   access: (parsing) read-only; (not parsing) read/write
feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"

# Every core feature identifier defined above, in declaration order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
#============================================================================
#
# CORE PROPERTIES
#
#============================================================================
# Lexical handler.
#   data type:   xml.sax.sax2lib.LexicalHandler
#   description: An optional extension handler for lexical events like
#                comments.
#   access:      read/write
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"

# Declaration handler.
#   data type:   xml.sax.sax2lib.DeclHandler
#   description: An optional extension handler for DTD-related events other
#                than notations and unparsed entities.
#   access:      read/write
property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"

# DOM node.
#   data type:   org.w3c.dom.Node
#   description: When parsing, the current DOM node being visited if this is
#                a DOM iterator; when not parsing, the root DOM node for
#                iteration.
#   access:      (parsing) read-only; (not parsing) read/write
property_dom_node = "http://xml.org/sax/properties/dom-node"

# XML string.
#   data type:   String
#   description: The literal string of characters that was the source for
#                the current event.
#   access:      read-only
property_xml_string = "http://xml.org/sax/properties/xml-string"

# Input encoding (Python-specific property).
#   data type:     String
#   description:   The name of the encoding to assume for input data.
#   access:        write: set the encoding, e.g. established by a
#                         higher-level protocol.  May change during parsing
#                         (e.g. after processing a META tag).
#                  read:  return the current encoding (possibly established
#                         through auto-detection).
#   initial value: UTF-8
property_encoding = "http://www.python.org/sax/properties/encoding"

# Interning dictionary (Python-specific property).
#   data type:   Dictionary
#   description: The dictionary used to intern common strings in the
#                document.
#   access:      write: request that the parser uses a specific dictionary,
#                       to allow interning across different documents.
#                read:  return the current interning dictionary, or None.
property_interning_dict = "http://www.python.org/sax/properties/interning-dict"

# Every core property identifier defined above.  The list order is kept as
# originally published (dom-node precedes declaration-handler).
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]

View File

@ -0,0 +1,430 @@
"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.
Many of these classes are empty and are included only as documentation
of the interfaces.
$Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $
"""
# Version string of this SAX implementation.
version = "2.0beta"
# A number of interfaces used to live in saxlib, but are now in
# various other modules for Python 2 compatibility. If nobody uses
# them here any longer, the references can be removed
from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver
from xmlreader import XMLReader, InputSource, Locator, IncrementalParser
from _exceptions import *
from handler import \
feature_namespaces,\
feature_namespace_prefixes,\
feature_string_interning,\
feature_validation,\
feature_external_ges,\
feature_external_pes,\
all_features,\
property_lexical_handler,\
property_declaration_handler,\
property_dom_node,\
property_xml_string,\
all_properties
#============================================================================
#
# MAIN INTERFACES
#
#============================================================================
# ===== XMLFILTER =====
class XMLFilter(XMLReader):
    """Interface for a SAX2 parser filter.

    A parser filter is an XMLReader whose events come from another
    XMLReader (which may itself be a filter) instead of from a primary
    source such as a document or other non-SAX data source.  Filters
    can modify a stream of events before passing it on to its
    handlers.
    """

    def __init__(self, parent=None):
        """Create a filter, optionally setting its parent right away."""
        XMLReader.__init__(self)
        self._parent = parent

    def setParent(self, parent):
        """Set the parent XMLReader of this filter.

        The argument is documented as "may not be None"; this method
        stores it without checking.
        """
        self._parent = parent

    def getParent(self):
        """Return the parent of this filter."""
        return self._parent
# ===== ATTRIBUTES =====
class Attributes:
    """Interface for a list of XML attributes, accessible by name.

    Every method here is abstract: each one raises NotImplementedError
    and has to be supplied by a concrete implementation.
    """

    def getLength(self):
        """Return the number of attributes in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getType(self, name):
        """Return the type of the attribute called name."""
        raise NotImplementedError("This method must be implemented!")

    def getValue(self, name):
        """Return the value of the attribute called name."""
        raise NotImplementedError("This method must be implemented!")

    def getValueByQName(self, name):
        """Return the value of the attribute with the given raw
        (or qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNameByQName(self, name):
        """Return the namespace name of the attribute with the given
        raw (or qualified) name."""
        raise NotImplementedError("This method must be implemented!")

    def getNames(self):
        """Return a list with the names of all attributes in the list."""
        raise NotImplementedError("This method must be implemented!")

    def getQNames(self):
        """Return a list with the raw qualified names of all attributes
        in the list."""
        raise NotImplementedError("This method must be implemented!")

    def __len__(self):
        """Alias for getLength."""
        raise NotImplementedError("This method must be implemented!")

    def __getitem__(self, name):
        """Alias for getValue."""
        raise NotImplementedError("This method must be implemented!")

    def keys(self):
        """Return a list of the attribute names in the list."""
        raise NotImplementedError("This method must be implemented!")

    def has_key(self, name):
        """Return true if the attribute is in the list, false otherwise."""
        raise NotImplementedError("This method must be implemented!")

    def get(self, name, alternative=None):
        """Return the value associated with attribute name, or
        alternative when the attribute is not available."""
        raise NotImplementedError("This method must be implemented!")

    def copy(self):
        """Return a copy of the Attributes object."""
        raise NotImplementedError("This method must be implemented!")

    def items(self):
        """Return a list of (attribute_name, value) pairs."""
        raise NotImplementedError("This method must be implemented!")

    def values(self):
        """Return a list of all attribute values."""
        raise NotImplementedError("This method must be implemented!")
#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================
# ===== DECLHANDLER =====
class DeclHandler:
    """Optional SAX2 handler for DTD declaration events.

    Note that some DTD declarations are already reported through the
    DTDHandler interface.  All events reported to this handler will
    occur between the startDTD and endDTD events of the
    LexicalHandler.

    To set the DeclHandler for an XMLReader, use the setProperty method
    with the property identifier
    http://xml.org/sax/properties/declaration-handler (available from
    this module as property_declaration_handler).

    All callbacks do nothing unless overridden.
    """

    def attributeDecl(self, elem_name, attr_name, type, value_def, value):
        """Report an attribute type declaration.

        Only the first declaration will be reported.  The type will be
        one of the strings "CDATA", "ID", "IDREF", "IDREFS",
        "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION", or
        a list of names (in the case of enumerated definitions).

        elem_name is the element type name, attr_name the attribute
        type name, type a string representing the attribute type,
        value_def a string representing the default declaration
        ('#IMPLIED', '#REQUIRED', '#FIXED' or None).  value is a string
        representing the attribute's default value, or None if there
        is none.
        """

    def elementDecl(self, elem_name, content_model):
        """Report an element type declaration.

        Only the first declaration will be reported.

        content_model is the string 'EMPTY', the string 'ANY' or the
        content model structure represented as a tuple
        (separator, tokens, modifier), where separator is the separator
        in the token list (that is, '|' or ','), tokens is the list of
        tokens (element type names or tuples representing parentheses)
        and modifier is the quantity modifier ('*', '?' or '+').
        """

    def internalEntityDecl(self, name, value):
        """Report an internal entity declaration.

        Only the first declaration of an entity will be reported.

        name is the name of the entity.  If it is a parameter entity,
        the name will begin with '%'.  value is the replacement text of
        the entity.
        """

    def externalEntityDecl(self, name, public_id, system_id):
        """Report a parsed entity declaration.

        (Unparsed entities are reported to the DTDHandler.)

        Only the first declaration for each entity will be reported.

        name is the name of the entity.  If it is a parameter entity,
        the name will begin with '%'.  public_id and system_id are the
        public and system identifiers of the entity.  public_id will be
        None if none were declared.
        """
# ===== LEXICALHANDLER =====
class LexicalHandler:
    """Optional SAX2 handler for lexical events.

    This handler is used to obtain lexical information about an XML
    document, that is, information about how the document was encoded
    (as opposed to what it contains, which is reported to the
    ContentHandler), such as comments and CDATA marked section
    boundaries.

    To set the LexicalHandler of an XMLReader, use the setProperty
    method with the property identifier
    'http://xml.org/sax/properties/lexical-handler' (available from
    this module as property_lexical_handler).  There is no guarantee
    that the XMLReader will support or recognize this property.

    All callbacks do nothing unless overridden.
    """

    def comment(self, content):
        """Report a comment anywhere in the document (including the
        DTD and outside the document element).

        content is a string that holds the contents of the comment.
        """

    def startDTD(self, name, public_id, system_id):
        """Report the start of the DTD declarations, if the document
        has an associated DTD.

        A startEntity event will be reported before declaration events
        from the external DTD subset are reported, and this can be
        used to infer from which subset DTD declarations derive.

        name is the name of the document element type, public_id the
        public identifier of the DTD (or None if none were supplied)
        and system_id the system identifier of the external subset (or
        None if none were supplied).
        """

    def endDTD(self):
        """Signal the end of DTD declarations."""

    def startEntity(self, name):
        """Report the beginning of an entity.

        The start and end of the document entity is not reported.  The
        start and end of the external DTD subset is reported with the
        pseudo-name '[dtd]'.

        Skipped entities will be reported through the skippedEntity
        event of the ContentHandler rather than through this event.

        name is the name of the entity.  If it is a parameter entity,
        the name will begin with '%'.
        """

    def endEntity(self, name):
        """Report the end of an entity.

        name is the name of the entity, and follows the same
        conventions as for startEntity.
        """

    def startCDATA(self):
        """Report the beginning of a CDATA marked section.

        The contents of the CDATA marked section will be reported
        through the characters event.
        """

    def endCDATA(self):
        """Report the end of a CDATA marked section."""
#============================================================================
#
# SAX 1.0 COMPATIBILITY CLASSES
# Note that these are all deprecated.
#
#============================================================================
# ===== ATTRIBUTELIST =====
class AttributeList:
    """Interface for a SAX 1.0 attribute list (deprecated).

    The interface describes the attributes of one element; only
    specified or defaulted attributes will be reported.  The
    information returned by such an object is valid only during the
    scope of the DocumentHandler.startElement callback, and the
    attributes are not necessarily provided in the order declared or
    specified.

    The methods below are documentation-only placeholders that return
    None.
    """

    def getLength(self):
        """Return the number of attributes in list."""
        pass

    def getName(self, i):
        """Return the name of an attribute in the list."""
        pass

    def getType(self, i):
        """Return the type of an attribute in the list.

        (Parameter can be either integer index or attribute name.)
        """
        pass

    def getValue(self, i):
        """Return the value of an attribute in the list.

        (Parameter can be either integer index or attribute name.)
        """
        pass

    def __len__(self):
        """Alias for getLength."""
        pass

    def __getitem__(self, key):
        """Alias for getName (if key is an integer) and getValue
        (if string)."""
        pass

    def keys(self):
        """Return a list of the attribute names."""
        pass

    def has_key(self, key):
        """Return true if the attribute is in the list, false otherwise."""
        pass

    def get(self, key, alternative=None):
        """Return the value associated with attribute name, or
        alternative when the attribute is not available."""
        pass

    def copy(self):
        """Return a copy of the AttributeList."""
        pass

    def items(self):
        """Return a list of (attribute_name,value) pairs."""
        pass

    def values(self):
        """Return a list of all attribute values."""
        pass
# ===== DOCUMENTHANDLER =====
class DocumentHandler:
    """Receiver for general document events (SAX 1.0, deprecated).

    This is the main client interface for SAX: it contains callbacks
    for the most important document events, such as the start and end
    of elements.  Create an object implementing this interface and
    register it with the Parser.  If you do not want to implement the
    entire interface, derive a class from HandlerBase, which
    implements the default functionality.  The location of any
    document event can be found using the Locator interface supplied
    through setDocumentLocator().

    Every callback below does nothing unless overridden.
    """

    def characters(self, ch, start, length):
        """Handle a character data event."""
        pass

    def endDocument(self):
        """Handle an event for the end of a document."""
        pass

    def endElement(self, name):
        """Handle an event for the end of an element."""
        pass

    def ignorableWhitespace(self, ch, start, length):
        """Handle an event for ignorable whitespace in element content."""
        pass

    def processingInstruction(self, target, data):
        """Handle a processing instruction event."""
        pass

    def setDocumentLocator(self, locator):
        """Receive an object for locating the origin of SAX document
        events."""
        pass

    def startDocument(self):
        """Handle an event for the beginning of a document."""
        pass

    def startElement(self, name, atts):
        """Handle an event for the beginning of an element."""
        pass
# ===== HANDLERBASE =====
class HandlerBase(EntityResolver, DTDHandler, DocumentHandler, ErrorHandler):
    """Default base class for SAX 1.0 handlers (deprecated).

    It combines the default behaviour of four SAX interfaces --
    EntityResolver, DTDHandler, DocumentHandler and ErrorHandler -- so
    that an application can extend this one class and override only
    the methods it needs instead of implementing each interface in
    full.  Using this class is optional; the interfaces may also be
    implemented directly.
    """
# ===== PARSER =====
class Parser:
    """Basic interface for SAX (Simple API for XML) parsers (SAX 1.0).

    All SAX parsers must implement this basic interface: it lets users
    register handlers for different types of events and start a parse
    from a URI, a character stream, or a byte stream.  SAX parsers
    should also implement a zero-argument constructor.
    """

    def __init__(self):
        # Start out with the default handler for every kind of event.
        self.doc_handler = DocumentHandler()
        self.dtd_handler = DTDHandler()
        self.ent_handler = EntityResolver()
        self.err_handler = ErrorHandler()

    def parse(self, systemId):
        """Parse an XML document from a system identifier."""
        pass

    def parseFile(self, fileobj):
        """Parse an XML document from a file-like object."""
        pass

    def setDocumentHandler(self, handler):
        """Register an object to receive basic document-related events."""
        self.doc_handler = handler

    def setDTDHandler(self, handler):
        """Register an object to receive basic DTD-related events."""
        self.dtd_handler = handler

    def setEntityResolver(self, resolver):
        """Register an object to resolve external entities."""
        self.ent_handler = resolver

    def setErrorHandler(self, handler):
        """Register an object to receive error-message events."""
        self.err_handler = handler

    def setLocale(self, locale):
        """Allow an application to set the locale for errors and warnings.

        SAX parsers are not required to provide localisation for
        errors and warnings; if they cannot support the requested
        locale, however, they must throw a SAX exception.
        Applications may request a locale change in the middle of a
        parse.

        This base implementation always raises
        SAXNotSupportedException.
        """
        raise SAXNotSupportedException("Locale support not implemented")

View File

@ -0,0 +1,813 @@
"""
A library of useful helper classes to the saxlib classes, for the
convenience of application and driver writers.
$Id: saxutils.py,v 1.37 2005/04/13 14:02:08 syt Exp $
"""
import os, urlparse, urllib2, types
import handler
import xmlreader
import sys, _exceptions, saxlib
from xml.Uri import Absolutize, MakeUrllibSafe,IsAbsolute
try:
_StringTypes = [types.StringType, types.UnicodeType]
except AttributeError: # 1.5 compatibility:UnicodeType not defined
_StringTypes = [types.StringType]
def __dict_replace(s, d):
    """Return s with every key of d replaced by its mapped value.

    Substitutions are applied one after another in the dictionary's
    iteration order, each on the result of the previous one.
    """
    for old in d.keys():
        s = s.replace(old, d[old])
    return s
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    Further substitutions can be requested through the optional
    entities dictionary: keys and values must all be strings, and each
    key is replaced with its corresponding value after the three
    standard characters have been handled.
    """
    # The ampersand goes first so that the ampersands introduced by the
    # other two references are not escaped again.
    for raw, reference in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        data = data.replace(raw, reference)
    if entities:
        data = __dict_replace(data, entities)
    return data
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    Further substitutions can be requested through the optional
    entities dictionary: keys and values must all be strings, and each
    key is replaced with its corresponding value.
    """
    for reference, raw in (("&lt;", "<"), ("&gt;", ">")):
        data = data.replace(reference, raw)
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must come last, otherwise text such as "&amp;lt;"
    # would be unescaped twice.
    return data.replace("&amp;", "&")
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    &, <, and > in data are escaped (via escape, which also applies the
    optional entities dictionary of string -> string replacements), and
    the result is wrapped in quotes for use as an attribute value.
    Double quotes are preferred; single quotes are used when the value
    contains double quotes only, and when it contains both kinds the
    double quotes are written as &quot; inside a double-quoted value.
    """
    data = escape(data, entities)
    if '"' not in data:
        return '"%s"' % data
    if "'" not in data:
        return "'%s'" % data
    return '"%s"' % data.replace('"', "&quot;")
# --- DefaultHandler
class DefaultHandler(handler.EntityResolver, handler.DTDHandler,
                     handler.ContentHandler, handler.ErrorHandler):
    """Default base class for SAX2 event handlers.

    It inherits every callback method from the four handler
    interfaces, so application implementors only override the ones
    they care about.  Replaces the deprecated SAX1 HandlerBase class.
    """
# --- Location
class Location:
    """Snapshot of a location in an XML entity.

    The constructor reads the current position off the locator it is
    given and stores it, so the values no longer depend on the locator
    afterwards.
    """

    def __init__(self, locator):
        self.__col = locator.getColumnNumber()
        self.__line = locator.getLineNumber()
        self.__pubid = locator.getPublicId()
        self.__sysid = locator.getSystemId()

    def getColumnNumber(self):
        """Return the stored column number."""
        return self.__col

    def getLineNumber(self):
        """Return the stored line number."""
        return self.__line

    def getPublicId(self):
        """Return the stored public identifier."""
        return self.__pubid

    def getSystemId(self):
        """Return the stored system identifier."""
        return self.__sysid

    def __str__(self):
        """Format as "<id>:<line>:<column>".

        The id is the system id, else the public id, else "<unknown>";
        a line or column of None is shown as "?".
        """
        line = self.__line
        if line is None:
            line = "?"
        col = self.__col
        if col is None:
            col = "?"
        entity = self.__sysid or self.__pubid or "<unknown>"
        return "%s:%s:%s" % (entity, line, col)
# --- ErrorPrinter
class ErrorPrinter:
    """Error handler that writes a one-line message per reported problem.

    Messages go to outfile (sys.stderr by default).  level selects what
    is reported: warnings only when level <= 0, errors when
    level <= 1 and fatal errors when level <= 2.
    """

    def __init__(self, level=0, outfile=sys.stderr):
        self._level = level
        self._outfile = outfile

    def warning(self, exception):
        self.__report(0, "WARNING in %s: %s\n", exception)

    def error(self, exception):
        self.__report(1, "ERROR in %s: %s\n", exception)

    def fatalError(self, exception):
        self.__report(2, "FATAL ERROR in %s: %s\n", exception)

    def __report(self, threshold, template, exception):
        # Shared by the three callbacks: write the formatted message
        # unless the configured level suppresses this severity.
        if self._level > threshold:
            return
        position = self.__getpos(exception)
        self._outfile.write(template % (position, exception.getMessage()))

    def __getpos(self, exception):
        # Only parse exceptions carry position information.
        if not isinstance(exception, _exceptions.SAXParseException):
            return "<unknown>"
        return "%s:%s:%s" % (exception.getSystemId(),
                             exception.getLineNumber(),
                             exception.getColumnNumber())
# --- ErrorRaiser
class ErrorRaiser:
    """Error handler that raises the exceptions it is passed.

    level selects what is raised: warnings only when level <= 0,
    errors when level <= 1 and fatal errors when level <= 2; anything
    else is silently ignored.
    """

    def __init__(self, level=0):
        self._level = level

    def error(self, exception):
        if self._level > 1:
            return
        raise exception

    def fatalError(self, exception):
        if self._level > 2:
            return
        raise exception

    def warning(self, exception):
        if self._level > 0:
            return
        raise exception
# --- AttributesImpl now lives in xmlreader
from xmlreader import AttributesImpl
# --- XMLGenerator is the SAX2 ContentHandler for writing back XML
import codecs
def _outputwrapper(stream,encoding):
    """Wrap stream in the codec stream writer registered for encoding."""
    return codecs.getwriter(encoding)(stream)
if hasattr(codecs, "register_error"):
    def writetext(stream, text, entities={}):
        """Write text to stream as escaped XML character data.

        text is passed through escape() together with the optional
        entities mapping.  While writing, the stream's error handling is
        switched to "xmlcharrefreplace" so that characters the stream
        cannot encode are emitted as numeric character references.

        The stream's previous "errors" setting ("strict" when it has
        none) is put back afterwards, also when escaping or writing
        raises.
        """
        previous = getattr(stream, "errors", "strict")
        stream.errors = "xmlcharrefreplace"
        try:
            stream.write(escape(text, entities))
        finally:
            stream.errors = previous
else:
    def writetext(stream, text, entities={}):
        """Write text to stream as escaped XML character data.

        Fallback for interpreters whose codecs module has no
        register_error: the escaped text is written in one go, and if
        that raises UnicodeError it is written character by character,
        replacing each character that still fails with a numeric
        character reference.
        """
        text = escape(text, entities)
        try:
            stream.write(text)
        except UnicodeError:
            for c in text:
                try:
                    stream.write(c)
                except UnicodeError:
                    stream.write("&#%d;" % ord(c))
def writeattr(stream, text):
    """Write text to stream as a quoted, escaped attribute value.

    Values without double quotes are wrapped in double quotes as they
    are.  Otherwise the delimiter is chosen by comparing how often each
    quote character occurs: double quotes (with embedded ones written
    as &quot;) when there are no more double than single quotes,
    single quotes (with embedded ones written as &apos;) when double
    quotes are in the majority.
    """
    doubles = text.count('"')
    if not doubles:
        entities, quote = {}, '"'
    elif doubles <= text.count("'"):
        entities, quote = {'"': "&quot;"}, '"'
    else:
        entities, quote = {"'": "&apos;"}, "'"
    stream.write(quote)
    writetext(stream, text, entities)
    stream.write(quote)
class XMLGenerator(handler.ContentHandler):
    """SAX2 ContentHandler that writes the events it receives out as XML.

    Output goes through a codec stream writer for the chosen encoding.
    Prefix mappings announced with startPrefixMapping are remembered
    and written as xmlns attributes on the next start tag produced by
    startElementNS.
    """

    # Template for the prefixes invented for namespaced attributes whose
    # namespace is bound without a prefix (i.e. as the default namespace).
    GENERATED_PREFIX = "xml.sax.saxutils.prefix%s"

    def __init__(self, out=None, encoding="iso-8859-1"):
        """Write to out (sys.stdout when None) using encoding."""
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = _outputwrapper(out, encoding)
        self._ns_contexts = [{}]  # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self._undeclared_ns_maps = []
        self._encoding = encoding
        self._generated_prefix_ctr = 0

    def _qname(self, name):
        """Return the tag name to write for a (uri, localname) pair.

        Names without a namespace, or in a namespace bound without a
        prefix, are written as the bare local name.  Looking up a
        namespace that has not been mapped raises KeyError.
        """
        uri, localname = name[0], name[1]
        if uri is None:
            return localname
        prefix = self._current_context[uri]
        if prefix is None:
            # default namespace
            return localname
        return prefix + ":" + localname

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, including the output encoding."""
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix binding for the next start tag."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Drop the most recent namespace context."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (non-namespace mode)."""
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._out.write(' %s=' % attr_name)
            writeattr(self._out, value)
        self._out.write('>')

    def endElement(self, name):
        """Write an end tag (non-namespace mode)."""
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a start tag, pending xmlns declarations and attributes.

        name is a (uri, localname) pair and attrs maps such pairs to
        values; the qname argument is not used.
        """
        self._out.write('<' + self._qname(name))

        for k,v in self._undeclared_ns_maps:
            if k is None:
                self._out.write(' xmlns="%s"' % (v or ''))
            else:
                self._out.write(' xmlns:%s="%s"' % (k,v))
        self._undeclared_ns_maps = []

        # Prefixes invented for this start tag, keyed by namespace URI.
        # They are deliberately kept out of self._current_context:
        # recording them there would make endElementNS (and later
        # elements) in the same default namespace pick up the invented
        # prefix and no longer match the unprefixed start tag.
        generated = {}
        for (attr_name, value) in attrs.items():
            # Unpack before building the qualified name so the URI is
            # still available for the xmlns declaration below.
            uri, localname = attr_name[0], attr_name[1]
            if uri is None:
                attr_qname = localname
            else:
                prefix = self._current_context[uri]
                if prefix is None:
                    # default namespace
                    #If an attribute has a nsuri but not a prefix, we must
                    #create a prefix and add a nsdecl
                    prefix = generated.get(uri)
                    if prefix is None:
                        prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
                        self._generated_prefix_ctr = self._generated_prefix_ctr + 1
                        self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(uri)))
                        generated[uri] = prefix
                attr_qname = prefix + ":" + localname
            self._out.write(' %s=' % attr_qname)
            writeattr(self._out, value)
        self._out.write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for a (uri, localname) element name."""
        # XXX: if qname is not None, we better use it.
        # Python 2.0b2 requires us to use the recorded prefix for
        # name[0], though
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data, escaped for XML."""
        writetext(self._out, content)

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._out.write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._out.write('<?%s %s?>' % (target, data))
class LexicalXMLGenerator(XMLGenerator, saxlib.LexicalHandler):
    """A XMLGenerator that also supports the LexicalHandler interface"""

    def __init__(self, out=None, encoding="iso-8859-1"):
        XMLGenerator.__init__(self, out, encoding)
        self._in_cdata = 0  # true between startCDATA and endCDATA

    def characters(self, content):
        """Write character data, honouring an open CDATA section."""
        if self._in_cdata:
            # Character references are not recognised inside CDATA, so
            # the text is written raw.  A literal "]]>" would end the
            # section early: close the section, emit "]]&gt;" as
            # ordinary text, then reopen the section.
            self._out.write(content.replace(']]>', ']]>]]&gt;<![CDATA['))
        else:
            # Same path as the base class, so characters the output
            # encoding cannot represent become character references
            # instead of raising.
            XMLGenerator.characters(self, content)

    # LexicalHandler methods
    # (we only support the most important ones and inherit the rest)

    def startDTD(self, name, public_id, system_id):
        """Write the opening of the DOCTYPE declaration.

        The closing '>' is written by endDTD.
        """
        self._out.write('<!DOCTYPE %s' % name)
        if public_id:
            self._out.write(' PUBLIC %s %s' % (
                quoteattr(public_id or ""), quoteattr(system_id or "")
                ))
        elif system_id:
            self._out.write(' SYSTEM %s' % quoteattr(system_id or ""))

    def endDTD(self):
        """Close the DOCTYPE declaration."""
        self._out.write('>')

    def comment(self, content):
        """Write a comment; content is written as given."""
        self._out.write('<!--')
        self._out.write(content)
        self._out.write('-->')

    def startCDATA(self):
        """Open a CDATA section."""
        self._in_cdata = 1
        self._out.write('<![CDATA[')

    def endCDATA(self):
        """Close the current CDATA section."""
        self._in_cdata = 0
        self._out.write(']]>')
# --- ContentGenerator is the SAX1 DocumentHandler for writing back XML
class ContentGenerator(XMLGenerator):
    """SAX1 DocumentHandler flavour of XMLGenerator."""

    def characters(self, str, start, end):
        # In SAX1, characters receives a buffer plus a start offset and
        # a count; in SAX2 it receives just the string.  Note that
        # "end" is used as a length here: the slice covers "end"
        # characters beginning at "start".
        # For plain strings, we may want to use a buffer object.
        stop = start + end
        return XMLGenerator.characters(self, str[start:stop])
# --- XMLFilterImpl
class XMLFilterBase(saxlib.XMLFilter):
    """Pass-through filter between an XMLReader and the application.

    The filter sits between a parent XMLReader and the client
    application's event handlers.  By default it forwards requests up
    to the reader and events on to the handlers unmodified; subclasses
    override specific methods to modify the event stream or the
    configuration requests as they pass through.
    """

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered resolver

    def resolveEntity(self, publicId, systemId):
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Install this filter as every handler of the parent, then
        let the parent parse source."""
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

# FIXME: remove this backward compatibility hack when not needed anymore
XMLFilterImpl = XMLFilterBase
# --- BaseIncrementalParser
class BaseIncrementalParser(xmlreader.IncrementalParser):
    """Convenience base class for SAX 2.0 driver writers.

    It implements the parse method of the XMLReader interface on top
    of the feed, close and reset methods of the IncrementalParser
    interface.
    """

    def parse(self, source):
        """Parse source by feeding its byte stream in 16384-byte reads.

        startDocument is reported before the first read; after the
        stream is exhausted the parser is closed and reset and
        endDocument is reported.
        """
        source = prepare_input_source(source)
        self.prepareParser(source)

        self._cont_handler.startDocument()

        # FIXME: what about char-stream?
        stream = source.getByteStream()
        while 1:
            chunk = stream.read(16384)
            if chunk == "":
                break
            self.feed(chunk)

        self.close()
        self.reset()

        self._cont_handler.endDocument()

    def prepareParser(self, source):
        """Hook called by parse so that the SAX 2.0 driver can prepare
        itself for parsing; drivers must override it."""
        raise NotImplementedError("prepareParser must be overridden!")
# --- Utility functions
def prepare_input_source(source, base = ""):
    """Return a fully resolved InputSource object ready for reading.

    source may be a system identifier string, a file-like object (any
    object with a "read" attribute) or an InputSource; base is an
    optional base URL used when making system identifiers absolute.

    When the resulting source has no byte stream, its system
    identifier is made absolute and opened with urllib2.urlopen; the
    opened stream is stored on the source and left open for the
    caller.
    """
    # isinstance (rather than an exact type match) so that subclasses
    # of the string types are treated as system identifiers too.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        # NOTE(review): f.name is handed to absolute_system_id, which
        # asserts that the result is absolute -- confirm this holds for
        # pseudo-file names such as "<stdin>".
        if hasattr(f, "name"):
            source.setSystemId(absolute_system_id(f.name, base))

    if source.getByteStream() is None:
        sysid = absolute_system_id(source.getSystemId(), base)
        source.setSystemId(sysid)
        f = urllib2.urlopen(sysid)
        source.setByteStream(f)

    return source
def absolute_system_id(sysid, base=''):
    """Turn `sysid` into an absolute identifier and return it made
    urllib-safe.

    An existing filesystem path becomes 'file:' followed by its
    absolute path; otherwise, when `base` is non-empty, `sysid` is
    resolved against it with Absolutize().  The result is asserted to
    satisfy IsAbsolute() and is then passed through MakeUrllibSafe().
    """
    if os.path.exists(sysid):
        resolved = 'file:%s' % os.path.abspath(sysid)
    elif base:
        resolved = Absolutize(sysid, base)
    else:
        resolved = sysid
    assert IsAbsolute(resolved)
    return MakeUrllibSafe(resolved)
# ===========================================================================
#
# DEPRECATED SAX 1.0 CLASSES
#
# ===========================================================================
# --- AttributeMap
class AttributeMap:
    """An implementation of AttributeList that takes an (attr,val) hash
    and uses it to implement the AttributeList interface."""

    def __init__(self, map):
        # `map` is kept by reference; use copy() for an independent object.
        self.map=map

    def getLength(self):
        "Return the number of attributes."
        return len(self.map)

    def getName(self, i):
        "Return the name at position i, or None when i is out of range."
        try:
            return list(self.map.keys())[i]
        except IndexError:
            return None

    def getType(self, i):
        "Return the attribute type, which is always 'CDATA'."
        return "CDATA"

    def getValue(self, i):
        """Return the value for i, which is either an integer position
        or an attribute name.  Unknown names and out-of-range positions
        yield None."""
        try:
            if isinstance(i, int):
                # An out-of-range position makes getName() return None,
                # which is then looked up as a key and ends up in the
                # KeyError branch below.
                return self.map[self.getName(i)]
            return self.map[i]
        except KeyError:
            return None

    def __len__(self):
        return len(self.map)

    def __getitem__(self, key):
        """Return the name at position `key` when it is an integer,
        otherwise the value stored under the name `key`.  Unlike
        getName/getValue, lookup errors are propagated."""
        if isinstance(key, int):
            return list(self.map.keys())[key]
        return self.map[key]

    def items(self):
        return self.map.items()

    def keys(self):
        return self.map.keys()

    def has_key(self,key):
        "Return True when an attribute named key exists."
        return key in self.map

    def get(self, key, alternative=None):
        return self.map.get(key, alternative)

    def copy(self):
        "Return a new AttributeMap backed by a copy of the mapping."
        return AttributeMap(self.map.copy())

    def values(self):
        return self.map.values()
# --- Event broadcasting object
class EventBroadcaster:
    """Takes a list of objects and forwards any method calls received
    to all objects in the list. The attribute list holds the list and
    can freely be modified by clients."""

    class Event:
        "Helper objects that represent event methods."

        def __init__(self,list,name):
            self.list=list
            self.name=name

        def __call__(self,*rest):
            # Look the method up on each receiver at call time and
            # invoke it with the positional arguments that were given.
            for obj in self.list:
                getattr(obj,self.name)(*rest)

    def __init__(self,list):
        self.list=list

    def __getattr__(self,name):
        # Only reached for attributes that are not found normally, so
        # every unknown name turns into a broadcastable event.
        return self.Event(self.list,name)

    def __repr__(self):
        return "<EventBroadcaster instance at %d>" % id(self)
# --- ESIS document handler
import saxlib
class ESISDocHandler(saxlib.HandlerBase):
    "A SAX document handler that produces naive ESIS output."

    def __init__(self,writer=sys.stdout):
        # Everything is emitted through writer.write().
        self.writer=writer

    def processingInstruction (self,target, remainder):
        "Write a processing instruction as a line starting with '?'."
        line = "?"+target+" "+remainder+"\n"
        self.writer.write(line)

    def startElement(self,name,amap):
        """Write the element start as a '(' line followed by one 'A'
        line for each attribute in amap."""
        emit = self.writer.write
        emit("("+name+"\n")
        for a_name in amap.keys():
            a_value = amap[a_name]
            emit("A"+a_name+" "+a_value+"\n")

    def endElement(self,name):
        "Write the element end as a line starting with ')'."
        line = ")"+name+"\n"
        self.writer.write(line)

    def characters(self,data,start_ix,length):
        """Write the slice of data that starts at start_ix and is
        length long as a line starting with '-'."""
        end_ix = start_ix+length
        self.writer.write("-"+data[start_ix:end_ix]+"\n")
# --- XML canonizer
class Canonizer(saxlib.HandlerBase):
    "A SAX document handler that produces canonized XML output."

    def __init__(self,writer=sys.stdout):
        self.writer=writer
        # Number of currently open elements; character data is only
        # written while this is positive.
        self.elem_level=0

    def processingInstruction (self,target, remainder):
        "Write the processing instruction unless its target is 'xml'."
        if target=="xml":
            return
        self.writer.write("<?"+target+" "+remainder+"?>")

    def startElement(self,name,amap):
        "Write the start tag with its attributes sorted by name."
        out = self.writer
        out.write("<"+name)
        for a_name in sorted(amap.keys()):
            out.write(" "+a_name+"=\"")
            self.write_data(amap[a_name])
            out.write("\"")
        out.write(">")
        self.elem_level += 1

    def endElement(self,name):
        "Write the end tag and leave the element."
        self.writer.write("</"+name+">")
        self.elem_level -= 1

    def ignorableWhitespace(self,data,start_ix,length):
        "Handle ignorable whitespace exactly like character data."
        self.characters(data,start_ix,length)

    def characters(self,data,start_ix,length):
        "Write escaped character data, but only inside an element."
        if self.elem_level<=0:
            return
        end_ix = start_ix+length
        self.write_data(data[start_ix:end_ix])

    def write_data(self,data):
        "Writes datachars to writer."
        # The order matters: '&' has to be escaped first so that the
        # ampersands introduced by the later replacements stay intact.
        escapes = (
            ("&", "&amp;"),
            ("<", "&lt;"),
            ("\"", "&quot;"),
            (">", "&gt;"),
            (chr(9), "&#9;"),
            (chr(10), "&#10;"),
            (chr(13), "&#13;"),
        )
        for raw, escaped in escapes:
            data = data.replace(raw, escaped)
        self.writer.write(data)
# --- mllib
class mllib:
    """A re-implementation of the htmllib, sgmllib and xmllib interfaces as a
    SAX DocumentHandler."""

    # Unsupported:
    # - setnomoretags
    # - setliteral
    # - translate_references
    # - handle_xml
    # - handle_doctype
    # - handle_charref
    # - handle_entityref
    # - handle_comment
    # - handle_cdata
    # - tag_attributes

    def __init__(self):
        self.reset()

    def reset(self):
        "Create a fresh parser and a fresh internal handler bound to it."
        import saxexts # only used here
        factory = saxexts.XMLParserFactory
        self.parser = factory.make_parser()
        self.handler = mllib.Handler(self.parser, self)
        self.handler.reset()

    def feed(self,data):
        "Hand data to the underlying parser."
        parser = self.parser
        parser.feed(data)

    def close(self):
        "Close the underlying parser."
        parser = self.parser
        parser.close()

    def get_stack(self):
        "Return the element stack kept by the internal handler."
        internal = self.handler
        return internal.get_stack()

    # --- Handler methods (to be overridden)

    def handle_starttag(self,name,method,atts):
        "Default: call the start_<name> method with the attributes."
        method(atts)

    def handle_endtag(self,name,method):
        "Default: call the end_<name> method."
        method()

    def handle_data(self,data):
        "Default: ignore character data."
        pass

    def handle_proc(self,target,data):
        "Default: ignore processing instructions."
        pass

    def unknown_starttag(self,name,atts):
        "Default: ignore start tags without a start_<name> method."
        pass

    def unknown_endtag(self,name):
        "Default: ignore end tags without an end_<name> method."
        pass

    def syntax_error(self,message):
        "Default: ignore non-fatal errors."
        pass

    # --- The internal handler class

    class Handler(saxlib.DocumentHandler,saxlib.ErrorHandler):
        """An internal class to handle SAX events and translate them to mllib
        events."""

        def __init__(self,driver,handler):
            # Register with the driver for both document and error events.
            self.driver=driver
            self.driver.setDocumentHandler(self)
            self.driver.setErrorHandler(self)
            self.handler=handler
            self.reset()

        def get_stack(self):
            return self.stack

        def reset(self):
            # Names of the currently open elements, outermost first.
            self.stack=[]

        # --- DocumentHandler methods

        def characters(self, ch, start, length):
            stop = start+length
            self.handler.handle_data(ch[start:stop])

        def endElement(self, name):
            method_name = "end_"+name
            if not hasattr(self.handler, method_name):
                self.handler.unknown_endtag(name)
            else:
                self.handler.handle_endtag(name,
                                           getattr(self.handler, method_name))
            # The element is closed whichever callback was used.
            del self.stack[-1]

        def ignorableWhitespace(self, ch, start, length):
            stop = start+length
            self.handler.handle_data(ch[start:stop])

        def processingInstruction(self, target, data):
            self.handler.handle_proc(target,data)

        def startElement(self, name, atts):
            self.stack.append(name)
            method_name = "start_"+name
            if not hasattr(self.handler, method_name):
                self.handler.unknown_starttag(name,atts)
                return
            self.handler.handle_starttag(name,
                                         getattr(self.handler, method_name),
                                         atts)

        # --- ErrorHandler methods

        def error(self, exception):
            # Recoverable errors are reported as syntax errors.
            message = str(exception)
            self.handler.syntax_error(message)

        def fatalError(self, exception):
            # Fatal errors abort parsing.
            message = str(exception)
            raise RuntimeError(message)

View File

@ -0,0 +1,378 @@
"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
should be based on this code. """
import handler
from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
# ===== XMLREADER =====
class XMLReader:
    """Callback-based interface for reading an XML document.

    A SAX2 driver for an XML parser implements this interface.  It
    lets an application query and set parser features and properties,
    register the event handlers used during document processing, and
    start a parse.

    SAX interfaces are assumed to be synchronous: parse must not
    return before parsing is complete, and a reader must wait for an
    event-handler callback to return before it reports the next
    event."""

    def __init__(self):
        # Start out with the default handler objects from the handler module.
        self._err_handler = handler.ErrorHandler()
        self._ent_handler = handler.EntityResolver()
        self._dtd_handler = handler.DTDHandler()
        self._cont_handler = handler.ContentHandler()

    def parse(self, source):
        "Parse an XML document from a system identifier or an InputSource."
        message = "This method must be implemented!"
        raise NotImplementedError(message)

    def getContentHandler(self):
        "Return the ContentHandler that is currently registered."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Register the object that receives document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Return the DTD handler that is currently registered."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register the object that receives basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Return the EntityResolver that is currently registered."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register the object that resolves external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Return the ErrorHandler that is currently registered."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register the object that receives error-message events."
        self._err_handler = handler

    def setLocale(self, locale):
        """Let an application choose the locale for errors and warnings.

        SAX parsers need not localise errors and warnings, but when
        they cannot support the requested locale they must throw a SAX
        exception.  Applications may request a locale change in the
        middle of a parse.  This base implementation always raises."""
        message = "Locale support not implemented"
        raise SAXNotSupportedException(message)

    def getFeature(self, name):
        "Look up and return the state of a SAX2 feature."
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setFeature(self, name, state):
        "Set the state of a SAX2 feature."
        message = "Feature '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def getProperty(self, name):
        "Look up and return the value of a SAX2 property."
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)

    def setProperty(self, name, value):
        "Set the value of a SAX2 property."
        message = "Property '%s' not recognized" % name
        raise SAXNotRecognizedException(message)
class IncrementalParser(XMLReader):
    """This interface adds three extra methods to the XMLReader
    interface that allow XML parsers to support incremental
    parsing. Support for this interface is optional, since not all
    underlying XML parsers support this functionality.

    When the parser is instantiated it is ready to begin accepting
    data from the feed method immediately. After parsing has been
    finished with a call to close the reset method must be called to
    make the parser ready to accept new data, either from feed or
    using the parse method.

    Note that these methods must _not_ be called during parsing, that
    is, after parse has been called and before it returns.

    By default, the class also implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def __init__(self, bufsize=2**16):
        # Size passed to each read() call made by parse().
        self._bufsize = bufsize
        XMLReader.__init__(self)

    def parse(self, source):
        """Parse `source` by reading its byte stream in pieces of
        self._bufsize and passing each piece to feed(), then calling
        close().  `source` is normalised with
        saxutils.prepare_input_source() first."""
        import saxutils
        source = saxutils.prepare_input_source(source)

        self.prepareParser(source)
        stream = source.getByteStream()
        chunk = stream.read(self._bufsize)
        # Stop on *any* empty read.  Comparing against the literal ""
        # never terminates for streams that signal end-of-file with an
        # empty bytes object or None.
        while chunk:
            self.feed(chunk)
            chunk = stream.read(self._bufsize)
        self.close()

    def feed(self, data):
        """This method gives the raw XML data in the data parameter to
        the parser and makes it parse the data, emitting the
        corresponding events. It is allowed for XML constructs to be
        split across several calls to feed.

        feed may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")

    def close(self):
        """This method is called when the entire XML document has been
        passed to the parser through the feed method, to notify the
        parser that there are no more data. This allows the parser to
        do the final checks on the document and empty the internal
        data buffer.

        The parser will not be ready to parse another document until
        the reset method has been called.

        close may raise SAXException."""
        raise NotImplementedError("This method must be implemented!")

    def reset(self):
        """This method is called after close has been called to reset
        the parser so that it is ready to parse new documents. The
        results of calling parse or feed after close without calling
        reset are undefined."""
        raise NotImplementedError("This method must be implemented!")
# ===== LOCATOR =====
class Locator:
    """Interface that ties a SAX event to a position in a document.

    Results are only valid while a DocumentHandler method is being
    called; at any other time they are unpredictable.  The methods of
    this base class return placeholders: -1 for line and column, None
    for the identifiers."""

    def getLineNumber(self):
        "Return the line number where the current event ends."
        line = -1
        return line

    def getColumnNumber(self):
        "Return the column number where the current event ends."
        column = -1
        return column

    def getSystemId(self):
        "Return the system identifier for the current event."
        return None

    def getPublicId(self):
        "Return the public identifier for the current event."
        return None
# ===== INPUTSOURCE =====
class InputSource:
    """Bundle of the information an XMLReader needs to read an entity.

    An instance may carry a public identifier, a system identifier, a
    byte stream (possibly with character encoding information) and/or
    a character stream for the entity.

    Applications create these objects to pass to XMLReader.parse and
    to return from EntityResolver.resolveEntity.

    An InputSource belongs to the application.  The XMLReader must not
    modify InputSource objects it receives from the application,
    although it may copy them and modify the copies."""

    def __init__(self, system_id = None):
        # Only the system identifier can be given up front; everything
        # else starts out unset.
        self.__charfile = None
        self.__bytefile = None
        self.__encoding = None
        self.__public_id = None
        self.__system_id = system_id

    # --- identifiers

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    # --- encoding

    def getEncoding(self):
        "Get the character encoding of this InputSource."
        return self.__encoding

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding has to be a string that is acceptable in an XML
        encoding declaration (see section 4.3.3 of the XML
        recommendation).

        It is ignored when the InputSource also contains a character
        stream."""
        self.__encoding = encoding

    # --- streams

    def getByteStream(self):
        """Get the byte stream for this input source.

        getEncoding returns the character encoding for this byte
        stream, or None if it is unknown."""
        return self.__bytefile

    def setByteStream(self, bytefile):
        """Set the byte stream for this input source: a Python
        file-like object that does not perform byte-to-character
        conversion.

        The SAX parser ignores it when a character stream is specified
        as well, but prefers it over opening a URI connection itself.

        An application that knows the character encoding of the byte
        stream should set it with setEncoding."""
        self.__bytefile = bytefile

    def getCharacterStream(self):
        "Get the character stream for this input source."
        return self.__charfile

    def setCharacterStream(self, charfile):
        """Set the character stream for this input source.  (The
        stream must be a Python 2.0 Unicode-wrapped file-like that
        performs conversion to Unicode strings.)

        When a character stream is specified, the SAX parser ignores
        any byte stream and does not attempt to open a URI connection
        to the system identifier."""
        self.__charfile = charfile
# ===== ATTRIBUTESIMPL =====
class AttributesImpl:
    """Attribute collection that is not namespace-aware; names and
    qualified names are the same thing here."""

    def __init__(self, attrs):
        """Non-NS-aware implementation.

        attrs should be of the form {name : value}."""
        self._attrs = attrs

    def getLength(self):
        "Return the number of attributes."
        return len(self._attrs)

    def getType(self, name):
        "Return the attribute type, which is always 'CDATA'."
        return "CDATA"

    def getValue(self, name):
        "Return the value of attribute name; raise KeyError if missing."
        return self._attrs[name]

    def getValueByQName(self, name):
        "Return the value of attribute name; raise KeyError if missing."
        return self._attrs[name]

    def getNameByQName(self, name):
        "Return name unchanged; raise KeyError if there is no such attribute."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getQNameByName(self, name):
        "Return name unchanged; raise KeyError if there is no such attribute."
        if name not in self._attrs:
            raise KeyError(name)
        return name

    def getNames(self):
        return self._attrs.keys()

    def getQNames(self):
        return self._attrs.keys()

    def __len__(self):
        return len(self._attrs)

    def __getitem__(self, name):
        return self._attrs[name]

    def keys(self):
        return self._attrs.keys()

    def has_key(self, name):
        "Return True when an attribute called name exists."
        return name in self._attrs

    def get(self, name, alternative=None):
        return self._attrs.get(name, alternative)

    def copy(self):
        """Return a new object of the same class holding a copy of the
        attribute mapping."""
        # Copy the mapping as well: handing over self._attrs itself
        # would make the "copy" follow later changes to the original.
        return self.__class__(self._attrs.copy())

    def items(self):
        return self._attrs.items()

    def values(self):
        return self._attrs.values()
# ===== ATTRIBUTESNSIMPL =====
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection keyed by (ns_uri, lname)
    pairs, with a parallel mapping from those pairs to qualified
    names."""

    def __init__(self, attrs, qnames):
        """NS-aware implementation.

        attrs should be of the form {(ns_uri, lname): value, ...}.
        qnames of the form {(ns_uri, lname): qname, ...}."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is
        name; raise KeyError if there is none."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair whose qualified name is
        name; raise KeyError if there is none."""
        for (nsname, qname) in self._qnames.items():
            if qname == name:
                return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name stored for the (ns_uri, lname) pair name."
        return self._qnames[name]

    def getQNames(self):
        return self._qnames.values()

    def copy(self):
        """Return a new object of the same class holding copies of
        both mappings."""
        # Copy the mappings as well: handing over the originals would
        # make the "copy" follow later changes to this object.
        return self.__class__(self._attrs.copy(), self._qnames.copy())
def _test():
    "Smoke test: create one instance of each interface class without arguments."
    for interface in (XMLReader, IncrementalParser, Locator):
        interface()

# Run the smoke test when the module is executed as a script.
if __name__ == "__main__":
    _test()