Yet a lot more python files that somehow svn just refused to add

git-svn-id: http://google-refine.googlecode.com/svn/trunk@962 7d457c2a-affb-35e4-300a-418c747d4874
2010-06-14 21:59:17 +00:00 · 2010-06-14 21:59:17 +00:00 · 3f58d88922
commit 3f58d88922
parent 7767536292
7 changed files with 2680 additions and 0 deletions
--- a/extensions/jython/module/MOD-INF/lib/jython/xml/Uri.py
+++ b/extensions/jython/module/MOD-INF/lib/jython/xml/Uri.py
@ -0,0 +1,380 @@
+# pylint: disable-msg=C0103
+#
+# backported code from 4Suite with slight modifications, started from r1.89 of
+# Ft/Lib/Uri.py, by syt@logilab.fr on 2005-02-09
+#
+# part, if not all, of this code should probably move to urlparse (or be used
+# to fix some existing functions in that module)
+#
+#
+# Copyright 2004 Fourthought, Inc. (USA).
+# Detailed license and copyright information: http://4suite.org/COPYRIGHT
+# Project home, documentation, distributions: http://4suite.org/
+import os.path
+import sys
+import re
+import urlparse, urllib, urllib2
+
+def UnsplitUriRef(uriRefSeq):
+    """should replace urlparse.urlunsplit
+
+    Builds a URI reference string out of a five-item tuple or list laid
+    out as (scheme, authority, path, query, fragment), i.e. the shape that
+    SplitUriRef() returns.
+
+    A component that is None is omitted together with its delimiter; a
+    component that is an empty string still contributes its delimiter.
+    The path is always emitted.
+
+    Raises TypeError if uriRefSeq is neither a tuple nor a list.
+    """
+    if not isinstance(uriRefSeq, (tuple, list)):
+        raise TypeError("sequence expected, got %s" % type(uriRefSeq))
+    (scheme, authority, path, query, fragment) = uriRefSeq
+    pieces = []
+    if scheme is not None:
+        pieces.append(scheme + ':')
+    if authority is not None:
+        pieces.append('//' + authority)
+    pieces.append(path)
+    if query is not None:
+        pieces.append('?' + query)
+    if fragment is not None:
+        pieces.append('#' + fragment)
+    return ''.join(pieces)
+
+SPLIT_URI_REF_PATTERN = re.compile(r"^(?:(?P<scheme>[^:/?#]+):)?(?://(?P<authority>[^/?#]*))?(?P<path>[^?#]*)(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")
+
+def SplitUriRef(uriref):
+    """should replace urlparse.urlsplit
+
+    Breaks a URI reference string into its generic components (RFC 2396
+    appendix B) and returns them as the tuple
+    (scheme, authority, path, query, fragment).
+
+    The path is always a string (possibly empty). Every other component is
+    a string (possibly empty) when its delimiter is present in the input
+    and None when it is absent.
+
+    Following rfc3986, no distinction is made between a path and an
+    "opaque part" as RFC 2396 did.
+    """
+    # The match object is used without a None check, so an input the
+    # pattern rejects surfaces as an AttributeError here.
+    components = SPLIT_URI_REF_PATTERN.match(uriref)
+    return components.group('scheme', 'authority', 'path', 'query', 'fragment')
+
+
+def Absolutize(uriRef, baseUri):
+    """
+    Resolves a URI reference to absolute form, effecting the result of RFC
+    3986 section 5. The URI reference is considered to be relative to the
+    given base URI.
+
+    It is the caller's responsibility to ensure that the base URI matches
+    the absolute-URI syntax rule of RFC 3986, and that its path component
+    does not contain '.' or '..' segments if the scheme is hierarchical.
+    Unexpected results may occur otherwise.
+
+    This function only conducts a minimal sanity check in order to determine
+    if relative resolution is possible: it raises a ValueError if the base
+    URI is empty or does not have a scheme component. While it is true that
+    the base URI is irrelevant if the URI reference has a scheme, an
+    exception is raised in order to signal that the given string does not
+    even come close to meeting the criteria to be usable as a base URI.
+
+    It is the caller's responsibility to make a determination of whether the
+    URI reference constitutes a "same-document reference", as defined in RFC
+    2396 or RFC 3986. As per the spec, dereferencing a same-document
+    reference "should not" involve retrieval of a new representation of the
+    referenced resource. Note that the two specs have different definitions
+    of same-document reference: RFC 2396 says it is *only* the cases where the
+    reference is the empty string, or "#" followed by a fragment; RFC 3986
+    requires making a comparison of the base URI to the absolute form of the
+    reference (as is returned by the spec), minus its fragment component,
+    if any.
+
+    This function is similar to urlparse.urljoin() and urllib.basejoin().
+    Those functions, however, are (as of Python 2.3) outdated, buggy, and/or
+    designed to produce results acceptable for use with other core Python
+    libraries, rather than being earnest implementations of the relevant
+    specs. Their problems are most noticeable in their handling of
+    same-document references and 'file:' URIs, both being situations that
+    come up far too often to consider the functions reliable enough for
+    general use.
+
+    Returns the resolved URI as a string.
+    """
+    # Reasons to avoid using urllib.basejoin() and urlparse.urljoin():
+    # - Both are partial implementations of long-obsolete specs.
+    # - Both accept relative URLs as the base, which no spec allows.
+    # - urllib.basejoin() mishandles the '' and '..' references.
+    # - If the base URL uses a non-hierarchical or relative path,
+    #    or if the URL scheme is unrecognized, the result is not
+    #    always as expected (partly due to issues in RFC 1808).
+    # - If the authority component of a 'file' URI is empty,
+    #    the authority component is removed altogether. If it was
+    #    not present, an empty authority component is in the result.
+    # - '.' and '..' segments are not always collapsed as well as they
+    #    should be (partly due to issues in RFC 1808).
+    # - Effective Python 2.4, urllib.basejoin() *is* urlparse.urljoin(),
+    #    but urlparse.urljoin() is still based on RFC 1808.
+
+    # This procedure is based on the pseudocode in RFC 3986 sec. 5.2.
+    #
+    # ensure base URI is absolute
+    if not baseUri:
+        raise ValueError('baseUri is required and must be a non empty string')
+    if not IsAbsolute(baseUri):
+        raise ValueError('%r is not an absolute URI' % baseUri)
+    # shortcut for the simplest same-document reference cases
+    if uriRef == '' or uriRef[0] == '#':
+        return baseUri.split('#')[0] + uriRef
+    # ensure a clean slate
+    tScheme = tAuth = tPath = tQuery = None
+    # parse the reference into its components
+    (rScheme, rAuth, rPath, rQuery, rFrag) = SplitUriRef(uriRef)
+    # if the reference is absolute, eliminate '.' and '..' path segments
+    # and skip to the end
+    if rScheme is not None:
+        tScheme = rScheme
+        tAuth = rAuth
+        tPath = RemoveDotSegments(rPath)
+        tQuery = rQuery
+    else:
+        # the base URI's scheme, and possibly more, will be inherited
+        (bScheme, bAuth, bPath, bQuery, bFrag) = SplitUriRef(baseUri)
+        # if the reference is a net-path, just eliminate '.' and '..' path
+        # segments; no other changes needed.
+        if rAuth is not None:
+            tAuth = rAuth
+            tPath = RemoveDotSegments(rPath)
+            tQuery = rQuery
+        # if it's not a net-path, we need to inherit pieces of the base URI
+        else:
+            # use base URI's path if the reference's path is empty
+            if not rPath:
+                tPath = bPath
+                # Use the reference's query whenever it is defined, even if
+                # it is the empty string (reference "?"); only an undefined
+                # query inherits the base URI's. An "x and y or z" expression
+                # would wrongly fall through to the base query for "".
+                if rQuery is not None:
+                    tQuery = rQuery
+                else:
+                    tQuery = bQuery
+            # the reference's path is not empty
+            else:
+                # just use the reference's path if it's absolute
+                if rPath[0] == '/':
+                    tPath = RemoveDotSegments(rPath)
+                # merge the reference's relative path with the base URI's path
+                else:
+                    if bAuth is not None and not bPath:
+                        tPath = '/' + rPath
+                    else:
+                        tPath = bPath[:bPath.rfind('/')+1] + rPath
+                    tPath = RemoveDotSegments(tPath)
+                # use the reference's query
+                tQuery = rQuery
+            # since the reference isn't a net-path,
+            # use the authority from the base URI
+            tAuth = bAuth
+        # inherit the scheme from the base URI
+        tScheme = bScheme
+    # always use the reference's fragment (no separate variable needed)
+
+    # now compose the target URI (RFC 3986 sec. 5.3)
+    return UnsplitUriRef((tScheme, tAuth, tPath, tQuery, rFrag))
+
+
+REG_NAME_HOST_PATTERN = re.compile(r"^(?:(?:[0-9A-Za-z\-_\.!~*'();&=+$,]|(?:%[0-9A-Fa-f]{2}))*)$")
+
+def MakeUrllibSafe(uriRef):
+    """
+    Makes the given RFC 3986-conformant URI reference safe for passing
+    to legacy urllib functions. The result may not be a valid URI.
+
+    As of Python 2.3.3, urllib.urlopen() does not fully support
+    internationalized domain names, it does not strip fragment components,
+    and on Windows, it expects file URIs to use '|' instead of ':' in the
+    path component corresponding to the drivespec. It also relies on
+    urllib.unquote(), which mishandles unicode arguments. This function
+    produces a URI reference that will work around these issues, although
+    the IDN workaround is limited to Python 2.3 and up.
+
+    Returns the adjusted URI reference, without its fragment.
+
+    Raises ValueError if the URI reference is Unicode and contains
+    non-ASCII characters.
+    """
+    # IDN support requires decoding any percent-encoded octets in the
+    # host part (if it's a reg-name) of the authority component, and when
+    # doing DNS lookups, applying IDNA encoding to that string first.
+    # As of Python 2.3, there is an IDNA codec, and the socket and httplib
+    # modules accept Unicode strings and apply IDNA encoding automatically
+    # where necessary. However, urllib.urlopen() has not yet been updated
+    # to do the same; it raises an exception if you give it a Unicode
+    # string, and does no conversion on non-Unicode strings, meaning you
+    # have to give it an IDNA string yourself. We will only support it on
+    # Python 2.3 and up.
+    if isinstance(uriRef, unicode):
+        try:
+            uriRef = uriRef.encode('us-ascii') # parts of urllib are not unicode safe
+        except UnicodeError:
+            raise ValueError("uri %r must consist of ASCII characters." % uriRef)
+    (scheme, auth, path, query, frag) = urlparse.urlsplit(uriRef)
+
+    # Split userinfo from host:port at the LAST '@', so that a stray '@'
+    # inside the userinfo cannot break the two-way unpacking.
+    userinfo = None
+    hostport = auth
+    if auth:
+        at = auth.rfind('@')
+        if at > -1:
+            userinfo = auth[:at]
+            hostport = auth[at+1:]
+
+    # Split host from port at the last ':' -- but only when that colon
+    # comes after any ']' closing an IPv6 literal; the colons inside
+    # "[::1]" are part of the host, not a port separator.
+    host = hostport
+    port = None
+    if hostport:
+        colon = hostport.rfind(':')
+        if colon > hostport.rfind(']'):
+            host = hostport[:colon]
+            port = hostport[colon+1:]
+
+    # see if host is a reg-name, as opposed to IPv4 or IPv6 addr.
+    if host and REG_NAME_HOST_PATTERN.match(host):
+        # percent-encoded hostnames will always fail DNS lookups
+        host = urllib.unquote(host) #PercentDecode(host)
+        # IDNA-encode if possible.
+        # We shouldn't do this for schemes that don't need DNS lookup,
+        # but are there any (that you'd be calling urlopen for)?
+        if sys.version_info[0:2] >= (2, 3):
+            if isinstance(host, str):
+                host = host.decode('utf-8')
+            host = host.encode('idna')
+        # reassemble the authority with the new hostname
+        # (percent-decoded, and possibly IDNA-encoded)
+        auth = ''
+        if userinfo:
+            auth += userinfo + '@'
+        auth += host
+        if port:
+            auth += ':' + port
+
+    # On Windows, ensure that '|', not ':', is used in a drivespec.
+    if os.name == 'nt' and scheme == 'file':
+        path = path.replace(':', '|', 1)
+
+    # Note that we drop fragment, if any. See RFC 3986 sec. 3.5.
+    uri = urlparse.urlunsplit((scheme, auth, path, query, None))
+
+    return uri
+
+
+
+def BaseJoin(base, uriRef):
+    """
+    Merges a base URI reference with another URI reference, returning a
+    new URI reference.
+
+    Works like Absolutize() with the argument order swapped, except that
+    the base may itself be a relative reference. A base without a scheme
+    is resolved as though it had one; that stand-in scheme is then
+    stripped from the result again unless uriRef carried a scheme of its
+    own. Consequently, when neither argument has a scheme, neither does
+    the result.
+
+    The name mirrors urllib.basejoin(), which this resembles, but path
+    merging, dot segment elimination, and inheritance of query and
+    fragment components follow the current rfc3986 algorithms.
+
+    WARNING: This function exists for 2 reasons: (1) because of a need
+    within the 4Suite repository to perform URI reference absolutization
+    using base URIs that are stored (inappropriately) as absolute paths
+    in the subjects of statements in the RDF model, and (2) because of
+    a similar need to interpret relative repo paths in a 4Suite product
+    setup.xml file as being relative to a path that can be set outside
+    the document. When these needs go away, this function probably will,
+    too, so it is not advisable to use it.
+    """
+    if IsAbsolute(base):
+        return Absolutize(uriRef, base)
+    # Lend the scheme-less base a throwaway scheme so that Absolutize()
+    # will accept it as a base URI.
+    placeholder = 'basejoin'
+    joined = Absolutize(uriRef, '%s:%s' % (placeholder, base))
+    if not IsAbsolute(uriRef):
+        # no scheme in, no scheme out: cut off "<placeholder>:"
+        return joined[len(placeholder)+1:]
+    # the result already carries the scheme taken from uriRef
+    return joined
+
+
+def RemoveDotSegments(path):
+    """
+    Supports Absolutize() by implementing the remove_dot_segments function
+    described in RFC 3986 sec. 5.2. Most '.' and '..' segments are
+    collapsed out of the path, while empty segments are left alone. It is
+    meant to run as part of path merging and may give surprising results
+    when used on its own.
+
+    Returns the cleaned-up path as a string.
+
+    Semi-private: not for general use. The segments are processed as a
+    list rather than by walking the string as the spec literally
+    describes.
+    """
+    # a path that is nothing but "." or ".." collapses to an empty string
+    if path == '.' or path == '..':
+        return path[0:0] # preserves string type
+    # peel off every leading "./" and "../"
+    while True:
+        if path[:2] == './':
+            path = path[2:]
+        elif path[:3] == '../':
+            path = path[3:]
+        else:
+            break
+    # Remember a leading slash and drop it, so that split() does not hand
+    # back an ambiguous empty first segment.
+    rooted = path[:1] == '/'
+    if rooted:
+        path = path[1:]
+    # a trailing "/." becomes just "/"
+    if path[-2:] == '/.':
+        path = path[:-1]
+    parts = path.split('/')
+    final = len(parts) - 1
+    kept = []
+    for pos, part in enumerate(parts):
+        if part == '.':
+            # '.' segments contribute nothing
+            continue
+        if part != '..':
+            # ordinary segment (possibly empty): keep it
+            kept.append(part)
+            continue
+        # '..' cancels the previously kept segment; with nothing to cancel
+        # it survives only in a relative path
+        if kept:
+            kept.pop()
+        elif not rooted:
+            kept.append(part)
+        # a '..' in last position must leave the result ending in '/'
+        if pos == final:
+            kept.append('')
+    rebuilt = '/'.join(kept)
+    if rooted:
+        return '/' + rebuilt
+    return '' + rebuilt
+
+
+SCHEME_PATTERN = re.compile(r'([a-zA-Z][a-zA-Z0-9+\-.]*):')
+def GetScheme(uriRef):
+    """
+    Extracts only the scheme from a URI reference, as cheaply as possible.
+    Returns the scheme (without the trailing ':') as a string, or None
+    when the reference does not start with one.
+    """
+    # A regex match was measured (50,000 calls on a 1.0-GHz PIII, FreeBSD
+    # 4.7, Python 2.2.1) at 0.95s with a scheme present and 0.05s without,
+    # against 1.5s for urllib.splittype()[0], 2.5s for
+    # Ft.Lib.Uri.SplitUriRef()[0] and 3.5s for urlparse.urlparse()[0].
+    found = SCHEME_PATTERN.match(uriRef)
+    if found is not None:
+        return found.group(1)
+    return None
+
+
+def IsAbsolute(identifier):
+    """
+    Tells whether a string believed to be a URI or URI reference is
+    absolute in the RFC 2396 sense, i.e. whether it has a scheme.
+    Returns True if it does and False if it is relative.
+    """
+    # Reuse GetScheme() rather than compiling another massive regex.
+    scheme = GetScheme(identifier)
+    return scheme is not None
--- a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/drivers2/init.py
+++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/drivers2/init.py
@ -0,0 +1 @@
+"Directory for SAX version 2 drivers."
--- a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/drivers2/drv_javasax.py
+++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/drivers2/drv_javasax.py
@ -0,0 +1,333 @@
+"""
+SAX driver for the Java SAX parsers. Can only be used in Jython.
+
+$Id: drv_javasax.py,v 1.5 2003/01/26 09:08:51 loewis Exp $
+"""
+
+# --- Initialization
+
+version = "0.10"
+revision = "$Revision: 1.5 $"
+
+import string
+from xml.sax import xmlreader, saxutils
+from xml.sax.handler import feature_namespaces, feature_namespace_prefixes
+from xml.sax import _exceptions
+
+# we only work in jython
+import sys
+if sys.platform[:4] != "java":
+    raise _exceptions.SAXReaderNotAvailable("drv_javasax not available in CPython", None)
+del sys
+
+# get the necessary Java SAX classes
+try:
+    from org.python.core import FilelikeInputStream
+    from org.xml.sax.helpers import XMLReaderFactory
+    from org.xml import sax as javasax
+except ImportError:
+    raise _exceptions.SAXReaderNotAvailable("SAX is not on the classpath", None)
+
+# get some JAXP stuff
+try:
+    from javax.xml.parsers import SAXParserFactory, ParserConfigurationException
+    factory = SAXParserFactory.newInstance()
+    jaxp = 1
+except ImportError:
+    jaxp = 0
+
+from java.lang import String
+
+
+def _wrap_sax_exception(e):
+    """Build a Python SAXParseException from the parse error object e.
+
+    The message and nested exception are copied over, and the position
+    (column, line, public id, system id) read from e is packaged in a
+    SimpleLocator. e is presumably a Java SAXParseException -- confirm
+    against the Java parser in use.
+    """
+    message = e.message
+    cause = e.exception
+    where = SimpleLocator(e.columnNumber,
+                          e.lineNumber,
+                          e.publicId,
+                          e.systemId)
+    return _exceptions.SAXParseException(message, cause, where)
+
+class JyErrorHandlerWrapper(javasax.ErrorHandler):
+    """Java-side ErrorHandler that forwards to a Python error handler.
+
+    Each incoming exception object is converted with
+    _wrap_sax_exception() before being handed to the same-named method
+    of the wrapped handler.
+    """
+
+    def __init__(self, err_handler):
+        # Python handler that receives the converted exceptions
+        self._err_handler = err_handler
+
+    def error(self, exc):
+        wrapped = _wrap_sax_exception(exc)
+        self._err_handler.error(wrapped)
+
+    def fatalError(self, exc):
+        wrapped = _wrap_sax_exception(exc)
+        self._err_handler.fatalError(wrapped)
+
+    def warning(self, exc):
+        wrapped = _wrap_sax_exception(exc)
+        self._err_handler.warning(wrapped)
+
+class JyInputSourceWrapper(javasax.InputSource):
+    """Java InputSource built from one of the source kinds parse() accepts.
+
+    source may be:
+      - a string (str or unicode), passed straight to the Java
+        InputSource constructor;
+      - a file-like object (anything with a "read" attribute), wrapped in
+        a FilelikeInputStream; its "name", if any, becomes the system id;
+      - otherwise an object offering getByteStream(), getSystemId(),
+        getPublicId() and getEncoding() (an xml.sax.xmlreader.InputSource),
+        whose values are copied over.
+    """
+    def __init__(self, source):
+        # basestring, not str: a unicode system id must take this branch
+        # too, otherwise it falls through to the InputSource branch below
+        # and fails on the missing getByteStream().
+        if isinstance(source, basestring):
+            javasax.InputSource.__init__(self, source)
+        elif hasattr(source, "read"):#file like object
+            javasax.InputSource.__init__(self, FilelikeInputStream(source))
+            if hasattr(source, "name"):
+                self.setSystemId(source.name)
+        else:#xml.sax.xmlreader.InputSource object
+            #Use byte stream constructor if possible so that Xerces won't attempt to open
+            #the url at systemId unless it's really there
+            byte_stream = source.getByteStream()
+            if byte_stream:
+                javasax.InputSource.__init__(self,
+                                             FilelikeInputStream(byte_stream))
+            else:
+                javasax.InputSource.__init__(self)
+            system_id = source.getSystemId()
+            if system_id:
+                self.setSystemId(system_id)
+            self.setPublicId(source.getPublicId())
+            self.setEncoding(source.getEncoding())
+
+class JyEntityResolverWrapper(javasax.EntityResolver):
+    """Java-side EntityResolver that delegates to a Python resolver."""
+
+    def __init__(self, entityResolver):
+        # Python resolver that is asked for the entity
+        self._resolver = entityResolver
+
+    def resolveEntity(self, pubId, sysId):
+        """Ask the wrapped resolver, then wrap its answer for Java."""
+        resolved = self._resolver.resolveEntity(pubId, sysId)
+        return JyInputSourceWrapper(resolved)
+
+class JyDTDHandlerWrapper(javasax.DTDHandler):
+    """Java-side DTDHandler that passes every event to a Python handler."""
+
+    def __init__(self, dtdHandler):
+        # Python handler that receives the DTD events
+        self._handler = dtdHandler
+
+    def notationDecl(self, name, publicId, systemId):
+        target = self._handler
+        target.notationDecl(name, publicId, systemId)
+
+    def unparsedEntityDecl(self, name, publicId, systemId, notationName):
+        target = self._handler
+        target.unparsedEntityDecl(name, publicId, systemId, notationName)
+
+class SimpleLocator(xmlreader.Locator):
+    """Locator that reports a fixed position supplied at construction."""
+
+    def __init__(self, colNum, lineNum, pubId, sysId):
+        # note the argument order: column first, then line
+        self.colNum, self.lineNum = colNum, lineNum
+        self.pubId, self.sysId = pubId, sysId
+
+    def getColumnNumber(self):
+        "Return the stored column number."
+        return self.colNum
+
+    def getLineNumber(self):
+        "Return the stored line number."
+        return self.lineNum
+
+    def getPublicId(self):
+        "Return the stored public identifier."
+        return self.pubId
+
+    def getSystemId(self):
+        "Return the stored system identifier."
+        return self.sysId
+
+# --- JavaSAXParser
+class JavaSAXParser(xmlreader.XMLReader, javasax.ContentHandler):
+    """SAX driver for the Java SAX parsers.
+
+    Acts as a Python XMLReader on the outside and registers itself as the
+    Java parser's ContentHandler, so that every content event raised by
+    the Java parser is relayed to the Python content handler
+    (self._cont_handler).
+    """
+
+    def __init__(self, jdriver = None):
+        """Create the underlying Java parser.
+
+        jdriver is handed to create_java_parser() to select the parser.
+        Both namespace features start out switched off.
+        """
+        xmlreader.XMLReader.__init__(self)
+        self._parser = create_java_parser(jdriver)
+        self._parser.setFeature(feature_namespaces, 0)
+        self._parser.setFeature(feature_namespace_prefixes, 0)
+        self._parser.setContentHandler(self)
+        # Reusable attribute views; startElement() points them at the
+        # attributes of the element currently being reported.
+        self._nsattrs = AttributesNSImpl()
+        self._attrs = AttributesImpl()
+        # Re-register the current handlers through our own setters so
+        # that the Java parser receives the wrapped versions.
+        self.setEntityResolver(self.getEntityResolver())
+        self.setErrorHandler(self.getErrorHandler())
+        self.setDTDHandler(self.getDTDHandler())
+
+    # XMLReader methods
+
+    def parse(self, source):
+        "Parse an XML document from a URL or an InputSource."
+        self._parser.parse(JyInputSourceWrapper(source))
+
+    def getFeature(self, name):
+        return self._parser.getFeature(name)
+
+    def setFeature(self, name, state):
+        self._parser.setFeature(name, state)
+
+    def getProperty(self, name):
+        return self._parser.getProperty(name)
+
+    def setProperty(self, name, value):
+        self._parser.setProperty(name, value)
+
+    def setEntityResolver(self, resolver):
+        self._parser.entityResolver = JyEntityResolverWrapper(resolver)
+        xmlreader.XMLReader.setEntityResolver(self, resolver)
+
+    def setErrorHandler(self, err_handler):
+        self._parser.errorHandler = JyErrorHandlerWrapper(err_handler)
+        xmlreader.XMLReader.setErrorHandler(self, err_handler)
+
+    def setDTDHandler(self, dtd_handler):
+        self._parser.setDTDHandler(JyDTDHandlerWrapper(dtd_handler))
+        xmlreader.XMLReader.setDTDHandler(self, dtd_handler)
+
+    # ContentHandler methods
+    def setDocumentLocator(self, locator):
+        self._cont_handler.setDocumentLocator(locator)
+
+    def startDocument(self):
+        self._cont_handler.startDocument()
+        # Sample the namespace feature once per document; it decides which
+        # flavour of element events is forwarded below.
+        self._namespaces = self._parser.getFeature(feature_namespaces)
+
+    def startElement(self, uri, lname, qname, attrs):
+        if self._namespaces:
+            self._nsattrs._attrs = attrs
+            self._cont_handler.startElementNS((uri or None, lname), qname,
+                                              self._nsattrs)
+        else:
+            self._attrs._attrs = attrs
+            self._cont_handler.startElement(qname, self._attrs)
+
+    def startPrefixMapping(self, prefix, uri):
+        self._cont_handler.startPrefixMapping(prefix, uri)
+
+    def characters(self, char, start, len):
+        # unicode(), not str(): character data may contain non-ASCII
+        # text, which a byte string conversion cannot carry safely.
+        self._cont_handler.characters(unicode(String(char, start, len)))
+
+    def ignorableWhitespace(self, char, start, len):
+        # same conversion as characters(), for consistency
+        self._cont_handler.ignorableWhitespace(unicode(String(char, start, len)))
+
+    def endElement(self, uri, lname, qname):
+        if self._namespaces:
+            self._cont_handler.endElementNS((uri or None, lname), qname)
+        else:
+            self._cont_handler.endElement(qname)
+
+    def endPrefixMapping(self, prefix):
+        self._cont_handler.endPrefixMapping(prefix)
+
+    def endDocument(self):
+        self._cont_handler.endDocument()
+
+    def processingInstruction(self, target, data):
+        self._cont_handler.processingInstruction(target, data)
+
+class AttributesImpl:
+    """Dictionary-like, read-only view of an element's attributes.
+
+    Wraps the attributes object that the parser passes to startElement()
+    (stored in self._attrs) and exposes it by qualified name. Lookups for
+    a name that is not present raise KeyError.
+    """
+
+    def __init__(self, attrs = None):
+        # underlying attributes object; replaced for every element
+        self._attrs = attrs
+
+    def getLength(self):
+        return self._attrs.getLength()
+
+    def getType(self, name):
+        return self._attrs.getType(name)
+
+    def getValue(self, name):
+        "Return the value of attribute name; raise KeyError if absent."
+        value = self._attrs.getValue(name)
+        if value is None:
+            raise KeyError(name)
+        return value
+
+    def getNames(self):
+        "Return the qualified names of all attributes, in index order."
+        return [self._attrs.getQName(index) for index in range(len(self))]
+
+    def getQNames(self):
+        "Return the qualified names of all attributes, in index order."
+        return [self._attrs.getQName(index) for index in range(len(self))]
+
+    def getValueByQName(self, qname):
+        idx = self._attrs.getIndex(qname)
+        if idx == -1:
+            raise KeyError(qname)
+        return self._attrs.getValue(idx)
+
+    def getNameByQName(self, qname):
+        # names and qualified names coincide here; only existence is checked
+        idx = self._attrs.getIndex(qname)
+        if idx == -1:
+            raise KeyError(qname)
+        return qname
+
+    def getQNameByName(self, name):
+        idx = self._attrs.getIndex(name)
+        if idx == -1:
+            raise KeyError(name)
+        return name
+
+    def __len__(self):
+        return self._attrs.getLength()
+
+    def __getitem__(self, name):
+        return self.getValue(name)
+
+    def __contains__(self, name):
+        # Without this, "name in attrs" would fall back to probing
+        # __getitem__ with integers instead of testing for the name.
+        return self.has_key(name)
+
+    def keys(self):
+        return self.getNames()
+
+    def copy(self):
+        # NOTE(review): the copy shares the same underlying attributes
+        # object rather than snapshotting its contents.
+        return self.__class__(self._attrs)
+
+    def items(self):
+        return [(name, self[name]) for name in self.getNames()]
+
+    def values(self):
+        return [self.getValue(name) for name in self.getNames()]
+
+    def get(self, name, alt=None):
+        "Return the value of attribute name, or alt if it is absent."
+        try:
+            return self.getValue(name)
+        except KeyError:
+            return alt
+
+    def has_key(self, name):
+        "Return True if an attribute called name is present."
+        try:
+            self.getValue(name)
+        except KeyError:
+            return False
+        return True
+
+# --- AttributesNSImpl
+
+class AttributesNSImpl(AttributesImpl):
+    """Namespace-aware variant of AttributesImpl.
+
+    Attribute names are (uri, localname) pairs instead of qualified-name
+    strings; a name is passed to the wrapped attributes object as two
+    separate arguments.
+    """
+
+    def __init__(self, attrs=None):
+        AttributesImpl.__init__(self, attrs)
+
+    def getType(self, name):
+        return self._attrs.getType(name[0], name[1])
+
+    def getValue(self, name):
+        "Return the value for the (uri, localname) pair; KeyError if absent."
+        value = self._attrs.getValue(name[0], name[1])
+        if value is None:
+            raise KeyError(name)
+        return value
+
+    def getNames(self):
+        "Return the (uri, localname) pairs of all attributes, in index order."
+        return [(self._attrs.getURI(idx), self._attrs.getLocalName(idx))
+                for idx in range(len(self))]
+
+    def getNameByQName(self, qname):
+        idx = self._attrs.getIndex(qname)
+        if idx == -1:
+            raise KeyError(qname)
+        return (self._attrs.getURI(idx), self._attrs.getLocalName(idx))
+
+    def getQNameByName(self, name):
+        idx = self._attrs.getIndex(name[0], name[1])
+        if idx == -1:
+            raise KeyError(name)
+        return self._attrs.getQName(idx)
+
+    def getQNames(self):
+        "Return the qualified names of all attributes, in index order."
+        return [self._attrs.getQName(idx) for idx in range(len(self))]
+
+# ---
+
+def create_java_parser(jdriver = None):
+    """Return a Java XMLReader.
+
+    If jdriver is given, it is passed to XMLReaderFactory.createXMLReader()
+    to pick the parser. Otherwise the JAXP factory is used when JAXP could
+    be imported, with XMLReaderFactory's default as the fallback.
+
+    Raises SAXReaderNotAvailable if the Java side reports a
+    ParserConfigurationException or SAXException.
+    """
+    # ParserConfigurationException is only bound when the JAXP import at
+    # module load succeeded. Naming it unconditionally in an except clause
+    # would raise NameError while a SAXException is being matched, so
+    # build the tuple of catchable exceptions up front.
+    if jaxp:
+        unavailable = (ParserConfigurationException, javasax.SAXException)
+    else:
+        unavailable = (javasax.SAXException,)
+    try:
+        if jdriver:
+            return XMLReaderFactory.createXMLReader(jdriver)
+        elif jaxp:
+            return factory.newSAXParser().getXMLReader()
+        else:
+            return XMLReaderFactory.createXMLReader()
+    except unavailable, e:
+        raise _exceptions.SAXReaderNotAvailable(e.getMessage())
+
+def create_parser(jdriver = None):
+    "Return a new JavaSAXParser; jdriver is passed through to it unchanged."
+    parser = JavaSAXParser(jdriver)
+    return parser
--- a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/handler.py
+++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/handler.py
@ -0,0 +1,345 @@
+"""
+This module contains the core classes of version 2.0 of SAX for Python.
+This file provides only default classes with absolutely minimum
+functionality, from which drivers and applications can be subclassed.
+
+Many of these classes are empty and are included only as documentation
+of the interfaces.
+
+$Id: handler.py,v 1.5 2002/02/14 08:09:36 loewis Exp $
+"""
+
+version = '2.0beta'
+
+#============================================================================
+#
+# HANDLER INTERFACES
+#
+#============================================================================
+
+# ===== ERRORHANDLER =====
+
+class ErrorHandler:
+    """Basic interface for SAX error handlers.
+
+    If you create an object that implements this interface, then
+    register the object with your XMLReader, the parser will call the
+    methods in your object to report all warnings and errors. There
+    are three levels of errors available: warnings, (possibly)
+    recoverable errors, and unrecoverable errors. All methods take a
+    SAXParseException as the only parameter.
+
+    The default implementations below raise the exception for errors
+    and fatal errors, and print it for warnings."""
+
+    def error(self, exception):
+        "Handle a recoverable error. By default the exception is raised."
+        raise exception
+
+    def fatalError(self, exception):
+        "Handle a non-recoverable error. By default the exception is raised."
+        raise exception
+
+    def warning(self, exception):
+        "Handle a warning. By default the exception is printed."
+        print exception
+
+
+# ===== CONTENTHANDLER =====
+
+class ContentHandler:
+    """Interface for receiving logical document content events.
+
+    This is the main callback interface in SAX, and the one most
+    important to applications. The order of events in this interface
+    mirrors the order of the information in the document.
+
+    Apart from setDocumentLocator, which stores the locator, every
+    method here is an empty default meant to be overridden."""
+
+    def __init__(self):
+        # Locator supplied by the parser through setDocumentLocator, if any.
+        self._locator = None
+
+    def setDocumentLocator(self, locator):
+        """Called by the parser to give the application a locator for
+        locating the origin of document events.
+
+        SAX parsers are strongly encouraged (though not absolutely
+        required) to supply a locator: a parser that does so must
+        supply the locator to the application by invoking this method
+        before invoking any of the other methods in this interface.
+
+        The locator allows the application to determine the end
+        position of any document-related event, even if the parser is
+        not reporting an error. Typically, the application will use
+        this information for reporting its own errors (such as
+        character content that does not match an application's
+        business rules). The information returned by the locator is
+        probably not sufficient for use with a search engine.
+
+        Note that the locator will return correct information only
+        during the invocation of the events in this interface. The
+        application should not attempt to use it at any other time.
+
+        The default implementation stores the locator in
+        self._locator."""
+        self._locator = locator
+
+    def startDocument(self):
+        """Receive notification of the beginning of a document.
+
+        The SAX parser will invoke this method only once, before any
+        other methods in this interface or in DTDHandler (except for
+        setDocumentLocator)."""
+
+    def endDocument(self):
+        """Receive notification of the end of a document.
+
+        The SAX parser will invoke this method only once, and it will
+        be the last method invoked during the parse. The parser shall
+        not invoke this method until it has either abandoned parsing
+        (because of an unrecoverable error) or reached the end of
+        input."""
+
+    def startPrefixMapping(self, prefix, uri):
+        """Begin the scope of a prefix-URI Namespace mapping.
+
+        The information from this event is not necessary for normal
+        Namespace processing: the SAX XML reader will automatically
+        replace prefixes for element and attribute names when the
+        http://xml.org/sax/features/namespaces feature is true (the
+        default).
+
+        There are cases, however, when applications need to use
+        prefixes in character data or in attribute values, where they
+        cannot safely be expanded automatically; the
+        start/endPrefixMapping event supplies the information to the
+        application to expand prefixes in those contexts itself, if
+        necessary.
+
+        Note that start/endPrefixMapping events are not guaranteed to
+        be properly nested relative to each other: all
+        startPrefixMapping events will occur before the corresponding
+        startElement event, and all endPrefixMapping events will occur
+        after the corresponding endElement event, but their order is
+        not guaranteed."""
+
+    def endPrefixMapping(self, prefix):
+        """End the scope of a prefix-URI mapping.
+
+        See startPrefixMapping for details. This event will always
+        occur after the corresponding endElement event, but the order
+        of endPrefixMapping events is not otherwise guaranteed."""
+
+    def startElement(self, name, attrs):
+        """Signals the start of an element in non-namespace mode.
+
+        The name parameter contains the raw XML 1.0 name of the
+        element type as a string and the attrs parameter holds an
+        instance of the Attributes class containing the attributes of
+        the element."""
+
+    def endElement(self, name):
+        """Signals the end of an element in non-namespace mode.
+
+        The name parameter contains the name of the element type, just
+        as with the startElement event."""
+
+    def startElementNS(self, name, qname, attrs):
+        """Signals the start of an element in namespace mode.
+
+        The name parameter contains the name of the element type as a
+        (uri, localname) tuple, the qname parameter the raw XML 1.0
+        name used in the source document, and the attrs parameter
+        holds an instance of the Attributes class containing the
+        attributes of the element.
+
+        The uri part of the name tuple is None for elements which have
+        no namespace."""
+
+    def endElementNS(self, name, qname):
+        """Signals the end of an element in namespace mode.
+
+        The name parameter contains the name of the element type, just
+        as with the startElementNS event."""
+
+    def characters(self, content):
+        """Receive notification of character data.
+
+        The Parser will call this method to report each chunk of
+        character data. SAX parsers may return all contiguous
+        character data in a single chunk, or they may split it into
+        several chunks; however, all of the characters in any single
+        event must come from the same external entity so that the
+        Locator provides useful information."""
+
+    def ignorableWhitespace(self, whitespace):
+        """Receive notification of ignorable whitespace in element content.
+
+        Validating Parsers must use this method to report each chunk
+        of ignorable whitespace (see the W3C XML 1.0 recommendation,
+        section 2.10): non-validating parsers may also use this method
+        if they are capable of parsing and using content models.
+
+        SAX parsers may return all contiguous whitespace in a single
+        chunk, or they may split it into several chunks; however, all
+        of the characters in any single event must come from the same
+        external entity, so that the Locator provides useful
+        information.
+
+        The whole chunk is passed as the single whitespace argument;
+        there is no separate array and range to respect here (that
+        restriction belongs to interfaces that pass a buffer together
+        with a start and a length)."""
+
+    def processingInstruction(self, target, data):
+        """Receive notification of a processing instruction.
+
+        The Parser will invoke this method once for each processing
+        instruction found: note that processing instructions may occur
+        before or after the main document element.
+
+        A SAX parser should never report an XML declaration (XML 1.0,
+        section 2.8) or a text declaration (XML 1.0, section 4.3.1)
+        using this method."""
+
+    def skippedEntity(self, name):
+        """Receive notification of a skipped entity.
+
+        The Parser will invoke this method once for each entity
+        skipped. Non-validating processors may skip entities if they
+        have not seen the declarations (because, for example, the
+        entity was declared in an external DTD subset). All processors
+        may skip external entities, depending on the values of the
+        http://xml.org/sax/features/external-general-entities and the
+        http://xml.org/sax/features/external-parameter-entities
+        properties."""
+
+
+# ===== DTDHandler =====
+
+class DTDHandler:
+    """Handle DTD events.
+
+    This interface specifies only those DTD events required for basic
+    parsing (unparsed entities and attributes). Both default methods
+    do nothing."""
+
+    def notationDecl(self, name, publicId, systemId):
+        "Handle a notation declaration event."
+
+    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
+        "Handle an unparsed entity declaration event."
+
+
+# ===== ENTITYRESOLVER =====
+
+class EntityResolver:
+    """Basic interface for resolving entities. If you create an object
+    implementing this interface, then register the object with your
+    Parser, the parser will call the method in your object to
+    resolve all external entities. Note that DefaultHandler implements
+    this interface with the default behaviour."""
+
+    def resolveEntity(self, publicId, systemId):
+        """Resolve the system identifier of an entity and return either
+        the system identifier to read from as a string, or an InputSource
+        to read from.
+
+        The default implementation returns systemId unchanged."""
+        return systemId
+
+
+#============================================================================
+#
+# CORE FEATURES
+#
+#============================================================================
+
+feature_namespaces = "http://xml.org/sax/features/namespaces"
+# true: Perform Namespace processing (default).
+# false: Optionally do not perform Namespace processing
+#        (implies namespace-prefixes).
+# access: (parsing) read-only; (not parsing) read/write
+
+feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
+# true: Report the original prefixed names and attributes used for Namespace
+#       declarations.
+# false: Do not report attributes used for Namespace declarations, and
+#        optionally do not report original prefixed names (default).
+# access: (parsing) read-only; (not parsing) read/write
+
+feature_string_interning = "http://xml.org/sax/features/string-interning"
+# true: All element names, prefixes, attribute names, Namespace URIs, and
+#       local names are interned using the built-in intern function.
+# false: Names are not necessarily interned, although they may be (default).
+# access: (parsing) read-only; (not parsing) read/write
+
+feature_validation = "http://xml.org/sax/features/validation"
+# true: Report all validation errors (implies external-general-entities and
+#       external-parameter-entities).
+# false: Do not report validation errors.
+# access: (parsing) read-only; (not parsing) read/write
+
+feature_external_ges = "http://xml.org/sax/features/external-general-entities"
+# true: Include all external general (text) entities.
+# false: Do not include external general entities.
+# access: (parsing) read-only; (not parsing) read/write
+
+feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
+# true: Include all external parameter entities, including the external
+#       DTD subset.
+# false: Do not include any external parameter entities, even the external
+#        DTD subset.
+# access: (parsing) read-only; (not parsing) read/write
+
+all_features = [feature_namespaces,
+                feature_namespace_prefixes,
+                feature_string_interning,
+                feature_validation,
+                feature_external_ges,
+                feature_external_pes]
+
+
+#============================================================================
+#
+# CORE PROPERTIES
+#
+#============================================================================
+
+property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
+# data type: xml.sax.saxlib.LexicalHandler
+# description: An optional extension handler for lexical events like comments.
+# access: read/write
+
+property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
+# data type: xml.sax.saxlib.DeclHandler
+# description: An optional extension handler for DTD-related events other
+#              than notations and unparsed entities.
+# access: read/write
+
+property_dom_node = "http://xml.org/sax/properties/dom-node"
+# data type: org.w3c.dom.Node
+# description: When parsing, the current DOM node being visited if this is
+#              a DOM iterator; when not parsing, the root DOM node for
+#              iteration.
+# access: (parsing) read-only; (not parsing) read/write
+
+property_xml_string = "http://xml.org/sax/properties/xml-string"
+# data type: String
+# description: The literal string of characters that was the source for
+#              the current event.
+# access: read-only
+
+property_encoding = "http://www.python.org/sax/properties/encoding"
+# data type: String
+# description: The name of the encoding to assume for input data.
+# access: write: set the encoding, e.g. established by a higher-level
+#                protocol. May change during parsing (e.g. after
+#                processing a META tag)
+#         read:  return the current encoding (possibly established through
+#                auto-detection).
+# initial value: UTF-8
+#
+
+property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
+# data type: Dictionary
+# description: The dictionary used to intern common strings in the document
+# access: write: Request that the parser uses a specific dictionary, to
+#                allow interning across different documents
+#         read:  return the current interning dictionary, or None
+#
+
+all_properties = [property_lexical_handler,
+                  property_dom_node,
+                  property_declaration_handler,
+                  property_xml_string,
+                  property_encoding,
+                  property_interning_dict]
--- a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py
+++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py
@ -0,0 +1,430 @@
+"""
+This module contains the core classes of version 2.0 of SAX for Python.
+This file provides only default classes with absolutely minimum
+functionality, from which drivers and applications can be subclassed.
+
+Many of these classes are empty and are included only as documentation
+of the interfaces.
+
+$Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $
+"""
+
+version = '2.0beta'
+
+# A number of interfaces used to live in saxlib, but are now in
+# various other modules for Python 2 compatibility. If nobody uses
+# them here any longer, the references can be removed
+
+from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver
+from xmlreader import XMLReader, InputSource, Locator, IncrementalParser
+from _exceptions import *
+
+from handler import \
+     feature_namespaces,\
+     feature_namespace_prefixes,\
+     feature_string_interning,\
+     feature_validation,\
+     feature_external_ges,\
+     feature_external_pes,\
+     all_features,\
+     property_lexical_handler,\
+     property_declaration_handler,\
+     property_dom_node,\
+     property_xml_string,\
+     all_properties
+
+#============================================================================
+#
+# MAIN INTERFACES
+#
+#============================================================================
+
+# ===== XMLFILTER =====
+
+class XMLFilter(XMLReader):
+    """Interface for a SAX2 parser filter.
+
+    A parser filter is an XMLReader that gets its events from another
+    XMLReader (which may in turn also be a filter) rather than from a
+    primary source like a document or other non-SAX data source.
+    Filters can modify a stream of events before passing it on to its
+    handlers."""
+
+    def __init__(self, parent = None):
+        """Creates a filter instance, allowing applications to set the
+        parent on instantiation. The parent defaults to None when it
+        is not given."""
+        XMLReader.__init__(self)
+        self._parent = parent
+
+    def setParent(self, parent):
+        """Sets the parent XMLReader of this filter. The argument may
+        not be None (this is not checked here)."""
+        self._parent = parent
+
+    def getParent(self):
+        "Returns the parent of this filter."
+        return self._parent
+
+# ===== ATTRIBUTES =====
+
+class Attributes:
+    """Interface for a list of XML attributes.
+
+    Contains a list of XML attributes, accessible by name.
+
+    Every method of this class raises NotImplementedError; concrete
+    attribute containers are expected to override all of them."""
+
+    def getLength(self):
+        "Returns the number of attributes in the list."
+        raise NotImplementedError("This method must be implemented!")
+
+    def getType(self, name):
+        "Returns the type of the attribute with the given name."
+        raise NotImplementedError("This method must be implemented!")
+
+    def getValue(self, name):
+        "Returns the value of the attribute with the given name."
+        raise NotImplementedError("This method must be implemented!")
+
+    def getValueByQName(self, name):
+        """Returns the value of the attribute with the given raw (or
+        qualified) name."""
+        raise NotImplementedError("This method must be implemented!")
+
+    def getNameByQName(self, name):
+        """Returns the namespace name of the attribute with the given
+        raw (or qualified) name."""
+        raise NotImplementedError("This method must be implemented!")
+
+    def getNames(self):
+        """Returns a list of the names of all attributes
+        in the list."""
+        raise NotImplementedError("This method must be implemented!")
+
+    def getQNames(self):
+        """Returns a list of the raw qualified names of all attributes
+        in the list."""
+        raise NotImplementedError("This method must be implemented!")
+
+    def __len__(self):
+        "Alias for getLength."
+        raise NotImplementedError("This method must be implemented!")
+
+    def __getitem__(self, name):
+        "Alias for getValue."
+        raise NotImplementedError("This method must be implemented!")
+
+    def keys(self):
+        "Returns a list of the attribute names in the list."
+        raise NotImplementedError("This method must be implemented!")
+
+    def has_key(self, name):
+        "True if the attribute is in the list, false otherwise."
+        raise NotImplementedError("This method must be implemented!")
+
+    def get(self, name, alternative=None):
+        """Return the value associated with attribute name; if it is not
+        available, then return the alternative."""
+        raise NotImplementedError("This method must be implemented!")
+
+    def copy(self):
+        "Return a copy of the Attributes object."
+        raise NotImplementedError("This method must be implemented!")
+
+    def items(self):
+        "Return a list of (attribute_name, value) pairs."
+        raise NotImplementedError("This method must be implemented!")
+
+    def values(self):
+        "Return a list of all attribute values."
+        raise NotImplementedError("This method must be implemented!")
+
+
+#============================================================================
+#
+# HANDLER INTERFACES
+#
+#============================================================================
+
+
+# ===== DECLHANDLER =====
+
+class DeclHandler:
+    """Optional SAX2 handler for DTD declaration events.
+
+    Note that some DTD declarations are already reported through the
+    DTDHandler interface. All events reported to this handler will
+    occur between the startDTD and endDTD events of the
+    LexicalHandler.
+
+    To set the DeclHandler for an XMLReader, use the setProperty method
+    with the property identifier
+    http://xml.org/sax/properties/declaration-handler (available as
+    property_declaration_handler).
+
+    All methods here are empty defaults."""
+
+    def attributeDecl(self, elem_name, attr_name, type, value_def, value):
+        """Report an attribute type declaration.
+
+        Only the first declaration will be reported. The type will be
+        one of the strings "CDATA", "ID", "IDREF", "IDREFS",
+        "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION", or
+        a list of names (in the case of enumerated definitions).
+
+        elem_name is the element type name, attr_name the attribute
+        type name, type a string representing the attribute type,
+        value_def a string representing the default declaration
+        ('#IMPLIED', '#REQUIRED', '#FIXED' or None). value is a string
+        representing the attribute's default value, or None if there
+        is none."""
+
+    def elementDecl(self, elem_name, content_model):
+        """Report an element type declaration.
+
+        Only the first declaration will be reported.
+
+        content_model is the string 'EMPTY', the string 'ANY' or the content
+        model structure represented as tuple (separator, tokens, modifier)
+        where separator is the separator in the token list (that is, '|' or
+        ','), tokens is the list of tokens (element type names or tuples
+        representing parentheses) and modifier is the quantity modifier
+        ('*', '?' or '+')."""
+
+    def internalEntityDecl(self, name, value):
+        """Report an internal entity declaration.
+
+        Only the first declaration of an entity will be reported.
+
+        name is the name of the entity. If it is a parameter entity,
+        the name will begin with '%'. value is the replacement text of
+        the entity."""
+
+    def externalEntityDecl(self, name, public_id, system_id):
+        """Report a parsed entity declaration. (Unparsed entities are
+        reported to the DTDHandler.)
+
+        Only the first declaration for each entity will be reported.
+
+        name is the name of the entity. If it is a parameter entity,
+        the name will begin with '%'. public_id and system_id are the
+        public and system identifiers of the entity. public_id will be
+        None if none were declared."""
+
+
+
+# ===== LEXICALHANDLER =====
+
+class LexicalHandler:
+    """Optional SAX2 handler for lexical events.
+
+    This handler is used to obtain lexical information about an XML
+    document, that is, information about how the document was encoded
+    (as opposed to what it contains, which is reported to the
+    ContentHandler), such as comments and CDATA marked section
+    boundaries.
+
+    To set the LexicalHandler of an XMLReader, use the setProperty
+    method with the property identifier
+    'http://xml.org/sax/properties/lexical-handler' (available as
+    property_lexical_handler). There is no guarantee that the
+    XMLReader will support or recognize this property.
+
+    All methods here are empty defaults."""
+
+    def comment(self, content):
+        """Reports a comment anywhere in the document (including the
+        DTD and outside the document element).
+
+        content is a string that holds the contents of the comment."""
+
+    def startDTD(self, name, public_id, system_id):
+        """Report the start of the DTD declarations, if the document
+        has an associated DTD.
+
+        A startEntity event will be reported before declaration events
+        from the external DTD subset are reported, and this can be
+        used to infer from which subset DTD declarations derive.
+
+        name is the name of the document element type, public_id the
+        public identifier of the DTD (or None if none were supplied)
+        and system_id the system identifier of the external subset (or
+        None if none were supplied)."""
+
+    def endDTD(self):
+        "Signals the end of DTD declarations."
+
+    def startEntity(self, name):
+        """Report the beginning of an entity.
+
+        The start and end of the document entity is not reported. The
+        start and end of the external DTD subset is reported with the
+        pseudo-name '[dtd]'.
+
+        Skipped entities will be reported through the skippedEntity
+        event of the ContentHandler rather than through this event.
+
+        name is the name of the entity. If it is a parameter entity,
+        the name will begin with '%'."""
+
+    def endEntity(self, name):
+        """Reports the end of an entity. name is the name of the
+        entity, and follows the same conventions as for
+        startEntity."""
+
+    def startCDATA(self):
+        """Reports the beginning of a CDATA marked section.
+
+        The contents of the CDATA marked section will be reported
+        through the characters event."""
+
+    def endCDATA(self):
+        "Reports the end of a CDATA marked section."
+
+
+#============================================================================
+#
+# SAX 1.0 COMPATIBILITY CLASSES
+# Note that these are all deprecated.
+#
+#============================================================================
+
+# ===== ATTRIBUTELIST =====
+
+class AttributeList:
+    """Interface for an attribute list. This interface provides
+    information about a list of attributes for an element (only
+    specified or defaulted attributes will be reported). Note that the
+    information returned by this object will be valid only during the
+    scope of the DocumentHandler.startElement callback, and the
+    attributes will not necessarily be provided in the order declared
+    or specified.
+
+    Deprecated SAX 1.0 interface. The methods below contain only a
+    docstring, so calling them on this base class returns None."""
+
+    def getLength(self):
+        "Return the number of attributes in list."
+
+    def getName(self, i):
+        "Return the name of an attribute in the list."
+
+    def getType(self, i):
+        """Return the type of an attribute in the list. (Parameter can be
+        either integer index or attribute name.)"""
+
+    def getValue(self, i):
+        """Return the value of an attribute in the list. (Parameter can be
+        either integer index or attribute name.)"""
+
+    def __len__(self):
+        "Alias for getLength."
+
+    def __getitem__(self, key):
+        "Alias for getName (if key is an integer) and getValue (if string)."
+
+    def keys(self):
+        "Returns a list of the attribute names."
+
+    def has_key(self, key):
+        "True if the attribute is in the list, false otherwise."
+
+    def get(self, key, alternative=None):
+        """Return the value associated with attribute name; if it is not
+        available, then return the alternative."""
+
+    def copy(self):
+        "Return a copy of the AttributeList."
+
+    def items(self):
+        "Return a list of (attribute_name,value) pairs."
+
+    def values(self):
+        "Return a list of all attribute values."
+
+
+# ===== DOCUMENTHANDLER =====
+
+class DocumentHandler:
+    """Handle general document events. This is the main client
+    interface for SAX: it contains callbacks for the most important
+    document events, such as the start and end of elements. You need
+    to create an object that implements this interface, and then
+    register it with the Parser. If you do not want to implement
+    the entire interface, you can derive a class from HandlerBase,
+    which implements the default functionality. You can find the
+    location of any document event using the Locator interface
+    supplied by setDocumentLocator().
+
+    Deprecated SAX 1.0 interface. Every method below is an empty
+    default that does nothing."""
+
+    def characters(self, ch, start, length):
+        "Handle a character data event."
+
+    def endDocument(self):
+        "Handle an event for the end of a document."
+
+    def endElement(self, name):
+        "Handle an event for the end of an element."
+
+    def ignorableWhitespace(self, ch, start, length):
+        "Handle an event for ignorable whitespace in element content."
+
+    def processingInstruction(self, target, data):
+        "Handle a processing instruction event."
+
+    def setDocumentLocator(self, locator):
+        "Receive an object for locating the origin of SAX document events."
+
+    def startDocument(self):
+        "Handle an event for the beginning of a document."
+
+    def startElement(self, name, atts):
+        "Handle an event for the beginning of an element."
+
+
+# ===== HANDLERBASE =====
+
+class HandlerBase(EntityResolver, DTDHandler, DocumentHandler,\
+                     ErrorHandler):
+    """Default base class for handlers. This class implements the
+    default behaviour for four SAX interfaces: EntityResolver,
+    DTDHandler, DocumentHandler, and ErrorHandler: rather
+    than implementing those full interfaces, you may simply extend
+    this class and override the methods that you need. Note that the
+    use of this class is optional (you are free to implement the
+    interfaces directly if you wish)."""
+
+
+# ===== PARSER =====
+
+class Parser:
+    """Basic interface for SAX (Simple API for XML) parsers. All SAX
+    parsers must implement this basic interface: it allows users to
+    register handlers for different types of events and to initiate a
+    parse from a URI, a character stream, or a byte stream. SAX
+    parsers should also implement a zero-argument constructor.
+
+    Deprecated SAX 1.0 interface. In this base class parse and
+    parseFile do nothing, and setLocale always raises."""
+
+    def __init__(self):
+        # Start with the default handler implementations; the set*
+        # methods below replace them.
+        self.doc_handler = DocumentHandler()
+        self.dtd_handler = DTDHandler()
+        self.ent_handler = EntityResolver()
+        self.err_handler = ErrorHandler()
+
+    def parse(self, systemId):
+        "Parse an XML document from a system identifier."
+
+    def parseFile(self, fileobj):
+        "Parse an XML document from a file-like object."
+
+    def setDocumentHandler(self, handler):
+        "Register an object to receive basic document-related events."
+        self.doc_handler=handler
+
+    def setDTDHandler(self, handler):
+        "Register an object to receive basic DTD-related events."
+        self.dtd_handler=handler
+
+    def setEntityResolver(self, resolver):
+        "Register an object to resolve external entities."
+        self.ent_handler=resolver
+
+    def setErrorHandler(self, handler):
+        "Register an object to receive error-message events."
+        self.err_handler=handler
+
+    def setLocale(self, locale):
+        """Allow an application to set the locale for errors and warnings.
+
+        SAX parsers are not required to provide localisation for errors
+        and warnings; if they cannot support the requested locale,
+        however, they must throw a SAX exception. Applications may
+        request a locale change in the middle of a parse.
+
+        This base implementation always raises
+        SAXNotSupportedException."""
+        raise SAXNotSupportedException("Locale support not implemented")
--- a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py
+++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py
@ -0,0 +1,813 @@
+"""
+A library of useful helper classes to the saxlib classes, for the
+convenience of application and driver writers.
+
+$Id: saxutils.py,v 1.37 2005/04/13 14:02:08 syt Exp $
+"""
+import os, urlparse, urllib2, types
+import handler
+import xmlreader
+import sys, _exceptions, saxlib
+
+from xml.Uri import Absolutize, MakeUrllibSafe,IsAbsolute
+
+# Types that prepare_input_source() treats as a system identifier rather
+# than as an InputSource or a file-like object.
+try:
+    _StringTypes = [types.StringType, types.UnicodeType]
+except AttributeError: # 1.5 compatibility:UnicodeType not defined
+    _StringTypes = [types.StringType]
+
+def __dict_replace(s, d):
+    """Return s with each key of d replaced by the value stored under it.
+
+    The replacements are applied one after another, in the dictionary's
+    own iteration order."""
+    for old in d.keys():
+        s = s.replace(old, d[old])
+    return s
+
+def escape(data, entities={}):
+    """Return data with the markup characters &, < and > escaped.
+
+    entities is an optional dictionary of additional replacements.  Its
+    keys and values must all be strings; every key is replaced with its
+    value after the three standard replacements have been made.
+    """
+    # "&" must be handled first, otherwise the ampersands introduced by
+    # the other two replacements would be escaped a second time.
+    for char, ref in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
+        data = data.replace(char, ref)
+    if not entities:
+        return data
+    return __dict_replace(data, entities)
+
+def unescape(data, entities={}):
+    """Return data with &lt;, &gt; and &amp; turned back into <, > and &.
+
+    entities is an optional dictionary of additional replacements.  Its
+    keys and values must all be strings; every key is replaced with its
+    value before the final "&amp;" replacement.
+    """
+    for ref, char in (("&lt;", "<"), ("&gt;", ">")):
+        data = data.replace(ref, char)
+    if entities:
+        data = __dict_replace(data, entities)
+    # "&amp;" is undone last so that text such as "&amp;lt;" comes out as
+    # the literal "&lt;" instead of being unescaped twice.
+    result = data.replace("&amp;", "&")
+    return result
+
+def quoteattr(data, entities={}):
+    """Escape data and wrap it in quotes for use as an attribute value.
+
+    The text is first passed through escape() together with the optional
+    entities dictionary (string keys replaced by string values).  It is
+    then wrapped in double quotes, unless it contains a double quote:
+    in that case single quotes are used, and if it contains both kinds
+    the double quotes are written as &quot; inside a double-quoted
+    value.
+    """
+    data = escape(data, entities)
+    if '"' not in data:
+        return '"%s"' % data
+    if "'" not in data:
+        return "'%s'" % data
+    return '"%s"' % data.replace('"', "&quot;")
+
+# --- DefaultHandler
+
+class DefaultHandler(handler.EntityResolver, handler.DTDHandler,
+                     handler.ContentHandler, handler.ErrorHandler):
+    """Default base class for SAX2 event handlers. Implements empty
+    methods for all callback methods, which can be overridden by
+    application implementors. Replaces the deprecated SAX1 HandlerBase
+    class.
+
+    The class defines nothing of its own: every callback is inherited
+    from the four handler base classes listed above."""
+
+# --- Location
+
+class Location:
+    """Snapshot of a position in an XML entity.
+
+    The constructor reads the column, line, public identifier and system
+    identifier from the locator it is given and keeps those values, so
+    later changes to the locator do not affect this object."""
+
+    def __init__(self, locator):
+        self.__col = locator.getColumnNumber()
+        self.__line = locator.getLineNumber()
+        self.__pubid = locator.getPublicId()
+        self.__sysid = locator.getSystemId()
+
+    def getColumnNumber(self):
+        "Return the column number recorded at construction time."
+        return self.__col
+
+    def getLineNumber(self):
+        "Return the line number recorded at construction time."
+        return self.__line
+
+    def getPublicId(self):
+        "Return the public identifier recorded at construction time."
+        return self.__pubid
+
+    def getSystemId(self):
+        "Return the system identifier recorded at construction time."
+        return self.__sysid
+
+    def __str__(self):
+        # Prefer the system id, then the public id, as the entity name.
+        entity = self.__sysid or self.__pubid or "<unknown>"
+        line, col = self.__line, self.__col
+        # Positions that were not available are shown as "?".
+        if line is None:
+            line = "?"
+        if col is None:
+            col = "?"
+        return "%s:%s:%s" % (entity, line, col)
+
+# --- ErrorPrinter
+
+class ErrorPrinter:
+    """Error handler that writes each message to a file object.
+
+    Messages go to outfile, which is sys.stderr unless another object
+    with a write() method is supplied.  level selects what is reported:
+    warnings only when level <= 0, errors when level <= 1 and fatal
+    errors when level <= 2."""
+
+    def __init__(self, level=0, outfile=sys.stderr):
+        self._outfile = outfile
+        self._level = level
+
+    def warning(self, exception):
+        self.__report(0, "WARNING in %s: %s\n", exception)
+
+    def error(self, exception):
+        self.__report(1, "ERROR in %s: %s\n", exception)
+
+    def fatalError(self, exception):
+        self.__report(2, "FATAL ERROR in %s: %s\n", exception)
+
+    def __report(self, threshold, template, exception):
+        # Write one message, provided the configured level allows it.
+        if self._level <= threshold:
+            position = self.__getpos(exception)
+            self._outfile.write(template % (position,
+                                            exception.getMessage()))
+
+    def __getpos(self, exception):
+        # Only parse exceptions carry a position.
+        if not isinstance(exception, _exceptions.SAXParseException):
+            return "<unknown>"
+        return "%s:%s:%s" % (exception.getSystemId(),
+                             exception.getLineNumber(),
+                             exception.getColumnNumber())
+
+# --- ErrorRaiser
+
+class ErrorRaiser:
+    """Error handler that raises the exceptions it is passed.
+
+    level selects what is raised: warnings only when level <= 0, errors
+    when level <= 1 and fatal errors when level <= 2.  Anything else is
+    silently ignored."""
+
+    def __init__(self, level = 0):
+        self._level = level
+
+    def _maybe_raise(self, threshold, exception):
+        # Raise the exception if the configured level allows it.
+        if self._level <= threshold:
+            raise exception
+
+    def error(self, exception):
+        self._maybe_raise(1, exception)
+
+    def fatalError(self, exception):
+        self._maybe_raise(2, exception)
+
+    def warning(self, exception):
+        self._maybe_raise(0, exception)
+
+# --- AttributesImpl now lives in xmlreader
+from xmlreader import AttributesImpl
+
+# --- XMLGenerator is the SAX2 ContentHandler for writing back XML
+import codecs
+
+def _outputwrapper(stream,encoding):
+    """Return a codecs stream writer that encodes what is written to it
+    with the named encoding and passes the result on to stream."""
+    make_writer = codecs.getwriter(encoding)
+    return make_writer(stream)
+
+# Two implementations of writetext(): the first relies on the codec
+# error-handler registry, the second emulates it for interpreters whose
+# codecs module has no register_error.
+if hasattr(codecs, "register_error"):
+    def writetext(stream, text, entities={}):
+        """Write text to stream, escaped with escape(text, entities).
+
+        While the text is written the stream's error mode is switched to
+        "xmlcharrefreplace", so characters the encoding cannot represent
+        come out as numeric character references.
+        """
+        # Remember the mode the stream was using and put it back even if
+        # write() raises; otherwise the stream would be left switched to
+        # "xmlcharrefreplace", or forced to "strict", for later writers.
+        previous = getattr(stream, "errors", "strict")
+        stream.errors = "xmlcharrefreplace"
+        try:
+            stream.write(escape(text, entities))
+        finally:
+            stream.errors = previous
+else:
+    def writetext(stream, text, entities={}):
+        """Write text to stream, escaped with escape(text, entities).
+
+        If the stream cannot encode the text in one go, it is written
+        character by character and each character that fails is replaced
+        with a numeric character reference.
+        """
+        text = escape(text, entities)
+        try:
+            stream.write(text)
+        except UnicodeError:
+            for c in text:
+                try:
+                    stream.write(c)
+                except UnicodeError:
+                    stream.write("&#%d;" % ord(c))
+
+def writeattr(stream, text):
+    """Write text to stream as a quoted, escaped attribute value.
+
+    Double quotes delimit the value unless the text contains more double
+    quotes than single quotes, in which case single quotes are used.
+    Occurrences of the chosen delimiter inside the text are written as
+    &quot; or &apos; respectively.
+    """
+    quote = '"'
+    entities = {}
+    doubles = text.count('"')
+    if doubles:
+        if doubles <= text.count("'"):
+            entities = {'"': "&quot;"}
+        else:
+            entities = {"'": "&apos;"}
+            quote = "'"
+    stream.write(quote)
+    writetext(stream, text, entities)
+    stream.write(quote)
+
+
+class XMLGenerator(handler.ContentHandler):
+    """SAX2 ContentHandler that writes the events it receives as XML.
+
+    out is the stream written to (sys.stdout when None); it is wrapped
+    in a codecs writer for encoding, which is also the name put in the
+    XML declaration.
+    """
+    # Template for prefixes invented for namespaced attributes whose
+    # namespace is only bound as the default namespace.
+    GENERATED_PREFIX = "xml.sax.saxutils.prefix%s"
+
+    def __init__(self, out=None, encoding="iso-8859-1"):
+        if out is None:
+            import sys
+            out = sys.stdout
+        handler.ContentHandler.__init__(self)
+        self._out = _outputwrapper(out,encoding)
+        self._ns_contexts = [{}] # contains uri -> prefix dicts
+        self._current_context = self._ns_contexts[-1]
+        # (prefix, uri) pairs announced by startPrefixMapping but not yet
+        # written out as xmlns attributes.
+        self._undeclared_ns_maps = []
+        self._encoding = encoding
+        self._generated_prefix_ctr = 0
+
+    # ContentHandler methods
+
+    def startDocument(self):
+        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
+                        self._encoding)
+
+    def startPrefixMapping(self, prefix, uri):
+        # Save a copy of the context as it was before this mapping, then
+        # record the mapping and queue its declaration for the next
+        # start tag.
+        self._ns_contexts.append(self._current_context.copy())
+        self._current_context[uri] = prefix
+        self._undeclared_ns_maps.append((prefix, uri))
+
+    def endPrefixMapping(self, prefix):
+        # Go back to the context saved by the matching startPrefixMapping.
+        self._current_context = self._ns_contexts[-1]
+        del self._ns_contexts[-1]
+
+    def startElement(self, name, attrs):
+        self._out.write('<' + name)
+        for (attr_name, value) in attrs.items():
+            self._out.write(' %s=' % attr_name)
+            writeattr(self._out, value)
+        self._out.write('>')
+
+    def endElement(self, name):
+        self._out.write('</%s>' % name)
+
+    def startElementNS(self, name, qname, attrs):
+        """Write a start tag for the (uri, localname) pair name.
+
+        The prefix comes from the current namespace context; qname is
+        not used.  Pending namespace declarations are written as xmlns
+        attributes before the element's own attributes.
+        """
+        if name[0] is None:
+            name = name[1]
+        elif self._current_context[name[0]] is None:
+            # default namespace
+            name = name[1]
+        else:
+            name = self._current_context[name[0]] + ":" + name[1]
+        self._out.write('<' + name)
+
+        for k,v in self._undeclared_ns_maps:
+            if k is None:
+                self._out.write(' xmlns="%s"' % (v or ''))
+            else:
+                self._out.write(' xmlns:%s="%s"' % (k,v))
+        self._undeclared_ns_maps = []
+
+        # Prefixes invented for this start tag, keyed by namespace URI.
+        generated = {}
+        for (attr_name, value) in attrs.items():
+            uri = attr_name[0]
+            if uri is None:
+                attr_qname = attr_name[1]
+            elif self._current_context[uri] is None:
+                # The namespace is only bound as the default namespace,
+                # which does not apply to attributes, so a prefix has to
+                # be created and declared.  The URI is read before any
+                # name is rebuilt so that the declaration carries the
+                # namespace URI itself.
+                prefix = generated.get(uri)
+                if prefix is None:
+                    prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
+                    self._generated_prefix_ctr = self._generated_prefix_ctr + 1
+                    self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(uri)))
+                    generated[uri] = prefix
+                # The invented prefix stays local to this tag: putting it
+                # in the shared context would make endElementNS write a
+                # prefixed end tag for an element opened without one.
+                attr_qname = prefix + ':' + attr_name[1]
+            else:
+                attr_qname = self._current_context[uri] + ":" + attr_name[1]
+            self._out.write(' %s=' % attr_qname)
+            writeattr(self._out, value)
+        self._out.write('>')
+
+    def endElementNS(self, name, qname):
+        # XXX: if qname is not None, we better use it.
+        # Python 2.0b2 requires us to use the recorded prefix for
+        # name[0], though
+        if name[0] is None:
+            qname = name[1]
+        elif self._current_context[name[0]] is None:
+            qname = name[1]
+        else:
+            qname = self._current_context[name[0]] + ":" + name[1]
+        self._out.write('</%s>' % qname)
+
+    def characters(self, content):
+        writetext(self._out, content)
+
+    def ignorableWhitespace(self, content):
+        self._out.write(content)
+
+    def processingInstruction(self, target, data):
+        self._out.write('<?%s %s?>' % (target, data))
+
+
+class LexicalXMLGenerator(XMLGenerator, saxlib.LexicalHandler):
+    """A XMLGenerator that also supports the LexicalHandler interface.
+
+    DOCTYPE declarations, comments and CDATA sections reported through
+    the lexical events are written to the output stream."""
+
+    def __init__(self, out=None, encoding="iso-8859-1"):
+        XMLGenerator.__init__(self, out, encoding)
+        # True between startCDATA() and endCDATA().
+        self._in_cdata = 0
+
+    def characters(self, content):
+        if self._in_cdata:
+            # Nothing is escaped inside a CDATA section.  A literal "]]>"
+            # would end it, so the section is closed there, the "]]>" is
+            # written as "]]&gt;" and a new section is opened.
+            self._out.write(content.replace(']]>', ']]>]]&gt;<![CDATA['))
+        else:
+            # Use the parent implementation so that characters the output
+            # encoding cannot represent become character references
+            # instead of raising UnicodeError.
+            XMLGenerator.characters(self, content)
+
+    # LexicalHandler methods
+    # (we only support the most important ones and inherit the rest)
+
+    def startDTD(self, name, public_id, system_id):
+        # Writes the opening of the DOCTYPE declaration; endDTD() writes
+        # the closing ">".
+        self._out.write('<!DOCTYPE %s' % name)
+        if public_id:
+            self._out.write(' PUBLIC %s %s' % (
+                quoteattr(public_id or ""), quoteattr(system_id or "")
+            ))
+        elif system_id:
+            self._out.write(' SYSTEM %s' % quoteattr(system_id or ""))
+
+    def endDTD(self):
+        self._out.write('>')
+
+    def comment(self, content):
+        # The comment text is written unchanged.
+        self._out.write('<!--')
+        self._out.write(content)
+        self._out.write('-->')
+
+    def startCDATA(self):
+        self._in_cdata = 1
+        self._out.write('<![CDATA[')
+
+    def endCDATA(self):
+        self._in_cdata = 0
+        self._out.write(']]>')
+
+
+# --- ContentGenerator is the SAX1 DocumentHandler for writing back XML
+class ContentGenerator(XMLGenerator):
+    """XMLGenerator variant whose characters() takes the three-argument
+    SAX1 form and forwards the selected text to XMLGenerator."""
+
+    def characters(self, str, start, end):
+        # In SAX1, characters receives start and end; in SAX2, it receives
+        # a string. For plain strings, we may want to use a buffer object.
+        # NOTE: the slice below uses "end" as a length counted from
+        # "start", not as an absolute end index.
+        return XMLGenerator.characters(self, str[start:start+end])
+
+# --- XMLFilterBase (also available under its old name, XMLFilterImpl)
+class XMLFilterBase(saxlib.XMLFilter):
+    """This class is designed to sit between an XMLReader and the
+    client application's event handlers.  By default, it does nothing
+    but pass requests up to the reader and events on to the handlers
+    unmodified, but subclasses can override specific methods to modify
+    the event stream or the configuration requests as they pass
+    through.
+
+    The attributes used here (_parent, _cont_handler, _dtd_handler,
+    _ent_handler and _err_handler) are not assigned in this class;
+    presumably the saxlib.XMLFilter base class provides them -- confirm
+    there."""
+
+    # ErrorHandler methods
+    # (forwarded to the registered error handler)
+
+    def error(self, exception):
+        self._err_handler.error(exception)
+
+    def fatalError(self, exception):
+        self._err_handler.fatalError(exception)
+
+    def warning(self, exception):
+        self._err_handler.warning(exception)
+
+    # ContentHandler methods
+    # (forwarded to the registered content handler)
+
+    def setDocumentLocator(self, locator):
+        self._cont_handler.setDocumentLocator(locator)
+
+    def startDocument(self):
+        self._cont_handler.startDocument()
+
+    def endDocument(self):
+        self._cont_handler.endDocument()
+
+    def startPrefixMapping(self, prefix, uri):
+        self._cont_handler.startPrefixMapping(prefix, uri)
+
+    def endPrefixMapping(self, prefix):
+        self._cont_handler.endPrefixMapping(prefix)
+
+    def startElement(self, name, attrs):
+        self._cont_handler.startElement(name, attrs)
+
+    def endElement(self, name):
+        self._cont_handler.endElement(name)
+
+    def startElementNS(self, name, qname, attrs):
+        self._cont_handler.startElementNS(name, qname, attrs)
+
+    def endElementNS(self, name, qname):
+        self._cont_handler.endElementNS(name, qname)
+
+    def characters(self, content):
+        self._cont_handler.characters(content)
+
+    def ignorableWhitespace(self, chars):
+        self._cont_handler.ignorableWhitespace(chars)
+
+    def processingInstruction(self, target, data):
+        self._cont_handler.processingInstruction(target, data)
+
+    def skippedEntity(self, name):
+        self._cont_handler.skippedEntity(name)
+
+    # DTDHandler methods
+    # (forwarded to the registered DTD handler)
+
+    def notationDecl(self, name, publicId, systemId):
+        self._dtd_handler.notationDecl(name, publicId, systemId)
+
+    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
+        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)
+
+    # EntityResolver methods
+
+    def resolveEntity(self, publicId, systemId):
+        # Unlike the event callbacks, this one returns the handler's result.
+        return self._ent_handler.resolveEntity(publicId, systemId)
+
+    # XMLReader methods
+    # (forwarded to the parent reader)
+
+    def parse(self, source):
+        # Register this filter as every kind of handler on the parent
+        # reader, so that its events pass through the methods above,
+        # then let the parent do the parsing.
+        self._parent.setContentHandler(self)
+        self._parent.setErrorHandler(self)
+        self._parent.setEntityResolver(self)
+        self._parent.setDTDHandler(self)
+        self._parent.parse(source)
+
+    def setLocale(self, locale):
+        self._parent.setLocale(locale)
+
+    def getFeature(self, name):
+        return self._parent.getFeature(name)
+
+    def setFeature(self, name, state):
+        self._parent.setFeature(name, state)
+
+    def getProperty(self, name):
+        return self._parent.getProperty(name)
+
+    def setProperty(self, name, value):
+        self._parent.setProperty(name, value)
+
+# FIXME: remove this backward compatibility hack when not needed anymore
+XMLFilterImpl = XMLFilterBase
+
+# --- BaseIncrementalParser
+
+class BaseIncrementalParser(xmlreader.IncrementalParser):
+    """Convenience base class for SAX 2.0 driver writers: it implements
+    parse() from the XMLReader interface on top of the feed, close and
+    reset methods of the IncrementalParser interface."""
+
+    def parse(self, source):
+        """Parse source by feeding its byte stream to the parser in
+        16384-byte reads, bracketed by startDocument and endDocument
+        events on the content handler."""
+        source = prepare_input_source(source)
+        self.prepareParser(source)
+
+        self._cont_handler.startDocument()
+
+        # FIXME: what about char-stream?
+        stream = source.getByteStream()
+        while 1:
+            chunk = stream.read(16384)
+            if chunk == "":
+                # An empty read marks the end of the input.
+                break
+            self.feed(chunk)
+
+        self.close()
+        self.reset()
+
+        self._cont_handler.endDocument()
+
+    def prepareParser(self, source):
+        """Hook called by parse() with the prepared input source, before
+        any data is fed, so the SAX 2.0 driver can get ready for
+        parsing.  Subclasses must override it."""
+        raise NotImplementedError("prepareParser must be overridden!")
+
+# --- Utility functions
+
+def prepare_input_source(source, base = ""):
+    """Return an InputSource for source that has a byte stream to read.
+
+    source may be a string (used as the system identifier), an object
+    with a read() method (used as the byte stream) or an InputSource.
+    base is an optional base URL for resolving system identifiers.  If
+    no byte stream is available, the system identifier is made absolute
+    and opened with urllib2.urlopen().
+    """
+    if type(source) in _StringTypes:
+        source = xmlreader.InputSource(source)
+    elif hasattr(source, "read"):
+        stream = source
+        source = xmlreader.InputSource()
+        source.setByteStream(stream)
+        # File objects carry a name that can serve as the system id.
+        if hasattr(stream, "name"):
+            source.setSystemId(absolute_system_id(stream.name, base))
+
+    if source.getByteStream() is not None:
+        return source
+
+    resolved = absolute_system_id(source.getSystemId(), base)
+    source.setSystemId(resolved)
+    source.setByteStream(urllib2.urlopen(resolved))
+    return source
+
+
+def absolute_system_id(sysid, base=''):
+    """Return sysid as an absolute URI, passed through MakeUrllibSafe().
+
+    A sysid naming an existing local path becomes a "file:" URI built
+    from its absolute path; otherwise it is resolved against base when
+    base is non-empty.  The result is asserted to be absolute.
+    """
+    if os.path.exists(sysid):
+        absolute = 'file:%s' % os.path.abspath(sysid)
+    elif base:
+        absolute = Absolutize(sysid, base)
+    else:
+        absolute = sysid
+    assert IsAbsolute(absolute)
+    return MakeUrllibSafe(absolute)
+
+# ===========================================================================
+#
+# DEPRECATED SAX 1.0 CLASSES
+#
+# ===========================================================================
+
+# --- AttributeMap
+
+class AttributeMap:
+    """An implementation of AttributeList that takes an (attr,val) hash
+    and uses it to implement the AttributeList interface.
+
+    Attributes can be addressed by name or by integer position; the
+    position of an attribute is its place in the mapping's key order."""
+
+    def __init__(self, map):
+        self.map=map
+
+    def getLength(self):
+        "Return the number of attributes."
+        return len(self.map)
+
+    def getName(self, i):
+        "Return the name of the attribute at position i, or None."
+        try:
+            return list(self.map.keys())[i]
+        except IndexError:
+            return None
+
+    def getType(self, i):
+        "Return the attribute type, which is always reported as CDATA."
+        return "CDATA"
+
+    def getValue(self, i):
+        """Return the value of the attribute with name or position i, or
+        None if there is no such attribute."""
+        try:
+            if isinstance(i, int):
+                # An out-of-range position makes getName() return None,
+                # which is then looked up like any other missing key.
+                return self.map[self.getName(i)]
+            return self.map[i]
+        except KeyError:
+            return None
+
+    def __len__(self):
+        return len(self.map)
+
+    def __getitem__(self, key):
+        # An integer key yields the attribute NAME at that position;
+        # any other key yields the attribute value.
+        if isinstance(key, int):
+            return list(self.map.keys())[key]
+        return self.map[key]
+
+    def items(self):
+        return self.map.items()
+
+    def keys(self):
+        return self.map.keys()
+
+    def has_key(self,key):
+        return key in self.map
+
+    def get(self, key, alternative=None):
+        return self.map.get(key, alternative)
+
+    def copy(self):
+        "Return a new AttributeMap over a copy of the mapping."
+        return AttributeMap(self.map.copy())
+
+    def values(self):
+        return self.map.values()
+
+# --- Event broadcasting object
+
+class EventBroadcaster:
+    """Takes a list of objects and forwards any method calls received
+    to all objects in the list. The attribute list holds the list and
+    can freely be modified by clients."""
+
+    class Event:
+        "Helper objects that represent event methods."
+
+        def __init__(self,list,name):
+            self.list=list
+            self.name=name
+
+        def __call__(self,*rest):
+            # Call the named method on every receiver, in list order,
+            # with the positional arguments that were given.
+            for obj in self.list:
+                getattr(obj,self.name)(*rest)
+
+    def __init__(self,list):
+        self.list=list
+
+    def __getattr__(self,name):
+        # Only reached for attributes not found normally, so every
+        # unknown name is turned into a broadcast of that method.
+        return self.Event(self.list,name)
+
+    def __repr__(self):
+        return "<EventBroadcaster instance at %d>" % id(self)
+
+# --- ESIS document handler
+import saxlib
+class ESISDocHandler(saxlib.HandlerBase):
+    """A SAX document handler that produces naive ESIS output.
+
+    Each event is written to the writer as one line whose first
+    character identifies the kind of event."""
+
+    def __init__(self,writer=sys.stdout):
+        # writer only needs a write() method.
+        self.writer=writer
+
+    def processingInstruction (self,target, remainder):
+        """Receive an event signalling that a processing instruction
+        has been found."""
+        # Written as "?target remainder".
+        self.writer.write("?"+target+" "+remainder+"\n")
+
+    def startElement(self,name,amap):
+        "Receive an event signalling the start of an element."
+        # Written as "(name", followed by one "Aname value" line per
+        # attribute.
+        self.writer.write("("+name+"\n")
+        for a_name in amap.keys():
+            self.writer.write("A"+a_name+" "+amap[a_name]+"\n")
+
+    def endElement(self,name):
+        "Receive an event signalling the end of an element."
+        # Written as ")name".
+        self.writer.write(")"+name+"\n")
+
+    def characters(self,data,start_ix,length):
+        "Receive an event signalling that character data has been found."
+        # Written as "-" followed by the length characters starting at
+        # start_ix.
+        self.writer.write("-"+data[start_ix:start_ix+length]+"\n")
+
+# --- XML canonizer
+
+class Canonizer(saxlib.HandlerBase):
+    """A SAX document handler that produces canonized XML output.
+
+    Attributes are written in sorted order with double quotes, and
+    character data is only written while inside an element."""
+
+    def __init__(self,writer=sys.stdout):
+        self.writer=writer
+        # Number of elements currently open.
+        self.elem_level=0
+
+    def processingInstruction (self,target, remainder):
+        # Processing instructions whose target is "xml" are dropped.
+        if target=="xml":
+            return
+        self.writer.write("<?"+target+" "+remainder+"?>")
+
+    def startElement(self,name,amap):
+        self.writer.write("<"+name)
+
+        ordered=amap.keys()
+        ordered.sort()
+
+        for attr in ordered:
+            self.writer.write(" "+attr+"=\"")
+            self.write_data(amap[attr])
+            self.writer.write("\"")
+        self.writer.write(">")
+        self.elem_level+=1
+
+    def endElement(self,name):
+        self.writer.write("</"+name+">")
+        self.elem_level-=1
+
+    def ignorableWhitespace(self,data,start_ix,length):
+        # Treated exactly like ordinary character data.
+        self.characters(data,start_ix,length)
+
+    def characters(self,data,start_ix,length):
+        # Text outside the document element is not written.
+        if self.elem_level<=0:
+            return
+        self.write_data(data[start_ix:start_ix+length])
+
+    def write_data(self,data):
+        "Writes datachars to writer."
+        # "&" comes first so that the references produced by the later
+        # replacements are not escaped again.
+        replacements=(("&","&amp;"),
+                      ("<","&lt;"),
+                      ("\"","&quot;"),
+                      (">","&gt;"),
+                      (chr(9),"&#9;"),
+                      (chr(10),"&#10;"),
+                      (chr(13),"&#13;"))
+        for char,ref in replacements:
+            data=data.replace(char,ref)
+        self.writer.write(data)
+
+# --- mllib
+
+class mllib:
+    """A re-implementation of the htmllib, sgmllib and xmllib interfaces as a
+    SAX DocumentHandler.
+
+    Data is pushed in with feed() and close().  For every element the
+    internal Handler looks for a start_<name> / end_<name> method on
+    this object and passes it to handle_starttag() / handle_endtag();
+    when no such method exists, unknown_starttag() / unknown_endtag()
+    is called instead."""
+
+# Unsupported:
+# - setnomoretags
+# - setliteral
+# - translate_references
+# - handle_xml
+# - handle_doctype
+# - handle_charref
+# - handle_entityref
+# - handle_comment
+# - handle_cdata
+# - tag_attributes
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        "Create a fresh parser and a fresh internal handler bound to it."
+        import saxexts # only used here
+        self.parser=saxexts.XMLParserFactory.make_parser()
+        self.handler=mllib.Handler(self.parser,self)
+        self.handler.reset()
+
+    def feed(self,data):
+        "Pass data on to the underlying parser."
+        self.parser.feed(data)
+
+    def close(self):
+        "Close the underlying parser."
+        self.parser.close()
+
+    def get_stack(self):
+        "Return the list of names of the currently open elements."
+        return self.handler.get_stack()
+
+    # --- Handler methods (to be overridden)
+
+    def handle_starttag(self,name,method,atts):
+        # method is this object's start_<name> method.
+        method(atts)
+
+    def handle_endtag(self,name,method):
+        # method is this object's end_<name> method.
+        method()
+
+    def handle_data(self,data):
+        pass
+
+    def handle_proc(self,target,data):
+        pass
+
+    def unknown_starttag(self,name,atts):
+        pass
+
+    def unknown_endtag(self,name):
+        pass
+
+    def syntax_error(self,message):
+        pass
+
+    # --- The internal handler class
+
+    class Handler(saxlib.DocumentHandler,saxlib.ErrorHandler):
+        """An internal class to handle SAX events and translate them to mllib
+        events."""
+
+        def __init__(self,driver,handler):
+            # driver is the SAX parser, handler the mllib object that
+            # receives the translated events.
+            self.driver=driver
+            self.driver.setDocumentHandler(self)
+            self.driver.setErrorHandler(self)
+            self.handler=handler
+            self.reset()
+
+        def get_stack(self):
+            return self.stack
+
+        def reset(self):
+            # Names of the elements that are currently open, outermost
+            # first.
+            self.stack=[]
+
+        # --- DocumentHandler methods
+
+        def characters(self, ch, start, length):
+            self.handler.handle_data(ch[start:start+length])
+
+        def endElement(self, name):
+            if hasattr(self.handler,"end_"+name):
+                self.handler.handle_endtag(name,
+                                          getattr(self.handler,"end_"+name))
+            else:
+                self.handler.unknown_endtag(name)
+
+            # The element stays on the stack while its end handler runs.
+            del self.stack[-1]
+
+        def ignorableWhitespace(self, ch, start, length):
+            # Reported to the mllib object as ordinary data.
+            self.handler.handle_data(ch[start:start+length])
+
+        def processingInstruction(self, target, data):
+            self.handler.handle_proc(target,data)
+
+        def startElement(self, name, atts):
+            # Pushed before dispatching, so the start handler already
+            # sees the element on the stack.
+            self.stack.append(name)
+
+            if hasattr(self.handler,"start_"+name):
+                self.handler.handle_starttag(name,
+                                            getattr(self.handler,
+                                                    "start_"+name),
+                                             atts)
+            else:
+                self.handler.unknown_starttag(name,atts)
+
+        # --- ErrorHandler methods
+
+        def error(self, exception):
+            # Recoverable errors are reported through syntax_error().
+            self.handler.syntax_error(str(exception))
+
+        def fatalError(self, exception):
+            # Fatal errors are re-raised as RuntimeError.
+            raise RuntimeError(str(exception))
--- a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/xmlreader.py
+++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/xmlreader.py
@ -0,0 +1,378 @@
+"""An XML Reader is the SAX 2 name for an XML parser. XML Parsers
+should be based on this code. """
+
+import handler
+
+from _exceptions import SAXNotSupportedException, SAXNotRecognizedException
+
+
+# ===== XMLREADER =====
+
+class XMLReader:
+    """Interface for reading an XML document using callbacks.
+
+    XMLReader is the interface that an XML parser's SAX2 driver must
+    implement. This interface allows an application to set and query
+    features and properties in the parser, to register event handlers
+    for document processing, and to initiate a document parse.
+
+    All SAX interfaces are assumed to be synchronous: the parse
+    methods must not return until parsing is complete, and readers
+    must wait for an event-handler callback to return before reporting
+    the next event."""
+
+    def __init__(self):
+        self._cont_handler = handler.ContentHandler()
+        self._dtd_handler = handler.DTDHandler()
+        self._ent_handler = handler.EntityResolver()
+        self._err_handler = handler.ErrorHandler()
+
+    def parse(self, source):
+        "Parse an XML document from a system identifier or an InputSource."
+        raise NotImplementedError("This method must be implemented!")
+
+    def getContentHandler(self):
+        "Returns the current ContentHandler."
+        return self._cont_handler
+
+    def setContentHandler(self, handler):
+        "Registers a new object to receive document content events."
+        self._cont_handler = handler
+
+    def getDTDHandler(self):
+        "Returns the current DTD handler."
+        return self._dtd_handler
+
+    def setDTDHandler(self, handler):
+        "Register an object to receive basic DTD-related events."
+        self._dtd_handler = handler
+
+    def getEntityResolver(self):
+        "Returns the current EntityResolver."
+        return self._ent_handler
+
+    def setEntityResolver(self, resolver):
+        "Register an object to resolve external entities."
+        self._ent_handler = resolver
+
+    def getErrorHandler(self):
+        "Returns the current ErrorHandler."
+        return self._err_handler
+
+    def setErrorHandler(self, handler):
+        "Register an object to receive error-message events."
+        self._err_handler = handler
+
+    def setLocale(self, locale):
+        """Allow an application to set the locale for errors and warnings.
+
+        SAX parsers are not required to provide localization for errors
+        and warnings; if they cannot support the requested locale,
+        however, they must throw a SAX exception. Applications may
+        request a locale change in the middle of a parse."""
+        raise SAXNotSupportedException("Locale support not implemented")
+
+    def getFeature(self, name):
+        "Looks up and returns the state of a SAX2 feature."
+        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
+
+    def setFeature(self, name, state):
+        "Sets the state of a SAX2 feature."
+        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
+
+    def getProperty(self, name):
+        "Looks up and returns the value of a SAX2 property."
+        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
+
+    def setProperty(self, name, value):
+        "Sets the value of a SAX2 property."
+        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
+
+class IncrementalParser(XMLReader):
+    """This interface adds three extra methods to the XMLReader
+    interface that allow XML parsers to support incremental
+    parsing. Support for this interface is optional, since not all
+    underlying XML parsers support this functionality.
+
+    When the parser is instantiated it is ready to begin accepting
+    data from the feed method immediately. After parsing has been
+    finished with a call to close the reset method must be called to
+    make the parser ready to accept new data, either from feed or
+    using the parse method.
+
+    Note that these methods must _not_ be called during parsing, that
+    is, after parse has been called and before it returns.
+
+    By default, the class also implements the parse method of the XMLReader
+    interface using the feed, close and reset methods of the
+    IncrementalParser interface as a convenience to SAX 2.0 driver
+    writers."""
+
+    def __init__(self, bufsize=2**16):
+        self._bufsize = bufsize
+        XMLReader.__init__(self)
+
+    def parse(self, source):
+        import saxutils
+        source = saxutils.prepare_input_source(source)
+
+        self.prepareParser(source)
+        file = source.getByteStream()
+        buffer = file.read(self._bufsize)
+        while buffer != "":
+            self.feed(buffer)
+            buffer = file.read(self._bufsize)
+        self.close()
+
+    def feed(self, data):
+        """This method gives the raw XML data in the data parameter to
+        the parser and makes it parse the data, emitting the
+        corresponding events. It is allowed for XML constructs to be
+        split across several calls to feed.
+
+        feed may raise SAXException."""
+        raise NotImplementedError("This method must be implemented!")
+
+    def prepareParser(self, source):
+        """This method is called by the parse implementation to allow
+        the SAX 2.0 driver to prepare itself for parsing."""
+        raise NotImplementedError("prepareParser must be overridden!")
+
+    def close(self):
+        """This method is called when the entire XML document has been
+        passed to the parser through the feed method, to notify the
+        parser that there are no more data. This allows the parser to
+        do the final checks on the document and empty the internal
+        data buffer.
+
+        The parser will not be ready to parse another document until
+        the reset method has been called.
+
+        close may raise SAXException."""
+        raise NotImplementedError("This method must be implemented!")
+
+    def reset(self):
+        """This method is called after close has been called to reset
+        the parser so that it is ready to parse new documents. The
+        results of calling parse or feed after close without calling
+        reset are undefined."""
+        raise NotImplementedError("This method must be implemented!")
+
+# ===== LOCATOR =====
+
+class Locator:
+    """Interface for associating a SAX event with a document
+    location. A locator object will return valid results only during
+    calls to DocumentHandler methods; at any other time, the
+    results are unpredictable."""
+
+    def getColumnNumber(self):
+        "Return the column number where the current event ends."
+        return -1
+
+    def getLineNumber(self):
+        "Return the line number where the current event ends."
+        return -1
+
+    def getPublicId(self):
+        "Return the public identifier for the current event."
+        return None
+
+    def getSystemId(self):
+        "Return the system identifier for the current event."
+        return None
+
+# ===== INPUTSOURCE =====
+
+class InputSource:
+    """Encapsulation of the information needed by the XMLReader to
+    read entities.
+
+    This class may include information about the public identifier,
+    system identifier, byte stream (possibly with character encoding
+    information) and/or the character stream of an entity.
+
+    Applications will create objects of this class for use in the
+    XMLReader.parse method and for returning from
+    EntityResolver.resolveEntity.
+
+    An InputSource belongs to the application, the XMLReader is not
+    allowed to modify InputSource objects passed to it from the
+    application, although it may make copies and modify those."""
+
+    def __init__(self, system_id = None):
+        self.__system_id = system_id
+        self.__public_id = None
+        self.__encoding  = None
+        self.__bytefile  = None
+        self.__charfile  = None
+
+    def setPublicId(self, public_id):
+        "Sets the public identifier of this InputSource."
+        self.__public_id = public_id
+
+    def getPublicId(self):
+        "Returns the public identifier of this InputSource."
+        return self.__public_id
+
+    def setSystemId(self, system_id):
+        "Sets the system identifier of this InputSource."
+        self.__system_id = system_id
+
+    def getSystemId(self):
+        "Returns the system identifier of this InputSource."
+        return self.__system_id
+
+    def setEncoding(self, encoding):
+        """Sets the character encoding of this InputSource.
+
+        The encoding must be a string acceptable for an XML encoding
+        declaration (see section 4.3.3 of the XML recommendation).
+
+        The encoding attribute of the InputSource is ignored if the
+        InputSource also contains a character stream."""
+        self.__encoding = encoding
+
+    def getEncoding(self):
+        "Get the character encoding of this InputSource."
+        return self.__encoding
+
+    def setByteStream(self, bytefile):
+        """Set the byte stream (a Python file-like object which does
+        not perform byte-to-character conversion) for this input
+        source.
+
+        The SAX parser will ignore this if there is also a character
+        stream specified, but it will use a byte stream in preference
+        to opening a URI connection itself.
+
+        If the application knows the character encoding of the byte
+        stream, it should set it with the setEncoding method."""
+        self.__bytefile = bytefile
+
+    def getByteStream(self):
+        """Get the byte stream for this input source.
+
+        The getEncoding method will return the character encoding for
+        this byte stream, or None if unknown."""
+        return self.__bytefile
+
+    def setCharacterStream(self, charfile):
+        """Set the character stream for this input source. (The stream
+        must be a Python 2.0 Unicode-wrapped file-like that performs
+        conversion to Unicode strings.)
+
+        If there is a character stream specified, the SAX parser will
+        ignore any byte stream and will not attempt to open a URI
+        connection to the system identifier."""
+        self.__charfile = charfile
+
+    def getCharacterStream(self):
+        "Get the character stream for this input source."
+        return self.__charfile
+
+# ===== ATTRIBUTESIMPL =====
+
+class AttributesImpl:
+
+    def __init__(self, attrs):
+        """Non-NS-aware implementation.
+
+        attrs should be of the form {name : value}."""
+        self._attrs = attrs
+
+    def getLength(self):
+        return len(self._attrs)
+
+    def getType(self, name):
+        return "CDATA"
+
+    def getValue(self, name):
+        return self._attrs[name]
+
+    def getValueByQName(self, name):
+        return self._attrs[name]
+
+    def getNameByQName(self, name):
+        if not self._attrs.has_key(name):
+            raise KeyError, name
+        return name
+
+    def getQNameByName(self, name):
+        if not self._attrs.has_key(name):
+            raise KeyError, name
+        return name
+
+    def getNames(self):
+        return self._attrs.keys()
+
+    def getQNames(self):
+        return self._attrs.keys()
+
+    def __len__(self):
+        return len(self._attrs)
+
+    def __getitem__(self, name):
+        return self._attrs[name]
+
+    def keys(self):
+        return self._attrs.keys()
+
+    def has_key(self, name):
+        return self._attrs.has_key(name)
+
+    def get(self, name, alternative=None):
+        return self._attrs.get(name, alternative)
+
+    def copy(self):
+        return self.__class__(self._attrs)
+
+    def items(self):
+        return self._attrs.items()
+
+    def values(self):
+        return self._attrs.values()
+
+# ===== ATTRIBUTESNSIMPL =====
+
+class AttributesNSImpl(AttributesImpl):
+
+    def __init__(self, attrs, qnames):
+        """NS-aware implementation.
+
+        attrs should be of the form {(ns_uri, lname): value, ...}.
+        qnames of the form {(ns_uri, lname): qname, ...}."""
+        self._attrs = attrs
+        self._qnames = qnames
+
+    def getValueByQName(self, name):
+        for (nsname, qname) in self._qnames.items():
+            if qname == name:
+                return self._attrs[nsname]
+
+        raise KeyError, name
+
+    def getNameByQName(self, name):
+        for (nsname, qname) in self._qnames.items():
+            if qname == name:
+                return nsname
+
+        raise KeyError, name
+
+    def getQNameByName(self, name):
+        return self._qnames[name]
+
+    def getQNames(self):
+        return self._qnames.values()
+
+    def copy(self):
+        return self.__class__(self._attrs, self._qnames)
+
+
+def _test():
+    XMLReader()
+    IncrementalParser()
+    Locator()
+
+if __name__ == "__main__":
+    _test()