def UnsplitUriRef(uriRefSeq):
    """Assemble a URI reference string from its five generic components.

    Intended as a replacement for urlparse.urlunsplit.

    uriRefSeq must be a tuple or list of the form
    (scheme, authority, path, query, fragment), as produced by
    SplitUriRef().  path must be a string; each of the other items is
    either a string (possibly empty) or None when that component is
    undefined.  A component that is defined but empty still contributes
    its delimiter, so (None, None, 'a/b', '', None) gives 'a/b?'.

    Raises TypeError if uriRefSeq is not a tuple or list.
    """
    if not isinstance(uriRefSeq, (tuple, list)):
        raise TypeError("sequence expected, got %s" % type(uriRefSeq))
    (scheme, authority, path, query, fragment) = uriRefSeq
    uri = ''
    if scheme is not None:
        uri += scheme + ':'
    if authority is not None:
        uri += '//' + authority
    uri += path
    if query is not None:
        uri += '?' + query
    if fragment is not None:
        uri += '#' + fragment
    return uri


# The generic URI-reference regex of RFC 2396 / RFC 3986 appendix B.  The
# group names are required: SplitUriRef() looks the components up by name.
SPLIT_URI_REF_PATTERN = re.compile(
    r"^(?:(?P<scheme>[^:/?#]+):)?"
    r"(?://(?P<authority>[^/?#]*))?"
    r"(?P<path>[^?#]*)"
    r"(?:\?(?P<query>[^#]*))?"
    r"(?:#(?P<fragment>.*))?$")


def SplitUriRef(uriref):
    """Split a URI reference into its five generic components.

    Intended as a replacement for urlparse.urlsplit.

    Returns the tuple (scheme, authority, path, query, fragment).  path is
    always a string (possibly empty); each of the other items is a string
    (possibly empty) when the component is present in uriref, or None when
    it is absent.  As in RFC 3986, no distinction is made between a path
    and the "opaque part" of RFC 2396.
    """
    # Every component of the pattern is optional, so a match object is
    # expected for any URI reference and groupdict() can be called directly.
    g = SPLIT_URI_REF_PATTERN.match(uriref).groupdict()
    return (g['scheme'], g['authority'], g['path'], g['query'],
            g['fragment'])


def Absolutize(uriRef, baseUri):
    """Resolve a URI reference against a base URI (RFC 3986 section 5).

    uriRef is the reference to resolve; baseUri is the URI it is relative
    to.  Returns the resulting absolute URI as a string.

    The caller is responsible for ensuring that baseUri matches the
    absolute-URI syntax rule of RFC 3986 and that its path contains no '.'
    or '..' segments if the scheme is hierarchical; unexpected results may
    occur otherwise.  The only sanity check made here is that baseUri is
    non-empty and has a scheme.  That check is applied even when uriRef has
    its own scheme (which makes the base irrelevant), to signal that the
    given string is not usable as a base URI at all.

    Deciding whether uriRef is a "same-document reference" is also left to
    the caller.  RFC 2396 limits that to the empty string or '#' plus a
    fragment; RFC 3986 requires comparing the base URI with the resolved
    reference minus its fragment.

    This is similar to urlparse.urljoin() and urllib.basejoin(), which
    implement the older RFC 1808 rules and differ notably for
    same-document references and 'file:' URIs.

    Raises ValueError if baseUri is empty or is not absolute.
    """
    # Reasons to avoid using urllib.basejoin() and urlparse.urljoin():
    # - Both are partial implementations of long-obsolete specs.
    # - Both accept relative URLs as the base, which no spec allows.
    # - urllib.basejoin() mishandles the '' and '..' references.
    # - If the base URL uses a non-hierarchical or relative path,
    #   or if the URL scheme is unrecognized, the result is not
    #   always as expected (partly due to issues in RFC 1808).
    # - If the authority component of a 'file' URI is empty,
    #   the authority component is removed altogether. If it was
    #   not present, an empty authority component is in the result.
    # - '.' and '..' segments are not always collapsed as well as they
    #   should be (partly due to issues in RFC 1808).
    # - Effective Python 2.4, urllib.basejoin() *is* urlparse.urljoin(),
    #   but urlparse.urljoin() is still based on RFC 1808.

    # This procedure is based on the pseudocode in RFC 3986 sec. 5.2.
    #
    # ensure base URI is absolute
    if not baseUri:
        raise ValueError('baseUri is required and must be a non empty string')
    if not IsAbsolute(baseUri):
        raise ValueError('%r is not an absolute URI' % baseUri)
    # shortcut for the simplest same-document reference cases
    if uriRef == '' or uriRef[0] == '#':
        return baseUri.split('#')[0] + uriRef
    # ensure a clean slate
    tScheme = tAuth = tPath = tQuery = None
    # parse the reference into its components
    (rScheme, rAuth, rPath, rQuery, rFrag) = SplitUriRef(uriRef)
    if rScheme is not None:
        # the reference is absolute: only '.' and '..' segments need to go
        tScheme = rScheme
        tAuth = rAuth
        tPath = RemoveDotSegments(rPath)
        tQuery = rQuery
    else:
        # the base URI's scheme, and possibly more, will be inherited
        (bScheme, bAuth, bPath, bQuery, _bFrag) = SplitUriRef(baseUri)
        if rAuth is not None:
            # net-path reference: keep its authority, path and query
            tAuth = rAuth
            tPath = RemoveDotSegments(rPath)
            tQuery = rQuery
        else:
            if not rPath:
                # empty path: inherit the base path, and the base query
                # only when the reference has no query component at all.
                # A defined but empty query ("?") must win over the base
                # query, hence the explicit None test.
                tPath = bPath
                if rQuery is not None:
                    tQuery = rQuery
                else:
                    tQuery = bQuery
            else:
                if rPath[0] == '/':
                    # absolute path: use it as is
                    tPath = RemoveDotSegments(rPath)
                else:
                    # relative path: merge with the base path
                    if bAuth is not None and not bPath:
                        tPath = '/' + rPath
                    else:
                        tPath = bPath[:bPath.rfind('/') + 1] + rPath
                    tPath = RemoveDotSegments(tPath)
                tQuery = rQuery
            # not a net-path, so the authority comes from the base URI
            tAuth = bAuth
        tScheme = bScheme
    # The fragment always comes from the reference.
    # Compose the target URI (RFC 3986 sec. 5.3).
    return UnsplitUriRef((tScheme, tAuth, tPath, tQuery, rFrag))
+ + As of Python 2.3.3, urllib.urlopen() does not fully support + internationalized domain names, it does not strip fragment components, + and on Windows, it expects file URIs to use '|' instead of ':' in the + path component corresponding to the drivespec. It also relies on + urllib.unquote(), which mishandles unicode arguments. This function + produces a URI reference that will work around these issues, although + the IDN workaround is limited to Python 2.3 only. May raise a + UnicodeEncodeError if the URI reference is Unicode and erroneously + contains non-ASCII characters. + """ + # IDN support requires decoding any percent-encoded octets in the + # host part (if it's a reg-name) of the authority component, and when + # doing DNS lookups, applying IDNA encoding to that string first. + # As of Python 2.3, there is an IDNA codec, and the socket and httplib + # modules accept Unicode strings and apply IDNA encoding automatically + # where necessary. However, urllib.urlopen() has not yet been updated + # to do the same; it raises an exception if you give it a Unicode + # string, and does no conversion on non-Unicode strings, meaning you + # have to give it an IDNA string yourself. We will only support it on + # Python 2.3 and up. + # + # see if host is a reg-name, as opposed to IPv4 or IPv6 addr. + if isinstance(uriRef, unicode): + try: + uriRef = uriRef.encode('us-ascii') # parts of urllib are not unicode safe + except UnicodeError: + raise ValueError("uri %r must consist of ASCII characters." 
% uriRef) + (scheme, auth, path, query, frag) = urlparse.urlsplit(uriRef) + if auth and auth.find('@') > -1: + userinfo, hostport = auth.split('@') + else: + userinfo = None + hostport = auth + if hostport and hostport.find(':') > -1: + host, port = hostport.split(':') + else: + host = hostport + port = None + if host and REG_NAME_HOST_PATTERN.match(host): + # percent-encoded hostnames will always fail DNS lookups + host = urllib.unquote(host) #PercentDecode(host) + # IDNA-encode if possible. + # We shouldn't do this for schemes that don't need DNS lookup, + # but are there any (that you'd be calling urlopen for)? + if sys.version_info[0:2] >= (2, 3): + if isinstance(host, str): + host = host.decode('utf-8') + host = host.encode('idna') + # reassemble the authority with the new hostname + # (percent-decoded, and possibly IDNA-encoded) + auth = '' + if userinfo: + auth += userinfo + '@' + auth += host + if port: + auth += ':' + port + + # On Windows, ensure that '|', not ':', is used in a drivespec. + if os.name == 'nt' and scheme == 'file': + path = path.replace(':', '|', 1) + + # Note that we drop fragment, if any. See RFC 3986 sec. 3.5. + uri = urlparse.urlunsplit((scheme, auth, path, query, None)) + + return uri + + + +def BaseJoin(base, uriRef): + """ + Merges a base URI reference with another URI reference, returning a + new URI reference. + + It behaves exactly the same as Absolutize(), except the arguments + are reversed, and it accepts any URI reference (even a relative URI) + as the base URI. If the base has no scheme component, it is + evaluated as if it did, and then the scheme component of the result + is removed from the result, unless the uriRef had a scheme. Thus, if + neither argument has a scheme component, the result won't have one. 
def RemoveDotSegments(path):
    """Collapse '.' and '..' segments of a path, keeping empty segments.

    Helper for Absolutize() implementing remove_dot_segments from
    RFC 3986 sec. 5.2.  It is meant for the path-merging step and may not
    give the expected result when used on its own.  Rather than walking
    the string as the spec describes, the path is split once and the
    surviving segments are collected on a stack.

    Returns the cleaned path.  A path that is exactly '.' or '..' yields an
    empty string of the same type as the input.
    """
    # a bare "." or ".." collapses to nothing (slice keeps the string type)
    if path in ('.', '..'):
        return path[0:0]
    # drop every leading "./" or "../"
    while True:
        if path.startswith('./'):
            path = path[2:]
        elif path.startswith('../'):
            path = path[3:]
        else:
            break
    # Remember a leading slash and strip it, so that split() does not
    # produce an ambiguous empty first segment.
    rooted = path.startswith('/')
    if rooted:
        path = path[1:]
    # a trailing "/." becomes "/"
    if path.endswith('/.'):
        path = path[:-1]
    parts = path.split('/')
    final = len(parts) - 1
    kept = []
    for position, part in enumerate(parts):
        if part == '..':
            # ".." removes the previously kept segment; with nothing to
            # remove it is kept only in a relative path.
            if kept:
                kept.pop()
            elif not rooted:
                kept.append(part)
            # a final ".." must leave the result ending in "/"
            if position == final:
                kept.append('')
        elif part != '.':
            # "." is dropped; anything else, even '', is kept
            kept.append(part)
    joined = '/'.join(kept)
    if rooted:
        return '/' + joined
    return joined


SCHEME_PATTERN = re.compile(r'([a-zA-Z][a-zA-Z0-9+\-.]*):')


def GetScheme(uriRef):
    """Return just the scheme of a URI reference, or None if it has none.

    A precompiled regex anchored at the start of the string is used; the
    original author measured it as faster than urllib.splittype(),
    SplitUriRef() and urlparse.urlparse() for this purpose.
    """
    found = SCHEME_PATTERN.match(uriRef)
    if found is not None:
        return found.group(1)
    return None


def IsAbsolute(identifier):
    """Tell whether a URI or URI reference is absolute, i.e. has a scheme.

    "Absolute" is meant in the RFC 2396 sense.  GetScheme() is reused so
    that no second large regex has to be compiled.
    """
    scheme = GetScheme(identifier)
    return scheme is not None
def _wrap_sax_exception(e):
    """Build a Python SAXParseException from a Java parse exception.

    The message and nested exception are copied, and the position
    information (column, line, public id, system id) is captured in a
    SimpleLocator.
    """
    return _exceptions.SAXParseException(e.message,
                                         e.exception,
                                         SimpleLocator(e.columnNumber,
                                                       e.lineNumber,
                                                       e.publicId,
                                                       e.systemId))


class JyErrorHandlerWrapper(javasax.ErrorHandler):
    """Java ErrorHandler that forwards to a Python SAX error handler.

    Each callback converts the Java exception with _wrap_sax_exception()
    before handing it to the matching method of the wrapped handler.
    """

    def __init__(self, err_handler):
        self._err_handler = err_handler

    def error(self, exc):
        self._err_handler.error(_wrap_sax_exception(exc))

    def fatalError(self, exc):
        self._err_handler.fatalError(_wrap_sax_exception(exc))

    def warning(self, exc):
        self._err_handler.warning(_wrap_sax_exception(exc))


class JyInputSourceWrapper(javasax.InputSource):
    """Java InputSource built from whatever the Python API was given.

    source may be:
      * a string (byte or unicode), taken as the system identifier;
      * a file-like object (anything with a ``read`` attribute), wrapped in
        a FilelikeInputStream, with its ``name`` used as system id if set;
      * otherwise an object assumed to follow the xml.sax.xmlreader
        InputSource interface (getByteStream, getSystemId, getPublicId,
        getEncoding).
    """

    def __init__(self, source):
        # basestring rather than str: a unicode system identifier must be
        # handled here too, otherwise it falls through to the InputSource
        # branch below and fails on getByteStream().
        if isinstance(source, basestring):
            javasax.InputSource.__init__(self, source)
        elif hasattr(source, "read"):  # file-like object
            f = source
            javasax.InputSource.__init__(self, FilelikeInputStream(f))
            if hasattr(f, "name"):
                self.setSystemId(f.name)
        else:  # xml.sax.xmlreader.InputSource object
            # Use the byte stream constructor if possible so that Xerces
            # won't attempt to open the url at systemId unless it's really
            # there.
            if source.getByteStream():
                javasax.InputSource.__init__(
                    self, FilelikeInputStream(source.getByteStream()))
            else:
                javasax.InputSource.__init__(self)
            if source.getSystemId():
                self.setSystemId(source.getSystemId())
            self.setPublicId(source.getPublicId())
            self.setEncoding(source.getEncoding())


class JyEntityResolverWrapper(javasax.EntityResolver):
    """Java EntityResolver that delegates to a Python entity resolver.

    Whatever the Python resolver returns (system id string, file-like
    object or InputSource) is converted with JyInputSourceWrapper.
    """

    def __init__(self, entityResolver):
        self._resolver = entityResolver

    def resolveEntity(self, pubId, sysId):
        return JyInputSourceWrapper(self._resolver.resolveEntity(pubId, sysId))


class JyDTDHandlerWrapper(javasax.DTDHandler):
    """Java DTDHandler that passes its events to a Python DTD handler."""

    def __init__(self, dtdHandler):
        self._handler = dtdHandler

    def notationDecl(self, name, publicId, systemId):
        self._handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, notationName):
        self._handler.unparsedEntityDecl(name, publicId, systemId,
                                         notationName)


class SimpleLocator(xmlreader.Locator):
    """Locator holding a fixed position: column, line, public and system id.

    Note the constructor takes the column before the line.
    """

    def __init__(self, colNum, lineNum, pubId, sysId):
        self.colNum = colNum
        self.lineNum = lineNum
        self.pubId = pubId
        self.sysId = sysId

    def getColumnNumber(self):
        return self.colNum

    def getLineNumber(self):
        return self.lineNum

    def getPublicId(self):
        return self.pubId

    def getSystemId(self):
        return self.sysId
+ + def __init__(self, jdriver = None): + xmlreader.XMLReader.__init__(self) + self._parser = create_java_parser(jdriver) + self._parser.setFeature(feature_namespaces, 0) + self._parser.setFeature(feature_namespace_prefixes, 0) + self._parser.setContentHandler(self) + self._nsattrs = AttributesNSImpl() + self._attrs = AttributesImpl() + self.setEntityResolver(self.getEntityResolver()) + self.setErrorHandler(self.getErrorHandler()) + self.setDTDHandler(self.getDTDHandler()) + + # XMLReader methods + + def parse(self, source): + "Parse an XML document from a URL or an InputSource." + self._parser.parse(JyInputSourceWrapper(source)) + + def getFeature(self, name): + return self._parser.getFeature(name) + + def setFeature(self, name, state): + self._parser.setFeature(name, state) + + def getProperty(self, name): + return self._parser.getProperty(name) + + def setProperty(self, name, value): + self._parser.setProperty(name, value) + + def setEntityResolver(self, resolver): + self._parser.entityResolver = JyEntityResolverWrapper(resolver) + xmlreader.XMLReader.setEntityResolver(self, resolver) + + def setErrorHandler(self, err_handler): + self._parser.errorHandler = JyErrorHandlerWrapper(err_handler) + xmlreader.XMLReader.setErrorHandler(self, err_handler) + + def setDTDHandler(self, dtd_handler): + self._parser.setDTDHandler(JyDTDHandlerWrapper(dtd_handler)) + xmlreader.XMLReader.setDTDHandler(self, dtd_handler) + + # ContentHandler methods + def setDocumentLocator(self, locator): + self._cont_handler.setDocumentLocator(locator) + + def startDocument(self): + self._cont_handler.startDocument() + self._namespaces = self._parser.getFeature(feature_namespaces) + + def startElement(self, uri, lname, qname, attrs): + if self._namespaces: + self._nsattrs._attrs = attrs + self._cont_handler.startElementNS((uri or None, lname), qname, + self._nsattrs) + else: + self._attrs._attrs = attrs + self._cont_handler.startElement(qname, self._attrs) + + def startPrefixMapping(self, 
class AttributesImpl:
    """Mapping-style, read-only view of one element's attributes.

    Attributes are addressed by qualified name.  The wrapped object
    (``attrs``) must provide the org.xml.sax.Attributes methods used
    below: getLength, getQName, getIndex, getValue and getType.  The
    wrapper keeps no copy of the data, so it reflects whatever ``_attrs``
    currently refers to.
    """

    def __init__(self, attrs=None):
        self._attrs = attrs

    def getLength(self):
        return self._attrs.getLength()

    def getType(self, name):
        return self._attrs.getType(name)

    def getValue(self, name):
        """Return the value of attribute ``name``; raise KeyError if absent."""
        value = self._attrs.getValue(name)
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        # without namespaces the attribute name is its qualified name
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getQNames(self):
        return [self._attrs.getQName(index) for index in range(len(self))]

    def getValueByQName(self, qname):
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return self._attrs.getValue(idx)

    def getNameByQName(self, qname):
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return qname

    def getQNameByName(self, name):
        idx = self._attrs.getIndex(name)
        if idx == -1:
            raise KeyError(name)
        return name

    def __len__(self):
        return self._attrs.getLength()

    def __getitem__(self, name):
        return self.getValue(name)

    def __contains__(self, name):
        # Without this, ``name in attrs`` would fall back to iterating
        # __getitem__ with integer indexes, which tests membership among
        # the attribute values instead of the names.
        return self.has_key(name)

    def keys(self):
        return self.getNames()

    def copy(self):
        # shallow: the copy shares the same underlying attribute object
        return self.__class__(self._attrs)

    def items(self):
        return [(name, self[name]) for name in self.getNames()]

    def values(self):
        return [self.getValue(name) for name in self.getNames()]

    def get(self, name, alt=None):
        try:
            return self.getValue(name)
        except KeyError:
            return alt

    def has_key(self, name):
        try:
            self.getValue(name)
            return True
        except KeyError:
            return False

# --- AttributesNSImpl

class AttributesNSImpl(AttributesImpl):
    """Namespace-aware variant of AttributesImpl.

    Attribute names are (namespace URI, local name) pairs; qualified names
    remain available through the *QName* methods.  The constructor,
    getQNames() and the mapping helpers are inherited unchanged.
    """

    def getType(self, name):
        return self._attrs.getType(name[0], name[1])

    def getValue(self, name):
        """Return the value for the (uri, localname) pair; KeyError if absent."""
        value = self._attrs.getValue(name[0], name[1])
        if value is None:
            raise KeyError(name)
        return value

    def getNames(self):
        return [(self._attrs.getURI(idx), self._attrs.getLocalName(idx))
                for idx in range(len(self))]

    def getNameByQName(self, qname):
        idx = self._attrs.getIndex(qname)
        if idx == -1:
            raise KeyError(qname)
        return (self._attrs.getURI(idx), self._attrs.getLocalName(idx))

    def getQNameByName(self, name):
        idx = self._attrs.getIndex(name[0], name[1])
        if idx == -1:
            raise KeyError(name)
        return self._attrs.getQName(idx)

# ---

def create_java_parser(jdriver = None):
    """Return a Java XMLReader.

    If ``jdriver`` (a driver class name) is given it is passed to
    XMLReaderFactory; otherwise JAXP is used when it was importable, with
    the XMLReaderFactory default as the last resort.

    Raises SAXReaderNotAvailable if the Java side reports a SAXException
    or, with JAXP, a ParserConfigurationException.
    """
    # local import: sys.exc_info() keeps the handler free of the
    # version-specific "except X, e" / "except X as e" spellings
    import sys
    # ParserConfigurationException is only bound when the JAXP import
    # succeeded; naming it unconditionally in an except clause would turn
    # any parser failure into a NameError when JAXP is missing.
    expected = (javasax.SAXException,)
    if jaxp:
        expected = (ParserConfigurationException,) + expected
    try:
        if jdriver:
            return XMLReaderFactory.createXMLReader(jdriver)
        elif jaxp:
            return factory.newSAXParser().getXMLReader()
        else:
            return XMLReaderFactory.createXMLReader()
    except expected:
        raise _exceptions.SAXReaderNotAvailable(
            sys.exc_info()[1].getMessage())

def create_parser(jdriver = None):
    """Return a JavaSAXParser, optionally bound to the named Java driver."""
    return JavaSAXParser(jdriver)
version = '2.0beta'

#============================================================================
#
# HANDLER INTERFACES
#
#============================================================================

# ===== ERRORHANDLER =====

class ErrorHandler:
    """Basic interface for SAX error handlers.

    An object implementing this interface and registered with an XMLReader
    receives all warnings and errors the parser reports.  There are three
    severities: warnings, (possibly) recoverable errors and unrecoverable
    errors.  Every method takes a SAXParseException as its only argument.
    """

    def error(self, exception):
        "Handle a recoverable error; the default re-raises it."
        raise exception

    def fatalError(self, exception):
        "Handle a non-recoverable error; the default re-raises it."
        raise exception

    def warning(self, exception):
        "Handle a warning; the default prints it."
        # single-argument call form: prints the same text whether print is
        # a statement or a function
        print(exception)


# ===== CONTENTHANDLER =====

class ContentHandler:
    """Interface for receiving logical document content events.

    This is the main callback interface in SAX and the one most important
    to applications.  Events arrive in the same order as the information
    appears in the document.  All callbacks except setDocumentLocator()
    do nothing by default.
    """

    def __init__(self):
        self._locator = None

    def setDocumentLocator(self, locator):
        """Store the locator the parser supplies for locating events.

        SAX parsers are strongly encouraged (though not required) to
        supply a locator; a parser that does so must call this method
        before any other method of this interface.

        The locator lets the application determine the end position of
        any document-related event, even when the parser is not reporting
        an error.  It is typically used for reporting the application's own
        errors (such as character content that violates a business rule).
        The information is probably not sufficient for use with a search
        engine.

        The locator returns correct information only while one of the
        events of this interface is being delivered; it should not be
        used at any other time.
        """
        self._locator = locator

    def startDocument(self):
        """Receive notification of the beginning of a document.

        Invoked only once, before any other method of this interface or
        of DTDHandler (except setDocumentLocator).
        """

    def endDocument(self):
        """Receive notification of the end of a document.

        Invoked only once, as the last call of the parse.  The parser does
        not call it until it has either abandoned parsing (because of an
        unrecoverable error) or reached the end of input.
        """

    def startPrefixMapping(self, prefix, uri):
        """Begin the scope of a prefix-URI Namespace mapping.

        This event is not needed for normal Namespace processing: the
        reader replaces prefixes of element and attribute names itself
        when the http://xml.org/sax/features/namespaces feature is true
        (the default).

        Applications that use prefixes in character data or attribute
        values, where they cannot be expanded automatically, can use the
        start/endPrefixMapping events to expand them on their own.

        These events are not guaranteed to be properly nested relative to
        each other.  All startPrefixMapping events occur before the
        corresponding startElement event and all endPrefixMapping events
        after the corresponding endElement event, but their order is
        otherwise not guaranteed.
        """

    def endPrefixMapping(self, prefix):
        """End the scope of a prefix-URI mapping.

        See startPrefixMapping().  The event always occurs after the
        corresponding endElement event; the order of endPrefixMapping
        events among themselves is not guaranteed.
        """

    def startElement(self, name, attrs):
        """Signal the start of an element in non-namespace mode.

        name is the raw XML 1.0 name of the element type as a string;
        attrs is an instance of the Attributes class holding the
        element's attributes.
        """

    def endElement(self, name):
        """Signal the end of an element in non-namespace mode.

        name is the element type name, as in the startElement event.
        """

    def startElementNS(self, name, qname, attrs):
        """Signal the start of an element in namespace mode.

        name is the element type as a (uri, localname) tuple, qname the
        raw XML 1.0 name used in the source document, and attrs an
        instance of the Attributes class holding the element's attributes.

        The uri part of name is None for elements without a namespace.
        """

    def endElementNS(self, name, qname):
        """Signal the end of an element in namespace mode.

        name is the element type name, as in the startElementNS event.
        """

    def characters(self, content):
        """Receive notification of character data.

        Called for each chunk of character data.  A parser may deliver
        contiguous character data as one chunk or split it into several,
        but all characters of a single event come from the same external
        entity so that the locator stays meaningful.
        """

    def ignorableWhitespace(self, whitespace):
        """Receive notification of ignorable whitespace in element content.

        Validating parsers must report each chunk of ignorable whitespace
        this way (see the W3C XML 1.0 recommendation, section 2.10);
        non-validating parsers may do so if they can parse and use content
        models.

        Contiguous whitespace may arrive as one chunk or several, but all
        characters of a single event come from the same external entity
        so that the locator stays meaningful.
        """

    def processingInstruction(self, target, data):
        """Receive notification of a processing instruction.

        Called once per processing instruction; these may occur before or
        after the main document element.

        A parser should never report an XML declaration (XML 1.0, section
        2.8) or a text declaration (XML 1.0, section 4.3.1) through this
        method.
        """

    def skippedEntity(self, name):
        """Receive notification of a skipped entity.

        Called once per skipped entity.  Non-validating processors may
        skip entities whose declarations they have not seen (for example
        because they live in an external DTD subset).  Any processor may
        skip external entities, depending on the values of the
        http://xml.org/sax/features/external-general-entities and
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """


# ===== DTDHandler =====

class DTDHandler:
    """Handle DTD events.

    Only the DTD events required for basic parsing (unparsed entities and
    attributes) are part of this interface.
    """

    def notationDecl(self, name, publicId, systemId):
        "Handle a notation declaration event."

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        "Handle an unparsed entity declaration event."


# ===== ENTITYRESOLVER =====

class EntityResolver:
    """Basic interface for resolving entities.

    An object implementing this interface and registered with a parser is
    asked to resolve all external entities.  Note that DefaultHandler
    implements this interface with the default behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve an entity; the default returns ``systemId`` unchanged.

        Returns either the system identifier to read from, as a string,
        or an InputSource to read from.
        """
        return systemId


#============================================================================
#
# CORE FEATURES
#
#============================================================================

feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
#        (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
#       declarations.
# false: Do not report attributes used for Namespace declarations, and
#        optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
#       local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
#       external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
#       DTD subset.
# false: Do not include any external parameter entities, even the external
#        DTD subset.
# access: (parsing) read-only; (not parsing) read/write

all_features = [feature_namespaces,
                feature_namespace_prefixes,
                feature_string_interning,
                feature_validation,
                feature_external_ges,
                feature_external_pes]


#============================================================================
#
# CORE PROPERTIES
#
#============================================================================

property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.
+# access: read/write + +property_declaration_handler = "http://xml.org/sax/properties/declaration-handler" +# data type: xml.sax.sax2lib.DeclHandler +# description: An optional extension handler for DTD-related events other +# than notations and unparsed entities. +# access: read/write + +property_dom_node = "http://xml.org/sax/properties/dom-node" +# data type: org.w3c.dom.Node +# description: When parsing, the current DOM node being visited if this is +# a DOM iterator; when not parsing, the root DOM node for +# iteration. +# access: (parsing) read-only; (not parsing) read/write + +property_xml_string = "http://xml.org/sax/properties/xml-string" +# data type: String +# description: The literal string of characters that was the source for +# the current event. +# access: read-only + +property_encoding = "http://www.python.org/sax/properties/encoding" +# data type: String +# description: The name of the encoding to assume for input data. +# access: write: set the encoding, e.g. established by a higher-level +# protocol. May change during parsing (e.g. after +# processing a META tag) +# read: return the current encoding (possibly established through +# auto-detection. 
+# initial value: UTF-8 +# + +property_interning_dict = "http://www.python.org/sax/properties/interning-dict" +# data type: Dictionary +# description: The dictionary used to intern common strings in the document +# access: write: Request that the parser uses a specific dictionary, to +# allow interning across different documents +# read: return the current interning dictionary, or None +# + +all_properties = [property_lexical_handler, + property_dom_node, + property_declaration_handler, + property_xml_string, + property_encoding, + property_interning_dict] diff --git a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py new file mode 100644 index 000000000..64603799d --- /dev/null +++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxlib.py @@ -0,0 +1,430 @@ +""" +This module contains the core classes of version 2.0 of SAX for Python. +This file provides only default classes with absolutely minimum +functionality, from which drivers and applications can be subclassed. + +Many of these classes are empty and are included only as documentation +of the interfaces. + +$Id: saxlib.py,v 1.12 2002/05/10 14:49:21 akuchling Exp $ +""" + +version = '2.0beta' + +# A number of interfaces used to live in saxlib, but are now in +# various other modules for Python 2 compatibility. 
If nobody uses +# them here any longer, the references can be removed + +from handler import ErrorHandler, ContentHandler, DTDHandler, EntityResolver +from xmlreader import XMLReader, InputSource, Locator, IncrementalParser +from _exceptions import * + +from handler import \ + feature_namespaces,\ + feature_namespace_prefixes,\ + feature_string_interning,\ + feature_validation,\ + feature_external_ges,\ + feature_external_pes,\ + all_features,\ + property_lexical_handler,\ + property_declaration_handler,\ + property_dom_node,\ + property_xml_string,\ + all_properties + +#============================================================================ +# +# MAIN INTERFACES +# +#============================================================================ + +# ===== XMLFILTER ===== + +class XMLFilter(XMLReader): + """Interface for a SAX2 parser filter. + + A parser filter is an XMLReader that gets its events from another + XMLReader (which may in turn also be a filter) rather than from a + primary source like a document or other non-SAX data source. + Filters can modify a stream of events before passing it on to its + handlers.""" + + def __init__(self, parent = None): + """Creates a filter instance, allowing applications to set the + parent on instantiation.""" + XMLReader.__init__(self) + self._parent = parent + + def setParent(self, parent): + """Sets the parent XMLReader of this filter. The argument may + not be None.""" + self._parent = parent + + def getParent(self): + "Returns the parent of this filter." + return self._parent + +# ===== ATTRIBUTES ===== + +class Attributes: + """Interface for a list of XML attributes. + + Contains a list of XML attributes, accessible by name.""" + + def getLength(self): + "Returns the number of attributes in the list." + raise NotImplementedError("This method must be implemented!") + + def getType(self, name): + "Returns the type of the attribute with the given name." 
+ raise NotImplementedError("This method must be implemented!") + + def getValue(self, name): + "Returns the value of the attribute with the given name." + raise NotImplementedError("This method must be implemented!") + + def getValueByQName(self, name): + """Returns the value of the attribute with the given raw (or + qualified) name.""" + raise NotImplementedError("This method must be implemented!") + + def getNameByQName(self, name): + """Returns the namespace name of the attribute with the given + raw (or qualified) name.""" + raise NotImplementedError("This method must be implemented!") + + def getNames(self): + """Returns a list of the names of all attributes + in the list.""" + raise NotImplementedError("This method must be implemented!") + + def getQNames(self): + """Returns a list of the raw qualified names of all attributes + in the list.""" + raise NotImplementedError("This method must be implemented!") + + def __len__(self): + "Alias for getLength." + raise NotImplementedError("This method must be implemented!") + + def __getitem__(self, name): + "Alias for getValue." + raise NotImplementedError("This method must be implemented!") + + def keys(self): + "Returns a list of the attribute names in the list." + raise NotImplementedError("This method must be implemented!") + + def has_key(self, name): + "True if the attribute is in the list, false otherwise." + raise NotImplementedError("This method must be implemented!") + + def get(self, name, alternative=None): + """Return the value associated with attribute name; if it is not + available, then return the alternative.""" + raise NotImplementedError("This method must be implemented!") + + def copy(self): + "Return a copy of the Attributes object." + raise NotImplementedError("This method must be implemented!") + + def items(self): + "Return a list of (attribute_name, value) pairs." + raise NotImplementedError("This method must be implemented!") + + def values(self): + "Return a list of all attribute values." 
+ raise NotImplementedError("This method must be implemented!") + + +#============================================================================ +# +# HANDLER INTERFACES +# +#============================================================================ + + +# ===== DECLHANDLER ===== + +class DeclHandler: + """Optional SAX2 handler for DTD declaration events. + + Note that some DTD declarations are already reported through the + DTDHandler interface. All events reported to this handler will + occur between the startDTD and endDTD events of the + LexicalHandler. + + To set the DeclHandler for an XMLReader, use the setProperty method + with the identifier http://xml.org/sax/handlers/DeclHandler.""" + + def attributeDecl(self, elem_name, attr_name, type, value_def, value): + """Report an attribute type declaration. + + Only the first declaration will be reported. The type will be + one of the strings "CDATA", "ID", "IDREF", "IDREFS", + "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION", or + a list of names (in the case of enumerated definitions). + + elem_name is the element type name, attr_name the attribute + type name, type a string representing the attribute type, + value_def a string representing the default declaration + ('#IMPLIED', '#REQUIRED', '#FIXED' or None). value is a string + representing the attribute's default value, or None if there + is none.""" + + def elementDecl(self, elem_name, content_model): + """Report an element type declaration. + + Only the first declaration will be reported. + + content_model is the string 'EMPTY', the string 'ANY' or the content + model structure represented as tuple (separator, tokens, modifier) + where separator is the separator in the token list (that is, '|' or + ','), tokens is the list of tokens (element type names or tuples + representing parentheses) and modifier is the quantity modifier + ('*', '?' or '+').""" + + def internalEntityDecl(self, name, value): + """Report an internal entity declaration. 
+ + Only the first declaration of an entity will be reported. + + name is the name of the entity. If it is a parameter entity, + the name will begin with '%'. value is the replacement text of + the entity.""" + + def externalEntityDecl(self, name, public_id, system_id): + """Report a parsed entity declaration. (Unparsed entities are + reported to the DTDHandler.) + + Only the first declaration for each entity will be reported. + + name is the name of the entity. If it is a parameter entity, + the name will begin with '%'. public_id and system_id are the + public and system identifiers of the entity. public_id will be + None if none were declared.""" + + + +# ===== LEXICALHANDLER ===== + +class LexicalHandler: + """Optional SAX2 handler for lexical events. + + This handler is used to obtain lexical information about an XML + document, that is, information about how the document was encoded + (as opposed to what it contains, which is reported to the + ContentHandler), such as comments and CDATA marked section + boundaries. + + To set the LexicalHandler of an XMLReader, use the setProperty + method with the property identifier + 'http://xml.org/sax/handlers/LexicalHandler'. There is no + guarantee that the XMLReader will support or recognize this + property.""" + + def comment(self, content): + """Reports a comment anywhere in the document (including the + DTD and outside the document element). + + content is a string that holds the contents of the comment.""" + + def startDTD(self, name, public_id, system_id): + """Report the start of the DTD declarations, if the document + has an associated DTD. + + A startEntity event will be reported before declaration events + from the external DTD subset are reported, and this can be + used to infer from which subset DTD declarations derive. 
+ + name is the name of the document element type, public_id the + public identifier of the DTD (or None if none were supplied) + and system_id the system identfier of the external subset (or + None if none were supplied).""" + + def endDTD(self): + "Signals the end of DTD declarations." + + def startEntity(self, name): + """Report the beginning of an entity. + + The start and end of the document entity is not reported. The + start and end of the external DTD subset is reported with the + pseudo-name '[dtd]'. + + Skipped entities will be reported through the skippedEntity + event of the ContentHandler rather than through this event. + + name is the name of the entity. If it is a parameter entity, + the name will begin with '%'.""" + + def endEntity(self, name): + """Reports the end of an entity. name is the name of the + entity, and follows the same conventions as for + startEntity.""" + + def startCDATA(self): + """Reports the beginning of a CDATA marked section. + + The contents of the CDATA marked section will be reported + through the characters event.""" + + def endCDATA(self): + "Reports the end of a CDATA marked section." + + +#============================================================================ +# +# SAX 1.0 COMPATIBILITY CLASSES +# Note that these are all deprecated. +# +#============================================================================ + +# ===== ATTRIBUTELIST ===== + +class AttributeList: + """Interface for an attribute list. This interface provides + information about a list of attributes for an element (only + specified or defaulted attributes will be reported). Note that the + information returned by this object will be valid only during the + scope of the DocumentHandler.startElement callback, and the + attributes will not necessarily be provided in the order declared + or specified.""" + + def getLength(self): + "Return the number of attributes in list." + + def getName(self, i): + "Return the name of an attribute in the list." 
+ + def getType(self, i): + """Return the type of an attribute in the list. (Parameter can be + either integer index or attribute name.)""" + + def getValue(self, i): + """Return the value of an attribute in the list. (Parameter can be + either integer index or attribute name.)""" + + def __len__(self): + "Alias for getLength." + + def __getitem__(self, key): + "Alias for getName (if key is an integer) and getValue (if string)." + + def keys(self): + "Returns a list of the attribute names." + + def has_key(self, key): + "True if the attribute is in the list, false otherwise." + + def get(self, key, alternative=None): + """Return the value associated with attribute name; if it is not + available, then return the alternative.""" + + def copy(self): + "Return a copy of the AttributeList." + + def items(self): + "Return a list of (attribute_name,value) pairs." + + def values(self): + "Return a list of all attribute values." + + +# ===== DOCUMENTHANDLER ===== + +class DocumentHandler: + """Handle general document events. This is the main client + interface for SAX: it contains callbacks for the most important + document events, such as the start and end of elements. You need + to create an object that implements this interface, and then + register it with the Parser. If you do not want to implement + the entire interface, you can derive a class from HandlerBase, + which implements the default functionality. You can find the + location of any document event using the Locator interface + supplied by setDocumentLocator().""" + + def characters(self, ch, start, length): + "Handle a character data event." + + def endDocument(self): + "Handle an event for the end of a document." + + def endElement(self, name): + "Handle an event for the end of an element." + + def ignorableWhitespace(self, ch, start, length): + "Handle an event for ignorable whitespace in element content." + + def processingInstruction(self, target, data): + "Handle a processing instruction event." 
+ + def setDocumentLocator(self, locator): + "Receive an object for locating the origin of SAX document events." + + def startDocument(self): + "Handle an event for the beginning of a document." + + def startElement(self, name, atts): + "Handle an event for the beginning of an element." + + +# ===== HANDLERBASE ===== + +class HandlerBase(EntityResolver, DTDHandler, DocumentHandler,\ + ErrorHandler): + """Default base class for handlers. This class implements the + default behaviour for four SAX interfaces: EntityResolver, + DTDHandler, DocumentHandler, and ErrorHandler: rather + than implementing those full interfaces, you may simply extend + this class and override the methods that you need. Note that the + use of this class is optional (you are free to implement the + interfaces directly if you wish).""" + + +# ===== PARSER ===== + +class Parser: + """Basic interface for SAX (Simple API for XML) parsers. All SAX + parsers must implement this basic interface: it allows users to + register handlers for different types of events and to initiate a + parse from a URI, a character stream, or a byte stream. SAX + parsers should also implement a zero-argument constructor.""" + + def __init__(self): + self.doc_handler = DocumentHandler() + self.dtd_handler = DTDHandler() + self.ent_handler = EntityResolver() + self.err_handler = ErrorHandler() + + def parse(self, systemId): + "Parse an XML document from a system identifier." + + def parseFile(self, fileobj): + "Parse an XML document from a file-like object." + + def setDocumentHandler(self, handler): + "Register an object to receive basic document-related events." + self.doc_handler=handler + + def setDTDHandler(self, handler): + "Register an object to receive basic DTD-related events." + self.dtd_handler=handler + + def setEntityResolver(self, resolver): + "Register an object to resolve external entities." 
+ self.ent_handler=resolver + + def setErrorHandler(self, handler): + "Register an object to receive error-message events." + self.err_handler=handler + + def setLocale(self, locale): + """Allow an application to set the locale for errors and warnings. + + SAX parsers are not required to provide localisation for errors + and warnings; if they cannot support the requested locale, + however, they must throw a SAX exception. Applications may + request a locale change in the middle of a parse.""" + raise SAXNotSupportedException("Locale support not implemented") diff --git a/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py new file mode 100644 index 000000000..8d4ad9e90 --- /dev/null +++ b/extensions/jython/module/MOD-INF/lib/jython/xml/sax/saxutils.py @@ -0,0 +1,813 @@ +""" +A library of useful helper classes to the saxlib classes, for the +convenience of application and driver writers. + +$Id: saxutils.py,v 1.37 2005/04/13 14:02:08 syt Exp $ +""" +import os, urlparse, urllib2, types +import handler +import xmlreader +import sys, _exceptions, saxlib + +from xml.Uri import Absolutize, MakeUrllibSafe,IsAbsolute + +try: + _StringTypes = [types.StringType, types.UnicodeType] +except AttributeError: # 1.5 compatibility:UnicodeType not defined + _StringTypes = [types.StringType] + +def __dict_replace(s, d): + """Replace substrings of a string using a dictionary.""" + for key, value in d.items(): + s = s.replace(key, value) + return s + +def escape(data, entities={}): + """Escape &, <, and > in a string of data. + + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. 
+ """ + data = data.replace("&", "&") + data = data.replace("<", "<") + data = data.replace(">", ">") + if entities: + data = __dict_replace(data, entities) + return data + +def unescape(data, entities={}): + """Unescape &, <, and > in a string of data. + + You can unescape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + data = data.replace("<", "<") + data = data.replace(">", ">") + if entities: + data = __dict_replace(data, entities) + # must do ampersand last + return data.replace("&", "&") + +def quoteattr(data, entities={}): + """Escape and quote an attribute value. + + Escape &, <, and > in a string of data, then quote it for use as + an attribute value. The \" character will be escaped as well, if + necessary. + + You can escape other strings of data by passing a dictionary as + the optional entities parameter. The keys and values must all be + strings; each key will be replaced with its corresponding value. + """ + data = escape(data, entities) + if '"' in data: + if "'" in data: + data = '"%s"' % data.replace('"', """) + else: + data = "'%s'" % data + else: + data = '"%s"' % data + return data + +# --- DefaultHandler + +class DefaultHandler(handler.EntityResolver, handler.DTDHandler, + handler.ContentHandler, handler.ErrorHandler): + """Default base class for SAX2 event handlers. Implements empty + methods for all callback methods, which can be overridden by + application implementors. Replaces the deprecated SAX1 HandlerBase + class.""" + +# --- Location + +class Location: + """Represents a location in an XML entity. 
Initialized by being passed + a locator, from which it reads off the current location, which is then + stored internally.""" + + def __init__(self, locator): + self.__col = locator.getColumnNumber() + self.__line = locator.getLineNumber() + self.__pubid = locator.getPublicId() + self.__sysid = locator.getSystemId() + + def getColumnNumber(self): + return self.__col + + def getLineNumber(self): + return self.__line + + def getPublicId(self): + return self.__pubid + + def getSystemId(self): + return self.__sysid + + def __str__(self): + if self.__line is None: + line = "?" + else: + line = self.__line + if self.__col is None: + col = "?" + else: + col = self.__col + return "%s:%s:%s" % ( + self.__sysid or self.__pubid or "", + line, col) + +# --- ErrorPrinter + +class ErrorPrinter: + "A simple class that just prints error messages to standard out." + + def __init__(self, level=0, outfile=sys.stderr): + self._level = level + self._outfile = outfile + + def warning(self, exception): + if self._level <= 0: + self._outfile.write("WARNING in %s: %s\n" % + (self.__getpos(exception), + exception.getMessage())) + + def error(self, exception): + if self._level <= 1: + self._outfile.write("ERROR in %s: %s\n" % + (self.__getpos(exception), + exception.getMessage())) + + def fatalError(self, exception): + if self._level <= 2: + self._outfile.write("FATAL ERROR in %s: %s\n" % + (self.__getpos(exception), + exception.getMessage())) + + def __getpos(self, exception): + if isinstance(exception, _exceptions.SAXParseException): + return "%s:%s:%s" % (exception.getSystemId(), + exception.getLineNumber(), + exception.getColumnNumber()) + else: + return "" + +# --- ErrorRaiser + +class ErrorRaiser: + "A simple class that just raises the exceptions it is passed." 
+ + def __init__(self, level = 0): + self._level = level + + def error(self, exception): + if self._level <= 1: + raise exception + + def fatalError(self, exception): + if self._level <= 2: + raise exception + + def warning(self, exception): + if self._level <= 0: + raise exception + +# --- AttributesImpl now lives in xmlreader +from xmlreader import AttributesImpl + +# --- XMLGenerator is the SAX2 ContentHandler for writing back XML +import codecs + +def _outputwrapper(stream,encoding): + writerclass = codecs.lookup(encoding)[3] + return writerclass(stream) + +if hasattr(codecs, "register_error"): + def writetext(stream, text, entities={}): + stream.errors = "xmlcharrefreplace" + stream.write(escape(text, entities)) + stream.errors = "strict" +else: + def writetext(stream, text, entities={}): + text = escape(text, entities) + try: + stream.write(text) + except UnicodeError: + for c in text: + try: + stream.write(c) + except UnicodeError: + stream.write("&#%d;" % ord(c)) + +def writeattr(stream, text): + countdouble = text.count('"') + if countdouble: + countsingle = text.count("'") + if countdouble <= countsingle: + entities = {'"': """} + quote = '"' + else: + entities = {"'": "'"} + quote = "'" + else: + entities = {} + quote = '"' + stream.write(quote) + writetext(stream, text, entities) + stream.write(quote) + + +class XMLGenerator(handler.ContentHandler): + GENERATED_PREFIX = "xml.sax.saxutils.prefix%s" + + def __init__(self, out=None, encoding="iso-8859-1"): + if out is None: + import sys + out = sys.stdout + handler.ContentHandler.__init__(self) + self._out = _outputwrapper(out,encoding) + self._ns_contexts = [{}] # contains uri -> prefix dicts + self._current_context = self._ns_contexts[-1] + self._undeclared_ns_maps = [] + self._encoding = encoding + self._generated_prefix_ctr = 0 + return + + # ContentHandler methods + + def startDocument(self): + self._out.write('\n' % + self._encoding) + + def startPrefixMapping(self, prefix, uri): + 
self._ns_contexts.append(self._current_context.copy()) + self._current_context[uri] = prefix + self._undeclared_ns_maps.append((prefix, uri)) + + def endPrefixMapping(self, prefix): + self._current_context = self._ns_contexts[-1] + del self._ns_contexts[-1] + + def startElement(self, name, attrs): + self._out.write('<' + name) + for (name, value) in attrs.items(): + self._out.write(' %s=' % name) + writeattr(self._out, value) + self._out.write('>') + + def endElement(self, name): + self._out.write('' % name) + + def startElementNS(self, name, qname, attrs): + if name[0] is None: + name = name[1] + elif self._current_context[name[0]] is None: + # default namespace + name = name[1] + else: + name = self._current_context[name[0]] + ":" + name[1] + self._out.write('<' + name) + + for k,v in self._undeclared_ns_maps: + if k is None: + self._out.write(' xmlns="%s"' % (v or '')) + else: + self._out.write(' xmlns:%s="%s"' % (k,v)) + self._undeclared_ns_maps = [] + + for (name, value) in attrs.items(): + if name[0] is None: + name = name[1] + elif self._current_context[name[0]] is None: + # default namespace + #If an attribute has a nsuri but not a prefix, we must + #create a prefix and add a nsdecl + prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr + self._generated_prefix_ctr = self._generated_prefix_ctr + 1 + name = prefix + ':' + name[1] + self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0]))) + self._current_context[name[0]] = prefix + else: + name = self._current_context[name[0]] + ":" + name[1] + self._out.write(' %s=' % name) + writeattr(self._out, value) + self._out.write('>') + + def endElementNS(self, name, qname): + # XXX: if qname is not None, we better use it. 
class XMLFilterBase(saxlib.XMLFilter):
    """Pass-through SAX filter that sits between an XMLReader and the
    client application's event handlers.

    Each event method hands its arguments, unchanged, to the matching
    handler attribute (_cont_handler, _err_handler, _dtd_handler or
    _ent_handler); each configuration method hands its arguments,
    unchanged, to the parent reader held in _parent.  Subclasses
    override individual methods to alter the event stream or the
    configuration requests on their way through.

    None of these attributes is assigned in this class; presumably the
    base class provides them -- verify against saxlib.XMLFilter.
    """

    # --- ErrorHandler methods: relay to the error handler

    def error(self, exception):
        "Relay a recoverable error."
        self._err_handler.error(exception)

    def fatalError(self, exception):
        "Relay a non-recoverable error."
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        "Relay a warning."
        self._err_handler.warning(exception)

    # --- ContentHandler methods: relay to the content handler

    def setDocumentLocator(self, locator):
        "Relay the document locator."
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        "Relay the start-of-document event."
        self._cont_handler.startDocument()

    def endDocument(self):
        "Relay the end-of-document event."
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        "Relay the start of a prefix mapping."
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        "Relay the end of a prefix mapping."
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        "Relay an element start event."
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        "Relay an element end event."
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        "Relay a namespace-aware element start event."
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        "Relay a namespace-aware element end event."
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        "Relay character data."
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        "Relay ignorable whitespace."
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        "Relay a processing instruction."
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        "Relay a skipped-entity notification."
        self._cont_handler.skippedEntity(name)

    # --- DTDHandler methods: relay to the DTD handler

    def notationDecl(self, name, publicId, systemId):
        "Relay a notation declaration."
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        "Relay an unparsed entity declaration."
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # --- EntityResolver methods: relay to the entity resolver

    def resolveEntity(self, publicId, systemId):
        "Ask the entity resolver and return whatever it returns."
        return self._ent_handler.resolveEntity(publicId, systemId)

    # --- XMLReader methods: relay to the parent reader

    def parse(self, source):
        """Install this filter as every handler of the parent reader,
        then let the parent parse source."""
        reader = self._parent
        # The filter itself receives all four kinds of callbacks so that
        # the relay methods above can pass them on.
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        "Relay the locale request to the parent reader."
        self._parent.setLocale(locale)

    def getFeature(self, name):
        "Return the parent reader's state for the feature."
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        "Set the feature on the parent reader."
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        "Return the parent reader's value for the property."
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        "Set the property on the parent reader."
        self._parent.setProperty(name, value)
def prepare_input_source(source, base = ""):
    """Turn source into an InputSource that has a byte stream attached.

    source may be:
      * a string, which is used as the system identifier;
      * an object with a read attribute, which is used as the byte
        stream (its name attribute, when present, supplies the system
        identifier);
      * anything else, which is used as is and must therefore offer
        getByteStream, getSystemId, setSystemId and setByteStream.

    base is an optional base URI used to resolve a relative system
    identifier.

    When the resulting source has no byte stream, its system identifier
    is made absolute and opened with urllib2.urlopen; the opened stream
    is attached to the source and is not closed here.

    Raises ValueError when the system identifier cannot be made
    absolute.
    """
    # isinstance (rather than an exact type match) also accepts
    # subclasses of the string types.
    # NOTE(review): assumes _StringTypes is a sequence of type objects.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(absolute_system_id(stream.name, base))

    if source.getByteStream() is None:
        sysid = absolute_system_id(source.getSystemId(), base)
        source.setSystemId(sysid)
        source.setByteStream(urllib2.urlopen(sysid))

    return source


def absolute_system_id(sysid, base=''):
    """Return sysid as an absolute, urllib-safe URI.

    A sysid naming an existing local path becomes a 'file:' URI built
    from its absolute path; this check comes first, so base is not
    consulted for existing paths.  Otherwise, when base is non-empty,
    sysid is resolved against it with Absolutize.

    Raises ValueError when the result is still not absolute.
    """
    if os.path.exists(sysid):
        sysid = 'file:%s' % os.path.abspath(sysid)
    elif base:
        sysid = Absolutize(sysid, base)
    # An explicit check instead of an assert statement: asserts are
    # removed when Python runs with -O, which would let a relative
    # identifier through to the URL opener.
    if not IsAbsolute(sysid):
        raise ValueError("system identifier %r is not absolute and "
                         "could not be resolved" % (sysid,))
    return MakeUrllibSafe(sysid)
class EventBroadcaster:
    """Takes a list of objects and forwards any method calls received
    to all objects in the list. The attribute list holds the list and
    can freely be modified by clients.

    Looking up any attribute that is not found normally yields an Event
    bound to that name; calling the Event calls the method of that name
    on every object currently in the list, in list order, with the same
    positional arguments.  Return values are discarded.
    """

    class Event:
        "Helper objects that represent event methods."

        def __init__(self,list,name):
            # Keeps a reference to the caller's list (no copy), so later
            # changes to that list are seen when the event is called.
            self.list=list
            self.name=name

        def __call__(self,*rest):
            "Call the named method on each object with the arguments given."
            for obj in self.list:
                # Argument unpacking replaces the deprecated apply().
                getattr(obj,self.name)(*rest)

    def __init__(self,list):
        # The parameter keeps its historical name, which shadows the
        # builtin, so that existing keyword callers still work.
        self.list=list

    def __getattr__(self,name):
        "Return an Event that broadcasts calls to the method called name."
        return self.Event(self.list,name)

    def __repr__(self):
        # The format string needs a conversion specifier; an empty one
        # makes the % operator raise TypeError.
        return "<EventBroadcaster instance at %d>" % id(self)
class Canonizer(saxlib.HandlerBase):
    """A SAX document handler that produces canonized XML output.

    Output goes to writer (any object with a write method, sys.stdout
    by default).  Attributes are written sorted by name, markup
    characters in text and attribute values are escaped, and tab, line
    feed and carriage return are written as numeric character
    references.  Character data outside the document element is
    dropped.
    """

    def __init__(self,writer=sys.stdout):
        # Number of elements currently open; 0 means outside the
        # document element.
        self.elem_level=0
        self.writer=writer

    def processingInstruction (self,target, remainder):
        "Write the processing instruction, except one whose target is 'xml'."
        if not target=="xml":
            self.writer.write("<?"+target+" "+remainder+"?>")

    def startElement(self,name,amap):
        "Write the start tag with its attributes sorted by name."
        self.writer.write("<"+name)

        for a_name in sorted(amap.keys()):
            self.writer.write(" "+a_name+"=\"")
            self.write_data(amap[a_name])
            self.writer.write("\"")
        self.writer.write(">")
        self.elem_level=self.elem_level+1

    def endElement(self,name):
        "Write the end tag for name."
        self.writer.write("</"+name+">")
        self.elem_level=self.elem_level-1

    def ignorableWhitespace(self,data,start_ix,length):
        "Treat ignorable whitespace exactly like character data."
        self.characters(data,start_ix,length)

    def characters(self,data,start_ix,length):
        "Write length characters of data from start_ix, inside elements only."
        if self.elem_level>0:
            self.write_data(data[start_ix:start_ix+length])

    def write_data(self,data):
        "Writes datachars to writer."
        # "&" has to be replaced first; otherwise the ampersands
        # introduced by the later replacements would be escaped again.
        escapes = (
            ("&", "&amp;"),
            ("<", "&lt;"),
            ("\"", "&quot;"),
            (">", "&gt;"),
            (chr(9), "&#9;"),
            (chr(10), "&#10;"),
            (chr(13), "&#13;"),
        )
        for raw, escaped in escapes:
            data=data.replace(raw, escaped)
        self.writer.write(data)
class XMLReader:
    """Callback-based interface for reading an XML document.

    This is the interface a SAX2 driver for an XML parser has to
    implement.  Through it an application queries and sets parser
    features and properties, registers the handlers that receive
    document events, and starts a parse.

    SAX interfaces are assumed to be synchronous: a parse method must
    not return before parsing is complete, and a reader must wait for
    an event-handler callback to return before reporting the next
    event.
    """

    def __init__(self):
        # Start with the default handler of each kind from the handler
        # module; applications replace them through the setters below.
        self._cont_handler = handler.ContentHandler()
        self._dtd_handler = handler.DTDHandler()
        self._ent_handler = handler.EntityResolver()
        self._err_handler = handler.ErrorHandler()

    def parse(self, source):
        """Parse an XML document from a system identifier or an
        InputSource.

        Not implemented here; always raises NotImplementedError."""
        raise NotImplementedError("This method must be implemented!")

    # --- handler registration

    def getContentHandler(self):
        "Return the ContentHandler currently registered."
        return self._cont_handler

    def setContentHandler(self, handler):
        "Register the object that receives document content events."
        self._cont_handler = handler

    def getDTDHandler(self):
        "Return the DTD handler currently registered."
        return self._dtd_handler

    def setDTDHandler(self, handler):
        "Register the object that receives basic DTD-related events."
        self._dtd_handler = handler

    def getEntityResolver(self):
        "Return the EntityResolver currently registered."
        return self._ent_handler

    def setEntityResolver(self, resolver):
        "Register the object that resolves external entities."
        self._ent_handler = resolver

    def getErrorHandler(self):
        "Return the ErrorHandler currently registered."
        return self._err_handler

    def setErrorHandler(self, handler):
        "Register the object that receives error-message events."
        self._err_handler = handler

    # --- configuration; this base implementation supports none of it

    def setLocale(self, locale):
        """Request a locale for errors and warnings.

        SAX parsers need not localize errors and warnings, but one that
        cannot support the requested locale must throw a SAX exception.
        Applications may ask for a locale change in the middle of a
        parse.

        This implementation always raises SAXNotSupportedException."""
        raise SAXNotSupportedException("Locale support not implemented")

    def getFeature(self, name):
        """Look up the state of a SAX2 feature.

        This implementation always raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        This implementation always raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Look up the value of a SAX2 property.

        This implementation always raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        This implementation always raises SAXNotRecognizedException."""
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)
class InputSource:
    """Bundle of what an XMLReader needs in order to read an entity.

    An instance can carry a public identifier, a system identifier, a
    byte stream (optionally with its character encoding) and/or a
    character stream.

    Applications create these objects to pass to XMLReader.parse and to
    return from EntityResolver.resolveEntity.

    An InputSource belongs to the application: the XMLReader must not
    modify one handed to it, although it may copy it and modify the
    copy.
    """

    def __init__(self, system_id = None):
        # Only the system identifier can be given up front; everything
        # else starts out unset.
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Store the public identifier."
        self.__public_id = public_id

    def getPublicId(self):
        "Return the stored public identifier."
        return self.__public_id

    def setSystemId(self, system_id):
        "Store the system identifier."
        self.__system_id = system_id

    def getSystemId(self):
        "Return the stored system identifier."
        return self.__system_id

    def setEncoding(self, encoding):
        """Store the character encoding.

        The value must be a string that would be acceptable in an XML
        encoding declaration (section 4.3.3 of the XML recommendation).

        The encoding is ignored when the InputSource also holds a
        character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Return the stored character encoding."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Store the byte stream: a Python file-like object that does
        no byte-to-character conversion.

        A SAX parser ignores it when a character stream is also set,
        but prefers it to opening a URI connection itself.

        An application that knows the encoding of the byte stream
        should record it with setEncoding."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Return the stored byte stream.

        getEncoding gives the character encoding of this stream, or
        None when it is unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Store the character stream, which must be a Python 2.0
        Unicode-wrapped file-like object that converts to Unicode
        strings.

        When a character stream is set, a SAX parser ignores any byte
        stream and does not try to open a URI connection to the system
        identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Return the stored character stream."
        return self.__charfile
class AttributesNSImpl(AttributesImpl):
    """Namespace-aware attribute collection.

    Attributes are keyed by (ns_uri, lname) pairs; a second mapping
    records the qualified name of each pair.
    """

    def __init__(self, attrs, qnames):
        """Build the collection from two mappings.

        attrs has the form {(ns_uri, lname): value, ...} and qnames the
        form {(ns_uri, lname): qname, ...}.  Both are stored as given,
        without copying."""
        self._attrs = attrs
        self._qnames = qnames

    def getValueByQName(self, name):
        """Return the value of the attribute whose qualified name is
        name; raise KeyError when there is none."""
        for nsname, qname in self._qnames.items():
            if qname != name:
                continue
            return self._attrs[nsname]

        raise KeyError(name)

    def getNameByQName(self, name):
        """Return the (ns_uri, lname) pair whose qualified name is
        name; raise KeyError when there is none."""
        for nsname, qname in self._qnames.items():
            if qname != name:
                continue
            return nsname

        raise KeyError(name)

    def getQNameByName(self, name):
        "Return the qualified name recorded for the (ns_uri, lname) pair."
        return self._qnames[name]

    def getQNames(self):
        "Return all recorded qualified names."
        return self._qnames.values()

    def copy(self):
        """Return a new instance of the same class built from the same
        two mappings (the mappings themselves are not copied)."""
        return self.__class__(self._attrs, self._qnames)