# # ElementTree # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $ # # limited xpath support for element trees # # history: # 2003-05-23 fl created # 2003-05-28 fl added support for // etc # 2003-08-27 fl fixed parsing of periods in element names # # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. # # fredrik@pythonware.com # http://www.pythonware.com # # -------------------------------------------------------------------- # The ElementTree toolkit is # # Copyright (c) 1999-2004 by Fredrik Lundh # # By obtaining, using, and/or copying this software and/or its # associated documentation, you agree that you have read, understood, # and will comply with the following terms and conditions: # # Permission to use, copy, modify, and distribute this software and # its associated documentation for any purpose and without fee is # hereby granted, provided that the above copyright notice appears in # all copies, and that both that copyright notice and this permission # notice appear in supporting documentation, and that the name of # Secret Labs AB or the author not be used in advertising or publicity # pertaining to distribution of the software without specific, written # prior permission. # # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE # OF THIS SOFTWARE. # -------------------------------------------------------------------- # Licensed to PSF under a Contributor Agreement. # See http://www.python.org/2.4/license for licensing details. ## # Implementation module for XPath support. There's usually no reason # to import this module directly; the ElementTree does this for # you, if needed. ## import re xpath_tokenizer = re.compile( "(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+" ).findall class xpath_descendant_or_self: pass ## # Wrapper for a compiled XPath. class Path: ## # Create an Path instance from an XPath expression. def __init__(self, path): tokens = xpath_tokenizer(path) # the current version supports 'path/path'-style expressions only self.path = [] self.tag = None if tokens and tokens[0][0] == "/": raise SyntaxError("cannot use absolute path on element") while tokens: op, tag = tokens.pop(0) if tag or op == "*": self.path.append(tag or op) elif op == ".": pass elif op == "/": self.path.append(xpath_descendant_or_self()) continue else: raise SyntaxError("unsupported path syntax (%s)" % op) if tokens: op, tag = tokens.pop(0) if op != "/": raise SyntaxError( "expected path separator (%s)" % (op or tag) ) if self.path and isinstance(self.path[-1], xpath_descendant_or_self): raise SyntaxError("path cannot end with //") if len(self.path) == 1 and isinstance(self.path[0], type("")): self.tag = self.path[0] ## # Find first matching object. def find(self, element): tag = self.tag if tag is None: nodeset = self.findall(element) if not nodeset: return None return nodeset[0] for elem in element: if elem.tag == tag: return elem return None ## # Find text for first matching object. def findtext(self, element, default=None): tag = self.tag if tag is None: nodeset = self.findall(element) if not nodeset: return default return nodeset[0].text or "" for elem in element: if elem.tag == tag: return elem.text or "" return default ## # Find all matching objects. def findall(self, element): nodeset = [element] index = 0 while 1: try: path = self.path[index] index = index + 1 except IndexError: return nodeset set = [] if isinstance(path, xpath_descendant_or_self): try: tag = self.path[index] if not isinstance(tag, type("")): tag = None else: index = index + 1 except IndexError: tag = None # invalid path for node in nodeset: new = list(node.getiterator(tag)) if new and new[0] is node: set.extend(new[1:]) else: set.extend(new) else: for node in nodeset: for node in node: if path == "*" or node.tag == path: set.append(node) if not set: return [] nodeset = set _cache = {} ## # (Internal) Compile path. def _compile(path): p = _cache.get(path) if p is not None: return p p = Path(path) if len(_cache) >= 100: _cache.clear() _cache[path] = p return p ## # Find first matching object. def find(element, path): return _compile(path).find(element) ## # Find text for first matching object. def findtext(element, path, default=None): return _compile(path).findtext(element, default) ## # Find all matching objects. def findall(element, path): return _compile(path).findall(element)