# Copyright (c) 2014, Menno Smits
# Released subject to the New BSD License
# Please see http://en.wikipedia.org/wiki/BSD_licenses

"""
A lexical analyzer class for IMAP responses.

Although Lexer does all the work, TokenSource is the class to use for
external callers.
"""

from __future__ import unicode_literals

import six

from .util import assert_imap_protocol

__all__ = ["TokenSource"]

# The character classes below are sets of integer byte values; the lexer
# compares them against the items produced by six.iterbytes().
CTRL_CHARS = frozenset(c for c in range(32))
ALL_CHARS = frozenset(c for c in range(256))
SPECIALS = frozenset(c for c in six.iterbytes(b' ()%"['))
# Bytes that may be accumulated into an ordinary (unquoted) token.
NON_SPECIALS = ALL_CHARS - SPECIALS - CTRL_CHARS
WHITESPACE = frozenset(c for c in six.iterbytes(b' \t\r\n'))

BACKSLASH = ord('\\')
OPEN_SQUARE = ord('[')
CLOSE_SQUARE = ord(']')
DOUBLE_QUOTE = ord('"')


class TokenSource(object):
    """
    A simple iterator for the Lexer class that also provides access to
    the current IMAP literal.
    """

    def __init__(self, text):
        """
        Create a Lexer over *text* and start iterating it.

        *text* is handed to Lexer unchanged: an iterable of response
        records (see LiteralHandlingIter for the accepted record forms).
        """
        self.lex = Lexer(text)
        self.src = iter(self.lex)

    @property
    def current_literal(self):
        """
        The literal attached to the record currently being lexed (None
        when that record carries no literal).

        Only meaningful once iteration has started; before that the
        lexer has no current source.
        """
        return self.lex.current_source.literal

    def __iter__(self):
        # Always hand back the same underlying token iterator so that
        # repeated iter() calls continue where the previous one stopped.
        return self.src


class Lexer(object):
    """
    A lexical analyzer class for IMAP
    """

    def __init__(self, text):
        """
        Wrap each record of *text* in a LiteralHandlingIter.

        The wrapping is done lazily by a generator expression, so records
        are only examined as iteration reaches them.
        """
        self.sources = (LiteralHandlingIter(self, chunk) for chunk in text)
        self.current_source = None

    def read_until(self, stream_i, end_char, escape=True):
        """
        Consume *stream_i* up to and including the next *end_char*.

        Returns a bytearray holding the bytes read followed by
        *end_char*. The opening delimiter is not included; callers add
        it themselves.

        When *escape* is true a backslash escapes the following byte: a
        backslash followed by a backslash or by *end_char* is reduced to
        that following byte (and an escaped *end_char* does not end the
        token). A backslash followed by anything else is kept verbatim.

        Raises ValueError if the stream runs out before *end_char* is
        found.
        """
        token = bytearray()
        try:
            for nextchar in stream_i:
                if escape and nextchar == BACKSLASH:
                    escaper = nextchar
                    # May raise StopIteration if the backslash is the
                    # last byte; handled below as an unterminated token.
                    nextchar = six.next(stream_i)
                    if nextchar != escaper and nextchar != end_char:
                        token.append(escaper)  # Don't touch invalid escaping
                elif nextchar == end_char:
                    break
                token.append(nextchar)
            else:
                # Stream exhausted without ever seeing end_char.
                raise ValueError("No closing '%s'" % chr(end_char))
        except StopIteration:
            raise ValueError("No closing '%s'" % chr(end_char))
        token.append(end_char)
        return token

    def read_token_stream(self, stream_i):
        """
        Generate the tokens found in *stream_i* as bytearrays.

        *stream_i* must be an iterator of integer byte values that also
        offers a push() method for returning one item to the stream (see
        PushableIterator).

        Tokenisation rules:

        * runs of whitespace separate tokens and are discarded;
        * runs of NON_SPECIALS bytes form a token;
        * a "[" starts a section that is copied verbatim (no escape
          processing) up to and including the closing "]", and remains
          part of the token being built;
        * a double quote must begin a token (checked with
          assert_imap_protocol); the quoted string, including both
          quotes, is yielded as one token;
        * any other byte ends the token in progress and is yielded as a
          single-byte token of its own.
        """
        # Bound to locals because they are used for every byte processed.
        whitespace = WHITESPACE
        wordchars = NON_SPECIALS
        read_until = self.read_until

        while True:
            # Whitespace
            for nextchar in stream_i:
                if nextchar not in whitespace:
                    # Hand the first non-whitespace byte back so the
                    # token loop below sees it.
                    stream_i.push(nextchar)
                    break    # done skipping over the whitespace

            # Non-whitespace
            token = bytearray()
            for nextchar in stream_i:
                if nextchar in wordchars:
                    token.append(nextchar)
                elif nextchar == OPEN_SQUARE:
                    token.append(nextchar)
                    token.extend(read_until(stream_i, CLOSE_SQUARE, escape=False))
                else:
                    if nextchar in whitespace:
                        yield token
                    elif nextchar == DOUBLE_QUOTE:
                        assert_imap_protocol(not token)
                        token.append(nextchar)
                        token.extend(read_until(stream_i, nextchar))
                        yield token
                    else:
                        # Other punctuation, eg. "(". This ends the current token.
                        if token:
                            yield token
                        yield bytearray([nextchar])
                    # Token finished: go round the outer loop again.
                    break
            else:
                # The stream is exhausted: flush any partial token and
                # stop generating.
                if token:
                    yield token
                break

    def __iter__(self):
        """
        Yield every token from every source, in order, as bytes.

        current_source is updated before a source's tokens are produced
        so that TokenSource.current_literal refers to the right record.
        """
        for source in self.sources:
            self.current_source = source
            for tok in self.read_token_stream(iter(source)):
                yield bytes(tok)


# imaplib has poor handling of 'literals' - it both fails to remove the
# {size} marker, and fails to keep responses grouped into the same logical
# 'line'.  What we end up with is a list of response 'records', where each
# record is either a simple string, or tuple of (str_with_lit, literal) -
# where str_with_lit is a string with the {xxx} marker at its end.  Note
# that each element of this list does *not* correspond 1:1 with the
# untagged responses.
# (http://bugs.python.org/issue5045 also has comments about this)
# So: we have a special object for each of these records.  When a
# string literal is processed, we peek into this object to grab the
# literal.
class LiteralHandlingIter(object):
    """
    One response record: its text plus the literal (if any) that
    accompanies it.

    Iterating yields the integer byte values of the record text through
    a PushableIterator.
    """

    def __init__(self, lexer, resp_record):
        """
        *resp_record* is either a plain bytes line, or a
        (str_with_lit, literal) tuple whose first element must end with
        the "}" of the literal size marker.
        """
        self.lexer = lexer
        if isinstance(resp_record, tuple):
            # A 'record' with a string which includes a literal marker, and
            # the literal itself.
            self.src_text = resp_record[0]
            assert_imap_protocol(self.src_text.endswith(b"}"), self.src_text)
            self.literal = resp_record[1]
        else:
            # just a line with no literals.
            self.src_text = resp_record
            self.literal = None

    def __iter__(self):
        # A fresh iterator each time; push-back state is not shared.
        return PushableIterator(six.iterbytes(self.src_text))


class PushableIterator(object):
    """
    Iterator wrapper that allows items to be pushed back.

    Pushed items are returned before anything further is taken from the
    wrapped iterator, most recently pushed first.
    """

    # Sentinel object; not referenced elsewhere in this module.
    NO_MORE = object()

    def __init__(self, it):
        self.it = iter(it)
        self.pushed = []

    def __iter__(self):
        return self

    def __next__(self):
        """Return a pushed-back item if there is one, else the next item
        from the wrapped iterator (propagating its StopIteration)."""
        if self.pushed:
            return self.pushed.pop()
        return six.next(self.it)

    # For Python 2 compatibility
    next = __next__

    def push(self, item):
        """Return *item* to the stream so the next read yields it."""
        self.pushed.append(item)