Pracownia_programowania/venv/Lib/site-packages/Cython/Plex/Traditional.py

#=======================================================================
#
#   Python Lexical Analyser
#
#   Traditional Regular Expression Syntax
#
#=======================================================================

from __future__ import absolute_import

from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
from .Errors import PlexError


class RegexpSyntaxError(PlexError):
    """Raised when a traditional-syntax regexp string cannot be parsed."""


def re(s):
    """
    Parse the traditional-syntax regular expression string |s| and
    return the resulting Plex regexp object.
    """
    parser = REParser(s)
    return parser.parse_re()


class REParser(object):
    """
    Recursive-descent parser for traditional regular expression syntax.

    State kept on the instance:
      s   -- the string being parsed
      i   -- index of the current character within |s|
      c   -- the current character, or '' once the input is exhausted
      end -- 1 once the input is exhausted, 0 before that
    """

    def __init__(self, s):
        self.s = s
        self.end = 0
        # Start one position before the string so that the initial
        # next() lands on index 0.
        self.i = -1
        self.next()

    def parse_re(self):
        """Parse the whole string; leftover input is a syntax error."""
        parsed = self.parse_alt()
        if self.end:
            return parsed
        # Something (e.g. an unmatched ')') stopped the parse early.
        self.error("Unexpected %s" % repr(self.c))
        return parsed

    def parse_alt(self):
        """Parse one or more sequences separated by '|'."""
        first = self.parse_seq()
        if self.c != '|':
            # No alternation: hand back the lone sequence unwrapped.
            return first
        branches = [first]
        while self.c == '|':
            self.next()
            branches.append(self.parse_seq())
        return Alt(*branches)

    def parse_seq(self):
        """Parse consecutive modified primitives up to '|', ')' or end."""
        items = []
        while not (self.end or self.c in "|)"):
            items.append(self.parse_mod())
        return Seq(*items)

    def parse_mod(self):
        """Parse a primitive and apply any trailing *, + or ? modifiers."""
        node = self.parse_prim()
        while True:
            # The end check comes first because '' is "in" every string.
            if self.end or self.c not in "*+?":
                break
            modifier = self.c
            if modifier == '*':
                node = Rep(node)
            elif modifier == '+':
                node = Rep1(node)
            else:  # modifier == '?'
                node = Opt(node)
            self.next()
        return node

    def parse_prim(self):
        """
        Parse a single primitive: '.', '^', '$', a parenthesised group,
        a bracketed charset, or a literal character. A backslash makes
        the character that follows it a literal.
        """
        ch = self.get()
        if ch == '.':
            return AnyBut("\n")
        if ch == '^':
            return Bol
        if ch == '$':
            return Eol
        if ch == '(':
            group = self.parse_alt()
            self.expect(')')
            return group
        if ch == '[':
            charset = self.parse_charset()
            self.expect(']')
            return charset
        if ch == '\\':
            # Escaped character: take whatever follows literally.
            ch = self.get()
        return Char(ch)

    def parse_charset(self):
        """
        Parse the inside of a [...] charset (brackets excluded).

        A leading '^' inverts the set, a ']' in first position is a
        literal member, and 'x-y' denotes the inclusive range from x
        to y. A '-' directly before the closing ']' is a literal.
        """
        negated = self.c == '^'
        if negated:
            self.next()
        members = []
        if self.c == ']':
            members.append(']')
            self.next()
        while not (self.end or self.c == ']'):
            low = self.get()
            is_range = self.c == '-' and self.lookahead(1) != ']'
            if not is_range:
                members.append(low)
                continue
            self.next()  # skip over the '-'
            high = self.get()
            members.extend(
                chr(code) for code in range(ord(low), ord(high) + 1))
        chars = ''.join(members)
        build = AnyBut if negated else Any
        return build(chars)

    def next(self):
        """Move to the following char, flagging the end when past it."""
        self.i += 1
        if self.i >= len(self.s):
            self.c = ''
            self.end = 1
        else:
            self.c = self.s[self.i]

    def get(self):
        """Return the current char and advance; error if already at end."""
        if self.end:
            self.error("Premature end of string")
        current = self.c
        self.next()
        return current

    def lookahead(self, n):
        """Return the char n places past the current one, or '' if none."""
        target = self.i + n
        return self.s[target] if target < len(self.s) else ''

    def expect(self, c):
        """
        Consume character |c| if it is the current character;
        otherwise report a syntax error.
        """
        if self.c != c:
            self.error("Missing %s" % repr(c))
            return
        self.next()

    def error(self, mess):
        """Raise RegexpSyntaxError describing |mess| at the current position."""
        details = (repr(self.s), self.i, mess)
        raise RegexpSyntaxError(
            "Syntax error in regexp %s at position %d: %s" % details)