Pracownia_programowania/venv/Lib/site-packages/Cython/Plex/Traditional.py

159 lines
4.0 KiB
Python
Raw Normal View History

2020-02-01 19:54:00 +01:00
#=======================================================================
#
# Python Lexical Analyser
#
# Traditional Regular Expression Syntax
#
#=======================================================================
from __future__ import absolute_import
from .Regexps import Alt, Seq, Rep, Rep1, Opt, Any, AnyBut, Bol, Eol, Char
from .Errors import PlexError
class RegexpSyntaxError(PlexError):
pass
def re(s):
"""
Convert traditional string representation of regular expression |s|
into Plex representation.
"""
return REParser(s).parse_re()
class REParser(object):
def __init__(self, s):
self.s = s
self.i = -1
self.end = 0
self.next()
def parse_re(self):
re = self.parse_alt()
if not self.end:
self.error("Unexpected %s" % repr(self.c))
return re
def parse_alt(self):
"""Parse a set of alternative regexps."""
re = self.parse_seq()
if self.c == '|':
re_list = [re]
while self.c == '|':
self.next()
re_list.append(self.parse_seq())
re = Alt(*re_list)
return re
def parse_seq(self):
"""Parse a sequence of regexps."""
re_list = []
while not self.end and not self.c in "|)":
re_list.append(self.parse_mod())
return Seq(*re_list)
def parse_mod(self):
"""Parse a primitive regexp followed by *, +, ? modifiers."""
re = self.parse_prim()
while not self.end and self.c in "*+?":
if self.c == '*':
re = Rep(re)
elif self.c == '+':
re = Rep1(re)
else: # self.c == '?'
re = Opt(re)
self.next()
return re
def parse_prim(self):
"""Parse a primitive regexp."""
c = self.get()
if c == '.':
re = AnyBut("\n")
elif c == '^':
re = Bol
elif c == '$':
re = Eol
elif c == '(':
re = self.parse_alt()
self.expect(')')
elif c == '[':
re = self.parse_charset()
self.expect(']')
else:
if c == '\\':
c = self.get()
re = Char(c)
return re
def parse_charset(self):
"""Parse a charset. Does not include the surrounding []."""
char_list = []
invert = 0
if self.c == '^':
invert = 1
self.next()
if self.c == ']':
char_list.append(']')
self.next()
while not self.end and self.c != ']':
c1 = self.get()
if self.c == '-' and self.lookahead(1) != ']':
self.next()
c2 = self.get()
for a in range(ord(c1), ord(c2) + 1):
char_list.append(chr(a))
else:
char_list.append(c1)
chars = ''.join(char_list)
if invert:
return AnyBut(chars)
else:
return Any(chars)
def next(self):
"""Advance to the next char."""
s = self.s
i = self.i = self.i + 1
if i < len(s):
self.c = s[i]
else:
self.c = ''
self.end = 1
def get(self):
if self.end:
self.error("Premature end of string")
c = self.c
self.next()
return c
def lookahead(self, n):
"""Look ahead n chars."""
j = self.i + n
if j < len(self.s):
return self.s[j]
else:
return ''
def expect(self, c):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if self.c == c:
self.next()
else:
self.error("Missing %s" % repr(c))
def error(self, mess):
"""Raise exception to signal syntax error in regexp."""
raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
repr(self.s), self.i, mess))