from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
    PSOperators,
    ps_StandardEncoding,
    ps_array,
    ps_boolean,
    ps_dict,
    ps_integer,
    ps_literal,
    ps_mark,
    ps_name,
    ps_operator,
    ps_procedure,
    ps_procmark,
    ps_real,
    ps_string,
)
import re
from collections.abc import Callable
from string import whitespace
import logging


log = logging.getLogger(__name__)


ps_special = b"()<>[]{}%"  # / is one too, but we take care of that one differently


skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
commentRE = re.compile(b"%[^\n\r]*")

# XXX This is not entirely correct, as it doesn't allow *nested* embedded parens:
stringPat = rb"""
    \(
        (
            (
                [^()]* \\ [()]
            )
            |
            (
                [^()]* \( [^()]* \)
            )
        )*
        [^()]*
    \)
"""
# All whitespace is stripped before compiling, so the layout above is purely
# for readability; the first alternative matches a backslash-escaped paren.
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)
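
# For example, stringRE fully matches b"(a (b) c)" (one embedded pair) and
# b"(esc \\) paren)" (an escaped paren), but the doubly nested
# b"(a (b (c)) d)" does not match, so the tokenizer reports a bad string.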

hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))


class PSTokenError(Exception):
    pass


class PSError(Exception):
    pass


class PSTokenizer(object):
    def __init__(self, buf=b"", encoding="ascii"):
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos : newpos]
        self.pos = newpos
        return r

    def close(self):
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(
        self,
        # localize some stuff, for performance
        len=len,
        ps_special=ps_special,
        stringmatch=stringRE.match,
        hexstringmatch=hexstringRE.match,
        commentmatch=commentRE.match,
        endmatch=endofthingRE.match,
    ):
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b"{}[]":
                tokentype = "do_special"
                token = char
            elif char == b"%":
                tokentype = "do_comment"
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b"(":
                tokentype = "do_string"
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad string at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b"<":
                tokentype = "do_hexstring"
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError("bad hexstring at character %d" % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError("bad token at character %d" % pos)
        else:
            if char == b"/":
                tokentype = "do_literal"
                m = endmatch(buf, pos + 1)
            else:
                tokentype = ""
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError("bad token at character %d" % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token
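
    # A quick illustration of the token stream this produces (the input
    # bytes are made up for the example):
    #
    #     >>> t = PSTokenizer(b"/Foo 42 def")
    #     >>> t.getnexttoken()
    #     ('do_literal', '/Foo')
    #     >>> t.getnexttoken()
    #     ('', '42')
    #     >>> t.getnexttoken()
    #     ('', 'def')
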
    def skipwhite(self, whitematch=skipwhiteRE.match):
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos :]
        # eexec.decrypt returns the decrypted data plus the final decryption
        # key; the key is not needed here. The first four bytes of the
        # decrypted stream are random padding, hence pos = 4.
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        if not hasattr(self, "dirtybuf"):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf


class PSInterpreter(PSOperators):
    def __init__(self, encoding="ascii"):
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        systemdict = self.dictstack[0]
        systemdict["["] = systemdict["mark"] = self.mark = ps_mark()
        systemdict["]"] = ps_operator("]", self.do_makearray)
        systemdict["true"] = ps_boolean(1)
        systemdict["false"] = ps_boolean(0)
        systemdict["StandardEncoding"] = ps_array(ps_StandardEncoding)
        systemdict["FontDirectory"] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        # Collect all ps_* methods (from this class and its bases) as
        # PostScript operators, keyed by name without the "ps_" prefix.
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == "ps_":
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except:
            if self.tokenizer is not None:
                # Log some context around the error position, then re-raise.
                log.debug(
                    "ps error:\n"
                    "- - - - - - -\n"
                    "%s\n"
                    ">>>\n"
                    "%s\n"
                    "- - - - - - -",
                    self.tokenizer.buf[self.tokenizer.pos - 50 : self.tokenizer.pos],
                    self.tokenizer.buf[self.tokenizer.pos : self.tokenizer.pos + 50],
                )
            raise
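
    # A minimal sketch of driving the interpreter directly (the snippet and
    # the name "answer" are illustrative; "def" is the standard operator
    # inherited from PSOperators):
    #
    #     >>> interp = PSInterpreter()
    #     >>> interp.interpret(b"/answer 42 def")
    #     >>> interp.dictstack[1]["answer"].value
    #     42
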
    def handle_object(self, object):
        # Execute the object unless we are inside a procedure definition or
        # the object is literal; otherwise just push it on the stack.
        if not (self.proclevel or object.literal or object.type == "proceduretype"):
            if object.type != "operatortype":
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == "proceduretype":
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        # Search the dictionary stack from top to bottom.
        dictstack = self.dictstack
        for i in range(len(dictstack) - 1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError("name error: " + str(name))

    def do_token(
        self,
        token,
        int=int,
        float=float,
        ps_name=ps_name,
        ps_integer=ps_integer,
        ps_real=ps_real,
    ):
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if "#" in token:
                    hashpos = token.find("#")
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos + 1 :], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)
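
    # Note that do_token also accepts PostScript radix notation: "16#FF"
    # parses as base 16 and becomes ps_integer(255), "2#1010" becomes
    # ps_integer(10); anything unparsable falls through to ps_name.
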
    def do_comment(self, token):
        pass

    def do_literal(self, token):
        return ps_literal(token[1:])

    def do_string(self, token):
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            # Pad an odd-length hex string with a trailing zero nibble.
            hexStr = hexStr + "0"
        cleanstr = []
        for i in range(0, len(hexStr), 2):
            cleanstr.append(chr(int(hexStr[i : i + 2], 16)))
        cleanstr = "".join(cleanstr)
        return ps_string(cleanstr)

    def do_special(self, token):
        if token == "{":
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == "}":
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == "[":
            return self.mark
        elif token == "]":
            return ps_name("]")
        else:
            raise PSTokenError("unexpected special token: %r" % token)

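    # For example, interpreting b"[ 1 2 3 ]" pushes the mark and three
    # integers; the "]" operator (do_makearray, below) then pops back down
    # to the mark and pushes a ps_array holding the three values.
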
    def push(self, object):
        self.stack.append(object)

    def pop(self, *types):
        stack = self.stack
        if not stack:
            raise PSError("stack underflow")
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError(
                    "typecheck, expected %s, found %s" % (repr(types), object.type)
                )
        del stack[-1]
        return object

    def do_makearray(self):
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack


def unpack_item(item):
    # Recursively convert wrapped PS objects back into plain Python values;
    # procedures become tuples so they stay distinguishable from arrays.
    tp = type(item.value)
    if tp == dict:
        newitem = {}
        for key, value in item.value.items():
            newitem[key] = unpack_item(value)
    elif tp == list:
        newitem = [None] * len(item.value)
        for i in range(len(item.value)):
            newitem[i] = unpack_item(item.value[i])
        if item.type == "proceduretype":
            newitem = tuple(newitem)
    else:
        newitem = item.value
    return newitem


def suckfont(data, encoding="ascii"):
|
||
|
m = re.search(rb"/FontName\s+/([^ \t\n\r]+)\s+def", data)
|
||
|
if m:
|
||
|
fontName = m.group(1)
|
||
|
fontName = fontName.decode()
|
||
|
else:
|
||
|
fontName = None
|
||
|
interpreter = PSInterpreter(encoding=encoding)
|
||
|
interpreter.interpret(
|
||
|
b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop"
|
||
|
)
|
||
|
interpreter.interpret(data)
|
||
|
fontdir = interpreter.dictstack[0]["FontDirectory"].value
|
||
|
if fontName in fontdir:
|
||
|
rawfont = fontdir[fontName]
|
||
|
else:
|
||
|
# fall back, in case fontName wasn't found
|
||
|
fontNames = list(fontdir.keys())
|
||
|
if len(fontNames) > 1:
|
||
|
fontNames.remove("Helvetica")
|
||
|
fontNames.sort()
|
||
|
rawfont = fontdir[fontNames[0]]
|
||
|
interpreter.close()
|
||
|
return unpack_item(rawfont)
|
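

# Example usage (a minimal sketch; "MyFont.pfa" is a hypothetical path to a
# plain-text Type 1 font, and the "FontName" key is whatever the font's own
# program defined):
#
#     with open("MyFont.pfa", "rb") as f:
#         font = suckfont(f.read())
#     print(font["FontName"])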