198 lines
5.2 KiB
Python
198 lines
5.2 KiB
Python
# lang.py - dot language creation helpers
|
|
|
|
"""Quote strings to be valid DOT identifiers, assemble attribute lists."""
|
|
|
|
import collections
|
|
import functools
|
|
import re
|
|
|
|
from . import _compat
|
|
|
|
from . import tools
|
|
|
|
__all__ = ['quote', 'quote_edge', 'a_list', 'attr_list', 'escape', 'nohtml']
|
|
|
|
# https://www.graphviz.org/doc/info/lang.html
|
|
# https://www.graphviz.org/doc/info/attrs.html#k:escString
|
|
|
|
HTML_STRING = re.compile(r'<.*>$', re.DOTALL)
|
|
|
|
ID = re.compile(r'([a-zA-Z_][a-zA-Z0-9_]*|-?(\.[0-9]+|[0-9]+(\.[0-9]*)?))$')
|
|
|
|
KEYWORDS = {'node', 'edge', 'graph', 'digraph', 'subgraph', 'strict'}
|
|
|
|
COMPASS = {'n', 'ne', 'e', 'se', 's', 'sw', 'w', 'nw', 'c', '_'} # TODO
|
|
|
|
QUOTE_OPTIONAL_BACKSLASHES = re.compile(r'(?P<bs>(?:\\\\)*)'
|
|
r'\\?(?P<quote>")')
|
|
|
|
ESCAPE_UNESCAPED_QUOTES = functools.partial(QUOTE_OPTIONAL_BACKSLASHES.sub,
|
|
r'\g<bs>\\\g<quote>')
|
|
|
|
|
|
def quote(identifier,
|
|
is_html_string=HTML_STRING.match,
|
|
is_valid_id=ID.match, dot_keywords=KEYWORDS,
|
|
escape_unescaped_quotes=ESCAPE_UNESCAPED_QUOTES):
|
|
r"""Return DOT identifier from string, quote if needed.
|
|
|
|
>>> quote('')
|
|
'""'
|
|
|
|
>>> quote('spam')
|
|
'spam'
|
|
|
|
>>> quote('spam spam')
|
|
'"spam spam"'
|
|
|
|
>>> quote('-4.2')
|
|
'-4.2'
|
|
|
|
>>> quote('.42')
|
|
'.42'
|
|
|
|
>>> quote('<<b>spam</b>>')
|
|
'<<b>spam</b>>'
|
|
|
|
>>> quote(nohtml('<>'))
|
|
'"<>"'
|
|
|
|
>>> print(quote('"'))
|
|
"\""
|
|
|
|
>>> print(quote('\\"'))
|
|
"\""
|
|
|
|
>>> print(quote('\\\\"'))
|
|
"\\\""
|
|
|
|
>>> print(quote('\\\\\\"'))
|
|
"\\\""
|
|
"""
|
|
if is_html_string(identifier) and not isinstance(identifier, NoHtml):
|
|
pass
|
|
elif not is_valid_id(identifier) or identifier.lower() in dot_keywords:
|
|
return '"%s"' % escape_unescaped_quotes(identifier)
|
|
return identifier
|
|
|
|
|
|
def quote_edge(identifier):
|
|
"""Return DOT edge statement node_id from string, quote if needed.
|
|
|
|
>>> quote_edge('spam')
|
|
'spam'
|
|
|
|
>>> quote_edge('spam spam:eggs eggs')
|
|
'"spam spam":"eggs eggs"'
|
|
|
|
>>> quote_edge('spam:eggs:s')
|
|
'spam:eggs:s'
|
|
"""
|
|
node, _, rest = identifier.partition(':')
|
|
parts = [quote(node)]
|
|
if rest:
|
|
port, _, compass = rest.partition(':')
|
|
parts.append(quote(port))
|
|
if compass:
|
|
parts.append(compass)
|
|
return ':'.join(parts)
|
|
|
|
|
|
def a_list(label=None, kwargs=None, attributes=None):
|
|
"""Return assembled DOT a_list string.
|
|
|
|
>>> a_list('spam', {'spam': None, 'ham': 'ham ham', 'eggs': ''})
|
|
'label=spam eggs="" ham="ham ham"'
|
|
"""
|
|
result = ['label=%s' % quote(label)] if label is not None else []
|
|
if kwargs:
|
|
items = ['%s=%s' % (quote(k), quote(v))
|
|
for k, v in tools.mapping_items(kwargs) if v is not None]
|
|
result.extend(items)
|
|
if attributes:
|
|
if hasattr(attributes, 'items'):
|
|
attributes = tools.mapping_items(attributes)
|
|
items = ['%s=%s' % (quote(k), quote(v))
|
|
for k, v in attributes if v is not None]
|
|
result.extend(items)
|
|
return ' '.join(result)
|
|
|
|
|
|
def attr_list(label=None, kwargs=None, attributes=None):
|
|
"""Return assembled DOT attribute list string.
|
|
|
|
Sorts ``kwargs`` and ``attributes`` if they are plain dicts (to avoid
|
|
unpredictable order from hash randomization in Python 3 versions).
|
|
|
|
>>> attr_list()
|
|
''
|
|
|
|
>>> attr_list('spam spam', kwargs={'eggs': 'eggs', 'ham': 'ham ham'})
|
|
' [label="spam spam" eggs=eggs ham="ham ham"]'
|
|
|
|
>>> attr_list(kwargs={'spam': None, 'eggs': ''})
|
|
' [eggs=""]'
|
|
"""
|
|
content = a_list(label, kwargs, attributes)
|
|
if not content:
|
|
return ''
|
|
return ' [%s]' % content
|
|
|
|
|
|
def escape(s):
|
|
r"""Return ``s`` as literal disabling special meaning of backslashes and ``'<...>'``.
|
|
|
|
see also https://www.graphviz.org/doc/info/attrs.html#k:escString
|
|
|
|
Args:
|
|
s: String in which backslashes and ``'<...>'`` should be treated as literal.
|
|
|
|
Raises:
|
|
TypeError: If ``s`` is not a ``str`` on Python 3, or a ``str``/``unicode`` on Python 2.
|
|
|
|
>>> print(escape(r'\l'))
|
|
\\l
|
|
"""
|
|
return nohtml(s.replace('\\', '\\\\'))
|
|
|
|
|
|
class NoHtml(object):
|
|
"""Mixin for string subclasses disabling fall-through of ``'<...>'``."""
|
|
|
|
__slots__ = ()
|
|
|
|
_doc = "%s subclass that does not treat ``'<...>'`` as DOT HTML string."
|
|
|
|
@classmethod
|
|
def _subcls(cls, other):
|
|
name = '%s_%s' % (cls.__name__, other.__name__)
|
|
bases = (other, cls)
|
|
ns = {'__doc__': cls._doc % other.__name__}
|
|
return type(name, bases, ns)
|
|
|
|
|
|
NOHTML = collections.OrderedDict((c, NoHtml._subcls(c)) for c in _compat.string_classes)
|
|
|
|
|
|
def nohtml(s):
|
|
"""Return copy of ``s`` that will not treat ``'<...>'`` as DOT HTML string in quoting.
|
|
|
|
Args:
|
|
s: String in which leading ``'<'`` and trailing ``'>'`` should be treated as literal.
|
|
|
|
Raises:
|
|
TypeError: If ``s`` is not a ``str`` on Python 3, or a ``str``/``unicode`` on Python 2.
|
|
|
|
>>> quote('<>-*-<>')
|
|
'<>-*-<>'
|
|
|
|
>>> quote(nohtml('<>-*-<>'))
|
|
'"<>-*-<>"'
|
|
"""
|
|
try:
|
|
subcls = NOHTML[type(s)]
|
|
except KeyError:
|
|
raise TypeError('%r does not have one of the required types:'
|
|
' %r' % (s, list(NOHTML)))
|
|
return subcls(s)
|