# Attribute List Extension for Python-Markdown
# ============================================

# Adds attribute list syntax. Inspired by
# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
# feature of the same name.

# See https://Python-Markdown.github.io/extensions/attr_list
# for documentation.

# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

# All changes Copyright 2011-2014 The Python Markdown Project

# License: [BSD](https://opensource.org/licenses/bsd-license.php)
|
||
|
|
||
|
"""
|
||
|
Adds attribute list syntax. Inspired by
|
||
|
[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
|
||
|
feature of the same name.
|
||
|
|
||
|
See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
|
||
|
for details.
|
||
|
"""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
from typing import TYPE_CHECKING
|
||
|
|
||
|
from . import Extension
|
||
|
from ..treeprocessors import Treeprocessor
|
||
|
import re
|
||
|
|
||
|
if TYPE_CHECKING: # pragma: no cover
|
||
|
from xml.etree.ElementTree import Element
|
||
|
|
||
|
|
||
|
def _handle_double_quote(s, t):
|
||
|
k, v = t.split('=', 1)
|
||
|
return k, v.strip('"')
|
||
|
|
||
|
|
||
|
def _handle_single_quote(s, t):
|
||
|
k, v = t.split('=', 1)
|
||
|
return k, v.strip("'")
|
||
|
|
||
|
|
||
|
def _handle_key_value(s, t):
|
||
|
return t.split('=', 1)
|
||
|
|
||
|
|
||
|
def _handle_word(s, t):
|
||
|
if t.startswith('.'):
|
||
|
return '.', t[1:]
|
||
|
if t.startswith('#'):
|
||
|
return 'id', t[1:]
|
||
|
return t, t
|
||
|
|
||
|
|
||
|
# Tokenizer for the text inside an attribute list. Each entry pairs a pattern
# with the handler that turns the matched token into a `(key, value)` pair.
# The patterns are listed from most to least specific; the order matters
# because a bare word pattern would otherwise also match the start of a
# `key=value` token.
_scanner = re.Scanner([
    # key="value" -- the value may contain spaces; it ends at the next `"`.
    (r'[^ =}]+=".*?"', _handle_double_quote),
    # key='value' -- same as above with single quotes.
    (r"[^ =}]+='.*?'", _handle_single_quote),
    # key=value -- unquoted value without spaces, `=` or `}`.
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    # bare word, including the `.class` and `#id` shorthands.
    (r'[^ =}]+', _handle_word),
    # a single space separates tokens; a `None` action produces no token.
    (r' ', None)
])
|
||
|
|
||
|
|
||
|
def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Parse an attribute list into attribute tuples plus any leftover text.

    The string is tokenized with the module-level `_scanner`. The first item
    of the result is the list of parsed attributes. The second item is the
    unparsed text starting at the first `}` found in what the scanner could
    not consume, or an empty string when there is no such brace. In typical
    cases a non-empty remainder means that the input does not match the
    intended attribute list syntax.
    """
    parsed, leftover = _scanner.scan(attrs_string)
    brace_at = leftover.find('}')
    if brace_at == -1:
        # To keep historic behavior, unparsable text without a '}' is discarded.
        return parsed, ''
    # Likewise, anything unparsable that precedes the '}' is discarded.
    return parsed, leftover[brace_at:]
|
||
|
|
||
|
|
||
|
def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Returns only the parsed attribute tuples and drops the remainder.
    """
    attrs, _ = get_attrs_and_remainder(str)
    return attrs
|
||
|
|
||
|
|
||
|
def isheader(elem: Element) -> bool:
    """ Return `True` when the tag of `elem` is one of `h1` through `h6`. """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags
|
||
|
|
||
|
|
||
|
class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor that applies `{: ... }` attribute lists to elements.

    `run` walks the element tree, looks for an attribute list in the text
    belonging to each element, assigns the parsed attributes to the element
    and removes the attribute list from the text.
    """

    # Core pattern: `{`, an optional `:`, optional spaces, the captured body
    # (which must start with something other than `}`, newline or space), optional
    # spaces and a closing `}`. The body is matched greedily up to the end of
    # the line, so it can run past the first `}`; `assign_attrs` returns that
    # over-matched text so callers can deal with it.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list preceded by at least one space, at the very end of the text.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list on a line of its own (after a newline), at the end of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Matches runs of characters that are NOT in the allowed set; used by
    # `sanitize_name` to replace each such run with `_`.
    # NOTE(review): the allowed set includes `:` (the `\:` near the end), while
    # the `sanitize_name` docstring says "minus the `:`" -- confirm which is intended.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Apply attribute lists found anywhere under `doc`, in place.

        For elements that `self.md.is_block_level` reports as block level, the
        attribute list is searched for at the end of the relevant text and is
        parsed in strict mode: if parsing leaves a remainder, nothing is
        assigned and the text is left untouched. For all other elements the
        attribute list must be at the start of `elem.tail`; the attributes are
        assigned and any remainder is appended back to the tail.
        """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for `attrs` on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
                    # header, def-term, or table cell: check for attributes at end of element
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a `ul` or `ol`.
                    pos = None
                    # find the `ul` or `ol` position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no `ul` or `ol`.
                        m = RE.search(elem[-1].tail)
                        if m:
                            # An empty return value means the list parsed cleanly,
                            # so it is safe to cut it out of the text.
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before `ul` or `ol`
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. `ul` is first child.
                        m = RE.search(elem.text)
                        if m:
                            if not self.assign_attrs(elem, m.group(1), strict=True):
                                elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem[-1].tail = elem[-1].tail[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if m:
                        if not self.assign_attrs(elem, m.group(1), strict=True):
                            elem.text = elem.text[:m.start()]
                            if isheader(elem):
                                # clean up trailing #s
                                elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for `attrs` at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        # Non-strict: attributes are always assigned, and whatever
                        # was over-matched is put back in front of the rest of the tail.
                        remainder = self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():] + remainder

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.

        With `strict=True` and a non-empty remainder, `elem` is not modified.
        Otherwise each parsed pair is applied: a `.` key appends the value to
        the element's `class` attribute (space separated), and any other key is
        passed through `sanitize_name` and set as an attribute with that value.
        An empty string is returned when nothing remained.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for k, v in attrs:
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attribute `k` with `v`
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Every run of characters matched by `NAME_RE` is replaced with a single `_`.
        """
        return self.NAME_RE.sub('_', name)
|
||
|
|
||
|
|
||
|
class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown """

    def extendMarkdown(self, md):
        """ Register an `AttrListTreeprocessor` as `attr_list` (priority 8) on `md`. """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)
|
||
|
|
||
|
|
||
|
def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
    extension = AttrListExtension(**kwargs)
    return extension
|