3RNN/Lib/site-packages/markdown/extensions/attr_list.py

# Attribute List Extension for Python-Markdown
# ============================================

# Adds attribute list syntax. Inspired by
# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
# feature of the same name.

# See https://Python-Markdown.github.io/extensions/attr_list
# for documentation.

# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

# All changes Copyright 2011-2014 The Python Markdown Project

# License: [BSD](https://opensource.org/licenses/bsd-license.php)

"""
 Adds attribute list syntax. Inspired by
[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
for details.
"""

from __future__ import annotations
from typing import TYPE_CHECKING

from . import Extension
from ..treeprocessors import Treeprocessor
import re

if TYPE_CHECKING:  # pragma: no cover
    from xml.etree.ElementTree import Element


def _handle_double_quote(s, t):
    k, v = t.split('=', 1)
    return k, v.strip('"')


def _handle_single_quote(s, t):
    k, v = t.split('=', 1)
    return k, v.strip("'")


def _handle_key_value(s, t):
    return t.split('=', 1)


def _handle_word(s, t):
    if t.startswith('.'):
        return '.', t[1:]
    if t.startswith('#'):
        return 'id', t[1:]
    return t, t


# Tokenizer for the text inside an attribute list. Rules are listed from most to
# least specific, so a quoted `key="value"` / `key='value'` is preferred over a bare
# `key=value`, which in turn is preferred over a single word.
_scanner = re.Scanner([
    # key="value" (value ends at the first following double quote)
    (r'[^ =}]+=".*?"', _handle_double_quote),
    # key='value' (value ends at the first following single quote)
    (r"[^ =}]+='.*?'", _handle_single_quote),
    # key=value with no spaces, `=` or `}` in either part
    (r'[^ =}]+=[^ =}]+', _handle_key_value),
    # bare word: `.class`, `#id` or a plain word
    (r'[^ =}]+', _handle_word),
    # a single space has no action: it is consumed without producing a token
    (r' ', None)
])


def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Tokenize an attribute list string.

    Returns a pair `(attrs, remainder)`: `attrs` is the list of key/value items produced
    by the scanner, and `remainder` is the unscanned text starting at the first `}`
    (empty if the unscanned text contains no `}`). A non-empty remainder usually means the
    input was not a well-formed attribute list.
    """
    attrs, unscanned = _scanner.scan(attrs_string)
    # Historic behavior: anything unparsable before the first '}' is silently dropped;
    # only the brace and what follows it is reported back.
    _, brace, trailing = unscanned.partition('}')
    return attrs, brace + trailing


def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated wrapper around `get_attrs_and_remainder`.

    Returns only the parsed attribute items and discards the remainder.
    """
    attrs, _ = get_attrs_and_remainder(str)
    return attrs


def isheader(elem: Element) -> bool:
    """ Return `True` when `elem` is an HTML heading element (`h1` through `h6`). """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags


class AttrListTreeprocessor(Treeprocessor):
    """ Tree processor that turns `{: ... }` attribute lists found in text into element attributes. """

    # Core pattern: `{`, an optional `:`, optional spaces, then the captured content (which must
    # start with something other than `}`, a newline or a space and stays on one line), then `}`.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list preceded by at least one space, at the end of the text.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list alone on a line (after a newline and optional spaces), at the end of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Matches runs of characters that are NOT accepted in an attribute name.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Walk every element of `doc` and apply any attribute list attached to it.

        Inline elements take their attribute list from the start of their tail. Block-level
        elements take it from the end of their last piece of text; on success that text is
        truncated at the attribute list (and, for headings, trailing `#`s are removed).
        """
        for elem in doc.iter():
            if not self.md.is_block_level(elem.tag):
                # Inline: the attribute list, if any, sits at the start of the tail.
                tail = elem.tail
                if tail:
                    m = self.INLINE_RE.match(tail)
                    if m:
                        leftover = self.assign_attrs(elem, m.group(1))
                        elem.tail = tail[m.end():] + leftover
                continue

            # Block level. Headers, def-terms and table cells accept the list at the end of
            # the element; everything else needs it on its own last line.
            heading = isheader(elem)
            if heading or elem.tag in ('dt', 'td', 'th'):
                pattern = self.HEADER_RE
            else:
                pattern = self.BLOCK_RE

            # Decide which node/attribute holds the text to inspect.
            holder = None
            slot = None
            if len(elem) and elem.tag == 'li':
                # List items may contain a nested `ul`/`ol`; find the first one.
                nested = next(
                    (i for i, child in enumerate(elem) if child.tag in ('ul', 'ol')),
                    None
                )
                if nested is None and elem[-1].tail:
                    # No nested list: use the tail of the last child.
                    holder, slot = elem[-1], 'tail'
                elif nested is not None and nested > 0 and elem[nested - 1].tail:
                    # Use the tail of the child just before the nested list.
                    holder, slot = elem[nested - 1], 'tail'
                elif elem.text:
                    # Otherwise fall back to the item's own text.
                    holder, slot = elem, 'text'
            elif len(elem) and elem[-1].tail:
                # Has children: use the tail of the last child.
                holder, slot = elem[-1], 'tail'
            elif elem.text:
                # No usable child tail: use the element's own text.
                holder, slot = elem, 'text'

            if holder is None:
                continue

            content = getattr(holder, slot)
            m = pattern.search(content)
            if not m:
                continue
            if self.assign_attrs(elem, m.group(1), strict=True):
                # A stray `}` was left over: nothing was assigned, keep the text untouched.
                continue

            trimmed = content[:m.start()]
            if heading:
                # clean up trailing #s
                trimmed = trimmed.rstrip('#').rstrip()
            setattr(holder, slot, trimmed)

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Parse `attrs_string` and set the resulting attributes on `elem`.

        Returns the remainder reported by `get_attrs_and_remainder` (text from an extra
        closing curly brace onwards, or an empty string).

        When `strict` is true and the remainder is non-empty, nothing is assigned and the
        remainder is returned immediately.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            return remainder

        for key, value in attrs:
            if key != '.':
                # Ordinary attribute: set it under its sanitized name.
                elem.set(self.sanitize_name(key), value)
                continue
            # `.name` items are appended to any existing class value.
            existing = elem.get('class')
            elem.set('class', f'{existing} {value}' if existing else value)
        # Hand back the over-matched text so the caller can restore it.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Return `name` with each run of characters matched by `NAME_RE` replaced by one `_`.

        The accepted character set is modelled on XML names; see
        <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.
        """
        return self.NAME_RE.sub('_', name)


class AttrListExtension(Extension):
    """ Python-Markdown extension that enables attribute lists. """

    def extendMarkdown(self, md):
        """ Register the tree processor on `md` under the name `attr_list` with priority 8. """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)


def makeExtension(**kwargs):  # pragma: no cover
    """ Build an `AttrListExtension`, forwarding all keyword arguments to it. """
    extension = AttrListExtension(**kwargs)
    return extension
1.0 2024-05-26 19:49:15 +02:00			`# Attribute List Extension for Python-Markdown`
			`# ============================================`

			`# Adds attribute list syntax. Inspired by`
			`# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s`
			`# feature of the same name.`

			`# See https://Python-Markdown.github.io/extensions/attr_list`
			`# for documentation.`

			`# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).`

			`# All changes Copyright 2011-2014 The Python Markdown Project`

			`# License: [BSD](https://opensource.org/licenses/bsd-license.php)`

			`"""`
			`Adds attribute list syntax. Inspired by`
			`[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s`
			`feature of the same name.`

			`See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)`
			`for details.`
			`"""`

			`from __future__ import annotations`
			`from typing import TYPE_CHECKING`

			`from . import Extension`
			`from ..treeprocessors import Treeprocessor`
			`import re`

			`if TYPE_CHECKING: # pragma: no cover`
			`from xml.etree.ElementTree import Element`


			`def _handle_double_quote(s, t):`
			`k, v = t.split('=', 1)`
			`return k, v.strip('"')`


			`def _handle_single_quote(s, t):`
			`k, v = t.split('=', 1)`
			`return k, v.strip("'")`


			`def _handle_key_value(s, t):`
			`return t.split('=', 1)`


			`def _handle_word(s, t):`
			`if t.startswith('.'):`
			`return '.', t[1:]`
			`if t.startswith('#'):`
			`return 'id', t[1:]`
			`return t, t`


			`_scanner = re.Scanner([`
			`(r'[^ =}]+=".*?"', _handle_double_quote),`
			`(r"[^ =}]+='.*?'", _handle_single_quote),`
			`(r'[^ =}]+=[^ =}]+', _handle_key_value),`
			`(r'[^ =}]+', _handle_word),`
			`(r' ', None)`
			`])`


			`def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:`
			`""" Parse attribute list and return a list of attribute tuples.`

			`Additionally, return any text that remained after a curly brace. In typical cases, its presence`
			`should mean that the input does not match the intended attribute list syntax.`
			`"""`
			`attrs, remainder = _scanner.scan(attrs_string)`
			`# To keep historic behavior, discard all unparsable text prior to '}'.`
			`index = remainder.find('}')`
			`remainder = remainder[index:] if index != -1 else ''`
			`return attrs, remainder`


			`def get_attrs(str: str) -> list[tuple[str, str]]: # pragma: no cover`
			""" Soft-deprecated. Prefer `get_attrs_and_remainder`. """
			`return get_attrs_and_remainder(str)[0]`


			`def isheader(elem: Element) -> bool:`
			`return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']`


			`class AttrListTreeprocessor(Treeprocessor):`

			`BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'`
			`HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))`
			`BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))`
			`INLINE_RE = re.compile(r'^{}'.format(BASE_RE))`
			`NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'`
			`r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'`
			`r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'`
			`r'\uf900-\ufdcf\ufdf0-\ufffd'`
			`r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')`

			`def run(self, doc: Element) -> None:`
			`for elem in doc.iter():`
			`if self.md.is_block_level(elem.tag):`
			# Block level: check for `attrs` on last line of text
			`RE = self.BLOCK_RE`
			`if isheader(elem) or elem.tag in ['dt', 'td', 'th']:`
			`# header, def-term, or table cell: check for attributes at end of element`
			`RE = self.HEADER_RE`
			`if len(elem) and elem.tag == 'li':`
			# special case list items. children may include a `ul` or `ol`.
			`pos = None`
			# find the `ul` or `ol` position
			`for i, child in enumerate(elem):`
			`if child.tag in ['ul', 'ol']:`
			`pos = i`
			`break`
			`if pos is None and elem[-1].tail:`
			# use tail of last child. no `ul` or `ol`.
			`m = RE.search(elem[-1].tail)`
			`if m:`
			`if not self.assign_attrs(elem, m.group(1), strict=True):`
			`elem[-1].tail = elem[-1].tail[:m.start()]`
			`elif pos is not None and pos > 0 and elem[pos-1].tail:`
			# use tail of last child before `ul` or `ol`
			`m = RE.search(elem[pos-1].tail)`
			`if m:`
			`if not self.assign_attrs(elem, m.group(1), strict=True):`
			`elem[pos-1].tail = elem[pos-1].tail[:m.start()]`
			`elif elem.text:`
			# use text. `ul` is first child.
			`m = RE.search(elem.text)`
			`if m:`
			`if not self.assign_attrs(elem, m.group(1), strict=True):`
			`elem.text = elem.text[:m.start()]`
			`elif len(elem) and elem[-1].tail:`
			`# has children. Get from tail of last child`
			`m = RE.search(elem[-1].tail)`
			`if m:`
			`if not self.assign_attrs(elem, m.group(1), strict=True):`
			`elem[-1].tail = elem[-1].tail[:m.start()]`
			`if isheader(elem):`
			`# clean up trailing #s`
			`elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()`
			`elif elem.text:`
			`# no children. Get from text.`
			`m = RE.search(elem.text)`
			`if m:`
			`if not self.assign_attrs(elem, m.group(1), strict=True):`
			`elem.text = elem.text[:m.start()]`
			`if isheader(elem):`
			`# clean up trailing #s`
			`elem.text = elem.text.rstrip('#').rstrip()`
			`else:`
			# inline: check for `attrs` at start of tail
			`if elem.tail:`
			`m = self.INLINE_RE.match(elem.tail)`
			`if m:`
			`remainder = self.assign_attrs(elem, m.group(1))`
			`elem.tail = elem.tail[m.end():] + remainder`

			`def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:`
			""" Assign `attrs` to element.

			If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

			The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
			`"""`
			`attrs, remainder = get_attrs_and_remainder(attrs_string)`
			`if strict and remainder:`
			`return remainder`

			`for k, v in attrs:`
			`if k == '.':`
			`# add to class`
			`cls = elem.get('class')`
			`if cls:`
			`elem.set('class', '{} {}'.format(cls, v))`
			`else:`
			`elem.set('class', v)`
			`else:`
			# assign attribute `k` with `v`
			`elem.set(self.sanitize_name(k), v)`
			`# The text that we initially over-matched will be put back.`
			`return remainder`

			`def sanitize_name(self, name: str) -> str:`
			`"""`
			Sanitize name as 'an XML Name, minus the `:`.'
			`See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.`
			`"""`
			`return self.NAME_RE.sub('_', name)`


			`class AttrListExtension(Extension):`
			`""" Attribute List extension for Python-Markdown """`
			`def extendMarkdown(self, md):`
			`md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)`
			`md.registerExtension(self)`


			`def makeExtension(**kwargs): # pragma: no cover`
			`return AttrListExtension(**kwargs)`