138 lines
4.7 KiB
Python
138 lines
4.7 KiB
Python
"""Balance paired characters (*, _, etc) in inline tokens."""
|
|
from __future__ import annotations
|
|
|
|
from .state_inline import Delimiter, StateInline
|
|
|
|
|
|
def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
|
|
"""For each opening emphasis-like marker find a matching closing one."""
|
|
if not delimiters:
|
|
return
|
|
|
|
openersBottom = {}
|
|
maximum = len(delimiters)
|
|
|
|
# headerIdx is the first delimiter of the current (where closer is) delimiter run
|
|
headerIdx = 0
|
|
lastTokenIdx = -2 # needs any value lower than -1
|
|
jumps: list[int] = []
|
|
closerIdx = 0
|
|
while closerIdx < maximum:
|
|
closer = delimiters[closerIdx]
|
|
|
|
jumps.append(0)
|
|
|
|
# markers belong to same delimiter run if:
|
|
# - they have adjacent tokens
|
|
# - AND markers are the same
|
|
#
|
|
if (
|
|
delimiters[headerIdx].marker != closer.marker
|
|
or lastTokenIdx != closer.token - 1
|
|
):
|
|
headerIdx = closerIdx
|
|
lastTokenIdx = closer.token
|
|
|
|
# Length is only used for emphasis-specific "rule of 3",
|
|
# if it's not defined (in strikethrough or 3rd party plugins),
|
|
# we can default it to 0 to disable those checks.
|
|
#
|
|
closer.length = closer.length or 0
|
|
|
|
if not closer.close:
|
|
closerIdx += 1
|
|
continue
|
|
|
|
# Previously calculated lower bounds (previous fails)
|
|
# for each marker, each delimiter length modulo 3,
|
|
# and for whether this closer can be an opener;
|
|
# https://github.com/commonmark/cmark/commit/34250e12ccebdc6372b8b49c44fab57c72443460
|
|
if closer.marker not in openersBottom:
|
|
openersBottom[closer.marker] = [-1, -1, -1, -1, -1, -1]
|
|
|
|
minOpenerIdx = openersBottom[closer.marker][
|
|
(3 if closer.open else 0) + (closer.length % 3)
|
|
]
|
|
|
|
openerIdx = headerIdx - jumps[headerIdx] - 1
|
|
|
|
newMinOpenerIdx = openerIdx
|
|
|
|
while openerIdx > minOpenerIdx:
|
|
opener = delimiters[openerIdx]
|
|
|
|
if opener.marker != closer.marker:
|
|
openerIdx -= jumps[openerIdx] + 1
|
|
continue
|
|
|
|
if opener.open and opener.end < 0:
|
|
isOddMatch = False
|
|
|
|
# from spec:
|
|
#
|
|
# If one of the delimiters can both open and close emphasis, then the
|
|
# sum of the lengths of the delimiter runs containing the opening and
|
|
# closing delimiters must not be a multiple of 3 unless both lengths
|
|
# are multiples of 3.
|
|
#
|
|
if (
|
|
(opener.close or closer.open)
|
|
and ((opener.length + closer.length) % 3 == 0)
|
|
and (opener.length % 3 != 0 or closer.length % 3 != 0)
|
|
):
|
|
isOddMatch = True
|
|
|
|
if not isOddMatch:
|
|
# If previous delimiter cannot be an opener, we can safely skip
|
|
# the entire sequence in future checks. This is required to make
|
|
# sure algorithm has linear complexity (see *_*_*_*_*_... case).
|
|
#
|
|
if openerIdx > 0 and not delimiters[openerIdx - 1].open:
|
|
lastJump = jumps[openerIdx - 1] + 1
|
|
else:
|
|
lastJump = 0
|
|
|
|
jumps[closerIdx] = closerIdx - openerIdx + lastJump
|
|
jumps[openerIdx] = lastJump
|
|
|
|
closer.open = False
|
|
opener.end = closerIdx
|
|
opener.close = False
|
|
newMinOpenerIdx = -1
|
|
|
|
# treat next token as start of run,
|
|
# it optimizes skips in **<...>**a**<...>** pathological case
|
|
lastTokenIdx = -2
|
|
|
|
break
|
|
|
|
openerIdx -= jumps[openerIdx] + 1
|
|
|
|
if newMinOpenerIdx != -1:
|
|
# If match for this delimiter run failed, we want to set lower bound for
|
|
# future lookups. This is required to make sure algorithm has linear
|
|
# complexity.
|
|
#
|
|
# See details here:
|
|
# https:#github.com/commonmark/cmark/issues/178#issuecomment-270417442
|
|
#
|
|
openersBottom[closer.marker][
|
|
(3 if closer.open else 0) + ((closer.length or 0) % 3)
|
|
] = newMinOpenerIdx
|
|
|
|
closerIdx += 1
|
|
|
|
|
|
def link_pairs(state: StateInline) -> None:
|
|
tokens_meta = state.tokens_meta
|
|
maximum = len(state.tokens_meta)
|
|
|
|
processDelimiters(state, state.delimiters)
|
|
|
|
curr = 0
|
|
while curr < maximum:
|
|
curr_meta = tokens_meta[curr]
|
|
if curr_meta and "delimiters" in curr_meta:
|
|
processDelimiters(state, curr_meta["delimiters"])
|
|
curr += 1
|