Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
hukkin committed Oct 26, 2024
1 parent dbc56fd commit 6ccbde4
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 54 deletions.
56 changes: 2 additions & 54 deletions src/mdformat/renderer/_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
decimalify_leading,
decimalify_trailing,
escape_asterisk_emphasis,
escape_square_brackets,
escape_underscore_emphasis,
get_list_marker_type,
is_tight_list,
is_tight_list_item,
longest_consecutive_sequence,
maybe_add_link_brackets,
split_at_indexes,
)
from mdformat.renderer.typing import Postprocess, Render

Expand Down Expand Up @@ -117,7 +117,7 @@ def text(node: RenderTreeNode, context: RenderContext) -> str:
text = escape_asterisk_emphasis(text) # Escape emphasis/strong marker.
text = escape_underscore_emphasis(text) # Escape emphasis/strong marker.
# Escape link label and link ref enclosures
text = _escape_square_brackets(text, context.env["used_refs"])
text = escape_square_brackets(text, context.env["used_refs"])
text = text.replace("<", "\\<") # Escape URI enclosure
text = text.replace("`", "\\`") # Escape code span marker

Expand All @@ -142,58 +142,6 @@ def text(node: RenderTreeNode, context: RenderContext) -> str:
return text


# Matches one square bracket character, either "[" or "]".
_RE_SQUARE_BRACKET = re.compile(r"[\[\]]")


def _escape_square_brackets(text: str, used_refs: Iterable[str]) -> str:
    """Backslash-escape square brackets that could be read as a link.

    A matched "[" ... "]" pair is left untouched only when all of the
    following hold:
      - no other square bracket sits between the two brackets
      - the enclosed text, upper-cased, is not found in `used_refs`
      - the character right after "]" is neither ":" nor "(" (end of
        string also counts as "neither")
    Every other bracket, including unmatched ones, gets a backslash
    inserted in front of it. If nothing needs escaping, `text` is
    returned as is.
    """
    backslash_at: list[int] = []
    # Offset of the most recent "[" that has not been closed or escaped yet.
    open_idx: int | None = None

    for match in _RE_SQUARE_BRACKET.finditer(text):
        idx = match.start()

        if match.group() == "[":
            # A new opener makes a still-pending opener unmatched: escape it.
            if open_idx is not None:
                backslash_at.append(open_idx)
            open_idx = idx
            continue

        # From here on the bracket is a "]".
        if open_idx is None:
            # Closer without an opener.
            backslash_at.append(idx)
            continue

        label = text[open_idx + 1 : idx]
        follower = text[idx + 1 : idx + 2]  # "" when "]" ends the string
        if label.upper() in used_refs or follower in {":", "("}:
            # The pair could act as a link label or reference: escape both.
            backslash_at.append(open_idx)
            backslash_at.append(idx)
        open_idx = None

    # An opener still pending at the end of the text never got closed.
    if open_idx is not None:
        backslash_at.append(open_idx)

    if not backslash_at:
        return text
    # Relies on split_at_indexes cutting `text` at the recorded offsets, so
    # that joining the pieces puts a backslash in front of each bracket.
    return "\\".join(split_at_indexes(text, backslash_at))


def fence(node: RenderTreeNode, context: RenderContext) -> str:
info_str = node.info.strip()
lang = info_str.split(maxsplit=1)[0] if info_str else ""
Expand Down
52 changes: 52 additions & 0 deletions src/mdformat/renderer/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,55 @@ def split_at_indexes(text: str, indexes: Iterable[int]) -> list[str]:
prev_i = i
parts.append(text[i:])
return parts


def escape_square_brackets(text: str, used_refs: Iterable[str]) -> str:
    """Backslash-escape square brackets that could be read as a link.

    A matched "[" ... "]" pair is left untouched only when all of the
    following hold:
      - no other square bracket sits between the two brackets
      - the enclosed text, upper-cased, is not found in `used_refs`
      - the character right after "]" is neither ":" nor "(" (end of
        string also counts as "neither")
    Every other bracket, including unmatched ones, gets a backslash
    inserted in front of it. If nothing needs escaping, `text` is
    returned as is.
    """
    backslash_at: list[int] = []
    # Offset of the most recent "[" that has not been closed or escaped yet.
    open_idx: int | None = None

    for match in RE_SQUARE_BRACKET.finditer(text):
        idx = match.start()

        if match.group() == "[":
            # A new opener makes a still-pending opener unmatched: escape it.
            if open_idx is not None:
                backslash_at.append(open_idx)
            open_idx = idx
            continue

        # From here on the bracket is a "]".
        if open_idx is None:
            # Closer without an opener.
            backslash_at.append(idx)
            continue

        label = text[open_idx + 1 : idx]
        follower = text[idx + 1 : idx + 2]  # "" when "]" ends the string
        if label.upper() in used_refs or follower in {":", "("}:
            # The pair could act as a link label or reference: escape both.
            backslash_at.append(open_idx)
            backslash_at.append(idx)
        open_idx = None

    # An opener still pending at the end of the text never got closed.
    if open_idx is not None:
        backslash_at.append(open_idx)

    if not backslash_at:
        return text
    # Relies on split_at_indexes cutting `text` at the recorded offsets, so
    # that joining the pieces puts a backslash in front of each bracket.
    return "\\".join(split_at_indexes(text, backslash_at))


# Matches one square bracket character, either "[" or "]".
RE_SQUARE_BRACKET = re.compile(r"[\[\]]")

0 comments on commit 6ccbde4

Please sign in to comment.