Skip to content

Commit

Permalink
fix: regex in is_md_equal is too greedy
Browse files Browse the repository at this point in the history
  • Loading branch information
hukkin committed Dec 18, 2024
1 parent 3dd45d2 commit f568716
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
14 changes: 12 additions & 2 deletions src/mdformat/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ def build_mdit(
return mdit


# Characters that markdown-it-py escapes when it renders code_inline:
# https://github.com/executablebooks/markdown-it-py/blob/c5161b550f3c6c0a98d77e8389872405e8f9f9ee/markdown_it/common/utils.py#L138
# "&" is deliberately left out of this set because it appears in the escape
# sequences produced for these characters.
_invalid_html_code_chars = '<>"'
# Regex source for a character class that matches any single character
# other than the ones listed above.
_valid_html_code_char_re = "[^{}]".format(re.escape(_invalid_html_code_chars))


def is_md_equal(
md1: str,
md2: str,
Expand All @@ -71,10 +80,11 @@ def is_md_equal(
if codeformatters:
langs_re = "|".join(re.escape(lang) for lang in codeformatters)
html = re.sub(
rf'<code class="language-(?:{langs_re})">.*</code>',
rf'<code class="language-(?:{langs_re})">'
rf"{_valid_html_code_char_re}*"
r"</code>",
"",
html,
flags=re.DOTALL,
)

# Reduce all whitespace to a single space
Expand Down
27 changes: 27 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,30 @@ def test_is_md_equal():
paragr"""
assert not is_md_equal(md1, md2)
assert is_md_equal(md1, md2, codeformatters=("js", "go"))


def test_is_md_equal__not():
    """Check that documents differing outside a code block are not equal.

    Both documents contain two ``js`` fenced blocks separated by a
    paragraph. The first block and the paragraph differ between the two
    documents, so they must compare unequal both without code formatters
    and when ``js`` is declared as a formatted language.
    """
    original = """
```js
console.log()
```
paragr
```js
console.log()
```
"""
    altered = """
```js
bonsole.l()g
```
A different paragraph
```js
console.log()
```
"""
    # Plain comparison: the documents differ in code and in the paragraph.
    assert not is_md_equal(original, altered)
    # With "js" passed as a code formatter language the paragraph between
    # the two blocks still differs, so the documents must stay unequal.
    assert not is_md_equal(original, altered, codeformatters=("js",))

0 comments on commit f568716

Please sign in to comment.