Skip to content

Commit

Permalink
Do not modify contents of <note> tag (#86)
Browse files Browse the repository at this point in the history
* Do not modify contents of <note> tag

This changes the behaviour around normalizing the <note> field of .glif
files. Previously the contents of this element would be broken into
lines and reindented, but this was a potentially destructive operation.

With this patch, any content of a <note> element is passed through
unchanged.

For more discussion, see:
#85

* Address code review comments

- make sure we're escaping XML correctly, and test that
- remove dedent_tabs function
- add a test for correctly encoding 'é'
  • Loading branch information
cmyr authored Aug 3, 2021
1 parent 186dbc1 commit 6d2e69c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 148 deletions.
86 changes: 1 addition & 85 deletions src/ufonormalizer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
import binascii
import time
import os
import re
import shutil
from xml.etree import cElementTree as ET
import plistlib
import textwrap
import datetime
import glob
from collections import OrderedDict
Expand Down Expand Up @@ -838,9 +836,7 @@ def _normalizeGlifNote(element, writer):
return
if not value.strip():
return
writer.beginElement("note")
writer.text(value)
writer.endElement("note")
writer.simpleElement("note", value=xmlEscapeText(value))


def _normalizeGlifOutlineFormat1(element, writer):
Expand Down Expand Up @@ -1261,29 +1257,6 @@ def data(self, text):
line = "<![CDATA[%s]]>" % text
self.raw(line)

def text(self, text):
text = text.strip("\n")
text = dedent_tabs(text)
text = text.strip()
text = xmlEscapeText(text)
paragraphs = []
for paragraph in text.splitlines():
if not paragraph:
paragraphs.append("")
else:
paragraph = textwrap.wrap(
paragraph.rstrip(),
width=xmlTextMaxLineLength,
expand_tabs=False,
replace_whitespace=False,
drop_whitespace=False,
break_long_words=False,
break_on_hyphens=False
)
paragraphs.extend(paragraph)
for line in paragraphs:
self.raw(line)

def simpleElement(self, tag, attrs=None, value=None):
if attrs:
attrs = self.attributesToString(attrs)
Expand Down Expand Up @@ -1450,63 +1423,6 @@ def xmlConvertInt(value):
return str(value)


# ---------------
# Text Operations
# ---------------

WHITESPACE_ONLY_RE = re.compile(r'^[\s\t]+$', re.MULTILINE)
LEADING_WHITESPACE_RE = re.compile(r'(^(?:\s{4}|\t)*)(?:[^\t\n])', re.MULTILINE)


def dedent_tabs(text):
"""
Based on `textwrap.dedent`, but modified to only work on tabs and 4-space indents
Remove any common leading tabs from every line in `text`.
This can be used to make triple-quoted strings line up with the left
edge of the display, while still presenting them in the source code
in indented form.
Entirely blank lines are normalized to a newline character.
"""
# Look for the longest leading string of spaces and tabs common to
# all lines.
margin = None
text = WHITESPACE_ONLY_RE.sub('', text)
indents = LEADING_WHITESPACE_RE.findall(text)
for indent in indents:
if margin is None:
margin = indent

# Current line more deeply indented than previous winner:
# no change (previous winner is still on top).
elif indent.startswith(margin):
pass

# Current line consistent with and no deeper than previous winner:
# it's the new winner.
elif margin.startswith(indent):
margin = indent

# Find the largest common whitespace between current line and previous
# winner.
else:
for i, (x, y) in enumerate(zip(margin, indent)):
if x != y:
margin = margin[:i]
break

# sanity check (testing/debugging only)
if 0 and margin:
for line in text.split("\n"):
assert not line or line.startswith(margin), \
"line = %r, margin = %r" % (line, margin)

if margin:
text = re.sub(r'(?m)^' + margin, '', text)
return text


# ---------------
# Path Operations
# ---------------
Expand Down
4 changes: 1 addition & 3 deletions tests/data/glif/format2.glif
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,5 @@
<string>1,0,0,0.5</string>
</dict>
</lib>
<note>
arbitrary text about the glyph
</note>
<note>arbitrary text about the glyph</note>
</glyph>
88 changes: 28 additions & 60 deletions tests/test_ufonormalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,7 @@
<string>1,0,0,0.5</string>
</dict>
</lib>
<note>
arbitrary text about the glyph
</note>
<note>arbitrary text about the glyph</note>
</glyph>
'''

Expand Down Expand Up @@ -767,25 +765,35 @@ def test_normalizeGLIF_lib_undefined(self):
self.assertEqual(writer.getText(), '')

def test_normalizeGLIF_note_defined(self):
""" Serialization of notes is non-fancy: we take the note text and
use it, unchanged, as the body of the <note>element</note>. In previous
version of ufonormalizer we would break the user text into lines. See
https://github.com/unified-font-object/ufoNormalizer/issues/85 for some
background.
"""

element = ET.fromstring("<note>Blah</note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(writer.getText(), "<note>\n\tBlah\n</note>")
self.assertEqual(writer.getText(), "<note>Blah</note>")

element = ET.fromstring("<note> Blah \t\n\t </note>")
# encode accent correctly
element = ET.fromstring(
tobytes("<note>Don't forget to check the béziers!!</note>",
encoding="utf8"))
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(writer.getText(), "<note>\n\tBlah\n</note>")
self.assertEqual(
writer.getText(),
"<note>Don't forget to check the b\xe9ziers!!</note>")

element = ET.fromstring(
tobytes("<note>Don't forget to check the béziers!!</note>",
encoding="utf8"))
# trailing whitespace is preserved
element = ET.fromstring("<note> Blah \t\n\t </note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tDon't forget to check the b\xe9ziers!!\n</note>")
self.assertEqual(writer.getText(), "<note> Blah \t\n\t </note>")

# multiline strings are preserved
element = ET.fromstring(
tobytes("<note>A quick brown fox jumps over the lazy dog.\n"
"Příliš žluťoučký kůň úpěl ďábelské ódy.</note>",
Expand All @@ -794,64 +802,24 @@ def test_normalizeGLIF_note_defined(self):
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tA quick brown fox jumps over the lazy dog.\n\t"
"<note>A quick brown fox jumps over the lazy dog.\n"
"P\u0159\xedli\u0161 \u017elu\u0165ou\u010dk\xfd k\u016f\u0148 "
"\xfap\u011bl \u010f\xe1belsk\xe9 \xf3dy.\n</note>")

element = ET.fromstring(
"<note> Line1 \t\n\n Line3\t </note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tLine1\n\t\n\t Line3\n</note>")

# Normalizer should not indent Line2 and Line3 more than already indented
element = ET.fromstring(
"<note>\n\tLine1\n\tLine2\n\tLine3\n</note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tLine1\n\tLine2\n\tLine3\n</note>")

# Normalizer should keep the extra tab in line 2
element = ET.fromstring(
"<note>\n\tLine1\n\t\tLine2\n\tLine3\n</note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tLine1\n\t\tLine2\n\tLine3\n</note>")
"\xfap\u011bl \u010f\xe1belsk\xe9 \xf3dy.</note>")

# Normalizer should keep the extra spaces on line 2
# Everything is always preserved
element = ET.fromstring(
"<note>\n\tLine1\n\t Line2\n\tLine3\n</note>")
"<note>\n\tLine1\n\t\tLine2\n\t Line3\n</note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tLine1\n\t Line2\n\tLine3\n</note>")
"<note>\n\tLine1\n\t\tLine2\n\t Line3\n</note>")

# Normalizer should remove the extra tab all lines have in common,
# but leave the additional tab on line 2
element = ET.fromstring(
"<note>\n\t\tLine1\n\t\t\tLine2\n\t\tLine3\n</note>")
# correctly escape xml
element = ET.fromstring("<note>escape&lt;br /&gt;me!</note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tLine1\n\t\tLine2\n\tLine3\n</note>")

# Normalizer should remove the extra 4-space all lines have in common,
# but leave the additional 4-space on line 2
element = ET.fromstring(
"<note>\n Line1\n Line2\n Line3\n</note>")
writer = XMLWriter(declaration=None)
_normalizeGlifNote(element, writer)
self.assertEqual(
writer.getText(),
"<note>\n\tLine1\n\t Line2\n\tLine3\n</note>")
self.assertEqual(writer.getText(), "<note>escape&lt;br /&gt;me!</note>")

def test_normalizeGLIF_note_undefined(self):
element = ET.fromstring("<note></note>")
Expand Down

0 comments on commit 6d2e69c

Please sign in to comment.