Skip to content

Commit

Permalink
Slashes in tags are actually fine (#673)
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-martian committed Aug 7, 2024
1 parent 29326c1 commit 0c4314a
Showing 1 changed file with 4 additions and 10 deletions.
14 changes: 4 additions & 10 deletions Utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,12 +464,8 @@
# Precompiled regular expressions referenced by the problem-character tables below.
# NOTE(review): this excerpt appears to be a rendered diff, so definitions from both
# sides of the change may be present together -- confirm against the real file.

# Matches the literal text `test id="` followed by the shortest run of characters
# up to the next double quote.
reTestID = re.compile('test id=".+?"')
# Single-character patterns: any whitespace character, then a literal period,
# forward slash, hyphen and asterisk respectively.
reSpace = re.compile(r'\s')
rePeriod = re.compile(r'\.')
reForwardSlash = re.compile(r'/')
reHyphen = re.compile(r'-')
reAsterisk = re.compile(r'\*')
# The two symbol patterns use character classes that exclude '<' and '>' (and '=' in
# the first one) so that a match cannot run across an angle bracket.
# reSlashInSymbol: group 1 is `="` plus the text before the slash, group 2 is the
# slash itself, group 3 is the text after it through the closing double quote.
reSlashInSymbol = re.compile(r'(="[^<>=]+?)(/)([^<>=]+?")')
# reSLASHInSymbol: inside one <...> span, group 1 is the text before the literal
# word SLASH and group 2 is the text after it.
reSLASHInSymbol = re.compile(r'(<[^<>]+?)SLASH([^<>]+?>)')
# Matches two consecutive newline characters.
reDoubleNewline = re.compile(r'\n\n')

# NOTE(review): the use of this constant is not visible in this excerpt -- presumably
# an n-gram length; confirm at the use sites.
NGRAM_SIZE = 5
Expand All @@ -496,19 +492,17 @@
# Tables of problem characters and how each one is converted.
# Every entry is a 4-item list:
#   [name of the problem character, description of the conversion,
#    replacement string, compiled pattern that finds the character]
# NOTE(review): how these entries are consumed is not visible here -- presumably the
# replacement string is applied with the pattern's sub(), which is what the
# backreference-style replacements (r'\1SLASH\3', r'\1/\2') suggest; confirm in
# convertProblemChars.

# Invalid category characters & descriptions & messages & replacements
catProbData = [['space', 'converted to an underscore', '_', reSpace],
['period', 'removed', '', rePeriod],
['slash', 'converted to a vertical bar', '|', reForwardSlash]
# ['x char', 'fatal', '']
]

# Invalid lemma characters, same entry layout as above.
lemmaProbData = [['asterisk', 'converted to an underscore', '_', reAsterisk]
]

# NOTE(review): bilingFixSymbProbData is assigned twice in this excerpt; the second
# assignment (the empty list) is the one that takes effect at runtime.
# The first form keeps groups 1 and 3 of the match and puts SLASH in place of group 2.
bilingFixSymbProbData = [['slash', 'converted to SLASH', r'\1SLASH\3', reSlashInSymbol]
]
bilingFixSymbProbData = []

# NOTE(review): bilingUnFixSymbProbData is also assigned twice; the second assignment,
# which holds only the double-newline entry, is the one that takes effect.
bilingUnFixSymbProbData = [['SLASH', 'converted to slash', r'\1/\2', reSLASHInSymbol],
['double newline', 'converted to single newline', r'\n', reDoubleNewline]
]
bilingUnFixSymbProbData = [
['double newline', 'converted to single newline', r'\n', reDoubleNewline]
]

def convertProblemChars(convertStr, problemDataList):

Expand Down

0 comments on commit 0c4314a

Please sign in to comment.