This repository has been archived by the owner on Aug 15, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
latex_math.py
289 lines (235 loc) · 10.2 KB
/
latex_math.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
"""
Licensed under Public Domain Mark 1.0.
See http://creativecommons.org/publicdomain/mark/1.0/
Author: Justin Bruce Van Horne <[email protected]>
"""
"""
Python-Markdown LaTeX Extension
Adds support for $math mode$ and %text mode%. This plugin supports
multiline equations/text.
The actual image generation is done by running pdflatex, pdfcrop, pdf2svg and svgo.
The resulting SVG is embedded as a percent-escaped data URI, so no separate image files are needed.
All the work is done in the preprocessor.
"""
import re
import os
import string
import base64
import tempfile
import markdown
from subprocess import call, PIPE
# Template for one rendered expression. The four %s slots are filled, in
# order, with: "true"/"false" (suffix of the math-* CSS class), the alt text,
# the element id, and the SVG payload of the data URI.
IMG_EXPR = (
    '<img class="latex-inline math-%s" alt="%s" id="%s" '
    'src="data:image/svg+xml,%s">'
)
# Stylesheet snippet that vertically centres the inline images.
IMG_CSS = '<style scoped>img.latex-inline { vertical-align: middle; }</style>\n'
class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
    """Replace LaTeX snippets in the Markdown source with inline SVG images.

    The page is processed as one string (not block by block) so that an
    expression may span several lines. With the default delimiters the
    following spans are recognised:

    * ``%%...%%``  extra LaTeX preamble; removed from the page
    * ``%...%``    text mode
    * ``$$...$$``  equation mode
    * ``$...$``    math mode

    A delimiter preceded by a backslash is not treated as a delimiter. When
    a page contains at least one rendered expression, the escaping backslash
    is dropped from the output.
    """

    # Rendered image data keyed by "<mode>:<base64 of the expression>".
    # Loaded from and appended to 'latex.cache'; shared by all instances.
    # NOTE: the key does not include the preamble, so cached images are not
    # regenerated when only the preamble changes.
    cached = {}

    # Base LaTeX preamble written at the top of every generated document.
    tex_preamble = r"""\documentclass[a3paper]{article}
\usepackage{amsmath}
\usepackage[left=0cm,top=0cm,right=0cm, bottom=0cm]{geometry}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{bm}
\usepackage{relsize}
\usepackage[usenames,dvipsnames]{color}
\pagestyle{empty}
"""

    # Stand-in written into the page while it is being processed, so that
    # '%' or '$' characters inside generated image data are never mistaken
    # for delimiters by a later pass.
    _PLACEHOLDER = "\x02latex-img:%d\x03"
    _PLACEHOLDER_RE = re.compile("\x02latex-img:(\\d+)\x03")

    def __init__(self, configs):
        """Load the image cache and configuration, then compile the regexes.

        ``configs`` is accepted for interface compatibility and is not used.
        Settings found in 'markdown-latex.cfg' override the defaults below.
        """
        markdown.preprocessors.Preprocessor.__init__(self)
        self._load_cache('latex.cache')

        self.config = {}
        self.config[("general", "preamble")] = ""
        self.config[("dvipng", "args")] = "-q -T tight -bg Transparent -z 9 -D 106"
        self.config[("dvisvgm", "args")] = "--bbox=min --exact --mag=0.7"
        self.config[("delimiters", "text")] = "%"
        self.config[("delimiters", "math")] = "$"
        self.config[("delimiters", "preamble")] = "%%"
        self.config[("delimiters", "equation")] = "$$"
        self._load_config('markdown-latex.cfg')

        delimiters = dict(
            (name, self.config[("delimiters", name)])
            for name in ("text", "math", "equation", "preamble")
        )
        # %TEXT% mode which is the default LaTeX mode.
        self.re_textmode = self._build_regexp_single(delimiters["text"])
        # $MATH$ mode which is the typical LaTeX math mode.
        self.re_mathmode = self._build_regexp_single(delimiters["math"])
        # $$EQUATION$$ mode which is the typical LaTeX equation mode.
        self.re_equationmode = self._build_regexp_double(delimiters["equation"])
        # %%PREAMBLE%% text that modifies the LaTeX preamble for the document.
        self.re_preamblemode = self._build_regexp_double(delimiters["preamble"])

    def _load_cache(self, path):
        """Read 'key$value' lines from ``path`` into the shared cache.

        A missing or unreadable file is ignored; lines without a '$'
        separator are skipped.
        """
        try:
            with open(path, 'r') as cache_file:
                for line in cache_file:
                    # Split on the first '$' only: the value may contain '$'.
                    key, sep, val = line.rstrip("\n").partition("$")
                    if sep:
                        self.cached[key] = val
        except IOError:
            pass

    def _load_config(self, path):
        """Overlay every (section, option) found in ``path`` onto self.config."""
        try:
            import ConfigParser as configparser  # Python 2 module name
        except ImportError:
            import configparser
        cfgfile = configparser.RawConfigParser()
        try:
            cfgfile.read(path)
            for sec in cfgfile.sections():
                for opt in cfgfile.options(sec):
                    self.config[(sec, opt)] = cfgfile.get(sec, opt)
        except configparser.NoSectionError:
            pass

    @staticmethod
    def _build_regexp_single(delim):
        """Compile a regex matching ``delim``content``delim``.

        The opening delimiter must not be preceded by a backslash and neither
        delimiter may be adjacent to another copy of itself (so '$' does not
        match inside '$$'). The content is non-empty, is captured in group 1,
        and may contain backslash-escaped characters, including the
        delimiter. No surrounding character is consumed.
        """
        d = re.escape(delim)
        regexp = (
            r'(?<!\\)(?<!' + d + r')' + d
            + r'((?:\\.|(?!' + d + r')[^\\])+?)'
            + d + r'(?!' + d + r')'
        )
        return re.compile(regexp, re.MULTILINE | re.DOTALL)

    @staticmethod
    def _build_regexp_double(delim):
        """Compile a regex matching ``delim``content``delim`` (group 1).

        Delimiters preceded by a backslash are not matched.
        """
        d = re.escape(delim)
        regexp = r'(?<!\\)' + d + r'(.+?)(?<!\\)' + d
        return re.compile(regexp, re.MULTILINE | re.DOTALL)

    def _latex_to_svg(self, tex, mode, preamble=None):
        """Generate the data-URI payload of an SVG rendering of ``tex``.

        ``mode`` is "math", "equation" or anything else (plain text mode).
        ``preamble`` is the complete document head up to and including
        ``\\begin{document}``; when omitted, ``self.tex_preamble`` is used and
        ``\\begin{document}`` is appended if it is missing.

        Runs pdflatex, pdfcrop, pdf2svg and svgo in turn. Raises ``Exception``
        if any of them exits with a non-zero status; in that case the
        '.log' file is kept and every other intermediate file is removed.
        """
        if preamble is None:
            preamble = self.tex_preamble
            if "\\begin{document}" not in preamble:
                preamble += "\n\\begin{document}"

        # Figure out the mode that we're in
        if mode == "math":
            body = "\\relscale{1.1}\n$ %s $" % tex
        elif mode == "equation":
            body = "\\relscale{1.5}\n\\[ %s \\]" % tex
        else:
            body = "%s" % tex
        document = preamble + body + '\n\\end{document}'
        if not isinstance(document, bytes):
            document = document.encode("utf-8")

        # The tools write their output next to the current directory's job
        # name, so the source file is created in the working directory
        # (without touching the global tempfile.tempdir).
        tmp_file_fd, path = tempfile.mkstemp(dir=os.getcwd())
        with os.fdopen(tmp_file_fd, "wb") as tmp_file:
            tmp_file.write(document)

        pdf = "%s.pdf" % path
        pdf_crp = "%s-crop.pdf" % path
        svg = "%s.svg" % path
        svgo = "%s-opt.svg" % path
        # Argument lists (not split strings) so paths with spaces survive.
        steps = (
            (["pdflatex", "-halt-on-error", "-output-format", "pdf", path],
             "Couldn't compile LaTeX document."),
            (["pdfcrop", pdf],
             "Couldn't crop PDF-LaTeX."),
            (["pdf2svg", pdf_crp, svg],
             "Couldn't convert cropped PDF-LaTeX to SVG."),
            (["svgo", svg, svgo],
             "Couldn't optimize SVG."),
        )

        failed = True
        try:
            # Discard tool output; an unread pipe could block the child.
            with open(os.devnull, "w") as devnull:
                for command, failure in steps:
                    if call(command, stdout=devnull):
                        raise Exception(
                            failure +
                            " Please read '%s.log' for more detail." % path)
            with open(svgo, "rb") as svg_file:
                data = svg_file.read()
            failed = False
        finally:
            self._cleanup(path, err=failed)

        if not isinstance(data, str):
            data = data.decode("utf-8")
        # Make the SVG safe inside a double-quoted data URI. '%' must be
        # escaped before the other characters introduce new '%' signs.
        escapes = (
            ('\n', ''),
            ('"', "'"),
            ('%', '%25'),
            ('>', '%3E'),
            ('<', '%3C'),
            ('#', '%23'),
        )
        for old, new in escapes:
            data = data.replace(old, new)
        return data

    def _cleanup(self, path, err=False):
        """Remove the files generated for ``path``; missing files are ignored.

        When ``err`` is true the '.log' file is left in place for inspection.
        """
        extensions = ["", ".aux", "-crop.pdf", ".pdf", "-opt.svg", ".svg"]
        if not err:
            extensions.append(".log")
        for extension in extensions:
            try:
                os.remove("%s%s" % (path, extension))
            except (IOError, OSError):
                pass

    def _render_image(self, mode, expr, index, preamble, new_cache):
        """Return the <img> tag for ``expr``, rendering it on a cache miss.

        Newly rendered data is stored in both the shared cache and
        ``new_cache`` (the entries still to be written to disk).
        """
        raw = expr if isinstance(expr, bytes) else expr.encode("utf-8")
        encoded = base64.b64encode(raw)
        if not isinstance(encoded, str):
            encoded = encoded.decode("ascii")
        # The mode is part of the key: the same source renders differently
        # in text, math and equation mode.
        key = "%s:%s" % (mode, encoded)

        if key in self.cached:
            data = self.cached[key]
        else:
            data = self._latex_to_svg(expr, mode, preamble)
            self.cached[key] = data
            new_cache[key] = data

        simp_expr = "".join(ch for ch in expr if ch.isalnum())
        return IMG_EXPR % (
            'true' if mode in ['math', 'equation'] else 'false',
            simp_expr,
            simp_expr[:15] + "_" + str(index),
            data)

    def _save_cache(self, new_cache):
        """Append the entries of ``new_cache`` to 'latex.cache'."""
        if not new_cache:
            return
        with open('latex.cache', 'a') as cache_file:
            for key, value in new_cache.items():
                cache_file.write("%s$%s\n" % (key, value))

    def run(self, lines):
        """Parse the page and return its lines with LaTeX replaced by images.

        If the page contains no expression it is returned with only the
        preamble spans removed.
        """
        # Re-creates the entire page so we can parse in a multiline env.
        page = "\n".join(lines)

        # Build the preamble locally so repeated calls do not accumulate.
        preamble = self.tex_preamble + self.config[("general", "preamble")]
        for extra in self.re_preamblemode.findall(page):
            preamble += extra + "\n"
        page = self.re_preamblemode.sub("", page)
        preamble += "\n\\begin{document}"

        images = []
        new_cache = {}

        def stash(match, mode):
            # Render the matched expression and leave a placeholder behind.
            img = self._render_image(
                mode, match.group(1), len(images) + 1, preamble, new_cache)
            images.append(img)
            return self._PLACEHOLDER % (len(images) - 1)

        passes = (
            (self.re_textmode, "text"),
            (self.re_equationmode, "equation"),
            (self.re_mathmode, "math"),
        )
        try:
            for regex, mode in passes:
                page = regex.sub(
                    lambda match, mode=mode: stash(match, mode), page)
        finally:
            # Keep whatever was rendered, even if a later expression failed.
            self._save_cache(new_cache)

        # No sense in doing the extra work
        if not images:
            return page.split("\n")

        # Perform the escaping of delimiters and the backslash per se
        tokens = [
            self.config[("delimiters", "preamble")],
            self.config[("delimiters", "text")],
            self.config[("delimiters", "math")],
            '\\',
        ]
        for tok in tokens:
            page = page.replace('\\' + tok, tok)

        # Put the images in last so the un-escaping above cannot alter them.
        page = self._PLACEHOLDER_RE.sub(
            lambda match: images[int(match.group(1))], page)

        # Make sure to resplit the lines
        return page.split("\n")
class LaTeXPostprocessor(markdown.postprocessors.Postprocessor):
    """Postprocessor hook for refining the document after it was parsed.

    Currently it only puts the default stylesheet in front of the output.
    """

    def run(self, text):
        """Return ``text`` with the IMG_CSS style block prepended."""
        return IMG_CSS + text
class MarkdownLatex(markdown.Extension):
    """Markdown extension that installs the LaTeX pre- and postprocessor."""

    def extendMarkdown(self, md, md_globals):
        """Add both processors to ``md`` under the name 'latex'.

        ``md_globals`` is not used.
        """
        # LaTeX rendering, positioned with the '>html_block' location string.
        preprocessor = LaTeXPreprocessor(self)
        md.preprocessors.add('latex', preprocessor, ">html_block")

        # Stylesheet injection, positioned with '>amp_substitute'.
        postprocessor = LaTeXPostprocessor(self)
        md.postprocessors.add('latex', postprocessor, ">amp_substitute")
def makeExtension(*args, **kwargs):
    """Create the extension object, forwarding all arguments to MarkdownLatex."""
    extension = MarkdownLatex(*args, **kwargs)
    return extension