-
Notifications
You must be signed in to change notification settings - Fork 0
/
embed_html.py
131 lines (111 loc) · 4.51 KB
/
embed_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
# -*- coding: utf-8 vi:noet
# Embed a Matlab-generated HTML file into .rst
import sys, os, re, io, shutil, argparse
if sys.hexversion < 0x03000000:
from HTMLParser import HTMLParser
else:
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
"""
Ad-hoc Matlab HTML parser that extracts the useful HTML payload,
insertable in an existing HTML document.
It looks like everything in the first <div> is desired.
The image content is copied to the destination directory.
"""
def __init__(self, out, dstdir, srcdir):
HTMLParser.__init__(self)
self._out = out
self._divlevel = 0
self._srcdir = srcdir
self._dstdir = dstdir
def handle_starttag(self, tag, attrs):
if tag == "div":
self._divlevel += 1
if tag == "img":
dattrs = dict(attrs)
src = dattrs["src"]
srcpath = os.path.join(self._srcdir, src)
dstpath = os.path.join(self._dstdir, "_static", src)
if os.path.exists(srcpath):
shutil.copy(srcpath, dstpath)
dattrs["src"] = "_static/%s" % src
attrs = [ x for x in dattrs.items() ]
if self._divlevel > 0:
self._out.write(("<%s %s>" % (tag, " ".join(['%s="%s"' % (k, v) for k, v in attrs]))))
def handle_endtag(self, tag):
if self._divlevel > 0:
self._out.write(("</%s>" % tag))
if tag == "div":
self._divlevel -= 1
def handle_data(self, data):
if self._divlevel > 0:
self._out.write(data)
if __name__ == "__main__":
dst = sys.argv[1]
dstdir = os.path.dirname(dst)
src = sys.argv[2]
srcdir = os.path.dirname(src)
title = sys.argv[3]
with io.open(dst, "wb") as fo:
fo.write(title.encode())
fo.write(b"\n")
fo.write(b"=" * len(title))
fo.write(b"\n")
fo.write(b"\n")
fo.write(b".. raw:: html\n")
fo.write(b"\n \n")
with io.open(src, "rb") as fi:
data = fi.read().decode("utf-8")
# This is taken from the Matlab HTML stylesheet, but scoped
sbuf = io.StringIO()
sbuf.write(u"""<style type="text/css">
.content { font-size:1.0em; line-height:140%; padding: 20px; }
.content p { padding:0px; margin:0px 0px 20px; }
.content img { padding:0px; margin:0px 0px 20px; border:none; }
.content p img, pre img, tt img, li img, h1 img, h2 img { margin-bottom:0px; }
.content ul { padding:0px; margin:0px 0px 20px 23px; list-style:square; }
.content ul li { padding:0px; margin:0px 0px 7px 0px; }
.content ul li ul { padding:5px 0px 0px; margin:0px 0px 7px 23px; }
.content ul li ol li { list-style:decimal; }
.content ol { padding:0px; margin:0px 0px 20px 0px; list-style:decimal; }
.content ol li { padding:0px; margin:0px 0px 7px 23px; list-style-type:decimal; }
.content ol li ol { padding:5px 0px 0px; margin:0px 0px 7px 0px; }
.content ol li ol li { list-style-type:lower-alpha; }
.content ol li ul { padding-top:7px; }
.content ol li ul li { list-style:square; }
.content pre, code { font-size:11px; }
.content tt { font-size: 1.0em; }
.content pre { margin:0px 0px 20px; }
.content pre.codeinput { padding:10px; border:1px solid #d3d3d3; background:#f7f7f7; overflow-x:scroll}
.content pre.codeoutput { padding:10px 11px; margin:0px 0px 20px; color:#4c4c4c; white-space: pre-wrap; white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap; word -wrap: break-word;}
.content pre.error { color:red; }
.content @media print { pre.codeinput, pre.codeoutput { word-wrap:break-word; width:100%; } }
.content span.keyword { color:#0000FF }
.content span.comment { color:#228B22 }
.content span.string { color:#A020F0 }
.content span.untermstring { color:#B20000 }
.content span.syscmd { color:#B28C00 }
.content .footer { width:auto; padding:10px 0px; margin:25px 0px 0px; border-top:1px dotted #878787; font-size:0.8em; line-height:140%; font-style:italic; color:#878787; text-align:left; float:none; }
.content .footer p { margin:0px; }
.content .footer a { color:#878787; }
.content .footer a:hover { color:#878787; text-decoration:underline; }
.content .footer a:visited { color:#878787; }
.content table th { padding:7px 5px; text-align:left; vertical-align:middle; border: 1px solid #d6d4d4; font-weight:bold; }
.content table td { padding:7px 5px; text-align:left; vertical-align:top; border:1px solid #d6d4d4; }
::-webkit-scrollbar {
-webkit-appearance: none;
width: 4px;
height: 5px;
}
::-webkit-scrollbar-thumb {
border-radius: 5px;
background-color: rgba(0,0,0,.5);
-webkit-box-shadow: 0 0 1px rgba(255,255,255,.5);
}
</style>""")
parser = MyHTMLParser(sbuf, dstdir, srcdir)
parser.feed(data)
for line in sbuf.getvalue().splitlines():
fo.write((" %s\n" % line).encode("utf-8"))
fo.close()