-
Notifications
You must be signed in to change notification settings - Fork 4
/
jinja_filters.py
66 lines (44 loc) · 2.09 KB
/
jinja_filters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- coding: utf-8 -*-
from lxml import html
import lxml.etree as le
###############################################################################
"""
New filters must be added to TEMPLATE_FILTERS in conf.py
"""
###############################################################################
def get_lead_text(post_text):
    """Extract the lead text from a post.

    The lead text is the inner HTML of the first <blockquote> whose class
    attribute is exactly 'highlights'.

    Args:
        post_text: HTML source of the post.

    Returns:
        The blockquote's content (leading text followed by its serialized
        child elements), or '' when the post is empty or contains no such
        blockquote.
    """
    # Local import: the module-level name `html` is bound to lxml.html.
    from xml.sax.saxutils import escape

    # html.fromstring() rejects empty documents; an empty post has no lead.
    if not post_text or not post_text.strip():
        return ''

    tree = html.fromstring(post_text)
    highlights = tree.xpath('//blockquote[@class="highlights"]')
    if not highlights:
        return ''

    # Inner-HTML idiom, see
    # http://stackoverflow.com/questions/29887576/xpath-extract-current-node-content-including-all-child-node
    lead = highlights[0]
    # .text is None when the element starts directly with a child tag, and it
    # holds decoded text, so it is re-escaped to match the serialized children.
    leading_text = escape(lead.text or '')
    children = ''.join(le.tostring(child, encoding='unicode') for child in lead)
    return leading_text + children
###############################################################################
def get_lead_figure(post_text):
    """Extract the lead figure from a post.

    The lead figure is the inner HTML of the first <figure> whose class
    attribute is exactly 'lead-figure'.

    Args:
        post_text: HTML source of the post.

    Returns:
        The figure's content (leading text followed by its serialized child
        elements), or the placeholder '<p>ERROR</p>' when the post is empty
        or contains no such figure.
    """
    # Local import: the module-level name `html` is bound to lxml.html.
    from xml.sax.saxutils import escape

    missing = '<p>ERROR</p>'

    # html.fromstring() rejects empty documents; treat that as "no figure".
    if not post_text or not post_text.strip():
        return missing

    tree = html.fromstring(post_text)
    figures = tree.xpath('//figure[@class="lead-figure"]')
    if not figures:
        return missing

    figure = figures[0]
    # .text is None when the figure starts directly with a child tag (e.g. an
    # <img>); it holds decoded text, so it is re-escaped to match the
    # serialized children.
    leading_text = escape(figure.text or '')
    children = ''.join(le.tostring(child, encoding='unicode') for child in figure)
    return leading_text + children
###############################################################################
def remove_lead_figure(post_text):
    """Remove the lead figure from a post.

    This is the counterpart of get_lead_figure, for when the figure and the
    rest of the post are rendered separately. The lead figure is the first
    <figure> whose class attribute is exactly 'lead-figure'.

    Args:
        post_text: HTML source of the post.

    Returns:
        The post serialized without the lead figure. A post with no lead
        figure is returned re-serialized; an empty post is returned as is
        ('' for None).
    """
    # html.fromstring() rejects empty documents; there is nothing to remove.
    if not post_text or not post_text.strip():
        return post_text or ''

    tree = html.fromstring(post_text)
    figures = tree.xpath('//figure[@class="lead-figure"]')
    if figures:
        figure = figures[0]
        if figure is tree:
            # The post consists of only the figure: html.fromstring() handed
            # back the figure itself, so nothing is left once it is removed.
            return ''
        # drop_tree() removes the element but keeps its tail text (the post
        # text right after </figure>), which getparent().remove() would drop.
        figure.drop_tree()
    return le.tostring(tree, encoding='unicode')
###############################################################################