forked from mooniak/textual-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hparser.py
33 lines (26 loc) · 904 Bytes
/
hparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#
# hparser.py
#
# Copyright (c) 2015,
# Mooniak <[email protected]>
# Ayantha Randika <[email protected]>
# Improvements: https://github.com/mooniak/textual-tools
# Released under the GNU General Public License version 3 or later.
# See accompanying LICENSE file for details.
from html.parser import HTMLParser
class HTMLDataParser(HTMLParser):
    """Collect the human-readable text of an HTML document.

    Feed markup with ``feed()``; afterwards ``page`` holds every non-blank
    text chunk found outside ``<script>`` and ``<style>`` elements. Each
    chunk is stripped of surrounding whitespace and terminated by a newline.
    """

    # Elements whose content is code or styling rather than readable text.
    _SKIPPED_TAGS = ("script", "style")

    def __init__(self):
        super().__init__()
        # Accumulated text, one stripped chunk per line.
        self.page = ""
        # Name of the script/style element currently open, or None.
        self._skipping = None

    def handle_starttag(self, tag, attrs):
        """Start ignoring text when a script/style element opens."""
        if tag in self._SKIPPED_TAGS:
            self._skipping = tag

    def handle_endtag(self, tag):
        """Resume collecting text once the skipped element closes."""
        # ``lasttag`` is not reset by end tags, so relying on it would also
        # drop real text that follows ``</script>`` or ``</style>``.
        if tag == self._skipping:
            self._skipping = None

    def handle_data(self, data):
        """Append a text chunk to ``page`` unless it is blank or skipped."""
        if self._skipping is not None:
            return
        text = str(data).strip()
        # Whitespace-only chunks strip down to "" and must not add blank
        # lines. "b'" is filtered as in the original code; presumably it is
        # the prefix left over when str(bytes) is fed to the parser
        # (NOTE(review): confirm against callers).
        if text and text != "b'":
            self.page += text + "\n"
class HTMLUrlParser(HTMLParser):
    """Collect the ``href`` targets of every ``<a>`` element in a document.

    Feed markup with ``feed()``; afterwards ``url_list`` holds the href
    values in document order.
    """

    def __init__(self):
        super().__init__()
        # href values of the anchors seen so far, in document order.
        self.url_list = []

    def handle_starttag(self, tag, attrs):
        """Record the href of an anchor tag, if it has one.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``. The href is looked up by name because it is not
        guaranteed to be the first attribute, and an anchor may have no
        attributes at all.
        """
        if tag != "a":
            return
        href = next((value for name, value in attrs if name == "href"), None)
        # A missing or valueless href (``<a href>``) carries no URL.
        if href is not None:
            self.url_list.append(href)