Skip to content

Commit

Permalink
RFC: html: add parse_html
Browse files (browse the repository at this point in the history)
Signed-off-by: Florian Scherf <[email protected]>
  • Loading branch information
fscherf committed Jul 7, 2023
1 parent d0b50b9 commit fb9dbac
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
2 changes: 1 addition & 1 deletion lona/html/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@
from lona.html.nodes.scripting import NoScript, Script, Canvas
from lona.html.nodes.forms.select2 import Select2, Option2
from lona.html.nodes.web_components import Template, Slot
from lona.html.parsing import NodeHTMLParser, parse_html
from lona.html.nodes.forms.select import Select, Option
from lona.html.nodes.demarcating_edits import Ins, Del
from lona.html.nodes.svg_and_mathml import Math, SVG
from lona.events.event_types import * # NOQA: F403
from lona.html.nodes.sectioning_root import Body
from lona.html.nodes.raw_nodes import RawHTML
from lona.html.parsing import NodeHTMLParser
from lona.html.widgets import HTML as HTML1
from lona.html.parsing import HTML as HTML2
from lona.compat import get_client_version
Expand Down
31 changes: 22 additions & 9 deletions lona/html/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
tagfind_tolerant,
HTMLParser,
)
from typing import List, Dict
from typing import List, Dict, cast
from html import unescape
import logging

Expand Down Expand Up @@ -198,11 +198,15 @@ def handle_endtag(self, tag):
self.set_current_node(self._node.parent)


def html_string_to_node_list(html_string, use_high_level_nodes=True,
node_classes=None):
def parse_html(
html_string: str,
use_high_level_nodes: bool = True,
node_classes: Dict[str, AbstractNode] | None = None,
flat: bool = True,
) -> AbstractNode | List[AbstractNode]:

root_node = Node()
nodes = []
root_node: Node = Node()
nodes: List[AbstractNode] = []

html_parser = NodeHTMLParser(
use_high_level_nodes=use_high_level_nodes,
Expand All @@ -221,6 +225,9 @@ def html_string_to_node_list(html_string, use_high_level_nodes=True,
node.remove()
nodes.append(node)

if flat and len(nodes) == 1:
return nodes[0]

return nodes


Expand All @@ -230,6 +237,8 @@ def HTML(
node_classes: Dict[str, AbstractNode] | None = None,
) -> AbstractNode:

# TODO: remove HTML parsing in 2.0

_nodes: List[AbstractNode] = []

for node in nodes:
Expand All @@ -243,10 +252,14 @@ def HTML(

# html string
elif '<' in node or '>' in node:
parsed_nodes = html_string_to_node_list(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
parsed_nodes = cast(
list,
parse_html(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
flat=False,
),
)

if len(nodes) > 1:
Expand Down
5 changes: 3 additions & 2 deletions lona/html/widgets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from lona.html.parsing import html_string_to_node_list
from lona.html.text_node import TextNode
from lona.html.parsing import parse_html
from lona.html.widget import Widget


Expand All @@ -22,10 +22,11 @@ def __init__(self, *nodes, use_high_level_nodes=True, node_classes=None):
self.nodes.append(HTML(node))

else:
self.nodes = html_string_to_node_list(
self.nodes = parse_html(
html_string=node,
use_high_level_nodes=use_high_level_nodes,
node_classes=node_classes or {},
flat=False,
)

else:
Expand Down

0 comments on commit fb9dbac

Please sign in to comment.