From e1871eafba72df8bcd6c29e5709577fd9a7e21d9 Mon Sep 17 00:00:00 2001
From: Steven
Date: Sat, 13 Jul 2024 09:42:21 +0800
Subject: [PATCH] feat: impl html element node

---
 ast/ast.go                    |  1 +
 ast/inline.go                 | 28 +++++++++++++++++++-
 parser/html_element.go        | 50 +++++++++++++++++++++++++++++++++++
 parser/html_element_test.go   | 31 ++++++++++++++++++++++
 parser/parser.go              |  1 +
 parser/parser_test.go         | 23 ++++++++++++++++
 parser/tokenizer/tokenizer.go |  3 +++
 7 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 parser/html_element.go
 create mode 100644 parser/html_element_test.go

diff --git a/ast/ast.go b/ast/ast.go
index 17c5088..f5bb4d2 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -37,6 +37,7 @@ const (
     SuperscriptNode       NodeType = "SUPERSCRIPT"
     ReferencedContentNode NodeType = "REFERENCED_CONTENT"
     SpoilerNode           NodeType = "SPOILER"
+    HTMLElementNode       NodeType = "HTML_ELEMENT"
 )
 
 type Node interface {
diff --git a/ast/inline.go b/ast/inline.go
index 9df02a0..7e47102 100644
--- a/ast/inline.go
+++ b/ast/inline.go
@@ -1,6 +1,9 @@
 package ast
 
-import "fmt"
+import (
+    "fmt"
+    "strings"
+)
 
 type BaseInline struct {
     BaseNode
@@ -267,3 +270,26 @@ func (*Spoiler) Type() NodeType {
 func (n *Spoiler) Restore() string {
     return fmt.Sprintf("||%s||", n.Content)
 }
+
+type HTMLElement struct {
+    BaseInline
+
+    TagName    string
+    Attributes map[string]string
+}
+
+func (*HTMLElement) Type() NodeType {
+    return HTMLElementNode
+}
+
+func (n *HTMLElement) Restore() string {
+    attributes := []string{}
+    for key, value := range n.Attributes {
+        attributes = append(attributes, fmt.Sprintf(`%s="%s"`, key, value))
+    }
+    attrStr := ""
+    if len(attributes) > 0 {
+        attrStr = " " + strings.Join(attributes, " ")
+    }
+    return fmt.Sprintf("<%s%s />", n.TagName, attrStr)
+}
diff --git a/parser/html_element.go b/parser/html_element.go
new file mode 100644
index 0000000..8a39105
--- /dev/null
+++ b/parser/html_element.go
@@ -0,0 +1,50 @@
+package parser
+
+import (
+    "slices"
+
+    "github.com/usememos/gomark/ast"
+    "github.com/usememos/gomark/parser/tokenizer"
+)
+
+type HTMLElementParser struct{}
+
+func NewHTMLElementParser() *HTMLElementParser {
+    return &HTMLElementParser{}
+}
+
+var (
+    availableHTMLElements = []string{
+        "br",
+    }
+)
+
+func (*HTMLElementParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
+    if len(tokens) < 5 {
+        return nil, 0
+    }
+    if tokens[0].Type != tokenizer.LessThan {
+        return nil, 0
+    }
+    tagName := tokenizer.Stringify([]*tokenizer.Token{tokens[1]})
+    if !slices.Contains(availableHTMLElements, tagName) {
+        return nil, 0
+    }
+
+    greaterThanIndex := tokenizer.FindUnescaped(tokens, tokenizer.GreaterThan)
+    if greaterThanIndex+1 < 5 || tokens[greaterThanIndex-1].Type != tokenizer.Slash || tokens[greaterThanIndex-2].Type != tokenizer.Space {
+        return nil, 0
+    }
+
+    matchedTokens := tokens[:greaterThanIndex+1]
+    attributeTokens := matchedTokens[2 : greaterThanIndex-2]
+    // TODO: Implement attribute parser.
+    if len(attributeTokens) != 0 {
+        return nil, 0
+    }
+
+    return &ast.HTMLElement{
+        TagName:    tagName,
+        Attributes: make(map[string]string),
+    }, len(matchedTokens)
+}
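Note: the five-token minimum in Match above comes from the smallest possible element. "<br />" tokenizes to exactly five tokens (LessThan, a "br" text token, Space, Slash, GreaterThan). A minimal round-trip sketch, not part of the patch, assuming the gomark module path used above:

    package main

    import (
        "fmt"

        "github.com/usememos/gomark/parser"
        "github.com/usememos/gomark/parser/tokenizer"
    )

    func main() {
        // "<br />" tokenizes to LessThan, Text("br"), Space, Slash, GreaterThan.
        tokens := tokenizer.Tokenize("<br />")

        // Match returns the parsed node plus the number of tokens consumed,
        // which tells the inline parsing loop how far to advance.
        node, size := parser.NewHTMLElementParser().Match(tokens)
        if node != nil {
            fmt.Println(size)           // 5
            fmt.Println(node.Restore()) // <br />
        }
    }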
diff --git a/parser/html_element_test.go b/parser/html_element_test.go
new file mode 100644
index 0000000..0ec9b0b
--- /dev/null
+++ b/parser/html_element_test.go
@@ -0,0 +1,31 @@
+package parser
+
+import (
+    "testing"
+
+    "github.com/stretchr/testify/require"
+
+    "github.com/usememos/gomark/ast"
+    "github.com/usememos/gomark/parser/tokenizer"
+    "github.com/usememos/gomark/restore"
+)
+
+func TestHTMLElementParser(t *testing.T) {
+    tests := []struct {
+        text        string
+        htmlElement ast.Node
+    }{
+        {
+            text: "<br />",
+            htmlElement: &ast.HTMLElement{
+                TagName: "br",
+            },
+        },
+    }
+
+    for _, test := range tests {
+        tokens := tokenizer.Tokenize(test.text)
+        node, _ := NewHTMLElementParser().Match(tokens)
+        require.Equal(t, restore.Restore([]ast.Node{test.htmlElement}), restore.Restore([]ast.Node{node}))
+    }
+}
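The Restore method handles attributes even though Match still rejects them (the TODO above). A sketch of the intended serialization, using a hypothetical class attribute; note that with more than one attribute the output order would be arbitrary, because Restore iterates a Go map:

    package main

    import (
        "fmt"

        "github.com/usememos/gomark/ast"
    )

    func main() {
        // Each attribute is rendered as key="value". The value below is
        // hypothetical, since the attribute parser is not implemented yet.
        node := &ast.HTMLElement{
            TagName:    "br",
            Attributes: map[string]string{"class": "divider"},
        }
        fmt.Println(node.Restore()) // <br class="divider" />
    }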
", + htmlElement: &ast.HTMLElement{ + TagName: "br", + }, + }, + } + + for _, test := range tests { + tokens := tokenizer.Tokenize(test.text) + node, _ := NewHTMLElementParser().Match(tokens) + require.Equal(t, restore.Restore([]ast.Node{test.htmlElement}), restore.Restore([]ast.Node{node})) + } +} diff --git a/parser/parser.go b/parser/parser.go index 0e5ab3a..d142de1 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -79,6 +79,7 @@ var defaultInlineParsers = []InlineParser{ NewReferencedContentParser(), NewTagParser(), NewStrikethroughParser(), + NewHTMLElementParser(), NewLineBreakParser(), NewTextParser(), } diff --git a/parser/parser_test.go b/parser/parser_test.go index a733909..1efa65e 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -234,6 +234,29 @@ func TestParser(t *testing.T) { }, }, }, + { + text: "Hello\nworld
", + nodes: []ast.Node{ + &ast.Paragraph{ + Children: []ast.Node{ + &ast.Text{ + Content: "Hello", + }, + }, + }, + &ast.LineBreak{}, + &ast.Paragraph{ + Children: []ast.Node{ + &ast.Text{ + Content: "world", + }, + &ast.HTMLElement{ + TagName: "br", + }, + }, + }, + }, + }, } for _, test := range tests { diff --git a/parser/tokenizer/tokenizer.go b/parser/tokenizer/tokenizer.go index 77d9659..8d682f5 100644 --- a/parser/tokenizer/tokenizer.go +++ b/parser/tokenizer/tokenizer.go @@ -26,6 +26,7 @@ const ( Colon TokenType = ":" Caret TokenType = "^" Backslash TokenType = "\\" + Slash TokenType = "/" NewLine TokenType = "\n" Space TokenType = " " ) @@ -96,6 +97,8 @@ func Tokenize(text string) []*Token { tokens = append(tokens, NewToken(Caret, "^")) case '\\': tokens = append(tokens, NewToken(Backslash, `\`)) + case '/': + tokens = append(tokens, NewToken(Slash, "/")) case '\n': tokens = append(tokens, NewToken(NewLine, "\n")) case ' ':