Skip to content

Commit

Permalink
Fix #79 :: Dom handler (PR #86)
Browse files Browse the repository at this point in the history
* correctly set a ROOT node as the first node of the dom object.
set the DECL node as the first child node of ROOT.
fix indent in dom:toXml().

* fix detection of an empty ELEMENT node.

* XmlParser: fix parsing of a DTD element.
dom handler: fix handling a DTD element.

* Fix dom.lua docs

* Rename people.xml to people1.xml
Moves complex tags (DOCTYPE and CDATA)
to people2.xml to make people1 a basic XML.

* Restructure example5.lua to
parse the XML files whose names
are defined by an array inside the example,
instead of receiving them in the STDIN.

This way, the entry to run this example
inside the Makefile was removed.
Running a specific example inside the Makefile
is too specific.

If that was for test purposes, they should be inside the test files.

---------

Signed-off-by: Manoel Campos <[email protected]>
Co-authored-by: Sudheer Hebbale <[email protected]>
Co-authored-by: Manoel Campos <[email protected]>
  • Loading branch information
3 people authored Feb 3, 2023
1 parent f6cf04b commit 7e36b2e
Show file tree
Hide file tree
Showing 10 changed files with 204 additions and 34 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@ test:
lint:
docker-compose run --rm lint

.PHONY: lint test all
clean:
find . -name '*~' -delete

.PHONY: lint test all example5 clean
6 changes: 3 additions & 3 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ local xml = [[
<person type="legal">
<name>University of Brasília</name>
<city>Brasília-DF</city>
</person>
</people>
</person>
</people>
]]
--Instantiates the XML parser
Expand Down Expand Up @@ -121,7 +121,7 @@ Execute `lua testxml.lua -help` on the terminal for more details.

== Running tests

=== Requeriments
=== Requirements

You must have https://docs.docker.com/compose/install/[installed docker and docker compose].

Expand Down
8 changes: 4 additions & 4 deletions XmlParser.lua
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ local function hexadecimalToHtmlChar(code)
end

local XmlParser = {
-- Private attribures/functions
-- Private attributes/functions
_XML = '^([^<]*)<(%/?)([^>]-)(%/?)>',
_ATTR1 = '([%w-:_]+)%s*=%s*"(.-)"',
_ATTR2 = '([%w-:_]+)%s*=%s*\'(.-)\'',
Expand All @@ -56,10 +56,10 @@ local XmlParser = {
_WS = '^%s*$',
_DTD1 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*(%b[])%s*>',
_DTD2 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*(%b[])%s*>',
--_DTD3 = '<!DOCTYPE%s+(.-)%s*(%b[])%s*>',
_DTD3 = '<!DOCTYPE%s.->',
_DTD3 = '<!DOCTYPE%s+(.-)%s+%[%s+.-%]>', -- Inline DTD Schema
_DTD4 = '<!DOCTYPE%s+(.-)%s+(SYSTEM)%s+["\'](.-)["\']%s*>',
_DTD5 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s+["\'](.-)["\']%s*>',
_DTD6 = '<!DOCTYPE%s+(.-)%s+(PUBLIC)%s+["\'](.-)["\']%s*>',

--Matches an attribute with non-closing double quotes (The equal sign is matched non-greedly by using =+?)
_ATTRERR1 = '=+?%s*"[^"]*$',
Expand Down Expand Up @@ -246,7 +246,7 @@ end

local function _parseDtd(self, xml, pos)
-- match,endMatch,root,type,name,uri,internal
local dtdPatterns = {self._DTD1, self._DTD2, self._DTD3, self._DTD4, self._DTD5}
local dtdPatterns = {self._DTD1, self._DTD2, self._DTD3, self._DTD4, self._DTD5, self._DTD6}

for _, dtd in pairs(dtdPatterns) do
local m,e,r,t,n,u,i = string.find(xml, dtd, pos)
Expand Down
3 changes: 2 additions & 1 deletion books.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<?xml version="1.0"?>
<!-- Source: https://msdn.microsoft.com/en-us/library/ms762271(v=vs.85).aspx -->
<!DOCTYPE name PUBLIC "-//Beginning XML//DTD Address Example//EN">
<catalog>
<book>
<author>Gambardella, Matthew</author>
Expand Down Expand Up @@ -30,4 +31,4 @@
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
</catalog>
</catalog>
2 changes: 1 addition & 1 deletion example1.lua
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ print("xml2lua v" .. xml2lua._VERSION.."\n")
local handler = require("xmlhandler.tree")


local xml = xml2lua.loadFile("people.xml")
local xml = xml2lua.loadFile("people1.xml")

--Instantiates the XML parser
local parser = xml2lua.parser(handler)
Expand Down
6 changes: 3 additions & 3 deletions example2.lua
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ print("xml2lua v" .. xml2lua._VERSION.."\n")
--Uses a handler that converts the XML to a Lua table
local handler = require("xmlhandler.tree")

----------------------- people.xml parse code -----------------------
print("people.xml")
----------------------- people1.xml parse code -----------------------
print("people1.xml")
local peopleHandler = handler:new()
local peopleParser = xml2lua.parser(peopleHandler)
peopleParser:parse(xml2lua.loadFile("people.xml"))
peopleParser:parse(xml2lua.loadFile("people1.xml"))
xml2lua.printable(peopleHandler.root)

----------------------- books.xml parse code -----------------------
Expand Down
20 changes: 20 additions & 0 deletions example5.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env lua
-- Example 5: round-trip XML documents through the dom handler.
-- Each file listed below (they contain DOCTYPE and CDATA tags) is loaded,
-- parsed with the dom handler and serialized back to XML on STDOUT.
local xml2lua = require("xml2lua")
local xmlhandler = require("xmlhandler.dom")

local files = {"books.xml", "people2.xml"}
for idx = 1, #files do
    local file = files[idx]
    print(file, "-----------------------------------------------------------")

    local content = xml2lua.loadFile(file)
    -- A fresh handler and parser are created for every document.
    local dom = xmlhandler:new()
    local parser = xml2lua.parser(dom)
    parser:parse(content)

    if dom.root then
        print(dom:toXml(dom.root))
    else
        print("parsing ", file , " as XML failed")
    end
end
20 changes: 20 additions & 0 deletions people1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<people>
<person type="natural">
<!-- Just an example comment that will be ignored by the tree
handler and processed by the other ones. -->

<name>Manoel</name>
<city>Palmas-TO</city>
</person>
<person type="natural">
<name>Breno</name>
<city>Palmas-TO</city>
</person>
<person type="legal">
<name>University of Brasília</name>
<city>Brasília-DF</city>
<empty></empty>
<void/>
</person>
</people>
15 changes: 12 additions & 3 deletions people.xml → people2.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- A more complex people XML with DOCTYPE and CDATA tags -->

<!DOCTYPE person [
<!ELEMENT person (name,city,empty,void)>
<!ELEMENT name (#PCDATA)>
<!ELEMENT city (#PCDATA)>
<!ELEMENT void (#PCDATA)>
<!ELEMENT empty (#PCDATA)>
]>
<people>
<person type="natural">
<![CDATA[
Expand All @@ -7,8 +16,6 @@
Its content is extracted but not processed.
]]>

<!-- Just an example comment that will be ignored by the tree handler and processed by the other ones. -->

<name>Manoel</name>
<city>Palmas-TO</city>
</person>
Expand All @@ -19,5 +26,7 @@
<person type="legal">
<name>University of Brasília</name>
<city>Brasília-DF</city>
</person>
<empty></empty>
<void/>
</person>
</people>
153 changes: 135 additions & 18 deletions xmlhandler/dom.lua
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ function dom:starttag(tag)
_children = {}
}

if self.root == nil then
if not self.root then
self.root = node
end

Expand All @@ -76,7 +76,7 @@ end
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- is a table containing the attributes of the tag
function dom:endtag(tag, s)
function dom:endtag(tag)
--Table representing the containing tag of the current tag
local prev = self._stack[#self._stack]

Expand All @@ -86,6 +86,22 @@ function dom:endtag(tag, s)

table.remove(self._stack)
self.current = self._stack[#self._stack]
if not self.current then
local node = { _children = {}, _type = "ROOT" }
if self.decl then
table.insert(node._children, self.decl)
self.decl = nil
end
if self.dtd then
table.insert(node._children, self.dtd)
self.dtd = nil
end
if self.root then
table.insert(node._children, self.root)
self.root = node
end
self.current = node
end
end

---Parses a tag content.
Expand Down Expand Up @@ -127,27 +143,128 @@ end
-- where name is the name of the tag and attrs
-- is a table containing the attributes of the tag
function dom:decl(tag)
if self.options.declNode then
local node = { _type = "DECL",
_name = tag.name,
_attr = tag.attrs,
}
table.insert(self.current._children, node)
end
if self.options.declNode then
self.decl = { _type = "DECL",
_name = tag.name,
_attr = tag.attrs,
}
end
end

---Parses a DTD tag.
-- @param tag a {name, attrs} table
-- where name is the name of the tag and attrs
-- @param tag a {name, value} table
-- where name is the name of the tag and value
-- is a table containing the attributes of the tag
function dom:dtd(tag)
if self.options.dtdNode then
local node = { _type = "DTD",
_name = tag.name,
_attr = tag.attrs,
}
table.insert(self.current._children, node)
end
if self.options.dtdNode then
self.dtd = { _type = "DTD",
_name = tag.name,
_text = tag.value
}
end
end

--- XML escape characters for a TEXT node.
-- Replaces `&`, `<` and `>` with `&amp;`, `&lt;` and `&gt;`.
-- @param s a string
-- @return @p s XML escaped. Exactly one value is returned: the substitution
-- count that string.gsub yields as a second result is discarded.
local function xmlEscape(s)
    -- '&' has to be escaped first, otherwise the ampersands introduced by
    -- the '&lt;' / '&gt;' replacements would be escaped a second time.
    local escaped = string.gsub(s, '&', '&amp;')
    escaped = string.gsub(escaped, '<', '&lt;')
    -- Assigning to a single variable truncates gsub's (string, count) pair.
    escaped = string.gsub(escaped, '>', '&gt;')
    return escaped
end

--- Return a string of XML attributes.
-- Every attribute is rendered as ` name="value"` (with a leading space) and
-- all attributes are concatenated. Attributes are emitted sorted by name,
-- because `pairs` gives no ordering guarantee and the output should be
-- reproducible.
-- @param tab table with XML attribute pairs. key and value are supposed to be strings.
-- May be nil, in which case an empty string is returned.
-- @return a string. For a non-table, non-nil @p tab a 'BUG:unknown type:...'
-- marker string is returned instead.
local function attrsToStr(tab)
    if not tab then
        return ''
    end
    if type(tab) ~= 'table' then
        return 'BUG:unknown type:' .. type(tab)
    end

    -- Collect and sort the attribute names to get a stable output order.
    local names = {}
    for name in pairs(tab) do
        names[#names + 1] = name
    end
    table.sort(names, function(a, b)
        return tostring(a) < tostring(b)
    end)

    local parts = {}
    for i, name in ipairs(names) do
        -- determine a safe quote character
        local val = tostring(tab[name])
        local found_single_quote = string.find(val, "'", 1, true)
        local found_double_quote = string.find(val, '"', 1, true)
        local quot = '"'
        if found_single_quote and found_double_quote then
            -- no quote character is safe: XML escape both of them
            val = string.gsub(val, '"', '&quot;')
            val = string.gsub(val, "'", '&apos;')
        elseif found_double_quote then
            quot = "'"
        end
        -- Accumulate every attribute (previously each iteration overwrote
        -- the result, so only one attribute survived).
        parts[i] = ' ' .. tostring(name) .. '=' .. quot .. val .. quot
    end
    return table.concat(parts)
end

--- Return a XML formatted string of @p node.
-- Recursively serializes the node and its children. Children are visited
-- with `ipairs` so that they are always emitted in document order.
-- @param node a Node object (table) of the xml2lua DOM tree structure.
-- @param indentLevel a number; the node is indented by `indentLevel + 2`
-- indent units (none when that is not positive) and every nesting level
-- adds 2 to it.
-- @return a string. Malformed input yields a 'BUG:...' marker string.
local function toXmlStr(node, indentLevel)
    if not node then
        return 'BUG:node==nil'
    end
    if not node._type then
        return 'BUG:node._type==nil'
    end

    -- string.rep returns '' for a count <= 0, which matches the empty
    -- indentation used for the top-level nodes.
    local indent = string.rep(' ', indentLevel + 2)
    local kind = node._type

    if kind == 'ROOT' then
        -- ROOT has no markup of its own, only its children are rendered.
        local parts = {}
        for i, child in ipairs(node._children) do
            parts[i] = toXmlStr(child, indentLevel + 2)
        end
        return table.concat(parts)
    elseif kind == 'ELEMENT' then
        local open = indent .. '<' .. node._name .. attrsToStr(node._attr)
        local children = node._children

        -- an ELEMENT without children is written as a self-closing tag
        if not children or #children == 0 then
            return open .. '/>\n'
        end

        local parts = { open, '>\n' }
        for _, child in ipairs(children) do
            parts[#parts + 1] = toXmlStr(child, indentLevel + 2)
        end
        parts[#parts + 1] = indent .. '</' .. node._name .. '>\n'
        return table.concat(parts)
    elseif kind == 'TEXT' then
        return indent .. xmlEscape(node._text) .. '\n'
    elseif kind == 'COMMENT' then
        return indent .. '<!--' .. node._text .. '-->\n'
    elseif kind == 'PI' then
        return indent .. '<?' .. node._name .. ' ' .. node._attr._text .. '?>\n'
    elseif kind == 'DECL' then
        return indent .. '<?' .. node._name .. attrsToStr(node._attr) .. '?>\n'
    elseif kind == 'DTD' then
        return indent .. '<!' .. node._name .. ' ' .. node._text .. '>\n'
    end
    return 'BUG:unknown type:' .. tostring(kind)
end

---Serializes a dom tree to a string in XML format.
-- @param node a root object, typically created with `dom` XML parser handler.
-- @return a string, XML formatted.
function dom:toXml(node)
    -- Start level handed to the recursive serializer together with the node.
    local startLevel = -4
    return toXmlStr(node, startLevel)
end

---Parses CDATA tag content.
Expand Down

0 comments on commit 7e36b2e

Please sign in to comment.