Skip to content

Commit

Permalink
Merge pull request #10 from eeditiones/edep
Browse files Browse the repository at this point in the history
Refactor TEI output mode
  • Loading branch information
wolfgangmm authored Oct 11, 2023
2 parents 2397c1b + 67bf88a commit 90d713e
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 161 deletions.
172 changes: 172 additions & 0 deletions content/ext-docx.xql
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
xquery version "3.1";

(:~
: Extension functions for docx to TEI.
:)
module namespace pmf="http://www.tei-c.org/tei-simple/xquery/functions/docx";

declare namespace tf = "http://existsolutions.com/xquery/functions/tei";

declare namespace tei="http://www.tei-c.org/ns/1.0";

(:~
 : Local names of the inline elements that pmf:combine merges when instances with the same
 : local name directly follow each other.
 :)
declare variable $pmf:INLINE_ELEMENTS := (
"hi", "supplied", "persName", "placeName", "term"
);

(:~
 : Post-processing entry point for the docx to TEI output: merges adjacent list items and
 : inline elements, then wraps headings and the content following them into nested divisions.
 :
 : Division wrapping is only applied when the combined result is exactly one tei:TEI element.
 : Any other result (empty input, a fragment, several roots) is returned as combined instead
 : of failing the argument type check of pmf:create-divisions.
 :
 : @param $config the processing model configuration (not used by this function)
 : @param $input the nodes produced by the transformation
 : @return the post-processed nodes
 :)
declare function pmf:finish($config as map(*), $input as node()*) {
    let $combined := pmf:combine($input)
    return
        if ($combined instance of element(tei:TEI)) then
            pmf:create-divisions($combined)
        else
            $combined
};

(:~
 : Rebuild the TEI document so that the headings found directly below the body become
 : nested divisions.
 :
 : If the body has no tei:head child, the document is returned as is. Otherwise a new TEI
 : element is created which carries over the teiHeader and the body (attributes plus
 : restructured content); nothing else from the input document is copied.
 :
 : @param $tei the TEI root element
 : @return the restructured document, or $tei itself if there are no headings
 :)
declare %private function pmf:create-divisions($tei as element(tei:TEI)) {
    let $body := $tei/tei:text/tei:body
    return
        if (empty($body/tei:head)) then
            $tei
        else
            <TEI xmlns="http://www.tei-c.org/ns/1.0">
                { $tei/tei:teiHeader }
                <text>
                    <body>
                    {
                        $body/@*,
                        pmf:wrap-divisions($body/node())
                    }
                    </body>
                </text>
            </TEI>
};

(:~
 : Convert a flat sequence of sibling nodes into nested tei:div elements, following a
 : "tumbling window" approach: every tei:head opens a division which extends up to the next
 : following tei:head sibling whose tf:level is less than or equal to its own. A head without
 : tf:level is treated as level 0. Nodes in front of the first head are passed through unchanged.
 :
 : The tf:level attribute is dropped from the heads written to the output.
 :
 : @param $body-nodes the nodes still to be processed
 : @return the nodes with headings and the content following them wrapped into divisions
 :)
declare %private function pmf:wrap-divisions($body-nodes as node()*) {
    if (empty($body-nodes)) then
        ()
    else
        let $first := $body-nodes[1]
        return
            typeswitch ($first)
                case element(tei:head) return
                    let $depth := number(($first/@tf:level, 0)[1])
                    (: the next heading on the same or a higher level closes this division :)
                    let $boundary := ($first/following-sibling::tei:head[@tf:level <= $depth])[1]
                    let $remainder := $body-nodes[. is $boundary or . >> $boundary]
                    let $content :=
                        if ($remainder) then
                            $body-nodes[. >> $first and . << $boundary]
                        else
                            $body-nodes[. >> $first]
                    return (
                        <div xmlns="http://www.tei-c.org/ns/1.0">
                            <head>
                            {
                                $first/(@* except @tf:level),
                                $first/node()
                            }
                            </head>
                            { pmf:wrap-divisions($content) }
                        </div>,
                        pmf:wrap-divisions($remainder)
                    )
                default return (
                    $first,
                    pmf:wrap-divisions(tail($body-nodes))
                )
};

(:~
 : Turn a flat run of list items into tei:item elements, nesting deeper items in sub-lists.
 :
 : For the first item, the siblings directly following it with a higher tf:level are collected
 : (pmf:get-following-nested) and written into a nested tei:list inside that item; the nested
 : list takes its type from the tf:type attribute of the first nested item. The remaining
 : items, minus the nested ones, are then processed recursively.
 :
 : @param $items the items to process
 : @return one tei:item per item on the current level
 :)
declare %private function pmf:wrap-list($items as element()*) {
    if (empty($items)) then
        ()
    else
        let $current := $items[1]
        let $children :=
            pmf:get-following-nested($current/following-sibling::*, (), $current/@tf:level)
        let $child-type := $children[1]/@tf:type
        return (
            <item xmlns="http://www.tei-c.org/ns/1.0">
                <p>{ $current/node() }</p>
                {
                    if (exists($children)) then
                        <list>
                            { if ($child-type) then attribute type { $child-type } else () }
                            { pmf:wrap-list($children) }
                        </list>
                    else
                        ()
                }
            </item>,
            pmf:wrap-list(tail($items) except $children)
        )
};

(:~
 : Collect the leading run of nodes which have the given local name and, if a level is
 : given, a tf:level greater than or equal to it. Collection stops at the first node which
 : does not qualify.
 :
 : @param $nodes the candidate nodes, normally the following siblings of a start node
 : @param $name the local name a node must have to be collected
 : @param $siblings accumulator for the nodes collected so far; pass () initially
 : @param $level minimum tf:level, or the empty sequence to ignore levels
 : @return $siblings followed by the collected nodes
 :)
declare %private function pmf:get-following($nodes as node()*, $name as xs:string, $siblings as node()*,
    $level as item()?) {
    let $candidate := $nodes[1]
    let $name-matches := local-name($candidate) = $name
    let $level-matches := empty($level) or number($candidate/@tf:level) >= number($level)
    return
        if ($name-matches and $level-matches) then
            pmf:get-following(tail($nodes), $name, ($siblings, $candidate), $level)
        else
            $siblings
};

(:~
 : Collect the leading run of tei:item elements which are nested below a given level, i.e.
 : whose tf:level is strictly greater than $level. Without a level every tei:item qualifies.
 : Collection stops at the first node which does not qualify.
 :
 : @param $nodes the candidate nodes, normally the following siblings of an item
 : @param $siblings accumulator for the items collected so far; pass () initially
 : @param $level the tf:level of the parent item, or the empty sequence
 : @return $siblings followed by the collected items
 :)
declare %private function pmf:get-following-nested($nodes as node()*, $siblings as node()*,
    $level as item()?) {
    let $candidate := $nodes[1]
    let $is-item := $candidate instance of element(tei:item)
    let $is-deeper := empty($level) or number($candidate/@tf:level) > number($level)
    return
        if ($is-item and $is-deeper) then
            pmf:get-following-nested(tail($nodes), ($siblings, $candidate), $level)
        else
            $siblings
};

(:~
 : Recursively copy the given nodes, merging runs of siblings which belong together:
 :
 : - a run of tei:item elements is collected into one tei:list; the nesting is rebuilt
 :   from the tf:level attributes by pmf:wrap-list and the list type is taken from the
 :   tf:type attribute of the first item
 : - directly adjacent inline elements (local name listed in $pmf:INLINE_ELEMENTS) with the
 :   same local name are merged into one element carrying the attributes of the first
 : - tei:code and tei:tag are returned untouched
 : - in a text node containing "<" followed later by ">", everything from the first "<" up
 :   to the last ">" is removed
 :
 : NOTE(review): an item whose tf:level is lower than the level of the item starting the run
 : is not collected by pmf:get-following but is still skipped below because its preceding
 : sibling is an item. It looks like such items are dropped from the output; confirm.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes
 :)
declare %private function pmf:combine($nodes as node()*) {
    for $node in $nodes
    return
        typeswitch($node)
            case element(tei:item) return
                (: Only element siblings are considered, in line with the following-sibling::*
                   axis used to collect the run. Looking at any node kind made an item separated
                   from its predecessor by whitespace start a second list although it had
                   already been collected, duplicating it in the output. :)
                if ($node/preceding-sibling::*[1][self::tei:item]) then
                    ()
                else
                    let $sibs := pmf:get-following($node/following-sibling::*, "item", (), $node/@tf:level)
                    return
                        <list xmlns="http://www.tei-c.org/ns/1.0">
                        { if ($node/@tf:type) then attribute type { $node/@tf:type } else () }
                        { pmf:wrap-list(($node, $sibs)) }
                        </list>
            case element(tei:code) | element(tei:tag) return
                $node
            case element() return
                let $name := local-name($node)
                let $inline := $name = $pmf:INLINE_ELEMENTS
                return
                    (: an inline element directly preceded by one of the same name has already
                       been merged into that predecessor :)
                    if ($inline and $node/preceding-sibling::node()[1][local-name(.) = $name]) then
                        ()
                    else
                        let $following :=
                            if ($inline) then
                                pmf:get-following($node/following-sibling::node(), $name, (), ())
                            else
                                ()
                        return
                            element { node-name($node) } {
                                $node/@*,
                                pmf:combine($node/node()),
                                pmf:combine($following/node())
                            }
            case text() return
                if (matches($node, '^(.*?)&#60;.*&#62;.*$')) then
                    (: return a text node, not a string: adjacent atomic values would be joined
                       with an extra space when placed into element content :)
                    text { replace($node, '^(.*?)&#60;.*&#62;(.*)$', '$1$2') }
                else
                    $node
            default return $node
};
168 changes: 8 additions & 160 deletions content/tei-functions.xql
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ module namespace pmf="http://existsolutions.com/xquery/functions/tei";

declare namespace tei="http://www.tei-c.org/ns/1.0";

(:~
 : Local names of the inline elements that pmf:combine merges when instances with the same
 : local name directly follow each other.
 :)
declare variable $pmf:INLINE_ELEMENTS := (
"hi", "supplied", "persName", "placeName", "term"
);

declare function pmf:finish($config as map(*), $input as node()*) {
pmf:create-divisions(pmf:combine($input))
(: $input :)
(:~
 : Create a copy of the current element: same name, all attributes apart from xml:id, and
 : the result of processing the content via pmf:apply-children.
 :
 : NOTE(review): xml:id is presumably left out here because pmf:apply-children outputs
 : $node/@xml:id itself; confirm against the full definition of that function.
 :
 : @param $config the processing model configuration
 : @param $node the element being processed
 : @param $class the CSS class names passed in by the processing model (not used here)
 : @param $content the content to process
 : @return the copied element
 :)
declare function pmf:copy($config as map(*), $node as node(), $class as xs:string+, $content) {
    let $attributes := $node/@* except $node/@xml:id
    let $children := pmf:apply-children($config, $node, $content)
    return
        element { node-name($node) } {
            $attributes,
            $children
        }
};

declare function pmf:paragraph($config as map(*), $node as node(), $class as xs:string+, $content) {
Expand Down Expand Up @@ -252,156 +253,3 @@ declare function pmf:apply-children($config as map(*), $node as node(), $content
$node/@xml:id,
$config?apply-children($config, $node, $content)
};

(:~
 : Rebuild the TEI document so that the headings found directly below the body become
 : nested divisions.
 :
 : If the body has no tei:head child, the document is returned as is. Otherwise a new TEI
 : element is created which carries over the teiHeader and the body (attributes plus
 : restructured content); nothing else from the input document is copied.
 :
 : @param $tei the TEI root element
 : @return the restructured document, or $tei itself if there are no headings
 :)
declare %private function pmf:create-divisions($tei as element(tei:TEI)) {
    let $body := $tei/tei:text/tei:body
    return
        if (empty($body/tei:head)) then
            $tei
        else
            <TEI xmlns="http://www.tei-c.org/ns/1.0">
                { $tei/tei:teiHeader }
                <text>
                    <body>
                    {
                        $body/@*,
                        pmf:wrap-divisions($body/node())
                    }
                    </body>
                </text>
            </TEI>
};

(:~
 : Convert a flat sequence of sibling nodes into nested tei:div elements, following a
 : "tumbling window" approach: every tei:head opens a division which extends up to the next
 : following tei:head sibling whose pmf:level is less than or equal to its own. A head without
 : pmf:level is treated as level 0. Nodes in front of the first head are passed through unchanged.
 :
 : The pmf:level attribute is dropped from the heads written to the output.
 :
 : @param $body-nodes the nodes still to be processed
 : @return the nodes with headings and the content following them wrapped into divisions
 :)
declare function pmf:wrap-divisions($body-nodes as node()*) {
    if (empty($body-nodes)) then
        ()
    else
        let $first := $body-nodes[1]
        return
            typeswitch ($first)
                case element(tei:head) return
                    let $depth := number(($first/@pmf:level, 0)[1])
                    (: the next heading on the same or a higher level closes this division :)
                    let $boundary := ($first/following-sibling::tei:head[@pmf:level <= $depth])[1]
                    let $remainder := $body-nodes[. is $boundary or . >> $boundary]
                    let $content :=
                        if ($remainder) then
                            $body-nodes[. >> $first and . << $boundary]
                        else
                            $body-nodes[. >> $first]
                    return (
                        <div xmlns="http://www.tei-c.org/ns/1.0">
                            <head>
                            {
                                $first/(@* except @pmf:level),
                                $first/node()
                            }
                            </head>
                            { pmf:wrap-divisions($content) }
                        </div>,
                        pmf:wrap-divisions($remainder)
                    )
                default return (
                    $first,
                    pmf:wrap-divisions(tail($body-nodes))
                )
};

(:~
 : Turn a flat run of list items into tei:item elements, nesting deeper items in sub-lists.
 :
 : For the first item, the siblings directly following it with a higher pmf:level are collected
 : (pmf:get-following-nested) and written into a nested tei:list inside that item; the nested
 : list takes its type from the pmf:type attribute of the first nested item. The remaining
 : items, minus the nested ones, are then processed recursively.
 :
 : @param $items the items to process
 : @return one tei:item per item on the current level
 :)
declare %private function pmf:wrap-list($items as element()*) {
    if (empty($items)) then
        ()
    else
        let $current := $items[1]
        let $children :=
            pmf:get-following-nested($current/following-sibling::*, (), $current/@pmf:level)
        let $child-type := $children[1]/@pmf:type
        return (
            <item xmlns="http://www.tei-c.org/ns/1.0">
                <p>{ $current/node() }</p>
                {
                    if (exists($children)) then
                        <list>
                            { if ($child-type) then attribute type { $child-type } else () }
                            { pmf:wrap-list($children) }
                        </list>
                    else
                        ()
                }
            </item>,
            pmf:wrap-list(tail($items) except $children)
        )
};

(:~
 : Collect the leading run of nodes which have the given local name and, if a level is
 : given, a pmf:level greater than or equal to it. Collection stops at the first node which
 : does not qualify.
 :
 : @param $nodes the candidate nodes, normally the following siblings of a start node
 : @param $name the local name a node must have to be collected
 : @param $siblings accumulator for the nodes collected so far; pass () initially
 : @param $level minimum pmf:level, or the empty sequence to ignore levels
 : @return $siblings followed by the collected nodes
 :)
declare %private function pmf:get-following($nodes as node()*, $name as xs:string, $siblings as node()*,
    $level as item()?) {
    let $candidate := $nodes[1]
    let $name-matches := local-name($candidate) = $name
    let $level-matches := empty($level) or number($candidate/@pmf:level) >= number($level)
    return
        if ($name-matches and $level-matches) then
            pmf:get-following(tail($nodes), $name, ($siblings, $candidate), $level)
        else
            $siblings
};

(:~
 : Collect the leading run of tei:item elements which are nested below a given level, i.e.
 : whose pmf:level is strictly greater than $level. Without a level every tei:item qualifies.
 : Collection stops at the first node which does not qualify.
 :
 : @param $nodes the candidate nodes, normally the following siblings of an item
 : @param $siblings accumulator for the items collected so far; pass () initially
 : @param $level the pmf:level of the parent item, or the empty sequence
 : @return $siblings followed by the collected items
 :)
declare %private function pmf:get-following-nested($nodes as node()*, $siblings as node()*,
    $level as item()?) {
    let $candidate := $nodes[1]
    let $is-item := $candidate instance of element(tei:item)
    let $is-deeper := empty($level) or number($candidate/@pmf:level) > number($level)
    return
        if ($is-item and $is-deeper) then
            pmf:get-following-nested(tail($nodes), ($siblings, $candidate), $level)
        else
            $siblings
};

(:~
 : Recursively copy the given nodes, merging runs of siblings which belong together:
 :
 : - a run of tei:item elements is collected into one tei:list; the nesting is rebuilt
 :   from the pmf:level attributes by pmf:wrap-list and the list type is taken from the
 :   pmf:type attribute of the first item
 : - directly adjacent inline elements (local name listed in $pmf:INLINE_ELEMENTS) with the
 :   same local name are merged into one element carrying the attributes of the first
 : - tei:code and tei:tag are returned untouched
 : - in a text node containing "<" followed later by ">", everything from the first "<" up
 :   to the last ">" is removed
 :
 : NOTE(review): an item whose pmf:level is lower than the level of the item starting the run
 : is not collected by pmf:get-following but is still skipped below because its preceding
 : sibling is an item. It looks like such items are dropped from the output; confirm.
 :
 : @param $nodes the nodes to process
 : @return the processed nodes
 :)
declare %private function pmf:combine($nodes as node()*) {
    for $node in $nodes
    return
        typeswitch($node)
            case element(tei:item) return
                (: Only element siblings are considered, in line with the following-sibling::*
                   axis used to collect the run. Looking at any node kind made an item separated
                   from its predecessor by whitespace start a second list although it had
                   already been collected, duplicating it in the output. :)
                if ($node/preceding-sibling::*[1][self::tei:item]) then
                    ()
                else
                    let $sibs := pmf:get-following($node/following-sibling::*, "item", (), $node/@pmf:level)
                    return
                        <list xmlns="http://www.tei-c.org/ns/1.0">
                        { if ($node/@pmf:type) then attribute type { $node/@pmf:type } else () }
                        { pmf:wrap-list(($node, $sibs)) }
                        </list>
            case element(tei:code) | element(tei:tag) return
                $node
            case element() return
                let $name := local-name($node)
                let $inline := $name = $pmf:INLINE_ELEMENTS
                return
                    (: an inline element directly preceded by one of the same name has already
                       been merged into that predecessor :)
                    if ($inline and $node/preceding-sibling::node()[1][local-name(.) = $name]) then
                        ()
                    else
                        let $following :=
                            if ($inline) then
                                pmf:get-following($node/following-sibling::node(), $name, (), ())
                            else
                                ()
                        return
                            element { node-name($node) } {
                                $node/@*,
                                pmf:combine($node/node()),
                                pmf:combine($following/node())
                            }
            case text() return
                if (matches($node, '^(.*?)&#60;.*&#62;.*$')) then
                    (: return a text node, not a string: adjacent atomic values would be joined
                       with an extra space when placed into element content :)
                    text { replace($node, '^(.*?)&#60;.*&#62;(.*)$', '$1$2') }
                else
                    $node
            default return $node
};
6 changes: 5 additions & 1 deletion expath-pkg.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" ?>
<package xmlns="http://expath.org/ns/pkg" name="http://existsolutions.com/apps/tei-publisher-lib" abbrev="tei-publisher-lib" version="3.1.0" spec="1.0">
<package xmlns="http://expath.org/ns/pkg" name="http://existsolutions.com/apps/tei-publisher-lib" abbrev="tei-publisher-lib" version="3.2.0" spec="1.0">
<title>TEI Publisher: Processing Model Libraries</title>
<dependency processor="http://exist-db.org" semver-min="3.6.0" />
<xquery>
Expand All @@ -26,6 +26,10 @@
<namespace>http://www.tei-c.org/tei-simple/xquery/functions/printcss</namespace>
<file>ext-printcss.xql</file>
</xquery>
<xquery>
<namespace>http://www.tei-c.org/tei-simple/xquery/functions/docx</namespace>
<file>ext-docx.xql</file>
</xquery>
<xquery>
<namespace>http://www.tei-c.org/tei-simple/xquery/model</namespace>
<file>model.xql</file>
Expand Down
13 changes: 13 additions & 0 deletions repo.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@
<target />
<note>Generated apps may fail after updating. Make sure to recompile your ODDs.</note>
<changelog>
<change version="3.2.0">
<ul xmlns="http://www.w3.org/1999/xhtml">
<li>Refactor TEI output mode: so far it was only used for docx2tei transformations.
The docx-specific code has been moved into a separate module, so TEI output mode becomes usable
for other scenarios.</li>
</ul>
</change>
<change version="3.1.0">
<ul xmlns="http://www.w3.org/1999/xhtml">
<li>Support relative module import paths in configuration.xml</li>
<li>config.xqm is now exposed to the ODD under the "global" prefix</li>
</ul>
</change>
<change version="3.0.1">
<ul xmlns="http://www.w3.org/1999/xhtml">
<li>config.xqm is now available within XQuery expressions in the ODD under the "global:" prefix</li>
Expand Down

0 comments on commit 90d713e

Please sign in to comment.