From 8340fbd42fe6ff4215f13cc3034759fbe74e1525 Mon Sep 17 00:00:00 2001
From: chris <331548+cristoper@users.noreply.github.com>
Date: Thu, 29 Feb 2024 20:26:33 -0700
Subject: [PATCH] Fix handling of content:encoded (#223)
PR #220 introduced a failing test for detecting images in the "content"
element. It should instead be testing the "content:encoded" element. But
that uncovered an issue with how extensions were being detected (the
"content" namespace was being detected as an extension namespace).
As a more robust way of checking for the "content" namespace, this PR
exposes `shared.PrefixForNamespace()` as a public function so it can be
used in the rss parser. This should also fix PR #211 (and includes
@JLugagne's test case from that PR).
Once the fixes to xml:base handling in #222 are merged, this should fix
the remaining failing test reported in #210.
---
internal/shared/extparser.go | 11 ++++-------
rss/parser.go | 3 ++-
.../parser/rss/rss_channel_item_content_encoded.json | 8 ++++++++
.../parser/rss/rss_channel_item_content_encoded.xml | 10 ++++++++++
.../feed_item_image_-_rss_channel_item_content.xml | 2 +-
5 files changed, 25 insertions(+), 9 deletions(-)
create mode 100644 testdata/parser/rss/rss_channel_item_content_encoded.json
create mode 100644 testdata/parser/rss/rss_channel_item_content_encoded.xml
diff --git a/internal/shared/extparser.go b/internal/shared/extparser.go
index 79c8d5ac..cb522b1e 100644
--- a/internal/shared/extparser.go
+++ b/internal/shared/extparser.go
@@ -12,18 +12,15 @@ import (
// non empty prefix)
func IsExtension(p *xpp.XMLPullParser) bool {
space := strings.TrimSpace(p.Space)
- if prefix, ok := p.Spaces[space]; ok {
- return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content")
- }
-
- return p.Space != ""
+ prefix := PrefixForNamespace(space, p)
+ return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content")
}
// ParseExtension parses the current element of the
// XMLPullParser as an extension element and updates
// the extension map
func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) {
- prefix := prefixForNamespace(p.Space, p)
+ prefix := PrefixForNamespace(p.Space, p)
result, err := parseExtensionElement(p)
if err != nil {
@@ -93,7 +90,7 @@ func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) {
return e, nil
}
-func prefixForNamespace(space string, p *xpp.XMLPullParser) string {
+func PrefixForNamespace(space string, p *xpp.XMLPullParser) string {
// First we check if the global namespace map
// contains an entry for this namespace/prefix.
// This way we can use the canonical prefix for this
diff --git a/rss/parser.go b/rss/parser.go
index 4371d31f..575896e9 100644
--- a/rss/parser.go
+++ b/rss/parser.go
@@ -357,7 +357,8 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) {
item.Description = result
} else if name == "encoded" {
space := strings.TrimSpace(p.Space)
- if prefix, ok := p.Spaces[space]; ok && prefix == "content" {
+ prefix := shared.PrefixForNamespace(space, p)
+ if prefix == "content" {
result, err := shared.ParseText(p)
if err != nil {
return nil, err
diff --git a/testdata/parser/rss/rss_channel_item_content_encoded.json b/testdata/parser/rss/rss_channel_item_content_encoded.json
new file mode 100644
index 00000000..57474bb9
--- /dev/null
+++ b/testdata/parser/rss/rss_channel_item_content_encoded.json
@@ -0,0 +1,8 @@
+{
+ "items": [
+ {
+ "content": "Item Description"
+ }
+ ],
+ "version": "2.0"
+}
diff --git a/testdata/parser/rss/rss_channel_item_content_encoded.xml b/testdata/parser/rss/rss_channel_item_content_encoded.xml
new file mode 100644
index 00000000..391d2d3d
--- /dev/null
+++ b/testdata/parser/rss/rss_channel_item_content_encoded.xml
@@ -0,0 +1,10 @@
+
+
+
+ -
+ Item Description
+
+
+
diff --git a/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml b/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml
index 76ce6e73..413ef695 100644
--- a/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml
+++ b/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml
@@ -4,7 +4,7 @@ Description: item image from content
-
- ]]>
+ ]]>