From 8340fbd42fe6ff4215f13cc3034759fbe74e1525 Mon Sep 17 00:00:00 2001 From: chris <331548+cristoper@users.noreply.github.com> Date: Thu, 29 Feb 2024 20:26:33 -0700 Subject: [PATCH] Fix handling of content:encoded (#223) PR #220 introduced a failing test for detecting images in the "content" element. It should instead be testing the "content:encoded" element. But that uncovered an issue with how extensions were being detected (the "content" namespace was being detected as an extension namespace). As a more robust way of checking for the "content" namespace, this PR exposes `shared.PrefixForNamespace()` as a public function so it can be used in the rss parser. This should also fix PR #211 (and includes @JLugagne's test case from that PR). Once the fixes to xml:base handling in #222 are merged, this should fix the remaining failing test reported in #210. --- internal/shared/extparser.go | 11 ++++------- rss/parser.go | 3 ++- .../parser/rss/rss_channel_item_content_encoded.json | 8 ++++++++ .../parser/rss/rss_channel_item_content_encoded.xml | 10 ++++++++++ .../feed_item_image_-_rss_channel_item_content.xml | 2 +- 5 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 testdata/parser/rss/rss_channel_item_content_encoded.json create mode 100644 testdata/parser/rss/rss_channel_item_content_encoded.xml diff --git a/internal/shared/extparser.go b/internal/shared/extparser.go index 79c8d5ac..cb522b1e 100644 --- a/internal/shared/extparser.go +++ b/internal/shared/extparser.go @@ -12,18 +12,15 @@ import ( // non empty prefix) func IsExtension(p *xpp.XMLPullParser) bool { space := strings.TrimSpace(p.Space) - if prefix, ok := p.Spaces[space]; ok { - return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content") - } - - return p.Space != "" + prefix := PrefixForNamespace(space, p) + return !(prefix == "" || prefix == "rss" || prefix == "rdf" || prefix == "content") } // ParseExtension parses the current element of the // XMLPullParser as an 
extension element and updates // the extension map func ParseExtension(fe ext.Extensions, p *xpp.XMLPullParser) (ext.Extensions, error) { - prefix := prefixForNamespace(p.Space, p) + prefix := PrefixForNamespace(p.Space, p) result, err := parseExtensionElement(p) if err != nil { @@ -93,7 +90,7 @@ func parseExtensionElement(p *xpp.XMLPullParser) (e ext.Extension, err error) { return e, nil } -func prefixForNamespace(space string, p *xpp.XMLPullParser) string { +func PrefixForNamespace(space string, p *xpp.XMLPullParser) string { // First we check if the global namespace map // contains an entry for this namespace/prefix. // This way we can use the canonical prefix for this diff --git a/rss/parser.go b/rss/parser.go index 4371d31f..575896e9 100644 --- a/rss/parser.go +++ b/rss/parser.go @@ -357,7 +357,8 @@ func (rp *Parser) parseItem(p *xpp.XMLPullParser) (item *Item, err error) { item.Description = result } else if name == "encoded" { space := strings.TrimSpace(p.Space) - if prefix, ok := p.Spaces[space]; ok && prefix == "content" { + prefix := shared.PrefixForNamespace(space, p) + if prefix == "content" { result, err := shared.ParseText(p) if err != nil { return nil, err diff --git a/testdata/parser/rss/rss_channel_item_content_encoded.json b/testdata/parser/rss/rss_channel_item_content_encoded.json new file mode 100644 index 00000000..57474bb9 --- /dev/null +++ b/testdata/parser/rss/rss_channel_item_content_encoded.json @@ -0,0 +1,8 @@ +{ + "items": [ + { + "content": "Item Description" + } + ], + "version": "2.0" +} diff --git a/testdata/parser/rss/rss_channel_item_content_encoded.xml b/testdata/parser/rss/rss_channel_item_content_encoded.xml new file mode 100644 index 00000000..391d2d3d --- /dev/null +++ b/testdata/parser/rss/rss_channel_item_content_encoded.xml @@ -0,0 +1,10 @@ + + + + + Item Description + + + diff --git a/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml 
b/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml index 76ce6e73..413ef695 100644 --- a/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml +++ b/testdata/translator/rss/feed_item_image_-_rss_channel_item_content.xml @@ -4,7 +4,7 @@ Description: item image from content - ]]> + ]]>