Skip to content

Commit

Permalink
hacky special-case solution for links in feed excerpts
Browse files Browse the repository at this point in the history
  • Loading branch information
brabster committed Apr 6, 2024
1 parent 77250f0 commit 8c5db54
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 10 deletions.
1 change: 1 addition & 0 deletions mkdocs_rss_plugin/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ def on_page_content(
created=page_dates[0],
description=self.util.get_description_or_abstract(
in_page=page,
html=html,
chars_count=self.config.abstract_chars_count,
abstract_delimiter=self.config.abstract_delimiter,
),
Expand Down
38 changes: 28 additions & 10 deletions mkdocs_rss_plugin/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# standard library
import logging
import re
import ssl
import sys
from datetime import date, datetime
Expand All @@ -15,7 +16,7 @@
from typing import Any, Iterable, Optional, Tuple, Union
from urllib import request
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode, urlparse, urlunparse
from urllib.parse import urlencode, urljoin, urlparse, urlunparse

# 3rd party
import markdown
Expand Down Expand Up @@ -48,6 +49,22 @@
# ########## Classes #############
# ################################

# Patterns capturing the value of double-quoted ``href`` / ``src`` attributes.
HREF_MATCH_PATTERN = re.compile('href="(.*?)"')
SRC_MATCH_PATTERN = re.compile('src="(.*?)"')


def relative_links_resolve_to_page(page_html: str, page_url: str) -> str:
    """Rewrite ``href`` and ``src`` attribute values as URLs resolved against a page.

    Only the text captured inside a double-quoted ``href="..."`` or
    ``src="..."`` attribute is rewritten. Each captured value is passed through
    :func:`urllib.parse.urljoin` with ``page_url`` as the base, so values that
    are already absolute URLs come back unchanged.

    :param str page_html: HTML fragment whose links should be resolved
    :param str page_url: URL of the page, used as the base for resolution

    :return: the HTML with every matched attribute value resolved
    :rtype: str
    """

    def _resolve_attribute(match: "re.Match") -> str:
        # Rebuild the matched attribute, swapping only the captured value.
        # Substituting in place (rather than str.replace on the whole document)
        # keeps identical text outside attributes intact, avoids corrupting
        # links that are substrings of other links, and handles an empty value.
        attribute = match.group(0)
        value_start = match.start(1) - match.start(0)
        value_end = match.end(1) - match.start(0)
        resolved = urljoin(page_url, match.group(1))
        return attribute[:value_start] + resolved + attribute[value_end:]

    replaced_html = page_html
    for pattern in (HREF_MATCH_PATTERN, SRC_MATCH_PATTERN):
        replaced_html = pattern.sub(_resolve_attribute, replaced_html)
    return replaced_html


class Util:
"""Plugin logic."""
Expand Down Expand Up @@ -452,12 +469,17 @@ def get_date_from_meta(
return out_date

def get_description_or_abstract(
self, in_page: Page, chars_count: int = 160, abstract_delimiter: str = None
self,
in_page: Page,
html: str,
chars_count: int = 160,
abstract_delimiter: str = None,
) -> str:
"""Returns description from page meta. If it doesn't exist, use the \
{chars_count} first characters from page content (in markdown).
{chars_count} first characters from page content (in html).
:param Page in_page: page to look at
:param str html: rendered page html
:param int chars_count: if page.meta.description is not set, number of chars \
of the content to use. Defaults to: 160 - optional
:param str abstract_delimiter: description delimiter, defaults to None
Expand Down Expand Up @@ -486,14 +508,10 @@ def get_description_or_abstract(
return ""
elif (
abstract_delimiter
and (
excerpt_separator_position := in_page.markdown.find(abstract_delimiter)
)
> -1
and (excerpt_separator_position := html.find(abstract_delimiter)) > -1
):
return markdown.markdown(
in_page.markdown[:excerpt_separator_position],
output_format="html5",
return relative_links_resolve_to_page(
html[:excerpt_separator_position], in_page.canonical_url
)
# If chars count is unlimited, use the html content
elif in_page.content and chars_count == -1:
Expand Down
Binary file not shown.
24 changes: 24 additions & 0 deletions tests/fixtures/docs/blog/posts/sample_blog_post_internal_links.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
date: 2023-02-12
authors: [guts]
categories:
- Blog
---

# Blog sample with internal links

I'm a really short intro.

![here's an internal image](./assets/example_image.webp)

[Here's an internal link](./sample_blog_post.md)
and another
[Another link](../../index.md)

<!-- more -->

## This part won't show up in RSS feed

### What is Lorem Ipsum?

Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
35 changes: 35 additions & 0 deletions tests/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,41 @@ def test_not_git_repo(self):
# restore name
git_dir_tmp.replace(git_dir)

def test_abstract_with_internal_links(self):
    """Check that links and images in feed excerpts are emitted as absolute URLs.

    Builds the fixture docs with the minimal configuration, parses both
    generated feeds and, for every entry titled
    "Blog sample with internal links", asserts that its summary contains the
    expected absolute ``href`` and ``src`` attributes.
    """
    expected_attributes = (
        'href="https://guts.github.io/mkdocs-rss-plugin/blog/posts/sample_blog_post/"',
        'href="https://guts.github.io/mkdocs-rss-plugin/"',
        'src="https://guts.github.io/mkdocs-rss-plugin/blog/posts/assets/example_image.webp"',
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        cli_result = self.build_docs_setup(
            testproject_path="docs",
            mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_minimal.yml"),
            output_path=tmpdirname,
            strict=True,
        )
        self.assertEqual(cli_result.exit_code, 0)
        self.assertIsNone(cli_result.exception)

        # Gather entries from the "created" feed first, then the "updated" one.
        output_dir = Path(tmpdirname)
        entries = []
        for feed_filename in (OUTPUT_RSS_FEED_CREATED, OUTPUT_RSS_FEED_UPDATED):
            entries.extend(feedparser.parse(output_dir / feed_filename).entries)

        # NOTE(review): if no entry carries this title, the loop asserts
        # nothing and the test still passes.
        for entry in entries:
            if entry.title != "Blog sample with internal links":
                continue
            for attribute in expected_attributes:
                self.assertIn(attribute, entry.summary)


# ##############################################################################
# ##### Stand alone program ########
Expand Down

0 comments on commit 8c5db54

Please sign in to comment.