From adfb55dad2032628d06c75dc0113188e091f3fd4 Mon Sep 17 00:00:00 2001 From: Lai Wang Date: Sun, 3 Nov 2024 16:44:45 -0500 Subject: [PATCH 1/7] markdown url regex support common uri patterns --- liberapay/utils/markdown.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/liberapay/utils/markdown.py b/liberapay/utils/markdown.py index 8c141690d8..1deb79705c 100644 --- a/liberapay/utils/markdown.py +++ b/liberapay/utils/markdown.py @@ -4,13 +4,13 @@ import misaka as m # http://misaka.61924.nl/ -url_re = re.compile(r'^(https?|xmpp):') +uri_re = re.compile(r'^(https?|xmpp|imap|irc|nntp):') class CustomRenderer(m.SaferHtmlRenderer): def check_url(self, url, is_image_src=False): - return bool(url_re.match(url)) + return bool(uri_re.match(url)) renderer = CustomRenderer() From b71379e98f3d80860f820c29aef0f20c8c3e352f Mon Sep 17 00:00:00 2001 From: Lai Wang Date: Mon, 4 Nov 2024 17:07:05 -0500 Subject: [PATCH 2/7] url rewrite logic --- liberapay/utils/markdown.py | 58 +++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/liberapay/utils/markdown.py b/liberapay/utils/markdown.py index 1deb79705c..0f602c18f3 100644 --- a/liberapay/utils/markdown.py +++ b/liberapay/utils/markdown.py @@ -3,20 +3,68 @@ from markupsafe import Markup import misaka as m # http://misaka.61924.nl/ +from liberapay.website import website -uri_re = re.compile(r'^(https?|xmpp|imap|irc|nntp):') +# probably goto constants.py? +# https://developer.wordpress.org/reference/functions/wp_allowed_protocols/ +ALLOWED_PROTOCOLS = [ + "http", + "https", + "ftp", + "ftps", + "mailto", + "news", + "irc", + "irc6", + "ircs", + "gopher", + "nntp", + "feed", + "telnet", + "mms", + "rtsp", + "sms", + "svn", + "tel", + "fax", + "xmpp", + "webcal", + "urn", +] +_uri_re = re.compile(r"^(" + "|".join(ALLOWED_PROTOCOLS) + "):") +_internal_re = re.compile(r"^https://([\w\-]+\.)?liberapay\.(com|net|org)") + + +# check whether the link is an external link +def _is_internal_url(url: str) -> bool: + return bool(_internal_re.match(url)) class CustomRenderer(m.SaferHtmlRenderer): + # enable url-rewrite and block potential ones + def __init__(self): + super().__init__(link_rewrite=f"{website.env.canonical_host}/redirect?url={{url}}") def check_url(self, url, is_image_src=False): - return bool(uri_re.match(url)) + return bool(_uri_re.match(url)) + + def rewrite_url(self, url, is_image_src=False): + rewrite = not _is_internal_url(url) + if rewrite: + return super().rewrite_url(url, is_image_src) + return url renderer = CustomRenderer() -md = m.Markdown(renderer, extensions=( - 'autolink', 'strikethrough', 'no-intra-emphasis', 'tables', -)) +md = m.Markdown( + renderer, + extensions=( + "autolink", + "strikethrough", + "no-intra-emphasis", + "tables", + ), +) def render(markdown): From ccc7f46375efffe89b8248114fcabed267b1ed91 Mon Sep 17 00:00:00 2001 From: Lai Wang Date: Thu, 14 Nov 2024 19:53:35 -0500 Subject: [PATCH 3/7] fix md host name, add target="_blank" --- liberapay/utils/markdown.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/liberapay/utils/markdown.py b/liberapay/utils/markdown.py index 0f602c18f3..d74b6323e6 100644 --- a/liberapay/utils/markdown.py +++ b/liberapay/utils/markdown.py @@ -43,7 +43,31 @@ def _is_internal_url(url: str) -> bool: class CustomRenderer(m.SaferHtmlRenderer): # enable url-rewrite and block potential ones def __init__(self): - super().__init__(link_rewrite=f"{website.env.canonical_host}/redirect?url={{url}}") + self.canonical_host = website.env.canonical_host + if not self.canonical_host.startswith(('http://', 'https://')): + # make sure localhost is handled + self.canonical_host = 'http://' + self.canonical_host + + super().__init__(link_rewrite=f"{self.canonical_host}/redirect?url={{url}}?back_to={self.canonical_host}") + + def autolink(self, raw_url, is_email): + # override super's autolink function, add target="_blank" + if self.check_url(raw_url): + url = self.rewrite_url(('mailto:' if is_email else '') + raw_url) + url = m.escape_html(url) + return '%s' % (url, m.escape_html(raw_url)) + else: + return m.escape_html('<%s>' % raw_url) + + def link(self, content, raw_url, title=''): + # override super's link function, add target="_blank" + if self.check_url(raw_url): + url = self.rewrite_url(raw_url) + maybe_title = ' title="%s"' % m.escape_html(title) if title else '' + url = m.escape_html(url) + return ('' % (url, maybe_title)) + content + '' + else: + return m.escape_html("[%s](%s)" % (content, raw_url)) def check_url(self, url, is_image_src=False): return bool(_uri_re.match(url)) @@ -51,7 +75,7 @@ def check_url(self, url, is_image_src=False): def rewrite_url(self, url, is_image_src=False): rewrite = not _is_internal_url(url) if rewrite: - return super().rewrite_url(url, is_image_src) + return super().rewrite_url(m.escape_html(url, escape_slash=True), is_image_src) return url From 2977fe3fde7c4b97692c08c8bf1f8ecd93699c38 Mon Sep 17 00:00:00 2001 From: Lai Wang Date: Thu, 14 Nov 2024 19:53:42 -0500 Subject: [PATCH 4/7] init redirect page --- www/redirect.spt | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 www/redirect.spt diff --git a/www/redirect.spt b/www/redirect.spt new file mode 100644 index 0000000000..56aefd1b00 --- /dev/null +++ b/www/redirect.spt @@ -0,0 +1,34 @@ +import html + +[---] + +# Get the URL from the query parameter +raw_url = request.qs.get('url', '') +back_to = request.qs.get('back_to', '/') + +# If no URL is provided, redirect to home +if not raw_url: + response.redirect(back_to) + +unescaped_url = html.unescape(raw_url) +title = _("Redirect") + +[---] text/html +% extends "templates/layouts/base-thin.html" + +{% block heading %}{% endblock %} + +% block thin_content +

{{_("Warning")}}

+ +
+

{{_("You are being redirected to {0}. This link is out of our control and may not be secure.", unescaped_url)}}

+
+ + +% endblock From 4dfd52c23450347564668f1cfa72e7dd187859db Mon Sep 17 00:00:00 2001 From: Lai Wang Date: Thu, 14 Nov 2024 19:54:56 -0500 Subject: [PATCH 5/7] remove redudant back_to --- liberapay/utils/markdown.py | 2 +- www/redirect.spt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/liberapay/utils/markdown.py b/liberapay/utils/markdown.py index d74b6323e6..c7d9085d42 100644 --- a/liberapay/utils/markdown.py +++ b/liberapay/utils/markdown.py @@ -48,7 +48,7 @@ def __init__(self): # make sure localhost is handled self.canonical_host = 'http://' + self.canonical_host - super().__init__(link_rewrite=f"{self.canonical_host}/redirect?url={{url}}?back_to={self.canonical_host}") + super().__init__(link_rewrite=f"{self.canonical_host}/redirect?url={{url}}") def autolink(self, raw_url, is_email): # override super's autolink function, add target="_blank" diff --git a/www/redirect.spt b/www/redirect.spt index 56aefd1b00..e9d8d960ab 100644 --- a/www/redirect.spt +++ b/www/redirect.spt @@ -4,7 +4,6 @@ import html # Get the URL from the query parameter raw_url = request.qs.get('url', '') -back_to = request.qs.get('back_to', '/') # If no URL is provided, redirect to home if not raw_url: @@ -26,7 +25,7 @@ title = _("Redirect")
- {{ _("No, cancel") }} + {{ _("No, cancel") }} {{_("Yes, proceed")}} From fb162915a6ccf381623064b97c37e65171cf67c3 Mon Sep 17 00:00:00 2001 From: Lai Wang Date: Mon, 9 Dec 2024 18:38:31 -0500 Subject: [PATCH 6/7] disable url rewrite for img --- liberapay/utils/markdown.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/liberapay/utils/markdown.py b/liberapay/utils/markdown.py index c7d9085d42..e0e3ccfe49 100644 --- a/liberapay/utils/markdown.py +++ b/liberapay/utils/markdown.py @@ -49,7 +49,7 @@ def __init__(self): self.canonical_host = 'http://' + self.canonical_host super().__init__(link_rewrite=f"{self.canonical_host}/redirect?url={{url}}") - + def autolink(self, raw_url, is_email): # override super's autolink function, add target="_blank" if self.check_url(raw_url): @@ -73,7 +73,7 @@ def check_url(self, url, is_image_src=False): return bool(_uri_re.match(url)) def rewrite_url(self, url, is_image_src=False): - rewrite = not _is_internal_url(url) + rewrite = not _is_internal_url(url) and not is_image_src if rewrite: return super().rewrite_url(m.escape_html(url, escape_slash=True), is_image_src) return url From 7b02ff3dec0cc48ac3175ac024d9e0cac4f08d92 Mon Sep 17 00:00:00 2001 From: Lai Wang Date: Mon, 9 Dec 2024 18:38:44 -0500 Subject: [PATCH 7/7] add test cases, adapt old tests --- tests/py/test_utils.py | 43 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/tests/py/test_utils.py b/tests/py/test_utils.py index 609c641796..4e60c6f41c 100644 --- a/tests/py/test_utils.py +++ b/tests/py/test_utils.py @@ -1,9 +1,10 @@ from datetime import date, datetime, timedelta, timezone from decimal import Decimal -from markupsafe import escape +from misaka import escape_html from pando.http.response import Response from pando.testing.client import DidntRaiseResponse +from urllib.parse import quote from liberapay import utils from liberapay.i18n.currencies import Money, MoneyBasket @@ -118,7 +119,7 @@ def test_markdown_render_discards_scripts(self): def test_markdown_autolink_filtering(self): # Nice data for url in ('http://a', "https://b?x&y", 'xmpp:c'): - expected = '

{0}

\n'.format(escape(url)) + expected = f'

{escape_html(url)}

\n' actual = markdown.render('<%s>' % url) assert actual == expected # Naughty data @@ -128,11 +129,23 @@ def test_markdown_autolink_filtering(self): encoded_link = ''.join('&x{0:x};'.format(ord(c)) for c in link) html = markdown.render('<%s>' % encoded_link) assert link not in html + + def test_markdown_is_internal_url(self): + # Internal URLs + assert markdown._is_internal_url("https://liberapay.com/page") + assert markdown._is_internal_url("https://subdomain.liberapay.net/other") + assert markdown._is_internal_url("https://liberapay.org/help") + + # External URLs + assert not markdown._is_internal_url("http://example.com") + assert not markdown._is_internal_url("https://liberapay.fake.com") + assert not markdown._is_internal_url("https://google.com") + assert not markdown._is_internal_url("ftp://liberapay.com/file") def test_markdown_link_filtering(self): # Nice data for url in ('http://a', 'https://b', 'xmpp:c'): - expected = '

'foo%s

\n'.format(url) + expected = f'

'foo%s

\n' actual = markdown.render("['foo%%s](%s \"bar'%%s\")" % url) assert actual == expected # Naughty data @@ -157,6 +170,13 @@ def test_markdown_image_src_filtering(self): expected = '

![foo](javascript:foo)

\n' assert markdown.render('![foo](javascript:foo)') == expected + def test_markdown_image_src_is_ignored_for_rewriting(self): + # Image source should not rewrite + md_text = "![Example](http://example.com/image.png)" + expected = '

Example

' + actual = markdown.render(md_text) + assert expected in actual + def test_markdown_render_doesnt_allow_any_explicit_anchors(self): expected = '

foo

\n' assert markdown.render('foo') == expected @@ -166,7 +186,7 @@ def test_markdown_render_doesnt_allow_any_explicit_anchors(self): assert markdown.render('foo') == expected def test_markdown_render_autolinks(self): - expected = '

http://google.com/

\n' + expected = f'

http://google.com/

\n' actual = markdown.render('http://google.com/') assert expected == actual @@ -175,6 +195,21 @@ def test_markdown_render_no_intra_emphasis(self): actual = markdown.render('Examples like this_one and this other_one.') assert expected == actual + def test_markdown_render_autolink_non_whitelisted_protocol(self): + # Invalid protocol + url = "javascript:alert(1)" + expected = "<javascript:alert(1)>" + actual = markdown.render(f"<{url}>") + assert expected in actual + + def test_markdown_render_invalid_url(self): + # Invalid URL should not render as a link + md_text = "[Invalid](javascript:alert('XSS'))" + expected = "

[Invalid](javascript:alert('XSS'))

" + actual = markdown.render(md_text) + + assert expected in actual + # Base64 encoding/decoding # ========================