Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User descriptions support common URI schemes #2496

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 77 additions & 5 deletions liberapay/utils/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,92 @@
from markupsafe import Markup
import misaka as m # http://misaka.61924.nl/

from liberapay.website import website

url_re = re.compile(r'^(https?|xmpp):')
# probably goto constants.py?
# https://developer.wordpress.org/reference/functions/wp_allowed_protocols/
ALLOWED_PROTOCOLS = [
"http",
"https",
"ftp",
"ftps",
"mailto",
"news",
"irc",
"irc6",
"ircs",
"gopher",
"nntp",
"feed",
"telnet",
"mms",
"rtsp",
"sms",
"svn",
"tel",
"fax",
"xmpp",
"webcal",
"urn",
]

_uri_re = re.compile(r"^(" + "|".join(ALLOWED_PROTOCOLS) + "):")
_internal_re = re.compile(r"^https://([\w\-]+\.)?liberapay\.(com|net|org)")


# check whether the link is an external link
def _is_internal_url(url: str) -> bool:
return bool(_internal_re.match(url))

class CustomRenderer(m.SaferHtmlRenderer):
# enable url-rewrite and block potential ones
def __init__(self):
self.canonical_host = website.env.canonical_host
if not self.canonical_host.startswith(('http://', 'https://')):
# make sure localhost is handled
self.canonical_host = 'http://' + self.canonical_host

super().__init__(link_rewrite=f"{self.canonical_host}/redirect?url={{url}}")

def autolink(self, raw_url, is_email):
# override super's autolink function, add target="_blank"
if self.check_url(raw_url):
url = self.rewrite_url(('mailto:' if is_email else '') + raw_url)
url = m.escape_html(url)
return '<a href="%s" target="_blank">%s</a>' % (url, m.escape_html(raw_url))
else:
return m.escape_html('<%s>' % raw_url)

def link(self, content, raw_url, title=''):
# override super's link function, add target="_blank"
if self.check_url(raw_url):
url = self.rewrite_url(raw_url)
maybe_title = ' title="%s"' % m.escape_html(title) if title else ''
url = m.escape_html(url)
return ('<a href="%s"%s target="_blank">' % (url, maybe_title)) + content + '</a>'
else:
return m.escape_html("[%s](%s)" % (content, raw_url))

def check_url(self, url, is_image_src=False):
return bool(url_re.match(url))
return bool(_uri_re.match(url))

def rewrite_url(self, url, is_image_src=False):
rewrite = not _is_internal_url(url) and not is_image_src
if rewrite:
return super().rewrite_url(m.escape_html(url, escape_slash=True), is_image_src)
return url


renderer = CustomRenderer()
md = m.Markdown(renderer, extensions=(
'autolink', 'strikethrough', 'no-intra-emphasis', 'tables',
))
md = m.Markdown(
renderer,
extensions=(
"autolink",
"strikethrough",
"no-intra-emphasis",
"tables",
),
)


def render(markdown):
Expand Down
43 changes: 39 additions & 4 deletions tests/py/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from datetime import date, datetime, timedelta, timezone
from decimal import Decimal

from markupsafe import escape
from misaka import escape_html
from pando.http.response import Response
from pando.testing.client import DidntRaiseResponse
from urllib.parse import quote

from liberapay import utils
from liberapay.i18n.currencies import Money, MoneyBasket
Expand Down Expand Up @@ -118,7 +119,7 @@ def test_markdown_render_discards_scripts(self):
def test_markdown_autolink_filtering(self):
# Nice data
for url in ('http://a', "https://b?x&y", 'xmpp:c'):
expected = '<p><a href="{0}">{0}</a></p>\n'.format(escape(url))
expected = f'<p><a href="{markdown.renderer.canonical_host}/redirect?url={quote(escape_html(url, escape_slash=True))}" target="_blank">{escape_html(url)}</a></p>\n'
actual = markdown.render('<%s>' % url)
assert actual == expected
# Naughty data
Expand All @@ -128,11 +129,23 @@ def test_markdown_autolink_filtering(self):
encoded_link = ''.join('&x{0:x};'.format(ord(c)) for c in link)
html = markdown.render('<%s>' % encoded_link)
assert link not in html

def test_markdown_is_internal_url(self):
# Internal URLs
assert markdown._is_internal_url("https://liberapay.com/page")
assert markdown._is_internal_url("https://subdomain.liberapay.net/other")
assert markdown._is_internal_url("https://liberapay.org/help")

# External URLs
assert not markdown._is_internal_url("http://example.com")
assert not markdown._is_internal_url("https://liberapay.fake.com")
assert not markdown._is_internal_url("https://google.com")
assert not markdown._is_internal_url("ftp://liberapay.com/file")

def test_markdown_link_filtering(self):
# Nice data
for url in ('http://a', 'https://b', 'xmpp:c'):
expected = '<p><a href="{0}" title="bar&#39;%s">&#39;foo%s</a></p>\n'.format(url)
expected = f'<p><a href="{markdown.renderer.canonical_host}/redirect?url={quote(escape_html(url, escape_slash=True))}" title="bar&#39;%s" target="_blank">&#39;foo%s</a></p>\n'
actual = markdown.render("['foo%%s](%s \"bar'%%s\")" % url)
assert actual == expected
# Naughty data
Expand All @@ -157,6 +170,13 @@ def test_markdown_image_src_filtering(self):
expected = '<p>![foo](javascript:foo)</p>\n'
assert markdown.render('![foo](javascript:foo)') == expected

def test_markdown_image_src_is_ignored_for_rewriting(self):
# Image source should not rewrite
md_text = "![Example](http://example.com/image.png)"
expected = '<p><img src="http://example.com/image.png" alt="Example" /></p>'
actual = markdown.render(md_text)
assert expected in actual

def test_markdown_render_doesnt_allow_any_explicit_anchors(self):
expected = '<p>foo</p>\n'
assert markdown.render('<a href="http://example.com/">foo</a>') == expected
Expand All @@ -166,7 +186,7 @@ def test_markdown_render_doesnt_allow_any_explicit_anchors(self):
assert markdown.render('<a href="javascript:foo">foo</a>') == expected

def test_markdown_render_autolinks(self):
expected = '<p><a href="http://google.com/">http://google.com/</a></p>\n'
expected = f'<p><a href="{markdown.renderer.canonical_host}/redirect?url=http%3A%26%2347%3B%26%2347%3Bgoogle.com%26%2347%3B" target="_blank">http://google.com/</a></p>\n'
actual = markdown.render('http://google.com/')
assert expected == actual

Expand All @@ -175,6 +195,21 @@ def test_markdown_render_no_intra_emphasis(self):
actual = markdown.render('Examples like this_one and this other_one.')
assert expected == actual

def test_markdown_render_autolink_non_whitelisted_protocol(self):
# Invalid protocol
url = "javascript:alert(1)"
expected = "&lt;javascript:alert(1)&gt;"
actual = markdown.render(f"<{url}>")
assert expected in actual

def test_markdown_render_invalid_url(self):
# Invalid URL should not render as a link
md_text = "[Invalid](javascript:alert('XSS'))"
expected = "<p>[Invalid](javascript:alert(&#39;XSS&#39;))</p>"
actual = markdown.render(md_text)

assert expected in actual

# Base64 encoding/decoding
# ========================

Expand Down
33 changes: 33 additions & 0 deletions www/redirect.spt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import html

[---]

# Get the URL from the query parameter
raw_url = request.qs.get('url', '')

# If no URL is provided, redirect to home
if not raw_url:
response.redirect(back_to)

unescaped_url = html.unescape(raw_url)
title = _("Redirect")

[---] text/html
% extends "templates/layouts/base-thin.html"

{% block heading %}{% endblock %}

% block thin_content
<h2>{{_("Warning")}}</h2>

<br />
<p>{{_("You are being redirected to {0}. This link is out of our control and may not be secure.", unescaped_url)}}</p>
<br />

<div class="buttons">
<a class="btn btn-primary" href="/">{{ _("No, cancel") }}</a>
<a href="{{unescaped_url}}" class="btn btn-default">
{{_("Yes, proceed")}}
</a>
</div>
% endblock