allow raw HTML markup for a few (whitelisted) tags
To be compatible with comments imported from Disqus (and with users unfamiliar with Markdown), Misaka no longer disables user-supplied HTML. Instead, the generated HTML is now post-processed and all "unsafe" tags (not possible with Markdown) are discarded. Whitelist: p, a, pre, blockquote, h1-h6, em, sub, sup, del, ins, math, dl, ol, ul, li. This commit also removes an unnecessary trailing newline generated by Misaka/Sundown.
This commit is contained in:
parent
36d702c7bc
commit
3713d5e8ee
@ -5,9 +5,16 @@ from __future__ import division
|
|||||||
import pkg_resources
|
import pkg_resources
|
||||||
werkzeug = pkg_resources.get_distribution("werkzeug")
|
werkzeug = pkg_resources.get_distribution("werkzeug")
|
||||||
|
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
|
try:
|
||||||
|
from html.parser import HTMLParser, HTMLParseError
|
||||||
|
except ImportError:
|
||||||
|
from HTMLParser import HTMLParser, HTMLParseError
|
||||||
|
|
||||||
|
from werkzeug.utils import escape
|
||||||
from werkzeug.wrappers import Request, Response
|
from werkzeug.wrappers import Request, Response
|
||||||
from werkzeug.exceptions import BadRequest
|
from werkzeug.exceptions import BadRequest
|
||||||
|
|
||||||
@ -120,13 +127,69 @@ class JSONResponse(Response):
|
|||||||
json.dumps(obj).encode("utf-8"), *args, **kwargs)
|
json.dumps(obj).encode("utf-8"), *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class Sanitizer(HTMLParser, object):
    """Sanitize HTML output: remove unsafe HTML tags such as iframe or
    script based on a whitelist of allowed tags.

    The markup is parsed while the instance is constructed; the sanitized
    document is afterwards available from the file-like :attr:`result`
    (already rewound, so ``Sanitizer(html).result.read()`` returns it).

    Besides dropping every tag that is not listed in :attr:`safe`, the
    sanitizer

    * keeps character/entity references escaped instead of decoding them,
    * escapes ``<`` and ``>`` found in text, so that fragments left over
      after a tag was dropped cannot re-assemble into new markup,
    * drops event-handler attributes (``on*``) and URL attributes that use
      a script-capable scheme.
    """

    safe = set([
        "p", "a", "pre", "blockquote",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "em", "sub", "sup", "del", "ins", "math",
        "dl", "ol", "ul", "li"])

    # Attributes whose value is interpreted as a URL by browsers.
    url_attrs = frozenset(["href", "src", "xlink:href"])

    # URL schemes that can execute script when followed or loaded.
    unsafe_schemes = (u"javascript:", u"vbscript:", u"data:")

    @classmethod
    def _is_safe_attr(cls, key, value):
        """Return True if the attribute `key` with `value` may be emitted."""
        name = key.lower()

        # onclick, onmouseover, ...: inline script on an otherwise safe tag
        if name.startswith("on"):
            return False

        if name in cls.url_attrs and value is not None:
            # Browsers ignore whitespace and control characters inside the
            # scheme ("java\tscript:"), so strip them before comparing.
            compact = u"".join(ch for ch in value if ch > u" ").lower()
            if compact.startswith(cls.unsafe_schemes):
                return False

        # NOTE(review): `style` attributes are still passed through.
        return True

    @classmethod
    def format(cls, attrs):
        """Render `attrs`, a list of ``(name, value)`` pairs, as an HTML
        attribute string.

        Attributes rejected by :meth:`_is_safe_attr` are left out.  A value
        of None renders the bare attribute name; any other value is escaped
        and wrapped in double quotes.  Returns an empty string if nothing
        remains.
        """
        res = []
        for key, value in attrs:
            if not cls._is_safe_attr(key, value):
                continue
            if value is None:
                res.append(key)
            else:
                res.append(u'{0}="{1}"'.format(key, escape(value)))
        return u' '.join(res)

    def __init__(self, html):
        """Parse `html` (text) and store the sanitized markup in
        ``self.result``, rewound to the beginning."""
        try:
            # Python 3.4+ can decode character references before calling
            # handle_data() (the default since 3.5).  That would turn an
            # escaped "&lt;script&gt;" back into live markup, so ask for the
            # references to be routed to handle_entityref/handle_charref.
            super(Sanitizer, self).__init__(convert_charrefs=False)
        except TypeError:
            # older parsers do not know the keyword and never decode
            super(Sanitizer, self).__init__()

        self.result = io.StringIO()
        self.feed(html)
        self.result.seek(0)

    def _write_open_tag(self, tag, attrs, terminator):
        """Write ``<tag attrs`` followed by `terminator` if `tag` is
        whitelisted; otherwise write nothing."""
        if tag not in Sanitizer.safe:
            return

        self.result.write(u"<" + tag)
        rendered = Sanitizer.format(attrs)
        if rendered:  # may be empty after unsafe attributes were removed
            self.result.write(u" " + rendered)
        self.result.write(terminator)

    def handle_starttag(self, tag, attrs):
        self._write_open_tag(tag, attrs, u">")

    def handle_data(self, data):
        # Text must never introduce markup: once a non-whitelisted tag is
        # dropped, surrounding fragments such as "<" + "script>" would
        # otherwise join into a new tag.  "&" is left alone because valid
        # references never reach this handler.
        self.result.write(data.replace(u"<", u"&lt;").replace(u">", u"&gt;"))

    def handle_endtag(self, tag):
        if tag in Sanitizer.safe:
            self.result.write(u"</" + tag + u">")

    def handle_startendtag(self, tag, attrs):
        self._write_open_tag(tag, attrs, u"/>")

    def handle_entityref(self, name):
        # re-emit the reference verbatim, i.e. still escaped
        self.result.write(u'&' + name + u';')

    def handle_charref(self, char):
        # re-emit the reference verbatim, i.e. still escaped
        self.result.write(u'&#' + char + u';')
|
||||||
|
|
||||||
|
|
||||||
def markdown(text):
|
def markdown(text):
|
||||||
"""Convert Markdown to (safe) HTML.
|
"""Convert Markdown to (safe) HTML.
|
||||||
|
|
||||||
>>> markdown("*Ohai!*") # doctest: +IGNORE_UNICODE
|
>>> markdown("*Ohai!*") # doctest: +IGNORE_UNICODE
|
||||||
'<p><em>Ohai!</em></p>'
|
'<p><em>Ohai!</em></p>'
|
||||||
|
>>> markdown("<em>Hi</em>") # doctest: +IGNORE_UNICODE
|
||||||
|
'<p><em>Hi</em></p>'
|
||||||
>>> markdown("<script>alert('Onoe')</script>") # doctest: +IGNORE_UNICODE
|
>>> markdown("<script>alert('Onoe')</script>") # doctest: +IGNORE_UNICODE
|
||||||
'<p>alert('Onoe')</p>'
|
"<p>alert('Onoe')</p>"
|
||||||
>>> markdown("http://example.org/ and sms:+1234567890") # doctest: +IGNORE_UNICODE
|
>>> markdown("http://example.org/ and sms:+1234567890") # doctest: +IGNORE_UNICODE
|
||||||
'<p><a href="http://example.org/">http://example.org/</a> and sms:+1234567890</p>'
|
'<p><a href="http://example.org/">http://example.org/</a> and sms:+1234567890</p>'
|
||||||
"""
|
"""
|
||||||
@ -135,9 +198,13 @@ def markdown(text):
|
|||||||
exts = misaka.EXT_STRIKETHROUGH | misaka.EXT_SUPERSCRIPT | misaka.EXT_AUTOLINK
|
exts = misaka.EXT_STRIKETHROUGH | misaka.EXT_SUPERSCRIPT | misaka.EXT_AUTOLINK
|
||||||
|
|
||||||
# remove HTML tags, skip <img> (for now) and only render "safe" protocols
|
# remove HTML tags, skip <img> (for now) and only render "safe" protocols
|
||||||
html = misaka.HTML_SKIP_HTML | misaka.HTML_SKIP_IMAGES | misaka.HTML_SAFELINK
|
html = misaka.HTML_SKIP_STYLE | misaka.HTML_SKIP_IMAGES | misaka.HTML_SAFELINK
|
||||||
|
|
||||||
return misaka.html(text, extensions=exts, render_flags=html).strip("\n")
|
rv = misaka.html(text, extensions=exts, render_flags=html).rstrip("\n")
|
||||||
|
if not rv.startswith("<p>") and not rv.endswith("</p>"):
|
||||||
|
rv = "<p>" + rv + "</p>"
|
||||||
|
|
||||||
|
return Sanitizer(rv).result.read()
|
||||||
|
|
||||||
|
|
||||||
def origin(hosts):
|
def origin(hosts):
|
||||||
|
@ -54,7 +54,7 @@ class TestComments(unittest.TestCase):
|
|||||||
rv = loads(r.data)
|
rv = loads(r.data)
|
||||||
|
|
||||||
assert rv['id'] == 1
|
assert rv['id'] == 1
|
||||||
assert rv['text'] == '<p>Lorem ipsum ...</p>\n'
|
assert rv['text'] == '<p>Lorem ipsum ...</p>'
|
||||||
|
|
||||||
def testCreate(self):
|
def testCreate(self):
|
||||||
|
|
||||||
@ -66,7 +66,7 @@ class TestComments(unittest.TestCase):
|
|||||||
rv = loads(rv.data)
|
rv = loads(rv.data)
|
||||||
|
|
||||||
assert rv["mode"] == 1
|
assert rv["mode"] == 1
|
||||||
assert rv["text"] == '<p>Lorem ipsum ...</p>\n'
|
assert rv["text"] == '<p>Lorem ipsum ...</p>'
|
||||||
|
|
||||||
def textCreateWithNonAsciiText(self):
|
def textCreateWithNonAsciiText(self):
|
||||||
|
|
||||||
@ -78,7 +78,7 @@ class TestComments(unittest.TestCase):
|
|||||||
rv = loads(rv.data)
|
rv = loads(rv.data)
|
||||||
|
|
||||||
assert rv["mode"] == 1
|
assert rv["mode"] == 1
|
||||||
assert rv["text"] == '<p>Здравствуй, мир!</p>\n'
|
assert rv["text"] == '<p>Здравствуй, мир!</p>'
|
||||||
|
|
||||||
def testCreateMultiple(self):
|
def testCreateMultiple(self):
|
||||||
|
|
||||||
@ -262,10 +262,10 @@ class TestComments(unittest.TestCase):
|
|||||||
self.post('/new?uri=test', data=json.dumps({"text": "Tpyo"}))
|
self.post('/new?uri=test', data=json.dumps({"text": "Tpyo"}))
|
||||||
|
|
||||||
self.put('/id/1', data=json.dumps({"text": "Tyop"}))
|
self.put('/id/1', data=json.dumps({"text": "Tyop"}))
|
||||||
assert loads(self.get('/id/1').data)["text"] == "<p>Tyop</p>\n"
|
assert loads(self.get('/id/1').data)["text"] == "<p>Tyop</p>"
|
||||||
|
|
||||||
self.put('/id/1', data=json.dumps({"text": "Typo"}))
|
self.put('/id/1', data=json.dumps({"text": "Typo"}))
|
||||||
assert loads(self.get('/id/1').data)["text"] == "<p>Typo</p>\n"
|
assert loads(self.get('/id/1').data)["text"] == "<p>Typo</p>"
|
||||||
|
|
||||||
def testDeleteCommentRemovesThread(self):
|
def testDeleteCommentRemovesThread(self):
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user