Merge pull request #485 from gloomy-ghost/bleach
Use bleach to sanitize HTML
This commit is contained in:
commit
f6271a04a2
@ -59,7 +59,6 @@ class TestHTML(unittest.TestCase):
|
|||||||
print("Hello, World")
|
print("Hello, World")
|
||||||
</code></pre>""")
|
</code></pre>""")
|
||||||
|
|
||||||
@unittest.skipIf(html.HTML5LIB_VERSION <= html.HTML5LIB_SIMPLETREE, "backport")
|
|
||||||
def test_sanitizer(self):
|
def test_sanitizer(self):
|
||||||
sanitizer = html.Sanitizer(elements=[], attributes=[])
|
sanitizer = html.Sanitizer(elements=[], attributes=[])
|
||||||
examples = [
|
examples = [
|
||||||
@ -73,11 +72,10 @@ class TestHTML(unittest.TestCase):
|
|||||||
|
|
||||||
for (input, expected) in examples:
|
for (input, expected) in examples:
|
||||||
if isinstance(expected, list):
|
if isinstance(expected, list):
|
||||||
self.assertIn(html.sanitize(sanitizer, input), expected)
|
self.assertIn(sanitizer.sanitize(input), expected)
|
||||||
else:
|
else:
|
||||||
self.assertEqual(html.sanitize(sanitizer, input), expected)
|
self.assertEqual(sanitizer.sanitize(input), expected)
|
||||||
|
|
||||||
@unittest.skipIf(html.HTML5LIB_VERSION <= html.HTML5LIB_SIMPLETREE, "backport")
|
|
||||||
def test_sanitizer_extensions(self):
|
def test_sanitizer_extensions(self):
|
||||||
sanitizer = html.Sanitizer(elements=["img"], attributes=["src"])
|
sanitizer = html.Sanitizer(elements=["img"], attributes=["src"])
|
||||||
examples = [
|
examples = [
|
||||||
@ -85,7 +83,7 @@ class TestHTML(unittest.TestCase):
|
|||||||
('<script src="doge.js"></script>', '')]
|
('<script src="doge.js"></script>', '')]
|
||||||
|
|
||||||
for (input, expected) in examples:
|
for (input, expected) in examples:
|
||||||
self.assertEqual(html.sanitize(sanitizer, input), expected)
|
self.assertEqual(sanitizer.sanitize(input), expected)
|
||||||
|
|
||||||
def test_render(self):
|
def test_render(self):
|
||||||
conf = config.new({
|
conf = config.new({
|
||||||
|
@ -6,61 +6,53 @@ import pkg_resources
|
|||||||
|
|
||||||
from distutils.version import LooseVersion as Version
|
from distutils.version import LooseVersion as Version
|
||||||
|
|
||||||
HTML5LIB_VERSION = Version(pkg_resources.get_distribution("html5lib").version)
|
import bleach
|
||||||
HTML5LIB_SIMPLETREE = Version("0.95")
|
|
||||||
|
|
||||||
import html5lib
|
|
||||||
from html5lib.sanitizer import HTMLSanitizer
|
|
||||||
from html5lib.serializer import HTMLSerializer
|
|
||||||
|
|
||||||
import misaka
|
import misaka
|
||||||
|
|
||||||
|
|
||||||
def Sanitizer(elements, attributes):
|
class Sanitizer(object):
|
||||||
|
|
||||||
class Inner(HTMLSanitizer):
|
|
||||||
|
|
||||||
|
def __init__(self, elements, attributes):
|
||||||
# attributes found in Sundown's HTML serializer [1]
|
# attributes found in Sundown's HTML serializer [1]
|
||||||
# except for <img> tag,
|
# except for <img> tag,
|
||||||
# because images are not generated anyways.
|
# because images are not generated anyways.
|
||||||
#
|
#
|
||||||
# [1] https://github.com/vmg/sundown/blob/master/html/html.c
|
# [1] https://github.com/vmg/sundown/blob/master/html/html.c
|
||||||
allowed_elements = ["a", "p", "hr", "br", "ol", "ul", "li",
|
self.elements = ["a", "p", "hr", "br", "ol", "ul", "li",
|
||||||
"pre", "code", "blockquote",
|
"pre", "code", "blockquote",
|
||||||
"del", "ins", "strong", "em",
|
"del", "ins", "strong", "em",
|
||||||
"h1", "h2", "h3", "h4", "h5", "h6",
|
"h1", "h2", "h3", "h4", "h5", "h6",
|
||||||
"table", "thead", "tbody", "th", "td"] + elements
|
"table", "thead", "tbody", "th", "td"] + elements
|
||||||
|
|
||||||
# href for <a> and align for <table>
|
# href for <a> and align for <table>
|
||||||
allowed_attributes = ["align", "href"] + attributes
|
self.attributes = ["align", "href"] + attributes
|
||||||
|
|
||||||
# remove disallowed tokens from the output
|
|
||||||
def disallowed_token(self, token, token_type):
|
|
||||||
return None
|
|
||||||
|
|
||||||
return Inner
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize(tokenizer, document):
|
|
||||||
|
|
||||||
parser = html5lib.HTMLParser(tokenizer=tokenizer)
|
def sanitize(self, text):
|
||||||
domtree = parser.parseFragment(document)
|
clean_html = bleach.clean(text, tags=self.elements,
|
||||||
|
attributes=self.attributes, strip=True)
|
||||||
|
|
||||||
if HTML5LIB_VERSION > HTML5LIB_SIMPLETREE:
|
def set_links(attrs, new=False):
|
||||||
builder = "etree"
|
href_key = (None, u'href')
|
||||||
|
|
||||||
for link in domtree.findall(".//{http://www.w3.org/1999/xhtml}a"):
|
if href_key not in attrs:
|
||||||
if link.get('href', None):
|
return attrs
|
||||||
link.set("rel", "nofollow noopener")
|
if attrs[href_key].startswith(u'mailto:'):
|
||||||
|
return attrs
|
||||||
|
|
||||||
else:
|
rel_key = (None, u'rel')
|
||||||
builder = "simpletree"
|
rel_values = [val for val in attrs.get(rel_key, u'').split(u' ') if val]
|
||||||
|
|
||||||
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
|
for value in [u'nofollow', u'noopener']:
|
||||||
serializer = HTMLSerializer(
|
if value not in [rel_val.lower() for rel_val in rel_values]:
|
||||||
quote_attr_values=True, omit_optional_tags=False)
|
rel_values.append(value)
|
||||||
|
|
||||||
return serializer.render(stream)
|
attrs[rel_key] = u' '.join(rel_values)
|
||||||
|
return attrs
|
||||||
|
|
||||||
|
linker = bleach.linkifier.Linker(callbacks=[set_links])
|
||||||
|
return linker.linkify(clean_html)
|
||||||
|
|
||||||
|
|
||||||
def Markdown(extensions=("strikethrough", "superscript", "autolink",
|
def Markdown(extensions=("strikethrough", "superscript", "autolink",
|
||||||
@ -100,7 +92,7 @@ class Markup(object):
|
|||||||
conf.getlist("allowed-elements"),
|
conf.getlist("allowed-elements"),
|
||||||
conf.getlist("allowed-attributes"))
|
conf.getlist("allowed-attributes"))
|
||||||
|
|
||||||
self._render = lambda text: sanitize(sanitizer, parser(text))
|
self._render = lambda text: sanitizer.sanitize(parser(text))
|
||||||
|
|
||||||
def render(self, text):
|
def render(self, text):
|
||||||
return self._render(text)
|
return self._render(text)
|
||||||
|
4
setup.py
4
setup.py
@ -5,8 +5,8 @@ import sys
|
|||||||
|
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
requires = ['itsdangerous', 'Jinja2', 'misaka>=2.0,<3.0', 'html5lib<0.9999999',
|
requires = ['itsdangerous', 'Jinja2', 'misaka>=2.0,<3.0', 'html5lib',
|
||||||
'werkzeug>=0.9']
|
'werkzeug>=0.9', 'bleach']
|
||||||
|
|
||||||
if sys.version_info < (2, 7):
|
if sys.version_info < (2, 7):
|
||||||
raise SystemExit("Python 2 versions < 2.7 are not supported.")
|
raise SystemExit("Python 2 versions < 2.7 are not supported.")
|
||||||
|
Loading…
Reference in New Issue
Block a user