|
|
|
@ -6,61 +6,30 @@ import pkg_resources
|
|
|
|
|
|
|
|
|
|
from distutils.version import LooseVersion as Version
|
|
|
|
|
|
|
|
|
|
HTML5LIB_VERSION = Version(pkg_resources.get_distribution("html5lib").version)
|
|
|
|
|
HTML5LIB_SIMPLETREE = Version("0.95")
|
|
|
|
|
|
|
|
|
|
import html5lib
|
|
|
|
|
from html5lib.sanitizer import HTMLSanitizer
|
|
|
|
|
from html5lib.serializer import HTMLSerializer
|
|
|
|
|
|
|
|
|
|
import bleach
|
|
|
|
|
import misaka
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def Sanitizer(elements, attributes):
|
|
|
|
|
|
|
|
|
|
class Inner(HTMLSanitizer):
|
|
|
|
|
class Sanitizer(object):
|
|
|
|
|
|
|
|
|
|
def __init__(self, elements, attributes):
|
|
|
|
|
# attributes found in Sundown's HTML serializer [1]
|
|
|
|
|
# except for <img> tag,
|
|
|
|
|
# because images are not generated anyways.
|
|
|
|
|
#
|
|
|
|
|
# [1] https://github.com/vmg/sundown/blob/master/html/html.c
|
|
|
|
|
allowed_elements = ["a", "p", "hr", "br", "ol", "ul", "li",
|
|
|
|
|
self.elements = ["a", "p", "hr", "br", "ol", "ul", "li",
|
|
|
|
|
"pre", "code", "blockquote",
|
|
|
|
|
"del", "ins", "strong", "em",
|
|
|
|
|
"h1", "h2", "h3", "h4", "h5", "h6",
|
|
|
|
|
"table", "thead", "tbody", "th", "td"] + elements
|
|
|
|
|
|
|
|
|
|
# href for <a> and align for <table>
|
|
|
|
|
allowed_attributes = ["align", "href"] + attributes
|
|
|
|
|
|
|
|
|
|
# remove disallowed tokens from the output
|
|
|
|
|
def disallowed_token(self, token, token_type):
    """Discard a token that is not on the allow-list.

    Returns ``None`` unconditionally; the accompanying comment in this
    file states the intent is to remove disallowed tokens from the
    output.

    :param token: the rejected token (not inspected).
    :param token_type: the type of the rejected token (not inspected).
    :return: always ``None``.
    """
    return None
|
|
|
|
|
|
|
|
|
|
return Inner
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sanitize(tokenizer, document):
|
|
|
|
|
|
|
|
|
|
parser = html5lib.HTMLParser(tokenizer=tokenizer)
|
|
|
|
|
domtree = parser.parseFragment(document)
|
|
|
|
|
|
|
|
|
|
if HTML5LIB_VERSION > HTML5LIB_SIMPLETREE:
|
|
|
|
|
builder = "etree"
|
|
|
|
|
|
|
|
|
|
for link in domtree.findall(".//{http://www.w3.org/1999/xhtml}a"):
|
|
|
|
|
if link.get('href', None):
|
|
|
|
|
link.set("rel", "nofollow noopener")
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
builder = "simpletree"
|
|
|
|
|
|
|
|
|
|
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
|
|
|
|
|
serializer = HTMLSerializer(
|
|
|
|
|
quote_attr_values=True, omit_optional_tags=False)
|
|
|
|
|
self.attributes = ["align", "href"] + attributes
|
|
|
|
|
|
|
|
|
|
return serializer.render(stream)
|
|
|
|
|
def sanitize(self, text):
    """Return *text* cleaned by ``bleach.clean`` using this instance's allow-lists.

    ``self.elements`` is passed as the allowed tags and
    ``self.attributes`` as the allowed attributes. ``strip=True`` asks
    bleach to strip disallowed markup instead of escaping it.

    NOTE(review): the html5lib-based ``sanitize(tokenizer, document)``
    helper in this file sets ``rel="nofollow noopener"`` on links that
    have an ``href``; nothing here does the same -- confirm that
    dropping it is intended.

    :param text: HTML markup to clean.
    :return: the value returned by ``bleach.clean``.
    """
    return bleach.clean(text, tags=self.elements,
                        attributes=self.attributes, strip=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def Markdown(extensions=("strikethrough", "superscript", "autolink",
|
|
|
|
@ -100,7 +69,7 @@ class Markup(object):
|
|
|
|
|
conf.getlist("allowed-elements"),
|
|
|
|
|
conf.getlist("allowed-attributes"))
|
|
|
|
|
|
|
|
|
|
self._render = lambda text: sanitize(sanitizer, parser(text))
|
|
|
|
|
self._render = lambda text: sanitizer.sanitize(parser(text))
|
|
|
|
|
|
|
|
|
|
def render(self, text):
    """Render *text* by delegating to the ``self._render`` callable.

    :param text: input handed unchanged to ``self._render``.
    :return: whatever ``self._render`` returns for *text*.
    """
    return self._render(text)
|
|
|
|
|