refactor markup and sanitization code
This commit introduces a new configuration section, [markup], whose `options` key selects which of Misaka's Markdown extensions are enabled (by default: strikethrough, superscript and autolink). Furthermore, you can allow additional HTML elements and attributes. For example, to enable images, set:

    [markup]
    allowed-elements = img
    allowed-attributes = src

The refactoring separates HTML sanitization from the Markdown -> HTML conversion, which makes it possible to add other markup languages such as BBCode or reStructuredText.
This commit is contained in:
parent
6071a85787
commit
d93d77c8c7
@ -64,7 +64,7 @@ local_manager = LocalManager([local])
|
||||
|
||||
from isso import db, migrate, wsgi, ext, views
|
||||
from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config
|
||||
from isso.utils import parse, http, JSONRequest, origin
|
||||
from isso.utils import parse, http, JSONRequest, origin, html
|
||||
from isso.views import comments
|
||||
|
||||
from isso.ext.notifications import Stdout, SMTP
|
||||
@ -86,6 +86,7 @@ class Isso(object):
|
||||
self.conf = conf
|
||||
self.db = db.SQLite3(conf.get('general', 'dbpath'), conf)
|
||||
self.signer = URLSafeTimedSerializer(conf.get('general', 'session-key'))
|
||||
self.markup = html.Markup(conf.section('markup'))
|
||||
|
||||
super(Isso, self).__init__(conf)
|
||||
|
||||
@ -102,6 +103,9 @@ class Isso(object):
|
||||
views.Info(self)
|
||||
comments.API(self)
|
||||
|
||||
def render(self, text):
|
||||
return self.markup.render(text)
|
||||
|
||||
def sign(self, obj):
|
||||
return self.signer.dumps(obj)
|
||||
|
||||
|
@ -6,6 +6,7 @@ PY2K = sys.version_info[0] == 2
|
||||
if not PY2K:
|
||||
|
||||
map, zip, filter = map, zip, filter
|
||||
from functools import reduce
|
||||
|
||||
text_type = str
|
||||
string_types = (str, )
|
||||
@ -15,6 +16,7 @@ else:
|
||||
|
||||
from itertools import imap, izip, ifilter
|
||||
map, zip, filter = imap, izip, ifilter
|
||||
reduce = reduce
|
||||
|
||||
text_type = unicode
|
||||
string_types = (str, unicode)
|
||||
|
@ -132,7 +132,11 @@ class Config:
|
||||
"enabled = true",
|
||||
"ratelimit = 2",
|
||||
"direct-reply = 3",
|
||||
"reply-to-self = false"
|
||||
"reply-to-self = false",
|
||||
"[markup]",
|
||||
"options = strikethrough, superscript, autolink",
|
||||
"allowed-elements = ",
|
||||
"allowed-attributes = "
|
||||
]
|
||||
|
||||
@classmethod
|
||||
|
@ -1,9 +1,12 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import pkg_resources
|
||||
import operator
|
||||
|
||||
from isso.compat import reduce
|
||||
|
||||
import html5lib
|
||||
setattr(html5lib, "version", pkg_resources.get_distribution("html5lib").version)
|
||||
html5lib_version = pkg_resources.get_distribution("html5lib").version
|
||||
|
||||
from html5lib.sanitizer import HTMLSanitizer
|
||||
from html5lib.serializer import HTMLSerializer
|
||||
@ -12,7 +15,9 @@ from html5lib.treewalkers import getTreeWalker
|
||||
import misaka
|
||||
|
||||
|
||||
class MarkdownSanitizer(HTMLSanitizer):
|
||||
def Sanitizer(elements, attributes):
|
||||
|
||||
class Inner(HTMLSanitizer):
|
||||
|
||||
# attributes found in Sundown's HTML serializer [1] except for <img> tag,
|
||||
# because images are not generated anyways.
|
||||
@ -22,49 +27,54 @@ class MarkdownSanitizer(HTMLSanitizer):
|
||||
"pre", "code", "blockquote",
|
||||
"del", "ins", "strong", "em",
|
||||
"h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"table", "thead", "tbody", "th", "td"]
|
||||
"table", "thead", "tbody", "th", "td"] + elements
|
||||
|
||||
# href for <a> and align for <table>
|
||||
allowed_attributes = ["align", "href"]
|
||||
allowed_attributes = ["align", "href"] + attributes
|
||||
|
||||
# remove disallowed tokens from the output
|
||||
def disallowed_token(self, token, token_type):
|
||||
return None
|
||||
|
||||
return Inner
|
||||
|
||||
def sanitize(document):
|
||||
|
||||
parser = html5lib.HTMLParser(tokenizer=MarkdownSanitizer)
|
||||
def sanitize(tokenizer, document):
|
||||
|
||||
parser = html5lib.HTMLParser(tokenizer=tokenizer)
|
||||
domtree = parser.parseFragment(document)
|
||||
|
||||
builder = "simpletree" if html5lib.version == "0.95" else "etree"
|
||||
builder = "simpletree" if html5lib_version == "0.95" else "etree"
|
||||
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
|
||||
serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
|
||||
|
||||
return serializer.render(stream)
|
||||
|
||||
|
||||
def markdown(text):
|
||||
"""Convert Markdown to (safe) HTML.
|
||||
def Markdown(extensions=("strikethrough", "superscript", "autolink")):
|
||||
|
||||
>>> markdown("*Ohai!*") # doctest: +IGNORE_UNICODE
|
||||
'<p><em>Ohai!</em></p>'
|
||||
>>> markdown("<em>Hi</em>") # doctest: +IGNORE_UNICODE
|
||||
'<p><em>Hi</em></p>'
|
||||
>>> markdown("<script>alert('Onoe')</script>") # doctest: +IGNORE_UNICODE
|
||||
"<p>alert('Onoe')</p>"
|
||||
>>> markdown("http://example.org/ and sms:+1234567890") # doctest: +IGNORE_UNICODE
|
||||
'<p><a href="http://example.org/">http://example.org/</a> and sms:+1234567890</p>'
|
||||
"""
|
||||
flags = reduce(operator.xor, map(
|
||||
lambda ext: getattr(misaka, 'EXT_' + ext.upper()), extensions), 0)
|
||||
|
||||
# ~~strike through~~, sub script: 2^(nd) and http://example.org/ auto-link
|
||||
exts = misaka.EXT_STRIKETHROUGH | misaka.EXT_SUPERSCRIPT | misaka.EXT_AUTOLINK
|
||||
def inner(text):
|
||||
rv = misaka.html(text, extensions=flags).rstrip("\n")
|
||||
if not rv.endswith("<p>") and not rv.endswith("</p>"):
|
||||
return "<p>" + rv + "</p>"
|
||||
return rv
|
||||
|
||||
# remove HTML tags, skip <img> (for now) and only render "safe" protocols
|
||||
html = misaka.HTML_SKIP_STYLE | misaka.HTML_SKIP_IMAGES | misaka.HTML_SAFELINK
|
||||
return inner
|
||||
|
||||
rv = misaka.html(text, extensions=exts, render_flags=html).rstrip("\n")
|
||||
if not rv.startswith("<p>") and not rv.endswith("</p>"):
|
||||
rv = "<p>" + rv + "</p>"
|
||||
|
||||
return sanitize(rv)
|
||||
class Markup(object):
|
||||
|
||||
def __init__(self, conf):
|
||||
|
||||
parser = Markdown(conf.getlist("options"))
|
||||
sanitizer = Sanitizer(
|
||||
conf.getlist("allowed-elements"),
|
||||
conf.getlist("allowed-attributes"))
|
||||
|
||||
self._render = lambda text: sanitize(sanitizer, parser(text))
|
||||
|
||||
def render(self, text):
|
||||
return self._render(text)
|
||||
|
@ -163,7 +163,7 @@ class API(object):
|
||||
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
|
||||
max_age=self.conf.getint('max-age'))
|
||||
|
||||
rv["text"] = html.markdown(rv["text"])
|
||||
rv["text"] = self.isso.render(rv["text"])
|
||||
rv["hash"] = pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6).decode("utf-8")
|
||||
|
||||
self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash'])
|
||||
@ -189,7 +189,7 @@ class API(object):
|
||||
rv.pop(key)
|
||||
|
||||
if request.args.get('plain', '0') == '0':
|
||||
rv['text'] = html.markdown(rv['text'])
|
||||
rv['text'] = self.isso.render(rv['text'])
|
||||
|
||||
return JSON(rv, 200)
|
||||
|
||||
@ -230,7 +230,7 @@ class API(object):
|
||||
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
|
||||
max_age=self.conf.getint('max-age'))
|
||||
|
||||
rv["text"] = html.markdown(rv["text"])
|
||||
rv["text"] = self.isso.render(rv["text"])
|
||||
|
||||
resp = JSON(rv, 200)
|
||||
resp.headers.add("Set-Cookie", cookie(str(rv["id"])))
|
||||
@ -336,7 +336,7 @@ class API(object):
|
||||
|
||||
if request.args.get('plain', '0') == '0':
|
||||
for item in rv:
|
||||
item['text'] = html.markdown(item['text'])
|
||||
item['text'] = self.isso.render(item['text'])
|
||||
|
||||
return JSON(rv, 200)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user