Merge branch 'feature/configurable-markdown', closes #62

This commit is contained in:
Martin Zimmermann 2014-02-18 17:36:09 +01:00
commit 9272e7390f
9 changed files with 187 additions and 44 deletions

View File

@ -350,6 +350,10 @@ main {
margin-left: 1.2em; margin-left: 1.2em;
} }
dl {
margin-bottom: 0.4em;
}
.admonition { .admonition {
p + p { p + p {

View File

@ -221,6 +221,37 @@ reply-to-self
Do not forget to configure the client. Do not forget to configure the client.
Markup
------
Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
supported, but new languages are relatively easy to add.
.. code-block:: ini
[markup]
options = strikethrough, superscript, autolink
allowed-elements =
allowed-attributes =
options
`Misaka-specific Markdown extensions <http://misaka.61924.nl/api/>`_. All
flags starting with ``EXT_`` can be used here, written without the ``EXT_``
prefix and separated by commas.
allowed-elements
Additional HTML tags to allow in the generated output, comma-separated. By
default, only *a*, *blockquote*, *br*, *code*, *del*, *em*, *h1*, *h2*,
*h3*, *h4*, *h5*, *h6*, *hr*, *ins*, *li*, *ol*, *p*, *pre*, *strong*,
*table*, *tbody*, *td*, *th*, *thead* and *ul* are allowed.
allowed-attributes
Additional HTML attributes (independent from elements) to allow in the
generated output, comma-separated. By default, only *align* and *href* are
allowed.
To allow images in comments, you just need to add ``allowed-elements = img`` and
``allowed-attributes = src``.
Appendum Appendum
-------- --------

View File

@ -110,3 +110,22 @@ direct-reply = 3
# comment. After the editing timeframe is gone, commenters can reply to their # comment. After the editing timeframe is gone, commenters can reply to their
# own comments anyways. Do not forget to configure the client. # own comments anyways. Do not forget to configure the client.
reply-to-self = false reply-to-self = false
[markup]
# Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
# supported, but new languages are relatively easy to add.
# Misaka-specific Markdown extensions. All flags starting with EXT_ can be used
# here, written without the EXT_ prefix and separated by commas.
options = strikethrough, superscript, autolink
# Additional HTML tags to allow in the generated output, comma-separated. By
# default, only a, blockquote, br, code, del, em, h1, h2, h3, h4, h5, h6, hr,
# ins, li, ol, p, pre, strong, table, tbody, td, th, thead and ul are allowed.
allowed-elements =
# Additional HTML attributes (independent from elements) to allow in the
# generated output, comma-separated. By default, only align and href are
# allowed.
allowed-attributes =

View File

@ -64,7 +64,7 @@ local_manager = LocalManager([local])
from isso import db, migrate, wsgi, ext, views from isso import db, migrate, wsgi, ext, views
from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config
from isso.utils import parse, http, JSONRequest, origin from isso.utils import parse, http, JSONRequest, origin, html
from isso.views import comments from isso.views import comments
from isso.ext.notifications import Stdout, SMTP from isso.ext.notifications import Stdout, SMTP
@ -86,6 +86,7 @@ class Isso(object):
self.conf = conf self.conf = conf
self.db = db.SQLite3(conf.get('general', 'dbpath'), conf) self.db = db.SQLite3(conf.get('general', 'dbpath'), conf)
self.signer = URLSafeTimedSerializer(conf.get('general', 'session-key')) self.signer = URLSafeTimedSerializer(conf.get('general', 'session-key'))
self.markup = html.Markup(conf.section('markup'))
super(Isso, self).__init__(conf) super(Isso, self).__init__(conf)
@ -102,6 +103,9 @@ class Isso(object):
views.Info(self) views.Info(self)
comments.API(self) comments.API(self)
def render(self, text):
    """Render `text` with the markup renderer stored in ``self.markup``."""
    rendered = self.markup.render(text)
    return rendered
def sign(self, obj): def sign(self, obj):
return self.signer.dumps(obj) return self.signer.dumps(obj)

View File

@ -6,6 +6,7 @@ PY2K = sys.version_info[0] == 2
if not PY2K: if not PY2K:
map, zip, filter = map, zip, filter map, zip, filter = map, zip, filter
from functools import reduce
text_type = str text_type = str
string_types = (str, ) string_types = (str, )
@ -15,6 +16,7 @@ else:
from itertools import imap, izip, ifilter from itertools import imap, izip, ifilter
map, zip, filter = imap, izip, ifilter map, zip, filter = imap, izip, ifilter
reduce = reduce
text_type = unicode text_type = unicode
string_types = (str, unicode) string_types = (str, unicode)

View File

@ -44,6 +44,9 @@ class Section:
def getint(self, key): def getint(self, key):
return self.conf.getint(self.section, key) return self.conf.getint(self.section, key)
def getlist(self, key):
    """Return the list stored under `key` in this section.

    Delegates to ``getlist`` of the wrapped configuration, passing this
    section's name along.
    """
    section = self.section
    return self.conf.getlist(section, key)
def getiter(self, key): def getiter(self, key):
return self.conf.getiter(self.section, key) return self.conf.getiter(self.section, key)
@ -62,6 +65,7 @@ class IssoParser(ConfigParser):
... [foo] ... [foo]
... bar = 1h ... bar = 1h
... baz = 12 ... baz = 12
... spam = a, b, cdef
... bla = ... bla =
... spam ... spam
... ham ... ham
@ -71,6 +75,8 @@ class IssoParser(ConfigParser):
3600 3600
>>> parser.getint("foo", "baz") >>> parser.getint("foo", "baz")
12 12
>>> parser.getlist("foo", "spam") # doctest: +IGNORE_UNICODE
['a', 'b', 'cdef']
>>> list(parser.getiter("foo", "bla")) # doctest: +IGNORE_UNICODE >>> list(parser.getiter("foo", "bla")) # doctest: +IGNORE_UNICODE
['spam', 'ham'] ['spam', 'ham']
>>> list(parser.getiter("foo", "asd")) # doctest: +IGNORE_UNICODE >>> list(parser.getiter("foo", "asd")) # doctest: +IGNORE_UNICODE
@ -92,6 +98,9 @@ class IssoParser(ConfigParser):
except AttributeError: except AttributeError:
return int(IssoParser._total_seconds(delta)) return int(IssoParser._total_seconds(delta))
def getlist(self, section, key):
    """Return the comma-separated value of `key` in `section` as a list.

    Surrounding whitespace is stripped from every item. Empty items are
    dropped, so an empty value (``key =``) yields ``[]`` rather than
    ``['']`` and a trailing comma does not add a blank entry.
    """
    # Call strip() on each item instead of mapping ``str.strip``: the
    # unbound method rejects unicode values on Python 2.
    items = (item.strip() for item in self.get(section, key).split(','))
    return [item for item in items if item]
def getiter(self, section, key): def getiter(self, section, key):
for item in map(str.strip, self.get(section, key).split('\n')): for item in map(str.strip, self.get(section, key).split('\n')):
if item: if item:
@ -123,7 +132,11 @@ class Config:
"enabled = true", "enabled = true",
"ratelimit = 2", "ratelimit = 2",
"direct-reply = 3", "direct-reply = 3",
"reply-to-self = false" "reply-to-self = false",
"[markup]",
"options = strikethrough, superscript, autolink",
"allowed-elements = ",
"allowed-attributes = "
] ]
@classmethod @classmethod

View File

@ -1,9 +1,12 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
import pkg_resources import pkg_resources
import operator
from isso.compat import reduce
import html5lib import html5lib
setattr(html5lib, "version", pkg_resources.get_distribution("html5lib").version) html5lib_version = pkg_resources.get_distribution("html5lib").version
from html5lib.sanitizer import HTMLSanitizer from html5lib.sanitizer import HTMLSanitizer
from html5lib.serializer import HTMLSerializer from html5lib.serializer import HTMLSerializer
@ -12,59 +15,66 @@ from html5lib.treewalkers import getTreeWalker
import misaka import misaka
def Sanitizer(elements, attributes):
    """Build an ``HTMLSanitizer`` subclass with an extended whitelist.

    `elements` and `attributes` are lists of additional tag and attribute
    names; they are appended to the built-in whitelists below. The class
    itself (not an instance) is returned.
    """

    # Tags found in Sundown's HTML serializer [1], except for <img>, which
    # is not part of the default whitelist.
    #
    # [1] https://github.com/vmg/sundown/blob/master/html/html.c
    tags = [
        "a", "p", "hr", "br", "ol", "ul", "li",
        "pre", "code", "blockquote",
        "del", "ins", "strong", "em",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "table", "thead", "tbody", "th", "td",
    ] + elements

    # href for <a> and align for <table>
    attrs = ["align", "href"] + attributes

    class Inner(HTMLSanitizer):

        allowed_elements = tags
        allowed_attributes = attrs

        # remove disallowed tokens from the output
        def disallowed_token(self, token, token_type):
            return None

    return Inner
def sanitize(tokenizer, document):
    """Parse `document` as an HTML fragment and serialize it back to a string.

    `tokenizer` is passed to ``html5lib.HTMLParser`` as its tokenizer class.
    The serializer quotes attribute values and keeps optional tags.
    """
    fragment = html5lib.HTMLParser(tokenizer=tokenizer).parseFragment(document)

    # html5lib 0.95 is walked with the "simpletree" builder, every other
    # version with "etree"
    if html5lib_version == "0.95":
        builder = "simpletree"
    else:
        builder = "etree"

    walker = html5lib.treewalkers.getTreeWalker(builder)
    stream = walker(fragment)

    serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
    return serializer.render(stream)
def Markdown(extensions=("strikethrough", "superscript", "autolink")):
    """Return a function that converts Markdown text to (unsanitized) HTML.

    `extensions` is an iterable of Misaka extension names written without
    the ``EXT_`` prefix, in any case. Blank names are ignored, so an empty
    configuration value enables no extension. A name with no matching
    ``misaka.EXT_*`` attribute raises AttributeError.

    The returned callable renders its `text` argument with Misaka, strips
    trailing newlines and wraps the result in ``<p>...</p>`` unless it
    already starts with ``<p>`` or ends with ``</p>``.
    """

    # Combine the flags with OR rather than XOR: a name listed twice must
    # stay enabled instead of cancelling itself out.
    flags = reduce(operator.or_, (
        getattr(misaka, 'EXT_' + name.strip().upper())
        for name in extensions if name.strip()), 0)

    def inner(text):
        rv = misaka.html(text, extensions=flags).rstrip("\n")
        # Check the *start* for an opening tag; rendered output never ends
        # with "<p>", so testing the end for it was a no-op.
        if not rv.startswith("<p>") and not rv.endswith("</p>"):
            return "<p>" + rv + "</p>"
        return rv

    return inner
class Markup(object):
    """Render comment text to sanitized HTML, configured from a config section.

    `conf` must provide ``getlist(key)``; the keys ``options``,
    ``allowed-elements`` and ``allowed-attributes`` are read once on
    construction.
    """

    def __init__(self, conf):
        to_html = Markdown(conf.getlist("options"))
        tokenizer = Sanitizer(
            conf.getlist("allowed-elements"),
            conf.getlist("allowed-attributes"))

        def pipeline(text):
            # convert the markup first, then sanitize the generated HTML
            return sanitize(tokenizer, to_html(text))

        self._render = pipeline

    def render(self, text):
        """Return `text` converted to HTML and passed through the sanitizer."""
        return self._render(text)

View File

@ -163,7 +163,7 @@ class API(object):
value=self.isso.sign([rv["id"], sha1(rv["text"])]), value=self.isso.sign([rv["id"], sha1(rv["text"])]),
max_age=self.conf.getint('max-age')) max_age=self.conf.getint('max-age'))
rv["text"] = html.markdown(rv["text"]) rv["text"] = self.isso.render(rv["text"])
rv["hash"] = pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6).decode("utf-8") rv["hash"] = pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6).decode("utf-8")
self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash']) self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash'])
@ -189,7 +189,7 @@ class API(object):
rv.pop(key) rv.pop(key)
if request.args.get('plain', '0') == '0': if request.args.get('plain', '0') == '0':
rv['text'] = html.markdown(rv['text']) rv['text'] = self.isso.render(rv['text'])
return JSON(rv, 200) return JSON(rv, 200)
@ -230,7 +230,7 @@ class API(object):
value=self.isso.sign([rv["id"], sha1(rv["text"])]), value=self.isso.sign([rv["id"], sha1(rv["text"])]),
max_age=self.conf.getint('max-age')) max_age=self.conf.getint('max-age'))
rv["text"] = html.markdown(rv["text"]) rv["text"] = self.isso.render(rv["text"])
resp = JSON(rv, 200) resp = JSON(rv, 200)
resp.headers.add("Set-Cookie", cookie(str(rv["id"]))) resp.headers.add("Set-Cookie", cookie(str(rv["id"])))
@ -336,7 +336,7 @@ class API(object):
if request.args.get('plain', '0') == '0': if request.args.get('plain', '0') == '0':
for item in rv: for item in rv:
item['text'] = html.markdown(item['text']) item['text'] = self.isso.render(item['text'])
return JSON(rv, 200) return JSON(rv, 200)

60
specs/test_html.py Normal file
View File

@ -0,0 +1,60 @@
try:
import unittest2 as unittest
except ImportError:
import unittest
from isso.core import Config
from isso.utils import html
class TestHTML(unittest.TestCase):
    """Tests for the Markdown converter, the HTML sanitizer and Markup."""

    def _check(self, func, cases):
        # every case is a (source, expected) pair; func(source) must
        # equal expected
        for source, expected in cases:
            self.assertEqual(func(source), expected)

    def test_markdown(self):
        # no extensions enabled: URLs are not turned into links
        self._check(html.Markdown(extensions=()), [
            ("*Ohai!*", "<p><em>Ohai!</em></p>"),
            ("<em>Hi</em>", "<p><em>Hi</em></p>"),
            ("http://example.org/", '<p>http://example.org/</p>')])

    def test_markdown_extensions(self):
        self._check(html.Markdown(extensions=("strikethrough", "superscript")), [
            ("~~strike~~ through", "<p><del>strike</del> through</p>"),
            ("sup^(script)", "<p>sup<sup>script</sup></p>")])

    @unittest.skipIf(html.html5lib_version == "0.95", "backport")
    def test_sanitizer(self):
        tokenizer = html.Sanitizer(elements=[], attributes=[])
        self._check(lambda source: html.sanitize(tokenizer, source), [
            ('Look: <img src="..." />', 'Look: '),
            ('<a href="http://example.org/">Ha</a>', '<a href="http://example.org/">Ha</a>'),
            ('<a href="sms:+1234567890">Ha</a>', '<a>Ha</a>'),
            ('<p style="visibility: hidden;">Test</p>', '<p>Test</p>'),
            ('<script>alert("Onoe")</script>', 'alert("Onoe")')])

    @unittest.skipIf(html.html5lib_version == "0.95", "backport")
    def test_sanitizer_extensions(self):
        # <img> and src are whitelisted explicitly, <script> is not
        tokenizer = html.Sanitizer(elements=["img"], attributes=["src"])
        self._check(lambda source: html.sanitize(tokenizer, source), [
            ('<img src="cat.gif" />', '<img src="cat.gif">'),
            ('<script src="doge.js"></script>', '')])

    def test_render(self):
        # default configuration: autolink is on
        section = Config.load(None).section("markup")
        render = html.Markup(section).render
        self.assertEqual(
            render("http://example.org/ and sms:+1234567890"),
            '<p><a href="http://example.org/">http://example.org/</a> and sms:+1234567890</p>')