Merge branch 'feature/configurable-markdown', closes #62

This commit is contained in:
Martin Zimmermann 2014-02-18 17:36:09 +01:00
commit 9272e7390f
9 changed files with 187 additions and 44 deletions

View File

@ -350,6 +350,10 @@ main {
margin-left: 1.2em;
}
dl {
margin-bottom: 0.4em;
}
.admonition {
p + p {

View File

@ -221,6 +221,37 @@ reply-to-self
Do not forget to configure the client.
Markup
------
Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
supported, but new languages are relatively easy to add.
.. code-block:: ini
[markup]
options = strikethrough, superscript, autolink
allowed-elements =
allowed-attributes =
options
`Misaka-specific Markdown extensions <http://misaka.61924.nl/api/>`_,
separated by commas. Any flag starting with ``EXT_`` can be used; give its
name without the prefix, e.g. ``strikethrough`` for ``EXT_STRIKETHROUGH``.
allowed-elements
Additional HTML tags to allow in the generated output, comma-separated. By
default, only *a*, *blockquote*, *br*, *code*, *del*, *em*, *h1*, *h2*,
*h3*, *h4*, *h5*, *h6*, *hr*, *ins*, *li*, *ol*, *p*, *pre*, *strong*,
*table*, *tbody*, *td*, *th*, *thead* and *ul* are allowed.
allowed-attributes
Additional HTML attributes (independent from elements) to allow in the
generated output, comma-separated. By default, only *align* and *href* are
allowed.
To allow images in comments, you just need to add ``allowed-elements = img`` and
``allowed-attributes = src``.
Appendix
--------

View File

@ -110,3 +110,22 @@ direct-reply = 3
# comment. After the editing timeframe has expired, commenters can reply to
# their own comments anyway. Do not forget to configure the client.
reply-to-self = false
[markup]
# Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
# supported, but new languages are relatively easy to add.
# Misaka-specific Markdown extensions, separated by commas. Any flag starting
# with EXT_ can be used; give its name without the prefix, e.g. strikethrough
# for EXT_STRIKETHROUGH.
options = strikethrough, superscript, autolink
# Additional HTML tags to allow in the generated output, comma-separated. By
# default, only a, blockquote, br, code, del, em, h1, h2, h3, h4, h5, h6, hr,
# ins, li, ol, p, pre, strong, table, tbody, td, th, thead and ul are allowed.
allowed-elements =
# Additional HTML attributes (independent from elements) to allow in the
# generated output, comma-separated. By default, only align and href are
# allowed.
allowed-attributes =

View File

@ -64,7 +64,7 @@ local_manager = LocalManager([local])
from isso import db, migrate, wsgi, ext, views
from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config
from isso.utils import parse, http, JSONRequest, origin
from isso.utils import parse, http, JSONRequest, origin, html
from isso.views import comments
from isso.ext.notifications import Stdout, SMTP
@ -86,6 +86,7 @@ class Isso(object):
self.conf = conf
self.db = db.SQLite3(conf.get('general', 'dbpath'), conf)
self.signer = URLSafeTimedSerializer(conf.get('general', 'session-key'))
self.markup = html.Markup(conf.section('markup'))
super(Isso, self).__init__(conf)
@ -102,6 +103,9 @@ class Isso(object):
views.Info(self)
comments.API(self)
def render(self, text):
    """Render `text` with the markup renderer stored on ``self.markup``."""
    renderer = self.markup
    return renderer.render(text)
def sign(self, obj):
    """Return `obj` as dumped by the serializer stored on ``self.signer``."""
    serializer = self.signer
    return serializer.dumps(obj)

View File

@ -6,6 +6,7 @@ PY2K = sys.version_info[0] == 2
if not PY2K:
map, zip, filter = map, zip, filter
from functools import reduce
text_type = str
string_types = (str, )
@ -15,6 +16,7 @@ else:
from itertools import imap, izip, ifilter
map, zip, filter = imap, izip, ifilter
reduce = reduce
text_type = unicode
string_types = (str, unicode)

View File

@ -44,6 +44,9 @@ class Section:
def getint(self, key):
    """Call the parser's ``getint`` for `key` within this section."""
    parser = self.conf
    return parser.getint(self.section, key)
def getlist(self, key):
    """Call the parser's ``getlist`` for `key` within this section."""
    parser = self.conf
    return parser.getlist(self.section, key)
def getiter(self, key):
    """Call the parser's ``getiter`` for `key` within this section."""
    parser = self.conf
    return parser.getiter(self.section, key)
@ -62,6 +65,7 @@ class IssoParser(ConfigParser):
... [foo]
... bar = 1h
... baz = 12
... spam = a, b, cdef
... bla =
... spam
... ham
@ -71,6 +75,8 @@ class IssoParser(ConfigParser):
3600
>>> parser.getint("foo", "baz")
12
>>> parser.getlist("foo", "spam") # doctest: +IGNORE_UNICODE
['a', 'b', 'cdef']
>>> list(parser.getiter("foo", "bla")) # doctest: +IGNORE_UNICODE
['spam', 'ham']
>>> list(parser.getiter("foo", "asd")) # doctest: +IGNORE_UNICODE
@ -92,6 +98,9 @@ class IssoParser(ConfigParser):
except AttributeError:
return int(IssoParser._total_seconds(delta))
def getlist(self, section, key):
    """Return the comma-separated value of `key` in `section` as a list.

    Surrounding whitespace is stripped from every item and empty items are
    dropped, so an empty option (``key =``) yields ``[]`` rather than
    ``['']``.
    """
    raw = self.get(section, key)
    # strip first, then filter: "a, , b" and a trailing comma must not
    # produce empty entries
    stripped = (item.strip() for item in raw.split(','))
    return [item for item in stripped if item]
def getiter(self, section, key):
for item in map(str.strip, self.get(section, key).split('\n')):
if item:
@ -123,7 +132,11 @@ class Config:
"enabled = true",
"ratelimit = 2",
"direct-reply = 3",
"reply-to-self = false"
"reply-to-self = false",
"[markup]",
"options = strikethrough, superscript, autolink",
"allowed-elements = ",
"allowed-attributes = "
]
@classmethod

View File

@ -1,9 +1,12 @@
# -*- encoding: utf-8 -*-
import pkg_resources
import operator
from isso.compat import reduce
import html5lib
setattr(html5lib, "version", pkg_resources.get_distribution("html5lib").version)
html5lib_version = pkg_resources.get_distribution("html5lib").version
from html5lib.sanitizer import HTMLSanitizer
from html5lib.serializer import HTMLSerializer
@ -12,59 +15,66 @@ from html5lib.treewalkers import getTreeWalker
import misaka
def Sanitizer(elements, attributes):
    """Build an ``HTMLSanitizer`` subclass with an extended whitelist.

    :param elements: list of tag names to allow in addition to the defaults.
    :param attributes: list of attribute names to allow in addition to the
        defaults.
    :return: the sanitizer class itself (not an instance).
    """

    # Elements found in Sundown's HTML serializer [1], except for the <img>
    # tag, which is not allowed by default.
    #
    # [1] https://github.com/vmg/sundown/blob/master/html/html.c
    default_elements = ["a", "p", "hr", "br", "ol", "ul", "li",
                        "pre", "code", "blockquote",
                        "del", "ins", "strong", "em",
                        "h1", "h2", "h3", "h4", "h5", "h6",
                        "table", "thead", "tbody", "th", "td"]

    # href for <a> and align for <table>
    default_attributes = ["align", "href"]

    class Inner(HTMLSanitizer):

        allowed_elements = default_elements + elements

        allowed_attributes = default_attributes + attributes

        def disallowed_token(self, token, token_type):
            # returning None removes disallowed tokens from the output
            return None

    return Inner
def sanitize(tokenizer, document):
    """Parse `document` as an HTML fragment using `tokenizer` and re-serialize it.

    :param tokenizer: tokenizer class handed to ``html5lib.HTMLParser``.
    :param document: HTML markup to process.
    :return: the serialized HTML.
    """
    fragment = html5lib.HTMLParser(tokenizer=tokenizer).parseFragment(document)

    # html5lib 0.95 is walked with "simpletree", every other version with "etree"
    if html5lib_version == "0.95":
        walker_name = "simpletree"
    else:
        walker_name = "etree"

    tokens = html5lib.treewalkers.getTreeWalker(walker_name)(fragment)
    return HTMLSerializer(
        quote_attr_values=True, omit_optional_tags=False).render(tokens)
def Markdown(extensions=("strikethrough", "superscript", "autolink")):
    """Build a Markdown-to-HTML converter backed by Misaka.

    :param extensions: iterable of Misaka extension names without the
        ``EXT_`` prefix (case-insensitive), e.g. ``"autolink"`` for
        ``misaka.EXT_AUTOLINK``. Blank names are ignored.
    :return: a function taking Markdown text and returning HTML. The result
        is *not* sanitized by this function.
    :raises AttributeError: if a name has no ``misaka.EXT_*`` counterpart.
    """
    # An empty ``options =`` setting may hand us blank names; skip them
    # instead of failing on ``misaka.EXT_``.
    names = [ext.strip() for ext in extensions if ext.strip()]

    # Combine the bit flags with OR. XOR would switch an extension off again
    # if it is listed twice.
    flags = reduce(operator.or_, (
        getattr(misaka, 'EXT_' + name.upper()) for name in names), 0)

    def inner(text):
        rv = misaka.html(text, extensions=flags).rstrip("\n")
        # Only wrap output that is not already enclosed in paragraph tags.
        if not rv.startswith("<p>") and not rv.endswith("</p>"):
            return "<p>" + rv + "</p>"
        return rv

    return inner
class Markup(object):
    """Render comment text to sanitized HTML as configured by `conf`.

    `conf` must provide ``getlist(key)`` for the keys ``options``,
    ``allowed-elements`` and ``allowed-attributes``.
    """

    def __init__(self, conf):
        to_html = Markdown(conf.getlist("options"))
        tokenizer = Sanitizer(
            conf.getlist("allowed-elements"),
            conf.getlist("allowed-attributes"))

        def _render(text):
            # convert the markup first, then sanitize the generated HTML
            return sanitize(tokenizer, to_html(text))

        self._render = _render

    def render(self, text):
        """Return `text` converted to HTML and passed through the sanitizer."""
        return self._render(text)

View File

@ -163,7 +163,7 @@ class API(object):
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
max_age=self.conf.getint('max-age'))
rv["text"] = html.markdown(rv["text"])
rv["text"] = self.isso.render(rv["text"])
rv["hash"] = pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6).decode("utf-8")
self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash'])
@ -189,7 +189,7 @@ class API(object):
rv.pop(key)
if request.args.get('plain', '0') == '0':
rv['text'] = html.markdown(rv['text'])
rv['text'] = self.isso.render(rv['text'])
return JSON(rv, 200)
@ -230,7 +230,7 @@ class API(object):
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
max_age=self.conf.getint('max-age'))
rv["text"] = html.markdown(rv["text"])
rv["text"] = self.isso.render(rv["text"])
resp = JSON(rv, 200)
resp.headers.add("Set-Cookie", cookie(str(rv["id"])))
@ -336,7 +336,7 @@ class API(object):
if request.args.get('plain', '0') == '0':
for item in rv:
item['text'] = html.markdown(item['text'])
item['text'] = self.isso.render(item['text'])
return JSON(rv, 200)

60
specs/test_html.py Normal file
View File

@ -0,0 +1,60 @@
try:
import unittest2 as unittest
except ImportError:
import unittest
from isso.core import Config
from isso.utils import html
class TestHTML(unittest.TestCase):
    """Tests for Markdown conversion, HTML sanitization and the combined renderer."""

    def test_markdown(self):
        # no extensions enabled: a bare URL is expected to stay plain text
        render = html.Markdown(extensions=())
        cases = (
            ("*Ohai!*", "<p><em>Ohai!</em></p>"),
            ("<em>Hi</em>", "<p><em>Hi</em></p>"),
            ("http://example.org/", '<p>http://example.org/</p>'),
        )

        for source, expected in cases:
            self.assertEqual(render(source), expected)

    def test_markdown_extensions(self):
        render = html.Markdown(extensions=("strikethrough", "superscript"))
        cases = (
            ("~~strike~~ through", "<p><del>strike</del> through</p>"),
            ("sup^(script)", "<p>sup<sup>script</sup></p>"),
        )

        for source, expected in cases:
            self.assertEqual(render(source), expected)

    @unittest.skipIf(html.html5lib_version == "0.95", "backport")
    def test_sanitizer(self):
        # default whitelist only: no extra elements or attributes
        tokenizer = html.Sanitizer(elements=[], attributes=[])
        cases = (
            ('Look: <img src="..." />', 'Look: '),
            ('<a href="http://example.org/">Ha</a>', '<a href="http://example.org/">Ha</a>'),
            ('<a href="sms:+1234567890">Ha</a>', '<a>Ha</a>'),
            ('<p style="visibility: hidden;">Test</p>', '<p>Test</p>'),
            ('<script>alert("Onoe")</script>', 'alert("Onoe")'),
        )

        for source, expected in cases:
            self.assertEqual(html.sanitize(tokenizer, source), expected)

    @unittest.skipIf(html.html5lib_version == "0.95", "backport")
    def test_sanitizer_extensions(self):
        # <img> and src are explicitly allowed here; <script> is still removed
        tokenizer = html.Sanitizer(elements=["img"], attributes=["src"])
        cases = (
            ('<img src="cat.gif" />', '<img src="cat.gif">'),
            ('<script src="doge.js"></script>', ''),
        )

        for source, expected in cases:
            self.assertEqual(html.sanitize(tokenizer, source), expected)

    def test_render(self):
        # the default configuration is expected to auto-link the http URL only
        section = Config.load(None).section("markup")
        render = html.Markup(section).render
        expected = ('<p><a href="http://example.org/">http://example.org/</a>'
                    ' and sms:+1234567890</p>')

        self.assertEqual(
            render("http://example.org/ and sms:+1234567890"), expected)