Merge branch 'feature/configurable-markdown', closes #62
This commit is contained in:
commit
9272e7390f
4
docs/_static/css/site.scss
vendored
4
docs/_static/css/site.scss
vendored
@ -350,6 +350,10 @@ main {
|
|||||||
margin-left: 1.2em;
|
margin-left: 1.2em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dl {
|
||||||
|
margin-bottom: 0.4em;
|
||||||
|
}
|
||||||
|
|
||||||
.admonition {
|
.admonition {
|
||||||
|
|
||||||
p + p {
|
p + p {
|
||||||
|
@ -221,6 +221,37 @@ reply-to-self
|
|||||||
|
|
||||||
Do not forget to configure the client.
|
Do not forget to configure the client.
|
||||||
|
|
||||||
|
Markup
|
||||||
|
------
|
||||||
|
|
||||||
|
Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
|
||||||
|
supported, but new languages are relatively easy to add.
|
||||||
|
|
||||||
|
.. code-block:: ini
|
||||||
|
|
||||||
|
[markup]
|
||||||
|
options = strikethrough, superscript, autolink
|
||||||
|
allowed-elements =
|
||||||
|
allowed-attributes =
|
||||||
|
|
||||||
|
options
|
||||||
|
`Misaka-specific Markdown extensions <http://misaka.61924.nl/api/>`_; all
|
||||||
|
flags starting with ``EXT_`` can be used there (without the prefix), separated by commas.
|
||||||
|
|
||||||
|
allowed-elements
|
||||||
|
Additional HTML tags to allow in the generated output, comma-separated. By
|
||||||
|
default, only *a*, *blockquote*, *br*, *code*, *del*, *em*, *h1*, *h2*,
|
||||||
|
*h3*, *h4*, *h5*, *h6*, *hr*, *ins*, *li*, *ol*, *p*, *pre*, *strong*,
|
||||||
|
*table*, *tbody*, *td*, *th*, *thead* and *ul* are allowed.
|
||||||
|
|
||||||
|
allowed-attributes
|
||||||
|
Additional HTML attributes (independent from elements) to allow in the
|
||||||
|
generated output, comma-separated. By default, only *align* and *href* are
|
||||||
|
allowed.
|
||||||
|
|
||||||
|
To allow images in comments, you just need to add ``allowed-elements = img`` and
|
||||||
|
``allowed-attributes = src``.
|
||||||
|
|
||||||
|
|
||||||
Appendix
|
Appendix
|
||||||
--------
|
--------
|
||||||
|
@ -110,3 +110,22 @@ direct-reply = 3
|
|||||||
# comment. After the editing timeframe is gone, commenters can reply to their
|
# comment. After the editing timeframe is gone, commenters can reply to their
|
||||||
# own comments anyways. Do not forget to configure the client.
|
# own comments anyways. Do not forget to configure the client.
|
||||||
reply-to-self = false
|
reply-to-self = false
|
||||||
|
|
||||||
|
|
||||||
|
[markup]
|
||||||
|
# Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
|
||||||
|
# supported, but new languages are relatively easy to add.
|
||||||
|
|
||||||
|
# Misaka-specific Markdown extensions, all flags starting with EXT_ can be used
|
||||||
|
# there (without the EXT_ prefix), separated by commas.
|
||||||
|
options = strikethrough, superscript, autolink
|
||||||
|
|
||||||
|
# Additional HTML tags to allow in the generated output, comma-separated. By
|
||||||
|
# default, only a, blockquote, br, code, del, em, h1, h2, h3, h4, h5, h6, hr,
|
||||||
|
# ins, li, ol, p, pre, strong, table, tbody, td, th, thead and ul are allowed.
|
||||||
|
allowed-elements =
|
||||||
|
|
||||||
|
# Additional HTML attributes (independent from elements) to allow in the
|
||||||
|
# generated output, comma-separated. By default, only align and href are
|
||||||
|
# allowed.
|
||||||
|
allowed-attributes =
|
||||||
|
@ -64,7 +64,7 @@ local_manager = LocalManager([local])
|
|||||||
|
|
||||||
from isso import db, migrate, wsgi, ext, views
|
from isso import db, migrate, wsgi, ext, views
|
||||||
from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config
|
from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config
|
||||||
from isso.utils import parse, http, JSONRequest, origin
|
from isso.utils import parse, http, JSONRequest, origin, html
|
||||||
from isso.views import comments
|
from isso.views import comments
|
||||||
|
|
||||||
from isso.ext.notifications import Stdout, SMTP
|
from isso.ext.notifications import Stdout, SMTP
|
||||||
@ -86,6 +86,7 @@ class Isso(object):
|
|||||||
self.conf = conf
|
self.conf = conf
|
||||||
self.db = db.SQLite3(conf.get('general', 'dbpath'), conf)
|
self.db = db.SQLite3(conf.get('general', 'dbpath'), conf)
|
||||||
self.signer = URLSafeTimedSerializer(conf.get('general', 'session-key'))
|
self.signer = URLSafeTimedSerializer(conf.get('general', 'session-key'))
|
||||||
|
self.markup = html.Markup(conf.section('markup'))
|
||||||
|
|
||||||
super(Isso, self).__init__(conf)
|
super(Isso, self).__init__(conf)
|
||||||
|
|
||||||
@ -102,6 +103,9 @@ class Isso(object):
|
|||||||
views.Info(self)
|
views.Info(self)
|
||||||
comments.API(self)
|
comments.API(self)
|
||||||
|
|
||||||
|
def render(self, text):
|
||||||
|
return self.markup.render(text)
|
||||||
|
|
||||||
def sign(self, obj):
|
def sign(self, obj):
|
||||||
return self.signer.dumps(obj)
|
return self.signer.dumps(obj)
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ PY2K = sys.version_info[0] == 2
|
|||||||
if not PY2K:
|
if not PY2K:
|
||||||
|
|
||||||
map, zip, filter = map, zip, filter
|
map, zip, filter = map, zip, filter
|
||||||
|
from functools import reduce
|
||||||
|
|
||||||
text_type = str
|
text_type = str
|
||||||
string_types = (str, )
|
string_types = (str, )
|
||||||
@ -15,6 +16,7 @@ else:
|
|||||||
|
|
||||||
from itertools import imap, izip, ifilter
|
from itertools import imap, izip, ifilter
|
||||||
map, zip, filter = imap, izip, ifilter
|
map, zip, filter = imap, izip, ifilter
|
||||||
|
reduce = reduce
|
||||||
|
|
||||||
text_type = unicode
|
text_type = unicode
|
||||||
string_types = (str, unicode)
|
string_types = (str, unicode)
|
||||||
|
15
isso/core.py
15
isso/core.py
@ -44,6 +44,9 @@ class Section:
|
|||||||
def getint(self, key):
|
def getint(self, key):
|
||||||
return self.conf.getint(self.section, key)
|
return self.conf.getint(self.section, key)
|
||||||
|
|
||||||
|
def getlist(self, key):
|
||||||
|
return self.conf.getlist(self.section, key)
|
||||||
|
|
||||||
def getiter(self, key):
|
def getiter(self, key):
|
||||||
return self.conf.getiter(self.section, key)
|
return self.conf.getiter(self.section, key)
|
||||||
|
|
||||||
@ -62,6 +65,7 @@ class IssoParser(ConfigParser):
|
|||||||
... [foo]
|
... [foo]
|
||||||
... bar = 1h
|
... bar = 1h
|
||||||
... baz = 12
|
... baz = 12
|
||||||
|
... spam = a, b, cdef
|
||||||
... bla =
|
... bla =
|
||||||
... spam
|
... spam
|
||||||
... ham
|
... ham
|
||||||
@ -71,6 +75,8 @@ class IssoParser(ConfigParser):
|
|||||||
3600
|
3600
|
||||||
>>> parser.getint("foo", "baz")
|
>>> parser.getint("foo", "baz")
|
||||||
12
|
12
|
||||||
|
>>> parser.getlist("foo", "spam") # doctest: +IGNORE_UNICODE
|
||||||
|
['a', 'b', 'cdef']
|
||||||
>>> list(parser.getiter("foo", "bla")) # doctest: +IGNORE_UNICODE
|
>>> list(parser.getiter("foo", "bla")) # doctest: +IGNORE_UNICODE
|
||||||
['spam', 'ham']
|
['spam', 'ham']
|
||||||
>>> list(parser.getiter("foo", "asd")) # doctest: +IGNORE_UNICODE
|
>>> list(parser.getiter("foo", "asd")) # doctest: +IGNORE_UNICODE
|
||||||
@ -92,6 +98,9 @@ class IssoParser(ConfigParser):
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
return int(IssoParser._total_seconds(delta))
|
return int(IssoParser._total_seconds(delta))
|
||||||
|
|
||||||
|
def getlist(self, section, key):
|
||||||
|
return list(map(str.strip, self.get(section, key).split(',')))
|
||||||
|
|
||||||
def getiter(self, section, key):
|
def getiter(self, section, key):
|
||||||
for item in map(str.strip, self.get(section, key).split('\n')):
|
for item in map(str.strip, self.get(section, key).split('\n')):
|
||||||
if item:
|
if item:
|
||||||
@ -123,7 +132,11 @@ class Config:
|
|||||||
"enabled = true",
|
"enabled = true",
|
||||||
"ratelimit = 2",
|
"ratelimit = 2",
|
||||||
"direct-reply = 3",
|
"direct-reply = 3",
|
||||||
"reply-to-self = false"
|
"reply-to-self = false",
|
||||||
|
"[markup]",
|
||||||
|
"options = strikethrough, superscript, autolink",
|
||||||
|
"allowed-elements = ",
|
||||||
|
"allowed-attributes = "
|
||||||
]
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
# -*- encoding: utf-8 -*-
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
|
import operator
|
||||||
|
|
||||||
|
from isso.compat import reduce
|
||||||
|
|
||||||
import html5lib
|
import html5lib
|
||||||
setattr(html5lib, "version", pkg_resources.get_distribution("html5lib").version)
|
html5lib_version = pkg_resources.get_distribution("html5lib").version
|
||||||
|
|
||||||
from html5lib.sanitizer import HTMLSanitizer
|
from html5lib.sanitizer import HTMLSanitizer
|
||||||
from html5lib.serializer import HTMLSerializer
|
from html5lib.serializer import HTMLSerializer
|
||||||
@ -12,7 +15,9 @@ from html5lib.treewalkers import getTreeWalker
|
|||||||
import misaka
|
import misaka
|
||||||
|
|
||||||
|
|
||||||
class MarkdownSanitizer(HTMLSanitizer):
|
def Sanitizer(elements, attributes):
|
||||||
|
|
||||||
|
class Inner(HTMLSanitizer):
|
||||||
|
|
||||||
# attributes found in Sundown's HTML serializer [1] except for <img> tag,
|
# attributes found in Sundown's HTML serializer [1] except for <img> tag,
|
||||||
# because images are not generated anyways.
|
# because images are not generated anyways.
|
||||||
@ -22,49 +27,54 @@ class MarkdownSanitizer(HTMLSanitizer):
|
|||||||
"pre", "code", "blockquote",
|
"pre", "code", "blockquote",
|
||||||
"del", "ins", "strong", "em",
|
"del", "ins", "strong", "em",
|
||||||
"h1", "h2", "h3", "h4", "h5", "h6",
|
"h1", "h2", "h3", "h4", "h5", "h6",
|
||||||
"table", "thead", "tbody", "th", "td"]
|
"table", "thead", "tbody", "th", "td"] + elements
|
||||||
|
|
||||||
# href for <a> and align for <table>
|
# href for <a> and align for <table>
|
||||||
allowed_attributes = ["align", "href"]
|
allowed_attributes = ["align", "href"] + attributes
|
||||||
|
|
||||||
# remove disallowed tokens from the output
|
# remove disallowed tokens from the output
|
||||||
def disallowed_token(self, token, token_type):
|
def disallowed_token(self, token, token_type):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
return Inner
|
||||||
|
|
||||||
def sanitize(document):
|
|
||||||
|
|
||||||
parser = html5lib.HTMLParser(tokenizer=MarkdownSanitizer)
|
def sanitize(tokenizer, document):
|
||||||
|
|
||||||
|
parser = html5lib.HTMLParser(tokenizer=tokenizer)
|
||||||
domtree = parser.parseFragment(document)
|
domtree = parser.parseFragment(document)
|
||||||
|
|
||||||
builder = "simpletree" if html5lib.version == "0.95" else "etree"
|
builder = "simpletree" if html5lib_version == "0.95" else "etree"
|
||||||
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
|
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
|
||||||
serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
|
serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
|
||||||
|
|
||||||
return serializer.render(stream)
|
return serializer.render(stream)
|
||||||
|
|
||||||
|
|
||||||
def markdown(text):
|
def Markdown(extensions=("strikethrough", "superscript", "autolink")):
|
||||||
"""Convert Markdown to (safe) HTML.
|
|
||||||
|
|
||||||
>>> markdown("*Ohai!*") # doctest: +IGNORE_UNICODE
|
flags = reduce(operator.xor, map(
|
||||||
'<p><em>Ohai!</em></p>'
|
lambda ext: getattr(misaka, 'EXT_' + ext.upper()), extensions), 0)
|
||||||
>>> markdown("<em>Hi</em>") # doctest: +IGNORE_UNICODE
|
|
||||||
'<p><em>Hi</em></p>'
|
|
||||||
>>> markdown("<script>alert('Onoe')</script>") # doctest: +IGNORE_UNICODE
|
|
||||||
"<p>alert('Onoe')</p>"
|
|
||||||
>>> markdown("http://example.org/ and sms:+1234567890") # doctest: +IGNORE_UNICODE
|
|
||||||
'<p><a href="http://example.org/">http://example.org/</a> and sms:+1234567890</p>'
|
|
||||||
"""
|
|
||||||
|
|
||||||
# ~~strike through~~, sub script: 2^(nd) and http://example.org/ auto-link
|
def inner(text):
|
||||||
exts = misaka.EXT_STRIKETHROUGH | misaka.EXT_SUPERSCRIPT | misaka.EXT_AUTOLINK
|
rv = misaka.html(text, extensions=flags).rstrip("\n")
|
||||||
|
if not rv.endswith("<p>") and not rv.endswith("</p>"):
|
||||||
|
return "<p>" + rv + "</p>"
|
||||||
|
return rv
|
||||||
|
|
||||||
# remove HTML tags, skip <img> (for now) and only render "safe" protocols
|
return inner
|
||||||
html = misaka.HTML_SKIP_STYLE | misaka.HTML_SKIP_IMAGES | misaka.HTML_SAFELINK
|
|
||||||
|
|
||||||
rv = misaka.html(text, extensions=exts, render_flags=html).rstrip("\n")
|
|
||||||
if not rv.startswith("<p>") and not rv.endswith("</p>"):
|
|
||||||
rv = "<p>" + rv + "</p>"
|
|
||||||
|
|
||||||
return sanitize(rv)
|
class Markup(object):
|
||||||
|
|
||||||
|
def __init__(self, conf):
|
||||||
|
|
||||||
|
parser = Markdown(conf.getlist("options"))
|
||||||
|
sanitizer = Sanitizer(
|
||||||
|
conf.getlist("allowed-elements"),
|
||||||
|
conf.getlist("allowed-attributes"))
|
||||||
|
|
||||||
|
self._render = lambda text: sanitize(sanitizer, parser(text))
|
||||||
|
|
||||||
|
def render(self, text):
|
||||||
|
return self._render(text)
|
||||||
|
@ -163,7 +163,7 @@ class API(object):
|
|||||||
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
|
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
|
||||||
max_age=self.conf.getint('max-age'))
|
max_age=self.conf.getint('max-age'))
|
||||||
|
|
||||||
rv["text"] = html.markdown(rv["text"])
|
rv["text"] = self.isso.render(rv["text"])
|
||||||
rv["hash"] = pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6).decode("utf-8")
|
rv["hash"] = pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6).decode("utf-8")
|
||||||
|
|
||||||
self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash'])
|
self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash'])
|
||||||
@ -189,7 +189,7 @@ class API(object):
|
|||||||
rv.pop(key)
|
rv.pop(key)
|
||||||
|
|
||||||
if request.args.get('plain', '0') == '0':
|
if request.args.get('plain', '0') == '0':
|
||||||
rv['text'] = html.markdown(rv['text'])
|
rv['text'] = self.isso.render(rv['text'])
|
||||||
|
|
||||||
return JSON(rv, 200)
|
return JSON(rv, 200)
|
||||||
|
|
||||||
@ -230,7 +230,7 @@ class API(object):
|
|||||||
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
|
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
|
||||||
max_age=self.conf.getint('max-age'))
|
max_age=self.conf.getint('max-age'))
|
||||||
|
|
||||||
rv["text"] = html.markdown(rv["text"])
|
rv["text"] = self.isso.render(rv["text"])
|
||||||
|
|
||||||
resp = JSON(rv, 200)
|
resp = JSON(rv, 200)
|
||||||
resp.headers.add("Set-Cookie", cookie(str(rv["id"])))
|
resp.headers.add("Set-Cookie", cookie(str(rv["id"])))
|
||||||
@ -336,7 +336,7 @@ class API(object):
|
|||||||
|
|
||||||
if request.args.get('plain', '0') == '0':
|
if request.args.get('plain', '0') == '0':
|
||||||
for item in rv:
|
for item in rv:
|
||||||
item['text'] = html.markdown(item['text'])
|
item['text'] = self.isso.render(item['text'])
|
||||||
|
|
||||||
return JSON(rv, 200)
|
return JSON(rv, 200)
|
||||||
|
|
||||||
|
60
specs/test_html.py
Normal file
60
specs/test_html.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
|
||||||
|
try:
|
||||||
|
import unittest2 as unittest
|
||||||
|
except ImportError:
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
from isso.core import Config
|
||||||
|
from isso.utils import html
|
||||||
|
|
||||||
|
|
||||||
|
class TestHTML(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_markdown(self):
|
||||||
|
convert = html.Markdown(extensions=())
|
||||||
|
examples = [
|
||||||
|
("*Ohai!*", "<p><em>Ohai!</em></p>"),
|
||||||
|
("<em>Hi</em>", "<p><em>Hi</em></p>"),
|
||||||
|
("http://example.org/", '<p>http://example.org/</p>')]
|
||||||
|
|
||||||
|
for (input, expected) in examples:
|
||||||
|
self.assertEqual(convert(input), expected)
|
||||||
|
|
||||||
|
def test_markdown_extensions(self):
|
||||||
|
convert = html.Markdown(extensions=("strikethrough", "superscript"))
|
||||||
|
examples = [
|
||||||
|
("~~strike~~ through", "<p><del>strike</del> through</p>"),
|
||||||
|
("sup^(script)", "<p>sup<sup>script</sup></p>")]
|
||||||
|
|
||||||
|
for (input, expected) in examples:
|
||||||
|
self.assertEqual(convert(input), expected)
|
||||||
|
|
||||||
|
@unittest.skipIf(html.html5lib_version == "0.95", "backport")
|
||||||
|
def test_sanitizer(self):
|
||||||
|
sanitizer = html.Sanitizer(elements=[], attributes=[])
|
||||||
|
examples = [
|
||||||
|
('Look: <img src="..." />', 'Look: '),
|
||||||
|
('<a href="http://example.org/">Ha</a>', '<a href="http://example.org/">Ha</a>'),
|
||||||
|
('<a href="sms:+1234567890">Ha</a>', '<a>Ha</a>'),
|
||||||
|
('<p style="visibility: hidden;">Test</p>', '<p>Test</p>'),
|
||||||
|
('<script>alert("Onoe")</script>', 'alert("Onoe")')]
|
||||||
|
|
||||||
|
for (input, expected) in examples:
|
||||||
|
self.assertEqual(html.sanitize(sanitizer, input), expected)
|
||||||
|
|
||||||
|
@unittest.skipIf(html.html5lib_version == "0.95", "backport")
|
||||||
|
def test_sanitizer_extensions(self):
|
||||||
|
sanitizer = html.Sanitizer(elements=["img"], attributes=["src"])
|
||||||
|
examples = [
|
||||||
|
('<img src="cat.gif" />', '<img src="cat.gif">'),
|
||||||
|
('<script src="doge.js"></script>', '')]
|
||||||
|
|
||||||
|
for (input, expected) in examples:
|
||||||
|
self.assertEqual(html.sanitize(sanitizer, input), expected)
|
||||||
|
|
||||||
|
def test_render(self):
|
||||||
|
conf = Config.load(None).section("markup")
|
||||||
|
renderer = html.Markup(conf).render
|
||||||
|
self.assertEqual(renderer("http://example.org/ and sms:+1234567890"),
|
||||||
|
'<p><a href="http://example.org/">http://example.org/</a> and sms:+1234567890</p>')
|
Loading…
Reference in New Issue
Block a user