diff --git a/docs/_static/css/site.scss b/docs/_static/css/site.scss
index 99a7b23..9c64a10 100644
--- a/docs/_static/css/site.scss
+++ b/docs/_static/css/site.scss
@@ -350,6 +350,10 @@ main {
margin-left: 1.2em;
}
+ dl {
+ // bottom margin for definition lists (<dl>)
+ margin-bottom: 0.4em;
+ }
+
.admonition {
p + p {
diff --git a/docs/docs/configuration/server.rst b/docs/docs/configuration/server.rst
index de9ded7..13507b7 100644
--- a/docs/docs/configuration/server.rst
+++ b/docs/docs/configuration/server.rst
@@ -221,6 +221,37 @@ reply-to-self
Do not forget to configure the client.
+Markup
+------
+
+Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
+supported, but new languages are relatively easy to add.
+
+.. code-block:: ini
+
+ [markup]
+ options = strikethrough, superscript, autolink
+ allowed-elements =
+ allowed-attributes =
+
+options
+ Misaka-specific Markdown extensions. Any Misaka flag starting with ``EXT_``
+ can be used, written without that prefix and separated by commas.
+
+allowed-elements
+ Additional HTML tags to allow in the generated output, comma-separated. By
+ default, only *a*, *blockquote*, *br*, *code*, *del*, *em*, *h1*, *h2*,
+ *h3*, *h4*, *h5*, *h6*, *hr*, *ins*, *li*, *ol*, *p*, *pre*, *strong*,
+ *table*, *tbody*, *td*, *th*, *thead* and *ul* are allowed.
+
+allowed-attributes
+ Additional HTML attributes (independent from elements) to allow in the
+ generated output, comma-separated. By default, only *align* and *href* are
+ allowed.
+
+To allow images in comments, you just need to add ``allowed-elements = img`` and
+``allowed-attributes = src``.
+
Appendum
--------
diff --git a/docs/isso.example.cfg b/docs/isso.example.cfg
index b17199d..a7bedf0 100644
--- a/docs/isso.example.cfg
+++ b/docs/isso.example.cfg
@@ -110,3 +110,22 @@ direct-reply = 3
# comment. After the editing timeframe is gone, commenters can reply to their
# own comments anyways. Do not forget to configure the client.
reply-to-self = false
+
+
+[markup]
+# Customize markup and sanitized HTML. Currently, only Markdown (via Misaka) is
+# supported, but new languages are relatively easy to add.
+
+# Misaka-specific Markdown extensions, comma-separated. Any Misaka flag starting
+# with EXT_ can be used, written without that prefix (EXT_AUTOLINK -> autolink).
+options = strikethrough, superscript, autolink
+
+# Additional HTML tags to allow in the generated output, comma-separated. By
+# default, only a, blockquote, br, code, del, em, h1, h2, h3, h4, h5, h6, hr,
+# ins, li, ol, p, pre, strong, table, tbody, td, th, thead and ul are allowed.
+allowed-elements =
+
+# Additional HTML attributes (independent from elements) to allow in the
+# generated output, comma-separated. By default, only align and href are
+# allowed.
+allowed-attributes =
diff --git a/isso/__init__.py b/isso/__init__.py
index c77b07e..6d48af4 100644
--- a/isso/__init__.py
+++ b/isso/__init__.py
@@ -64,7 +64,7 @@ local_manager = LocalManager([local])
from isso import db, migrate, wsgi, ext, views
from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config
-from isso.utils import parse, http, JSONRequest, origin
+from isso.utils import parse, http, JSONRequest, origin, html
from isso.views import comments
from isso.ext.notifications import Stdout, SMTP
@@ -86,6 +86,7 @@ class Isso(object):
self.conf = conf
self.db = db.SQLite3(conf.get('general', 'dbpath'), conf)
self.signer = URLSafeTimedSerializer(conf.get('general', 'session-key'))
+ self.markup = html.Markup(conf.section('markup'))
super(Isso, self).__init__(conf)
@@ -102,6 +103,9 @@ class Isso(object):
views.Info(self)
comments.API(self)
+ def render(self, text):
+ """Render `text` via self.markup, the html.Markup built from the [markup] section."""
+ return self.markup.render(text)
+
def sign(self, obj):
return self.signer.dumps(obj)
diff --git a/isso/compat.py b/isso/compat.py
index 772d561..ac09826 100644
--- a/isso/compat.py
+++ b/isso/compat.py
@@ -6,6 +6,7 @@ PY2K = sys.version_info[0] == 2
if not PY2K:
map, zip, filter = map, zip, filter
+ from functools import reduce
text_type = str
string_types = (str, )
@@ -15,6 +16,7 @@ else:
from itertools import imap, izip, ifilter
map, zip, filter = imap, izip, ifilter
+ reduce = reduce
text_type = unicode
string_types = (str, unicode)
diff --git a/isso/core.py b/isso/core.py
index 4b1e872..ce2abf2 100644
--- a/isso/core.py
+++ b/isso/core.py
@@ -44,6 +44,9 @@ class Section:
def getint(self, key):
return self.conf.getint(self.section, key)
+ def getlist(self, key):
+ """Return `key` of this section as a list, delegating to the parser's getlist."""
+ return self.conf.getlist(self.section, key)
+
def getiter(self, key):
return self.conf.getiter(self.section, key)
@@ -62,6 +65,7 @@ class IssoParser(ConfigParser):
... [foo]
... bar = 1h
... baz = 12
+ ... spam = a, b, cdef
... bla =
... spam
... ham
@@ -71,6 +75,8 @@ class IssoParser(ConfigParser):
3600
>>> parser.getint("foo", "baz")
12
+ >>> parser.getlist("foo", "spam") # doctest: +IGNORE_UNICODE
+ ['a', 'b', 'cdef']
>>> list(parser.getiter("foo", "bla")) # doctest: +IGNORE_UNICODE
['spam', 'ham']
>>> list(parser.getiter("foo", "asd")) # doctest: +IGNORE_UNICODE
@@ -92,6 +98,9 @@ class IssoParser(ConfigParser):
except AttributeError:
return int(IssoParser._total_seconds(delta))
+    def getlist(self, section, key):
+        """Return the comma-separated value of `key` in `section` as a list.
+
+        Items are stripped of surrounding whitespace and empty items are
+        dropped, so an empty value yields ``[]`` rather than ``['']``.
+        """
+        return [item for item in map(str.strip, self.get(section, key).split(','))
+                if item]
+
def getiter(self, section, key):
for item in map(str.strip, self.get(section, key).split('\n')):
if item:
@@ -123,7 +132,11 @@ class Config:
"enabled = true",
"ratelimit = 2",
"direct-reply = 3",
- "reply-to-self = false"
+ "reply-to-self = false",
+ "[markup]",
+ "options = strikethrough, superscript, autolink",
+ "allowed-elements = ",
+ "allowed-attributes = "
]
@classmethod
diff --git a/isso/utils/html.py b/isso/utils/html.py
index 34a4dc7..5650bfd 100644
--- a/isso/utils/html.py
+++ b/isso/utils/html.py
@@ -1,9 +1,12 @@
# -*- encoding: utf-8 -*-
import pkg_resources
+import operator
+
+from isso.compat import reduce
import html5lib
-setattr(html5lib, "version", pkg_resources.get_distribution("html5lib").version)
+html5lib_version = pkg_resources.get_distribution("html5lib").version
from html5lib.sanitizer import HTMLSanitizer
from html5lib.serializer import HTMLSerializer
@@ -12,59 +15,66 @@ from html5lib.treewalkers import getTreeWalker
import misaka
-class MarkdownSanitizer(HTMLSanitizer):
+def Sanitizer(elements, attributes):
- # attributes found in Sundown's HTML serializer [1] except for <img> tag,
- # because images are not generated anyways.
- #
- # [1] https://github.com/vmg/sundown/blob/master/html/html.c
- allowed_elements = ["a", "p", "hr", "br", "ol", "ul", "li",
- "pre", "code", "blockquote",
- "del", "ins", "strong", "em",
- "h1", "h2", "h3", "h4", "h5", "h6",
- "table", "thead", "tbody", "th", "td"]
+ class Inner(HTMLSanitizer):
- # href for <a> and align for table cells (<th>, <td>)
- allowed_attributes = ["align", "href"]
+ # attributes found in Sundown's HTML serializer [1] except for <img> tag,
+ # because images are not generated anyways.
+ #
+ # [1] https://github.com/vmg/sundown/blob/master/html/html.c
+ allowed_elements = ["a", "p", "hr", "br", "ol", "ul", "li",
+ "pre", "code", "blockquote",
+ "del", "ins", "strong", "em",
+ "h1", "h2", "h3", "h4", "h5", "h6",
+ "table", "thead", "tbody", "th", "td"] + elements
- # remove disallowed tokens from the output
- def disallowed_token(self, token, token_type):
- return None
+ # href for <a> and align for table cells (<th>, <td>)
+ allowed_attributes = ["align", "href"] + attributes
+
+ # remove disallowed tokens from the output
+ def disallowed_token(self, token, token_type):
+ return None
+
+ return Inner
-def sanitize(document):
+def sanitize(tokenizer, document):
+ """Parse `document` as an HTML fragment using `tokenizer` and return the re-serialized markup."""
- parser = html5lib.HTMLParser(tokenizer=MarkdownSanitizer)
+ parser = html5lib.HTMLParser(tokenizer=tokenizer)
domtree = parser.parseFragment(document)
- builder = "simpletree" if html5lib.version == "0.95" else "etree"
+ builder = "simpletree" if html5lib_version == "0.95" else "etree"
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
return serializer.render(stream)
-def markdown(text):
- """Convert Markdown to (safe) HTML.
+def Markdown(extensions=("strikethrough", "superscript", "autolink")):
- >>> markdown("*Ohai!*") # doctest: +IGNORE_UNICODE
- 'Ohai!
'
- >>> markdown("Hi") # doctest: +IGNORE_UNICODE
- 'Hi
'
- >>> markdown("") # doctest: +IGNORE_UNICODE
- "alert('Onoe')
"
- >>> markdown("http://example.org/ and sms:+1234567890") # doctest: +IGNORE_UNICODE
- 'http://example.org/ and sms:+1234567890
'
- """
+    # Combine the extension flags with bitwise OR: XOR would switch an
+    # extension off again if it is listed twice (e.g. "autolink, autolink").
+    flags = reduce(operator.or_, map(
+        lambda ext: getattr(misaka, 'EXT_' + ext.upper()), extensions), 0)
- # ~~strike through~~, sub script: 2^(nd) and http://example.org/ auto-link
- exts = misaka.EXT_STRIKETHROUGH | misaka.EXT_SUPERSCRIPT | misaka.EXT_AUTOLINK
+    def inner(text):
+        """Render `text` with Misaka; wrap the result in <p>...</p> unless it
+        already starts with <p> or ends with </p>."""
+        rv = misaka.html(text, extensions=flags).rstrip("\n")
+        if not rv.startswith("<p>") and not rv.endswith("</p>"):
+            return "<p>" + rv + "</p>"
+        return rv
- # remove HTML tags, skip (for now) and only render "safe" protocols
- html = misaka.HTML_SKIP_STYLE | misaka.HTML_SKIP_IMAGES | misaka.HTML_SAFELINK
+ return inner
- rv = misaka.html(text, extensions=exts, render_flags=html).rstrip("\n")
- if not rv.startswith("") and not rv.endswith("
"):
- rv = "" + rv + "
"
- return sanitize(rv)
+class Markup(object):
+    """Render comment text to sanitized HTML as configured by a config section."""
+
+    def __init__(self, conf):
+        """Build the converter and the sanitizer from `conf`.
+
+        `conf` must provide ``getlist(key)`` for "options",
+        "allowed-elements" and "allowed-attributes".
+        """
+        self._convert = Markdown(conf.getlist("options"))
+        self._tokenizer = Sanitizer(
+            conf.getlist("allowed-elements"),
+            conf.getlist("allowed-attributes"))
+
+    def render(self, text):
+        """Convert `text` with the markup parser, then sanitize the HTML."""
+        return sanitize(self._tokenizer, self._convert(text))
diff --git a/isso/views/comments.py b/isso/views/comments.py
index 1262170..c231ead 100644
--- a/isso/views/comments.py
+++ b/isso/views/comments.py
@@ -163,7 +163,7 @@ class API(object):
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
max_age=self.conf.getint('max-age'))
- rv["text"] = html.markdown(rv["text"])
+ rv["text"] = self.isso.render(rv["text"])
rv["hash"] = pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6).decode("utf-8")
self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash'])
@@ -189,7 +189,7 @@ class API(object):
rv.pop(key)
if request.args.get('plain', '0') == '0':
- rv['text'] = html.markdown(rv['text'])
+ rv['text'] = self.isso.render(rv['text'])
return JSON(rv, 200)
@@ -230,7 +230,7 @@ class API(object):
value=self.isso.sign([rv["id"], sha1(rv["text"])]),
max_age=self.conf.getint('max-age'))
- rv["text"] = html.markdown(rv["text"])
+ rv["text"] = self.isso.render(rv["text"])
resp = JSON(rv, 200)
resp.headers.add("Set-Cookie", cookie(str(rv["id"])))
@@ -336,7 +336,7 @@ class API(object):
if request.args.get('plain', '0') == '0':
for item in rv:
- item['text'] = html.markdown(item['text'])
+ item['text'] = self.isso.render(item['text'])
return JSON(rv, 200)
diff --git a/specs/test_html.py b/specs/test_html.py
new file mode 100644
index 0000000..f03d4c2
--- /dev/null
+++ b/specs/test_html.py
@@ -0,0 +1,60 @@
+
+try:
+ import unittest2 as unittest
+except ImportError:
+ import unittest
+
+
+from isso.core import Config
+from isso.utils import html
+
+
+class TestHTML(unittest.TestCase):
+
+ def test_markdown(self):
+ convert = html.Markdown(extensions=())
+ examples = [
+ ("*Ohai!*", "Ohai!
"),
+ ("Hi", "Hi
"),
+ ("http://example.org/", 'http://example.org/
')]
+
+ for (input, expected) in examples:
+ self.assertEqual(convert(input), expected)
+
+ def test_markdown_extensions(self):
+ convert = html.Markdown(extensions=("strikethrough", "superscript"))
+ examples = [
+ ("~~strike~~ through", "strike through
"),
+ ("sup^(script)", "supscript
")]
+
+ for (input, expected) in examples:
+ self.assertEqual(convert(input), expected)
+
+ @unittest.skipIf(html.html5lib_version == "0.95", "backport")
+ def test_sanitizer(self):
+ sanitizer = html.Sanitizer(elements=[], attributes=[])
+ examples = [
+ ('Look: ', 'Look: '),
+ ('Ha', 'Ha'),
+ ('Ha', 'Ha'),
+ ('Test
', 'Test
'),
+ ('', 'alert("Onoe")')]
+
+ for (input, expected) in examples:
+ self.assertEqual(html.sanitize(sanitizer, input), expected)
+
+ @unittest.skipIf(html.html5lib_version == "0.95", "backport")
+ def test_sanitizer_extensions(self):
+ sanitizer = html.Sanitizer(elements=["img"], attributes=["src"])
+ examples = [
+ ('', ''),
+ ('', '')]
+
+ for (input, expected) in examples:
+ self.assertEqual(html.sanitize(sanitizer, input), expected)
+
+ def test_render(self):
+ conf = Config.load(None).section("markup")
+ renderer = html.Markup(conf).render
+ self.assertEqual(renderer("http://example.org/ and sms:+1234567890"),
+ 'http://example.org/ and sms:+1234567890
')