Add support for html5lib versions below 0.95 (#168)

This commit is contained in:
Martin Zimmermann 2015-02-25 10:48:09 +01:00
parent c3c519ad0e
commit f681002e7e
2 changed files with 13 additions and 6 deletions

View File

@ -62,7 +62,7 @@ class TestHTML(unittest.TestCase):
print("Hello, World") print("Hello, World")
</code></pre>""") </code></pre>""")
@unittest.skipIf(html.html5lib_version == "0.95", "backport") @unittest.skipIf(html.HTML5LIB_VERSION <= html.HTML5LIB_SIMPLETREE, "backport")
def test_sanitizer(self): def test_sanitizer(self):
sanitizer = html.Sanitizer(elements=[], attributes=[]) sanitizer = html.Sanitizer(elements=[], attributes=[])
examples = [ examples = [
@ -75,7 +75,7 @@ class TestHTML(unittest.TestCase):
for (input, expected) in examples: for (input, expected) in examples:
self.assertEqual(html.sanitize(sanitizer, input), expected) self.assertEqual(html.sanitize(sanitizer, input), expected)
@unittest.skipIf(html.html5lib_version == "0.95", "backport") @unittest.skipIf(html.HTML5LIB_VERSION <= html.HTML5LIB_SIMPLETREE, "backport")
def test_sanitizer_extensions(self): def test_sanitizer_extensions(self):
sanitizer = html.Sanitizer(elements=["img"], attributes=["src"]) sanitizer = html.Sanitizer(elements=["img"], attributes=["src"])
examples = [ examples = [

View File

@ -2,14 +2,17 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import pkg_resources
import operator import operator
import pkg_resources
from distutils.version import LooseVersion as Version
HTML5LIB_VERSION = Version(pkg_resources.get_distribution("html5lib").version)
HTML5LIB_SIMPLETREE = Version("0.9.5")
from isso.compat import reduce from isso.compat import reduce
import html5lib import html5lib
html5lib_version = pkg_resources.get_distribution("html5lib").version
from html5lib.sanitizer import HTMLSanitizer from html5lib.sanitizer import HTMLSanitizer
from html5lib.serializer import HTMLSerializer from html5lib.serializer import HTMLSerializer
@ -45,7 +48,11 @@ def sanitize(tokenizer, document):
parser = html5lib.HTMLParser(tokenizer=tokenizer) parser = html5lib.HTMLParser(tokenizer=tokenizer)
domtree = parser.parseFragment(document) domtree = parser.parseFragment(document)
builder = "simpletree" if html5lib_version == "0.95" else "etree" if HTML5LIB_VERSION > HTML5LIB_SIMPLETREE:
builder = "etree"
else:
builder = "simpletree"
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree) stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False) serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)