add support for html5lib==0.95, fixes #60
The python-html5lib package in Debian Wheezy (html5lib 0.95) does not support `etree` as a tree builder; the equivalent builder was called `simpletree` back then.
This commit is contained in:
parent
417bd4614c
commit
f3e5d8dc1c
@ -1,6 +1,9 @@
|
|||||||
# -*- encoding: utf-8 -*-
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
|
import pkg_resources
|
||||||
|
|
||||||
import html5lib
|
import html5lib
|
||||||
|
setattr(html5lib, "version", pkg_resources.get_distribution("html5lib").version)
|
||||||
|
|
||||||
from html5lib.sanitizer import HTMLSanitizer
|
from html5lib.sanitizer import HTMLSanitizer
|
||||||
from html5lib.serializer import HTMLSerializer
|
from html5lib.serializer import HTMLSerializer
|
||||||
@ -34,7 +37,8 @@ def sanitize(document):
|
|||||||
parser = html5lib.HTMLParser(tokenizer=MarkdownSanitizer)
|
parser = html5lib.HTMLParser(tokenizer=MarkdownSanitizer)
|
||||||
domtree = parser.parseFragment(document)
|
domtree = parser.parseFragment(document)
|
||||||
|
|
||||||
stream = html5lib.treewalkers.getTreeWalker('etree')(domtree)
|
builder = "simpletree" if html5lib.version == "0.95" else "etree"
|
||||||
|
stream = html5lib.treewalkers.getTreeWalker(builder)(domtree)
|
||||||
serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
|
serializer = HTMLSerializer(quote_attr_values=True, omit_optional_tags=False)
|
||||||
|
|
||||||
return serializer.render(stream)
|
return serializer.render(stream)
|
||||||
|
Loading…
Reference in New Issue
Block a user