@ -7,9 +7,10 @@ import datetime
from itertools import chain
from itertools import chain
try :
try :
from urllib import unquote
from urlparse import urlparse
from urlparse import urlparse
except ImportError :
except ImportError :
from urllib . parse import urlparse
from urllib . parse import urlparse , unquote
import html5lib
import html5lib
@ -81,7 +82,7 @@ def title(data, default=u"Untitled."):
which is the nearest H1 node in context to an element with the ` isso - thread ` id .
which is the nearest H1 node in context to an element with the ` isso - thread ` id .
>> > title ( " asdf " ) # doctest: +IGNORE_UNICODE
>> > title ( " asdf " ) # doctest: +IGNORE_UNICODE
u ' Untitled. '
' Untitled. '
>> > title ( '''
>> > title ( '''
. . . < html >
. . . < html >
. . . < head >
. . . < head >
@ -101,7 +102,14 @@ def title(data, default=u"Untitled."):
. . . < / article >
. . . < / article >
. . . < / body >
. . . < / body >
. . . < / html > ''' ) # doctest: +IGNORE_UNICODE
. . . < / html > ''' ) # doctest: +IGNORE_UNICODE
u ' Can you find me? '
' Can you find me? '
>> > title ( '''
. . . < html >
. . . < body >
. . . < h1 > I ' m the real title!1
. . . < section data - title = " No way % 21 " id = " isso-thread " >
. . . ''' ) # doctest: +IGNORE_UNICODE
' No way! '
"""
"""
html = html5lib . parse ( data , treebuilder = " dom " )
html = html5lib . parse ( data , treebuilder = " dom " )
@ -137,6 +145,11 @@ def title(data, default=u"Untitled."):
for item in gettext ( child ) :
for item in gettext ( child ) :
yield item
yield item
try :
return unquote ( el . attributes [ " data-title " ] . value )
except ( KeyError , AttributeError ) :
pass
while el is not None : # el.parentNode is None in the very end
while el is not None : # el.parentNode is None in the very end
visited . append ( el )
visited . append ( el )