From 0d07515c189fc20f829a8efd2d6fe53f3cebc3d3 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Sat, 2 Nov 2013 15:07:42 +0100 Subject: [PATCH] override HTML title parsing with a `data-title="...` attribute If you don't use a

`<h1>` to mark up your post's title (but h2), it is no longer possible to reliably detect the site's title. E.g. you have a single page with only one `<h1>`

and that's the *real* title of that page. But on the other hand, it is also possible that the `<h1>`

tag is just your website's name and the actual post title is marked up in `<h2>`

. --- README.md | 35 +++++++++++++++++++++++++++++++++++ isso/utils/parse.py | 19 ++++++++++++++++--- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a93a6a9..186d73e 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,41 @@ current comment count. This functionality is already included when you embed `embed.min.js`, do *not* mix `embed.min.js` and `count.min.js` in a single document. +### Client Configuration + +You can configure the client (the JS part) via `data-` attributes: + +* data-title + + When you start a new thread (= first comment on a page), Isso sends + a GET request that page to see if it a) exists and b) parse the site's + heading (currently used as subject in emails). + + Isso assumes that the title is inside an `h1` tag near the isso thread: + + ```html + + +

Website Title

+
+
+

Post Title

+
+ ... + ``` + + In this example, the detected title is `Post Title` as expected, but some + older sites may only use a single `h1` as their website's main title, and + an `h2` for the post title. Unfortunately this is ambiguous and you have + to tell Isso what the actual post title is: + + ```html +
+ ``` + + Make sure to escape the attribute value. + + ### Webserver configuration * nginx configuration to run Isso on `/isso`: diff --git a/isso/utils/parse.py b/isso/utils/parse.py index 8decd38..a03ba7b 100644 --- a/isso/utils/parse.py +++ b/isso/utils/parse.py @@ -7,9 +7,10 @@ import datetime from itertools import chain try: + from urllib import unquote from urlparse import urlparse except ImportError: - from urllib.parse import urlparse + from urllib.parse import urlparse, unquote import html5lib @@ -81,7 +82,7 @@ def title(data, default=u"Untitled."): which is the nearest H1 node in context to an element with the `isso-thread` id. >>> title("asdf") # doctest: +IGNORE_UNICODE - u'Untitled.' + 'Untitled.' >>> title(''' ... ... @@ -101,7 +102,14 @@ def title(data, default=u"Untitled."): ...
... ... ''') # doctest: +IGNORE_UNICODE - u'Can you find me?' + 'Can you find me?' + >>> title(''' + ... + ... + ...

I'm the real title!1 + ...
+ ... ''') # doctest: +IGNORE_UNICODE + 'No way!' """ html = html5lib.parse(data, treebuilder="dom") @@ -137,6 +145,11 @@ def title(data, default=u"Untitled."): for item in gettext(child): yield item + try: + return unquote(el.attributes["data-title"].value) + except (KeyError, AttributeError): + pass + while el is not None: # el.parentNode is None in the very end visited.append(el)