From 2db98d0d3437ddef7161a11d36f247e7078de0c7 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Thu, 24 Oct 2013 14:34:45 +0200 Subject: [PATCH] move http utils in separate module --- isso/utils/__init__.py | 89 ------------------------------------------ isso/utils/http.py | 82 ++++++++++++++++++++++++++++++++++++++ isso/views/comment.py | 5 ++- specs/test_comment.py | 7 ++-- 4 files changed, 89 insertions(+), 94 deletions(-) create mode 100644 isso/utils/http.py diff --git a/isso/utils/__init__.py b/isso/utils/__init__.py index 715b2ef..9eb6cbb 100644 --- a/isso/utils/__init__.py +++ b/isso/utils/__init__.py @@ -2,103 +2,14 @@ from __future__ import division -import socket - -try: - import httplib - from urlparse import urlparse -except ImportError: - import http.client as httplib - from urllib.parse import urlparse - import random import hashlib from string import ascii_letters, digits -from contextlib import closing -import html5lib import ipaddress -def normalize(host): - - if not host.startswith(('http://', 'https://')): - host = 'https://' + host - - rv = urlparse(host) - if rv.scheme == 'https': - return (rv.netloc, 443) - return (rv.netloc.rsplit(':')[0], rv.port or 80) - - -def urlexists(host, path): - - host, port = normalize(host) - http = httplib.HTTPSConnection if port == 443 else httplib.HTTPConnection - - with closing(http(host, port, timeout=3)) as con: - try: - con.request('HEAD', path) - except (httplib.HTTPException, socket.error): - return False - return con.getresponse().status == 200 - - -def heading(host, path): - """Connect to `host`, GET path and start from #isso-thread to search for - a possible heading (h1). Returns `None` if nothing found.""" - - host, port = normalize(host) - http = httplib.HTTPSConnection if port == 443 else httplib.HTTPConnection - - with closing(http(host, port, timeout=15)) as con: - con.request('GET', path) - html = html5lib.parse(con.getresponse().read(), treebuilder="dom") - - assert html.lastChild.nodeName == "html" - html = html.lastChild - - # aka getElementById - el = list(filter(lambda i: i.attributes["id"].value == "isso-thread", - filter(lambda i: i.attributes.has_key("id"), html.getElementsByTagName("div")))) - - if not el: - return "Untitled" - - el = el[0] - visited = [] - - def recurse(node): - for child in node.childNodes: - if child.nodeType != child.ELEMENT_NODE: - continue - if child.nodeName.upper() == "H1": - return child - if child not in visited: - return recurse(child) - - def gettext(rv): - for child in rv.childNodes: - if child.nodeType == child.TEXT_NODE: - yield child.nodeValue - if child.nodeType == child.ELEMENT_NODE: - for item in gettext(child): - yield item - - while el is not None: # el.parentNode is None in the very end - - visited.append(el) - rv = recurse(el) - - if rv: - return ''.join(gettext(rv)).strip() - - el = el.parentNode - - return "Untitled." - - def anonymize(remote_addr): try: ipv4 = ipaddress.IPv4Address(remote_addr) diff --git a/isso/utils/http.py b/isso/utils/http.py new file mode 100644 index 0000000..647db7b --- /dev/null +++ b/isso/utils/http.py @@ -0,0 +1,82 @@ +# -*- encoding: utf-8 -*- + +import socket + +from contextlib import closing + +try: + import httplib +except ImportError: + import http.client as httplib + +import html5lib + +from isso.utils import parse + + +def urlexists(host, path): + + host, port = parse.host(host) + http = httplib.HTTPSConnection if port == 443 else httplib.HTTPConnection + + with closing(http(host, port, timeout=3)) as con: + try: + con.request('HEAD', path) + except (httplib.HTTPException, socket.error): + return False + return con.getresponse().status == 200 + + +def heading(host, path): + """Connect to `host`, GET path and start from #isso-thread to search for + a possible heading (h1). Returns `None` if nothing found.""" + + host, port = parse.host(host) + http = httplib.HTTPSConnection if port == 443 else httplib.HTTPConnection + + with closing(http(host, port, timeout=15)) as con: + con.request('GET', path) + html = html5lib.parse(con.getresponse().read(), treebuilder="dom") + + assert html.lastChild.nodeName == "html" + html = html.lastChild + + # aka getElementById + el = list(filter(lambda i: i.attributes["id"].value == "isso-thread", + filter(lambda i: i.attributes.has_key("id"), html.getElementsByTagName("div")))) + + if not el: + return "Untitled" + + el = el[0] + visited = [] + + def recurse(node): + for child in node.childNodes: + if child.nodeType != child.ELEMENT_NODE: + continue + if child.nodeName.upper() == "H1": + return child + if child not in visited: + return recurse(child) + + def gettext(rv): + for child in rv.childNodes: + if child.nodeType == child.TEXT_NODE: + yield child.nodeValue + if child.nodeType == child.ELEMENT_NODE: + for item in gettext(child): + yield item + + while el is not None: # el.parentNode is None in the very end + + visited.append(el) + rv = recurse(el) + + if rv: + return ''.join(gettext(rv)).strip() + + el = el.parentNode + + return "Untitled." + diff --git a/isso/views/comment.py b/isso/views/comment.py index 3cdb3e8..202212c 100644 --- a/isso/views/comment.py +++ b/isso/views/comment.py @@ -15,6 +15,7 @@ from werkzeug.exceptions import abort, BadRequest from isso.compat import text_type as str from isso import utils, notify, db +from isso.utils import http from isso.crypto import pbkdf2 FIELDS = set(['id', 'parent', 'text', 'author', 'website', 'email', 'mode', @@ -46,7 +47,7 @@ class requires: @requires(str, 'uri') def new(app, environ, request, uri): - if uri not in app.db.threads and not utils.urlexists(app.conf.get('general', 'host'), uri): + if uri not in app.db.threads and not http.urlexists(app.conf.get('general', 'host'), uri): return Response('URI does not exist', 404) try: @@ -72,7 +73,7 @@ def new(app, environ, request, uri): with app.lock: if uri not in app.db.threads: - app.db.threads.new(uri, utils.heading(app.conf.get('general', 'host'), uri)) + app.db.threads.new(uri, http.heading(app.conf.get('general', 'host'), uri)) title = app.db.threads[uri].title try: diff --git a/specs/test_comment.py b/specs/test_comment.py index 40ae196..6063ac3 100644 --- a/specs/test_comment.py +++ b/specs/test_comment.py @@ -14,11 +14,12 @@ except ImportError: from werkzeug.test import Client from werkzeug.wrappers import Response -from isso import Isso, notify, utils, views, core +from isso import Isso, notify, views, core +from isso.utils import http from isso.views import comment -utils.heading = lambda *args: "Untitled." -utils.urlexists = lambda *args: True +http.heading = lambda *args: "Untitled." +http.urlexists = lambda *args: True loads = lambda data: json.loads(data.decode('utf-8'))