From 9260e143f12272693aa6c604ce1e1259cac9d73e Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Wed, 11 Jun 2014 14:07:27 +0200 Subject: [PATCH] decouple hash generation from comment view and allow customization Tests now use a dummy hash function that does nothing (basically) and run a bit faster now. --- docs/docs/configuration/server.rst | 26 +++++++ isso/__init__.py | 7 +- isso/core.py | 5 +- isso/tests/test_comments.py | 7 +- isso/tests/test_guard.py | 3 + isso/tests/test_utils_hash.py | 72 +++++++++++++++++++ isso/tests/test_vote.py | 1 + isso/utils/hash.py | 112 +++++++++++++++++++++++++++++ isso/views/comments.py | 31 ++------ 9 files changed, 229 insertions(+), 35 deletions(-) create mode 100644 isso/tests/test_utils_hash.py create mode 100644 isso/utils/hash.py diff --git a/docs/docs/configuration/server.rst b/docs/docs/configuration/server.rst index 16ba15c..ee42957 100644 --- a/docs/docs/configuration/server.rst +++ b/docs/docs/configuration/server.rst @@ -261,6 +261,32 @@ allowed-attributes To allow images in comments, you just need to add ``allowed-elements = img`` and ``allowed-attributes = src``. +Hash +---- + +Customize used hash functions to hide the actual email addresses from +commenters but still be able to generate an identicon. + +.. code-block:: ini + + [hash] + salt = Eech7co8Ohloopo9Ol6baimi + algorithm = pbkdf2 + +salt + A salt is used to protect against rainbow tables. Isso does not make use of + pepper (yet). The default value has been in use since the release of Isso + and generates the same identicons for same addresses across installations. + +algorithm + Hash algorithm to use -- either from Python's `hashlib` or PBKDF2 (a + computational expensive hash function). + + The actual identifier for PBKDF2 is `pbkdf2:1000:6:sha1`, which means 1000 + iterations, 6 bytes to generate and SHA1 as pseudo-random family used for + key strengthening. + Arguments have to be in that order, but can be reduced to `pbkdf2:4096` + for example to override the iterations only. Appendum -------- diff --git a/isso/__init__.py b/isso/__init__.py index afac2b4..ebb86de 100644 --- a/isso/__init__.py +++ b/isso/__init__.py @@ -65,7 +65,7 @@ local_manager = LocalManager([local]) from isso import db, migrate, wsgi, ext, views from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config from isso.wsgi import origin, urlsplit -from isso.utils import http, JSONRequest, html +from isso.utils import http, JSONRequest, html, hash from isso.views import comments from isso.ext.notifications import Stdout, SMTP @@ -80,14 +80,13 @@ logger = logging.getLogger("isso") class Isso(object): - salt = b"Eech7co8Ohloopo9Ol6baimi" - def __init__(self, conf): self.conf = conf self.db = db.SQLite3(conf.get('general', 'dbpath'), conf) self.signer = URLSafeTimedSerializer(self.db.preferences.get("session-key")) self.markup = html.Markup(conf.section('markup')) + self.hasher = hash.new(conf.section("hash")) super(Isso, self).__init__(conf) @@ -105,7 +104,7 @@ class Isso(object): self.urls = Map() views.Info(self) - comments.API(self) + comments.API(self, self.hasher) def render(self, text): return self.markup.render(text) diff --git a/isso/core.py b/isso/core.py index 4e30f49..9fb1a52 100644 --- a/isso/core.py +++ b/isso/core.py @@ -115,7 +115,10 @@ class Config: "[markup]", "options = strikethrough, autolink", "allowed-elements = ", - "allowed-attributes = " + "allowed-attributes = ", + "[hash]", + "algorithm = pbkdf2", + "salt = Eech7co8Ohloopo9Ol6baimi" ] @classmethod diff --git a/isso/tests/test_comments.py b/isso/tests/test_comments.py index 3c00a03..4ba41b1 100644 --- a/isso/tests/test_comments.py +++ b/isso/tests/test_comments.py @@ -35,6 +35,7 @@ class TestComments(unittest.TestCase): conf = core.Config.load(None) conf.set("general", "dbpath", self.path) conf.set("guard", "enabled", "off") + conf.set("hash", "algorithm", "none") class App(Isso, core.Mixin): pass @@ -292,7 +293,6 @@ class TestComments(unittest.TestCase): b = loads(b.data) c = loads(c.data) - self.assertIsInstance(int(a['hash'], 16), int) self.assertNotEqual(a['hash'], '192.168.1.1') self.assertEqual(a['hash'], b['hash']) self.assertNotEqual(a['hash'], c['hash']) @@ -375,9 +375,6 @@ class TestComments(unittest.TestCase): # just for the record self.assertEqual(self.post('/id/1/dislike', content_type=js).status_code, 200) - def testPBKDF2(self): - self.assertEqual(comments.API.pbkdf2(u"", Isso.salt, 1000, 6), u"42476aafe2e4") - class TestModeratedComments(unittest.TestCase): @@ -387,6 +384,7 @@ class TestModeratedComments(unittest.TestCase): conf.set("general", "dbpath", self.path) conf.set("moderation", "enabled", "true") conf.set("guard", "enabled", "off") + conf.set("hash", "algorithm", "none") class App(Isso, core.Mixin): pass @@ -418,6 +416,7 @@ class TestPurgeComments(unittest.TestCase): conf.set("general", "dbpath", self.path) conf.set("moderation", "enabled", "true") conf.set("guard", "enabled", "off") + conf.set("hash", "algorithm", "none") class App(Isso, core.Mixin): pass diff --git a/isso/tests/test_guard.py b/isso/tests/test_guard.py index a75beba..28ce2b5 100644 --- a/isso/tests/test_guard.py +++ b/isso/tests/test_guard.py @@ -1,5 +1,7 @@ # -*- encoding: utf-8 -*- +from __future__ import unicode_literals + try: import unittest2 as unittest except ImportError: @@ -36,6 +38,7 @@ class TestGuard(unittest.TestCase): conf = core.Config.load(None) conf.set("general", "dbpath", self.path) + conf.set("hash", "algorithm", "none") conf.set("guard", "enabled", "true") conf.set("guard", "ratelimit", str(ratelimit)) conf.set("guard", "direct-reply", str(direct_reply)) diff --git a/isso/tests/test_utils_hash.py b/isso/tests/test_utils_hash.py new file mode 100644 index 0000000..e8db2df --- /dev/null +++ b/isso/tests/test_utils_hash.py @@ -0,0 +1,72 @@ +# -*- encoding: utf-8 -*- + +from __future__ import unicode_literals + +try: + import unittest2 as unittest +except ImportError: + import unittest + +from isso.compat import PY2K, string_types +from isso.core import Config +from isso.utils.hash import Hash, PBKDF2, new + + +class TestHasher(unittest.TestCase): + + def test_hash(self): + self.assertRaises(TypeError, Hash, "Foo") + + self.assertEqual(Hash(b"").salt, b"") + self.assertEqual(Hash().salt, Hash.salt) + + h = Hash(b"", func=None) + + self.assertRaises(TypeError, h.hash, "...") + self.assertEqual(h.hash(b"..."), b"...") + self.assertIsInstance(h.uhash(u"..."), string_types) + + @unittest.skipIf(PY2K, "byte/str quirks") + def test_uhash(self): + h = Hash(b"", func=None) + self.assertRaises(TypeError, h.uhash, b"...") + + +class TestPBKDF2(unittest.TestCase): + + def test_default(self): + pbkdf2 = PBKDF2(iterations=1000) # original setting (and still default) + self.assertEqual(pbkdf2.uhash(""), "42476aafe2e4") + + def test_different_salt(self): + a = PBKDF2(b"a", iterations=1) + b = PBKDF2(b"b", iterations=1) + self.assertNotEqual(a.hash(b""), b.hash(b"")) + + +class TestCreate(unittest.TestCase): + + def test_default(self): + conf = Config.load(None) + self.assertIsInstance(new(conf.section("hash")), PBKDF2) + + def test_custom(self): + + def _new(val): + conf = Config.load(None) + conf.set("hash", "algorithm", val) + return new(conf.section("hash")) + + sha1 = _new("sha1") + self.assertIsInstance(sha1, Hash) + self.assertEqual(sha1.func, "sha1") + self.assertRaises(ValueError, _new, "foo") + + pbkdf2 = _new("pbkdf2:16") + self.assertIsInstance(pbkdf2, PBKDF2) + self.assertEqual(pbkdf2.iterations, 16) + + pbkdf2 = _new("pbkdf2:16:2:md5") + self.assertIsInstance(pbkdf2, PBKDF2) + self.assertEqual(pbkdf2.dklen, 2) + self.assertEqual(pbkdf2.func, "md5") \ No newline at end of file diff --git a/isso/tests/test_vote.py b/isso/tests/test_vote.py index 8def233..45d1445 100644 --- a/isso/tests/test_vote.py +++ b/isso/tests/test_vote.py @@ -28,6 +28,7 @@ class TestVote(unittest.TestCase): conf = core.Config.load(None) conf.set("general", "dbpath", self.path) conf.set("guard", "enabled", "off") + conf.set("hash", "algorithm", "none") class App(Isso, core.Mixin): pass diff --git a/isso/utils/hash.py b/isso/utils/hash.py new file mode 100644 index 0000000..0e93a1a --- /dev/null +++ b/isso/utils/hash.py @@ -0,0 +1,112 @@ +# -*- encoding: utf-8 -*- + +from __future__ import unicode_literals + +import codecs +import hashlib + +from isso.compat import string_types, text_type as str + +try: + from werkzeug.security import pbkdf2_bin as pbkdf2 +except ImportError: + try: + from passlib.utils.pbkdf2 import pbkdf2 as _pbkdf2 + + def pbkdf2(val, salt, iterations, dklen, func): + return _pbkdf2(val, salt, iterations, dklen, ("hmac-" + func).encode("utf-8")) + except ImportError as ex: + raise ImportError("No PBKDF2 implementation found. Either upgrade " + + "to `werkzeug` 0.9 or install `passlib`.") + + +def _TypeError(name, expected, val): + return TypeError("'{0}' must be {1}, not {2}".format( + name, expected, val.__class__.__name__)) + + +class Hash(object): + + func = None + salt = b"Eech7co8Ohloopo9Ol6baimi" + + def __init__(self, salt=None, func="sha1"): + + if func is not None: + hashlib.new(func) # may not be available + self.func = func + + if salt is not None: + if not isinstance(salt, bytes): + raise _TypeError("salt", "bytes", salt) + self.salt = salt + + def hash(self, val): + """Calculate hash from value (must be bytes).""" + + if not isinstance(val, bytes): + raise _TypeError("val", "bytes", val) + + rv = self.compute(val) + + if not isinstance(val, bytes): + raise _TypeError("val", "bytes", rv) + + return rv + + def uhash(self, val): + """Calculate hash from unicode value and return hex value as unicode""" + + if not isinstance(val, string_types): + raise _TypeError("val", "str", val) + + return codecs.encode(self.hash(val.encode("utf-8")), "hex_codec").decode("utf-8") + + def compute(self, val): + if self.func is None: + return val + + h = hashlib.new(self.func) + h.update(val) + + return h.digest() + + +class PBKDF2(Hash): + + def __init__(self, salt=None, iterations=1000, dklen=6, func="sha1"): + super(PBKDF2, self).__init__(salt) + + self.iterations = iterations + self.dklen = dklen + self.func = func + + def compute(self, val): + return pbkdf2(val, self.salt, self.iterations, self.dklen, self.func) + + +def new(conf): + """Factory to create hash functions from configuration section. If an + algorithm takes custom parameters, you can separate them by a colon like + this: pbkdf2:arg1:arg2:arg3.""" + + algorithm = conf.get("algorithm") + salt = conf.get("salt").encode("utf-8") + + if algorithm == "none": + return Hash(salt, None) + elif algorithm.startswith("pbkdf2"): + kwargs = {} + tail = algorithm.partition(":")[2] + for func, key in ((int, "iterations"), (int, "dklen"), (str, "func")): + head, _, tail = tail.partition(":") + if not head: + break + kwargs[key] = func(head) + + return PBKDF2(salt, **kwargs) + else: + return Hash(salt, algorithm) + + +sha1 = Hash(func="sha1").uhash \ No newline at end of file diff --git a/isso/views/comments.py b/isso/views/comments.py index edd3c5e..928bf06 100644 --- a/isso/views/comments.py +++ b/isso/views/comments.py @@ -3,7 +3,6 @@ import re import cgi import time -import hashlib import functools from itsdangerous import SignatureExpired, BadSignature @@ -20,19 +19,7 @@ from isso.compat import text_type as str from isso import utils, local from isso.utils import http, parse, JSONResponse as JSON from isso.views import requires - -try: - from werkzeug.security import pbkdf2_hex -except ImportError: - try: - from passlib.utils.pbkdf2 import pbkdf2 - except ImportError as ex: - raise ImportError("No PBKDF2 implementation found. Either upgrade " + - "to `werkzeug` 0.9 or install `passlib`.") - else: - import base64 - pbkdf2_hex = lambda text, salt, iterations, dklen: base64.b16encode( - pbkdf2(text.encode("utf-8"), salt, iterations, dklen)).lower().decode("utf-8") +from isso.utils.hash import sha1 # from Django appearently, looks good to me *duck* __url_re = re.compile( @@ -56,10 +43,6 @@ def normalize(url): return url -def sha1(text): - return hashlib.sha1(text.encode('utf-8')).hexdigest() - - def xhr(func): """A decorator to check for CSRF on POST/PUT/DELETE using a
element and JS to execute automatically (see #40 for a proof-of-concept). @@ -107,9 +90,10 @@ class API(object): ('demo', ('GET', '/demo')) ] - def __init__(self, isso): + def __init__(self, isso, hasher): self.isso = isso + self.hash = hasher.uhash self.cache = isso.cache self.signal = isso.signal @@ -151,11 +135,6 @@ class API(object): return True, "" - @classmethod - def pbkdf2(cls, text, salt, iterations, dklen): - # werkzeug.security.pbkdf2_hex returns always the native string type - return pbkdf2_hex(text.encode("utf-8"), salt, iterations, dklen) - @xhr @requires(str, 'uri') def new(self, environ, request, uri): @@ -214,7 +193,7 @@ class API(object): max_age=self.conf.getint('max-age')) rv["text"] = self.isso.render(rv["text"]) - rv["hash"] = API.pbkdf2(rv['email'] or rv['remote_addr'], self.isso.salt, 1000, 6) + rv["hash"] = self.hash(rv['email'] or rv['remote_addr']) self.cache.set('hash', (rv['email'] or rv['remote_addr']).encode('utf-8'), rv['hash']) @@ -446,7 +425,7 @@ class API(object): val = self.cache.get('hash', key.encode('utf-8')) if val is None: - val = API.pbkdf2(key, self.isso.salt, 1000, 6) + val = self.hash(key) self.cache.set('hash', key.encode('utf-8'), val) item['hash'] = val