From 58645ca9455442347499503199c48289ece81c67 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Wed, 13 Nov 2013 18:27:35 +0100 Subject: [PATCH 1/2] do not reference bytearray in a method definition, fixes #5 This is a severe issue which makes the current voters bloomfilter completely useless. Functions are first-class objects in Python and their default arguments are evaluated only once, at definition time, which leads to interesting "issues" like: >>> def foo(x=[]): ... x.append(1) ... print x ... >>> foo() [1] >>> foo() [1, 1] For Isso, this means the bloomfilter, which is usually only initialized with the author's IP address, is now initialized with pretty much all IP addresses from previous authors, which makes it impossible for the author to vote on other people's comments. --- isso/utils/__init__.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/isso/utils/__init__.py b/isso/utils/__init__.py index de3be2b..1751be4 100644 --- a/isso/utils/__init__.py +++ b/isso/utils/__init__.py @@ -56,15 +56,31 @@ class Bloomfilter: of space efficiency (array is saved for each comment) and 11 hash functions because of best overall false-positive rate in that range. + >>> bf = Bloomfilter() + >>> bf.add("127.0.0.1") + >>> not any(map(bf.__contains__, ("1.2.%i.4" for i in range(256)))) + True + + >>> bf = Bloomfilter() + >>> for i in range(256): + ... bf.add("1.2.%i.4" % i) + ... 
+ >>> len(bf) + 256 + >>> "1.2.3.4" in bf + True + >>> "127.0.0.1" in bf + False + -- via Raymond Hettinger http://code.activestate.com/recipes/577684-bloom-filter/ """ - def __init__(self, array=bytearray(256), elements=0, iterable=()): - self.array = array + def __init__(self, array=None, elements=0, iterable=()): + self.array = array or bytearray(256) self.elements = elements self.k = 11 - self.m = len(array) * 8 + self.m = len(self.array) * 8 for item in iterable: self.add(item) From 6178e933481005597e445549dcddc9de0575c7cd Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Wed, 13 Nov 2013 20:00:06 +0100 Subject: [PATCH 2/2] add database migration Clear voters bloomfilter and initialize with an (almost) empty one. --- isso/db/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/isso/db/__init__.py b/isso/db/__init__.py index 56e8f4f..f1030f2 100644 --- a/isso/db/__init__.py +++ b/isso/db/__init__.py @@ -1,6 +1,9 @@ # -*- encoding: utf-8 -*- import sqlite3 +import logging + +logger = logging.getLogger("isso") class IssoDBException(Exception): pass @@ -11,12 +14,24 @@ from isso.db.threads import Threads class SQLite3: + MAX_VERSION = 1 + def __init__(self, path, conf): self.path = path self.conf = conf self.mode = 1 + rv = self.execute([ + "SELECT name FROM sqlite_master" + " WHERE type='table' AND name IN ('threads', 'comments')"] + ).fetchall() + + if rv: + self.migrate(to=SQLite3.MAX_VERSION) + else: + self.execute("PRAGMA user_version = %i" % SQLite3.MAX_VERSION) + self.threads = Threads(self) self.comments = Comments(self) @@ -34,3 +49,24 @@ class SQLite3: with sqlite3.connect(self.path) as con: return con.execute(sql, args) + + @property + def version(self): + return self.execute("PRAGMA user_version").fetchone()[0] + + def migrate(self, to): + + if self.version >= to: + return + + logger.info("migrate database from version %i to %i", self.version, to) + + if self.version == 0: + + from isso.utils import 
Bloomfilter + bf = buffer(Bloomfilter(iterable=["127.0.0.0"]).array) + + with sqlite3.connect(self.path) as con: + con.execute('UPDATE comments SET voters=?', (bf, )) + con.execute('PRAGMA user_version = 1') + logger.info("%i rows changed", con.total_changes)