diff --git a/isso/__init__.py b/isso/__init__.py index d4f7980..14fdcdb 100644 --- a/isso/__init__.py +++ b/isso/__init__.py @@ -214,11 +214,17 @@ def main(): conf = Config.load(args.conf) if args.command == "import": - xxx = tempfile.NamedTemporaryFile() - dbpath = conf.get("general", "dbpath") if not args.dryrun else xxx.name - conf.set("guard", "enabled", "off") - migrate.disqus(db.SQLite3(dbpath, conf), args.dump) + + if args.dryrun: + xxx = tempfile.NamedTemporaryFile() + dbpath = xxx.name + else: + dbpath = conf.get("general", "dbpath") + + mydb = db.SQLite3(dbpath, conf) + migrate.dispatch(mydb, args.dump) + sys.exit(0) if not any(conf.getiter("general", "host")): diff --git a/isso/migrate.py b/isso/migrate.py index 93d645f..eb2fe41 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -21,94 +21,104 @@ except ImportError: from xml.etree import ElementTree -ns = '{http://disqus.com}' -dsq = '{http://disqus.com/disqus-internals}' -threads = set([]) -comments = set([]) +class Disqus(object): + ns = '{http://disqus.com}' + internals = '{http://disqus.com/disqus-internals}' -def insert(db, thread, posts): + def __init__(self, db, xmlfile): + self.threads = set([]) + self.comments = set([]) - path = urlparse(thread.find('%slink' % ns).text).path - remap = dict() + self.db = db + self.xmlfile = xmlfile - if path not in db.threads: - db.threads.new(path, thread.find('%stitle' % ns).text.strip()) + def insert(self, thread, posts): - for item in sorted(posts, key=lambda k: k['created']): + path = urlparse(thread.find('%slink' % Disqus.ns).text).path + remap = dict() - dsq_id = item.pop('dsq:id') - item['parent'] = remap.get(item.pop('dsq:parent', None)) - rv = db.comments.add(path, item) - remap[dsq_id] = rv["id"] + if path not in self.db.threads: + self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip()) - comments.update(set(remap.keys())) + for item in sorted(posts, key=lambda k: k['created']): + dsq_id = item.pop('dsq:id') + item['parent'] = remap.get(item.pop('dsq:parent', None)) + rv = self.db.comments.add(path, item) + remap[dsq_id] = rv["id"] -def disqus(db, xmlfile): + self.comments.update(set(remap.keys())) - if db.execute("SELECT * FROM comments").fetchone(): - if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"): - raise SystemExit("Abort.") + def migrate(self): - tree = ElementTree.parse(xmlfile) - res = defaultdict(list) + tree = ElementTree.parse(self.xmlfile) + res = defaultdict(list) - for post in tree.findall('%spost' % ns): + for post in tree.findall('%spost' % Disqus.ns): - item = { - 'dsq:id': post.attrib.get(dsq + 'id'), - 'text': post.find('%smessage' % ns).text, - 'author': post.find('%sauthor/%sname' % (ns, ns)).text, - 'email': post.find('%sauthor/%semail' % (ns, ns)).text, - 'created': mktime(strptime( - post.find('%screatedAt' % ns).text, '%Y-%m-%dT%H:%M:%SZ')), - 'remote_addr': '127.0.0.0', - 'mode': 1 if post.find("%sisDeleted" % ns).text == "false" else 4 - } + item = { + 'dsq:id': post.attrib.get(Disqus.internals + 'id'), + 'text': post.find(Disqus.ns + 'message').text, + 'author': post.find('{0}author/{0}name'.format(Disqus.ns)).text, + 'email': post.find('{0}author/{0}email'.format(Disqus.ns)).text, + 'created': mktime(strptime( + post.find(Disqus.ns + 'createdAt').text, '%Y-%m-%dT%H:%M:%SZ')), + 'remote_addr': '127.0.0.0', + 'mode': 1 if post.find(Disqus.ns + "isDeleted").text == "false" else 4 + } - if post.find(ns + 'parent') is not None: - item['dsq:parent'] = post.find(ns + 'parent').attrib.get(dsq + 'id') + if post.find(Disqus.ns + 'parent') is not None: + item['dsq:parent'] = post.find(Disqus.ns + 'parent').attrib.get(Disqus.internals + 'id') - res[post.find('%sthread' % ns).attrib.get(dsq + 'id')].append(item) + res[post.find('%sthread' % Disqus.ns).attrib.get(Disqus.internals + 'id')].append(item) - num = len(tree.findall('%sthread' % ns)) - cols = int((os.popen('stty size', 'r').read() or "25 80").split()[1]) + num = len(tree.findall(Disqus.ns + 'thread')) + cols = int((os.popen('stty size', 'r').read() or "25 80").split()[1]) - for i, thread in enumerate(tree.findall('%sthread' % ns)): + for i, thread in enumerate(tree.findall(Disqus.ns + 'thread')): - if int(round((i+1)/num, 2) * 100) % 13 == 0: + if int(round((i+1)/num, 2) * 100) % 13 == 0: + sys.stdout.write("\r%s" % (" "*cols)) + sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find(Disqus.ns + 'id').text)) + sys.stdout.flush() - sys.stdout.write("\r%s" % (" "*cols)) - sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find('%sid' % ns).text)) - sys.stdout.flush() + # skip (possibly?) duplicate, but empty thread elements + if thread.find(Disqus.ns + 'id').text is None: + continue - # skip (possibly?) duplicate, but empty thread elements - if thread.find('%sid' % ns).text is None: - continue + id = thread.attrib.get(Disqus.internals + 'id') + if id in res: + self.threads.add(id) + self.insert(thread, res[id]) - id = thread.attrib.get(dsq + 'id') - if id in res: - threads.add(id) - insert(db, thread, res[id]) + # in case a comment has been deleted (and no further childs) + self.db.comments._remove_stale() - # in case a comment has been deleted (and no further childs) - db.comments._remove_stale() + sys.stdout.write("\r%s" % (" "*cols)) + sys.stdout.write("\r[100%] {0} threads, {1} comments\n".format( + len(self.threads), len(self.comments))) - sys.stdout.write("\r%s" % (" "*cols)) - sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments))) + orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments + if orphans: + print("Found %i orphans:" % len(orphans)) + for post in tree.findall(Disqus.ns + "post"): + if post.attrib.get(Disqus.internals + "id") not in orphans: + continue - orphans = set(map(lambda e: e.attrib.get(dsq + "id"), tree.findall("%spost" % ns))) - comments - if orphans: - print("Found %i orphans:" % len(orphans)) - for post in tree.findall("%spost" % ns): - if post.attrib.get(dsq + "id") not in orphans: - continue + print(" * {0} by {1} <{2}>".format( + post.attrib.get(Disqus.internals + "id"), + post.find("{0}author/{0}name".format(Disqus.ns)).text, + post.find("{0}author/{0}email".format(Disqus.ns)).text)) + print(textwrap.fill(post.find(Disqus.ns + "message").text, + initial_indent=" ", subsequent_indent=" ")) + print("") + + +def dispatch(db, dump): + if db.execute("SELECT * FROM comments").fetchone(): + if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"): + raise SystemExit("Abort.") - print(" * %s by %s <%s>" % (post.attrib.get(dsq + "id"), - post.find("%sauthor/%sname" % (ns, ns)).text, - post.find("%sauthor/%semail" % (ns, ns)).text)) - print(textwrap.fill(post.find("%smessage" % ns).text, - initial_indent=" ", subsequent_indent=" ")) - print("") + Disqus(db, dump).migrate() diff --git a/isso/tests/test_migration.py b/isso/tests/test_migration.py index 8a40ea2..ef1af6b 100644 --- a/isso/tests/test_migration.py +++ b/isso/tests/test_migration.py @@ -6,7 +6,7 @@ from os.path import join, dirname from isso.core import Config from isso.db import SQLite3 -from isso.migrate import disqus +from isso.migrate import Disqus def test_disqus(): @@ -15,7 +15,7 @@ def test_disqus(): xxx = tempfile.NamedTemporaryFile() db = SQLite3(xxx.name, Config.load(None)) - disqus(db, xml) + Disqus(db, xml).migrate() assert db.threads["/"]["title"] == "Hello, World!" assert db.threads["/"]["id"] == 1