From 8c0af3b10fa5118a242fcc92e57723d0b8d8db62 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Mon, 11 Nov 2013 11:34:13 +0100 Subject: [PATCH 1/5] show orphan comments after Disqus import (if any) An orphan comment is a comment that Disqus exports although its thread id no longer exists (the thread was probably deleted or moved). Orphans usually date from the earlier days of a site (or from a WordPress migration). It is not possible to recover the thread without manual intervention (aka SQLite insertions). --- isso/migrate.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/isso/migrate.py b/isso/migrate.py index e0a1005..8bbfa2a 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -9,6 +9,7 @@ from __future__ import division import sys import os +import textwrap from time import mktime, strptime from collections import defaultdict @@ -20,12 +21,14 @@ except ImportError: from xml.etree import ElementTree - ns = '{http://disqus.com}' dsq = '{http://disqus.com/disqus-internals}' +threads = set([]) +comments = set([]) -def insert(db, thread, comments): + +def insert(db, thread, posts): path = urlparse(thread.find('%sid' % ns).text).path remap = dict() @@ -33,13 +36,15 @@ def insert(db, thread, comments): if path not in db.threads: db.threads.new(path, thread.find('%stitle' % ns).text.strip()) - for item in sorted(comments, key=lambda k: k['created']): + for item in sorted(posts, key=lambda k: k['created']): dsq_id = item.pop('dsq:id') item['parent'] = remap.get(item.pop('dsq:parent', None)) rv = db.comments.add(path, item) remap[dsq_id] = rv["id"] + comments.update(set(remap.keys())) + def disqus(db, xmlfile): @@ -67,7 +72,6 @@ def disqus(db, xmlfile): num = len(tree.findall('%sthread' % ns)) cols = int(os.popen('stty size', 'r').read().split()[1]) - threads = 0 for i, thread in enumerate(tree.findall('%sthread' % ns)): if int(round((i+1)/num, 2) * 100) % 13 == 0: @@ -76,13 +80,28 @@ def disqus(db, xmlfile): sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), 
thread.find('%sid' % ns).text)) sys.stdout.flush() + # skip (possibly?) duplicate, but empty thread elements if thread.find('%sid' % ns).text is None: continue id = thread.attrib.get(dsq + 'id') if id in res: - threads += 1 + threads.add(id) insert(db, thread, res[id]) sys.stdout.write("\r%s" % (" "*cols)) - sys.stdout.write("\r[100%%] %i threads, %i comments" % (threads, len(tree.findall('%spost' % ns)))) + sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments))) + + orphans = set(map(lambda e: e.attrib.get(dsq + "id"), tree.findall("%spost" % ns))) - comments + if orphans: + print("Found %i orphans:" % len(orphans)) + for post in tree.findall("%spost" % ns): + if post.attrib.get(dsq + "id") not in orphans: + continue + + print(" * %s by %s <%s>" % (post.attrib.get(dsq + "id"), + post.find("%sauthor/%sname" % (ns, ns)).text, + post.find("%sauthor/%semail" % (ns, ns)).text)) + print(textwrap.fill(post.find("%smessage" % ns).text, + initial_indent=" ", subsequent_indent=" ")) + print("") From 850b2b4bfb68ee43d0ffc52af4a9497799517833 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Mon, 11 Nov 2013 11:44:32 +0100 Subject: [PATCH 2/5] set mode to 4 (deleted) when isDeleted is true --- isso/migrate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/isso/migrate.py b/isso/migrate.py index 8bbfa2a..a12ac61 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -3,7 +3,6 @@ # TODO # # - export does not include website from commenters -# - Disqus includes already deleted comments from __future__ import division @@ -61,7 +60,7 @@ def disqus(db, xmlfile): 'created': mktime(strptime( post.find('%screatedAt' % ns).text, '%Y-%m-%dT%H:%M:%SZ')), 'remote_addr': '127.0.0.0', - 'mode': 1 + 'mode': 1 if post.find("%sisDeleted" % ns).text == "false" else 4 } if post.find(ns + 'parent') is not None: @@ -89,6 +88,9 @@ def disqus(db, xmlfile): threads.add(id) insert(db, thread, res[id]) + # in case a comment has been deleted 
(and has no further children) + db.comments._remove_stale() + sys.stdout.write("\r%s" % (" "*cols)) sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments))) From a8cd418b5e976833257268b3869d47ef61339b44 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Mon, 11 Nov 2013 11:45:12 +0100 Subject: [PATCH 3/5] remove website todo, not a high priority --- isso/migrate.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/isso/migrate.py b/isso/migrate.py index a12ac61..57bfd01 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -1,8 +1,4 @@ # -*- encoding: utf-8 -*- -# -# TODO -# -# - export does not include website from commenters from __future__ import division From 5ca3137ddd99e2802e026431e0f9aa5db2ceddf7 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Mon, 11 Nov 2013 11:57:31 +0100 Subject: [PATCH 4/5] add --dry-run option to import command --- isso/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/isso/__init__.py b/isso/__init__.py index db3981b..d56bfbe 100644 --- a/isso/__init__.py +++ b/isso/__init__.py @@ -42,6 +42,7 @@ import sys import os import socket import logging +import tempfile from os.path import dirname, join from argparse import ArgumentParser @@ -205,6 +206,8 @@ def main(): imprt = subparser.add_parser('import', help="import Disqus XML export") imprt.add_argument("dump", metavar="FILE") + imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true", + help="perform a trial run with no changes made") serve = subparser.add_parser("run", help="run server") @@ -212,8 +215,11 @@ def main(): conf = Config.load(args.conf) if args.command == "import": + xxx = tempfile.NamedTemporaryFile() + dbpath = conf.get("general", "dbpath") if not args.dryrun else xxx.name + conf.set("guard", "enabled", "off") - migrate.disqus(db.SQLite3(conf.get('general', 'dbpath'), conf), args.dump) + migrate.disqus(db.SQLite3(dbpath, conf), args.dump) sys.exit(0) if conf.get("server", 
"listen").startswith("http://"): From 162fe78aa6f22123d92590fb5204d05ad9951f16 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Mon, 11 Nov 2013 12:02:49 +0100 Subject: [PATCH 5/5] ask to continue import if DB is not empty --- isso/migrate.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/isso/migrate.py b/isso/migrate.py index 57bfd01..47dbecb 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -9,6 +9,11 @@ import textwrap from time import mktime, strptime from collections import defaultdict +try: + input = raw_input +except NameError: + pass + try: from urlparse import urlparse except ImportError: @@ -43,6 +48,10 @@ def insert(db, thread, posts): def disqus(db, xmlfile): + if db.execute("SELECT * FROM comments").fetchone(): + if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"): + raise SystemExit("Abort.") + tree = ElementTree.parse(xmlfile) res = defaultdict(list)