refactor disqus migration code into a class

pull/83/merge
Martin Zimmermann 10 years ago
parent cb36107eda
commit 910da2a6c0

@ -214,11 +214,17 @@ def main():
conf = Config.load(args.conf) conf = Config.load(args.conf)
if args.command == "import": if args.command == "import":
xxx = tempfile.NamedTemporaryFile()
dbpath = conf.get("general", "dbpath") if not args.dryrun else xxx.name
conf.set("guard", "enabled", "off") conf.set("guard", "enabled", "off")
migrate.disqus(db.SQLite3(dbpath, conf), args.dump)
if args.dryrun:
xxx = tempfile.NamedTemporaryFile()
dbpath = xxx.name
else:
dbpath = conf.get("general", "dbpath")
mydb = db.SQLite3(dbpath, conf)
migrate.dispatch(mydb, args.dump)
sys.exit(0) sys.exit(0)
if not any(conf.getiter("general", "host")): if not any(conf.getiter("general", "host")):

@ -21,94 +21,104 @@ except ImportError:
from xml.etree import ElementTree from xml.etree import ElementTree
ns = '{http://disqus.com}'
dsq = '{http://disqus.com/disqus-internals}'
threads = set([]) class Disqus(object):
comments = set([])
ns = '{http://disqus.com}'
internals = '{http://disqus.com/disqus-internals}'
def insert(db, thread, posts): def __init__(self, db, xmlfile):
self.threads = set([])
self.comments = set([])
path = urlparse(thread.find('%slink' % ns).text).path self.db = db
remap = dict() self.xmlfile = xmlfile
if path not in db.threads: def insert(self, thread, posts):
db.threads.new(path, thread.find('%stitle' % ns).text.strip())
for item in sorted(posts, key=lambda k: k['created']): path = urlparse(thread.find('%slink' % Disqus.ns).text).path
remap = dict()
dsq_id = item.pop('dsq:id') if path not in self.db.threads:
item['parent'] = remap.get(item.pop('dsq:parent', None)) self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
rv = db.comments.add(path, item)
remap[dsq_id] = rv["id"]
comments.update(set(remap.keys())) for item in sorted(posts, key=lambda k: k['created']):
dsq_id = item.pop('dsq:id')
item['parent'] = remap.get(item.pop('dsq:parent', None))
rv = self.db.comments.add(path, item)
remap[dsq_id] = rv["id"]
def disqus(db, xmlfile): self.comments.update(set(remap.keys()))
if db.execute("SELECT * FROM comments").fetchone(): def migrate(self):
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
tree = ElementTree.parse(xmlfile) tree = ElementTree.parse(self.xmlfile)
res = defaultdict(list) res = defaultdict(list)
for post in tree.findall('%spost' % ns): for post in tree.findall('%spost' % Disqus.ns):
item = { item = {
'dsq:id': post.attrib.get(dsq + 'id'), 'dsq:id': post.attrib.get(Disqus.internals + 'id'),
'text': post.find('%smessage' % ns).text, 'text': post.find(Disqus.ns + 'message').text,
'author': post.find('%sauthor/%sname' % (ns, ns)).text, 'author': post.find('{0}author/{0}name'.format(Disqus.ns)).text,
'email': post.find('%sauthor/%semail' % (ns, ns)).text, 'email': post.find('{0}author/{0}email'.format(Disqus.ns)).text,
'created': mktime(strptime( 'created': mktime(strptime(
post.find('%screatedAt' % ns).text, '%Y-%m-%dT%H:%M:%SZ')), post.find(Disqus.ns + 'createdAt').text, '%Y-%m-%dT%H:%M:%SZ')),
'remote_addr': '127.0.0.0', 'remote_addr': '127.0.0.0',
'mode': 1 if post.find("%sisDeleted" % ns).text == "false" else 4 'mode': 1 if post.find(Disqus.ns + "isDeleted").text == "false" else 4
} }
if post.find(ns + 'parent') is not None: if post.find(Disqus.ns + 'parent') is not None:
item['dsq:parent'] = post.find(ns + 'parent').attrib.get(dsq + 'id') item['dsq:parent'] = post.find(Disqus.ns + 'parent').attrib.get(Disqus.internals + 'id')
res[post.find('%sthread' % ns).attrib.get(dsq + 'id')].append(item) res[post.find('%sthread' % Disqus.ns).attrib.get(Disqus.internals + 'id')].append(item)
num = len(tree.findall('%sthread' % ns)) num = len(tree.findall(Disqus.ns + 'thread'))
cols = int((os.popen('stty size', 'r').read() or "25 80").split()[1]) cols = int((os.popen('stty size', 'r').read() or "25 80").split()[1])
for i, thread in enumerate(tree.findall('%sthread' % ns)): for i, thread in enumerate(tree.findall(Disqus.ns + 'thread')):
if int(round((i+1)/num, 2) * 100) % 13 == 0: if int(round((i+1)/num, 2) * 100) % 13 == 0:
sys.stdout.write("\r%s" % (" "*cols))
sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find(Disqus.ns + 'id').text))
sys.stdout.flush()
sys.stdout.write("\r%s" % (" "*cols)) # skip (possibly?) duplicate, but empty thread elements
sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find('%sid' % ns).text)) if thread.find(Disqus.ns + 'id').text is None:
sys.stdout.flush() continue
# skip (possibly?) duplicate, but empty thread elements id = thread.attrib.get(Disqus.internals + 'id')
if thread.find('%sid' % ns).text is None: if id in res:
continue self.threads.add(id)
self.insert(thread, res[id])
id = thread.attrib.get(dsq + 'id') # in case a comment has been deleted (and no further childs)
if id in res: self.db.comments._remove_stale()
threads.add(id)
insert(db, thread, res[id])
# in case a comment has been deleted (and no further childs) sys.stdout.write("\r%s" % (" "*cols))
db.comments._remove_stale() sys.stdout.write("\r[100%] {0} threads, {1} comments\n".format(
len(self.threads), len(self.comments)))
sys.stdout.write("\r%s" % (" "*cols)) orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments))) if orphans:
print("Found %i orphans:" % len(orphans))
for post in tree.findall(Disqus.ns + "post"):
if post.attrib.get(Disqus.internals + "id") not in orphans:
continue
orphans = set(map(lambda e: e.attrib.get(dsq + "id"), tree.findall("%spost" % ns))) - comments print(" * {0} by {1} <{2}>".format(
if orphans: post.attrib.get(Disqus.internals + "id"),
print("Found %i orphans:" % len(orphans)) post.find("{0}author/{0}name".format(Disqus.ns)).text,
for post in tree.findall("%spost" % ns): post.find("{0}author/{0}email".format(Disqus.ns)).text))
if post.attrib.get(dsq + "id") not in orphans: print(textwrap.fill(post.find(Disqus.ns + "message").text,
continue initial_indent=" ", subsequent_indent=" "))
print("")
def dispatch(db, dump):
if db.execute("SELECT * FROM comments").fetchone():
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
print(" * %s by %s <%s>" % (post.attrib.get(dsq + "id"), Disqus(db, dump).migrate()
post.find("%sauthor/%sname" % (ns, ns)).text,
post.find("%sauthor/%semail" % (ns, ns)).text))
print(textwrap.fill(post.find("%smessage" % ns).text,
initial_indent=" ", subsequent_indent=" "))
print("")

@ -6,7 +6,7 @@ from os.path import join, dirname
from isso.core import Config from isso.core import Config
from isso.db import SQLite3 from isso.db import SQLite3
from isso.migrate import disqus from isso.migrate import Disqus
def test_disqus(): def test_disqus():
@ -15,7 +15,7 @@ def test_disqus():
xxx = tempfile.NamedTemporaryFile() xxx = tempfile.NamedTemporaryFile()
db = SQLite3(xxx.name, Config.load(None)) db = SQLite3(xxx.name, Config.load(None))
disqus(db, xml) Disqus(db, xml).migrate()
assert db.threads["/"]["title"] == "Hello, World!" assert db.threads["/"]["title"] == "Hello, World!"
assert db.threads["/"]["id"] == 1 assert db.threads["/"]["id"] == 1

Loading…
Cancel
Save