From 12f8af8434c9f38b0dd2cb96fef3157f0d1d0950 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Wed, 30 Apr 2014 18:46:44 +0200 Subject: [PATCH] add initial support to import WordPress comments --- isso/__init__.py | 4 +- isso/migrate.py | 130 +++++++++++++++++++++++++++++++++-- isso/tests/test_migration.py | 33 +++++++-- isso/tests/wordpress.xml | 119 ++++++++++++++++++++++++++++++++ 4 files changed, 274 insertions(+), 12 deletions(-) create mode 100644 isso/tests/wordpress.xml diff --git a/isso/__init__.py b/isso/__init__.py index 14fdcdb..7f4f4d0 100644 --- a/isso/__init__.py +++ b/isso/__init__.py @@ -207,6 +207,8 @@ def main(): imprt.add_argument("dump", metavar="FILE") imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true", help="perform a trial run with no changes made") + imprt.add_argument("-t", "--type", dest="type", default=None, + choices=["disqus", "wordpress"], help="export type") serve = subparser.add_parser("run", help="run server") @@ -223,7 +225,7 @@ def main(): dbpath = conf.get("general", "dbpath") mydb = db.SQLite3(dbpath, conf) - migrate.dispatch(mydb, args.dump) + migrate.dispatch(args.type, mydb, args.dump) sys.exit(0) diff --git a/isso/migrate.py b/isso/migrate.py index eb2fe41..b549238 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -1,14 +1,18 @@ # -*- encoding: utf-8 -*- -from __future__ import division +from __future__ import division, print_function import sys import os +import io import textwrap -from time import mktime, strptime +from time import mktime, strptime, time from collections import defaultdict +from isso.utils import anonymize +from isso.compat import string_types + try: input = raw_input except NameError: @@ -22,6 +26,39 @@ except ImportError: from xml.etree import ElementTree +def strip(val): + if isinstance(val, string_types): + return val.strip() + return val + + +class Progress(object): + + def __init__(self, end): + self.end = end or 1 + + self.istty = sys.stdout.isatty() + self.last = 0 + + def update(self, i, message): + + if not self.istty or message is None: + return + + cols = int((os.popen('stty size', 'r').read()).split()[1]) + message = message[:cols - 7] + + if time() - self.last > 0.2: + sys.stdout.write("\r{0}".format(" " * cols)) + sys.stdout.write("\r[{0:.0%}] {1}".format(i/self.end, message)) + sys.stdout.flush() + self.last = time() + + def finish(self, message): + self.last = 0 + self.update(self.end, message + "\n") + + class Disqus(object): ns = '{http://disqus.com}' @@ -116,9 +153,94 @@ class Disqus(object): print("") -def dispatch(db, dump): +class WordPress(object): + + ns = "{http://wordpress.org/export/1.0/}" + + def __init__(self, db, xmlfile): + self.db = db + self.xmlfile = xmlfile + self.count = 0 + + def insert(self, thread): + + path = urlparse(thread.find("link").text).path + self.db.threads.new(path, thread.find("title").text.strip()) + + comments = list(map(WordPress.Comment, thread.findall(WordPress.ns + "comment"))) + comments.sort(key=lambda k: k["id"]) + + remap = {} + ids = set(c["id"] for c in comments) + + self.count += len(ids) + + while comments: + for i, item in enumerate(comments): + if item["parent"] in ids: + continue + + item["parent"] = remap.get(item["parent"], None) + rv = self.db.comments.add(path, item) + remap[item["id"]] = rv["id"] + + ids.remove(item["id"]) + comments.pop(i) + + break + else: + # should never happen, but... it's WordPress. + return + + def migrate(self): + + tree = ElementTree.parse(self.xmlfile) + items = tree.findall("channel/item") + + progress = Progress(len(items)) + for i, thread in enumerate(items): + progress.update(i, thread.find("title").text) + self.insert(thread) + + progress.finish("{0} threads, {1} comments".format(len(items), self.count)) + + @classmethod + def Comment(cls, el): + return { + "text": strip(el.find(WordPress.ns + "comment_content").text), + "author": strip(el.find(WordPress.ns + "comment_author").text), + "email": strip(el.find(WordPress.ns + "comment_author_email").text), + "website": strip(el.find(WordPress.ns + "comment_author_url").text), + "remote_addr": anonymize( + strip(el.find(WordPress.ns + "comment_author_IP").text)), + "created": mktime(strptime( + strip(el.find(WordPress.ns + "comment_date_gmt").text), + "%Y-%m-%d %H:%M:%S")), + "mode": 1 if el.find(WordPress.ns + "comment_approved").text == "1" else 2, + "id": int(el.find(WordPress.ns + "comment_id").text), + "parent": int(el.find(WordPress.ns + "comment_parent").text) or None + } + + +def dispatch(type, db, dump): if db.execute("SELECT * FROM comments").fetchone(): if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"): raise SystemExit("Abort.") - Disqus(db, dump).migrate() + if type is None: + + with io.open(dump) as fp: + peek = fp.read(2048) + + if 'xmlns:wp="%s"' % WordPress.ns[1:-1] in peek: + type = "wordpress" + + if ' + + + 0 + 0 + + + 6 + + info@posativ.org + + + ::ffff:86.56.63.0 + 2014-04-29 15:21:27 + 2014-04-29 15:21:27 + + 1 + + 0 + 1 + + + 7 + + info@posativ.org + + + ::ffff:86.56.63.0 + 2014-04-29 15:21:35 + + 1 + 6 + + + 8 + + info@posativ.org + + + ::ffff:86.56.63.0 + 2014-04-29 15:21:45 + 2014-04-29 15:21:45 + + + 1 + + 7 + 1 + + + 9 + + info@posativ.org + + + ::ffff:86.56.63.0 + 2014-04-29 15:21:52 + 2014-04-29 15:21:52 + + 1 + + 7 + 1 + + + 10 + + info@posativ.org + + + ::ffff:86.56.63.0 + 2014-04-29 15:21:56 + 2014-04-29 15:21:56 + + 1 + + 0 + 1 + + + + \ No newline at end of file