From 89d6ea076b8ec9056faf037ea34c8022b6146da5 Mon Sep 17 00:00:00 2001
From: Martin Zimmermann
Date: Tue, 25 Nov 2014 22:59:05 +0100
Subject: [PATCH] add --empty-id flag to import weird Disqus exports, fixes #135

---
 isso/__init__.py |  4 +++-
 isso/migrate.py  | 44 +++++++++++++++++++++++---------------------
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/isso/__init__.py b/isso/__init__.py
index 527e480..0df6af2 100644
--- a/isso/__init__.py
+++ b/isso/__init__.py
@@ -212,6 +212,8 @@ def main():
                        help="perform a trial run with no changes made")
     imprt.add_argument("-t", "--type", dest="type", default=None,
                        choices=["disqus", "wordpress"], help="export type")
+    imprt.add_argument("--empty-id", dest="empty_id", action="store_true",
+                       help="workaround for weird Disqus XML exports, #135")
 
     serve = subparser.add_parser("run", help="run server")
 
@@ -228,7 +230,7 @@ def main():
             dbpath = conf.get("general", "dbpath")
 
         mydb = db.SQLite3(dbpath, conf)
-        migrate.dispatch(args.type, mydb, args.dump)
+        migrate.dispatch(args.type, mydb, args.dump, args.empty_id)
 
         sys.exit(0)
 
diff --git a/isso/migrate.py b/isso/migrate.py
index 5afd6d0..a51118e 100644
--- a/isso/migrate.py
+++ b/isso/migrate.py
@@ -65,12 +65,13 @@ class Disqus(object):
     ns = '{http://disqus.com}'
     internals = '{http://disqus.com/disqus-internals}'
 
-    def __init__(self, db, xmlfile):
+    def __init__(self, db, xmlfile, empty_id=False):
         self.threads = set([])
         self.comments = set([])
 
         self.db = db
         self.xmlfile = xmlfile
+        self.empty_id = empty_id
 
     def insert(self, thread, posts):
 
@@ -117,7 +118,7 @@ class Disqus(object):
             progress.update(i, thread.find(Disqus.ns + 'id').text)
 
             # skip (possibly?) duplicate, but empty thread elements
-            if thread.find(Disqus.ns + 'id').text is None:
+            if thread.find(Disqus.ns + 'id').text is None and not self.empty_id:
                 continue
 
             id = thread.attrib.get(Disqus.internals + 'id')
@@ -132,7 +133,9 @@ class Disqus(object):
             len(self.threads), len(self.comments)))
 
         orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
-        if orphans:
+        if orphans and not self.threads:
+            print("Isso couldn't import any thread, try again with --empty-id")
+        elif orphans:
             print("Found %i orphans:" % len(orphans))
             for post in tree.findall(Disqus.ns + "post"):
                 if post.attrib.get(Disqus.internals + "id") not in orphans:
@@ -250,25 +253,24 @@ class WordPress(object):
         return None
 
 
-def dispatch(type, db, dump):
-        if db.execute("SELECT * FROM comments").fetchone():
-            if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
-                raise SystemExit("Abort.")
-
-        if type is None:
+def dispatch(type, db, dump, empty_id=False):
+    if db.execute("SELECT * FROM comments").fetchone():
+        if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
+            raise SystemExit("Abort.")
 
-            with io.open(dump, encoding="utf-8") as fp:
-                peek = fp.read(io.DEFAULT_BUFFER_SIZE)
+    if type is None:
+        with io.open(dump, encoding="utf-8") as fp:
+            peek = fp.read(io.DEFAULT_BUFFER_SIZE)
 
-            if WordPress.detect(peek):
-                type = "wordpress"
+        if WordPress.detect(peek):
+            type = "wordpress"
 
-            if Disqus.detect(peek):
-                type = "disqus"
+        if Disqus.detect(peek):
+            type = "disqus"
 
-        if type == "wordpress":
-            WordPress(db, dump).migrate()
-        elif type == "disqus":
-            Disqus(db, dump).migrate()
-        else:
-            raise SystemExit("Unknown format, abort.")
+    if type == "wordpress":
+        WordPress(db, dump).migrate()
+    elif type == "disqus":
+        Disqus(db, dump, empty_id).migrate()
+    else:
+        raise SystemExit("Unknown format, abort.")