Add --empty-id flag to import Disqus exports whose thread <id> elements are empty (fixes #135)

This commit is contained in:
Martin Zimmermann 2014-11-25 22:59:05 +01:00
parent 928198f340
commit 89d6ea076b
2 changed files with 26 additions and 22 deletions

View File

@ -212,6 +212,8 @@ def main():
help="perform a trial run with no changes made") help="perform a trial run with no changes made")
imprt.add_argument("-t", "--type", dest="type", default=None, imprt.add_argument("-t", "--type", dest="type", default=None,
choices=["disqus", "wordpress"], help="export type") choices=["disqus", "wordpress"], help="export type")
imprt.add_argument("--empty-id", dest="empty_id", action="store_true",
help="workaround for weird Disqus XML exports, #135")
serve = subparser.add_parser("run", help="run server") serve = subparser.add_parser("run", help="run server")
@ -228,7 +230,7 @@ def main():
dbpath = conf.get("general", "dbpath") dbpath = conf.get("general", "dbpath")
mydb = db.SQLite3(dbpath, conf) mydb = db.SQLite3(dbpath, conf)
migrate.dispatch(args.type, mydb, args.dump) migrate.dispatch(args.type, mydb, args.dump, args.empty_id)
sys.exit(0) sys.exit(0)

View File

@ -65,12 +65,13 @@ class Disqus(object):
ns = '{http://disqus.com}' ns = '{http://disqus.com}'
internals = '{http://disqus.com/disqus-internals}' internals = '{http://disqus.com/disqus-internals}'
def __init__(self, db, xmlfile): def __init__(self, db, xmlfile, empty_id=False):
self.threads = set([]) self.threads = set([])
self.comments = set([]) self.comments = set([])
self.db = db self.db = db
self.xmlfile = xmlfile self.xmlfile = xmlfile
self.empty_id = empty_id
def insert(self, thread, posts): def insert(self, thread, posts):
@ -117,7 +118,7 @@ class Disqus(object):
progress.update(i, thread.find(Disqus.ns + 'id').text) progress.update(i, thread.find(Disqus.ns + 'id').text)
# skip (possibly?) duplicate, but empty thread elements # skip (possibly?) duplicate, but empty thread elements
if thread.find(Disqus.ns + 'id').text is None: if thread.find(Disqus.ns + 'id').text is None and not self.empty_id:
continue continue
id = thread.attrib.get(Disqus.internals + 'id') id = thread.attrib.get(Disqus.internals + 'id')
@ -132,7 +133,9 @@ class Disqus(object):
len(self.threads), len(self.comments))) len(self.threads), len(self.comments)))
orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
if orphans: if orphans and not self.threads:
print("Isso couldn't import any thread, try again with --empty-id")
elif orphans:
print("Found %i orphans:" % len(orphans)) print("Found %i orphans:" % len(orphans))
for post in tree.findall(Disqus.ns + "post"): for post in tree.findall(Disqus.ns + "post"):
if post.attrib.get(Disqus.internals + "id") not in orphans: if post.attrib.get(Disqus.internals + "id") not in orphans:
@ -250,13 +253,12 @@ class WordPress(object):
return None return None
def dispatch(type, db, dump): def dispatch(type, db, dump, empty_id=False):
if db.execute("SELECT * FROM comments").fetchone(): if db.execute("SELECT * FROM comments").fetchone():
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"): if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.") raise SystemExit("Abort.")
if type is None: if type is None:
with io.open(dump, encoding="utf-8") as fp: with io.open(dump, encoding="utf-8") as fp:
peek = fp.read(io.DEFAULT_BUFFER_SIZE) peek = fp.read(io.DEFAULT_BUFFER_SIZE)
@ -269,6 +271,6 @@ def dispatch(type, db, dump):
if type == "wordpress": if type == "wordpress":
WordPress(db, dump).migrate() WordPress(db, dump).migrate()
elif type == "disqus": elif type == "disqus":
Disqus(db, dump).migrate() Disqus(db, dump, empty_id).migrate()
else: else:
raise SystemExit("Unknown format, abort.") raise SystemExit("Unknown format, abort.")