show orphan comments after Disqus import (if any)
An orphan comment is a comment that Disqus exported but whose thread id does not exist (the thread was probably deleted or moved). Such comments usually date from the earlier days (or from a WordPress migration). It is not possible to recover the thread without manual intervention (i.e. SQLite insertions).
This commit is contained in:
parent
29170ac258
commit
8c0af3b10f
@ -9,6 +9,7 @@ from __future__ import division
|
||||
|
||||
import sys
|
||||
import os
|
||||
import textwrap
|
||||
|
||||
from time import mktime, strptime
|
||||
from collections import defaultdict
|
||||
@ -20,12 +21,14 @@ except ImportError:
|
||||
|
||||
from xml.etree import ElementTree
|
||||
|
||||
|
||||
ns = '{http://disqus.com}'
|
||||
dsq = '{http://disqus.com/disqus-internals}'
|
||||
|
||||
threads = set([])
|
||||
comments = set([])
|
||||
|
||||
def insert(db, thread, comments):
|
||||
|
||||
def insert(db, thread, posts):
|
||||
|
||||
path = urlparse(thread.find('%sid' % ns).text).path
|
||||
remap = dict()
|
||||
@ -33,13 +36,15 @@ def insert(db, thread, comments):
|
||||
if path not in db.threads:
|
||||
db.threads.new(path, thread.find('%stitle' % ns).text.strip())
|
||||
|
||||
for item in sorted(comments, key=lambda k: k['created']):
|
||||
for item in sorted(posts, key=lambda k: k['created']):
|
||||
|
||||
dsq_id = item.pop('dsq:id')
|
||||
item['parent'] = remap.get(item.pop('dsq:parent', None))
|
||||
rv = db.comments.add(path, item)
|
||||
remap[dsq_id] = rv["id"]
|
||||
|
||||
comments.update(set(remap.keys()))
|
||||
|
||||
|
||||
def disqus(db, xmlfile):
|
||||
|
||||
@ -67,7 +72,6 @@ def disqus(db, xmlfile):
|
||||
num = len(tree.findall('%sthread' % ns))
|
||||
cols = int(os.popen('stty size', 'r').read().split()[1])
|
||||
|
||||
threads = 0
|
||||
for i, thread in enumerate(tree.findall('%sthread' % ns)):
|
||||
|
||||
if int(round((i+1)/num, 2) * 100) % 13 == 0:
|
||||
@ -76,13 +80,28 @@ def disqus(db, xmlfile):
|
||||
sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find('%sid' % ns).text))
|
||||
sys.stdout.flush()
|
||||
|
||||
# skip (possibly?) duplicate, but empty thread elements
|
||||
if thread.find('%sid' % ns).text is None:
|
||||
continue
|
||||
|
||||
id = thread.attrib.get(dsq + 'id')
|
||||
if id in res:
|
||||
threads += 1
|
||||
threads.add(id)
|
||||
insert(db, thread, res[id])
|
||||
|
||||
sys.stdout.write("\r%s" % (" "*cols))
|
||||
sys.stdout.write("\r[100%%] %i threads, %i comments" % (threads, len(tree.findall('%spost' % ns))))
|
||||
sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments)))
|
||||
|
||||
orphans = set(map(lambda e: e.attrib.get(dsq + "id"), tree.findall("%spost" % ns))) - comments
|
||||
if orphans:
|
||||
print("Found %i orphans:" % len(orphans))
|
||||
for post in tree.findall("%spost" % ns):
|
||||
if post.attrib.get(dsq + "id") not in orphans:
|
||||
continue
|
||||
|
||||
print(" * %s by %s <%s>" % (post.attrib.get(dsq + "id"),
|
||||
post.find("%sauthor/%sname" % (ns, ns)).text,
|
||||
post.find("%sauthor/%semail" % (ns, ns)).text))
|
||||
print(textwrap.fill(post.find("%smessage" % ns).text,
|
||||
initial_indent=" ", subsequent_indent=" "))
|
||||
print("")
|
||||
|
Loading…
Reference in New Issue
Block a user