|
|
@ -15,6 +15,8 @@ from collections import defaultdict
|
|
|
|
from isso.utils import anonymize
|
|
|
|
from isso.utils import anonymize
|
|
|
|
from isso.compat import string_types
|
|
|
|
from isso.compat import string_types
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from isso.controllers.comments import Invalid
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
input = raw_input
|
|
|
|
input = raw_input
|
|
|
|
except NameError:
|
|
|
|
except NameError:
|
|
|
@ -29,6 +31,7 @@ from xml.etree import ElementTree
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger("isso")
|
|
|
|
logger = logging.getLogger("isso")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def strip(val):
|
|
|
|
def strip(val):
|
|
|
|
if isinstance(val, string_types):
|
|
|
|
if isinstance(val, string_types):
|
|
|
|
return val.strip()
|
|
|
|
return val.strip()
|
|
|
@ -67,33 +70,40 @@ class Disqus(object):
|
|
|
|
ns = '{http://disqus.com}'
|
|
|
|
ns = '{http://disqus.com}'
|
|
|
|
internals = '{http://disqus.com/disqus-internals}'
|
|
|
|
internals = '{http://disqus.com/disqus-internals}'
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, db, xmlfile):
|
|
|
|
def __init__(self, threads, comments):
|
|
|
|
self.threads = set([])
|
|
|
|
self.threads = threads
|
|
|
|
self.comments = set([])
|
|
|
|
self.comments = comments
|
|
|
|
|
|
|
|
|
|
|
|
self.db = db
|
|
|
|
self.dqthreads = set([])
|
|
|
|
self.xmlfile = xmlfile
|
|
|
|
self.dqcomments = set([])
|
|
|
|
|
|
|
|
|
|
|
|
def insert(self, thread, posts):
|
|
|
|
def insert(self, thread, posts):
|
|
|
|
|
|
|
|
|
|
|
|
path = urlparse(thread.find('%slink' % Disqus.ns).text).path
|
|
|
|
path = urlparse(thread.find('%slink' % Disqus.ns).text).path
|
|
|
|
remap = dict()
|
|
|
|
remap = dict()
|
|
|
|
|
|
|
|
|
|
|
|
if path not in self.db.threads:
|
|
|
|
th = self.threads.get(path)
|
|
|
|
self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
|
|
|
|
if th is None:
|
|
|
|
|
|
|
|
th = self.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
|
|
|
|
|
|
|
|
|
|
|
|
for item in sorted(posts, key=lambda k: k['created']):
|
|
|
|
for data in sorted(posts, key=lambda k: k['created']):
|
|
|
|
|
|
|
|
remote_addr = data.pop('remote_addr')
|
|
|
|
|
|
|
|
|
|
|
|
dsq_id = item.pop('dsq:id')
|
|
|
|
dsq_id = data.pop('dsq:id')
|
|
|
|
item['parent'] = remap.get(item.pop('dsq:parent', None))
|
|
|
|
data['parent'] = remap.get(data.pop('dsq:parent', None))
|
|
|
|
rv = self.db.comments.add(path, item)
|
|
|
|
|
|
|
|
remap[dsq_id] = rv["id"]
|
|
|
|
try:
|
|
|
|
|
|
|
|
rv = self.comments.new(remote_addr, th, data)
|
|
|
|
|
|
|
|
except Invalid :
|
|
|
|
|
|
|
|
logger.exception("Unable to insert comment `%s`", data)
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
remap[dsq_id] = rv.id
|
|
|
|
|
|
|
|
|
|
|
|
self.comments.update(set(remap.keys()))
|
|
|
|
self.dqcomments.update(set(remap.keys()))
|
|
|
|
|
|
|
|
|
|
|
|
def migrate(self):
|
|
|
|
def migrate(self, xmlfile):
|
|
|
|
|
|
|
|
|
|
|
|
tree = ElementTree.parse(self.xmlfile)
|
|
|
|
tree = ElementTree.parse(xmlfile)
|
|
|
|
res = defaultdict(list)
|
|
|
|
res = defaultdict(list)
|
|
|
|
|
|
|
|
|
|
|
|
for post in tree.findall(Disqus.ns + 'post'):
|
|
|
|
for post in tree.findall(Disqus.ns + 'post'):
|
|
|
@ -124,16 +134,17 @@ class Disqus(object):
|
|
|
|
|
|
|
|
|
|
|
|
id = thread.attrib.get(Disqus.internals + 'id')
|
|
|
|
id = thread.attrib.get(Disqus.internals + 'id')
|
|
|
|
if id in res:
|
|
|
|
if id in res:
|
|
|
|
self.threads.add(id)
|
|
|
|
self.dqthreads.add(id)
|
|
|
|
self.insert(thread, res[id])
|
|
|
|
self.insert(thread, res[id])
|
|
|
|
|
|
|
|
|
|
|
|
# in case a comment has been deleted (and no further childs)
|
|
|
|
|
|
|
|
self.db.comments._remove_stale()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
progress.finish("{0} threads, {1} comments".format(
|
|
|
|
progress.finish("{0} threads, {1} comments".format(
|
|
|
|
len(self.threads), len(self.comments)))
|
|
|
|
len(self.dqthreads), len(self.dqcomments)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
orphans = set(map(
|
|
|
|
|
|
|
|
lambda e: e.attrib.get(Disqus.internals + "id"),
|
|
|
|
|
|
|
|
tree.findall(Disqus.ns + "post"))
|
|
|
|
|
|
|
|
) - self.dqcomments
|
|
|
|
|
|
|
|
|
|
|
|
orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
|
|
|
|
|
|
|
|
if orphans:
|
|
|
|
if orphans:
|
|
|
|
print("Found %i orphans:" % len(orphans))
|
|
|
|
print("Found %i orphans:" % len(orphans))
|
|
|
|
for post in tree.findall(Disqus.ns + "post"):
|
|
|
|
for post in tree.findall(Disqus.ns + "post"):
|
|
|
@ -153,18 +164,11 @@ class WordPress(object):
|
|
|
|
|
|
|
|
|
|
|
|
ns = "{http://wordpress.org/export/1.0/}"
|
|
|
|
ns = "{http://wordpress.org/export/1.0/}"
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, db, xmlfile):
|
|
|
|
def __init__(self, threads, comments):
|
|
|
|
self.db = db
|
|
|
|
self.threads = threads
|
|
|
|
self.xmlfile = xmlfile
|
|
|
|
self.comments = comments
|
|
|
|
self.count = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for line in io.open(xmlfile):
|
|
|
|
self.count = 0
|
|
|
|
m = WordPress.detect(line)
|
|
|
|
|
|
|
|
if m:
|
|
|
|
|
|
|
|
self.ns = WordPress.ns.replace("1.0", m.group(1))
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
logger.warn("No WXR namespace found, assuming 1.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def insert(self, thread):
|
|
|
|
def insert(self, thread):
|
|
|
|
|
|
|
|
|
|
|
@ -174,7 +178,7 @@ class WordPress(object):
|
|
|
|
if url.query:
|
|
|
|
if url.query:
|
|
|
|
path += "?" + url.query
|
|
|
|
path += "?" + url.query
|
|
|
|
|
|
|
|
|
|
|
|
self.db.threads.new(path, thread.find("title").text.strip())
|
|
|
|
th = self.threads.new(path, thread.find("title").text.strip())
|
|
|
|
|
|
|
|
|
|
|
|
comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
|
|
|
|
comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
|
|
|
|
comments.sort(key=lambda k: k["id"])
|
|
|
|
comments.sort(key=lambda k: k["id"])
|
|
|
@ -185,25 +189,36 @@ class WordPress(object):
|
|
|
|
self.count += len(ids)
|
|
|
|
self.count += len(ids)
|
|
|
|
|
|
|
|
|
|
|
|
while comments:
|
|
|
|
while comments:
|
|
|
|
for i, item in enumerate(comments):
|
|
|
|
for i, data in enumerate(comments):
|
|
|
|
if item["parent"] in ids:
|
|
|
|
if data["parent"] in ids:
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
item["parent"] = remap.get(item["parent"], None)
|
|
|
|
_id = data["id"]
|
|
|
|
rv = self.db.comments.add(path, item)
|
|
|
|
data["parent"] = remap.get(data["parent"], None)
|
|
|
|
remap[item["id"]] = rv["id"]
|
|
|
|
try:
|
|
|
|
|
|
|
|
rv = self.comments.new(data.pop("remote_addr"), th, data)
|
|
|
|
ids.remove(item["id"])
|
|
|
|
except Invalid:
|
|
|
|
comments.pop(i)
|
|
|
|
logger.exception("Unable to insert comment `%s`", data)
|
|
|
|
|
|
|
|
else:
|
|
|
|
break
|
|
|
|
remap[_id] = rv.id
|
|
|
|
|
|
|
|
ids.remove(_id)
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
finally:
|
|
|
|
|
|
|
|
comments.pop(i)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
# should never happen, but... it's WordPress.
|
|
|
|
# should never happen, but... it's WordPress.
|
|
|
|
return
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
def migrate(self):
|
|
|
|
def migrate(self, xmlfile):
|
|
|
|
|
|
|
|
for line in io.open(xmlfile):
|
|
|
|
|
|
|
|
m = WordPress.detect(line)
|
|
|
|
|
|
|
|
if m:
|
|
|
|
|
|
|
|
self.ns = WordPress.ns.replace("1.0", m.group(1))
|
|
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
logger.warn("No WXR namespace found, assuming 1.0")
|
|
|
|
|
|
|
|
|
|
|
|
tree = ElementTree.parse(self.xmlfile)
|
|
|
|
tree = ElementTree.parse(xmlfile)
|
|
|
|
|
|
|
|
|
|
|
|
skip = 0
|
|
|
|
skip = 0
|
|
|
|
items = tree.findall("channel/item")
|
|
|
|
items = tree.findall("channel/item")
|
|
|
@ -253,10 +268,7 @@ def autodetect(peek):
|
|
|
|
return None
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def dispatch(type, db, dump):
|
|
|
|
def dispatch(threads, comments, type, dump):
|
|
|
|
if db.execute("SELECT * FROM comments").fetchone():
|
|
|
|
|
|
|
|
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
|
|
|
|
|
|
|
|
raise SystemExit("Abort.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if type == "disqus":
|
|
|
|
if type == "disqus":
|
|
|
|
cls = Disqus
|
|
|
|
cls = Disqus
|
|
|
@ -269,4 +281,4 @@ def dispatch(type, db, dump):
|
|
|
|
if cls is None:
|
|
|
|
if cls is None:
|
|
|
|
raise SystemExit("Unknown format, abort.")
|
|
|
|
raise SystemExit("Unknown format, abort.")
|
|
|
|
|
|
|
|
|
|
|
|
cls(db, dump).migrate()
|
|
|
|
cls(threads, comments).migrate(dump)
|
|
|
|