Disqus & WordPress use controllers now

... still ugly.
pull/108/head
Martin Zimmermann 10 years ago
parent 49f0031157
commit 3809f49f98

@ -48,6 +48,11 @@ from os.path import dirname, join
from argparse import ArgumentParser from argparse import ArgumentParser
from functools import partial, reduce from functools import partial, reduce
try:
input = raw_input
except NameError:
pass
from itsdangerous import URLSafeTimedSerializer from itsdangerous import URLSafeTimedSerializer
from werkzeug.routing import Map, Rule, redirect from werkzeug.routing import Map, Rule, redirect
@ -250,16 +255,19 @@ def main():
conf = config.load(join(dist.location, "isso", "defaults.ini"), args.conf) conf = config.load(join(dist.location, "isso", "defaults.ini"), args.conf)
if args.command == "import": if args.command == "import":
conf.set("guard", "enabled", "off") from isso.controllers import threads, comments
if args.dryrun: dburl = "sqlite:///:memory:" if args.dryrun else conf.get("general", "dbpath")
dbpath = ":memory:" dbobj = db.Adapter(dburl)
else:
dbpath = conf.get("general", "dbpath") tc = threads.Controller(dbobj)
cc = comments.Controller(dbobj)
mydb = db.Adapter(db.SQLite3(dbpath), conf) if not cc.empty():
migrate.dispatch(args.type, mydb, args.dump) if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
migrate.dispatch(tc, cc, args.type, args.dump)
sys.exit(0) sys.exit(0)
if not any(conf.getiter("general", "host")): if not any(conf.getiter("general", "host")):

@ -15,6 +15,8 @@ from collections import defaultdict
from isso.utils import anonymize from isso.utils import anonymize
from isso.compat import string_types from isso.compat import string_types
from isso.controllers.comments import Invalid
try: try:
input = raw_input input = raw_input
except NameError: except NameError:
@ -29,6 +31,7 @@ from xml.etree import ElementTree
logger = logging.getLogger("isso") logger = logging.getLogger("isso")
def strip(val): def strip(val):
if isinstance(val, string_types): if isinstance(val, string_types):
return val.strip() return val.strip()
@ -67,33 +70,40 @@ class Disqus(object):
ns = '{http://disqus.com}' ns = '{http://disqus.com}'
internals = '{http://disqus.com/disqus-internals}' internals = '{http://disqus.com/disqus-internals}'
def __init__(self, db, xmlfile): def __init__(self, threads, comments):
self.threads = set([]) self.threads = threads
self.comments = set([]) self.comments = comments
self.db = db self.dqthreads = set([])
self.xmlfile = xmlfile self.dqcomments = set([])
def insert(self, thread, posts): def insert(self, thread, posts):
path = urlparse(thread.find('%slink' % Disqus.ns).text).path path = urlparse(thread.find('%slink' % Disqus.ns).text).path
remap = dict() remap = dict()
if path not in self.db.threads: th = self.threads.get(path)
self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip()) if th is None:
th = self.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
for item in sorted(posts, key=lambda k: k['created']): for data in sorted(posts, key=lambda k: k['created']):
remote_addr = data.pop('remote_addr')
dsq_id = item.pop('dsq:id') dsq_id = data.pop('dsq:id')
item['parent'] = remap.get(item.pop('dsq:parent', None)) data['parent'] = remap.get(data.pop('dsq:parent', None))
rv = self.db.comments.add(path, item)
remap[dsq_id] = rv["id"] try:
rv = self.comments.new(remote_addr, th, data)
except Invalid :
logger.exception("Unable to insert comment `%s`", data)
else:
remap[dsq_id] = rv.id
self.comments.update(set(remap.keys())) self.dqcomments.update(set(remap.keys()))
def migrate(self): def migrate(self, xmlfile):
tree = ElementTree.parse(self.xmlfile) tree = ElementTree.parse(xmlfile)
res = defaultdict(list) res = defaultdict(list)
for post in tree.findall(Disqus.ns + 'post'): for post in tree.findall(Disqus.ns + 'post'):
@ -124,16 +134,17 @@ class Disqus(object):
id = thread.attrib.get(Disqus.internals + 'id') id = thread.attrib.get(Disqus.internals + 'id')
if id in res: if id in res:
self.threads.add(id) self.dqthreads.add(id)
self.insert(thread, res[id]) self.insert(thread, res[id])
# in case a comment has been deleted (and has no further children)
self.db.comments._remove_stale()
progress.finish("{0} threads, {1} comments".format( progress.finish("{0} threads, {1} comments".format(
len(self.threads), len(self.comments))) len(self.dqthreads), len(self.dqcomments)))
orphans = set(map(
lambda e: e.attrib.get(Disqus.internals + "id"),
tree.findall(Disqus.ns + "post"))
) - self.dqcomments
orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
if orphans: if orphans:
print("Found %i orphans:" % len(orphans)) print("Found %i orphans:" % len(orphans))
for post in tree.findall(Disqus.ns + "post"): for post in tree.findall(Disqus.ns + "post"):
@ -153,18 +164,11 @@ class WordPress(object):
ns = "{http://wordpress.org/export/1.0/}" ns = "{http://wordpress.org/export/1.0/}"
def __init__(self, db, xmlfile): def __init__(self, threads, comments):
self.db = db self.threads = threads
self.xmlfile = xmlfile self.comments = comments
self.count = 0
for line in io.open(xmlfile): self.count = 0
m = WordPress.detect(line)
if m:
self.ns = WordPress.ns.replace("1.0", m.group(1))
break
else:
logger.warn("No WXR namespace found, assuming 1.0")
def insert(self, thread): def insert(self, thread):
@ -174,7 +178,7 @@ class WordPress(object):
if url.query: if url.query:
path += "?" + url.query path += "?" + url.query
self.db.threads.new(path, thread.find("title").text.strip()) th = self.threads.new(path, thread.find("title").text.strip())
comments = list(map(self.Comment, thread.findall(self.ns + "comment"))) comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
comments.sort(key=lambda k: k["id"]) comments.sort(key=lambda k: k["id"])
@ -185,25 +189,36 @@ class WordPress(object):
self.count += len(ids) self.count += len(ids)
while comments: while comments:
for i, item in enumerate(comments): for i, data in enumerate(comments):
if item["parent"] in ids: if data["parent"] in ids:
continue continue
item["parent"] = remap.get(item["parent"], None) _id = data["id"]
rv = self.db.comments.add(path, item) data["parent"] = remap.get(data["parent"], None)
remap[item["id"]] = rv["id"] try:
rv = self.comments.new(data.pop("remote_addr"), th, data)
ids.remove(item["id"]) except Invalid:
comments.pop(i) logger.exception("Unable to insert comment `%s`", data)
else:
break remap[_id] = rv.id
ids.remove(_id)
break
finally:
comments.pop(i)
else: else:
# should never happen, but... it's WordPress. # should never happen, but... it's WordPress.
return return
def migrate(self): def migrate(self, xmlfile):
for line in io.open(xmlfile):
m = WordPress.detect(line)
if m:
self.ns = WordPress.ns.replace("1.0", m.group(1))
break
else:
logger.warn("No WXR namespace found, assuming 1.0")
tree = ElementTree.parse(self.xmlfile) tree = ElementTree.parse(xmlfile)
skip = 0 skip = 0
items = tree.findall("channel/item") items = tree.findall("channel/item")
@ -253,10 +268,7 @@ def autodetect(peek):
return None return None
def dispatch(type, db, dump): def dispatch(threads, comments, type, dump):
if db.execute("SELECT * FROM comments").fetchone():
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
if type == "disqus": if type == "disqus":
cls = Disqus cls = Disqus
@ -269,4 +281,4 @@ def dispatch(type, db, dump):
if cls is None: if cls is None:
raise SystemExit("Unknown format, abort.") raise SystemExit("Unknown format, abort.")
cls(db, dump).migrate() cls(threads, comments).migrate(dump)

@ -5,73 +5,69 @@ from __future__ import unicode_literals
import unittest import unittest
from os.path import join, dirname from os.path import join, dirname
from isso import config from isso.db import Adapter
from isso.controllers import threads, comments
from isso.db import SQLite3, Adapter
from isso.migrate import Disqus, WordPress, autodetect from isso.migrate import Disqus, WordPress, autodetect
conf = config.new({
"general": {
"dbpath": "/dev/null",
"max-age": "1h"
}
})
class TestMigration(unittest.TestCase): class TestMigration(unittest.TestCase):
def test_disqus(self): def setUp(self):
db = Adapter("sqlite:///:memory:")
self.threads = threads.Controller(db)
self.comments = comments.Controller(db)
xml = join(dirname(__file__), "disqus.xml") def test_disqus(self):
db = Adapter(SQLite3(":memory:"), conf) Disqus(self.threads, self.comments).migrate(
Disqus(db, xml).migrate() join(dirname(__file__), "disqus.xml"))
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 2) th = self.threads.get("/")
self.assertIsNotNone(th)
self.assertEqual(th.title, "Hello, World!")
self.assertEqual(th.id, 1)
self.assertEqual(db.threads["/"]["title"], "Hello, World!") self.assertEqual(self.comments.count(th)[0], 2)
self.assertEqual(db.threads["/"]["id"], 1)
a = db.comments.get(1) a = self.comments.get(1)
self.assertIsNotNone(a)
self.assertEqual(a["author"], "peter") self.assertEqual(a.author, "peter")
self.assertEqual(a["email"], "foo@bar.com") self.assertEqual(a.email, "foo@bar.com")
self.assertEqual(a["remote_addr"], "127.0.0.0") self.assertEqual(a.remote_addr, "127.0.0.0")
b = db.comments.get(2) b = self.comments.get(2)
self.assertEqual(b["parent"], a["id"]) self.assertEqual(b.parent, a.id)
def test_wordpress(self): def test_wordpress(self):
WordPress(self.threads, self.comments).migrate(
join(dirname(__file__), "wordpress.xml"))
xml = join(dirname(__file__), "wordpress.xml") r = self.threads.get("/2014/test/")
self.assertEqual(r.title, "Hello, World…")
db = Adapter(SQLite3(":memory:"), conf) self.assertEqual(r.id, 1)
WordPress(db, xml).migrate()
self.assertEqual(db.threads["/2014/test/"]["title"], "Hello, World…")
self.assertEqual(db.threads["/2014/test/"]["id"], 1)
self.assertEqual(db.threads["/?p=4"]["title"], "...") s = self.threads.get("/?p=4")
self.assertEqual(db.threads["/?p=4"]["id"], 2) self.assertEqual(s.title, "...")
self.assertEqual(s.id, 2)
self.assertEqual(len(db.execute("SELECT id FROM threads").fetchall()), 2) self.assertEqual(sum(self.comments.count(r, s)), 7)
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 7)
first = db.comments.get(1) a = self.comments.get(1)
self.assertEqual(first["author"], "Ohai") self.assertEqual(a.author, "Ohai")
self.assertEqual(first["text"], "Erster!1") self.assertEqual(a.text, "Erster!1")
self.assertEqual(first["remote_addr"], "82.119.20.0") self.assertEqual(a.remote_addr, "82.119.20.0")
second = db.comments.get(2) b = self.comments.get(2)
self.assertEqual(second["author"], "Tester") self.assertEqual(b.author, "Tester")
self.assertEqual(second["text"], "Zweiter.") self.assertEqual(b.text, "Zweiter.")
for i in (3, 4, 5): for i in (3, 4, 5):
self.assertEqual(db.comments.get(i)["parent"], second["id"]) self.assertEqual(self.comments.get(i).parent, b.id)
last = db.comments.get(6) last = self.comments.get(6)
self.assertEqual(last["author"], "Letzter :/") self.assertEqual(last.author, "Letzter :/")
self.assertEqual(last["parent"], None) self.assertEqual(last.parent, None)
def test_detection(self): def test_detection(self):

Loading…
Cancel
Save