Disqus & Wordpress use controllers now

... still ugly.
pull/108/head
Martin Zimmermann 10 years ago
parent 49f0031157
commit 3809f49f98

@ -48,6 +48,11 @@ from os.path import dirname, join
from argparse import ArgumentParser
from functools import partial, reduce
try:
input = raw_input
except NameError:
pass
from itsdangerous import URLSafeTimedSerializer
from werkzeug.routing import Map, Rule, redirect
@ -250,16 +255,19 @@ def main():
conf = config.load(join(dist.location, "isso", "defaults.ini"), args.conf)
if args.command == "import":
conf.set("guard", "enabled", "off")
from isso.controllers import threads, comments
if args.dryrun:
dbpath = ":memory:"
else:
dbpath = conf.get("general", "dbpath")
dburl = "sqlite:///:memory:" if args.dryrun else conf.get("general", "dbpath")
dbobj = db.Adapter(dburl)
tc = threads.Controller(dbobj)
cc = comments.Controller(dbobj)
mydb = db.Adapter(db.SQLite3(dbpath), conf)
migrate.dispatch(args.type, mydb, args.dump)
if not cc.empty():
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
migrate.dispatch(tc, cc, args.type, args.dump)
sys.exit(0)
if not any(conf.getiter("general", "host")):

@ -15,6 +15,8 @@ from collections import defaultdict
from isso.utils import anonymize
from isso.compat import string_types
from isso.controllers.comments import Invalid
try:
input = raw_input
except NameError:
@ -29,6 +31,7 @@ from xml.etree import ElementTree
logger = logging.getLogger("isso")
def strip(val):
if isinstance(val, string_types):
return val.strip()
@ -67,33 +70,40 @@ class Disqus(object):
ns = '{http://disqus.com}'
internals = '{http://disqus.com/disqus-internals}'
def __init__(self, db, xmlfile):
self.threads = set([])
self.comments = set([])
def __init__(self, threads, comments):
self.threads = threads
self.comments = comments
self.db = db
self.xmlfile = xmlfile
self.dqthreads = set([])
self.dqcomments = set([])
def insert(self, thread, posts):
path = urlparse(thread.find('%slink' % Disqus.ns).text).path
remap = dict()
if path not in self.db.threads:
self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
th = self.threads.get(path)
if th is None:
th = self.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
for item in sorted(posts, key=lambda k: k['created']):
for data in sorted(posts, key=lambda k: k['created']):
remote_addr = data.pop('remote_addr')
dsq_id = item.pop('dsq:id')
item['parent'] = remap.get(item.pop('dsq:parent', None))
rv = self.db.comments.add(path, item)
remap[dsq_id] = rv["id"]
dsq_id = data.pop('dsq:id')
data['parent'] = remap.get(data.pop('dsq:parent', None))
try:
rv = self.comments.new(remote_addr, th, data)
except Invalid :
logger.exception("Unable to insert comment `%s`", data)
else:
remap[dsq_id] = rv.id
self.comments.update(set(remap.keys()))
self.dqcomments.update(set(remap.keys()))
def migrate(self):
def migrate(self, xmlfile):
tree = ElementTree.parse(self.xmlfile)
tree = ElementTree.parse(xmlfile)
res = defaultdict(list)
for post in tree.findall(Disqus.ns + 'post'):
@ -124,16 +134,17 @@ class Disqus(object):
id = thread.attrib.get(Disqus.internals + 'id')
if id in res:
self.threads.add(id)
self.dqthreads.add(id)
self.insert(thread, res[id])
# in case a comment has been deleted (and has no further children)
self.db.comments._remove_stale()
progress.finish("{0} threads, {1} comments".format(
len(self.threads), len(self.comments)))
len(self.dqthreads), len(self.dqcomments)))
orphans = set(map(
lambda e: e.attrib.get(Disqus.internals + "id"),
tree.findall(Disqus.ns + "post"))
) - self.dqcomments
orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
if orphans:
print("Found %i orphans:" % len(orphans))
for post in tree.findall(Disqus.ns + "post"):
@ -153,18 +164,11 @@ class WordPress(object):
ns = "{http://wordpress.org/export/1.0/}"
def __init__(self, db, xmlfile):
self.db = db
self.xmlfile = xmlfile
self.count = 0
def __init__(self, threads, comments):
self.threads = threads
self.comments = comments
for line in io.open(xmlfile):
m = WordPress.detect(line)
if m:
self.ns = WordPress.ns.replace("1.0", m.group(1))
break
else:
logger.warn("No WXR namespace found, assuming 1.0")
self.count = 0
def insert(self, thread):
@ -174,7 +178,7 @@ class WordPress(object):
if url.query:
path += "?" + url.query
self.db.threads.new(path, thread.find("title").text.strip())
th = self.threads.new(path, thread.find("title").text.strip())
comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
comments.sort(key=lambda k: k["id"])
@ -185,25 +189,36 @@ class WordPress(object):
self.count += len(ids)
while comments:
for i, item in enumerate(comments):
if item["parent"] in ids:
for i, data in enumerate(comments):
if data["parent"] in ids:
continue
item["parent"] = remap.get(item["parent"], None)
rv = self.db.comments.add(path, item)
remap[item["id"]] = rv["id"]
ids.remove(item["id"])
comments.pop(i)
break
_id = data["id"]
data["parent"] = remap.get(data["parent"], None)
try:
rv = self.comments.new(data.pop("remote_addr"), th, data)
except Invalid:
logger.exception("Unable to insert comment `%s`", data)
else:
remap[_id] = rv.id
ids.remove(_id)
break
finally:
comments.pop(i)
else:
# should never happen, but... it's WordPress.
return
def migrate(self):
def migrate(self, xmlfile):
for line in io.open(xmlfile):
m = WordPress.detect(line)
if m:
self.ns = WordPress.ns.replace("1.0", m.group(1))
break
else:
logger.warn("No WXR namespace found, assuming 1.0")
tree = ElementTree.parse(self.xmlfile)
tree = ElementTree.parse(xmlfile)
skip = 0
items = tree.findall("channel/item")
@ -253,10 +268,7 @@ def autodetect(peek):
return None
def dispatch(type, db, dump):
if db.execute("SELECT * FROM comments").fetchone():
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
def dispatch(threads, comments, type, dump):
if type == "disqus":
cls = Disqus
@ -269,4 +281,4 @@ def dispatch(type, db, dump):
if cls is None:
raise SystemExit("Unknown format, abort.")
cls(db, dump).migrate()
cls(threads, comments).migrate(dump)

@ -5,73 +5,69 @@ from __future__ import unicode_literals
import unittest
from os.path import join, dirname
from isso import config
from isso.db import SQLite3, Adapter
from isso.db import Adapter
from isso.controllers import threads, comments
from isso.migrate import Disqus, WordPress, autodetect
conf = config.new({
"general": {
"dbpath": "/dev/null",
"max-age": "1h"
}
})
class TestMigration(unittest.TestCase):
def test_disqus(self):
def setUp(self):
db = Adapter("sqlite:///:memory:")
self.threads = threads.Controller(db)
self.comments = comments.Controller(db)
xml = join(dirname(__file__), "disqus.xml")
def test_disqus(self):
db = Adapter(SQLite3(":memory:"), conf)
Disqus(db, xml).migrate()
Disqus(self.threads, self.comments).migrate(
join(dirname(__file__), "disqus.xml"))
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 2)
th = self.threads.get("/")
self.assertIsNotNone(th)
self.assertEqual(th.title, "Hello, World!")
self.assertEqual(th.id, 1)
self.assertEqual(db.threads["/"]["title"], "Hello, World!")
self.assertEqual(db.threads["/"]["id"], 1)
self.assertEqual(self.comments.count(th)[0], 2)
a = db.comments.get(1)
a = self.comments.get(1)
self.assertIsNotNone(a)
self.assertEqual(a["author"], "peter")
self.assertEqual(a["email"], "foo@bar.com")
self.assertEqual(a["remote_addr"], "127.0.0.0")
self.assertEqual(a.author, "peter")
self.assertEqual(a.email, "foo@bar.com")
self.assertEqual(a.remote_addr, "127.0.0.0")
b = db.comments.get(2)
self.assertEqual(b["parent"], a["id"])
b = self.comments.get(2)
self.assertEqual(b.parent, a.id)
def test_wordpress(self):
WordPress(self.threads, self.comments).migrate(
join(dirname(__file__), "wordpress.xml"))
xml = join(dirname(__file__), "wordpress.xml")
db = Adapter(SQLite3(":memory:"), conf)
WordPress(db, xml).migrate()
self.assertEqual(db.threads["/2014/test/"]["title"], "Hello, World…")
self.assertEqual(db.threads["/2014/test/"]["id"], 1)
r = self.threads.get("/2014/test/")
self.assertEqual(r.title, "Hello, World…")
self.assertEqual(r.id, 1)
self.assertEqual(db.threads["/?p=4"]["title"], "...")
self.assertEqual(db.threads["/?p=4"]["id"], 2)
s = self.threads.get("/?p=4")
self.assertEqual(s.title, "...")
self.assertEqual(s.id, 2)
self.assertEqual(len(db.execute("SELECT id FROM threads").fetchall()), 2)
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 7)
self.assertEqual(sum(self.comments.count(r, s)), 7)
first = db.comments.get(1)
self.assertEqual(first["author"], "Ohai")
self.assertEqual(first["text"], "Erster!1")
self.assertEqual(first["remote_addr"], "82.119.20.0")
a = self.comments.get(1)
self.assertEqual(a.author, "Ohai")
self.assertEqual(a.text, "Erster!1")
self.assertEqual(a.remote_addr, "82.119.20.0")
second = db.comments.get(2)
self.assertEqual(second["author"], "Tester")
self.assertEqual(second["text"], "Zweiter.")
b = self.comments.get(2)
self.assertEqual(b.author, "Tester")
self.assertEqual(b.text, "Zweiter.")
for i in (3, 4, 5):
self.assertEqual(db.comments.get(i)["parent"], second["id"])
self.assertEqual(self.comments.get(i).parent, b.id)
last = db.comments.get(6)
self.assertEqual(last["author"], "Letzter :/")
self.assertEqual(last["parent"], None)
last = self.comments.get(6)
self.assertEqual(last.author, "Letzter :/")
self.assertEqual(last.parent, None)
def test_detection(self):

Loading…
Cancel
Save