Disqus & Wordpress use controllers now
... still ugly.
This commit is contained in:
parent
49f0031157
commit
3809f49f98
@ -48,6 +48,11 @@ from os.path import dirname, join
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from functools import partial, reduce
|
from functools import partial, reduce
|
||||||
|
|
||||||
|
try:
|
||||||
|
input = raw_input
|
||||||
|
except NameError:
|
||||||
|
pass
|
||||||
|
|
||||||
from itsdangerous import URLSafeTimedSerializer
|
from itsdangerous import URLSafeTimedSerializer
|
||||||
|
|
||||||
from werkzeug.routing import Map, Rule, redirect
|
from werkzeug.routing import Map, Rule, redirect
|
||||||
@ -250,16 +255,19 @@ def main():
|
|||||||
conf = config.load(join(dist.location, "isso", "defaults.ini"), args.conf)
|
conf = config.load(join(dist.location, "isso", "defaults.ini"), args.conf)
|
||||||
|
|
||||||
if args.command == "import":
|
if args.command == "import":
|
||||||
conf.set("guard", "enabled", "off")
|
from isso.controllers import threads, comments
|
||||||
|
|
||||||
if args.dryrun:
|
dburl = "sqlite:///:memory:" if args.dryrun else conf.get("general", "dbpath")
|
||||||
dbpath = ":memory:"
|
dbobj = db.Adapter(dburl)
|
||||||
else:
|
|
||||||
dbpath = conf.get("general", "dbpath")
|
|
||||||
|
|
||||||
mydb = db.Adapter(db.SQLite3(dbpath), conf)
|
tc = threads.Controller(dbobj)
|
||||||
migrate.dispatch(args.type, mydb, args.dump)
|
cc = comments.Controller(dbobj)
|
||||||
|
|
||||||
|
if not cc.empty():
|
||||||
|
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
|
||||||
|
raise SystemExit("Abort.")
|
||||||
|
|
||||||
|
migrate.dispatch(tc, cc, args.type, args.dump)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
if not any(conf.getiter("general", "host")):
|
if not any(conf.getiter("general", "host")):
|
||||||
|
112
isso/migrate.py
112
isso/migrate.py
@ -15,6 +15,8 @@ from collections import defaultdict
|
|||||||
from isso.utils import anonymize
|
from isso.utils import anonymize
|
||||||
from isso.compat import string_types
|
from isso.compat import string_types
|
||||||
|
|
||||||
|
from isso.controllers.comments import Invalid
|
||||||
|
|
||||||
try:
|
try:
|
||||||
input = raw_input
|
input = raw_input
|
||||||
except NameError:
|
except NameError:
|
||||||
@ -29,6 +31,7 @@ from xml.etree import ElementTree
|
|||||||
|
|
||||||
logger = logging.getLogger("isso")
|
logger = logging.getLogger("isso")
|
||||||
|
|
||||||
|
|
||||||
def strip(val):
|
def strip(val):
|
||||||
if isinstance(val, string_types):
|
if isinstance(val, string_types):
|
||||||
return val.strip()
|
return val.strip()
|
||||||
@ -67,33 +70,40 @@ class Disqus(object):
|
|||||||
ns = '{http://disqus.com}'
|
ns = '{http://disqus.com}'
|
||||||
internals = '{http://disqus.com/disqus-internals}'
|
internals = '{http://disqus.com/disqus-internals}'
|
||||||
|
|
||||||
def __init__(self, db, xmlfile):
|
def __init__(self, threads, comments):
|
||||||
self.threads = set([])
|
self.threads = threads
|
||||||
self.comments = set([])
|
self.comments = comments
|
||||||
|
|
||||||
self.db = db
|
self.dqthreads = set([])
|
||||||
self.xmlfile = xmlfile
|
self.dqcomments = set([])
|
||||||
|
|
||||||
def insert(self, thread, posts):
|
def insert(self, thread, posts):
|
||||||
|
|
||||||
path = urlparse(thread.find('%slink' % Disqus.ns).text).path
|
path = urlparse(thread.find('%slink' % Disqus.ns).text).path
|
||||||
remap = dict()
|
remap = dict()
|
||||||
|
|
||||||
if path not in self.db.threads:
|
th = self.threads.get(path)
|
||||||
self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
|
if th is None:
|
||||||
|
th = self.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
|
||||||
|
|
||||||
for item in sorted(posts, key=lambda k: k['created']):
|
for data in sorted(posts, key=lambda k: k['created']):
|
||||||
|
remote_addr = data.pop('remote_addr')
|
||||||
|
|
||||||
dsq_id = item.pop('dsq:id')
|
dsq_id = data.pop('dsq:id')
|
||||||
item['parent'] = remap.get(item.pop('dsq:parent', None))
|
data['parent'] = remap.get(data.pop('dsq:parent', None))
|
||||||
rv = self.db.comments.add(path, item)
|
|
||||||
remap[dsq_id] = rv["id"]
|
|
||||||
|
|
||||||
self.comments.update(set(remap.keys()))
|
try:
|
||||||
|
rv = self.comments.new(remote_addr, th, data)
|
||||||
|
except Invalid :
|
||||||
|
logger.exception("Unable to insert comment `%s`", data)
|
||||||
|
else:
|
||||||
|
remap[dsq_id] = rv.id
|
||||||
|
|
||||||
def migrate(self):
|
self.dqcomments.update(set(remap.keys()))
|
||||||
|
|
||||||
tree = ElementTree.parse(self.xmlfile)
|
def migrate(self, xmlfile):
|
||||||
|
|
||||||
|
tree = ElementTree.parse(xmlfile)
|
||||||
res = defaultdict(list)
|
res = defaultdict(list)
|
||||||
|
|
||||||
for post in tree.findall(Disqus.ns + 'post'):
|
for post in tree.findall(Disqus.ns + 'post'):
|
||||||
@ -124,16 +134,17 @@ class Disqus(object):
|
|||||||
|
|
||||||
id = thread.attrib.get(Disqus.internals + 'id')
|
id = thread.attrib.get(Disqus.internals + 'id')
|
||||||
if id in res:
|
if id in res:
|
||||||
self.threads.add(id)
|
self.dqthreads.add(id)
|
||||||
self.insert(thread, res[id])
|
self.insert(thread, res[id])
|
||||||
|
|
||||||
# in case a comment has been deleted (and no further childs)
|
|
||||||
self.db.comments._remove_stale()
|
|
||||||
|
|
||||||
progress.finish("{0} threads, {1} comments".format(
|
progress.finish("{0} threads, {1} comments".format(
|
||||||
len(self.threads), len(self.comments)))
|
len(self.dqthreads), len(self.dqcomments)))
|
||||||
|
|
||||||
|
orphans = set(map(
|
||||||
|
lambda e: e.attrib.get(Disqus.internals + "id"),
|
||||||
|
tree.findall(Disqus.ns + "post"))
|
||||||
|
) - self.dqcomments
|
||||||
|
|
||||||
orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
|
|
||||||
if orphans:
|
if orphans:
|
||||||
print("Found %i orphans:" % len(orphans))
|
print("Found %i orphans:" % len(orphans))
|
||||||
for post in tree.findall(Disqus.ns + "post"):
|
for post in tree.findall(Disqus.ns + "post"):
|
||||||
@ -153,18 +164,11 @@ class WordPress(object):
|
|||||||
|
|
||||||
ns = "{http://wordpress.org/export/1.0/}"
|
ns = "{http://wordpress.org/export/1.0/}"
|
||||||
|
|
||||||
def __init__(self, db, xmlfile):
|
def __init__(self, threads, comments):
|
||||||
self.db = db
|
self.threads = threads
|
||||||
self.xmlfile = xmlfile
|
self.comments = comments
|
||||||
self.count = 0
|
|
||||||
|
|
||||||
for line in io.open(xmlfile):
|
self.count = 0
|
||||||
m = WordPress.detect(line)
|
|
||||||
if m:
|
|
||||||
self.ns = WordPress.ns.replace("1.0", m.group(1))
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
logger.warn("No WXR namespace found, assuming 1.0")
|
|
||||||
|
|
||||||
def insert(self, thread):
|
def insert(self, thread):
|
||||||
|
|
||||||
@ -174,7 +178,7 @@ class WordPress(object):
|
|||||||
if url.query:
|
if url.query:
|
||||||
path += "?" + url.query
|
path += "?" + url.query
|
||||||
|
|
||||||
self.db.threads.new(path, thread.find("title").text.strip())
|
th = self.threads.new(path, thread.find("title").text.strip())
|
||||||
|
|
||||||
comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
|
comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
|
||||||
comments.sort(key=lambda k: k["id"])
|
comments.sort(key=lambda k: k["id"])
|
||||||
@ -185,25 +189,36 @@ class WordPress(object):
|
|||||||
self.count += len(ids)
|
self.count += len(ids)
|
||||||
|
|
||||||
while comments:
|
while comments:
|
||||||
for i, item in enumerate(comments):
|
for i, data in enumerate(comments):
|
||||||
if item["parent"] in ids:
|
if data["parent"] in ids:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
item["parent"] = remap.get(item["parent"], None)
|
_id = data["id"]
|
||||||
rv = self.db.comments.add(path, item)
|
data["parent"] = remap.get(data["parent"], None)
|
||||||
remap[item["id"]] = rv["id"]
|
try:
|
||||||
|
rv = self.comments.new(data.pop("remote_addr"), th, data)
|
||||||
ids.remove(item["id"])
|
except Invalid:
|
||||||
comments.pop(i)
|
logger.exception("Unable to insert comment `%s`", data)
|
||||||
|
else:
|
||||||
break
|
remap[_id] = rv.id
|
||||||
|
ids.remove(_id)
|
||||||
|
break
|
||||||
|
finally:
|
||||||
|
comments.pop(i)
|
||||||
else:
|
else:
|
||||||
# should never happen, but... it's WordPress.
|
# should never happen, but... it's WordPress.
|
||||||
return
|
return
|
||||||
|
|
||||||
def migrate(self):
|
def migrate(self, xmlfile):
|
||||||
|
for line in io.open(xmlfile):
|
||||||
|
m = WordPress.detect(line)
|
||||||
|
if m:
|
||||||
|
self.ns = WordPress.ns.replace("1.0", m.group(1))
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
logger.warn("No WXR namespace found, assuming 1.0")
|
||||||
|
|
||||||
tree = ElementTree.parse(self.xmlfile)
|
tree = ElementTree.parse(xmlfile)
|
||||||
|
|
||||||
skip = 0
|
skip = 0
|
||||||
items = tree.findall("channel/item")
|
items = tree.findall("channel/item")
|
||||||
@ -253,10 +268,7 @@ def autodetect(peek):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def dispatch(type, db, dump):
|
def dispatch(threads, comments, type, dump):
|
||||||
if db.execute("SELECT * FROM comments").fetchone():
|
|
||||||
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
|
|
||||||
raise SystemExit("Abort.")
|
|
||||||
|
|
||||||
if type == "disqus":
|
if type == "disqus":
|
||||||
cls = Disqus
|
cls = Disqus
|
||||||
@ -269,4 +281,4 @@ def dispatch(type, db, dump):
|
|||||||
if cls is None:
|
if cls is None:
|
||||||
raise SystemExit("Unknown format, abort.")
|
raise SystemExit("Unknown format, abort.")
|
||||||
|
|
||||||
cls(db, dump).migrate()
|
cls(threads, comments).migrate(dump)
|
||||||
|
@ -5,73 +5,69 @@ from __future__ import unicode_literals
|
|||||||
import unittest
|
import unittest
|
||||||
from os.path import join, dirname
|
from os.path import join, dirname
|
||||||
|
|
||||||
from isso import config
|
from isso.db import Adapter
|
||||||
|
from isso.controllers import threads, comments
|
||||||
from isso.db import SQLite3, Adapter
|
|
||||||
from isso.migrate import Disqus, WordPress, autodetect
|
from isso.migrate import Disqus, WordPress, autodetect
|
||||||
|
|
||||||
conf = config.new({
|
|
||||||
"general": {
|
|
||||||
"dbpath": "/dev/null",
|
|
||||||
"max-age": "1h"
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
|
|
||||||
class TestMigration(unittest.TestCase):
|
class TestMigration(unittest.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
db = Adapter("sqlite:///:memory:")
|
||||||
|
self.threads = threads.Controller(db)
|
||||||
|
self.comments = comments.Controller(db)
|
||||||
|
|
||||||
def test_disqus(self):
|
def test_disqus(self):
|
||||||
|
|
||||||
xml = join(dirname(__file__), "disqus.xml")
|
Disqus(self.threads, self.comments).migrate(
|
||||||
|
join(dirname(__file__), "disqus.xml"))
|
||||||
|
|
||||||
db = Adapter(SQLite3(":memory:"), conf)
|
th = self.threads.get("/")
|
||||||
Disqus(db, xml).migrate()
|
self.assertIsNotNone(th)
|
||||||
|
self.assertEqual(th.title, "Hello, World!")
|
||||||
|
self.assertEqual(th.id, 1)
|
||||||
|
|
||||||
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 2)
|
self.assertEqual(self.comments.count(th)[0], 2)
|
||||||
|
|
||||||
self.assertEqual(db.threads["/"]["title"], "Hello, World!")
|
a = self.comments.get(1)
|
||||||
self.assertEqual(db.threads["/"]["id"], 1)
|
self.assertIsNotNone(a)
|
||||||
|
|
||||||
a = db.comments.get(1)
|
self.assertEqual(a.author, "peter")
|
||||||
|
self.assertEqual(a.email, "foo@bar.com")
|
||||||
|
self.assertEqual(a.remote_addr, "127.0.0.0")
|
||||||
|
|
||||||
self.assertEqual(a["author"], "peter")
|
b = self.comments.get(2)
|
||||||
self.assertEqual(a["email"], "foo@bar.com")
|
self.assertEqual(b.parent, a.id)
|
||||||
self.assertEqual(a["remote_addr"], "127.0.0.0")
|
|
||||||
|
|
||||||
b = db.comments.get(2)
|
|
||||||
self.assertEqual(b["parent"], a["id"])
|
|
||||||
|
|
||||||
def test_wordpress(self):
|
def test_wordpress(self):
|
||||||
|
WordPress(self.threads, self.comments).migrate(
|
||||||
|
join(dirname(__file__), "wordpress.xml"))
|
||||||
|
|
||||||
xml = join(dirname(__file__), "wordpress.xml")
|
r = self.threads.get("/2014/test/")
|
||||||
|
self.assertEqual(r.title, "Hello, World…")
|
||||||
|
self.assertEqual(r.id, 1)
|
||||||
|
|
||||||
db = Adapter(SQLite3(":memory:"), conf)
|
s = self.threads.get("/?p=4")
|
||||||
WordPress(db, xml).migrate()
|
self.assertEqual(s.title, "...")
|
||||||
|
self.assertEqual(s.id, 2)
|
||||||
|
|
||||||
self.assertEqual(db.threads["/2014/test/"]["title"], "Hello, World…")
|
self.assertEqual(sum(self.comments.count(r, s)), 7)
|
||||||
self.assertEqual(db.threads["/2014/test/"]["id"], 1)
|
|
||||||
|
|
||||||
self.assertEqual(db.threads["/?p=4"]["title"], "...")
|
a = self.comments.get(1)
|
||||||
self.assertEqual(db.threads["/?p=4"]["id"], 2)
|
self.assertEqual(a.author, "Ohai")
|
||||||
|
self.assertEqual(a.text, "Erster!1")
|
||||||
|
self.assertEqual(a.remote_addr, "82.119.20.0")
|
||||||
|
|
||||||
self.assertEqual(len(db.execute("SELECT id FROM threads").fetchall()), 2)
|
b = self.comments.get(2)
|
||||||
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 7)
|
self.assertEqual(b.author, "Tester")
|
||||||
|
self.assertEqual(b.text, "Zweiter.")
|
||||||
first = db.comments.get(1)
|
|
||||||
self.assertEqual(first["author"], "Ohai")
|
|
||||||
self.assertEqual(first["text"], "Erster!1")
|
|
||||||
self.assertEqual(first["remote_addr"], "82.119.20.0")
|
|
||||||
|
|
||||||
second = db.comments.get(2)
|
|
||||||
self.assertEqual(second["author"], "Tester")
|
|
||||||
self.assertEqual(second["text"], "Zweiter.")
|
|
||||||
|
|
||||||
for i in (3, 4, 5):
|
for i in (3, 4, 5):
|
||||||
self.assertEqual(db.comments.get(i)["parent"], second["id"])
|
self.assertEqual(self.comments.get(i).parent, b.id)
|
||||||
|
|
||||||
last = db.comments.get(6)
|
last = self.comments.get(6)
|
||||||
self.assertEqual(last["author"], "Letzter :/")
|
self.assertEqual(last.author, "Letzter :/")
|
||||||
self.assertEqual(last["parent"], None)
|
self.assertEqual(last.parent, None)
|
||||||
|
|
||||||
def test_detection(self):
|
def test_detection(self):
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user