Merge branch 'feature/75', closes #75

This commit is contained in:
Martin Zimmermann 2014-05-03 00:33:31 +02:00
commit 4ee509ed02
6 changed files with 485 additions and 107 deletions

View File

@ -61,24 +61,25 @@ For more options, see :doc:`server <configuration/server>` and :doc:`client
Migration
---------
You can migrate your existing comments from Disqus_. Log into Disqus, go to
your website, click on *Discussions* and select the *Export* tab. You'll
receive an email with your comments. Unfortunately, Disqus does not export
up- and downvotes.
You can import comments from Disqus_ or WordPress_.
To import existing comments, run Isso with your configuration file:
To export your comments from Disqus, log into Disqus, go to your website, click
on *Discussions* and select the *Export* tab. You'll receive an email with your
comments. Unfortunately, Disqus does not export up- and downvotes.
To export comments from your previous WordPress installation, go to *Tools*
and export your data. WordPress WXR import is quite new and may not work for
you; please report any failures.
Now import the XML dump:
.. code-block:: sh
~> isso -c /path/to/isso.cfg import user-2013-09-02T11_39_22.971478-all.xml
~> isso -c /path/to/isso.cfg import disqus-or-wordpress.xml
[100%] 53 threads, 192 comments
Migration from WordPress_ is not possible, yet (WordPress does not export
comments). As a workaround, install the Disqus plugin, export to Disqus and
then migrate to Isso.
.. _Disqus: <https://disqus.com/>
.. _Disqus: https://disqus.com/
.. _WordPress: https://wordpress.org/
Running Isso
------------

View File

@ -25,8 +25,8 @@
<p>Because comments are not Big Data.</p>
</li>
<li>
<p><strong>Disqus Import</strong></p>
<p>You can migrate your Disqus comments without any hassle.</p>
<p><strong>Disqus/WordPress Import</strong></p>
<p>You can migrate your Disqus/WordPress comments without any hassle.</p>
</li>
<li>
<p><strong>client-side JavaScript</strong></p>

View File

@ -207,6 +207,8 @@ def main():
imprt.add_argument("dump", metavar="FILE")
imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
help="perform a trial run with no changes made")
imprt.add_argument("-t", "--type", dest="type", default=None,
choices=["disqus", "wordpress"], help="export type")
serve = subparser.add_parser("run", help="run server")
@ -214,11 +216,17 @@ def main():
conf = Config.load(args.conf)
if args.command == "import":
xxx = tempfile.NamedTemporaryFile()
dbpath = conf.get("general", "dbpath") if not args.dryrun else xxx.name
conf.set("guard", "enabled", "off")
migrate.disqus(db.SQLite3(dbpath, conf), args.dump)
if args.dryrun:
xxx = tempfile.NamedTemporaryFile()
dbpath = xxx.name
else:
dbpath = conf.get("general", "dbpath")
mydb = db.SQLite3(dbpath, conf)
migrate.dispatch(args.type, mydb, args.dump)
sys.exit(0)
if not any(conf.getiter("general", "host")):

View File

@ -1,14 +1,19 @@
# -*- encoding: utf-8 -*-
from __future__ import division
from __future__ import division, print_function
import sys
import os
import io
import re
import textwrap
from time import mktime, strptime
from time import mktime, strptime, time
from collections import defaultdict
from isso.utils import anonymize
from isso.compat import string_types
try:
input = raw_input
except NameError:
@ -21,94 +26,249 @@ except ImportError:
from xml.etree import ElementTree
ns = '{http://disqus.com}'
dsq = '{http://disqus.com/disqus-internals}'
threads = set([])
comments = set([])
def strip(val):
    """Return ``val`` with surrounding whitespace removed if it is a string.

    Any non-string value (for instance ``None``) is handed back unchanged.
    """
    return val.strip() if isinstance(val, string_types) else val
def insert(db, thread, posts):
class Progress(object):
path = urlparse(thread.find('%slink' % ns).text).path
remap = dict()
def __init__(self, end):
self.end = end or 1
if path not in db.threads:
db.threads.new(path, thread.find('%stitle' % ns).text.strip())
self.istty = sys.stdout.isatty()
self.last = 0
for item in sorted(posts, key=lambda k: k['created']):
def update(self, i, message):
dsq_id = item.pop('dsq:id')
item['parent'] = remap.get(item.pop('dsq:parent', None))
rv = db.comments.add(path, item)
remap[dsq_id] = rv["id"]
if not self.istty or message is None:
return
comments.update(set(remap.keys()))
cols = int((os.popen('stty size', 'r').read()).split()[1])
message = message[:cols - 7]
def disqus(db, xmlfile):
if db.execute("SELECT * FROM comments").fetchone():
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
tree = ElementTree.parse(xmlfile)
res = defaultdict(list)
for post in tree.findall('%spost' % ns):
item = {
'dsq:id': post.attrib.get(dsq + 'id'),
'text': post.find('%smessage' % ns).text,
'author': post.find('%sauthor/%sname' % (ns, ns)).text,
'email': post.find('%sauthor/%semail' % (ns, ns)).text,
'created': mktime(strptime(
post.find('%screatedAt' % ns).text, '%Y-%m-%dT%H:%M:%SZ')),
'remote_addr': '127.0.0.0',
'mode': 1 if post.find("%sisDeleted" % ns).text == "false" else 4
}
if post.find(ns + 'parent') is not None:
item['dsq:parent'] = post.find(ns + 'parent').attrib.get(dsq + 'id')
res[post.find('%sthread' % ns).attrib.get(dsq + 'id')].append(item)
num = len(tree.findall('%sthread' % ns))
cols = int((os.popen('stty size', 'r').read() or "25 80").split()[1])
for i, thread in enumerate(tree.findall('%sthread' % ns)):
if int(round((i+1)/num, 2) * 100) % 13 == 0:
sys.stdout.write("\r%s" % (" "*cols))
sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find('%sid' % ns).text))
if time() - self.last > 0.2:
sys.stdout.write("\r{0}".format(" " * cols))
sys.stdout.write("\r[{0:.0%}] {1}".format(i/self.end, message))
sys.stdout.flush()
self.last = time()
# skip (possibly?) duplicate, but empty thread elements
if thread.find('%sid' % ns).text is None:
continue
def finish(self, message):
self.last = 0
self.update(self.end, message + "\n")
id = thread.attrib.get(dsq + 'id')
if id in res:
threads.add(id)
insert(db, thread, res[id])
# in case a comment has been deleted (and no further childs)
db.comments._remove_stale()
class Disqus(object):
sys.stdout.write("\r%s" % (" "*cols))
sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments)))
ns = '{http://disqus.com}'
internals = '{http://disqus.com/disqus-internals}'
orphans = set(map(lambda e: e.attrib.get(dsq + "id"), tree.findall("%spost" % ns))) - comments
if orphans:
print("Found %i orphans:" % len(orphans))
for post in tree.findall("%spost" % ns):
if post.attrib.get(dsq + "id") not in orphans:
def __init__(self, db, xmlfile):
self.threads = set([])
self.comments = set([])
self.db = db
self.xmlfile = xmlfile
def insert(self, thread, posts):
path = urlparse(thread.find('%slink' % Disqus.ns).text).path
remap = dict()
if path not in self.db.threads:
self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
for item in sorted(posts, key=lambda k: k['created']):
dsq_id = item.pop('dsq:id')
item['parent'] = remap.get(item.pop('dsq:parent', None))
rv = self.db.comments.add(path, item)
remap[dsq_id] = rv["id"]
self.comments.update(set(remap.keys()))
def migrate(self):
tree = ElementTree.parse(self.xmlfile)
res = defaultdict(list)
for post in tree.findall(Disqus.ns + 'post'):
item = {
'dsq:id': post.attrib.get(Disqus.internals + 'id'),
'text': post.find(Disqus.ns + 'message').text,
'author': post.find('{0}author/{0}name'.format(Disqus.ns)).text,
'email': post.find('{0}author/{0}email'.format(Disqus.ns)).text,
'created': mktime(strptime(
post.find(Disqus.ns + 'createdAt').text, '%Y-%m-%dT%H:%M:%SZ')),
'remote_addr': anonymize(post.find(Disqus.ns + 'ipAddress').text),
'mode': 1 if post.find(Disqus.ns + "isDeleted").text == "false" else 4
}
if post.find(Disqus.ns + 'parent') is not None:
item['dsq:parent'] = post.find(Disqus.ns + 'parent').attrib.get(Disqus.internals + 'id')
res[post.find('%sthread' % Disqus.ns).attrib.get(Disqus.internals + 'id')].append(item)
progress = Progress(len(tree.findall(Disqus.ns + 'thread')))
for i, thread in enumerate(tree.findall(Disqus.ns + 'thread')):
progress.update(i, thread.find(Disqus.ns + 'id').text)
# skip (possibly?) duplicate, but empty thread elements
if thread.find(Disqus.ns + 'id').text is None:
continue
print(" * %s by %s <%s>" % (post.attrib.get(dsq + "id"),
post.find("%sauthor/%sname" % (ns, ns)).text,
post.find("%sauthor/%semail" % (ns, ns)).text))
print(textwrap.fill(post.find("%smessage" % ns).text,
initial_indent=" ", subsequent_indent=" "))
print("")
id = thread.attrib.get(Disqus.internals + 'id')
if id in res:
self.threads.add(id)
self.insert(thread, res[id])
# in case a comment has been deleted (and no further childs)
self.db.comments._remove_stale()
progress.finish("{0} threads, {1} comments".format(
len(self.threads), len(self.comments)))
orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
if orphans:
print("Found %i orphans:" % len(orphans))
for post in tree.findall(Disqus.ns + "post"):
if post.attrib.get(Disqus.internals + "id") not in orphans:
continue
print(" * {0} by {1} <{2}>".format(
post.attrib.get(Disqus.internals + "id"),
post.find("{0}author/{0}name".format(Disqus.ns)).text,
post.find("{0}author/{0}email".format(Disqus.ns)).text))
print(textwrap.fill(post.find(Disqus.ns + "message").text,
initial_indent=" ", subsequent_indent=" "))
print("")
@classmethod
def detect(cls, peek):
if 'xmlns="http://disqus.com' in peek:
return "http://disqus.com"
return None
class WordPress(object):
    """Import comments from a WordPress WXR (XML) export into an Isso database.

    Usage: ``WordPress(db, "/path/to/dump.xml").migrate()``.
    """

    # Namespace used for the ``wp:`` elements. This is only the fallback:
    # ``__init__`` overrides it per instance with the namespace version the
    # dump actually declares, if one can be detected.
    ns = "{http://wordpress.org/export/1.0/}"

    def __init__(self, db, xmlfile):
        """Remember ``db`` and ``xmlfile`` and sniff the WXR namespace.

        :param db: database object the threads and comments are written to
        :param xmlfile: path of the WXR dump
        """
        self.db = db
        self.xmlfile = xmlfile
        # Number of comments actually handed to the database so far.
        self.count = 0

        # Only the (ASCII) namespace URL matters here, so decode leniently:
        # a stray byte in the first buffer must not abort the whole import.
        with io.open(xmlfile, encoding="utf-8", errors="replace") as fp:
            ns = WordPress.detect(fp.read(io.DEFAULT_BUFFER_SIZE))

        if ns:
            self.ns = "{" + ns + "}"

    def insert(self, thread):
        """Create the thread for one ``<item>`` and import its comments.

        The thread key is the path of the item's ``link`` plus its query
        string, if any (so ``/?p=4`` style permalinks stay distinct).
        Comments are inserted lowest id first, but never before their parent,
        so that the parent's new database id is known when the child is added.

        :param thread: ``<item>`` element holding ``link``, ``title`` and
            ``comment`` children
        """
        url = urlparse(thread.find("link").text)
        path = url.path

        if url.query:
            path += "?" + url.query

        self.db.threads.new(path, thread.find("title").text.strip())

        comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
        comments.sort(key=lambda k: k["id"])

        # WordPress comment id -> id assigned by the database.
        remap = {}
        # WordPress ids of comments that still wait to be inserted.
        pending = set(c["id"] for c in comments)

        while comments:
            for i, item in enumerate(comments):
                if item["parent"] in pending:
                    # Parent not imported yet; try the next candidate.
                    continue

                # A parent that is not part of this thread maps to None.
                item["parent"] = remap.get(item["parent"])
                rv = self.db.comments.add(path, item)
                remap[item["id"]] = rv["id"]
                # Count only what was really inserted, so the summary stays
                # correct even if the loop has to give up below.
                self.count += 1

                pending.discard(item["id"])
                comments.pop(i)
                break
            else:
                # Every remaining comment waits for another remaining one
                # (e.g. a parent cycle). Should never happen, but... it's
                # WordPress. Give up on the rest instead of looping forever.
                return

    def migrate(self):
        """Parse the dump and import every item that has a title and comments.

        Items without a ``title`` element, with an empty title, or without
        any comment are skipped. Progress is reported through ``Progress``.
        """
        tree = ElementTree.parse(self.xmlfile)

        skip = 0
        items = tree.findall("channel/item")

        progress = Progress(len(items))
        for i, thread in enumerate(items):
            title = thread.find("title")
            if (title is None or title.text is None
                    or thread.find(self.ns + "comment") is None):
                skip += 1
                continue

            progress.update(i, title.text)
            self.insert(thread)

        progress.finish("{0} threads, {1} comments".format(
            len(items) - skip, self.count))

    def Comment(self, el):
        """Convert one ``<wp:comment>`` element into a comment dict.

        The dict carries the text fields (whitespace-stripped), the
        anonymized author IP, the creation timestamp, ``mode`` (1 if
        ``comment_approved`` is ``"1"``, otherwise 2), and the WordPress
        ``id`` and ``parent`` (``None`` for a parent of 0).
        """
        return {
            "text": strip(el.find(self.ns + "comment_content").text),
            "author": strip(el.find(self.ns + "comment_author").text),
            "email": strip(el.find(self.ns + "comment_author_email").text),
            "website": strip(el.find(self.ns + "comment_author_url").text),
            "remote_addr": anonymize(
                strip(el.find(self.ns + "comment_author_IP").text)),
            # NOTE(review): mktime() interprets the parsed time as local time
            # although the field is GMT -- confirm whether calendar.timegm()
            # is what is wanted here.
            "created": mktime(strptime(
                strip(el.find(self.ns + "comment_date_gmt").text),
                "%Y-%m-%d %H:%M:%S")),
            # Strip like every other field, so surrounding whitespace in the
            # dump does not turn an approved comment into an unapproved one.
            "mode": 1 if strip(el.find(self.ns + "comment_approved").text) == "1" else 2,
            "id": int(el.find(self.ns + "comment_id").text),
            "parent": int(el.find(self.ns + "comment_parent").text) or None
        }

    @classmethod
    def detect(cls, peek):
        """Return the WXR 1.x namespace URL found in ``peek``, else ``None``.

        :param peek: text from the beginning of the dump
        """
        # Raw string: "\." and "\d" are regex escapes, not string escapes.
        m = re.search(r"http://wordpress\.org/export/1\.\d/", peek)
        if m:
            return m.group(0)

        return None
def dispatch(type, db, dump):
    """Import the comment dump ``dump`` into ``db`` using the right importer.

    :param type: ``"disqus"``, ``"wordpress"``, or ``None`` to guess the
        format from the beginning of the file
    :param db: database object; also queried to see whether it already
        contains comments
    :param dump: path of the XML export
    :raises SystemExit: if the database is not empty and the user does not
        confirm with ``y``/``Y``, or if the format is unknown or cannot be
        detected
    """
    if db.execute("SELECT * FROM comments").fetchone():
        if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
            raise SystemExit("Abort.")

    if type is None:
        # The format signatures are plain ASCII, so decode leniently: bytes
        # that cannot be decoded must not abort the detection.
        with io.open(dump, encoding="utf-8", errors="replace") as fp:
            peek = fp.read(io.DEFAULT_BUFFER_SIZE)

        # Disqus wins if both signatures show up in the peeked text.
        if Disqus.detect(peek):
            type = "disqus"
        elif WordPress.detect(peek):
            type = "wordpress"

    if type == "wordpress":
        WordPress(db, dump).migrate()
    elif type == "disqus":
        Disqus(db, dump).migrate()
    else:
        raise SystemExit("Unknown format, abort.")

View File

@ -1,30 +1,95 @@
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals
try:
import unittest2 as unittest
except ImportError:
import unittest
import tempfile
from os.path import join, dirname
from isso.core import Config
from isso.db import SQLite3
from isso.migrate import disqus
from isso.migrate import Disqus, WordPress
def test_disqus():
class TestMigration(unittest.TestCase):
xml = join(dirname(__file__), "disqus.xml")
xxx = tempfile.NamedTemporaryFile()
def test_disqus(self):
db = SQLite3(xxx.name, Config.load(None))
disqus(db, xml)
xml = join(dirname(__file__), "disqus.xml")
xxx = tempfile.NamedTemporaryFile()
assert db.threads["/"]["title"] == "Hello, World!"
assert db.threads["/"]["id"] == 1
db = SQLite3(xxx.name, Config.load(None))
Disqus(db, xml).migrate()
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 2)
a = db.comments.get(1)
self.assertEqual(db.threads["/"]["title"], "Hello, World!")
self.assertEqual(db.threads["/"]["id"], 1)
assert a["author"] == "peter"
assert a["email"] == "foo@bar.com"
a = db.comments.get(1)
b = db.comments.get(2)
assert b["parent"] == a["id"]
self.assertEqual(a["author"], "peter")
self.assertEqual(a["email"], "foo@bar.com")
self.assertEqual(a["remote_addr"], "127.0.0.0")
b = db.comments.get(2)
self.assertEqual(b["parent"], a["id"])
def test_wordpress(self):
    """Import the ``wordpress.xml`` fixture and check threads and comments."""
    dump = join(dirname(__file__), "wordpress.xml")
    tmp = tempfile.NamedTemporaryFile()

    db = SQLite3(tmp.name, Config.load(None))
    WordPress(db, dump).migrate()

    # Both a pretty permalink and a "?p=X" style link become threads.
    expected_threads = (
        ("/2014/test/", "Hello, World!", 1),
        ("/?p=4", "...", 2),
    )
    for path, title, ident in expected_threads:
        self.assertEqual(db.threads[path]["title"], title)
        self.assertEqual(db.threads[path]["id"], ident)

    thread_rows = db.execute("SELECT id FROM threads").fetchall()
    self.assertEqual(len(thread_rows), 2)
    comment_rows = db.execute("SELECT id FROM comments").fetchall()
    self.assertEqual(len(comment_rows), 7)

    first = db.comments.get(1)
    self.assertEqual(first["author"], "Ohai")
    self.assertEqual(first["text"], "Erster!1")
    self.assertEqual(first["remote_addr"], "82.119.20.0")

    second = db.comments.get(2)
    self.assertEqual(second["author"], "Tester")
    self.assertEqual(second["text"], "Zweiter.")

    # Direct and nested replies alike are expected to point at "second".
    for i in (3, 4, 5):
        reply = db.comments.get(i)
        self.assertEqual(reply["parent"], second["id"])

    last = db.comments.get(6)
    self.assertEqual(last["author"], "Letzter :/")
    self.assertEqual(last["parent"], None)
def test_detection(self):
    """Check format sniffing for WXR 1.0-1.3 headers and a Disqus header."""
    wp = """\
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/%s/">"""

    # A non-numeric version must not be recognised.
    self.assertEqual(WordPress.detect(wp % "invalid"), None)

    for version in ("1.0", "1.1", "1.2", "1.3"):
        header = wp % version
        namespace = "http://wordpress.org/export/%s/" % version
        self.assertEqual(WordPress.detect(header), namespace)

    dq = '''\
<?xml version="1.0"?>
<disqus xmlns="http://disqus.com"
xmlns:dsq="http://disqus.com/disqus-internals"'''
    self.assertIsNotNone(Disqus.detect(dq))

144
isso/tests/wordpress.xml Normal file
View File

@ -0,0 +1,144 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.0/">
<!-- This WXR dump is incomplete! It only contains the elements needed for an
import; a few are not used yet, but may be useful later.
The <item> node is derived from a sort-of real-world WordPress blog,
but modified to test various things.
-->
<channel>
<item>
<title>Hello, World!</title>
<link>http://example.tld/2014/test/</link>
<pubDate>Tue, 14 Jan 2014 17:31:03 +0000</pubDate>
<dc:creator><![CDATA[Tester]]></dc:creator>
<wp:post_id>18</wp:post_id>
<wp:post_date>2014-01-14 17:31:03</wp:post_date>
<wp:post_date_gmt>2014-01-14 17:31:03</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:post_name>test</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<wp:comment>
<wp:comment_id>2</wp:comment_id>
<wp:comment_author><![CDATA[Ohai]]></wp:comment_author>
<wp:comment_author_email>test@example.org
</wp:comment_author_email>
<wp:comment_author_url>http://example.tld/</wp:comment_author_url>
<wp:comment_author_IP>::ffff:82.119.20.0</wp:comment_author_IP>
<wp:comment_date>2014-01-14 17:32:12</wp:comment_date>
<wp:comment_date_gmt>2014-01-14 17:32:12</wp:comment_date_gmt>
<wp:comment_content>
<![CDATA[Erster!1]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<!-- what's that? -->
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>0</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>6</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:27</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:27</wp:comment_date_gmt>
<wp:comment_content><![CDATA[Zweiter.]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>7</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date_gmt>2014-04-29 15:21:35</wp:comment_date_gmt>
<wp:comment_content><![CDATA[Drölfter!]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_parent>6</wp:comment_parent>
</wp:comment>
<wp:comment>
<wp:comment_id>8</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:45</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:45</wp:comment_date_gmt>
<wp:comment_content>
<![CDATA[Yet another reply.]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>7</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>9</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:52</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:52</wp:comment_date_gmt>
<wp:comment_content><![CDATA[...]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>7</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>10</wp:comment_id>
<wp:comment_author><![CDATA[Letzter :/]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:56</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:56</wp:comment_date_gmt>
<wp:comment_content><![CDATA[...]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
</item>
<!-- handle ?p=X urls -->
<item>
<title>...</title>
<link>http://example.tld/?p=4</link>
<wp:comment>
<wp:comment_id>11</wp:comment_id>
<wp:comment_author><![CDATA[Anonymous]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:56</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:57</wp:comment_date_gmt>
<wp:comment_content><![CDATA[...]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
</item>
<item>
<title>No comments</title>
<link>http://example.tld/?p=6</link>
</item>
</channel>
</rss>