Merge branch 'feature/75', closes #75

pull/83/merge
Martin Zimmermann 10 years ago
commit 4ee509ed02

@ -61,24 +61,25 @@ For more options, see :doc:`server <configuration/server>` and :doc:`client
Migration
---------
You can migrate your existing comments from Disqus_. Log into Disqus, go to
your website, click on *Discussions* and select the *Export* tab. You'll
receive an email with your comments. Unfortunately, Disqus does not export
up- and downvotes.
You can import comments from Disqus_ or WordPress_.
To import existing comments, run Isso with your configuration file:
To export your comments from Disqus, log into Disqus, go to your website, click
on *Discussions* and select the *Export* tab. You'll receive an email with your
comments. Unfortunately, Disqus does not export up- and downvotes.
.. code-block:: sh
To export comments from your previous WordPress installation, go to *Tools* and
export your data. WordPress WXR import is quite new and may not work for you;
please report any failures.
~> isso -c /path/to/isso.cfg import user-2013-09-02T11_39_22.971478-all.xml
[100%] 53 threads, 192 comments
Now import the XML dump:
Migration from WordPress_ is not possible, yet (WordPress does not export
comments). As a workaround, install the Disqus plugin, export to Disqus and
then migrate to Isso.
.. code-block:: sh
.. _Disqus: <https://disqus.com/>
~> isso -c /path/to/isso.cfg import disqus-or-wordpress.xml
[100%] 53 threads, 192 comments
.. _Disqus: https://disqus.com/
.. _WordPress: https://wordpress.org/
Running Isso
------------

@ -25,8 +25,8 @@
<p>Because comments are not Big Data.</p>
</li>
<li>
<p><strong>Disqus Import</strong></p>
<p>You can migrate your Disqus comments without any hassle.</p>
<p><strong>Disqus/WordPress Import</strong></p>
<p>You can migrate your Disqus/WordPress comments without any hassle.</p>
</li>
<li>
<p><strong>client-side JavaScript</strong></p>

@ -207,6 +207,8 @@ def main():
imprt.add_argument("dump", metavar="FILE")
imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
help="perform a trial run with no changes made")
imprt.add_argument("-t", "--type", dest="type", default=None,
choices=["disqus", "wordpress"], help="export type")
serve = subparser.add_parser("run", help="run server")
@ -214,11 +216,17 @@ def main():
conf = Config.load(args.conf)
if args.command == "import":
xxx = tempfile.NamedTemporaryFile()
dbpath = conf.get("general", "dbpath") if not args.dryrun else xxx.name
conf.set("guard", "enabled", "off")
migrate.disqus(db.SQLite3(dbpath, conf), args.dump)
if args.dryrun:
xxx = tempfile.NamedTemporaryFile()
dbpath = xxx.name
else:
dbpath = conf.get("general", "dbpath")
mydb = db.SQLite3(dbpath, conf)
migrate.dispatch(args.type, mydb, args.dump)
sys.exit(0)
if not any(conf.getiter("general", "host")):

@ -1,14 +1,19 @@
# -*- encoding: utf-8 -*-
from __future__ import division
from __future__ import division, print_function
import sys
import os
import io
import re
import textwrap
from time import mktime, strptime
from time import mktime, strptime, time
from collections import defaultdict
from isso.utils import anonymize
from isso.compat import string_types
try:
input = raw_input
except NameError:
@ -21,94 +26,249 @@ except ImportError:
from xml.etree import ElementTree
ns = '{http://disqus.com}'
dsq = '{http://disqus.com/disqus-internals}'
threads = set([])
comments = set([])
def strip(val):
    """Return *val* without surrounding whitespace if it is a string.

    Any value that is not an instance of ``string_types`` (for example
    ``None``) is handed back unchanged.
    """
    return val.strip() if isinstance(val, string_types) else val
def insert(db, thread, posts):
class Progress(object):
path = urlparse(thread.find('%slink' % ns).text).path
remap = dict()
def __init__(self, end):
self.end = end or 1
if path not in db.threads:
db.threads.new(path, thread.find('%stitle' % ns).text.strip())
self.istty = sys.stdout.isatty()
self.last = 0
for item in sorted(posts, key=lambda k: k['created']):
def update(self, i, message):
dsq_id = item.pop('dsq:id')
item['parent'] = remap.get(item.pop('dsq:parent', None))
rv = db.comments.add(path, item)
remap[dsq_id] = rv["id"]
if not self.istty or message is None:
return
comments.update(set(remap.keys()))
cols = int((os.popen('stty size', 'r').read()).split()[1])
message = message[:cols - 7]
if time() - self.last > 0.2:
sys.stdout.write("\r{0}".format(" " * cols))
sys.stdout.write("\r[{0:.0%}] {1}".format(i/self.end, message))
sys.stdout.flush()
self.last = time()
def disqus(db, xmlfile):
def finish(self, message):
self.last = 0
self.update(self.end, message + "\n")
if db.execute("SELECT * FROM comments").fetchone():
if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
raise SystemExit("Abort.")
tree = ElementTree.parse(xmlfile)
res = defaultdict(list)
class Disqus(object):
for post in tree.findall('%spost' % ns):
ns = '{http://disqus.com}'
internals = '{http://disqus.com/disqus-internals}'
item = {
'dsq:id': post.attrib.get(dsq + 'id'),
'text': post.find('%smessage' % ns).text,
'author': post.find('%sauthor/%sname' % (ns, ns)).text,
'email': post.find('%sauthor/%semail' % (ns, ns)).text,
'created': mktime(strptime(
post.find('%screatedAt' % ns).text, '%Y-%m-%dT%H:%M:%SZ')),
'remote_addr': '127.0.0.0',
'mode': 1 if post.find("%sisDeleted" % ns).text == "false" else 4
}
def __init__(self, db, xmlfile):
self.threads = set([])
self.comments = set([])
if post.find(ns + 'parent') is not None:
item['dsq:parent'] = post.find(ns + 'parent').attrib.get(dsq + 'id')
self.db = db
self.xmlfile = xmlfile
res[post.find('%sthread' % ns).attrib.get(dsq + 'id')].append(item)
def insert(self, thread, posts):
num = len(tree.findall('%sthread' % ns))
cols = int((os.popen('stty size', 'r').read() or "25 80").split()[1])
path = urlparse(thread.find('%slink' % Disqus.ns).text).path
remap = dict()
for i, thread in enumerate(tree.findall('%sthread' % ns)):
if path not in self.db.threads:
self.db.threads.new(path, thread.find(Disqus.ns + 'title').text.strip())
if int(round((i+1)/num, 2) * 100) % 13 == 0:
for item in sorted(posts, key=lambda k: k['created']):
sys.stdout.write("\r%s" % (" "*cols))
sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find('%sid' % ns).text))
sys.stdout.flush()
dsq_id = item.pop('dsq:id')
item['parent'] = remap.get(item.pop('dsq:parent', None))
rv = self.db.comments.add(path, item)
remap[dsq_id] = rv["id"]
self.comments.update(set(remap.keys()))
def migrate(self):
tree = ElementTree.parse(self.xmlfile)
res = defaultdict(list)
# skip (possibly?) duplicate, but empty thread elements
if thread.find('%sid' % ns).text is None:
continue
for post in tree.findall(Disqus.ns + 'post'):
id = thread.attrib.get(dsq + 'id')
if id in res:
threads.add(id)
insert(db, thread, res[id])
item = {
'dsq:id': post.attrib.get(Disqus.internals + 'id'),
'text': post.find(Disqus.ns + 'message').text,
'author': post.find('{0}author/{0}name'.format(Disqus.ns)).text,
'email': post.find('{0}author/{0}email'.format(Disqus.ns)).text,
'created': mktime(strptime(
post.find(Disqus.ns + 'createdAt').text, '%Y-%m-%dT%H:%M:%SZ')),
'remote_addr': anonymize(post.find(Disqus.ns + 'ipAddress').text),
'mode': 1 if post.find(Disqus.ns + "isDeleted").text == "false" else 4
}
# in case a comment has been deleted (and no further childs)
db.comments._remove_stale()
if post.find(Disqus.ns + 'parent') is not None:
item['dsq:parent'] = post.find(Disqus.ns + 'parent').attrib.get(Disqus.internals + 'id')
sys.stdout.write("\r%s" % (" "*cols))
sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments)))
res[post.find('%sthread' % Disqus.ns).attrib.get(Disqus.internals + 'id')].append(item)
orphans = set(map(lambda e: e.attrib.get(dsq + "id"), tree.findall("%spost" % ns))) - comments
if orphans:
print("Found %i orphans:" % len(orphans))
for post in tree.findall("%spost" % ns):
if post.attrib.get(dsq + "id") not in orphans:
progress = Progress(len(tree.findall(Disqus.ns + 'thread')))
for i, thread in enumerate(tree.findall(Disqus.ns + 'thread')):
progress.update(i, thread.find(Disqus.ns + 'id').text)
# skip (possibly?) duplicate, but empty thread elements
if thread.find(Disqus.ns + 'id').text is None:
continue
print(" * %s by %s <%s>" % (post.attrib.get(dsq + "id"),
post.find("%sauthor/%sname" % (ns, ns)).text,
post.find("%sauthor/%semail" % (ns, ns)).text))
print(textwrap.fill(post.find("%smessage" % ns).text,
initial_indent=" ", subsequent_indent=" "))
print("")
id = thread.attrib.get(Disqus.internals + 'id')
if id in res:
self.threads.add(id)
self.insert(thread, res[id])
# in case a comment has been deleted (and no further childs)
self.db.comments._remove_stale()
progress.finish("{0} threads, {1} comments".format(
len(self.threads), len(self.comments)))
orphans = set(map(lambda e: e.attrib.get(Disqus.internals + "id"), tree.findall(Disqus.ns + "post"))) - self.comments
if orphans:
print("Found %i orphans:" % len(orphans))
for post in tree.findall(Disqus.ns + "post"):
if post.attrib.get(Disqus.internals + "id") not in orphans:
continue
print(" * {0} by {1} <{2}>".format(
post.attrib.get(Disqus.internals + "id"),
post.find("{0}author/{0}name".format(Disqus.ns)).text,
post.find("{0}author/{0}email".format(Disqus.ns)).text))
print(textwrap.fill(post.find(Disqus.ns + "message").text,
initial_indent=" ", subsequent_indent=" "))
print("")
# Sniff the beginning of a dump (`peek`, a text snippet) for the Disqus
# default XML namespace declaration.
# Returns the namespace URL "http://disqus.com" when the declaration is
# present in `peek`, otherwise None.
@classmethod
def detect(cls, peek):
if 'xmlns="http://disqus.com' in peek:
return "http://disqus.com"
return None
class WordPress(object):
    """Importer for WordPress WXR comment dumps.

    Reads the ``channel/item`` elements of the export, creates one thread per
    item that has a title and at least one comment, and inserts the item's
    comments so that every reply is added after its parent.
    """

    # Fallback WXR namespace in Clark notation; ``__init__`` overrides it per
    # instance with the export version actually found in the dump.
    ns = "{http://wordpress.org/export/1.0/}"

    def __init__(self, db, xmlfile):
        """Store the target database and sniff the dump's WXR namespace.

        :param db: database object; ``threads.new`` and ``comments.add`` are
            called on it during the import
        :param xmlfile: path to the WXR export file
        """
        self.db = db
        self.xmlfile = xmlfile
        self.count = 0

        # Only the ASCII namespace URL matters for detection, so decode
        # leniently instead of relying on the locale's default encoding
        # (which may raise on non-ASCII content near the top of the file).
        with io.open(xmlfile, encoding="utf-8", errors="replace") as fp:
            ns = WordPress.detect(fp.read(io.DEFAULT_BUFFER_SIZE))

        if ns:
            self.ns = "{" + ns + "}"

    def insert(self, thread):
        """Create the thread for one ``<item>`` and add all of its comments.

        The thread path is the URL path of ``<link>`` plus the query string,
        if any (so ``?p=X`` style permalinks stay distinguishable).

        :param thread: the ``<item>`` element to import
        """
        url = urlparse(thread.find("link").text)
        path = url.path

        if url.query:
            path += "?" + url.query

        self.db.threads.new(path, thread.find("title").text.strip())

        comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
        comments.sort(key=lambda k: k["id"])

        # WordPress comment id -> id assigned by the database
        remap = {}
        # ids of comments that still wait for insertion
        ids = set(c["id"] for c in comments)

        self.count += len(ids)

        while comments:
            for i, item in enumerate(comments):
                # the parent has not been inserted yet, try the next comment
                if item["parent"] in ids:
                    continue

                item["parent"] = remap.get(item["parent"], None)
                rv = self.db.comments.add(path, item)
                remap[item["id"]] = rv["id"]

                ids.remove(item["id"])
                comments.pop(i)

                # the list was modified, restart the scan from the beginning
                break
            else:
                # should never happen, but... it's WordPress.
                # (no remaining comment is insertable, e.g. a parent cycle)
                return

    def migrate(self):
        """Import every item of the dump that has a title and comments.

        Items without a ``<title>`` element, with an empty title or without
        any comment are skipped. Progress is reported through ``Progress``.
        """
        tree = ElementTree.parse(self.xmlfile)

        skip = 0
        items = tree.findall("channel/item")

        progress = Progress(len(items))
        for i, thread in enumerate(items):
            # ``findtext`` yields None for a missing <title> element and an
            # empty string for an empty one; both are skipped.
            title = thread.findtext("title")
            if not title or thread.find(self.ns + "comment") is None:
                skip += 1
                continue

            progress.update(i, title)
            self.insert(thread)

        progress.finish("{0} threads, {1} comments".format(
            len(items) - skip, self.count))

    def Comment(self, el):
        """Convert a ``<wp:comment>`` element into a comment dictionary.

        :param el: the comment element
        :return: dict with the keys ``text``, ``author``, ``email``,
            ``website``, ``remote_addr``, ``created``, ``mode``, ``id`` and
            ``parent``; ``parent`` is None when ``comment_parent`` is 0
        """
        return {
            "text": strip(el.find(self.ns + "comment_content").text),
            "author": strip(el.find(self.ns + "comment_author").text),
            "email": strip(el.find(self.ns + "comment_author_email").text),
            "website": strip(el.find(self.ns + "comment_author_url").text),
            "remote_addr": anonymize(
                strip(el.find(self.ns + "comment_author_IP").text)),
            "created": mktime(strptime(
                strip(el.find(self.ns + "comment_date_gmt").text),
                "%Y-%m-%d %H:%M:%S")),
            # mode 1 for approved comments, 2 for everything else
            "mode": 1 if el.find(self.ns + "comment_approved").text == "1" else 2,
            "id": int(el.find(self.ns + "comment_id").text),
            "parent": int(el.find(self.ns + "comment_parent").text) or None
        }

    @classmethod
    def detect(cls, peek):
        """Return the WXR 1.x namespace URL found in *peek*, or None.

        :param peek: text from the beginning of a dump
        """
        # Raw string: ``\.`` and ``\d`` are regex escapes, not string escapes.
        m = re.search(r"http://wordpress\.org/export/1\.\d/", peek)
        if m:
            return m.group(0)
        return None
def dispatch(type, db, dump):
    """Run the importer matching *type* on the dump file.

    If the comments table already contains a row, the user is asked for
    confirmation first. When *type* is None, the format is guessed from the
    beginning of the dump; if both detectors match, "disqus" is chosen.

    :param type: "wordpress", "disqus" or None for auto-detection
    :param db: database object passed on to the importer
    :param dump: path to the XML export
    :raises SystemExit: if the user declines to continue or the format is
        neither given nor detected
    """
    if db.execute("SELECT * FROM comments").fetchone():
        if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
            raise SystemExit("Abort.")

    if type is None:
        # Detection only looks for ASCII namespace URLs, so decode leniently
        # instead of relying on the locale's default encoding (which may
        # raise on non-ASCII content near the top of the file).
        with io.open(dump, encoding="utf-8", errors="replace") as fp:
            peek = fp.read(io.DEFAULT_BUFFER_SIZE)

        if WordPress.detect(peek):
            type = "wordpress"

        if Disqus.detect(peek):
            type = "disqus"

    if type == "wordpress":
        WordPress(db, dump).migrate()
    elif type == "disqus":
        Disqus(db, dump).migrate()
    else:
        raise SystemExit("Unknown format, abort.")

@ -1,30 +1,95 @@
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals
try:
import unittest2 as unittest
except ImportError:
import unittest
import tempfile
from os.path import join, dirname
from isso.core import Config
from isso.db import SQLite3
from isso.migrate import disqus
from isso.migrate import Disqus, WordPress
class TestMigration(unittest.TestCase):
def test_disqus(self):
xml = join(dirname(__file__), "disqus.xml")
xxx = tempfile.NamedTemporaryFile()
db = SQLite3(xxx.name, Config.load(None))
Disqus(db, xml).migrate()
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 2)
self.assertEqual(db.threads["/"]["title"], "Hello, World!")
self.assertEqual(db.threads["/"]["id"], 1)
a = db.comments.get(1)
self.assertEqual(a["author"], "peter")
self.assertEqual(a["email"], "foo@bar.com")
self.assertEqual(a["remote_addr"], "127.0.0.0")
b = db.comments.get(2)
self.assertEqual(b["parent"], a["id"])
def test_wordpress(self):
xml = join(dirname(__file__), "wordpress.xml")
xxx = tempfile.NamedTemporaryFile()
db = SQLite3(xxx.name, Config.load(None))
WordPress(db, xml).migrate()
self.assertEqual(db.threads["/2014/test/"]["title"], "Hello, World!")
self.assertEqual(db.threads["/2014/test/"]["id"], 1)
self.assertEqual(db.threads["/?p=4"]["title"], "...")
self.assertEqual(db.threads["/?p=4"]["id"], 2)
self.assertEqual(len(db.execute("SELECT id FROM threads").fetchall()), 2)
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 7)
first = db.comments.get(1)
self.assertEqual(first["author"], "Ohai")
self.assertEqual(first["text"], "Erster!1")
self.assertEqual(first["remote_addr"], "82.119.20.0")
def test_disqus():
second = db.comments.get(2)
self.assertEqual(second["author"], "Tester")
self.assertEqual(second["text"], "Zweiter.")
xml = join(dirname(__file__), "disqus.xml")
xxx = tempfile.NamedTemporaryFile()
for i in (3, 4, 5):
self.assertEqual(db.comments.get(i)["parent"], second["id"])
db = SQLite3(xxx.name, Config.load(None))
disqus(db, xml)
last = db.comments.get(6)
self.assertEqual(last["author"], "Letzter :/")
self.assertEqual(last["parent"], None)
assert db.threads["/"]["title"] == "Hello, World!"
assert db.threads["/"]["id"] == 1
def test_detection(self):
wp = """\
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/%s/">"""
a = db.comments.get(1)
self.assertEqual(WordPress.detect(wp % "invalid"), None)
assert a["author"] == "peter"
assert a["email"] == "foo@bar.com"
for version in ("1.0", "1.1", "1.2", "1.3"):
self.assertEqual(WordPress.detect(wp % version),
"http://wordpress.org/export/%s/" % version)
b = db.comments.get(2)
assert b["parent"] == a["id"]
dq = '''\
<?xml version="1.0"?>
<disqus xmlns="http://disqus.com"
xmlns:dsq="http://disqus.com/disqus-internals"'''
self.assertIsNotNone(Disqus.detect(dq))

@ -0,0 +1,144 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.0/">
<!-- This WXR dump is incomplete! It only contains the elements needed for an
import; a few are not used yet, but may become useful later.
The <item> node is derived from a sort-of real-world WordPress blog,
but modified to test various things.
-->
<channel>
<item>
<title>Hello, World!</title>
<link>http://example.tld/2014/test/</link>
<pubDate>Tue, 14 Jan 2014 17:31:03 +0000</pubDate>
<dc:creator><![CDATA[Tester]]></dc:creator>
<wp:post_id>18</wp:post_id>
<wp:post_date>2014-01-14 17:31:03</wp:post_date>
<wp:post_date_gmt>2014-01-14 17:31:03</wp:post_date_gmt>
<wp:comment_status>open</wp:comment_status>
<wp:post_name>test</wp:post_name>
<wp:status>publish</wp:status>
<wp:post_type>post</wp:post_type>
<wp:comment>
<wp:comment_id>2</wp:comment_id>
<wp:comment_author><![CDATA[Ohai]]></wp:comment_author>
<wp:comment_author_email>test@example.org
</wp:comment_author_email>
<wp:comment_author_url>http://example.tld/</wp:comment_author_url>
<wp:comment_author_IP>::ffff:82.119.20.0</wp:comment_author_IP>
<wp:comment_date>2014-01-14 17:32:12</wp:comment_date>
<wp:comment_date_gmt>2014-01-14 17:32:12</wp:comment_date_gmt>
<wp:comment_content>
<![CDATA[Erster!1]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<!-- what's that? -->
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>0</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>6</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:27</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:27</wp:comment_date_gmt>
<wp:comment_content><![CDATA[Zweiter.]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>7</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date_gmt>2014-04-29 15:21:35</wp:comment_date_gmt>
<wp:comment_content><![CDATA[Drölfter!]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_parent>6</wp:comment_parent>
</wp:comment>
<wp:comment>
<wp:comment_id>8</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:45</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:45</wp:comment_date_gmt>
<wp:comment_content>
<![CDATA[Yet another reply.]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>7</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>9</wp:comment_id>
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:52</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:52</wp:comment_date_gmt>
<wp:comment_content><![CDATA[...]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>7</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
<wp:comment>
<wp:comment_id>10</wp:comment_id>
<wp:comment_author><![CDATA[Letzter :/]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:56</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:56</wp:comment_date_gmt>
<wp:comment_content><![CDATA[...]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
</item>
<!-- handle ?p=X urls -->
<item>
<title>...</title>
<link>http://example.tld/?p=4</link>
<wp:comment>
<wp:comment_id>11</wp:comment_id>
<wp:comment_author><![CDATA[Anonymous]]></wp:comment_author>
<wp:comment_author_email>info@posativ.org
</wp:comment_author_email>
<wp:comment_author_url></wp:comment_author_url>
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
<wp:comment_date>2014-04-29 15:21:56</wp:comment_date>
<wp:comment_date_gmt>2014-04-29 15:21:57</wp:comment_date_gmt>
<wp:comment_content><![CDATA[...]]></wp:comment_content>
<wp:comment_approved>1</wp:comment_approved>
<wp:comment_type></wp:comment_type>
<wp:comment_parent>0</wp:comment_parent>
<wp:comment_user_id>1</wp:comment_user_id>
</wp:comment>
</item>
<item>
<title>No comments</title>
<link>http://example.tld/?p=6</link>
</item>
</channel>
</rss>
Loading…
Cancel
Save