add initial support to import WordPress comments
This commit is contained in:
parent
0154113c80
commit
12f8af8434
@ -207,6 +207,8 @@ def main():
|
|||||||
imprt.add_argument("dump", metavar="FILE")
|
imprt.add_argument("dump", metavar="FILE")
|
||||||
imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
|
imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
|
||||||
help="perform a trial run with no changes made")
|
help="perform a trial run with no changes made")
|
||||||
|
imprt.add_argument("-t", "--type", dest="type", default=None,
|
||||||
|
choices=["disqus", "wordpress"], help="export type")
|
||||||
|
|
||||||
serve = subparser.add_parser("run", help="run server")
|
serve = subparser.add_parser("run", help="run server")
|
||||||
|
|
||||||
@ -223,7 +225,7 @@ def main():
|
|||||||
dbpath = conf.get("general", "dbpath")
|
dbpath = conf.get("general", "dbpath")
|
||||||
|
|
||||||
mydb = db.SQLite3(dbpath, conf)
|
mydb = db.SQLite3(dbpath, conf)
|
||||||
migrate.dispatch(mydb, args.dump)
|
migrate.dispatch(args.type, mydb, args.dump)
|
||||||
|
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
128
isso/migrate.py
128
isso/migrate.py
@ -1,14 +1,18 @@
|
|||||||
# -*- encoding: utf-8 -*-
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import division
|
from __future__ import division, print_function
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import io
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
from time import mktime, strptime
|
from time import mktime, strptime, time
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from isso.utils import anonymize
|
||||||
|
from isso.compat import string_types
|
||||||
|
|
||||||
try:
|
try:
|
||||||
input = raw_input
|
input = raw_input
|
||||||
except NameError:
|
except NameError:
|
||||||
@ -22,6 +26,39 @@ except ImportError:
|
|||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
|
|
||||||
|
|
||||||
|
def strip(val):
    """Return `val` without surrounding whitespace if it is a string.

    Values that are not instances of `string_types` (``None``, for
    example) are handed back untouched.
    """
    return val.strip() if isinstance(val, string_types) else val
|
||||||
|
|
||||||
|
|
||||||
|
class Progress(object):
    """Single-line progress indicator written to stdout.

    Nothing is printed unless stdout is a terminal.  Redraws are throttled
    to at most one every 0.2 seconds.
    """

    def __init__(self, end):
        # Total number of steps; fall back to 1 so the percentage computed
        # in update() never divides by zero.
        self.end = end or 1

        self.istty = sys.stdout.isatty()
        # Timestamp of the most recent redraw (0 means "never drawn").
        self.last = 0

    @staticmethod
    def _columns(default=80):
        """Return the terminal width as reported by ``stty size``.

        `default` is returned when the command cannot be run or yields no
        usable output (e.g. there is no controlling terminal).
        """
        try:
            pipe = os.popen('stty size', 'r')
        except OSError:
            return default

        # Always close the pipe so the child process is reaped.
        try:
            output = pipe.read()
        finally:
            pipe.close()

        try:
            return int(output.split()[1])
        except (IndexError, ValueError):
            return default

    def update(self, i, message):
        """Redraw the line as ``[NN%] message`` for step `i` of `self.end`.

        The call is a no-op when stdout is not a terminal, when `message`
        is None, or when the previous redraw is less than 0.2 s old.
        """
        if not self.istty or message is None:
            return

        # Check the throttle first: skipped updates must not spawn `stty`.
        if time() - self.last > 0.2:
            cols = self._columns()
            # Keep room for the "[100%] " prefix.
            message = message[:cols - 7]

            # Blank the whole line before writing the new content.
            sys.stdout.write("\r{0}".format(" " * cols))
            sys.stdout.write("\r[{0:.0%}] {1}".format(i / self.end, message))
            sys.stdout.flush()
            self.last = time()

    def finish(self, message):
        """Draw the final (100%) state and terminate the line."""
        # Reset the timestamp so the final redraw is never throttled.
        self.last = 0
        self.update(self.end, message)

        # Write the newline separately so truncation of a long message in
        # update() cannot swallow it.
        if self.istty and message is not None:
            sys.stdout.write("\n")
            sys.stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
class Disqus(object):
|
class Disqus(object):
|
||||||
|
|
||||||
ns = '{http://disqus.com}'
|
ns = '{http://disqus.com}'
|
||||||
@ -116,9 +153,94 @@ class Disqus(object):
|
|||||||
print("")
|
print("")
|
||||||
|
|
||||||
|
|
||||||
def dispatch(db, dump):
|
class WordPress(object):
    """Importer for the comments contained in a WordPress WXR export."""

    # ElementTree prefix for elements of the ``wp:`` namespace (export 1.0).
    ns = "{http://wordpress.org/export/1.0/}"

    def __init__(self, db, xmlfile):
        self.db = db
        self.xmlfile = xmlfile
        # Number of comments actually written to the database.
        self.count = 0

    def insert(self, thread):
        """Create a thread for one ``<item>`` element and add its comments.

        Comments are added in ascending order of their WordPress id, but a
        comment is held back while its parent is still waiting to be
        inserted, so the parent's new database id is known by the time the
        child is written.  A parent id that does not belong to this thread
        is replaced by None.
        """
        path = urlparse(thread.find("link").text).path
        # NOTE(review): an empty <title> is passed on as None instead of
        # raising AttributeError -- confirm db.threads.new accepts that.
        self.db.threads.new(path, strip(thread.find("title").text))

        comments = [self.Comment(el) for el in thread.findall(self.ns + "comment")]
        comments.sort(key=lambda k: k["id"])

        # WordPress comment id -> id assigned by the database.
        remap = {}
        # WordPress ids of the comments not inserted yet.
        ids = set(c["id"] for c in comments)

        while comments:
            for i, item in enumerate(comments):
                # Parent still pending: try this comment again later.
                if item["parent"] in ids:
                    continue

                item["parent"] = remap.get(item["parent"], None)
                rv = self.db.comments.add(path, item)
                remap[item["id"]] = rv["id"]

                ids.remove(item["id"])
                comments.pop(i)
                # Count per insert so that bailing out below does not
                # report comments that were never written.
                self.count += 1

                break
            else:
                # Every remaining comment waits for another pending one.
                # Should never happen, but... it's WordPress.
                return

    def migrate(self):
        """Parse the export file and import every ``channel/item`` element."""
        tree = ElementTree.parse(self.xmlfile)
        items = tree.findall("channel/item")

        progress = Progress(len(items))
        for i, thread in enumerate(items):
            progress.update(i, thread.find("title").text)
            self.insert(thread)

        progress.finish("{0} threads, {1} comments".format(len(items), self.count))

    @classmethod
    def Comment(cls, el):
        """Convert a ``<wp:comment>`` element into a comment dict.

        The result carries the keys ``text``, ``author``, ``email``,
        ``website``, ``remote_addr``, ``created``, ``mode``, ``id`` and
        ``parent``.  ``parent`` is None when the export gives 0.
        """
        # Local import: the module header only imports from `time`.
        from calendar import timegm

        def text(tag):
            return el.find(cls.ns + tag).text

        return {
            "text": strip(text("comment_content")),
            "author": strip(text("comment_author")),
            "email": strip(text("comment_author_email")),
            "website": strip(text("comment_author_url")),
            "remote_addr": anonymize(strip(text("comment_author_IP"))),
            # comment_date_gmt is in GMT, so it must be converted with
            # timegm(); mktime() would interpret it as local time and shift
            # the result by the local UTC offset.
            "created": float(timegm(strptime(
                strip(text("comment_date_gmt")), "%Y-%m-%d %H:%M:%S"))),
            # presumably 1 = accepted and 2 = awaiting moderation -- verify
            # against the database layer.
            "mode": 1 if text("comment_approved") == "1" else 2,
            "id": int(text("comment_id")),
            "parent": int(text("comment_parent")) or None
        }
|
||||||
|
|
||||||
|
|
||||||
|
def dispatch(type, db, dump):
    """Import the comment export `dump` into `db`.

    :param type: "wordpress", "disqus", or None to detect the format from
                 the first 2048 characters of `dump`
    :param db: database object; asked for confirmation on stdin if its
               ``comments`` table already has rows
    :param dump: path of the export file
    :raises SystemExit: if the user declines to continue or the format is
                        neither given nor recognized
    """
    if db.execute("SELECT * FROM comments").fetchone():
        if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
            raise SystemExit("Abort.")

    if type is None:

        # Decode explicitly as UTF-8 rather than with the locale's default
        # encoding, and replace undecodable bytes: this is only a sniff of
        # the file head and must not fail on non-ASCII content.
        with io.open(dump, encoding="utf-8", errors="replace") as fp:
            peek = fp.read(2048)

        if 'xmlns:wp="%s"' % WordPress.ns[1:-1] in peek:
            type = "wordpress"

        # Checked second on purpose: a Disqus marker takes precedence.
        if '<disqus xmlns=' in peek:
            type = "disqus"

    if type == "wordpress":
        WordPress(db, dump).migrate()
    elif type == "disqus":
        Disqus(db, dump).migrate()
    else:
        raise SystemExit("Unknown format, abort.")
|
||||||
|
@ -11,7 +11,7 @@ from os.path import join, dirname
|
|||||||
from isso.core import Config
|
from isso.core import Config
|
||||||
|
|
||||||
from isso.db import SQLite3
|
from isso.db import SQLite3
|
||||||
from isso.migrate import Disqus
|
from isso.migrate import Disqus, WordPress
|
||||||
|
|
||||||
|
|
||||||
class TestMigration(unittest.TestCase):
|
class TestMigration(unittest.TestCase):
|
||||||
@ -35,11 +35,30 @@ class TestMigration(unittest.TestCase):
|
|||||||
b = db.comments.get(2)
|
b = db.comments.get(2)
|
||||||
self.assertEqual(b["parent"], a["id"])
|
self.assertEqual(b["parent"], a["id"])
|
||||||
|
|
||||||
|
def test_wordpress(self):
    """Import the bundled WordPress fixture and check threads and comments."""
    fixture = join(dirname(__file__), "wordpress.xml")
    # Keep a reference so the temporary database file stays around.
    tmp = tempfile.NamedTemporaryFile()

    db = SQLite3(tmp.name, Config.load(None))
    WordPress(db, fixture).migrate()

    for key, expected in (("title", "Hello, World!"), ("id", 1)):
        self.assertEqual(db.threads["/2014/test/"][key], expected)

    rows = db.execute("SELECT id FROM comments").fetchall()
    self.assertEqual(len(rows), 6)

    first = db.comments.get(1)
    for key, expected in (("author", "Ohai"), ("text", "Erster!1")):
        self.assertEqual(first[key], expected)

    second = db.comments.get(2)
    for key, expected in (("author", "Tester"), ("text", "Zweiter.")):
        self.assertEqual(second[key], expected)

    # Comments 3-5 are all expected to hang below the second comment.
    for reply_id in (3, 4, 5):
        self.assertEqual(db.comments.get(reply_id)["parent"], second["id"])

    last = db.comments.get(6)
    self.assertEqual(last["author"], "Letzter :/")
    self.assertEqual(last["parent"], None)
|
||||||
|
119
isso/tests/wordpress.xml
Normal file
119
isso/tests/wordpress.xml
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss version="2.0"
|
||||||
|
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:wp="http://wordpress.org/export/1.0/">
|
||||||
|
<!-- This WXR dump is incomplete! It only contains elements needed for an
|
||||||
|
import; a few are not used yet, but may be useful later.
|
||||||
|
|
||||||
|
The <item> node is derived from a sort-of real-world WordPress blog,
|
||||||
|
but modified to test various things.
|
||||||
|
-->
|
||||||
|
<channel>
|
||||||
|
<item>
|
||||||
|
<title>Hello, World!</title>
|
||||||
|
<link>http://example.tld/2014/test/</link>
|
||||||
|
<pubDate>Tue, 14 Jan 2014 17:31:03 +0000</pubDate>
|
||||||
|
<dc:creator><![CDATA[Tester]]></dc:creator>
|
||||||
|
<wp:post_id>18</wp:post_id>
|
||||||
|
<wp:post_date>2014-01-14 17:31:03</wp:post_date>
|
||||||
|
<wp:post_date_gmt>2014-01-14 17:31:03</wp:post_date_gmt>
|
||||||
|
<wp:comment_status>open</wp:comment_status>
|
||||||
|
<wp:post_name>test</wp:post_name>
|
||||||
|
<wp:status>publish</wp:status>
|
||||||
|
<wp:post_type>post</wp:post_type>
|
||||||
|
<wp:comment>
|
||||||
|
<wp:comment_id>2</wp:comment_id>
|
||||||
|
<wp:comment_author><![CDATA[Ohai]]></wp:comment_author>
|
||||||
|
<wp:comment_author_email>test@example.org
|
||||||
|
</wp:comment_author_email>
|
||||||
|
<wp:comment_author_url>http://example.tld/</wp:comment_author_url>
|
||||||
|
<wp:comment_author_IP>::ffff:82.119.20.0</wp:comment_author_IP>
|
||||||
|
<wp:comment_date>2014-01-14 17:32:12</wp:comment_date>
|
||||||
|
<wp:comment_date_gmt>2014-01-14 17:32:12</wp:comment_date_gmt>
|
||||||
|
<wp:comment_content>
|
||||||
|
<![CDATA[Erster!1]]></wp:comment_content>
|
||||||
|
<wp:comment_approved>1</wp:comment_approved>
|
||||||
|
|
||||||
|
<!-- what's that? -->
|
||||||
|
<wp:comment_type></wp:comment_type>
|
||||||
|
|
||||||
|
<wp:comment_parent>0</wp:comment_parent>
|
||||||
|
<wp:comment_user_id>0</wp:comment_user_id>
|
||||||
|
</wp:comment>
|
||||||
|
<wp:comment>
|
||||||
|
<wp:comment_id>6</wp:comment_id>
|
||||||
|
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||||
|
<wp:comment_author_email>info@posativ.org
|
||||||
|
</wp:comment_author_email>
|
||||||
|
<wp:comment_author_url></wp:comment_author_url>
|
||||||
|
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||||
|
<wp:comment_date>2014-04-29 15:21:27</wp:comment_date>
|
||||||
|
<wp:comment_date_gmt>2014-04-29 15:21:27</wp:comment_date_gmt>
|
||||||
|
<wp:comment_content><![CDATA[Zweiter.]]></wp:comment_content>
|
||||||
|
<wp:comment_approved>1</wp:comment_approved>
|
||||||
|
<wp:comment_type></wp:comment_type>
|
||||||
|
<wp:comment_parent>0</wp:comment_parent>
|
||||||
|
<wp:comment_user_id>1</wp:comment_user_id>
|
||||||
|
</wp:comment>
|
||||||
|
<wp:comment>
|
||||||
|
<wp:comment_id>7</wp:comment_id>
|
||||||
|
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||||
|
<wp:comment_author_email>info@posativ.org
|
||||||
|
</wp:comment_author_email>
|
||||||
|
<wp:comment_author_url></wp:comment_author_url>
|
||||||
|
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||||
|
<wp:comment_date_gmt>2014-04-29 15:21:35</wp:comment_date_gmt>
|
||||||
|
<wp:comment_content><![CDATA[Drölfter!]]></wp:comment_content>
|
||||||
|
<wp:comment_approved>1</wp:comment_approved>
|
||||||
|
<wp:comment_parent>6</wp:comment_parent>
|
||||||
|
</wp:comment>
|
||||||
|
<wp:comment>
|
||||||
|
<wp:comment_id>8</wp:comment_id>
|
||||||
|
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||||
|
<wp:comment_author_email>info@posativ.org
|
||||||
|
</wp:comment_author_email>
|
||||||
|
<wp:comment_author_url></wp:comment_author_url>
|
||||||
|
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||||
|
<wp:comment_date>2014-04-29 15:21:45</wp:comment_date>
|
||||||
|
<wp:comment_date_gmt>2014-04-29 15:21:45</wp:comment_date_gmt>
|
||||||
|
<wp:comment_content>
|
||||||
|
<![CDATA[Yet another reply.]]></wp:comment_content>
|
||||||
|
<wp:comment_approved>1</wp:comment_approved>
|
||||||
|
<wp:comment_type></wp:comment_type>
|
||||||
|
<wp:comment_parent>7</wp:comment_parent>
|
||||||
|
<wp:comment_user_id>1</wp:comment_user_id>
|
||||||
|
</wp:comment>
|
||||||
|
<wp:comment>
|
||||||
|
<wp:comment_id>9</wp:comment_id>
|
||||||
|
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||||
|
<wp:comment_author_email>info@posativ.org
|
||||||
|
</wp:comment_author_email>
|
||||||
|
<wp:comment_author_url></wp:comment_author_url>
|
||||||
|
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||||
|
<wp:comment_date>2014-04-29 15:21:52</wp:comment_date>
|
||||||
|
<wp:comment_date_gmt>2014-04-29 15:21:52</wp:comment_date_gmt>
|
||||||
|
<wp:comment_content><![CDATA[...]]></wp:comment_content>
|
||||||
|
<wp:comment_approved>1</wp:comment_approved>
|
||||||
|
<wp:comment_type></wp:comment_type>
|
||||||
|
<wp:comment_parent>7</wp:comment_parent>
|
||||||
|
<wp:comment_user_id>1</wp:comment_user_id>
|
||||||
|
</wp:comment>
|
||||||
|
<wp:comment>
|
||||||
|
<wp:comment_id>10</wp:comment_id>
|
||||||
|
<wp:comment_author><![CDATA[Letzter :/]]></wp:comment_author>
|
||||||
|
<wp:comment_author_email>info@posativ.org
|
||||||
|
</wp:comment_author_email>
|
||||||
|
<wp:comment_author_url></wp:comment_author_url>
|
||||||
|
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||||
|
<wp:comment_date>2014-04-29 15:21:56</wp:comment_date>
|
||||||
|
<wp:comment_date_gmt>2014-04-29 15:21:56</wp:comment_date_gmt>
|
||||||
|
<wp:comment_content><![CDATA[...]]></wp:comment_content>
|
||||||
|
<wp:comment_approved>1</wp:comment_approved>
|
||||||
|
<wp:comment_type></wp:comment_type>
|
||||||
|
<wp:comment_parent>0</wp:comment_parent>
|
||||||
|
<wp:comment_user_id>1</wp:comment_user_id>
|
||||||
|
</wp:comment>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
Loading…
Reference in New Issue
Block a user