add initial support to import WordPress comments
This commit is contained in:
parent
0154113c80
commit
12f8af8434
@ -207,6 +207,8 @@ def main():
|
||||
imprt.add_argument("dump", metavar="FILE")
|
||||
imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
|
||||
help="perform a trial run with no changes made")
|
||||
imprt.add_argument("-t", "--type", dest="type", default=None,
|
||||
choices=["disqus", "wordpress"], help="export type")
|
||||
|
||||
serve = subparser.add_parser("run", help="run server")
|
||||
|
||||
@ -223,7 +225,7 @@ def main():
|
||||
dbpath = conf.get("general", "dbpath")
|
||||
|
||||
mydb = db.SQLite3(dbpath, conf)
|
||||
migrate.dispatch(mydb, args.dump)
|
||||
migrate.dispatch(args.type, mydb, args.dump)
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
|
128
isso/migrate.py
128
isso/migrate.py
@ -1,14 +1,18 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
from __future__ import division
|
||||
from __future__ import division, print_function
|
||||
|
||||
import sys
|
||||
import os
|
||||
import io
|
||||
import textwrap
|
||||
|
||||
from time import mktime, strptime
|
||||
from time import mktime, strptime, time
|
||||
from collections import defaultdict
|
||||
|
||||
from isso.utils import anonymize
|
||||
from isso.compat import string_types
|
||||
|
||||
try:
|
||||
input = raw_input
|
||||
except NameError:
|
||||
@ -22,6 +26,39 @@ except ImportError:
|
||||
from xml.etree import ElementTree
|
||||
|
||||
|
||||
def strip(val):
    """Return *val* without surrounding whitespace if it is a string.

    Any non-string value (for instance ``None``, which ElementTree yields
    for an empty element's ``.text``) is handed back unchanged.
    """
    return val.strip() if isinstance(val, string_types) else val
|
||||
|
||||
|
||||
class Progress(object):
    """Throttled, single-line progress indicator written to ``sys.stdout``.

    Output is produced only when stdout is a TTY; otherwise every call is a
    no-op.  The line is redrawn at most once every 0.2 seconds.
    """

    def __init__(self, end):
        """Create an indicator for *end* total steps.

        A falsy *end* (``0`` or ``None``) is replaced by ``1`` so that the
        ratio computed in :meth:`update` never divides by zero.
        """
        self.end = end or 1

        self.istty = sys.stdout.isatty()
        # timestamp of the last redraw; 0 forces the next update to draw
        self.last = 0

    @staticmethod
    def _columns(default=80):
        """Return the terminal width reported by ``stty size``.

        Falls back to *default* when ``stty`` is unavailable, is not attached
        to a terminal or prints something unexpected.
        """
        try:
            pipe = os.popen('stty size', 'r')
            try:
                output = pipe.read()
            finally:
                # always release the pipe, even if reading fails
                pipe.close()
            # `stty size` prints "<rows> <cols>"
            return int(output.split()[1])
        except (OSError, IndexError, ValueError):
            return default

    def update(self, i, message):
        """Redraw the progress line for step *i* with *message*.

        Does nothing when stdout is not a TTY, when *message* is ``None`` or
        when the previous redraw happened less than 0.2 seconds ago.
        """
        if not self.istty or message is None:
            return

        # Check the throttle first so no subprocess is spawned for updates
        # that would not be drawn anyway.
        if time() - self.last <= 0.2:
            return

        cols = self._columns()
        # leave room for the "[100%] " prefix
        message = message[:max(cols - 7, 0)]

        # blank the whole line before drawing, so leftovers of a longer,
        # previous message do not remain visible
        sys.stdout.write("\r{0}".format(" " * cols))
        sys.stdout.write("\r[{0:.0%}] {1}".format(float(i) / self.end, message))
        sys.stdout.flush()
        self.last = time()

    def finish(self, message):
        """Draw the final (100%) line with *message* and end the line."""
        # reset the throttle so the final line is always drawn
        self.last = 0
        self.update(self.end, message)

        # The newline is written separately: appended to the message it
        # would be cut off whenever the message exceeds the terminal width.
        if self.istty:
            sys.stdout.write("\n")
            sys.stdout.flush()
|
||||
|
||||
|
||||
class Disqus(object):
|
||||
|
||||
ns = '{http://disqus.com}'
|
||||
@ -116,9 +153,94 @@ class Disqus(object):
|
||||
print("")
|
||||
|
||||
|
||||
def dispatch(db, dump):
|
||||
class WordPress(object):
    """Importer for threads and comments of a WordPress WXR export.

    Every ``channel/item`` node of the dump becomes a thread; its
    ``wp:comment`` children are added as comments of that thread.
    """

    # WXR 1.0 namespace in ElementTree's "{uri}" notation, prepended to tag
    # names when looking up ``wp:`` elements.
    ns = "{http://wordpress.org/export/1.0/}"

    def __init__(self, db, xmlfile):
        """Remember the target database *db* and the path of the dump."""
        self.db = db
        self.xmlfile = xmlfile
        # number of comments actually inserted so far
        self.count = 0

    def insert(self, thread):
        """Create the thread for one ``<item>`` node and add its comments.

        Comments are inserted parents-first, so that the WordPress comment
        ids in ``comment_parent`` can be translated into the ids assigned
        by the database.
        """
        path = urlparse(thread.find("link").text).path
        # an empty <title/> has ``.text`` of None; fall back to "" instead
        # of failing on ``None.strip()``
        self.db.threads.new(path, strip(thread.find("title").text or ""))

        comments = sorted(
            (WordPress.Comment(el)
             for el in thread.findall(WordPress.ns + "comment")),
            key=lambda k: k["id"])

        remap = {}  # WordPress comment id -> id assigned by the database
        ids = set(c["id"] for c in comments)  # ids still waiting for insertion

        while comments:
            for i, item in enumerate(comments):
                # the parent has not been inserted yet, try this one later
                if item["parent"] in ids:
                    continue

                # a parent unknown to this thread degrades to a top-level
                # comment
                item["parent"] = remap.get(item["parent"], None)
                rv = self.db.comments.add(path, item)
                remap[item["id"]] = rv["id"]
                # count only what really went into the database
                self.count += 1

                # discard() tolerates dumps that repeat a comment id
                ids.discard(item["id"])
                comments.pop(i)

                # the list was modified, restart the scan from the beginning
                break
            else:
                # Nothing was insertable in a full pass, i.e. the remaining
                # comments reference each other in a cycle.
                # should never happen, but... it's WordPress.
                return

    def migrate(self):
        """Parse the dump and import all items, reporting progress."""
        tree = ElementTree.parse(self.xmlfile)
        items = tree.findall("channel/item")

        progress = Progress(len(items))
        for i, thread in enumerate(items):
            progress.update(i, thread.find("title").text)
            self.insert(thread)

        progress.finish("{0} threads, {1} comments".format(len(items), self.count))

    @staticmethod
    def _timestamp(value):
        """Convert a ``YYYY-mm-dd HH:MM:SS`` UTC string to a Unix timestamp.

        ``calendar.timegm`` is used because the value is UTC;
        ``time.mktime`` would interpret it as local time and shift the
        result by the UTC offset of the importing machine.
        """
        from calendar import timegm

        return float(timegm(strptime(value, "%Y-%m-%d %H:%M:%S")))

    @classmethod
    def Comment(cls, el):
        """Build the comment dict for one ``wp:comment`` element *el*.

        ``id`` and ``parent`` still carry the WordPress ids (``parent`` is
        None for ``0``); ``mode`` is 1 if ``comment_approved`` is "1",
        otherwise 2.
        """
        ns = cls.ns

        def text(tag):
            # whitespace-stripped text of the given wp: child element
            return strip(el.find(ns + tag).text)

        return {
            "text": text("comment_content"),
            "author": text("comment_author"),
            "email": text("comment_author_email"),
            "website": text("comment_author_url"),
            "remote_addr": anonymize(text("comment_author_IP")),
            "created": cls._timestamp(text("comment_date_gmt")),
            "mode": 1 if text("comment_approved") == "1" else 2,
            "id": int(el.find(ns + "comment_id").text),
            "parent": int(el.find(ns + "comment_parent").text) or None
        }
|
||||
|
||||
|
||||
def dispatch(type, db, dump):
    """Import the comment dump *dump* into *db* with the matching importer.

    :param type: "wordpress", "disqus" or None; with None the format is
        guessed from the first 2048 characters of the dump.
    :param db: database object; asked for existing comments first.
    :param dump: path to the export file.
    :raises SystemExit: if the user declines to import into a non-empty
        database, or if the format is unknown.
    """
    if db.execute("SELECT * FROM comments").fetchone():
        if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
            raise SystemExit("Abort.")

    if type is None:

        # Decode explicitly instead of relying on the locale's default
        # encoding; the markers looked for are plain ASCII, so undecodable
        # bytes are replaced rather than aborting the detection.
        with io.open(dump, encoding="utf-8", errors="replace") as fp:
            peek = fp.read(2048)

        # WordPress.ns is "{uri}"; [1:-1] drops the braces
        if 'xmlns:wp="%s"' % WordPress.ns[1:-1] in peek:
            type = "wordpress"

        if '<disqus xmlns=' in peek:
            type = "disqus"

    if type == "wordpress":
        WordPress(db, dump).migrate()
    elif type == "disqus":
        Disqus(db, dump).migrate()
    else:
        raise SystemExit("Unknown format, abort.")
|
||||
|
@ -11,7 +11,7 @@ from os.path import join, dirname
|
||||
from isso.core import Config
|
||||
|
||||
from isso.db import SQLite3
|
||||
from isso.migrate import Disqus
|
||||
from isso.migrate import Disqus, WordPress
|
||||
|
||||
|
||||
class TestMigration(unittest.TestCase):
|
||||
@ -33,13 +33,32 @@ class TestMigration(unittest.TestCase):
|
||||
self.assertEqual(a["email"], "foo@bar.com")
|
||||
|
||||
b = db.comments.get(2)
|
||||
self.assertEqual(b["parent"] ,a["id"])
|
||||
self.assertEqual(b["parent"], a["id"])
|
||||
|
||||
def test_wordpress(self):
|
||||
|
||||
a = db.comments.get(1)
|
||||
xml = join(dirname(__file__), "wordpress.xml")
|
||||
xxx = tempfile.NamedTemporaryFile()
|
||||
|
||||
assert a["author"] == "peter"
|
||||
assert a["email"] == "foo@bar.com"
|
||||
db = SQLite3(xxx.name, Config.load(None))
|
||||
WordPress(db, xml).migrate()
|
||||
|
||||
b = db.comments.get(2)
|
||||
assert b["parent"] == a["id"]
|
||||
self.assertEqual(db.threads["/2014/test/"]["title"], "Hello, World!")
|
||||
self.assertEqual(db.threads["/2014/test/"]["id"], 1)
|
||||
|
||||
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 6)
|
||||
|
||||
first = db.comments.get(1)
|
||||
self.assertEqual(first["author"], "Ohai")
|
||||
self.assertEqual(first["text"], "Erster!1")
|
||||
|
||||
second = db.comments.get(2)
|
||||
self.assertEqual(second["author"], "Tester")
|
||||
self.assertEqual(second["text"], "Zweiter.")
|
||||
|
||||
for i in (3, 4, 5):
|
||||
self.assertEqual(db.comments.get(i)["parent"], second["id"])
|
||||
|
||||
last = db.comments.get(6)
|
||||
self.assertEqual(last["author"], "Letzter :/")
|
||||
self.assertEqual(last["parent"], None)
|
||||
|
119
isso/tests/wordpress.xml
Normal file
119
isso/tests/wordpress.xml
Normal file
@ -0,0 +1,119 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:wp="http://wordpress.org/export/1.0/">
|
||||
<!-- This WXR dump is incomplete! It only contains elements needed for an
|
||||
import, a few are unused yet, but eventually useful later.
|
||||
|
||||
The <item> node is derived from a sort-of real-world WordPress blog,
|
||||
but modified to test various things.
|
||||
-->
|
||||
<channel>
|
||||
<item>
|
||||
<title>Hello, World!</title>
|
||||
<link>http://example.tld/2014/test/</link>
|
||||
<pubDate>Tue, 14 Jan 2014 17:31:03 +0000</pubDate>
|
||||
<dc:creator><![CDATA[Tester]]></dc:creator>
|
||||
<wp:post_id>18</wp:post_id>
|
||||
<wp:post_date>2014-01-14 17:31:03</wp:post_date>
|
||||
<wp:post_date_gmt>2014-01-14 17:31:03</wp:post_date_gmt>
|
||||
<wp:comment_status>open</wp:comment_status>
|
||||
<wp:post_name>test</wp:post_name>
|
||||
<wp:status>publish</wp:status>
|
||||
<wp:post_type>post</wp:post_type>
|
||||
<wp:comment>
|
||||
<wp:comment_id>2</wp:comment_id>
|
||||
<wp:comment_author><![CDATA[Ohai]]></wp:comment_author>
|
||||
<wp:comment_author_email>test@example.org
|
||||
</wp:comment_author_email>
|
||||
<wp:comment_author_url>http://example.tld/</wp:comment_author_url>
|
||||
<wp:comment_author_IP>::ffff:82.119.20.0</wp:comment_author_IP>
|
||||
<wp:comment_date>2014-01-14 17:32:12</wp:comment_date>
|
||||
<wp:comment_date_gmt>2014-01-14 17:32:12</wp:comment_date_gmt>
|
||||
<wp:comment_content>
|
||||
<![CDATA[Erster!1]]></wp:comment_content>
|
||||
<wp:comment_approved>1</wp:comment_approved>
|
||||
|
||||
<!-- what's that? -->
|
||||
<wp:comment_type></wp:comment_type>
|
||||
|
||||
<wp:comment_parent>0</wp:comment_parent>
|
||||
<wp:comment_user_id>0</wp:comment_user_id>
|
||||
</wp:comment>
|
||||
<wp:comment>
|
||||
<wp:comment_id>6</wp:comment_id>
|
||||
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||
<wp:comment_author_email>info@posativ.org
|
||||
</wp:comment_author_email>
|
||||
<wp:comment_author_url></wp:comment_author_url>
|
||||
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||
<wp:comment_date>2014-04-29 15:21:27</wp:comment_date>
|
||||
<wp:comment_date_gmt>2014-04-29 15:21:27</wp:comment_date_gmt>
|
||||
<wp:comment_content><![CDATA[Zweiter.]]></wp:comment_content>
|
||||
<wp:comment_approved>1</wp:comment_approved>
|
||||
<wp:comment_type></wp:comment_type>
|
||||
<wp:comment_parent>0</wp:comment_parent>
|
||||
<wp:comment_user_id>1</wp:comment_user_id>
|
||||
</wp:comment>
|
||||
<wp:comment>
|
||||
<wp:comment_id>7</wp:comment_id>
|
||||
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||
<wp:comment_author_email>info@posativ.org
|
||||
</wp:comment_author_email>
|
||||
<wp:comment_author_url></wp:comment_author_url>
|
||||
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||
<wp:comment_date_gmt>2014-04-29 15:21:35</wp:comment_date_gmt>
|
||||
<wp:comment_content><![CDATA[Drölfter!]]></wp:comment_content>
|
||||
<wp:comment_approved>1</wp:comment_approved>
|
||||
<wp:comment_parent>6</wp:comment_parent>
|
||||
</wp:comment>
|
||||
<wp:comment>
|
||||
<wp:comment_id>8</wp:comment_id>
|
||||
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||
<wp:comment_author_email>info@posativ.org
|
||||
</wp:comment_author_email>
|
||||
<wp:comment_author_url></wp:comment_author_url>
|
||||
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||
<wp:comment_date>2014-04-29 15:21:45</wp:comment_date>
|
||||
<wp:comment_date_gmt>2014-04-29 15:21:45</wp:comment_date_gmt>
|
||||
<wp:comment_content>
|
||||
<![CDATA[Yet another reply.]]></wp:comment_content>
|
||||
<wp:comment_approved>1</wp:comment_approved>
|
||||
<wp:comment_type></wp:comment_type>
|
||||
<wp:comment_parent>7</wp:comment_parent>
|
||||
<wp:comment_user_id>1</wp:comment_user_id>
|
||||
</wp:comment>
|
||||
<wp:comment>
|
||||
<wp:comment_id>9</wp:comment_id>
|
||||
<wp:comment_author><![CDATA[Tester]]></wp:comment_author>
|
||||
<wp:comment_author_email>info@posativ.org
|
||||
</wp:comment_author_email>
|
||||
<wp:comment_author_url></wp:comment_author_url>
|
||||
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||
<wp:comment_date>2014-04-29 15:21:52</wp:comment_date>
|
||||
<wp:comment_date_gmt>2014-04-29 15:21:52</wp:comment_date_gmt>
|
||||
<wp:comment_content><![CDATA[...]]></wp:comment_content>
|
||||
<wp:comment_approved>1</wp:comment_approved>
|
||||
<wp:comment_type></wp:comment_type>
|
||||
<wp:comment_parent>7</wp:comment_parent>
|
||||
<wp:comment_user_id>1</wp:comment_user_id>
|
||||
</wp:comment>
|
||||
<wp:comment>
|
||||
<wp:comment_id>10</wp:comment_id>
|
||||
<wp:comment_author><![CDATA[Letzter :/]]></wp:comment_author>
|
||||
<wp:comment_author_email>info@posativ.org
|
||||
</wp:comment_author_email>
|
||||
<wp:comment_author_url></wp:comment_author_url>
|
||||
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||
<wp:comment_date>2014-04-29 15:21:56</wp:comment_date>
|
||||
<wp:comment_date_gmt>2014-04-29 15:21:56</wp:comment_date_gmt>
|
||||
<wp:comment_content><![CDATA[...]]></wp:comment_content>
|
||||
<wp:comment_approved>1</wp:comment_approved>
|
||||
<wp:comment_type></wp:comment_type>
|
||||
<wp:comment_parent>0</wp:comment_parent>
|
||||
<wp:comment_user_id>1</wp:comment_user_id>
|
||||
</wp:comment>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
Loading…
Reference in New Issue
Block a user