handle WP's query-string "pages" and variable WXR namespaces
Site links such as /?p=1234 are imported *as is* and may or may not work in Isso. Do not use a query-based URL structure for permalinks. Ever. Also, depending on the pages you are going to export, the WXR XML namespace may change from ../export/1.0/ to ../export/1.2/. Isso tries to import any WXR 1.x version.
This commit is contained in:
parent
333bba728b
commit
123ea26ca9
@ -68,7 +68,8 @@ on *Discussions* and select the *Export* tab. You'll receive an email with your
|
||||
comments. Unfortunately, Disqus does not export up- and downvotes.
|
||||
|
||||
To export comments from your previous WordPress installation, go to *Tools*,
|
||||
export your data.
|
||||
export your data. WordPress WXR import is quite new and may not work for you;
|
||||
please report any failures.
|
||||
|
||||
Now import the XML dump:
|
||||
|
||||
|
@ -5,6 +5,7 @@ from __future__ import division, print_function
|
||||
import sys
|
||||
import os
|
||||
import io
|
||||
import re
|
||||
import textwrap
|
||||
|
||||
from time import mktime, strptime, time
|
||||
@ -145,6 +146,14 @@ class Disqus(object):
|
||||
initial_indent=" ", subsequent_indent=" "))
|
||||
print("")
|
||||
|
||||
@classmethod
|
||||
def detect(cls, peek):
    """Return the Disqus namespace URI if *peek* declares it, else None.

    *peek* is a chunk of text read from the start of an XML dump.
    """
    marker = 'xmlns="http://disqus.com'
    if marker not in peek:
        return None
    return "http://disqus.com"
|
||||
|
||||
|
||||
class WordPress(object):
|
||||
|
||||
@ -155,12 +164,23 @@ class WordPress(object):
|
||||
self.xmlfile = xmlfile
|
||||
self.count = 0
|
||||
|
||||
with io.open(xmlfile) as fp:
|
||||
ns = WordPress.detect(fp.read(io.DEFAULT_BUFFER_SIZE))
|
||||
|
||||
if ns:
|
||||
self.ns = "{" + ns + "}"
|
||||
|
||||
def insert(self, thread):
|
||||
|
||||
path = urlparse(thread.find("link").text).path
|
||||
url = urlparse(thread.find("link").text)
|
||||
path = url.path
|
||||
|
||||
if url.query:
|
||||
path += "?" + url.query
|
||||
|
||||
self.db.threads.new(path, thread.find("title").text.strip())
|
||||
|
||||
comments = list(map(WordPress.Comment, thread.findall(WordPress.ns + "comment")))
|
||||
comments = list(map(self.Comment, thread.findall(self.ns + "comment")))
|
||||
comments.sort(key=lambda k: k["id"])
|
||||
|
||||
remap = {}
|
||||
@ -188,31 +208,46 @@ class WordPress(object):
|
||||
def migrate(self):
    """Import every commented thread of the WXR dump into the database.

    Parses ``self.xmlfile``, walks all ``channel/item`` elements and hands
    each one to ``self.insert``. Items that have no title (missing element
    or empty text) or no comment element are skipped and are not counted
    in the thread total reported at the end.
    """
    tree = ElementTree.parse(self.xmlfile)
    items = tree.findall("channel/item")
    skip = 0

    progress = Progress(len(items))
    for i, thread in enumerate(items):
        title = thread.find("title")
        # Guard against a missing <title> element as well as an empty one.
        if (title is None or title.text is None
                or thread.find(self.ns + "comment") is None):
            skip += 1
            continue

        progress.update(i, title.text)
        self.insert(thread)

    # Report exactly once, counting only the threads actually imported.
    progress.finish("{0} threads, {1} comments".format(
        len(items) - skip, self.count))
|
||||
|
||||
def Comment(self, el):
    """Convert a WXR comment element into a plain dict.

    Child elements are looked up under the namespace prefix ``self.ns``.
    ``mode`` is 1 when ``comment_approved`` is the string "1" and 2
    otherwise; ``parent`` is None when ``comment_parent`` parses to 0.
    """

    def field(tag):
        # Raw text of the namespaced child element *tag*.
        return el.find(self.ns + tag).text

    return {
        "text": strip(field("comment_content")),
        "author": strip(field("comment_author")),
        "email": strip(field("comment_author_email")),
        "website": strip(field("comment_author_url")),
        "remote_addr": anonymize(strip(field("comment_author_IP"))),
        "created": mktime(
            strptime(strip(field("comment_date_gmt")), "%Y-%m-%d %H:%M:%S")),
        "mode": 1 if field("comment_approved") == "1" else 2,
        "id": int(field("comment_id")),
        "parent": int(field("comment_parent")) or None
    }
|
||||
|
||||
@classmethod
|
||||
def Comment(cls, el):
|
||||
return {
|
||||
"text": strip(el.find(WordPress.ns + "comment_content").text),
|
||||
"author": strip(el.find(WordPress.ns + "comment_author").text),
|
||||
"email": strip(el.find(WordPress.ns + "comment_author_email").text),
|
||||
"website": strip(el.find(WordPress.ns + "comment_author_url").text),
|
||||
"remote_addr": anonymize(
|
||||
strip(el.find(WordPress.ns + "comment_author_IP").text)),
|
||||
"created": mktime(strptime(
|
||||
strip(el.find(WordPress.ns + "comment_date_gmt").text),
|
||||
"%Y-%m-%d %H:%M:%S")),
|
||||
"mode": 1 if el.find(WordPress.ns + "comment_approved").text == "1" else 2,
|
||||
"id": int(el.find(WordPress.ns + "comment_id").text),
|
||||
"parent": int(el.find(WordPress.ns + "comment_parent").text) or None
|
||||
}
|
||||
def detect(cls, peek):
    """Return the WordPress WXR 1.x namespace URI found in *peek*, or None.

    *peek* is a chunk of text read from the start of an XML dump. The
    returned value is the matched URI itself, e.g.
    ``http://wordpress.org/export/1.2/``.
    """
    # Raw string so ``\.`` and ``\d`` reach the regex engine unchanged (they
    # are invalid escapes in a normal string literal); the dot in the host
    # name is escaped so it only matches a literal ".".
    m = re.search(r"http://wordpress\.org/export/1\.\d/", peek)
    return m.group(0) if m else None
|
||||
|
||||
|
||||
def dispatch(type, db, dump):
|
||||
@ -223,12 +258,12 @@ def dispatch(type, db, dump):
|
||||
if type is None:
|
||||
|
||||
with io.open(dump) as fp:
|
||||
peek = fp.read(2048)
|
||||
peek = fp.read(io.DEFAULT_BUFFER_SIZE)
|
||||
|
||||
if 'xmlns:wp="%s"' % WordPress.ns[1:-1] in peek:
|
||||
if WordPress.detect(peek):
|
||||
type = "wordpress"
|
||||
|
||||
if '<disqus xmlns=' in peek:
|
||||
if Disqus.detect(peek):
|
||||
type = "disqus"
|
||||
|
||||
if type == "wordpress":
|
||||
|
@ -1,5 +1,7 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
try:
|
||||
import unittest2 as unittest
|
||||
except ImportError:
|
||||
@ -49,7 +51,11 @@ class TestMigration(unittest.TestCase):
|
||||
self.assertEqual(db.threads["/2014/test/"]["title"], "Hello, World!")
|
||||
self.assertEqual(db.threads["/2014/test/"]["id"], 1)
|
||||
|
||||
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 6)
|
||||
self.assertEqual(db.threads["/?p=4"]["title"], "...")
|
||||
self.assertEqual(db.threads["/?p=4"]["id"], 2)
|
||||
|
||||
self.assertEqual(len(db.execute("SELECT id FROM threads").fetchall()), 2)
|
||||
self.assertEqual(len(db.execute("SELECT id FROM comments").fetchall()), 7)
|
||||
|
||||
first = db.comments.get(1)
|
||||
self.assertEqual(first["author"], "Ohai")
|
||||
@ -66,3 +72,24 @@ class TestMigration(unittest.TestCase):
|
||||
last = db.comments.get(6)
|
||||
self.assertEqual(last["author"], "Letzter :/")
|
||||
self.assertEqual(last["parent"], None)
|
||||
|
||||
def test_detection(self):
|
||||
|
||||
wp = """\
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:wp="http://wordpress.org/export/%s/">"""
|
||||
|
||||
self.assertEqual(WordPress.detect(wp % "invalid"), None)
|
||||
|
||||
for version in ("1.0", "1.1", "1.2", "1.3"):
|
||||
self.assertEqual(WordPress.detect(wp % version),
|
||||
"http://wordpress.org/export/%s/" % version)
|
||||
|
||||
dq = '''\
|
||||
<?xml version="1.0"?>
|
||||
<disqus xmlns="http://disqus.com"
|
||||
xmlns:dsq="http://disqus.com/disqus-internals"'''
|
||||
self.assertIsNotNone(Disqus.detect(dq))
|
||||
|
@ -115,5 +115,30 @@
|
||||
<wp:comment_user_id>1</wp:comment_user_id>
|
||||
</wp:comment>
|
||||
</item>
|
||||
|
||||
<!-- handle ?p=X urls -->
|
||||
<item>
|
||||
<title>...</title>
|
||||
<link>http://example.tld/?p=4</link>
|
||||
<wp:comment>
|
||||
<wp:comment_id>11</wp:comment_id>
|
||||
<wp:comment_author><![CDATA[Anonymous]]></wp:comment_author>
|
||||
<wp:comment_author_email>info@posativ.org
|
||||
</wp:comment_author_email>
|
||||
<wp:comment_author_url></wp:comment_author_url>
|
||||
<wp:comment_author_IP>::ffff:86.56.63.0</wp:comment_author_IP>
|
||||
<wp:comment_date>2014-04-29 15:21:56</wp:comment_date>
|
||||
<wp:comment_date_gmt>2014-04-29 15:21:57</wp:comment_date_gmt>
|
||||
<wp:comment_content><![CDATA[...]]></wp:comment_content>
|
||||
<wp:comment_approved>1</wp:comment_approved>
|
||||
<wp:comment_type></wp:comment_type>
|
||||
<wp:comment_parent>0</wp:comment_parent>
|
||||
<wp:comment_user_id>1</wp:comment_user_id>
|
||||
</wp:comment>
|
||||
</item>
|
||||
<item>
|
||||
<title>No comments</title>
|
||||
<link>http://example.tld/?p=6</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
Loading…
Reference in New Issue
Block a user