contrib: Add Blogger importer tool (#529)
* contrib: Add Blogger importer tool * doc: fix minor issues in migration documentation
This commit is contained in:
parent
f4b0376f1a
commit
c24ee69a1e
123
contrib/import_blogger.py
Executable file
123
contrib/import_blogger.py
Executable file
@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
"""Comment importer from Blogger
|
||||
|
||||
This python script can convert comments posted to a Blogger-powered blog to a
|
||||
JSON file which can then be imported into Isso (by following the procedure
|
||||
explained in docs/docs/extras/advanced-migration.rst).
|
||||
|
||||
The script can be run like this:
|
||||
|
||||
python import_blogger.py -p 'http://myblog.com/' blogger.xml out.json
|
||||
|
||||
where `blogger.xml` is a dump of the blog produced by the Blogger platform, and
|
||||
the URL following the `-p` option is a prefix that will be applied to all post
|
||||
URLs: the original host will be stripped and the path will be appended to the
|
||||
string you specify here (this can be useful in the case that your blog moved to
|
||||
a different domain, subdomain, or just into a new directory).
|
||||
The `out.json` file is the file which will be generated by this tool, and which
|
||||
can then be fed into isso:
|
||||
|
||||
isso -c /path/to/isso.cfg import -t generic out.json
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
import feedparser
|
||||
import time
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
class Post:
    """A blog post together with the comments collected for it.

    Attributes:
        url: identifier of the post, stored exactly as passed in.
        title: post title; ``None`` until a caller assigns it.
        comments: list of comment dicts, in the order they were added.
    """

    def __init__(self, url):
        self.url = url
        self.title = None
        self.comments = []

    def add_comment(self, comment):
        """Append *comment* (a dict) and assign its ``id`` key.

        The id is the 1-based position of the comment within this post, so
        ids are unique per post only. The dict is modified in place.
        """
        comment['id'] = len(self.comments) + 1
        self.comments.append(comment)
|
||||
|
||||
|
||||
def encode_post(post):
    """Return the dict written to the output JSON for *post*.

    *post* is any object exposing ``url``, ``title`` and ``comments``.
    The result maps ``id`` to the post URL, ``title`` to its title and
    ``comments`` to the very same list object held by the post (no copy).
    """
    return {
        'id': post.url,
        'title': post.title,
        'comments': post.comments,
    }
|
||||
|
||||
|
||||
class ImportBlogger:
    """Convert the comments of a Blogger export into a JSON file.

    The input is parsed with ``feedparser``; every entry tagged as a comment
    is attached to the post it belongs to, and all posts that received at
    least one comment are dumped as a JSON list to ``filename_out``.
    """

    # Category terms Blogger uses to mark the kind of each feed entry.
    TYPE_COMMENT = 'http://schemas.google.com/blogger/2008/kind#comment'
    TYPE_POST = 'http://schemas.google.com/blogger/2008/kind#post'

    def __init__(self, filename_in, filename_out, prefix):
        """Parse *filename_in* and remember where and how to write the output.

        Args:
            filename_in: source handed to ``feedparser.parse``.
            filename_out: path of the JSON file written by :meth:`run`.
            prefix: string prepended to each post path to build its id.
        """
        self.channel = feedparser.parse(filename_in)
        self.filename_out = filename_out
        self.prefix = prefix
        # Defined here as well so the helper methods work before run().
        self.posts = {}

    def run(self):
        """Collect posts and comments from the feed and write the JSON file."""
        self.posts = {}
        for item in self.channel.entries:
            # Entries without any category have no ``tags`` key at all.
            terms = [tag.term for tag in (item.get('tags') or [])]
            # The kind term is looked up anywhere in the list rather than
            # only in first position, so ordering of categories is irrelevant.
            if self.TYPE_COMMENT in terms:
                post = self.ensure_post(item)
                post.add_comment(self.process_comment(item))
            elif self.TYPE_POST in terms:
                self.process_post(item)

        # Posts that never received a comment are left out of the output.
        data = [encode_post(p) for p in self.posts.values() if p.comments]
        with open(self.filename_out, 'w') as fp:
            json.dump(data, fp, indent=2)

    def process_post(self, item):
        """Record the title of the post described by *item*.

        The post is created if no comment referred to it earlier.
        """
        self.ensure_post(item).title = item.get('title')

    def ensure_post(self, item):
        """Return the ``Post`` that *item* belongs to, creating it if needed."""
        pid = self.post_id(item)
        post = self.posts.get(pid)
        if post is None:
            post = self.posts[pid] = Post(pid)
        return post

    def process_comment(self, item):
        """Build the comment dict for the feed entry *item*.

        Missing author name, email or website fall back to an empty string.
        ``created`` is ``item.published_parsed`` formatted as
        ``YYYY-MM-DD HH:MM:SS``; ``remote_addr`` is always ``127.0.0.1``.
        """
        author = item.get('author_detail') or {}
        return {
            'author': author.get('name', ''),
            'email': author.get('email', ''),
            'website': author.get('href', ''),
            'created': time.strftime('%Y-%m-%d %H:%M:%S',
                                     item.published_parsed),
            'text': item.content[0].value,
            'remote_addr': '127.0.0.1',
        }

    def post_id(self, item):
        """Return the post id for *item*: the prefix plus the link's path.

        Only the path component of ``item.link`` is kept (host, query and
        fragment are dropped). When the prefix ends with ``/`` and the path
        starts with ``/``, one of the two slashes is removed so that a
        prefix such as ``http://myblog.com/`` does not yield ``//``.
        """
        path = urlparse(item.link).path
        if self.prefix.endswith('/') and path.startswith('/'):
            path = path[1:]
        return self.prefix + path
|
||||
|
||||
|
||||
def build_arg_parser():
    """Return the command-line parser for the Blogger importer.

    The parser takes two positionals, ``input`` and ``output``, and an
    optional ``-p`` stored as ``prefix`` (default: empty string).
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Convert comments from blogger.com')
    parser.add_argument('input', help='input file')
    parser.add_argument('output', help='output file')
    parser.add_argument('-p', dest='prefix',
                        help='prefix to be added to paths (ID)',
                        type=str, default='')
    return parser


def main(argv=None):
    """Parse *argv* (``sys.argv[1:]`` when ``None``) and run the importer."""
    args = build_arg_parser().parse_args(argv)
    importer = ImportBlogger(args.input, args.output, args.prefix)
    importer.run()


if __name__ == '__main__':
    main()
|
@ -35,8 +35,8 @@ Example:
|
||||
[
|
||||
{
|
||||
"id": "/blog/article1",
|
||||
"title": "First article!"
|
||||
comments": [
|
||||
"title": "First article!",
|
||||
"comments": [
|
||||
{
|
||||
"author": "James",
|
||||
"created": "2018-11-28 17:24:23",
|
||||
|
Loading…
Reference in New Issue
Block a user