#!/usr/bin/env python3
# -*- encoding: utf-8 -*-

"""Comment importer from Blogger

This python script can convert comments posted to a Blogger-powered blog to a
JSON file which can then be imported into Isso (by following the procedure
explained in docs/docs/extras/advanced-migration.rst).

The script can be run like this:

    python import_blogger.py -p 'http://myblog.com/' blogger.xml out.json

where `blogger.xml` is a dump of the blog produced by the Blogger platform, and
the URL following the `-p` option is a prefix that will be applied to all post
URLs: the original host will be stripped and the path will be appended to the
string you specify here (this can be useful in the case that your blog moved to
a different domain, subdomain, or just into a new directory).
The `out.json` file is the file which will be generated by this tool, and which
can then be fed into isso:

    isso -c /path/to/isso.cfg import -t generic out.json
"""

import json
import time
from urllib.parse import urlparse


class Post:
    """A blog post (one comment thread) and the comments collected for it."""

    def __init__(self, url):
        self.url = url        # identifier of the thread (prefix + URL path)
        self.title = None     # filled in once the post entry itself is seen
        self.comments = []    # comment dicts, in the order they were added

    def add_comment(self, comment):
        """Append *comment* and give it the next 1-based id within this post."""
        comment['id'] = len(self.comments) + 1
        self.comments.append(comment)


def encode_post(post):
    """Return the JSON-serialisable dict written to the output for *post*."""
    return {
        'id': post.url,
        'title': post.title,
        'comments': post.comments,
    }


class ImportBlogger:
    """Convert the comments of a Blogger dump into a JSON file.

    The dump is parsed with feedparser; every entry tagged as a comment is
    attached to the post whose URL path it shares, and posts that received
    at least one comment are written out by :meth:`run`.
    """

    TYPE_COMMENT = 'http://schemas.google.com/blogger/2008/kind#comment'
    TYPE_POST = 'http://schemas.google.com/blogger/2008/kind#post'

    def __init__(self, filename_in, filename_out, prefix):
        """Parse *filename_in* and remember where and how to write the result.

        :param filename_in: path of the Blogger dump to read.
        :param filename_out: path of the JSON file :meth:`run` will write.
        :param prefix: string prepended to the URL path of every post.
        """
        # Imported here rather than at module level so that the helpers of
        # this module stay importable when the third-party feedparser
        # package is not installed; it is only needed to read the dump.
        import feedparser

        self.channel = feedparser.parse(filename_in)
        self.filename_out = filename_out
        self.prefix = prefix
        self.posts = {}

    def run(self):
        """Collect all posts and comments and write them to the output file.

        Only posts with at least one comment are written.
        """
        self.posts = {}
        self._collect_posts()

        data = [encode_post(p) for p in self.posts.values() if p.comments]
        with open(self.filename_out, 'w', encoding='utf-8') as fp:
            json.dump(data, fp, indent=2)

    def _collect_posts(self):
        """Fill ``self.posts`` from the post and comment entries of the dump."""
        for item in self.channel.entries:
            # Entries without any tag have no `tags` attribute at all.
            tags = getattr(item, 'tags', None) or ()
            terms = {tag.term for tag in tags}
            # Look at every term: the entry kind is not guaranteed to be the
            # first tag, other tags of the entry may precede it.
            if self.TYPE_COMMENT in terms:
                post = self.ensure_post(item)
                post.add_comment(self.process_comment(item))
            elif self.TYPE_POST in terms:
                self.process_post(item)

    def process_post(self, item):
        """Record the title of the post described by *item*."""
        self.ensure_post(item).title = item.title

    def ensure_post(self, item):
        """Return the Post that *item* belongs to, creating it if needed."""
        pid = self.post_id(item)
        post = self.posts.get(pid)
        if post is None:
            post = Post(pid)
            self.posts[pid] = post
        return post

    def process_comment(self, item):
        """Build the comment dict for the comment entry *item*.

        Author name, e-mail and website default to an empty string when the
        entry does not carry them.
        """
        author = getattr(item, 'author_detail', None) or {}
        return {
            'author': author.get('name', ''),
            'email': author.get('email', ''),
            'website': author.get('href', ''),
            'created': time.strftime('%Y-%m-%d %H:%M:%S',
                                     item.published_parsed),
            'text': item.content[0].value,
            # The dump does not contain the address of the commenter.
            'remote_addr': '127.0.0.1',
        }

    def post_id(self, item):
        """Return the thread id of *item*: the prefix plus its URL path.

        Host, query string and fragment of the link are dropped, so a comment
        link maps to the same id as the post it was left on.
        """
        path = urlparse(item.link).path
        # The path already starts with '/'; strip the one a prefix such as
        # 'http://myblog.com/' ends with, to avoid 'http://myblog.com//...'.
        return self.prefix.rstrip('/') + path


def main():
    """Parse the command line and run the conversion."""
    import argparse
    parser = argparse.ArgumentParser(
        description='Convert comments from blogger.com')
    parser.add_argument('input', help='input file')
    parser.add_argument('output', help='output file')
    parser.add_argument('-p', dest='prefix',
                        help='prefix to be added to paths (ID)',
                        type=str, default='')
    args = parser.parse_args()

    importer = ImportBlogger(args.input, args.output, args.prefix)
    importer.run()


if __name__ == '__main__':
    main()