#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""Comment importer from Blogger

This python script can convert comments posted to a Blogger-powered blog to a
JSON file which can then be imported into Isso (by following the procedure
explained in docs/docs/extras/advanced-migration.rst).

The script can be run like this:

    python import_blogger.py -p 'http://myblog.com/' blogger.xml out.json

where `blogger.xml` is a dump of the blog produced by the Blogger platform,
and the URL following the `-p` option is a prefix that will be applied to all
post URLs: the original host will be stripped and the path will be appended to
the string you specify here (this can be useful in the case that your blog
moved to a different domain, subdomain, or just into a new directory).

The `out.json` file is the file which will be generated by this tool, and
which can then be fed into isso:

    isso -c /path/to/isso.cfg import -t generic out.json
"""

from __future__ import unicode_literals

import json
import time
from urllib.parse import urlparse

import feedparser


class Post:
    """A blog post together with the comments collected for it."""

    def __init__(self, url):
        # `url` doubles as the post identifier written to the output file.
        self.url = url
        # Stays None unless the post's own feed entry is processed.
        self.title = None
        self.comments = []

    def add_comment(self, comment):
        """Append `comment` (a dict) and give it a 1-based, per-post `id`."""
        comment['id'] = len(self.comments) + 1
        self.comments.append(comment)


def encode_post(post):
    """Return the JSON-serialisable dict (`id`, `title`, `comments`) for `post`."""
    return {
        'id': post.url,
        'title': post.title,
        'comments': post.comments,
    }


class ImportBlogger:
    """Convert the comments found in a Blogger dump into a JSON file."""

    # Category terms that identify what kind of item a feed entry is.
    TYPE_COMMENT = 'http://schemas.google.com/blogger/2008/kind#comment'
    TYPE_POST = 'http://schemas.google.com/blogger/2008/kind#post'

    def __init__(self, filename_in, filename_out, prefix):
        """Parse `filename_in` with feedparser and remember the output settings.

        `prefix` is the string prepended to each post path to build its ID.
        """
        self.channel = feedparser.parse(filename_in)
        self.filename_out = filename_out
        self.prefix = prefix
        # Created here as well so the helper methods work before run().
        self.posts = {}

    def run(self):
        """Collect every post and comment entry and write the JSON output.

        Only posts that ended up with at least one comment are written.
        """
        self.posts = {}
        for item in self.channel.entries:
            # An entry without categories has no `tags` key at all; treat it
            # like an empty list so the guard below can skip it.
            terms = [tag.term for tag in item.get('tags', [])]
            if not terms:
                continue
            # Only the first category term is inspected to classify the entry.
            kind = terms[0]
            if kind == self.TYPE_COMMENT:
                post = self.ensure_post(item)
                post.add_comment(self.process_comment(item))
            elif kind == self.TYPE_POST:
                self.process_post(item)

        data = [encode_post(p) for p in self.posts.values() if p.comments]
        with open(self.filename_out, 'w', encoding='utf-8') as fp:
            json.dump(data, fp, indent=2)

    def process_post(self, item):
        """Record the title of the post described by `item`.

        The post is created first if no comment has referenced it yet.
        """
        self.ensure_post(item).title = item.title

    def ensure_post(self, item):
        """Return the `Post` that `item` belongs to, creating it if needed."""
        pid = self.post_id(item)
        post = self.posts.get(pid)
        if post is None:
            post = Post(pid)
            self.posts[pid] = post
        return post

    def process_comment(self, item):
        """Build the comment dict for the comment entry `item`."""
        author = item.author_detail
        return {
            'author': author.name,
            # Email and URL are optional parts of a feed author; fall back to
            # an empty string instead of failing when they are absent.
            'email': author.get('email', ''),
            'website': author.get('href', ''),
            'created': time.strftime('%Y-%m-%d %H:%M:%S',
                                     item.published_parsed),
            'text': item.content[0].value,
            # Fixed placeholder value; no address is read from the entry.
            'remote_addr': '127.0.0.1',
        }

    def post_id(self, item):
        """Return the configured prefix followed by the path of `item.link`.

        Only the path component of the link is kept. When the prefix ends
        with '/' and the path starts with '/', one of the two is dropped so
        that a prefix such as 'http://myblog.com/' does not yield '//' in the
        resulting ID.
        """
        path = urlparse(item.link).path
        if self.prefix.endswith('/') and path.startswith('/'):
            return self.prefix[:-1] + path
        return self.prefix + path


def main(argv=None):
    """Parse the command line (`argv`, or sys.argv when None) and convert."""
    import argparse

    parser = argparse.ArgumentParser(
        description='Convert comments from blogger.com')
    parser.add_argument('input', help='input file')
    parser.add_argument('output', help='output file')
    parser.add_argument('-p', dest='prefix',
                        help='prefix to be added to paths (ID)',
                        type=str, default='')
    args = parser.parse_args(argv)

    importer = ImportBlogger(args.input, args.output, args.prefix)
    importer.run()


if __name__ == '__main__':
    main()