isso/contrib/import_blogger.py

#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""Comment importer from Blogger

This Python script can convert comments posted to a Blogger-powered blog to a
JSON file which can then be imported into Isso (by following the procedure
explained in docs/docs/extras/advanced-migration.rst).

The script can be run like this:

    python import_blogger.py -p 'http://myblog.com/' blogger.xml out.json

where `blogger.xml` is a dump of the blog produced by the Blogger platform, and
the URL following the `-p` option is a prefix that will be applied to all post
URLs: the original host will be stripped and the path will be appended to the
string you specify here (this can be useful in the case that your blog moved to
a different domain, subdomain, or just into a new directory).
The `out.json` file is the file which will be generated by this tool, and which
can then be fed into isso:

    isso -c /path/to/isso.cfg import -t generic out.json
"""

from __future__ import unicode_literals

import json

import feedparser
import time
from urllib.parse import urlparse


class Post:
    """A blog post identified by its URL, plus the comments collected for it."""

    def __init__(self, url):
        # The title is unknown at construction time and stays None until
        # some caller assigns it; comments are accumulated via add_comment().
        self.url, self.title, self.comments = url, None, []

    def add_comment(self, comment):
        """Number *comment* within this post and append it.

        The comment mapping is mutated in place: its ``'id'`` key is set to
        its 1-based position in ``self.comments``.
        """
        position = len(self.comments) + 1
        comment['id'] = position
        self.comments.append(comment)


def encode_post(post):
    """Return a plain dict describing *post*, ready for JSON serialisation.

    The post's URL is exposed under ``'id'``; ``'title'`` and ``'comments'``
    are passed through unchanged (the comments list is not copied).
    """
    return {
        'id': post.url,
        'title': post.title,
        'comments': post.comments,
    }


class ImportBlogger:
    """Convert a Blogger export into a JSON file of posts and their comments.

    The input is parsed with ``feedparser``; every entry categorised as a
    comment is attached to the post it links to, and every post that ended
    up with at least one comment is written to the output file.
    """

    # Category terms Blogger uses to mark what kind of entry this is.
    TYPE_COMMENT = 'http://schemas.google.com/blogger/2008/kind#comment'
    TYPE_POST = 'http://schemas.google.com/blogger/2008/kind#post'

    def __init__(self, filename_in, filename_out, prefix):
        """Parse the export and remember where to write the result.

        :param filename_in: Blogger dump, handed to ``feedparser.parse``.
        :param filename_out: path of the JSON file written by :meth:`run`.
        :param prefix: string prepended to each post's URL path to form
            the post identifier.
        """
        self.channel = feedparser.parse(filename_in)
        self.filename_out = filename_out
        self.prefix = prefix
        # Created here as well as in run() so that the helper methods can
        # be used on their own without raising AttributeError.
        self.posts = {}

    def run(self):
        """Collect all posts and comments and dump them to ``filename_out``.

        Posts without any comment are left out of the output.
        """
        # Start from scratch so that calling run() twice does not duplicate
        # comments.
        self.posts = {}
        for item in self.channel.entries:
            # Entries without any category have no 'tags' key at all; treat
            # them like entries of an unknown kind and skip them.
            terms = [tag.term for tag in item.get('tags') or []]
            # Membership test rather than terms[0]: the kind marker is not
            # required to be the first category of the entry.
            if self.TYPE_COMMENT in terms:
                post = self.ensure_post(item)
                post.add_comment(self.process_comment(item))
            elif self.TYPE_POST in terms:
                self.process_post(item)

        data = [encode_post(p) for p in self.posts.values() if p.comments]
        with open(self.filename_out, 'w') as fp:
            json.dump(data, fp, indent=2)

    def process_post(self, item):
        """Record the title of the post described by *item*.

        The post is created first if no comment has referenced it yet.
        """
        self.ensure_post(item).title = item.title

    def ensure_post(self, item):
        """Return the post *item* links to, creating it on first use."""
        pid = self.post_id(item)
        post = self.posts.get(pid)
        if post is None:
            post = self.posts[pid] = Post(pid)
        return post

    def process_comment(self, item):
        """Build the comment dict for a comment entry.

        Author name, e-mail and website default to ``''`` when the entry
        does not provide them. ``remote_addr`` is always ``'127.0.0.1'``.
        """
        author = item.get('author_detail') or {}
        # NOTE(review): feedparser documents *_parsed values as UTC; the
        # formatted timestamp carries no zone information -- confirm the
        # consumer interprets it the same way.
        created = time.strftime('%Y-%m-%d %H:%M:%S', item.published_parsed)
        return {
            'author': author.get('name', ''),
            'email': author.get('email', ''),
            'website': author.get('href', ''),
            'created': created,
            'text': item.content[0].value,
            'remote_addr': '127.0.0.1',
        }

    def post_id(self, item):
        """Return the identifier of the post *item* links to.

        Only the path of ``item.link`` is kept (host, query string and
        fragment are dropped) and appended to the configured prefix.
        """
        path = urlparse(item.link).path
        # A prefix such as 'http://myblog.com/' combined with an absolute
        # path would otherwise yield 'http://myblog.com//...'.
        if self.prefix.endswith('/') and path.startswith('/'):
            path = path[1:]
        return self.prefix + path


if __name__ == '__main__':
    # Command-line entry point: read the Blogger dump named by the first
    # positional argument and write the converted JSON to the second one.
    import argparse

    cli = argparse.ArgumentParser(
        description='Convert comments from blogger.com',
    )
    for positional, text in (('input', 'input file'),
                             ('output', 'output file')):
        cli.add_argument(positional, help=text)
    cli.add_argument(
        '-p',
        dest='prefix',
        type=str,
        default='',
        help='prefix to be added to paths (ID)',
    )
    options = cli.parse_args()

    ImportBlogger(options.input, options.output, options.prefix).run()
contrib: Add Blogger importer tool (#529) * contrib: Add Blogger importer tool * doc: fix minor issues in migration documentation 2019-10-13 17:55:17 +00:00			`#!/usr/bin/env python`
			`# -- encoding: utf-8 --`

			`"""Comment importer from Blogger`

			`This python script can convert comments posted to a Blogger-powered blog to a`
			`JSON file with can then be imported into Isso (by following the procedure`
			`explained in docs/docs/extras/advanced-migration.rst.`

			`The script can be run like this:`

			`python import_blogger.py -p 'http://myblog.com/' blogger.xml out.json`

			where `blogger.xml` is a dump of the blog produced by the Blogger platform, and
			the URL following the `-p` option is a prefix that will be applied to all post
			`URLs: the original host will be stripped and the path will be appended to the`
			`string you specify here (this can be useful in the case that your blog moved to`
			`a different domain, subdomain, or just into a new directory).`
			The `out.json` file is the file which will be generated by this tool, and which
			`can then be fed into isso:`

			`isso -c /path/to/isso.cfg import -t generic out.json`
			`"""`

			`from __future__ import unicode_literals`

			`import json`

			`import feedparser`
			`import time`
			`from urllib.parse import urlparse`


			`class Post:`
			`def __init__(self, url):`
			`self.url = url`
			`self.title = None`
			`self.comments = []`

			`def add_comment(self, comment):`
			`comment['id'] = len(self.comments) + 1`
			`self.comments.append(comment)`


			`def encode_post(post):`
			`ret = {}`
			`ret['id'] = post.url`
			`ret['title'] = post.title`
			`ret['comments'] = post.comments`
			`return ret`


			`class ImportBlogger:`
			`TYPE_COMMENT = 'http://schemas.google.com/blogger/2008/kind#comment'`
			`TYPE_POST = 'http://schemas.google.com/blogger/2008/kind#post'`

			`def __init__(self, filename_in, filename_out, prefix):`
			`self.channel = feedparser.parse(filename_in)`
			`self.filename_out = filename_out`
			`self.prefix = prefix`

			`def run(self):`
			`self.posts = {}`
			`for item in self.channel.entries:`
			`terms = [tag.term for tag in item.tags]`
			`if not terms:`
			`continue`
			`if terms[0] == self.TYPE_COMMENT:`
			`post = self.ensure_post(item)`
			`post.add_comment(self.process_comment(item))`
			`elif terms[0] == self.TYPE_POST:`
			`self.process_post(item)`

			`data = [encode_post(p) for p in self.posts.values() if p.comments]`
			`with open(self.filename_out, 'w') as fp:`
			`json.dump(data, fp, indent=2)`

			`def process_post(self, item):`
			`pid = self.post_id(item)`
			`if pid in self.posts:`
			`post = self.posts[pid]`
			`else:`
			`post = Post(pid)`
			`self.posts[pid] = post`
			`post.title = item.title`

			`def ensure_post(self, item):`
			`pid = self.post_id(item)`
			`post = self.posts.get(pid, None)`
			`if not post:`
			`post = Post(pid)`
			`self.posts[pid] = post`
			`return post`

			`def process_comment(self, item):`
			`comment = {}`
			`comment['author'] = item.author_detail.name`
			`comment['email'] = item.author_detail.email`
			`comment['website'] = item.author_detail.get('href', '')`
			`t = time.strftime('%Y-%m-%d %H:%M:%S', item.published_parsed)`
			`comment['created'] = t`
			`comment['text'] = item.content[0].value`
			`comment['remote_addr'] = '127.0.0.1'`
			`return comment`

			`def post_id(self, item):`
			`u = urlparse(item.link)`
			`return self.prefix + u.path`


			`if __name__ == '__main__':`
			`import argparse`
			`parser = argparse.ArgumentParser(`
			`description='Convert comments from blogger.com')`
			`parser.add_argument('input', help='input file')`
			`parser.add_argument('output', help='output file')`
			`parser.add_argument('-p', dest='prefix',`
			`help='prefix to be added to paths (ID)',`
			`type=str, default='')`
			`args = parser.parse_args()`

			`importer = ImportBlogger(args.input, args.output, args.prefix)`
			`importer.run()`