Merge pull request #415 from facundobatista/generic-importer
Generic importer
This commit is contained in:
commit
3d0fdffcb7
@ -86,6 +86,9 @@ In chronological order:
|
||||
* @p-vitt & @M4a1x
|
||||
* Documentation on troubleshooting for uberspace users
|
||||
|
||||
* Facundo Batista <facundo@taniquetil.com.ar>
|
||||
* Added a generic way to migrate from a json file
|
||||
|
||||
* @benjhess
|
||||
* Optional gravatar support
|
||||
|
||||
|
@ -2,13 +2,14 @@
|
||||
|
||||
from __future__ import division, print_function, unicode_literals
|
||||
|
||||
import sys
|
||||
import os
|
||||
import io
|
||||
import re
|
||||
import logging
|
||||
import textwrap
|
||||
import functools
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
from time import mktime, strptime, time
|
||||
from collections import defaultdict
|
||||
@ -250,6 +251,79 @@ class WordPress(object):
|
||||
return re.compile("http://wordpress.org/export/(1\.\d)/").search(peek)
|
||||
|
||||
|
||||
class Generic(object):
    """A generic importer.

    The source format is a json with the following format:

    A list of threads, each item being a dict with the following data:

    - id: a text representing the unique thread id
    - title: the title of the thread
    - comments: the list of comments

    Each item in that list of comments is a dict with the following data:

    - id: an integer with the unique id of the comment inside the thread (it can be repeated
      among different threads); this will be used to order the comment inside the thread
    - author: the author name
    - email: the author email
    - website: the author's website
    - remote_addr: the remote address stored with the comment (e.g. "0.0.0.0")
    - created: a timestamp, in the format "%Y-%m-%d %H:%M:%S"
    - text: the body of the comment

    All of the comment keys above are required; a missing one raises KeyError.
    """

    # Start of a JSON document that is a list whose first item is an object;
    # whitespace before and between the two brackets is valid JSON, so allow it.
    _LIST_OF_OBJECTS = re.compile(r"\s*\[\s*\{")

    def __init__(self, db, json_file):
        """Store the target database and the path of the JSON file to import."""
        self.db = db
        self.json_file = json_file
        self.count = 0  # number of comments inserted so far

    def insert(self, thread):
        """Process a thread and insert its comments in the DB."""
        thread_id = thread['id']
        title = thread['title']
        self.db.threads.new(thread_id, title)

        comments = [self._build_comment(raw) for raw in thread['comments']]
        # Insert in ascending comment id, which is the in-thread order.
        comments.sort(key=lambda comment: comment['id'])
        self.count += len(comments)
        for comment in comments:
            self.db.comments.add(thread_id, comment)

    def migrate(self):
        """Process the input file and fill the DB."""
        with io.open(self.json_file, 'rt', encoding='utf8') as fh:
            threads = json.load(fh)
        progress = Progress(len(threads))

        for i, thread in enumerate(threads):
            progress.update(i, str(i))
            self.insert(thread)

        progress.finish("{0} threads, {1} comments".format(len(threads), self.count))

    def _build_comment(self, raw_comment):
        """Convert a comment dict from the JSON file into the dict handed to the DB.

        Raises KeyError if a required key is missing and ValueError if
        'created' does not match the "%Y-%m-%d %H:%M:%S" format.
        """
        return {
            "text": raw_comment['text'],
            "author": raw_comment['author'],
            "email": raw_comment['email'],
            "website": raw_comment['website'],
            # mktime() interprets the parsed timestamp as local time.
            "created": mktime(strptime(raw_comment['created'], "%Y-%m-%d %H:%M:%S")),
            "mode": 1,
            "id": int(raw_comment['id']),
            "parent": None,
            "remote_addr": raw_comment["remote_addr"],
        }

    @classmethod
    def detect(cls, peek):
        """Return if peek looks like the beginning of a JSON file.

        Note that we can not check the JSON properly as we only receive here
        the original file truncated. Leading whitespace and whitespace
        between the opening "[" and "{" are tolerated.
        """
        return cls._LIST_OF_OBJECTS.match(peek) is not None
|
||||
|
||||
|
||||
def autodetect(peek):
|
||||
|
||||
if 'xmlns="http://disqus.com' in peek:
|
||||
@ -259,6 +333,9 @@ def autodetect(peek):
|
||||
if m:
|
||||
return WordPress
|
||||
|
||||
if Generic.detect(peek):
|
||||
return Generic
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
1
isso/tests/generic.json
Normal file
1
isso/tests/generic.json
Normal file
@ -0,0 +1 @@
|
||||
[{"comments": [{"email": "", "remote_addr": "0.0.0.0", "website": "http://www.tigerspice.com", "created": "2005-02-24 04:03:37", "author": "texas holdem", "id": 0, "text": "Great men can't be ruled. by free online poker"}], "id": "/posts/0001/", "title": "Test+post"}, {"comments": [{"email": "105421439@87750645.com", "remote_addr": "0.0.0.0", "website": "", "created": "2005-05-08 06:50:26", "author": "Richard Crinshaw", "id": 0, "text": "Ja-make-a me crazzy mon :)\n"}], "id": "/posts/0007/", "title": "Nat+%26+Miguel"}]
|
@ -9,7 +9,7 @@ from os.path import join, dirname
|
||||
from isso import config
|
||||
|
||||
from isso.db import SQLite3
|
||||
from isso.migrate import Disqus, WordPress, autodetect
|
||||
from isso.migrate import Disqus, WordPress, autodetect, Generic
|
||||
|
||||
conf = config.new({
|
||||
"general": {
|
||||
@ -79,6 +79,38 @@ class TestMigration(unittest.TestCase):
|
||||
self.assertEqual(last["author"], "Letzter :/")
|
||||
self.assertEqual(last["parent"], None)
|
||||
|
||||
def test_generic(self):
    """Import generic.json with the Generic importer and check what was stored."""
    source = join(dirname(__file__), "generic.json")
    tempf = tempfile.NamedTemporaryFile()

    db = SQLite3(tempf.name, conf)
    importer = Generic(db, source)
    importer.migrate()

    # (thread uri, expected title, expected row id)
    expected_threads = (
        ("/posts/0001/", "Test+post", 1),
        ("/posts/0007/", "Nat+%26+Miguel", 2),
    )
    for uri, title, row_id in expected_threads:
        self.assertEqual(db.threads[uri]["title"], title)
        self.assertEqual(db.threads[uri]["id"], row_id)

    # The fixture holds exactly two threads and two comments.
    thread_rows = db.execute("SELECT id FROM threads").fetchall()
    self.assertEqual(len(thread_rows), 2)
    comment_rows = db.execute("SELECT id FROM comments").fetchall()
    self.assertEqual(len(comment_rows), 2)

    # (comment row id, expected (field, value) pairs in checking order)
    expected_comments = (
        (1, (
            ("author", "texas holdem"),
            ("text", "Great men can't be ruled. by free online poker"),
            ("email", ""),
            ("website", "http://www.tigerspice.com"),
            ("remote_addr", "0.0.0.0"),
        )),
        (2, (
            ("author", "Richard Crinshaw"),
            ("text", "Ja-make-a me crazzy mon :)\n"),
            ("email", "105421439@87750645.com"),
            ("website", ""),
            ("remote_addr", "0.0.0.0"),
        )),
    )
    for comment_id, fields in expected_comments:
        comment = db.comments.get(comment_id)
        for field, value in fields:
            self.assertEqual(comment[field], value)
|
||||
|
||||
def test_detection(self):
|
||||
|
||||
wp = """\
|
||||
@ -98,3 +130,6 @@ class TestMigration(unittest.TestCase):
|
||||
<disqus xmlns="http://disqus.com"
|
||||
xmlns:dsq="http://disqus.com/disqus-internals"'''
|
||||
self.assertEqual(autodetect(dq), Disqus)
|
||||
|
||||
jf = '[{"comments": [{"email": "", "remote_addr": "0.0.0.0", '
|
||||
self.assertEqual(autodetect(jf), Generic)
|
||||
|
Loading…
Reference in New Issue
Block a user