Merge pull request #415 from facundobatista/generic-importer

Generic importer
This commit is contained in:
Benoît Latinier 2018-05-07 21:13:05 +02:00 committed by GitHub
commit 3d0fdffcb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 123 additions and 7 deletions

View File

@ -86,6 +86,9 @@ In chronological order:
* @p-vitt & @M4a1x * @p-vitt & @M4a1x
* Documentation on troubleshooting for uberspace users * Documentation on troubleshooting for uberspace users
* Facundo Batista <facundo@taniquetil.com.ar>
* Added a generic way to migrate from a json file
* @benjhess * @benjhess
* Optional gravatar support * Optional gravatar support

View File

@ -2,13 +2,14 @@
from __future__ import division, print_function, unicode_literals from __future__ import division, print_function, unicode_literals
import sys
import os
import io
import re
import logging
import textwrap
import functools import functools
import io
import json
import logging
import os
import re
import sys
import textwrap
from time import mktime, strptime, time from time import mktime, strptime, time
from collections import defaultdict from collections import defaultdict
@ -250,6 +251,79 @@ class WordPress(object):
return re.compile("http://wordpress.org/export/(1\.\d)/").search(peek) return re.compile("http://wordpress.org/export/(1\.\d)/").search(peek)
class Generic(object):
    """A generic importer reading threads and comments from a JSON file.

    The source file must contain a JSON list of threads, each item being a
    dict with the following keys:

    - id: a text representing the unique thread id
    - title: the title of the thread
    - comments: the list of comments

    Each item in that list of comments is a dict with the following keys:

    - id: an integer with the unique id of the comment inside the thread
      (it can be repeated among different threads); this is used to order
      the comments inside the thread
    - text: the body of the comment
    - author: the author name
    - email: the author email
    - website: the author's website
    - remote_addr: the remote address to store along with the comment
    - created: a timestamp, in the format "%Y-%m-%d %H:%M:%S"

    Every key listed above is read unconditionally, so a missing key raises
    ``KeyError`` during the migration.
    """

    # Opening of a JSON list of objects. Whitespace is allowed before "[" and
    # between "[" and "{" so that pretty-printed dumps are recognised too.
    _list_of_objects = re.compile(r"\s*\[\s*\{")

    def __init__(self, db, json_file):
        """Remember the target database and the path of the JSON dump.

        :param db: database object exposing ``threads.new`` and ``comments.add``
        :param json_file: path of the JSON file to import
        """
        self.db = db
        self.json_file = json_file
        # Running total of imported comments, reported when migrate() ends.
        self.count = 0

    def insert(self, thread):
        """Process a thread and insert its comments in the DB.

        The thread is created first, then its comments are added in
        ascending order of their ``id``.
        """
        thread_id = thread['id']
        title = thread['title']
        self.db.threads.new(thread_id, title)

        comments = sorted(
            (self._build_comment(raw) for raw in thread['comments']),
            key=lambda comment: comment['id'])
        self.count += len(comments)
        for comment in comments:
            self.db.comments.add(thread_id, comment)

    def migrate(self):
        """Process the input file and fill the DB."""
        with io.open(self.json_file, 'rt', encoding='utf8') as fh:
            threads = json.load(fh)

        progress = Progress(len(threads))
        for i, thread in enumerate(threads):
            progress.update(i, str(i))
            self.insert(thread)

        progress.finish("{0} threads, {1} comments".format(len(threads), self.count))

    def _build_comment(self, raw_comment):
        """Translate one comment of the JSON dump into the dict stored in the DB.

        Raises ``KeyError`` when a required key is missing and ``ValueError``
        when ``created`` does not match the expected format or ``id`` is not
        convertible to an integer.
        """
        return {
            "text": raw_comment['text'],
            "author": raw_comment['author'],
            "email": raw_comment['email'],
            "website": raw_comment['website'],
            # mktime() interprets the parsed timestamp as local time.
            "created": mktime(strptime(raw_comment['created'], "%Y-%m-%d %H:%M:%S")),
            # NOTE(review): mode 1 presumably marks the comment as accepted;
            # confirm against the comments table definition.
            "mode": 1,
            "id": int(raw_comment['id']),
            # The generic format carries no reply structure, so every comment
            # is imported without a parent.
            "parent": None,
            "remote_addr": raw_comment["remote_addr"],
        }

    @classmethod
    def detect(cls, peek):
        """Return if peek looks like the beginning of a JSON file.

        Note that we can not check the JSON properly as we only receive here
        the original file truncated, so we only look for the opening of a
        list of objects, ignoring any whitespace around the brackets.
        """
        return cls._list_of_objects.match(peek) is not None
def autodetect(peek): def autodetect(peek):
if 'xmlns="http://disqus.com' in peek: if 'xmlns="http://disqus.com' in peek:
@ -259,6 +333,9 @@ def autodetect(peek):
if m: if m:
return WordPress return WordPress
if Generic.detect(peek):
return Generic
return None return None

1
isso/tests/generic.json Normal file
View File

@ -0,0 +1 @@
[{"comments": [{"email": "", "remote_addr": "0.0.0.0", "website": "http://www.tigerspice.com", "created": "2005-02-24 04:03:37", "author": "texas holdem", "id": 0, "text": "Great men can't be ruled. by free online poker"}], "id": "/posts/0001/", "title": "Test+post"}, {"comments": [{"email": "105421439@87750645.com", "remote_addr": "0.0.0.0", "website": "", "created": "2005-05-08 06:50:26", "author": "Richard Crinshaw", "id": 0, "text": "Ja-make-a me crazzy mon :)\n"}], "id": "/posts/0007/", "title": "Nat+%26+Miguel"}]

View File

@ -9,7 +9,7 @@ from os.path import join, dirname
from isso import config from isso import config
from isso.db import SQLite3 from isso.db import SQLite3
from isso.migrate import Disqus, WordPress, autodetect from isso.migrate import Disqus, WordPress, autodetect, Generic
conf = config.new({ conf = config.new({
"general": { "general": {
@ -79,6 +79,38 @@ class TestMigration(unittest.TestCase):
self.assertEqual(last["author"], "Letzter :/") self.assertEqual(last["author"], "Letzter :/")
self.assertEqual(last["parent"], None) self.assertEqual(last["parent"], None)
def test_generic(self):
    """Import the bundled generic.json fixture and check what was stored."""
    fixture = join(dirname(__file__), "generic.json")
    tempf = tempfile.NamedTemporaryFile()
    db = SQLite3(tempf.name, conf)

    importer = Generic(db, fixture)
    importer.migrate()

    # Threads get consecutive ids in the order they appear in the fixture.
    expected_threads = (
        ("/posts/0001/", "Test+post", 1),
        ("/posts/0007/", "Nat+%26+Miguel", 2),
    )
    for uri, title, thread_id in expected_threads:
        self.assertEqual(db.threads[uri]["title"], title)
        self.assertEqual(db.threads[uri]["id"], thread_id)

    # The fixture holds two threads with one comment each.
    for query in ("SELECT id FROM threads", "SELECT id FROM comments"):
        rows = db.execute(query).fetchall()
        self.assertEqual(len(rows), 2)

    # Fields are checked in a fixed order for every stored comment.
    fields = ("author", "text", "email", "website", "remote_addr")
    expected_comments = (
        (1, ("texas holdem",
             "Great men can't be ruled. by free online poker",
             "",
             "http://www.tigerspice.com",
             "0.0.0.0")),
        (2, ("Richard Crinshaw",
             "Ja-make-a me crazzy mon :)\n",
             "105421439@87750645.com",
             "",
             "0.0.0.0")),
    )
    for comment_id, values in expected_comments:
        comment = db.comments.get(comment_id)
        for field, value in zip(fields, values):
            self.assertEqual(comment[field], value)
def test_detection(self): def test_detection(self):
wp = """\ wp = """\
@ -98,3 +130,6 @@ class TestMigration(unittest.TestCase):
<disqus xmlns="http://disqus.com" <disqus xmlns="http://disqus.com"
xmlns:dsq="http://disqus.com/disqus-internals"''' xmlns:dsq="http://disqus.com/disqus-internals"'''
self.assertEqual(autodetect(dq), Disqus) self.assertEqual(autodetect(dq), Disqus)
jf = '[{"comments": [{"email": "", "remote_addr": "0.0.0.0", '
self.assertEqual(autodetect(jf), Generic)