Compare commits

...

5 Commits

Author SHA1 Message Date
Martin Zimmermann
b359d077b9 commit wynauts-comment wip 2014-01-02 12:10:50 +01:00
Martin Zimmermann
67dcf43ac6 fix IOError when piping wynaut-export to head(1) 2013-12-25 11:46:55 +01:00
Martin Zimmermann
d81a6f6b5c add wynaut-export with CSV support 2013-12-24 10:19:20 +01:00
Martin Zimmermann
1641f7f9c9 refactor wynaut import to wynaut-import 2013-12-24 10:18:34 +01:00
Martin Zimmermann
770dbf48af wynaut import 2013-12-19 22:13:06 +01:00
9 changed files with 408 additions and 133 deletions

View File

View File

@ -62,7 +62,7 @@ from werkzeug.contrib.profiler import ProfilerMiddleware
local = Local()
local_manager = LocalManager([local])
from isso import db, migrate, wsgi, ext, views
from isso import db, wsgi, ext, views
from isso.core import ThreadedMixin, ProcessMixin, uWSGIMixin, Config
from isso.utils import parse, http, JSONRequest, origin
from isso.views import comments
@ -194,24 +194,11 @@ def main():
parser.add_argument("-c", dest="conf", default="/etc/isso.conf",
metavar="/etc/isso.conf", help="set configuration file")
imprt = subparser.add_parser('import', help="import Disqus XML export")
imprt.add_argument("dump", metavar="FILE")
imprt.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
help="perform a trial run with no changes made")
serve = subparser.add_parser("run", help="run server")
args = parser.parse_args()
conf = Config.load(args.conf)
if args.command == "import":
xxx = tempfile.NamedTemporaryFile()
dbpath = conf.get("general", "dbpath") if not args.dryrun else xxx.name
conf.set("guard", "enabled", "off")
migrate.disqus(db.SQLite3(dbpath, conf), args.dump)
sys.exit(0)
if conf.get("server", "listen").startswith("http://"):
host, port, _ = parse.host(conf.get("server", "listen"))
try:

View File

@ -1,114 +0,0 @@
# -*- encoding: utf-8 -*-
from __future__ import division
import sys
import os
import textwrap
from time import mktime, strptime
from collections import defaultdict
try:
input = raw_input
except NameError:
pass
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
from xml.etree import ElementTree
ns = '{http://disqus.com}'
dsq = '{http://disqus.com/disqus-internals}'
threads = set([])
comments = set([])
def insert(db, thread, posts):
    """Store one Disqus thread together with its posts.

    The thread is created (keyed by the path component of its link) unless
    the database already knows that path.  Posts are added oldest first so
    that a reply can be linked to the id its parent received on insertion.

    :param db: database object providing ``threads`` and ``comments``
    :param thread: ``thread`` element of the Disqus export
    :param posts: list of comment dicts carrying ``dsq:id``, ``created`` and
                  optionally ``dsq:parent``; the ``dsq:*`` keys are removed
    """
    link = thread.find(ns + 'link').text
    uri = urlparse(link).path

    if uri not in db.threads:
        title = thread.find(ns + 'title').text
        db.threads.new(uri, title.strip())

    # Disqus post id -> id returned by the database for the stored comment
    id_map = {}
    for post in sorted(posts, key=lambda p: p['created']):
        disqus_id = post.pop('dsq:id')
        disqus_parent = post.pop('dsq:parent', None)
        post['parent'] = id_map.get(disqus_parent)
        stored = db.comments.add(uri, post)
        id_map[disqus_id] = stored["id"]

    # remember every migrated Disqus id for the orphan report
    comments.update(id_map)
def disqus(db, xmlfile):
    """Import a Disqus XML export into an Isso database.

    Asks for confirmation on stdin when the ``comments`` table already holds
    rows, groups all ``post`` elements by the thread they reference, inserts
    each thread that has posts via :func:`insert` and finally prints the
    posts that were never inserted ("orphans").

    :param db: database object; ``execute`` and ``comments`` are used here,
               ``threads`` and ``comments`` in :func:`insert`
    :param xmlfile: source handed straight to ``ElementTree.parse``
    :raises SystemExit: if the database is not empty and the user does not
                        answer ``y``/``Y``
    """
    if db.execute("SELECT * FROM comments").fetchone():
        if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
            raise SystemExit("Abort.")
    tree = ElementTree.parse(xmlfile)
    # Disqus thread id -> list of comment dicts belonging to that thread
    res = defaultdict(list)
    for post in tree.findall('%spost' % ns):
        item = {
            'dsq:id': post.attrib.get(dsq + 'id'),
            'text': post.find('%smessage' % ns).text,
            'author': post.find('%sauthor/%sname' % (ns, ns)).text,
            'email': post.find('%sauthor/%semail' % (ns, ns)).text,
            # NOTE(review): the timestamp ends in "Z" but mktime() reads the
            # struct as local time -- confirm that this is intended
            'created': mktime(strptime(
                post.find('%screatedAt' % ns).text, '%Y-%m-%dT%H:%M:%SZ')),
            'remote_addr': '127.0.0.0',
            # presumably 1 = visible and 4 = deleted in Isso -- verify
            'mode': 1 if post.find("%sisDeleted" % ns).text == "false" else 4
        }
        if post.find(ns + 'parent') is not None:
            item['dsq:parent'] = post.find(ns + 'parent').attrib.get(dsq + 'id')
        res[post.find('%sthread' % ns).attrib.get(dsq + 'id')].append(item)
    num = len(tree.findall('%sthread' % ns))
    # second field of `stty size` is the column count; "25 80" is the
    # fallback when the command prints nothing
    cols = int((os.popen('stty size', 'r').read() or "25 80").split()[1])
    for i, thread in enumerate(tree.findall('%sthread' % ns)):
        # redraw the progress line only when the integer percentage is a
        # multiple of 13
        if int(round((i+1)/num, 2) * 100) % 13 == 0:
            # blank the current terminal line before printing the new status
            sys.stdout.write("\r%s" % (" "*cols))
            sys.stdout.write("\r[%i%%] %s" % (((i+1)/num * 100), thread.find('%sid' % ns).text))
            sys.stdout.flush()
        # skip (possibly?) duplicate, but empty thread elements
        if thread.find('%sid' % ns).text is None:
            continue
        id = thread.attrib.get(dsq + 'id')
        # threads without any post are not created
        if id in res:
            threads.add(id)
            insert(db, thread, res[id])
    # in case a comment has been deleted (and has no further children)
    db.comments._remove_stale()
    sys.stdout.write("\r%s" % (" "*cols))
    sys.stdout.write("\r[100%%] %i threads, %i comments\n" % (len(threads), len(comments)))
    # posts present in the export whose id was never recorded by insert()
    orphans = set(map(lambda e: e.attrib.get(dsq + "id"), tree.findall("%spost" % ns))) - comments
    if orphans:
        print("Found %i orphans:" % len(orphans))
        for post in tree.findall("%spost" % ns):
            if post.attrib.get(dsq + "id") not in orphans:
                continue
            print(" * %s by %s <%s>" % (post.attrib.get(dsq + "id"),
                                        post.find("%sauthor/%sname" % (ns, ns)).text,
                                        post.find("%sauthor/%semail" % (ns, ns)).text))
            print(textwrap.fill(post.find("%smessage" % ns).text,
                                initial_indent=" ", subsequent_indent=" "))
            print("")

View File

@ -38,7 +38,11 @@ setup(
],
install_requires=requires,
entry_points={
'console_scripts':
['isso = isso:main'],
'console_scripts': [
'isso = isso:main',
'wynaut-import = wynaut.imprt:main',
'wynaut-export = wynaut.export:main',
'wynaut-comments = wynaut.comments:main'
],
},
)

View File

@ -6,7 +6,7 @@ from os.path import join, dirname
from isso.core import Config
from isso.db import SQLite3
from isso.migrate import disqus
from wynaut.imprt import Disqus
def test_disqus():
@ -15,12 +15,13 @@ def test_disqus():
xxx = tempfile.NamedTemporaryFile()
db = SQLite3(xxx.name, Config.load(None))
disqus(db, xml)
dsq = Disqus(xml)
dsq.migrate(db)
assert db.threads["/"]["title"] == "Hello, World!"
assert db.threads["/"]["id"] == 1
a = db.comments.get(1)
assert a["author"] == "peter"

20
wynaut/__init__.py Normal file
View File

@ -0,0 +1,20 @@
# -*- encoding: utf-8 -*-
import pkg_resources
dist = pkg_resources.get_distribution("isso")
import os
from argparse import ArgumentParser, SUPPRESS
def get_parser(desc):
    """Create the argument parser shared by the wynaut command line tools.

    The parser understands ``--version`` (hidden from the help output) and
    ``-c`` for the configuration file, which defaults to the value of the
    ``ISSO_SETTINGS`` environment variable (``None`` when unset).

    :param desc: description shown in the tool's ``--help`` output
    :return: an ``ArgumentParser`` the caller can extend with own arguments
    """
    parser = ArgumentParser(description=desc)
    # the trailing space keeps program name and version number apart
    parser.add_argument('--version', action='version',
                        version='%(prog)s ' + dist.version, help=SUPPRESS)
    parser.add_argument('-c', dest="conf", default=os.environ.get("ISSO_SETTINGS"),
                        metavar="/etc/isso.conf", help="set configuration file")
    return parser

112
wynaut/comments.py Normal file
View File

@ -0,0 +1,112 @@
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals, print_function
import sys
import os
import io
import shlex
import tempfile
import subprocess
from isso.db import SQLite3
from isso.core import Config
from wynaut import get_parser
def read(fp):
    """Parse a comment from the text format produced by :func:`write`.

    The input starts with ``key: value`` header lines, followed by a line
    containing only ``---`` and then the comment text.  Blank header lines
    are ignored and the separator is recognised with or without a trailing
    newline.

    :param fp: text file object positioned at the first header line
    :return: dict of the header fields plus ``text``; ``created`` and
             ``modified`` are converted to ``float``, ``likes`` and
             ``dislikes`` to ``int`` when they are not empty
    :raises ValueError: if a non-blank header line contains no colon or a
                        numeric field cannot be converted
    """
    comment = {}

    for line in fp:
        stripped = line.strip()
        if stripped == "---":
            break
        if not stripped:
            # tolerate empty lines an editor may have left in the header
            continue
        key, sep, value = line.partition(":")
        if not sep:
            raise ValueError("malformed header line: %r" % line)
        comment[key.strip()] = value.strip()

    # everything after the separator is the comment body, kept verbatim
    comment["text"] = fp.read()

    for key in ("created", "modified"):
        if comment.get(key):
            comment[key] = float(comment[key])

    for key in ("likes", "dislikes"):
        if comment.get(key):
            comment[key] = int(comment[key])

    return comment
def write(fp, comment, empty=False):
    """Serialise *comment* into the editable text format read by :func:`read`.

    Header fields are written as ``key: value`` lines, then a ``---``
    separator, then the comment text.

    :param fp: text file object to write to
    :param comment: mapping with at least a ``text`` entry; header fields
                    may be missing or ``None``
    :param empty: also write header fields that have no value, so they can
                  be filled in by hand
    """
    for key in ("author", "email", "website", "remote_addr", "created",
                "modified", "likes", "dislikes"):
        # .get(): a field missing from the mapping is treated like None
        value = comment.get(key)
        if not (value or empty):
            continue
        # only None becomes blank; a numeric 0 must survive the round trip
        fp.write("{0}: {1}\n".format(key, "" if value is None else value))

    fp.write("---\n")
    fp.write(comment["text"])
def main():
    """Entry point of ``wynaut-comments``: list, show, edit or remove comments.

    Sub-commands:

    * ``list`` -- print id and text (newlines replaced by spaces) of every
      comment
    * ``show N`` -- print comment *N* in the format produced by ``write``
    * ``edit N`` -- open comment *N* in ``$EDITOR`` (default ``nano``) and
      store the result
    * ``rm N`` -- delete comment *N*

    :raises SystemExit: if the requested comment does not exist or the
                        editor was terminated by a signal
    """
    parser = get_parser("Administrate comments stored in Isso's SQLite3.")
    subparsers = parser.add_subparsers(help="commands", dest="command")

    subparsers.add_parser("list", help="list comments")

    parser_show = subparsers.add_parser("show", help="show comment")
    parser_show.add_argument("id", metavar="N", type=int)
    parser_show.add_argument("--empty", dest="empty", action="store_true")

    parser_edit = subparsers.add_parser("edit", help="edit comment")
    parser_edit.add_argument("id", metavar="N", type=int)
    parser_edit.add_argument("--empty", dest="empty", action="store_true")

    parser_rm = subparsers.add_parser("rm", help="remove comment")
    parser_rm.add_argument("id", metavar="N", type=int)
    # -r belongs to "rm": that is the only command reading args.recursive
    parser_rm.add_argument("-r", dest="recursive", action="store_true")

    args = parser.parse_args()
    conf = Config.load(args.conf)
    db = SQLite3(conf.get("general", "dbpath"), conf)

    if args.command == "show":
        comment = db.comments.get(args.id)
        if comment is None:
            raise SystemExit("no such id: %i" % args.id)
        write(sys.stdout, comment, empty=args.empty)

    elif args.command == "list":
        # the lines are encoded by hand, so write to the byte stream where
        # one exists (Python 3); Python 2's sys.stdout takes bytes directly
        out = getattr(sys.stdout, "buffer", sys.stdout)
        for (comment_id, text) in db.execute("SELECT id, text FROM comments").fetchall():
            line = "{0:>3}: {1}\n".format(comment_id, text.replace("\n", " "))
            out.write(line.encode("utf-8"))
            out.flush()

    elif args.command == "edit":
        comment = db.comments.get(args.id)
        if comment is None:
            raise SystemExit("no such id: %i" % args.id)

        xxx = tempfile.NamedTemporaryFile()
        try:
            with io.open(xxx.name, "w") as fp:
                write(fp, comment, empty=args.empty)

            retcode = subprocess.call(shlex.split(
                os.environ.get("EDITOR", "nano")) + [xxx.name])
            # subprocess reports death by signal N as return code -N
            if retcode < 0:
                raise SystemExit("Child was terminated by signal %i" % -retcode)

            with io.open(xxx.name, "r") as fp:
                db.comments.update(args.id, read(fp))
        finally:
            xxx.close()

    elif args.command == "rm":
        # NOTE(review): nothing is done when -r is given -- recursive
        # removal is not handled in this function
        if not args.recursive:
            rv = db.comments.delete(args.id)
            if rv:
                print("comment is still referenced")

81
wynaut/export.py Normal file
View File

@ -0,0 +1,81 @@
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals
import sys
from io import StringIO
from csv import writer as csv_writer
from isso.db import SQLite3
from isso.core import Config
from isso.compat import PY2K, text_type as str
from wynaut import get_parser
if PY2K:
    # keep a handle on the real class; the name StringIO is rebound below
    _StringIO = StringIO

    class StringIO(_StringIO):
        """StringIO variant whose ``write`` takes UTF-8 encoded byte strings."""

        def write(self, data):
            """Decode *data* from UTF-8 and append it to the buffer."""
            return super(StringIO, self).write(data.decode("utf-8"))
def csv(db, threads=True, comments=False):
    """
    Print threads *or* comments formatted as CSV (Excel dialect). Rows are
    separated by comma and `None` is replaced with the empty string.

    The first line is always a row containing the identifiers per column.

    :param db: database object whose ``execute`` runs the SELECT statement
    :param threads: export the ``threads`` table if true, otherwise the
                    ``comments`` table
    :param comments: not evaluated; comments are exported whenever
                     *threads* is false
    """
    fp = StringIO()
    writer = csv_writer(fp, dialect="excel")

    def cell(val):
        # Python 2's csv module wants encoded byte strings, Python 3's wants
        # text -- encoding there would emit the repr "b'...'" into the cell
        text = "" if val is None else str(val)
        return text.encode("utf-8") if PY2K else text

    if threads:
        writer.writerow(["id", "uri", "title"])
        query = db.execute("SELECT id, uri, title FROM threads").fetchall()
    else:
        fields = ["id", "parent", "created", "modified", "mode", "remote_addr",
                  "author", "email", "website", "likes", "dislikes", "text"]
        writer.writerow(fields)
        query = db.execute("SELECT %s FROM comments" % ",".join(fields))

    for row in query:
        writer.writerow([cell(val) for val in row])

    fp.seek(0)

    for line in fp:
        try:
            # only Python 2's stdout takes encoded bytes
            sys.stdout.write(line.encode("utf-8") if PY2K else line)
            sys.stdout.flush()
        except IOError:  # head(1) reads from stdout then closes it.
            break
def main():
    """Entry point of ``wynaut-export``.

    Parses the command line, opens the database named by the ``dbpath``
    option of the ``general`` section and runs the selected export.

    :raises SystemExit: if a CSV export is requested without ``--threads``
                        or ``--comments``
    """
    cli = get_parser("export to various formats")
    cli.add_argument("-t", "--to", dest="type", choices=["csv"],
                     help="export format", required=True)

    # at most one of the two tables may be selected
    selection = cli.add_mutually_exclusive_group()
    selection.add_argument("--threads", action="store_true",
                           help="export threads (only for csv)")
    selection.add_argument("--comments", action="store_true",
                           help="export comments (only for csv)")

    options = cli.parse_args()
    settings = Config.load(options.conf)
    database = SQLite3(settings.get("general", "dbpath"), settings)

    if options.type != "csv":
        return

    if not (options.threads or options.comments):
        raise SystemExit("CSV export needs either --comments or --threads")

    csv(database, options.threads, options.comments)

184
wynaut/imprt.py Normal file
View File

@ -0,0 +1,184 @@
# -*- encoding: utf-8 -*-
from __future__ import division
import sys
import os
import time
import tempfile
import textwrap
from time import mktime, strptime
from xml.etree import ElementTree
from collections import defaultdict
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
try:
input = raw_input
except NameError:
pass
from werkzeug.utils import cached_property
from isso.db import SQLite3
from isso.core import Config
from wynaut import get_parser
class Import(object):
    """Base class for importers; draws a one-line progress display on stdout."""

    # terminal width assumed when `stty size` yields nothing usable
    default_cols = 80

    def __init__(self):
        # time of the last redraw, used by progress() for throttling
        self.last = 0

        pipe = os.popen('stty size', 'r')
        try:
            output = pipe.read()
        finally:
            pipe.close()
        self.cols = self._parse_cols(output)

    @classmethod
    def _parse_cols(cls, output):
        """Return the column count from ``stty size`` output ("ROWS COLS").

        Falls back to ``default_cols`` when the output is empty or not
        numeric, e.g. because stdin is not a terminal.
        """
        try:
            return int(output.split()[1])
        except (IndexError, ValueError):
            return cls.default_cols

    def progress(self, current, max, msg):
        """Redraw the status line as ``[percent%] msg``.

        Calls made less than 0.1 seconds after the previous redraw are
        dropped.

        :param current: number of items processed so far
        :param max: total number of items
        :param msg: text shown after the percentage, cut to the line width
        """
        if time.time() - self.last < 0.1:
            return

        # blank the line first so a shorter message leaves no residue
        sys.stdout.write("\r{0}".format(" "*self.cols))
        sys.stdout.write("\r[{0:.3}%] {1:.{2}}".format(
            current/max*100, msg.strip(), self.cols - 9))
        sys.stdout.flush()

        self.last = time.time()

    def done(self, msg):
        """Replace the status line with ``[100%] msg`` and end the line."""
        sys.stdout.write("\r{0}".format(" "*self.cols))
        sys.stdout.write("\r[100%] {0}\n".format(msg.strip()))
        sys.stdout.flush()
class Disqus(Import):
    """Importer for Disqus XML exports.

    Usage: ``Disqus(xmlfile).migrate(db)``.
    """

    # XML namespaces of the export's elements and of its ``id`` attributes
    ns = '{http://disqus.com}'
    internals = '{http://disqus.com/disqus-internals}'

    def __init__(self, xmlfile):
        """Parse *xmlfile* (anything ``ElementTree.parse`` accepts)."""
        super(Disqus, self).__init__()

        self.tree = ElementTree.parse(xmlfile)
        # Disqus ids of the threads and posts that were actually migrated
        self._threads = set()
        self._posts = set()

    @staticmethod
    def _text(node, path):
        """Return the text of the element at *path* below *node*.

        Yields ``None`` both when the element is missing and when it is
        empty, so optional elements do not need a separate existence check.
        """
        child = node.find(path)
        return None if child is None else child.text

    @cached_property
    def threads(self):
        """All ``thread`` elements that carry a non-empty ``id`` child."""
        tag = "{0}id".format(Disqus.ns)
        return [thr for thr in self.tree.findall("{0}thread".format(Disqus.ns))
                if self._text(thr, tag) is not None]

    @cached_property
    def posts(self):
        """All ``post`` elements of the export."""
        return self.tree.findall("{0}post".format(Disqus.ns))

    def migrate(self, db):
        """Copy threads and posts of the export into *db*.

        Only threads that have at least one post are created.  Afterwards a
        summary is printed, followed by every post that was not inserted
        ("orphans").

        :param db: database object providing ``threads`` and ``comments``
        """
        ns, internals = Disqus.ns, Disqus.internals
        author_name = '{0}author/{0}name'.format(ns)
        author_email = '{0}author/{0}email'.format(ns)

        # map thread id to list of posts
        rv = defaultdict(list)

        for post in self.posts:
            item = {
                'dsq:id': post.attrib.get(internals + 'id'),
                'text': self._text(post, ns + 'message'),
                # name and e-mail are read via _text: absent elements become
                # None instead of raising AttributeError
                'author': self._text(post, author_name),
                'email': self._text(post, author_email),
                # NOTE(review): the timestamp ends in "Z" but mktime() reads
                # the struct as local time -- confirm that this is intended
                'created': mktime(strptime(
                    post.find(ns + 'createdAt').text, '%Y-%m-%dT%H:%M:%SZ')),
                'remote_addr': '127.0.0.0',
                # presumably 1 = visible and 4 = deleted in Isso -- verify
                'mode': 1 if post.find(ns + "isDeleted").text == "false" else 4
            }

            parent = post.find(ns + 'parent')
            if parent is not None:
                item['dsq:parent'] = parent.attrib.get(internals + 'id')

            rv[post.find(ns + 'thread').attrib.get(internals + 'id')].append(item)

        # self.threads already leaves out the (possibly duplicate) thread
        # elements with an empty id, so no further check is needed here
        total = len(self.threads)
        for i, thread in enumerate(self.threads):
            self.progress(i, total, thread.find(ns + 'id').text)

            thread_id = thread.attrib.get(internals + 'id')
            if thread_id in rv:
                self._threads.add(thread_id)
                self._insert(db, thread, rv[thread_id])

        # in case a comment has been deleted (and has no further children)
        db.comments._remove_stale()

        self.done("{0} threads, {1} comments".format(len(self._threads), len(self._posts)))

        # posts of the export whose id was never recorded by _insert()
        orphans = set(post.attrib.get(internals + "id") for post in self.posts) - self._posts
        if orphans:
            print("Found %i orphans:" % len(orphans))
            for post in self.posts:
                post_id = post.attrib.get(internals + "id")
                if post_id not in orphans:
                    continue

                print(" * %s by %s <%s>" % (post_id,
                                            self._text(post, author_name),
                                            self._text(post, author_email)))
                # an empty <message/> has text None, which fill() rejects
                print(textwrap.fill(self._text(post, ns + "message") or "",
                                    initial_indent=" ", subsequent_indent=" "))

    def _insert(self, db, thread, posts):
        """Store *thread* and its *posts*, oldest post first.

        Inserting in chronological order lets a reply be linked to the id
        its parent received from the database.
        """
        path = urlparse(thread.find(Disqus.ns + 'link').text).path
        # Disqus post id -> id returned by the database
        remap = dict()

        if path not in db.threads:
            title = self._text(thread, Disqus.ns + 'title') or ""
            db.threads.new(path, title.strip())

        for item in sorted(posts, key=lambda k: k['created']):
            dsq_id = item.pop('dsq:id')
            item['parent'] = remap.get(item.pop('dsq:parent', None))
            rv = db.comments.add(path, item)
            remap[dsq_id] = rv["id"]

        self._posts.update(remap)
def main():
    """Entry point of ``wynaut-import``.

    Reads the dump given on the command line and migrates it into the
    database named by the ``dbpath`` option of the ``general`` section.
    With ``--dry-run`` a temporary database is used instead.  The source
    format defaults to ``disqus``.

    :raises SystemExit: if the chosen format has no importer, or the
                        database already contains comments and the user
                        does not confirm
    """
    parser = get_parser("import Disqus XML export")
    parser.add_argument("dump", metavar="FILE")
    parser.add_argument("-y", "--yes", dest="yes", action="store_true",
                        help="always confirm actions")
    parser.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
                        help="perform a trial run with no changes made")
    # without a default, omitting -f left no importer to run
    parser.add_argument("-f", "--from", dest="type", choices=["disqus", "csv"],
                        default="disqus")

    args = parser.parse_args()
    conf = Config.load(args.conf)

    # keep the object referenced until the end of the function: the
    # temporary file backing a dry run disappears once it is closed
    xxx = tempfile.NamedTemporaryFile()
    dbpath = conf.get("general", "dbpath") if not args.dryrun else xxx.name

    if args.type == "disqus":
        importer = Disqus(args.dump)
    else:
        # fail before touching the database instead of hitting a NameError
        # on the unbound importer further down
        raise SystemExit("import from '%s' is not implemented" % args.type)

    db = SQLite3(dbpath, conf)

    if db.execute("SELECT * FROM comments").fetchone():
        if not args.yes and input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"):
            raise SystemExit("Abort.")

    importer.migrate(db)