From 91e63c7e5f92e2064f8195d413ee4f1b94ea2a28 Mon Sep 17 00:00:00 2001 From: Martin Zimmermann Date: Wed, 11 Jun 2014 09:52:20 +0200 Subject: [PATCH] simplify import format detection --- isso/migrate.py | 60 +++++++++++++++++------------------- isso/tests/test_migration.py | 9 +++--- isso/tests/wordpress.xml | 2 +- 3 files changed, 34 insertions(+), 37 deletions(-) diff --git a/isso/migrate.py b/isso/migrate.py index 7afd736..c3f0500 100644 --- a/isso/migrate.py +++ b/isso/migrate.py @@ -6,6 +6,7 @@ import sys import os import io import re +import logging import textwrap from time import mktime, strptime, time @@ -26,6 +27,7 @@ except ImportError: from xml.etree import ElementTree +logger = logging.getLogger("isso") def strip(val): if isinstance(val, string_types): @@ -146,14 +148,6 @@ class Disqus(object): initial_indent=" ", subsequent_indent=" ")) print("") - @classmethod - def detect(cls, peek): - - if 'xmlns="http://disqus.com' in peek: - return "http://disqus.com" - - return None - class WordPress(object): @@ -164,11 +158,13 @@ class WordPress(object): self.xmlfile = xmlfile self.count = 0 - with io.open(xmlfile) as fp: - ns = WordPress.detect(fp.read(io.DEFAULT_BUFFER_SIZE)) - - if ns: - self.ns = "{" + ns + "}" + for line in io.open(xmlfile): + m = WordPress.detect(line) + if m: + self.ns = WordPress.ns.replace("1.0", m.group(1)) + break + else: + logger.warn("No WXR namespace found, assuming 1.0") def insert(self, thread): @@ -242,12 +238,19 @@ class WordPress(object): @classmethod def detect(cls, peek): + return re.compile("http://wordpress.org/export/(1\.\d)/").search(peek) + - m = re.search("http://wordpress.org/export/1\.\d/", peek) - if m: - return m.group(0) +def autodetect(peek): - return None + if 'xmlns="http://disqus.com' in peek: + return Disqus + + m = WordPress.detect(peek) + if m: + return WordPress + + return None def dispatch(type, db, dump): @@ -255,20 +258,15 @@ def dispatch(type, db, dump): if input("Isso DB is not empty! Continue? [y/N]: ") not in ("y", "Y"): raise SystemExit("Abort.") - if type is None: - + if type == "disqus": + cls = Disqus + elif type == "wordpress": + cls = WordPress + else: with io.open(dump) as fp: - peek = fp.read(io.DEFAULT_BUFFER_SIZE) + cls = autodetect(fp.read(io.DEFAULT_BUFFER_SIZE)) - if WordPress.detect(peek): - type = "wordpress" - - if Disqus.detect(peek): - type = "disqus" - - if type == "wordpress": - WordPress(db, dump).migrate() - elif type == "disqus": - Disqus(db, dump).migrate() - else: + if cls is None: raise SystemExit("Unknown format, abort.") + + cls(db, dump).migrate() diff --git a/isso/tests/test_migration.py b/isso/tests/test_migration.py index 857eff1..4b08b94 100644 --- a/isso/tests/test_migration.py +++ b/isso/tests/test_migration.py @@ -13,7 +13,7 @@ from os.path import join, dirname from isso.core import Config from isso.db import SQLite3 -from isso.migrate import Disqus, WordPress +from isso.migrate import Disqus, WordPress, autodetect class TestMigration(unittest.TestCase): @@ -82,14 +82,13 @@ class TestMigration(unittest.TestCase): xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:wp="http://wordpress.org/export/%s/">""" - self.assertEqual(WordPress.detect(wp % "invalid"), None) + self.assertEqual(autodetect(wp % "foo"), None) for version in ("1.0", "1.1", "1.2", "1.3"): - self.assertEqual(WordPress.detect(wp % version), - "http://wordpress.org/export/%s/" % version) + self.assertEqual(autodetect(wp % version), WordPress) dq = '''\ http://example.tld/?p=6 - \ No newline at end of file +