qubes-installer-qubes-os/anaconda/tests/lib/pangocheck.py

#
# pangocheck.py: data and methods for checking pango markup strings
#
# Copyright (C) 2014  Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Author: David Shea <dshea@redhat.com>

import re
from collections import Counter

# Names exported by "from pangocheck import *". markup_necessary is a public
# check defined in this module like the others, so it is exported as well.
__all__ = ["markup_nodes", "is_markup", "markup_match", "markup_necessary"]

# "a" isn't actually pango markup, but GtkLabel uses it
markup_nodes = [
    "markup",
    "a", "b", "big", "i", "s", "span", "sub", "sup", "small", "tt", "u",
]

# Check to see if a string looks like Pango markup, no validation.
# Returns True as soon as an opening tag for any of the element names in
# markup_nodes is found anywhere in test_string, and False otherwise.
def is_markup(test_string):
    for node_type in markup_nodes:
        # "<", optional whitespace, the element name, then either whitespace
        # (attributes follow) or ">" (the tag ends right after the name)
        opening_tag = r'<\s*%s(\s|>)' % node_type
        if re.search(opening_tag, test_string):
            return True

    return False

# Verify that the translation of a markup string looks more or less like the original.
#
# Both strings are reduced to a count of tag names and a count of the
# whitespace-separated tokens found after the name inside each tag. The strings
# match when both counts are equal; tag order and nesting are not compared.
#
# Returns True if xlated_markup contains the same tags and attribute tokens as
# orig_markup, False otherwise.
def markup_match(orig_markup, xlated_markup):
    # Look for tags. Create a count of each kind of tag and a count of attributes.
    # "Don't parse XML with regular expressions" I can hear you saying, but we're
    # not trying to match elements, just pull tag-like substrings out of the string.
    # Figuring out if tags are closed or in the right order is someone else's job.
    def _parse_markup(markup_string):
        name_count = Counter()
        attr_count = Counter()

        # Group 1 is the element name: everything up to the first space, / or >.
        # Group 2 is the rest of the tag up to the closing >. [^>]* is used
        # instead of .* so that a tag broken across lines is still parsed
        # (. does not match a newline, which previously made the match fail).
        # For a closing tag such as </b> the name is empty and "/b" is counted
        # as an attribute token, so closing tags still take part in the comparison.
        for (name, rest) in re.findall(r'<([^\s/>]*)([^>]*)>', markup_string):
            name_count[name] += 1

            # Strip the / from the rest of the tag, if present
            if rest.endswith('/'):
                rest = rest[:-1]

            # Count the attributes that need to be contained in the other string
            attr_count.update(rest.split())

        return (name_count, attr_count)

    # Every stored count is positive, so comparing the Counters directly is
    # the same as comparing the sorted lists of their elements.
    return _parse_markup(orig_markup) == _parse_markup(xlated_markup)

# Check that the markup is needed at all.
# The input is a parsed ElementTree of the string '<markup>pango markup goes here</markup>'
# The markup is unnecessary if the only markup in the string wraps the whole of the
# remaining text, because then the pango attributes apply to the entire string and
# could be expressed using attribute lists instead. For example, strings like:
#   <b>Bold text</b>
# or
#   <span foreground="grey"><i>colorful</i></span>
# do not need markup, while strings like:
#   <span size="small">This string contains <b>internal</b> markup</span>
# contain markup that must be passed to the translators.
#
# This function returns True if the markup is necessary and False if the markup
# can be discarded and expressed as attribute lists.
def markup_necessary(markup_tree):
    # A note on how ElementTree stores text, since it differs from DOM:
    # element.text is the text leading up to the first child element, and
    # element[child_idx].tail is the text that follows that child inside element.
    # The tail is stored on the child even though it belongs to the parent.
    #
    # A string like "<markup>word1<i>word2</i>word3<empty/>word4</markup>" will result in
    #   tree == <Element 'markup' ...>
    #   tree.text == 'word1'
    #   tree[0] == <Element 'i' ...>
    #   tree[0].text == 'word2'
    #   tree[0].tail == 'word3'
    #   tree[1] == <Element 'empty' ...>
    #   tree[1].text == None
    #   tree[1].tail == 'word4'
    #
    # So an element with text before a child element has .text set, and an
    # element with text after a child element has .tail set on that child.

    # Walk down the chain of only-children, starting at the root
    node = markup_tree
    while True:
        child_count = len(node)

        # No children at all: nothing but text is left inside, so every tag
        # seen so far wrapped the whole string and the markup is unnecessary.
        if child_count == 0:
            return False

        # More than one child: the markup is necessary
        if child_count > 1:
            return True

        only_child = node[0]

        # Text before the child, as in <span>text <b>child</b></span>, or text
        # between the close of the child and the end of this element, means
        # the child covers only part of the string and the markup is necessary.
        if node.text or only_child.tail:
            return True

        # The single child wraps everything; repeat the checks one level down
        node = only_child
anaconda: update to 21.48.21-1 Apply diff anaconda-20.25.16-1..anaconda-21.48.21-1 2015-03-23 11:36:12 +00:00			`#`
			`# pangocheck.py: data and methods for checking pango markup strings`
			`#`
			`# Copyright (C) 2014 Red Hat, Inc.`
			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU Lesser General Public License as published`
			`# by the Free Software Foundation; either version 2.1 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU Lesser General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU Lesser General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`
			`#`
			`# Author: David Shea <dshea@redhat.com>`

			`import re`
			`from collections import Counter`

			`__all__ = ["markup_nodes", "is_markup", "markup_match"]`

			`# "a" isn't actually pango markup, but GtkLabel uses it`
			`markup_nodes = ["markup", "a", "b", "big", "i", "s", "span", "sub", "sup", "small", "tt", "u"]`

			`# Check to see if a string looks like Pango markup, no validation`
			`def is_markup(test_string):`
			`return any(re.search(r'<\s*%s(\s|>)' % node_type, test_string)`
			`for node_type in markup_nodes)`

			`# Verify that the translation of a markup string looks more or less like the original`
			`def markup_match(orig_markup, xlated_markup):`
			`# Look for tags. Create a count of each kind of tag and a list of attributes.`
			`# "Don't parse XML with regular expressions" I can hear you saying, but we're`
			`# not trying to match elements, just pull tag-like substrings out of the string.`
			`# Figuring out if tags are closed or in the right order is someone else's job.`
			`def _parse_markup(markup_string):`
			`name_count = Counter()`
			`attr_count = Counter()`

			`for tag in re.findall(r'<[^>]*>', markup_string):`
			`# Treat everything up to the first space, / or > as the element name`
			`(name, rest) = re.match(r'<([^\s/>]*)(.*)>', tag).groups()`
			`name_count[name] += 1`

			`# Strip the / from the rest of the tag, if present`
			`if rest.endswith('/'):`
			`rest = rest[:-1]`

			`# Make a list of attributes that need to be contained in the other string`
			`attr_count.update(rest.split())`

			`return (name_count, attr_count)`

			`(name_count1, attr_count1) = _parse_markup(orig_markup)`
			`(name_count2, attr_count2) = _parse_markup(xlated_markup)`

			`name_list1 = sorted(name_count1.elements())`
			`name_list2 = sorted(name_count2.elements())`
			`attr_list1 = sorted(attr_count1.elements())`
			`attr_list2 = sorted(attr_count2.elements())`

			`return (name_list1 == name_list2) and (attr_list1 == attr_list2)`

			`# Check that the markup is needed at all.`
			`# The input is a parsed ElementTree of the string '<markup>pango markup goes here</markup>'`
			`# The markup is unnecessary if the only markup in the string surrounds the entire rest of`
			`# the string, meaning that the pango attributes apply to the entire string, and thus`
			`# could be expressed using attribute lists. For example, strings like:`
			`# <b>Bold text</b>`
			`# or`
			`# <span foreground="grey"><i>colorful</i></span>`
			`# but not strings like:`
			`# <span size="small">This string contains <b>internal</b> markup</span>`
			`# that contain markup that must be passed to the translators.`
			`#`
			`# This function returns True if the markup is necessary and False if the markup`
			`# can be discarded and expressed as attribute lists.`
			`def markup_necessary(markup_tree):`
			`# If the element has no children at all, there is no markup inside and the`
			`# markup is unnecessary.`
			`if not len(markup_tree):`
			`return False`

			`# If there is more than one child, the markup is necessary`
			`if len(markup_tree) > 1:`
			`return True`

			`# QUICK NOTE FOR PEOPLE EXPECTING ElementTree TO ACT KINDA LIKE DOM 'CUZ LOL`
			`# ElementTree is kind of weird with respect to handling multiple text children`
			`# of an Element node. element.text is the text leading up to the first element`
			`# child, and element[child_idx].tail is the text following the child node that`
			`# is actually a child of element but isn't a property of element because Python`
			`# is crazy.`
			`#`
			`# A string like "<markup>word1<i>word2</i>word3<empty/>word4</markup>" will result in`
			`# tree == <Element 'markup' ...>`
			`# tree.text == 'word1'`
			`# tree[0] == <Element 'i' ...>`
			`# tree[0].text == 'word2'`
			`# tree[0].tail == 'word3'`
			`# tree[1] == <Element 'empty' ...>`
			`# tree[1].text == None`
			`# tree[1].tail == 'word4'`
			`#`
			`# So elements that contain text before a child markup element will have`
			`# element.text is not None. Elements that have text after a child element`
			`# will have .tail on that child set to not None.`

			`# If .text is set, there is text before the child node, as in`
			`# <span>text <b>child</b></span>`
			`# and the markup is necessary`
			`if markup_tree.text:`
			`return True`

			`# If the child (we already know there's only one) has .tail set, then`
			`# there is text between the close of the child and the end of the element`
			`# and the markup is necessary`
			`if markup_tree[0].tail:`
			`return True`

			`# Recurse on the child node`
			`return markup_necessary(markup_tree[0])`