// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
package language
|
|
|
|
import (
|
|
"bytes"
|
|
"flag"
|
|
"fmt"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
|
|
"golang.org/x/text/internal/testtext"
|
|
"golang.org/x/text/internal/ucd"
|
|
)
|
|
|
|
// verbose is a command-line flag for this test binary; per its usage string,
// setting it to true prints the internal tables of matchers.
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
|
|
|
|
func TestCompliance(t *testing.T) {
|
|
filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
|
|
if info.IsDir() {
|
|
return nil
|
|
}
|
|
r, err := os.Open(file)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
ucd.Parse(r, func(p *ucd.Parser) {
|
|
name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
|
|
if skip[name] {
|
|
return
|
|
}
|
|
t.Run(info.Name()+"/"+name, func(t *testing.T) {
|
|
supported := makeTagList(p.String(0))
|
|
desired := makeTagList(p.String(1))
|
|
gotCombined, index, conf := NewMatcher(supported).Match(desired...)
|
|
|
|
gotMatch := supported[index]
|
|
wantMatch := Raw.Make(p.String(2)) // wantMatch may be null
|
|
if gotMatch != wantMatch {
|
|
t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
|
|
}
|
|
if tag := strings.TrimSpace(p.String(3)); tag != "" {
|
|
wantCombined := Raw.MustParse(tag)
|
|
if err == nil && gotCombined != wantCombined {
|
|
t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
|
|
}
|
|
}
|
|
})
|
|
})
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// skip lists the compliance test records that TestCompliance does not run.
// Keys have the form "<supported>/<desired>" with all spaces removed. The
// comment next to each entry records the mismatch that was observed.
var skip = map[string]bool{
	// TODO: bugs
	//
	// The wildcard match should be honored. This may only be useful to select
	// non-exact stuff.
	// match: got "af"; want "mul"
	"mul,af/nl": true,

	// TODO: include other extensions.
	// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
	"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,

	// Inconsistencies with Mark Davis' implementation where it is not clear
	// which is better.

	// Inconsistencies in combined. I think the Go approach is more
	// appropriate. We could use -u-rg- as alternative.
	//
	// combined: got "fr"; want "fr-BE-fonipa"
	"und,fr/fr-BE-fonipa": true,
	// combined: got "fr-CA"; want "fr-BE-fonipa"
	"und,fr-CA/fr-BE-fonipa": true,
	// combined: got "fr-fonupa"; want "fr-BE-fonipa"
	"und,fr-fonupa/fr-BE-fonipa": true,
	// combined: got "no"; want "no-BE-fonipa"
	"und,no/nn-BE-fonipa": true,
	// combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"
	"50,und,fr-CA-fonupa/fr-BE-fonipa": true,

	// The initial number is a threshold. As we don't use scoring, we will not
	// implement this.
	//
	// match: got "und"; want "fr-Cyrl-CA-fonupa"
	// combined: got "und"; want "fr-Cyrl-BE-fonipa"
	"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,

	// Other interesting cases to test:
	// - Should same language or same script have the preference if there is
	//   usually no understanding of the other script?
	// - More specific region in desired may replace enclosing supported.
}
|
|
|
|
func makeTagList(s string) (tags []Tag) {
|
|
for _, s := range strings.Split(s, ",") {
|
|
tags = append(tags, mk(strings.TrimSpace(s)))
|
|
}
|
|
return tags
|
|
}
|
|
|
|
func TestMatchStrings(t *testing.T) {
|
|
testCases := []struct {
|
|
supported string
|
|
desired string // strings separted by |
|
|
tag string
|
|
index int
|
|
}{{
|
|
supported: "en",
|
|
desired: "",
|
|
tag: "en",
|
|
index: 0,
|
|
}, {
|
|
supported: "en",
|
|
desired: "nl",
|
|
tag: "en",
|
|
index: 0,
|
|
}, {
|
|
supported: "en,nl",
|
|
desired: "nl",
|
|
tag: "nl",
|
|
index: 1,
|
|
}, {
|
|
supported: "en,nl",
|
|
desired: "nl|en",
|
|
tag: "nl",
|
|
index: 1,
|
|
}, {
|
|
supported: "en-GB,nl",
|
|
desired: "en ; q=0.1,nl",
|
|
tag: "nl",
|
|
index: 1,
|
|
}, {
|
|
supported: "en-GB,nl",
|
|
desired: "en;q=0.005 | dk; q=0.1,nl ",
|
|
tag: "en-GB",
|
|
index: 0,
|
|
}, {
|
|
// do not match faulty tags with und
|
|
supported: "en,und",
|
|
desired: "|en",
|
|
tag: "en",
|
|
index: 0,
|
|
}}
|
|
for _, tc := range testCases {
|
|
t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
|
|
m := NewMatcher(makeTagList(tc.supported))
|
|
tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
|
|
if tag.String() != tc.tag || index != tc.index {
|
|
t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestRegionGroups(t *testing.T) {
|
|
testCases := []struct {
|
|
a, b string
|
|
distance uint8
|
|
}{
|
|
{"zh-TW", "zh-HK", 5},
|
|
{"zh-MO", "zh-HK", 4},
|
|
{"es-ES", "es-AR", 5},
|
|
{"es-ES", "es", 4},
|
|
{"es-419", "es-MX", 4},
|
|
{"es-AR", "es-MX", 4},
|
|
{"es-ES", "es-MX", 5},
|
|
{"es-PT", "es-MX", 5},
|
|
}
|
|
for _, tc := range testCases {
|
|
a := MustParse(tc.a)
|
|
aScript, _ := a.Script()
|
|
b := MustParse(tc.b)
|
|
bScript, _ := b.Script()
|
|
|
|
if aScript != bScript {
|
|
t.Errorf("scripts differ: %q vs %q", aScript, bScript)
|
|
continue
|
|
}
|
|
d, _ := regionGroupDist(a.region(), b.region(), aScript.scriptID, a.lang())
|
|
if d != tc.distance {
|
|
t.Errorf("got %q; want %q", d, tc.distance)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestIsParadigmLocale(t *testing.T) {
|
|
testCases := map[string]bool{
|
|
"en-US": true,
|
|
"en-GB": true,
|
|
"en-VI": false,
|
|
"es-GB": false,
|
|
"es-ES": true,
|
|
"es-419": true,
|
|
}
|
|
for str, want := range testCases {
|
|
tt := Make(str)
|
|
tag := tt.tag()
|
|
got := isParadigmLocale(tag.LangID, tag.RegionID)
|
|
if got != want {
|
|
t.Errorf("isPL(%q) = %v; want %v", str, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Implementation of String methods for various types for debugging purposes.
|
|
|
|
func (m *matcher) String() string {
|
|
w := &bytes.Buffer{}
|
|
fmt.Fprintln(w, "Default:", m.default_)
|
|
for tag, h := range m.index {
|
|
fmt.Fprintf(w, " %s: %v\n", tag, h)
|
|
}
|
|
return w.String()
|
|
}
|
|
|
|
func (h *matchHeader) String() string {
|
|
w := &bytes.Buffer{}
|
|
fmt.Fprint(w, "haveTag: ")
|
|
for _, h := range h.haveTags {
|
|
fmt.Fprintf(w, "%v, ", h)
|
|
}
|
|
return w.String()
|
|
}
|
|
|
|
func (t haveTag) String() string {
|
|
return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
|
|
}
|
|
|
|
func TestBestMatchAlloc(t *testing.T) {
|
|
m := NewMatcher(makeTagList("en sr nl"))
|
|
// Go allocates when creating a list of tags from a single tag!
|
|
list := []Tag{English}
|
|
avg := testtext.AllocsPerRun(1, func() {
|
|
m.Match(list...)
|
|
})
|
|
if avg > 0 {
|
|
t.Errorf("got %f; want 0", avg)
|
|
}
|
|
}
|
|
|
|
var benchHave = []Tag{
|
|
mk("en"),
|
|
mk("en-GB"),
|
|
mk("za"),
|
|
mk("zh-Hant"),
|
|
mk("zh-Hans-CN"),
|
|
mk("zh"),
|
|
mk("zh-HK"),
|
|
mk("ar-MK"),
|
|
mk("en-CA"),
|
|
mk("fr-CA"),
|
|
mk("fr-US"),
|
|
mk("fr-CH"),
|
|
mk("fr"),
|
|
mk("lt"),
|
|
mk("lv"),
|
|
mk("iw"),
|
|
mk("iw-NL"),
|
|
mk("he"),
|
|
mk("he-IT"),
|
|
mk("tlh"),
|
|
mk("ja"),
|
|
mk("ja-Jpan"),
|
|
mk("ja-Jpan-JP"),
|
|
mk("de"),
|
|
mk("de-CH"),
|
|
mk("de-AT"),
|
|
mk("de-DE"),
|
|
mk("sr"),
|
|
mk("sr-Latn"),
|
|
mk("sr-Cyrl"),
|
|
mk("sr-ME"),
|
|
}
|
|
|
|
var benchWant = [][]Tag{
|
|
[]Tag{
|
|
mk("en"),
|
|
},
|
|
[]Tag{
|
|
mk("en-AU"),
|
|
mk("de-HK"),
|
|
mk("nl"),
|
|
mk("fy"),
|
|
mk("lv"),
|
|
},
|
|
[]Tag{
|
|
mk("en-AU"),
|
|
mk("de-HK"),
|
|
mk("nl"),
|
|
mk("fy"),
|
|
},
|
|
[]Tag{
|
|
mk("ja-Hant"),
|
|
mk("da-HK"),
|
|
mk("nl"),
|
|
mk("zh-TW"),
|
|
},
|
|
[]Tag{
|
|
mk("ja-Hant"),
|
|
mk("da-HK"),
|
|
mk("nl"),
|
|
mk("hr"),
|
|
},
|
|
}
|
|
|
|
func BenchmarkMatch(b *testing.B) {
|
|
m := newMatcher(benchHave, nil)
|
|
for i := 0; i < b.N; i++ {
|
|
for _, want := range benchWant {
|
|
m.getBest(want...)
|
|
}
|
|
}
|
|
}
|
|
|
|
func BenchmarkMatchExact(b *testing.B) {
|
|
want := mk("en")
|
|
m := newMatcher(benchHave, nil)
|
|
for i := 0; i < b.N; i++ {
|
|
m.getBest(want)
|
|
}
|
|
}
|
|
|
|
func BenchmarkMatchAltLanguagePresent(b *testing.B) {
|
|
want := mk("hr")
|
|
m := newMatcher(benchHave, nil)
|
|
for i := 0; i < b.N; i++ {
|
|
m.getBest(want)
|
|
}
|
|
}
|
|
|
|
func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
|
|
want := mk("nn")
|
|
m := newMatcher(benchHave, nil)
|
|
for i := 0; i < b.N; i++ {
|
|
m.getBest(want)
|
|
}
|
|
}
|
|
|
|
func BenchmarkMatchAltScriptPresent(b *testing.B) {
|
|
want := mk("zh-Hant-CN")
|
|
m := newMatcher(benchHave, nil)
|
|
for i := 0; i < b.N; i++ {
|
|
m.getBest(want)
|
|
}
|
|
}
|
|
|
|
func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
|
|
want := mk("fr-Cyrl")
|
|
m := newMatcher(benchHave, nil)
|
|
for i := 0; i < b.N; i++ {
|
|
m.getBest(want)
|
|
}
|
|
}
|
|
|
|
func BenchmarkMatchLimitedExact(b *testing.B) {
|
|
want := []Tag{mk("he-NL"), mk("iw-NL")}
|
|
m := newMatcher(benchHave, nil)
|
|
for i := 0; i < b.N; i++ {
|
|
m.getBest(want...)
|
|
}
|
|
}
|