// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package display // This file contains common lookup code that is shared between the various // implementations of Namer and Dictionaries. import ( "fmt" "sort" "strings" "golang.org/x/text/language" ) type namer interface { // name gets the string for the given index. It should walk the // inheritance chain if a value is not present in the base index. name(idx int) string } func nameLanguage(n namer, x interface{}) string { t, _ := language.All.Compose(x) i, _, _ := langTagSet.index(t.Raw()) return n.name(i) } func nameScript(n namer, x interface{}) string { t, _ := language.DeprecatedScript.Compose(x) _, s, _ := t.Raw() return n.name(scriptIndex.index(s.String())) } func nameRegion(n namer, x interface{}) string { t, _ := language.DeprecatedRegion.Compose(x) _, _, r := t.Raw() return n.name(regionIndex.index(r.String())) } func nameTag(langN, scrN, regN namer, x interface{}) string { t, ok := x.(language.Tag) if !ok { return "" } const form = language.All &^ language.SuppressScript if c, err := form.Canonicalize(t); err == nil { t = c } i, scr, reg := langTagSet.index(t.Raw()) if i == -1 { return "" } str := langN.name(i) if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR { ss, sr := "", "" if hasS { ss = scrN.name(scriptIndex.index(scr.String())) } if hasR { sr = regN.name(regionIndex.index(reg.String())) } // TODO: use patterns in CLDR or at least confirm they are the same for // all languages. if ss != "" && sr != "" { return fmt.Sprintf("%s (%s, %s)", str, ss, sr) } if ss != "" || sr != "" { return fmt.Sprintf("%s (%s%s)", str, ss, sr) } } return str } // header contains the data and indexes for a single namer. // data contains a series of strings concatenated into one. index contains the // offsets for a string in data. For example, consider a header that defines // strings for the languages de, el, en, fi, and nl: // // header{ // data: "GermanGreekEnglishDutch", // index: []uint16{ 0, 6, 11, 18, 18, 23 }, // } // // For a language with index i, the string is defined by // data[index[i]:index[i+1]]. So the number of elements in index is always one // greater than the number of languages for which header defines a value. // A string for a language may be empty, which means the name is undefined. In // the above example, the name for fi (Finnish) is undefined. type header struct { data string index []uint16 } // name looks up the name for a tag in the dictionary, given its index. func (h *header) name(i int) string { if 0 <= i && i < len(h.index)-1 { return h.data[h.index[i]:h.index[i+1]] } return "" } // tagSet is used to find the index of a language in a set of tags. type tagSet struct { single tagIndex long []string } var ( langTagSet = tagSet{ single: langIndex, long: langTagsLong, } // selfTagSet is used for indexing the language strings in their own // language. selfTagSet = tagSet{ single: selfIndex, long: selfTagsLong, } zzzz = language.MustParseScript("Zzzz") zz = language.MustParseRegion("ZZ") ) // index returns the index of the tag for the given base, script and region or // its parent if the tag is not available. If the match is for a parent entry, // the excess script and region are returned. func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) { lang := base.String() index := -1 if (scr != language.Script{} || reg != language.Region{}) { if scr == zzzz { scr = language.Script{} } if reg == zz { reg = language.Region{} } i := sort.SearchStrings(ts.long, lang) // All entries have either a script or a region and not both. scrStr, regStr := scr.String(), reg.String() for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ { if s := ts.long[i][len(lang)+1:]; s == scrStr { scr = language.Script{} index = i + ts.single.len() break } else if s == regStr { reg = language.Region{} index = i + ts.single.len() break } } } if index == -1 { index = ts.single.index(lang) } return index, scr, reg } func (ts *tagSet) Tags() []language.Tag { tags := make([]language.Tag, 0, ts.single.len()+len(ts.long)) ts.single.keys(func(s string) { tags = append(tags, language.Raw.MustParse(s)) }) for _, s := range ts.long { tags = append(tags, language.Raw.MustParse(s)) } return tags } func supportedScripts() []language.Script { scr := make([]language.Script, 0, scriptIndex.len()) scriptIndex.keys(func(s string) { scr = append(scr, language.MustParseScript(s)) }) return scr } func supportedRegions() []language.Region { reg := make([]language.Region, 0, regionIndex.len()) regionIndex.keys(func(s string) { reg = append(reg, language.MustParseRegion(s)) }) return reg } // tagIndex holds a concatenated lists of subtags of length 2 to 4, one string // for each length, which can be used in combination with binary search to get // the index associated with a tag. // For example, a tagIndex{ // "arenesfrruzh", // 6 2-byte tags. // "barwae", // 2 3-byte tags. // "", // } // would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag // "wae" had an index of 7. type tagIndex [3]string func (t *tagIndex) index(s string) int { sz := len(s) if sz < 2 || 4 < sz { return -1 } a := t[sz-2] index := sort.Search(len(a)/sz, func(i int) bool { p := i * sz return a[p:p+sz] >= s }) p := index * sz if end := p + sz; end > len(a) || a[p:end] != s { return -1 } // Add the number of tags for smaller sizes. for i := 0; i < sz-2; i++ { index += len(t[i]) / (i + 2) } return index } // len returns the number of tags that are contained in the tagIndex. func (t *tagIndex) len() (n int) { for i, s := range t { n += len(s) / (i + 2) } return n } // keys calls f for each tag. func (t *tagIndex) keys(f func(key string)) { for i, s := range *t { for ; s != ""; s = s[i+2:] { f(s[:i+2]) } } }