vendor: switch to go modules

pull/128/head
Eric Chiang 5 years ago
parent 1c3cffdc1d
commit 681d7bb639

@ -0,0 +1,12 @@
module github.com/ericchiang/pup
go 1.13
require (
github.com/fatih/color v1.0.0
github.com/mattn/go-colorable v0.0.5
github.com/mattn/go-isatty v0.0.0-20151211000621-56b76bdf51f7 // indirect
golang.org/x/net v0.0.0-20160720084139-4d38db76854b
golang.org/x/sys v0.0.0-20160717071931-a646d33e2ee3 // indirect
golang.org/x/text v0.0.0-20160719205907-0a5a09ee4409
)

@ -0,0 +1,12 @@
github.com/fatih/color v1.0.0 h1:4zdNjpoprR9fed2QRCPb2VTPU4UFXEtJc9Vc+sgXkaQ=
github.com/fatih/color v1.0.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/mattn/go-colorable v0.0.5 h1:X1IeP+MaFWC+vpbhw3y426rQftzXSj+N7eJFnBEMBfE=
github.com/mattn/go-colorable v0.0.5/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.0-20151211000621-56b76bdf51f7 h1:owMyzMR4QR+jSdlfkX9jPU3rsby4++j99BfbtgVr6ZY=
github.com/mattn/go-isatty v0.0.0-20151211000621-56b76bdf51f7/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
golang.org/x/net v0.0.0-20160720084139-4d38db76854b h1:2lHDZItrxmjk3OXnITVKcHWo6qQYJSm4q2pmvciVkxo=
golang.org/x/net v0.0.0-20160720084139-4d38db76854b/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sys v0.0.0-20160717071931-a646d33e2ee3 h1:ZLExsLvnoqWSw6JB6k6RjWobIHGR3NG9dzVANJ7SVKc=
golang.org/x/sys v0.0.0-20160717071931-a646d33e2ee3/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.0.0-20160719205907-0a5a09ee4409 h1:ImTDOALQ1AOSGXgapb9Q1tOcHlxpQXZCPSIMKLce0JU=
golang.org/x/text v0.0.0-20160719205907-0a5a09ee4409/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

@ -0,0 +1 @@
package css

@ -0,0 +1,5 @@
language: go
go:
- 1.6
- tip

@ -0,0 +1,154 @@
# Color [![GoDoc](http://img.shields.io/badge/go-documentation-blue.svg?style=flat-square)](http://godoc.org/github.com/fatih/color) [![Build Status](http://img.shields.io/travis/fatih/color.svg?style=flat-square)](https://travis-ci.org/fatih/color)
Color lets you use colorized outputs in terms of [ANSI Escape
Codes](http://en.wikipedia.org/wiki/ANSI_escape_code#Colors) in Go (Golang). It
has support for Windows too! The API can be used in several ways, pick one that
suits you.
![Color](http://i.imgur.com/c1JI0lA.png)
## Install
```bash
go get github.com/fatih/color
```
## Examples
### Standard colors
```go
// Print with default helper functions
color.Cyan("Prints text in cyan.")
// A newline will be appended automatically
color.Blue("Prints %s in blue.", "text")
// These are using the default foreground colors
color.Red("We have red")
color.Magenta("And many others ..")
```
### Mix and reuse colors
```go
// Create a new color object
c := color.New(color.FgCyan).Add(color.Underline)
c.Println("Prints cyan text with an underline.")
// Or just add them to New()
d := color.New(color.FgCyan, color.Bold)
d.Printf("This prints bold cyan %s\n", "too!.")
// Mix up foreground and background colors, create new mixes!
red := color.New(color.FgRed)
boldRed := red.Add(color.Bold)
boldRed.Println("This will print text in bold red.")
whiteBackground := red.Add(color.BgWhite)
whiteBackground.Println("Red text with white background.")
```
### Custom print functions (PrintFunc)
```go
// Create a custom print function for convenience
red := color.New(color.FgRed).PrintfFunc()
red("Warning")
red("Error: %s", err)
// Mix up multiple attributes
notice := color.New(color.Bold, color.FgGreen).PrintlnFunc()
notice("Don't forget this...")
```
### Insert into noncolor strings (SprintFunc)
```go
// Create SprintXxx functions to mix strings with other non-colorized strings:
yellow := color.New(color.FgYellow).SprintFunc()
red := color.New(color.FgRed).SprintFunc()
fmt.Printf("This is a %s and this is %s.\n", yellow("warning"), red("error"))
info := color.New(color.FgWhite, color.BgGreen).SprintFunc()
fmt.Printf("This %s rocks!\n", info("package"))
// Use helper functions
fmt.Printf("This", color.RedString("warning"), "should be not neglected.")
fmt.Printf(color.GreenString("Info:"), "an important message." )
// Windows supported too! Just don't forget to change the output to color.Output
fmt.Fprintf(color.Output, "Windows support: %s", color.GreenString("PASS"))
```
### Plug into existing code
```go
// Use handy standard colors
color.Set(color.FgYellow)
fmt.Println("Existing text will now be in yellow")
fmt.Printf("This one %s\n", "too")
color.Unset() // Don't forget to unset
// You can mix up parameters
color.Set(color.FgMagenta, color.Bold)
defer color.Unset() // Use it in your function
fmt.Println("All text will now be bold magenta.")
```
### Disable color
There might be a case where you want to disable color output (for example to
pipe the standard output of your app to somewhere else). `Color` has support to
disable colors both globally and for single color definition. For example
suppose you have a CLI app and a `--no-color` bool flag. You can easily disable
the color output with:
```go
var flagNoColor = flag.Bool("no-color", false, "Disable color output")
if *flagNoColor {
color.NoColor = true // disables colorized output
}
```
It also has support for single color definitions (local). You can
disable/enable color output on the fly:
```go
c := color.New(color.FgCyan)
c.Println("Prints cyan text")
c.DisableColor()
c.Println("This is printed without any color")
c.EnableColor()
c.Println("This prints again cyan...")
```
## Todo
* Save/Return previous values
* Evaluate fmt.Formatter interface
## Credits
* [Fatih Arslan](https://github.com/fatih)
* Windows support via @mattn: [colorable](https://github.com/mattn/go-colorable)
## License
The MIT License (MIT) - see [`LICENSE.md`](https://github.com/fatih/color/blob/master/LICENSE.md) for more details

@ -0,0 +1,43 @@
# go-colorable
Colorable writer for windows.
For example, most of logger packages doesn't show colors on windows. (I know we can do it with ansicon. But I don't want.)
This package is possible to handle escape sequence for ansi color on windows.
## Too Bad!
![](https://raw.githubusercontent.com/mattn/go-colorable/gh-pages/bad.png)
## So Good!
![](https://raw.githubusercontent.com/mattn/go-colorable/gh-pages/good.png)
## Usage
```go
logrus.SetFormatter(&logrus.TextFormatter{ForceColors: true})
logrus.SetOutput(colorable.NewColorableStdout())
logrus.Info("succeeded")
logrus.Warn("not correct")
logrus.Error("something error")
logrus.Fatal("panic")
```
You can compile above code on non-windows OSs.
## Installation
```
$ go get github.com/mattn/go-colorable
```
# License
MIT
# Author
Yasuhiro Matsumoto (a.k.a mattn)

@ -0,0 +1,37 @@
# go-isatty
isatty for golang
## Usage
```go
package main
import (
"fmt"
"github.com/mattn/go-isatty"
"os"
)
func main() {
if isatty.IsTerminal(os.Stdout.Fd()) {
fmt.Println("Is Terminal")
} else {
fmt.Println("Is Not Terminal")
}
}
```
## Installation
```
$ go get github.com/mattn/go-isatty
```
# License
MIT
# Author
Yasuhiro Matsumoto (a.k.a mattn)

@ -1,8 +0,0 @@
// +build dragonfly nacl plan9
package isatty
// IsTerminal return true if the file descriptor is terminal.
func IsTerminal(fd uintptr) bool {
return false
}

3
vendor/golang.org/x/net/AUTHORS generated vendored

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

22
vendor/golang.org/x/net/PATENTS generated vendored

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

@ -1,648 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates table.go and table_test.go.
// Invoke as
//
// go run gen.go |gofmt >table.go
// go run gen.go -test |gofmt >table_test.go
import (
"flag"
"fmt"
"math/rand"
"os"
"sort"
"strings"
)
// identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
func identifier(s string) string {
b := make([]byte, 0, len(s))
cap := true
for _, c := range s {
if c == '-' {
cap = true
continue
}
if cap && 'a' <= c && c <= 'z' {
c -= 'a' - 'A'
}
cap = false
b = append(b, byte(c))
}
return string(b)
}
var test = flag.Bool("test", false, "generate table_test.go")
func main() {
flag.Parse()
var all []string
all = append(all, elements...)
all = append(all, attributes...)
all = append(all, eventHandlers...)
all = append(all, extra...)
sort.Strings(all)
if *test {
fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\n")
fmt.Printf("var testAtomList = []string{\n")
for _, s := range all {
fmt.Printf("\t%q,\n", s)
}
fmt.Printf("}\n")
return
}
// uniq - lists have dups
// compute max len too
maxLen := 0
w := 0
for _, s := range all {
if w == 0 || all[w-1] != s {
if maxLen < len(s) {
maxLen = len(s)
}
all[w] = s
w++
}
}
all = all[:w]
// Find hash that minimizes table size.
var best *table
for i := 0; i < 1000000; i++ {
if best != nil && 1<<(best.k-1) < len(all) {
break
}
h := rand.Uint32()
for k := uint(0); k <= 16; k++ {
if best != nil && k >= best.k {
break
}
var t table
if t.init(h, k, all) {
best = &t
break
}
}
}
if best == nil {
fmt.Fprintf(os.Stderr, "failed to construct string table\n")
os.Exit(1)
}
// Lay out strings, using overlaps when possible.
layout := append([]string{}, all...)
// Remove strings that are substrings of other strings
for changed := true; changed; {
changed = false
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i != j && t != "" && strings.Contains(s, t) {
changed = true
layout[j] = ""
}
}
}
}
// Join strings where one suffix matches another prefix.
for {
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
// maximizing overlap length k.
besti := -1
bestj := -1
bestk := 0
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i == j {
continue
}
for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
if s[len(s)-k:] == t[:k] {
besti = i
bestj = j
bestk = k
}
}
}
}
if bestk > 0 {
layout[besti] += layout[bestj][bestk:]
layout[bestj] = ""
continue
}
break
}
text := strings.Join(layout, "")
atom := map[string]uint32{}
for _, s := range all {
off := strings.Index(text, s)
if off < 0 {
panic("lost string " + s)
}
atom[s] = uint32(off<<8 | len(s))
}
// Generate the Go code.
fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\nconst (\n")
for _, s := range all {
fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
}
fmt.Printf(")\n\n")
fmt.Printf("const hash0 = %#x\n\n", best.h0)
fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
for i, s := range best.tab {
if s == "" {
continue
}
fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
}
fmt.Printf("}\n")
datasize := (1 << best.k) * 4
fmt.Printf("const atomText =\n")
textsize := len(text)
for len(text) > 60 {
fmt.Printf("\t%q +\n", text[:60])
text = text[60:]
}
fmt.Printf("\t%q\n\n", text)
fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
}
type byLen []string
func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byLen) Len() int { return len(x) }
// fnv computes the FNV hash with an arbitrary starting value h.
func fnv(h uint32, s string) uint32 {
for i := 0; i < len(s); i++ {
h ^= uint32(s[i])
h *= 16777619
}
return h
}
// A table represents an attempt at constructing the lookup table.
// The lookup table uses cuckoo hashing, meaning that each string
// can be found in one of two positions.
type table struct {
h0 uint32
k uint
mask uint32
tab []string
}
// hash returns the two hashes for s.
func (t *table) hash(s string) (h1, h2 uint32) {
h := fnv(t.h0, s)
h1 = h & t.mask
h2 = (h >> 16) & t.mask
return
}
// init initializes the table with the given parameters.
// h0 is the initial hash value,
// k is the number of bits of hash value to use, and
// x is the list of strings to store in the table.
// init returns false if the table cannot be constructed.
func (t *table) init(h0 uint32, k uint, x []string) bool {
t.h0 = h0
t.k = k
t.tab = make([]string, 1<<k)
t.mask = 1<<k - 1
for _, s := range x {
if !t.insert(s) {
return false
}
}
return true
}
// insert inserts s in the table.
func (t *table) insert(s string) bool {
h1, h2 := t.hash(s)
if t.tab[h1] == "" {
t.tab[h1] = s
return true
}
if t.tab[h2] == "" {
t.tab[h2] = s
return true
}
if t.push(h1, 0) {
t.tab[h1] = s
return true
}
if t.push(h2, 0) {
t.tab[h2] = s
return true
}
return false
}
// push attempts to push aside the entry in slot i.
func (t *table) push(i uint32, depth int) bool {
if depth > len(t.tab) {
return false
}
s := t.tab[i]
h1, h2 := t.hash(s)
j := h1 + h2 - i
if t.tab[j] != "" && !t.push(j, depth+1) {
return false
}
t.tab[j] = s
return true
}
// The lists of element names and attribute keys were taken from
// https://html.spec.whatwg.org/multipage/indices.html#index
// as of the "HTML Living Standard - Last Updated 21 February 2015" version.
var elements = []string{
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"command",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"map",
"mark",
"menu",
"menuitem",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
}
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
var attributes = []string{
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"alt",
"async",
"autocomplete",
"autofocus",
"autoplay",
"challenge",
"charset",
"checked",
"cite",
"class",
"cols",
"colspan",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"datetime",
"default",
"defer",
"dir",
"dirname",
"disabled",
"download",
"draggable",
"dropzone",
"enctype",
"for",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"headers",
"height",
"hidden",
"high",
"href",
"hreflang",
"http-equiv",
"icon",
"id",
"inputmode",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"keytype",
"kind",
"label",
"lang",
"list",
"loop",
"low",
"manifest",
"max",
"maxlength",
"media",
"mediagroup",
"method",
"min",
"minlength",
"multiple",
"muted",
"name",
"novalidate",
"open",
"optimum",
"pattern",
"ping",
"placeholder",
"poster",
"preload",
"radiogroup",
"readonly",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"sandbox",
"spellcheck",
"scope",
"scoped",
"seamless",
"selected",
"shape",
"size",
"sizes",
"sortable",
"sorted",
"span",
"src",
"srcdoc",
"srclang",
"start",
"step",
"style",
"tabindex",
"target",
"title",
"translate",
"type",
"typemustmatch",
"usemap",
"value",
"width",
"wrap",
}
var eventHandlers = []string{
"onabort",
"onautocomplete",
"onautocompleteerror",
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmessage",
"onmousedown",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunload",
"onvolumechange",
"onwaiting",
}
// extra are ad-hoc values not covered by any of the lists above.
var extra = []string{
"align",
"annotation",
"annotation-xml",
"applet",
"basefont",
"bgsound",
"big",
"blink",
"center",
"color",
"desc",
"face",
"font",
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
"foreignobject",
"frame",
"frameset",
"image",
"isindex",
"listing",
"malignmark",
"marquee",
"math",
"mglyph",
"mi",
"mn",
"mo",
"ms",
"mtext",
"nobr",
"noembed",
"noframes",
"plaintext",
"prompt",
"public",
"spacer",
"strike",
"svg",
"system",
"tt",
"xmp",
}

3
vendor/golang.org/x/sys/AUTHORS generated vendored

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

22
vendor/golang.org/x/sys/PATENTS generated vendored

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

@ -0,0 +1 @@
_obj/

@ -0,0 +1,285 @@
#!/usr/bin/env bash
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# The unix package provides access to the raw system call
# interface of the underlying operating system. Porting Go to
# a new architecture/operating system combination requires
# some manual effort, though there are tools that automate
# much of the process. The auto-generated files have names
# beginning with z.
#
# This script runs or (given -n) prints suggested commands to generate z files
# for the current system. Running those commands is not automatic.
# This script is documentation more than anything else.
#
# * asm_${GOOS}_${GOARCH}.s
#
# This hand-written assembly file implements system call dispatch.
# There are three entry points:
#
# func Syscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr);
# func Syscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr);
# func RawSyscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr);
#
# The first and second are the standard ones; they differ only in
# how many arguments can be passed to the kernel.
# The third is for low-level use by the ForkExec wrapper;
# unlike the first two, it does not call into the scheduler to
# let it know that a system call is running.
#
# * syscall_${GOOS}.go
#
# This hand-written Go file implements system calls that need
# special handling and lists "//sys" comments giving prototypes
# for ones that can be auto-generated. Mksyscall reads those
# comments to generate the stubs.
#
# * syscall_${GOOS}_${GOARCH}.go
#
# Same as syscall_${GOOS}.go except that it contains code specific
# to ${GOOS} on one particular architecture.
#
# * types_${GOOS}.c
#
# This hand-written C file includes standard C headers and then
# creates typedef or enum names beginning with a dollar sign
# (use of $ in variable names is a gcc extension). The hardest
# part about preparing this file is figuring out which headers to
# include and which symbols need to be #defined to get the
# actual data structures that pass through to the kernel system calls.
# Some C libraries present alternate versions for binary compatibility
# and translate them on the way in and out of system calls, but
# there is almost always a #define that can get the real ones.
# See types_darwin.c and types_linux.c for examples.
#
# * zerror_${GOOS}_${GOARCH}.go
#
# This machine-generated file defines the system's error numbers,
# error strings, and signal numbers. The generator is "mkerrors.sh".
# Usually no arguments are needed, but mkerrors.sh will pass its
# arguments on to godefs.
#
# * zsyscall_${GOOS}_${GOARCH}.go
#
# Generated by mksyscall.pl; see syscall_${GOOS}.go above.
#
# * zsysnum_${GOOS}_${GOARCH}.go
#
# Generated by mksysnum_${GOOS}.
#
# * ztypes_${GOOS}_${GOARCH}.go
#
# Generated by godefs; see types_${GOOS}.c above.
GOOSARCH="${GOOS}_${GOARCH}"
# defaults
mksyscall="./mksyscall.pl"
mkerrors="./mkerrors.sh"
zerrors="zerrors_$GOOSARCH.go"
mksysctl=""
zsysctl="zsysctl_$GOOSARCH.go"
mksysnum=
mktypes=
run="sh"
case "$1" in
-syscalls)
for i in zsyscall*go
do
sed 1q $i | sed 's;^// ;;' | sh > _$i && gofmt < _$i > $i
rm _$i
done
exit 0
;;
-n)
run="cat"
shift
esac
case "$#" in
0)
;;
*)
echo 'usage: mkall.sh [-n]' 1>&2
exit 2
esac
GOOSARCH_in=syscall_$GOOSARCH.go
case "$GOOSARCH" in
_* | *_ | _)
echo 'undefined $GOOS_$GOARCH:' "$GOOSARCH" 1>&2
exit 1
;;
darwin_386)
mkerrors="$mkerrors -m32"
mksyscall="./mksyscall.pl -l32"
mksysnum="./mksysnum_darwin.pl $(xcrun --show-sdk-path --sdk macosx)/usr/include/sys/syscall.h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
darwin_amd64)
mkerrors="$mkerrors -m64"
mksysnum="./mksysnum_darwin.pl $(xcrun --show-sdk-path --sdk macosx)/usr/include/sys/syscall.h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
darwin_arm)
mkerrors="$mkerrors"
mksysnum="./mksysnum_darwin.pl /usr/include/sys/syscall.h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
darwin_arm64)
mkerrors="$mkerrors -m64"
mksysnum="./mksysnum_darwin.pl $(xcrun --show-sdk-path --sdk iphoneos)/usr/include/sys/syscall.h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
dragonfly_386)
mkerrors="$mkerrors -m32"
mksyscall="./mksyscall.pl -l32 -dragonfly"
mksysnum="curl -s 'http://gitweb.dragonflybsd.org/dragonfly.git/blob_plain/HEAD:/sys/kern/syscalls.master' | ./mksysnum_dragonfly.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
dragonfly_amd64)
mkerrors="$mkerrors -m64"
mksyscall="./mksyscall.pl -dragonfly"
mksysnum="curl -s 'http://gitweb.dragonflybsd.org/dragonfly.git/blob_plain/HEAD:/sys/kern/syscalls.master' | ./mksysnum_dragonfly.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
freebsd_386)
mkerrors="$mkerrors -m32"
mksyscall="./mksyscall.pl -l32"
mksysnum="curl -s 'http://svn.freebsd.org/base/stable/10/sys/kern/syscalls.master' | ./mksysnum_freebsd.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
freebsd_amd64)
mkerrors="$mkerrors -m64"
mksysnum="curl -s 'http://svn.freebsd.org/base/stable/10/sys/kern/syscalls.master' | ./mksysnum_freebsd.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
freebsd_arm)
mkerrors="$mkerrors"
mksyscall="./mksyscall.pl -l32 -arm"
mksysnum="curl -s 'http://svn.freebsd.org/base/stable/10/sys/kern/syscalls.master' | ./mksysnum_freebsd.pl"
# Let the type of C char be signed for making the bare syscall
# API consistent across over platforms.
mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
;;
linux_386)
mkerrors="$mkerrors -m32"
mksyscall="./mksyscall.pl -l32"
mksysnum="./mksysnum_linux.pl /usr/include/asm/unistd_32.h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
linux_amd64)
unistd_h=$(ls -1 /usr/include/asm/unistd_64.h /usr/include/x86_64-linux-gnu/asm/unistd_64.h 2>/dev/null | head -1)
if [ "$unistd_h" = "" ]; then
echo >&2 cannot find unistd_64.h
exit 1
fi
mkerrors="$mkerrors -m64"
mksysnum="./mksysnum_linux.pl $unistd_h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
linux_arm)
mkerrors="$mkerrors"
mksyscall="./mksyscall.pl -l32 -arm"
mksysnum="curl -s 'http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/plain/arch/arm/include/uapi/asm/unistd.h' | ./mksysnum_linux.pl -"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
linux_arm64)
unistd_h=$(ls -1 /usr/include/asm/unistd.h /usr/include/asm-generic/unistd.h 2>/dev/null | head -1)
if [ "$unistd_h" = "" ]; then
echo >&2 cannot find unistd_64.h
exit 1
fi
mksysnum="./mksysnum_linux.pl $unistd_h"
# Let the type of C char be signed for making the bare syscall
# API consistent across over platforms.
mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
;;
linux_ppc64)
GOOSARCH_in=syscall_linux_ppc64x.go
unistd_h=/usr/include/asm/unistd.h
mkerrors="$mkerrors -m64"
mksysnum="./mksysnum_linux.pl $unistd_h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
linux_ppc64le)
GOOSARCH_in=syscall_linux_ppc64x.go
unistd_h=/usr/include/powerpc64le-linux-gnu/asm/unistd.h
mkerrors="$mkerrors -m64"
mksysnum="./mksysnum_linux.pl $unistd_h"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
linux_s390x)
GOOSARCH_in=syscall_linux_s390x.go
unistd_h=/usr/include/asm/unistd.h
mkerrors="$mkerrors -m64"
mksysnum="./mksysnum_linux.pl $unistd_h"
# Let the type of C char be signed to make the bare sys
# API more consistent between platforms.
# This is a deliberate departure from the way the syscall
# package generates its version of the types file.
mktypes="GOARCH=$GOARCH go tool cgo -godefs -- -fsigned-char"
;;
netbsd_386)
mkerrors="$mkerrors -m32"
mksyscall="./mksyscall.pl -l32 -netbsd"
mksysnum="curl -s 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_netbsd.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
netbsd_amd64)
mkerrors="$mkerrors -m64"
mksyscall="./mksyscall.pl -netbsd"
mksysnum="curl -s 'http://cvsweb.netbsd.org/bsdweb.cgi/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_netbsd.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
openbsd_386)
mkerrors="$mkerrors -m32"
mksyscall="./mksyscall.pl -l32 -openbsd"
mksysctl="./mksysctl_openbsd.pl"
zsysctl="zsysctl_openbsd.go"
mksysnum="curl -s 'http://cvsweb.openbsd.org/cgi-bin/cvsweb/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_openbsd.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
openbsd_amd64)
mkerrors="$mkerrors -m64"
mksyscall="./mksyscall.pl -openbsd"
mksysctl="./mksysctl_openbsd.pl"
zsysctl="zsysctl_openbsd.go"
mksysnum="curl -s 'http://cvsweb.openbsd.org/cgi-bin/cvsweb/~checkout~/src/sys/kern/syscalls.master' | ./mksysnum_openbsd.pl"
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
solaris_amd64)
mksyscall="./mksyscall_solaris.pl"
mkerrors="$mkerrors -m64"
mksysnum=
mktypes="GOARCH=$GOARCH go tool cgo -godefs"
;;
*)
echo 'unrecognized $GOOS_$GOARCH: ' "$GOOSARCH" 1>&2
exit 1
;;
esac
(
if [ -n "$mkerrors" ]; then echo "$mkerrors |gofmt >$zerrors"; fi
case "$GOOS" in
*)
syscall_goos="syscall_$GOOS.go"
case "$GOOS" in
darwin | dragonfly | freebsd | netbsd | openbsd)
syscall_goos="syscall_bsd.go $syscall_goos"
;;
esac
if [ -n "$mksyscall" ]; then echo "$mksyscall $syscall_goos $GOOSARCH_in |gofmt >zsyscall_$GOOSARCH.go"; fi
;;
esac
if [ -n "$mksysctl" ]; then echo "$mksysctl |gofmt >$zsysctl"; fi
if [ -n "$mksysnum" ]; then echo "$mksysnum |gofmt >zsysnum_$GOOSARCH.go"; fi
if [ -n "$mktypes" ]; then
echo "echo // +build $GOARCH,$GOOS > ztypes_$GOOSARCH.go";
echo "$mktypes types_$GOOS.go | go run mkpost.go >>ztypes_$GOOSARCH.go";
fi
) | $run

@ -0,0 +1,476 @@
#!/usr/bin/env bash
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# Generate Go code listing errors and other #defined constant
# values (ENAMETOOLONG etc.), by asking the preprocessor
# about the definitions.
unset LANG
export LC_ALL=C
export LC_CTYPE=C
if test -z "$GOARCH" -o -z "$GOOS"; then
echo 1>&2 "GOARCH or GOOS not defined in environment"
exit 1
fi
CC=${CC:-cc}
if [[ "$GOOS" -eq "solaris" ]]; then
# Assumes GNU versions of utilities in PATH.
export PATH=/usr/gnu/bin:$PATH
fi
uname=$(uname)
includes_Darwin='
#define _DARWIN_C_SOURCE
#define KERNEL
#define _DARWIN_USE_64_BIT_INODE
#include <sys/types.h>
#include <sys/event.h>
#include <sys/ptrace.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <termios.h>
'
includes_DragonFly='
#include <sys/types.h>
#include <sys/event.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <termios.h>
#include <netinet/ip.h>
#include <net/ip_mroute/ip_mroute.h>
'
includes_FreeBSD='
#include <sys/param.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <termios.h>
#include <netinet/ip.h>
#include <netinet/ip_mroute.h>
#include <sys/extattr.h>
#if __FreeBSD__ >= 10
#define IFT_CARP 0xf8 // IFT_CARP is deprecated in FreeBSD 10
#undef SIOCAIFADDR
#define SIOCAIFADDR _IOW(105, 26, struct oifaliasreq) // ifaliasreq contains if_data
#undef SIOCSIFPHYADDR
#define SIOCSIFPHYADDR _IOW(105, 70, struct oifaliasreq) // ifaliasreq contains if_data
#endif
'
includes_Linux='
#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE
#ifndef __LP64__
#define _FILE_OFFSET_BITS 64
#endif
#define _GNU_SOURCE
#include <bits/sockaddr.h>
#include <sys/epoll.h>
#include <sys/inotify.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/if_packet.h>
#include <linux/if_addr.h>
#include <linux/filter.h>
#include <linux/netlink.h>
#include <linux/reboot.h>
#include <linux/rtnetlink.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/icmpv6.h>
#include <net/route.h>
#include <asm/termbits.h>
#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000
#endif
#ifndef PTRACE_GETREGS
#define PTRACE_GETREGS 0xc
#endif
#ifndef PTRACE_SETREGS
#define PTRACE_SETREGS 0xd
#endif
'
includes_NetBSD='
#include <sys/types.h>
#include <sys/param.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/termios.h>
#include <sys/ttycom.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_mroute.h>
#include <netinet/if_ether.h>
// Needed since <sys/param.h> refers to it...
#define schedppq 1
'
includes_OpenBSD='
#include <sys/types.h>
#include <sys/param.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/termios.h>
#include <sys/ttycom.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_mroute.h>
#include <netinet/if_ether.h>
#include <net/if_bridge.h>
// We keep some constants not supported in OpenBSD 5.5 and beyond for
// the promise of compatibility.
#define EMUL_ENABLED 0x1
#define EMUL_NATIVE 0x2
#define IPV6_FAITH 0x1d
#define IPV6_OPTIONS 0x1
#define IPV6_RTHDR_STRICT 0x1
#define IPV6_SOCKOPT_RESERVED1 0x3
#define SIOCGIFGENERIC 0xc020693a
#define SIOCSIFGENERIC 0x80206939
#define WALTSIG 0x4
'
includes_SunOS='
#include <limits.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <termios.h>
#include <netinet/ip.h>
#include <netinet/ip_mroute.h>
'
includes='
#include <sys/types.h>
#include <sys/file.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <errno.h>
#include <sys/signal.h>
#include <signal.h>
#include <sys/resource.h>
#include <time.h>
'
ccflags="$@"
# Write go tool cgo -godefs input.
(
echo package unix
echo
echo '/*'
indirect="includes_$(uname)"
echo "${!indirect} $includes"
echo '*/'
echo 'import "C"'
echo 'import "syscall"'
echo
echo 'const ('
# The gcc command line prints all the #defines
# it encounters while processing the input
echo "${!indirect} $includes" | $CC -x c - -E -dM $ccflags |
awk '
$1 != "#define" || $2 ~ /\(/ || $3 == "" {next}
$2 ~ /^E([ABCD]X|[BIS]P|[SD]I|S|FL)$/ {next} # 386 registers
$2 ~ /^(SIGEV_|SIGSTKSZ|SIGRT(MIN|MAX))/ {next}
$2 ~ /^(SCM_SRCRT)$/ {next}
$2 ~ /^(MAP_FAILED)$/ {next}
$2 ~ /^ELF_.*$/ {next}# <asm/elf.h> contains ELF_ARCH, etc.
$2 ~ /^EXTATTR_NAMESPACE_NAMES/ ||
$2 ~ /^EXTATTR_NAMESPACE_[A-Z]+_STRING/ {next}
$2 !~ /^ETH_/ &&
$2 !~ /^EPROC_/ &&
$2 !~ /^EQUIV_/ &&
$2 !~ /^EXPR_/ &&
$2 ~ /^E[A-Z0-9_]+$/ ||
$2 ~ /^B[0-9_]+$/ ||
$2 == "BOTHER" ||
$2 ~ /^CI?BAUD(EX)?$/ ||
$2 == "IBSHIFT" ||
$2 ~ /^V[A-Z0-9]+$/ ||
$2 ~ /^CS[A-Z0-9]/ ||
$2 ~ /^I(SIG|CANON|CRNL|UCLC|EXTEN|MAXBEL|STRIP|UTF8)$/ ||
$2 ~ /^IGN/ ||
$2 ~ /^IX(ON|ANY|OFF)$/ ||
$2 ~ /^IN(LCR|PCK)$/ ||
$2 ~ /(^FLU?SH)|(FLU?SH$)/ ||
$2 ~ /^C(LOCAL|READ|MSPAR|RTSCTS)$/ ||
$2 == "BRKINT" ||
$2 == "HUPCL" ||
$2 == "PENDIN" ||
$2 == "TOSTOP" ||
$2 == "XCASE" ||
$2 == "ALTWERASE" ||
$2 == "NOKERNINFO" ||
$2 ~ /^PAR/ ||
$2 ~ /^SIG[^_]/ ||
$2 ~ /^O[CNPFPL][A-Z]+[^_][A-Z]+$/ ||
$2 ~ /^(NL|CR|TAB|BS|VT|FF)DLY$/ ||
$2 ~ /^(NL|CR|TAB|BS|VT|FF)[0-9]$/ ||
$2 ~ /^O?XTABS$/ ||
$2 ~ /^TC[IO](ON|OFF)$/ ||
$2 ~ /^IN_/ ||
$2 ~ /^LOCK_(SH|EX|NB|UN)$/ ||
$2 ~ /^(AF|SOCK|SO|SOL|IPPROTO|IP|IPV6|ICMP6|TCP|EVFILT|NOTE|EV|SHUT|PROT|MAP|PACKET|MSG|SCM|MCL|DT|MADV|PR)_/ ||
$2 == "ICMPV6_FILTER" ||
$2 == "SOMAXCONN" ||
$2 == "NAME_MAX" ||
$2 == "IFNAMSIZ" ||
$2 ~ /^CTL_(MAXNAME|NET|QUERY)$/ ||
$2 ~ /^SYSCTL_VERS/ ||
$2 ~ /^(MS|MNT)_/ ||
$2 ~ /^TUN(SET|GET|ATTACH|DETACH)/ ||
$2 ~ /^(O|F|FD|NAME|S|PTRACE|PT)_/ ||
$2 ~ /^LINUX_REBOOT_CMD_/ ||
$2 ~ /^LINUX_REBOOT_MAGIC[12]$/ ||
$2 !~ "NLA_TYPE_MASK" &&
$2 ~ /^(NETLINK|NLM|NLMSG|NLA|IFA|IFAN|RT|RTCF|RTN|RTPROT|RTNH|ARPHRD|ETH_P)_/ ||
$2 ~ /^SIOC/ ||
$2 ~ /^TIOC/ ||
$2 ~ /^TCGET/ ||
$2 ~ /^TCSET/ ||
$2 ~ /^TC(FLSH|SBRKP?|XONC)$/ ||
$2 !~ "RTF_BITS" &&
$2 ~ /^(IFF|IFT|NET_RT|RTM|RTF|RTV|RTA|RTAX)_/ ||
$2 ~ /^BIOC/ ||
$2 ~ /^RUSAGE_(SELF|CHILDREN|THREAD)/ ||
$2 ~ /^RLIMIT_(AS|CORE|CPU|DATA|FSIZE|NOFILE|STACK)|RLIM_INFINITY/ ||
$2 ~ /^PRIO_(PROCESS|PGRP|USER)/ ||
$2 ~ /^CLONE_[A-Z_]+/ ||
$2 !~ /^(BPF_TIMEVAL)$/ &&
$2 ~ /^(BPF|DLT)_/ ||
$2 ~ /^CLOCK_/ ||
$2 !~ "WMESGLEN" &&
$2 ~ /^W[A-Z0-9]+$/ {printf("\t%s = C.%s\n", $2, $2)}
$2 ~ /^__WCOREFLAG$/ {next}
$2 ~ /^__W[A-Z0-9]+$/ {printf("\t%s = C.%s\n", substr($2,3), $2)}
{next}
' | sort
echo ')'
) >_const.go
# Pull out the error names for later.
errors=$(
echo '#include <errno.h>' | $CC -x c - -E -dM $ccflags |
awk '$1=="#define" && $2 ~ /^E[A-Z0-9_]+$/ { print $2 }' |
sort
)
# Pull out the signal names for later.
signals=$(
echo '#include <signal.h>' | $CC -x c - -E -dM $ccflags |
awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print $2 }' |
egrep -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT)' |
sort
)
# Again, writing regexps to a file.
echo '#include <errno.h>' | $CC -x c - -E -dM $ccflags |
awk '$1=="#define" && $2 ~ /^E[A-Z0-9_]+$/ { print "^\t" $2 "[ \t]*=" }' |
sort >_error.grep
echo '#include <signal.h>' | $CC -x c - -E -dM $ccflags |
awk '$1=="#define" && $2 ~ /^SIG[A-Z0-9]+$/ { print "^\t" $2 "[ \t]*=" }' |
egrep -v '(SIGSTKSIZE|SIGSTKSZ|SIGRT)' |
sort >_signal.grep
echo '// mkerrors.sh' "$@"
echo '// MACHINE GENERATED BY THE COMMAND ABOVE; DO NOT EDIT'
echo
echo "// +build ${GOARCH},${GOOS}"
echo
go tool cgo -godefs -- "$@" _const.go >_error.out
cat _error.out | grep -vf _error.grep | grep -vf _signal.grep
echo
echo '// Errors'
echo 'const ('
cat _error.out | grep -f _error.grep | sed 's/=\(.*\)/= syscall.Errno(\1)/'
echo ')'
echo
echo '// Signals'
echo 'const ('
cat _error.out | grep -f _signal.grep | sed 's/=\(.*\)/= syscall.Signal(\1)/'
echo ')'
# Run C program to print error and syscall strings.
(
echo -E "
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>
#include <string.h>
#include <signal.h>
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
enum { A = 'A', Z = 'Z', a = 'a', z = 'z' }; // avoid need for single quotes below
int errors[] = {
"
for i in $errors
do
echo -E ' '$i,
done
echo -E "
};
int signals[] = {
"
for i in $signals
do
echo -E ' '$i,
done
# Use -E because on some systems bash builtin interprets \n itself.
echo -E '
};
static int
intcmp(const void *a, const void *b)
{
return *(int*)a - *(int*)b;
}
int
main(void)
{
int i, j, e;
char buf[1024], *p;
printf("\n\n// Error table\n");
printf("var errors = [...]string {\n");
qsort(errors, nelem(errors), sizeof errors[0], intcmp);
for(i=0; i<nelem(errors); i++) {
e = errors[i];
if(i > 0 && errors[i-1] == e)
continue;
strcpy(buf, strerror(e));
// lowercase first letter: Bad -> bad, but STREAM -> STREAM.
if(A <= buf[0] && buf[0] <= Z && a <= buf[1] && buf[1] <= z)
buf[0] += a - A;
printf("\t%d: \"%s\",\n", e, buf);
}
printf("}\n\n");
printf("\n\n// Signal table\n");
printf("var signals = [...]string {\n");
qsort(signals, nelem(signals), sizeof signals[0], intcmp);
for(i=0; i<nelem(signals); i++) {
e = signals[i];
if(i > 0 && signals[i-1] == e)
continue;
strcpy(buf, strsignal(e));
// lowercase first letter: Bad -> bad, but STREAM -> STREAM.
if(A <= buf[0] && buf[0] <= Z && a <= buf[1] && buf[1] <= z)
buf[0] += a - A;
// cut trailing : number.
p = strrchr(buf, ":"[0]);
if(p)
*p = '\0';
printf("\t%d: \"%s\",\n", e, buf);
}
printf("}\n\n");
return 0;
}
'
) >_errors.c
$CC $ccflags -o _errors _errors.c && $GORUN ./_errors && rm -f _errors.c _errors _const.go _error.grep _signal.grep _error.out

@ -1,62 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// mkpost processes the output of cgo -godefs to
// modify the generated types. It is used to clean up
// the sys API in an architecture specific manner.
//
// mkpost is run after cgo -godefs by mkall.sh.
package main
import (
"fmt"
"go/format"
"io/ioutil"
"log"
"os"
"regexp"
)
func main() {
b, err := ioutil.ReadAll(os.Stdin)
if err != nil {
log.Fatal(err)
}
s := string(b)
goarch := os.Getenv("GOARCH")
goos := os.Getenv("GOOS")
if goarch == "s390x" && goos == "linux" {
// Export the types of PtraceRegs fields.
re := regexp.MustCompile("ptrace(Psw|Fpregs|Per)")
s = re.ReplaceAllString(s, "Ptrace$1")
// Replace padding fields inserted by cgo with blank identifiers.
re = regexp.MustCompile("Pad_cgo[A-Za-z0-9_]*")
s = re.ReplaceAllString(s, "_")
// Replace other unwanted fields with blank identifiers.
re = regexp.MustCompile("X_[A-Za-z0-9_]*")
s = re.ReplaceAllString(s, "_")
// Replace the control_regs union with a blank identifier for now.
re = regexp.MustCompile("(Control_regs)\\s+\\[0\\]uint64")
s = re.ReplaceAllString(s, "_ [0]uint64")
}
// gofmt
b, err = format.Source([]byte(s))
if err != nil {
log.Fatal(err)
}
// Append this command to the header to show where the new file
// came from.
re := regexp.MustCompile("(cgo -godefs [a-zA-Z0-9_]+\\.go.*)")
b = re.ReplaceAll(b, []byte("$1 | go run mkpost.go"))
fmt.Printf("%s", b)
}

@ -0,0 +1,323 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# This program reads a file containing function prototypes
# (like syscall_darwin.go) and generates system call bodies.
# The prototypes are marked by lines beginning with "//sys"
# and read like func declarations if //sys is replaced by func, but:
# * The parameter lists must give a name for each argument.
# This includes return parameters.
# * The parameter lists must give a type for each argument:
# the (x, y, z int) shorthand is not allowed.
# * If the return parameter is an error number, it must be named errno.
# A line beginning with //sysnb is like //sys, except that the
# goroutine will not be suspended during the execution of the system
# call. This must only be used for system calls which can never
# block, as otherwise the system call could cause all goroutines to
# hang.
use strict;
my $cmdline = "mksyscall.pl " . join(' ', @ARGV);
my $errors = 0;
my $_32bit = "";
my $plan9 = 0;
my $openbsd = 0;
my $netbsd = 0;
my $dragonfly = 0;
my $arm = 0; # 64-bit value should use (even, odd)-pair
if($ARGV[0] eq "-b32") {
$_32bit = "big-endian";
shift;
} elsif($ARGV[0] eq "-l32") {
$_32bit = "little-endian";
shift;
}
if($ARGV[0] eq "-plan9") {
$plan9 = 1;
shift;
}
if($ARGV[0] eq "-openbsd") {
$openbsd = 1;
shift;
}
if($ARGV[0] eq "-netbsd") {
$netbsd = 1;
shift;
}
if($ARGV[0] eq "-dragonfly") {
$dragonfly = 1;
shift;
}
if($ARGV[0] eq "-arm") {
$arm = 1;
shift;
}
if($ARGV[0] =~ /^-/) {
print STDERR "usage: mksyscall.pl [-b32 | -l32] [file ...]\n";
exit 1;
}
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
sub parseparamlist($) {
my ($list) = @_;
$list =~ s/^\s*//;
$list =~ s/\s*$//;
if($list eq "") {
return ();
}
return split(/\s*,\s*/, $list);
}
sub parseparam($) {
my ($p) = @_;
if($p !~ /^(\S*) (\S*)$/) {
print STDERR "$ARGV:$.: malformed parameter: $p\n";
$errors = 1;
return ("xx", "int");
}
return ($1, $2);
}
my $text = "";
while(<>) {
chomp;
s/\s+/ /g;
s/^\s+//;
s/\s+$//;
my $nonblock = /^\/\/sysnb /;
next if !/^\/\/sys / && !$nonblock;
# Line must be of the form
# func Open(path string, mode int, perm int) (fd int, errno error)
# Split into name, in params, out params.
if(!/^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*((?i)SYS_[A-Z0-9_]+))?$/) {
print STDERR "$ARGV:$.: malformed //sys declaration\n";
$errors = 1;
next;
}
my ($func, $in, $out, $sysname) = ($2, $3, $4, $5);
# Split argument lists on comma.
my @in = parseparamlist($in);
my @out = parseparamlist($out);
# Try in vain to keep people from editing this file.
# The theory is that they jump into the middle of the file
# without reading the header.
$text .= "// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT\n\n";
# Go function header.
my $out_decl = @out ? sprintf(" (%s)", join(', ', @out)) : "";
$text .= sprintf "func %s(%s)%s {\n", $func, join(', ', @in), $out_decl;
# Check if err return available
my $errvar = "";
foreach my $p (@out) {
my ($name, $type) = parseparam($p);
if($type eq "error") {
$errvar = $name;
last;
}
}
# Prepare arguments to Syscall.
my @args = ();
my @uses = ();
my $n = 0;
foreach my $p (@in) {
my ($name, $type) = parseparam($p);
if($type =~ /^\*/) {
push @args, "uintptr(unsafe.Pointer($name))";
} elsif($type eq "string" && $errvar ne "") {
$text .= "\tvar _p$n *byte\n";
$text .= "\t_p$n, $errvar = BytePtrFromString($name)\n";
$text .= "\tif $errvar != nil {\n\t\treturn\n\t}\n";
push @args, "uintptr(unsafe.Pointer(_p$n))";
push @uses, "use(unsafe.Pointer(_p$n))";
$n++;
} elsif($type eq "string") {
print STDERR "$ARGV:$.: $func uses string arguments, but has no error return\n";
$text .= "\tvar _p$n *byte\n";
$text .= "\t_p$n, _ = BytePtrFromString($name)\n";
push @args, "uintptr(unsafe.Pointer(_p$n))";
push @uses, "use(unsafe.Pointer(_p$n))";
$n++;
} elsif($type =~ /^\[\](.*)/) {
# Convert slice into pointer, length.
# Have to be careful not to take address of &a[0] if len == 0:
# pass dummy pointer in that case.
# Used to pass nil, but some OSes or simulators reject write(fd, nil, 0).
$text .= "\tvar _p$n unsafe.Pointer\n";
$text .= "\tif len($name) > 0 {\n\t\t_p$n = unsafe.Pointer(\&${name}[0])\n\t}";
$text .= " else {\n\t\t_p$n = unsafe.Pointer(&_zero)\n\t}";
$text .= "\n";
push @args, "uintptr(_p$n)", "uintptr(len($name))";
$n++;
} elsif($type eq "int64" && ($openbsd || $netbsd)) {
push @args, "0";
if($_32bit eq "big-endian") {
push @args, "uintptr($name>>32)", "uintptr($name)";
} elsif($_32bit eq "little-endian") {
push @args, "uintptr($name)", "uintptr($name>>32)";
} else {
push @args, "uintptr($name)";
}
} elsif($type eq "int64" && $dragonfly) {
if ($func !~ /^extp(read|write)/i) {
push @args, "0";
}
if($_32bit eq "big-endian") {
push @args, "uintptr($name>>32)", "uintptr($name)";
} elsif($_32bit eq "little-endian") {
push @args, "uintptr($name)", "uintptr($name>>32)";
} else {
push @args, "uintptr($name)";
}
} elsif($type eq "int64" && $_32bit ne "") {
if(@args % 2 && $arm) {
# arm abi specifies 64-bit argument uses
# (even, odd) pair
push @args, "0"
}
if($_32bit eq "big-endian") {
push @args, "uintptr($name>>32)", "uintptr($name)";
} else {
push @args, "uintptr($name)", "uintptr($name>>32)";
}
} else {
push @args, "uintptr($name)";
}
}
# Determine which form to use; pad args with zeros.
my $asm = "Syscall";
if ($nonblock) {
$asm = "RawSyscall";
}
if(@args <= 3) {
while(@args < 3) {
push @args, "0";
}
} elsif(@args <= 6) {
$asm .= "6";
while(@args < 6) {
push @args, "0";
}
} elsif(@args <= 9) {
$asm .= "9";
while(@args < 9) {
push @args, "0";
}
} else {
print STDERR "$ARGV:$.: too many arguments to system call\n";
}
# System call number.
if($sysname eq "") {
$sysname = "SYS_$func";
$sysname =~ s/([a-z])([A-Z])/${1}_$2/g; # turn FooBar into Foo_Bar
$sysname =~ y/a-z/A-Z/;
}
# Actual call.
my $args = join(', ', @args);
my $call = "$asm($sysname, $args)";
# Assign return values.
my $body = "";
my @ret = ("_", "_", "_");
my $do_errno = 0;
for(my $i=0; $i<@out; $i++) {
my $p = $out[$i];
my ($name, $type) = parseparam($p);
my $reg = "";
if($name eq "err" && !$plan9) {
$reg = "e1";
$ret[2] = $reg;
$do_errno = 1;
} elsif($name eq "err" && $plan9) {
$ret[0] = "r0";
$ret[2] = "e1";
next;
} else {
$reg = sprintf("r%d", $i);
$ret[$i] = $reg;
}
if($type eq "bool") {
$reg = "$reg != 0";
}
if($type eq "int64" && $_32bit ne "") {
# 64-bit number in r1:r0 or r0:r1.
if($i+2 > @out) {
print STDERR "$ARGV:$.: not enough registers for int64 return\n";
}
if($_32bit eq "big-endian") {
$reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i, $i+1);
} else {
$reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i+1, $i);
}
$ret[$i] = sprintf("r%d", $i);
$ret[$i+1] = sprintf("r%d", $i+1);
}
if($reg ne "e1" || $plan9) {
$body .= "\t$name = $type($reg)\n";
}
}
if ($ret[0] eq "_" && $ret[1] eq "_" && $ret[2] eq "_") {
$text .= "\t$call\n";
} else {
$text .= "\t$ret[0], $ret[1], $ret[2] := $call\n";
}
foreach my $use (@uses) {
$text .= "\t$use\n";
}
$text .= $body;
if ($plan9 && $ret[2] eq "e1") {
$text .= "\tif int32(r0) == -1 {\n";
$text .= "\t\terr = e1\n";
$text .= "\t}\n";
} elsif ($do_errno) {
$text .= "\tif e1 != 0 {\n";
$text .= "\t\terr = errnoErr(e1)\n";
$text .= "\t}\n";
}
$text .= "\treturn\n";
$text .= "}\n\n";
}
chomp $text;
chomp $text;
if($errors) {
exit 1;
}
print <<EOF;
// $cmdline
// MACHINE GENERATED BY THE COMMAND ABOVE; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix
import (
"syscall"
"unsafe"
)
var _ syscall.Errno
$text
EOF
exit 0;

@ -0,0 +1,294 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# This program reads a file containing function prototypes
# (like syscall_solaris.go) and generates system call bodies.
# The prototypes are marked by lines beginning with "//sys"
# and read like func declarations if //sys is replaced by func, but:
# * The parameter lists must give a name for each argument.
# This includes return parameters.
# * The parameter lists must give a type for each argument:
# the (x, y, z int) shorthand is not allowed.
# * If the return parameter is an error number, it must be named err.
# * If go func name needs to be different than its libc name,
# * or the function is not in libc, name could be specified
# * at the end, after "=" sign, like
# //sys getsockopt(s int, level int, name int, val uintptr, vallen *_Socklen) (err error) = libsocket.getsockopt
use strict;
my $cmdline = "mksyscall_solaris.pl " . join(' ', @ARGV);
my $errors = 0;
my $_32bit = "";
binmode STDOUT;
if($ARGV[0] eq "-b32") {
$_32bit = "big-endian";
shift;
} elsif($ARGV[0] eq "-l32") {
$_32bit = "little-endian";
shift;
}
if($ARGV[0] =~ /^-/) {
print STDERR "usage: mksyscall_solaris.pl [-b32 | -l32] [file ...]\n";
exit 1;
}
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
sub parseparamlist($) {
my ($list) = @_;
$list =~ s/^\s*//;
$list =~ s/\s*$//;
if($list eq "") {
return ();
}
return split(/\s*,\s*/, $list);
}
sub parseparam($) {
my ($p) = @_;
if($p !~ /^(\S*) (\S*)$/) {
print STDERR "$ARGV:$.: malformed parameter: $p\n";
$errors = 1;
return ("xx", "int");
}
return ($1, $2);
}
my $package = "";
my $text = "";
my $dynimports = "";
my $linknames = "";
my @vars = ();
while(<>) {
chomp;
s/\s+/ /g;
s/^\s+//;
s/\s+$//;
$package = $1 if !$package && /^package (\S+)$/;
my $nonblock = /^\/\/sysnb /;
next if !/^\/\/sys / && !$nonblock;
# Line must be of the form
# func Open(path string, mode int, perm int) (fd int, err error)
# Split into name, in params, out params.
if(!/^\/\/sys(nb)? (\w+)\(([^()]*)\)\s*(?:\(([^()]+)\))?\s*(?:=\s*(?:(\w*)\.)?(\w*))?$/) {
print STDERR "$ARGV:$.: malformed //sys declaration\n";
$errors = 1;
next;
}
my ($nb, $func, $in, $out, $modname, $sysname) = ($1, $2, $3, $4, $5, $6);
# Split argument lists on comma.
my @in = parseparamlist($in);
my @out = parseparamlist($out);
# So file name.
if($modname eq "") {
$modname = "libc";
}
# System call name.
if($sysname eq "") {
$sysname = "$func";
}
# System call pointer variable name.
my $sysvarname = "proc$sysname";
my $strconvfunc = "BytePtrFromString";
my $strconvtype = "*byte";
$sysname =~ y/A-Z/a-z/; # All libc functions are lowercase.
# Runtime import of function to allow cross-platform builds.
$dynimports .= "//go:cgo_import_dynamic libc_${sysname} ${sysname} \"$modname.so\"\n";
# Link symbol to proc address variable.
$linknames .= "//go:linkname ${sysvarname} libc_${sysname}\n";
# Library proc address variable.
push @vars, $sysvarname;
# Go function header.
$out = join(', ', @out);
if($out ne "") {
$out = " ($out)";
}
if($text ne "") {
$text .= "\n"
}
$text .= sprintf "func %s(%s)%s {\n", $func, join(', ', @in), $out;
# Check if err return available
my $errvar = "";
foreach my $p (@out) {
my ($name, $type) = parseparam($p);
if($type eq "error") {
$errvar = $name;
last;
}
}
# Prepare arguments to Syscall.
my @args = ();
my @uses = ();
my $n = 0;
foreach my $p (@in) {
my ($name, $type) = parseparam($p);
if($type =~ /^\*/) {
push @args, "uintptr(unsafe.Pointer($name))";
} elsif($type eq "string" && $errvar ne "") {
$text .= "\tvar _p$n $strconvtype\n";
$text .= "\t_p$n, $errvar = $strconvfunc($name)\n";
$text .= "\tif $errvar != nil {\n\t\treturn\n\t}\n";
push @args, "uintptr(unsafe.Pointer(_p$n))";
push @uses, "use(unsafe.Pointer(_p$n))";
$n++;
} elsif($type eq "string") {
print STDERR "$ARGV:$.: $func uses string arguments, but has no error return\n";
$text .= "\tvar _p$n $strconvtype\n";
$text .= "\t_p$n, _ = $strconvfunc($name)\n";
push @args, "uintptr(unsafe.Pointer(_p$n))";
push @uses, "use(unsafe.Pointer(_p$n))";
$n++;
} elsif($type =~ /^\[\](.*)/) {
# Convert slice into pointer, length.
# Have to be careful not to take address of &a[0] if len == 0:
# pass nil in that case.
$text .= "\tvar _p$n *$1\n";
$text .= "\tif len($name) > 0 {\n\t\t_p$n = \&$name\[0]\n\t}\n";
push @args, "uintptr(unsafe.Pointer(_p$n))", "uintptr(len($name))";
$n++;
} elsif($type eq "int64" && $_32bit ne "") {
if($_32bit eq "big-endian") {
push @args, "uintptr($name >> 32)", "uintptr($name)";
} else {
push @args, "uintptr($name)", "uintptr($name >> 32)";
}
} elsif($type eq "bool") {
$text .= "\tvar _p$n uint32\n";
$text .= "\tif $name {\n\t\t_p$n = 1\n\t} else {\n\t\t_p$n = 0\n\t}\n";
push @args, "uintptr(_p$n)";
$n++;
} else {
push @args, "uintptr($name)";
}
}
my $nargs = @args;
# Determine which form to use; pad args with zeros.
my $asm = "sysvicall6";
if ($nonblock) {
$asm = "rawSysvicall6";
}
if(@args <= 6) {
while(@args < 6) {
push @args, "0";
}
} else {
print STDERR "$ARGV:$.: too many arguments to system call\n";
}
# Actual call.
my $args = join(', ', @args);
my $call = "$asm(uintptr(unsafe.Pointer(&$sysvarname)), $nargs, $args)";
# Assign return values.
my $body = "";
my $failexpr = "";
my @ret = ("_", "_", "_");
my @pout= ();
my $do_errno = 0;
for(my $i=0; $i<@out; $i++) {
my $p = $out[$i];
my ($name, $type) = parseparam($p);
my $reg = "";
if($name eq "err") {
$reg = "e1";
$ret[2] = $reg;
$do_errno = 1;
} else {
$reg = sprintf("r%d", $i);
$ret[$i] = $reg;
}
if($type eq "bool") {
$reg = "$reg != 0";
}
if($type eq "int64" && $_32bit ne "") {
# 64-bit number in r1:r0 or r0:r1.
if($i+2 > @out) {
print STDERR "$ARGV:$.: not enough registers for int64 return\n";
}
if($_32bit eq "big-endian") {
$reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i, $i+1);
} else {
$reg = sprintf("int64(r%d)<<32 | int64(r%d)", $i+1, $i);
}
$ret[$i] = sprintf("r%d", $i);
$ret[$i+1] = sprintf("r%d", $i+1);
}
if($reg ne "e1") {
$body .= "\t$name = $type($reg)\n";
}
}
if ($ret[0] eq "_" && $ret[1] eq "_" && $ret[2] eq "_") {
$text .= "\t$call\n";
} else {
$text .= "\t$ret[0], $ret[1], $ret[2] := $call\n";
}
foreach my $use (@uses) {
$text .= "\t$use\n";
}
$text .= $body;
if ($do_errno) {
$text .= "\tif e1 != 0 {\n";
$text .= "\t\terr = e1\n";
$text .= "\t}\n";
}
$text .= "\treturn\n";
$text .= "}\n";
}
if($errors) {
exit 1;
}
print <<EOF;
// $cmdline
// MACHINE GENERATED BY THE COMMAND ABOVE; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package $package
import (
"syscall"
"unsafe"
)
EOF
print "import \"golang.org/x/sys/unix\"\n" if $package ne "unix";
my $vardecls = "\t" . join(",\n\t", @vars);
$vardecls .= " syscallFunc";
chomp($_=<<EOF);
$dynimports
$linknames
var (
$vardecls
)
$text
EOF
print $_;
exit 0;

@ -0,0 +1,264 @@
#!/usr/bin/env perl
# Copyright 2011 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Parse the header files for OpenBSD and generate a Go usable sysctl MIB.
#
# Build a MIB with each entry being an array containing the level, type and
# a hash that will contain additional entries if the current entry is a node.
# We then walk this MIB and create a flattened sysctl name to OID hash.
#
use strict;
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
my $debug = 0;
my %ctls = ();
my @headers = qw (
sys/sysctl.h
sys/socket.h
sys/tty.h
sys/malloc.h
sys/mount.h
sys/namei.h
sys/sem.h
sys/shm.h
sys/vmmeter.h
uvm/uvm_param.h
uvm/uvm_swap_encrypt.h
ddb/db_var.h
net/if.h
net/if_pfsync.h
net/pipex.h
netinet/in.h
netinet/icmp_var.h
netinet/igmp_var.h
netinet/ip_ah.h
netinet/ip_carp.h
netinet/ip_divert.h
netinet/ip_esp.h
netinet/ip_ether.h
netinet/ip_gre.h
netinet/ip_ipcomp.h
netinet/ip_ipip.h
netinet/pim_var.h
netinet/tcp_var.h
netinet/udp_var.h
netinet6/in6.h
netinet6/ip6_divert.h
netinet6/pim6_var.h
netinet/icmp6.h
netmpls/mpls.h
);
my @ctls = qw (
kern
vm
fs
net
#debug # Special handling required
hw
#machdep # Arch specific
user
ddb
#vfs # Special handling required
fs.posix
kern.forkstat
kern.intrcnt
kern.malloc
kern.nchstats
kern.seminfo
kern.shminfo
kern.timecounter
kern.tty
kern.watchdog
net.bpf
net.ifq
net.inet
net.inet.ah
net.inet.carp
net.inet.divert
net.inet.esp
net.inet.etherip
net.inet.gre
net.inet.icmp
net.inet.igmp
net.inet.ip
net.inet.ip.ifq
net.inet.ipcomp
net.inet.ipip
net.inet.mobileip
net.inet.pfsync
net.inet.pim
net.inet.tcp
net.inet.udp
net.inet6
net.inet6.divert
net.inet6.ip6
net.inet6.icmp6
net.inet6.pim6
net.inet6.tcp6
net.inet6.udp6
net.mpls
net.mpls.ifq
net.key
net.pflow
net.pfsync
net.pipex
net.rt
vm.swapencrypt
#vfsgenctl # Special handling required
);
# Node name "fixups"
my %ctl_map = (
"ipproto" => "net.inet",
"net.inet.ipproto" => "net.inet",
"net.inet6.ipv6proto" => "net.inet6",
"net.inet6.ipv6" => "net.inet6.ip6",
"net.inet.icmpv6" => "net.inet6.icmp6",
"net.inet6.divert6" => "net.inet6.divert",
"net.inet6.tcp6" => "net.inet.tcp",
"net.inet6.udp6" => "net.inet.udp",
"mpls" => "net.mpls",
"swpenc" => "vm.swapencrypt"
);
# Node mappings
my %node_map = (
"net.inet.ip.ifq" => "net.ifq",
"net.inet.pfsync" => "net.pfsync",
"net.mpls.ifq" => "net.ifq"
);
my $ctlname;
my %mib = ();
my %sysctl = ();
my $node;
sub debug() {
print STDERR "$_[0]\n" if $debug;
}
# Walk the MIB and build a sysctl name to OID mapping.
sub build_sysctl() {
my ($node, $name, $oid) = @_;
my %node = %{$node};
my @oid = @{$oid};
foreach my $key (sort keys %node) {
my @node = @{$node{$key}};
my $nodename = $name.($name ne '' ? '.' : '').$key;
my @nodeoid = (@oid, $node[0]);
if ($node[1] eq 'CTLTYPE_NODE') {
if (exists $node_map{$nodename}) {
$node = \%mib;
$ctlname = $node_map{$nodename};
foreach my $part (split /\./, $ctlname) {
$node = \%{@{$$node{$part}}[2]};
}
} else {
$node = $node[2];
}
&build_sysctl($node, $nodename, \@nodeoid);
} elsif ($node[1] ne '') {
$sysctl{$nodename} = \@nodeoid;
}
}
}
foreach my $ctl (@ctls) {
$ctls{$ctl} = $ctl;
}
# Build MIB
foreach my $header (@headers) {
&debug("Processing $header...");
open HEADER, "/usr/include/$header" ||
print STDERR "Failed to open $header\n";
while (<HEADER>) {
if ($_ =~ /^#define\s+(CTL_NAMES)\s+{/ ||
$_ =~ /^#define\s+(CTL_(.*)_NAMES)\s+{/ ||
$_ =~ /^#define\s+((.*)CTL_NAMES)\s+{/) {
if ($1 eq 'CTL_NAMES') {
# Top level.
$node = \%mib;
} else {
# Node.
my $nodename = lc($2);
if ($header =~ /^netinet\//) {
$ctlname = "net.inet.$nodename";
} elsif ($header =~ /^netinet6\//) {
$ctlname = "net.inet6.$nodename";
} elsif ($header =~ /^net\//) {
$ctlname = "net.$nodename";
} else {
$ctlname = "$nodename";
$ctlname =~ s/^(fs|net|kern)_/$1\./;
}
if (exists $ctl_map{$ctlname}) {
$ctlname = $ctl_map{$ctlname};
}
if (not exists $ctls{$ctlname}) {
&debug("Ignoring $ctlname...");
next;
}
# Walk down from the top of the MIB.
$node = \%mib;
foreach my $part (split /\./, $ctlname) {
if (not exists $$node{$part}) {
&debug("Missing node $part");
$$node{$part} = [ 0, '', {} ];
}
$node = \%{@{$$node{$part}}[2]};
}
}
# Populate current node with entries.
my $i = -1;
while (defined($_) && $_ !~ /^}/) {
$_ = <HEADER>;
$i++ if $_ =~ /{.*}/;
next if $_ !~ /{\s+"(\w+)",\s+(CTLTYPE_[A-Z]+)\s+}/;
$$node{$1} = [ $i, $2, {} ];
}
}
}
close HEADER;
}
&build_sysctl(\%mib, "", []);
print <<EOF;
// mksysctl_openbsd.pl
// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix;
type mibentry struct {
ctlname string
ctloid []_C_int
}
var sysctlMib = []mibentry {
EOF
foreach my $name (sort keys %sysctl) {
my @oid = @{$sysctl{$name}};
print "\t{ \"$name\", []_C_int{ ", join(', ', @oid), " } }, \n";
}
print <<EOF;
}
EOF

@ -0,0 +1,39 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Generate system call table for Darwin from sys/syscall.h
use strict;
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
my $command = "mksysnum_darwin.pl " . join(' ', @ARGV);
print <<EOF;
// $command
// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix
const (
EOF
while(<>){
if(/^#define\s+SYS_(\w+)\s+([0-9]+)/){
my $name = $1;
my $num = $2;
$name =~ y/a-z/A-Z/;
print " SYS_$name = $num;"
}
}
print <<EOF;
)
EOF

@ -0,0 +1,50 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Generate system call table for DragonFly from master list
# (for example, /usr/src/sys/kern/syscalls.master).
use strict;
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
my $command = "mksysnum_dragonfly.pl " . join(' ', @ARGV);
print <<EOF;
// $command
// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix
const (
EOF
while(<>){
if(/^([0-9]+)\s+STD\s+({ \S+\s+(\w+).*)$/){
my $num = $1;
my $proto = $2;
my $name = "SYS_$3";
$name =~ y/a-z/A-Z/;
# There are multiple entries for enosys and nosys, so comment them out.
if($name =~ /^SYS_E?NOSYS$/){
$name = "// $name";
}
if($name eq 'SYS_SYS_EXIT'){
$name = 'SYS_EXIT';
}
print " $name = $num; // $proto\n";
}
}
print <<EOF;
)
EOF

@ -0,0 +1,63 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Generate system call table for FreeBSD from master list
# (for example, /usr/src/sys/kern/syscalls.master).
use strict;
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
my $command = "mksysnum_freebsd.pl " . join(' ', @ARGV);
print <<EOF;
// $command
// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix
const (
EOF
while(<>){
if(/^([0-9]+)\s+\S+\s+STD\s+({ \S+\s+(\w+).*)$/){
my $num = $1;
my $proto = $2;
my $name = "SYS_$3";
$name =~ y/a-z/A-Z/;
# There are multiple entries for enosys and nosys, so comment them out.
if($name =~ /^SYS_E?NOSYS$/){
$name = "// $name";
}
if($name eq 'SYS_SYS_EXIT'){
$name = 'SYS_EXIT';
}
if($name =~ /^SYS_CAP_+/ || $name =~ /^SYS___CAP_+/){
next
}
print " $name = $num; // $proto\n";
# We keep Capsicum syscall numbers for FreeBSD
# 9-STABLE here because we are not sure whether they
# are mature and stable.
if($num == 513){
print " SYS_CAP_NEW = 514 // { int cap_new(int fd, uint64_t rights); }\n";
print " SYS_CAP_GETRIGHTS = 515 // { int cap_getrights(int fd, \\\n";
print " SYS_CAP_ENTER = 516 // { int cap_enter(void); }\n";
print " SYS_CAP_GETMODE = 517 // { int cap_getmode(u_int *modep); }\n";
}
}
}
print <<EOF;
)
EOF

@ -0,0 +1,58 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
use strict;
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
my $command = "mksysnum_linux.pl ". join(' ', @ARGV);
print <<EOF;
// $command
// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix
const(
EOF
sub fmt {
my ($name, $num) = @_;
if($num > 999){
# ignore deprecated syscalls that are no longer implemented
# https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/include/uapi/asm-generic/unistd.h?id=refs/heads/master#n716
return;
}
$name =~ y/a-z/A-Z/;
print " SYS_$name = $num;\n";
}
my $prev;
open(GCC, "gcc -E -dD $ARGV[0] |") || die "can't run gcc";
while(<GCC>){
if(/^#define __NR_syscalls\s+/) {
# ignore redefinitions of __NR_syscalls
}
elsif(/^#define __NR_(\w+)\s+([0-9]+)/){
$prev = $2;
fmt($1, $2);
}
elsif(/^#define __NR3264_(\w+)\s+([0-9]+)/){
$prev = $2;
fmt($1, $2);
}
elsif(/^#define __NR_(\w+)\s+\(\w+\+\s*([0-9]+)\)/){
fmt($1, $prev+$2)
}
}
print <<EOF;
)
EOF

@ -0,0 +1,58 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Generate system call table for OpenBSD from master list
# (for example, /usr/src/sys/kern/syscalls.master).
use strict;
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
my $command = "mksysnum_netbsd.pl " . join(' ', @ARGV);
print <<EOF;
// $command
// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix
const (
EOF
my $line = '';
while(<>){
if($line =~ /^(.*)\\$/) {
# Handle continuation
$line = $1;
$_ =~ s/^\s+//;
$line .= $_;
} else {
# New line
$line = $_;
}
next if $line =~ /\\$/;
if($line =~ /^([0-9]+)\s+((STD)|(NOERR))\s+(RUMP\s+)?({\s+\S+\s*\*?\s*\|(\S+)\|(\S*)\|(\w+).*\s+})(\s+(\S+))?$/) {
my $num = $1;
my $proto = $6;
my $compat = $8;
my $name = "$7_$9";
$name = "$7_$11" if $11 ne '';
$name =~ y/a-z/A-Z/;
if($compat eq '' || $compat eq '30' || $compat eq '50') {
print " $name = $num; // $proto\n";
}
}
}
print <<EOF;
)
EOF

@ -0,0 +1,50 @@
#!/usr/bin/env perl
# Copyright 2009 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
#
# Generate system call table for OpenBSD from master list
# (for example, /usr/src/sys/kern/syscalls.master).
use strict;
if($ENV{'GOARCH'} eq "" || $ENV{'GOOS'} eq "") {
print STDERR "GOARCH or GOOS not defined in environment\n";
exit 1;
}
my $command = "mksysnum_openbsd.pl " . join(' ', @ARGV);
print <<EOF;
// $command
// MACHINE GENERATED BY THE ABOVE COMMAND; DO NOT EDIT
// +build $ENV{'GOARCH'},$ENV{'GOOS'}
package unix
const (
EOF
while(<>){
if(/^([0-9]+)\s+STD\s+(NOLOCK\s+)?({ \S+\s+\*?(\w+).*)$/){
my $num = $1;
my $proto = $3;
my $name = $4;
$name =~ y/a-z/A-Z/;
# There are multiple entries for enosys and nosys, so comment them out.
if($name =~ /^SYS_E?NOSYS$/){
$name = "// $name";
}
if($name eq 'SYS_SYS_EXIT'){
$name = 'SYS_EXIT';
}
print " $name = $num; // $proto\n";
}
}
print <<EOF;
)
EOF

@ -1,250 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define __DARWIN_UNIX03 0
#define KERNEL
#define _DARWIN_USE_64_BIT_INODE
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <termios.h>
#include <unistd.h>
#include <mach/mach.h>
#include <mach/message.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics; for internal use.
const (
sizeofPtr = C.sizeofPtr
sizeofShort = C.sizeof_short
sizeofInt = C.sizeof_int
sizeofLong = C.sizeof_long
sizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
type Timeval32 C.struct_timeval32
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
type Stat_t C.struct_stat64
type Statfs_t C.struct_statfs64
type Flock_t C.struct_flock
type Fstore_t C.struct_fstore
type Radvisory_t C.struct_radvisory
type Fbootstraptransfer_t C.struct_fbootstraptransfer
type Log2phys_t C.struct_log2phys
type Fsid C.struct_fsid
type Dirent C.struct_dirent
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet4Pktinfo C.struct_in_pktinfo
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet4Pktinfo = C.sizeof_struct_in_pktinfo
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfmaMsghdr = C.sizeof_struct_ifma_msghdr
SizeofIfmaMsghdr2 = C.sizeof_struct_ifma_msghdr2
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfmaMsghdr C.struct_ifma_msghdr
type IfmaMsghdr2 C.struct_ifma_msghdr2
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
// Terminal handling
type Termios C.struct_termios
// fchmodat-like syscalls.
const (
AT_FDCWD = C.AT_FDCWD
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)

@ -1,242 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics; for internal use.
const (
sizeofPtr = C.sizeofPtr
sizeofShort = C.sizeof_short
sizeofInt = C.sizeof_int
sizeofLong = C.sizeof_long
sizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
const ( // Directory mode bits
S_IFMT = C.S_IFMT
S_IFIFO = C.S_IFIFO
S_IFCHR = C.S_IFCHR
S_IFDIR = C.S_IFDIR
S_IFBLK = C.S_IFBLK
S_IFREG = C.S_IFREG
S_IFLNK = C.S_IFLNK
S_IFSOCK = C.S_IFSOCK
S_ISUID = C.S_ISUID
S_ISGID = C.S_ISGID
S_ISVTX = C.S_ISVTX
S_IRUSR = C.S_IRUSR
S_IWUSR = C.S_IWUSR
S_IXUSR = C.S_IXUSR
)
type Stat_t C.struct_stat
type Statfs_t C.struct_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type Fsid C.struct_fsid
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfmaMsghdr = C.sizeof_struct_ifma_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfmaMsghdr C.struct_ifma_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
// Terminal handling
type Termios C.struct_termios

@ -1,353 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
// This structure is a duplicate of stat on FreeBSD 8-STABLE.
// See /usr/include/sys/stat.h.
struct stat8 {
#undef st_atimespec st_atim
#undef st_mtimespec st_mtim
#undef st_ctimespec st_ctim
#undef st_birthtimespec st_birthtim
__dev_t st_dev;
ino_t st_ino;
mode_t st_mode;
nlink_t st_nlink;
uid_t st_uid;
gid_t st_gid;
__dev_t st_rdev;
#if __BSD_VISIBLE
struct timespec st_atimespec;
struct timespec st_mtimespec;
struct timespec st_ctimespec;
#else
time_t st_atime;
long __st_atimensec;
time_t st_mtime;
long __st_mtimensec;
time_t st_ctime;
long __st_ctimensec;
#endif
off_t st_size;
blkcnt_t st_blocks;
blksize_t st_blksize;
fflags_t st_flags;
__uint32_t st_gen;
__int32_t st_lspare;
#if __BSD_VISIBLE
struct timespec st_birthtimespec;
unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec));
unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec));
#else
time_t st_birthtime;
long st_birthtimensec;
unsigned int :(8 / 2) * (16 - (int)sizeof(struct __timespec));
unsigned int :(8 / 2) * (16 - (int)sizeof(struct __timespec));
#endif
};
// This structure is a duplicate of if_data on FreeBSD 8-STABLE.
// See /usr/include/net/if.h.
struct if_data8 {
u_char ifi_type;
u_char ifi_physical;
u_char ifi_addrlen;
u_char ifi_hdrlen;
u_char ifi_link_state;
u_char ifi_spare_char1;
u_char ifi_spare_char2;
u_char ifi_datalen;
u_long ifi_mtu;
u_long ifi_metric;
u_long ifi_baudrate;
u_long ifi_ipackets;
u_long ifi_ierrors;
u_long ifi_opackets;
u_long ifi_oerrors;
u_long ifi_collisions;
u_long ifi_ibytes;
u_long ifi_obytes;
u_long ifi_imcasts;
u_long ifi_omcasts;
u_long ifi_iqdrops;
u_long ifi_noproto;
u_long ifi_hwassist;
time_t ifi_epoch;
struct timeval ifi_lastchange;
};
// This structure is a duplicate of if_msghdr on FreeBSD 8-STABLE.
// See /usr/include/net/if.h.
struct if_msghdr8 {
u_short ifm_msglen;
u_char ifm_version;
u_char ifm_type;
int ifm_addrs;
int ifm_flags;
u_short ifm_index;
struct if_data8 ifm_data;
};
*/
import "C"
// Machine characteristics; for internal use.
const (
sizeofPtr = C.sizeofPtr
sizeofShort = C.sizeof_short
sizeofInt = C.sizeof_int
sizeofLong = C.sizeof_long
sizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
const ( // Directory mode bits
S_IFMT = C.S_IFMT
S_IFIFO = C.S_IFIFO
S_IFCHR = C.S_IFCHR
S_IFDIR = C.S_IFDIR
S_IFBLK = C.S_IFBLK
S_IFREG = C.S_IFREG
S_IFLNK = C.S_IFLNK
S_IFSOCK = C.S_IFSOCK
S_ISUID = C.S_ISUID
S_ISGID = C.S_ISGID
S_ISVTX = C.S_ISVTX
S_IRUSR = C.S_IRUSR
S_IWUSR = C.S_IWUSR
S_IXUSR = C.S_IXUSR
)
type Stat_t C.struct_stat8
type Statfs_t C.struct_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type Fsid C.struct_fsid
// Advice to Fadvise
const (
FADV_NORMAL = C.POSIX_FADV_NORMAL
FADV_RANDOM = C.POSIX_FADV_RANDOM
FADV_SEQUENTIAL = C.POSIX_FADV_SEQUENTIAL
FADV_WILLNEED = C.POSIX_FADV_WILLNEED
FADV_DONTNEED = C.POSIX_FADV_DONTNEED
FADV_NOREUSE = C.POSIX_FADV_NOREUSE
)
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPMreqn C.struct_ip_mreqn
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPMreqn = C.sizeof_struct_ip_mreqn
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
const (
sizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfMsghdr = C.sizeof_struct_if_msghdr8
sizeofIfData = C.sizeof_struct_if_data
SizeofIfData = C.sizeof_struct_if_data8
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfmaMsghdr = C.sizeof_struct_ifma_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type ifMsghdr C.struct_if_msghdr
type IfMsghdr C.struct_if_msghdr8
type ifData C.struct_if_data
type IfData C.struct_if_data8
type IfaMsghdr C.struct_ifa_msghdr
type IfmaMsghdr C.struct_ifma_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfZbuf = C.sizeof_struct_bpf_zbuf
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
SizeofBpfZbufHeader = C.sizeof_struct_bpf_zbuf_header
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfZbuf C.struct_bpf_zbuf
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
type BpfZbufHeader C.struct_bpf_zbuf_header
// Terminal handling
type Termios C.struct_termios

@ -1,450 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define _LARGEFILE_SOURCE
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64
#define _GNU_SOURCE
#include <dirent.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netpacket/packet.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/inotify.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/timex.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/user.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <linux/filter.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/icmpv6.h>
#include <asm/termbits.h>
#include <time.h>
#include <unistd.h>
#include <ustat.h>
#include <utime.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/hci.h>
#ifdef TCSETS2
// On systems that have "struct termios2" use this as type Termios.
typedef struct termios2 termios_t;
#else
typedef struct termios termios_t;
#endif
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_ll s5;
struct sockaddr_nl s6;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
// copied from /usr/include/linux/un.h
struct my_sockaddr_un {
sa_family_t sun_family;
#if defined(__ARM_EABI__) || defined(__powerpc64__)
// on ARM char is by default unsigned
signed char sun_path[108];
#else
char sun_path[108];
#endif
};
#ifdef __ARM_EABI__
typedef struct user_regs PtraceRegs;
#elif defined(__aarch64__)
typedef struct user_pt_regs PtraceRegs;
#elif defined(__powerpc64__)
typedef struct pt_regs PtraceRegs;
#elif defined(__mips__)
typedef struct user PtraceRegs;
#elif defined(__s390x__)
typedef struct _user_regs_struct PtraceRegs;
#else
typedef struct user_regs_struct PtraceRegs;
#endif
#if defined(__s390x__)
typedef struct _user_psw_struct ptracePsw;
typedef struct _user_fpregs_struct ptraceFpregs;
typedef struct _user_per_struct ptracePer;
#else
typedef struct {} ptracePsw;
typedef struct {} ptraceFpregs;
typedef struct {} ptracePer;
#endif
// The real epoll_event is a union, and godefs doesn't handle it well.
struct my_epoll_event {
uint32_t events;
#if defined(__ARM_EABI__) || defined(__aarch64__)
// padding is not specified in linux/eventpoll.h but added to conform to the
// alignment requirements of EABI
int32_t padFd;
#elif defined(__powerpc64__) || defined(__s390x__)
int32_t _padFd;
#endif
int32_t fd;
int32_t pad;
};
*/
import "C"
// Machine characteristics; for internal use.
const (
sizeofPtr = C.sizeofPtr
sizeofShort = C.sizeof_short
sizeofInt = C.sizeof_int
sizeofLong = C.sizeof_long
sizeofLongLong = C.sizeof_longlong
PathMax = C.PATH_MAX
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
type Timex C.struct_timex
type Time_t C.time_t
type Tms C.struct_tms
type Utimbuf C.struct_utimbuf
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
type Stat_t C.struct_stat
type Statfs_t C.struct_statfs
type Dirent C.struct_dirent
type Fsid C.fsid_t
type Flock_t C.struct_flock
// Advice to Fadvise
const (
FADV_NORMAL = C.POSIX_FADV_NORMAL
FADV_RANDOM = C.POSIX_FADV_RANDOM
FADV_SEQUENTIAL = C.POSIX_FADV_SEQUENTIAL
FADV_WILLNEED = C.POSIX_FADV_WILLNEED
FADV_DONTNEED = C.POSIX_FADV_DONTNEED
FADV_NOREUSE = C.POSIX_FADV_NOREUSE
)
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_my_sockaddr_un
type RawSockaddrLinklayer C.struct_sockaddr_ll
type RawSockaddrNetlink C.struct_sockaddr_nl
type RawSockaddrHCI C.struct_sockaddr_hci
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPMreqn C.struct_ip_mreqn
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet4Pktinfo C.struct_in_pktinfo
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
type Ucred C.struct_ucred
type TCPInfo C.struct_tcp_info
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrLinklayer = C.sizeof_struct_sockaddr_ll
SizeofSockaddrNetlink = C.sizeof_struct_sockaddr_nl
SizeofSockaddrHCI = C.sizeof_struct_sockaddr_hci
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPMreqn = C.sizeof_struct_ip_mreqn
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet4Pktinfo = C.sizeof_struct_in_pktinfo
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
SizeofUcred = C.sizeof_struct_ucred
SizeofTCPInfo = C.sizeof_struct_tcp_info
)
// Netlink routing and interface messages
const (
IFA_UNSPEC = C.IFA_UNSPEC
IFA_ADDRESS = C.IFA_ADDRESS
IFA_LOCAL = C.IFA_LOCAL
IFA_LABEL = C.IFA_LABEL
IFA_BROADCAST = C.IFA_BROADCAST
IFA_ANYCAST = C.IFA_ANYCAST
IFA_CACHEINFO = C.IFA_CACHEINFO
IFA_MULTICAST = C.IFA_MULTICAST
IFLA_UNSPEC = C.IFLA_UNSPEC
IFLA_ADDRESS = C.IFLA_ADDRESS
IFLA_BROADCAST = C.IFLA_BROADCAST
IFLA_IFNAME = C.IFLA_IFNAME
IFLA_MTU = C.IFLA_MTU
IFLA_LINK = C.IFLA_LINK
IFLA_QDISC = C.IFLA_QDISC
IFLA_STATS = C.IFLA_STATS
IFLA_COST = C.IFLA_COST
IFLA_PRIORITY = C.IFLA_PRIORITY
IFLA_MASTER = C.IFLA_MASTER
IFLA_WIRELESS = C.IFLA_WIRELESS
IFLA_PROTINFO = C.IFLA_PROTINFO
IFLA_TXQLEN = C.IFLA_TXQLEN
IFLA_MAP = C.IFLA_MAP
IFLA_WEIGHT = C.IFLA_WEIGHT
IFLA_OPERSTATE = C.IFLA_OPERSTATE
IFLA_LINKMODE = C.IFLA_LINKMODE
IFLA_LINKINFO = C.IFLA_LINKINFO
IFLA_NET_NS_PID = C.IFLA_NET_NS_PID
IFLA_IFALIAS = C.IFLA_IFALIAS
IFLA_MAX = C.IFLA_MAX
RT_SCOPE_UNIVERSE = C.RT_SCOPE_UNIVERSE
RT_SCOPE_SITE = C.RT_SCOPE_SITE
RT_SCOPE_LINK = C.RT_SCOPE_LINK
RT_SCOPE_HOST = C.RT_SCOPE_HOST
RT_SCOPE_NOWHERE = C.RT_SCOPE_NOWHERE
RT_TABLE_UNSPEC = C.RT_TABLE_UNSPEC
RT_TABLE_COMPAT = C.RT_TABLE_COMPAT
RT_TABLE_DEFAULT = C.RT_TABLE_DEFAULT
RT_TABLE_MAIN = C.RT_TABLE_MAIN
RT_TABLE_LOCAL = C.RT_TABLE_LOCAL
RT_TABLE_MAX = C.RT_TABLE_MAX
RTA_UNSPEC = C.RTA_UNSPEC
RTA_DST = C.RTA_DST
RTA_SRC = C.RTA_SRC
RTA_IIF = C.RTA_IIF
RTA_OIF = C.RTA_OIF
RTA_GATEWAY = C.RTA_GATEWAY
RTA_PRIORITY = C.RTA_PRIORITY
RTA_PREFSRC = C.RTA_PREFSRC
RTA_METRICS = C.RTA_METRICS
RTA_MULTIPATH = C.RTA_MULTIPATH
RTA_FLOW = C.RTA_FLOW
RTA_CACHEINFO = C.RTA_CACHEINFO
RTA_TABLE = C.RTA_TABLE
RTN_UNSPEC = C.RTN_UNSPEC
RTN_UNICAST = C.RTN_UNICAST
RTN_LOCAL = C.RTN_LOCAL
RTN_BROADCAST = C.RTN_BROADCAST
RTN_ANYCAST = C.RTN_ANYCAST
RTN_MULTICAST = C.RTN_MULTICAST
RTN_BLACKHOLE = C.RTN_BLACKHOLE
RTN_UNREACHABLE = C.RTN_UNREACHABLE
RTN_PROHIBIT = C.RTN_PROHIBIT
RTN_THROW = C.RTN_THROW
RTN_NAT = C.RTN_NAT
RTN_XRESOLVE = C.RTN_XRESOLVE
RTNLGRP_NONE = C.RTNLGRP_NONE
RTNLGRP_LINK = C.RTNLGRP_LINK
RTNLGRP_NOTIFY = C.RTNLGRP_NOTIFY
RTNLGRP_NEIGH = C.RTNLGRP_NEIGH
RTNLGRP_TC = C.RTNLGRP_TC
RTNLGRP_IPV4_IFADDR = C.RTNLGRP_IPV4_IFADDR
RTNLGRP_IPV4_MROUTE = C.RTNLGRP_IPV4_MROUTE
RTNLGRP_IPV4_ROUTE = C.RTNLGRP_IPV4_ROUTE
RTNLGRP_IPV4_RULE = C.RTNLGRP_IPV4_RULE
RTNLGRP_IPV6_IFADDR = C.RTNLGRP_IPV6_IFADDR
RTNLGRP_IPV6_MROUTE = C.RTNLGRP_IPV6_MROUTE
RTNLGRP_IPV6_ROUTE = C.RTNLGRP_IPV6_ROUTE
RTNLGRP_IPV6_IFINFO = C.RTNLGRP_IPV6_IFINFO
RTNLGRP_IPV6_PREFIX = C.RTNLGRP_IPV6_PREFIX
RTNLGRP_IPV6_RULE = C.RTNLGRP_IPV6_RULE
RTNLGRP_ND_USEROPT = C.RTNLGRP_ND_USEROPT
SizeofNlMsghdr = C.sizeof_struct_nlmsghdr
SizeofNlMsgerr = C.sizeof_struct_nlmsgerr
SizeofRtGenmsg = C.sizeof_struct_rtgenmsg
SizeofNlAttr = C.sizeof_struct_nlattr
SizeofRtAttr = C.sizeof_struct_rtattr
SizeofIfInfomsg = C.sizeof_struct_ifinfomsg
SizeofIfAddrmsg = C.sizeof_struct_ifaddrmsg
SizeofRtMsg = C.sizeof_struct_rtmsg
SizeofRtNexthop = C.sizeof_struct_rtnexthop
)
type NlMsghdr C.struct_nlmsghdr
type NlMsgerr C.struct_nlmsgerr
type RtGenmsg C.struct_rtgenmsg
type NlAttr C.struct_nlattr
type RtAttr C.struct_rtattr
type IfInfomsg C.struct_ifinfomsg
type IfAddrmsg C.struct_ifaddrmsg
type RtMsg C.struct_rtmsg
type RtNexthop C.struct_rtnexthop
// Linux socket filter
const (
SizeofSockFilter = C.sizeof_struct_sock_filter
SizeofSockFprog = C.sizeof_struct_sock_fprog
)
type SockFilter C.struct_sock_filter
type SockFprog C.struct_sock_fprog
// Inotify
type InotifyEvent C.struct_inotify_event
const SizeofInotifyEvent = C.sizeof_struct_inotify_event
// Ptrace
// Register structures
type PtraceRegs C.PtraceRegs
// Structures contained in PtraceRegs on s390x (exported by mkpost.go)
type ptracePsw C.ptracePsw
type ptraceFpregs C.ptraceFpregs
type ptracePer C.ptracePer
// Misc
type FdSet C.fd_set
type Sysinfo_t C.struct_sysinfo
type Utsname C.struct_utsname
type Ustat_t C.struct_ustat
type EpollEvent C.struct_my_epoll_event
const (
AT_FDCWD = C.AT_FDCWD
AT_REMOVEDIR = C.AT_REMOVEDIR
AT_SYMLINK_FOLLOW = C.AT_SYMLINK_FOLLOW
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
)
type PollFd C.struct_pollfd
const (
POLLIN = C.POLLIN
POLLPRI = C.POLLPRI
POLLOUT = C.POLLOUT
POLLRDHUP = C.POLLRDHUP
POLLERR = C.POLLERR
POLLHUP = C.POLLHUP
POLLNVAL = C.POLLNVAL
)
type Sigset_t C.sigset_t
// Terminal handling
type Termios C.termios_t

@ -1,232 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics; for internal use.
const (
sizeofPtr = C.sizeofPtr
sizeofShort = C.sizeof_short
sizeofInt = C.sizeof_int
sizeofLong = C.sizeof_long
sizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
type Stat_t C.struct_stat
type Statfs_t C.struct_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type Fsid C.fsid_t
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
type Mclpool C.struct_mclpool
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
type BpfTimeval C.struct_bpf_timeval
// Terminal handling
type Termios C.struct_termios
// Sysctl
type Sysctlnode C.struct_sysctlnode

@ -1,244 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <termios.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics; for internal use.
const (
sizeofPtr = C.sizeofPtr
sizeofShort = C.sizeof_short
sizeofInt = C.sizeof_int
sizeofLong = C.sizeof_long
sizeofLongLong = C.sizeof_longlong
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
const ( // Directory mode bits
S_IFMT = C.S_IFMT
S_IFIFO = C.S_IFIFO
S_IFCHR = C.S_IFCHR
S_IFDIR = C.S_IFDIR
S_IFBLK = C.S_IFBLK
S_IFREG = C.S_IFREG
S_IFLNK = C.S_IFLNK
S_IFSOCK = C.S_IFSOCK
S_ISUID = C.S_ISUID
S_ISGID = C.S_ISGID
S_ISVTX = C.S_ISVTX
S_IRUSR = C.S_IRUSR
S_IWUSR = C.S_IWUSR
S_IXUSR = C.S_IXUSR
)
type Stat_t C.struct_stat
type Statfs_t C.struct_statfs
type Flock_t C.struct_flock
type Dirent C.struct_dirent
type Fsid C.fsid_t
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Ptrace requests
const (
PTRACE_TRACEME = C.PT_TRACE_ME
PTRACE_CONT = C.PT_CONTINUE
PTRACE_KILL = C.PT_KILL
)
// Events (kqueue, kevent)
type Kevent_t C.struct_kevent
// Select
type FdSet C.fd_set
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofIfAnnounceMsghdr = C.sizeof_struct_if_announcemsghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type IfAnnounceMsghdr C.struct_if_announcemsghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
type Mclpool C.struct_mclpool
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfHdr C.struct_bpf_hdr
type BpfTimeval C.struct_bpf_timeval
// Terminal handling
type Termios C.struct_termios

@ -1,260 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
/*
Input to cgo -godefs. See also mkerrors.sh and mkall.sh
*/
// +godefs map struct_in_addr [4]byte /* in_addr */
// +godefs map struct_in6_addr [16]byte /* in6_addr */
package unix
/*
#define KERNEL
// These defines ensure that builds done on newer versions of Solaris are
// backwards-compatible with older versions of Solaris and
// OpenSolaris-based derivatives.
#define __USE_SUNOS_SOCKETS__ // msghdr
#define __USE_LEGACY_PROTOTYPES__ // iovec
#include <dirent.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <termios.h>
#include <termio.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <net/bpf.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <ustat.h>
#include <utime.h>
enum {
sizeofPtr = sizeof(void*),
};
union sockaddr_all {
struct sockaddr s1; // this one gets used for fields
struct sockaddr_in s2; // these pad it out
struct sockaddr_in6 s3;
struct sockaddr_un s4;
struct sockaddr_dl s5;
};
struct sockaddr_any {
struct sockaddr addr;
char pad[sizeof(union sockaddr_all) - sizeof(struct sockaddr)];
};
*/
import "C"
// Machine characteristics; for internal use.
const (
sizeofPtr = C.sizeofPtr
sizeofShort = C.sizeof_short
sizeofInt = C.sizeof_int
sizeofLong = C.sizeof_long
sizeofLongLong = C.sizeof_longlong
PathMax = C.PATH_MAX
)
// Basic types
type (
_C_short C.short
_C_int C.int
_C_long C.long
_C_long_long C.longlong
)
// Time
type Timespec C.struct_timespec
type Timeval C.struct_timeval
type Timeval32 C.struct_timeval32
type Tms C.struct_tms
type Utimbuf C.struct_utimbuf
// Processes
type Rusage C.struct_rusage
type Rlimit C.struct_rlimit
type _Gid_t C.gid_t
// Files
const ( // Directory mode bits
S_IFMT = C.S_IFMT
S_IFIFO = C.S_IFIFO
S_IFCHR = C.S_IFCHR
S_IFDIR = C.S_IFDIR
S_IFBLK = C.S_IFBLK
S_IFREG = C.S_IFREG
S_IFLNK = C.S_IFLNK
S_IFSOCK = C.S_IFSOCK
S_ISUID = C.S_ISUID
S_ISGID = C.S_ISGID
S_ISVTX = C.S_ISVTX
S_IRUSR = C.S_IRUSR
S_IWUSR = C.S_IWUSR
S_IXUSR = C.S_IXUSR
)
type Stat_t C.struct_stat
type Flock_t C.struct_flock
type Dirent C.struct_dirent
// Sockets
type RawSockaddrInet4 C.struct_sockaddr_in
type RawSockaddrInet6 C.struct_sockaddr_in6
type RawSockaddrUnix C.struct_sockaddr_un
type RawSockaddrDatalink C.struct_sockaddr_dl
type RawSockaddr C.struct_sockaddr
type RawSockaddrAny C.struct_sockaddr_any
type _Socklen C.socklen_t
type Linger C.struct_linger
type Iovec C.struct_iovec
type IPMreq C.struct_ip_mreq
type IPv6Mreq C.struct_ipv6_mreq
type Msghdr C.struct_msghdr
type Cmsghdr C.struct_cmsghdr
type Inet6Pktinfo C.struct_in6_pktinfo
type IPv6MTUInfo C.struct_ip6_mtuinfo
type ICMPv6Filter C.struct_icmp6_filter
const (
SizeofSockaddrInet4 = C.sizeof_struct_sockaddr_in
SizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
SizeofSockaddrAny = C.sizeof_struct_sockaddr_any
SizeofSockaddrUnix = C.sizeof_struct_sockaddr_un
SizeofSockaddrDatalink = C.sizeof_struct_sockaddr_dl
SizeofLinger = C.sizeof_struct_linger
SizeofIPMreq = C.sizeof_struct_ip_mreq
SizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
SizeofMsghdr = C.sizeof_struct_msghdr
SizeofCmsghdr = C.sizeof_struct_cmsghdr
SizeofInet6Pktinfo = C.sizeof_struct_in6_pktinfo
SizeofIPv6MTUInfo = C.sizeof_struct_ip6_mtuinfo
SizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
)
// Select
type FdSet C.fd_set
// Misc
type Utsname C.struct_utsname
type Ustat_t C.struct_ustat
const (
AT_FDCWD = C.AT_FDCWD
AT_SYMLINK_NOFOLLOW = C.AT_SYMLINK_NOFOLLOW
AT_SYMLINK_FOLLOW = C.AT_SYMLINK_FOLLOW
AT_REMOVEDIR = C.AT_REMOVEDIR
AT_EACCESS = C.AT_EACCESS
)
// Routing and interface messages
const (
SizeofIfMsghdr = C.sizeof_struct_if_msghdr
SizeofIfData = C.sizeof_struct_if_data
SizeofIfaMsghdr = C.sizeof_struct_ifa_msghdr
SizeofRtMsghdr = C.sizeof_struct_rt_msghdr
SizeofRtMetrics = C.sizeof_struct_rt_metrics
)
type IfMsghdr C.struct_if_msghdr
type IfData C.struct_if_data
type IfaMsghdr C.struct_ifa_msghdr
type RtMsghdr C.struct_rt_msghdr
type RtMetrics C.struct_rt_metrics
// Berkeley packet filter
const (
SizeofBpfVersion = C.sizeof_struct_bpf_version
SizeofBpfStat = C.sizeof_struct_bpf_stat
SizeofBpfProgram = C.sizeof_struct_bpf_program
SizeofBpfInsn = C.sizeof_struct_bpf_insn
SizeofBpfHdr = C.sizeof_struct_bpf_hdr
)
type BpfVersion C.struct_bpf_version
type BpfStat C.struct_bpf_stat
type BpfProgram C.struct_bpf_program
type BpfInsn C.struct_bpf_insn
type BpfTimeval C.struct_bpf_timeval
type BpfHdr C.struct_bpf_hdr
// sysconf information
const _SC_PAGESIZE = C._SC_PAGESIZE
// Terminal handling
type Termios C.struct_termios
type Termio C.struct_termio
type Winsize C.struct_winsize

3
vendor/golang.org/x/text/AUTHORS generated vendored

@ -0,0 +1,3 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

@ -0,0 +1,3 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

22
vendor/golang.org/x/text/PATENTS generated vendored

@ -0,0 +1,22 @@
Additional IP Rights Grant (Patents)
"This implementation" means the copyrightable works distributed by
Google as part of the Go project.
Google hereby grants to You a perpetual, worldwide, non-exclusive,
no-charge, royalty-free, irrevocable (except as stated in this section)
patent license to make, have made, use, offer to sell, sell, import,
transfer and otherwise run, modify and propagate the contents of this
implementation of Go, where such license applies only to those patent
claims, both currently owned or controlled by Google and acquired in
the future, licensable by Google that are necessarily infringed by this
implementation of Go. This grant does not include claims that would be
infringed only as a consequence of further modification of this
implementation. If you or your agent or exclusive licensee institute or
order or agree to the institution of patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging
that this implementation of Go or any code incorporated within this
implementation of Go constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any patent
rights granted to you under this License for this implementation of Go
shall terminate as of the date such litigation is filed.

@ -1,129 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go
// Package cases provides general and language-specific case mappers.
package cases // import "golang.org/x/text/cases"
import (
"golang.org/x/text/language"
"golang.org/x/text/transform"
)
// References:
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
// - http://www.unicode.org/reports/tr29/
// - http://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
// - http://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
// - http://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
// - http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - http://userguide.icu-project.org/transforms/casemappings
// TODO:
// - Case folding
// - Wide and Narrow?
// - Segmenter option for title casing.
// - ASCII fast paths
// - Encode Soft-Dotted property within trie somehow.
// A Caser transforms given input to a certain case. It implements
// transform.Transformer.
//
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
t transform.Transformer
}
// Bytes returns a new byte slice with the result of converting b to the case
// form implemented by c.
func (c Caser) Bytes(b []byte) []byte {
b, _, _ = transform.Bytes(c.t, b)
return b
}
// String returns a string with the result of transforming s to the case form
// implemented by c.
func (c Caser) String(s string) string {
s, _, _ = transform.String(c.t, s)
return s
}
// Reset resets the Caser to be reused for new input after a previous call to
// Transform.
func (c Caser) Reset() { c.t.Reset() }
// Transform implements the Transformer interface and transforms the given input
// to the case form implemented by c.
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return c.t.Transform(dst, src, atEOF)
}
// Upper returns a Caser for language-specific uppercasing.
func Upper(t language.Tag, opts ...Option) Caser {
return Caser{makeUpper(t, getOpts(opts...))}
}
// Lower returns a Caser for language-specific lowercasing.
func Lower(t language.Tag, opts ...Option) Caser {
return Caser{makeLower(t, getOpts(opts...))}
}
// Title returns a Caser for language-specific title casing. It uses an
// approximation of the default Unicode Word Break algorithm.
func Title(t language.Tag, opts ...Option) Caser {
return Caser{makeTitle(t, getOpts(opts...))}
}
// Fold returns a Caser that implements Unicode case folding. The returned Caser
// is stateless and safe to use concurrently by multiple goroutines.
//
// Case folding does not normalize the input and may not preserve a normal form.
// Use the collate or search package for more convenient and linguistically
// sound comparisons. Use unicode/precis for string comparisons where security
// aspects are a concern.
func Fold(opts ...Option) Caser {
return Caser{makeFold(getOpts(opts...))}
}
// An Option is used to modify the behavior of a Caser.
type Option func(o *options)
var (
// NoLower disables the lowercasing of non-leading letters for a title
// caser.
NoLower Option = noLower
// Compact omits mappings in case folding for characters that would grow the
// input. (Unimplemented.)
Compact Option = compact
)
// TODO: option to preserve a normal form, if applicable?
type options struct {
noLower bool
simple bool
// TODO: segmenter, max ignorable, alternative versions, etc.
noFinalSigma bool // Only used for testing.
}
func getOpts(o ...Option) (res options) {
for _, f := range o {
f(&res)
}
return
}
func noLower(o *options) {
o.noLower = true
}
func compact(o *options) {
o.simple = true
}

@ -1,281 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import (
"golang.org/x/text/transform"
)
// A context is used for iterating over source bytes, fetching case info and
// writing to a destination buffer.
//
// Casing operations may need more than one rune of context to decide how a rune
// should be cased. Casing implementations should call checkpoint on context
// whenever it is known to be safe to return the runes processed so far.
//
// It is recommended for implementations to not allow for more than 30 case
// ignorables as lookahead (analogous to the limit in norm) and to use state if
// unbounded lookahead is needed for cased runes.
type context struct {
dst, src []byte
atEOF bool
pDst int // pDst points past the last written rune in dst.
pSrc int // pSrc points to the start of the currently scanned rune.
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
nDst, nSrc int
err error
sz int // size of current rune
info info // case information of currently scanned rune
// State preserved across calls to Transform.
isMidWord bool // false if next cased letter needs to be title-cased.
}
func (c *context) Reset() {
c.isMidWord = false
}
// ret returns the return values for the Transform method. It checks whether
// there were insufficient bytes in src to complete and introduces an error
// accordingly, if necessary.
func (c *context) ret() (nDst, nSrc int, err error) {
if c.err != nil || c.nSrc == len(c.src) {
return c.nDst, c.nSrc, c.err
}
// This point is only reached by mappers if there was no short destination
// buffer. This means that the source buffer was exhausted and that c.sz was
// set to 0 by next.
if c.atEOF && c.pSrc == len(c.src) {
return c.pDst, c.pSrc, nil
}
return c.nDst, c.nSrc, transform.ErrShortSrc
}
// checkpoint sets the return value buffer points for Transform to the current
// positions.
func (c *context) checkpoint() {
if c.err == nil {
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
}
}
// unreadRune causes the last rune read by next to be reread on the next
// invocation of next. Only one unreadRune may be called after a call to next.
func (c *context) unreadRune() {
c.sz = 0
}
func (c *context) next() bool {
c.pSrc += c.sz
if c.pSrc == len(c.src) || c.err != nil {
c.info, c.sz = 0, 0
return false
}
v, sz := trie.lookup(c.src[c.pSrc:])
c.info, c.sz = info(v), sz
if c.sz == 0 {
if c.atEOF {
// A zero size means we have an incomplete rune. If we are atEOF,
// this means it is an illegal rune, which we will consume one
// byte at a time.
c.sz = 1
} else {
c.err = transform.ErrShortSrc
return false
}
}
return true
}
// writeBytes adds bytes to dst.
func (c *context) writeBytes(b []byte) bool {
if len(c.dst)-c.pDst < len(b) {
c.err = transform.ErrShortDst
return false
}
// This loop is faster than using copy.
for _, ch := range b {
c.dst[c.pDst] = ch
c.pDst++
}
return true
}
// writeString writes the given string to dst.
func (c *context) writeString(s string) bool {
if len(c.dst)-c.pDst < len(s) {
c.err = transform.ErrShortDst
return false
}
// This loop is faster than using copy.
for i := 0; i < len(s); i++ {
c.dst[c.pDst] = s[i]
c.pDst++
}
return true
}
// copy writes the current rune to dst.
func (c *context) copy() bool {
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
}
// copyXOR copies the current rune to dst and modifies it by applying the XOR
// pattern of the case info. It is the responsibility of the caller to ensure
// that this is a rune with a XOR pattern defined.
func (c *context) copyXOR() bool {
if !c.copy() {
return false
}
if c.info&xorIndexBit == 0 {
// Fast path for 6-bit XOR pattern, which covers most cases.
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
} else {
// Interpret XOR bits as an index.
// TODO: test performance for unrolling this loop. Verify that we have
// at least two bytes and at most three.
idx := c.info >> xorShift
for p := c.pDst - 1; ; p-- {
c.dst[p] ^= xorData[idx]
idx--
if xorData[idx] == 0 {
break
}
}
}
return true
}
// hasPrefix returns true if src[pSrc:] starts with the given string.
func (c *context) hasPrefix(s string) bool {
b := c.src[c.pSrc:]
if len(b) < len(s) {
return false
}
for i, c := range b[:len(s)] {
if c != s[i] {
return false
}
}
return true
}
// caseType returns an info with only the case bits, normalized to either
// cLower, cUpper, cTitle or cUncased.
func (c *context) caseType() info {
cm := c.info & 0x7
if cm < 4 {
return cm
}
if cm >= cXORCase {
// xor the last bit of the rune with the case type bits.
b := c.src[c.pSrc+c.sz-1]
return info(b&1) ^ cm&0x3
}
if cm == cIgnorableCased {
return cLower
}
return cUncased
}
// lower writes the lowercase version of the current rune to dst.
func lower(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cLower {
return c.copy()
}
if c.info&exceptionBit == 0 {
return c.copyXOR()
}
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
return c.writeString(e[offset : offset+nLower])
}
return c.copy()
}
// upper writes the uppercase version of the current rune to dst.
func upper(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cUpper {
return c.copy()
}
if c.info&exceptionBit == 0 {
return c.copyXOR()
}
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
// Get length of first special case mapping.
n := (e[1] >> lengthBits) & lengthMask
if ct == cTitle {
// The first special case mapping is for lower. Set n to the second.
if n == noChange {
n = 0
}
n, e = e[1]&lengthMask, e[n:]
}
if n != noChange {
return c.writeString(e[offset : offset+n])
}
return c.copy()
}
// title writes the title case version of the current rune to dst.
func title(c *context) bool {
ct := c.caseType()
if c.info&hasMappingMask == 0 || ct == cTitle {
return c.copy()
}
if c.info&exceptionBit == 0 {
if ct == cLower {
return c.copyXOR()
}
return c.copy()
}
// Get the exception data.
e := exceptions[c.info>>exceptionShift:]
offset := 2 + e[0]&lengthMask // size of header + fold string
nFirst := (e[1] >> lengthBits) & lengthMask
if nTitle := e[1] & lengthMask; nTitle != noChange {
if nFirst != noChange {
e = e[nFirst:]
}
return c.writeString(e[offset : offset+nTitle])
}
if ct == cLower && nFirst != noChange {
// Use the uppercase version instead.
return c.writeString(e[offset : offset+nFirst])
}
// Already in correct case.
return c.copy()
}
// foldFull writes the foldFull version of the current rune to dst.
func foldFull(c *context) bool {
if c.info&hasMappingMask == 0 {
return c.copy()
}
ct := c.caseType()
if c.info&exceptionBit == 0 {
if ct != cLower || c.info&inverseFoldBit != 0 {
return c.copyXOR()
}
return c.copy()
}
e := exceptions[c.info>>exceptionShift:]
n := e[0] & lengthMask
if n == 0 {
if ct == cLower {
return c.copy()
}
n = (e[1] >> lengthBits) & lengthMask
}
return c.writeString(e[2 : 2+n])
}

@ -1,26 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import "golang.org/x/text/transform"
type caseFolder struct{ transform.NopResetter }
// caseFolder implements the Transformer interface for doing case folding.
func (t *caseFolder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() {
foldFull(&c)
c.checkpoint()
}
return c.ret()
}
func makeFold(o options) transform.Transformer {
// TODO: Special case folding, through option Language, Special/Turkic, or
// both.
// TODO: Implement Compact options.
return &caseFolder{}
}

@ -1,831 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This program generates the trie for casing operations. The Unicode casing
// algorithm requires the lookup of various properties and mappings for each
// rune. The table generated by this generator combines several of the most
// frequently used of these into a single trie so that they can be accessed
// with a single lookup.
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"log"
"reflect"
"strconv"
"strings"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/norm"
)
func main() {
gen.Init()
genTables()
genTablesTest()
gen.Repackage("gen_trieval.go", "trieval.go", "cases")
}
// runeInfo contains all information for a rune that we care about for casing
// operations.
type runeInfo struct {
Rune rune
entry info // trie value for this rune.
CaseMode info
// Simple case mappings.
Simple [1 + maxCaseMode][]rune
// Special casing
HasSpecial bool
Conditional bool
Special [1 + maxCaseMode][]rune
// Folding
FoldSimple rune
FoldSpecial rune
FoldFull []rune
// TODO: FC_NFKC, or equivalent data.
// Properties
SoftDotted bool
CaseIgnorable bool
Cased bool
DecomposeGreek bool
BreakType string
BreakCat breakCategory
// We care mostly about 0, Above, and IotaSubscript.
CCC byte
}
type breakCategory int
const (
breakBreak breakCategory = iota
breakLetter
breakIgnored
)
// mapping returns the case mapping for the given case type.
func (r *runeInfo) mapping(c info) string {
if r.HasSpecial {
return string(r.Special[c])
}
if len(r.Simple[c]) != 0 {
return string(r.Simple[c])
}
return string(r.Rune)
}
func parse(file string, f func(p *ucd.Parser)) {
ucd.Parse(gen.OpenUCDFile(file), f)
}
func parseUCD() []runeInfo {
chars := make([]runeInfo, unicode.MaxRune)
get := func(r rune) *runeInfo {
c := &chars[r]
c.Rune = r
return c
}
parse("UnicodeData.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
ri.Simple[cUpper] = p.Runes(ucd.SimpleUppercaseMapping)
ri.Simple[cTitle] = p.Runes(ucd.SimpleTitlecaseMapping)
if p.String(ucd.GeneralCategory) == "Lt" {
ri.CaseMode = cTitle
}
})
// <code>; <property>
parse("PropList.txt", func(p *ucd.Parser) {
if p.String(1) == "Soft_Dotted" {
chars[p.Rune(0)].SoftDotted = true
}
})
// <code>; <word break type>
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
switch p.String(1) {
case "Case_Ignorable":
ri.CaseIgnorable = true
case "Cased":
ri.Cased = true
case "Lowercase":
ri.CaseMode = cLower
case "Uppercase":
ri.CaseMode = cUpper
}
})
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
parse("SpecialCasing.txt", func(p *ucd.Parser) {
// We drop all conditional special casing and deal with them manually in
// the language-specific case mappers. Rune 0x03A3 is the only one with
// a conditional formatting that is not language-specific. However,
// dealing with this letter is tricky, especially in a streaming
// context, so we deal with it in the Caser for Greek specifically.
ri := get(p.Rune(0))
if p.String(4) == "" {
ri.HasSpecial = true
ri.Special[cLower] = p.Runes(1)
ri.Special[cTitle] = p.Runes(2)
ri.Special[cUpper] = p.Runes(3)
} else {
ri.Conditional = true
}
})
// TODO: Use text breaking according to UAX #29.
// <code>; <word break type>
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
ri.BreakType = p.String(1)
// We collapse the word breaking properties onto the categories we need.
switch p.String(1) { // TODO: officially we need to canonicalize.
case "Format", "MidLetter", "MidNumLet", "Single_Quote":
ri.BreakCat = breakIgnored
case "ALetter", "Hebrew_Letter", "Numeric", "Extend", "ExtendNumLet":
ri.BreakCat = breakLetter
}
})
// <code>; <type>; <mapping>
parse("CaseFolding.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
switch p.String(1) {
case "C":
ri.FoldSimple = p.Rune(2)
ri.FoldFull = p.Runes(2)
case "S":
ri.FoldSimple = p.Rune(2)
case "T":
ri.FoldSpecial = p.Rune(2)
case "F":
ri.FoldFull = p.Runes(2)
default:
log.Fatalf("%U: unknown type: %s", p.Rune(0), p.String(1))
}
})
return chars
}
func genTables() {
chars := parseUCD()
verifyProperties(chars)
t := triegen.NewTrie("case")
for i := range chars {
c := &chars[i]
makeEntry(c)
t.Insert(rune(i), uint64(c.entry))
}
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "cases")
gen.WriteUnicodeVersion(w)
// TODO: write CLDR version after adding a mechanism to detect that the
// tables on which the manually created locale-sensitive casing code is
// based hasn't changed.
w.WriteVar("xorData", string(xorData))
w.WriteVar("exceptions", string(exceptionData))
sz, err := t.Gen(w, triegen.Compact(&sparseCompacter{}))
if err != nil {
log.Fatal(err)
}
w.Size += sz
}
func makeEntry(ri *runeInfo) {
if ri.CaseIgnorable {
if ri.Cased {
ri.entry = cIgnorableCased
} else {
ri.entry = cIgnorableUncased
}
} else {
ri.entry = ri.CaseMode
}
// TODO: handle soft-dotted.
ccc := cccOther
switch ri.CCC {
case 0: // Not_Reordered
ccc = cccZero
case above: // Above
ccc = cccAbove
}
if ri.BreakCat == breakBreak {
ccc = cccBreak
}
ri.entry |= ccc
if ri.CaseMode == cUncased {
return
}
// Need to do something special.
if ri.CaseMode == cTitle || ri.HasSpecial || ri.mapping(cTitle) != ri.mapping(cUpper) {
makeException(ri)
return
}
if f := string(ri.FoldFull); len(f) > 0 && f != ri.mapping(cUpper) && f != ri.mapping(cLower) {
makeException(ri)
return
}
// Rune is either lowercase or uppercase.
orig := string(ri.Rune)
mapped := ""
if ri.CaseMode == cUpper {
mapped = ri.mapping(cLower)
} else {
mapped = ri.mapping(cUpper)
}
if len(orig) != len(mapped) {
makeException(ri)
return
}
if string(ri.FoldFull) == ri.mapping(cUpper) {
ri.entry |= inverseFoldBit
}
n := len(orig)
// Create per-byte XOR mask.
var b []byte
for i := 0; i < n; i++ {
b = append(b, orig[i]^mapped[i])
}
// Remove leading 0 bytes, but keep at least one byte.
for ; len(b) > 1 && b[0] == 0; b = b[1:] {
}
if len(b) == 1 && b[0]&0xc0 == 0 {
ri.entry |= info(b[0]) << xorShift
return
}
key := string(b)
x, ok := xorCache[key]
if !ok {
xorData = append(xorData, 0) // for detecting start of sequence
xorData = append(xorData, b...)
x = len(xorData) - 1
xorCache[key] = x
}
ri.entry |= info(x<<xorShift) | xorIndexBit
}
var xorCache = map[string]int{}
// xorData contains byte-wise XOR data for the least significant bytes of a
// UTF-8 encoded rune. An index points to the last byte. The sequence starts
// with a zero terminator.
var xorData = []byte{}
// See the comments in gen_trieval.go re "the exceptions slice".
var exceptionData = []byte{0}
// makeException encodes case mappings that cannot be expressed in a simple
// XOR diff.
func makeException(ri *runeInfo) {
ccc := ri.entry & cccMask
// Set exception bit and retain case type.
ri.entry &= 0x0007
ri.entry |= exceptionBit
if len(exceptionData) >= 1<<numExceptionBits {
log.Fatalf("%U:exceptionData too large %x > %d bits", ri.Rune, len(exceptionData), numExceptionBits)
}
// Set the offset in the exceptionData array.
ri.entry |= info(len(exceptionData) << exceptionShift)
orig := string(ri.Rune)
tc := ri.mapping(cTitle)
uc := ri.mapping(cUpper)
lc := ri.mapping(cLower)
ff := string(ri.FoldFull)
// addString sets the length of a string and adds it to the expansions array.
addString := func(s string, b *byte) {
if len(s) == 0 {
// Zero-length mappings exist, but only for conditional casing,
// which we are representing outside of this table.
log.Fatalf("%U: has zero-length mapping.", ri.Rune)
}
*b <<= 3
if s != orig {
n := len(s)
if n > 7 {
log.Fatalf("%U: mapping larger than 7 (%d)", ri.Rune, n)
}
*b |= byte(n)
exceptionData = append(exceptionData, s...)
}
}
// byte 0:
exceptionData = append(exceptionData, byte(ccc)|byte(len(ff)))
// byte 1:
p := len(exceptionData)
exceptionData = append(exceptionData, 0)
if len(ff) > 7 { // May be zero-length.
log.Fatalf("%U: fold string larger than 7 (%d)", ri.Rune, len(ff))
}
exceptionData = append(exceptionData, ff...)
ct := ri.CaseMode
if ct != cLower {
addString(lc, &exceptionData[p])
}
if ct != cUpper {
addString(uc, &exceptionData[p])
}
if ct != cTitle {
// If title is the same as upper, we set it to the original string so
// that it will be marked as not present. This implies title case is
// the same as upper case.
if tc == uc {
tc = orig
}
addString(tc, &exceptionData[p])
}
}
// sparseCompacter is a trie value block Compacter. There are many cases where
// successive runes alternate between lower- and upper-case. This Compacter
// exploits this by adding a special case type where the case value is obtained
// from or-ing it with the least-significant bit of the rune, creating large
// ranges of equal case values that compress well.
type sparseCompacter struct {
sparseBlocks [][]uint16
sparseOffsets []uint16
sparseCount int
}
// makeSparse returns the number of elements that compact block would contain
// as well as the modified values.
func makeSparse(vals []uint64) ([]uint16, int) {
// Copy the values.
values := make([]uint16, len(vals))
for i, v := range vals {
values[i] = uint16(v)
}
alt := func(i int, v uint16) uint16 {
if cm := info(v & fullCasedMask); cm == cUpper || cm == cLower {
// Convert cLower or cUpper to cXORCase value, which has the form 11x.
xor := v
xor &^= 1
xor |= uint16(i&1) ^ (v & 1)
xor |= 0x4
return xor
}
return v
}
var count int
var previous uint16
for i, v := range values {
if v != 0 {
// Try if the unmodified value is equal to the previous.
if v == previous {
continue
}
// Try if the xor-ed value is equal to the previous value.
a := alt(i, v)
if a == previous {
values[i] = a
continue
}
// This is a new value.
count++
// Use the xor-ed value if it will be identical to the next value.
if p := i + 1; p < len(values) && alt(p, values[p]) == a {
values[i] = a
v = a
}
}
previous = v
}
return values, count
}
func (s *sparseCompacter) Size(v []uint64) (int, bool) {
_, n := makeSparse(v)
// We limit using this method to having 16 entries.
if n > 16 {
return 0, false
}
return 2 + int(reflect.TypeOf(valueRange{}).Size())*n, true
}
func (s *sparseCompacter) Store(v []uint64) uint32 {
h := uint32(len(s.sparseOffsets))
values, sz := makeSparse(v)
s.sparseBlocks = append(s.sparseBlocks, values)
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
s.sparseCount += sz
return h
}
func (s *sparseCompacter) Handler() string {
// The sparse global variable and its lookup method is defined in gen_trieval.go.
return "sparse.lookup"
}
func (s *sparseCompacter) Print(w io.Writer) (retErr error) {
p := func(format string, args ...interface{}) {
_, err := fmt.Fprintf(w, format, args...)
if retErr == nil && err != nil {
retErr = err
}
}
ls := len(s.sparseBlocks)
if ls == len(s.sparseOffsets) {
s.sparseOffsets = append(s.sparseOffsets, uint16(s.sparseCount))
}
p("// sparseOffsets: %d entries, %d bytes\n", ls+1, (ls+1)*2)
p("var sparseOffsets = %#v\n\n", s.sparseOffsets)
ns := s.sparseCount
p("// sparseValues: %d entries, %d bytes\n", ns, ns*4)
p("var sparseValues = [%d]valueRange {", ns)
for i, values := range s.sparseBlocks {
p("\n// Block %#x, offset %#x", i, s.sparseOffsets[i])
var v uint16
for i, nv := range values {
if nv != v {
if v != 0 {
p(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
}
}
v = nv
}
if v != 0 {
p(",hi:%#02x},", 0x80+len(values)-1)
}
}
p("\n}\n\n")
return
}
// verifyProperties that properties of the runes that are relied upon in the
// implementation. Each property is marked with an identifier that is referred
// to in the places where it is used.
func verifyProperties(chars []runeInfo) {
for i, c := range chars {
r := rune(i)
// Rune properties.
// A.1: modifier never changes on lowercase. [ltLower]
if c.CCC > 0 && unicode.ToLower(r) != r {
log.Fatalf("%U: non-starter changes when lowercased", r)
}
// A.2: properties of decompositions starting with I or J. [ltLower]
d := norm.NFD.PropertiesString(string(r)).Decomposition()
if len(d) > 0 {
if d[0] == 'I' || d[0] == 'J' {
// A.2.1: we expect at least an ASCII character and a modifier.
if len(d) < 3 {
log.Fatalf("%U: length of decomposition was %d; want >= 3", r, len(d))
}
// All subsequent runes are modifiers and all have the same CCC.
runes := []rune(string(d[1:]))
ccc := chars[runes[0]].CCC
for _, mr := range runes[1:] {
mc := chars[mr]
// A.2.2: all modifiers have a CCC of Above or less.
if ccc == 0 || ccc > above {
log.Fatalf("%U: CCC of successive rune (%U) was %d; want (0,230]", r, mr, ccc)
}
// A.2.3: a sequence of modifiers all have the same CCC.
if mc.CCC != ccc {
log.Fatalf("%U: CCC of follow-up modifier (%U) was %d; want %d", r, mr, mc.CCC, ccc)
}
// A.2.4: for each trailing r, r in [0x300, 0x311] <=> CCC == Above.
if (ccc == above) != (0x300 <= mr && mr <= 0x311) {
log.Fatalf("%U: modifier %U in [U+0300, U+0311] != ccc(%U) == 230", r, mr, mr)
}
if i += len(string(mr)); i >= len(d) {
break
}
}
}
}
// A.3: no U+0307 in decomposition of Soft-Dotted rune. [ltUpper]
if unicode.Is(unicode.Soft_Dotted, r) && strings.Contains(string(d), "\u0307") {
log.Fatalf("%U: decomposition of soft-dotted rune may not contain U+0307", r)
}
// A.4: only rune U+0345 may be of CCC Iota_Subscript. [elUpper]
if c.CCC == iotaSubscript && r != 0x0345 {
log.Fatalf("%U: only rune U+0345 may have CCC Iota_Subscript", r)
}
// A.5: soft-dotted runes do not have exceptions.
if c.SoftDotted && c.entry&exceptionBit != 0 {
log.Fatalf("%U: soft-dotted has exception", r)
}
// A.6: Greek decomposition. [elUpper]
if unicode.Is(unicode.Greek, r) {
if b := norm.NFD.PropertiesString(string(r)).Decomposition(); b != nil {
runes := []rune(string(b))
// A.6.1: If a Greek rune decomposes and the first rune of the
// decomposition is greater than U+00FF, the rune is always
// great and not a modifier.
if f := runes[0]; unicode.IsMark(f) || f > 0xFF && !unicode.Is(unicode.Greek, f) {
log.Fatalf("%U: expeced first rune of Greek decomposition to be letter, found %U", r, f)
}
// A.6.2: Any follow-up rune in a Greek decomposition is a
// modifier of which the first should be gobbled in
// decomposition.
for _, m := range runes[1:] {
switch m {
case 0x0313, 0x0314, 0x0301, 0x0300, 0x0306, 0x0342, 0x0308, 0x0304, 0x345:
default:
log.Fatalf("%U: modifier %U is outside of expeced Greek modifier set", r, m)
}
}
}
}
// Breaking properties.
// B.1: all runes with CCC > 0 are of break type Extend.
if c.CCC > 0 && c.BreakType != "Extend" {
log.Fatalf("%U: CCC == %d, but got break type %s; want Extend", r, c.CCC, c.BreakType)
}
// B.2: all cased runes with c.CCC == 0 are of break type ALetter.
if c.CCC == 0 && c.Cased && c.BreakType != "ALetter" {
log.Fatalf("%U: cased, but got break type %s; want ALetter", r, c.BreakType)
}
// B.3: letter category.
if c.CCC == 0 && c.BreakCat != breakBreak && !c.CaseIgnorable {
if c.BreakCat != breakLetter {
log.Fatalf("%U: check for letter break type gave %d; want %d", r, c.BreakCat, breakLetter)
}
}
}
}
func genTablesTest() {
w := &bytes.Buffer{}
fmt.Fprintln(w, "var (")
printProperties(w, "DerivedCoreProperties.txt", "Case_Ignorable", verifyIgnore)
// We discard the output as we know we have perfect functions. We run them
// just to verify the properties are correct.
n := printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Cased", verifyCased)
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Lowercase", verifyLower)
n += printProperties(ioutil.Discard, "DerivedCoreProperties.txt", "Uppercase", verifyUpper)
if n > 0 {
log.Fatalf("One of the discarded properties does not have a perfect filter.")
}
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
parse("SpecialCasing.txt", func(p *ucd.Parser) {
// Skip conditional entries.
if p.String(4) != "" {
return
}
r := p.Rune(0)
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n",
r, string(p.Runes(1)), string(p.Runes(2)), string(p.Runes(3)))
})
fmt.Fprint(w, "\t}\n\n")
// <code>; <type>; <runes>
table := map[rune]struct{ simple, full, special string }{}
parse("CaseFolding.txt", func(p *ucd.Parser) {
r := p.Rune(0)
t := p.String(1)
v := string(p.Runes(2))
if t != "T" && v == string(unicode.ToLower(r)) {
return
}
x := table[r]
switch t {
case "C":
x.full = v
x.simple = v
case "S":
x.simple = v
case "F":
x.full = v
case "T":
x.special = v
}
table[r] = x
})
fmt.Fprintln(w, "\tfoldMap = map[rune]struct{ simple, full, special string }{")
for r := rune(0); r < 0x10FFFF; r++ {
x, ok := table[r]
if !ok {
continue
}
fmt.Fprintf(w, "\t\t0x%04x: {%q, %q, %q},\n", r, x.simple, x.full, x.special)
}
fmt.Fprint(w, "\t}\n\n")
// Break property
notBreak := map[rune]bool{}
parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
switch p.String(1) {
case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet":
notBreak[p.Rune(0)] = true
}
})
fmt.Fprintln(w, "\tbreakProp = []struct{ lo, hi rune }{")
inBreak := false
for r := rune(0); r <= lastRuneForTesting; r++ {
if isBreak := !notBreak[r]; isBreak != inBreak {
if isBreak {
fmt.Fprintf(w, "\t\t{0x%x, ", r)
} else {
fmt.Fprintf(w, "0x%x},\n", r-1)
}
inBreak = isBreak
}
}
if inBreak {
fmt.Fprintf(w, "0x%x},\n", lastRuneForTesting)
}
fmt.Fprint(w, "\t}\n\n")
// Word break test
// Filter out all samples that do not contain cased characters.
cased := map[rune]bool{}
parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
if p.String(1) == "Cased" {
cased[p.Rune(0)] = true
}
})
fmt.Fprintln(w, "\tbreakTest = []string{")
parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
c := strings.Split(p.String(0), " ")
const sep = '|'
numCased := 0
test := ""
for ; len(c) >= 2; c = c[2:] {
if c[0] == "÷" && test != "" {
test += string(sep)
}
i, err := strconv.ParseUint(c[1], 16, 32)
r := rune(i)
if err != nil {
log.Fatalf("Invalid rune %q.", c[1])
}
if r == sep {
log.Fatalf("Separator %q not allowed in test data. Pick another one.", sep)
}
if cased[r] {
numCased++
}
test += string(r)
}
if numCased > 1 {
fmt.Fprintf(w, "\t\t%q,\n", test)
}
})
fmt.Fprintln(w, "\t}")
fmt.Fprintln(w, ")")
gen.WriteGoFile("tables_test.go", "cases", w.Bytes())
}
// These functions are just used for verification that their definition have not
// changed in the Unicode Standard.
func verifyCased(r rune) bool {
return verifyLower(r) || verifyUpper(r) || unicode.IsTitle(r)
}
func verifyLower(r rune) bool {
return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
}
func verifyUpper(r rune) bool {
return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
}
// verifyIgnore is an approximation of the Case_Ignorable property using the
// core unicode package. It is used to reduce the size of the test data.
func verifyIgnore(r rune) bool {
props := []*unicode.RangeTable{
unicode.Mn,
unicode.Me,
unicode.Cf,
unicode.Lm,
unicode.Sk,
}
for _, p := range props {
if unicode.Is(p, r) {
return true
}
}
return false
}
// printProperties prints tables of rune properties from the given UCD file.
// A filter func f can be given to exclude certain values. A rune r will have
// the indicated property if it is in the generated table or if f(r).
func printProperties(w io.Writer, file, property string, f func(r rune) bool) int {
verify := map[rune]bool{}
n := 0
varNameParts := strings.Split(property, "_")
varNameParts[0] = strings.ToLower(varNameParts[0])
fmt.Fprintf(w, "\t%s = map[rune]bool{\n", strings.Join(varNameParts, ""))
parse(file, func(p *ucd.Parser) {
if p.String(1) == property {
r := p.Rune(0)
verify[r] = true
if !f(r) {
n++
fmt.Fprintf(w, "\t\t0x%.4x: true,\n", r)
}
}
})
fmt.Fprint(w, "\t}\n\n")
// Verify that f is correct, that is, it represents a subset of the property.
for r := rune(0); r <= lastRuneForTesting; r++ {
if !verify[r] && f(r) {
log.Fatalf("Incorrect filter func for property %q.", property)
}
}
return n
}
// The newCaseTrie, sparseValues and sparseOffsets definitions below are
// placeholders referred to by gen_trieval.go. The real definitions are
// generated by this program and written to tables.go.
func newCaseTrie(int) int { return 0 }
var (
sparseValues [0]valueRange
sparseOffsets [0]uint16
)

@ -1,217 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains definitions for interpreting the trie value of the case
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
// if (exception) {
// 15..5 unsigned exception index
// 4 unused
// } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
casedMask = 0x0003
fullCasedMask = 0x0007
ignorableMask = 0x0006
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
exceptionBit = 1 << 3
exceptionShift = 5
numExceptionBits = 11
xorIndexBit = 1 << 6
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xffc0 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
const (
cUncased info = iota // 000
cTitle // 001
cLower // 010
cUpper // 011
cIgnorableUncased // 100
cIgnorableCased // 101 // lower case if mappings exist
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
maxCaseMode = cUpper
)
func (c info) isCased() bool {
return c&casedMask != 0
}
func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isCaseIgnorableAndNonBreakStarter() bool {
return c&(fullCasedMask|cccMask) == (ignorableValue | cccZero)
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
const (
cccBreak info = iota << 4
cccZero
cccAbove
cccOther
cccMask = cccBreak | cccZero | cccAbove | cccOther
)
const (
starter = 0
above = 230
iotaSubscript = 240
)
// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
//
// byte 1:
// 7..6 unused
// 5..3 length of 1st mapping of case type
// 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and implies no change.
// A length of 0 indicates a mapping to zero-length string.
//
// Body bytes:
// case folding bytes
// lowercase mapping bytes
// uppercase mapping bytes
// titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
// missing fold -> lower
// missing title -> upper
// all missing -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
const (
lengthMask = 0x07
lengthBits = 3
noChange = 0
)
// References to generated trie.
var trie = newCaseTrie(0)
var sparse = sparseBlocks{
values: sparseValues[:],
offsets: sparseOffsets[:],
}
// Sparse block lookup code.
// valueRange is an entry in a sparse block.
type valueRange struct {
value uint16
lo, hi byte
}
type sparseBlocks struct {
values []valueRange
offsets []uint16
}
// lookup returns the value from values block n for byte b using binary search.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
lo := s.offsets[n]
hi := s.offsets[n+1]
for lo < hi {
m := lo + (hi-lo)/2
r := s.values[m]
if r.lo <= b && b <= r.hi {
return r.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
// lastRuneForTesting is the last rune used for testing. Everything after this
// is boring.
const lastRuneForTesting = rune(0x1FFFF)

@ -1,83 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
func (c info) cccVal() info {
if c&exceptionBit != 0 {
return info(exceptions[c>>exceptionShift]) & cccMask
}
return c & cccMask
}
func (c info) cccType() info {
ccc := c.cccVal()
if ccc <= cccZero {
return cccZero
}
return ccc
}
// TODO: Implement full Unicode breaking algorithm:
// 1) Implement breaking in separate package.
// 2) Use the breaker here.
// 3) Compare table size and performance of using the more generic breaker.
//
// Note that we can extend the current algorithm to be much more accurate. This
// only makes sense, though, if the performance and/or space penalty of using
// the generic breaker is big. Extra data will only be needed for non-cased
// runes, which means there are sufficient bits left in the caseType.
// Also note that the standard breaking algorithm doesn't always make sense
// for title casing. For example, a4a -> A4a, but a"4a -> A"4A (where " stands
// for modifier \u0308).
// ICU prohibits breaking in such cases as well.
// For the purpose of title casing we use an approximation of the Unicode Word
// Breaking algorithm defined in Annex #29:
// http://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table.
//
// For our approximation, we group the Word Break types into the following
// categories, with associated rules:
//
// 1) Letter:
// ALetter, Hebrew_Letter, Numeric, ExtendNumLet, Extend.
// Rule: Never break between consecutive runes of this category.
//
// 2) Mid:
// Format, MidLetter, MidNumLet, Single_Quote.
// (Cf. case-ignorable: MidLetter, MidNumLet or cat is Mn, Me, Cf, Lm or Sk).
// Rule: Don't break between Letter and Mid, but break between two Mids.
//
// 3) Break:
// Any other category, including NewLine, CR, LF and Double_Quote. These
// categories should always result in a break between two cased letters.
// Rule: Always break.
//
// Note 1: the Katakana and MidNum categories can, in esoteric cases, result in
// preventing a break between two cased letters. For now we will ignore this
// (e.g. [ALetter] [ExtendNumLet] [Katakana] [ExtendNumLet] [ALetter] and
// [ALetter] [Numeric] [MidNum] [Numeric] [ALetter].)
//
// Note 2: the rule for Mid is very approximate, but works in most cases. To
// improve, we could store the categories in the trie value and use a FA to
// manage breaks. See TODO comment above.
//
// Note 3: according to the spec, it is possible for the Extend category to
// introduce breaks between other categories grouped in Letter. However, this
// is undesirable for our purposes. ICU prevents breaks in such cases as well.
// isBreak returns whether this rune should introduce a break.
func (c info) isBreak() bool {
return c.cccVal() == cccBreak
}
// isLetter returns whether the rune is of break type ALetter, Hebrew_Letter,
// Numeric, ExtendNumLet, or Extend.
func (c info) isLetter() bool {
ccc := c.cccVal()
if ccc == cccZero {
return !c.isCaseIgnorable()
}
return ccc != cccBreak
}

@ -1,599 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
// This file contains the definitions of case mappings for all supported
// languages. The rules for the language-specific tailorings were taken and
// modified from the CLDR transform definitions in common/transforms.
import (
"strings"
"unicode"
"unicode/utf8"
"golang.org/x/text/language"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool
// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30
// supported lists the language tags for which we have tailorings.
const supported = "und af az el lt nl tr"
func init() {
tags := []language.Tag{}
for _, s := range strings.Split(supported, " ") {
tags = append(tags, language.MustParse(s))
}
matcher = language.NewMatcher(tags)
Supported = language.NewCoverage(tags)
}
var (
matcher language.Matcher
Supported language.Coverage
// We keep the following lists separate, instead of having a single per-
// language struct, to give the compiler a chance to remove unused code.
// Some uppercase mappers are stateless, so we can precompute the
// Transformers and save a bit on runtime allocations.
upperFunc = []mapFunc{
nil, // und
nil, // af
aztrUpper(upper), // az
elUpper, // el
ltUpper(upper), // lt
nil, // nl
aztrUpper(upper), // tr
}
undUpper transform.Transformer = &undUpperCaser{}
lowerFunc = []mapFunc{
lower, // und
lower, // af
aztrLower, // az
lower, // el
ltLower, // lt
lower, // nl
aztrLower, // tr
}
titleInfos = []struct {
title, lower mapFunc
rewrite func(*context)
}{
{title, lower, nil}, // und
{title, lower, afnlRewrite}, // af
{aztrUpper(title), aztrLower, nil}, // az
{title, lower, nil}, // el
{ltUpper(title), ltLower, nil}, // lt
{nlTitle, lower, afnlRewrite}, // nl
{aztrUpper(title), aztrLower, nil}, // tr
}
)
func makeUpper(t language.Tag, o options) transform.Transformer {
_, i, _ := matcher.Match(t)
f := upperFunc[i]
if f == nil {
return undUpper
}
return &simpleCaser{f: f}
}
func makeLower(t language.Tag, o options) transform.Transformer {
_, i, _ := matcher.Match(t)
f := lowerFunc[i]
if o.noFinalSigma {
return &simpleCaser{f: f}
}
return &lowerCaser{
first: f,
midWord: finalSigma(f),
}
}
func makeTitle(t language.Tag, o options) transform.Transformer {
_, i, _ := matcher.Match(t)
x := &titleInfos[i]
lower := x.lower
if o.noLower {
lower = (*context).copy
} else if !o.noFinalSigma {
lower = finalSigma(lower)
}
return &titleCaser{
title: x.title,
lower: lower,
rewrite: x.rewrite,
}
}
// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved so will require some more precise benchmarking to
// justify it.
type undUpperCaser struct{ transform.NopResetter }
// undUpperCaser implements the Transformer interface for doing an upper case
// mapping for the root locale (und). It eliminates the need for an allocation
// as it prevents escaping by not using function pointers.
func (t *undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
c := context{dst: dst, src: src, atEOF: atEOF}
for c.next() {
upper(&c)
c.checkpoint()
}
return c.ret()
}
type simpleCaser struct {
context
f mapFunc
}
// simpleCaser implements the Transformer interface for doing a case operation
// on a rune-by-rune basis.
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
t.context = context{dst: dst, src: src, atEOF: atEOF}
c := &t.context
for c.next() && t.f(c) {
c.checkpoint()
}
return c.ret()
}
// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
type lowerCaser struct {
context
first, midWord mapFunc
}
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
t.context = context{dst: dst, src: src, atEOF: atEOF}
c := &t.context
for isInterWord := true; c.next(); {
if isInterWord {
if c.info.isCased() {
if !t.first(c) {
break
}
isInterWord = false
} else if !c.copy() {
break
}
} else {
if c.info.isNotCasedAndNotCaseIgnorable() {
if !c.copy() {
break
}
isInterWord = true
} else if !t.midWord(c) {
break
}
}
c.checkpoint()
}
return c.ret()
}
// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
type titleCaser struct {
context
// rune mappings used by the actual casing algorithms.
title, lower mapFunc
rewrite func(*context)
}
// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
// first cased character F following the word boundary. If F exists, map F to
// Titlecase_Mapping(F); then map all characters C between F and the following
// word boundary to Lowercase_Mapping(C).
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
c := &t.context
if !c.next() {
return c.ret()
}
for {
p := c.info
if t.rewrite != nil {
t.rewrite(c)
}
wasMid := p.isCaseIgnorableAndNonBreakStarter()
// Break out of this loop on failure to ensure we do not modify the
// state incorrectly.
if p.isCased() && !p.isCaseIgnorableAndNotCased() {
if !c.isMidWord {
if !t.title(c) {
break
}
c.isMidWord = true
} else if !t.lower(c) {
break
}
} else if !c.copy() {
break
}
// TODO: make this an "else if" if we can prove that no rune that does
// not match the first condition of the if statement can be a break.
if p.isBreak() {
c.isMidWord = false
}
// As we save the state of the transformer, it is safe to call
// checkpoint after any successful write.
c.checkpoint()
if !c.next() {
break
}
if wasMid && c.info.isCaseIgnorableAndNonBreakStarter() {
c.isMidWord = false
}
}
return c.ret()
}
// finalSigma adds Greek final Sigma handing to another casing function. It
// determines whether a lowercased sigma should be σ or ς, by looking ahead for
// case-ignorables and a cased letters.
func finalSigma(f mapFunc) mapFunc {
return func(c *context) bool {
// ::NFD();
// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
// Σ } [:case-ignorable:]* [:cased:] → σ;
// [:cased:] [:case-ignorable:]* { Σ → ς;
// ::Any-Lower;
// ::NFC();
if !c.hasPrefix("Σ") {
return f(c)
}
p := c.pDst
c.writeString("ς")
// We need to do one more iteration after maxIgnorable, as a cased
// letter is not an ignorable and may modify the result.
for i := 0; i < maxIgnorable+1; i++ {
if !c.next() {
return false
}
if !c.info.isCaseIgnorable() {
if c.info.isCased() {
// p+1 is guaranteed to be in bounds: if writing ς was
// successful, p+1 will contain the second byte of ς. If not,
// this function will have returned after c.next returned false.
c.dst[p+1]++ // ς → σ
}
c.unreadRune()
return true
}
// A case ignorable may also introduce a word break, so we may need
// to continue searching even after detecting a break.
c.isMidWord = c.isMidWord && !c.info.isBreak()
c.copy()
}
return true
}
}
// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
func elUpper(c *context) bool {
// From CLDR:
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
oldPDst := c.pDst
if !upper(c) {
return false
}
if !unicode.Is(unicode.Greek, r) {
return true
}
i := 0
// Take the properties of the uppercased rune that is already written to the
// destination. This saves us the trouble of having to uppercase the
// decomposed rune again.
if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
// Restore the destination position and process the decomposed rune.
r, sz := utf8.DecodeRune(b)
if r <= 0xFF { // See A.6.1
return true
}
c.pDst = oldPDst
// Insert the first rune and ignore the modifiers. See A.6.2.
c.writeBytes(b[:sz])
i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
}
for ; i < maxIgnorable && c.next(); i++ {
switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
// Above and Iota Subscript
case 0x0300, // U+0300 COMBINING GRAVE ACCENT
0x0301, // U+0301 COMBINING ACUTE ACCENT
0x0304, // U+0304 COMBINING MACRON
0x0306, // U+0306 COMBINING BREVE
0x0308, // U+0308 COMBINING DIAERESIS
0x0313, // U+0313 COMBINING COMMA ABOVE
0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
0x0342, // U+0342 COMBINING GREEK PERISPOMENI
0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
// No-op. Gobble the modifier.
default:
switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
case cccZero:
c.unreadRune()
return true
// We don't need to test for IotaSubscript as the only rune that
// qualifies (U+0345) was already excluded in the switch statement
// above. See A.4.
case cccAbove:
return c.copy()
default:
// Some other modifier. We're still allowed to gobble Greek
// modifiers after this.
c.copy()
}
}
}
return i == maxIgnorable
}
func ltLower(c *context) bool {
// From CLDR:
// # Introduce an explicit dot above when lowercasing capital I's and J's
// # whenever there are more accents above.
// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
// ::NFD();
// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
// Į } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → į \u0307;
// Ì → i \u0307 \u0300;
// Í → i \u0307 \u0301;
// Ĩ → i \u0307 \u0303;
// ::Any-Lower();
// ::NFC();
i := 0
if r := c.src[c.pSrc]; r < utf8.RuneSelf {
lower(c)
if r != 'I' && r != 'J' {
return true
}
} else {
p := norm.NFD.Properties(c.src[c.pSrc:])
if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
// UTF-8 optimization: the decomposition will only have an above
// modifier if the last rune of the decomposition is in [U+300-U+311].
// In all other cases, a decomposition starting with I is always
// an I followed by modifiers that are not cased themselves. See A.2.
if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
if !c.writeBytes(d[:1]) {
return false
}
c.dst[c.pDst-1] += 'a' - 'A' // lower
// Assumption: modifier never changes on lowercase. See A.1.
// Assumption: all modifiers added have CCC = Above. See A.2.3.
return c.writeString("\u0307") && c.writeBytes(d[1:])
}
// In all other cases the additional modifiers will have a CCC
// that is less than 230 (Above). We will insert the U+0307, if
// needed, after these modifiers so that a string in FCD form
// will remain so. See A.2.2.
lower(c)
i = 1
} else {
return lower(c)
}
}
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccZero:
c.unreadRune()
return true
case cccAbove:
return c.writeString("\u0307") && c.copy() // See A.1.
default:
c.copy() // See A.1.
}
}
return i == maxIgnorable
}
func ltUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// From CLDR:
// ::NFD();
// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
// ::Any-Upper();
// ::NFC();
// TODO: See A.5. A soft-dotted rune never has an exception. This would
// allow us to overload the exception bit and encode this property in
// info. Need to measure performance impact of this.
r, _ := utf8.DecodeRune(c.src[c.pSrc:])
oldPDst := c.pDst
if !f(c) {
return false
}
if !unicode.Is(unicode.Soft_Dotted, r) {
return true
}
// We don't need to do an NFD normalization, as a soft-dotted rune never
// contains U+0307. See A.3.
i := 0
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccZero:
c.unreadRune()
return true
case cccAbove:
if c.hasPrefix("\u0307") {
// We don't do a full NFC, but rather combine runes for
// some of the common cases. (Returning NFC or
// preserving normal form is neither a requirement nor
// a possibility anyway).
if !c.next() {
return false
}
if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
s := ""
switch c.src[c.pSrc+1] {
case 0x80: // U+0300 COMBINING GRAVE ACCENT
s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
case 0x81: // U+0301 COMBINING ACUTE ACCENT
s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
case 0x83: // U+0303 COMBINING TILDE
s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
case 0x88: // U+0308 COMBINING DIAERESIS
s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
default:
}
if s != "" {
c.pDst = oldPDst
return c.writeString(s)
}
}
}
return c.copy()
default:
c.copy()
}
}
return i == maxIgnorable
}
}
func aztrUpper(f mapFunc) mapFunc {
return func(c *context) bool {
// i→İ;
if c.src[c.pSrc] == 'i' {
return c.writeString("İ")
}
return f(c)
}
}
func aztrLower(c *context) (done bool) {
// From CLDR:
// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
// İ→i;
// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
// # This matches the behavior of the canonically equivalent I-dot_above
// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
// I→ı ;
// ::Any-Lower();
if c.hasPrefix("\u0130") { // İ
return c.writeString("i")
}
if c.src[c.pSrc] != 'I' {
return lower(c)
}
// We ignore the lower-case I for now, but insert it later when we know
// which form we need.
start := c.pSrc + c.sz
i := 0
Loop:
// We check for up to n ignorables before \u0307. As \u0307 is an
// ignorable as well, n is maxIgnorable-1.
for ; i < maxIgnorable && c.next(); i++ {
switch c.info.cccType() {
case cccAbove:
if c.hasPrefix("\u0307") {
return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
}
done = true
break Loop
case cccZero:
c.unreadRune()
done = true
break Loop
default:
// We'll write this rune after we know which starter to use.
}
}
if i == maxIgnorable {
done = true
}
return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
func nlTitle(c *context) bool {
// From CLDR:
// # Special titlecasing for Dutch initial "ij".
// ::Any-Title();
// # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29)
// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
return title(c)
}
if !c.writeString("I") || !c.next() {
return false
}
if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
return c.writeString("J")
}
c.unreadRune()
return true
}
// Not part of CLDR, but see http://unicode.org/cldr/trac/ticket/7078.
func afnlRewrite(c *context) {
if c.hasPrefix("'") || c.hasPrefix("") {
c.isMidWord = true
}
}

File diff suppressed because it is too large Load Diff

@ -1,213 +0,0 @@
// This file was generated by go generate; DO NOT EDIT
package cases
// This file contains definitions for interpreting the trie value of the case
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds case information for a single rune. It is the value returned
// by a trie lookup. Most mapping information can be stored in a single 16-bit
// value. If not, for example when a rune is mapped to multiple runes, the value
// stores some basic case data and an index into an array with additional data.
//
// The per-rune values have the following format:
//
// if (exception) {
// 15..5 unsigned exception index
// 4 unused
// } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index
// 5..4 CCC: zero (normal or break), above or other
// }
// 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode
//
// For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
// rune to uppercase or an uppercase rune to lowercase (applied to the 10
// least-significant bits of the rune).
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
casedMask = 0x0003
fullCasedMask = 0x0007
ignorableMask = 0x0006
ignorableValue = 0x0004
inverseFoldBit = 1 << 7
exceptionBit = 1 << 3
exceptionShift = 5
numExceptionBits = 11
xorIndexBit = 1 << 6
xorShift = 8
// There is no mapping if all xor bits and the exception bit are zero.
hasMappingMask = 0xffc0 | exceptionBit
)
// The case mode bits encodes the case type of a rune. This includes uncased,
// title, upper and lower case and case ignorable. (For a definition of these
// terms see Chapter 3 of The Unicode Standard Core Specification.) In some rare
// cases, a rune can be both cased and case-ignorable. This is encoded by
// cIgnorableCased. A rune of this type is always lower case. Some runes are
// cased while not having a mapping.
//
// A common pattern for scripts in the Unicode standard is for upper and lower
// case runes to alternate for increasing rune values (e.g. the accented Latin
// ranges starting from U+0100 and U+1E00 among others and some Cyrillic
// characters). We use this property by defining a cXORCase mode, where the case
// mode (always upper or lower case) is derived from the rune value. As the XOR
// pattern for case mappings is often identical for successive runes, using
// cXORCase can result in large series of identical trie values. This, in turn,
// allows us to better compress the trie blocks.
const (
cUncased info = iota // 000
cTitle // 001
cLower // 010
cUpper // 011
cIgnorableUncased // 100
cIgnorableCased // 101 // lower case if mappings exist
cXORCase // 11x // case is cLower | ((rune&1) ^ x)
maxCaseMode = cUpper
)
func (c info) isCased() bool {
return c&casedMask != 0
}
func (c info) isCaseIgnorable() bool {
return c&ignorableMask == ignorableValue
}
func (c info) isCaseIgnorableAndNonBreakStarter() bool {
return c&(fullCasedMask|cccMask) == (ignorableValue | cccZero)
}
func (c info) isNotCasedAndNotCaseIgnorable() bool {
return c&fullCasedMask == 0
}
func (c info) isCaseIgnorableAndNotCased() bool {
return c&fullCasedMask == cIgnorableUncased
}
// The case mapping implementation will need to know about various Canonical
// Combining Class (CCC) values. We encode two of these in the trie value:
// cccZero (0) and cccAbove (230). If the value is cccOther, it means that
// CCC(r) > 0, but not 230. A value of cccBreak means that CCC(r) == 0 and that
// the rune also has the break category Break (see below).
const (
cccBreak info = iota << 4
cccZero
cccAbove
cccOther
cccMask = cccBreak | cccZero | cccAbove | cccOther
)
const (
starter = 0
above = 230
iotaSubscript = 240
)
// The exceptions slice holds data that does not fit in a normal info entry.
// The entry is pointed to by the exception index in an entry. It has the
// following format:
//
// Header
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
//
// byte 1:
// 7..6 unused
// 5..3 length of 1st mapping of case type
// 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
//
// Lengths with the value 0x7 indicate no value and implies no change.
// A length of 0 indicates a mapping to zero-length string.
//
// Body bytes:
// case folding bytes
// lowercase mapping bytes
// uppercase mapping bytes
// titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
//
// Fallbacks:
// missing fold -> lower
// missing title -> upper
// all missing -> original rune
//
// exceptions starts with a dummy byte to enforce that there is no zero index
// value.
const (
lengthMask = 0x07
lengthBits = 3
noChange = 0
)
// References to generated trie.
var trie = newCaseTrie(0)
var sparse = sparseBlocks{
values: sparseValues[:],
offsets: sparseOffsets[:],
}
// Sparse block lookup code.
// valueRange is an entry in a sparse block.
type valueRange struct {
value uint16
lo, hi byte
}
type sparseBlocks struct {
values []valueRange
offsets []uint16
}
// lookup returns the value from values block n for byte b using binary search.
func (s *sparseBlocks) lookup(n uint32, b byte) uint16 {
lo := s.offsets[n]
hi := s.offsets[n+1]
for lo < hi {
m := lo + (hi-lo)/2
r := s.values[m]
if r.lo <= b && b <= r.hi {
return r.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
// lastRuneForTesting is the last rune used for testing. Everything after this
// is boring.
const lastRuneForTesting = rune(0x1FFFF)

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,500 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/internal/gen"
)
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
` !"#$%&'()*+,-./0123456789:;<=>?` +
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
var encodings = []struct {
name string
mib string
comment string
varName string
replacement byte
mapping string
}{
{
"IBM Code Page 437",
"PC8CodePage437",
"",
"CodePage437",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
},
{
"IBM Code Page 850",
"PC850Multilingual",
"",
"CodePage850",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
},
{
"IBM Code Page 852",
"PCp852",
"",
"CodePage852",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
},
{
"IBM Code Page 855",
"IBM855",
"",
"CodePage855",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
},
{
"Windows Code Page 858", // PC latin1 with Euro
"IBM00858",
"",
"CodePage858",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
},
{
"IBM Code Page 862",
"PC862LatinHebrew",
"",
"CodePage862",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
},
{
"IBM Code Page 866",
"IBM866",
"",
"CodePage866",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-ibm866.txt",
},
{
"ISO 8859-1",
"ISOLatin1",
"",
"ISO8859_1",
encoding.ASCIISub,
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
},
{
"ISO 8859-2",
"ISOLatin2",
"",
"ISO8859_2",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
},
{
"ISO 8859-3",
"ISOLatin3",
"",
"ISO8859_3",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
},
{
"ISO 8859-4",
"ISOLatin4",
"",
"ISO8859_4",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
},
{
"ISO 8859-5",
"ISOLatinCyrillic",
"",
"ISO8859_5",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
},
{
"ISO 8859-6",
"ISOLatinArabic",
"",
"ISO8859_6,ISO8859_6E,ISO8859_6I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
},
{
"ISO 8859-7",
"ISOLatinGreek",
"",
"ISO8859_7",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
},
{
"ISO 8859-8",
"ISOLatinHebrew",
"",
"ISO8859_8,ISO8859_8E,ISO8859_8I",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
},
{
"ISO 8859-10",
"ISOLatin6",
"",
"ISO8859_10",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
},
{
"ISO 8859-13",
"ISO885913",
"",
"ISO8859_13",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
},
{
"ISO 8859-14",
"ISO885914",
"",
"ISO8859_14",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
},
{
"ISO 8859-15",
"ISO885915",
"",
"ISO8859_15",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
},
{
"ISO 8859-16",
"ISO885916",
"",
"ISO8859_16",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
},
{
"KOI8-R",
"KOI8R",
"",
"KOI8R",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
},
{
"KOI8-U",
"KOI8U",
"",
"KOI8U",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
},
{
"Macintosh",
"Macintosh",
"",
"Macintosh",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-macintosh.txt",
},
{
"Macintosh Cyrillic",
"MacintoshCyrillic",
"",
"MacintoshCyrillic",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
},
{
"Windows 874",
"Windows874",
"",
"Windows874",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-874.txt",
},
{
"Windows 1250",
"Windows1250",
"",
"Windows1250",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
},
{
"Windows 1251",
"Windows1251",
"",
"Windows1251",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
},
{
"Windows 1252",
"Windows1252",
"",
"Windows1252",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
},
{
"Windows 1253",
"Windows1253",
"",
"Windows1253",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
},
{
"Windows 1254",
"Windows1254",
"",
"Windows1254",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
},
{
"Windows 1255",
"Windows1255",
"",
"Windows1255",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
},
{
"Windows 1256",
"Windows1256",
"",
"Windows1256",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
},
{
"Windows 1257",
"Windows1257",
"",
"Windows1257",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
},
{
"Windows 1258",
"Windows1258",
"",
"Windows1258",
encoding.ASCIISub,
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
},
{
"X-User-Defined",
"XUserDefined",
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
"XUserDefined",
encoding.ASCIISub,
ascii +
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
},
}
func getWHATWG(url string) string {
res, err := http.Get(url)
if err != nil {
log.Fatalf("%q: Get: %v", url, err)
}
defer res.Body.Close()
mapping := make([]rune, 128)
for i := range mapping {
mapping[i] = '\ufffd'
}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := 0, 0
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 128 <= x {
log.Fatalf("code %d is out of range", x)
}
if 0x80 <= y && y < 0xa0 {
// We diverge from the WHATWG spec by mapping control characters
// in the range [0x80, 0xa0) to U+FFFD.
continue
}
mapping[x] = rune(y)
}
return ascii + string(mapping)
}
func getUCM(url string) string {
res, err := http.Get(url)
if err != nil {
log.Fatalf("%q: Get: %v", url, err)
}
defer res.Body.Close()
mapping := make([]rune, 256)
for i := range mapping {
mapping[i] = '\ufffd'
}
charsFound := 0
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
var c byte
var r rune
if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
continue
}
mapping[c] = r
charsFound++
}
if charsFound < 200 {
log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
}
return string(mapping)
}
func main() {
mibs := map[string]bool{}
all := []string{}
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "charmap")
printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
printf("import (\n")
printf("\t\"golang.org/x/text/encoding\"\n")
printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
printf(")\n\n")
for _, e := range encodings {
varNames := strings.Split(e.varName, ",")
all = append(all, varNames...)
varName := varNames[0]
switch {
case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
e.mapping = getWHATWG(e.mapping)
case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
e.mapping = getUCM(e.mapping)
}
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
if asciiSuperset {
low = 0x80
}
lvn := 1
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
lvn = 3
}
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
printf("// %s is the %s encoding.\n", varName, e.name)
if e.comment != "" {
printf("//\n// %s\n", e.comment)
}
printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
varName, lowerVarName, lowerVarName, e.name)
if mibs[e.mib] {
log.Fatalf("MIB type %q declared multiple times.", e.mib)
}
printf("mib: identifier.%s,\n", e.mib)
printf("asciiSuperset: %t,\n", asciiSuperset)
printf("low: 0x%02x,\n", low)
printf("replacement: 0x%02x,\n", e.replacement)
printf("decode: [256]utf8Enc{\n")
i, backMapping := 0, map[rune]byte{}
for _, c := range e.mapping {
if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
backMapping[c] = byte(i)
}
var buf [8]byte
n := utf8.EncodeRune(buf[:], c)
if n > 3 {
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
}
printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
if i%2 == 1 {
printf("\n")
}
i++
}
printf("},\n")
printf("encode: [256]uint32{\n")
encode := make([]uint32, 0, 256)
for c, i := range backMapping {
encode = append(encode, uint32(i)<<24|uint32(c))
}
sort.Sort(byRune(encode))
for len(encode) < cap(encode) {
encode = append(encode, encode[len(encode)-1])
}
for i, enc := range encode {
printf("0x%08x,", enc)
if i%8 == 7 {
printf("\n")
}
}
printf("},\n}\n")
// Add an estimate of the size of a single charmap{} struct value, which
// includes two 256 elem arrays of 4 bytes and some extra fields, which
// align to 3 uint64s on 64-bit architectures.
w.Size += 2*4*256 + 3*8
}
// TODO: add proper line breaking.
printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
}
type byRune []uint32
func (b byRune) Len() int { return len(b) }
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

@ -1,167 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
type group struct {
Encodings []struct {
Labels []string
Name string
}
}
func main() {
gen.Init()
r := gen.Open("http://www.w3.org/TR", "w3", "encoding/indexes/encodings.json")
var groups []group
if err := json.NewDecoder(r).Decode(&groups); err != nil {
log.Fatalf("Error reading encodings.json: %v", err)
}
w := &bytes.Buffer{}
fmt.Fprintln(w, "type htmlEncoding byte")
fmt.Fprintln(w, "const (")
for i, g := range groups {
for _, e := range g.Encodings {
name := consts[e.Name]
if name == "" {
log.Fatalf("No const defined for %s.", e.Name)
}
if i == 0 {
fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
} else {
fmt.Fprintf(w, "%s\n", name)
}
}
}
fmt.Fprintln(w, "numEncodings")
fmt.Fprint(w, ")\n\n")
fmt.Fprintln(w, "var canonical = [numEncodings]string{")
for _, g := range groups {
for _, e := range g.Encodings {
fmt.Fprintf(w, "%q,\n", e.Name)
}
}
fmt.Fprint(w, "}\n\n")
fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
for _, g := range groups {
for _, e := range g.Encodings {
for _, l := range e.Labels {
fmt.Fprintf(w, "%q: %s,\n", l, consts[e.Name])
}
}
}
fmt.Fprint(w, "}\n\n")
var tags []string
fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
for _, loc := range locales {
tags = append(tags, loc.tag)
fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
}
fmt.Fprint(w, "}\n\n")
fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
}
// consts maps canonical encoding name to internal constant.
var consts = map[string]string{
"utf-8": "utf8",
"ibm866": "ibm866",
"iso-8859-2": "iso8859_2",
"iso-8859-3": "iso8859_3",
"iso-8859-4": "iso8859_4",
"iso-8859-5": "iso8859_5",
"iso-8859-6": "iso8859_6",
"iso-8859-7": "iso8859_7",
"iso-8859-8": "iso8859_8",
"iso-8859-8-i": "iso8859_8I",
"iso-8859-10": "iso8859_10",
"iso-8859-13": "iso8859_13",
"iso-8859-14": "iso8859_14",
"iso-8859-15": "iso8859_15",
"iso-8859-16": "iso8859_16",
"koi8-r": "koi8r",
"koi8-u": "koi8u",
"macintosh": "macintosh",
"windows-874": "windows874",
"windows-1250": "windows1250",
"windows-1251": "windows1251",
"windows-1252": "windows1252",
"windows-1253": "windows1253",
"windows-1254": "windows1254",
"windows-1255": "windows1255",
"windows-1256": "windows1256",
"windows-1257": "windows1257",
"windows-1258": "windows1258",
"x-mac-cyrillic": "macintoshCyrillic",
"gbk": "gbk",
"gb18030": "gb18030",
// "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
"big5": "big5",
"euc-jp": "eucjp",
"iso-2022-jp": "iso2022jp",
"shift_jis": "shiftJIS",
"euc-kr": "euckr",
"replacement": "replacement",
"utf-16be": "utf16be",
"utf-16le": "utf16le",
"x-user-defined": "xUserDefined",
}
// locales is taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
var locales = []struct{ tag, name string }{
{"und", "windows-1252"}, // The default value.
{"ar", "windows-1256"},
{"ba", "windows-1251"},
{"be", "windows-1251"},
{"bg", "windows-1251"},
{"cs", "windows-1250"},
{"el", "iso-8859-7"},
{"et", "windows-1257"},
{"fa", "windows-1256"},
{"he", "windows-1255"},
{"hr", "windows-1250"},
{"hu", "iso-8859-2"},
{"ja", "shift_jis"},
{"kk", "windows-1251"},
{"ko", "euc-kr"},
{"ku", "windows-1254"},
{"ky", "windows-1251"},
{"lt", "windows-1257"},
{"lv", "windows-1257"},
{"mk", "windows-1251"},
{"pl", "iso-8859-2"},
{"ru", "windows-1251"},
{"sah", "windows-1251"},
{"sk", "windows-1250"},
{"sl", "iso-8859-2"},
{"sr", "windows-1251"},
{"tg", "windows-1251"},
{"th", "windows-874"},
{"tr", "windows-1254"},
{"tt", "windows-1251"},
{"uk", "windows-1251"},
{"vi", "windows-1258"},
{"zh-hans", "gb18030"},
{"zh-hant", "big5"},
}

@ -1,64 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ianaindex maps names to Encodings as specified by the IANA registry.
// This includes both the MIME and IANA names.
//
// See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
// more details.
package ianaindex
import (
"golang.org/x/text/encoding"
)
// TODO: allow users to specify their own aliases?
// TODO: allow users to specify their own indexes?
// TODO: allow canonicalizing names
// NOTE: only use these top-level variables if we can get the linker to drop
// the indexes when they are not used. Make them a function or perhaps only
// support MIME otherwise.
var (
// MIME is an index to map MIME names. It does not support aliases.
MIME *Index
// IANA is an index that supports all names and aliases using IANA names as
// the canonical identifier.
IANA *Index
)
// Index maps names registered by IANA to Encodings.
type Index struct {
}
// Get returns an Encoding for IANA-registered names. Matching is
// case-insensitive.
func (x *Index) Get(name string) (encoding.Encoding, error) {
panic("TODO: implement")
}
// Name reports the canonical name of the given Encoding. It will return an
// error if the e is not associated with a known encoding scheme.
func (x *Index) Name(e encoding.Encoding) (string, error) {
panic("TODO: implement")
}
// TODO: the coverage of this index is rather spotty. Allowing users to set
// encodings would allow:
// - users to increase coverage
// - allow a partially loaded set of encodings in case the user doesn't need to
// them all.
// - write an OS-specific wrapper for supported encodings and set them.
// The exact definition of Set depends a bit on if and how we want to let users
// write their own Encoding implementations. Also, it is not possible yet to
// only partially load the encodings without doing some refactoring. Until this
// is solved, we might as well not support Set.
// // Set sets the e to be used for the encoding scheme identified by name. Only
// // canonical names may be used. An empty name assigns e to its internally
// // associated encoding scheme.
// func (x *Index) Set(name string, e encoding.Encoding) error {
// panic("TODO: implement")
// }

@ -1,137 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"log"
"strings"
"golang.org/x/text/internal/gen"
)
type registry struct {
XMLName xml.Name `xml:"registry"`
Updated string `xml:"updated"`
Registry []struct {
ID string `xml:"id,attr"`
Record []struct {
Name string `xml:"name"`
Xref []struct {
Type string `xml:"type,attr"`
Data string `xml:"data,attr"`
} `xml:"xref"`
Desc struct {
Data string `xml:",innerxml"`
// Any []struct {
// Data string `xml:",chardata"`
// } `xml:",any"`
// Data string `xml:",chardata"`
} `xml:"description,"`
MIB string `xml:"value"`
Alias []string `xml:"alias"`
MIME string `xml:"preferred_alias"`
} `xml:"record"`
} `xml:"registry"`
}
func main() {
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
reg := &registry{}
if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
log.Fatalf("Error decoding charset registry: %v", err)
}
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
}
w := &bytes.Buffer{}
fmt.Fprintf(w, "const (\n")
for _, rec := range reg.Registry[0].Record {
constName := ""
for _, a := range rec.Alias {
if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
// Some of the constant definitions have comments in them. Strip those.
constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
}
}
if constName == "" {
switch rec.MIB {
case "2085":
constName = "HZGB2312" // Not listed as alias for some reason.
default:
log.Fatalf("No cs alias defined for %s.", rec.MIB)
}
}
if rec.MIME != "" {
rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
}
fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
if len(rec.Desc.Data) > 0 {
fmt.Fprint(w, "// ")
d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
inElem := true
attr := ""
for {
t, err := d.Token()
if err != nil {
if err != io.EOF {
log.Fatal(err)
}
break
}
switch x := t.(type) {
case xml.CharData:
attr = "" // Don't need attribute info.
a := bytes.Split([]byte(x), []byte("\n"))
for i, b := range a {
if b = bytes.TrimSpace(b); len(b) != 0 {
if !inElem && i > 0 {
fmt.Fprint(w, "\n// ")
}
inElem = false
fmt.Fprintf(w, "%s ", string(b))
}
}
case xml.StartElement:
if x.Name.Local == "xref" {
inElem = true
use := false
for _, a := range x.Attr {
if a.Name.Local == "type" {
use = use || a.Value != "person"
}
if a.Name.Local == "data" && use {
attr = a.Value + " "
}
}
}
case xml.EndElement:
inElem = false
fmt.Fprint(w, attr)
}
}
fmt.Fprint(w, "\n")
}
for _, x := range rec.Xref {
switch x.Type {
case "rfc":
fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
case "uri":
fmt.Fprintf(w, "// Reference: %s\n", x.Data)
}
}
fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
fmt.Fprintln(w)
}
fmt.Fprintln(w, ")")
gen.WriteGoFile("mib.go", "identifier", w.Bytes())
}

@ -1,161 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
// TODO: Emoji extensions?
// http://www.unicode.org/faq/emoji_dingbats.html
// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
type entry struct {
jisCode, table int
}
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
reverse := [65536]entry{}
for i := range reverse {
reverse[i].table = -1
}
tables := []struct {
url string
name string
}{
{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
}
for i, table := range tables {
res, err := http.Get(table.url)
if err != nil {
log.Fatalf("%q: Get: %v", table.url, err)
}
defer res.Body.Close()
mapping := [65536]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := 0, uint16(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("%q: could not parse %q", table.url, s)
}
if x < 0 || 120*94 <= x {
log.Fatalf("%q: JIS code %d is out of range", table.url, x)
}
mapping[x] = y
if reverse[y].table == -1 {
reverse[y] = entry{jisCode: x, table: i}
}
}
if err := scanner.Err(); err != nil {
log.Fatalf("%q: scanner error: %v", table.url, err)
}
fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
table.name, table.name, table.url)
fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
for i, m := range mapping {
if m != 0 {
fmt.Printf("\t%d: 0x%04X,\n", i, m)
}
}
fmt.Printf("}\n\n")
}
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v.table == -1 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const (\n")
fmt.Printf("\tjis0208 = 1\n")
fmt.Printf("\tjis0212 = 2\n")
fmt.Printf("\tcodeMask = 0x7f\n")
fmt.Printf("\tcodeShift = 7\n")
fmt.Printf("\ttableShift = 14\n")
fmt.Printf(")\n\n")
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("//\n")
fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x.table == -1 {
continue
}
fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

@ -1,143 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
mapping := [65536]uint16{}
reverse := [65536]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint16(0), uint16(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
log.Fatalf("EUC-KR code %d is out of range", x)
}
mapping[x] = y
if reverse[y] == 0 {
c0, c1 := uint16(0), uint16(0)
if x < 178*(0xc7-0x81) {
c0 = uint16(x/178) + 0x81
c1 = uint16(x % 178)
switch {
case c1 < 1*26:
c1 += 0x41
case c1 < 2*26:
c1 += 0x47
default:
c1 += 0x4d
}
} else {
x -= 178 * (0xc7 - 0x81)
c0 = uint16(x/94) + 0xc7
c1 = uint16(x%94) + 0xa1
}
reverse[y] = c0<<8 | c1
}
}
if err := scanner.Err(); err != nil {
log.Fatalf("scanner error: %v", err)
}
fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
fmt.Printf("var decode = [...]uint16{\n")
for i, v := range mapping {
if v != 0 {
fmt.Printf("\t%d: 0x%04X,\n", i, v)
}
}
fmt.Printf("}\n\n")
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v == 0 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x == 0 {
continue
}
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

@ -1,161 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n")
fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n")
printGB18030()
printGBK()
}
func printGB18030() {
res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
fmt.Printf("var gb18030 = [...][2]uint16{\n")
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint32(0), uint32(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0x10000 && y < 0x10000 {
fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
}
}
fmt.Printf("}\n\n")
}
func printGBK() {
res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
mapping := [65536]uint16{}
reverse := [65536]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint16(0), uint16(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 126*190 <= x {
log.Fatalf("GBK code %d is out of range", x)
}
mapping[x] = y
if reverse[y] == 0 {
c0, c1 := x/190, x%190
if c1 >= 0x3f {
c1++
}
reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
}
}
if err := scanner.Err(); err != nil {
log.Fatalf("scanner error: %v", err)
}
fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
fmt.Printf("var decode = [...]uint16{\n")
for i, v := range mapping {
if v != 0 {
fmt.Printf("\t%d: 0x%04X,\n", i, v)
}
}
fmt.Printf("}\n\n")
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v == 0 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x == 0 {
continue
}
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

@ -1,140 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates tables.go:
// go run maketables.go | gofmt > tables.go
import (
"bufio"
"fmt"
"log"
"net/http"
"sort"
"strings"
)
func main() {
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
if err != nil {
log.Fatalf("Get: %v", err)
}
defer res.Body.Close()
mapping := [65536]uint32{}
reverse := [65536 * 4]uint16{}
scanner := bufio.NewScanner(res.Body)
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if s == "" || s[0] == '#' {
continue
}
x, y := uint16(0), uint32(0)
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
log.Fatalf("could not parse %q", s)
}
if x < 0 || 126*157 <= x {
log.Fatalf("Big5 code %d is out of range", x)
}
mapping[x] = y
// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
// "The index pointer for code point in index is the first pointer
// corresponding to code point in index", which would normally mean
// that the code below should be guarded by "if reverse[y] == 0", but
// last instead of first seems to match the behavior of
// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
c0, c1 := x/157, x%157
if c1 < 0x3f {
c1 += 0x40
} else {
c1 += 0x62
}
reverse[y] = (0x81+c0)<<8 | c1
}
if err := scanner.Err(); err != nil {
log.Fatalf("scanner error: %v", err)
}
fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
fmt.Printf("var decode = [...]uint32{\n")
for i, v := range mapping {
if v != 0 {
fmt.Printf("\t%d: 0x%08X,\n", i, v)
}
}
fmt.Printf("}\n\n")
// Any run of at least separation continuous zero entries in the reverse map will
// be a separate encode table.
const separation = 1024
intervals := []interval(nil)
low, high := -1, -1
for i, v := range reverse {
if v == 0 {
continue
}
if low < 0 {
low = i
} else if i-high >= separation {
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
low = i
}
high = i + 1
}
if high >= 0 {
intervals = append(intervals, interval{low, high})
}
sort.Sort(byDecreasingLength(intervals))
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
fmt.Printf("// sorted by decreasing length.\n")
for i, v := range intervals {
fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
}
fmt.Printf("\n")
for i, v := range intervals {
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
fmt.Printf("var encode%d = [...]uint16{\n", i)
for j := v.low; j < v.high; j++ {
x := reverse[j]
if x == 0 {
continue
}
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
}
fmt.Printf("}\n\n")
}
}
// interval is a half-open interval [low, high).
type interval struct {
low, high int
}
func (i interval) len() int { return i.high - i.low }
// byDecreasingLength sorts intervals by decreasing length.
type byDecreasingLength []interval
func (b byDecreasingLength) Len() int { return len(b) }
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }

@ -1,296 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package utf32 provides the UTF-32 Unicode encoding.
//
// Please note that support for UTF-32 is discouraged as it is a rare and
// inefficient encoding, unfit for use as an interchange format. For use
// on the web, the W3C strongly discourages its use
// (https://www.w3.org/TR/html5/document-metadata.html#charset)
// while WHATWG directly prohibits supporting it
// (https://html.spec.whatwg.org/multipage/syntax.html#character-encodings).
package utf32 // import "golang.org/x/text/encoding/unicode/utf32"
import (
"errors"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// All lists a configuration for each IANA-defined UTF-32 variant.
var All = []encoding.Encoding{
UTF32(BigEndian, UseBOM),
UTF32(BigEndian, IgnoreBOM),
UTF32(LittleEndian, IgnoreBOM),
}
// ErrMissingBOM means that decoding UTF-32 input with ExpectBOM did not
// find a starting byte order mark.
var ErrMissingBOM = errors.New("encoding: missing byte order mark")
// UTF32 returns a UTF-32 Encoding for the given default endianness and
// byte order mark (BOM) policy.
//
// When decoding from UTF-32 to UTF-8, if the BOMPolicy is IgnoreBOM then
// neither BOMs U+FEFF nor ill-formed code units 0xFFFE0000 in the input
// stream will affect the endianness used for decoding. Instead BOMs will
// be output as their standard UTF-8 encoding "\xef\xbb\xbf" while
// 0xFFFE0000 code units will be output as "\xef\xbf\xbd", the standard
// UTF-8 encoding for the Unicode replacement character. If the BOMPolicy
// is UseBOM or ExpectBOM a starting BOM is not written to the UTF-8
// output. Instead, it overrides the default endianness e for the remainder
// of the transformation. Any subsequent BOMs U+FEFF or ill-formed code
// units 0xFFFE0000 will not affect the endianness used, and will instead
// be output as their standard UTF-8 (replacement) encodings. For UseBOM,
// if there is no starting BOM, it will proceed with the default
// Endianness. For ExpectBOM, in that case, the transformation will return
// early with an ErrMissingBOM error.
//
// When encoding from UTF-8 to UTF-32, a BOM will be inserted at the start
// of the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM
// will not be inserted. The UTF-8 input does not need to contain a BOM.
//
// There is no concept of a 'native' endianness. If the UTF-32 data is
// produced and consumed in a greater context that implies a certain
// endianness, use IgnoreBOM. Otherwise, use ExpectBOM and always produce
// and consume a BOM.
//
// In the language of http://www.unicode.org/faq/utf_bom.html#bom10,
// IgnoreBOM corresponds to "Where the precise type of the data stream is
// known... the BOM should not be used" and ExpectBOM corresponds to "A
// particular protocol... may require use of the BOM".
func UTF32(e Endianness, b BOMPolicy) encoding.Encoding {
return utf32Encoding{config{e, b}, mibValue[e][b&bomMask]}
}
// mibValue maps Endianness and BOMPolicy settings to MIB constants for UTF-32.
// Note that some configurations map to the same MIB identifier.
var mibValue = map[Endianness][numBOMValues]identifier.MIB{
BigEndian: [numBOMValues]identifier.MIB{
IgnoreBOM: identifier.UTF32BE,
UseBOM: identifier.UTF32,
},
LittleEndian: [numBOMValues]identifier.MIB{
IgnoreBOM: identifier.UTF32LE,
UseBOM: identifier.UTF32,
},
// ExpectBOM is not widely used and has no valid MIB identifier.
}
// BOMPolicy is a UTF-32 encodings's byte order mark policy.
type BOMPolicy uint8
const (
writeBOM BOMPolicy = 0x01
acceptBOM BOMPolicy = 0x02
requireBOM BOMPolicy = 0x04
bomMask BOMPolicy = 0x07
// HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a
// map of an array of length 8 of a type that is also used as a key or value
// in another map). See golang.org/issue/11354.
// TODO: consider changing this value back to 8 if the use of 1.4.* has
// been minimized.
numBOMValues = 8 + 1
// IgnoreBOM means to ignore any byte order marks.
IgnoreBOM BOMPolicy = 0
// Unicode-compliant interpretation for UTF-32BE/LE.
// UseBOM means that the UTF-32 form may start with a byte order mark,
// which will be used to override the default encoding.
UseBOM BOMPolicy = writeBOM | acceptBOM
// Unicode-compliant interpretation for UTF-32.
// ExpectBOM means that the UTF-32 form must start with a byte order mark,
// which will be used to override the default encoding.
ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM
// Consistent with BOMPolicy definition in golang.org/x/text/encoding/unicode
)
// Endianness is a UTF-32 encoding's default endianness.
type Endianness bool
const (
// BigEndian is UTF-32BE.
BigEndian Endianness = false
// LittleEndian is UTF-32LE.
LittleEndian Endianness = true
)
type config struct {
endianness Endianness
bomPolicy BOMPolicy
}
type utf32Encoding struct {
config
mib identifier.MIB
}
func (u utf32Encoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: &utf32Decoder{
initial: u.config,
current: u.config,
}}
}
func (u utf32Encoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: &utf32Encoder{
endianness: u.endianness,
initialBOMPolicy: u.bomPolicy,
currentBOMPolicy: u.bomPolicy,
}}
}
func (u utf32Encoding) ID() (mib identifier.MIB, other string) {
return u.mib, ""
}
func (u utf32Encoding) String() string {
e, b := "B", ""
if u.endianness == LittleEndian {
e = "L"
}
switch u.bomPolicy {
case ExpectBOM:
b = "Expect"
case UseBOM:
b = "Use"
case IgnoreBOM:
b = "Ignore"
}
return "UTF-32" + e + "E (" + b + " BOM)"
}
type utf32Decoder struct {
initial config
current config
}
func (u *utf32Decoder) Reset() {
u.current = u.initial
}
func (u *utf32Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(src) == 0 {
if atEOF && u.current.bomPolicy&requireBOM != 0 {
return 0, 0, ErrMissingBOM
}
return 0, 0, nil
}
if u.current.bomPolicy&acceptBOM != 0 {
if len(src) < 4 {
return 0, 0, transform.ErrShortSrc
}
switch {
case src[0] == 0x00 && src[1] == 0x00 && src[2] == 0xfe && src[3] == 0xff:
u.current.endianness = BigEndian
nSrc = 4
case src[0] == 0xff && src[1] == 0xfe && src[2] == 0x00 && src[3] == 0x00:
u.current.endianness = LittleEndian
nSrc = 4
default:
if u.current.bomPolicy&requireBOM != 0 {
return 0, 0, ErrMissingBOM
}
}
u.current.bomPolicy = IgnoreBOM
}
var r rune
var dSize, sSize int
for nSrc < len(src) {
if nSrc+3 < len(src) {
x := uint32(src[nSrc+0])<<24 | uint32(src[nSrc+1])<<16 |
uint32(src[nSrc+2])<<8 | uint32(src[nSrc+3])
if u.current.endianness == LittleEndian {
x = x>>24 | (x >> 8 & 0x0000FF00) | (x << 8 & 0x00FF0000) | x<<24
}
r, sSize = rune(x), 4
if dSize = utf8.RuneLen(r); dSize < 0 {
r, dSize = utf8.RuneError, 3
}
} else if atEOF {
// 1..3 trailing bytes.
r, dSize, sSize = utf8.RuneError, 3, len(src)-nSrc
} else {
err = transform.ErrShortSrc
break
}
if nDst+dSize > len(dst) {
err = transform.ErrShortDst
break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
nSrc += sSize
}
return nDst, nSrc, err
}
type utf32Encoder struct {
endianness Endianness
initialBOMPolicy BOMPolicy
currentBOMPolicy BOMPolicy
}
func (u *utf32Encoder) Reset() {
u.currentBOMPolicy = u.initialBOMPolicy
}
func (u *utf32Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if u.currentBOMPolicy&writeBOM != 0 {
if len(dst) < 4 {
return 0, 0, transform.ErrShortDst
}
dst[0], dst[1], dst[2], dst[3] = 0x00, 0x00, 0xfe, 0xff
u.currentBOMPolicy = IgnoreBOM
nDst = 4
}
r, size := rune(0), 0
for nSrc < len(src) {
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
size = 1
} else {
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
}
}
if nDst+4 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = uint8(r >> 24)
dst[nDst+1] = uint8(r >> 16)
dst[nDst+2] = uint8(r >> 8)
dst[nDst+3] = uint8(r)
nDst += 4
nSrc += size
}
if u.endianness == LittleEndian {
for i := 0; i < nDst; i += 4 {
dst[i], dst[i+1], dst[i+2], dst[i+3] = dst[i+3], dst[i+2], dst[i+1], dst[i]
}
}
return nDst, nSrc, err
}

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,338 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gen
import (
"bytes"
"encoding/gob"
"fmt"
"hash"
"hash/fnv"
"io"
"log"
"os"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
// This file contains utilities for generating code.
// TODO: other write methods like:
// - slices, maps, types, etc.
// CodeWriter is a utility for writing structured code. It computes the content
// hash and size of written content. It ensures there are newlines between
// written code blocks.
type CodeWriter struct {
buf bytes.Buffer
Size int
Hash hash.Hash32 // content hash
gob *gob.Encoder
// For comments we skip the usual one-line separator if they are followed by
// a code block.
skipSep bool
}
func (w *CodeWriter) Write(p []byte) (n int, err error) {
return w.buf.Write(p)
}
// NewCodeWriter returns a new CodeWriter.
func NewCodeWriter() *CodeWriter {
h := fnv.New32()
return &CodeWriter{Hash: h, gob: gob.NewEncoder(h)}
}
// WriteGoFile appends the buffer with the total size of all created structures
// and writes it as a Go file to the the given file with the given package name.
func (w *CodeWriter) WriteGoFile(filename, pkg string) {
f, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer f.Close()
if _, err = w.WriteGo(f, pkg); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteGo appends the buffer with the total size of all created structures and
// writes it as a Go file to the the given writer with the given package name.
func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) {
sz := w.Size
w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32())
defer w.buf.Reset()
return WriteGo(out, pkg, w.buf.Bytes())
}
func (w *CodeWriter) printf(f string, x ...interface{}) {
fmt.Fprintf(w, f, x...)
}
func (w *CodeWriter) insertSep() {
if w.skipSep {
w.skipSep = false
return
}
// Use at least two newlines to ensure a blank space between the previous
// block. WriteGoFile will remove extraneous newlines.
w.printf("\n\n")
}
// WriteComment writes a comment block. All line starts are prefixed with "//".
// Initial empty lines are gobbled. The indentation for the first line is
// stripped from consecutive lines.
func (w *CodeWriter) WriteComment(comment string, args ...interface{}) {
s := fmt.Sprintf(comment, args...)
s = strings.Trim(s, "\n")
// Use at least two newlines to ensure a blank space between the previous
// block. WriteGoFile will remove extraneous newlines.
w.printf("\n\n// ")
w.skipSep = true
// strip first indent level.
sep := "\n"
for ; len(s) > 0 && (s[0] == '\t' || s[0] == ' '); s = s[1:] {
sep += s[:1]
}
strings.NewReplacer(sep, "\n// ", "\n", "\n// ").WriteString(w, s)
w.printf("\n")
}
func (w *CodeWriter) writeSizeInfo(size int) {
w.printf("// Size: %d bytes\n", size)
}
// WriteConst writes a constant of the given name and value.
func (w *CodeWriter) WriteConst(name string, x interface{}) {
w.insertSep()
v := reflect.ValueOf(x)
switch v.Type().Kind() {
case reflect.String:
// See golang.org/issue/13145.
const arbitraryCutoff = 16
if v.Len() > arbitraryCutoff {
w.printf("var %s %s = ", name, typeName(x))
} else {
w.printf("const %s %s = ", name, typeName(x))
}
w.WriteString(v.String())
w.printf("\n")
default:
w.printf("const %s = %#v\n", name, x)
}
}
// WriteVar writes a variable of the given name and value.
func (w *CodeWriter) WriteVar(name string, x interface{}) {
w.insertSep()
v := reflect.ValueOf(x)
oldSize := w.Size
sz := int(v.Type().Size())
w.Size += sz
switch v.Type().Kind() {
case reflect.String:
w.printf("var %s %s = ", name, typeName(x))
w.WriteString(v.String())
case reflect.Struct:
w.gob.Encode(x)
fallthrough
case reflect.Slice, reflect.Array:
w.printf("var %s = ", name)
w.writeValue(v)
w.writeSizeInfo(w.Size - oldSize)
default:
w.printf("var %s %s = ", name, typeName(x))
w.gob.Encode(x)
w.writeValue(v)
w.writeSizeInfo(w.Size - oldSize)
}
w.printf("\n")
}
func (w *CodeWriter) writeValue(v reflect.Value) {
x := v.Interface()
switch v.Kind() {
case reflect.String:
w.WriteString(v.String())
case reflect.Array:
// Don't double count: callers of WriteArray count on the size being
// added, so we need to discount it here.
w.Size -= int(v.Type().Size())
w.writeSlice(x, true)
case reflect.Slice:
w.writeSlice(x, false)
case reflect.Struct:
w.printf("%s{\n", typeName(v.Interface()))
t := v.Type()
for i := 0; i < v.NumField(); i++ {
w.printf("%s: ", t.Field(i).Name)
w.writeValue(v.Field(i))
w.printf(",\n")
}
w.printf("}")
default:
w.printf("%#v", x)
}
}
// WriteString writes a string literal.
func (w *CodeWriter) WriteString(s string) {
io.WriteString(w.Hash, s) // content hash
w.Size += len(s)
const maxInline = 40
if len(s) <= maxInline {
w.printf("%q", s)
return
}
// We will render the string as a multi-line string.
const maxWidth = 80 - 4 - len(`"`) - len(`" +`)
// When starting on its own line, go fmt indents line 2+ an extra level.
n, max := maxWidth, maxWidth-4
// Print "" +\n, if a string does not start on its own line.
b := w.buf.Bytes()
if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' {
w.printf("\"\" + // Size: %d bytes\n", len(s))
n, max = maxWidth, maxWidth
}
w.printf(`"`)
for sz, p := 0, 0; p < len(s); {
var r rune
r, sz = utf8.DecodeRuneInString(s[p:])
out := s[p : p+sz]
chars := 1
if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' {
switch sz {
case 1:
out = fmt.Sprintf("\\x%02x", s[p])
case 2, 3:
out = fmt.Sprintf("\\u%04x", r)
case 4:
out = fmt.Sprintf("\\U%08x", r)
}
chars = len(out)
}
if n -= chars; n < 0 {
w.printf("\" +\n\"")
n = max - len(out)
}
w.printf("%s", out)
p += sz
}
w.printf(`"`)
}
// WriteSlice writes a slice value.
func (w *CodeWriter) WriteSlice(x interface{}) {
w.writeSlice(x, false)
}
// WriteArray writes an array value.
func (w *CodeWriter) WriteArray(x interface{}) {
w.writeSlice(x, true)
}
func (w *CodeWriter) writeSlice(x interface{}, isArray bool) {
v := reflect.ValueOf(x)
w.gob.Encode(v.Len())
w.Size += v.Len() * int(v.Type().Elem().Size())
name := typeName(x)
if isArray {
name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:])
}
if isArray {
w.printf("%s{\n", name)
} else {
w.printf("%s{ // %d elements\n", name, v.Len())
}
switch kind := v.Type().Elem().Kind(); kind {
case reflect.String:
for _, s := range x.([]string) {
w.WriteString(s)
w.printf(",\n")
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
// nLine and nBlock are the number of elements per line and block.
nLine, nBlock, format := 8, 64, "%d,"
switch kind {
case reflect.Uint8:
format = "%#02x,"
case reflect.Uint16:
format = "%#04x,"
case reflect.Uint32:
nLine, nBlock, format = 4, 32, "%#08x,"
case reflect.Uint, reflect.Uint64:
nLine, nBlock, format = 4, 32, "%#016x,"
case reflect.Int8:
nLine = 16
}
n := nLine
for i := 0; i < v.Len(); i++ {
if i%nBlock == 0 && v.Len() > nBlock {
w.printf("// Entry %X - %X\n", i, i+nBlock-1)
}
x := v.Index(i).Interface()
w.gob.Encode(x)
w.printf(format, x)
if n--; n == 0 {
n = nLine
w.printf("\n")
}
}
w.printf("\n")
case reflect.Struct:
zero := reflect.Zero(v.Type().Elem()).Interface()
for i := 0; i < v.Len(); i++ {
x := v.Index(i).Interface()
w.gob.EncodeValue(v)
if !reflect.DeepEqual(zero, x) {
line := fmt.Sprintf("%#v,\n", x)
line = line[strings.IndexByte(line, '{'):]
w.printf("%d: ", i)
w.printf(line)
}
}
case reflect.Array:
for i := 0; i < v.Len(); i++ {
w.printf("%d: %#v,\n", i, v.Index(i).Interface())
}
default:
panic("gen: slice elem type not supported")
}
w.printf("}")
}
// WriteType writes a definition of the type of the given value and returns the
// type name.
func (w *CodeWriter) WriteType(x interface{}) string {
t := reflect.TypeOf(x)
w.printf("type %s struct {\n", t.Name())
for i := 0; i < t.NumField(); i++ {
w.printf("\t%s %s\n", t.Field(i).Name, t.Field(i).Type)
}
w.printf("}\n")
return t.Name()
}
// typeName returns the name of the go type of x.
func typeName(x interface{}) string {
t := reflect.ValueOf(x).Type()
return strings.Replace(fmt.Sprint(t), "main.", "", 1)
}

@ -1,226 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package gen contains common code for the various code generation tools in the
// text repository. Its usage ensures consistency between tools.
//
// This package defines command line flags that are common to most generation
// tools. The flags allow for specifying specific Unicode and CLDR versions
// in the public Unicode data repository (http://www.unicode.org/Public).
//
// A local Unicode data mirror can be set through the flag -local or the
// environment variable UNICODE_DIR. The former takes precedence. The local
// directory should follow the same structure as the public repository.
//
// IANA data can also optionally be mirrored by putting it in the iana directory
// rooted at the top of the local mirror. Beware, though, that IANA data is not
// versioned. So it is up to the developer to use the right version.
package gen // import "golang.org/x/text/internal/gen"
import (
"bytes"
"flag"
"fmt"
"go/format"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"path/filepath"
"unicode"
"golang.org/x/text/unicode/cldr"
)
var (
url = flag.String("url",
"http://www.unicode.org/Public",
"URL of Unicode database directory")
iana = flag.String("iana",
"http://www.iana.org",
"URL of the IANA repository")
unicodeVersion = flag.String("unicode",
getEnv("UNICODE_VERSION", unicode.Version),
"unicode version to use")
cldrVersion = flag.String("cldr",
getEnv("CLDR_VERSION", cldr.Version),
"cldr version to use")
// Allow an environment variable to specify the local directory.
// go generate doesn't allow specifying arguments; this is a useful
// alternative to specifying a local mirror.
localDir = flag.String("local",
os.Getenv("UNICODE_DIR"),
"directory containing local data files; for debugging only.")
)
func getEnv(name, def string) string {
if v := os.Getenv(name); v != "" {
return v
}
return def
}
// Init performs common initialization for a gen command. It parses the flags
// and sets up the standard logging parameters.
func Init() {
log.SetPrefix("")
log.SetFlags(log.Lshortfile)
flag.Parse()
}
const header = `// This file was generated by go generate; DO NOT EDIT
package %s
`
// UnicodeVersion reports the requested Unicode version.
func UnicodeVersion() string {
return *unicodeVersion
}
// UnicodeVersion reports the requested CLDR version.
func CLDRVersion() string {
return *cldrVersion
}
// IsLocal reports whether the user specified a local directory.
func IsLocal() bool {
return *localDir != ""
}
// OpenUCDFile opens the requested UCD file. The file is specified relative to
// the public Unicode root directory. It will call log.Fatal if there are any
// errors.
func OpenUCDFile(file string) io.ReadCloser {
return openUnicode(path.Join(*unicodeVersion, "ucd", file))
}
// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there
// are any errors.
func OpenCLDRCoreZip() io.ReadCloser {
return OpenUnicodeFile("cldr", *cldrVersion, "core.zip")
}
// OpenUnicodeFile opens the requested file of the requested category from the
// root of the Unicode data archive. The file is specified relative to the
// public Unicode root directory. If version is "", it will use the default
// Unicode version. It will call log.Fatal if there are any errors.
func OpenUnicodeFile(category, version, file string) io.ReadCloser {
if version == "" {
version = UnicodeVersion()
}
return openUnicode(path.Join(category, version, file))
}
// OpenIANAFile opens the requested IANA file. The file is specified relative
// to the IANA root, which is typically either http://www.iana.org or the
// iana directory in the local mirror. It will call log.Fatal if there are any
// errors.
func OpenIANAFile(path string) io.ReadCloser {
return Open(*iana, "iana", path)
}
// Open opens subdir/path if a local directory is specified and the file exists,
// where subdir is a directory relative to the local root, or fetches it from
// urlRoot/path otherwise. It will call log.Fatal if there are any errors.
func Open(urlRoot, subdir, path string) io.ReadCloser {
if *localDir != "" {
path = filepath.FromSlash(path)
if f, err := os.Open(filepath.Join(*localDir, subdir, path)); err == nil {
return f
}
}
return get(urlRoot, path)
}
func openUnicode(path string) io.ReadCloser {
if *localDir != "" {
path = filepath.FromSlash(path)
f, err := os.Open(filepath.Join(*localDir, path))
if err != nil {
log.Fatal(err)
}
return f
}
return get(*url, path)
}
func get(root, path string) io.ReadCloser {
url := root + "/" + path
fmt.Printf("Fetching %s...", url)
defer fmt.Println(" done.")
resp, err := http.Get(url)
if err != nil {
log.Fatalf("HTTP GET: %v", err)
}
if resp.StatusCode != 200 {
log.Fatalf("Bad GET status for %q: %q", url, resp.Status)
}
return resp.Body
}
// TODO: use Write*Version in all applicable packages.
// WriteUnicodeVersion writes a constant for the Unicode version from which the
// tables are generated.
func WriteUnicodeVersion(w io.Writer) {
fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n")
fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion())
}
// WriteCLDRVersion writes a constant for the CLDR version from which the
// tables are generated.
func WriteCLDRVersion(w io.Writer) {
fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n")
fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion())
}
// WriteGoFile prepends a standard file comment and package statement to the
// given bytes, applies gofmt, and writes them to a file with the given name.
// It will call log.Fatal if there are any errors.
func WriteGoFile(filename, pkg string, b []byte) {
w, err := os.Create(filename)
if err != nil {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer w.Close()
if _, err = WriteGo(w, pkg, b); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteGo prepends a standard file comment and package statement to the given
// bytes, applies gofmt, and writes them to w.
func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) {
src := []byte(fmt.Sprintf(header, pkg))
src = append(src, b...)
formatted, err := format.Source(src)
if err != nil {
// Print the generated code even in case of an error so that the
// returned error can be meaningfully interpreted.
n, _ = w.Write(src)
return n, err
}
return w.Write(formatted)
}
// Repackage rewrites a Go file from belonging to package main to belonging to
// the given package.
func Repackage(inFile, outFile, pkg string) {
src, err := ioutil.ReadFile(inFile)
if err != nil {
log.Fatalf("reading %s: %v", inFile, err)
}
const toDelete = "package main\n\n"
i := bytes.Index(src, []byte(toDelete))
if i < 0 {
log.Fatalf("Could not find %q in %s.", toDelete, inFile)
}
w := &bytes.Buffer{}
w.Write(src[i+len(toDelete):])
WriteGoFile(outFile, pkg, w.Bytes())
}

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,53 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testtext
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// CodeSize builds the given code sample and returns the binary size or en error
// if an error occurred. The code sample typically will look like this:
// package main
// import "golang.org/x/text/somepackage"
// func main() {
// somepackage.Func() // reference Func to cause it to be linked in.
// }
// See dict_test.go in the display package for an example.
func CodeSize(s string) (int, error) {
// Write the file.
tmpdir, err := ioutil.TempDir(os.TempDir(), "testtext")
if err != nil {
return 0, fmt.Errorf("testtext: failed to create tmpdir: %v", err)
}
defer os.RemoveAll(tmpdir)
filename := filepath.Join(tmpdir, "main.go")
if err := ioutil.WriteFile(filename, []byte(s), 0644); err != nil {
return 0, fmt.Errorf("testtext: failed to write main.go: %v", err)
}
// Build the binary.
w := &bytes.Buffer{}
cmd := exec.Command(filepath.Join(runtime.GOROOT(), "bin", "go"), "build", "-o", "main")
cmd.Dir = tmpdir
cmd.Stderr = w
cmd.Stdout = w
if err := cmd.Run(); err != nil {
return 0, fmt.Errorf("testtext: failed to execute command: %v\nmain.go:\n%vErrors:%s", err, s, w)
}
// Determine the size.
fi, err := os.Stat(filepath.Join(tmpdir, "main"))
if err != nil {
return 0, fmt.Errorf("testtext: failed to get file info: %v", err)
}
return int(fi.Size()), nil
}

@ -1,22 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testtext
import (
"flag"
"testing"
"golang.org/x/text/internal/gen"
)
var long = flag.Bool("long", false,
"run tests that require fetching data online")
// SkipIfNotLong returns whether long tests should be performed.
func SkipIfNotLong(t *testing.T) {
if !gen.IsLocal() && !*long {
t.Skip("skipping test to prevent downloading; to run use -long or use -local or UNICODE_DIR to specify a local source")
}
}

@ -1,14 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !gccgo
package testtext
import "testing"
// AllocsPerRun wraps testing.AllocsPerRun.
func AllocsPerRun(runs int, f func()) (avg float64) {
return testing.AllocsPerRun(runs, f)
}

@ -1,11 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build gccgo
package testtext
// AllocsPerRun always returns 0 for gccgo until gccgo implements escape
// analysis equal or better to that of gc.
func AllocsPerRun(runs int, f func()) (avg float64) { return 0 }

@ -1,14 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.7
package testtext
import "testing"
func Run(t *testing.T, name string, fn func(t *testing.T)) {
t.Logf("Running %s...", name)
fn(t)
}

@ -1,13 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7
package testtext
import "testing"
func Run(t *testing.T, name string, fn func(t *testing.T)) {
t.Run(name, fn)
}

@ -1,105 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package testtext contains test data that is of common use to the text
// repository.
package testtext // import "golang.org/x/text/internal/testtext"
const (
// ASCII is an ASCII string containing all letters in the English alphabet.
ASCII = "The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. "
// Vietnamese is a snippet from http://creativecommons.org/licenses/by-sa/3.0/vn/
Vietnamese = `Vi các điu kin sau: Ghi nhn công ca tác gi.
Nếu bn s dng, chuyn đi, hoc xây dng d án t
ni dung đưc chia s này, bn phi áp dng giy phép này hoc
mt giy phép khác có các điu khon tương t như giy phép này
cho d án ca bn. Hiu rng: Min Bt k các điu kin nào
trên đây cũng có th đưc min b nếu bn đưc s cho phép ca
ngưi s hu bn quyn. Phm vi công chúng Khi tác phm hoc
bt k chương nào ca tác phm đã trong vùng dành cho công
chúng theo quy đnh ca pháp lut thì tình trng ca nó không
b nh hưng bi giy phép trong bt k trưng hp nào.`
// Russian is a snippet from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
Russian = `При обязательном соблюдении следующих условий:
Attribution Вы должны атрибутировать произведение (указывать
автора и источник) в порядке, предусмотренном автором или
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
что они поддерживают вас или использование вами данного произведения).
Υπό τις ακόλουθες προϋποθέσεις:`
// Greek is a snippet from http://creativecommons.org/licenses/by-sa/3.0/gr/
Greek = `Αναφορά Δημιουργού Θα πρέπει να κάνετε την αναφορά στο έργο με τον
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
τη χρήση του έργου από εσάς). Παρόμοια Διανομή Εάν αλλοιώσετε,
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
παρόμοια άδεια.`
// Arabic is a snippet from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
Arabic = `بموجب الشروط التالية نسب المصنف يجب عليك أن
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
المشاركة على قدم المساواة إذا كنت يعدل ، والتغيير ، أو الاستفادة
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
لهذا الترخيص.`
// Hebrew is a snippet from http://creativecommons.org/licenses/by-sa/1.0/il/
Hebrew = `בכפוף לתנאים הבאים: ייחוס עליך לייחס את היצירה (לתת קרדיט) באופן
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה אם תחליט/י לשנות,
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`
TwoByteUTF8 = Russian + Greek + Arabic + Hebrew
// Thai is a snippet from http://creativecommons.org/licenses/by-sa/3.0/th/
Thai = ` :
(
)
`
ThreeByteUTF8 = Thai
// Japanese is a snippet from http://creativecommons.org/licenses/by-sa/2.0/jp/
Japanese = `
`
// Chinese is a snippet from http://creativecommons.org/licenses/by-sa/2.5/cn/
Chinese = `
广
使
`
// Korean is a snippet from http://creativecommons.org/licenses/by-sa/2.0/kr/
Korean = ` :
(
).
.`
CJK = Chinese + Japanese + Korean
All = ASCII + Vietnamese + TwoByteUTF8 + ThreeByteUTF8 + CJK
)

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,58 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
// This file defines Compacter and its implementations.
import "io"
// A Compacter generates an alternative, more space-efficient way to store a
// trie value block. A trie value block holds all possible values for the last
// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
type Compacter interface {
// Size returns whether the Compacter could encode the given block as well
// as its size in case it can. len(v) is always 64.
Size(v []uint64) (sz int, ok bool)
// Store stores the block using the Compacter's compression method.
// It returns a handle with which the block can be retrieved.
// len(v) is always 64.
Store(v []uint64) uint32
// Print writes the data structures associated to the given store to w.
Print(w io.Writer) error
// Handler returns the name of a function that gets called during trie
// lookup for blocks generated by the Compacter. The function should be of
// the form func (n uint32, b byte) uint64, where n is the index returned by
// the Compacter's Store method and b is the last byte of the UTF-8
// encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
// block.
Handler() string
}
// simpleCompacter is the default Compacter used by builder. It implements a
// normal trie block.
type simpleCompacter builder
func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
return blockSize * b.ValueSize, true
}
func (b *simpleCompacter) Store(v []uint64) uint32 {
h := uint32(len(b.ValueBlocks) - blockOffset)
b.ValueBlocks = append(b.ValueBlocks, v)
return h
}
func (b *simpleCompacter) Print(io.Writer) error {
// Structures are printed in print.go.
return nil
}
func (b *simpleCompacter) Handler() string {
panic("Handler should be special-cased for this Compacter")
}

@ -1,251 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen
import (
"bytes"
"fmt"
"io"
"strings"
"text/template"
)
// print writes all the data structures as well as the code necessary to use the
// trie to w.
func (b *builder) print(w io.Writer) error {
b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
// If we only have one root trie, all starter blocks are at position 0 and
// we can access the arrays directly.
if len(b.Trie) == 1 {
// At this point we cannot refer to the generated tables directly.
b.ASCIIBlock = b.Name + "Values"
b.StarterBlock = b.Name + "Index"
} else {
// Otherwise we need to have explicit starter indexes in the trie
// structure.
b.ASCIIBlock = "t.ascii"
b.StarterBlock = "t.utf8Start"
}
b.SourceType = "[]byte"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
b.SourceType = "string"
if err := lookupGen.Execute(w, b); err != nil {
return err
}
if err := trieGen.Execute(w, b); err != nil {
return err
}
for _, c := range b.Compactions {
if err := c.c.Print(w); err != nil {
return err
}
}
return nil
}
func printValues(n int, values []uint64) string {
w := &bytes.Buffer{}
boff := n * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
var newline bool
for i, v := range values {
if i%6 == 0 {
newline = true
}
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
}
}
return w.String()
}
func printIndex(b *builder, nr int, n *node) string {
w := &bytes.Buffer{}
boff := nr * blockSize
fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
var newline bool
for i, c := range n.children {
if i%8 == 0 {
newline = true
}
if c != nil {
v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
if v != 0 {
if newline {
fmt.Fprintf(w, "\n")
newline = false
}
fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
}
}
}
return w.String()
}
var (
trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
"printValues": printValues,
"printIndex": printIndex,
"title": strings.Title,
"dec": func(x int) int { return x - 1 },
"psize": func(n int) string {
return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
},
}).Parse(trieTemplate))
lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
)
// TODO: consider the return type of lookup. It could be uint64, even if the
// internal value type is smaller. We will have to verify this with the
// performance of unicode/norm, which is very sensitive to such changes.
const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
type {{.Name}}Trie struct { {{if $multi}}
ascii []{{.ValueType}} // index for ASCII bytes
utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
{{end}}}
func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
h := {{.Name}}TrieHandles[i]
return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
}
type {{.Name}}TrieHandle struct {
ascii, multi {{.IndexType}}
}
// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
{{end}}}{{else}}
return &{{.Name}}Trie{}
}
{{end}}
// lookupValue determines the type of block n and looks up the value for b.
func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
switch { {{range $i, $c := .Compactions}}
{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
n -= {{$c.Offset}}{{end}}
return {{print $b.ValueType}}({{$c.Handler}}){{end}}
}
}
// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
// The third block is the zero block.
var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
{{end}}}
// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
// Block 0 is the zero block.
var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
{{end}}}
`
// TODO: consider allowing zero-length strings after evaluating performance with
// unicode/norm.
const lookupTemplate = `
// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
c0 := s[0]
switch {
case c0 < 0x80: // is ASCII
return {{.ASCIIBlock}}[c0], 1
case c0 < 0xC2:
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
case c0 < 0xE0: // 2-byte UTF-8
if len(s) < 2 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c1), 2
case c0 < 0xF0: // 3-byte UTF-8
if len(s) < 3 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c2), 3
case c0 < 0xF8: // 4-byte UTF-8
if len(s) < 4 {
return 0, 0
}
i := {{.StarterBlock}}[c0]
c1 := s[1]
if c1 < 0x80 || 0xC0 <= c1 {
return 0, 1 // Illegal UTF-8: not a continuation byte.
}
o := uint32(i)<<6 + uint32(c1)
i = {{.Name}}Index[o]
c2 := s[2]
if c2 < 0x80 || 0xC0 <= c2 {
return 0, 2 // Illegal UTF-8: not a continuation byte.
}
o = uint32(i)<<6 + uint32(c2)
i = {{.Name}}Index[o]
c3 := s[3]
if c3 < 0x80 || 0xC0 <= c3 {
return 0, 3 // Illegal UTF-8: not a continuation byte.
}
return t.lookupValue(uint32(i), c3), 4
}
// Illegal rune
return 0, 1
}
// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
c0 := s[0]
if c0 < 0x80 { // is ASCII
return {{.ASCIIBlock}}[c0]
}
i := {{.StarterBlock}}[c0]
if c0 < 0xE0 { // 2-byte UTF-8
return t.lookupValue(uint32(i), s[1])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
if c0 < 0xF0 { // 3-byte UTF-8
return t.lookupValue(uint32(i), s[2])
}
i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
if c0 < 0xF8 { // 4-byte UTF-8
return t.lookupValue(uint32(i), s[3])
}
return 0
}
`

@ -1,494 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package triegen implements a code generator for a trie for associating
// unsigned integer values with UTF-8 encoded runes.
//
// Many of the go.text packages use tries for storing per-rune information. A
// trie is especially useful if many of the runes have the same value. If this
// is the case, many blocks can be expected to be shared allowing for
// information on many runes to be stored in little space.
//
// As most of the lookups are done directly on []byte slices, the tries use the
// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
// runes and contributes a little bit to better performance. It also naturally
// provides a fast path for ASCII.
//
// Space is also an issue. There are many code points defined in Unicode and as
// a result tables can get quite large. So every byte counts. The triegen
// package automatically chooses the smallest integer values to represent the
// tables. Compacters allow further compression of the trie by allowing for
// alternative representations of individual trie blocks.
//
// triegen allows generating multiple tries as a single structure. This is
// useful when, for example, one wants to generate tries for several languages
// that have a lot of values in common. Some existing libraries for
// internationalization store all per-language data as a dynamically loadable
// chunk. The go.text packages are designed with the assumption that the user
// typically wants to compile in support for all supported languages, in line
// with the approach common to Go to create a single standalone binary. The
// multi-root trie approach can give significant storage savings in this
// scenario.
//
// triegen generates both tables and code. The code is optimized to use the
// automatically chosen data types. The following code is generated for a Trie
// or multiple Tries named "foo":
// - type fooTrie
// The trie type.
//
// - func newFooTrie(x int) *fooTrie
// Trie constructor, where x is the index of the trie passed to Gen.
//
// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
// The lookup method, where uintX is automatically chosen.
//
// - func lookupString, lookupUnsafe and lookupStringUnsafe
// Variants of the above.
//
// - var fooValues and fooIndex and any tables generated by Compacters.
// The core trie data.
//
// - var fooTrieHandles
// Indexes of starter blocks in case of multiple trie roots.
//
// It is recommended that users test the generated trie by checking the returned
// value for every rune. Such exhaustive tests are possible as the the number of
// runes in Unicode is limited.
package triegen // import "golang.org/x/text/internal/triegen"
// TODO: Arguably, the internally optimized data types would not have to be
// exposed in the generated API. We could also investigate not generating the
// code, but using it through a package. We would have to investigate the impact
// on performance of making such change, though. For packages like unicode/norm,
// small changes like this could tank performance.
import (
"encoding/binary"
"fmt"
"hash/crc64"
"io"
"log"
"unicode/utf8"
)
// builder builds a set of tries for associating values with runes. The set of
// tries can share common index and value blocks.
type builder struct {
Name string
// ValueType is the type of the trie values looked up.
ValueType string
// ValueSize is the byte size of the ValueType.
ValueSize int
// IndexType is the type of trie index values used for all UTF-8 bytes of
// a rune except the last one.
IndexType string
// IndexSize is the byte size of the IndexType.
IndexSize int
// SourceType is used when generating the lookup functions. If the user
// requests StringSupport, all lookup functions will be generated for
// string input as well.
SourceType string
Trie []*Trie
IndexBlocks []*node
ValueBlocks [][]uint64
Compactions []compaction
Checksum uint64
ASCIIBlock string
StarterBlock string
indexBlockIdx map[uint64]int
valueBlockIdx map[uint64]nodeIndex
asciiBlockIdx map[uint64]int
// Stats are used to fill out the template.
Stats struct {
NValueEntries int
NValueBytes int
NIndexEntries int
NIndexBytes int
NHandleBytes int
}
err error
}
// A nodeIndex encodes the index of a node, which is defined by the compaction
// which stores it and an index within the compaction. For internal nodes, the
// compaction is always 0.
type nodeIndex struct {
compaction int
index int
}
// compaction keeps track of stats used for the compaction.
type compaction struct {
c Compacter
blocks []*node
maxHandle uint32
totalSize int
// Used by template-based generator and thus exported.
Cutoff uint32
Offset uint32
Handler string
}
func (b *builder) setError(err error) {
if b.err == nil {
b.err = err
}
}
// An Option can be passed to Gen.
type Option func(b *builder) error
// Compact configures the trie generator to use the given Compacter.
func Compact(c Compacter) Option {
return func(b *builder) error {
b.Compactions = append(b.Compactions, compaction{
c: c,
Handler: c.Handler() + "(n, b)"})
return nil
}
}
// Gen writes Go code for a shared trie lookup structure to w for the given
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
// return the *nameTrie for tries[x]. A value can be looked up by using one of
// the various lookup methods defined on nameTrie. It returns the table size of
// the generated trie.
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
// The index contains two dummy blocks, followed by the zero block. The zero
// block is at offset 0x80, so that the offset for the zero block for
// continuation bytes is 0.
b := &builder{
Name: name,
Trie: tries,
IndexBlocks: []*node{{}, {}, {}},
Compactions: []compaction{{
Handler: name + "Values[n<<6+uint32(b)]",
}},
// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
// block.
indexBlockIdx: map[uint64]int{0: 0},
valueBlockIdx: map[uint64]nodeIndex{0: {}},
asciiBlockIdx: map[uint64]int{},
}
b.Compactions[0].c = (*simpleCompacter)(b)
for _, f := range opts {
if err := f(b); err != nil {
return 0, err
}
}
b.build()
if b.err != nil {
return 0, b.err
}
if err = b.print(w); err != nil {
return 0, err
}
return b.Size(), nil
}
// A Trie represents a single root node of a trie. A builder may build several
// overlapping tries at once.
type Trie struct {
root *node
hiddenTrie
}
// hiddenTrie contains values we want to be visible to the template generator,
// but hidden from the API documentation.
type hiddenTrie struct {
Name string
Checksum uint64
ASCIIIndex int
StarterIndex int
}
// NewTrie returns a new trie root.
func NewTrie(name string) *Trie {
return &Trie{
&node{
children: make([]*node, blockSize),
values: make([]uint64, utf8.RuneSelf),
},
hiddenTrie{Name: name},
}
}
// Gen is a convenience wrapper around the Gen func passing t as the only trie
// and uses the name passed to NewTrie. It returns the size of the generated
// tables.
func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
return Gen(w, t.Name, []*Trie{t}, opts...)
}
// node is a node of the intermediate trie structure.
type node struct {
// children holds this node's children. It is always of length 64.
// A child node may be nil.
children []*node
// values contains the values of this node. If it is non-nil, this node is
// either a root or leaf node:
// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
// For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].
values []uint64
index nodeIndex
}
// Insert associates value with the given rune. Insert will panic if a non-zero
// value is passed for an invalid rune.
func (t *Trie) Insert(r rune, value uint64) {
if value == 0 {
return
}
s := string(r)
if []rune(s)[0] != r && value != 0 {
// Note: The UCD tables will always assign what amounts to a zero value
// to a surrogate. Allowing a zero value for an illegal rune allows
// users to iterate over [0..MaxRune] without having to explicitly
// exclude surrogates, which would be tedious.
panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
}
if len(s) == 1 {
// It is a root node value (ASCII).
t.root.values[s[0]] = value
return
}
n := t.root
for ; len(s) > 1; s = s[1:] {
if n.children == nil {
n.children = make([]*node, blockSize)
}
p := s[0] % blockSize
c := n.children[p]
if c == nil {
c = &node{}
n.children[p] = c
}
if len(s) > 2 && c.values != nil {
log.Fatalf("triegen: insert(%U): found internal node with values", r)
}
n = c
}
if n.values == nil {
n.values = make([]uint64, blockSize)
}
if n.children != nil {
log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
}
n.values[s[0]-0x80] = value
}
// Size returns the number of bytes the generated trie will take to store. It
// needs to be exported as it is used in the templates.
func (b *builder) Size() int {
// Index blocks.
sz := len(b.IndexBlocks) * blockSize * b.IndexSize
// Skip the first compaction, which represents the normal value blocks, as
// its totalSize does not account for the ASCII blocks, which are managed
// separately.
sz += len(b.ValueBlocks) * blockSize * b.ValueSize
for _, c := range b.Compactions[1:] {
sz += c.totalSize
}
// TODO: this computation does not account for the fixed overhead of a using
// a compaction, either code or data. As for data, though, the typical
// overhead of data is in the order of bytes (2 bytes for cases). Further,
// the savings of using a compaction should anyway be substantial for it to
// be worth it.
// For multi-root tries, we also need to account for the handles.
if len(b.Trie) > 1 {
sz += 2 * b.IndexSize * len(b.Trie)
}
return sz
}
func (b *builder) build() {
// Compute the sizes of the values.
var vmax uint64
for _, t := range b.Trie {
vmax = maxValue(t.root, vmax)
}
b.ValueType, b.ValueSize = getIntType(vmax)
// Compute all block allocations.
// TODO: first compute the ASCII blocks for all tries and then the other
// nodes. ASCII blocks are more restricted in placement, as they require two
// blocks to be placed consecutively. Processing them first may improve
// sharing (at least one zero block can be expected to be saved.)
for _, t := range b.Trie {
b.Checksum += b.buildTrie(t)
}
// Compute the offsets for all the Compacters.
offset := uint32(0)
for i := range b.Compactions {
c := &b.Compactions[i]
c.Offset = offset
offset += c.maxHandle + 1
c.Cutoff = offset
}
// Compute the sizes of indexes.
// TODO: different byte positions could have different sizes. So far we have
// not found a case where this is beneficial.
imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
for _, ib := range b.IndexBlocks {
if x := uint64(ib.index.index); x > imax {
imax = x
}
}
b.IndexType, b.IndexSize = getIntType(imax)
}
func maxValue(n *node, max uint64) uint64 {
if n == nil {
return max
}
for _, c := range n.children {
max = maxValue(c, max)
}
for _, v := range n.values {
if max < v {
max = v
}
}
return max
}
func getIntType(v uint64) (string, int) {
switch {
case v < 1<<8:
return "uint8", 1
case v < 1<<16:
return "uint16", 2
case v < 1<<32:
return "uint32", 4
}
return "uint64", 8
}
const (
blockSize = 64
// Subtract two blocks to offset 0x80, the first continuation byte.
blockOffset = 2
// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
rootBlockOffset = 3
)
var crcTable = crc64.MakeTable(crc64.ISO)
func (b *builder) buildTrie(t *Trie) uint64 {
n := t.root
// Get the ASCII offset. For the first trie, the ASCII block will be at
// position 0.
hasher := crc64.New(crcTable)
binary.Write(hasher, binary.BigEndian, n.values)
hash := hasher.Sum64()
v, ok := b.asciiBlockIdx[hash]
if !ok {
v = len(b.ValueBlocks)
b.asciiBlockIdx[hash] = v
b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
if v == 0 {
// Add the zero block at position 2 so that it will be assigned a
// zero reference in the lookup blocks.
// TODO: always do this? This would allow us to remove a check from
// the trie lookup, but at the expense of extra space. Analyze
// performance for unicode/norm.
b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
}
}
t.ASCIIIndex = v
// Compute remaining offsets.
t.Checksum = b.computeOffsets(n, true)
// We already subtracted the normal blockOffset from the index. Subtract the
// difference for starter bytes.
t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
return t.Checksum
}
func (b *builder) computeOffsets(n *node, root bool) uint64 {
// For the first trie, the root lookup block will be at position 3, which is
// the offset for UTF-8 non-ASCII starter bytes.
first := len(b.IndexBlocks) == rootBlockOffset
if first {
b.IndexBlocks = append(b.IndexBlocks, n)
}
// We special-case the cases where all values recursively are 0. This allows
// for the use of a zero block to which all such values can be directed.
hash := uint64(0)
if n.children != nil || n.values != nil {
hasher := crc64.New(crcTable)
for _, c := range n.children {
var v uint64
if c != nil {
v = b.computeOffsets(c, false)
}
binary.Write(hasher, binary.BigEndian, v)
}
binary.Write(hasher, binary.BigEndian, n.values)
hash = hasher.Sum64()
}
if first {
b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
}
// Compacters don't apply to internal nodes.
if n.children != nil {
v, ok := b.indexBlockIdx[hash]
if !ok {
v = len(b.IndexBlocks) - blockOffset
b.IndexBlocks = append(b.IndexBlocks, n)
b.indexBlockIdx[hash] = v
}
n.index = nodeIndex{0, v}
} else {
h, ok := b.valueBlockIdx[hash]
if !ok {
bestI, bestSize := 0, blockSize*b.ValueSize
for i, c := range b.Compactions[1:] {
if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
bestI, bestSize = i+1, sz
}
}
c := &b.Compactions[bestI]
c.totalSize += bestSize
v := c.c.Store(n.values)
if c.maxHandle < v {
c.maxHandle = v
}
h = nodeIndex{bestI, int(v)}
b.valueBlockIdx[hash] = h
}
n.index = h
}
return hash
}

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,363 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ucd provides a parser for Unicode Character Database files, the
// format of which is defined in http://www.unicode.org/reports/tr44/. See
// http://www.unicode.org/Public/UCD/latest/ucd/ for example files.
//
// It currently does not support substitutions of missing fields.
package ucd // import "golang.org/x/text/internal/ucd"
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"log"
"regexp"
"strconv"
"strings"
)
// UnicodeData.txt fields.
const (
CodePoint = iota
Name
GeneralCategory
CanonicalCombiningClass
BidiClass
DecompMapping
DecimalValue
DigitValue
NumericValue
BidiMirrored
Unicode1Name
ISOComment
SimpleUppercaseMapping
SimpleLowercaseMapping
SimpleTitlecaseMapping
)
// Parse calls f for each entry in the given reader of a UCD file. It will close
// the reader upon return. It will call log.Fatal if any error occurred.
//
// This implements the most common usage pattern of using Parser.
func Parse(r io.ReadCloser, f func(p *Parser)) {
defer r.Close()
p := New(r)
for p.Next() {
f(p)
}
if err := p.Err(); err != nil {
r.Close() // os.Exit will cause defers not to be called.
log.Fatal(err)
}
}
// An Option is used to configure a Parser.
type Option func(p *Parser)
func keepRanges(p *Parser) {
p.keepRanges = true
}
var (
// KeepRanges prevents the expansion of ranges. The raw ranges can be
// obtained by calling Range(0) on the parser.
KeepRanges Option = keepRanges
)
// The Part option register a handler for lines starting with a '@'. The text
// after a '@' is available as the first field. Comments are handled as usual.
func Part(f func(p *Parser)) Option {
return func(p *Parser) {
p.partHandler = f
}
}
// A Parser parses Unicode Character Database (UCD) files.
type Parser struct {
scanner *bufio.Scanner
keepRanges bool // Don't expand rune ranges in field 0.
err error
comment []byte
field [][]byte
// parsedRange is needed in case Range(0) is called more than once for one
// field. In some cases this requires scanning ahead.
parsedRange bool
rangeStart, rangeEnd rune
partHandler func(p *Parser)
}
func (p *Parser) setError(err error) {
if p.err == nil {
p.err = err
}
}
func (p *Parser) getField(i int) []byte {
if i >= len(p.field) {
p.setError(fmt.Errorf("ucd: index of field %d out of bounds", i))
return nil
}
return p.field[i]
}
// Err returns a non-nil error if any error occurred during parsing.
func (p *Parser) Err() error {
return p.err
}
// New returns a Parser for the given Reader.
func New(r io.Reader, o ...Option) *Parser {
p := &Parser{
scanner: bufio.NewScanner(r),
}
for _, f := range o {
f(p)
}
return p
}
// Next parses the next line in the file. It returns true if a line was parsed
// and false if it reached the end of the file.
func (p *Parser) Next() bool {
if !p.keepRanges && p.rangeStart < p.rangeEnd {
p.rangeStart++
return true
}
p.comment = nil
p.field = p.field[:0]
p.parsedRange = false
for p.scanner.Scan() {
b := p.scanner.Bytes()
if len(b) == 0 || b[0] == '#' {
continue
}
// Parse line
if i := bytes.IndexByte(b, '#'); i != -1 {
p.comment = bytes.TrimSpace(b[i+1:])
b = b[:i]
}
if b[0] == '@' {
if p.partHandler != nil {
p.field = append(p.field, bytes.TrimSpace(b[1:]))
p.partHandler(p)
p.field = p.field[:0]
}
p.comment = nil
continue
}
for {
i := bytes.IndexByte(b, ';')
if i == -1 {
p.field = append(p.field, bytes.TrimSpace(b))
break
}
p.field = append(p.field, bytes.TrimSpace(b[:i]))
b = b[i+1:]
}
if !p.keepRanges {
p.rangeStart, p.rangeEnd = p.getRange(0)
}
return true
}
p.setError(p.scanner.Err())
return false
}
func parseRune(b []byte) (rune, error) {
if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
b = b[2:]
}
x, err := strconv.ParseUint(string(b), 16, 32)
return rune(x), err
}
func (p *Parser) parseRune(b []byte) rune {
x, err := parseRune(b)
p.setError(err)
return x
}
// Rune parses and returns field i as a rune.
func (p *Parser) Rune(i int) rune {
if i > 0 || p.keepRanges {
return p.parseRune(p.getField(i))
}
return p.rangeStart
}
// Runes interprets and returns field i as a sequence of runes.
func (p *Parser) Runes(i int) (runes []rune) {
add := func(b []byte) {
if b = bytes.TrimSpace(b); len(b) > 0 {
runes = append(runes, p.parseRune(b))
}
}
for b := p.getField(i); ; {
i := bytes.IndexByte(b, ' ')
if i == -1 {
add(b)
break
}
add(b[:i])
b = b[i+1:]
}
return
}
var (
errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
// reRange matches one line of a legacy rune range.
reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
)
// Range parses and returns field i as a rune range. A range is inclusive at
// both ends. If the field only has one rune, first and last will be identical.
// It supports the legacy format for ranges used in UnicodeData.txt.
func (p *Parser) Range(i int) (first, last rune) {
if !p.keepRanges {
return p.rangeStart, p.rangeStart
}
return p.getRange(i)
}
func (p *Parser) getRange(i int) (first, last rune) {
b := p.getField(i)
if k := bytes.Index(b, []byte("..")); k != -1 {
return p.parseRune(b[:k]), p.parseRune(b[k+2:])
}
// The first field may not be a rune, in which case we may ignore any error
// and set the range as 0..0.
x, err := parseRune(b)
if err != nil {
// Disable range parsing henceforth. This ensures that an error will be
// returned if the user subsequently will try to parse this field as
// a Rune.
p.keepRanges = true
}
// Special case for UnicodeData that was retained for backwards compatibility.
if i == 0 && len(p.field) > 1 && bytes.HasSuffix(p.field[1], []byte("First>")) {
if p.parsedRange {
return p.rangeStart, p.rangeEnd
}
mf := reRange.FindStringSubmatch(p.scanner.Text())
if mf == nil || !p.scanner.Scan() {
p.setError(errIncorrectLegacyRange)
return x, x
}
// Using Bytes would be more efficient here, but Text is a lot easier
// and this is not a frequent case.
ml := reRange.FindStringSubmatch(p.scanner.Text())
if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
p.setError(errIncorrectLegacyRange)
return x, x
}
p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Bytes()[:len(ml[1])])
p.parsedRange = true
return p.rangeStart, p.rangeEnd
}
return x, x
}
// bools recognizes all valid UCD boolean values.
var bools = map[string]bool{
"": false,
"N": false,
"No": false,
"F": false,
"False": false,
"Y": true,
"Yes": true,
"T": true,
"True": true,
}
// Bool parses and returns field i as a boolean value.
func (p *Parser) Bool(i int) bool {
b := p.getField(i)
for s, v := range bools {
if bstrEq(b, s) {
return v
}
}
p.setError(strconv.ErrSyntax)
return false
}
// Int parses and returns field i as an integer value.
func (p *Parser) Int(i int) int {
x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
p.setError(err)
return int(x)
}
// Uint parses and returns field i as an unsigned integer value.
func (p *Parser) Uint(i int) uint {
x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
p.setError(err)
return uint(x)
}
// Float parses and returns field i as a decimal value.
func (p *Parser) Float(i int) float64 {
x, err := strconv.ParseFloat(string(p.getField(i)), 64)
p.setError(err)
return x
}
// String parses and returns field i as a string value.
func (p *Parser) String(i int) string {
return string(p.getField(i))
}
// Strings parses and returns field i as a space-separated list of strings.
func (p *Parser) Strings(i int) []string {
ss := strings.Split(string(p.getField(i)), " ")
for i, s := range ss {
ss[i] = strings.TrimSpace(s)
}
return ss
}
// Comment returns the comments for the current line.
func (p *Parser) Comment() string {
return string(p.comment)
}
var errUndefinedEnum = errors.New("ucd: undefined enum value")
// Enum interprets and returns field i as a value that must be one of the values
// in enum.
func (p *Parser) Enum(i int, enum ...string) string {
b := p.getField(i)
for _, s := range enum {
if bstrEq(b, s) {
return s
}
}
p.setError(errUndefinedEnum)
return ""
}
func bstrEq(b []byte, s string) bool {
if len(b) != len(s) {
return false
}
for i, c := range b {
if c != s[i] {
return false
}
}
return true
}

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,16 @@
# Copyright 2013 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
CLEANFILES+=maketables
maketables: maketables.go
go build $^
tables: maketables
./maketables > tables.go
gofmt -w -s tables.go
# Build (but do not run) maketables during testing,
# just to make sure it still compiles.
testshort: maketables

@ -1,92 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
// This file contains sets of data for specific languages. Users can use these
// to create smaller collections of supported languages and reduce total table
// size.
// The variable names defined here correspond to those in package language.
var (
Afrikaans *Dictionary = &af // af
Amharic *Dictionary = &am // am
Arabic *Dictionary = &ar // ar
ModernStandardArabic *Dictionary = Arabic // ar-001
Azerbaijani *Dictionary = &az // az
Bulgarian *Dictionary = &bg // bg
Bengali *Dictionary = &bn // bn
Catalan *Dictionary = &ca // ca
Czech *Dictionary = &cs // cs
Danish *Dictionary = &da // da
German *Dictionary = &de // de
Greek *Dictionary = &el // el
English *Dictionary = &en // en
AmericanEnglish *Dictionary = English // en-US
BritishEnglish *Dictionary = English // en-GB
Spanish *Dictionary = &es // es
EuropeanSpanish *Dictionary = Spanish // es-ES
LatinAmericanSpanish *Dictionary = Spanish // es-419
Estonian *Dictionary = &et // et
Persian *Dictionary = &fa // fa
Finnish *Dictionary = &fi // fi
Filipino *Dictionary = &fil // fil
French *Dictionary = &fr // fr
Gujarati *Dictionary = &gu // gu
Hebrew *Dictionary = &he // he
Hindi *Dictionary = &hi // hi
Croatian *Dictionary = &hr // hr
Hungarian *Dictionary = &hu // hu
Armenian *Dictionary = &hy // hy
Indonesian *Dictionary = &id // id
Icelandic *Dictionary = &is // is
Italian *Dictionary = &it // it
Japanese *Dictionary = &ja // ja
Georgian *Dictionary = &ka // ka
Kazakh *Dictionary = &kk // kk
Khmer *Dictionary = &km // km
Kannada *Dictionary = &kn // kn
Korean *Dictionary = &ko // ko
Kirghiz *Dictionary = &ky // ky
Lao *Dictionary = &lo // lo
Lithuanian *Dictionary = &lt // lt
Latvian *Dictionary = &lv // lv
Macedonian *Dictionary = &mk // mk
Malayalam *Dictionary = &ml // ml
Mongolian *Dictionary = &mn // mn
Marathi *Dictionary = &mr // mr
Malay *Dictionary = &ms // ms
Burmese *Dictionary = &my // my
Nepali *Dictionary = &ne // ne
Dutch *Dictionary = &nl // nl
Norwegian *Dictionary = &no // no
Punjabi *Dictionary = &pa // pa
Polish *Dictionary = &pl // pl
Portuguese *Dictionary = &pt // pt
BrazilianPortuguese *Dictionary = Portuguese // pt-BR
EuropeanPortuguese *Dictionary = &ptPT // pt-PT
Romanian *Dictionary = &ro // ro
Russian *Dictionary = &ru // ru
Sinhala *Dictionary = &si // si
Slovak *Dictionary = &sk // sk
Slovenian *Dictionary = &sl // sl
Albanian *Dictionary = &sq // sq
Serbian *Dictionary = &sr // sr
SerbianLatin *Dictionary = &srLatn // sr
Swedish *Dictionary = &sv // sv
Swahili *Dictionary = &sw // sw
Tamil *Dictionary = &ta // ta
Telugu *Dictionary = &te // te
Thai *Dictionary = &th // th
Turkish *Dictionary = &tr // tr
Ukrainian *Dictionary = &uk // uk
Urdu *Dictionary = &ur // ur
Uzbek *Dictionary = &uz // uz
Vietnamese *Dictionary = &vi // vi
Chinese *Dictionary = &zh // zh
SimplifiedChinese *Dictionary = Chinese // zh-Hans
TraditionalChinese *Dictionary = &zhHant // zh-Hant
Zulu *Dictionary = &zu // zu
)

@ -1,343 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run maketables.go -output tables.go
// Package display provides display names for languages, scripts and regions in
// a requested language.
//
// The data is based on CLDR's localeDisplayNames. It includes the names of the
// draft level "contributed" or "approved". The resulting tables are quite
// large. The display package is designed so that users can reduce the linked-in
// table sizes by cherry picking the languages one wishes to support. There is a
// Dictionary defined for a selected set of common languages for this purpose.
package display // import "golang.org/x/text/language/display"
import (
"strings"
"golang.org/x/text/language"
)
/*
TODO:
All fairly low priority at the moment:
- Include alternative and variants as an option (using func options).
- Option for returning the empty string for undefined values.
- Support variants, currencies, time zones, option names and other data
provided in CLDR.
- Do various optimizations:
- Reduce size of offset tables.
- Consider compressing infrequently used languages and decompress on demand.
*/
// A Namer is used to get the name for a given value, such as a Tag, Language,
// Script or Region.
type Namer interface {
// Name returns a display string for the given value. A Namer returns an
// empty string for values it does not support. A Namer may support naming
// an unspecified value. For example, when getting the name for a region for
// a tag that does not have a defined Region, it may return the name for an
// unknown region. It is up to the user to filter calls to Name for values
// for which one does not want to have a name string.
Name(x interface{}) string
}
var (
// Supported lists the languages for which names are defined.
Supported language.Coverage
// The set of all possible values for which names are defined. Note that not
// all Namer implementations will cover all the values of a given type.
// A Namer will return the empty string for unsupported values.
Values language.Coverage
matcher language.Matcher
)
func init() {
tags := make([]language.Tag, numSupported)
s := supported
for i := range tags {
p := strings.IndexByte(s, '|')
tags[i] = language.Raw.Make(s[:p])
s = s[p+1:]
}
matcher = language.NewMatcher(tags)
Supported = language.NewCoverage(tags)
Values = language.NewCoverage(langTagSet.Tags, supportedScripts, supportedRegions)
}
// Languages returns a Namer for naming languages. It returns nil if there is no
// data for the given tag. The type passed to Name must be either language.Base
// or language.Tag. Note that the result may differ between passing a tag or its
// base language. For example, for English, passing "nl-BE" would return Flemish
// whereas passing "nl" returns "Dutch".
func Languages(t language.Tag) Namer {
if _, index, conf := matcher.Match(t); conf != language.No {
return languageNamer(index)
}
return nil
}
type languageNamer int
func (n languageNamer) name(i int) string {
return lookup(langHeaders[:], int(n), i)
}
// Name implements the Namer interface for language names.
func (n languageNamer) Name(x interface{}) string {
return nameLanguage(n, x)
}
// nonEmptyIndex walks up the parent chain until a non-empty header is found.
// It returns -1 if no index could be found.
func nonEmptyIndex(h []header, index int) int {
for ; index != -1 && h[index].data == ""; index = int(parents[index]) {
}
return index
}
// Scripts returns a Namer for naming scripts. It returns nil if there is no
// data for the given tag. The type passed to Name must be either a
// language.Script or a language.Tag. It will not attempt to infer a script for
// tags with an unspecified script.
func Scripts(t language.Tag) Namer {
if _, index, conf := matcher.Match(t); conf != language.No {
if index = nonEmptyIndex(scriptHeaders[:], index); index != -1 {
return scriptNamer(index)
}
}
return nil
}
type scriptNamer int
func (n scriptNamer) name(i int) string {
return lookup(scriptHeaders[:], int(n), i)
}
// Name implements the Namer interface for script names.
func (n scriptNamer) Name(x interface{}) string {
return nameScript(n, x)
}
// Regions returns a Namer for naming regions. It returns nil if there is no
// data for the given tag. The type passed to Name must be either a
// language.Region or a language.Tag. It will not attempt to infer a region for
// tags with an unspecified region.
func Regions(t language.Tag) Namer {
if _, index, conf := matcher.Match(t); conf != language.No {
if index = nonEmptyIndex(regionHeaders[:], index); index != -1 {
return regionNamer(index)
}
}
return nil
}
type regionNamer int
func (n regionNamer) name(i int) string {
return lookup(regionHeaders[:], int(n), i)
}
// Name implements the Namer interface for region names.
func (n regionNamer) Name(x interface{}) string {
return nameRegion(n, x)
}
// Tags returns a Namer for giving a full description of a tag. The names of
// scripts and regions that are not already implied by the language name will
// in appended within parentheses. It returns nil if there is not data for the
// given tag. The type passed to Name must be a tag.
func Tags(t language.Tag) Namer {
if _, index, conf := matcher.Match(t); conf != language.No {
return tagNamer(index)
}
return nil
}
type tagNamer int
// Name implements the Namer interface for tag names.
func (n tagNamer) Name(x interface{}) string {
return nameTag(languageNamer(n), scriptNamer(n), regionNamer(n), x)
}
// lookup finds the name for an entry in a global table, traversing the
// inheritance hierarchy if needed.
func lookup(table []header, dict, want int) string {
for dict != -1 {
if s := table[dict].name(want); s != "" {
return s
}
dict = int(parents[dict])
}
return ""
}
// A Dictionary holds a collection of Namers for a single language. One can
// reduce the amount of data linked in to a binary by only referencing
// Dictionaries for the languages one needs to support instead of using the
// generic Namer factories.
type Dictionary struct {
parent *Dictionary
lang header
script header
region header
}
// Tags returns a Namer for giving a full description of a tag. The names of
// scripts and regions that are not already implied by the language name will
// in appended within parentheses. It returns nil if there is not data for the
// given tag. The type passed to Name must be a tag.
func (d *Dictionary) Tags() Namer {
return dictTags{d}
}
type dictTags struct {
d *Dictionary
}
// Name implements the Namer interface for tag names.
func (n dictTags) Name(x interface{}) string {
return nameTag(dictLanguages{n.d}, dictScripts{n.d}, dictRegions{n.d}, x)
}
// Languages returns a Namer for naming languages. It returns nil if there is no
// data for the given tag. The type passed to Name must be either language.Base
// or language.Tag. Note that the result may differ between passing a tag or its
// base language. For example, for English, passing "nl-BE" would return Flemish
// whereas passing "nl" returns "Dutch".
func (d *Dictionary) Languages() Namer {
return dictLanguages{d}
}
type dictLanguages struct {
d *Dictionary
}
func (n dictLanguages) name(i int) string {
for d := n.d; d != nil; d = d.parent {
if s := d.lang.name(i); s != "" {
return s
}
}
return ""
}
// Name implements the Namer interface for language names.
func (n dictLanguages) Name(x interface{}) string {
return nameLanguage(n, x)
}
// Scripts returns a Namer for naming scripts. It returns nil if there is no
// data for the given tag. The type passed to Name must be either a
// language.Script or a language.Tag. It will not attempt to infer a script for
// tags with an unspecified script.
func (d *Dictionary) Scripts() Namer {
return dictScripts{d}
}
type dictScripts struct {
d *Dictionary
}
func (n dictScripts) name(i int) string {
for d := n.d; d != nil; d = d.parent {
if s := d.script.name(i); s != "" {
return s
}
}
return ""
}
// Name implements the Namer interface for script names.
func (n dictScripts) Name(x interface{}) string {
return nameScript(n, x)
}
// Regions returns a Namer for naming regions. It returns nil if there is no
// data for the given tag. The type passed to Name must be either a
// language.Region or a language.Tag. It will not attempt to infer a region for
// tags with an unspecified region.
func (d *Dictionary) Regions() Namer {
return dictRegions{d}
}
type dictRegions struct {
d *Dictionary
}
func (n dictRegions) name(i int) string {
for d := n.d; d != nil; d = d.parent {
if s := d.region.name(i); s != "" {
return s
}
}
return ""
}
// Name implements the Namer interface for region names.
func (n dictRegions) Name(x interface{}) string {
return nameRegion(n, x)
}
// A SelfNamer implements a Namer that returns the name of language in this same
// language. It provides a very compact mechanism to provide a comprehensive
// list of languages to users in their native language.
type SelfNamer struct {
// Supported defines the values supported by this Namer.
Supported language.Coverage
}
var (
// Self is a shared instance of a SelfNamer.
Self *SelfNamer = &self
self = SelfNamer{language.NewCoverage(selfTagSet.Tags)}
)
// Name returns the name of a given language tag in the language identified by
// this tag. It supports both the language.Base and language.Tag types.
func (n SelfNamer) Name(x interface{}) string {
t, _ := language.All.Compose(x)
base, scr, reg := t.Raw()
baseScript := language.Script{}
if (scr == language.Script{} && reg != language.Region{}) {
// For looking up in the self dictionary, we need to select the
// maximized script. This is even the case if the script isn't
// specified.
s1, _ := t.Script()
if baseScript = getScript(base); baseScript != s1 {
scr = s1
}
}
i, scr, reg := selfTagSet.index(base, scr, reg)
if i == -1 {
return ""
}
// Only return the display name if the script matches the expected script.
if (scr != language.Script{}) {
if (baseScript == language.Script{}) {
baseScript = getScript(base)
}
if baseScript != scr {
return ""
}
}
return selfHeaders[0].name(i)
}
// getScript returns the maximized script for a base language.
func getScript(b language.Base) language.Script {
tag, _ := language.Raw.Compose(b)
scr, _ := tag.Script()
return scr
}

@ -1,238 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package display
// This file contains common lookup code that is shared between the various
// implementations of Namer and Dictionaries.
import (
"fmt"
"sort"
"strings"
"golang.org/x/text/language"
)
type namer interface {
// name gets the string for the given index. It should walk the
// inheritance chain if a value is not present in the base index.
name(idx int) string
}
func nameLanguage(n namer, x interface{}) string {
t, _ := language.All.Compose(x)
i, _, _ := langTagSet.index(t.Raw())
return n.name(i)
}
func nameScript(n namer, x interface{}) string {
t, _ := language.DeprecatedScript.Compose(x)
_, s, _ := t.Raw()
return n.name(scriptIndex.index(s.String()))
}
func nameRegion(n namer, x interface{}) string {
t, _ := language.DeprecatedRegion.Compose(x)
_, _, r := t.Raw()
return n.name(regionIndex.index(r.String()))
}
func nameTag(langN, scrN, regN namer, x interface{}) string {
t, ok := x.(language.Tag)
if !ok {
return ""
}
const form = language.All &^ language.SuppressScript
if c, err := form.Canonicalize(t); err == nil {
t = c
}
i, scr, reg := langTagSet.index(t.Raw())
if i == -1 {
return ""
}
str := langN.name(i)
if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
ss, sr := "", ""
if hasS {
ss = scrN.name(scriptIndex.index(scr.String()))
}
if hasR {
sr = regN.name(regionIndex.index(reg.String()))
}
// TODO: use patterns in CLDR or at least confirm they are the same for
// all languages.
if ss != "" && sr != "" {
return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
}
if ss != "" || sr != "" {
return fmt.Sprintf("%s (%s%s)", str, ss, sr)
}
}
return str
}
// header contains the data and indexes for a single namer.
// data contains a series of strings concatenated into one. index contains the
// offsets for a string in data. For example, consider a header that defines
// strings for the languages de, el, en, fi, and nl:
//
// header{
// data: "GermanGreekEnglishDutch",
// index: []uint16{ 0, 6, 11, 18, 18, 23 },
// }
//
// For a language with index i, the string is defined by
// data[index[i]:index[i+1]]. So the number of elements in index is always one
// greater than the number of languages for which header defines a value.
// A string for a language may be empty, which means the name is undefined. In
// the above example, the name for fi (Finnish) is undefined.
type header struct {
data string
index []uint16
}
// name looks up the name for a tag in the dictionary, given its index.
func (h *header) name(i int) string {
if 0 <= i && i < len(h.index)-1 {
return h.data[h.index[i]:h.index[i+1]]
}
return ""
}
// tagSet is used to find the index of a language in a set of tags.
type tagSet struct {
single tagIndex
long []string
}
var (
langTagSet = tagSet{
single: langIndex,
long: langTagsLong,
}
// selfTagSet is used for indexing the language strings in their own
// language.
selfTagSet = tagSet{
single: selfIndex,
long: selfTagsLong,
}
zzzz = language.MustParseScript("Zzzz")
zz = language.MustParseRegion("ZZ")
)
// index returns the index of the tag for the given base, script and region or
// its parent if the tag is not available. If the match is for a parent entry,
// the excess script and region are returned.
func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
lang := base.String()
index := -1
if (scr != language.Script{} || reg != language.Region{}) {
if scr == zzzz {
scr = language.Script{}
}
if reg == zz {
reg = language.Region{}
}
i := sort.SearchStrings(ts.long, lang)
// All entries have either a script or a region and not both.
scrStr, regStr := scr.String(), reg.String()
for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
if s := ts.long[i][len(lang)+1:]; s == scrStr {
scr = language.Script{}
index = i + ts.single.len()
break
} else if s == regStr {
reg = language.Region{}
index = i + ts.single.len()
break
}
}
}
if index == -1 {
index = ts.single.index(lang)
}
return index, scr, reg
}
func (ts *tagSet) Tags() []language.Tag {
tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
ts.single.keys(func(s string) {
tags = append(tags, language.Raw.MustParse(s))
})
for _, s := range ts.long {
tags = append(tags, language.Raw.MustParse(s))
}
return tags
}
func supportedScripts() []language.Script {
scr := make([]language.Script, 0, scriptIndex.len())
scriptIndex.keys(func(s string) {
scr = append(scr, language.MustParseScript(s))
})
return scr
}
func supportedRegions() []language.Region {
reg := make([]language.Region, 0, regionIndex.len())
regionIndex.keys(func(s string) {
reg = append(reg, language.MustParseRegion(s))
})
return reg
}
// tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
// for each length, which can be used in combination with binary search to get
// the index associated with a tag.
// For example, a tagIndex{
// "arenesfrruzh", // 6 2-byte tags.
// "barwae", // 2 3-byte tags.
// "",
// }
// would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
// "wae" had an index of 7.
type tagIndex [3]string
func (t *tagIndex) index(s string) int {
sz := len(s)
if sz < 2 || 4 < sz {
return -1
}
a := t[sz-2]
index := sort.Search(len(a)/sz, func(i int) bool {
p := i * sz
return a[p:p+sz] >= s
})
p := index * sz
if end := p + sz; end > len(a) || a[p:end] != s {
return -1
}
// Add the number of tags for smaller sizes.
for i := 0; i < sz-2; i++ {
index += len(t[i]) / (i + 2)
}
return index
}
// len returns the number of tags that are contained in the tagIndex.
func (t *tagIndex) len() (n int) {
for i, s := range t {
n += len(s) / (i + 2)
}
return n
}
// keys calls f for each tag.
func (t *tagIndex) keys(f func(key string)) {
for i, s := range *t {
for ; s != ""; s = s[i+2:] {
f(s[:i+2])
}
}
}

@ -1,596 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Generator for display name tables.
package main
import (
"bytes"
"flag"
"fmt"
"log"
"reflect"
"sort"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
outputFile = flag.String("output", "tables.go", "output file")
stats = flag.Bool("stats", false, "prints statistics to stderr")
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
draft = flag.String("draft",
"contributed",
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
pkg = flag.String("package",
"display",
"the name of the package in which the generated file is to be included")
tags = newTagSet("tags",
[]language.Tag{},
"space-separated list of tags to include or empty for all")
dict = newTagSet("dict",
dictTags(),
"space-separated list or tags for which to include a Dictionary. "+
`"" means the common list from go.text/language.`)
)
func dictTags() (tag []language.Tag) {
// TODO: replace with language.Common.Tags() once supported.
const str = "af am ar ar-001 az bg bn ca cs da de el en en-US en-GB " +
"es es-ES es-419 et fa fi fil fr fr-CA gu he hi hr hu hy id is it ja " +
"ka kk km kn ko ky lo lt lv mk ml mn mr ms my ne nl no pa pl pt pt-BR " +
"pt-PT ro ru si sk sl sq sr sr-Latn sv sw ta te th tr uk ur uz vi " +
"zh zh-Hans zh-Hant zu"
for _, s := range strings.Split(str, " ") {
tag = append(tag, language.MustParse(s))
}
return tag
}
func main() {
gen.Init()
// Read the CLDR zip file.
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
d.SetDirFilter("main", "supplemental")
d.SetSectionFilter("localeDisplayNames")
data, err := d.DecodeZip(r)
if err != nil {
log.Fatalf("DecodeZip: %v", err)
}
w := gen.NewCodeWriter()
defer w.WriteGoFile(*outputFile, "display")
gen.WriteCLDRVersion(w)
b := builder{
w: w,
data: data,
group: make(map[string]*group),
}
b.generate()
}
const tagForm = language.All
// tagSet is used to parse command line flags of tags. It implements the
// flag.Value interface.
type tagSet map[language.Tag]bool
func newTagSet(name string, tags []language.Tag, usage string) tagSet {
f := tagSet(make(map[language.Tag]bool))
for _, t := range tags {
f[t] = true
}
flag.Var(f, name, usage)
return f
}
// String implements the String method of the flag.Value interface.
func (f tagSet) String() string {
tags := []string{}
for t := range f {
tags = append(tags, t.String())
}
sort.Strings(tags)
return strings.Join(tags, " ")
}
// Set implements Set from the flag.Value interface.
func (f tagSet) Set(s string) error {
if s != "" {
for _, s := range strings.Split(s, " ") {
if s != "" {
tag, err := tagForm.Parse(s)
if err != nil {
return err
}
f[tag] = true
}
}
}
return nil
}
func (f tagSet) contains(t language.Tag) bool {
if len(f) == 0 {
return true
}
return f[t]
}
// builder is used to create all tables with display name information.
type builder struct {
w *gen.CodeWriter
data *cldr.CLDR
fromLocs []string
// destination tags for the current locale.
toTags []string
toTagIndex map[string]int
// list of supported tags
supported []language.Tag
// key-value pairs per group
group map[string]*group
// statistics
sizeIndex int // total size of all indexes of headers
sizeData int // total size of all data of headers
totalSize int
}
type group struct {
// Maps from a given language to the Namer data for this language.
lang map[language.Tag]keyValues
headers []header
toTags []string
threeStart int
fourPlusStart int
}
// set sets the typ to the name for locale loc.
func (g *group) set(t language.Tag, typ, name string) {
kv := g.lang[t]
if kv == nil {
kv = make(keyValues)
g.lang[t] = kv
}
if kv[typ] == "" {
kv[typ] = name
}
}
type keyValues map[string]string
type header struct {
tag language.Tag
data string
index []uint16
}
var versionInfo = `// Version is deprecated. Use CLDRVersion.
const Version = %#v
`
var self = language.MustParse("mul")
// generate builds and writes all tables.
func (b *builder) generate() {
fmt.Fprintf(b.w, versionInfo, cldr.Version)
b.filter()
b.setData("lang", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
if ldn.Languages != nil {
for _, v := range ldn.Languages.Language {
tag := tagForm.MustParse(v.Type)
if tags.contains(tag) {
g.set(loc, tag.String(), v.Data())
}
}
}
})
b.setData("script", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
if ldn.Scripts != nil {
for _, v := range ldn.Scripts.Script {
code := language.MustParseScript(v.Type)
if code.IsPrivateUse() { // Qaaa..Qabx
// TODO: data currently appears to be very meager.
// Reconsider if we have data for English.
if loc == language.English {
log.Fatal("Consider including data for private use scripts.")
}
continue
}
g.set(loc, code.String(), v.Data())
}
}
})
b.setData("region", func(g *group, loc language.Tag, ldn *cldr.LocaleDisplayNames) {
if ldn.Territories != nil {
for _, v := range ldn.Territories.Territory {
g.set(loc, language.MustParseRegion(v.Type).String(), v.Data())
}
}
})
b.makeSupported()
b.writeParents()
b.writeGroup("lang")
b.writeGroup("script")
b.writeGroup("region")
b.w.WriteConst("numSupported", len(b.supported))
buf := bytes.Buffer{}
for _, tag := range b.supported {
fmt.Fprint(&buf, tag.String(), "|")
}
b.w.WriteConst("supported", buf.String())
b.writeDictionaries()
b.supported = []language.Tag{self}
// Compute the names of locales in their own language. Some of these names
// may be specified in their parent locales. We iterate the maximum depth
// of the parent three times to match successive parents of tags until a
// possible match is found.
for i := 0; i < 4; i++ {
b.setData("self", func(g *group, tag language.Tag, ldn *cldr.LocaleDisplayNames) {
parent := tag
if b, s, r := tag.Raw(); i > 0 && (s != language.Script{} && r == language.Region{}) {
parent, _ = language.Raw.Compose(b)
}
if ldn.Languages != nil {
for _, v := range ldn.Languages.Language {
key := tagForm.MustParse(v.Type)
saved := key
if key == parent {
g.set(self, tag.String(), v.Data())
}
for k := 0; k < i; k++ {
key = key.Parent()
}
if key == tag {
g.set(self, saved.String(), v.Data()) // set does not overwrite a value.
}
}
}
})
}
b.writeGroup("self")
}
func (b *builder) setData(name string, f func(*group, language.Tag, *cldr.LocaleDisplayNames)) {
b.sizeIndex = 0
b.sizeData = 0
b.toTags = nil
b.fromLocs = nil
b.toTagIndex = make(map[string]int)
g := b.group[name]
if g == nil {
g = &group{lang: make(map[language.Tag]keyValues)}
b.group[name] = g
}
for _, loc := range b.data.Locales() {
// We use RawLDML instead of LDML as we are managing our own inheritance
// in this implementation.
ldml := b.data.RawLDML(loc)
// We do not support the POSIX variant (it is not a supported BCP 47
// variant). This locale also doesn't happen to contain any data, so
// we'll skip it by checking for this.
tag, err := tagForm.Parse(loc)
if err != nil {
if ldml.LocaleDisplayNames != nil {
log.Fatalf("setData: %v", err)
}
continue
}
if ldml.LocaleDisplayNames != nil && tags.contains(tag) {
f(g, tag, ldml.LocaleDisplayNames)
}
}
}
func (b *builder) filter() {
filter := func(s *cldr.Slice) {
if *short {
s.SelectOnePerGroup("alt", []string{"short", ""})
} else {
s.SelectOnePerGroup("alt", []string{"stand-alone", ""})
}
d, err := cldr.ParseDraft(*draft)
if err != nil {
log.Fatalf("filter: %v", err)
}
s.SelectDraft(d)
}
for _, loc := range b.data.Locales() {
if ldn := b.data.RawLDML(loc).LocaleDisplayNames; ldn != nil {
if ldn.Languages != nil {
s := cldr.MakeSlice(&ldn.Languages.Language)
if filter(&s); len(ldn.Languages.Language) == 0 {
ldn.Languages = nil
}
}
if ldn.Scripts != nil {
s := cldr.MakeSlice(&ldn.Scripts.Script)
if filter(&s); len(ldn.Scripts.Script) == 0 {
ldn.Scripts = nil
}
}
if ldn.Territories != nil {
s := cldr.MakeSlice(&ldn.Territories.Territory)
if filter(&s); len(ldn.Territories.Territory) == 0 {
ldn.Territories = nil
}
}
}
}
}
// makeSupported creates a list of all supported locales.
func (b *builder) makeSupported() {
// tags across groups
for _, g := range b.group {
for t, _ := range g.lang {
b.supported = append(b.supported, t)
}
}
b.supported = b.supported[:unique(tagsSorter(b.supported))]
}
type tagsSorter []language.Tag
func (a tagsSorter) Len() int { return len(a) }
func (a tagsSorter) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a tagsSorter) Less(i, j int) bool { return a[i].String() < a[j].String() }
func (b *builder) writeGroup(name string) {
g := b.group[name]
for _, kv := range g.lang {
for t, _ := range kv {
g.toTags = append(g.toTags, t)
}
}
g.toTags = g.toTags[:unique(tagsBySize(g.toTags))]
// Allocate header per supported value.
g.headers = make([]header, len(b.supported))
for i, sup := range b.supported {
kv, ok := g.lang[sup]
if !ok {
g.headers[i].tag = sup
continue
}
data := []byte{}
index := make([]uint16, len(g.toTags), len(g.toTags)+1)
for j, t := range g.toTags {
index[j] = uint16(len(data))
data = append(data, kv[t]...)
}
index = append(index, uint16(len(data)))
// Trim the tail of the index.
// TODO: indexes can be reduced in size quite a bit more.
n := len(index)
for ; n >= 2 && index[n-2] == index[n-1]; n-- {
}
index = index[:n]
// Workaround for a bug in CLDR 26.
// See http://unicode.org/cldr/trac/ticket/8042.
if cldr.Version == "26" && sup.String() == "hsb" {
data = bytes.Replace(data, []byte{'"'}, nil, 1)
}
g.headers[i] = header{sup, string(data), index}
}
g.writeTable(b.w, name)
}
type tagsBySize []string
func (l tagsBySize) Len() int { return len(l) }
func (l tagsBySize) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l tagsBySize) Less(i, j int) bool {
a, b := l[i], l[j]
// Sort single-tag entries based on size first. Otherwise alphabetic.
if len(a) != len(b) && (len(a) <= 4 || len(b) <= 4) {
return len(a) < len(b)
}
return a < b
}
// parentIndices returns slice a of len(tags) where tags[a[i]] is the parent
// of tags[i].
func parentIndices(tags []language.Tag) []int16 {
index := make(map[language.Tag]int16)
for i, t := range tags {
index[t] = int16(i)
}
// Construct default parents.
parents := make([]int16, len(tags))
for i, t := range tags {
parents[i] = -1
for t = t.Parent(); t != language.Und; t = t.Parent() {
if j, ok := index[t]; ok {
parents[i] = j
break
}
}
}
return parents
}
func (b *builder) writeParents() {
parents := parentIndices(b.supported)
fmt.Fprintf(b.w, "var parents = ")
b.w.WriteArray(parents)
}
// writeKeys writes keys to a special index used by the display package.
// tags are assumed to be sorted by length.
func writeKeys(w *gen.CodeWriter, name string, keys []string) {
w.Size += int(3 * reflect.TypeOf("").Size())
w.WriteComment("Number of keys: %d", len(keys))
fmt.Fprintf(w, "var (\n\t%sIndex = tagIndex{\n", name)
for i := 2; i <= 4; i++ {
sub := []string{}
for _, t := range keys {
if len(t) != i {
break
}
sub = append(sub, t)
}
s := strings.Join(sub, "")
w.WriteString(s)
fmt.Fprintf(w, ",\n")
keys = keys[len(sub):]
}
fmt.Fprintln(w, "\t}")
if len(keys) > 0 {
w.Size += int(reflect.TypeOf([]string{}).Size())
fmt.Fprintf(w, "\t%sTagsLong = ", name)
w.WriteSlice(keys)
}
fmt.Fprintln(w, ")\n")
}
// identifier creates an identifier from the given tag.
func identifier(t language.Tag) string {
return strings.Replace(t.String(), "-", "", -1)
}
func (h *header) writeEntry(w *gen.CodeWriter, name string) {
if len(dict) > 0 && dict.contains(h.tag) {
fmt.Fprintf(w, "\t{ // %s\n", h.tag)
fmt.Fprintf(w, "\t\t%[1]s%[2]sStr,\n\t\t%[1]s%[2]sIdx,\n", identifier(h.tag), name)
fmt.Fprintln(w, "\t},")
} else if len(h.data) == 0 {
fmt.Fprintln(w, "\t\t{}, //", h.tag)
} else {
fmt.Fprintf(w, "\t{ // %s\n", h.tag)
w.WriteString(h.data)
fmt.Fprintln(w, ",")
w.WriteSlice(h.index)
fmt.Fprintln(w, ",\n\t},")
}
}
// write the data for the given header as single entries. The size for this data
// was already accounted for in writeEntry.
func (h *header) writeSingle(w *gen.CodeWriter, name string) {
if len(dict) > 0 && dict.contains(h.tag) {
tag := identifier(h.tag)
w.WriteConst(tag+name+"Str", h.data)
// Note that we create a slice instead of an array. If we use an array
// we need to refer to it as a[:] in other tables, which will cause the
// array to always be included by the linker. See Issue 7651.
w.WriteVar(tag+name+"Idx", h.index)
}
}
// WriteTable writes an entry for a single Namer.
func (g *group) writeTable(w *gen.CodeWriter, name string) {
start := w.Size
writeKeys(w, name, g.toTags)
w.Size += len(g.headers) * int(reflect.ValueOf(g.headers[0]).Type().Size())
fmt.Fprintf(w, "var %sHeaders = [%d]header{\n", name, len(g.headers))
title := strings.Title(name)
for _, h := range g.headers {
h.writeEntry(w, title)
}
fmt.Fprintln(w, "}\n")
for _, h := range g.headers {
h.writeSingle(w, title)
}
n := w.Size - start
fmt.Fprintf(w, "// Total size for %s: %d bytes (%d KB)\n\n", name, n, n/1000)
}
func (b *builder) writeDictionaries() {
fmt.Fprintln(b.w, "// Dictionary entries of frequent languages")
fmt.Fprintln(b.w, "var (")
parents := parentIndices(b.supported)
for i, t := range b.supported {
if dict.contains(t) {
ident := identifier(t)
fmt.Fprintf(b.w, "\t%s = Dictionary{ // %s\n", ident, t)
if p := parents[i]; p == -1 {
fmt.Fprintln(b.w, "\t\tnil,")
} else {
fmt.Fprintf(b.w, "\t\t&%s,\n", identifier(b.supported[p]))
}
fmt.Fprintf(b.w, "\t\theader{%[1]sLangStr, %[1]sLangIdx},\n", ident)
fmt.Fprintf(b.w, "\t\theader{%[1]sScriptStr, %[1]sScriptIdx},\n", ident)
fmt.Fprintf(b.w, "\t\theader{%[1]sRegionStr, %[1]sRegionIdx},\n", ident)
fmt.Fprintln(b.w, "\t}")
}
}
fmt.Fprintln(b.w, ")")
var s string
var a []uint16
sz := reflect.TypeOf(s).Size()
sz += reflect.TypeOf(a).Size()
sz *= 3
sz += reflect.TypeOf(&a).Size()
n := int(sz) * len(dict)
fmt.Fprintf(b.w, "// Total size for %d entries: %d bytes (%d KB)\n\n", len(dict), n, n/1000)
b.w.Size += n
}
// unique sorts the given lists and removes duplicate entries by swapping them
// past position k, where k is the number of unique values. It returns k.
func unique(a sort.Interface) int {
if a.Len() == 0 {
return 0
}
sort.Sort(a)
k := 1
for i := 1; i < a.Len(); i++ {
if a.Less(k-1, i) {
if k != i {
a.Swap(k, i)
}
k++
}
}
return k
}

File diff suppressed because it is too large Load Diff

@ -1,20 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains code common to the maketables.go and the package code.
// langAliasType is the type of an alias in langAliasMap.
type langAliasType int8
const (
langDeprecated langAliasType = iota
langMacro
langLegacy
langAliasTypeUnknown langAliasType = -1
)

@ -1,162 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file generates derivative tables based on the language package itself.
import (
"bytes"
"flag"
"fmt"
"io/ioutil"
"log"
"reflect"
"sort"
"strings"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
draft = flag.String("draft",
"contributed",
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
)
func main() {
gen.Init()
// Read the CLDR zip file.
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
data, err := d.DecodeZip(r)
if err != nil {
log.Fatalf("DecodeZip: %v", err)
}
w := gen.NewCodeWriter()
defer func() {
buf := &bytes.Buffer{}
if _, err = w.WriteGo(buf, "language"); err != nil {
log.Fatalf("Error formatting file index.go: %v", err)
}
// Since we're generating a table for our own package we need to rewrite
// doing the equivalent of go fmt -r 'language.b -> b'. Using
// bytes.Replace will do.
out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
log.Fatalf("Could not create file index.go: %v", err)
}
}()
m := map[language.Tag]bool{}
for _, lang := range data.Locales() {
// We include all locales unconditionally to be consistent with en_US.
// We want en_US, even though it has no data associated with it.
// TODO: put any of the languages for which no data exists at the end
// of the index. This allows all components based on ICU to use that
// as the cutoff point.
// if x := data.RawLDML(lang); false ||
// x.LocaleDisplayNames != nil ||
// x.Characters != nil ||
// x.Delimiters != nil ||
// x.Measurement != nil ||
// x.Dates != nil ||
// x.Numbers != nil ||
// x.Units != nil ||
// x.ListPatterns != nil ||
// x.Collations != nil ||
// x.Segmentations != nil ||
// x.Rbnf != nil ||
// x.Annotations != nil ||
// x.Metadata != nil {
// TODO: support POSIX natively, albeit non-standard.
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
m[tag] = true
// }
}
// Include locales for plural rules, which uses a different structure.
for _, plurals := range data.Supplemental().Plurals {
for _, rules := range plurals.PluralRules {
for _, lang := range strings.Split(rules.Locales, " ") {
m[language.Make(lang)] = true
}
}
}
var core, special []language.Tag
for t := range m {
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
log.Fatalf("Unexpected extension %v in %v", x, t)
}
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
core = append(core, t)
} else {
special = append(special, t)
}
}
w.WriteComment(`
NumCompactTags is the number of common tags. The maximum tag is
NumCompactTags-1.`)
w.WriteConst("NumCompactTags", len(core)+len(special))
sort.Sort(byAlpha(special))
w.WriteVar("specialTags", special)
// TODO: order by frequency?
sort.Sort(byAlpha(core))
// Size computations are just an estimate.
w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
w.Size += len(core) * 6 // size of uint32 and uint16
fmt.Fprintln(w)
fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
fmt.Fprintln(w, "0x0: 0, // und")
i := len(special) + 1 // Und and special tags already written.
for _, t := range core {
if t == language.Und {
continue
}
fmt.Fprint(w.Hash, t, i)
b, s, r := t.Raw()
fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
getIndex(s, 2),
getIndex(r, 3),
i, t)
i++
}
fmt.Fprintln(w, "}")
}
// getIndex prints the subtag type and extracts its index of size nibble.
// If the index is less than n nibbles, the result is prefixed with 0s.
func getIndex(x interface{}, n int) string {
s := fmt.Sprintf("%#v", x) // s is of form Type{typeID: 0x00}
s = s[strings.Index(s, "0x")+2 : len(s)-1]
return strings.Repeat("0", n-len(s)) + s
}
type byAlpha []language.Tag
func (a byAlpha) Len() int { return len(a) }
func (a byAlpha) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byAlpha) Less(i, j int) bool { return a[i].String() < a[j].String() }

File diff suppressed because it is too large Load Diff

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,27 +0,0 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -1,110 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package cldr provides a parser for LDML and related XML formats.
// This package is inteded to be used by the table generation tools
// for the various internationalization-related packages.
// As the XML types are generated from the CLDR DTD, and as the CLDR standard
// is periodically amended, this package may change considerably over time.
// This mostly means that data may appear and disappear between versions.
// That is, old code should keep compiling for newer versions, but data
// may have moved or changed.
// CLDR version 22 is the first version supported by this package.
// Older versions may not work.
package cldr
import (
"encoding/xml"
"regexp"
"strconv"
)
// Elem is implemented by every XML element.
type Elem interface {
setEnclosing(Elem)
setName(string)
enclosing() Elem
GetCommon() *Common
}
type hidden struct {
CharData string `xml:",chardata"`
Alias *struct {
Common
Source string `xml:"source,attr"`
Path string `xml:"path,attr"`
} `xml:"alias"`
Def *struct {
Common
Choice string `xml:"choice,attr,omitempty"`
Type string `xml:"type,attr,omitempty"`
} `xml:"default"`
}
// Common holds several of the most common attributes and sub elements
// of an XML element.
type Common struct {
XMLName xml.Name
name string
enclElem Elem
Type string `xml:"type,attr,omitempty"`
Reference string `xml:"reference,attr,omitempty"`
Alt string `xml:"alt,attr,omitempty"`
ValidSubLocales string `xml:"validSubLocales,attr,omitempty"`
Draft string `xml:"draft,attr,omitempty"`
hidden
}
// Default returns the default type to select from the enclosed list
// or "" if no default value is specified.
func (e *Common) Default() string {
if e.Def == nil {
return ""
}
if e.Def.Choice != "" {
return e.Def.Choice
} else if e.Def.Type != "" {
// Type is still used by the default element in collation.
return e.Def.Type
}
return ""
}
// GetCommon returns e. It is provided such that Common implements Elem.
func (e *Common) GetCommon() *Common {
return e
}
// Data returns the character data accumulated for this element.
func (e *Common) Data() string {
e.CharData = charRe.ReplaceAllStringFunc(e.CharData, replaceUnicode)
return e.CharData
}
func (e *Common) setName(s string) {
e.name = s
}
func (e *Common) enclosing() Elem {
return e.enclElem
}
func (e *Common) setEnclosing(en Elem) {
e.enclElem = en
}
// Escape characters that can be escaped without further escaping the string.
var charRe = regexp.MustCompile(`&#x[0-9a-fA-F]*;|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\[abtnvfr]`)
// replaceUnicode converts hexadecimal Unicode codepoint notations to a one-rune string.
// It assumes the input string is correctly formatted.
func replaceUnicode(s string) string {
if s[1] == '#' {
r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32)
return string(r)
}
r, _, _, _ := strconv.UnquoteChar(s, 0)
return string(r)
}

@ -1,130 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run makexml.go -output xml.go
// Package cldr provides a parser for LDML and related XML formats.
// This package is inteded to be used by the table generation tools
// for the various internationalization-related packages.
// As the XML types are generated from the CLDR DTD, and as the CLDR standard
// is periodically amended, this package may change considerably over time.
// This mostly means that data may appear and disappear between versions.
// That is, old code should keep compiling for newer versions, but data
// may have moved or changed.
// CLDR version 22 is the first version supported by this package.
// Older versions may not work.
package cldr // import "golang.org/x/text/unicode/cldr"
import (
"fmt"
"sort"
)
// CLDR provides access to parsed data of the Unicode Common Locale Data Repository.
type CLDR struct {
parent map[string][]string
locale map[string]*LDML
resolved map[string]*LDML
bcp47 *LDMLBCP47
supp *SupplementalData
}
func makeCLDR() *CLDR {
return &CLDR{
parent: make(map[string][]string),
locale: make(map[string]*LDML),
resolved: make(map[string]*LDML),
bcp47: &LDMLBCP47{},
supp: &SupplementalData{},
}
}
// BCP47 returns the parsed BCP47 LDML data. If no such data was parsed, nil is returned.
func (cldr *CLDR) BCP47() *LDMLBCP47 {
return nil
}
// Draft indicates the draft level of an element.
type Draft int
const (
Approved Draft = iota
Contributed
Provisional
Unconfirmed
)
var drafts = []string{"unconfirmed", "provisional", "contributed", "approved", ""}
// ParseDraft returns the Draft value corresponding to the given string. The
// empty string corresponds to Approved.
func ParseDraft(level string) (Draft, error) {
if level == "" {
return Approved, nil
}
for i, s := range drafts {
if level == s {
return Unconfirmed - Draft(i), nil
}
}
return Approved, fmt.Errorf("cldr: unknown draft level %q", level)
}
func (d Draft) String() string {
return drafts[len(drafts)-1-int(d)]
}
// SetDraftLevel sets which draft levels to include in the evaluated LDML.
// Any draft element for which the draft level is higher than lev will be excluded.
// If multiple draft levels are available for a single element, the one with the
// lowest draft level will be selected, unless preferDraft is true, in which case
// the highest draft will be chosen.
// It is assumed that the underlying LDML is canonicalized.
func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool) {
// TODO: implement
cldr.resolved = make(map[string]*LDML)
}
// RawLDML returns the LDML XML for id in unresolved form.
// id must be one of the strings returned by Locales.
func (cldr *CLDR) RawLDML(loc string) *LDML {
return cldr.locale[loc]
}
// LDML returns the fully resolved LDML XML for loc, which must be one of
// the strings returned by Locales.
func (cldr *CLDR) LDML(loc string) (*LDML, error) {
return cldr.resolve(loc)
}
// Supplemental returns the parsed supplemental data. If no such data was parsed,
// nil is returned.
func (cldr *CLDR) Supplemental() *SupplementalData {
return cldr.supp
}
// Locales returns the locales for which there exist files.
// Valid sublocales for which there is no file are not included.
// The root locale is always sorted first.
func (cldr *CLDR) Locales() []string {
loc := []string{"root"}
hasRoot := false
for l, _ := range cldr.locale {
if l == "root" {
hasRoot = true
continue
}
loc = append(loc, l)
}
sort.Strings(loc[1:])
if !hasRoot {
return loc[1:]
}
return loc
}
// Get fills in the fields of x based on the XPath path.
func Get(e Elem, path string) (res Elem, err error) {
return walkXPath(e, path)
}

@ -1,359 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"bufio"
"encoding/xml"
"errors"
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// RuleProcessor can be passed to Collator's Process method, which
// parses the rules and calls the respective method for each rule found.
type RuleProcessor interface {
Reset(anchor string, before int) error
Insert(level int, str, context, extend string) error
Index(id string)
}
const (
// cldrIndex is a Unicode-reserved sentinel value used to mark the start
// of a grouping within an index.
// We ignore any rule that starts with this rune.
// See http://unicode.org/reports/tr35/#Collation_Elements for details.
cldrIndex = "\uFDD0"
// specialAnchor is the format in which to represent logical reset positions,
// such as "first tertiary ignorable".
specialAnchor = "<%s/>"
)
// Process parses the rules for the tailorings of this collation
// and calls the respective methods of p for each rule found.
func (c Collation) Process(p RuleProcessor) (err error) {
if len(c.Cr) > 0 {
if len(c.Cr) > 1 {
return fmt.Errorf("multiple cr elements, want 0 or 1")
}
return processRules(p, c.Cr[0].Data())
}
if c.Rules.Any != nil {
return c.processXML(p)
}
return errors.New("no tailoring data")
}
// processRules parses rules in the Collation Rule Syntax defined in
// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings.
func processRules(p RuleProcessor, s string) (err error) {
chk := func(s string, e error) string {
if err == nil {
err = e
}
return s
}
i := 0 // Save the line number for use after the loop.
scanner := bufio.NewScanner(strings.NewReader(s))
for ; scanner.Scan() && err == nil; i++ {
for s := skipSpace(scanner.Text()); s != "" && s[0] != '#'; s = skipSpace(s) {
level := 5
var ch byte
switch ch, s = s[0], s[1:]; ch {
case '&': // followed by <anchor> or '[' <key> ']'
if s = skipSpace(s); consume(&s, '[') {
s = chk(parseSpecialAnchor(p, s))
} else {
s = chk(parseAnchor(p, 0, s))
}
case '<': // sort relation '<'{1,4}, optionally followed by '*'.
for level = 1; consume(&s, '<'); level++ {
}
if level > 4 {
err = fmt.Errorf("level %d > 4", level)
}
fallthrough
case '=': // identity relation, optionally followed by *.
if consume(&s, '*') {
s = chk(parseSequence(p, level, s))
} else {
s = chk(parseOrder(p, level, s))
}
default:
chk("", fmt.Errorf("illegal operator %q", ch))
break
}
}
}
if chk("", scanner.Err()); err != nil {
return fmt.Errorf("%d: %v", i, err)
}
return nil
}
// parseSpecialAnchor parses the anchor syntax which is either of the form
// ['before' <level>] <anchor>
// or
// [<label>]
// The starting should already be consumed.
func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
i := strings.IndexByte(s, ']')
if i == -1 {
return "", errors.New("unmatched bracket")
}
a := strings.TrimSpace(s[:i])
s = s[i+1:]
if strings.HasPrefix(a, "before ") {
l, err := strconv.ParseUint(skipSpace(a[len("before "):]), 10, 3)
if err != nil {
return s, err
}
return parseAnchor(p, int(l), s)
}
return s, p.Reset(fmt.Sprintf(specialAnchor, a), 0)
}
func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
anchor, s, err := scanString(s)
if err != nil {
return s, err
}
return s, p.Reset(anchor, level)
}
func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
var value, context, extend string
if value, s, err = scanString(s); err != nil {
return s, err
}
if strings.HasPrefix(value, cldrIndex) {
p.Index(value[len(cldrIndex):])
return
}
if consume(&s, '|') {
if context, s, err = scanString(s); err != nil {
return s, errors.New("missing string after context")
}
}
if consume(&s, '/') {
if extend, s, err = scanString(s); err != nil {
return s, errors.New("missing string after extension")
}
}
return s, p.Insert(level, value, context, extend)
}
// scanString scans a single input string.
func scanString(s string) (str, tail string, err error) {
if s = skipSpace(s); s == "" {
return s, s, errors.New("missing string")
}
buf := [16]byte{} // small but enough to hold most cases.
value := buf[:0]
for s != "" {
if consume(&s, '\'') {
i := strings.IndexByte(s, '\'')
if i == -1 {
return "", "", errors.New(`unmatched single quote`)
}
if i == 0 {
value = append(value, '\'')
} else {
value = append(value, s[:i]...)
}
s = s[i+1:]
continue
}
r, sz := utf8.DecodeRuneInString(s)
if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) {
break
}
value = append(value, s[:sz]...)
s = s[sz:]
}
return string(value), skipSpace(s), nil
}
func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
if s = skipSpace(s); s == "" {
return s, errors.New("empty sequence")
}
last := rune(0)
for s != "" {
r, sz := utf8.DecodeRuneInString(s)
s = s[sz:]
if r == '-' {
// We have a range. The first element was already written.
if last == 0 {
return s, errors.New("range without starter value")
}
r, sz = utf8.DecodeRuneInString(s)
s = s[sz:]
if r == utf8.RuneError || r < last {
return s, fmt.Errorf("invalid range %q-%q", last, r)
}
for i := last + 1; i <= r; i++ {
if err := p.Insert(level, string(i), "", ""); err != nil {
return s, err
}
}
last = 0
continue
}
if unicode.IsSpace(r) || unicode.IsPunct(r) {
break
}
// normal case
if err := p.Insert(level, string(r), "", ""); err != nil {
return s, err
}
last = r
}
return s, nil
}
func skipSpace(s string) string {
return strings.TrimLeftFunc(s, unicode.IsSpace)
}
// consumes returns whether the next byte is ch. If so, it gobbles it by
// updating s.
func consume(s *string, ch byte) (ok bool) {
if *s == "" || (*s)[0] != ch {
return false
}
*s = (*s)[1:]
return true
}
// The following code parses Collation rules of CLDR version 24 and before.
var lmap = map[byte]int{
'p': 1,
's': 2,
't': 3,
'i': 5,
}
type rulesElem struct {
Rules struct {
Common
Any []*struct {
XMLName xml.Name
rule
} `xml:",any"`
} `xml:"rules"`
}
type rule struct {
Value string `xml:",chardata"`
Before string `xml:"before,attr"`
Any []*struct {
XMLName xml.Name
rule
} `xml:",any"`
}
var emptyValueError = errors.New("cldr: empty rule value")
func (r *rule) value() (string, error) {
// Convert hexadecimal Unicode codepoint notation to a string.
s := charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
r.Value = s
if s == "" {
if len(r.Any) != 1 {
return "", emptyValueError
}
r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
r.Any = nil
} else if len(r.Any) != 0 {
return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
}
return r.Value, nil
}
func (r rule) process(p RuleProcessor, name, context, extend string) error {
v, err := r.value()
if err != nil {
return err
}
switch name {
case "p", "s", "t", "i":
if strings.HasPrefix(v, cldrIndex) {
p.Index(v[len(cldrIndex):])
return nil
}
if err := p.Insert(lmap[name[0]], v, context, extend); err != nil {
return err
}
case "pc", "sc", "tc", "ic":
level := lmap[name[0]]
for _, s := range v {
if err := p.Insert(level, string(s), context, extend); err != nil {
return err
}
}
default:
return fmt.Errorf("cldr: unsupported tag: %q", name)
}
return nil
}
// processXML parses the format of CLDR versions 24 and older.
func (c Collation) processXML(p RuleProcessor) (err error) {
// Collation is generated and defined in xml.go.
var v string
for _, r := range c.Rules.Any {
switch r.XMLName.Local {
case "reset":
level := 0
switch r.Before {
case "primary", "1":
level = 1
case "secondary", "2":
level = 2
case "tertiary", "3":
level = 3
case "":
default:
return fmt.Errorf("cldr: unknown level %q", r.Before)
}
v, err = r.value()
if err == nil {
err = p.Reset(v, level)
}
case "x":
var context, extend string
for _, r1 := range r.Any {
v, err = r1.value()
switch r1.XMLName.Local {
case "context":
context = v
case "extend":
extend = v
}
}
for _, r1 := range r.Any {
if t := r1.XMLName.Local; t == "context" || t == "extend" {
continue
}
r1.rule.process(p, r1.XMLName.Local, context, extend)
}
default:
err = r.rule.process(p, r.XMLName.Local, "", "")
}
if err != nil {
return err
}
}
return nil
}

@ -1,171 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"archive/zip"
"bytes"
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
)
// A Decoder loads an archive of CLDR data.
type Decoder struct {
dirFilter []string
sectionFilter []string
loader Loader
cldr *CLDR
curLocale string
}
// SetSectionFilter takes a list top-level LDML element names to which
// evaluation of LDML should be limited. It automatically calls SetDirFilter.
func (d *Decoder) SetSectionFilter(filter ...string) {
d.sectionFilter = filter
// TODO: automatically set dir filter
}
// SetDirFilter limits the loading of LDML XML files of the specied directories.
// Note that sections may be split across directories differently for different CLDR versions.
// For more robust code, use SetSectionFilter.
func (d *Decoder) SetDirFilter(dir ...string) {
d.dirFilter = dir
}
// A Loader provides access to the files of a CLDR archive.
type Loader interface {
Len() int
Path(i int) string
Reader(i int) (io.ReadCloser, error)
}
var fileRe = regexp.MustCompile(".*/(.*)/(.*)\\.xml")
// Decode loads and decodes the files represented by l.
func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) {
d.cldr = makeCLDR()
for i := 0; i < l.Len(); i++ {
fname := l.Path(i)
if m := fileRe.FindStringSubmatch(fname); m != nil {
if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) {
continue
}
var r io.Reader
if r, err = l.Reader(i); err == nil {
err = d.decode(m[1], m[2], r)
}
if err != nil {
return nil, err
}
}
}
d.cldr.finalize(d.sectionFilter)
return d.cldr, nil
}
func (d *Decoder) decode(dir, id string, r io.Reader) error {
var v interface{}
var l *LDML
cldr := d.cldr
switch {
case dir == "supplemental":
v = cldr.supp
case dir == "transforms":
return nil
case dir == "bcp47":
v = cldr.bcp47
case dir == "validity":
return nil
default:
ok := false
if v, ok = cldr.locale[id]; !ok {
l = &LDML{}
v, cldr.locale[id] = l, l
}
}
x := xml.NewDecoder(r)
if err := x.Decode(v); err != nil {
log.Printf("%s/%s: %v", dir, id, err)
return err
}
if l != nil {
if l.Identity == nil {
return fmt.Errorf("%s/%s: missing identity element", dir, id)
}
// TODO: verify when CLDR bug http://unicode.org/cldr/trac/ticket/8970
// is resolved.
// path := strings.Split(id, "_")
// if lang := l.Identity.Language.Type; lang != path[0] {
// return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0])
// }
}
return nil
}
type pathLoader []string
func makePathLoader(path string) (pl pathLoader, err error) {
err = filepath.Walk(path, func(path string, _ os.FileInfo, err error) error {
pl = append(pl, path)
return err
})
return pl, err
}
func (pl pathLoader) Len() int {
return len(pl)
}
func (pl pathLoader) Path(i int) string {
return pl[i]
}
func (pl pathLoader) Reader(i int) (io.ReadCloser, error) {
return os.Open(pl[i])
}
// DecodePath loads CLDR data from the given path.
func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) {
loader, err := makePathLoader(path)
if err != nil {
return nil, err
}
return d.Decode(loader)
}
type zipLoader struct {
r *zip.Reader
}
func (zl zipLoader) Len() int {
return len(zl.r.File)
}
func (zl zipLoader) Path(i int) string {
return zl.r.File[i].Name
}
func (zl zipLoader) Reader(i int) (io.ReadCloser, error) {
return zl.r.File[i].Open()
}
// DecodeZip loads CLDR data from the zip archive for which r is the source.
func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) {
buffer, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
if err != nil {
return nil, err
}
return d.Decode(zipLoader{archive})
}

@ -1,400 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This tool generates types for the various XML formats of CLDR.
package main
import (
"archive/zip"
"bytes"
"encoding/xml"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"regexp"
"strings"
"golang.org/x/text/internal/gen"
)
var outputFile = flag.String("output", "xml.go", "output file name")
func main() {
flag.Parse()
r := gen.OpenCLDRCoreZip()
buffer, err := ioutil.ReadAll(r)
if err != nil {
log.Fatal("Could not read zip file")
}
r.Close()
z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
if err != nil {
log.Fatalf("Could not read zip archive: %v", err)
}
var buf bytes.Buffer
version := gen.CLDRVersion()
for _, dtd := range files {
for _, f := range z.File {
if strings.HasSuffix(f.Name, dtd.file+".dtd") {
r, err := f.Open()
failOnError(err)
b := makeBuilder(&buf, dtd)
b.parseDTD(r)
b.resolve(b.index[dtd.top[0]])
b.write()
if b.version != "" && version != b.version {
println(f.Name)
log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
}
break
}
}
}
fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
fmt.Fprintf(&buf, "const Version = %q\n", version)
gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
}
func failOnError(err error) {
if err != nil {
log.New(os.Stderr, "", log.Lshortfile).Output(2, err.Error())
os.Exit(1)
}
}
// configuration data per DTD type
type dtd struct {
file string // base file name
root string // Go name of the root XML element
top []string // create a different type for this section
skipElem []string // hard-coded or deprecated elements
skipAttr []string // attributes to exclude
predefined []string // hard-coded elements exist of the form <name>Elem
forceRepeat []string // elements to make slices despite DTD
}
var files = []dtd{
{
file: "ldmlBCP47",
root: "LDMLBCP47",
top: []string{"ldmlBCP47"},
skipElem: []string{
"cldrVersion", // deprecated, not used
},
},
{
file: "ldmlSupplemental",
root: "SupplementalData",
top: []string{"supplementalData"},
skipElem: []string{
"cldrVersion", // deprecated, not used
},
forceRepeat: []string{
"plurals", // data defined in plurals.xml and ordinals.xml
},
},
{
file: "ldml",
root: "LDML",
top: []string{
"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
},
skipElem: []string{
"cp", // not used anywhere
"special", // not used anywhere
"fallback", // deprecated, not used
"alias", // in Common
"default", // in Common
},
skipAttr: []string{
"hiraganaQuarternary", // typo in DTD, correct version included as well
},
predefined: []string{"rules"},
},
}
var comments = map[string]string{
"ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
"supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
"ldml": `
// LDML is the top-level type for locale-specific data.
`,
"collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order.
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
"calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
"dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
"localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for for scripts, languages,
// countries, currencies, and variants.
`,
"numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}
type element struct {
name string // XML element name
category string // elements contained by this element
signature string // category + attrKey*
attr []*attribute // attributes supported by this element.
sub []struct { // parsed and evaluated sub elements of this element.
e *element
repeat bool // true if the element needs to be a slice
}
resolved bool // prevent multiple resolutions of this element.
}
type attribute struct {
name string
key string
list []string
tag string // Go tag
}
var (
reHead = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
reAttr = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
reElem = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
reToken = regexp.MustCompile(`\w\-`)
)
// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
w io.Writer
index map[string]*element
elem []*element
info dtd
version string
}
func makeBuilder(w io.Writer, d dtd) builder {
return builder{
w: w,
index: make(map[string]*element),
elem: []*element{},
info: d,
}
}
// parseDTD parses a DTD file.
func (b *builder) parseDTD(r io.Reader) {
for d := xml.NewDecoder(r); ; {
t, err := d.Token()
if t == nil {
break
}
failOnError(err)
dir, ok := t.(xml.Directive)
if !ok {
continue
}
m := reHead.FindSubmatch(dir)
dir = dir[len(m[0]):]
ename := string(m[2])
el, elementFound := b.index[ename]
switch string(m[1]) {
case "ELEMENT":
if elementFound {
log.Fatal("parseDTD: duplicate entry for element %q", ename)
}
m := reElem.FindSubmatch(dir)
if m == nil {
log.Fatalf("parseDTD: invalid element %q", string(dir))
}
if len(m[0]) != len(dir) {
log.Fatal("parseDTD: invalid element %q", string(dir), len(dir), len(m[0]), string(m[0]))
}
s := string(m[1])
el = &element{
name: ename,
category: s,
}
b.index[ename] = el
case "ATTLIST":
if !elementFound {
log.Fatalf("parseDTD: unknown element %q", ename)
}
s := string(dir)
m := reAttr.FindStringSubmatch(s)
if m == nil {
log.Fatal(fmt.Errorf("parseDTD: invalid attribute %q", string(dir)))
}
if m[4] == "FIXED" {
b.version = m[5]
} else {
switch m[1] {
case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
case "type", "choice":
default:
el.attr = append(el.attr, &attribute{
name: m[1],
key: s,
list: reToken.FindAllString(m[3], -1),
})
el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
}
}
}
}
}
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
// resolve takes a parsed element and converts it into structured data
// that can be used to generate the XML code.
func (b *builder) resolve(e *element) {
if e.resolved {
return
}
b.elem = append(b.elem, e)
e.resolved = true
s := e.category
found := make(map[string]bool)
sequenceStart := []int{}
for len(s) > 0 {
m := reCat.FindStringSubmatch(s)
if m == nil {
log.Fatalf("%s: invalid category string %q", e.name, s)
}
repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
switch m[1] {
case "":
case "(":
sequenceStart = append(sequenceStart, len(e.sub))
case ")":
if len(sequenceStart) == 0 {
log.Fatalf("%s: unmatched closing parenthesis", e.name)
}
for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
e.sub[i].repeat = e.sub[i].repeat || repeat
}
sequenceStart = sequenceStart[:len(sequenceStart)-1]
default:
if in(b.info.skipElem, m[1]) {
} else if sub, ok := b.index[m[1]]; ok {
if !found[sub.name] {
e.sub = append(e.sub, struct {
e *element
repeat bool
}{sub, repeat})
found[sub.name] = true
b.resolve(sub)
}
} else if m[1] == "#PCDATA" || m[1] == "ANY" {
} else if m[1] != "EMPTY" {
log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
}
}
s = s[len(m[0]):]
}
}
// return true if s is contained in set.
func in(set []string, s string) bool {
for _, v := range set {
if v == s {
return true
}
}
return false
}
var repl = strings.NewReplacer("-", " ", "_", " ")
// title puts the first character or each character following '_' in title case and
// removes all occurrences of '_'.
func title(s string) string {
return strings.Replace(strings.Title(repl.Replace(s)), " ", "", -1)
}
// writeElem generates Go code for a single element, recursively.
func (b *builder) writeElem(tab int, e *element) {
p := func(f string, x ...interface{}) {
f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
fmt.Fprintf(b.w, f, x...)
}
if len(e.sub) == 0 && len(e.attr) == 0 {
p("Common")
return
}
p("struct {")
tab++
p("\nCommon")
for _, attr := range e.attr {
if !in(b.info.skipAttr, attr.name) {
p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
}
}
for _, sub := range e.sub {
if in(b.info.predefined, sub.e.name) {
p("\n%sElem", sub.e.name)
continue
}
if in(b.info.skipElem, sub.e.name) {
continue
}
p("\n%s ", title(sub.e.name))
if sub.repeat {
p("[]")
}
p("*")
if in(b.info.top, sub.e.name) {
p(title(sub.e.name))
} else {
b.writeElem(tab, sub.e)
}
p(" `xml:\"%s\"`", sub.e.name)
}
tab--
p("\n}")
}
// write generates the Go XML code.
func (b *builder) write() {
for i, name := range b.info.top {
e := b.index[name]
if e != nil {
fmt.Fprintf(b.w, comments[name])
name := title(e.name)
if i == 0 {
name = b.info.root
}
fmt.Fprintf(b.w, "type %s ", name)
b.writeElem(0, e)
fmt.Fprint(b.w, "\n")
}
}
}

@ -1,602 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
// This file implements the various inheritance constructs defined by LDML.
// See http://www.unicode.org/reports/tr35/#Inheritance_and_Validity
// for more details.
import (
"fmt"
"log"
"reflect"
"regexp"
"sort"
"strings"
)
// fieldIter iterates over fields in a struct. It includes
// fields of embedded structs.
type fieldIter struct {
v reflect.Value
index, n []int
}
func iter(v reflect.Value) fieldIter {
if v.Kind() != reflect.Struct {
log.Panicf("value %v must be a struct", v)
}
i := fieldIter{
v: v,
index: []int{0},
n: []int{v.NumField()},
}
i.descent()
return i
}
func (i *fieldIter) descent() {
for f := i.field(); f.Anonymous && f.Type.NumField() > 0; f = i.field() {
i.index = append(i.index, 0)
i.n = append(i.n, f.Type.NumField())
}
}
func (i *fieldIter) done() bool {
return len(i.index) == 1 && i.index[0] >= i.n[0]
}
func skip(f reflect.StructField) bool {
return !f.Anonymous && (f.Name[0] < 'A' || f.Name[0] > 'Z')
}
func (i *fieldIter) next() {
for {
k := len(i.index) - 1
i.index[k]++
if i.index[k] < i.n[k] {
if !skip(i.field()) {
break
}
} else {
if k == 0 {
return
}
i.index = i.index[:k]
i.n = i.n[:k]
}
}
i.descent()
}
func (i *fieldIter) value() reflect.Value {
return i.v.FieldByIndex(i.index)
}
func (i *fieldIter) field() reflect.StructField {
return i.v.Type().FieldByIndex(i.index)
}
type visitor func(v reflect.Value) error
var stopDescent = fmt.Errorf("do not recurse")
func (f visitor) visit(x interface{}) error {
return f.visitRec(reflect.ValueOf(x))
}
// visit recursively calls f on all nodes in v.
func (f visitor) visitRec(v reflect.Value) error {
if v.Kind() == reflect.Ptr {
if v.IsNil() {
return nil
}
return f.visitRec(v.Elem())
}
if err := f(v); err != nil {
if err == stopDescent {
return nil
}
return err
}
switch v.Kind() {
case reflect.Struct:
for i := iter(v); !i.done(); i.next() {
if err := f.visitRec(i.value()); err != nil {
return err
}
}
case reflect.Slice:
for i := 0; i < v.Len(); i++ {
if err := f.visitRec(v.Index(i)); err != nil {
return err
}
}
}
return nil
}
// getPath is used for error reporting purposes only.
func getPath(e Elem) string {
if e == nil {
return "<nil>"
}
if e.enclosing() == nil {
return e.GetCommon().name
}
if e.GetCommon().Type == "" {
return fmt.Sprintf("%s.%s", getPath(e.enclosing()), e.GetCommon().name)
}
return fmt.Sprintf("%s.%s[type=%s]", getPath(e.enclosing()), e.GetCommon().name, e.GetCommon().Type)
}
// xmlName returns the xml name of the element or attribute
func xmlName(f reflect.StructField) (name string, attr bool) {
tags := strings.Split(f.Tag.Get("xml"), ",")
for _, s := range tags {
attr = attr || s == "attr"
}
return tags[0], attr
}
func findField(v reflect.Value, key string) (reflect.Value, error) {
v = reflect.Indirect(v)
for i := iter(v); !i.done(); i.next() {
if n, _ := xmlName(i.field()); n == key {
return i.value(), nil
}
}
return reflect.Value{}, fmt.Errorf("cldr: no field %q in element %#v", key, v.Interface())
}
var xpathPart = regexp.MustCompile(`(\pL+)(?:\[@(\pL+)='([\w-]+)'\])?`)
func walkXPath(e Elem, path string) (res Elem, err error) {
for _, c := range strings.Split(path, "/") {
if c == ".." {
if e = e.enclosing(); e == nil {
panic("path ..")
return nil, fmt.Errorf(`cldr: ".." moves past root in path %q`, path)
}
continue
} else if c == "" {
continue
}
m := xpathPart.FindStringSubmatch(c)
if len(m) == 0 || len(m[0]) != len(c) {
return nil, fmt.Errorf("cldr: syntax error in path component %q", c)
}
v, err := findField(reflect.ValueOf(e), m[1])
if err != nil {
return nil, err
}
switch v.Kind() {
case reflect.Slice:
i := 0
if m[2] != "" || v.Len() > 1 {
if m[2] == "" {
m[2] = "type"
if m[3] = e.GetCommon().Default(); m[3] == "" {
return nil, fmt.Errorf("cldr: type selector or default value needed for element %s", m[1])
}
}
for ; i < v.Len(); i++ {
vi := v.Index(i)
key, err := findField(vi.Elem(), m[2])
if err != nil {
return nil, err
}
key = reflect.Indirect(key)
if key.Kind() == reflect.String && key.String() == m[3] {
break
}
}
}
if i == v.Len() || v.Index(i).IsNil() {
return nil, fmt.Errorf("no %s found with %s==%s", m[1], m[2], m[3])
}
e = v.Index(i).Interface().(Elem)
case reflect.Ptr:
if v.IsNil() {
return nil, fmt.Errorf("cldr: element %q not found within element %q", m[1], e.GetCommon().name)
}
var ok bool
if e, ok = v.Interface().(Elem); !ok {
return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
} else if m[2] != "" || m[3] != "" {
return nil, fmt.Errorf("cldr: no type selector allowed for element %s", m[1])
}
default:
return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
}
}
return e, nil
}
const absPrefix = "//ldml/"
func (cldr *CLDR) resolveAlias(e Elem, src, path string) (res Elem, err error) {
if src != "locale" {
if !strings.HasPrefix(path, absPrefix) {
return nil, fmt.Errorf("cldr: expected absolute path, found %q", path)
}
path = path[len(absPrefix):]
if e, err = cldr.resolve(src); err != nil {
return nil, err
}
}
return walkXPath(e, path)
}
func (cldr *CLDR) resolveAndMergeAlias(e Elem) error {
alias := e.GetCommon().Alias
if alias == nil {
return nil
}
a, err := cldr.resolveAlias(e, alias.Source, alias.Path)
if err != nil {
return fmt.Errorf("%v: error evaluating path %q: %v", getPath(e), alias.Path, err)
}
// Ensure alias node was already evaluated. TODO: avoid double evaluation.
err = cldr.resolveAndMergeAlias(a)
v := reflect.ValueOf(e).Elem()
for i := iter(reflect.ValueOf(a).Elem()); !i.done(); i.next() {
if vv := i.value(); vv.Kind() != reflect.Ptr || !vv.IsNil() {
if _, attr := xmlName(i.field()); !attr {
v.FieldByIndex(i.index).Set(vv)
}
}
}
return err
}
func (cldr *CLDR) aliasResolver() visitor {
return func(v reflect.Value) (err error) {
if e, ok := v.Addr().Interface().(Elem); ok {
err = cldr.resolveAndMergeAlias(e)
if err == nil && blocking[e.GetCommon().name] {
return stopDescent
}
}
return err
}
}
// elements within blocking elements do not inherit.
// Taken from CLDR's supplementalMetaData.xml.
var blocking = map[string]bool{
"identity": true,
"supplementalData": true,
"cldrTest": true,
"collation": true,
"transform": true,
}
// Distinguishing attributes affect inheritance; two elements with different
// distinguishing attributes are treated as different for purposes of inheritance,
// except when such attributes occur in the indicated elements.
// Taken from CLDR's supplementalMetaData.xml.
var distinguishing = map[string][]string{
"key": nil,
"request_id": nil,
"id": nil,
"registry": nil,
"alt": nil,
"iso4217": nil,
"iso3166": nil,
"mzone": nil,
"from": nil,
"to": nil,
"type": []string{
"abbreviationFallback",
"default",
"mapping",
"measurementSystem",
"preferenceOrdering",
},
"numberSystem": nil,
}
func in(set []string, s string) bool {
for _, v := range set {
if v == s {
return true
}
}
return false
}
// attrKey computes a key based on the distinguishable attributes of
// an element and it's values.
func attrKey(v reflect.Value, exclude ...string) string {
parts := []string{}
ename := v.Interface().(Elem).GetCommon().name
v = v.Elem()
for i := iter(v); !i.done(); i.next() {
if name, attr := xmlName(i.field()); attr {
if except, ok := distinguishing[name]; ok && !in(exclude, name) && !in(except, ename) {
v := i.value()
if v.Kind() == reflect.Ptr {
v = v.Elem()
}
if v.IsValid() {
parts = append(parts, fmt.Sprintf("%s=%s", name, v.String()))
}
}
}
}
sort.Strings(parts)
return strings.Join(parts, ";")
}
// Key returns a key for e derived from all distinguishing attributes
// except those specified by exclude.
func Key(e Elem, exclude ...string) string {
return attrKey(reflect.ValueOf(e), exclude...)
}
// linkEnclosing sets the enclosing element as well as the name
// for all sub-elements of child, recursively.
func linkEnclosing(parent, child Elem) {
child.setEnclosing(parent)
v := reflect.ValueOf(child).Elem()
for i := iter(v); !i.done(); i.next() {
vf := i.value()
if vf.Kind() == reflect.Slice {
for j := 0; j < vf.Len(); j++ {
linkEnclosing(child, vf.Index(j).Interface().(Elem))
}
} else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
linkEnclosing(child, vf.Interface().(Elem))
}
}
}
func setNames(e Elem, name string) {
e.setName(name)
v := reflect.ValueOf(e).Elem()
for i := iter(v); !i.done(); i.next() {
vf := i.value()
name, _ = xmlName(i.field())
if vf.Kind() == reflect.Slice {
for j := 0; j < vf.Len(); j++ {
setNames(vf.Index(j).Interface().(Elem), name)
}
} else if vf.Kind() == reflect.Ptr && !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
setNames(vf.Interface().(Elem), name)
}
}
}
// deepCopy copies elements of v recursively. All elements of v that may
// be modified by inheritance are explicitly copied.
func deepCopy(v reflect.Value) reflect.Value {
switch v.Kind() {
case reflect.Ptr:
if v.IsNil() || v.Elem().Kind() != reflect.Struct {
return v
}
nv := reflect.New(v.Elem().Type())
nv.Elem().Set(v.Elem())
deepCopyRec(nv.Elem(), v.Elem())
return nv
case reflect.Slice:
nv := reflect.MakeSlice(v.Type(), v.Len(), v.Len())
for i := 0; i < v.Len(); i++ {
deepCopyRec(nv.Index(i), v.Index(i))
}
return nv
}
panic("deepCopy: must be called with pointer or slice")
}
// deepCopyRec is only called by deepCopy.
func deepCopyRec(nv, v reflect.Value) {
if v.Kind() == reflect.Struct {
t := v.Type()
for i := 0; i < v.NumField(); i++ {
if name, attr := xmlName(t.Field(i)); name != "" && !attr {
deepCopyRec(nv.Field(i), v.Field(i))
}
}
} else {
nv.Set(deepCopy(v))
}
}
// newNode is used to insert a missing node during inheritance.
func (cldr *CLDR) newNode(v, enc reflect.Value) reflect.Value {
n := reflect.New(v.Type())
for i := iter(v); !i.done(); i.next() {
if name, attr := xmlName(i.field()); name == "" || attr {
n.Elem().FieldByIndex(i.index).Set(i.value())
}
}
n.Interface().(Elem).GetCommon().setEnclosing(enc.Addr().Interface().(Elem))
return n
}
// v, parent must be pointers to struct
func (cldr *CLDR) inheritFields(v, parent reflect.Value) (res reflect.Value, err error) {
t := v.Type()
nv := reflect.New(t)
nv.Elem().Set(v)
for i := iter(v); !i.done(); i.next() {
vf := i.value()
f := i.field()
name, attr := xmlName(f)
if name == "" || attr {
continue
}
pf := parent.FieldByIndex(i.index)
if blocking[name] {
if vf.IsNil() {
vf = pf
}
nv.Elem().FieldByIndex(i.index).Set(deepCopy(vf))
continue
}
switch f.Type.Kind() {
case reflect.Ptr:
if f.Type.Elem().Kind() == reflect.Struct {
if !vf.IsNil() {
if vf, err = cldr.inheritStructPtr(vf, pf); err != nil {
return reflect.Value{}, err
}
vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
nv.Elem().FieldByIndex(i.index).Set(vf)
} else if !pf.IsNil() {
n := cldr.newNode(pf.Elem(), v)
if vf, err = cldr.inheritStructPtr(n, pf); err != nil {
return reflect.Value{}, err
}
vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
nv.Elem().FieldByIndex(i.index).Set(vf)
}
}
case reflect.Slice:
vf, err := cldr.inheritSlice(nv.Elem(), vf, pf)
if err != nil {
return reflect.Zero(t), err
}
nv.Elem().FieldByIndex(i.index).Set(vf)
}
}
return nv, nil
}
func root(e Elem) *LDML {
for ; e.enclosing() != nil; e = e.enclosing() {
}
return e.(*LDML)
}
// inheritStructPtr first merges possible aliases in with v and then inherits
// any underspecified elements from parent.
func (cldr *CLDR) inheritStructPtr(v, parent reflect.Value) (r reflect.Value, err error) {
if !v.IsNil() {
e := v.Interface().(Elem).GetCommon()
alias := e.Alias
if alias == nil && !parent.IsNil() {
alias = parent.Interface().(Elem).GetCommon().Alias
}
if alias != nil {
a, err := cldr.resolveAlias(v.Interface().(Elem), alias.Source, alias.Path)
if a != nil {
if v, err = cldr.inheritFields(v.Elem(), reflect.ValueOf(a).Elem()); err != nil {
return reflect.Value{}, err
}
}
}
if !parent.IsNil() {
return cldr.inheritFields(v.Elem(), parent.Elem())
}
} else if parent.IsNil() {
panic("should not reach here")
}
return v, nil
}
// Must be slice of struct pointers.
func (cldr *CLDR) inheritSlice(enc, v, parent reflect.Value) (res reflect.Value, err error) {
t := v.Type()
index := make(map[string]reflect.Value)
if !v.IsNil() {
for i := 0; i < v.Len(); i++ {
vi := v.Index(i)
key := attrKey(vi)
index[key] = vi
}
}
if !parent.IsNil() {
for i := 0; i < parent.Len(); i++ {
vi := parent.Index(i)
key := attrKey(vi)
if w, ok := index[key]; ok {
index[key], err = cldr.inheritStructPtr(w, vi)
} else {
n := cldr.newNode(vi.Elem(), enc)
index[key], err = cldr.inheritStructPtr(n, vi)
}
index[key].Interface().(Elem).setEnclosing(enc.Addr().Interface().(Elem))
if err != nil {
return v, err
}
}
}
keys := make([]string, 0, len(index))
for k, _ := range index {
keys = append(keys, k)
}
sort.Strings(keys)
sl := reflect.MakeSlice(t, len(index), len(index))
for i, k := range keys {
sl.Index(i).Set(index[k])
}
return sl, nil
}
func parentLocale(loc string) string {
parts := strings.Split(loc, "_")
if len(parts) == 1 {
return "root"
}
parts = parts[:len(parts)-1]
key := strings.Join(parts, "_")
return key
}
func (cldr *CLDR) resolve(loc string) (res *LDML, err error) {
if r := cldr.resolved[loc]; r != nil {
return r, nil
}
x := cldr.RawLDML(loc)
if x == nil {
return nil, fmt.Errorf("cldr: unknown locale %q", loc)
}
var v reflect.Value
if loc == "root" {
x = deepCopy(reflect.ValueOf(x)).Interface().(*LDML)
linkEnclosing(nil, x)
err = cldr.aliasResolver().visit(x)
} else {
key := parentLocale(loc)
var parent *LDML
for ; cldr.locale[key] == nil; key = parentLocale(key) {
}
if parent, err = cldr.resolve(key); err != nil {
return nil, err
}
v, err = cldr.inheritFields(reflect.ValueOf(x).Elem(), reflect.ValueOf(parent).Elem())
x = v.Interface().(*LDML)
linkEnclosing(nil, x)
}
if err != nil {
return nil, err
}
cldr.resolved[loc] = x
return x, err
}
// finalize finalizes the initialization of the raw LDML structs. It also
// removed unwanted fields, as specified by filter, so that they will not
// be unnecessarily evaluated.
func (cldr *CLDR) finalize(filter []string) {
for _, x := range cldr.locale {
if filter != nil {
v := reflect.ValueOf(x).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
f := t.Field(i)
name, _ := xmlName(f)
if name != "" && name != "identity" && !in(filter, name) {
v.Field(i).Set(reflect.Zero(f.Type))
}
}
}
linkEnclosing(nil, x) // for resolving aliases and paths
setNames(x, "ldml")
}
}

@ -1,144 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"fmt"
"reflect"
"sort"
)
// Slice provides utilities for modifying slices of elements.
// It can be wrapped around any slice of which the element type implements
// interface Elem.
type Slice struct {
ptr reflect.Value
typ reflect.Type
}
// Value returns the reflect.Value of the underlying slice.
func (s *Slice) Value() reflect.Value {
return s.ptr.Elem()
}
// MakeSlice wraps a pointer to a slice of Elems.
// It replaces the array pointed to by the slice so that subsequent modifications
// do not alter the data in a CLDR type.
// It panics if an incorrect type is passed.
func MakeSlice(slicePtr interface{}) Slice {
ptr := reflect.ValueOf(slicePtr)
if ptr.Kind() != reflect.Ptr {
panic(fmt.Sprintf("MakeSlice: argument must be pointer to slice, found %v", ptr.Type()))
}
sl := ptr.Elem()
if sl.Kind() != reflect.Slice {
panic(fmt.Sprintf("MakeSlice: argument must point to a slice, found %v", sl.Type()))
}
intf := reflect.TypeOf((*Elem)(nil)).Elem()
if !sl.Type().Elem().Implements(intf) {
panic(fmt.Sprintf("MakeSlice: element type of slice (%v) does not implement Elem", sl.Type().Elem()))
}
nsl := reflect.MakeSlice(sl.Type(), sl.Len(), sl.Len())
reflect.Copy(nsl, sl)
sl.Set(nsl)
return Slice{
ptr: ptr,
typ: sl.Type().Elem().Elem(),
}
}
func (s Slice) indexForAttr(a string) []int {
for i := iter(reflect.Zero(s.typ)); !i.done(); i.next() {
if n, _ := xmlName(i.field()); n == a {
return i.index
}
}
panic(fmt.Sprintf("MakeSlice: no attribute %q for type %v", a, s.typ))
}
// Filter filters s to only include elements for which fn returns true.
func (s Slice) Filter(fn func(e Elem) bool) {
k := 0
sl := s.Value()
for i := 0; i < sl.Len(); i++ {
vi := sl.Index(i)
if fn(vi.Interface().(Elem)) {
sl.Index(k).Set(vi)
k++
}
}
sl.Set(sl.Slice(0, k))
}
// Group finds elements in s for which fn returns the same value and groups
// them in a new Slice.
func (s Slice) Group(fn func(e Elem) string) []Slice {
m := make(map[string][]reflect.Value)
sl := s.Value()
for i := 0; i < sl.Len(); i++ {
vi := sl.Index(i)
key := fn(vi.Interface().(Elem))
m[key] = append(m[key], vi)
}
keys := []string{}
for k, _ := range m {
keys = append(keys, k)
}
sort.Strings(keys)
res := []Slice{}
for _, k := range keys {
nsl := reflect.New(sl.Type())
nsl.Elem().Set(reflect.Append(nsl.Elem(), m[k]...))
res = append(res, MakeSlice(nsl.Interface()))
}
return res
}
// SelectAnyOf filters s to contain only elements for which attr matches
// any of the values.
func (s Slice) SelectAnyOf(attr string, values ...string) {
index := s.indexForAttr(attr)
s.Filter(func(e Elem) bool {
vf := reflect.ValueOf(e).Elem().FieldByIndex(index)
return in(values, vf.String())
})
}
// SelectOnePerGroup filters s to include at most one element e per group of
// elements matching Key(attr), where e has an attribute a that matches any
// the values in v.
// If more than one element in a group matches a value in v preference
// is given to the element that matches the first value in v.
func (s Slice) SelectOnePerGroup(a string, v []string) {
index := s.indexForAttr(a)
grouped := s.Group(func(e Elem) string { return Key(e, a) })
sl := s.Value()
sl.Set(sl.Slice(0, 0))
for _, g := range grouped {
e := reflect.Value{}
found := len(v)
gsl := g.Value()
for i := 0; i < gsl.Len(); i++ {
vi := gsl.Index(i).Elem().FieldByIndex(index)
j := 0
for ; j < len(v) && v[j] != vi.String(); j++ {
}
if j < found {
found = j
e = gsl.Index(i)
}
}
if found < len(v) {
sl.Set(reflect.Append(sl, e))
}
}
}
// SelectDraft drops all elements from the list with a draft level smaller than d
// and selects the highest draft level of the remaining.
// This method assumes that the input CLDR is canonicalized.
func (s Slice) SelectDraft(d Draft) {
s.SelectOnePerGroup("draft", drafts[len(drafts)-2-int(d):])
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save