// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package cases import ( "golang.org/x/text/transform" ) // A context is used for iterating over source bytes, fetching case info and // writing to a destination buffer. // // Casing operations may need more than one rune of context to decide how a rune // should be cased. Casing implementations should call checkpoint on context // whenever it is known to be safe to return the runes processed so far. // // It is recommended for implementations to not allow for more than 30 case // ignorables as lookahead (analogous to the limit in norm) and to use state if // unbounded lookahead is needed for cased runes. type context struct { dst, src []byte atEOF bool pDst int // pDst points past the last written rune in dst. pSrc int // pSrc points to the start of the currently scanned rune. // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc. nDst, nSrc int err error sz int // size of current rune info info // case information of currently scanned rune // State preserved across calls to Transform. isMidWord bool // false if next cased letter needs to be title-cased. } func (c *context) Reset() { c.isMidWord = false } // ret returns the return values for the Transform method. It checks whether // there were insufficient bytes in src to complete and introduces an error // accordingly, if necessary. func (c *context) ret() (nDst, nSrc int, err error) { if c.err != nil || c.nSrc == len(c.src) { return c.nDst, c.nSrc, c.err } // This point is only reached by mappers if there was no short destination // buffer. This means that the source buffer was exhausted and that c.sz was // set to 0 by next. if c.atEOF && c.pSrc == len(c.src) { return c.pDst, c.pSrc, nil } return c.nDst, c.nSrc, transform.ErrShortSrc } // checkpoint sets the return value buffer points for Transform to the current // positions. func (c *context) checkpoint() { if c.err == nil { c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz } } // unreadRune causes the last rune read by next to be reread on the next // invocation of next. Only one unreadRune may be called after a call to next. func (c *context) unreadRune() { c.sz = 0 } func (c *context) next() bool { c.pSrc += c.sz if c.pSrc == len(c.src) || c.err != nil { c.info, c.sz = 0, 0 return false } v, sz := trie.lookup(c.src[c.pSrc:]) c.info, c.sz = info(v), sz if c.sz == 0 { if c.atEOF { // A zero size means we have an incomplete rune. If we are atEOF, // this means it is an illegal rune, which we will consume one // byte at a time. c.sz = 1 } else { c.err = transform.ErrShortSrc return false } } return true } // writeBytes adds bytes to dst. func (c *context) writeBytes(b []byte) bool { if len(c.dst)-c.pDst < len(b) { c.err = transform.ErrShortDst return false } // This loop is faster than using copy. for _, ch := range b { c.dst[c.pDst] = ch c.pDst++ } return true } // writeString writes the given string to dst. func (c *context) writeString(s string) bool { if len(c.dst)-c.pDst < len(s) { c.err = transform.ErrShortDst return false } // This loop is faster than using copy. for i := 0; i < len(s); i++ { c.dst[c.pDst] = s[i] c.pDst++ } return true } // copy writes the current rune to dst. func (c *context) copy() bool { return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz]) } // copyXOR copies the current rune to dst and modifies it by applying the XOR // pattern of the case info. It is the responsibility of the caller to ensure // that this is a rune with a XOR pattern defined. func (c *context) copyXOR() bool { if !c.copy() { return false } if c.info&xorIndexBit == 0 { // Fast path for 6-bit XOR pattern, which covers most cases. c.dst[c.pDst-1] ^= byte(c.info >> xorShift) } else { // Interpret XOR bits as an index. // TODO: test performance for unrolling this loop. Verify that we have // at least two bytes and at most three. idx := c.info >> xorShift for p := c.pDst - 1; ; p-- { c.dst[p] ^= xorData[idx] idx-- if xorData[idx] == 0 { break } } } return true } // hasPrefix returns true if src[pSrc:] starts with the given string. func (c *context) hasPrefix(s string) bool { b := c.src[c.pSrc:] if len(b) < len(s) { return false } for i, c := range b[:len(s)] { if c != s[i] { return false } } return true } // caseType returns an info with only the case bits, normalized to either // cLower, cUpper, cTitle or cUncased. func (c *context) caseType() info { cm := c.info & 0x7 if cm < 4 { return cm } if cm >= cXORCase { // xor the last bit of the rune with the case type bits. b := c.src[c.pSrc+c.sz-1] return info(b&1) ^ cm&0x3 } if cm == cIgnorableCased { return cLower } return cUncased } // lower writes the lowercase version of the current rune to dst. func lower(c *context) bool { ct := c.caseType() if c.info&hasMappingMask == 0 || ct == cLower { return c.copy() } if c.info&exceptionBit == 0 { return c.copyXOR() } e := exceptions[c.info>>exceptionShift:] offset := 2 + e[0]&lengthMask // size of header + fold string if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { return c.writeString(e[offset : offset+nLower]) } return c.copy() } // upper writes the uppercase version of the current rune to dst. func upper(c *context) bool { ct := c.caseType() if c.info&hasMappingMask == 0 || ct == cUpper { return c.copy() } if c.info&exceptionBit == 0 { return c.copyXOR() } e := exceptions[c.info>>exceptionShift:] offset := 2 + e[0]&lengthMask // size of header + fold string // Get length of first special case mapping. n := (e[1] >> lengthBits) & lengthMask if ct == cTitle { // The first special case mapping is for lower. Set n to the second. if n == noChange { n = 0 } n, e = e[1]&lengthMask, e[n:] } if n != noChange { return c.writeString(e[offset : offset+n]) } return c.copy() } // title writes the title case version of the current rune to dst. func title(c *context) bool { ct := c.caseType() if c.info&hasMappingMask == 0 || ct == cTitle { return c.copy() } if c.info&exceptionBit == 0 { if ct == cLower { return c.copyXOR() } return c.copy() } // Get the exception data. e := exceptions[c.info>>exceptionShift:] offset := 2 + e[0]&lengthMask // size of header + fold string nFirst := (e[1] >> lengthBits) & lengthMask if nTitle := e[1] & lengthMask; nTitle != noChange { if nFirst != noChange { e = e[nFirst:] } return c.writeString(e[offset : offset+nTitle]) } if ct == cLower && nFirst != noChange { // Use the uppercase version instead. return c.writeString(e[offset : offset+nFirst]) } // Already in correct case. return c.copy() } // foldFull writes the foldFull version of the current rune to dst. func foldFull(c *context) bool { if c.info&hasMappingMask == 0 { return c.copy() } ct := c.caseType() if c.info&exceptionBit == 0 { if ct != cLower || c.info&inverseFoldBit != 0 { return c.copyXOR() } return c.copy() } e := exceptions[c.info>>exceptionShift:] n := e[0] & lengthMask if n == 0 { if ct == cLower { return c.copy() } n = (e[1] >> lengthBits) & lengthMask } return c.writeString(e[2 : 2+n]) }