| // Copyright 2014 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package cases |
| |
| import "golang.org/x/text/transform" |
| |
| // A context is used for iterating over source bytes, fetching case info and |
| // writing to a destination buffer. |
| // |
| // Casing operations may need more than one rune of context to decide how a rune |
| // should be cased. Casing implementations should call checkpoint on context |
| // whenever it is known to be safe to return the runes processed so far. |
| // |
| // It is recommended for implementations to not allow for more than 30 case |
| // ignorables as lookahead (analogous to the limit in norm) and to use state if |
| // unbounded lookahead is needed for cased runes. |
| type context struct { |
| dst, src []byte |
| atEOF bool |
| |
| pDst int // pDst points past the last written rune in dst. |
| pSrc int // pSrc points to the start of the currently scanned rune. |
| |
| // checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc. |
| nDst, nSrc int |
| err error |
| |
| sz int // size of current rune |
| info info // case information of currently scanned rune |
| |
| // State preserved across calls to Transform. |
| isMidWord bool // false if next cased letter needs to be title-cased. |
| } |
| |
| func (c *context) Reset() { |
| c.isMidWord = false |
| } |
| |
| // ret returns the return values for the Transform method. It checks whether |
| // there were insufficient bytes in src to complete and introduces an error |
| // accordingly, if necessary. |
| func (c *context) ret() (nDst, nSrc int, err error) { |
| if c.err != nil || c.nSrc == len(c.src) { |
| return c.nDst, c.nSrc, c.err |
| } |
| // This point is only reached by mappers if there was no short destination |
| // buffer. This means that the source buffer was exhausted and that c.sz was |
| // set to 0 by next. |
| if c.atEOF && c.pSrc == len(c.src) { |
| return c.pDst, c.pSrc, nil |
| } |
| return c.nDst, c.nSrc, transform.ErrShortSrc |
| } |
| |
| // retSpan returns the return values for the Span method. It checks whether |
| // there were insufficient bytes in src to complete and introduces an error |
| // accordingly, if necessary. |
| func (c *context) retSpan() (n int, err error) { |
| _, nSrc, err := c.ret() |
| return nSrc, err |
| } |
| |
| // checkpoint sets the return value buffer points for Transform to the current |
| // positions. |
| func (c *context) checkpoint() { |
| if c.err == nil { |
| c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz |
| } |
| } |
| |
| // unreadRune causes the last rune read by next to be reread on the next |
| // invocation of next. Only one unreadRune may be called after a call to next. |
| func (c *context) unreadRune() { |
| c.sz = 0 |
| } |
| |
| func (c *context) next() bool { |
| c.pSrc += c.sz |
| if c.pSrc == len(c.src) || c.err != nil { |
| c.info, c.sz = 0, 0 |
| return false |
| } |
| v, sz := trie.lookup(c.src[c.pSrc:]) |
| c.info, c.sz = info(v), sz |
| if c.sz == 0 { |
| if c.atEOF { |
| // A zero size means we have an incomplete rune. If we are atEOF, |
| // this means it is an illegal rune, which we will consume one |
| // byte at a time. |
| c.sz = 1 |
| } else { |
| c.err = transform.ErrShortSrc |
| return false |
| } |
| } |
| return true |
| } |
| |
| // writeBytes adds bytes to dst. |
| func (c *context) writeBytes(b []byte) bool { |
| if len(c.dst)-c.pDst < len(b) { |
| c.err = transform.ErrShortDst |
| return false |
| } |
| // This loop is faster than using copy. |
| for _, ch := range b { |
| c.dst[c.pDst] = ch |
| c.pDst++ |
| } |
| return true |
| } |
| |
| // writeString writes the given string to dst. |
| func (c *context) writeString(s string) bool { |
| if len(c.dst)-c.pDst < len(s) { |
| c.err = transform.ErrShortDst |
| return false |
| } |
| // This loop is faster than using copy. |
| for i := 0; i < len(s); i++ { |
| c.dst[c.pDst] = s[i] |
| c.pDst++ |
| } |
| return true |
| } |
| |
| // copy writes the current rune to dst. |
| func (c *context) copy() bool { |
| return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz]) |
| } |
| |
| // copyXOR copies the current rune to dst and modifies it by applying the XOR |
| // pattern of the case info. It is the responsibility of the caller to ensure |
| // that this is a rune with a XOR pattern defined. |
| func (c *context) copyXOR() bool { |
| if !c.copy() { |
| return false |
| } |
| if c.info&xorIndexBit == 0 { |
| // Fast path for 6-bit XOR pattern, which covers most cases. |
| c.dst[c.pDst-1] ^= byte(c.info >> xorShift) |
| } else { |
| // Interpret XOR bits as an index. |
| // TODO: test performance for unrolling this loop. Verify that we have |
| // at least two bytes and at most three. |
| idx := c.info >> xorShift |
| for p := c.pDst - 1; ; p-- { |
| c.dst[p] ^= xorData[idx] |
| idx-- |
| if xorData[idx] == 0 { |
| break |
| } |
| } |
| } |
| return true |
| } |
| |
| // hasPrefix returns true if src[pSrc:] starts with the given string. |
| func (c *context) hasPrefix(s string) bool { |
| b := c.src[c.pSrc:] |
| if len(b) < len(s) { |
| return false |
| } |
| for i, c := range b[:len(s)] { |
| if c != s[i] { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // caseType returns an info with only the case bits, normalized to either |
| // cLower, cUpper, cTitle or cUncased. |
| func (c *context) caseType() info { |
| cm := c.info & 0x7 |
| if cm < 4 { |
| return cm |
| } |
| if cm >= cXORCase { |
| // xor the last bit of the rune with the case type bits. |
| b := c.src[c.pSrc+c.sz-1] |
| return info(b&1) ^ cm&0x3 |
| } |
| if cm == cIgnorableCased { |
| return cLower |
| } |
| return cUncased |
| } |
| |
| // lower writes the lowercase version of the current rune to dst. |
| func lower(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cLower { |
| return c.copy() |
| } |
| if c.info&exceptionBit == 0 { |
| return c.copyXOR() |
| } |
| e := exceptions[c.info>>exceptionShift:] |
| offset := 2 + e[0]&lengthMask // size of header + fold string |
| if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { |
| return c.writeString(e[offset : offset+nLower]) |
| } |
| return c.copy() |
| } |
| |
| func isLower(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cLower { |
| return true |
| } |
| if c.info&exceptionBit == 0 { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| e := exceptions[c.info>>exceptionShift:] |
| if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| return true |
| } |
| |
| // upper writes the uppercase version of the current rune to dst. |
| func upper(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cUpper { |
| return c.copy() |
| } |
| if c.info&exceptionBit == 0 { |
| return c.copyXOR() |
| } |
| e := exceptions[c.info>>exceptionShift:] |
| offset := 2 + e[0]&lengthMask // size of header + fold string |
| // Get length of first special case mapping. |
| n := (e[1] >> lengthBits) & lengthMask |
| if ct == cTitle { |
| // The first special case mapping is for lower. Set n to the second. |
| if n == noChange { |
| n = 0 |
| } |
| n, e = e[1]&lengthMask, e[n:] |
| } |
| if n != noChange { |
| return c.writeString(e[offset : offset+n]) |
| } |
| return c.copy() |
| } |
| |
| // isUpper writes the isUppercase version of the current rune to dst. |
| func isUpper(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cUpper { |
| return true |
| } |
| if c.info&exceptionBit == 0 { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| e := exceptions[c.info>>exceptionShift:] |
| // Get length of first special case mapping. |
| n := (e[1] >> lengthBits) & lengthMask |
| if ct == cTitle { |
| n = e[1] & lengthMask |
| } |
| if n != noChange { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| return true |
| } |
| |
| // title writes the title case version of the current rune to dst. |
| func title(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cTitle { |
| return c.copy() |
| } |
| if c.info&exceptionBit == 0 { |
| if ct == cLower { |
| return c.copyXOR() |
| } |
| return c.copy() |
| } |
| // Get the exception data. |
| e := exceptions[c.info>>exceptionShift:] |
| offset := 2 + e[0]&lengthMask // size of header + fold string |
| |
| nFirst := (e[1] >> lengthBits) & lengthMask |
| if nTitle := e[1] & lengthMask; nTitle != noChange { |
| if nFirst != noChange { |
| e = e[nFirst:] |
| } |
| return c.writeString(e[offset : offset+nTitle]) |
| } |
| if ct == cLower && nFirst != noChange { |
| // Use the uppercase version instead. |
| return c.writeString(e[offset : offset+nFirst]) |
| } |
| // Already in correct case. |
| return c.copy() |
| } |
| |
| // isTitle reports whether the current rune is in title case. |
| func isTitle(c *context) bool { |
| ct := c.caseType() |
| if c.info&hasMappingMask == 0 || ct == cTitle { |
| return true |
| } |
| if c.info&exceptionBit == 0 { |
| if ct == cLower { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| return true |
| } |
| // Get the exception data. |
| e := exceptions[c.info>>exceptionShift:] |
| if nTitle := e[1] & lengthMask; nTitle != noChange { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| nFirst := (e[1] >> lengthBits) & lengthMask |
| if ct == cLower && nFirst != noChange { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| return true |
| } |
| |
| // foldFull writes the foldFull version of the current rune to dst. |
| func foldFull(c *context) bool { |
| if c.info&hasMappingMask == 0 { |
| return c.copy() |
| } |
| ct := c.caseType() |
| if c.info&exceptionBit == 0 { |
| if ct != cLower || c.info&inverseFoldBit != 0 { |
| return c.copyXOR() |
| } |
| return c.copy() |
| } |
| e := exceptions[c.info>>exceptionShift:] |
| n := e[0] & lengthMask |
| if n == 0 { |
| if ct == cLower { |
| return c.copy() |
| } |
| n = (e[1] >> lengthBits) & lengthMask |
| } |
| return c.writeString(e[2 : 2+n]) |
| } |
| |
| // isFoldFull reports whether the current run is mapped to foldFull |
| func isFoldFull(c *context) bool { |
| if c.info&hasMappingMask == 0 { |
| return true |
| } |
| ct := c.caseType() |
| if c.info&exceptionBit == 0 { |
| if ct != cLower || c.info&inverseFoldBit != 0 { |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |
| return true |
| } |
| e := exceptions[c.info>>exceptionShift:] |
| n := e[0] & lengthMask |
| if n == 0 && ct == cLower { |
| return true |
| } |
| c.err = transform.ErrEndOfSpan |
| return false |
| } |