|  | // Copyright 2009 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | // Package bytes implements functions for the manipulation of byte slices. | 
|  | // It is analogous to the facilities of the strings package. | 
|  | package bytes | 
|  |  | 
|  | import ( | 
|  | "internal/bytealg" | 
|  | "unicode" | 
|  | "unicode/utf8" | 
|  | ) | 
|  |  | 
// Equal reports whether a and b hold exactly the same bytes, which also
// requires them to have the same length.
// A nil slice is treated the same as an empty slice.
func Equal(a, b []byte) bool {
	if len(a) != len(b) {
		return false
	}
	// The conversions appear directly in the comparison so that neither
	// cmd/compile nor gccgo has to allocate for them.
	return string(a) == string(b)
}
|  |  | 
|  | // Compare returns an integer comparing two byte slices lexicographically. | 
|  | // The result will be 0 if a==b, -1 if a < b, and +1 if a > b. | 
|  | // A nil argument is equivalent to an empty slice. | 
|  | func Compare(a, b []byte) int { | 
|  | return bytealg.Compare(a, b) | 
|  | } | 
|  |  | 
// explode splits s into a slice of UTF-8 sequences, one per Unicode code
// point (each piece is still a slice of bytes), producing at most n pieces.
// If n <= 0 there is no limit. When the limit is reached, the final piece
// holds the unsplit remainder of s. Invalid UTF-8 sequences are chopped into
// individual bytes.
func explode(s []byte, n int) [][]byte {
	// s can never yield more than len(s) pieces, so clamp n to that bound.
	// Without the upper clamp a large caller-supplied n would size the
	// result by n instead of by the input, wasting memory or making the
	// make call below panic.
	if n <= 0 || n > len(s) {
		n = len(s)
	}
	a := make([][]byte, n)
	var size int
	na := 0
	for len(s) > 0 {
		if na+1 >= n {
			// Last permitted slot: store whatever is left and stop.
			a[na] = s
			na++
			break
		}
		_, size = utf8.DecodeRune(s)
		// Cap each piece at its own length so that appending to it cannot
		// overwrite the bytes that follow it in s.
		a[na] = s[0:size:size]
		s = s[size:]
		na++
	}
	return a[0:na]
}
|  |  | 
|  | // Count counts the number of non-overlapping instances of sep in s. | 
|  | // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s. | 
|  | func Count(s, sep []byte) int { | 
|  | // special case | 
|  | if len(sep) == 0 { | 
|  | return utf8.RuneCount(s) + 1 | 
|  | } | 
|  | if len(sep) == 1 { | 
|  | return bytealg.Count(s, sep[0]) | 
|  | } | 
|  | n := 0 | 
|  | for { | 
|  | i := Index(s, sep) | 
|  | if i == -1 { | 
|  | return n | 
|  | } | 
|  | n++ | 
|  | s = s[i+len(sep):] | 
|  | } | 
|  | } | 
|  |  | 
|  | // Contains reports whether subslice is within b. | 
|  | func Contains(b, subslice []byte) bool { | 
|  | return Index(b, subslice) != -1 | 
|  | } | 
|  |  | 
|  | // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b. | 
|  | func ContainsAny(b []byte, chars string) bool { | 
|  | return IndexAny(b, chars) >= 0 | 
|  | } | 
|  |  | 
|  | // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b. | 
|  | func ContainsRune(b []byte, r rune) bool { | 
|  | return IndexRune(b, r) >= 0 | 
|  | } | 
|  |  | 
|  | // IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b. | 
|  | func IndexByte(b []byte, c byte) int { | 
|  | return bytealg.IndexByte(b, c) | 
|  | } | 
|  |  | 
// indexBytePortable scans s from the front and returns the index of the
// first byte equal to c, or -1 if no byte matches.
func indexBytePortable(s []byte, c byte) int {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return i
		}
	}
	return -1
}
|  |  | 
|  | // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s. | 
|  | func LastIndex(s, sep []byte) int { | 
|  | n := len(sep) | 
|  | switch { | 
|  | case n == 0: | 
|  | return len(s) | 
|  | case n == 1: | 
|  | return LastIndexByte(s, sep[0]) | 
|  | case n == len(s): | 
|  | if Equal(s, sep) { | 
|  | return 0 | 
|  | } | 
|  | return -1 | 
|  | case n > len(s): | 
|  | return -1 | 
|  | } | 
|  | // Rabin-Karp search from the end of the string | 
|  | hashss, pow := hashStrRev(sep) | 
|  | last := len(s) - n | 
|  | var h uint32 | 
|  | for i := len(s) - 1; i >= last; i-- { | 
|  | h = h*primeRK + uint32(s[i]) | 
|  | } | 
|  | if h == hashss && Equal(s[last:], sep) { | 
|  | return last | 
|  | } | 
|  | for i := last - 1; i >= 0; i-- { | 
|  | h *= primeRK | 
|  | h += uint32(s[i]) | 
|  | h -= pow * uint32(s[i+n]) | 
|  | if h == hashss && Equal(s[i:i+n], sep) { | 
|  | return i | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  |  | 
// LastIndexByte returns the index of the last occurrence of c in s, or -1
// when c does not occur in s.
func LastIndexByte(s []byte, c byte) int {
	i := len(s)
	for i > 0 {
		i--
		if s[i] == c {
			return i
		}
	}
	return -1
}
|  |  | 
|  | // IndexRune interprets s as a sequence of UTF-8-encoded code points. | 
|  | // It returns the byte index of the first occurrence in s of the given rune. | 
|  | // It returns -1 if rune is not present in s. | 
|  | // If r is utf8.RuneError, it returns the first instance of any | 
|  | // invalid UTF-8 byte sequence. | 
|  | func IndexRune(s []byte, r rune) int { | 
|  | switch { | 
|  | case 0 <= r && r < utf8.RuneSelf: | 
|  | return IndexByte(s, byte(r)) | 
|  | case r == utf8.RuneError: | 
|  | for i := 0; i < len(s); { | 
|  | r1, n := utf8.DecodeRune(s[i:]) | 
|  | if r1 == utf8.RuneError { | 
|  | return i | 
|  | } | 
|  | i += n | 
|  | } | 
|  | return -1 | 
|  | case !utf8.ValidRune(r): | 
|  | return -1 | 
|  | default: | 
|  | var b [utf8.UTFMax]byte | 
|  | n := utf8.EncodeRune(b[:], r) | 
|  | return Index(s, b[:n]) | 
|  | } | 
|  | } | 
|  |  | 
|  | // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. | 
|  | // It returns the byte index of the first occurrence in s of any of the Unicode | 
|  | // code points in chars. It returns -1 if chars is empty or if there is no code | 
|  | // point in common. | 
|  | func IndexAny(s []byte, chars string) int { | 
|  | if chars == "" { | 
|  | // Avoid scanning all of s. | 
|  | return -1 | 
|  | } | 
|  | if len(s) > 8 { | 
|  | if as, isASCII := makeASCIISet(chars); isASCII { | 
|  | for i, c := range s { | 
|  | if as.contains(c) { | 
|  | return i | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  | } | 
|  | var width int | 
|  | for i := 0; i < len(s); i += width { | 
|  | r := rune(s[i]) | 
|  | if r < utf8.RuneSelf { | 
|  | width = 1 | 
|  | } else { | 
|  | r, width = utf8.DecodeRune(s[i:]) | 
|  | } | 
|  | for _, ch := range chars { | 
|  | if r == ch { | 
|  | return i | 
|  | } | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  |  | 
|  | // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code | 
|  | // points. It returns the byte index of the last occurrence in s of any of | 
|  | // the Unicode code points in chars. It returns -1 if chars is empty or if | 
|  | // there is no code point in common. | 
|  | func LastIndexAny(s []byte, chars string) int { | 
|  | if chars == "" { | 
|  | // Avoid scanning all of s. | 
|  | return -1 | 
|  | } | 
|  | if len(s) > 8 { | 
|  | if as, isASCII := makeASCIISet(chars); isASCII { | 
|  | for i := len(s) - 1; i >= 0; i-- { | 
|  | if as.contains(s[i]) { | 
|  | return i | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  | } | 
|  | for i := len(s); i > 0; { | 
|  | r, size := utf8.DecodeLastRune(s[:i]) | 
|  | i -= size | 
|  | for _, c := range chars { | 
|  | if r == c { | 
|  | return i | 
|  | } | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  |  | 
|  | // Generic split: splits after each instance of sep, | 
|  | // including sepSave bytes of sep in the subslices. | 
|  | func genSplit(s, sep []byte, sepSave, n int) [][]byte { | 
|  | if n == 0 { | 
|  | return nil | 
|  | } | 
|  | if len(sep) == 0 { | 
|  | return explode(s, n) | 
|  | } | 
|  | if n < 0 { | 
|  | n = Count(s, sep) + 1 | 
|  | } | 
|  |  | 
|  | a := make([][]byte, n) | 
|  | n-- | 
|  | i := 0 | 
|  | for i < n { | 
|  | m := Index(s, sep) | 
|  | if m < 0 { | 
|  | break | 
|  | } | 
|  | a[i] = s[: m+sepSave : m+sepSave] | 
|  | s = s[m+len(sep):] | 
|  | i++ | 
|  | } | 
|  | a[i] = s | 
|  | return a[:i+1] | 
|  | } | 
|  |  | 
|  | // SplitN slices s into subslices separated by sep and returns a slice of | 
|  | // the subslices between those separators. | 
|  | // If sep is empty, SplitN splits after each UTF-8 sequence. | 
|  | // The count determines the number of subslices to return: | 
|  | //   n > 0: at most n subslices; the last subslice will be the unsplit remainder. | 
|  | //   n == 0: the result is nil (zero subslices) | 
|  | //   n < 0: all subslices | 
|  | func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) } | 
|  |  | 
|  | // SplitAfterN slices s into subslices after each instance of sep and | 
|  | // returns a slice of those subslices. | 
|  | // If sep is empty, SplitAfterN splits after each UTF-8 sequence. | 
|  | // The count determines the number of subslices to return: | 
|  | //   n > 0: at most n subslices; the last subslice will be the unsplit remainder. | 
|  | //   n == 0: the result is nil (zero subslices) | 
|  | //   n < 0: all subslices | 
|  | func SplitAfterN(s, sep []byte, n int) [][]byte { | 
|  | return genSplit(s, sep, len(sep), n) | 
|  | } | 
|  |  | 
|  | // Split slices s into all subslices separated by sep and returns a slice of | 
|  | // the subslices between those separators. | 
|  | // If sep is empty, Split splits after each UTF-8 sequence. | 
|  | // It is equivalent to SplitN with a count of -1. | 
|  | func Split(s, sep []byte) [][]byte { return genSplit(s, sep, 0, -1) } | 
|  |  | 
|  | // SplitAfter slices s into all subslices after each instance of sep and | 
|  | // returns a slice of those subslices. | 
|  | // If sep is empty, SplitAfter splits after each UTF-8 sequence. | 
|  | // It is equivalent to SplitAfterN with a count of -1. | 
|  | func SplitAfter(s, sep []byte) [][]byte { | 
|  | return genSplit(s, sep, len(sep), -1) | 
|  | } | 
|  |  | 
// asciiSpace is a lookup table indexed by byte value. The entry is 1 for the
// bytes tab, newline, vertical tab, form feed, carriage return and space,
// and 0 for every other byte.
var asciiSpace = [256]uint8{
	' ':  1,
	'\t': 1,
	'\n': 1,
	'\v': 1,
	'\f': 1,
	'\r': 1,
}
|  |  | 
|  | // Fields interprets s as a sequence of UTF-8-encoded code points. | 
|  | // It splits the slice s around each instance of one or more consecutive white space | 
|  | // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an | 
|  | // empty slice if s contains only white space. | 
|  | func Fields(s []byte) [][]byte { | 
|  | // First count the fields. | 
|  | // This is an exact count if s is ASCII, otherwise it is an approximation. | 
|  | n := 0 | 
|  | wasSpace := 1 | 
|  | // setBits is used to track which bits are set in the bytes of s. | 
|  | setBits := uint8(0) | 
|  | for i := 0; i < len(s); i++ { | 
|  | r := s[i] | 
|  | setBits |= r | 
|  | isSpace := int(asciiSpace[r]) | 
|  | n += wasSpace & ^isSpace | 
|  | wasSpace = isSpace | 
|  | } | 
|  |  | 
|  | if setBits >= utf8.RuneSelf { | 
|  | // Some runes in the input slice are not ASCII. | 
|  | return FieldsFunc(s, unicode.IsSpace) | 
|  | } | 
|  |  | 
|  | // ASCII fast path | 
|  | a := make([][]byte, n) | 
|  | na := 0 | 
|  | fieldStart := 0 | 
|  | i := 0 | 
|  | // Skip spaces in the front of the input. | 
|  | for i < len(s) && asciiSpace[s[i]] != 0 { | 
|  | i++ | 
|  | } | 
|  | fieldStart = i | 
|  | for i < len(s) { | 
|  | if asciiSpace[s[i]] == 0 { | 
|  | i++ | 
|  | continue | 
|  | } | 
|  | a[na] = s[fieldStart:i:i] | 
|  | na++ | 
|  | i++ | 
|  | // Skip spaces in between fields. | 
|  | for i < len(s) && asciiSpace[s[i]] != 0 { | 
|  | i++ | 
|  | } | 
|  | fieldStart = i | 
|  | } | 
|  | if fieldStart < len(s) { // Last field might end at EOF. | 
|  | a[na] = s[fieldStart:len(s):len(s)] | 
|  | } | 
|  | return a | 
|  | } | 
|  |  | 
// FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
// It splits s at each run of code points c for which f(c) is true and
// returns the subslices of s that lie between those runs. If every code
// point in s satisfies f(c), or len(s) == 0, an empty slice is returned.
// FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
	// bounds records one field as the half-open byte range s[lo:hi].
	type bounds struct {
		lo, hi int
	}
	found := make([]bounds, 0, 32)

	// Locate where each field starts and ends.
	inField := false
	lo := 0
	for i := 0; i < len(s); {
		r, width := rune(s[i]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[i:])
		}
		isSep := f(r)
		switch {
		case isSep && inField:
			// A separator closes the field that was open.
			found = append(found, bounds{lo: lo, hi: i})
			inField = false
		case !isSep && !inField:
			// A non-separator opens a new field.
			lo = i
			inField = true
		}
		i += width
	}

	// The last field might end at EOF.
	if inField {
		found = append(found, bounds{lo, len(s)})
	}

	// Turn the recorded ranges into subslices, each capped at its own end.
	out := make([][]byte, len(found))
	for k := range found {
		b := found[k]
		out[k] = s[b.lo:b.hi:b.hi]
	}
	return out
}
|  |  | 
// Join builds a new byte slice from the elements of s, with sep placed
// between each pair of adjacent elements.
func Join(s [][]byte, sep []byte) []byte {
	switch len(s) {
	case 0:
		return []byte{}
	case 1:
		// Just return a copy.
		return append([]byte(nil), s[0]...)
	}

	// Work out the final size up front: every element plus one separator
	// between each adjacent pair.
	total := len(sep) * (len(s) - 1)
	for i := range s {
		total += len(s[i])
	}

	out := make([]byte, total)
	w := copy(out, s[0])
	for i := 1; i < len(s); i++ {
		w += copy(out[w:], sep)
		w += copy(out[w:], s[i])
	}
	return out
}
|  |  | 
|  | // HasPrefix tests whether the byte slice s begins with prefix. | 
|  | func HasPrefix(s, prefix []byte) bool { | 
|  | return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix) | 
|  | } | 
|  |  | 
|  | // HasSuffix tests whether the byte slice s ends with suffix. | 
|  | func HasSuffix(s, suffix []byte) bool { | 
|  | return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix) | 
|  | } | 
|  |  | 
// Map returns a copy of the byte slice s in which every character has been
// replaced by the result of calling mapping on it. A negative result from
// mapping drops the character from the output with no replacement. Both s
// and the output are interpreted as UTF-8-encoded code points.
func Map(mapping func(r rune) rune, s []byte) []byte {
	// The output starts at the size of the input. Mapping can make it grow,
	// which is rare and is handled by reallocating on demand; shrinking
	// falls out naturally.
	limit := len(s) // current length of out
	used := 0       // number of bytes encoded into out so far
	out := make([]byte, limit)
	for i := 0; i < len(s); {
		r, width := rune(s[i]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[i:])
		}
		i += width

		mapped := mapping(r)
		if mapped < 0 {
			// Dropped from the output.
			continue
		}
		need := utf8.RuneLen(mapped)
		if need < 0 {
			// Not encodable as UTF-8: reserve room for utf8.RuneError.
			need = len(string(utf8.RuneError))
		}
		if used+need > limit {
			// Grow the buffer.
			limit = limit*2 + utf8.UTFMax
			grown := make([]byte, limit)
			copy(grown, out[:used])
			out = grown
		}
		used += utf8.EncodeRune(out[used:limit], mapped)
	}
	return out[:used]
}
|  |  | 
// Repeat returns a new byte slice made of count copies of b.
//
// It panics if count is negative or if
// the result of (len(b) * count) overflows.
func Repeat(b []byte, count int) []byte {
	if count == 0 {
		return []byte{}
	}
	// There is no way to return an error on overflow, so panic instead of
	// producing a result of the wrong size.
	// See Issue golang.org/issue/16237.
	switch {
	case count < 0:
		panic("bytes: negative Repeat count")
	case len(b)*count/count != len(b):
		panic("bytes: Repeat count causes overflow")
	}

	total := len(b) * count
	out := make([]byte, total)
	// Seed with one copy of b, then repeatedly copy the filled prefix onto
	// the space after it, doubling the filled length each round.
	for filled := copy(out, b); filled < total; filled *= 2 {
		copy(out[filled:], out[:filled])
	}
	return out
}
|  |  | 
|  | // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to | 
|  | // their upper case. | 
|  | func ToUpper(s []byte) []byte { | 
|  | isASCII, hasLower := true, false | 
|  | for i := 0; i < len(s); i++ { | 
|  | c := s[i] | 
|  | if c >= utf8.RuneSelf { | 
|  | isASCII = false | 
|  | break | 
|  | } | 
|  | hasLower = hasLower || ('a' <= c && c <= 'z') | 
|  | } | 
|  |  | 
|  | if isASCII { // optimize for ASCII-only byte slices. | 
|  | if !hasLower { | 
|  | // Just return a copy. | 
|  | return append([]byte(""), s...) | 
|  | } | 
|  | b := make([]byte, len(s)) | 
|  | for i := 0; i < len(s); i++ { | 
|  | c := s[i] | 
|  | if 'a' <= c && c <= 'z' { | 
|  | c -= 'a' - 'A' | 
|  | } | 
|  | b[i] = c | 
|  | } | 
|  | return b | 
|  | } | 
|  | return Map(unicode.ToUpper, s) | 
|  | } | 
|  |  | 
|  | // ToLower returns a copy of the byte slice s with all Unicode letters mapped to | 
|  | // their lower case. | 
|  | func ToLower(s []byte) []byte { | 
|  | isASCII, hasUpper := true, false | 
|  | for i := 0; i < len(s); i++ { | 
|  | c := s[i] | 
|  | if c >= utf8.RuneSelf { | 
|  | isASCII = false | 
|  | break | 
|  | } | 
|  | hasUpper = hasUpper || ('A' <= c && c <= 'Z') | 
|  | } | 
|  |  | 
|  | if isASCII { // optimize for ASCII-only byte slices. | 
|  | if !hasUpper { | 
|  | return append([]byte(""), s...) | 
|  | } | 
|  | b := make([]byte, len(s)) | 
|  | for i := 0; i < len(s); i++ { | 
|  | c := s[i] | 
|  | if 'A' <= c && c <= 'Z' { | 
|  | c += 'a' - 'A' | 
|  | } | 
|  | b[i] = c | 
|  | } | 
|  | return b | 
|  | } | 
|  | return Map(unicode.ToLower, s) | 
|  | } | 
|  |  | 
|  | // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case. | 
|  | func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } | 
|  |  | 
|  | // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their | 
|  | // upper case, giving priority to the special casing rules. | 
|  | func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte { | 
|  | return Map(c.ToUpper, s) | 
|  | } | 
|  |  | 
|  | // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their | 
|  | // lower case, giving priority to the special casing rules. | 
|  | func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte { | 
|  | return Map(c.ToLower, s) | 
|  | } | 
|  |  | 
|  | // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their | 
|  | // title case, giving priority to the special casing rules. | 
|  | func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte { | 
|  | return Map(c.ToTitle, s) | 
|  | } | 
|  |  | 
// ToValidUTF8 treats s as UTF-8-encoded bytes and returns a copy in which
// each run of bytes representing invalid UTF-8 is replaced by the bytes in
// replacement, which may be empty.
func ToValidUTF8(s, replacement []byte) []byte {
	out := make([]byte, 0, len(s)+len(replacement))
	inBadRun := false // the previous byte belonged to an invalid UTF-8 sequence
	for i := 0; i < len(s); {
		if c := s[i]; c < utf8.RuneSelf {
			// ASCII byte: copy through unchanged.
			out = append(out, c)
			inBadRun = false
			i++
			continue
		}
		_, width := utf8.DecodeRune(s[i:])
		if width != 1 {
			// Valid multi-byte sequence: copy through unchanged.
			out = append(out, s[i:i+width]...)
			inBadRun = false
			i += width
			continue
		}
		// A width of 1 for a non-ASCII byte means the byte is invalid.
		// The replacement is emitted once per run of invalid bytes.
		if !inBadRun {
			out = append(out, replacement...)
			inBadRun = true
		}
		i++
	}
	return out
}
|  |  | 
// isSeparator reports whether the rune r could mark a word boundary.
// TODO: update when package unicode captures more of the properties.
func isSeparator(r rune) bool {
	if r <= 0x7F {
		// In this range only alphanumerics and underscore are not
		// separators.
		wordChar := r == '_' ||
			('0' <= r && r <= '9') ||
			('a' <= r && r <= 'z') ||
			('A' <= r && r <= 'Z')
		return !wordChar
	}
	// Letters and digits are not separators.
	if unicode.IsLetter(r) || unicode.IsDigit(r) {
		return false
	}
	// Otherwise, all we can do for now is treat spaces as separators.
	return unicode.IsSpace(r)
}
|  |  | 
|  | // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin | 
|  | // words mapped to their title case. | 
|  | // | 
|  | // BUG(rsc): The rule Title uses for word boundaries does not handle Unicode punctuation properly. | 
|  | func Title(s []byte) []byte { | 
|  | // Use a closure here to remember state. | 
|  | // Hackish but effective. Depends on Map scanning in order and calling | 
|  | // the closure once per rune. | 
|  | prev := ' ' | 
|  | return Map( | 
|  | func(r rune) rune { | 
|  | if isSeparator(prev) { | 
|  | prev = r | 
|  | return unicode.ToTitle(r) | 
|  | } | 
|  | prev = r | 
|  | return r | 
|  | }, | 
|  | s) | 
|  | } | 
|  |  | 
|  | // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off | 
|  | // all leading UTF-8-encoded code points c that satisfy f(c). | 
|  | func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { | 
|  | i := indexFunc(s, f, false) | 
|  | if i == -1 { | 
|  | return nil | 
|  | } | 
|  | return s[i:] | 
|  | } | 
|  |  | 
|  | // TrimRightFunc returns a subslice of s by slicing off all trailing | 
|  | // UTF-8-encoded code points c that satisfy f(c). | 
|  | func TrimRightFunc(s []byte, f func(r rune) bool) []byte { | 
|  | i := lastIndexFunc(s, f, false) | 
|  | if i >= 0 && s[i] >= utf8.RuneSelf { | 
|  | _, wid := utf8.DecodeRune(s[i:]) | 
|  | i += wid | 
|  | } else { | 
|  | i++ | 
|  | } | 
|  | return s[0:i] | 
|  | } | 
|  |  | 
|  | // TrimFunc returns a subslice of s by slicing off all leading and trailing | 
|  | // UTF-8-encoded code points c that satisfy f(c). | 
|  | func TrimFunc(s []byte, f func(r rune) bool) []byte { | 
|  | return TrimRightFunc(TrimLeftFunc(s, f), f) | 
|  | } | 
|  |  | 
|  | // TrimPrefix returns s without the provided leading prefix string. | 
|  | // If s doesn't start with prefix, s is returned unchanged. | 
|  | func TrimPrefix(s, prefix []byte) []byte { | 
|  | if HasPrefix(s, prefix) { | 
|  | return s[len(prefix):] | 
|  | } | 
|  | return s | 
|  | } | 
|  |  | 
|  | // TrimSuffix returns s without the provided trailing suffix string. | 
|  | // If s doesn't end with suffix, s is returned unchanged. | 
|  | func TrimSuffix(s, suffix []byte) []byte { | 
|  | if HasSuffix(s, suffix) { | 
|  | return s[:len(s)-len(suffix)] | 
|  | } | 
|  | return s | 
|  | } | 
|  |  | 
|  | // IndexFunc interprets s as a sequence of UTF-8-encoded code points. | 
|  | // It returns the byte index in s of the first Unicode | 
|  | // code point satisfying f(c), or -1 if none do. | 
|  | func IndexFunc(s []byte, f func(r rune) bool) int { | 
|  | return indexFunc(s, f, true) | 
|  | } | 
|  |  | 
|  | // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points. | 
|  | // It returns the byte index in s of the last Unicode | 
|  | // code point satisfying f(c), or -1 if none do. | 
|  | func LastIndexFunc(s []byte, f func(r rune) bool) int { | 
|  | return lastIndexFunc(s, f, true) | 
|  | } | 
|  |  | 
// indexFunc behaves like IndexFunc, except that it looks for the first code
// point c with f(c) == truth. Passing truth == false therefore inverts the
// sense of the predicate.
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
	for pos := 0; pos < len(s); {
		r, width := rune(s[pos]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(s[pos:])
		}
		if f(r) == truth {
			return pos
		}
		pos += width
	}
	return -1
}
|  |  | 
// lastIndexFunc behaves like LastIndexFunc, except that it looks for the
// last code point c with f(c) == truth. Passing truth == false therefore
// inverts the sense of the predicate.
func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
	end := len(s)
	for end > 0 {
		r, width := rune(s[end-1]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeLastRune(s[:end])
		}
		// Step back to the start of the code point just decoded.
		end -= width
		if f(r) == truth {
			return end
		}
	}
	return -1
}
|  |  | 
// asciiSet is a 32-byte bitmap in which each bit records whether one byte
// value is a member of the set. Byte c is stored in word c>>5 at bit c&31,
// so the 128 bits of the first four words cover the 128 ASCII characters.
// makeASCIISet never sets a bit in the last four words, which guarantees
// that every non-ASCII byte is reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet builds the set of the characters in chars and reports whether
// all of them are ASCII. When ok is false the returned set must not be used:
// it holds only the characters seen before the first non-ASCII byte.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for _, c := range []byte(chars) {
		if c >= utf8.RuneSelf {
			return as, false
		}
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// contains reports whether the byte c is a member of the set.
func (as *asciiSet) contains(c byte) bool {
	word, bit := c>>5, uint(c&31)
	return as[word]&(1<<bit) != 0
}
|  |  | 
|  | func makeCutsetFunc(cutset string) func(r rune) bool { | 
|  | if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { | 
|  | return func(r rune) bool { | 
|  | return r == rune(cutset[0]) | 
|  | } | 
|  | } | 
|  | if as, isASCII := makeASCIISet(cutset); isASCII { | 
|  | return func(r rune) bool { | 
|  | return r < utf8.RuneSelf && as.contains(byte(r)) | 
|  | } | 
|  | } | 
|  | return func(r rune) bool { | 
|  | for _, c := range cutset { | 
|  | if c == r { | 
|  | return true | 
|  | } | 
|  | } | 
|  | return false | 
|  | } | 
|  | } | 
|  |  | 
|  | // Trim returns a subslice of s by slicing off all leading and | 
|  | // trailing UTF-8-encoded code points contained in cutset. | 
|  | func Trim(s []byte, cutset string) []byte { | 
|  | return TrimFunc(s, makeCutsetFunc(cutset)) | 
|  | } | 
|  |  | 
|  | // TrimLeft returns a subslice of s by slicing off all leading | 
|  | // UTF-8-encoded code points contained in cutset. | 
|  | func TrimLeft(s []byte, cutset string) []byte { | 
|  | return TrimLeftFunc(s, makeCutsetFunc(cutset)) | 
|  | } | 
|  |  | 
|  | // TrimRight returns a subslice of s by slicing off all trailing | 
|  | // UTF-8-encoded code points that are contained in cutset. | 
|  | func TrimRight(s []byte, cutset string) []byte { | 
|  | return TrimRightFunc(s, makeCutsetFunc(cutset)) | 
|  | } | 
|  |  | 
|  | // TrimSpace returns a subslice of s by slicing off all leading and | 
|  | // trailing white space, as defined by Unicode. | 
|  | func TrimSpace(s []byte) []byte { | 
|  | // Fast path for ASCII: look for the first ASCII non-space byte | 
|  | start := 0 | 
|  | for ; start < len(s); start++ { | 
|  | c := s[start] | 
|  | if c >= utf8.RuneSelf { | 
|  | // If we run into a non-ASCII byte, fall back to the | 
|  | // slower unicode-aware method on the remaining bytes | 
|  | return TrimFunc(s[start:], unicode.IsSpace) | 
|  | } | 
|  | if asciiSpace[c] == 0 { | 
|  | break | 
|  | } | 
|  | } | 
|  |  | 
|  | // Now look for the first ASCII non-space byte from the end | 
|  | stop := len(s) | 
|  | for ; stop > start; stop-- { | 
|  | c := s[stop-1] | 
|  | if c >= utf8.RuneSelf { | 
|  | return TrimFunc(s[start:stop], unicode.IsSpace) | 
|  | } | 
|  | if asciiSpace[c] == 0 { | 
|  | break | 
|  | } | 
|  | } | 
|  |  | 
|  | // At this point s[start:stop] starts and ends with an ASCII | 
|  | // non-space bytes, so we're done. Non-ASCII cases have already | 
|  | // been handled above. | 
|  | if start == stop { | 
|  | // Special case to preserve previous TrimLeftFunc behavior, | 
|  | // returning nil instead of empty slice if all spaces. | 
|  | return nil | 
|  | } | 
|  | return s[start:stop] | 
|  | } | 
|  |  | 
// Runes interprets s as a sequence of UTF-8-encoded code points.
// It returns a slice of runes (Unicode code points) equivalent to s.
func Runes(s []byte) []rune {
	out := make([]rune, utf8.RuneCount(s))
	n := 0
	for rest := s; len(rest) > 0; n++ {
		r, width := utf8.DecodeRune(rest)
		out[n] = r
		rest = rest[width:]
	}
	return out
}
|  |  | 
|  | // Replace returns a copy of the slice s with the first n | 
|  | // non-overlapping instances of old replaced by new. | 
|  | // If old is empty, it matches at the beginning of the slice | 
|  | // and after each UTF-8 sequence, yielding up to k+1 replacements | 
|  | // for a k-rune slice. | 
|  | // If n < 0, there is no limit on the number of replacements. | 
|  | func Replace(s, old, new []byte, n int) []byte { | 
|  | m := 0 | 
|  | if n != 0 { | 
|  | // Compute number of replacements. | 
|  | m = Count(s, old) | 
|  | } | 
|  | if m == 0 { | 
|  | // Just return a copy. | 
|  | return append([]byte(nil), s...) | 
|  | } | 
|  | if n < 0 || m < n { | 
|  | n = m | 
|  | } | 
|  |  | 
|  | // Apply replacements to buffer. | 
|  | t := make([]byte, len(s)+n*(len(new)-len(old))) | 
|  | w := 0 | 
|  | start := 0 | 
|  | for i := 0; i < n; i++ { | 
|  | j := start | 
|  | if len(old) == 0 { | 
|  | if i > 0 { | 
|  | _, wid := utf8.DecodeRune(s[start:]) | 
|  | j += wid | 
|  | } | 
|  | } else { | 
|  | j += Index(s[start:], old) | 
|  | } | 
|  | w += copy(t[w:], s[start:j]) | 
|  | w += copy(t[w:], new) | 
|  | start = j + len(old) | 
|  | } | 
|  | w += copy(t[w:], s[start:]) | 
|  | return t[0:w] | 
|  | } | 
|  |  | 
|  | // ReplaceAll returns a copy of the slice s with all | 
|  | // non-overlapping instances of old replaced by new. | 
|  | // If old is empty, it matches at the beginning of the slice | 
|  | // and after each UTF-8 sequence, yielding up to k+1 replacements | 
|  | // for a k-rune slice. | 
|  | func ReplaceAll(s, old, new []byte) []byte { | 
|  | return Replace(s, old, new, -1) | 
|  | } | 
|  |  | 
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding, which is a more general
// form of case-insensitivity.
func EqualFold(s, t []byte) bool {
	// next splits off the first rune of p and returns it with the rest.
	// p must be non-empty.
	next := func(p []byte) (rune, []byte) {
		if p[0] < utf8.RuneSelf {
			return rune(p[0]), p[1:]
		}
		r, size := utf8.DecodeRune(p)
		return r, p[size:]
	}

	for len(s) != 0 && len(t) != 0 {
		var sr, tr rune
		sr, s = next(s)
		tr, t = next(t)

		// Easy case: identical runes.
		if sr == tr {
			continue
		}

		// Order the pair as lo < hi to simplify what follows.
		lo, hi := sr, tr
		if hi < lo {
			lo, hi = hi, lo
		}

		// Fast check for ASCII.
		if hi < utf8.RuneSelf {
			// Both runes are ASCII, so they match only as an upper/lower
			// case pair of the same letter.
			if 'A' <= lo && lo <= 'Z' && hi == lo+'a'-'A' {
				continue
			}
			return false
		}

		// General case. SimpleFold(x) returns the next equivalent rune > x
		// or wraps around to smaller values.
		folded := unicode.SimpleFold(lo)
		for folded != lo && folded < hi {
			folded = unicode.SimpleFold(folded)
		}
		if folded != hi {
			return false
		}
	}

	// One input is exhausted. They are equal only if both are.
	return len(s) == len(t)
}
|  |  | 
|  | // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. | 
|  | func Index(s, sep []byte) int { | 
|  | n := len(sep) | 
|  | switch { | 
|  | case n == 0: | 
|  | return 0 | 
|  | case n == 1: | 
|  | return IndexByte(s, sep[0]) | 
|  | case n == len(s): | 
|  | if Equal(sep, s) { | 
|  | return 0 | 
|  | } | 
|  | return -1 | 
|  | case n > len(s): | 
|  | return -1 | 
|  | case n <= bytealg.MaxLen: | 
|  | // Use brute force when s and sep both are small | 
|  | if len(s) <= bytealg.MaxBruteForce { | 
|  | return bytealg.Index(s, sep) | 
|  | } | 
|  | c0 := sep[0] | 
|  | c1 := sep[1] | 
|  | i := 0 | 
|  | t := len(s) - n + 1 | 
|  | fails := 0 | 
|  | for i < t { | 
|  | if s[i] != c0 { | 
|  | // IndexByte is faster than bytealg.Index, so use it as long as | 
|  | // we're not getting lots of false positives. | 
|  | o := IndexByte(s[i:t], c0) | 
|  | if o < 0 { | 
|  | return -1 | 
|  | } | 
|  | i += o | 
|  | } | 
|  | if s[i+1] == c1 && Equal(s[i:i+n], sep) { | 
|  | return i | 
|  | } | 
|  | fails++ | 
|  | i++ | 
|  | // Switch to bytealg.Index when IndexByte produces too many false positives. | 
|  | if fails > bytealg.Cutover(i) { | 
|  | r := bytealg.Index(s[i:], sep) | 
|  | if r >= 0 { | 
|  | return r + i | 
|  | } | 
|  | return -1 | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  | c0 := sep[0] | 
|  | c1 := sep[1] | 
|  | i := 0 | 
|  | fails := 0 | 
|  | t := len(s) - n + 1 | 
|  | for i < t { | 
|  | if s[i] != c0 { | 
|  | o := IndexByte(s[i:t], c0) | 
|  | if o < 0 { | 
|  | break | 
|  | } | 
|  | i += o | 
|  | } | 
|  | if s[i+1] == c1 && Equal(s[i:i+n], sep) { | 
|  | return i | 
|  | } | 
|  | i++ | 
|  | fails++ | 
|  | if fails >= 4+i>>4 && i < t { | 
|  | // Give up on IndexByte, it isn't skipping ahead | 
|  | // far enough to be better than Rabin-Karp. | 
|  | // Experiments (using IndexPeriodic) suggest | 
|  | // the cutover is about 16 byte skips. | 
|  | // TODO: if large prefixes of sep are matching | 
|  | // we should cutover at even larger average skips, | 
|  | // because Equal becomes that much more expensive. | 
|  | // This code does not take that effect into account. | 
|  | j := indexRabinKarp(s[i:], sep) | 
|  | if j < 0 { | 
|  | return -1 | 
|  | } | 
|  | return i + j | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  |  | 
|  | func indexRabinKarp(s, sep []byte) int { | 
|  | // Rabin-Karp search | 
|  | hashsep, pow := hashStr(sep) | 
|  | n := len(sep) | 
|  | var h uint32 | 
|  | for i := 0; i < n; i++ { | 
|  | h = h*primeRK + uint32(s[i]) | 
|  | } | 
|  | if h == hashsep && Equal(s[:n], sep) { | 
|  | return 0 | 
|  | } | 
|  | for i := n; i < len(s); { | 
|  | h *= primeRK | 
|  | h += uint32(s[i]) | 
|  | h -= pow * uint32(s[i-n]) | 
|  | i++ | 
|  | if h == hashsep && Equal(s[i-n:i], sep) { | 
|  | return i - n | 
|  | } | 
|  | } | 
|  | return -1 | 
|  | } | 
|  |  | 
// primeRK is the prime base used in Rabin-Karp algorithm.
const primeRK = 16777619

// hashStr returns the Rabin-Karp hash of sep and the multiplicative
// factor that goes with it: primeRK raised to the power len(sep).
// Both values are computed in wrapping uint32 arithmetic.
func hashStr(sep []byte) (uint32, uint32) {
	var sum uint32
	for _, c := range sep {
		sum = sum*primeRK + uint32(c)
	}

	// Exponentiation by squaring: consume the bits of len(sep) from
	// least to most significant, multiplying in the current square
	// whenever the bit is set.
	factor := uint32(1)
	square := uint32(primeRK)
	for bits := len(sep); bits > 0; bits >>= 1 {
		if bits&1 == 1 {
			factor *= square
		}
		square *= square
	}
	return sum, factor
}
|  |  | 
|  | // hashStrRev returns the hash of the reverse of sep and the | 
|  | // appropriate multiplicative factor for use in Rabin-Karp algorithm. | 
|  | func hashStrRev(sep []byte) (uint32, uint32) { | 
|  | hash := uint32(0) | 
|  | for i := len(sep) - 1; i >= 0; i-- { | 
|  | hash = hash*primeRK + uint32(sep[i]) | 
|  | } | 
|  | var pow, sq uint32 = 1, primeRK | 
|  | for i := len(sep); i > 0; i >>= 1 { | 
|  | if i&1 != 0 { | 
|  | pow *= sq | 
|  | } | 
|  | sq *= sq | 
|  | } | 
|  | return hash, pow | 
|  | } |