| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package strings |
| |
| import ( |
| "io" |
| "sync" |
| ) |
| |
| // Replacer replaces a list of strings with replacements. |
| // It is safe for concurrent use by multiple goroutines. |
| type Replacer struct { |
| once sync.Once // guards buildOnce method |
| r replacer |
| oldnew []string |
| } |
| |
| // replacer is the interface that a replacement algorithm needs to implement. |
| type replacer interface { |
| Replace(s string) string |
| WriteString(w io.Writer, s string) (n int, err error) |
| } |
| |
| // NewReplacer returns a new Replacer from a list of old, new string |
| // pairs. Replacements are performed in the order they appear in the |
| // target string, without overlapping matches. |
| // |
| // NewReplacer panics if given an odd number of arguments. |
| func NewReplacer(oldnew ...string) *Replacer { |
| if len(oldnew)%2 == 1 { |
| panic("strings.NewReplacer: odd argument count") |
| } |
| return &Replacer{oldnew: append([]string(nil), oldnew...)} |
| } |
| |
| func (r *Replacer) buildOnce() { |
| r.r = r.build() |
| r.oldnew = nil |
| } |
| |
| func (b *Replacer) build() replacer { |
| oldnew := b.oldnew |
| if len(oldnew) == 2 && len(oldnew[0]) > 1 { |
| return makeSingleStringReplacer(oldnew[0], oldnew[1]) |
| } |
| |
| allNewBytes := true |
| for i := 0; i < len(oldnew); i += 2 { |
| if len(oldnew[i]) != 1 { |
| return makeGenericReplacer(oldnew) |
| } |
| if len(oldnew[i+1]) != 1 { |
| allNewBytes = false |
| } |
| } |
| |
| if allNewBytes { |
| r := byteReplacer{} |
| for i := range r { |
| r[i] = byte(i) |
| } |
| // The first occurrence of old->new map takes precedence |
| // over the others with the same old string. |
| for i := len(oldnew) - 2; i >= 0; i -= 2 { |
| o := oldnew[i][0] |
| n := oldnew[i+1][0] |
| r[o] = n |
| } |
| return &r |
| } |
| |
| r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)} |
| // The first occurrence of old->new map takes precedence |
| // over the others with the same old string. |
| for i := len(oldnew) - 2; i >= 0; i -= 2 { |
| o := oldnew[i][0] |
| n := oldnew[i+1] |
| // To avoid counting repetitions multiple times. |
| if r.replacements[o] == nil { |
| // We need to use string([]byte{o}) instead of string(o), |
| // to avoid utf8 encoding of o. |
| // E. g. byte(150) produces string of length 2. |
| r.toReplace = append(r.toReplace, string([]byte{o})) |
| } |
| r.replacements[o] = []byte(n) |
| |
| } |
| return &r |
| } |
| |
| // Replace returns a copy of s with all replacements performed. |
| func (r *Replacer) Replace(s string) string { |
| r.once.Do(r.buildOnce) |
| return r.r.Replace(s) |
| } |
| |
| // WriteString writes s to w with all replacements performed. |
| func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) { |
| r.once.Do(r.buildOnce) |
| return r.r.WriteString(w, s) |
| } |
| |
| // trieNode is a node in a lookup trie for prioritized key/value pairs. Keys |
| // and values may be empty. For example, the trie containing keys "ax", "ay", |
| // "bcbc", "x" and "xy" could have eight nodes: |
| // |
| // n0 - |
| // n1 a- |
| // n2 .x+ |
| // n3 .y+ |
| // n4 b- |
| // n5 .cbc+ |
| // n6 x+ |
| // n7 .y+ |
| // |
| // n0 is the root node, and its children are n1, n4 and n6; n1's children are |
| // n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked |
| // with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7 |
| // (marked with a trailing "+") are complete keys. |
| type trieNode struct { |
| // value is the value of the trie node's key/value pair. It is empty if |
| // this node is not a complete key. |
| value string |
| // priority is the priority (higher is more important) of the trie node's |
| // key/value pair; keys are not necessarily matched shortest- or longest- |
| // first. Priority is positive if this node is a complete key, and zero |
| // otherwise. In the example above, positive/zero priorities are marked |
| // with a trailing "+" or "-". |
| priority int |
| |
| // A trie node may have zero, one or more child nodes: |
| // * if the remaining fields are zero, there are no children. |
| // * if prefix and next are non-zero, there is one child in next. |
| // * if table is non-zero, it defines all the children. |
| // |
| // Prefixes are preferred over tables when there is one child, but the |
| // root node always uses a table for lookup efficiency. |
| |
| // prefix is the difference in keys between this trie node and the next. |
| // In the example above, node n4 has prefix "cbc" and n4's next node is n5. |
| // Node n5 has no children and so has zero prefix, next and table fields. |
| prefix string |
| next *trieNode |
| |
| // table is a lookup table indexed by the next byte in the key, after |
| // remapping that byte through genericReplacer.mapping to create a dense |
| // index. In the example above, the keys only use 'a', 'b', 'c', 'x' and |
| // 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and |
| // genericReplacer.tableSize will be 5. Node n0's table will be |
| // []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped |
| // 'a', 'b' and 'x'. |
| table []*trieNode |
| } |
| |
| func (t *trieNode) add(key, val string, priority int, r *genericReplacer) { |
| if key == "" { |
| if t.priority == 0 { |
| t.value = val |
| t.priority = priority |
| } |
| return |
| } |
| |
| if t.prefix != "" { |
| // Need to split the prefix among multiple nodes. |
| var n int // length of the longest common prefix |
| for ; n < len(t.prefix) && n < len(key); n++ { |
| if t.prefix[n] != key[n] { |
| break |
| } |
| } |
| if n == len(t.prefix) { |
| t.next.add(key[n:], val, priority, r) |
| } else if n == 0 { |
| // First byte differs, start a new lookup table here. Looking up |
| // what is currently t.prefix[0] will lead to prefixNode, and |
| // looking up key[0] will lead to keyNode. |
| var prefixNode *trieNode |
| if len(t.prefix) == 1 { |
| prefixNode = t.next |
| } else { |
| prefixNode = &trieNode{ |
| prefix: t.prefix[1:], |
| next: t.next, |
| } |
| } |
| keyNode := new(trieNode) |
| t.table = make([]*trieNode, r.tableSize) |
| t.table[r.mapping[t.prefix[0]]] = prefixNode |
| t.table[r.mapping[key[0]]] = keyNode |
| t.prefix = "" |
| t.next = nil |
| keyNode.add(key[1:], val, priority, r) |
| } else { |
| // Insert new node after the common section of the prefix. |
| next := &trieNode{ |
| prefix: t.prefix[n:], |
| next: t.next, |
| } |
| t.prefix = t.prefix[:n] |
| t.next = next |
| next.add(key[n:], val, priority, r) |
| } |
| } else if t.table != nil { |
| // Insert into existing table. |
| m := r.mapping[key[0]] |
| if t.table[m] == nil { |
| t.table[m] = new(trieNode) |
| } |
| t.table[m].add(key[1:], val, priority, r) |
| } else { |
| t.prefix = key |
| t.next = new(trieNode) |
| t.next.add("", val, priority, r) |
| } |
| } |
| |
| func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) { |
| // Iterate down the trie to the end, and grab the value and keylen with |
| // the highest priority. |
| bestPriority := 0 |
| node := &r.root |
| n := 0 |
| for node != nil { |
| if node.priority > bestPriority && !(ignoreRoot && node == &r.root) { |
| bestPriority = node.priority |
| val = node.value |
| keylen = n |
| found = true |
| } |
| |
| if s == "" { |
| break |
| } |
| if node.table != nil { |
| index := r.mapping[s[0]] |
| if int(index) == r.tableSize { |
| break |
| } |
| node = node.table[index] |
| s = s[1:] |
| n++ |
| } else if node.prefix != "" && HasPrefix(s, node.prefix) { |
| n += len(node.prefix) |
| s = s[len(node.prefix):] |
| node = node.next |
| } else { |
| break |
| } |
| } |
| return |
| } |
| |
| // genericReplacer is the fully generic algorithm. |
| // It's used as a fallback when nothing faster can be used. |
| type genericReplacer struct { |
| root trieNode |
| // tableSize is the size of a trie node's lookup table. It is the number |
| // of unique key bytes. |
| tableSize int |
| // mapping maps from key bytes to a dense index for trieNode.table. |
| mapping [256]byte |
| } |
| |
| func makeGenericReplacer(oldnew []string) *genericReplacer { |
| r := new(genericReplacer) |
| // Find each byte used, then assign them each an index. |
| for i := 0; i < len(oldnew); i += 2 { |
| key := oldnew[i] |
| for j := 0; j < len(key); j++ { |
| r.mapping[key[j]] = 1 |
| } |
| } |
| |
| for _, b := range r.mapping { |
| r.tableSize += int(b) |
| } |
| |
| var index byte |
| for i, b := range r.mapping { |
| if b == 0 { |
| r.mapping[i] = byte(r.tableSize) |
| } else { |
| r.mapping[i] = index |
| index++ |
| } |
| } |
| // Ensure root node uses a lookup table (for performance). |
| r.root.table = make([]*trieNode, r.tableSize) |
| |
| for i := 0; i < len(oldnew); i += 2 { |
| r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r) |
| } |
| return r |
| } |
| |
| type appendSliceWriter []byte |
| |
| // Write writes to the buffer to satisfy io.Writer. |
| func (w *appendSliceWriter) Write(p []byte) (int, error) { |
| *w = append(*w, p...) |
| return len(p), nil |
| } |
| |
| // WriteString writes to the buffer without string->[]byte->string allocations. |
| func (w *appendSliceWriter) WriteString(s string) (int, error) { |
| *w = append(*w, s...) |
| return len(s), nil |
| } |
| |
| type stringWriter struct { |
| w io.Writer |
| } |
| |
| func (w stringWriter) WriteString(s string) (int, error) { |
| return w.w.Write([]byte(s)) |
| } |
| |
| func getStringWriter(w io.Writer) io.StringWriter { |
| sw, ok := w.(io.StringWriter) |
| if !ok { |
| sw = stringWriter{w} |
| } |
| return sw |
| } |
| |
| func (r *genericReplacer) Replace(s string) string { |
| buf := make(appendSliceWriter, 0, len(s)) |
| r.WriteString(&buf, s) |
| return string(buf) |
| } |
| |
| func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) { |
| sw := getStringWriter(w) |
| var last, wn int |
| var prevMatchEmpty bool |
| for i := 0; i <= len(s); { |
| // Fast path: s[i] is not a prefix of any pattern. |
| if i != len(s) && r.root.priority == 0 { |
| index := int(r.mapping[s[i]]) |
| if index == r.tableSize || r.root.table[index] == nil { |
| i++ |
| continue |
| } |
| } |
| |
| // Ignore the empty match iff the previous loop found the empty match. |
| val, keylen, match := r.lookup(s[i:], prevMatchEmpty) |
| prevMatchEmpty = match && keylen == 0 |
| if match { |
| wn, err = sw.WriteString(s[last:i]) |
| n += wn |
| if err != nil { |
| return |
| } |
| wn, err = sw.WriteString(val) |
| n += wn |
| if err != nil { |
| return |
| } |
| i += keylen |
| last = i |
| continue |
| } |
| i++ |
| } |
| if last != len(s) { |
| wn, err = sw.WriteString(s[last:]) |
| n += wn |
| } |
| return |
| } |
| |
| // singleStringReplacer is the implementation that's used when there is only |
| // one string to replace (and that string has more than one byte). |
| type singleStringReplacer struct { |
| finder *stringFinder |
| // value is the new string that replaces that pattern when it's found. |
| value string |
| } |
| |
| func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer { |
| return &singleStringReplacer{finder: makeStringFinder(pattern), value: value} |
| } |
| |
| func (r *singleStringReplacer) Replace(s string) string { |
| var buf []byte |
| i, matched := 0, false |
| for { |
| match := r.finder.next(s[i:]) |
| if match == -1 { |
| break |
| } |
| matched = true |
| buf = append(buf, s[i:i+match]...) |
| buf = append(buf, r.value...) |
| i += match + len(r.finder.pattern) |
| } |
| if !matched { |
| return s |
| } |
| buf = append(buf, s[i:]...) |
| return string(buf) |
| } |
| |
| func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { |
| sw := getStringWriter(w) |
| var i, wn int |
| for { |
| match := r.finder.next(s[i:]) |
| if match == -1 { |
| break |
| } |
| wn, err = sw.WriteString(s[i : i+match]) |
| n += wn |
| if err != nil { |
| return |
| } |
| wn, err = sw.WriteString(r.value) |
| n += wn |
| if err != nil { |
| return |
| } |
| i += match + len(r.finder.pattern) |
| } |
| wn, err = sw.WriteString(s[i:]) |
| n += wn |
| return |
| } |
| |
| // byteReplacer is the implementation that's used when all the "old" |
| // and "new" values are single ASCII bytes. |
| // The array contains replacement bytes indexed by old byte. |
| type byteReplacer [256]byte |
| |
| func (r *byteReplacer) Replace(s string) string { |
| var buf []byte // lazily allocated |
| for i := 0; i < len(s); i++ { |
| b := s[i] |
| if r[b] != b { |
| if buf == nil { |
| buf = []byte(s) |
| } |
| buf[i] = r[b] |
| } |
| } |
| if buf == nil { |
| return s |
| } |
| return string(buf) |
| } |
| |
| func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { |
| // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. |
| bufsize := 32 << 10 |
| if len(s) < bufsize { |
| bufsize = len(s) |
| } |
| buf := make([]byte, bufsize) |
| |
| for len(s) > 0 { |
| ncopy := copy(buf, s) |
| s = s[ncopy:] |
| for i, b := range buf[:ncopy] { |
| buf[i] = r[b] |
| } |
| wn, err := w.Write(buf[:ncopy]) |
| n += wn |
| if err != nil { |
| return n, err |
| } |
| } |
| return n, nil |
| } |
| |
| // byteStringReplacer is the implementation that's used when all the |
| // "old" values are single ASCII bytes but the "new" values vary in size. |
| type byteStringReplacer struct { |
| // replacements contains replacement byte slices indexed by old byte. |
| // A nil []byte means that the old byte should not be replaced. |
| replacements [256][]byte |
| // toReplace keeps a list of bytes to replace. Depending on length of toReplace |
| // and length of target string it may be faster to use Count, or a plain loop. |
| // We store single byte as a string, because Count takes a string. |
| toReplace []string |
| } |
| |
| // countCutOff controls the ratio of a string length to a number of replacements |
| // at which (*byteStringReplacer).Replace switches algorithms. |
| // For strings with higher ration of length to replacements than that value, |
| // we call Count, for each replacement from toReplace. |
| // For strings, with a lower ratio we use simple loop, because of Count overhead. |
| // countCutOff is an empirically determined overhead multiplier. |
| // TODO(tocarip) revisit once we have register-based abi/mid-stack inlining. |
| const countCutOff = 8 |
| |
| func (r *byteStringReplacer) Replace(s string) string { |
| newSize := len(s) |
| anyChanges := false |
| // Is it faster to use Count? |
| if len(r.toReplace)*countCutOff <= len(s) { |
| for _, x := range r.toReplace { |
| if c := Count(s, x); c != 0 { |
| // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes. |
| newSize += c * (len(r.replacements[x[0]]) - 1) |
| anyChanges = true |
| } |
| |
| } |
| } else { |
| for i := 0; i < len(s); i++ { |
| b := s[i] |
| if r.replacements[b] != nil { |
| // See above for explanation of -1 |
| newSize += len(r.replacements[b]) - 1 |
| anyChanges = true |
| } |
| } |
| } |
| if !anyChanges { |
| return s |
| } |
| buf := make([]byte, newSize) |
| j := 0 |
| for i := 0; i < len(s); i++ { |
| b := s[i] |
| if r.replacements[b] != nil { |
| j += copy(buf[j:], r.replacements[b]) |
| } else { |
| buf[j] = b |
| j++ |
| } |
| } |
| return string(buf) |
| } |
| |
| func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) { |
| sw := getStringWriter(w) |
| last := 0 |
| for i := 0; i < len(s); i++ { |
| b := s[i] |
| if r.replacements[b] == nil { |
| continue |
| } |
| if last != i { |
| nw, err := sw.WriteString(s[last:i]) |
| n += nw |
| if err != nil { |
| return n, err |
| } |
| } |
| last = i + 1 |
| nw, err := w.Write(r.replacements[b]) |
| n += nw |
| if err != nil { |
| return n, err |
| } |
| } |
| if last != len(s) { |
| var nw int |
| nw, err = sw.WriteString(s[last:]) |
| n += nw |
| } |
| return |
| } |