| // Copyright 2009 The Go Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style | 
 | // license that can be found in the LICENSE file. | 
 |  | 
 | // Package utf8string provides an efficient way to index strings by rune rather than by byte. | 
 | package utf8string // import "golang.org/x/exp/utf8string" | 
 |  | 
 | import ( | 
 | 	"errors" | 
 | 	"unicode/utf8" | 
 | ) | 
 |  | 
 | // String wraps a regular string with a small structure that provides more | 
 | // efficient indexing by code point index, as opposed to byte index. | 
 | // Scanning incrementally forwards or backwards is O(1) per index operation | 
 | // (although not as fast a range clause going forwards).  Random access is | 
 | // O(N) in the length of the string, but the overhead is less than always | 
 | // scanning from the beginning. | 
 | // If the string is ASCII, random access is O(1). | 
 | // Unlike the built-in string type, String has internal mutable state and | 
 | // is not thread-safe. | 
 | type String struct { | 
 | 	str      string | 
 | 	numRunes int | 
 | 	// If width > 0, the rune at runePos starts at bytePos and has the specified width. | 
 | 	width    int | 
 | 	bytePos  int | 
 | 	runePos  int | 
 | 	nonASCII int // byte index of the first non-ASCII rune. | 
 | } | 
 |  | 
 | // NewString returns a new UTF-8 string with the provided contents. | 
 | func NewString(contents string) *String { | 
 | 	return new(String).Init(contents) | 
 | } | 
 |  | 
 | // Init initializes an existing String to hold the provided contents. | 
 | // It returns a pointer to the initialized String. | 
 | func (s *String) Init(contents string) *String { | 
 | 	s.str = contents | 
 | 	s.bytePos = 0 | 
 | 	s.runePos = 0 | 
 | 	for i := 0; i < len(contents); i++ { | 
 | 		if contents[i] >= utf8.RuneSelf { | 
 | 			// Not ASCII. | 
 | 			s.numRunes = utf8.RuneCountInString(contents) | 
 | 			_, s.width = utf8.DecodeRuneInString(contents) | 
 | 			s.nonASCII = i | 
 | 			return s | 
 | 		} | 
 | 	} | 
 | 	// ASCII is simple.  Also, the empty string is ASCII. | 
 | 	s.numRunes = len(contents) | 
 | 	s.width = 0 | 
 | 	s.nonASCII = len(contents) | 
 | 	return s | 
 | } | 
 |  | 
 | // String returns the contents of the String.  This method also means the | 
 | // String is directly printable by fmt.Print. | 
 | func (s *String) String() string { | 
 | 	return s.str | 
 | } | 
 |  | 
 | // RuneCount returns the number of runes (Unicode code points) in the String. | 
 | func (s *String) RuneCount() int { | 
 | 	return s.numRunes | 
 | } | 
 |  | 
 | // IsASCII returns a boolean indicating whether the String contains only ASCII bytes. | 
 | func (s *String) IsASCII() bool { | 
 | 	return s.width == 0 | 
 | } | 
 |  | 
 | // Slice returns the string sliced at rune positions [i:j]. | 
 | func (s *String) Slice(i, j int) string { | 
 | 	// ASCII is easy.  Let the compiler catch the indexing error if there is one. | 
 | 	if j < s.nonASCII { | 
 | 		return s.str[i:j] | 
 | 	} | 
 | 	if i < 0 || j > s.numRunes || i > j { | 
 | 		panic(sliceOutOfRange) | 
 | 	} | 
 | 	if i == j { | 
 | 		return "" | 
 | 	} | 
 | 	// For non-ASCII, after At(i), bytePos is always the position of the indexed character. | 
 | 	var low, high int | 
 | 	switch { | 
 | 	case i < s.nonASCII: | 
 | 		low = i | 
 | 	case i == s.numRunes: | 
 | 		low = len(s.str) | 
 | 	default: | 
 | 		s.At(i) | 
 | 		low = s.bytePos | 
 | 	} | 
 | 	switch { | 
 | 	case j == s.numRunes: | 
 | 		high = len(s.str) | 
 | 	default: | 
 | 		s.At(j) | 
 | 		high = s.bytePos | 
 | 	} | 
 | 	return s.str[low:high] | 
 | } | 
 |  | 
 | // At returns the rune with index i in the String.  The sequence of runes is the same | 
 | // as iterating over the contents with a "for range" clause. | 
 | func (s *String) At(i int) rune { | 
 | 	// ASCII is easy.  Let the compiler catch the indexing error if there is one. | 
 | 	if i < s.nonASCII { | 
 | 		return rune(s.str[i]) | 
 | 	} | 
 |  | 
 | 	// Now we do need to know the index is valid. | 
 | 	if i < 0 || i >= s.numRunes { | 
 | 		panic(outOfRange) | 
 | 	} | 
 |  | 
 | 	var r rune | 
 |  | 
 | 	// Five easy common cases: within 1 spot of bytePos/runePos, or the beginning, or the end. | 
 | 	// With these cases, all scans from beginning or end work in O(1) time per rune. | 
 | 	switch { | 
 |  | 
 | 	case i == s.runePos-1: // backing up one rune | 
 | 		r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos]) | 
 | 		s.runePos = i | 
 | 		s.bytePos -= s.width | 
 | 		return r | 
 | 	case i == s.runePos+1: // moving ahead one rune | 
 | 		s.runePos = i | 
 | 		s.bytePos += s.width | 
 | 		fallthrough | 
 | 	case i == s.runePos: | 
 | 		r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:]) | 
 | 		return r | 
 | 	case i == 0: // start of string | 
 | 		r, s.width = utf8.DecodeRuneInString(s.str) | 
 | 		s.runePos = 0 | 
 | 		s.bytePos = 0 | 
 | 		return r | 
 |  | 
 | 	case i == s.numRunes-1: // last rune in string | 
 | 		r, s.width = utf8.DecodeLastRuneInString(s.str) | 
 | 		s.runePos = i | 
 | 		s.bytePos = len(s.str) - s.width | 
 | 		return r | 
 | 	} | 
 |  | 
 | 	// We need to do a linear scan.  There are three places to start from: | 
 | 	// 1) The beginning | 
 | 	// 2) bytePos/runePos. | 
 | 	// 3) The end | 
 | 	// Choose the closest in rune count, scanning backwards if necessary. | 
 | 	forward := true | 
 | 	if i < s.runePos { | 
 | 		// Between beginning and pos.  Which is closer? | 
 | 		// Since both i and runePos are guaranteed >= nonASCII, that's the | 
 | 		// lowest location we need to start from. | 
 | 		if i < (s.runePos-s.nonASCII)/2 { | 
 | 			// Scan forward from beginning | 
 | 			s.bytePos, s.runePos = s.nonASCII, s.nonASCII | 
 | 		} else { | 
 | 			// Scan backwards from where we are | 
 | 			forward = false | 
 | 		} | 
 | 	} else { | 
 | 		// Between pos and end.  Which is closer? | 
 | 		if i-s.runePos < (s.numRunes-s.runePos)/2 { | 
 | 			// Scan forward from pos | 
 | 		} else { | 
 | 			// Scan backwards from end | 
 | 			s.bytePos, s.runePos = len(s.str), s.numRunes | 
 | 			forward = false | 
 | 		} | 
 | 	} | 
 | 	if forward { | 
 | 		// TODO: Is it much faster to use a range loop for this scan? | 
 | 		for { | 
 | 			r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:]) | 
 | 			if s.runePos == i { | 
 | 				break | 
 | 			} | 
 | 			s.runePos++ | 
 | 			s.bytePos += s.width | 
 | 		} | 
 | 	} else { | 
 | 		for { | 
 | 			r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos]) | 
 | 			s.runePos-- | 
 | 			s.bytePos -= s.width | 
 | 			if s.runePos == i { | 
 | 				break | 
 | 			} | 
 | 		} | 
 | 	} | 
 | 	return r | 
 | } | 
 |  | 
 | var outOfRange = errors.New("utf8string: index out of range") | 
 | var sliceOutOfRange = errors.New("utf8string: slice index out of range") |