| // Copyright 2010 The Go Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style | 
 | // license that can be found in the LICENSE file. | 
 |  | 
 | // Package utf16 implements encoding and decoding of UTF-16 sequences. | 
 | package utf16 | 
 |  | 
 | // The conditions replacementChar==unicode.ReplacementChar and | 
 | // maxRune==unicode.MaxRune are verified in the tests. | 
 | // Defining them locally avoids this package depending on package unicode. | 
 |  | 
 | const ( | 
 | 	replacementChar = '\uFFFD'     // Unicode replacement character | 
 | 	maxRune         = '\U0010FFFF' // Maximum valid Unicode code point. | 
 | ) | 
 |  | 
 | const ( | 
 | 	// 0xd800-0xdc00 encodes the high 10 bits of a pair. | 
 | 	// 0xdc00-0xe000 encodes the low 10 bits of a pair. | 
 | 	// the value is those 20 bits plus 0x10000. | 
 | 	surr1 = 0xd800 | 
 | 	surr2 = 0xdc00 | 
 | 	surr3 = 0xe000 | 
 |  | 
 | 	surrSelf = 0x10000 | 
 | ) | 
 |  | 
 | // IsSurrogate reports whether the specified Unicode code point | 
 | // can appear in a surrogate pair. | 
 | func IsSurrogate(r rune) bool { | 
 | 	return surr1 <= r && r < surr3 | 
 | } | 
 |  | 
 | // DecodeRune returns the UTF-16 decoding of a surrogate pair. | 
 | // If the pair is not a valid UTF-16 surrogate pair, DecodeRune returns | 
 | // the Unicode replacement code point U+FFFD. | 
 | func DecodeRune(r1, r2 rune) rune { | 
 | 	if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 { | 
 | 		return (r1-surr1)<<10 | (r2 - surr2) + surrSelf | 
 | 	} | 
 | 	return replacementChar | 
 | } | 
 |  | 
 | // EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune. | 
 | // If the rune is not a valid Unicode code point or does not need encoding, | 
 | // EncodeRune returns U+FFFD, U+FFFD. | 
 | func EncodeRune(r rune) (r1, r2 rune) { | 
 | 	if r < surrSelf || r > maxRune { | 
 | 		return replacementChar, replacementChar | 
 | 	} | 
 | 	r -= surrSelf | 
 | 	return surr1 + (r>>10)&0x3ff, surr2 + r&0x3ff | 
 | } | 
 |  | 
 | // Encode returns the UTF-16 encoding of the Unicode code point sequence s. | 
 | func Encode(s []rune) []uint16 { | 
 | 	n := len(s) | 
 | 	for _, v := range s { | 
 | 		if v >= surrSelf { | 
 | 			n++ | 
 | 		} | 
 | 	} | 
 |  | 
 | 	a := make([]uint16, n) | 
 | 	n = 0 | 
 | 	for _, v := range s { | 
 | 		switch { | 
 | 		case 0 <= v && v < surr1, surr3 <= v && v < surrSelf: | 
 | 			// normal rune | 
 | 			a[n] = uint16(v) | 
 | 			n++ | 
 | 		case surrSelf <= v && v <= maxRune: | 
 | 			// needs surrogate sequence | 
 | 			r1, r2 := EncodeRune(v) | 
 | 			a[n] = uint16(r1) | 
 | 			a[n+1] = uint16(r2) | 
 | 			n += 2 | 
 | 		default: | 
 | 			a[n] = uint16(replacementChar) | 
 | 			n++ | 
 | 		} | 
 | 	} | 
 | 	return a[:n] | 
 | } | 
 |  | 
 | // Decode returns the Unicode code point sequence represented | 
 | // by the UTF-16 encoding s. | 
 | func Decode(s []uint16) []rune { | 
 | 	a := make([]rune, len(s)) | 
 | 	n := 0 | 
 | 	for i := 0; i < len(s); i++ { | 
 | 		switch r := s[i]; { | 
 | 		case r < surr1, surr3 <= r: | 
 | 			// normal rune | 
 | 			a[n] = rune(r) | 
 | 		case surr1 <= r && r < surr2 && i+1 < len(s) && | 
 | 			surr2 <= s[i+1] && s[i+1] < surr3: | 
 | 			// valid surrogate sequence | 
 | 			a[n] = DecodeRune(rune(r), rune(s[i+1])) | 
 | 			i++ | 
 | 		default: | 
 | 			// invalid surrogate sequence | 
 | 			a[n] = replacementChar | 
 | 		} | 
 | 		n++ | 
 | 	} | 
 | 	return a[:n] | 
 | } |