blob: b578d553d49f79499691e40b8648db773f10537a [file] [log] [blame]
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package seq
import "unicode/utf16"
// Based heavily on package unicode/utf16 from the Go standard library.
const (
replacementChar = '\uFFFD' // Unicode replacement character
maxRune = '\U0010FFFF' // Maximum valid Unicode code point.
)
const (
// 0xd800-0xdc00 encodes the high 10 bits of a pair.
// 0xdc00-0xe000 encodes the low 10 bits of a pair.
// the value is those 20 bits plus 0x10000.
surr1 = 0xd800
surr2 = 0xdc00
surr3 = 0xe000
surrSelf = 0x10000
)
// UTF16Encode utf16 encodes s into chars. It returns the resulting
// length in units of uint16. It is assumed that the chars slice
// has enough room for the encoded string.
func UTF16Encode(s string, chars []uint16) int {
n := 0
for _, v := range s {
switch {
case v < 0, surr1 <= v && v < surr3, v > maxRune:
v = replacementChar
fallthrough
case v < surrSelf:
chars[n] = uint16(v)
n += 1
default:
// surrogate pair, two uint16 values
r1, r2 := utf16.EncodeRune(v)
chars[n] = uint16(r1)
chars[n+1] = uint16(r2)
n += 2
}
}
return n
}