unicode/norm/normalize.go - text - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Note: the file data_test.go that is generated should not be checked in.
 //go:generate go run maketables.go triegen.go
 //go:generate go test -tags test

 // Package norm contains types and functions for normalizing Unicode strings.
 package norm // import "golang.org/x/text/unicode/norm"

 import (
 	"unicode/utf8"

 	"golang.org/x/text/transform"
 )

 // A Form denotes a canonical representation of Unicode code points.
 // The Unicode-defined normalization and equivalence forms are:
 //
 //	NFC   Unicode Normalization Form C
 //	NFD   Unicode Normalization Form D
 //	NFKC  Unicode Normalization Form KC
 //	NFKD  Unicode Normalization Form KD
 //
 // For a Form f, this documentation uses the notation f(x) to mean
 // the bytes or string x converted to the given form.
 // A position n in x is called a boundary if conversion to the form can
 // proceed independently on both sides:
 //
 //	f(x) == append(f(x[0:n]), f(x[n:])...)
 //
 // References: https://unicode.org/reports/tr15/ and
 // https://unicode.org/notes/tn5/.
 type Form int

 // The normalization forms. The values are assigned by iota in the order
 // NFC = 0, NFD = 1, NFKC = 2, NFKD = 3 and are used directly as indexes into
 // formTable by the methods in this file.
 // NOTE(review): presumably this order must stay in sync with the order of
 // the formTable entries; confirm before reordering.
 const (
 	// NFC is Unicode Normalization Form C.
 	NFC Form = iota
 	// NFD is Unicode Normalization Form D.
 	NFD
 	// NFKC is Unicode Normalization Form KC.
 	NFKC
 	// NFKD is Unicode Normalization Form KD.
 	NFKD
 )

 // Bytes returns f(b). When b is already in form f, b itself is returned
 // rather than a copy.
 func (f Form) Bytes(b []byte) []byte {
 	in := inputBytes(b)
 	table := formTable[f]
 	prefix, normal := table.quickSpan(in, 0, len(b), true)
 	if normal {
 		// Nothing needs rewriting: hand back the caller's slice untouched.
 		return b
 	}
 	// Seed the result with the prefix that quickSpan accepted and let the
 	// reorder buffer process the remainder.
 	dst := make([]byte, prefix, len(b))
 	copy(dst, b[:prefix])
 	rb := reorderBuffer{
 		f:      *table,
 		src:    in,
 		nsrc:   len(b),
 		out:    dst,
 		flushF: appendFlush,
 	}
 	return doAppendInner(&rb, prefix)
 }

 // String returns f(s). When s is already in form f, s is returned as is.
 func (f Form) String(s string) string {
 	in := inputString(s)
 	table := formTable[f]
 	prefix, normal := table.quickSpan(in, 0, len(s), true)
 	if normal {
 		return s
 	}
 	// Seed the result with the prefix that quickSpan accepted and let the
 	// reorder buffer process the remainder.
 	dst := make([]byte, prefix, len(s))
 	copy(dst, s[:prefix])
 	rb := reorderBuffer{
 		f:      *table,
 		src:    in,
 		nsrc:   len(s),
 		out:    dst,
 		flushF: appendFlush,
 	}
 	normalized := doAppendInner(&rb, prefix)
 	return string(normalized)
 }

 // IsNormal reports whether b is already in form f, that is, b == f(b).
 func (f Form) IsNormal(b []byte) bool {
 	in := inputBytes(b)
 	table := formTable[f]
 	pos, normal := table.quickSpan(in, 0, len(b), true)
 	if normal {
 		return true
 	}
 	// Slow path: decompose segment by segment. cmpNormalBytes is installed as
 	// the flush function and compares each flushed segment with rb.out, which
 	// is pointed at the not-yet-verified part of b.
 	rb := reorderBuffer{f: *table, src: in, nsrc: len(b)}
 	rb.setFlusher(nil, cmpNormalBytes)
 	for pos < len(b) {
 		rb.out = b[pos:]
 		pos = decomposeSegment(&rb, pos, true)
 		if pos < 0 {
 			return false
 		}
 		pos, _ = rb.f.quickSpan(rb.src, pos, len(b), true)
 	}
 	return true
 }

 // cmpNormalBytes reports whether the runes currently held in rb, taken in
 // order, are byte-for-byte equal to the start of rb.out. It returns false if
 // rb.out is too short for a rune or if any byte differs.
 func cmpNormalBytes(rb *reorderBuffer) bool {
 	rest := rb.out
 	for i := 0; i < rb.nrune; i++ {
 		info := rb.rune[i]
 		if len(rest) < int(info.size) {
 			return false
 		}
 		// Compare the rune's bytes in rb.byte with the head of rest,
 		// consuming rest as we go.
 		for p, pe := info.pos, info.pos+info.size; p < pe; p++ {
 			if rest[0] != rb.byte[p] {
 				return false
 			}
 			rest = rest[1:]
 		}
 	}
 	return true
 }

 // IsNormalString reports whether s is already in form f, that is, s == f(s).
 func (f Form) IsNormalString(s string) bool {
 	in := inputString(s)
 	table := formTable[f]
 	pos, normal := table.quickSpan(in, 0, len(s), true)
 	if normal {
 		return true
 	}
 	// matches compares the runes held in the reorder buffer with s starting
 	// at pos, advancing pos past every byte that matched.
 	matches := func(rb *reorderBuffer) bool {
 		for i := 0; i < rb.nrune; i++ {
 			info := rb.rune[i]
 			if pos+int(info.size) > len(s) {
 				return false
 			}
 			for p, pe := info.pos, info.pos+info.size; p < pe; p++ {
 				if s[pos] != rb.byte[p] {
 					return false
 				}
 				pos++
 			}
 		}
 		return true
 	}
 	rb := reorderBuffer{f: *table, src: in, nsrc: len(s)}
 	rb.setFlusher(nil, matches)
 	for pos < len(s) {
 		pos = decomposeSegment(&rb, pos, true)
 		if pos < 0 {
 			return false
 		}
 		pos, _ = rb.f.quickSpan(rb.src, pos, len(s), true)
 	}
 	return true
 }

 // patchTail fixes a case where a rune may be incorrectly normalized
 // if it is followed by illegal continuation bytes. It modifies rb.out in
 // place and reports whether the decomposition is still in progress, that is,
 // whether the caller should go on to merge rb with the source that follows.
 func patchTail(rb *reorderBuffer) bool {
 	info, p := lastRuneStart(&rb.f, rb.out)
 	if p == -1 || info.size == 0 {
 		// rb.out contains no rune start, or its last rune has size 0:
 		// leave rb.out untouched.
 		return true
 	}
 	end := p + int(info.size)
 	extra := len(rb.out) - end
 	if extra > 0 {
 		// rb.out has extra bytes after its last rune. Set them aside,
 		// decompose and flush what precedes them, then put them back.
 		// Potentially allocating memory. However, this only
 		// happens with ill-formed UTF-8.
 		x := make([]byte, 0)
 		x = append(x, rb.out[len(rb.out)-extra:]...)
 		rb.out = rb.out[:end]
 		decomposeToLastBoundary(rb)
 		rb.doFlush()
 		rb.out = append(rb.out, x...)
 		return false
 	}
 	// The last rune ends exactly at the end of rb.out. Take it off rb.out,
 	// load the segment that precedes it into rb, and then insert the rune
 	// itself into rb.
 	buf := rb.out[p:]
 	rb.out = rb.out[:p]
 	decomposeToLastBoundary(rb)
 	if s := rb.ss.next(info); s == ssStarter {
 		// The rune starts a new segment: flush what was loaded first.
 		rb.doFlush()
 		rb.ss.first(info)
 	} else if s == ssOverflow {
 		// Too many non-starters: flush, insert a CGJ and reset the
 		// stream-safe state.
 		rb.doFlush()
 		rb.insertCGJ()
 		rb.ss = 0
 	}
 	rb.insertUnsafe(inputBytes(buf), 0, info)
 	return true
 }

 // appendQuick appends to rb.out the part of rb.src starting at i that
 // quickSpan accepts, and returns the source position where that part ends.
 // It returns i unchanged when i is already at the end of the source.
 func appendQuick(rb *reorderBuffer, i int) int {
 	if i == rb.nsrc {
 		return i
 	}
 	stop, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
 	rb.out = rb.src.appendSlice(rb.out, i, stop)
 	return stop
 }

 // Append returns f(append(out, src...)).
 // The buffer out must be nil, empty, or already in form f (out == f(out)).
 func (f Form) Append(out []byte, src ...byte) []byte {
 	in := inputBytes(src)
 	return f.doAppend(out, in, len(src))
 }

 // doAppend is the shared implementation of Append and AppendString. It
 // appends the form-f normalization of the first n bytes of src to out and
 // returns the result.
 func (f Form) doAppend(out []byte, src input, n int) []byte {
 	if n == 0 {
 		return out
 	}
 	table := formTable[f]
 	if len(out) != 0 {
 		// There is existing output; the package-level doAppend handles
 		// merging it with the start of src.
 		rb := reorderBuffer{f: *table, src: src, nsrc: n}
 		return doAppend(&rb, out, 0)
 	}
 	// With nothing in out yet, try quickSpan first so the reorderBuffer only
 	// has to be initialized when part of src actually needs processing.
 	done, _ := table.quickSpan(src, 0, n, true)
 	out = src.appendSlice(out, 0, done)
 	if done == n {
 		return out
 	}
 	rb := reorderBuffer{f: *table, src: src, nsrc: n, out: out, flushF: appendFlush}
 	return doAppendInner(&rb, done)
 }

 // doAppend appends the normalized form of rb.src[p:rb.nsrc] to out and
 // returns the resulting buffer. When out is non-empty, the end of out is
 // first merged with the start of the source if no boundary separates them.
 func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
 	// NOTE(review): setFlusher presumably installs out as rb.out; the code
 	// below reads and writes rb.out from here on.
 	rb.setFlusher(out, appendFlush)
 	src, n := rb.src, rb.nsrc
 	// Merging is only considered when there is existing output.
 	doMerge := len(out) > 0
 	if q := src.skipContinuationBytes(p); q > p {
 		// Move the leading continuation bytes of the source to the
 		// destination, then let patchTail repair the end of rb.out.
 		rb.out = src.appendSlice(rb.out, p, q)
 		p = q
 		doMerge = patchTail(rb)
 	}
 	fd := &rb.f
 	if doMerge {
 		var info Properties
 		if p < n {
 			info = fd.info(src, p)
 			if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
 				// p is 0 only if the step above did not advance it, in
 				// which case the trailing segment of rb.out has not been
 				// loaded into rb yet.
 				if p == 0 {
 					decomposeToLastBoundary(rb)
 				}
 				p = decomposeSegment(rb, p, true)
 			}
 		}
 		// info.size is also 0 when p >= n, because info then keeps its
 		// zero value.
 		if info.size == 0 {
 			rb.doFlush()
 			// Append incomplete UTF-8 encoding.
 			return src.appendSlice(rb.out, p, n)
 		}
 		// rb still holds runes: continue with decomposition instead of
 		// first copying a quick span.
 		if rb.nrune > 0 {
 			return doAppendInner(rb, p)
 		}
 	}
 	p = appendQuick(rb, p)
 	return doAppendInner(rb, p)
 }

 // doAppendInner processes rb.src from position p to rb.nsrc, alternating
 // between decomposing one segment and bulk-copying the part that follows and
 // that quickSpan accepts. It returns rb.out.
 func doAppendInner(rb *reorderBuffer, p int) []byte {
 	end := rb.nsrc
 	for p < end {
 		p = appendQuick(rb, decomposeSegment(rb, p, true))
 	}
 	return rb.out
 }

 // AppendString returns f(append(out, []byte(src)...)).
 // The buffer out must be nil, empty, or already in form f (out == f(out)).
 func (f Form) AppendString(out []byte, src string) []byte {
 	in := inputString(src)
 	return f.doAppend(out, in, len(src))
 }

 // QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
 // The returned n is not necessarily the largest such boundary.
 func (f Form) QuickSpan(b []byte) int {
 	table := formTable[f]
 	n, _ := table.quickSpan(inputBytes(b), 0, len(b), true)
 	return n
 }

 // Span implements transform.SpanningTransformer. It returns a boundary n such
 // that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
 // err is nil when n == len(b). Otherwise it is transform.ErrEndOfSpan if
 // quickSpan stopped at input that is not normalized, and
 // transform.ErrShortSrc if it did not.
 func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
 	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
 	switch {
 	case n >= len(b):
 		// The whole input was spanned; err stays nil.
 	case ok:
 		err = transform.ErrShortSrc
 	default:
 		err = transform.ErrEndOfSpan
 	}
 	return n, err
 }

 // SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
 // It is not guaranteed to return the largest such n.
 // err is nil when n == len(s). Otherwise it is transform.ErrEndOfSpan if
 // quickSpan stopped at input that is not normalized, and
 // transform.ErrShortSrc if it did not.
 func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
 	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
 	switch {
 	case n >= len(s):
 		// The whole input was spanned; err stays nil.
 	case ok:
 		err = transform.ErrShortSrc
 	default:
 		err = transform.ErrEndOfSpan
 	}
 	return n, err
 }

 // quickSpan scans src[i:end] and returns a boundary n such that src[i:n] is
 // already in the form described by f. ok reports whether the scan ended
 // without finding anything that is not normalized; it is false when the scan
 // stopped at a stream-safe overflow, at a combining class that is lower than
 // the preceding one, or at a rune for which isYesC (if f.composing) or
 // isYesD (otherwise) is false. If atEOF is false, n will not point past the
 // start of the last segment if this segment might become non-normalized by
 // appending other runes.
 func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
 	var lastCC uint8
 	ss := streamSafe(0)
 	lastSegStart := i
 	for n = end; i < n; {
 		// Skip over a run of ASCII bytes. The last ASCII byte skipped
 		// becomes the start of the current segment, and the combining
 		// class and stream-safe state are reset.
 		if j := src.skipASCII(i, n); i != j {
 			i = j
 			lastSegStart = i - 1
 			lastCC = 0
 			ss = 0
 			continue
 		}
 		info := f.info(src, i)
 		if info.size == 0 {
 			if atEOF {
 				// include incomplete runes
 				return n, true
 			}
 			return lastSegStart, true
 		}
 		// This block needs to be before the next, because it is possible to
 		// have an overflow for runes that are starters (e.g. with U+FF9E).
 		switch ss.next(info) {
 		case ssStarter:
 			lastSegStart = i
 		case ssOverflow:
 			return lastSegStart, false
 		case ssSuccess:
 			// Combining classes within a segment must not decrease.
 			if lastCC > info.ccc {
 				return lastSegStart, false
 			}
 		}
 		// The break statements below leave the for loop with i < n, so the
 		// function ends up returning (lastSegStart, false).
 		if f.composing {
 			if !info.isYesC() {
 				break
 			}
 		} else {
 			if !info.isYesD() {
 				break
 			}
 		}
 		lastCC = info.ccc
 		i += int(info.size)
 	}
 	if i == n {
 		// The whole range was scanned without finding a problem.
 		if !atEOF {
 			n = lastSegStart
 		}
 		return n, true
 	}
 	return lastSegStart, false
 }

 // QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
 // The returned n is not necessarily the largest such boundary.
 func (f Form) QuickSpanString(s string) int {
 	table := formTable[f]
 	n, _ := table.quickSpan(inputString(s), 0, len(s), true)
 	return n
 }

 // FirstBoundary returns the position i of the first boundary in b,
 // or -1 if b contains no boundary.
 func (f Form) FirstBoundary(b []byte) int {
 	in := inputBytes(b)
 	return f.firstBoundary(in, len(b))
 }

 // firstBoundary is the shared implementation of FirstBoundary and
 // FirstBoundaryInString. It returns the position of the first boundary in
 // the first nsrc bytes of src, or -1 if there is none.
 func (f Form) firstBoundary(src input, nsrc int) int {
 	pos := src.skipContinuationBytes(0)
 	if pos >= nsrc {
 		return -1
 	}
 	table := formTable[f]
 	// We should call ss.first here, but we can't as the first rune is
 	// skipped already. This means FirstBoundary can't really determine
 	// CGJ insertion points correctly. Luckily it doesn't have to.
 	var ss streamSafe
 	for {
 		info := table.info(src, pos)
 		if info.size == 0 {
 			return -1
 		}
 		if ss.next(info) != ssSuccess {
 			return pos
 		}
 		pos += int(info.size)
 		if pos < nsrc {
 			continue
 		}
 		// Reached the end of the input: it counts as a boundary only if the
 		// last rune has a boundary after it or ss.isMax() holds.
 		if info.BoundaryAfter() || ss.isMax() {
 			return nsrc
 		}
 		return -1
 	}
 }

 // FirstBoundaryInString returns the position i of the first boundary in s,
 // or -1 if s contains no boundary.
 func (f Form) FirstBoundaryInString(s string) int {
 	in := inputString(s)
 	return f.firstBoundary(in, len(s))
 }

 // NextBoundary reports the index of the boundary between the first and next
 // segment in b, or -1 if atEOF is false and there are not enough bytes to
 // determine this boundary.
 func (f Form) NextBoundary(b []byte, atEOF bool) int {
 	in := inputBytes(b)
 	return f.nextBoundary(in, len(b), atEOF)
 }

 // NextBoundaryInString reports the index of the boundary between the first
 // and next segment in s, or -1 if atEOF is false and there are not enough
 // bytes to determine this boundary.
 func (f Form) NextBoundaryInString(s string, atEOF bool) int {
 	in := inputString(s)
 	return f.nextBoundary(in, len(s), atEOF)
 }

 // nextBoundary is the shared implementation of NextBoundary and
 // NextBoundaryInString. It returns the index in the first nsrc bytes of src
 // at which the first segment ends, or -1 if atEOF is false and that index
 // cannot be determined from the bytes available.
 func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
 	if nsrc == 0 {
 		// Empty input: the only possible boundary is 0, and only at EOF.
 		if atEOF {
 			return 0
 		}
 		return -1
 	}
 	fd := formTable[f]
 	info := fd.info(src, 0)
 	if info.size == 0 {
 		// The first rune has size 0. At EOF the first byte is reported as a
 		// segment of its own.
 		if atEOF {
 			return 1
 		}
 		return -1
 	}
 	ss := streamSafe(0)
 	ss.first(info)

 	// The post statement advances by the size of the rune that the loop body
 	// has just examined, because info is reassigned inside the body.
 	for i := int(info.size); i < nsrc; i += int(info.size) {
 		info = fd.info(src, i)
 		if info.size == 0 {
 			if atEOF {
 				return i
 			}
 			return -1
 		}
 		// TODO: Using streamSafe to determine the boundary isn't the same as
 		// using BoundaryBefore. Determine which should be used.
 		if s := ss.next(info); s != ssSuccess {
 			return i
 		}
 	}
 	// The end of the input was reached without finding the start of another
 	// segment. Unless at EOF, more input could still extend this segment.
 	if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
 		return -1
 	}
 	return nsrc
 }

 // LastBoundary returns the position i of the last boundary in b,
 // or -1 if b contains no boundary.
 func (f Form) LastBoundary(b []byte) int {
 	table := formTable[f]
 	return lastBoundary(table, b)
 }

 // lastBoundary returns the position of the last boundary in b for the form
 // described by fd, or -1 if b contains no boundary. It scans b backwards
 // from its end, one rune start at a time.
 func lastBoundary(fd *formInfo, b []byte) int {
 	i := len(b)
 	info, p := lastRuneStart(fd, b)
 	if p == -1 {
 		// b contains no rune start at all.
 		return -1
 	}
 	if info.size == 0 { // ends with incomplete rune
 		if p == 0 { // starts with incomplete rune
 			return -1
 		}
 		// Ignore the incomplete rune and look at what precedes it.
 		i = p
 		info, p = lastRuneStart(fd, b[:i])
 		if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
 			return i
 		}
 	}
 	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
 		return i
 	}
 	if info.BoundaryAfter() {
 		return i
 	}
 	// Walk backwards rune by rune until ss.backwards reports a starter or an
 	// overflow.
 	ss := streamSafe(0)
 	v := ss.backwards(info)
 	for i = p; i >= 0 && v != ssStarter; i = p {
 		info, p = lastRuneStart(fd, b[:i])
 		if v = ss.backwards(info); v == ssOverflow {
 			break
 		}
 		if p+int(info.size) != i {
 			if p == -1 { // no boundary found
 				return -1
 			}
 			return i // boundary after an illegal UTF-8 encoding
 		}
 	}
 	return i
 }

 // decomposeSegment scans the segment of rb.src that starts at sp into rb and
 // flushes rb. It inserts 0x034f (Grapheme Joiner) when it encounters a
 // sequence of more than 30 non-starters. It returns the position in rb.src at
 // which scanning stopped, or iShortDst or iShortSrc converted to int. It
 // returns 0 if the rune at sp has size 0.
 // NOTE(review): callers in this file treat a negative result as failure, so
 // the i* error codes are presumably negative; confirm at their definition.
 // If atEOF is false and the segment might continue past the available
 // source, iShortSrc is returned instead of flushing.
 func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
 	// Force one character to be consumed.
 	info := rb.f.info(rb.src, sp)
 	if info.size == 0 {
 		return 0
 	}
 	if s := rb.ss.next(info); s == ssStarter {
 		// TODO: this could be removed if we don't support merging.
 		if rb.nrune > 0 {
 			// rb already holds runes: flush them without consuming the
 			// rune at sp.
 			goto end
 		}
 	} else if s == ssOverflow {
 		rb.insertCGJ()
 		goto end
 	}
 	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
 		return int(err)
 	}
 	// Keep adding runes until the next starter, an overflow, or the end of
 	// the source.
 	for {
 		sp += int(info.size)
 		if sp >= rb.nsrc {
 			if !atEOF && !info.BoundaryAfter() {
 				return int(iShortSrc)
 			}
 			break
 		}
 		info = rb.f.info(rb.src, sp)
 		if info.size == 0 {
 			if !atEOF {
 				return int(iShortSrc)
 			}
 			break
 		}
 		if s := rb.ss.next(info); s == ssStarter {
 			break
 		} else if s == ssOverflow {
 			rb.insertCGJ()
 			break
 		}
 		if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
 			return int(err)
 		}
 	}
 end:
 	if !rb.doFlush() {
 		return int(iShortDst)
 	}
 	return sp
 }

 // lastRuneStart looks backwards from the end of buf for the last byte for
 // which utf8.RuneStart is true. It returns the Properties of the rune at that
 // position together with the position, or the zero Properties and -1 if buf
 // contains no such byte.
 func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
 	for p := len(buf) - 1; p >= 0; p-- {
 		if utf8.RuneStart(buf[p]) {
 			return fd.info(inputBytes(buf), p), p
 		}
 	}
 	return Properties{}, -1
 }

 // decomposeToLastBoundary finds an open segment at the end of rb.out, scans
 // it into rb, and truncates rb.out so that it no longer contains that
 // segment. It has no return value; the result is left in rb. It does nothing
 // if rb.out ends in illegal trailing continuation bytes or if there is a
 // boundary after the last rune of rb.out.
 func decomposeToLastBoundary(rb *reorderBuffer) {
 	fd := &rb.f
 	info, i := lastRuneStart(fd, rb.out)
 	if int(info.size) != len(rb.out)-i {
 		// illegal trailing continuation bytes
 		return
 	}
 	if info.BoundaryAfter() {
 		return
 	}
 	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
 	padd := 0
 	ss := streamSafe(0)
 	p := len(rb.out)
 	// Walk backwards from the end of rb.out, recording the rune properties
 	// in add, until a starter, an overflow, the start of the buffer, or
 	// malformed input is reached.
 	for {
 		add[padd] = info
 		v := ss.backwards(info)
 		if v == ssOverflow {
 			// Note that if we have an overflow, the string we are appending to
 			// is not correctly normalized. In this case the behavior is undefined.
 			break
 		}
 		padd++
 		p -= int(info.size)
 		if v == ssStarter || p < 0 {
 			break
 		}
 		info, i = lastRuneStart(fd, rb.out[:p])
 		if int(info.size) != p-i {
 			break
 		}
 	}
 	rb.ss = ss
 	// Copy bytes for insertion as we may need to overwrite rb.out.
 	var buf [maxBufferSize * utf8.UTFMax]byte
 	cp := buf[:copy(buf[:], rb.out[p:])]
 	rb.out = rb.out[:p]
 	// add holds the runes in reverse order, so iterate from its last entry
 	// down to insert them into rb in source order.
 	for padd--; padd >= 0; padd-- {
 		info = add[padd]
 		rb.insertUnsafe(inputBytes(cp), 0, info)
 		cp = cp[info.size:]
 	}
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Note: the file data_test.go that is generated should not be checked in.
	//go:generate go run maketables.go triegen.go
	//go:generate go test -tags test

	// Package norm contains types and functions for normalizing Unicode strings.
	package norm // import "golang.org/x/text/unicode/norm"

	import (
	"unicode/utf8"

	"golang.org/x/text/transform"
	)

	// A Form denotes a canonical representation of Unicode code points.
	// The Unicode-defined normalization and equivalence forms are:
	//
	// NFC Unicode Normalization Form C
	// NFD Unicode Normalization Form D
	// NFKC Unicode Normalization Form KC
	// NFKD Unicode Normalization Form KD
	//
	// For a Form f, this documentation uses the notation f(x) to mean
	// the bytes or string x converted to the given form.
	// A position n in x is called a boundary if conversion to the form can
	// proceed independently on both sides:
	//
	// f(x) == append(f(x[0:n]), f(x[n:])...)
	//
	// References: https://unicode.org/reports/tr15/ and
	// https://unicode.org/notes/tn5/.
	type Form int

	// The normalization forms. The values are assigned by iota in the order
	// NFC = 0, NFD = 1, NFKC = 2, NFKD = 3 and are used directly as indexes into
	// formTable by the methods in this file.
	// NOTE(review): presumably this order must stay in sync with the order of
	// the formTable entries; confirm before reordering.
	const (
	// NFC is Unicode Normalization Form C.
	NFC Form = iota
	// NFD is Unicode Normalization Form D.
	NFD
	// NFKC is Unicode Normalization Form KC.
	NFKC
	// NFKD is Unicode Normalization Form KD.
	NFKD
	)

	// Bytes returns f(b). When b is already in form f, b itself is returned
	// rather than a copy.
	func (f Form) Bytes(b []byte) []byte {
		in := inputBytes(b)
		table := formTable[f]
		prefix, normal := table.quickSpan(in, 0, len(b), true)
		if normal {
			// Nothing needs rewriting: hand back the caller's slice untouched.
			return b
		}
		// Seed the result with the prefix that quickSpan accepted and let the
		// reorder buffer process the remainder.
		dst := make([]byte, prefix, len(b))
		copy(dst, b[:prefix])
		rb := reorderBuffer{
			f:      *table,
			src:    in,
			nsrc:   len(b),
			out:    dst,
			flushF: appendFlush,
		}
		return doAppendInner(&rb, prefix)
	}

	// String returns f(s). When s is already in form f, s is returned as is.
	func (f Form) String(s string) string {
		in := inputString(s)
		table := formTable[f]
		prefix, normal := table.quickSpan(in, 0, len(s), true)
		if normal {
			return s
		}
		// Seed the result with the prefix that quickSpan accepted and let the
		// reorder buffer process the remainder.
		dst := make([]byte, prefix, len(s))
		copy(dst, s[:prefix])
		rb := reorderBuffer{
			f:      *table,
			src:    in,
			nsrc:   len(s),
			out:    dst,
			flushF: appendFlush,
		}
		normalized := doAppendInner(&rb, prefix)
		return string(normalized)
	}

	// IsNormal reports whether b is already in form f, that is, b == f(b).
	func (f Form) IsNormal(b []byte) bool {
		in := inputBytes(b)
		table := formTable[f]
		pos, normal := table.quickSpan(in, 0, len(b), true)
		if normal {
			return true
		}
		// Slow path: decompose segment by segment. cmpNormalBytes is installed as
		// the flush function and compares each flushed segment with rb.out, which
		// is pointed at the not-yet-verified part of b.
		rb := reorderBuffer{f: *table, src: in, nsrc: len(b)}
		rb.setFlusher(nil, cmpNormalBytes)
		for pos < len(b) {
			rb.out = b[pos:]
			pos = decomposeSegment(&rb, pos, true)
			if pos < 0 {
				return false
			}
			pos, _ = rb.f.quickSpan(rb.src, pos, len(b), true)
		}
		return true
	}

	// cmpNormalBytes reports whether the runes currently held in rb, taken in
	// order, are byte-for-byte equal to the start of rb.out. It returns false if
	// rb.out is too short for a rune or if any byte differs.
	func cmpNormalBytes(rb *reorderBuffer) bool {
		rest := rb.out
		for i := 0; i < rb.nrune; i++ {
			info := rb.rune[i]
			if len(rest) < int(info.size) {
				return false
			}
			// Compare the rune's bytes in rb.byte with the head of rest,
			// consuming rest as we go.
			for p, pe := info.pos, info.pos+info.size; p < pe; p++ {
				if rest[0] != rb.byte[p] {
					return false
				}
				rest = rest[1:]
			}
		}
		return true
	}

	// IsNormalString reports whether s is already in form f, that is, s == f(s).
	func (f Form) IsNormalString(s string) bool {
		in := inputString(s)
		table := formTable[f]
		pos, normal := table.quickSpan(in, 0, len(s), true)
		if normal {
			return true
		}
		// matches compares the runes held in the reorder buffer with s starting
		// at pos, advancing pos past every byte that matched.
		matches := func(rb *reorderBuffer) bool {
			for i := 0; i < rb.nrune; i++ {
				info := rb.rune[i]
				if pos+int(info.size) > len(s) {
					return false
				}
				for p, pe := info.pos, info.pos+info.size; p < pe; p++ {
					if s[pos] != rb.byte[p] {
						return false
					}
					pos++
				}
			}
			return true
		}
		rb := reorderBuffer{f: *table, src: in, nsrc: len(s)}
		rb.setFlusher(nil, matches)
		for pos < len(s) {
			pos = decomposeSegment(&rb, pos, true)
			if pos < 0 {
				return false
			}
			pos, _ = rb.f.quickSpan(rb.src, pos, len(s), true)
		}
		return true
	}

	// patchTail fixes a case where a rune may be incorrectly normalized
	// if it is followed by illegal continuation bytes. It returns the
	// patched buffer and whether the decomposition is still in progress.
	func patchTail(rb *reorderBuffer) bool {
	info, p := lastRuneStart(&rb.f, rb.out)
	if p == -1 \|\| info.size == 0 {
	return true
	}
	end := p + int(info.size)
	extra := len(rb.out) - end
	if extra > 0 {
	// Potentially allocating memory. However, this only
	// happens with ill-formed UTF-8.
	x := make([]byte, 0)
	x = append(x, rb.out[len(rb.out)-extra:]...)
	rb.out = rb.out[:end]
	decomposeToLastBoundary(rb)
	rb.doFlush()
	rb.out = append(rb.out, x...)
	return false
	}
	buf := rb.out[p:]
	rb.out = rb.out[:p]
	decomposeToLastBoundary(rb)
	if s := rb.ss.next(info); s == ssStarter {
	rb.doFlush()
	rb.ss.first(info)
	} else if s == ssOverflow {
	rb.doFlush()
	rb.insertCGJ()
	rb.ss = 0
	}
	rb.insertUnsafe(inputBytes(buf), 0, info)
	return true
	}

	// appendQuick appends to rb.out the part of rb.src starting at i that
	// quickSpan accepts, and returns the source position where that part ends.
	// It returns i unchanged when i is already at the end of the source.
	func appendQuick(rb *reorderBuffer, i int) int {
		if i == rb.nsrc {
			return i
		}
		stop, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
		rb.out = rb.src.appendSlice(rb.out, i, stop)
		return stop
	}

	// Append returns f(append(out, src...)).
	// The buffer out must be nil, empty, or already in form f (out == f(out)).
	func (f Form) Append(out []byte, src ...byte) []byte {
		in := inputBytes(src)
		return f.doAppend(out, in, len(src))
	}

	// doAppend is the shared implementation of Append and AppendString. It
	// appends the form-f normalization of the first n bytes of src to out and
	// returns the result.
	func (f Form) doAppend(out []byte, src input, n int) []byte {
		if n == 0 {
			return out
		}
		table := formTable[f]
		if len(out) != 0 {
			// There is existing output; the package-level doAppend handles
			// merging it with the start of src.
			rb := reorderBuffer{f: *table, src: src, nsrc: n}
			return doAppend(&rb, out, 0)
		}
		// With nothing in out yet, try quickSpan first so the reorderBuffer only
		// has to be initialized when part of src actually needs processing.
		done, _ := table.quickSpan(src, 0, n, true)
		out = src.appendSlice(out, 0, done)
		if done == n {
			return out
		}
		rb := reorderBuffer{f: *table, src: src, nsrc: n, out: out, flushF: appendFlush}
		return doAppendInner(&rb, done)
	}

	func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
	rb.setFlusher(out, appendFlush)
	src, n := rb.src, rb.nsrc
	doMerge := len(out) > 0
	if q := src.skipContinuationBytes(p); q > p {
	// Move leading non-starters to destination.
	rb.out = src.appendSlice(rb.out, p, q)
	p = q
	doMerge = patchTail(rb)
	}
	fd := &rb.f
	if doMerge {
	var info Properties
	if p < n {
	info = fd.info(src, p)
	if !info.BoundaryBefore() \|\| info.nLeadingNonStarters() > 0 {
	if p == 0 {
	decomposeToLastBoundary(rb)
	}
	p = decomposeSegment(rb, p, true)
	}
	}
	if info.size == 0 {
	rb.doFlush()
	// Append incomplete UTF-8 encoding.
	return src.appendSlice(rb.out, p, n)
	}
	if rb.nrune > 0 {
	return doAppendInner(rb, p)
	}
	}
	p = appendQuick(rb, p)
	return doAppendInner(rb, p)
	}

	// doAppendInner processes rb.src from position p to rb.nsrc, alternating
	// between decomposing one segment and bulk-copying the part that follows and
	// that quickSpan accepts. It returns rb.out.
	func doAppendInner(rb *reorderBuffer, p int) []byte {
		end := rb.nsrc
		for p < end {
			p = appendQuick(rb, decomposeSegment(rb, p, true))
		}
		return rb.out
	}

	// AppendString returns f(append(out, []byte(src)...)).
	// The buffer out must be nil, empty, or already in form f (out == f(out)).
	func (f Form) AppendString(out []byte, src string) []byte {
		in := inputString(src)
		return f.doAppend(out, in, len(src))
	}

	// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
	// The returned n is not necessarily the largest such boundary.
	func (f Form) QuickSpan(b []byte) int {
		table := formTable[f]
		n, _ := table.quickSpan(inputBytes(b), 0, len(b), true)
		return n
	}

	// Span implements transform.SpanningTransformer. It returns a boundary n such
	// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
	// err is nil when n == len(b). Otherwise it is transform.ErrEndOfSpan if
	// quickSpan stopped at input that is not normalized, and
	// transform.ErrShortSrc if it did not.
	func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
		n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
		switch {
		case n >= len(b):
			// The whole input was spanned; err stays nil.
		case ok:
			err = transform.ErrShortSrc
		default:
			err = transform.ErrEndOfSpan
		}
		return n, err
	}

	// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
	// It is not guaranteed to return the largest such n.
	// err is nil when n == len(s). Otherwise it is transform.ErrEndOfSpan if
	// quickSpan stopped at input that is not normalized, and
	// transform.ErrShortSrc if it did not.
	func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
		n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
		switch {
		case n >= len(s):
			// The whole input was spanned; err stays nil.
		case ok:
			err = transform.ErrShortSrc
		default:
			err = transform.ErrEndOfSpan
		}
		return n, err
	}

	// quickSpan scans src[i:end] and returns a boundary n such that src[i:n] is
	// already in the form described by f. ok reports whether the scan ended
	// without finding anything that is not normalized; it is false when the scan
	// stopped at a stream-safe overflow, at a combining class that is lower than
	// the preceding one, or at a rune for which isYesC (if f.composing) or
	// isYesD (otherwise) is false. If atEOF is false, n will not point past the
	// start of the last segment if this segment might become non-normalized by
	// appending other runes.
	func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
	var lastCC uint8
	ss := streamSafe(0)
	lastSegStart := i
	for n = end; i < n; {
	// Skip over a run of ASCII bytes. The last ASCII byte skipped
	// becomes the start of the current segment, and the combining
	// class and stream-safe state are reset.
	if j := src.skipASCII(i, n); i != j {
	i = j
	lastSegStart = i - 1
	lastCC = 0
	ss = 0
	continue
	}
	info := f.info(src, i)
	if info.size == 0 {
	if atEOF {
	// include incomplete runes
	return n, true
	}
	return lastSegStart, true
	}
	// This block needs to be before the next, because it is possible to
	// have an overflow for runes that are starters (e.g. with U+FF9E).
	switch ss.next(info) {
	case ssStarter:
	lastSegStart = i
	case ssOverflow:
	return lastSegStart, false
	case ssSuccess:
	// Combining classes within a segment must not decrease.
	if lastCC > info.ccc {
	return lastSegStart, false
	}
	}
	// The break statements below leave the for loop with i < n, so the
	// function ends up returning (lastSegStart, false).
	if f.composing {
	if !info.isYesC() {
	break
	}
	} else {
	if !info.isYesD() {
	break
	}
	}
	lastCC = info.ccc
	i += int(info.size)
	}
	if i == n {
	// The whole range was scanned without finding a problem.
	if !atEOF {
	n = lastSegStart
	}
	return n, true
	}
	return lastSegStart, false
	}

	// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
	// The returned n is not necessarily the largest such boundary.
	func (f Form) QuickSpanString(s string) int {
		table := formTable[f]
		n, _ := table.quickSpan(inputString(s), 0, len(s), true)
		return n
	}

	// FirstBoundary returns the position i of the first boundary in b,
	// or -1 if b contains no boundary.
	func (f Form) FirstBoundary(b []byte) int {
		in := inputBytes(b)
		return f.firstBoundary(in, len(b))
	}

	// firstBoundary is the shared implementation of FirstBoundary and
	// FirstBoundaryInString. It returns the position of the first boundary in
	// the first nsrc bytes of src, or -1 if there is none.
	func (f Form) firstBoundary(src input, nsrc int) int {
		pos := src.skipContinuationBytes(0)
		if pos >= nsrc {
			return -1
		}
		table := formTable[f]
		// We should call ss.first here, but we can't as the first rune is
		// skipped already. This means FirstBoundary can't really determine
		// CGJ insertion points correctly. Luckily it doesn't have to.
		var ss streamSafe
		for {
			info := table.info(src, pos)
			if info.size == 0 {
				return -1
			}
			if ss.next(info) != ssSuccess {
				return pos
			}
			pos += int(info.size)
			if pos < nsrc {
				continue
			}
			// Reached the end of the input: it counts as a boundary only if the
			// last rune has a boundary after it or ss.isMax() holds.
			if info.BoundaryAfter() || ss.isMax() {
				return nsrc
			}
			return -1
		}
	}

	// FirstBoundaryInString returns the position i of the first boundary in s,
	// or -1 if s contains no boundary.
	func (f Form) FirstBoundaryInString(s string) int {
		in := inputString(s)
		return f.firstBoundary(in, len(s))
	}

	// NextBoundary reports the index of the boundary between the first and next
	// segment in b, or -1 if atEOF is false and there are not enough bytes to
	// determine this boundary.
	func (f Form) NextBoundary(b []byte, atEOF bool) int {
		in := inputBytes(b)
		return f.nextBoundary(in, len(b), atEOF)
	}

	// NextBoundaryInString reports the index of the boundary between the first
	// and next segment in s, or -1 if atEOF is false and there are not enough
	// bytes to determine this boundary.
	func (f Form) NextBoundaryInString(s string, atEOF bool) int {
		in := inputString(s)
		return f.nextBoundary(in, len(s), atEOF)
	}

	// nextBoundary is the shared implementation of NextBoundary and
	// NextBoundaryInString. It returns the index in the first nsrc bytes of src
	// at which the first segment ends, or -1 if atEOF is false and that index
	// cannot be determined from the bytes available.
	func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
	if nsrc == 0 {
	// Empty input: the only possible boundary is 0, and only at EOF.
	if atEOF {
	return 0
	}
	return -1
	}
	fd := formTable[f]
	info := fd.info(src, 0)
	if info.size == 0 {
	// The first rune has size 0. At EOF the first byte is reported as a
	// segment of its own.
	if atEOF {
	return 1
	}
	return -1
	}
	ss := streamSafe(0)
	ss.first(info)

	// The post statement advances by the size of the rune that the loop body
	// has just examined, because info is reassigned inside the body.
	for i := int(info.size); i < nsrc; i += int(info.size) {
	info = fd.info(src, i)
	if info.size == 0 {
	if atEOF {
	return i
	}
	return -1
	}
	// TODO: Using streamSafe to determine the boundary isn't the same as
	// using BoundaryBefore. Determine which should be used.
	if s := ss.next(info); s != ssSuccess {
	return i
	}
	}
	// The end of the input was reached without finding the start of another
	// segment. Unless at EOF, more input could still extend this segment.
	if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
	return -1
	}
	return nsrc
	}

	// LastBoundary returns the position i of the last boundary in b,
	// or -1 if b contains no boundary.
	func (f Form) LastBoundary(b []byte) int {
		table := formTable[f]
		return lastBoundary(table, b)
	}

	// lastBoundary returns the position of the last boundary in b for the form
	// described by fd, or -1 if b contains no boundary. It scans b backwards
	// from its end, one rune start at a time.
	func lastBoundary(fd *formInfo, b []byte) int {
	i := len(b)
	info, p := lastRuneStart(fd, b)
	if p == -1 {
	// b contains no rune start at all.
	return -1
	}
	if info.size == 0 { // ends with incomplete rune
	if p == 0 { // starts with incomplete rune
	return -1
	}
	// Ignore the incomplete rune and look at what precedes it.
	i = p
	info, p = lastRuneStart(fd, b[:i])
	if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
	return i
	}
	}
	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
	return i
	}
	if info.BoundaryAfter() {
	return i
	}
	// Walk backwards rune by rune until ss.backwards reports a starter or an
	// overflow.
	ss := streamSafe(0)
	v := ss.backwards(info)
	for i = p; i >= 0 && v != ssStarter; i = p {
	info, p = lastRuneStart(fd, b[:i])
	if v = ss.backwards(info); v == ssOverflow {
	break
	}
	if p+int(info.size) != i {
	if p == -1 { // no boundary found
	return -1
	}
	return i // boundary after an illegal UTF-8 encoding
	}
	}
	return i
	}

	// decomposeSegment scans the segment of rb.src that starts at sp into rb and
	// flushes rb. It inserts 0x034f (Grapheme Joiner) when it encounters a
	// sequence of more than 30 non-starters. It returns the position in rb.src at
	// which scanning stopped, or iShortDst or iShortSrc converted to int. It
	// returns 0 if the rune at sp has size 0.
	// NOTE(review): callers in this file treat a negative result as failure, so
	// the i* error codes are presumably negative; confirm at their definition.
	// If atEOF is false and the segment might continue past the available
	// source, iShortSrc is returned instead of flushing.
	func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
	// Force one character to be consumed.
	info := rb.f.info(rb.src, sp)
	if info.size == 0 {
	return 0
	}
	if s := rb.ss.next(info); s == ssStarter {
	// TODO: this could be removed if we don't support merging.
	if rb.nrune > 0 {
	// rb already holds runes: flush them without consuming the
	// rune at sp.
	goto end
	}
	} else if s == ssOverflow {
	rb.insertCGJ()
	goto end
	}
	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
	return int(err)
	}
	// Keep adding runes until the next starter, an overflow, or the end of
	// the source.
	for {
	sp += int(info.size)
	if sp >= rb.nsrc {
	if !atEOF && !info.BoundaryAfter() {
	return int(iShortSrc)
	}
	break
	}
	info = rb.f.info(rb.src, sp)
	if info.size == 0 {
	if !atEOF {
	return int(iShortSrc)
	}
	break
	}
	if s := rb.ss.next(info); s == ssStarter {
	break
	} else if s == ssOverflow {
	rb.insertCGJ()
	break
	}
	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
	return int(err)
	}
	}
	end:
	if !rb.doFlush() {
	return int(iShortDst)
	}
	return sp
	}

	// lastRuneStart looks backwards from the end of buf for the last byte for
	// which utf8.RuneStart is true. It returns the Properties of the rune at that
	// position together with the position, or the zero Properties and -1 if buf
	// contains no such byte.
	func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
		for p := len(buf) - 1; p >= 0; p-- {
			if utf8.RuneStart(buf[p]) {
				return fd.info(inputBytes(buf), p), p
			}
		}
		return Properties{}, -1
	}

	// decomposeToLastBoundary finds an open segment at the end of the buffer
	// and scans it into rb. Returns the buffer minus the last segment.
	func decomposeToLastBoundary(rb *reorderBuffer) {
	fd := &rb.f
	info, i := lastRuneStart(fd, rb.out)
	if int(info.size) != len(rb.out)-i {
	// illegal trailing continuation bytes
	return
	}
	if info.BoundaryAfter() {
	return
	}
	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
	padd := 0
	ss := streamSafe(0)
	p := len(rb.out)
	for {
	add[padd] = info
	v := ss.backwards(info)
	if v == ssOverflow {
	// Note that if we have an overflow, it the string we are appending to
	// is not correctly normalized. In this case the behavior is undefined.
	break
	}
	padd++
	p -= int(info.size)
	if v == ssStarter \|\| p < 0 {
	break
	}
	info, i = lastRuneStart(fd, rb.out[:p])
	if int(info.size) != p-i {
	break
	}
	}
	rb.ss = ss
	// Copy bytes for insertion as we may need to overwrite rb.out.
	var buf [maxBufferSize * utf8.UTFMax]byte
	cp := buf[:copy(buf[:], rb.out[p:])]
	rb.out = rb.out[:p]
	for padd--; padd >= 0; padd-- {
	info = add[padd]
	rb.insertUnsafe(inputBytes(cp), 0, info)
	cp = cp[info.size:]
	}
	}