blob: f9122d6db01e5ea11a975599fb0f840438b5a121 [file] [log] [blame] [edit]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package width
import (
"bytes"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/transform"
)
func foldRune(r rune) (folded rune, ok bool) {
alt, ok := mapRunes[r]
if ok && alt.e&tagNeedsFold != 0 {
return alt.r, true
}
return r, false
}
func widenRune(r rune) (wide rune, ok bool) {
alt, ok := mapRunes[r]
if k := alt.e.kind(); k == EastAsianHalfwidth || k == EastAsianNarrow {
return alt.r, true
}
return r, false
}
func narrowRune(r rune) (narrow rune, ok bool) {
alt, ok := mapRunes[r]
if k := alt.e.kind(); k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous {
return alt.r, true
}
return r, false
}
func TestFoldSingleRunes(t *testing.T) {
for r := rune(0); r < 0x1FFFF; r++ {
if loSurrogate <= r && r <= hiSurrogate {
continue
}
x, _ := foldRune(r)
want := string(x)
got := Fold.String(string(r))
if got != want {
t.Errorf("Fold().String(%U) = %+q; want %+q", r, got, want)
}
}
}
type transformTest struct {
desc string
src string
nBuf int
nDst int
atEOF bool
dst string
nSrc int
err error
nSpan int
errSpan error
}
func (tc *transformTest) doTest(t *testing.T, tr Transformer) {
testtext.Run(t, tc.desc, func(t *testing.T) {
b := make([]byte, tc.nBuf)
nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF)
if got := string(b[:nDst]); got != tc.dst[:nDst] {
t.Errorf("dst was %+q; want %+q", got, tc.dst)
}
if nDst != tc.nDst {
t.Errorf("nDst was %d; want %d", nDst, tc.nDst)
}
if nSrc != tc.nSrc {
t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc)
}
if err != tc.err {
t.Errorf("error was %v; want %v", err, tc.err)
}
if got := tr.String(tc.src); got != tc.dst {
t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst)
}
n, err := tr.Span([]byte(tc.src), tc.atEOF)
if n != tc.nSpan || err != tc.errSpan {
t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan)
}
})
}
func TestFold(t *testing.T) {
for _, tc := range []transformTest{{
desc: "empty",
src: "",
nBuf: 10,
dst: "",
nDst: 0,
nSrc: 0,
atEOF: false,
err: nil,
nSpan: 0,
errSpan: nil,
}, {
desc: "short source 1",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 1,
nSrc: 1,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 1,
errSpan: transform.ErrShortSrc,
}, {
desc: "short source 2",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 1,
nSrc: 1,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 1,
errSpan: transform.ErrShortSrc,
}, {
desc: "incomplete but terminated source 1",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 2,
nSrc: 2,
atEOF: true,
err: nil,
nSpan: 2,
errSpan: nil,
}, {
desc: "incomplete but terminated source 2",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 3,
nSrc: 3,
atEOF: true,
err: nil,
nSpan: 3,
errSpan: nil,
}, {
desc: "exact fit dst",
src: "a\uff01",
nBuf: 2,
dst: "a!",
nDst: 2,
nSrc: 4,
atEOF: false,
err: nil,
nSpan: 1,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "exact fit dst and src ascii",
src: "ab",
nBuf: 2,
dst: "ab",
nDst: 2,
nSrc: 2,
atEOF: true,
err: nil,
nSpan: 2,
errSpan: nil,
}, {
desc: "empty dst",
src: "\u0300",
nBuf: 0,
dst: "\u0300",
nDst: 0,
nSrc: 0,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 2,
errSpan: nil,
}, {
desc: "empty dst ascii",
src: "a",
nBuf: 0,
dst: "a",
nDst: 0,
nSrc: 0,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 1,
errSpan: nil,
}, {
desc: "short dst 1",
src: "a\uffe0", // ¢
nBuf: 2,
dst: "a\u00a2", // ¢
nDst: 1,
nSrc: 1,
atEOF: false,
err: transform.ErrShortDst,
nSpan: 1,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "short dst 2",
src: "不夠",
nBuf: 3,
dst: "不夠",
nDst: 3,
nSrc: 3,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 6,
errSpan: nil,
}, {
desc: "short dst fast path",
src: "fast",
nDst: 3,
dst: "fast",
nBuf: 3,
nSrc: 3,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 4,
errSpan: nil,
}, {
desc: "short dst larger buffer",
src: "\uff21" + strings.Repeat("0", 127) + "B",
nBuf: 128,
dst: "A" + strings.Repeat("0", 127) + "B",
nDst: 128,
nSrc: 130,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "fast path alternation",
src: "fast路徑fast路徑",
nBuf: 20,
dst: "fast路徑fast路徑",
nDst: 20,
nSrc: 20,
atEOF: true,
err: nil,
nSpan: 20,
errSpan: nil,
}} {
tc.doTest(t, Fold)
}
}
func TestWidenSingleRunes(t *testing.T) {
for r := rune(0); r < 0x1FFFF; r++ {
if loSurrogate <= r && r <= hiSurrogate {
continue
}
alt, _ := widenRune(r)
want := string(alt)
got := Widen.String(string(r))
if got != want {
t.Errorf("Widen().String(%U) = %+q; want %+q", r, got, want)
}
}
}
func TestWiden(t *testing.T) {
for _, tc := range []transformTest{{
desc: "empty",
src: "",
nBuf: 10,
dst: "",
nDst: 0,
nSrc: 0,
atEOF: false,
err: nil,
nSpan: 0,
errSpan: nil,
}, {
desc: "short source 1",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 3,
nSrc: 1,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "short source 2",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 3,
nSrc: 1,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "incomplete but terminated source 1",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 4,
nSrc: 2,
atEOF: true,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "incomplete but terminated source 2",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 5,
nSrc: 3,
atEOF: true,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "short source 1 some span",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 3,
nSrc: 3,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 3,
errSpan: transform.ErrShortSrc,
}, {
desc: "short source 2 some span",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 3,
nSrc: 3,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 3,
errSpan: transform.ErrShortSrc,
}, {
desc: "incomplete but terminated source 1 some span",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 4,
nSrc: 4,
atEOF: true,
err: nil,
nSpan: 4,
errSpan: nil,
}, {
desc: "incomplete but terminated source 2 some span",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 5,
nSrc: 5,
atEOF: true,
err: nil,
nSpan: 5,
errSpan: nil,
}, {
desc: "exact fit dst",
src: "a!",
nBuf: 6,
dst: "a\uff01",
nDst: 6,
nSrc: 2,
atEOF: false,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "empty dst",
src: "\u0300",
nBuf: 0,
dst: "\u0300",
nDst: 0,
nSrc: 0,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 2,
errSpan: nil,
}, {
desc: "empty dst ascii",
src: "a",
nBuf: 0,
dst: "a",
nDst: 0,
nSrc: 0,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "short dst 1",
src: "a\uffe0",
nBuf: 4,
dst: "a\uffe0",
nDst: 3,
nSrc: 1,
atEOF: false,
err: transform.ErrShortDst,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "short dst 2",
src: "不夠",
nBuf: 3,
dst: "不夠",
nDst: 3,
nSrc: 3,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 6,
errSpan: nil,
}, {
desc: "short dst ascii",
src: "ascii",
nBuf: 3,
dst: "ascii", // U+ff41, ...
nDst: 3,
nSrc: 1,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "ambiguous",
src: "\uffe9",
nBuf: 4,
dst: "\u2190",
nDst: 3,
nSrc: 3,
atEOF: false,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}} {
tc.doTest(t, Widen)
}
}
func TestNarrowSingleRunes(t *testing.T) {
for r := rune(0); r < 0x1FFFF; r++ {
if loSurrogate <= r && r <= hiSurrogate {
continue
}
alt, _ := narrowRune(r)
want := string(alt)
got := Narrow.String(string(r))
if got != want {
t.Errorf("Narrow().String(%U) = %+q; want %+q", r, got, want)
}
}
}
func TestNarrow(t *testing.T) {
for _, tc := range []transformTest{{
desc: "empty",
src: "",
nBuf: 10,
dst: "",
nDst: 0,
nSrc: 0,
atEOF: false,
err: nil,
nSpan: 0,
errSpan: nil,
}, {
desc: "short source 1",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 1,
nSrc: 1,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 1,
errSpan: transform.ErrShortSrc,
}, {
desc: "short source 2",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 1,
nSrc: 3,
atEOF: false,
err: transform.ErrShortSrc,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "incomplete but terminated source 1",
src: "a\xc2",
nBuf: 10,
dst: "a\xc2",
nDst: 2,
nSrc: 4,
atEOF: true,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "incomplete but terminated source 2",
src: "a\xe0\x80",
nBuf: 10,
dst: "a\xe0\x80",
nDst: 3,
nSrc: 5,
atEOF: true,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "exact fit dst",
src: "a\uff01",
nBuf: 2,
dst: "a!",
nDst: 2,
nSrc: 6,
atEOF: false,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "exact fit dst some span",
src: "a\uff01",
nBuf: 2,
dst: "a!",
nDst: 2,
nSrc: 4,
atEOF: false,
err: nil,
nSpan: 1,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "empty dst",
src: "\u0300",
nBuf: 0,
dst: "\u0300",
nDst: 0,
nSrc: 0,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 2,
errSpan: nil,
}, {
desc: "empty dst ascii",
src: "a",
nBuf: 0,
dst: "a",
nDst: 0,
nSrc: 0,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 1,
errSpan: nil,
}, {
desc: "short dst 1",
src: "a\uffe0", // ¢
nBuf: 2,
dst: "a\u00a2", // ¢
nDst: 1,
nSrc: 3,
atEOF: false,
err: transform.ErrShortDst,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "short dst 2",
src: "不夠",
nBuf: 3,
dst: "不夠",
nDst: 3,
nSrc: 3,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 6,
errSpan: nil,
}, {
// Create a narrow variant of ambiguous runes, if they exist.
desc: "ambiguous",
src: "\u2190",
nBuf: 4,
dst: "\uffe9",
nDst: 3,
nSrc: 3,
atEOF: false,
err: nil,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "short dst fast path",
src: "fast",
nBuf: 3,
dst: "fast",
nDst: 3,
nSrc: 3,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 4,
errSpan: nil,
}, {
desc: "short dst larger buffer",
src: "\uff21" + strings.Repeat("0", 127) + "B",
nBuf: 128,
dst: "A" + strings.Repeat("0", 127) + "B",
nDst: 128,
nSrc: 130,
atEOF: true,
err: transform.ErrShortDst,
nSpan: 0,
errSpan: transform.ErrEndOfSpan,
}, {
desc: "fast path alternation",
src: "fast路徑fast路徑",
nBuf: 20,
dst: "fast路徑fast路徑",
nDst: 20,
nSrc: 20,
atEOF: true,
err: nil,
nSpan: 20,
errSpan: nil,
}} {
tc.doTest(t, Narrow)
}
}
func bench(b *testing.B, t Transformer, s string) {
dst := make([]byte, 1024)
src := []byte(s)
b.SetBytes(int64(len(src)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
t.Transform(dst, src, true)
}
}
func changingRunes(f func(r rune) (rune, bool)) string {
buf := &bytes.Buffer{}
for r := rune(0); r <= 0xFFFF; r++ {
if _, ok := foldRune(r); ok {
buf.WriteRune(r)
}
}
return buf.String()
}
func BenchmarkFoldASCII(b *testing.B) {
bench(b, Fold, testtext.ASCII)
}
func BenchmarkFoldCJK(b *testing.B) {
bench(b, Fold, testtext.CJK)
}
func BenchmarkFoldNonCanonical(b *testing.B) {
bench(b, Fold, changingRunes(foldRune))
}
func BenchmarkFoldOther(b *testing.B) {
bench(b, Fold, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
}
func BenchmarkWideASCII(b *testing.B) {
bench(b, Widen, testtext.ASCII)
}
func BenchmarkWideCJK(b *testing.B) {
bench(b, Widen, testtext.CJK)
}
func BenchmarkWideNonCanonical(b *testing.B) {
bench(b, Widen, changingRunes(widenRune))
}
func BenchmarkWideOther(b *testing.B) {
bench(b, Widen, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
}
func BenchmarkNarrowASCII(b *testing.B) {
bench(b, Narrow, testtext.ASCII)
}
func BenchmarkNarrowCJK(b *testing.B) {
bench(b, Narrow, testtext.CJK)
}
func BenchmarkNarrowNonCanonical(b *testing.B) {
bench(b, Narrow, changingRunes(narrowRune))
}
func BenchmarkNarrowOther(b *testing.B) {
bench(b, Narrow, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
}