| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package width |
| |
| import ( |
| "bytes" |
| "strings" |
| "testing" |
| |
| "golang.org/x/text/internal/testtext" |
| "golang.org/x/text/transform" |
| ) |
| |
| func foldRune(r rune) (folded rune, ok bool) { |
| alt, ok := mapRunes[r] |
| if ok && alt.e&tagNeedsFold != 0 { |
| return alt.r, true |
| } |
| return r, false |
| } |
| |
| func widenRune(r rune) (wide rune, ok bool) { |
| alt, ok := mapRunes[r] |
| if k := alt.e.kind(); k == EastAsianHalfwidth || k == EastAsianNarrow { |
| return alt.r, true |
| } |
| return r, false |
| } |
| |
| func narrowRune(r rune) (narrow rune, ok bool) { |
| alt, ok := mapRunes[r] |
| if k := alt.e.kind(); k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous { |
| return alt.r, true |
| } |
| return r, false |
| } |
| |
| func TestFoldSingleRunes(t *testing.T) { |
| for r := rune(0); r < 0x1FFFF; r++ { |
| if loSurrogate <= r && r <= hiSurrogate { |
| continue |
| } |
| x, _ := foldRune(r) |
| want := string(x) |
| got := Fold.String(string(r)) |
| if got != want { |
| t.Errorf("Fold().String(%U) = %+q; want %+q", r, got, want) |
| } |
| } |
| } |
| |
| type transformTest struct { |
| desc string |
| src string |
| nBuf int |
| nDst int |
| atEOF bool |
| dst string |
| nSrc int |
| err error |
| nSpan int |
| errSpan error |
| } |
| |
| func (tc *transformTest) doTest(t *testing.T, tr Transformer) { |
| testtext.Run(t, tc.desc, func(t *testing.T) { |
| b := make([]byte, tc.nBuf) |
| nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF) |
| if got := string(b[:nDst]); got != tc.dst[:nDst] { |
| t.Errorf("dst was %+q; want %+q", got, tc.dst) |
| } |
| if nDst != tc.nDst { |
| t.Errorf("nDst was %d; want %d", nDst, tc.nDst) |
| } |
| if nSrc != tc.nSrc { |
| t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc) |
| } |
| if err != tc.err { |
| t.Errorf("error was %v; want %v", err, tc.err) |
| } |
| if got := tr.String(tc.src); got != tc.dst { |
| t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst) |
| } |
| n, err := tr.Span([]byte(tc.src), tc.atEOF) |
| if n != tc.nSpan || err != tc.errSpan { |
| t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan) |
| } |
| }) |
| } |
| |
| func TestFold(t *testing.T) { |
| for _, tc := range []transformTest{{ |
| desc: "empty", |
| src: "", |
| nBuf: 10, |
| dst: "", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: false, |
| err: nil, |
| nSpan: 0, |
| errSpan: nil, |
| }, { |
| desc: "short source 1", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 1, |
| nSrc: 1, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 1, |
| errSpan: transform.ErrShortSrc, |
| }, { |
| desc: "short source 2", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 1, |
| nSrc: 1, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 1, |
| errSpan: transform.ErrShortSrc, |
| }, { |
| desc: "incomplete but terminated source 1", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 2, |
| nSrc: 2, |
| atEOF: true, |
| err: nil, |
| nSpan: 2, |
| errSpan: nil, |
| }, { |
| desc: "incomplete but terminated source 2", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: true, |
| err: nil, |
| nSpan: 3, |
| errSpan: nil, |
| }, { |
| desc: "exact fit dst", |
| src: "a\uff01", |
| nBuf: 2, |
| dst: "a!", |
| nDst: 2, |
| nSrc: 4, |
| atEOF: false, |
| err: nil, |
| nSpan: 1, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "exact fit dst and src ascii", |
| src: "ab", |
| nBuf: 2, |
| dst: "ab", |
| nDst: 2, |
| nSrc: 2, |
| atEOF: true, |
| err: nil, |
| nSpan: 2, |
| errSpan: nil, |
| }, { |
| desc: "empty dst", |
| src: "\u0300", |
| nBuf: 0, |
| dst: "\u0300", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 2, |
| errSpan: nil, |
| }, { |
| desc: "empty dst ascii", |
| src: "a", |
| nBuf: 0, |
| dst: "a", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 1, |
| errSpan: nil, |
| }, { |
| desc: "short dst 1", |
| src: "a\uffe0", // ¢ |
| nBuf: 2, |
| dst: "a\u00a2", // ¢ |
| nDst: 1, |
| nSrc: 1, |
| atEOF: false, |
| err: transform.ErrShortDst, |
| nSpan: 1, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "short dst 2", |
| src: "不夠", |
| nBuf: 3, |
| dst: "不夠", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 6, |
| errSpan: nil, |
| }, { |
| desc: "short dst fast path", |
| src: "fast", |
| nDst: 3, |
| dst: "fast", |
| nBuf: 3, |
| nSrc: 3, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 4, |
| errSpan: nil, |
| }, { |
| desc: "short dst larger buffer", |
| src: "\uff21" + strings.Repeat("0", 127) + "B", |
| nBuf: 128, |
| dst: "A" + strings.Repeat("0", 127) + "B", |
| nDst: 128, |
| nSrc: 130, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "fast path alternation", |
| src: "fast路徑fast路徑", |
| nBuf: 20, |
| dst: "fast路徑fast路徑", |
| nDst: 20, |
| nSrc: 20, |
| atEOF: true, |
| err: nil, |
| nSpan: 20, |
| errSpan: nil, |
| }} { |
| tc.doTest(t, Fold) |
| } |
| } |
| |
| func TestWidenSingleRunes(t *testing.T) { |
| for r := rune(0); r < 0x1FFFF; r++ { |
| if loSurrogate <= r && r <= hiSurrogate { |
| continue |
| } |
| alt, _ := widenRune(r) |
| want := string(alt) |
| got := Widen.String(string(r)) |
| if got != want { |
| t.Errorf("Widen().String(%U) = %+q; want %+q", r, got, want) |
| } |
| } |
| } |
| |
| func TestWiden(t *testing.T) { |
| for _, tc := range []transformTest{{ |
| desc: "empty", |
| src: "", |
| nBuf: 10, |
| dst: "", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: false, |
| err: nil, |
| nSpan: 0, |
| errSpan: nil, |
| }, { |
| desc: "short source 1", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 3, |
| nSrc: 1, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "short source 2", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 3, |
| nSrc: 1, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "incomplete but terminated source 1", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 4, |
| nSrc: 2, |
| atEOF: true, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "incomplete but terminated source 2", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 5, |
| nSrc: 3, |
| atEOF: true, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "short source 1 some span", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 3, |
| errSpan: transform.ErrShortSrc, |
| }, { |
| desc: "short source 2 some span", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 3, |
| errSpan: transform.ErrShortSrc, |
| }, { |
| desc: "incomplete but terminated source 1 some span", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 4, |
| nSrc: 4, |
| atEOF: true, |
| err: nil, |
| nSpan: 4, |
| errSpan: nil, |
| }, { |
| desc: "incomplete but terminated source 2 some span", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 5, |
| nSrc: 5, |
| atEOF: true, |
| err: nil, |
| nSpan: 5, |
| errSpan: nil, |
| }, { |
| desc: "exact fit dst", |
| src: "a!", |
| nBuf: 6, |
| dst: "a\uff01", |
| nDst: 6, |
| nSrc: 2, |
| atEOF: false, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "empty dst", |
| src: "\u0300", |
| nBuf: 0, |
| dst: "\u0300", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 2, |
| errSpan: nil, |
| }, { |
| desc: "empty dst ascii", |
| src: "a", |
| nBuf: 0, |
| dst: "a", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "short dst 1", |
| src: "a\uffe0", |
| nBuf: 4, |
| dst: "a\uffe0", |
| nDst: 3, |
| nSrc: 1, |
| atEOF: false, |
| err: transform.ErrShortDst, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "short dst 2", |
| src: "不夠", |
| nBuf: 3, |
| dst: "不夠", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 6, |
| errSpan: nil, |
| }, { |
| desc: "short dst ascii", |
| src: "ascii", |
| nBuf: 3, |
| dst: "ascii", // U+ff41, ... |
| nDst: 3, |
| nSrc: 1, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "ambiguous", |
| src: "\uffe9", |
| nBuf: 4, |
| dst: "\u2190", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: false, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }} { |
| tc.doTest(t, Widen) |
| } |
| } |
| |
| func TestNarrowSingleRunes(t *testing.T) { |
| for r := rune(0); r < 0x1FFFF; r++ { |
| if loSurrogate <= r && r <= hiSurrogate { |
| continue |
| } |
| alt, _ := narrowRune(r) |
| want := string(alt) |
| got := Narrow.String(string(r)) |
| if got != want { |
| t.Errorf("Narrow().String(%U) = %+q; want %+q", r, got, want) |
| } |
| } |
| } |
| |
| func TestNarrow(t *testing.T) { |
| for _, tc := range []transformTest{{ |
| desc: "empty", |
| src: "", |
| nBuf: 10, |
| dst: "", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: false, |
| err: nil, |
| nSpan: 0, |
| errSpan: nil, |
| }, { |
| desc: "short source 1", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 1, |
| nSrc: 1, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 1, |
| errSpan: transform.ErrShortSrc, |
| }, { |
| desc: "short source 2", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 1, |
| nSrc: 3, |
| atEOF: false, |
| err: transform.ErrShortSrc, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "incomplete but terminated source 1", |
| src: "a\xc2", |
| nBuf: 10, |
| dst: "a\xc2", |
| nDst: 2, |
| nSrc: 4, |
| atEOF: true, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "incomplete but terminated source 2", |
| src: "a\xe0\x80", |
| nBuf: 10, |
| dst: "a\xe0\x80", |
| nDst: 3, |
| nSrc: 5, |
| atEOF: true, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "exact fit dst", |
| src: "a\uff01", |
| nBuf: 2, |
| dst: "a!", |
| nDst: 2, |
| nSrc: 6, |
| atEOF: false, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "exact fit dst some span", |
| src: "a\uff01", |
| nBuf: 2, |
| dst: "a!", |
| nDst: 2, |
| nSrc: 4, |
| atEOF: false, |
| err: nil, |
| nSpan: 1, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "empty dst", |
| src: "\u0300", |
| nBuf: 0, |
| dst: "\u0300", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 2, |
| errSpan: nil, |
| }, { |
| desc: "empty dst ascii", |
| src: "a", |
| nBuf: 0, |
| dst: "a", |
| nDst: 0, |
| nSrc: 0, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 1, |
| errSpan: nil, |
| }, { |
| desc: "short dst 1", |
| src: "a\uffe0", // ¢ |
| nBuf: 2, |
| dst: "a\u00a2", // ¢ |
| nDst: 1, |
| nSrc: 3, |
| atEOF: false, |
| err: transform.ErrShortDst, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "short dst 2", |
| src: "不夠", |
| nBuf: 3, |
| dst: "不夠", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 6, |
| errSpan: nil, |
| }, { |
| // Create a narrow variant of ambiguous runes, if they exist. |
| desc: "ambiguous", |
| src: "\u2190", |
| nBuf: 4, |
| dst: "\uffe9", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: false, |
| err: nil, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "short dst fast path", |
| src: "fast", |
| nBuf: 3, |
| dst: "fast", |
| nDst: 3, |
| nSrc: 3, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 4, |
| errSpan: nil, |
| }, { |
| desc: "short dst larger buffer", |
| src: "\uff21" + strings.Repeat("0", 127) + "B", |
| nBuf: 128, |
| dst: "A" + strings.Repeat("0", 127) + "B", |
| nDst: 128, |
| nSrc: 130, |
| atEOF: true, |
| err: transform.ErrShortDst, |
| nSpan: 0, |
| errSpan: transform.ErrEndOfSpan, |
| }, { |
| desc: "fast path alternation", |
| src: "fast路徑fast路徑", |
| nBuf: 20, |
| dst: "fast路徑fast路徑", |
| nDst: 20, |
| nSrc: 20, |
| atEOF: true, |
| err: nil, |
| nSpan: 20, |
| errSpan: nil, |
| }} { |
| tc.doTest(t, Narrow) |
| } |
| } |
| |
| func bench(b *testing.B, t Transformer, s string) { |
| dst := make([]byte, 1024) |
| src := []byte(s) |
| b.SetBytes(int64(len(src))) |
| b.ResetTimer() |
| for i := 0; i < b.N; i++ { |
| t.Transform(dst, src, true) |
| } |
| } |
| |
| func changingRunes(f func(r rune) (rune, bool)) string { |
| buf := &bytes.Buffer{} |
| for r := rune(0); r <= 0xFFFF; r++ { |
| if _, ok := foldRune(r); ok { |
| buf.WriteRune(r) |
| } |
| } |
| return buf.String() |
| } |
| |
| func BenchmarkFoldASCII(b *testing.B) { |
| bench(b, Fold, testtext.ASCII) |
| } |
| |
| func BenchmarkFoldCJK(b *testing.B) { |
| bench(b, Fold, testtext.CJK) |
| } |
| |
| func BenchmarkFoldNonCanonical(b *testing.B) { |
| bench(b, Fold, changingRunes(foldRune)) |
| } |
| |
| func BenchmarkFoldOther(b *testing.B) { |
| bench(b, Fold, testtext.TwoByteUTF8+testtext.ThreeByteUTF8) |
| } |
| |
| func BenchmarkWideASCII(b *testing.B) { |
| bench(b, Widen, testtext.ASCII) |
| } |
| |
| func BenchmarkWideCJK(b *testing.B) { |
| bench(b, Widen, testtext.CJK) |
| } |
| |
| func BenchmarkWideNonCanonical(b *testing.B) { |
| bench(b, Widen, changingRunes(widenRune)) |
| } |
| |
| func BenchmarkWideOther(b *testing.B) { |
| bench(b, Widen, testtext.TwoByteUTF8+testtext.ThreeByteUTF8) |
| } |
| |
| func BenchmarkNarrowASCII(b *testing.B) { |
| bench(b, Narrow, testtext.ASCII) |
| } |
| |
| func BenchmarkNarrowCJK(b *testing.B) { |
| bench(b, Narrow, testtext.CJK) |
| } |
| |
| func BenchmarkNarrowNonCanonical(b *testing.B) { |
| bench(b, Narrow, changingRunes(narrowRune)) |
| } |
| |
| func BenchmarkNarrowOther(b *testing.B) { |
| bench(b, Narrow, testtext.TwoByteUTF8+testtext.ThreeByteUTF8) |
| } |