blob: 44cd36d0b15bf638f06023579b53bcdf2df62e7c [file] [log] [blame] [edit]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runes
import (
"strings"
"testing"
"unicode"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/transform"
)
var (
toUpper = cases.Upper(language.Und)
toLower = cases.Lower(language.Und)
)
type spanformer interface {
transform.SpanningTransformer
}
func TestPredicate(t *testing.T) {
testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
return If(Predicate(func(r rune) bool {
return unicode.Is(rt, r)
}), t, f)
})
}
func TestIn(t *testing.T) {
testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
return If(In(rt), t, f)
})
}
func TestNotIn(t *testing.T) {
testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
return If(NotIn(rt), f, t)
})
}
func testConditional(t *testing.T, f func(rt *unicode.RangeTable, t, f spanformer) spanformer) {
lower := f(unicode.Latin, toLower, toLower)
for i, tt := range []transformTest{{
desc: "empty",
szDst: large,
atEOF: true,
in: "",
out: "",
outFull: "",
t: lower,
}, {
desc: "small",
szDst: 1,
atEOF: true,
in: "B",
out: "b",
outFull: "b",
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short dst",
szDst: 2,
atEOF: true,
in: "AAA",
out: "aa",
outFull: "aaa",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short dst writing error",
szDst: 1,
atEOF: false,
in: "A\x80",
out: "a",
outFull: "a\x80",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short dst writing incomplete rune",
szDst: 2,
atEOF: true,
in: "Σ\xc2",
out: "Σ",
outFull: "Σ\xc2",
err: transform.ErrShortDst,
t: f(unicode.Latin, toLower, nil),
}, {
desc: "short dst, longer",
szDst: 5,
atEOF: true,
in: "Hellø",
out: "Hell",
outFull: "Hellø",
err: transform.ErrShortDst,
// idem is used to test short buffers by forcing processing of full-rune increments.
t: f(unicode.Latin, Map(idem), nil),
}, {
desc: "short dst, longer, writing error",
szDst: 6,
atEOF: false,
in: "\x80Hello\x80",
out: "\x80Hello",
outFull: "\x80Hello\x80",
err: transform.ErrShortDst,
t: f(unicode.Latin, Map(idem), nil),
}, {
desc: "short src",
szDst: 2,
atEOF: false,
in: "A\xc2",
out: "a",
outFull: "a\xc2",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short src no change",
szDst: 2,
atEOF: false,
in: "a\xc2",
out: "a",
outFull: "a\xc2",
err: transform.ErrShortSrc,
errSpan: transform.ErrShortSrc,
nSpan: 1,
t: lower,
}, {
desc: "invalid input, atEOF",
szDst: large,
atEOF: true,
in: "\x80",
out: "\x80",
outFull: "\x80",
t: lower,
}, {
desc: "invalid input, !atEOF",
szDst: large,
atEOF: false,
in: "\x80",
out: "\x80",
outFull: "\x80",
t: lower,
}, {
desc: "invalid input, incomplete rune atEOF",
szDst: large,
atEOF: true,
in: "\xc2",
out: "\xc2",
outFull: "\xc2",
t: lower,
}, {
desc: "nop",
szDst: large,
atEOF: true,
in: "Hello World!",
out: "Hello World!",
outFull: "Hello World!",
t: f(unicode.Latin, nil, nil),
}, {
desc: "nop in",
szDst: large,
atEOF: true,
in: "THIS IS α ΤΕΣΤ",
out: "this is α ΤΕΣΤ",
outFull: "this is α ΤΕΣΤ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Greek, nil, toLower),
}, {
desc: "nop in latin",
szDst: large,
atEOF: true,
in: "THIS IS α ΤΕΣΤ",
out: "THIS IS α τεστ",
outFull: "THIS IS α τεστ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, nil, toLower),
}, {
desc: "nop not in",
szDst: large,
atEOF: true,
in: "THIS IS α ΤΕΣΤ",
out: "this is α ΤΕΣΤ",
outFull: "this is α ΤΕΣΤ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, toLower, nil),
}, {
desc: "pass atEOF is true when at end",
szDst: large,
atEOF: true,
in: "hello",
out: "HELLO",
outFull: "HELLO",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "pass atEOF is true when at end of segment",
szDst: large,
atEOF: true,
in: "hello ",
out: "HELLO ",
outFull: "HELLO ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "don't pass atEOF is true when atEOF is false",
szDst: large,
atEOF: false,
in: "hello",
out: "",
outFull: "HELLO",
err: transform.ErrShortSrc,
errSpan: transform.ErrShortSrc,
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "pass atEOF is true when at end, no change",
szDst: large,
atEOF: true,
in: "HELLO",
out: "HELLO",
outFull: "HELLO",
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "pass atEOF is true when at end of segment, no change",
szDst: large,
atEOF: true,
in: "HELLO ",
out: "HELLO ",
outFull: "HELLO ",
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "large input ASCII",
szDst: 12000,
atEOF: false,
in: strings.Repeat("HELLO", 2000),
out: strings.Repeat("hello", 2000),
outFull: strings.Repeat("hello", 2000),
errSpan: transform.ErrEndOfSpan,
err: nil,
t: lower,
}, {
desc: "large input non-ASCII",
szDst: 12000,
atEOF: false,
in: strings.Repeat("\u3333", 2000),
out: strings.Repeat("\u3333", 2000),
outFull: strings.Repeat("\u3333", 2000),
err: nil,
t: lower,
}} {
tt.check(t, i)
}
}
// upperAtEOF is a strange Transformer that converts text to uppercase, but only
// if atEOF is true.
type upperAtEOF struct{ transform.NopResetter }
func (upperAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if !atEOF {
return 0, 0, transform.ErrShortSrc
}
return toUpper.Transform(dst, src, atEOF)
}
func (upperAtEOF) Span(src []byte, atEOF bool) (n int, err error) {
if !atEOF {
return 0, transform.ErrShortSrc
}
return toUpper.Span(src, atEOF)
}
func BenchmarkConditional(b *testing.B) {
doBench(b, If(In(unicode.Hangul), transform.Nop, transform.Nop))
}