blob: 273abfa52f5a1aaf1267978f5c0888e742882c50 [file] [log] [blame] [edit]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package transform
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"strconv"
"strings"
"testing"
"time"
"unicode/utf8"
"golang.org/x/text/internal/testtext"
)
// lowerCaseASCII is a Transformer that maps the ASCII letters 'A'-'Z' to
// 'a'-'z' and copies every other byte unchanged.
type lowerCaseASCII struct{ NopResetter }

// Transform lowercases as many bytes of src as fit into dst. It reports
// ErrShortDst when dst is smaller than src; the atEOF flag is not consulted.
func (lowerCaseASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	limit := len(src)
	if len(dst) < limit {
		limit, err = len(dst), ErrShortDst
	}
	for i := 0; i < limit; i++ {
		b := src[i]
		if b >= 'A' && b <= 'Z' {
			b += 'a' - 'A'
		}
		dst[i] = b
	}
	return limit, limit, err
}
// lowerCaseASCIILookahead lowercases ASCII letters like lowerCaseASCII does,
// but additionally reports ErrShortSrc on every call for which atEOF is false.
type lowerCaseASCIILookahead struct{ NopResetter }

// Transform lowercases as many bytes of src as fit into dst. When atEOF is
// false the returned error is always ErrShortSrc, even if dst was too small;
// ErrShortDst is only visible on the final (atEOF) call.
func (lowerCaseASCIILookahead) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	limit := len(src)
	if len(dst) < limit {
		limit = len(dst)
		err = ErrShortDst
	}
	for i, b := range src[:limit] {
		if b < 'A' || b > 'Z' {
			dst[i] = b
			continue
		}
		dst[i] = b + ('a' - 'A')
	}
	if !atEOF {
		// Deliberately overrides any ErrShortDst set above.
		err = ErrShortSrc
	}
	return limit, limit, err
}
// errYouMentionedX is the error dontMentionX returns when it sees an 'X'.
var errYouMentionedX = errors.New("you mentioned X")

// dontMentionX is a Transformer that copies its input verbatim until it
// encounters the byte 'X', at which point it fails with errYouMentionedX.
type dontMentionX struct{ NopResetter }

// Transform copies src to dst up to, but not including, the first 'X' that
// lies within the portion of src that fits in dst. Finding an 'X' takes
// precedence over ErrShortDst.
func (dontMentionX) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	limit := len(src)
	if len(dst) < limit {
		limit, err = len(dst), ErrShortDst
	}
	for i := 0; i < limit; i++ {
		if src[i] == 'X' {
			return i, i, errYouMentionedX
		}
		dst[i] = src[i]
	}
	return limit, limit, err
}
// errAtEnd is the error errorAtEnd returns once all input has been copied.
var errAtEnd = errors.New("error after all text")

// errorAtEnd is a Transformer that copies its input unchanged and then fails
// with errAtEnd on the final call.
type errorAtEnd struct{ NopResetter }

// Transform copies src into dst. It returns ErrShortDst if src did not fit,
// errAtEnd if everything fit and atEOF is set, and nil otherwise.
func (errorAtEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	copied := copy(dst, src)
	switch {
	case copied < len(src):
		err = ErrShortDst
	case atEOF:
		err = errAtEnd
	}
	return copied, copied, err
}
// replaceWithConstant is a Transformer that discards all input and, once
// atEOF is signalled, emits a fixed replacement string instead.
type replaceWithConstant struct {
	replacement string // text to emit at EOF
	written     int    // number of bytes of replacement emitted so far
}

// Reset rewinds the emission position so the replacement is written from the
// start again.
func (t *replaceWithConstant) Reset() {
	t.written = 0
}

// Transform always consumes all of src. Nothing is written until atEOF is
// true; from then on each call copies as much of the not-yet-written part of
// the replacement as fits into dst and returns ErrShortDst while more remains.
func (t *replaceWithConstant) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	nSrc = len(src)
	if !atEOF {
		return 0, nSrc, nil
	}
	pending := t.replacement[t.written:]
	nDst = copy(dst, pending)
	t.written += nDst
	if nDst < len(pending) {
		err = ErrShortDst
	}
	return nDst, nSrc, err
}
// addAnXAtTheEnd is a Transformer that copies its input unchanged and appends
// a single 'X' once the final chunk has been copied.
type addAnXAtTheEnd struct{ NopResetter }

// Transform copies src into dst. On the atEOF call it also writes a trailing
// 'X'. ErrShortDst is returned when src does not fit, or when src fits but no
// byte is left in dst for the 'X'.
func (addAnXAtTheEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	copied := copy(dst, src)
	switch {
	case copied < len(src):
		return copied, copied, ErrShortDst
	case !atEOF:
		return copied, copied, nil
	case copied == len(dst):
		// Everything was copied, but there is no room left for the 'X'.
		return copied, copied, ErrShortDst
	}
	dst[copied] = 'X'
	return copied + 1, copied, nil
}
// doublerAtEOF is a strange Transformer that transforms "this" to "tthhiiss",
// but only if atEOF is true.
type doublerAtEOF struct{ NopResetter }

// Transform refuses to do anything (returning ErrShortSrc with no progress)
// until atEOF is true, then writes every source byte twice. The space check
// is conservative: it reports ErrShortDst unless more than two bytes of dst
// remain at the current output position.
func (doublerAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if !atEOF {
		return 0, 0, ErrShortSrc
	}
	for i := 0; i < len(src); i++ {
		out := 2 * i
		if out+2 >= len(dst) {
			return out, i, ErrShortDst
		}
		dst[out], dst[out+1] = src[i], src[i]
	}
	return 2 * len(src), len(src), nil
}
// rleDecode and rleEncode implement a toy run-length encoding: "aabbbbbbbbbb"
// is encoded as "2a10b". The decoding is assumed to not contain any numbers.
type rleDecode struct{ NopResetter }

// Transform expands each "<decimal count><byte>" token of src into count
// copies of byte in dst. A token is only consumed once it is complete and its
// expansion fits in dst. It returns ErrShortDst when an expansion does not
// fit, ErrShortSrc when src ends inside the count and atEOF is false, and a
// "bad input" error for a token without a count or a count without a byte at
// EOF.
func (rleDecode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for len(src) > 0 {
		// Accumulate the leading run of decimal digits.
		count, pos := 0, 0
		for pos < len(src) && '0' <= src[pos] && src[pos] <= '9' {
			count = 10*count + int(src[pos]-'0')
			pos++
		}
		if pos == len(src) {
			// Input ended before the byte to repeat was seen.
			if atEOF {
				return nDst, nSrc, errors.New("rleDecode: bad input")
			}
			return nDst, nSrc, ErrShortSrc
		}
		if pos == 0 {
			// A token must start with at least one digit.
			return nDst, nSrc, errors.New("rleDecode: bad input")
		}
		if count > len(dst) {
			return nDst, nSrc, ErrShortDst
		}
		fill := src[pos]
		for j := 0; j < count; j++ {
			dst[j] = fill
		}
		dst, src = dst[count:], src[pos+1:]
		nDst += count
		nSrc += pos + 1
	}
	return nDst, nSrc, nil
}
// rleEncode is the encoding half of the toy run-length scheme: each run of
// identical bytes is written as its decimal length followed by the byte.
type rleEncode struct {
	NopResetter
	// allowStutter means that "xxxxxxxx" can be encoded as "5x3x"
	// instead of always as "8x".
	allowStutter bool
}

// Transform encodes src run by run. A run that reaches the end of src is held
// back with ErrShortSrc unless atEOF is set or allowStutter permits splitting
// it. ErrShortDst is returned when the next encoded run does not fit in dst.
func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for len(src) != 0 {
		// Measure the run of bytes equal to the first one.
		first, run := src[0], 1
		for run < len(src) && src[run] == first {
			run++
		}
		if run == len(src) && !(atEOF || e.allowStutter) {
			// The run might continue in the next chunk of input.
			return nDst, nSrc, ErrShortSrc
		}
		digits := strconv.Itoa(run)
		need := len(digits) + 1 // count digits plus the repeated byte
		if need > len(dst) {
			return nDst, nSrc, ErrShortDst
		}
		copy(dst, digits)
		dst[need-1] = first
		dst, src = dst[need:], src[run:]
		nDst += need
		nSrc += run
	}
	return nDst, nSrc, nil
}
// trickler consumes all input bytes, but writes a single byte at a time to dst.
type trickler []byte

// Reset drops any bytes that are still buffered.
func (t *trickler) Reset() {
	*t = nil
}

// Transform buffers all of src and then emits at most one buffered byte. It
// returns ErrShortDst whenever buffered bytes remain after the call.
func (t *trickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	buffered := append(*t, src...)
	*t = buffered
	switch {
	case len(buffered) == 0:
		return 0, 0, nil
	case len(dst) == 0:
		return 0, len(src), ErrShortDst
	}
	dst[0] = buffered[0]
	*t = buffered[1:]
	if len(buffered) > 1 {
		err = ErrShortDst
	}
	return 1, len(src), err
}
// delayedTrickler is like trickler, but delays writing output to dst. This is
// highly unlikely to be relevant in practice, but it seems like a good idea
// to have some tolerance as long as progress can be detected.
type delayedTrickler []byte

// Reset drops any bytes that are still buffered.
func (t *delayedTrickler) Reset() {
	*t = nil
}

// Transform first emits at most one byte that was buffered by an earlier
// call, and only then buffers all of src. Bytes supplied in this call are
// therefore never written during the same call. It returns ErrShortDst
// whenever buffered bytes remain.
func (t *delayedTrickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	pending := *t
	if len(pending) != 0 && len(dst) != 0 {
		dst[0] = pending[0]
		pending = pending[1:]
		nDst = 1
	}
	pending = append(pending, src...)
	*t = pending
	if len(pending) != 0 {
		err = ErrShortDst
	}
	return nDst, len(src), err
}
// testCase describes one table-driven transformation test: a Transformer, its
// input, the buffer sizes to run it with, and the expected outcome.
type testCase struct {
	desc string      // human-readable name of the case
	t    Transformer // transformer under test

	src string // input text

	dstSize int // size of the destination buffer
	srcSize int // size of the source buffer
	ioSize  int // if > 0, the only write chunk size TestWriter uses

	wantStr  string // expected output
	wantErr  error  // expected final error
	wantIter int    // number of iterations taken; 0 means we don't care.
}

// String identifies the case by its transformer's name followed by its
// description.
func (t testCase) String() string {
	name := tstr(t.t)
	return name + "; " + t.desc
}
// tstr returns a short display name for t: the result of its String method if
// it implements fmt.Stringer, otherwise its %T type name with everything up
// to and including the first '.' removed.
func tstr(t Transformer) string {
	switch v := t.(type) {
	case fmt.Stringer:
		return v.String()
	}
	typeName := fmt.Sprintf("%T", t)
	// Index returns -1 when there is no '.', which keeps the whole name.
	dot := strings.Index(typeName, ".")
	return typeName[dot+1:]
}
// String renders the chain as "Chain(a, b, ...)", listing the tstr name of
// every link except the last one.
func (c chain) String() string {
	var buf bytes.Buffer
	buf.WriteString("Chain(")
	sep := ""
	for _, l := range c.link[:len(c.link)-1] {
		// Both operands are strings, so Fprint inserts no extra space.
		fmt.Fprint(&buf, sep, tstr(l.t))
		sep = ", "
	}
	buf.WriteString(")")
	return buf.String()
}
// testCases is the shared table of single-Transformer cases. It is consumed
// by TestReader, TestWriter, TestChain and testString in this file. Each
// entry names the transformer, its input, the destination/source buffer sizes
// to run it with, and the expected output string and error. Entries that set
// ioSize restrict TestWriter to that single write chunk size.
var testCases = []testCase{
{
desc: "empty",
t: lowerCaseASCII{},
src: "",
dstSize: 100,
srcSize: 100,
wantStr: "",
},
{
desc: "basic",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 100,
srcSize: 100,
wantStr: "hello world.",
},
{
desc: "small dst",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 3,
srcSize: 100,
wantStr: "hello world.",
},
{
desc: "small src",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 100,
srcSize: 4,
wantStr: "hello world.",
},
{
desc: "small buffers",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 3,
srcSize: 4,
wantStr: "hello world.",
},
{
desc: "very small buffers",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 1,
srcSize: 1,
wantStr: "hello world.",
},
{
desc: "small dst with lookahead",
t: lowerCaseASCIILookahead{},
src: "Hello WORLD.",
dstSize: 3,
srcSize: 100,
wantStr: "hello world.",
},
{
desc: "small src with lookahead",
t: lowerCaseASCIILookahead{},
src: "Hello WORLD.",
dstSize: 100,
srcSize: 4,
wantStr: "hello world.",
},
{
desc: "small buffers with lookahead",
t: lowerCaseASCIILookahead{},
src: "Hello WORLD.",
dstSize: 3,
srcSize: 4,
wantStr: "hello world.",
},
{
desc: "very small buffers with lookahead",
t: lowerCaseASCIILookahead{},
src: "Hello WORLD.",
dstSize: 1,
srcSize: 2,
wantStr: "hello world.",
},
{
desc: "user error",
t: dontMentionX{},
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
dstSize: 100,
srcSize: 100,
wantStr: "The First Rule of Transform Club: don't mention Mister ",
wantErr: errYouMentionedX,
},
{
desc: "user error at end",
t: errorAtEnd{},
src: "All goes well until it doesn't.",
dstSize: 100,
srcSize: 100,
wantStr: "All goes well until it doesn't.",
wantErr: errAtEnd,
},
{
desc: "user error at end, incremental",
t: errorAtEnd{},
src: "All goes well until it doesn't.",
dstSize: 10,
srcSize: 10,
wantStr: "All goes well until it doesn't.",
wantErr: errAtEnd,
},
{
desc: "replace entire non-empty string with one byte",
t: &replaceWithConstant{replacement: "X"},
src: "none of this will be copied",
dstSize: 1,
srcSize: 10,
wantStr: "X",
},
{
desc: "replace entire empty string with one byte",
t: &replaceWithConstant{replacement: "X"},
src: "",
dstSize: 1,
srcSize: 10,
wantStr: "X",
},
{
desc: "replace entire empty string with seven bytes",
t: &replaceWithConstant{replacement: "ABCDEFG"},
src: "",
dstSize: 3,
srcSize: 10,
wantStr: "ABCDEFG",
},
{
desc: "add an X (initialBufSize-1)",
t: addAnXAtTheEnd{},
src: aaa[:initialBufSize-1],
dstSize: 10,
srcSize: 10,
wantStr: aaa[:initialBufSize-1] + "X",
},
{
desc: "add an X (initialBufSize+0)",
t: addAnXAtTheEnd{},
src: aaa[:initialBufSize+0],
dstSize: 10,
srcSize: 10,
wantStr: aaa[:initialBufSize+0] + "X",
},
{
desc: "add an X (initialBufSize+1)",
t: addAnXAtTheEnd{},
src: aaa[:initialBufSize+1],
dstSize: 10,
srcSize: 10,
wantStr: aaa[:initialBufSize+1] + "X",
},
{
desc: "small buffers",
t: dontMentionX{},
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
dstSize: 10,
srcSize: 10,
wantStr: "The First Rule of Transform Club: don't mention Mister ",
wantErr: errYouMentionedX,
},
{
desc: "very small buffers",
t: dontMentionX{},
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
dstSize: 1,
srcSize: 1,
wantStr: "The First Rule of Transform Club: don't mention Mister ",
wantErr: errYouMentionedX,
},
{
desc: "only transform at EOF",
t: doublerAtEOF{},
src: "this",
dstSize: 100,
srcSize: 100,
wantStr: "tthhiiss",
},
{
desc: "basic",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 100,
srcSize: 100,
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
},
{
desc: "long",
t: rleDecode{},
src: "12a23b34c45d56e99z",
dstSize: 100,
srcSize: 100,
wantStr: strings.Repeat("a", 12) +
strings.Repeat("b", 23) +
strings.Repeat("c", 34) +
strings.Repeat("d", 45) +
strings.Repeat("e", 56) +
strings.Repeat("z", 99),
},
{
desc: "tight buffers",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 11,
srcSize: 3,
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
},
{
desc: "short dst",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 10,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: ErrShortDst,
},
{
desc: "short src",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 11,
srcSize: 2,
ioSize: 2,
wantStr: "abbccc",
wantErr: ErrShortSrc,
},
{
desc: "basic",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 100,
srcSize: 100,
wantStr: "1a2b3c10d11e1g",
},
{
desc: "long",
t: rleEncode{},
src: strings.Repeat("a", 12) +
strings.Repeat("b", 23) +
strings.Repeat("c", 34) +
strings.Repeat("d", 45) +
strings.Repeat("e", 56) +
strings.Repeat("z", 99),
dstSize: 100,
srcSize: 100,
wantStr: "12a23b34c45d56e99z",
},
{
desc: "tight buffers",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 3,
srcSize: 12,
wantStr: "1a2b3c10d11e1g",
},
{
desc: "short dst",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 2,
srcSize: 12,
wantStr: "1a2b3c",
wantErr: ErrShortDst,
},
{
desc: "short src",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 3,
srcSize: 11,
ioSize: 11,
wantStr: "1a2b3c10d",
wantErr: ErrShortSrc,
},
{
desc: "allowStutter = false",
t: rleEncode{allowStutter: false},
src: "aaaabbbbbbbbccccddddd",
dstSize: 10,
srcSize: 10,
wantStr: "4a8b4c5d",
},
{
desc: "allowStutter = true",
t: rleEncode{allowStutter: true},
src: "aaaabbbbbbbbccccddddd",
dstSize: 10,
srcSize: 10,
ioSize: 10,
wantStr: "4a6b2b4c4d1d",
},
{
desc: "trickler",
t: &trickler{},
src: "abcdefghijklm",
dstSize: 3,
srcSize: 15,
wantStr: "abcdefghijklm",
},
{
desc: "delayedTrickler",
t: &delayedTrickler{},
src: "abcdefghijklm",
dstSize: 3,
srcSize: 15,
wantStr: "abcdefghijklm",
},
}
// TestReader runs every entry of testCases through a Reader whose internal
// buffers are resized to the case's dstSize/srcSize, and compares everything
// read from it, plus the final error, with the expectation.
func TestReader(t *testing.T) {
	for _, tc := range testCases {
		testtext.Run(t, tc.desc, func(t *testing.T) {
			r := NewReader(strings.NewReader(tc.src), tc.t)
			// Differently sized dst and src buffers are not part of the
			// exported API. We override them manually.
			r.dst, r.src = make([]byte, tc.dstSize), make([]byte, tc.srcSize)
			out, err := ioutil.ReadAll(r)
			if got := string(out); got != tc.wantStr || err != tc.wantErr {
				t.Errorf("\ngot %q, %v\nwant %q, %v", got, err, tc.wantStr, tc.wantErr)
			}
		})
	}
}
// TestWriter feeds every case from testCases and chainTests through a Writer
// in chunks of several sizes (or only tc.ioSize when that is set) and checks
// the bytes that reach the underlying buffer and the final error.
func TestWriter(t *testing.T) {
	defaultSizes := []int{1, 2, 3, 4, 5, 10, 100, 1000}
	for _, tc := range append(testCases, chainTests()...) {
		sizes := defaultSizes
		if tc.ioSize > 0 {
			sizes = []int{tc.ioSize}
		}
		for _, sz := range sizes {
			name := fmt.Sprintf("%s/%d", tc.desc, sz)
			testtext.Run(t, name, func(t *testing.T) {
				var out bytes.Buffer
				w := NewWriter(&out, tc.t)
				// Differently sized dst and src buffers are not part of the
				// exported API. We override them manually.
				w.dst, w.src = make([]byte, tc.dstSize), make([]byte, tc.srcSize)

				chunk := make([]byte, sz)
				var err error
				// Write the input sz bytes at a time until done or failed.
				for rest := tc.src; err == nil && len(rest) > 0; {
					n := copy(chunk, rest)
					rest = rest[n:]
					var written int
					written, err = w.Write(chunk[:n])
					if err == nil && written != n {
						t.Errorf("did not consume all bytes %d < %d", written, n)
					}
				}
				if err == nil {
					err = w.Close()
				}
				if got := out.String(); got != tc.wantStr || err != tc.wantErr {
					t.Errorf("\ngot %q, %v\nwant %q, %v", got, err, tc.wantStr, tc.wantErr)
				}
			})
		}
	}
}
// TestNop checks that Nop.Transform copies as much of the input as fits into
// dst, reports equal nDst and nSrc, and returns the expected error.
func TestNop(t *testing.T) {
	type nopTest struct {
		str     string
		dstSize int
		err     error
	}
	tests := []nopTest{
		{"", 0, nil},
		{"", 10, nil},
		{"a", 0, ErrShortDst},
		{"a", 1, nil},
		{"a", 10, nil},
	}
	for i, tc := range tests {
		dst := make([]byte, tc.dstSize)
		nDst, nSrc, err := Nop.Transform(dst, []byte(tc.str), true)

		// Only the prefix that fits in dst can be expected back.
		want := tc.str
		if len(want) > tc.dstSize {
			want = want[:tc.dstSize]
		}
		got := string(dst[:nDst])
		if got == want && err == tc.err && nSrc == nDst {
			continue
		}
		t.Errorf("%d:\ngot %q, %d, %v\nwant %q, %d, %v", i, got, nSrc, err, want, nDst, tc.err)
	}
}
// TestDiscard checks that Discard.Transform consumes all of its input, writes
// nothing to dst, and returns a nil error regardless of the dst size.
func TestDiscard(t *testing.T) {
	testCases := []struct {
		str     string
		dstSize int
	}{
		{"", 0},
		{"", 10},
		{"a", 0},
		{"ab", 10},
	}
	for i, tc := range testCases {
		nDst, nSrc, err := Discard.Transform(make([]byte, tc.dstSize), []byte(tc.str), true)
		if nDst != 0 || nSrc != len(tc.str) || err != nil {
			// nDst is an int, so it is reported with %d; %q would render it
			// as a quoted rune instead of the byte count.
			t.Errorf("%d:\ngot %d, %d, %v\nwant 0, %d, nil", i, nDst, nSrc, err, len(tc.str))
		}
	}
}
// mkChain creates a Chain transformer. x must be alternating between transformer
// and bufSize, like T, (sz, T)*
//
// The transformers sit at the even positions of x. The int at odd position i
// becomes the length of the buffer of link (i+1)/2 of the resulting chain.
func mkChain(x ...interface{}) *chain {
	ts := make([]Transformer, 0, (len(x)+1)/2)
	for i, v := range x {
		if i%2 == 0 {
			ts = append(ts, v.(Transformer))
		}
	}
	c := Chain(ts...).(*chain)
	for i := 1; i < len(x); i += 2 {
		c.link[(i+1)/2].b = make([]byte, x[i].(int))
	}
	return c
}
// chainTests returns a fresh table of cases whose transformer is a chain
// built with mkChain, i.e. with explicitly sized intermediate buffers. Many
// entries expect errShortInternal, covering situations in which one of those
// intermediate buffers is too small. Because mkChain is invoked in each
// literal, every call builds new chain values.
func chainTests() []testCase {
return []testCase{
{
desc: "nil error",
t: mkChain(rleEncode{}, 100, lowerCaseASCII{}),
src: "ABB",
dstSize: 100,
srcSize: 100,
wantStr: "1a2b",
wantErr: nil,
wantIter: 1,
},
{
desc: "short dst buffer",
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}),
src: "1a2b3c10d11e0f1g",
dstSize: 10,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: ErrShortDst,
},
{
desc: "short internal dst buffer",
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
src: "1a2b3c10d11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: errShortInternal,
},
{
desc: "short internal dst buffer from input",
t: mkChain(rleDecode{}, 10, Nop),
src: "1a2b3c10d11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: errShortInternal,
},
{
desc: "empty short internal dst buffer",
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
src: "4a7b11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "aaaabbbbbbb",
wantErr: errShortInternal,
},
{
desc: "empty short internal dst buffer from input",
t: mkChain(rleDecode{}, 10, Nop),
src: "4a7b11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "aaaabbbbbbb",
wantErr: errShortInternal,
},
{
desc: "short internal src buffer after full dst buffer",
t: mkChain(Nop, 5, rleEncode{}, 10, Nop),
src: "cccccddddd",
dstSize: 100,
srcSize: 100,
wantStr: "",
wantErr: errShortInternal,
wantIter: 1,
},
{
desc: "short internal src buffer after short dst buffer; test lastFull",
t: mkChain(rleDecode{}, 5, rleEncode{}, 4, Nop),
src: "2a1b4c6d",
dstSize: 100,
srcSize: 100,
wantStr: "2a1b",
wantErr: errShortInternal,
},
{
desc: "short internal src buffer after successful complete fill",
t: mkChain(Nop, 3, rleDecode{}),
src: "123a4b",
dstSize: 4,
srcSize: 3,
wantStr: "",
wantErr: errShortInternal,
wantIter: 1,
},
{
desc: "short internal src buffer after short dst buffer; test lastFull",
t: mkChain(rleDecode{}, 5, rleEncode{}),
src: "2a1b4c6d",
dstSize: 4,
srcSize: 100,
wantStr: "2a1b",
wantErr: errShortInternal,
},
{
desc: "short src buffer",
t: mkChain(rleEncode{}, 5, Nop),
src: "abbcccddddeeeee",
dstSize: 4,
srcSize: 4,
ioSize: 4,
wantStr: "1a2b3c",
wantErr: ErrShortSrc,
},
{
desc: "process all in one go",
t: mkChain(rleEncode{}, 5, Nop),
src: "abbcccddddeeeeeffffff",
dstSize: 100,
srcSize: 100,
wantStr: "1a2b3c4d5e6f",
wantErr: nil,
wantIter: 1,
},
{
desc: "complete processing downstream after error",
t: mkChain(dontMentionX{}, 2, rleDecode{}, 5, Nop),
src: "3a4b5eX",
dstSize: 100,
srcSize: 100,
ioSize: 100,
wantStr: "aaabbbbeeeee",
wantErr: errYouMentionedX,
},
{
desc: "return downstream fatal errors first (followed by short dst)",
t: mkChain(dontMentionX{}, 8, rleDecode{}, 4, Nop),
src: "3a4b5eX",
dstSize: 100,
srcSize: 100,
ioSize: 100,
wantStr: "aaabbbb",
wantErr: errShortInternal,
},
{
desc: "return downstream fatal errors first (followed by short src)",
t: mkChain(dontMentionX{}, 5, Nop, 1, rleDecode{}),
src: "1a5bX",
dstSize: 100,
srcSize: 100,
ioSize: 100,
wantStr: "",
wantErr: errShortInternal,
},
{
desc: "short internal",
t: mkChain(Nop, 11, rleEncode{}, 3, Nop),
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 3,
srcSize: 100,
wantStr: "1a2b3c10d",
wantErr: errShortInternal,
},
}
}
// doTransform resets tc.t and then drives it by hand: it repeatedly calls
// Transform with at most tc.srcSize bytes of the remaining input and a
// tc.dstSize byte destination, collecting the output. It returns the
// collected output, the number of Transform calls made, and the error of the
// last call.
//
// The loop continues only while progress is possible: after a nil error if
// input remains, after ErrShortSrc if some input was consumed, and after
// ErrShortDst if anything was written or consumed.
func doTransform(tc testCase) (res string, iter int, err error) {
	tc.t.Reset()
	dst := make([]byte, tc.dstSize)
	in := []byte(tc.src)
	out := make([]byte, 0, 2*len(tc.src))
	for {
		iter++
		// Present at most srcSize bytes; atEOF only when all the rest fits.
		src, atEOF := in, true
		if tc.srcSize < len(in) {
			src, atEOF = in[:tc.srcSize], false
		}
		nDst, nSrc, tErr := tc.t.Transform(dst, src, atEOF)
		out = append(out, dst[:nDst]...)
		in = in[nSrc:]

		var more bool
		switch tErr {
		case nil:
			more = len(in) != 0
		case ErrShortSrc:
			more = nSrc > 0
		case ErrShortDst:
			more = nDst > 0 || nSrc > 0
		}
		if !more {
			return string(out), iter, tErr
		}
	}
}
// TestChain exercises Chain in three stages:
//  1. Chain() with no arguments must yield a value of type nop.
//  2. Every entry of testCases is wrapped in a single-element Chain and run
//     through doTransform.
//  3. A large list of derived cases is assembled in tests (starting from
//     chainTests) by placing each testCases transformer in chains with Nop,
//     Discard, rleEncode and rleDecode using various buffer sizes, and that
//     list is then run through doTransform.
func TestChain(t *testing.T) {
if c, ok := Chain().(nop); !ok {
t.Errorf("empty chain: %v; want Nop", c)
}
// Test Chain for a single Transformer.
for _, tc := range testCases {
tc.t = Chain(tc.t)
str, _, err := doTransform(tc)
if str != tc.wantStr || err != tc.wantErr {
t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr)
}
}
tests := chainTests()
sizes := []int{1, 2, 3, 4, 5, 7, 10, 100, 1000}
// addTest appends a copy of tc that uses chain t as its transformer. When
// the transformer under test is not the first link, an expected ErrShortSrc
// becomes errShortInternal; likewise for ErrShortDst when it is not the
// second-to-last entry of t.link.
addTest := func(tc testCase, t *chain) {
if t.link[0].t != tc.t && tc.wantErr == ErrShortSrc {
tc.wantErr = errShortInternal
}
if t.link[len(t.link)-2].t != tc.t && tc.wantErr == ErrShortDst {
tc.wantErr = errShortInternal
}
tc.t = t
tests = append(tests, tc)
}
// Vary the outer destination size while the chain's internal buffers keep
// the sizes of the original case.
for _, tc := range testCases {
for _, sz := range sizes {
tt := tc
tt.dstSize = sz
addTest(tt, mkChain(tc.t, tc.dstSize, Nop))
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 2, Nop))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop))
if sz >= tc.dstSize && (tc.wantErr != ErrShortDst || sz == tc.dstSize) {
addTest(tt, mkChain(Nop, tc.srcSize, tc.t))
addTest(tt, mkChain(Nop, 100, Nop, tc.srcSize, tc.t))
}
}
}
// Chains ending in Discard are expected to produce no output at all.
for _, tc := range testCases {
tt := tc
tt.dstSize = 1
tt.wantStr = ""
addTest(tt, mkChain(tc.t, tc.dstSize, Discard))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Discard))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, tc.dstSize, Discard))
}
// Round trips: the expected output is the original input with every "0f"
// removed.
for _, tc := range testCases {
tt := tc
tt.dstSize = 100
tt.wantStr = strings.Replace(tc.src, "0f", "", -1)
// Chain encoders and decoders.
if _, ok := tc.t.(rleEncode); ok && tc.wantErr == nil {
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 1000, rleDecode{}))
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, tc.dstSize, rleDecode{}))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}))
// decoding needs larger destinations
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, rleDecode{}, 100, Nop))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}, 100, Nop))
} else if _, ok := tc.t.(rleDecode); ok && tc.wantErr == nil {
// The internal buffer size may need to be the sum of the maximum segment
// size of the two encoders!
addTest(tt, mkChain(tc.t, 2*tc.dstSize, rleEncode{}))
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 101, rleEncode{}))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleEncode{}))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 200, rleEncode{}, 100, Nop))
}
}
for _, tc := range tests {
str, iter, err := doTransform(tc)
mi := tc.wantIter != 0 && tc.wantIter != iter
if str != tc.wantStr || err != tc.wantErr || mi {
t.Errorf("%s:\ngot iter:%d, %q, %v\nwant iter:%d, %q, %v", tc, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
}
// NOTE(review): this unconditional break leaves the loop after the first
// element of tests, so none of the cases added via addTest above are ever
// executed. Confirm whether this is intentional before removing it.
break
}
}
// TestRemoveFunc runs RemoveFunc transformers through doTransform. Unless a
// case supplies its own transformer, the shared filter is used, which removes
// the runes 'a', 'b', U+0300, U+1234 and ','. Each result is additionally fed
// back through the same transformer to check that the output does not change
// on a second pass.
func TestRemoveFunc(t *testing.T) {
filter := RemoveFunc(func(r rune) bool {
return strings.IndexRune("ab\u0300\u1234,", r) != -1
})
tests := []testCase{
{
src: ",",
wantStr: "",
},
{
src: "c",
wantStr: "c",
},
{
src: "\u2345",
wantStr: "\u2345",
},
{
src: "tschüß",
wantStr: "tschüß",
},
{
src: ",до,свидания,",
wantStr: "досвидания",
},
{
src: "a\xbd\xb2=\xbc ⌘",
wantStr: "\uFFFD\uFFFD=\uFFFD ⌘",
},
{
// If we didn't replace illegal bytes with RuneError, the result
// would be \u0300 or the code would need to be more complex.
src: "\xcc\u0300\x80",
wantStr: "\uFFFD\uFFFD",
},
{
src: "\xcc\u0300\x80",
dstSize: 3,
wantStr: "\uFFFD\uFFFD",
wantIter: 2,
},
{
// Test a long buffer greater than the internal buffer size
src: "hello\xcc\xcc\xccworld",
srcSize: 13,
wantStr: "hello\uFFFD\uFFFD\uFFFDworld",
wantIter: 1,
},
{
src: "\u2345",
dstSize: 2,
wantStr: "",
wantErr: ErrShortDst,
},
{
src: "\xcc",
dstSize: 2,
wantStr: "",
wantErr: ErrShortDst,
},
{
src: "\u0300",
dstSize: 2,
srcSize: 1,
wantStr: "",
wantErr: ErrShortSrc,
},
{
t: RemoveFunc(func(r rune) bool {
return r == utf8.RuneError
}),
src: "\xcc\u0300\x80",
wantStr: "\u0300",
},
}
for _, tc := range tests {
// Fill in defaults: the case is named after its input, uses the shared
// filter, and gets 100-byte buffers unless the entry says otherwise.
tc.desc = tc.src
if tc.t == nil {
tc.t = filter
}
if tc.dstSize == 0 {
tc.dstSize = 100
}
if tc.srcSize == 0 {
tc.srcSize = 100
}
str, iter, err := doTransform(tc)
mi := tc.wantIter != 0 && tc.wantIter != iter
if str != tc.wantStr || err != tc.wantErr || mi {
t.Errorf("%+q:\ngot iter:%d, %+q, %v\nwant iter:%d, %+q, %v", tc.src, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
}
// Idempotence check: transforming the output again must reproduce it.
// tc.src is overwritten here, so the message below reports the output of
// the first pass as the input.
tc.src = str
idem, _, _ := doTransform(tc)
if str != idem {
t.Errorf("%+q: found %+q; want %+q", tc.src, idem, str)
}
}
}
// testString runs f, a whole-input convenience function such as String or a
// wrapper around Bytes or Append, against the cases of testCases and
// chainTests. f receives the transformer and the input and returns the
// transformed text, the number of source bytes consumed, and an error.
//
// Cases that depend on a specific buffer size, or that expect ErrShortDst or
// ErrShortSrc, are skipped.
func testString(t *testing.T, f func(Transformer, string) (string, int, error)) {
	for _, tt := range append(testCases, chainTests()...) {
		if tt.desc == "allowStutter = true" {
			// We don't have control over the buffer size, so we eliminate tests
			// that depend on a specific buffer size being set.
			continue
		}
		if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc {
			// The result string will be different.
			continue
		}
		testtext.Run(t, tt.desc, func(t *testing.T) {
			got, n, err := f(tt.t, tt.src)
			if tt.wantErr != err {
				t.Errorf("error: got %v; want %v", err, tt.wantErr)
			}
			// Check that err == nil implies that n == len(tt.src). Note that vice
			// versa isn't necessarily true.
			if err == nil && n != len(tt.src) {
				// Report the expected byte count, not the (nil) error.
				t.Errorf("err == nil: got %d bytes, want %d", n, len(tt.src))
			}
			if got != tt.wantStr {
				t.Errorf("string: got %q; want %q", got, tt.wantStr)
			}
		})
	}
}
func TestBytes(t *testing.T) {
testString(t, func(z Transformer, s string) (string, int, error) {
b, n, err := Bytes(z, []byte(s))
return string(b), n, err
})
}
func TestAppend(t *testing.T) {
// Create a bunch of subtests for different buffer sizes.
testCases := [][]byte{
nil,
make([]byte, 0, 0),
make([]byte, 0, 1),
make([]byte, 1, 1),
make([]byte, 1, 5),
make([]byte, 100, 100),
make([]byte, 100, 200),
}
for _, tc := range testCases {
testString(t, func(z Transformer, s string) (string, int, error) {
b, n, err := Append(z, tc, []byte(s))
return string(b[len(tc):]), n, err
})
}
}
// TestString runs the shared testString cases through String and then probes
// three buffer-related behaviours with inputs sized around multiples of
// initialBufSize: overrunning the destination buffer, overrunning the source
// buffer, and the number of allocations for strings that do not change.
func TestString(t *testing.T) {
	testtext.Run(t, "transform", func(t *testing.T) { testString(t, String) })

	// Overrun the internal destination buffer.
	for i, s := range []string{
		aaa[:1*initialBufSize-1],
		aaa[:1*initialBufSize+0],
		aaa[:1*initialBufSize+1],
		AAA[:1*initialBufSize-1],
		AAA[:1*initialBufSize+0],
		AAA[:1*initialBufSize+1],
		AAA[:2*initialBufSize-1],
		AAA[:2*initialBufSize+0],
		AAA[:2*initialBufSize+1],
		aaa[:1*initialBufSize-2] + "A",
		aaa[:1*initialBufSize-1] + "A",
		aaa[:1*initialBufSize+0] + "A",
		aaa[:1*initialBufSize+1] + "A",
	} {
		testtext.Run(t, fmt.Sprint("dst buffer test using lower/", i), func(t *testing.T) {
			got, _, _ := String(lowerCaseASCII{}, s)
			if want := strings.ToLower(s); got != want {
				t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
			}
		})
	}

	// Overrun the internal source buffer.
	for i, s := range []string{
		aaa[:1*initialBufSize-1],
		aaa[:1*initialBufSize+0],
		aaa[:1*initialBufSize+1],
		// The -1/+0/+1 triple around twice the buffer size; the first entry
		// previously repeated +1, leaving the -1 boundary untested.
		aaa[:2*initialBufSize-1],
		aaa[:2*initialBufSize+0],
		aaa[:2*initialBufSize+1],
	} {
		testtext.Run(t, fmt.Sprint("src buffer test using rleEncode/", i), func(t *testing.T) {
			got, _, _ := String(rleEncode{}, s)
			if want := fmt.Sprintf("%da", len(s)); got != want {
				t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want))
			}
		})
	}

	// Test allocations for non-changing strings.
	// Note we still need to allocate a single buffer.
	for i, s := range []string{
		"",
		"123456789",
		aaa[:initialBufSize-1],
		aaa[:initialBufSize+0],
		aaa[:initialBufSize+1],
		aaa[:10*initialBufSize],
	} {
		testtext.Run(t, fmt.Sprint("alloc/", i), func(t *testing.T) {
			if n := testtext.AllocsPerRun(5, func() { String(&lowerCaseASCIILookahead{}, s) }); n > 1 {
				t.Errorf("#allocs was %f; want 1", n)
			}
		})
	}
}
// TestBytesAllocation tests that buffer growth stays limited with the trickler
// transformer, which behaves oddly but within spec. In case buffer growth is
// not correctly handled, the test will either panic with a failed allocation or
// thrash. To ensure the tests terminate under the last condition, we time out
// after some sufficiently long period of time.
func TestBytesAllocation(t *testing.T) {
	// The channel is buffered so the worker can always deliver its result and
	// exit, even if the test has already given up waiting. With an unbuffered
	// channel the goroutine would block on the send forever after a timeout.
	done := make(chan bool, 1)
	go func() {
		in := bytes.Repeat([]byte{'a'}, 1000)
		tr := trickler(make([]byte, 1))
		Bytes(&tr, in)
		done <- true
	}()
	select {
	case <-done:
	case <-time.After(3 * time.Second):
		t.Error("time out, likely due to excessive allocation")
	}
}
// TestStringAllocation tests that buffer growth stays limited with the trickler
// transformer, which behaves oddly but within spec. In case buffer growth is
// not correctly handled, the test will either panic with a failed allocation or
// thrash. To ensure the tests terminate under the last condition, we time out
// after some sufficiently long period of time.
func TestStringAllocation(t *testing.T) {
	// The channel is buffered so the worker can always deliver its result and
	// exit, even if the test has already given up waiting. With an unbuffered
	// channel the goroutine would block on the send forever after a timeout.
	done := make(chan bool, 1)
	go func() {
		tr := trickler(make([]byte, 1))
		String(&tr, aaa[:1000])
		done <- true
	}()
	select {
	case <-done:
	case <-time.After(3 * time.Second):
		t.Error("time out, likely due to excessive allocation")
	}
}
// BenchmarkStringLowerEmpty measures String with lowerCaseASCIILookahead on
// the empty string.
func BenchmarkStringLowerEmpty(b *testing.B) {
	for n := b.N; n > 0; n-- {
		String(&lowerCaseASCIILookahead{}, "")
	}
}
// BenchmarkStringLowerIdentical measures String with lowerCaseASCIILookahead
// on 4096 bytes of input that is already lower case.
func BenchmarkStringLowerIdentical(b *testing.B) {
	for n := b.N; n > 0; n-- {
		String(&lowerCaseASCIILookahead{}, aaa[:4096])
	}
}
// BenchmarkStringLowerChanged measures String with lowerCaseASCIILookahead on
// 4096 bytes of upper-case input, so every byte has to be changed.
func BenchmarkStringLowerChanged(b *testing.B) {
	for n := b.N; n > 0; n-- {
		String(&lowerCaseASCIILookahead{}, AAA[:4096])
	}
}
// aaa and AAA are 4096-byte strings consisting solely of 'a' and 'A'
// respectively. Tests and benchmarks in this file slice prefixes off them to
// obtain inputs of a chosen length.
var (
aaa = strings.Repeat("a", 4096)
AAA = strings.Repeat("A", 4096)
)
// badTransformer is a Transformer that never makes progress: every call
// reports ErrShortSrc without consuming or producing a single byte.
type badTransformer struct{}

// Transform ignores its arguments and always returns 0, 0, ErrShortSrc.
func (badTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	nDst, nSrc, err = 0, 0, ErrShortSrc
	return nDst, nSrc, err
}

// Reset does nothing; badTransformer has no state.
func (badTransformer) Reset() {}
// TestBadTransformer checks that String, Bytes and Reader.Read each return
// ErrShortSrc when driven by badTransformer, which always reports ErrShortSrc
// without making progress.
func TestBadTransformer(t *testing.T) {
	bt := badTransformer{}
	// The failure messages print the error actually received; it is not
	// necessarily nil when it differs from ErrShortSrc.
	if _, _, err := String(bt, "aaa"); err != ErrShortSrc {
		t.Errorf("String: got error %v; want %v", err, ErrShortSrc)
	}
	if _, _, err := Bytes(bt, []byte("aaa")); err != ErrShortSrc {
		t.Errorf("Bytes: got error %v; want %v", err, ErrShortSrc)
	}
	r := NewReader(bytes.NewReader([]byte("aaa")), bt)
	// Read into a nil slice, exactly as before; the variable is named buf so
	// it does not shadow the bytes package.
	var buf []byte
	if _, err := r.Read(buf); err != ErrShortSrc {
		t.Errorf("NewReader Read: got error %v; want %v", err, ErrShortSrc)
	}
}