| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package transform |
| |
| import ( |
| "bytes" |
| "errors" |
| "fmt" |
| "io/ioutil" |
| "strconv" |
| "strings" |
| "testing" |
| "time" |
| "unicode/utf8" |
| |
| "golang.org/x/text/internal/testtext" |
| ) |
| |
| type lowerCaseASCII struct{ NopResetter } |
| |
| func (lowerCaseASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| n := len(src) |
| if n > len(dst) { |
| n, err = len(dst), ErrShortDst |
| } |
| for i, c := range src[:n] { |
| if 'A' <= c && c <= 'Z' { |
| c += 'a' - 'A' |
| } |
| dst[i] = c |
| } |
| return n, n, err |
| } |
| |
| // lowerCaseASCIILookahead lowercases the string and reports ErrShortSrc as long |
| // as the input is not atEOF. |
| type lowerCaseASCIILookahead struct{ NopResetter } |
| |
| func (lowerCaseASCIILookahead) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| n := len(src) |
| if n > len(dst) { |
| n, err = len(dst), ErrShortDst |
| } |
| for i, c := range src[:n] { |
| if 'A' <= c && c <= 'Z' { |
| c += 'a' - 'A' |
| } |
| dst[i] = c |
| } |
| if !atEOF { |
| err = ErrShortSrc |
| } |
| return n, n, err |
| } |
| |
| var errYouMentionedX = errors.New("you mentioned X") |
| |
| type dontMentionX struct{ NopResetter } |
| |
| func (dontMentionX) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| n := len(src) |
| if n > len(dst) { |
| n, err = len(dst), ErrShortDst |
| } |
| for i, c := range src[:n] { |
| if c == 'X' { |
| return i, i, errYouMentionedX |
| } |
| dst[i] = c |
| } |
| return n, n, err |
| } |
| |
| var errAtEnd = errors.New("error after all text") |
| |
| type errorAtEnd struct{ NopResetter } |
| |
| func (errorAtEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| n := copy(dst, src) |
| if n < len(src) { |
| return n, n, ErrShortDst |
| } |
| if atEOF { |
| return n, n, errAtEnd |
| } |
| return n, n, nil |
| } |
| |
| type replaceWithConstant struct { |
| replacement string |
| written int |
| } |
| |
| func (t *replaceWithConstant) Reset() { |
| t.written = 0 |
| } |
| |
| func (t *replaceWithConstant) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| if atEOF { |
| nDst = copy(dst, t.replacement[t.written:]) |
| t.written += nDst |
| if t.written < len(t.replacement) { |
| err = ErrShortDst |
| } |
| } |
| return nDst, len(src), err |
| } |
| |
| type addAnXAtTheEnd struct{ NopResetter } |
| |
| func (addAnXAtTheEnd) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| n := copy(dst, src) |
| if n < len(src) { |
| return n, n, ErrShortDst |
| } |
| if !atEOF { |
| return n, n, nil |
| } |
| if len(dst) == n { |
| return n, n, ErrShortDst |
| } |
| dst[n] = 'X' |
| return n + 1, n, nil |
| } |
| |
| // doublerAtEOF is a strange Transformer that transforms "this" to "tthhiiss", |
| // but only if atEOF is true. |
| type doublerAtEOF struct{ NopResetter } |
| |
| func (doublerAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| if !atEOF { |
| return 0, 0, ErrShortSrc |
| } |
| for i, c := range src { |
| if 2*i+2 >= len(dst) { |
| return 2 * i, i, ErrShortDst |
| } |
| dst[2*i+0] = c |
| dst[2*i+1] = c |
| } |
| return 2 * len(src), len(src), nil |
| } |
| |
| // rleDecode and rleEncode implement a toy run-length encoding: "aabbbbbbbbbb" |
| // is encoded as "2a10b". The decoding is assumed to not contain any numbers. |
| |
| type rleDecode struct{ NopResetter } |
| |
| func (rleDecode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| loop: |
| for len(src) > 0 { |
| n := 0 |
| for i, c := range src { |
| if '0' <= c && c <= '9' { |
| n = 10*n + int(c-'0') |
| continue |
| } |
| if i == 0 { |
| return nDst, nSrc, errors.New("rleDecode: bad input") |
| } |
| if n > len(dst) { |
| return nDst, nSrc, ErrShortDst |
| } |
| for j := 0; j < n; j++ { |
| dst[j] = c |
| } |
| dst, src = dst[n:], src[i+1:] |
| nDst, nSrc = nDst+n, nSrc+i+1 |
| continue loop |
| } |
| if atEOF { |
| return nDst, nSrc, errors.New("rleDecode: bad input") |
| } |
| return nDst, nSrc, ErrShortSrc |
| } |
| return nDst, nSrc, nil |
| } |
| |
| type rleEncode struct { |
| NopResetter |
| |
| // allowStutter means that "xxxxxxxx" can be encoded as "5x3x" |
| // instead of always as "8x". |
| allowStutter bool |
| } |
| |
| func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| for len(src) > 0 { |
| n, c0 := len(src), src[0] |
| for i, c := range src[1:] { |
| if c != c0 { |
| n = i + 1 |
| break |
| } |
| } |
| if n == len(src) && !atEOF && !e.allowStutter { |
| return nDst, nSrc, ErrShortSrc |
| } |
| s := strconv.Itoa(n) |
| if len(s) >= len(dst) { |
| return nDst, nSrc, ErrShortDst |
| } |
| copy(dst, s) |
| dst[len(s)] = c0 |
| dst, src = dst[len(s)+1:], src[n:] |
| nDst, nSrc = nDst+len(s)+1, nSrc+n |
| } |
| return nDst, nSrc, nil |
| } |
| |
| // trickler consumes all input bytes, but writes a single byte at a time to dst. |
| type trickler []byte |
| |
| func (t *trickler) Reset() { |
| *t = nil |
| } |
| |
| func (t *trickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| *t = append(*t, src...) |
| if len(*t) == 0 { |
| return 0, 0, nil |
| } |
| if len(dst) == 0 { |
| return 0, len(src), ErrShortDst |
| } |
| dst[0] = (*t)[0] |
| *t = (*t)[1:] |
| if len(*t) > 0 { |
| err = ErrShortDst |
| } |
| return 1, len(src), err |
| } |
| |
| // delayedTrickler is like trickler, but delays writing output to dst. This is |
| // highly unlikely to be relevant in practice, but it seems like a good idea |
| // to have some tolerance as long as progress can be detected. |
| type delayedTrickler []byte |
| |
| func (t *delayedTrickler) Reset() { |
| *t = nil |
| } |
| |
| func (t *delayedTrickler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| if len(*t) > 0 && len(dst) > 0 { |
| dst[0] = (*t)[0] |
| *t = (*t)[1:] |
| nDst = 1 |
| } |
| *t = append(*t, src...) |
| if len(*t) > 0 { |
| err = ErrShortDst |
| } |
| return nDst, len(src), err |
| } |
| |
| type testCase struct { |
| desc string |
| t Transformer |
| src string |
| dstSize int |
| srcSize int |
| ioSize int |
| wantStr string |
| wantErr error |
| wantIter int // number of iterations taken; 0 means we don't care. |
| } |
| |
| func (t testCase) String() string { |
| return tstr(t.t) + "; " + t.desc |
| } |
| |
| func tstr(t Transformer) string { |
| if stringer, ok := t.(fmt.Stringer); ok { |
| return stringer.String() |
| } |
| s := fmt.Sprintf("%T", t) |
| return s[1+strings.Index(s, "."):] |
| } |
| |
| func (c chain) String() string { |
| buf := &bytes.Buffer{} |
| buf.WriteString("Chain(") |
| for i, l := range c.link[:len(c.link)-1] { |
| if i != 0 { |
| fmt.Fprint(buf, ", ") |
| } |
| buf.WriteString(tstr(l.t)) |
| } |
| buf.WriteString(")") |
| return buf.String() |
| } |
| |
| var testCases = []testCase{ |
| { |
| desc: "empty", |
| t: lowerCaseASCII{}, |
| src: "", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "", |
| }, |
| |
| { |
| desc: "basic", |
| t: lowerCaseASCII{}, |
| src: "Hello WORLD.", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "small dst", |
| t: lowerCaseASCII{}, |
| src: "Hello WORLD.", |
| dstSize: 3, |
| srcSize: 100, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "small src", |
| t: lowerCaseASCII{}, |
| src: "Hello WORLD.", |
| dstSize: 100, |
| srcSize: 4, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "small buffers", |
| t: lowerCaseASCII{}, |
| src: "Hello WORLD.", |
| dstSize: 3, |
| srcSize: 4, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "very small buffers", |
| t: lowerCaseASCII{}, |
| src: "Hello WORLD.", |
| dstSize: 1, |
| srcSize: 1, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "small dst with lookahead", |
| t: lowerCaseASCIILookahead{}, |
| src: "Hello WORLD.", |
| dstSize: 3, |
| srcSize: 100, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "small src with lookahead", |
| t: lowerCaseASCIILookahead{}, |
| src: "Hello WORLD.", |
| dstSize: 100, |
| srcSize: 4, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "small buffers with lookahead", |
| t: lowerCaseASCIILookahead{}, |
| src: "Hello WORLD.", |
| dstSize: 3, |
| srcSize: 4, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "very small buffers with lookahead", |
| t: lowerCaseASCIILookahead{}, |
| src: "Hello WORLD.", |
| dstSize: 1, |
| srcSize: 2, |
| wantStr: "hello world.", |
| }, |
| |
| { |
| desc: "user error", |
| t: dontMentionX{}, |
| src: "The First Rule of Transform Club: don't mention Mister X, ever.", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "The First Rule of Transform Club: don't mention Mister ", |
| wantErr: errYouMentionedX, |
| }, |
| |
| { |
| desc: "user error at end", |
| t: errorAtEnd{}, |
| src: "All goes well until it doesn't.", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "All goes well until it doesn't.", |
| wantErr: errAtEnd, |
| }, |
| |
| { |
| desc: "user error at end, incremental", |
| t: errorAtEnd{}, |
| src: "All goes well until it doesn't.", |
| dstSize: 10, |
| srcSize: 10, |
| wantStr: "All goes well until it doesn't.", |
| wantErr: errAtEnd, |
| }, |
| |
| { |
| desc: "replace entire non-empty string with one byte", |
| t: &replaceWithConstant{replacement: "X"}, |
| src: "none of this will be copied", |
| dstSize: 1, |
| srcSize: 10, |
| wantStr: "X", |
| }, |
| |
| { |
| desc: "replace entire empty string with one byte", |
| t: &replaceWithConstant{replacement: "X"}, |
| src: "", |
| dstSize: 1, |
| srcSize: 10, |
| wantStr: "X", |
| }, |
| |
| { |
| desc: "replace entire empty string with seven bytes", |
| t: &replaceWithConstant{replacement: "ABCDEFG"}, |
| src: "", |
| dstSize: 3, |
| srcSize: 10, |
| wantStr: "ABCDEFG", |
| }, |
| |
| { |
| desc: "add an X (initialBufSize-1)", |
| t: addAnXAtTheEnd{}, |
| src: aaa[:initialBufSize-1], |
| dstSize: 10, |
| srcSize: 10, |
| wantStr: aaa[:initialBufSize-1] + "X", |
| }, |
| |
| { |
| desc: "add an X (initialBufSize+0)", |
| t: addAnXAtTheEnd{}, |
| src: aaa[:initialBufSize+0], |
| dstSize: 10, |
| srcSize: 10, |
| wantStr: aaa[:initialBufSize+0] + "X", |
| }, |
| |
| { |
| desc: "add an X (initialBufSize+1)", |
| t: addAnXAtTheEnd{}, |
| src: aaa[:initialBufSize+1], |
| dstSize: 10, |
| srcSize: 10, |
| wantStr: aaa[:initialBufSize+1] + "X", |
| }, |
| |
| { |
| desc: "small buffers", |
| t: dontMentionX{}, |
| src: "The First Rule of Transform Club: don't mention Mister X, ever.", |
| dstSize: 10, |
| srcSize: 10, |
| wantStr: "The First Rule of Transform Club: don't mention Mister ", |
| wantErr: errYouMentionedX, |
| }, |
| |
| { |
| desc: "very small buffers", |
| t: dontMentionX{}, |
| src: "The First Rule of Transform Club: don't mention Mister X, ever.", |
| dstSize: 1, |
| srcSize: 1, |
| wantStr: "The First Rule of Transform Club: don't mention Mister ", |
| wantErr: errYouMentionedX, |
| }, |
| |
| { |
| desc: "only transform at EOF", |
| t: doublerAtEOF{}, |
| src: "this", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "tthhiiss", |
| }, |
| |
| { |
| desc: "basic", |
| t: rleDecode{}, |
| src: "1a2b3c10d11e0f1g", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "abbcccddddddddddeeeeeeeeeeeg", |
| }, |
| |
| { |
| desc: "long", |
| t: rleDecode{}, |
| src: "12a23b34c45d56e99z", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: strings.Repeat("a", 12) + |
| strings.Repeat("b", 23) + |
| strings.Repeat("c", 34) + |
| strings.Repeat("d", 45) + |
| strings.Repeat("e", 56) + |
| strings.Repeat("z", 99), |
| }, |
| |
| { |
| desc: "tight buffers", |
| t: rleDecode{}, |
| src: "1a2b3c10d11e0f1g", |
| dstSize: 11, |
| srcSize: 3, |
| wantStr: "abbcccddddddddddeeeeeeeeeeeg", |
| }, |
| |
| { |
| desc: "short dst", |
| t: rleDecode{}, |
| src: "1a2b3c10d11e0f1g", |
| dstSize: 10, |
| srcSize: 3, |
| wantStr: "abbcccdddddddddd", |
| wantErr: ErrShortDst, |
| }, |
| |
| { |
| desc: "short src", |
| t: rleDecode{}, |
| src: "1a2b3c10d11e0f1g", |
| dstSize: 11, |
| srcSize: 2, |
| ioSize: 2, |
| wantStr: "abbccc", |
| wantErr: ErrShortSrc, |
| }, |
| |
| { |
| desc: "basic", |
| t: rleEncode{}, |
| src: "abbcccddddddddddeeeeeeeeeeeg", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "1a2b3c10d11e1g", |
| }, |
| |
| { |
| desc: "long", |
| t: rleEncode{}, |
| src: strings.Repeat("a", 12) + |
| strings.Repeat("b", 23) + |
| strings.Repeat("c", 34) + |
| strings.Repeat("d", 45) + |
| strings.Repeat("e", 56) + |
| strings.Repeat("z", 99), |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "12a23b34c45d56e99z", |
| }, |
| |
| { |
| desc: "tight buffers", |
| t: rleEncode{}, |
| src: "abbcccddddddddddeeeeeeeeeeeg", |
| dstSize: 3, |
| srcSize: 12, |
| wantStr: "1a2b3c10d11e1g", |
| }, |
| |
| { |
| desc: "short dst", |
| t: rleEncode{}, |
| src: "abbcccddddddddddeeeeeeeeeeeg", |
| dstSize: 2, |
| srcSize: 12, |
| wantStr: "1a2b3c", |
| wantErr: ErrShortDst, |
| }, |
| |
| { |
| desc: "short src", |
| t: rleEncode{}, |
| src: "abbcccddddddddddeeeeeeeeeeeg", |
| dstSize: 3, |
| srcSize: 11, |
| ioSize: 11, |
| wantStr: "1a2b3c10d", |
| wantErr: ErrShortSrc, |
| }, |
| |
| { |
| desc: "allowStutter = false", |
| t: rleEncode{allowStutter: false}, |
| src: "aaaabbbbbbbbccccddddd", |
| dstSize: 10, |
| srcSize: 10, |
| wantStr: "4a8b4c5d", |
| }, |
| |
| { |
| desc: "allowStutter = true", |
| t: rleEncode{allowStutter: true}, |
| src: "aaaabbbbbbbbccccddddd", |
| dstSize: 10, |
| srcSize: 10, |
| ioSize: 10, |
| wantStr: "4a6b2b4c4d1d", |
| }, |
| |
| { |
| desc: "trickler", |
| t: &trickler{}, |
| src: "abcdefghijklm", |
| dstSize: 3, |
| srcSize: 15, |
| wantStr: "abcdefghijklm", |
| }, |
| |
| { |
| desc: "delayedTrickler", |
| t: &delayedTrickler{}, |
| src: "abcdefghijklm", |
| dstSize: 3, |
| srcSize: 15, |
| wantStr: "abcdefghijklm", |
| }, |
| } |
| |
| func TestReader(t *testing.T) { |
| for _, tc := range testCases { |
| testtext.Run(t, tc.desc, func(t *testing.T) { |
| r := NewReader(strings.NewReader(tc.src), tc.t) |
| // Differently sized dst and src buffers are not part of the |
| // exported API. We override them manually. |
| r.dst = make([]byte, tc.dstSize) |
| r.src = make([]byte, tc.srcSize) |
| got, err := ioutil.ReadAll(r) |
| str := string(got) |
| if str != tc.wantStr || err != tc.wantErr { |
| t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr) |
| } |
| }) |
| } |
| } |
| |
| func TestWriter(t *testing.T) { |
| tests := append(testCases, chainTests()...) |
| for _, tc := range tests { |
| sizes := []int{1, 2, 3, 4, 5, 10, 100, 1000} |
| if tc.ioSize > 0 { |
| sizes = []int{tc.ioSize} |
| } |
| for _, sz := range sizes { |
| testtext.Run(t, fmt.Sprintf("%s/%d", tc.desc, sz), func(t *testing.T) { |
| bb := &bytes.Buffer{} |
| w := NewWriter(bb, tc.t) |
| // Differently sized dst and src buffers are not part of the |
| // exported API. We override them manually. |
| w.dst = make([]byte, tc.dstSize) |
| w.src = make([]byte, tc.srcSize) |
| src := make([]byte, sz) |
| var err error |
| for b := tc.src; len(b) > 0 && err == nil; { |
| n := copy(src, b) |
| b = b[n:] |
| m := 0 |
| m, err = w.Write(src[:n]) |
| if m != n && err == nil { |
| t.Errorf("did not consume all bytes %d < %d", m, n) |
| } |
| } |
| if err == nil { |
| err = w.Close() |
| } |
| str := bb.String() |
| if str != tc.wantStr || err != tc.wantErr { |
| t.Errorf("\ngot %q, %v\nwant %q, %v", str, err, tc.wantStr, tc.wantErr) |
| } |
| }) |
| } |
| } |
| } |
| |
| func TestNop(t *testing.T) { |
| testCases := []struct { |
| str string |
| dstSize int |
| err error |
| }{ |
| {"", 0, nil}, |
| {"", 10, nil}, |
| {"a", 0, ErrShortDst}, |
| {"a", 1, nil}, |
| {"a", 10, nil}, |
| } |
| for i, tc := range testCases { |
| dst := make([]byte, tc.dstSize) |
| nDst, nSrc, err := Nop.Transform(dst, []byte(tc.str), true) |
| want := tc.str |
| if tc.dstSize < len(want) { |
| want = want[:tc.dstSize] |
| } |
| if got := string(dst[:nDst]); got != want || err != tc.err || nSrc != nDst { |
| t.Errorf("%d:\ngot %q, %d, %v\nwant %q, %d, %v", i, got, nSrc, err, want, nDst, tc.err) |
| } |
| } |
| } |
| |
| func TestDiscard(t *testing.T) { |
| testCases := []struct { |
| str string |
| dstSize int |
| }{ |
| {"", 0}, |
| {"", 10}, |
| {"a", 0}, |
| {"ab", 10}, |
| } |
| for i, tc := range testCases { |
| nDst, nSrc, err := Discard.Transform(make([]byte, tc.dstSize), []byte(tc.str), true) |
| if nDst != 0 || nSrc != len(tc.str) || err != nil { |
| t.Errorf("%d:\ngot %q, %d, %v\nwant 0, %d, nil", i, nDst, nSrc, err, len(tc.str)) |
| } |
| } |
| } |
| |
| // mkChain creates a Chain transformer. x must be alternating between transformer |
| // and bufSize, like T, (sz, T)* |
| func mkChain(x ...interface{}) *chain { |
| t := []Transformer{} |
| for i := 0; i < len(x); i += 2 { |
| t = append(t, x[i].(Transformer)) |
| } |
| c := Chain(t...).(*chain) |
| for i, j := 1, 1; i < len(x); i, j = i+2, j+1 { |
| c.link[j].b = make([]byte, x[i].(int)) |
| } |
| return c |
| } |
| |
| func chainTests() []testCase { |
| return []testCase{ |
| { |
| desc: "nil error", |
| t: mkChain(rleEncode{}, 100, lowerCaseASCII{}), |
| src: "ABB", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "1a2b", |
| wantErr: nil, |
| wantIter: 1, |
| }, |
| |
| { |
| desc: "short dst buffer", |
| t: mkChain(lowerCaseASCII{}, 3, rleDecode{}), |
| src: "1a2b3c10d11e0f1g", |
| dstSize: 10, |
| srcSize: 3, |
| wantStr: "abbcccdddddddddd", |
| wantErr: ErrShortDst, |
| }, |
| |
| { |
| desc: "short internal dst buffer", |
| t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop), |
| src: "1a2b3c10d11e0f1g", |
| dstSize: 100, |
| srcSize: 3, |
| wantStr: "abbcccdddddddddd", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "short internal dst buffer from input", |
| t: mkChain(rleDecode{}, 10, Nop), |
| src: "1a2b3c10d11e0f1g", |
| dstSize: 100, |
| srcSize: 3, |
| wantStr: "abbcccdddddddddd", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "empty short internal dst buffer", |
| t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop), |
| src: "4a7b11e0f1g", |
| dstSize: 100, |
| srcSize: 3, |
| wantStr: "aaaabbbbbbb", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "empty short internal dst buffer from input", |
| t: mkChain(rleDecode{}, 10, Nop), |
| src: "4a7b11e0f1g", |
| dstSize: 100, |
| srcSize: 3, |
| wantStr: "aaaabbbbbbb", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "short internal src buffer after full dst buffer", |
| t: mkChain(Nop, 5, rleEncode{}, 10, Nop), |
| src: "cccccddddd", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "", |
| wantErr: errShortInternal, |
| wantIter: 1, |
| }, |
| |
| { |
| desc: "short internal src buffer after short dst buffer; test lastFull", |
| t: mkChain(rleDecode{}, 5, rleEncode{}, 4, Nop), |
| src: "2a1b4c6d", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "2a1b", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "short internal src buffer after successful complete fill", |
| t: mkChain(Nop, 3, rleDecode{}), |
| src: "123a4b", |
| dstSize: 4, |
| srcSize: 3, |
| wantStr: "", |
| wantErr: errShortInternal, |
| wantIter: 1, |
| }, |
| |
| { |
| desc: "short internal src buffer after short dst buffer; test lastFull", |
| t: mkChain(rleDecode{}, 5, rleEncode{}), |
| src: "2a1b4c6d", |
| dstSize: 4, |
| srcSize: 100, |
| wantStr: "2a1b", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "short src buffer", |
| t: mkChain(rleEncode{}, 5, Nop), |
| src: "abbcccddddeeeee", |
| dstSize: 4, |
| srcSize: 4, |
| ioSize: 4, |
| wantStr: "1a2b3c", |
| wantErr: ErrShortSrc, |
| }, |
| |
| { |
| desc: "process all in one go", |
| t: mkChain(rleEncode{}, 5, Nop), |
| src: "abbcccddddeeeeeffffff", |
| dstSize: 100, |
| srcSize: 100, |
| wantStr: "1a2b3c4d5e6f", |
| wantErr: nil, |
| wantIter: 1, |
| }, |
| |
| { |
| desc: "complete processing downstream after error", |
| t: mkChain(dontMentionX{}, 2, rleDecode{}, 5, Nop), |
| src: "3a4b5eX", |
| dstSize: 100, |
| srcSize: 100, |
| ioSize: 100, |
| wantStr: "aaabbbbeeeee", |
| wantErr: errYouMentionedX, |
| }, |
| |
| { |
| desc: "return downstream fatal errors first (followed by short dst)", |
| t: mkChain(dontMentionX{}, 8, rleDecode{}, 4, Nop), |
| src: "3a4b5eX", |
| dstSize: 100, |
| srcSize: 100, |
| ioSize: 100, |
| wantStr: "aaabbbb", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "return downstream fatal errors first (followed by short src)", |
| t: mkChain(dontMentionX{}, 5, Nop, 1, rleDecode{}), |
| src: "1a5bX", |
| dstSize: 100, |
| srcSize: 100, |
| ioSize: 100, |
| wantStr: "", |
| wantErr: errShortInternal, |
| }, |
| |
| { |
| desc: "short internal", |
| t: mkChain(Nop, 11, rleEncode{}, 3, Nop), |
| src: "abbcccddddddddddeeeeeeeeeeeg", |
| dstSize: 3, |
| srcSize: 100, |
| wantStr: "1a2b3c10d", |
| wantErr: errShortInternal, |
| }, |
| } |
| } |
| |
| func doTransform(tc testCase) (res string, iter int, err error) { |
| tc.t.Reset() |
| dst := make([]byte, tc.dstSize) |
| out, in := make([]byte, 0, 2*len(tc.src)), []byte(tc.src) |
| for { |
| iter++ |
| src, atEOF := in, true |
| if len(src) > tc.srcSize { |
| src, atEOF = src[:tc.srcSize], false |
| } |
| nDst, nSrc, err := tc.t.Transform(dst, src, atEOF) |
| out = append(out, dst[:nDst]...) |
| in = in[nSrc:] |
| switch { |
| case err == nil && len(in) != 0: |
| case err == ErrShortSrc && nSrc > 0: |
| case err == ErrShortDst && (nDst > 0 || nSrc > 0): |
| default: |
| return string(out), iter, err |
| } |
| } |
| } |
| |
| func TestChain(t *testing.T) { |
| if c, ok := Chain().(nop); !ok { |
| t.Errorf("empty chain: %v; want Nop", c) |
| } |
| |
| // Test Chain for a single Transformer. |
| for _, tc := range testCases { |
| tc.t = Chain(tc.t) |
| str, _, err := doTransform(tc) |
| if str != tc.wantStr || err != tc.wantErr { |
| t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr) |
| } |
| } |
| |
| tests := chainTests() |
| sizes := []int{1, 2, 3, 4, 5, 7, 10, 100, 1000} |
| addTest := func(tc testCase, t *chain) { |
| if t.link[0].t != tc.t && tc.wantErr == ErrShortSrc { |
| tc.wantErr = errShortInternal |
| } |
| if t.link[len(t.link)-2].t != tc.t && tc.wantErr == ErrShortDst { |
| tc.wantErr = errShortInternal |
| } |
| tc.t = t |
| tests = append(tests, tc) |
| } |
| for _, tc := range testCases { |
| for _, sz := range sizes { |
| tt := tc |
| tt.dstSize = sz |
| addTest(tt, mkChain(tc.t, tc.dstSize, Nop)) |
| addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 2, Nop)) |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop)) |
| if sz >= tc.dstSize && (tc.wantErr != ErrShortDst || sz == tc.dstSize) { |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t)) |
| addTest(tt, mkChain(Nop, 100, Nop, tc.srcSize, tc.t)) |
| } |
| } |
| } |
| for _, tc := range testCases { |
| tt := tc |
| tt.dstSize = 1 |
| tt.wantStr = "" |
| addTest(tt, mkChain(tc.t, tc.dstSize, Discard)) |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Discard)) |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, tc.dstSize, Discard)) |
| } |
| for _, tc := range testCases { |
| tt := tc |
| tt.dstSize = 100 |
| tt.wantStr = strings.Replace(tc.src, "0f", "", -1) |
| // Chain encoders and decoders. |
| if _, ok := tc.t.(rleEncode); ok && tc.wantErr == nil { |
| addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 1000, rleDecode{})) |
| addTest(tt, mkChain(tc.t, tc.dstSize, Nop, tc.dstSize, rleDecode{})) |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{})) |
| // decoding needs larger destinations |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, rleDecode{}, 100, Nop)) |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}, 100, Nop)) |
| } else if _, ok := tc.t.(rleDecode); ok && tc.wantErr == nil { |
| // The internal buffer size may need to be the sum of the maximum segment |
| // size of the two encoders! |
| addTest(tt, mkChain(tc.t, 2*tc.dstSize, rleEncode{})) |
| addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 101, rleEncode{})) |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleEncode{})) |
| addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 200, rleEncode{}, 100, Nop)) |
| } |
| } |
| for _, tc := range tests { |
| str, iter, err := doTransform(tc) |
| mi := tc.wantIter != 0 && tc.wantIter != iter |
| if str != tc.wantStr || err != tc.wantErr || mi { |
| t.Errorf("%s:\ngot iter:%d, %q, %v\nwant iter:%d, %q, %v", tc, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr) |
| } |
| break |
| } |
| } |
| |
| func TestRemoveFunc(t *testing.T) { |
| filter := RemoveFunc(func(r rune) bool { |
| return strings.IndexRune("ab\u0300\u1234,", r) != -1 |
| }) |
| tests := []testCase{ |
| { |
| src: ",", |
| wantStr: "", |
| }, |
| |
| { |
| src: "c", |
| wantStr: "c", |
| }, |
| |
| { |
| src: "\u2345", |
| wantStr: "\u2345", |
| }, |
| |
| { |
| src: "tschüß", |
| wantStr: "tschüß", |
| }, |
| |
| { |
| src: ",до,свидания,", |
| wantStr: "досвидания", |
| }, |
| |
| { |
| src: "a\xbd\xb2=\xbc ⌘", |
| wantStr: "\uFFFD\uFFFD=\uFFFD ⌘", |
| }, |
| |
| { |
| // If we didn't replace illegal bytes with RuneError, the result |
| // would be \u0300 or the code would need to be more complex. |
| src: "\xcc\u0300\x80", |
| wantStr: "\uFFFD\uFFFD", |
| }, |
| |
| { |
| src: "\xcc\u0300\x80", |
| dstSize: 3, |
| wantStr: "\uFFFD\uFFFD", |
| wantIter: 2, |
| }, |
| |
| { |
| // Test a long buffer greater than the internal buffer size |
| src: "hello\xcc\xcc\xccworld", |
| srcSize: 13, |
| wantStr: "hello\uFFFD\uFFFD\uFFFDworld", |
| wantIter: 1, |
| }, |
| |
| { |
| src: "\u2345", |
| dstSize: 2, |
| wantStr: "", |
| wantErr: ErrShortDst, |
| }, |
| |
| { |
| src: "\xcc", |
| dstSize: 2, |
| wantStr: "", |
| wantErr: ErrShortDst, |
| }, |
| |
| { |
| src: "\u0300", |
| dstSize: 2, |
| srcSize: 1, |
| wantStr: "", |
| wantErr: ErrShortSrc, |
| }, |
| |
| { |
| t: RemoveFunc(func(r rune) bool { |
| return r == utf8.RuneError |
| }), |
| src: "\xcc\u0300\x80", |
| wantStr: "\u0300", |
| }, |
| } |
| |
| for _, tc := range tests { |
| tc.desc = tc.src |
| if tc.t == nil { |
| tc.t = filter |
| } |
| if tc.dstSize == 0 { |
| tc.dstSize = 100 |
| } |
| if tc.srcSize == 0 { |
| tc.srcSize = 100 |
| } |
| str, iter, err := doTransform(tc) |
| mi := tc.wantIter != 0 && tc.wantIter != iter |
| if str != tc.wantStr || err != tc.wantErr || mi { |
| t.Errorf("%+q:\ngot iter:%d, %+q, %v\nwant iter:%d, %+q, %v", tc.src, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr) |
| } |
| |
| tc.src = str |
| idem, _, _ := doTransform(tc) |
| if str != idem { |
| t.Errorf("%+q: found %+q; want %+q", tc.src, idem, str) |
| } |
| } |
| } |
| |
| func testString(t *testing.T, f func(Transformer, string) (string, int, error)) { |
| for _, tt := range append(testCases, chainTests()...) { |
| if tt.desc == "allowStutter = true" { |
| // We don't have control over the buffer size, so we eliminate tests |
| // that depend on a specific buffer size being set. |
| continue |
| } |
| if tt.wantErr == ErrShortDst || tt.wantErr == ErrShortSrc { |
| // The result string will be different. |
| continue |
| } |
| testtext.Run(t, tt.desc, func(t *testing.T) { |
| got, n, err := f(tt.t, tt.src) |
| if tt.wantErr != err { |
| t.Errorf("error: got %v; want %v", err, tt.wantErr) |
| } |
| // Check that err == nil implies that n == len(tt.src). Note that vice |
| // versa isn't necessarily true. |
| if err == nil && n != len(tt.src) { |
| t.Errorf("err == nil: got %d bytes, want %d", n, err) |
| } |
| if got != tt.wantStr { |
| t.Errorf("string: got %q; want %q", got, tt.wantStr) |
| } |
| }) |
| } |
| } |
| |
| func TestBytes(t *testing.T) { |
| testString(t, func(z Transformer, s string) (string, int, error) { |
| b, n, err := Bytes(z, []byte(s)) |
| return string(b), n, err |
| }) |
| } |
| |
| func TestAppend(t *testing.T) { |
| // Create a bunch of subtests for different buffer sizes. |
| testCases := [][]byte{ |
| nil, |
| make([]byte, 0, 0), |
| make([]byte, 0, 1), |
| make([]byte, 1, 1), |
| make([]byte, 1, 5), |
| make([]byte, 100, 100), |
| make([]byte, 100, 200), |
| } |
| for _, tc := range testCases { |
| testString(t, func(z Transformer, s string) (string, int, error) { |
| b, n, err := Append(z, tc, []byte(s)) |
| return string(b[len(tc):]), n, err |
| }) |
| } |
| } |
| |
| func TestString(t *testing.T) { |
| testtext.Run(t, "transform", func(t *testing.T) { testString(t, String) }) |
| |
| // Overrun the internal destination buffer. |
| for i, s := range []string{ |
| aaa[:1*initialBufSize-1], |
| aaa[:1*initialBufSize+0], |
| aaa[:1*initialBufSize+1], |
| AAA[:1*initialBufSize-1], |
| AAA[:1*initialBufSize+0], |
| AAA[:1*initialBufSize+1], |
| AAA[:2*initialBufSize-1], |
| AAA[:2*initialBufSize+0], |
| AAA[:2*initialBufSize+1], |
| aaa[:1*initialBufSize-2] + "A", |
| aaa[:1*initialBufSize-1] + "A", |
| aaa[:1*initialBufSize+0] + "A", |
| aaa[:1*initialBufSize+1] + "A", |
| } { |
| testtext.Run(t, fmt.Sprint("dst buffer test using lower/", i), func(t *testing.T) { |
| got, _, _ := String(lowerCaseASCII{}, s) |
| if want := strings.ToLower(s); got != want { |
| t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want)) |
| } |
| }) |
| } |
| |
| // Overrun the internal source buffer. |
| for i, s := range []string{ |
| aaa[:1*initialBufSize-1], |
| aaa[:1*initialBufSize+0], |
| aaa[:1*initialBufSize+1], |
| aaa[:2*initialBufSize+1], |
| aaa[:2*initialBufSize+0], |
| aaa[:2*initialBufSize+1], |
| } { |
| testtext.Run(t, fmt.Sprint("src buffer test using rleEncode/", i), func(t *testing.T) { |
| got, _, _ := String(rleEncode{}, s) |
| if want := fmt.Sprintf("%da", len(s)); got != want { |
| t.Errorf("got %s (%d); want %s (%d)", got, len(got), want, len(want)) |
| } |
| }) |
| } |
| |
| // Test allocations for non-changing strings. |
| // Note we still need to allocate a single buffer. |
| for i, s := range []string{ |
| "", |
| "123456789", |
| aaa[:initialBufSize-1], |
| aaa[:initialBufSize+0], |
| aaa[:initialBufSize+1], |
| aaa[:10*initialBufSize], |
| } { |
| testtext.Run(t, fmt.Sprint("alloc/", i), func(t *testing.T) { |
| if n := testtext.AllocsPerRun(5, func() { String(&lowerCaseASCIILookahead{}, s) }); n > 1 { |
| t.Errorf("#allocs was %f; want 1", n) |
| } |
| }) |
| } |
| } |
| |
| // TestBytesAllocation tests that buffer growth stays limited with the trickler |
| // transformer, which behaves oddly but within spec. In case buffer growth is |
| // not correctly handled, the test will either panic with a failed allocation or |
| // thrash. To ensure the tests terminate under the last condition, we time out |
| // after some sufficiently long period of time. |
| func TestBytesAllocation(t *testing.T) { |
| done := make(chan bool) |
| go func() { |
| in := bytes.Repeat([]byte{'a'}, 1000) |
| tr := trickler(make([]byte, 1)) |
| Bytes(&tr, in) |
| done <- true |
| }() |
| select { |
| case <-done: |
| case <-time.After(3 * time.Second): |
| t.Error("time out, likely due to excessive allocation") |
| } |
| } |
| |
| // TestStringAllocation tests that buffer growth stays limited with the trickler |
| // transformer, which behaves oddly but within spec. In case buffer growth is |
| // not correctly handled, the test will either panic with a failed allocation or |
| // thrash. To ensure the tests terminate under the last condition, we time out |
| // after some sufficiently long period of time. |
| func TestStringAllocation(t *testing.T) { |
| done := make(chan bool) |
| go func() { |
| tr := trickler(make([]byte, 1)) |
| String(&tr, aaa[:1000]) |
| done <- true |
| }() |
| select { |
| case <-done: |
| case <-time.After(3 * time.Second): |
| t.Error("time out, likely due to excessive allocation") |
| } |
| } |
| |
| func BenchmarkStringLowerEmpty(b *testing.B) { |
| for i := 0; i < b.N; i++ { |
| String(&lowerCaseASCIILookahead{}, "") |
| } |
| } |
| |
| func BenchmarkStringLowerIdentical(b *testing.B) { |
| for i := 0; i < b.N; i++ { |
| String(&lowerCaseASCIILookahead{}, aaa[:4096]) |
| } |
| } |
| |
| func BenchmarkStringLowerChanged(b *testing.B) { |
| for i := 0; i < b.N; i++ { |
| String(&lowerCaseASCIILookahead{}, AAA[:4096]) |
| } |
| } |
| |
| var ( |
| aaa = strings.Repeat("a", 4096) |
| AAA = strings.Repeat("A", 4096) |
| ) |
| |
| type badTransformer struct{} |
| |
| func (bt badTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { |
| return 0, 0, ErrShortSrc |
| } |
| |
| func (bt badTransformer) Reset() {} |
| |
| func TestBadTransformer(t *testing.T) { |
| bt := badTransformer{} |
| if _, _, err := String(bt, "aaa"); err != ErrShortSrc { |
| t.Errorf("String expected ErrShortSrc, got nil") |
| } |
| if _, _, err := Bytes(bt, []byte("aaa")); err != ErrShortSrc { |
| t.Errorf("Bytes expected ErrShortSrc, got nil") |
| } |
| r := NewReader(bytes.NewReader([]byte("aaa")), bt) |
| var bytes []byte |
| if _, err := r.Read(bytes); err != ErrShortSrc { |
| t.Errorf("NewReader Read expected ErrShortSrc, got nil") |
| } |
| } |