// blob: cd6158e5ead6d71d21da5478b77a7842eefd1251 [file] [log] [blame]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package utf32
import (
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// Encodings shared by the tests in this file. Each name spells out the two
// arguments handed to UTF32: LE/BE is the endianness (LittleEndian/BigEndian)
// and the trailing IB/UB/EB is the BOM policy (IgnoreBOM/UseBOM/ExpectBOM).
var (
	utf32LEIB = UTF32(LittleEndian, IgnoreBOM) // UTF-32LE (atypical interpretation)
	utf32LEUB = UTF32(LittleEndian, UseBOM) // UTF-32, LE
	// The little-endian ExpectBOM combination is intentionally left out here:
	// utf32LEEB = UTF32(LittleEndian, ExpectBOM) // UTF-32, LE, Expect - covered in encoding_test.go
	utf32BEIB = UTF32(BigEndian, IgnoreBOM) // UTF-32BE (atypical interpretation)
	utf32BEUB = UTF32(BigEndian, UseBOM) // UTF-32 default
	utf32BEEB = UTF32(BigEndian, ExpectBOM) // UTF-32 Expect
)
// TestBasics hands four UTF-32 configurations (big- and little-endian, each
// with IgnoreBOM and ExpectBOM) to enctest.TestEncoding. Every entry carries
// the same three code points: U+0057, U+00E4 and U+1D565.
func TestBasics(t *testing.T) {
	type basicCase struct {
		enc     encoding.Encoding
		prefix  string // forwarded as the prefix argument of enctest.TestEncoding
		suffix  string // forwarded as the suffix argument; left empty by every entry
		encoded string // the three code points as 4-byte units
		text    string // the same code points as a Go (UTF-8) string
	}

	cases := []basicCase{
		{
			// Big-endian, BOM ignored: no prefix.
			enc:     utf32BEIB,
			encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65",
			text:    "\x57\u00e4\U0001d565",
		},
		{
			// Big-endian, BOM expected: prefix is U+FEFF in big-endian order.
			enc:     UTF32(BigEndian, ExpectBOM),
			prefix:  "\x00\x00\xfe\xff",
			encoded: "\x00\x00\x00\x57\x00\x00\x00\xe4\x00\x01\xd5\x65",
			text:    "\x57\u00e4\U0001d565",
		},
		{
			// Little-endian, BOM ignored: no prefix.
			enc:     UTF32(LittleEndian, IgnoreBOM),
			encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00",
			text:    "\x57\u00e4\U0001d565",
		},
		{
			// Little-endian, BOM expected: prefix is U+FEFF in little-endian order.
			enc:     UTF32(LittleEndian, ExpectBOM),
			prefix:  "\xff\xfe\x00\x00",
			encoded: "\x57\x00\x00\x00\xe4\x00\x00\x00\x65\xd5\x01\x00",
			text:    "\x57\u00e4\U0001d565",
		},
	}

	for i := range cases {
		c := &cases[i]
		enctest.TestEncoding(t, c.enc, c.encoded, c.text, c.prefix, c.suffix)
	}
}
// TestFiles passes the big-endian, BOM-ignoring encoding to enctest.TestFile.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, utf32BEIB)
}
// BenchmarkEncoding passes the big-endian, BOM-ignoring encoding to
// enctest.Benchmark.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, utf32BEIB)
}
// TestUTF32 is a table-driven test of single Transform calls on UTF-32
// encoders and decoders. Each entry supplies a fresh transformer, the source
// bytes, the destination buffer size and the atEOF flag (stored inverted),
// then checks the returned error, the bytes written and the number of source
// bytes consumed. Failures are reported with t.Errorf, so all entries run.
func TestUTF32(t *testing.T) {
	tests := []struct {
		desc    string
		src     string
		notEOF  bool // inverse of the atEOF argument given to Transform
		dstSize int  // length of the destination buffer
		wantDst string
		wantN   int // expected nSrc
		wantErr error
		tr      transform.Transformer
	}{
		// Empty input.
		{
			desc: "utf-32 IgnoreBOM dec: empty string",
			tr:   utf32BEIB.NewDecoder(),
		},
		{
			desc: "utf-32 UseBOM dec: empty string",
			tr:   utf32BEUB.NewDecoder(),
		},
		{
			desc:    "utf-32 ExpectBOM dec: empty string",
			wantErr: ErrMissingBOM,
			tr:      utf32BEEB.NewDecoder(),
		},

		// IgnoreBOM decoders: a leading U+FEFF is ordinary text.
		{
			desc:    "utf-32be dec: Doesn't interpret U+FEFF as BOM",
			src:     "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			dstSize: 100,
			wantDst: "\uFEFF\U00012345=Ra",
			wantN:   20,
			tr:      utf32BEIB.NewDecoder(),
		},
		{
			desc:    "utf-32be dec: Interprets little endian U+FEFF as invalid",
			src:     "\xFF\xFE\x00\x00\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			dstSize: 100,
			wantDst: "\uFFFD\U00012345=Ra",
			wantN:   20,
			tr:      utf32BEIB.NewDecoder(),
		},
		{
			desc:    "utf-32le dec: Doesn't interpret U+FEFF as BOM",
			src:     "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
			dstSize: 100,
			wantDst: "\uFEFF\U00012345=Ra",
			wantN:   20,
			tr:      utf32LEIB.NewDecoder(),
		},
		{
			desc:    "utf-32le dec: Interprets big endian U+FEFF as invalid",
			src:     "\x00\x00\xFE\xFF\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
			dstSize: 100,
			wantDst: "\uFFFD\U00012345=Ra",
			wantN:   20,
			tr:      utf32LEIB.NewDecoder(),
		},

		// UseBOM encoders.
		{
			desc:    "utf-32 enc: Writes big-endian BOM",
			src:     "\U00012345=Ra",
			dstSize: 100,
			wantDst: "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			wantN:   7,
			tr:      utf32BEUB.NewEncoder(),
		},
		{
			desc:    "utf-32 enc: Writes little-endian BOM",
			src:     "\U00012345=Ra",
			dstSize: 100,
			wantDst: "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
			wantN:   7,
			tr:      utf32LEUB.NewEncoder(),
		},

		// UseBOM decoders without a BOM.
		{
			desc:    "utf-32 dec: Interprets text using big-endian default when BOM not present",
			src:     "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			dstSize: 100,
			wantDst: "\U00012345=Ra",
			wantN:   16,
			tr:      utf32BEUB.NewDecoder(),
		},
		{
			desc:    "utf-32 dec: Interprets text using little-endian default when BOM not present",
			src:     "\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
			dstSize: 100,
			wantDst: "\U00012345=Ra",
			wantN:   16,
			tr:      utf32LEUB.NewDecoder(),
		},

		// UseBOM decoders with a leading BOM.
		{
			desc:    "utf-32 dec: BOM determines encoding BE",
			src:     "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			dstSize: 100,
			wantDst: "\U00012345=Ra",
			wantN:   20,
			tr:      utf32BEUB.NewDecoder(),
		},
		{
			desc:    "utf-32 dec: BOM determines encoding LE",
			src:     "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
			dstSize: 100,
			wantDst: "\U00012345=Ra",
			wantN:   20,
			tr:      utf32LEUB.NewDecoder(),
		},
		{
			desc:    "utf-32 dec: BOM determines encoding LE, change default",
			src:     "\xFF\xFE\x00\x00\x45\x23\x01\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
			dstSize: 100,
			wantDst: "\U00012345=Ra",
			wantN:   20,
			tr:      utf32BEUB.NewDecoder(),
		},
		{
			desc:    "utf-32 dec: BOM determines encoding BE, change default",
			src:     "\x00\x00\xFE\xFF\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			dstSize: 100,
			wantDst: "\U00012345=Ra",
			wantN:   20,
			tr:      utf32LEUB.NewDecoder(),
		},

		// BOM-like units after the start of the stream.
		{
			desc:    "utf-32 dec: Don't change big-endian byte order mid-stream",
			src:     "\x00\x01\x23\x45\x00\x00\x00\x3D\xFF\xFE\x00\x00\x00\x00\xFE\xFF\x00\x00\x00\x52\x00\x00\x00\x61",
			dstSize: 100,
			wantDst: "\U00012345=\uFFFD\uFEFFRa",
			wantN:   24,
			tr:      utf32BEUB.NewDecoder(),
		},
		{
			desc:    "utf-32 dec: Don't change little-endian byte order mid-stream",
			src:     "\x45\x23\x01\x00\x3D\x00\x00\x00\x00\x00\xFE\xFF\xFF\xFE\x00\x00\x52\x00\x00\x00\x61\x00\x00\x00",
			dstSize: 100,
			wantDst: "\U00012345=\uFFFD\uFEFFRa",
			wantN:   24,
			tr:      utf32LEUB.NewDecoder(),
		},

		// ExpectBOM decoder fed text without a BOM.
		{
			desc:    "utf-32 dec: Fail on missing BOM when required",
			src:     "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			dstSize: 100,
			wantDst: "",
			wantN:   0,
			wantErr: ErrMissingBOM,
			tr:      utf32BEEB.NewDecoder(),
		},

		// Encoder: short buffers and invalid UTF-8.
		{
			desc:    "utf-32 enc: Short dst",
			src:     "\U00012345=Ra",
			dstSize: 15,
			wantDst: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52",
			wantN:   6,
			wantErr: transform.ErrShortDst,
			tr:      utf32BEIB.NewEncoder(),
		},
		{
			desc:    "utf-32 enc: Short src",
			src:     "\U00012345=Ra\xC2",
			notEOF:  true,
			dstSize: 100,
			wantDst: "\x00\x01\x23\x45\x00\x00\x00\x3D\x00\x00\x00\x52\x00\x00\x00\x61",
			wantN:   7,
			wantErr: transform.ErrShortSrc,
			tr:      utf32BEIB.NewEncoder(),
		},
		{
			desc:    "utf-32 enc: Invalid input",
			src:     "\x80\xC1\xC2\x7F\xC2",
			dstSize: 100,
			wantDst: "\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\xFF\xFD\x00\x00\x00\x7F\x00\x00\xFF\xFD",
			wantN:   5,
			tr:      utf32BEIB.NewEncoder(),
		},

		// Decoder: short buffers and invalid code units.
		{
			desc:    "utf-32 dec: Short dst",
			src:     "\x00\x00\x00\x41",
			dstSize: 0,
			wantDst: "",
			wantN:   0,
			wantErr: transform.ErrShortDst,
			tr:      utf32BEIB.NewDecoder(),
		},
		{
			desc:    "utf-32 dec: Short src",
			src:     "\x00\x00\x00",
			notEOF:  true,
			dstSize: 4,
			wantDst: "",
			wantN:   0,
			wantErr: transform.ErrShortSrc,
			tr:      utf32BEIB.NewDecoder(),
		},
		{
			desc:    "utf-32 dec: Invalid input",
			src:     "\x00\x00\xD8\x00\x00\x00\xDF\xFF\x00\x11\x00\x00\x00\x00\x00",
			dstSize: 100,
			wantDst: "\uFFFD\uFFFD\uFFFD\uFFFD",
			wantN:   15,
			tr:      utf32BEIB.NewDecoder(),
		},
	}

	for idx := range tests {
		tt := &tests[idx]

		dst := make([]byte, tt.dstSize)
		atEOF := !tt.notEOF
		nDst, nSrc, err := tt.tr.Transform(dst, []byte(tt.src), atEOF)

		if tt.wantErr != err {
			t.Errorf("%d:%s: error was %v; want %v", idx, tt.desc, err, tt.wantErr)
		}
		// Only the first nDst bytes of dst were written by Transform.
		if got := string(dst[:nDst]); tt.wantDst != got {
			t.Errorf("%d:%s: result was %q: want %q", idx, tt.desc, got, tt.wantDst)
		}
		if tt.wantN != nSrc {
			t.Errorf("%d:%s: nSrc was %d; want %d", idx, tt.desc, nSrc, tt.wantN)
		}
	}
}