encoding/traditionalchinese: replace invalid Big5 input with U+FFFD
Updates golang/go#18898
Change-Id: If234aa5fdc35daf5ab02f49400462aa0c1ffa5ea
Reviewed-on: https://go-review.googlesource.com/37325
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/encoding/traditionalchinese/all_test.go b/encoding/traditionalchinese/all_test.go
index 3097827..3825c76 100644
--- a/encoding/traditionalchinese/all_test.go
+++ b/encoding/traditionalchinese/all_test.go
@@ -5,6 +5,7 @@
package traditionalchinese
import (
+ "fmt"
"io/ioutil"
"strings"
"testing"
@@ -28,7 +29,12 @@
e encoding.Encoding
src, want string
}{
+ {dec, Big5, "\x80", "\ufffd"},
+ {dec, Big5, "\x81", "\ufffd"},
+ {dec, Big5, "\x81\x30", "\ufffd\x30"},
{dec, Big5, "\x81\x40", "\ufffd"},
+ {dec, Big5, "\x81\xa0", "\ufffd"},
+ {dec, Big5, "\xff", "\ufffd"},
{enc, Big5, "갂", ""},
{enc, Big5, "a갂", "a"},
@@ -36,14 +42,24 @@
}
for _, tc := range testCases {
dir, tr, wantErr := tc.init(tc.e)
-
- dst, _, err := transform.String(tr, tc.src)
- if err != wantErr {
- t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
- }
- if got := string(dst); got != tc.want {
- t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
- }
+ t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, tc.src), func(t *testing.T) {
+ dst := make([]byte, 100)
+ src := []byte(tc.src)
+ for i := 0; i <= len(tc.src); i++ {
+ nDst, nSrc, err := tr.Transform(dst, src[:i], false)
+ if err != nil && err != transform.ErrShortSrc && err != wantErr {
+ t.Fatalf("error on first call to Transform: %v", err)
+ }
+ n, _, err := tr.Transform(dst[nDst:], src[nSrc:], true)
+ nDst += n
+ if err != wantErr {
+ t.Fatalf("(%q|%q): got %v; want %v", tc.src[:i], tc.src[i:], err, wantErr)
+ }
+ if got := string(dst[:nDst]); got != tc.want {
+ t.Errorf("(%q|%q):\ngot %q\nwant %q", tc.src[:i], tc.src[i:], got, tc.want)
+ }
+ }
+ })
}
}
diff --git a/encoding/traditionalchinese/big5.go b/encoding/traditionalchinese/big5.go
index 275821f..1fcddde 100644
--- a/encoding/traditionalchinese/big5.go
+++ b/encoding/traditionalchinese/big5.go
@@ -5,7 +5,6 @@
package traditionalchinese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -26,8 +25,6 @@
identifier.Big5,
}
-var errInvalidBig5 = errors.New("traditionalchinese: invalid Big5 encoding")
-
type big5Decoder struct{ transform.NopResetter }
func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -40,8 +37,12 @@
case 0x81 <= c0 && c0 < 0xff:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ goto write
}
c1 := src[nSrc+1]
switch {
@@ -49,9 +50,12 @@
c1 -= 0x40
case 0xa1 <= c1 && c1 < 0xff:
c1 -= 0x62
+ case c1 < 0x40:
+ r, size = utf8.RuneError, 1
+ goto write
default:
- err = errInvalidBig5
- break loop
+ r, size = utf8.RuneError, 2
+ goto write
}
r, size = '\ufffd', 2
if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
@@ -80,10 +84,10 @@
}
default:
- err = errInvalidBig5
- break loop
+ r, size = utf8.RuneError, 1
}
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
@@ -99,9 +103,6 @@
nDst += copy(dst[nDst:], s)
continue loop
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidBig5
- }
return nDst, nSrc, err
}