encoding/japanese: make ShiftJIS replace with FFFD on error
Updates golang/go#18898
Change-Id: I049e5ba1fca9529eeacc3aa58f7e5c2d17f22ecd
Reviewed-on: https://go-review.googlesource.com/37317
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/encoding/japanese/all_test.go b/encoding/japanese/all_test.go
index d71c542..a3def34 100644
--- a/encoding/japanese/all_test.go
+++ b/encoding/japanese/all_test.go
@@ -64,6 +64,22 @@
{dec, EUCJP, strings.Repeat("\x8f\xa0", n), strings.Repeat("\ufffd", 2*n)},
{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1", n), "a" + strings.Repeat("\ufffd", n)},
{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1\xff", n), "a" + strings.Repeat("\ufffd", 2*n)},
+
+ // Continue correctly after errors
+ {dec, ShiftJIS, "\x80", "\u0080"}, // It's what the spec says.
+ {dec, ShiftJIS, "\x81", "\ufffd"},
+ {dec, ShiftJIS, "\xe0", "\ufffd"},
+ {dec, ShiftJIS, "\xe0\x39", "\ufffd\u0039"},
+ {dec, ShiftJIS, "\x81\x7f", "\ufffd\u007f"},
+ {dec, ShiftJIS, "\xe0\xfd", "\ufffd"},
+ {dec, ShiftJIS, "\xe0\x9f", "燹"},
+ {dec, ShiftJIS, "\xfc\xfc", "\ufffd"},
+ {dec, ShiftJIS, "\xfc\xfd", "\ufffd"},
+ {dec, ShiftJIS, "\xfdaa", "\ufffdaa"},
+
+ {dec, ShiftJIS, strings.Repeat("\x81\x81", n), strings.Repeat("＝", n)},
+ {dec, ShiftJIS, strings.Repeat("\xe0\xfd", n), strings.Repeat("\ufffd", n)},
+ {dec, ShiftJIS, "a" + strings.Repeat("\xe0\xfd", n), "a" + strings.Repeat("\ufffd", n)},
}
for _, tc := range testCases {
dir, tr, wantErr := tc.init(tc.e)
diff --git a/encoding/japanese/shiftjis.go b/encoding/japanese/shiftjis.go
index 099aecc..9667b1f 100644
--- a/encoding/japanese/shiftjis.go
+++ b/encoding/japanese/shiftjis.go
@@ -5,7 +5,6 @@
package japanese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -24,8 +23,6 @@
identifier.ShiftJIS,
}
-var errInvalidShiftJIS = errors.New("japanese: invalid Shift JIS encoding")
-
type shiftJISDecoder struct{ transform.NopResetter }
func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -49,27 +46,31 @@
if nSrc+1 >= len(src) {
err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ break loop
+ }
+ r, size = '\ufffd', 1
+ goto write
}
c1 := src[nSrc+1]
switch {
case c1 < 0x40:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1 // c1 is ASCII so output on next round
+ goto write
case c1 < 0x7f:
c0--
c1 -= 0x40
case c1 == 0x7f:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1 // c1 is ASCII so output on next round
+ goto write
case c1 < 0x9f:
c0--
c1 -= 0x41
case c1 < 0xfd:
c1 -= 0x9f
default:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 2
+ goto write
}
r, size = '\ufffd', 2
if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
@@ -79,20 +80,19 @@
}
}
- default:
- err = errInvalidShiftJIS
- break loop
- }
+ case c0 == 0x80:
+ r, size = 0x80, 1
+ default:
+ r, size = '\ufffd', 1
+ }
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidShiftJIS
- }
return nDst, nSrc, err
}