encoding/simplifiedchinese: make HZGB2312 replace with FFFD on error

It is unclear whether the right number of replacement characters is
emitted in every case, but the resulting behavior seems reasonable.

Updates golang/go#18898

Change-Id: Ibf6efdb079191aa6db4eb05b41b7dae593947bd0
Reviewed-on: https://go-review.googlesource.com/37324
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/encoding/simplifiedchinese/all_test.go b/encoding/simplifiedchinese/all_test.go
index 96a0585..b369da2 100644
--- a/encoding/simplifiedchinese/all_test.go
+++ b/encoding/simplifiedchinese/all_test.go
@@ -51,6 +51,12 @@
 		{dec, GB18030, "\xfe\x30\x81\x21", "\ufffd0\ufffd!"},
 
 		{dec, GB18030, strings.Repeat("\xfe\x30", n), strings.Repeat("\ufffd0", n)},
+
+		{dec, HZGB2312, "~/", "\ufffd"},
+		{dec, HZGB2312, "~{a\x80", "\ufffd"},
+		{dec, HZGB2312, "~{a\x80", "\ufffd"},
+		{dec, HZGB2312, "~{" + strings.Repeat("z~", n), strings.Repeat("\ufffd", n)},
+		{dec, HZGB2312, "~{" + strings.Repeat("\xfe\x30", n), strings.Repeat("\ufffd", n*2)},
 	}
 	for _, tc := range testCases {
 		dir, tr, wantErr := tc.init(tc.e)
diff --git a/encoding/simplifiedchinese/hzgb2312.go b/encoding/simplifiedchinese/hzgb2312.go
index 85de6b1..eb3157f 100644
--- a/encoding/simplifiedchinese/hzgb2312.go
+++ b/encoding/simplifiedchinese/hzgb2312.go
@@ -5,7 +5,6 @@
 package simplifiedchinese
 
 import (
-	"errors"
 	"unicode/utf8"
 
 	"golang.org/x/text/encoding"
@@ -31,8 +30,6 @@
 	return new(hzGB2312Encoder)
 }
 
-var errInvalidHZGB2312 = errors.New("simplifiedchinese: invalid HZ-GB2312 encoding")
-
 const (
 	asciiState = iota
 	gbState
@@ -50,14 +47,18 @@
 	for ; nSrc < len(src); nSrc += size {
 		c0 := src[nSrc]
 		if c0 >= utf8.RuneSelf {
-			err = errInvalidHZGB2312
-			break loop
+			r, size = utf8.RuneError, 1
+			goto write
 		}
 
 		if c0 == '~' {
 			if nSrc+1 >= len(src) {
-				err = transform.ErrShortSrc
-				break loop
+				if !atEOF {
+					err = transform.ErrShortSrc
+					break loop
+				}
+				r = utf8.RuneError
+				goto write
 			}
 			size = 2
 			switch src[nSrc+1] {
@@ -78,8 +79,8 @@
 			case '\n':
 				continue
 			default:
-				err = errInvalidHZGB2312
-				break loop
+				r = utf8.RuneError
+				goto write
 			}
 		}
 
@@ -87,33 +88,37 @@
 			r, size = rune(c0), 1
 		} else {
 			if nSrc+1 >= len(src) {
-				err = transform.ErrShortSrc
-				break loop
+				if !atEOF {
+					err = transform.ErrShortSrc
+					break loop
+				}
+				r, size = utf8.RuneError, 1
+				goto write
 			}
+			size = 2
 			c1 := src[nSrc+1]
 			if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
-				err = errInvalidHZGB2312
-				break loop
-			}
-
-			r, size = '\ufffd', 2
-			if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
+				// error
+			} else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
 				r = rune(decode[i])
-				if r == 0 {
-					r = '\ufffd'
+				if r != 0 {
+					goto write
 				}
 			}
+			if c1 > utf8.RuneSelf {
+				// Be consistent and always treat non-ASCII as a single error.
+				size = 1
+			}
+			r = utf8.RuneError
 		}
 
+	write:
 		if nDst+utf8.RuneLen(r) > len(dst) {
 			err = transform.ErrShortDst
 			break loop
 		}
 		nDst += utf8.EncodeRune(dst[nDst:], r)
 	}
-	if atEOF && err == transform.ErrShortSrc {
-		err = errInvalidHZGB2312
-	}
 	return nDst, nSrc, err
 }