encoding/simplifiedchinese: fix € encoding in GB18030

The euro sign is an exception: it is given the single-byte code 0x80 in
Microsoft's later versions of CP936/GBK and the two-byte code 0xA2 0xE3
in GB18030. See https://en.wikipedia.org/wiki/GB_18030#cite_note-4

Fixes golang/go#48691

Change-Id: I6a4460274d4313ad1d03bcd8070373af674691eb
GitHub-Last-Rev: acbbc50f20d663452f8da77cf2a66d8d893bec1d
GitHub-Pull-Request: golang/text#26
Reviewed-on: https://go-review.googlesource.com/c/text/+/353712
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Trust: Nigel Tao <nigeltao@golang.org>
Trust: Alberto Donizetti <alb.donizetti@gmail.com>
Run-TryBot: Nigel Tao <nigeltao@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
diff --git a/encoding/simplifiedchinese/all_test.go b/encoding/simplifiedchinese/all_test.go
index a556c94..fbb623c 100644
--- a/encoding/simplifiedchinese/all_test.go
+++ b/encoding/simplifiedchinese/all_test.go
@@ -40,7 +40,9 @@
 		{enc, HZGB2312, "a갂", "a"},
 		{enc, HZGB2312, "\u6cf5갂", "~{1C~}"},
 
+		{dec, GBK, "\xa2\xe3", "€"},
 		{dec, GB18030, "\x80", "€"},
+
 		{dec, GB18030, "\x81", "\ufffd"},
 		{dec, GB18030, "\x81\x20", "\ufffd "},
 		{dec, GB18030, "\xfe\xfe", "\ufffd"},
@@ -125,6 +127,14 @@
 		encPrefix: "~{",
 		encoded:   ";(<dR;:x>F#,6@WCN^O`GW!#",
 		utf8:      "花间一壶酒,独酌无相亲。",
+	}, {
+		e:       GBK,
+		encoded: "\x80",
+		utf8:    "€",
+	}, {
+		e:       GB18030,
+		encoded: "\xa2\xe3",
+		utf8:    "€",
 	}}
 
 	for _, tc := range testCases {
diff --git a/encoding/simplifiedchinese/gbk.go b/encoding/simplifiedchinese/gbk.go
index b89c45b..0e0fabf 100644
--- a/encoding/simplifiedchinese/gbk.go
+++ b/encoding/simplifiedchinese/gbk.go
@@ -55,6 +55,8 @@
 		// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
 		// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
 		// says to treat "gbk" as Code Page 936.
+		// GBK's decoder is gb18030's decoder: https://encoding.spec.whatwg.org/#gbk-decoder
+		// If the byte is 0x80, return code point U+20AC: https://encoding.spec.whatwg.org/#gb18030-decoder
 		case c0 == 0x80:
 			r, size = '€', 1
 
@@ -180,7 +182,9 @@
 				// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
 				// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
 				// says to treat "gbk" as Code Page 936.
-				if r == '€' {
+				// GBK's encoder is gb18030's encoder with its "is GBK" flag set to true: https://encoding.spec.whatwg.org/#gbk-encoder
+				// If "is GBK" is true and the code point is U+20AC, return byte 0x80: https://encoding.spec.whatwg.org/#gb18030-encoder
+				if !e.gb18030 && r == '€' {
 					r = 0x80
 					goto write1
 				}