all: make Unicode surrogate halves illegal as UTF-8
Surrogate halves are part of UTF-16 and should never appear in UTF-8.
(The rune that two combined halves represent in UTF-16 should
be encoded directly.)
Encoding: encode as RuneError.
Decoding: convert to RuneError, consume one byte.
This requires changing:
package unicode/utf8
runtime for range over string
Also added utf8.ValidRune and fixed bug in utf.RuneLen.
Fixes #3927.
R=golang-dev, rsc, bsiegert
CC=golang-dev
https://golang.org/cl/6458099
diff --git a/test/stringrange.go b/test/stringrange.go
index daaba91..99e5edb 100644
--- a/test/stringrange.go
+++ b/test/stringrange.go
@@ -57,6 +57,13 @@
ok = false
}
+ for _, c := range "a\xed\xa0\x80a" {
+ if c != 'a' && c != utf8.RuneError {
+ fmt.Printf("surrogate UTF-8 does not error: %U\n", c)
+ ok = false
+ }
+ }
+
if !ok {
fmt.Println("BUG: stringrange")
os.Exit(1)