unicode/norm: fix panic in composition
Fixes golang/go#20079
The problem is not the bounds check (something must have gone wrong
if this condition is reached). And indeed it has. The problem is
that a false start of a Hangul UTF-8 sequence is interpreted as a
correct Hangul sequence. The single byte is then correctly
transformed to U+FFFD, but happily interpreted and decomposed as
Hangul. This then fills up the reorder buffer with two spurious
modifier runes that are not accounted for, causing a large sequence
of trailing modifiers to overflow the buffer without being caught.
Change-Id: Ib5c7b5bc247388dffe87c0a2ba42fd1e2bb7479a
Reviewed-on: https://go-review.googlesource.com/41697
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Sam Whited <sam@samwhited.com>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/unicode/norm/input.go b/unicode/norm/input.go
index 045d4cc..479e35b 100644
--- a/unicode/norm/input.go
+++ b/unicode/norm/input.go
@@ -90,16 +90,20 @@
}
func (in *input) hangul(p int) (r rune) {
+ var size int
if in.bytes == nil {
if !isHangulString(in.str[p:]) {
return 0
}
- r, _ = utf8.DecodeRuneInString(in.str[p:])
+ r, size = utf8.DecodeRuneInString(in.str[p:])
} else {
if !isHangul(in.bytes[p:]) {
return 0
}
- r, _ = utf8.DecodeRune(in.bytes[p:])
+ r, size = utf8.DecodeRune(in.bytes[p:])
+ }
+ if size != hangulUTF8Size {
+ return 0
}
return r
}
diff --git a/unicode/norm/normalize_test.go b/unicode/norm/normalize_test.go
index 69a84bf..4f83737 100644
--- a/unicode/norm/normalize_test.go
+++ b/unicode/norm/normalize_test.go
@@ -699,6 +699,24 @@
"\u0d4a" + strings.Repeat("\u0d3e", 30),
"\u0d4a" + strings.Repeat("\u0d3e", 29) + cgj + "\u0d3e",
},
+
+ { // https://golang.org/issues/20079
+ "",
+ "\xeb\u0344",
+ "\xeb\u0308\u0301",
+ },
+
+ { // https://golang.org/issues/20079
+ "",
+ "\uac00" + strings.Repeat("\u0300", 30),
+ "\uac00" + strings.Repeat("\u0300", 29) + "\u034f\u0300",
+ },
+
+ { // https://golang.org/issues/20079
+ "",
+ "\xeb" + strings.Repeat("\u0300", 31),
+ "\xeb" + strings.Repeat("\u0300", 30) + "\u034f\u0300",
+ },
}
var appendTestsNFD = []AppendTest{