unicode/norm: fix panic in composition Fixes golang/go#20079 The problem is not the bounds check itself (something must have gone wrong if this condition is reached), and indeed something has. The problem is that a false start of a Hangul UTF-8 sequence is interpreted as a correct Hangul sequence. The single byte is then correctly transformed to U+FFFD, but is still interpreted and decomposed as Hangul. This then fills up the reorder buffer with two spurious modifier runes that are not accounted for, causing a long sequence of trailing modifiers to overflow the buffer without being caught. Change-Id: Ib5c7b5bc247388dffe87c0a2ba42fd1e2bb7479a Reviewed-on: https://go-review.googlesource.com/41697 Run-TryBot: Marcel van Lohuizen <mpvl@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Sam Whited <sam@samwhited.com> Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/unicode/norm/input.go b/unicode/norm/input.go index 045d4cc..479e35b 100644 --- a/unicode/norm/input.go +++ b/unicode/norm/input.go
@@ -90,16 +90,20 @@ } func (in *input) hangul(p int) (r rune) { + var size int if in.bytes == nil { if !isHangulString(in.str[p:]) { return 0 } - r, _ = utf8.DecodeRuneInString(in.str[p:]) + r, size = utf8.DecodeRuneInString(in.str[p:]) } else { if !isHangul(in.bytes[p:]) { return 0 } - r, _ = utf8.DecodeRune(in.bytes[p:]) + r, size = utf8.DecodeRune(in.bytes[p:]) + } + if size != hangulUTF8Size { + return 0 } return r }
diff --git a/unicode/norm/normalize_test.go b/unicode/norm/normalize_test.go index 69a84bf..4f83737 100644 --- a/unicode/norm/normalize_test.go +++ b/unicode/norm/normalize_test.go
@@ -699,6 +699,24 @@ "\u0d4a" + strings.Repeat("\u0d3e", 30), "\u0d4a" + strings.Repeat("\u0d3e", 29) + cgj + "\u0d3e", }, + + { // https://golang.org/issues/20079 + "", + "\xeb\u0344", + "\xeb\u0308\u0301", + }, + + { // https://golang.org/issues/20079 + "", + "\uac00" + strings.Repeat("\u0300", 30), + "\uac00" + strings.Repeat("\u0300", 29) + "\u034f\u0300", + }, + + { // https://golang.org/issues/20079 + "", + "\xeb" + strings.Repeat("\u0300", 31), + "\xeb" + strings.Repeat("\u0300", 30) + "\u034f\u0300", + }, } var appendTestsNFD = []AppendTest{