unicode: improve SimpleFold performance for ASCII
This change significantly speeds up case-insensitive regexp matching.
benchmark old ns/op new ns/op delta
BenchmarkMatchEasy0i_32-8 2690 1473 -45.24%
BenchmarkMatchEasy0i_1K-8 80404 42269 -47.43%
BenchmarkMatchEasy0i_32K-8 3272187 2076118 -36.55%
BenchmarkMatchEasy0i_1M-8 104805990 66503805 -36.55%
BenchmarkMatchEasy0i_32M-8 3360192200 2126121600 -36.73%
benchmark old MB/s new MB/s speedup
BenchmarkMatchEasy0i_32-8 11.90 21.72 1.83x
BenchmarkMatchEasy0i_1K-8 12.74 24.23 1.90x
BenchmarkMatchEasy0i_32K-8 10.01 15.78 1.58x
BenchmarkMatchEasy0i_1M-8 10.00 15.77 1.58x
BenchmarkMatchEasy0i_32M-8 9.99 15.78 1.58x
Issue #13288
Change-Id: I94af7bb29e75d60b4f6ee760124867ab271b9642
Reviewed-on: https://go-review.googlesource.com/16943
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/unicode/maketables.go b/src/unicode/maketables.go
index 328c75e..f364515 100644
--- a/src/unicode/maketables.go
+++ b/src/unicode/maketables.go
@@ -1172,6 +1172,7 @@
}
}
+ printAsciiFold()
printCaseOrbit()
// Tables of category and script folding exceptions: code points
@@ -1269,6 +1270,25 @@
"// If there is no entry for a script name, there are no such points.\n",
}
+func printAsciiFold() { // Emits, via printf, the Go source of the asciiFold array: one uint16 entry per code point in [0, unicode.MaxASCII].
+ printf("var asciiFold = [MaxASCII + 1]uint16{\n")
+ for i := rune(0); i <= unicode.MaxASCII; i++ { // inclusive bound, so one entry is printed for every index of the emitted [MaxASCII + 1] array
+ c := chars[i]
+ f := c.caseOrbit // preferred value; NOTE(review): presumably the next rune in i's case-folding orbit - confirm where caseOrbit is populated
+ if f == 0 { // no caseOrbit value recorded for i: fall back to the simple case mappings
+ if c.lowerCase != i && c.lowerCase != 0 { // a non-zero lower-case mapping that differs from i is tried first
+ f = c.lowerCase
+ } else if c.upperCase != i && c.upperCase != 0 { // otherwise a non-zero upper-case mapping that differs from i
+ f = c.upperCase
+ } else {
+ f = i // neither mapping applies: the entry is the code point itself
+ }
+ }
+ printf("\t0x%04X,\n", f) // one table entry per line, as zero-padded 4-digit upper-case hex
+ }
+ printf("}\n\n")
+}
+
func printCaseOrbit() {
if *test {
for j := range chars {