unicode: improve SimpleFold performance for ascii

This change significantly speeds up case-insensitive regexp matching.

benchmark                      old ns/op      new ns/op      delta
BenchmarkMatchEasy0i_32-8      2690           1473           -45.24%
BenchmarkMatchEasy0i_1K-8      80404          42269          -47.43%
BenchmarkMatchEasy0i_32K-8     3272187        2076118        -36.55%
BenchmarkMatchEasy0i_1M-8      104805990      66503805       -36.55%
BenchmarkMatchEasy0i_32M-8     3360192200     2126121600     -36.73%

benchmark                      old MB/s     new MB/s     speedup
BenchmarkMatchEasy0i_32-8      11.90        21.72        1.83x
BenchmarkMatchEasy0i_1K-8      12.74        24.23        1.90x
BenchmarkMatchEasy0i_32K-8     10.01        15.78        1.58x
BenchmarkMatchEasy0i_1M-8      10.00        15.77        1.58x
BenchmarkMatchEasy0i_32M-8     9.99         15.78        1.58x

Issue #13288

Change-Id: I94af7bb29e75d60b4f6ee760124867ab271b9642
Reviewed-on: https://go-review.googlesource.com/16943
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/unicode/letter.go b/src/unicode/letter.go
index ffa083e..8aec920 100644
--- a/src/unicode/letter.go
+++ b/src/unicode/letter.go
@@ -332,6 +332,10 @@
 //	SimpleFold('1') = '1'
 //
 func SimpleFold(r rune) rune {
+	if int(r) < len(asciiFold) {
+		return rune(asciiFold[r])
+	}
+
 	// Consult caseOrbit table for special cases.
 	lo := 0
 	hi := len(caseOrbit)
diff --git a/src/unicode/maketables.go b/src/unicode/maketables.go
index 328c75e..f364515 100644
--- a/src/unicode/maketables.go
+++ b/src/unicode/maketables.go
@@ -1172,6 +1172,7 @@
 		}
 	}
 
+	printAsciiFold()
 	printCaseOrbit()
 
 	// Tables of category and script folding exceptions: code points
@@ -1269,6 +1270,25 @@
 		"// If there is no entry for a script name, there are no such points.\n",
 }
 
+func printAsciiFold() {
+	printf("var asciiFold = [MaxASCII + 1]uint16{\n")
+	for i := rune(0); i <= unicode.MaxASCII; i++ {
+		c := chars[i]
+		f := c.caseOrbit
+		if f == 0 {
+			if c.lowerCase != i && c.lowerCase != 0 {
+				f = c.lowerCase
+			} else if c.upperCase != i && c.upperCase != 0 {
+				f = c.upperCase
+			} else {
+				f = i
+			}
+		}
+		printf("\t0x%04X,\n", f)
+	}
+	printf("}\n\n")
+}
+
 func printCaseOrbit() {
 	if *test {
 		for j := range chars {
diff --git a/src/unicode/tables.go b/src/unicode/tables.go
index 8bb4206..c04d69a 100644
--- a/src/unicode/tables.go
+++ b/src/unicode/tables.go
@@ -6834,6 +6834,137 @@
 	0xFF: pLl | pp, // 'ΓΏ'
 }
 
+var asciiFold = [MaxASCII + 1]uint16{
+	0x0000,
+	0x0001,
+	0x0002,
+	0x0003,
+	0x0004,
+	0x0005,
+	0x0006,
+	0x0007,
+	0x0008,
+	0x0009,
+	0x000A,
+	0x000B,
+	0x000C,
+	0x000D,
+	0x000E,
+	0x000F,
+	0x0010,
+	0x0011,
+	0x0012,
+	0x0013,
+	0x0014,
+	0x0015,
+	0x0016,
+	0x0017,
+	0x0018,
+	0x0019,
+	0x001A,
+	0x001B,
+	0x001C,
+	0x001D,
+	0x001E,
+	0x001F,
+	0x0020,
+	0x0021,
+	0x0022,
+	0x0023,
+	0x0024,
+	0x0025,
+	0x0026,
+	0x0027,
+	0x0028,
+	0x0029,
+	0x002A,
+	0x002B,
+	0x002C,
+	0x002D,
+	0x002E,
+	0x002F,
+	0x0030,
+	0x0031,
+	0x0032,
+	0x0033,
+	0x0034,
+	0x0035,
+	0x0036,
+	0x0037,
+	0x0038,
+	0x0039,
+	0x003A,
+	0x003B,
+	0x003C,
+	0x003D,
+	0x003E,
+	0x003F,
+	0x0040,
+	0x0061,
+	0x0062,
+	0x0063,
+	0x0064,
+	0x0065,
+	0x0066,
+	0x0067,
+	0x0068,
+	0x0069,
+	0x006A,
+	0x006B,
+	0x006C,
+	0x006D,
+	0x006E,
+	0x006F,
+	0x0070,
+	0x0071,
+	0x0072,
+	0x0073,
+	0x0074,
+	0x0075,
+	0x0076,
+	0x0077,
+	0x0078,
+	0x0079,
+	0x007A,
+	0x005B,
+	0x005C,
+	0x005D,
+	0x005E,
+	0x005F,
+	0x0060,
+	0x0041,
+	0x0042,
+	0x0043,
+	0x0044,
+	0x0045,
+	0x0046,
+	0x0047,
+	0x0048,
+	0x0049,
+	0x004A,
+	0x212A,
+	0x004C,
+	0x004D,
+	0x004E,
+	0x004F,
+	0x0050,
+	0x0051,
+	0x0052,
+	0x017F,
+	0x0054,
+	0x0055,
+	0x0056,
+	0x0057,
+	0x0058,
+	0x0059,
+	0x005A,
+	0x007B,
+	0x007C,
+	0x007D,
+	0x007E,
+	0x007F,
+}
+
 var caseOrbit = []foldPair{
 	{0x004B, 0x006B},
 	{0x0053, 0x0073},