unicode: performance improvements (API change)

*** There is an API change here: the introduction of the
LatinOffset int in the RangeTable struct. ***

* Avoid checking Latin range multiple times for non-Latin runes.
* Use linear search when it is faster than binary search.

go test -calibrate runs the calibration for where the linear/binary
crossover should be.

benchmark              old MB/s   new MB/s   speedup
BenchmarkFields           36.27      41.43     1.14x
BenchmarkFieldsFunc       36.23      41.38     1.14x

The speedup here is evenly split between the linear scans
and the LatinOffset change. Both are about 1.07x.

R=r
CC=bradfitz, gobot, golang-dev
https://golang.org/cl/6526048

commit: 4591cd631dad588c8fb40097021d1c63860aa4ad [log] [tgz]
author: Russ Cox <rsc@golang.org> Fri Sep 21 00:35:25 2012 -0400
committer: Russ Cox <rsc@golang.org> Fri Sep 21 00:35:25 2012 -0400
tree: edb9d86e634d257e088b9ed6320172090c6b532c
parent: 31758b2c1a4fef9c387d039190e55c640bda9408 [diff] [blame]
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
index fcd14fc..2ed1915 100644
--- a/src/pkg/unicode/maketables.go
+++ b/src/pkg/unicode/maketables.go

@@ -503,6 +503,7 @@
 func dumpRange(header string, inCategory Op) {
 	fmt.Print(header)
 	next := rune(0)
+	latinOffset := 0
 	fmt.Print("\tR16: []Range16{\n")
 	// one Range for each iteration
 	count := &range16Count
@@ -546,11 +547,17 @@
 				break
 			}
 		}
+		if uint32(hi) <= unicode.MaxLatin1 {
+			latinOffset++
+		}
 		size, count = printRange(uint32(lo), uint32(hi), uint32(stride), size, count)
 		// next range: start looking where this range ends
 		next = hi + 1
 	}
 	fmt.Print("\t},\n")
+	if latinOffset > 0 {
+		fmt.Printf("\tLatinOffset: %d,\n", latinOffset)
+	}
 	fmt.Print("}\n\n")
 }
 
@@ -760,14 +767,17 @@
 		}
 		ndecl++
 		fmt.Printf("var _%s = &RangeTable {\n", name)
-		fmt.Print("\tR16: []Range16{\n")
 		ranges := foldAdjacent(table[name])
+		fmt.Print("\tR16: []Range16{\n")
 		size := 16
 		count := &range16Count
 		for _, s := range ranges {
 			size, count = printRange(s.Lo, s.Hi, s.Stride, size, count)
 		}
 		fmt.Print("\t},\n")
+		if off := findLatinOffset(ranges); off > 0 {
+			fmt.Printf("\tLatinOffset: %d,\n", off)
+		}
 		fmt.Print("}\n\n")
 	}
 	decl.Sort()
@@ -779,6 +789,14 @@
 	fmt.Print(")\n\n")
 }
 
+func findLatinOffset(ranges []unicode.Range32) int {
+	i := 0
+	for i < len(ranges) && ranges[i].Hi <= unicode.MaxLatin1 {
+		i++
+	}
+	return i
+}
+
 const (
 	CaseUpper = 1 << iota
 	CaseLower
commit	4591cd631dad588c8fb40097021d1c63860aa4ad	[log] [tgz]
author	Russ Cox <rsc@golang.org>	Fri Sep 21 00:35:25 2012 -0400
committer	Russ Cox <rsc@golang.org>	Fri Sep 21 00:35:25 2012 -0400
tree	edb9d86e634d257e088b9ed6320172090c6b532c
parent	31758b2c1a4fef9c387d039190e55c640bda9408 [diff] [blame]