runtime: speed up fastrand() % n This occurs a fair amount in the runtime for non-power-of-two n. Use an alternative, faster formulation. name old time/op new time/op delta Fastrandn/2-8 4.45ns ± 2% 2.09ns ± 3% -53.12% (p=0.000 n=14+14) Fastrandn/3-8 4.78ns ±11% 2.06ns ± 2% -56.94% (p=0.000 n=15+15) Fastrandn/4-8 4.76ns ± 9% 1.99ns ± 3% -58.28% (p=0.000 n=15+13) Fastrandn/5-8 4.96ns ±13% 2.03ns ± 6% -59.14% (p=0.000 n=15+15) name old time/op new time/op delta SelectUncontended-8 33.7ns ± 2% 33.9ns ± 2% +0.70% (p=0.000 n=49+50) SelectSyncContended-8 1.68µs ± 4% 1.65µs ± 4% -1.54% (p=0.000 n=50+45) SelectAsyncContended-8 282ns ± 1% 277ns ± 1% -1.50% (p=0.000 n=48+43) SelectNonblock-8 5.31ns ± 1% 5.32ns ± 1% ~ (p=0.275 n=45+44) SelectProdCons-8 585ns ± 3% 577ns ± 2% -1.35% (p=0.000 n=50+50) GoroutineSelect-8 1.59ms ± 2% 1.59ms ± 1% ~ (p=0.084 n=49+48) Updates #16213 Change-Id: Ib555a4d7da2042a25c3976f76a436b536487d5b7 Reviewed-on: https://go-review.googlesource.com/36932 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>

commit: 46a75870ad5b9b9711e69ffce3738a3ab2057789 [log] [tgz]
author: Josh Bleecher Snyder <josharian@gmail.com> Mon Feb 13 12:46:17 2017 -0800
committer: Josh Bleecher Snyder <josharian@gmail.com> Tue Feb 14 00:01:22 2017 +0000
tree: 56cb970b704a126287c469112d2bf6c5d7b27c74
parent: 83c58ac710517e0b3fb2654cdba187e45900cca2 [diff]
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 5f85d91..985cd7f 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go

@@ -246,4 +246,5 @@
 	return
 }
 
-func Fastrand() uint32 { return fastrand() }
+func Fastrand() uint32          { return fastrand() }
+func Fastrandn(n uint32) uint32 { return fastrandn(n) }

diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 527df17..cb0d305 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go

@@ -648,7 +648,7 @@
 	}
 	myID := gp.m.p.ptr().id
 	for tries := 0; tries < 5; tries++ {
-		id := int32(fastrand() % uint32(gomaxprocs-1))
+		id := int32(fastrandn(uint32(gomaxprocs - 1)))
 		if id >= myID {
 			id++
 		}

diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 23626f1..e71ebcd 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go

@@ -4280,7 +4280,7 @@
 
 	if randomizeScheduler {
 		for i := uint32(1); i <= n; i++ {
-			j := fastrand() % (i + 1)
+			j := fastrandn(i + 1)
 			batch[i], batch[j] = batch[j], batch[i]
 		}
 	}

diff --git a/src/runtime/rand_test.go b/src/runtime/rand_test.go
index 0f6ec0f..f8831b0 100644
--- a/src/runtime/rand_test.go
+++ b/src/runtime/rand_test.go

@@ -6,6 +6,7 @@
 
 import (
 	. "runtime"
+	"strconv"
 	"testing"
 )
 
@@ -30,3 +31,15 @@
 		}
 	})
 }
+
+var sink32 uint32
+
+func BenchmarkFastrandn(b *testing.B) {
+	for n := uint32(2); n <= 5; n++ {
+		b.Run(strconv.Itoa(int(n)), func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				sink32 = Fastrandn(n)
+			}
+		})
+	}
+}

diff --git a/src/runtime/select.go b/src/runtime/select.go
index 4a744a1..1ace6dc 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go

@@ -270,7 +270,7 @@
 	pollslice := slice{unsafe.Pointer(sel.pollorder), int(sel.ncase), int(sel.ncase)}
 	pollorder := *(*[]uint16)(unsafe.Pointer(&pollslice))
 	for i := 1; i < int(sel.ncase); i++ {
-		j := fastrand() % uint32(i+1)
+		j := fastrandn(uint32(i + 1))
 		pollorder[i] = pollorder[j]
 		pollorder[j] = uint16(i)
 	}

diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index e839c59..ff230b8 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go

@@ -103,6 +103,13 @@
 	return fr
 }
 
+//go:nosplit
+func fastrandn(n uint32) uint32 {
+	// This is similar to fastrand() % n, but faster.
+	// See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
+	return uint32(uint64(fastrand()) * uint64(n) >> 32)
+}
+
 //go:linkname sync_fastrand sync.fastrand
 func sync_fastrand() uint32 { return fastrand() }
 

diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index ed82783..377d970 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go

@@ -549,7 +549,7 @@
 			// a recursive stack's cycle is slightly
 			// larger than the cache.
 			if cache != nil {
-				ci := fastrand() % uint32(len(cache.entries))
+				ci := fastrandn(uint32(len(cache.entries)))
 				cache.entries[ci] = pcvalueCacheEnt{
 					targetpc: targetpc,
 					off:      off,
commit	46a75870ad5b9b9711e69ffce3738a3ab2057789	[log] [tgz]
author	Josh Bleecher Snyder <josharian@gmail.com>	Mon Feb 13 12:46:17 2017 -0800
committer	Josh Bleecher Snyder <josharian@gmail.com>	Tue Feb 14 00:01:22 2017 +0000
tree	56cb970b704a126287c469112d2bf6c5d7b27c74
parent	83c58ac710517e0b3fb2654cdba187e45900cca2 [diff]