rand: apply 'de-virtualization' optimization from math/rand

https://golang.org/cl/19153{8,9} sped up math/rand by removing a closure
call. It doesn't help here, perhaps because the compiler isn't aware.

benchmark                        old ns/op     new ns/op     delta
BenchmarkRead3-4                 10.7          11.5          +7.48%
BenchmarkRead64-4                113           112           -0.88%
BenchmarkRead1000-4              1608          1588          -1.24%

Part of the reduced improvement is because the compiler doesn't
know about exp/rand (yet).

Update golang/go#34614

Change-Id: Ia439c60d69c30a25d35f21675d60601dbdc98f4f
Reviewed-on: https://go-review.googlesource.com/c/exp/+/198099
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
Reviewed-by: Rob Pike <r@golang.org>
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/rand/rand.go b/rand/rand.go
index 884e3e4..a369bc7 100644
--- a/rand/rand.go
+++ b/rand/rand.go
@@ -217,15 +217,20 @@
 	if lk, ok := r.src.(*LockedSource); ok {
 		return lk.Read(p, &r.readVal, &r.readPos)
 	}
-	return read(p, r.Uint64, &r.readVal, &r.readPos)
+	return read(p, r.src, &r.readVal, &r.readPos)
 }
 
-func read(p []byte, uint64 func() uint64, readVal *uint64, readPos *int8) (n int, err error) {
+func read(p []byte, src Source, readVal *uint64, readPos *int8) (n int, err error) {
 	pos := *readPos
 	val := *readVal
+	rng, _ := src.(*PCGSource)
 	for n = 0; n < len(p); n++ {
 		if pos == 0 {
-			val = uint64()
+			if rng != nil {
+				val = rng.Uint64()
+			} else {
+				val = src.Uint64()
+			}
 			pos = 8
 		}
 		p[n] = byte(val)
@@ -241,7 +246,10 @@
  * Top-level convenience functions
  */
 
-var globalRand = New(&LockedSource{src: NewSource(1)})
+var globalRand = New(&LockedSource{src: NewSource(1).(*PCGSource)})
+
+// Type assert that globalRand's source is a LockedSource whose src is a *PCGSource.
+var _ *PCGSource = globalRand.src.(*LockedSource).src
 
 // Seed uses the provided seed value to initialize the default Source to a
 // deterministic state. If Seed is not called, the generator behaves as
@@ -330,7 +338,7 @@
 // It is just a standard Source with its operations protected by a sync.Mutex.
 type LockedSource struct {
 	lk  sync.Mutex
-	src Source
+	src *PCGSource
 }
 
 func (s *LockedSource) Uint64() (n uint64) {
@@ -357,7 +365,7 @@
 // Read implements Read for a LockedSource.
 func (s *LockedSource) Read(p []byte, readVal *uint64, readPos *int8) (n int, err error) {
 	s.lk.Lock()
-	n, err = read(p, s.src.Uint64, readVal, readPos)
+	n, err = read(p, s.src, readVal, readPos)
 	s.lk.Unlock()
 	return
 }
diff --git a/rand/rand_test.go b/rand/rand_test.go
index b310fa6..545b7dd 100644
--- a/rand/rand_test.go
+++ b/rand/rand_test.go
@@ -472,6 +472,14 @@
 	}
 }
 
+func BenchmarkInt63ThreadsafeParallel(b *testing.B) {
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			Int63()
+		}
+	})
+}
+
 func BenchmarkInt63Unthreadsafe(b *testing.B) {
 	r := New(NewSource(1))
 	for n := b.N; n > 0; n-- {