math/big: make division faster

- Add new BenchmarkQuoRem.
- Eliminate allocation in divLarge nat pool.
- Unroll mulAddVWW body 4x.
- Remove some redundant slice loads in divLarge.

name      old time/op  new time/op  delta
QuoRem-8  2.18µs ± 1%  1.93µs ± 1%  -11.38%  (p=0.000 n=19+18)

The starting point in the comparison here is Cherry's
pending CL to turn mulWW and divWW into intrinsics.
The optimizations in divLarge work best on top of that CL,
because with the intrinsics all the function calls are gone.
The effect of this CL is not as large if you don't assume
Cherry's CL.

Change-Id: Ia6138907489c5b9168497912e43705634e163b35
Reviewed-on: https://go-review.googlesource.com/30613
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go
index 0cae4a1..4df103a 100644
--- a/src/math/big/int_test.go
+++ b/src/math/big/int_test.go
@@ -478,6 +478,18 @@
 	}
 }
 
+func BenchmarkQuoRem(b *testing.B) { // BenchmarkQuoRem times q.QuoRem(y, x, r) on two fixed operands parsed once up front.
+	x, _ := new(Int).SetString("153980389784927331788354528594524332344709972855165340650588877572729725338415474372475094155672066328274535240275856844648695200875763869073572078279316458648124537905600131008790701752441155668003033945258023841165089852359980273279085783159654751552359397986180318708491098942831252291841441726305535546071", 0)
+	y, _ := new(Int).SetString("7746362281539803897849273317883545285945243323447099728551653406505888775727297253384154743724750941556720663282745352402758568446486952008757638690735720782793164586481245379056001310087907017524411556680030339452580238411650898523599802732790857831596547515523593979861803187084910989428312522918414417263055355460715745539358014631136245887418412633787074173796862711588221766398229333338511838891484974940633857861775630560092874987828057333663969469797013996401149696897591265769095952887917296740109742927689053276850469671231961384715398038978492733178835452859452433234470997285516534065058887757272972533841547437247509415567206632827453524027585684464869520087576386907357207827931645864812453790560013100879070175244115566800303394525802384116508985235998027327908578315965475155235939798618031870849109894283125229184144172630553554607112725169432413343763989564437170644270643461665184965150423819594083121075825", 0)
+	q := new(Int) // receiver of QuoRem; the same Int is reused on every iteration
+	r := new(Int) // passed as QuoRem's third argument; also reused on every iteration
+
+	b.ResetTimer() // keep the SetString parsing above out of the timed region
+	for i := 0; i < b.N; i++ {
+		q.QuoRem(y, x, r) // y (the longer literal) is divided by x; return value is not used
+	}
+}
+
 var bitLenTests = []struct {
 	in  string
 	out int
@@ -794,6 +806,17 @@
 	}
 }
 
+func BenchmarkProbablyPrime(b *testing.B) { // BenchmarkProbablyPrime times p.ProbablyPrime(rep) on one fixed p, with one sub-benchmark per rep value.
+	p, _ := new(Int).SetString("203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123", 10)
+	for _, rep := range []int{1, 5, 10, 20} { // the rep arguments being compared
+		b.Run(fmt.Sprintf("Rep=%d", rep), func(b *testing.B) { // sub-benchmark is named "Rep=<rep>"
+			for i := 0; i < b.N; i++ {
+				p.ProbablyPrime(rep) // result is discarded; only the running time matters here
+			}
+		})
+	}
+}
+
 type intShiftTest struct {
 	in    string
 	shift uint