math/big: add 4-bit, fixed window exponentiation. A 4-bit window is convenient because 4 divides both 32 and 64, therefore we never have a window spanning words of the exponent. Additionally, the benefit of a 5-bit window is only 2.6% at 1024-bits and 3.3% at 2048-bits. This code is still not constant time, however. benchmark old ns/op new ns/op delta BenchmarkRSA2048Decrypt 17108590 11180370 -34.65% Benchmark3PrimeRSA2048Decrypt 13003720 7680390 -40.94% R=gri CC=golang-dev https://golang.org/cl/6716048

commit: 73f11171b4965af85d9c410bb34a2940f798a7ed [log] [tgz]
author: Adam Langley <agl@golang.org> Wed Oct 17 11:19:26 2012 -0400
committer: Adam Langley <agl@golang.org> Wed Oct 17 11:19:26 2012 -0400
tree: ba201ebaa687f49bff399d3f19f3148dff500e95
parent: ace9ff4578cb6d3077fbd2c7934b4ac063047145 [diff] [blame]
diff --git a/src/pkg/math/big/nat.go b/src/pkg/math/big/nat.go
index 2e5c56d..17985c2 100644
--- a/src/pkg/math/big/nat.go
+++ b/src/pkg/math/big/nat.go

@@ -1248,6 +1248,15 @@
 	}
 	z = z.set(x)
 
+	// If the base is non-trivial and the exponent is large, we use
+	// 4-bit, windowed exponentiation. This involves precomputing 14 values
+	// (x^2...x^15) but then reduces the number of multiply-reduces by a
+	// third. Even for a 32-bit exponent, this reduces the number of
+	// operations.
+	if len(x) > 1 && len(y) > 1 && len(m) > 0 {
+		return z.expNNWindowed(x, y, m)
+	}
+
 	v := y[len(y)-1] // v > 0 because y is normalized and y > 0
 	shift := leadingZeros(v) + 1
 	v <<= shift
@@ -1304,6 +1313,69 @@
 	return z.norm()
 }
 
+// expNNWindowed calculates x**y mod m, reading y in fixed 4-bit windows from its top word down.
+func (z nat) expNNWindowed(x, y, m nat) nat {
+	// zz and r are used to avoid allocating in mul and div as otherwise
+	// the arguments would alias.
+	var zz, r nat
+
+	const n = 4 // window width in bits
+	// powers[i] contains x^i; entries 2 and up are reduced mod m below.
+	var powers [1 << n]nat
+	powers[0] = natOne
+	powers[1] = x // stored as passed in; no reduction is applied here
+	for i := 2; i < 1<<n; i += 2 { // each pass fills powers[i] and powers[i+1]
+		p2, p, p1 := &powers[i/2], &powers[i], &powers[i+1]
+		*p = p.mul(*p2, *p2) // x^i = (x^(i/2))^2
+		zz, r = zz.div(r, *p, m)
+		*p, r = r, *p // keep the div result held in r; the old buffer becomes scratch
+		*p1 = p1.mul(*p, x) // x^(i+1) = x^i * x
+		zz, r = zz.div(r, *p1, m)
+		*p1, r = r, *p1 // same swap as above, for the odd power
+	}
+
+	z = z.setWord(1) // accumulator starts at 1
+
+	for i := len(y) - 1; i >= 0; i-- { // words of y, last index first
+		yi := y[i]
+		for j := 0; j < _W; j += n { // one pass per n-bit window of yi
+			if i != len(y)-1 || j != 0 { // skipped only for the very first window
+				// Four square-and-reduce steps, unrolled for a significant
+				// performance gain.  Use go test -bench=".*" in crypto/rsa
+				// to check performance before making changes.
+				zz = zz.mul(z, z)
+				zz, z = z, zz
+				zz, r = zz.div(r, z, m)
+				z, r = r, z
+
+				zz = zz.mul(z, z)
+				zz, z = z, zz
+				zz, r = zz.div(r, z, m)
+				z, r = r, z
+
+				zz = zz.mul(z, z)
+				zz, z = z, zz
+				zz, r = zz.div(r, z, m)
+				z, r = r, z
+
+				zz = zz.mul(z, z)
+				zz, z = z, zz
+				zz, r = zz.div(r, z, m)
+				z, r = r, z
+			}
+
+			zz = zz.mul(z, powers[yi>>(_W-n)]) // index is yi shifted right by _W-n
+			zz, z = z, zz
+			zz, r = zz.div(r, z, m)
+			z, r = r, z
+
+			yi <<= n // shift the next window up for the following pass
+		}
+	}
+
+	return z.norm()
+}
+
 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
 // If it returns true, n is prime with probability 1 - 1/4^reps.
 // If it returns false, n is not prime.
commit	73f11171b4965af85d9c410bb34a2940f798a7ed	[log] [tgz]
author	Adam Langley <agl@golang.org>	Wed Oct 17 11:19:26 2012 -0400
committer	Adam Langley <agl@golang.org>	Wed Oct 17 11:19:26 2012 -0400
tree	ba201ebaa687f49bff399d3f19f3148dff500e95
parent	ace9ff4578cb6d3077fbd2c7934b4ac063047145 [diff] [blame]