ripemd160: use bits.Rotate for rotates

Replace x << k | x >> (32 - k) with bits.RotateLeft32 and add a
benchmark to check performance. This makes the code cleaner and
improves performance:

MillionA-6  34.8ms ± 1%  26.7ms ± 1%  -23.41%  (p=0.000 n=10+10)

Change-Id: I1b0c45f5ea12c7b53ab1ec5a26efc1903555fa66
Reviewed-on: https://go-review.googlesource.com/97915
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/ripemd160/ripemd160_test.go b/ripemd160/ripemd160_test.go
index 5df1b25..a1fbffd 100644
--- a/ripemd160/ripemd160_test.go
+++ b/ripemd160/ripemd160_test.go
@@ -50,15 +50,23 @@
 	}
 }
 
-func TestMillionA(t *testing.T) {
+func millionA() string {
 	md := New()
 	for i := 0; i < 100000; i++ {
 		io.WriteString(md, "aaaaaaaaaa")
 	}
-	out := "52783243c1697bdbe16d37f97f68f08325dc1528"
-	s := fmt.Sprintf("%x", md.Sum(nil))
-	if s != out {
+	return fmt.Sprintf("%x", md.Sum(nil))
+}
+
+func TestMillionA(t *testing.T) {
+	const out = "52783243c1697bdbe16d37f97f68f08325dc1528"
+	if s := millionA(); s != out {
 		t.Fatalf("RIPEMD-160 (1 million 'a') = %s, expected %s", s, out)
 	}
-	md.Reset()
+}
+
+func BenchmarkMillionA(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		millionA()
+	}
 }
diff --git a/ripemd160/ripemd160block.go b/ripemd160/ripemd160block.go
index 7bc8e6c..e0edc02 100644
--- a/ripemd160/ripemd160block.go
+++ b/ripemd160/ripemd160block.go
@@ -8,6 +8,10 @@
 
 package ripemd160
 
+import (
+	"math/bits"
+)
+
 // work buffer indices and roll amounts for one line
 var _n = [80]uint{
 	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -59,16 +63,16 @@
 		i := 0
 		for i < 16 {
 			alpha = a + (b ^ c ^ d) + x[_n[i]]
-			s := _r[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + e
-			beta = c<<10 | c>>22
+			s := int(_r[i])
+			alpha = bits.RotateLeft32(alpha, s) + e
+			beta = bits.RotateLeft32(c, 10)
 			a, b, c, d, e = e, alpha, b, beta, d
 
 			// parallel line
 			alpha = aa + (bb ^ (cc | ^dd)) + x[n_[i]] + 0x50a28be6
-			s = r_[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + ee
-			beta = cc<<10 | cc>>22
+			s = int(r_[i])
+			alpha = bits.RotateLeft32(alpha, s) + ee
+			beta = bits.RotateLeft32(cc, 10)
 			aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
 
 			i++
@@ -77,16 +81,16 @@
 		// round 2
 		for i < 32 {
 			alpha = a + (b&c | ^b&d) + x[_n[i]] + 0x5a827999
-			s := _r[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + e
-			beta = c<<10 | c>>22
+			s := int(_r[i])
+			alpha = bits.RotateLeft32(alpha, s) + e
+			beta = bits.RotateLeft32(c, 10)
 			a, b, c, d, e = e, alpha, b, beta, d
 
 			// parallel line
 			alpha = aa + (bb&dd | cc&^dd) + x[n_[i]] + 0x5c4dd124
-			s = r_[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + ee
-			beta = cc<<10 | cc>>22
+			s = int(r_[i])
+			alpha = bits.RotateLeft32(alpha, s) + ee
+			beta = bits.RotateLeft32(cc, 10)
 			aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
 
 			i++
@@ -95,16 +99,16 @@
 		// round 3
 		for i < 48 {
 			alpha = a + (b | ^c ^ d) + x[_n[i]] + 0x6ed9eba1
-			s := _r[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + e
-			beta = c<<10 | c>>22
+			s := int(_r[i])
+			alpha = bits.RotateLeft32(alpha, s) + e
+			beta = bits.RotateLeft32(c, 10)
 			a, b, c, d, e = e, alpha, b, beta, d
 
 			// parallel line
 			alpha = aa + (bb | ^cc ^ dd) + x[n_[i]] + 0x6d703ef3
-			s = r_[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + ee
-			beta = cc<<10 | cc>>22
+			s = int(r_[i])
+			alpha = bits.RotateLeft32(alpha, s) + ee
+			beta = bits.RotateLeft32(cc, 10)
 			aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
 
 			i++
@@ -113,16 +117,16 @@
 		// round 4
 		for i < 64 {
 			alpha = a + (b&d | c&^d) + x[_n[i]] + 0x8f1bbcdc
-			s := _r[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + e
-			beta = c<<10 | c>>22
+			s := int(_r[i])
+			alpha = bits.RotateLeft32(alpha, s) + e
+			beta = bits.RotateLeft32(c, 10)
 			a, b, c, d, e = e, alpha, b, beta, d
 
 			// parallel line
 			alpha = aa + (bb&cc | ^bb&dd) + x[n_[i]] + 0x7a6d76e9
-			s = r_[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + ee
-			beta = cc<<10 | cc>>22
+			s = int(r_[i])
+			alpha = bits.RotateLeft32(alpha, s) + ee
+			beta = bits.RotateLeft32(cc, 10)
 			aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
 
 			i++
@@ -131,16 +135,16 @@
 		// round 5
 		for i < 80 {
 			alpha = a + (b ^ (c | ^d)) + x[_n[i]] + 0xa953fd4e
-			s := _r[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + e
-			beta = c<<10 | c>>22
+			s := int(_r[i])
+			alpha = bits.RotateLeft32(alpha, s) + e
+			beta = bits.RotateLeft32(c, 10)
 			a, b, c, d, e = e, alpha, b, beta, d
 
 			// parallel line
 			alpha = aa + (bb ^ cc ^ dd) + x[n_[i]]
-			s = r_[i]
-			alpha = (alpha<<s | alpha>>(32-s)) + ee
-			beta = cc<<10 | cc>>22
+			s = int(r_[i])
+			alpha = bits.RotateLeft32(alpha, s) + ee
+			beta = bits.RotateLeft32(cc, 10)
 			aa, bb, cc, dd, ee = ee, alpha, bb, beta, dd
 
 			i++