crypto/md5: speed up aligned writes and test/bench unaligned writes
Write() can safely use uint32 loads when the input is suitably aligned.
Also add a test and benchmarks for unaligned writes.
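
The fast path hinges on a pointer-alignment check before reinterpreting the
input as uint32 words. A minimal standalone sketch of that check (illustrative
only; the real code lives in md5block.go below, and the "aligned" helper name
is made up here):

	package main

	import (
		"fmt"
		"unsafe"
	)

	// aligned reports whether p's first byte sits on a uint32 boundary, in
	// which case the block function can load the input directly as
	// *[16]uint32 instead of assembling each word byte by byte.
	func aligned(p []byte) bool {
		return uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0
	}

	func main() {
		buf := make([]byte, 64+1)
		fmt.Println(aligned(buf))     // usually true: make returns word-aligned memory
		fmt.Println(aligned(buf[1:])) // false whenever buf itself was aligned
	}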
Benchmark results obtained by Dave Cheney on an ARMv5TE @ 1.2GHz:
benchmark                        old ns/op    new ns/op    delta
BenchmarkHash8Bytes                   4104         3417  -16.74%
BenchmarkHash1K                      22061        11208  -49.20%
BenchmarkHash8K                     146630        65148  -55.57%
BenchmarkHash8BytesUnaligned          4128         3436  -16.76%
BenchmarkHash1KUnaligned             22054        21473   -2.63%
BenchmarkHash8KUnaligned            146658       146909   +0.17%

benchmark                         old MB/s     new MB/s  speedup
BenchmarkHash8Bytes                   1.95         2.34    1.20x
BenchmarkHash1K                      46.42        91.36    1.97x
BenchmarkHash8K                      55.87       125.74    2.25x
BenchmarkHash8BytesUnaligned          1.94         2.33    1.20x
BenchmarkHash1KUnaligned             46.43        47.69    1.03x
BenchmarkHash8KUnaligned             55.86        55.76    1.00x
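
For context, the unaligned path matters because callers often hash a sub-slice
that does not start on a word boundary. A small illustrative example (the
buffer and offsets are arbitrary, not taken from the tests):

	package main

	import (
		"crypto/md5"
		"fmt"
	)

	func main() {
		packet := make([]byte, 1024)
		// Hash only the payload, which starts at an odd offset inside the
		// packet and is therefore typically unaligned for uint32 loads.
		payload := packet[3 : 3+512]

		h := md5.New()
		h.Write(payload) // typically exercises the byte-by-byte (unaligned) path
		fmt.Printf("%x\n", h.Sum(nil))
	}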
R=golang-dev, dave, bradfitz
CC=golang-dev
https://golang.org/cl/6782072
diff --git a/src/pkg/crypto/md5/gen.go b/src/pkg/crypto/md5/gen.go
index 1a9c4ab..966bdae 100644
--- a/src/pkg/crypto/md5/gen.go
+++ b/src/pkg/crypto/md5/gen.go
@@ -203,6 +203,8 @@
// less code and run 1.3x faster if we take advantage of that.
// My apologies.
X = (*[16]uint32)(unsafe.Pointer(&p[0]))
+ } else if uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
+ X = (*[16]uint32)(unsafe.Pointer(&p[0]))
} else {
X = &xbuf
j := 0
diff --git a/src/pkg/crypto/md5/md5_test.go b/src/pkg/crypto/md5/md5_test.go
index c810251..cac39ad 100644
--- a/src/pkg/crypto/md5/md5_test.go
+++ b/src/pkg/crypto/md5/md5_test.go
@@ -9,6 +9,7 @@
"fmt"
"io"
"testing"
+ "unsafe"
)
type md5Test struct {
@@ -54,13 +55,19 @@
for i := 0; i < len(golden); i++ {
g := golden[i]
c := md5.New()
- for j := 0; j < 3; j++ {
+ buf := make([]byte, len(g.in)+4)
+ for j := 0; j < 3+4; j++ {
if j < 2 {
io.WriteString(c, g.in)
- } else {
+ } else if j == 2 {
io.WriteString(c, g.in[0:len(g.in)/2])
c.Sum(nil)
io.WriteString(c, g.in[len(g.in)/2:])
+ } else if j > 2 {
+ // test unaligned write
+ buf = buf[1:]
+ copy(buf, g.in)
+ c.Write(buf[:len(g.in)])
}
s := fmt.Sprintf("%x", c.Sum(nil))
if s != g.out {
@@ -80,11 +87,18 @@
}
var bench = md5.New()
-var buf = make([]byte, 8192)
+var buf = make([]byte, 8192+1)
+var sum = make([]byte, bench.Size())
-func benchmarkSize(b *testing.B, size int) {
+func benchmarkSize(b *testing.B, size int, unaligned bool) {
b.SetBytes(int64(size))
- sum := make([]byte, bench.Size())
+ buf := buf
+ if unaligned {
+ if uintptr(unsafe.Pointer(&buf[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
+ buf = buf[1:]
+ }
+ }
+ b.ResetTimer()
for i := 0; i < b.N; i++ {
bench.Reset()
bench.Write(buf[:size])
@@ -93,13 +107,25 @@
}
func BenchmarkHash8Bytes(b *testing.B) {
- benchmarkSize(b, 8)
+ benchmarkSize(b, 8, false)
}
func BenchmarkHash1K(b *testing.B) {
- benchmarkSize(b, 1024)
+ benchmarkSize(b, 1024, false)
}
func BenchmarkHash8K(b *testing.B) {
- benchmarkSize(b, 8192)
+ benchmarkSize(b, 8192, false)
+}
+
+func BenchmarkHash8BytesUnaligned(b *testing.B) {
+ benchmarkSize(b, 8, true)
+}
+
+func BenchmarkHash1KUnaligned(b *testing.B) {
+ benchmarkSize(b, 1024, true)
+}
+
+func BenchmarkHash8KUnaligned(b *testing.B) {
+ benchmarkSize(b, 8192, true)
}
diff --git a/src/pkg/crypto/md5/md5block.go b/src/pkg/crypto/md5/md5block.go
index 5dbdf56..59f8f6f 100644
--- a/src/pkg/crypto/md5/md5block.go
+++ b/src/pkg/crypto/md5/md5block.go
@@ -22,6 +22,8 @@
// less code and run 1.3x faster if we take advantage of that.
// My apologies.
X = (*[16]uint32)(unsafe.Pointer(&p[0]))
+ } else if uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
+ X = (*[16]uint32)(unsafe.Pointer(&p[0]))
} else {
X = &xbuf
j := 0