runtime: add matrix multiplication benchmark

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/7431047
diff --git a/src/pkg/runtime/proc_test.go b/src/pkg/runtime/proc_test.go
index b9d57a6..b2fa4c9 100644
--- a/src/pkg/runtime/proc_test.go
+++ b/src/pkg/runtime/proc_test.go
@@ -5,6 +5,7 @@
 package runtime_test
 
 import (
+	"math"
 	"runtime"
 	"sync/atomic"
 	"testing"
@@ -225,3 +226,67 @@
 		<-c
 	}
 }
+
+type Matrix [][]float64
+
+func BenchmarkMatmult(b *testing.B) {
+	b.StopTimer()
+	// matmult is O(N**3) but testing expects O(b.N),
+	// so we need to take cube root of b.N
+	n := int(math.Cbrt(float64(b.N))) + 1
+	A := makeMatrix(n)
+	B := makeMatrix(n)
+	C := makeMatrix(n)
+	b.StartTimer()
+	matmult(nil, A, B, C, 0, n, 0, n, 0, n, 8)
+}
+
+func makeMatrix(n int) Matrix {
+	m := make(Matrix, n)
+	for i := 0; i < n; i++ {
+		m[i] = make([]float64, n)
+		for j := 0; j < n; j++ {
+			m[i][j] = float64(i*n + j)
+		}
+	}
+	return m
+}
+
+func matmult(done chan<- struct{}, A, B, C Matrix, i0, i1, j0, j1, k0, k1, threshold int) {
+	di := i1 - i0
+	dj := j1 - j0
+	dk := k1 - k0
+	if di >= dj && di >= dk && di >= threshold {
+		// divide in two by y axis
+		mi := i0 + di/2
+		done1 := make(chan struct{}, 1)
+		go matmult(done1, A, B, C, i0, mi, j0, j1, k0, k1, threshold)
+		matmult(nil, A, B, C, mi, i1, j0, j1, k0, k1, threshold)
+		<-done1
+	} else if dj >= dk && dj >= threshold {
+		// divide in two by x axis
+		mj := j0 + dj/2
+		done1 := make(chan struct{}, 1)
+		go matmult(done1, A, B, C, i0, i1, j0, mj, k0, k1, threshold)
+		matmult(nil, A, B, C, i0, i1, mj, j1, k0, k1, threshold)
+		<-done1
+	} else if dk >= threshold {
+		// divide in two by "k" axis
+		// deliberately not parallel because of data races
+		mk := k0 + dk/2
+		matmult(nil, A, B, C, i0, i1, j0, j1, k0, mk, threshold)
+		matmult(nil, A, B, C, i0, i1, j0, j1, mk, k1, threshold)
+	} else {
+		// the matrices are small enough, compute directly
+		for i := i0; i < i1; i++ {
+			for j := j0; j < j1; j++ {
+				for k := k0; k < k1; k++ {
+					C[i][j] += A[i][k] * B[k][j]
+				}
+			}
+		}
+	}
+	if done != nil {
+		done <- struct{}{}
+	}
+}