// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains common benchmarking logic shared between benchmarks.
// It defines the Main function, which runs one of the benchmarks registered
// with the Register function.
// When a benchmark is invoked, it has two choices:
// 1. Do whatever it wants and fill in and return a Result object.
// 2. Call the Benchmark helper function with a benchmarking function
// func(N uint64), similar to standard testing benchmarks. The rest is handled
// by the driver.
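//
// An illustrative sketch of the second approach (the benchmark name
// "example" and the loop body are hypothetical, not part of this package):
//
//	package main
//
//	import "golang.org/x/benchmarks/driver"
//
//	func init() {
//		driver.Register("example", func() driver.Result {
//			return driver.Benchmark(func(N uint64) {
//				for i := uint64(0); i < N; i++ {
//					// one iteration of the measured workload
//				}
//			})
//		})
//	}
//
//	func main() {
//		driver.Main()
//	}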
package driver // import "golang.org/x/benchmarks/driver"
import (
"bytes"
"flag"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
"runtime"
"runtime/pprof"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
)
var (
bench = flag.String("bench", "", "benchmark to run")
flake = flag.Int("flake", 0, "test flakiness of a benchmark")
benchNum = flag.Int("benchnum", 5, "number of benchmark runs")
benchMem = flag.Int("benchmem", 64, "approx RSS value to aim at in benchmarks, in MB")
benchTime = flag.Duration("benchtime", 5*time.Second, "run enough iterations of each benchmark to take the specified time")
affinity = flag.Int("affinity", 0, "process affinity (passed to an OS-specific function like sched_setaffinity/SetProcessAffinityMask)")
tmpDir = flag.String("tmpdir", os.TempDir(), "dir for temporary files")
genSvg = flag.Bool("svg", false, "generate svg profiles")
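// Copies of the flag values above; Main fills them in so that benchmarks
// can read the configuration.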
BenchNum int
BenchMem int
BenchTime time.Duration
WorkDir string
benchmarks = make(map[string]func() Result)
)
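// Register records a benchmark under the given name so that it can be
// selected with the -bench flag.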
func Register(name string, f func() Result) {
benchmarks[name] = f
}
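// Main parses the flags and either prints the list of registered benchmarks
// (when -bench is empty) or runs the selected benchmark and prints its
// metrics and result files in GOPERF-METRIC/GOPERF-FILE form.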
func Main() {
flag.Parse()
// Copy to public variables, so that benchmarks can access the values.
BenchNum = *benchNum
BenchMem = *benchMem
BenchTime = *benchTime
WorkDir = *tmpDir
if *affinity != 0 {
setProcessAffinity(*affinity)
}
if *bench == "" {
printBenchmarks()
return
}
f := benchmarks[*bench]
if f == nil {
fmt.Printf("unknown benchmark '%v'\n", *bench)
os.Exit(1)
}
setupWatchdog()
if *flake > 0 {
testFlakiness(f, *flake)
return
}
res := f()
var metrics []string
for k := range res.Metrics {
metrics = append(metrics, k)
}
sort.Strings(metrics)
for _, m := range metrics {
fmt.Printf("GOPERF-METRIC:%v=%v\n", m, res.Metrics[m])
}
var files []string
for k := range res.Files {
files = append(files, k)
}
sort.Strings(files)
for _, f := range files {
fmt.Printf("GOPERF-FILE:%v=%v\n", f, res.Files[f])
}
}
func printBenchmarks() {
var bb []string
for name := range benchmarks {
bb = append(bb, name)
}
sort.Strings(bb)
for i, name := range bb {
if i != 0 {
fmt.Print(",")
}
fmt.Print(name)
}
fmt.Print("\n")
}
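// setupWatchdog starts a background timer that panics the process if the
// benchmark does not finish within a generous multiple of the expected
// run time.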
func setupWatchdog() {
t := *benchTime
// Be somewhat conservative; note that the build benchmark does not care about benchTime at all.
if t < time.Minute {
t = time.Minute
}
t *= time.Duration(*benchNum)
t *= 2 // to account for iteration number auto-tuning
if *flake > 0 {
t *= time.Duration(*flake + 2)
}
go func() {
time.Sleep(t)
panic(fmt.Sprintf("timed out after %v", t))
}()
}
// testFlakiness runs the function N+2 times and prints metrics diffs between
// the second and subsequent runs.
func testFlakiness(f func() Result, N int) {
res := make([]Result, N+2)
for i := range res {
res[i] = f()
}
fmt.Printf("\n")
// The first run is discarded as warm-up; use the second run as the baseline.
for k, v := range res[1].Metrics {
fmt.Printf("%v:\t", k)
for i := 2; i < len(res); i++ {
d := 100*float64(v)/float64(res[i].Metrics[k]) - 100
fmt.Printf(" %+.2f%%", d)
}
fmt.Printf("\n")
}
}
// Result contains all the interesting data about benchmark execution.
type Result struct {
N uint64 // number of iterations
Duration time.Duration // total run duration
RunTime uint64 // ns/op
Metrics map[string]uint64 // metric name (e.g. "time", "rss") -> value
Files map[string]string // file kind (e.g. "cpuprof") -> file name
}
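// MakeResult returns an empty Result with the Metrics and Files maps
// initialized.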
func MakeResult() Result {
return Result{Metrics: make(map[string]uint64), Files: make(map[string]string)}
}
// Benchmark runs f several times, collects stats, chooses the best run
// and creates cpu/mem profiles.
func Benchmark(f func(uint64)) Result {
res := MakeResult()
for i := 0; i < *benchNum; i++ {
res1 := runBenchmark(f)
if res.N == 0 || res.RunTime > res1.RunTime {
res = res1
}
// Always take the RSS and sys memory metrics from the last iteration.
// They only grow and seem to converge to a steady value, so the
// run-to-run variation is smaller if we do this.
for k, v := range res1.Metrics {
if k == "rss" || strings.HasPrefix(k, "sys-") {
res.Metrics[k] = v
}
}
}
cpuprof := processProfile(os.Args[0], res.Files["cpuprof"])
delete(res.Files, "cpuprof")
if cpuprof != "" {
res.Files["cpuprof"] = cpuprof
}
memprof := processProfile("--lines", "--unit=byte", "--alloc_space", "--base", res.Files["memprof0"], os.Args[0], res.Files["memprof"])
delete(res.Files, "memprof")
delete(res.Files, "memprof0")
if memprof != "" {
res.Files["memprof"] = memprof
}
return res
}
// processProfile invokes 'go tool pprof' with the specified args
// and returns the name of the resulting file, or an empty string on failure.
func processProfile(args ...string) string {
fname := "prof.txt"
typ := "--text"
if *genSvg {
fname = "prof.svg"
typ = "--svg"
}
proff, err := os.Create(tempFilename(fname))
if err != nil {
log.Printf("Failed to create profile file: %v", err)
return ""
}
defer proff.Close()
var proflog bytes.Buffer
cmdargs := append([]string{"tool", "pprof", typ}, args...)
cmd := exec.Command("go", cmdargs...)
cmd.Stdout = proff
cmd.Stderr = &proflog
err = cmd.Run()
if err != nil {
log.Printf("go tool pprof cpuprof failed: %v\n%v", err, proflog.String())
return "" // Deliberately ignore the error.
}
return proff.Name()
}
// runBenchmark runs f several times with increasing number of iterations
// until execution time reaches the requested duration.
func runBenchmark(f func(uint64)) Result {
res := MakeResult()
for chooseN(&res) {
log.Printf("Benchmarking %v iterations\n", res.N)
res = runBenchmarkOnce(f, res.N)
}
log.Printf("Result: %+v\n", res)
return res
}
// runBenchmarkOnce runs f once and collects all performance metrics and profiles.
func runBenchmarkOnce(f func(uint64), N uint64) Result {
latencyInit(N)
runtime.GC()
mstats0 := new(runtime.MemStats)
runtime.ReadMemStats(mstats0)
ss := InitSysStats(N)
res := MakeResult()
res.N = N
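// Write a heap profile before the run; Benchmark later passes it to
// 'go tool pprof' via --base so that the final profile reflects only
// allocations made during the benchmark itself.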
res.Files["memprof0"] = tempFilename("memprof")
memprof0, err := os.Create(res.Files["memprof0"])
if err != nil {
log.Fatalf("Failed to create profile file '%v': %v", res.Files["memprof0"], err)
}
pprof.WriteHeapProfile(memprof0)
memprof0.Close()
res.Files["cpuprof"] = tempFilename("cpuprof")
cpuprof, err := os.Create(res.Files["cpuprof"])
if err != nil {
log.Fatalf("Failed to create profile file '%v': %v", res.Files["cpuprof"], err)
}
defer cpuprof.Close()
pprof.StartCPUProfile(cpuprof)
t0 := time.Now()
f(N)
res.Duration = time.Since(t0)
res.RunTime = uint64(res.Duration) / N
res.Metrics["time"] = res.RunTime
pprof.StopCPUProfile()
latencyCollect(&res)
ss.Collect(&res)
res.Files["memprof"] = tempFilename("memprof")
memprof, err := os.Create(res.Files["memprof"])
if err != nil {
log.Fatalf("Failed to create profile file '%v': %v", res.Files["memprof"], err)
}
pprof.WriteHeapProfile(memprof)
memprof.Close()
mstats1 := new(runtime.MemStats)
runtime.ReadMemStats(mstats1)
res.Metrics["allocated"] = (mstats1.TotalAlloc - mstats0.TotalAlloc) / N
res.Metrics["allocs"] = (mstats1.Mallocs - mstats0.Mallocs) / N
res.Metrics["sys-total"] = mstats1.Sys
res.Metrics["sys-heap"] = mstats1.HeapSys
res.Metrics["sys-stack"] = mstats1.StackSys
res.Metrics["gc-pause-total"] = (mstats1.PauseTotalNs - mstats0.PauseTotalNs) / N
collectGo12MemStats(&res, mstats0, mstats1)
numGC := uint64(mstats1.NumGC - mstats0.NumGC)
if numGC == 0 {
res.Metrics["gc-pause-one"] = 0
} else {
res.Metrics["gc-pause-one"] = (mstats1.PauseTotalNs - mstats0.PauseTotalNs) / numGC
}
return res
}
// Parallel is a public helper function that runs f N times in P*GOMAXPROCS goroutines.
func Parallel(N uint64, P int, f func()) {
numProcs := P * runtime.GOMAXPROCS(0)
var wg sync.WaitGroup
wg.Add(numProcs)
for p := 0; p < numProcs; p++ {
go func() {
defer wg.Done()
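// Adding ^uint64(0) atomically decrements N by one; keep calling f
// until the remaining iteration count goes negative.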
for int64(atomic.AddUint64(&N, ^uint64(0))) >= 0 {
f()
}
}()
}
wg.Wait()
}
// latency collects and reports information about per-operation latencies.
var latency struct {
data latencyData
idx int32
}
type latencyData []uint64
func (p latencyData) Len() int { return len(p) }
func (p latencyData) Less(i, j int) bool { return p[i] < p[j] }
func (p latencyData) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func latencyInit(N uint64) {
// Use a fixed-size slice to:
// - bound the amount of memory consumed, and
// - eliminate variance between runs that use different numbers of iterations.
latency.data = make(latencyData, 1e6)
latency.idx = 0
}
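// LatencyNote records the latency of a single operation that started at t.
// Samples beyond the fixed-size buffer are dropped.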
func LatencyNote(t time.Time) {
d := time.Since(t)
if int(atomic.LoadInt32(&latency.idx)) >= len(latency.data) {
return
}
i := atomic.AddInt32(&latency.idx, 1) - 1
if int(i) >= len(latency.data) {
return
}
latency.data[i] = uint64(d)
}
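// latencyCollect sorts the recorded samples and reports the 50th, 95th and
// 99th percentile latencies.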
func latencyCollect(res *Result) {
cnt := int(latency.idx)
if cnt == 0 {
return
}
if cnt > len(latency.data) {
cnt = len(latency.data)
}
sort.Sort(latency.data[:cnt])
res.Metrics["latency-50"] = latency.data[cnt*50/100]
res.Metrics["latency-95"] = latency.data[cnt*95/100]
res.Metrics["latency-99"] = latency.data[cnt*99/100]
}
// chooseN chooses the next number of iterations for the benchmark.
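// It predicts the iteration count needed to fill benchTime, adds a 50%
// safety margin, caps growth at 100x the previous count (while always
// growing by at least one), and rounds the result up to a round number.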
func chooseN(res *Result) bool {
const MaxN = 1e12
last := res.N
if last == 0 {
res.N = 1
return true
} else if res.Duration >= *benchTime || last >= MaxN {
return false
}
nsPerOp := max(1, res.RunTime)
res.N = uint64(*benchTime) / nsPerOp
res.N = max(min(res.N+res.N/2, 100*last), last+1)
res.N = roundUp(res.N)
return true
}
// roundUp rounds the number of iterations to a nice value.
func roundUp(n uint64) uint64 {
tmp := n
base := uint64(1)
for tmp >= 10 {
tmp /= 10
base *= 10
}
switch {
case n <= base:
return base
case n <= (2 * base):
return 2 * base
case n <= (5 * base):
return 5 * base
default:
return 10 * base
}
panic("unreachable")
return 0
}
func min(a, b uint64) uint64 {
if a < b {
return a
}
return b
}
func max(a, b uint64) uint64 {
if a > b {
return a
}
return b
}
var tmpSeq = 0
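// tempFilename returns a fresh file name in the temporary directory with the
// given suffix.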
func tempFilename(ext string) string {
tmpSeq++
return filepath.Join(*tmpDir, fmt.Sprintf("%v.%v", tmpSeq, ext))
}