| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package driver provides common benchmarking logic shared between benchmarks. |
| // |
| // A benchmark should call Main with a benchmark function. The |
| // benchmark function can do one of two things when invoked: |
// 1. Do whatever it wants, then fill in and return a Result object.
// 2. Call the Benchmark helper function and provide a benchmarking function
// func(N uint64), similar to standard testing benchmarks. The rest is handled
// by the driver.
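//
// For example, a minimal benchmark built on the Benchmark helper might look
// like the sketch below (the "Fib" name and the fib workload are purely
// illustrative):
//
//	package main
//
//	import "golang.org/x/benchmarks/driver"
//
//	func main() {
//		driver.Main("Fib", benchmark)
//	}
//
//	func benchmark() driver.Result {
//		return driver.Benchmark(func(n uint64) {
//			for i := uint64(0); i < n; i++ {
//				fib(20)
//			}
//		})
//	}
//
//	func fib(n int) int {
//		if n < 2 {
//			return n
//		}
//		return fib(n-1) + fib(n-2)
//	}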
| package driver // import "golang.org/x/benchmarks/driver" |
| |
| import ( |
| "bytes" |
| "flag" |
| "fmt" |
| "log" |
| "os" |
| "os/exec" |
| "path/filepath" |
| "runtime" |
| "runtime/pprof" |
| "sort" |
| "sync" |
| "sync/atomic" |
| "time" |
| ) |
| |
| var ( |
| flake = flag.Int("flake", 0, "test flakiness of a benchmark") |
| benchNum = flag.Int("benchnum", 1, "number of benchmark runs") |
| benchMem = flag.Int("benchmem", 64, "approx RSS value to aim at in benchmarks, in MB") |
| benchTime = flag.Duration("benchtime", 5*time.Second, "run enough iterations of each benchmark to take the specified time") |
| affinity = flag.Int("affinity", 0, "process affinity (passed to an OS-specific function like sched_setaffinity/SetProcessAffinityMask)") |
| tmpDir = flag.String("tmpdir", os.TempDir(), "dir for temporary files") |
| genSvg = flag.Bool("svg", false, "generate svg profiles") |
| |
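	// BenchTime and WorkDir are copies of the -benchtime and -tmpdir flag
	// values, exported by Main so that benchmarks can access them.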
| BenchTime time.Duration |
| WorkDir string |
| |
| usedBenchMem bool |
| |
| // startTrace starts runtime tracing if supported and |
| // requested and returns a function to stop tracing. |
| startTrace = func() func() { |
| return func() {} |
| } |
| ) |
| |
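// Main is the entry point for a benchmark binary: it parses flags, installs
// a watchdog timer, and runs the benchmark function f -benchnum times,
// reporting each Result.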
| func Main(name string, f func() Result) { |
| flag.Parse() |
	// Copy to exported variables so that benchmarks can access the values.
| BenchTime = *benchTime |
| WorkDir = *tmpDir |
| |
| if *affinity != 0 { |
| setProcessAffinity(*affinity) |
| } |
| |
| setupWatchdog() |
| |
| if *flake > 0 { |
| testFlakiness(f, *flake) |
| return |
| } |
| |
| fmt.Print("pkg: golang.org/x/benchmarks\n") |
| fmt.Printf("goos: %s\ngoarch: %s\n\n", runtime.GOOS, runtime.GOARCH) |
| |
| stopTrace := startTrace() |
| defer stopTrace() |
| |
| for i := 0; i < *benchNum; i++ { |
| res := f() |
| report(name, res) |
| } |
| } |
| |
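// BenchMem returns the approximate RSS, in MB, that memory-sensitive
// benchmarks should aim for (the -benchmem flag). Calling it also causes
// the value to be appended to the reported benchmark name.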
| func BenchMem() int { |
| usedBenchMem = true |
| return *benchMem |
| } |
| |
| func setupWatchdog() { |
| t := *benchTime |
	// Be somewhat conservative; also, the build benchmark does not respect benchTime.
| if t < time.Minute { |
| t = time.Minute |
| } |
| t *= time.Duration(*benchNum) |
| t *= 2 // to account for iteration number auto-tuning |
| if *flake > 0 { |
| t *= time.Duration(*flake + 2) |
| } |
| go func() { |
| time.Sleep(t) |
| panic(fmt.Sprintf("timed out after %v", t)) |
| }() |
| } |
| |
// testFlakiness runs the function N+2 times, discards the first (warm-up) run,
// and prints metric diffs of the second run against each subsequent run.
| func testFlakiness(f func() Result, N int) { |
| res := make([]Result, N+2) |
| for i := range res { |
| res[i] = f() |
| } |
| fmt.Printf("\n") |
	for k, v := range res[1].Metrics {
| fmt.Printf("%v:\t", k) |
| for i := 2; i < len(res); i++ { |
| d := 100*float64(v)/float64(res[i].Metrics[k]) - 100 |
| fmt.Printf(" %+.2f%%", d) |
| } |
| fmt.Printf("\n") |
| } |
| } |
| |
| // Result contains all the interesting data about benchmark execution. |
| type Result struct { |
| N uint64 // number of iterations |
| Duration time.Duration // total run duration |
| RunTime uint64 // ns/op |
| Metrics map[string]uint64 |
| Files map[string]string |
| } |
| |
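// MakeResult returns an empty Result with initialized Metrics and Files maps.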
| func MakeResult() Result { |
| return Result{Metrics: make(map[string]uint64), Files: make(map[string]string)} |
| } |
| |
| func report(name string, res Result) { |
| for name, path := range res.Files { |
| fmt.Printf("# %s=%s\n", name, path) |
| } |
| |
| if usedBenchMem { |
| name = fmt.Sprintf("%s/benchmem-MB=%d", name, *benchMem) |
| } |
| fmt.Printf("Benchmark%s-%d %8d\t%10d ns/op", name, runtime.GOMAXPROCS(-1), res.N, res.RunTime) |
| var metrics []string |
| for metric := range res.Metrics { |
| if metric == "ns/op" { |
| // Already reported from res.RunTime. |
| continue |
| } |
| metrics = append(metrics, metric) |
| } |
| sort.Strings(metrics) |
| for _, metric := range metrics { |
| fmt.Printf("\t%10d %s", res.Metrics[metric], metric) |
| } |
| fmt.Printf("\n") |
| } |
| |
| // Benchmark runs f several times, collects stats, |
| // and creates cpu/mem profiles. |
| func Benchmark(f func(uint64)) Result { |
| res := runBenchmark(f) |
| |
| cpuprof := processProfile(os.Args[0], res.Files["cpuprof"]) |
| delete(res.Files, "cpuprof") |
| if cpuprof != "" { |
| res.Files["cpuprof"] = cpuprof |
| } |
| |
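	// Diff the final heap profile against the pre-run profile (memprof0)
	// so the report covers only allocations made during the benchmark.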
| memprof := processProfile("--lines", "--unit=byte", "--alloc_space", "--base", res.Files["memprof0"], os.Args[0], res.Files["memprof"]) |
| delete(res.Files, "memprof") |
| delete(res.Files, "memprof0") |
| if memprof != "" { |
| res.Files["memprof"] = memprof |
| } |
| |
| return res |
| } |
| |
// processProfile invokes 'go tool pprof' with the specified args
// and returns the name of the resulting file, or an empty string on failure.
| func processProfile(args ...string) string { |
| fname := "prof.txt" |
| typ := "--text" |
| if *genSvg { |
| fname = "prof.svg" |
| typ = "--svg" |
| } |
| proff, err := os.Create(tempFilename(fname)) |
| if err != nil { |
| log.Printf("Failed to create profile file: %v", err) |
| return "" |
| } |
| defer proff.Close() |
| var proflog bytes.Buffer |
| cmdargs := append([]string{"tool", "pprof", typ}, args...) |
| cmd := exec.Command("go", cmdargs...) |
| cmd.Stdout = proff |
| cmd.Stderr = &proflog |
| err = cmd.Run() |
| if err != nil { |
| log.Printf("go tool pprof cpuprof failed: %v\n%v", err, proflog.String()) |
| return "" // Deliberately ignore the error. |
| } |
| return proff.Name() |
| } |
| |
// runBenchmark runs f several times with an increasing number of iterations
// until the execution time reaches the requested duration.
| func runBenchmark(f func(uint64)) Result { |
| res := MakeResult() |
| for chooseN(&res) { |
| log.Printf("Benchmarking %v iterations\n", res.N) |
| res = runBenchmarkOnce(f, res.N) |
| } |
| return res |
| } |
| |
| // runBenchmarkOnce runs f once and collects all performance metrics and profiles. |
| func runBenchmarkOnce(f func(uint64), N uint64) Result { |
| latencyInit(N) |
| runtime.GC() |
| mstats0 := new(runtime.MemStats) |
| runtime.ReadMemStats(mstats0) |
| ss := InitSysStats(N) |
| res := MakeResult() |
| res.N = N |
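	// Write a heap profile before the run; Benchmark later uses it as the
	// --base profile to isolate allocations made by the benchmark itself.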
| res.Files["memprof0"] = tempFilename("memprof") |
| memprof0, err := os.Create(res.Files["memprof0"]) |
| if err != nil { |
| log.Fatalf("Failed to create profile file '%v': %v", res.Files["memprof0"], err) |
| } |
| pprof.WriteHeapProfile(memprof0) |
| memprof0.Close() |
| |
| res.Files["cpuprof"] = tempFilename("cpuprof") |
| cpuprof, err := os.Create(res.Files["cpuprof"]) |
| if err != nil { |
| log.Fatalf("Failed to create profile file '%v': %v", res.Files["cpuprof"], err) |
| } |
| defer cpuprof.Close() |
| pprof.StartCPUProfile(cpuprof) |
| t0 := time.Now() |
| f(N) |
| res.Duration = time.Since(t0) |
	res.RunTime = uint64(res.Duration) / N
| res.Metrics["ns/op"] = res.RunTime |
| pprof.StopCPUProfile() |
| |
| latencyCollect(&res) |
| ss.Collect(&res) |
| |
| res.Files["memprof"] = tempFilename("memprof") |
| memprof, err := os.Create(res.Files["memprof"]) |
| if err != nil { |
| log.Fatalf("Failed to create profile file '%v': %v", res.Files["memprof"], err) |
| } |
| pprof.WriteHeapProfile(memprof) |
| memprof.Close() |
| |
| mstats1 := new(runtime.MemStats) |
| runtime.ReadMemStats(mstats1) |
| res.Metrics["allocated-bytes/op"] = (mstats1.TotalAlloc - mstats0.TotalAlloc) / N |
| res.Metrics["allocs/op"] = (mstats1.Mallocs - mstats0.Mallocs) / N |
| res.Metrics["bytes-from-system"] = mstats1.Sys |
| res.Metrics["heap-bytes-from-system"] = mstats1.HeapSys |
| res.Metrics["stack-bytes-from-system"] = mstats1.StackSys |
| res.Metrics["STW-ns/op"] = (mstats1.PauseTotalNs - mstats0.PauseTotalNs) / N |
| collectGo12MemStats(&res, mstats0, mstats1) |
| numGC := uint64(mstats1.NumGC - mstats0.NumGC) |
| if numGC == 0 { |
| res.Metrics["STW-ns/GC"] = 0 |
| } else { |
| res.Metrics["STW-ns/GC"] = (mstats1.PauseTotalNs - mstats0.PauseTotalNs) / numGC |
| } |
| return res |
| } |
| |
// Parallel runs f a total of N times, distributed across P*GOMAXPROCS goroutines.
| func Parallel(N uint64, P int, f func()) { |
| numProcs := P * runtime.GOMAXPROCS(0) |
| var wg sync.WaitGroup |
| wg.Add(numProcs) |
| for p := 0; p < numProcs; p++ { |
| go func() { |
| defer wg.Done() |
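			// Adding ^uint64(0) atomically decrements N; the int64 cast
			// turns the wrapped-around value negative, signaling that all
			// N iterations have been claimed.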
| for int64(atomic.AddUint64(&N, ^uint64(0))) >= 0 { |
| f() |
| } |
| }() |
| } |
| wg.Wait() |
| } |
| |
// latency collects information about operation latencies for percentile reporting.
| var latency struct { |
| data latencyData |
| idx int32 |
| } |
| |
| type latencyData []uint64 |
| |
| func (p latencyData) Len() int { return len(p) } |
| func (p latencyData) Less(i, j int) bool { return p[i] < p[j] } |
| func (p latencyData) Swap(i, j int) { p[i], p[j] = p[j], p[i] } |
| |
| func latencyInit(N uint64) { |
	// Use a fixed-size slice to:
	// - bound the amount of memory consumed
	// - eliminate variance between runs that use different numbers of iterations
| latency.data = make(latencyData, 1e6) |
| latency.idx = 0 |
| } |
| |
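// LatencyNote records the latency of a single operation that started at t.
// Samples past the fixed-size buffer are dropped.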
| func LatencyNote(t time.Time) { |
| d := time.Since(t) |
| if int(atomic.LoadInt32(&latency.idx)) >= len(latency.data) { |
| return |
| } |
| i := atomic.AddInt32(&latency.idx, 1) - 1 |
| if int(i) >= len(latency.data) { |
| return |
| } |
| latency.data[i] = uint64(d) |
| } |
| |
| func latencyCollect(res *Result) { |
| cnt := int(latency.idx) |
| if cnt == 0 { |
| return |
| } |
| if cnt > len(latency.data) { |
| cnt = len(latency.data) |
| } |
| sort.Sort(latency.data[:cnt]) |
| res.Metrics["P50-ns/op"] = latency.data[cnt*50/100] |
| res.Metrics["P95-ns/op"] = latency.data[cnt*95/100] |
| res.Metrics["P99-ns/op"] = latency.data[cnt*99/100] |
| } |
| |
// chooseN chooses the next number of iterations for the benchmark.
| func chooseN(res *Result) bool { |
| const MaxN = 1e12 |
| last := res.N |
| if last == 0 { |
| res.N = 1 |
| return true |
| } else if res.Duration >= *benchTime || last >= MaxN { |
| return false |
| } |
| nsPerOp := max(1, res.RunTime) |
| res.N = uint64(*benchTime) / nsPerOp |
| res.N = max(min(res.N+res.N/2, 100*last), last+1) |
| res.N = roundUp(res.N) |
| return true |
| } |
| |
| // roundUp rounds the number of iterations to a nice value. |
| func roundUp(n uint64) uint64 { |
| tmp := n |
| base := uint64(1) |
| for tmp >= 10 { |
| tmp /= 10 |
| base *= 10 |
| } |
| switch { |
| case n <= base: |
| return base |
| case n <= (2 * base): |
| return 2 * base |
| case n <= (5 * base): |
| return 5 * base |
| default: |
| return 10 * base |
| } |
| panic("unreachable") |
| return 0 |
| } |
| |
| func min(a, b uint64) uint64 { |
| if a < b { |
| return a |
| } |
| return b |
| } |
| |
| func max(a, b uint64) uint64 { |
| if a > b { |
| return a |
| } |
| return b |
| } |
| |
| var tmpSeq = 0 |
| |
| func tempFilename(ext string) string { |
| tmpSeq++ |
| return filepath.Join(*tmpDir, fmt.Sprintf("%v.%v", tmpSeq, ext)) |
| } |