bench: move unreviewed change from the other repo
- sort metrics/files in output
- support generating svg profiles (when run manually)
- specify explicit output binary name in build benchmark (otherwise it does not work on windows)
- make system stats collection work across linux and windows (there is notable hardcore in windows version but that was reviewed by Alex)
LGTM=adg
R=adg
CC=alex.brainman, golang-codereviews
https://golang.org/cl/51450048
diff --git a/build/build.go b/build/build.go
index 8dea9f4..4d64e5d 100644
--- a/build/build.go
+++ b/build/build.go
@@ -5,13 +5,9 @@
package build
import (
- "bytes"
"log"
"os"
"os/exec"
- "path/filepath"
- "runtime"
- "time"
"code.google.com/p/go.benchmarks/driver"
)
@@ -32,7 +28,7 @@
}
log.Printf("Run %v: %+v\n", i, res)
}
- perf1, perf2 := driver.RunUnderProfiler("go", "install", "-a", "-p", os.Getenv("GOMAXPROCS"), "cmd/go")
+ perf1, perf2 := driver.RunUnderProfiler("go", "build", "-o", "goperf", "-a", "-p", os.Getenv("GOMAXPROCS"), "cmd/go")
if perf1 != "" {
res.Files["processes"] = perf1
}
@@ -44,28 +40,15 @@
func benchmarkOnce() driver.Result {
// run 'go build -a'
- t0 := time.Now()
- cmd := exec.Command("go", "install", "-a", "-p", os.Getenv("GOMAXPROCS"), "cmd/go")
- var stderr bytes.Buffer
- cmd.Stderr = &stderr
- if err := cmd.Start(); err != nil {
- log.Fatalf("Failed to start 'go install -a cmd/go': %v", err)
- }
- ss := driver.InitSysStats(1, cmd)
- if err := cmd.Wait(); err != nil {
- log.Fatalf("Failed to run 'go install -a cmd/go': %v\n%v", err, stderr.String())
- }
res := driver.MakeResult()
- res.RunTime = uint64(time.Since(t0))
- res.Metrics["build-time"] = res.RunTime
- ss.Collect(&res, "build-")
+ cmd := exec.Command("go", "build", "-o", "gobuild", "-a", "-p", os.Getenv("GOMAXPROCS"), "cmd/go")
+ out, err := driver.RunAndCollectSysStats(cmd, &res, 1, "build-")
+ if err != nil {
+ log.Fatalf("Failed to run 'go build -a cmd/go': %v\n%v", err, out)
+ }
// go command binary size
- gobin := filepath.Join(os.Getenv("GOROOT"), "bin", "go")
- if runtime.GOOS == "windows" {
- gobin += ".exe"
- }
- gof, err := os.Open(gobin)
+ gof, err := os.Open("gobuild")
if err != nil {
log.Fatalf("Failed to open $GOROOT/bin/go: %v\n", err)
}
@@ -75,7 +58,7 @@
}
res.Metrics["binary-size"] = uint64(st.Size())
- sizef := driver.Size(gobin)
+ sizef := driver.Size("gobuild")
if sizef != "" {
res.Files["sections"] = sizef
}
diff --git a/driver/driver.go b/driver/driver.go
index 92a9bff..74e96f0 100644
--- a/driver/driver.go
+++ b/driver/driver.go
@@ -38,6 +38,7 @@
benchTime = flag.Duration("benchtime", 5*time.Second, "run enough iterations of each benchmark to take the specified time")
affinity = flag.Int("affinity", 0, "process affinity (passed to an OS-specific function like sched_setaffinity/SetProcessAffinityMask)")
tmpDir = flag.String("tmpdir", os.TempDir(), "dir for temporary files")
+ genSvg = flag.Bool("svg", false, "generate svg profiles")
BenchNum int
BenchMem int
@@ -81,11 +82,23 @@
}
res := f()
- for k, v := range res.Metrics {
- fmt.Printf("GOPERF-METRIC:%v=%v\n", k, v)
+
+ var metrics []string
+ for k := range res.Metrics {
+ metrics = append(metrics, k)
}
- for k, v := range res.Files {
- fmt.Printf("GOPERF-FILE:%v=%v\n", k, v)
+ sort.Strings(metrics)
+ for _, m := range metrics {
+ fmt.Printf("GOPERF-METRIC:%v=%v\n", m, res.Metrics[m])
+ }
+
+ var files []string
+ for k := range res.Files {
+ files = append(files, k)
+ }
+ sort.Strings(files)
+ for _, f := range files {
+ fmt.Printf("GOPERF-FILE:%v=%v\n", f, res.Files[f])
}
}
@@ -111,6 +124,7 @@
t = time.Minute
}
t *= time.Duration(*benchNum)
+ t *= 2 // to account for iteration number auto-tuning
if *flake > 0 {
t *= time.Duration(*flake + 2)
}
@@ -189,14 +203,20 @@
// processProfile invokes 'go tool pprof' with the specified args
// and returns name of the resulting file, or an empty string.
func processProfile(args ...string) string {
- proff, err := os.Create(tempFilename("prof.txt"))
+ fname := "prof.txt"
+ typ := "--text"
+ if *genSvg {
+ fname = "prof.svg"
+ typ = "--svg"
+ }
+ proff, err := os.Create(tempFilename(fname))
if err != nil {
log.Printf("Failed to create profile file: %v", err)
return ""
}
defer proff.Close()
var proflog bytes.Buffer
- cmdargs := append([]string{"tool", "pprof", "--text"}, args...)
+ cmdargs := append([]string{"tool", "pprof", typ}, args...)
cmd := exec.Command("go", cmdargs...)
cmd.Stdout = proff
cmd.Stderr = &proflog
@@ -215,8 +235,8 @@
for chooseN(&res) {
log.Printf("Benchmarking %v iterations\n", res.N)
res = runBenchmarkOnce(f, res.N)
- log.Printf("Done: %+v\n", res)
}
+ log.Printf("Result: %+v\n", res)
return res
}
@@ -226,7 +246,7 @@
runtime.GC()
mstats0 := new(runtime.MemStats)
runtime.ReadMemStats(mstats0)
- ss := InitSysStats(N, nil)
+ ss := InitSysStats(N)
res := MakeResult()
res.N = N
res.Files["memprof0"] = tempFilename("memprof")
@@ -252,7 +272,7 @@
pprof.StopCPUProfile()
latencyCollect(&res)
- ss.Collect(&res, "")
+ ss.Collect(&res)
res.Files["memprof"] = tempFilename("memprof")
memprof, err := os.Create(res.Files["memprof"])
@@ -287,10 +307,10 @@
wg.Add(numProcs)
for p := 0; p < numProcs; p++ {
go func() {
+ defer wg.Done()
for int64(atomic.AddUint64(&N, ^uint64(0))) >= 0 {
f()
}
- wg.Done()
}()
}
wg.Wait()
diff --git a/driver/driver_freebsd.go b/driver/driver_freebsd.go
index f27e900..23aa3bd 100644
--- a/driver/driver_freebsd.go
+++ b/driver/driver_freebsd.go
@@ -15,7 +15,7 @@
return ""
}
-func getVMPeak(pid int) uint64 {
+func getVMPeak() uint64 {
return 0
}
diff --git a/driver/driver_unix.go b/driver/driver_unix.go
index 58f7c4f..f41f4f9 100644
--- a/driver/driver_unix.go
+++ b/driver/driver_unix.go
@@ -7,56 +7,63 @@
package driver
import (
+ "bytes"
"log"
"os/exec"
"syscall"
+ "time"
)
type sysStats struct {
N uint64
- Cmd *exec.Cmd
Rusage syscall.Rusage
}
-func InitSysStats(N uint64, cmd *exec.Cmd) sysStats {
- ss := sysStats{N: N, Cmd: cmd}
- if cmd == nil {
- err := syscall.Getrusage(0, &ss.Rusage)
- if err != nil {
- log.Printf("Getrusage failed: %v", err)
- ss.N = 0
- // Deliberately ignore the error.
- }
+func InitSysStats(N uint64) sysStats {
+ ss := sysStats{N: N}
+ if err := syscall.Getrusage(0, &ss.Rusage); err != nil {
+ log.Printf("Getrusage failed: %v", err)
+ ss.N = 0
+ // Deliberately ignore the error.
}
return ss
}
-func (ss sysStats) Collect(res *Result, prefix string) {
+func (ss sysStats) Collect(res *Result) {
if ss.N == 0 {
return
}
- Rusage := new(syscall.Rusage)
- if ss.Cmd == nil {
- err := syscall.Getrusage(0, Rusage)
- if err != nil {
- log.Printf("Getrusage failed: %v", err)
- // Deliberately ignore the error.
- return
- }
- } else {
- Rusage = ss.Cmd.ProcessState.SysUsage().(*syscall.Rusage)
+ if vm := getVMPeak(); vm != 0 {
+ res.Metrics["virtual-mem"] = vm
}
- cpuTime := func(usage *syscall.Rusage) uint64 {
- return uint64(usage.Utime.Sec)*1e9 + uint64(usage.Utime.Usec*1e3) +
- uint64(usage.Stime.Sec)*1e9 + uint64(usage.Stime.Usec)*1e3
+ usage := new(syscall.Rusage)
+ if err := syscall.Getrusage(0, usage); err != nil {
+ log.Printf("Getrusage failed: %v", err)
+ // Deliberately ignore the error.
+ return
}
- res.Metrics[prefix+"rss"] = uint64(Rusage.Maxrss) * rssMultiplier
- res.Metrics[prefix+"cputime"] = (cpuTime(Rusage) - cpuTime(&ss.Rusage)) / ss.N
+ res.Metrics["rss"] = uint64(usage.Maxrss) * rssMultiplier
+ res.Metrics["cputime"] = (cpuTime(usage) - cpuTime(&ss.Rusage)) / ss.N
+}
- if ss.Cmd == nil {
- vm := getVMPeak()
- if vm != 0 {
- res.Metrics[prefix+"vm"] = vm
- }
+func RunAndCollectSysStats(cmd *exec.Cmd, res *Result, N uint64, prefix string) (string, error) {
+ var out bytes.Buffer
+ cmd.Stdout = &out
+ cmd.Stderr = &out
+ t0 := time.Now()
+ if err := cmd.Run(); err != nil {
+ return out.String(), err
}
+ t1 := time.Now()
+ usage := cmd.ProcessState.SysUsage().(*syscall.Rusage)
+ res.RunTime = uint64(t1.Sub(t0)) / N
+ res.Metrics[prefix+"time"] = res.RunTime
+ res.Metrics[prefix+"cputime"] = cpuTime(usage) / N
+ res.Metrics[prefix+"rss"] = uint64(usage.Maxrss) * rssMultiplier
+ return out.String(), nil
+}
+
+func cpuTime(usage *syscall.Rusage) uint64 {
+ return uint64(usage.Utime.Sec)*1e9 + uint64(usage.Utime.Usec*1e3) +
+ uint64(usage.Stime.Sec)*1e9 + uint64(usage.Stime.Usec)*1e3
}
diff --git a/driver/driver_windows.go b/driver/driver_windows.go
index 3110240..8232e11 100644
--- a/driver/driver_windows.go
+++ b/driver/driver_windows.go
@@ -5,18 +5,35 @@
package driver
import (
+ "bytes"
"log"
"os/exec"
+ "sync"
"syscall"
+ "time"
"unsafe"
)
// access to Windows APIs
var (
- modkernel32 = syscall.NewLazyDLL("kernel32.dll")
- modpsapi = syscall.NewLazyDLL("psapi.dll")
- procGetProcessMemoryInfo = modpsapi.NewProc("GetProcessMemoryInfo")
- procSetProcessAffinityMask = modkernel32.NewProc("SetProcessAffinityMask")
+ modkernel32 = syscall.NewLazyDLL("kernel32.dll")
+ modpsapi = syscall.NewLazyDLL("psapi.dll")
+ procGetProcessMemoryInfo = modpsapi.NewProc("GetProcessMemoryInfo")
+ procSetProcessAffinityMask = modkernel32.NewProc("SetProcessAffinityMask")
+ procCreateJobObjectW = modkernel32.NewProc("CreateJobObjectW")
+ procOpenJobObjectW = modkernel32.NewProc("OpenJobObjectW")
+ procAssignProcessToJobObject = modkernel32.NewProc("AssignProcessToJobObject")
+ procSetInformationJobObject = modkernel32.NewProc("SetInformationJobObject")
+
+ initJobOnce sync.Once
+ currentProcess syscall.Handle
+ childMu sync.Mutex
+ childProcesses []syscall.Handle
+)
+
+const (
+ JOB_OBJECT_MSG_NEW_PROCESS = 6
+ JobObjectAssociateCompletionPortInformation = 7
)
type PROCESS_MEMORY_COUNTERS struct {
@@ -32,6 +49,79 @@
PeakPagefileUsage uintptr
}
+type JOBOBJECT_ASSOCIATE_COMPLETION_PORT struct {
+ CompletionKey uintptr
+ CompletionPort syscall.Handle
+}
+
+func init() {
+ var err error
+ currentProcess, err = syscall.GetCurrentProcess()
+ if err != nil {
+ log.Fatalf("GetCurrentProcess failed: %v", err)
+ }
+}
+
+func initJob() {
+ // Create Job object and assign current process to it.
+ jobObject, err := createJobObject(nil, nil)
+ if err != nil {
+ log.Printf("CreateJobObject failed: %v", err)
+ return
+ }
+ if err = assignProcessToJobObject(jobObject, currentProcess); err != nil {
+ log.Printf("AssignProcessToJobObject failed: %v", err)
+ syscall.Close(jobObject)
+ return
+ }
+ iocp, err := syscall.CreateIoCompletionPort(syscall.InvalidHandle, 0, 0, 1)
+ if err != nil {
+ log.Printf("CreateIoCompletionPort failed: %v", err)
+ syscall.Close(jobObject)
+ return
+ }
+ port := JOBOBJECT_ASSOCIATE_COMPLETION_PORT{
+ CompletionKey: uintptr(jobObject),
+ CompletionPort: iocp,
+ }
+ err = setInformationJobObject(jobObject, JobObjectAssociateCompletionPortInformation, uintptr(unsafe.Pointer(&port)), uint32(unsafe.Sizeof(port)))
+ if err != nil {
+ log.Printf("SetInformationJobObject failed: %v", err)
+ syscall.Close(jobObject)
+ syscall.Close(iocp)
+ return
+ }
+ // Read Job notifications about new "child" processes and collect them in childProcesses.
+ go func() {
+ var code, key uint32
+ var o *syscall.Overlapped
+ for {
+ err := syscall.GetQueuedCompletionStatus(iocp, &code, &key, &o, syscall.INFINITE)
+ if err != nil {
+ log.Printf("GetQueuedCompletionStatus failed: %v", err)
+ return
+ }
+ if key != uint32(jobObject) {
+ panic("Invalid GetQueuedCompletionStatus key parameter")
+ }
+ if code == JOB_OBJECT_MSG_NEW_PROCESS {
+ pid := int(uintptr(unsafe.Pointer(o)))
+ if pid == syscall.Getpid() {
+ continue
+ }
+ c, err := syscall.OpenProcess(syscall.PROCESS_QUERY_INFORMATION, false, uint32(pid))
+ if err != nil {
+ log.Printf("OpenProcess failed: %v", err)
+ continue
+ }
+ childMu.Lock()
+ childProcesses = append(childProcesses, c)
+ childMu.Unlock()
+ }
+ }
+ }()
+}
+
func getProcessMemoryInfo(h syscall.Handle, mem *PROCESS_MEMORY_COUNTERS) (err error) {
r1, _, e1 := syscall.Syscall(procGetProcessMemoryInfo.Addr(), 3, uintptr(h), uintptr(unsafe.Pointer(mem)), uintptr(unsafe.Sizeof(*mem)))
if r1 == 0 {
@@ -56,6 +146,43 @@
return
}
+func createJobObject(jobAttrs *syscall.SecurityAttributes, name *uint16) (handle syscall.Handle, err error) {
+ r0, _, e1 := syscall.Syscall(procCreateJobObjectW.Addr(), 2, uintptr(unsafe.Pointer(jobAttrs)), uintptr(unsafe.Pointer(name)), 0)
+ handle = syscall.Handle(r0)
+ if handle == 0 {
+ if e1 != 0 {
+ err = error(e1)
+ } else {
+ err = syscall.EINVAL
+ }
+ }
+ return
+}
+
+func assignProcessToJobObject(job syscall.Handle, process syscall.Handle) (err error) {
+ r1, _, e1 := syscall.Syscall(procAssignProcessToJobObject.Addr(), 2, uintptr(job), uintptr(process), 0)
+ if r1 == 0 {
+ if e1 != 0 {
+ err = error(e1)
+ } else {
+ err = syscall.EINVAL
+ }
+ }
+ return
+}
+
+func setInformationJobObject(job syscall.Handle, infoclass uint32, info uintptr, infolien uint32) (err error) {
+ r1, _, e1 := syscall.Syscall6(procSetInformationJobObject.Addr(), 4, uintptr(job), uintptr(infoclass), uintptr(info), uintptr(infolien), 0, 0)
+ if r1 == 0 {
+ if e1 != 0 {
+ err = error(e1)
+ } else {
+ err = syscall.EINVAL
+ }
+ }
+ return
+}
+
func RunUnderProfiler(args ...string) (string, string) {
return "", ""
}
@@ -66,65 +193,99 @@
}
type sysStats struct {
- N uint64
- Cmd *exec.Cmd
- Handle syscall.Handle
- Mem PROCESS_MEMORY_COUNTERS
- CPU syscall.Rusage
+ N uint64
+ CPU syscall.Rusage
}
-func InitSysStats(N uint64, cmd *exec.Cmd) (ss sysStats) {
- if cmd == nil {
- h, err := syscall.GetCurrentProcess()
- if err != nil {
- log.Printf("GetCurrentProcess failed: %v", err)
- return
- }
- if err := syscall.GetProcessTimes(h, &ss.CPU.CreationTime, &ss.CPU.ExitTime, &ss.CPU.KernelTime, &ss.CPU.UserTime); err != nil {
- log.Printf("GetProcessTimes failed: %v", err)
- return
- }
- ss.Handle = h
- } else {
- h, err := syscall.OpenProcess(syscall.PROCESS_QUERY_INFORMATION, false, uint32(cmd.Process.Pid))
- if err != nil {
- log.Printf("OpenProcess failed: %v", err)
- return
- }
- ss.Handle = h
+func InitSysStats(N uint64) (ss sysStats) {
+ if err := syscall.GetProcessTimes(currentProcess, &ss.CPU.CreationTime, &ss.CPU.ExitTime, &ss.CPU.KernelTime, &ss.CPU.UserTime); err != nil {
+ log.Printf("GetProcessTimes failed: %v", err)
+ return
}
ss.N = N
- ss.Cmd = cmd
return ss
}
-func (ss sysStats) Collect(res *Result, prefix string) {
+func (ss sysStats) Collect(res *Result) {
if ss.N == 0 {
return
}
- if ss.Cmd != nil {
- defer syscall.CloseHandle(ss.Handle)
- // TODO(dvyukov): GetProcessMemoryInfo/GetProcessTimes return info only for the process,
- // but not for child processes, so build benchmark numbers are incorrect.
- // It's better to report nothing than to report wrong numbers.
- return
- }
var Mem PROCESS_MEMORY_COUNTERS
- if err := getProcessMemoryInfo(ss.Handle, &Mem); err != nil {
+ if err := getProcessMemoryInfo(currentProcess, &Mem); err != nil {
log.Printf("GetProcessMemoryInfo failed: %v", err)
return
}
var CPU syscall.Rusage
- if err := syscall.GetProcessTimes(ss.Handle, &CPU.CreationTime, &CPU.ExitTime, &CPU.KernelTime, &CPU.UserTime); err != nil {
+ if err := syscall.GetProcessTimes(currentProcess, &CPU.CreationTime, &CPU.ExitTime, &CPU.KernelTime, &CPU.UserTime); err != nil {
log.Printf("GetProcessTimes failed: %v", err)
return
}
- res.Metrics[prefix+"cputime"] = (getCPUTime(CPU) - getCPUTime(ss.CPU)) / ss.N
- res.Metrics[prefix+"rss"] = uint64(Mem.PeakWorkingSetSize)
+ res.Metrics["cputime"] = (getCPUTime(CPU) - getCPUTime(ss.CPU)) / ss.N
+ res.Metrics["rss"] = uint64(Mem.PeakWorkingSetSize)
+}
+
+func RunAndCollectSysStats(cmd *exec.Cmd, res *Result, N uint64, prefix string) (string, error) {
+ initJobOnce.Do(initJob)
+
+ childMu.Lock()
+ children := childProcesses
+ childProcesses = []syscall.Handle{}
+ childMu.Unlock()
+ for _, proc := range children {
+ syscall.CloseHandle(proc)
+ }
+
+ var out bytes.Buffer
+ cmd.Stdout = &out
+ cmd.Stderr = &out
+ t0 := time.Now()
+ if err := cmd.Run(); err != nil {
+ return out.String(), err
+ }
+ t1 := time.Now()
+
+ res.RunTime = uint64(t1.Sub(t0)) / N
+ res.Metrics[prefix+"time"] = res.RunTime
+
+ childMu.Lock()
+ children = childProcesses
+ childProcesses = []syscall.Handle{}
+ childMu.Unlock()
+ if len(children) == 0 {
+ log.Printf("sysStats.Collect: no child processes?")
+ return out.String(), nil
+ }
+ defer func() {
+ for _, proc := range children {
+ syscall.CloseHandle(proc)
+ }
+ }()
+ cputime := uint64(0)
+ rss := uint64(0)
+ for _, proc := range children {
+ var Mem PROCESS_MEMORY_COUNTERS
+ if err := getProcessMemoryInfo(proc, &Mem); err != nil {
+ log.Printf("GetProcessMemoryInfo failed: %v", err)
+ return out.String(), nil
+ }
+ var CPU syscall.Rusage
+ if err := syscall.GetProcessTimes(proc, &CPU.CreationTime, &CPU.ExitTime, &CPU.KernelTime, &CPU.UserTime); err != nil {
+ log.Printf("GetProcessTimes failed: %v", err)
+ return out.String(), nil
+ }
+ cputime += getCPUTime(CPU) / N
+ rss += uint64(Mem.PeakWorkingSetSize)
+ }
+
+ res.Metrics[prefix+"cputime"] = cputime
+ res.Metrics[prefix+"rss"] = rss
+ return out.String(), nil
}
func getCPUTime(CPU syscall.Rusage) uint64 {
- return uint64(CPU.KernelTime.Nanoseconds() + CPU.UserTime.Nanoseconds())
+ var CPU0 syscall.Rusage // time is offsetted, so we need to subtract "zero"
+ return uint64(CPU.KernelTime.Nanoseconds() + CPU.UserTime.Nanoseconds() -
+ CPU0.KernelTime.Nanoseconds() - CPU0.UserTime.Nanoseconds())
}
func setProcessAffinity(v int) {