sweet: add mode for PGO benchmarking

With -pgo, for each configuration sweet automatically runs an initial
profiling configuration. The merged profiles from these runs are used as
the PGO input to a ".pgo" variant of the configuration. Comparing the
base configuration to the ".pgo" variant indicates the effect of PGO.

At a lower level, the config format adds a "pgofiles" map, which can be
used to specify PGO profiles to use for each benchmark.

Ultimately this sets GOFLAGS=-pgo=/path in BuildEnv. Some benchmarks may
not currently properly plumb this into their build (e.g., the GoBuild
benchmarks don't build the compiler at all). Existing benchmarks need to
be double-checked that they actually get PGO enabled.

For golang/go#55022.

Change-Id: I81c0cb085ef3b5196a05d2565dd0e2f83057b9fa
Reviewed-on: https://go-review.googlesource.com/c/benchmarks/+/444557
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Pratt <mpratt@google.com>
diff --git a/sweet/benchmarks/go-build/main.go b/sweet/benchmarks/go-build/main.go
index 1d72584..c49ad0b 100644
--- a/sweet/benchmarks/go-build/main.go
+++ b/sweet/benchmarks/go-build/main.go
@@ -17,6 +17,7 @@
 	"golang.org/x/benchmarks/sweet/benchmarks/internal/cgroups"
 	"golang.org/x/benchmarks/sweet/benchmarks/internal/driver"
 	"golang.org/x/benchmarks/sweet/common"
+	sprofile "golang.org/x/benchmarks/sweet/common/profile"
 )
 
 var (
@@ -131,7 +132,9 @@
 }
 
 func mergeProfiles(dir, prefix string) (*profile.Profile, error) {
-	profiles, err := collectProfiles(dir, prefix)
+	profiles, err := sprofile.ReadDir(dir, func(name string) bool {
+		return strings.HasPrefix(name, prefix)
+	})
 	if err != nil {
 		return nil, err
 	}
@@ -139,7 +142,10 @@
 }
 
 func copyProfiles(dir, bin string, typ driver.ProfileType, finalPrefix string) error {
-	profiles, err := collectProfiles(dir, profilePrefix(bin, typ))
+	prefix := profilePrefix(bin, typ)
+	profiles, err := sprofile.ReadDir(dir, func(name string) bool {
+		return strings.HasPrefix(name, prefix)
+	})
 	if err != nil {
 		return err
 	}
@@ -151,34 +157,6 @@
 	return nil
 }
 
-func collectProfiles(dir, prefix string) ([]*profile.Profile, error) {
-	entries, err := os.ReadDir(dir)
-	if err != nil {
-		return nil, err
-	}
-	var profiles []*profile.Profile
-	for _, entry := range entries {
-		name := entry.Name()
-		path := filepath.Join(tmpDir, name)
-		if info, err := entry.Info(); err != nil {
-			return nil, err
-		} else if info.Size() == 0 {
-			// Skip zero-sized files, otherwise the pprof package
-			// will call it a parsing error.
-			continue
-		}
-		if strings.HasPrefix(name, prefix) {
-			p, err := driver.ReadProfile(path)
-			if err != nil {
-				return nil, err
-			}
-			profiles = append(profiles, p)
-			continue
-		}
-	}
-	return profiles, nil
-}
-
 func profilePrefix(bin string, typ driver.ProfileType) string {
 	return bin + "-prof." + string(typ)
 }
diff --git a/sweet/benchmarks/internal/cgroups/cgroups.go b/sweet/benchmarks/internal/cgroups/cgroups.go
index 0a55a3d..569efa8 100644
--- a/sweet/benchmarks/internal/cgroups/cgroups.go
+++ b/sweet/benchmarks/internal/cgroups/cgroups.go
@@ -110,4 +110,3 @@
 		return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
 	}
 }
-
diff --git a/sweet/benchmarks/internal/driver/driver.go b/sweet/benchmarks/internal/driver/driver.go
index 015691a..bb6e3eb 100644
--- a/sweet/benchmarks/internal/driver/driver.go
+++ b/sweet/benchmarks/internal/driver/driver.go
@@ -547,15 +547,6 @@
 	panic("bad profile type")
 }
 
-func ReadProfile(filename string) (*profile.Profile, error) {
-	f, err := os.Open(filename)
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-	return profile.Parse(f)
-}
-
 func WriteProfile(prof *profile.Profile, typ ProfileType, pattern string) error {
 	if !ProfilingEnabled(typ) {
 		return fmt.Errorf("this type of profile is not currently enabled")
diff --git a/sweet/benchmarks/tile38/main.go b/sweet/benchmarks/tile38/main.go
index e0605bd..ea3027a 100644
--- a/sweet/benchmarks/tile38/main.go
+++ b/sweet/benchmarks/tile38/main.go
@@ -22,6 +22,7 @@
 
 	"golang.org/x/benchmarks/sweet/benchmarks/internal/driver"
 	"golang.org/x/benchmarks/sweet/benchmarks/internal/pool"
+	"golang.org/x/benchmarks/sweet/common/profile"
 
 	"github.com/gomodule/redigo/redis"
 )
@@ -307,7 +308,7 @@
 		// Copy it over.
 		for _, typ := range []driver.ProfileType{driver.ProfileCPU, driver.ProfileMem} {
 			if driver.ProfilingEnabled(typ) {
-				p, r := driver.ReadProfile(cfg.profilePath(typ))
+				p, r := profile.Read(cfg.profilePath(typ))
 				if r != nil {
 					err = r
 					return
diff --git a/sweet/cmd/sweet/benchmark.go b/sweet/cmd/sweet/benchmark.go
index a39d2c2..8ad98a0 100644
--- a/sweet/cmd/sweet/benchmark.go
+++ b/sweet/cmd/sweet/benchmark.go
@@ -202,20 +202,27 @@
 		return err
 	}
 
-	// Retrieve the benchmark's source.
-	if err := b.harness.Get(srcDir); err != nil {
-		return fmt.Errorf("retrieving source for %s: %v", b.name, err)
+	// Retrieve the benchmark's source, if needed. If execute is called
+	// multiple times, this will already be done.
+	_, err := os.Stat(srcDir)
+	if os.IsNotExist(err) {
+		if err := b.harness.Get(srcDir); err != nil {
+			return fmt.Errorf("retrieving source for %s: %v", b.name, err)
+		}
 	}
 
 	// Create the results directory for the benchmark.
-	resultsDir := filepath.Join(r.resultsDir, b.name)
+	resultsDir := r.benchmarkResultsDir(b)
 	if err := mkdirAll(resultsDir); err != nil {
 		return fmt.Errorf("creating results directory for %s: %v", b.name, err)
 	}
 
 	// Perform a setup step for each config for the benchmark.
 	setups := make([]common.RunConfig, 0, len(cfgs))
-	for _, cfg := range cfgs {
+	for _, pcfg := range cfgs {
+		// Local copy for per-benchmark environment adjustments.
+		cfg := pcfg.Copy()
+
 		// Create directory hierarchy for benchmarks.
 		workDir := filepath.Join(topDir, cfg.Name)
 		binDir := filepath.Join(workDir, "bin")
@@ -236,6 +243,16 @@
 			}
 		}
 
+		// Add PGO if profile specified for this benchmark.
+		if pgo, ok := cfg.PGOFiles[b.name]; ok {
+			goflags, ok := cfg.BuildEnv.Lookup("GOFLAGS")
+			if ok {
+				goflags += " "
+			}
+			goflags += fmt.Sprintf("-pgo=%s", pgo)
+			cfg.BuildEnv.Env = cfg.BuildEnv.MustSet("GOFLAGS=" + goflags)
+		}
+
 		// Build the benchmark (application and any other necessary components).
 		bcfg := common.BuildConfig{
 			BinDir:   binDir,
@@ -264,7 +281,7 @@
 		}
 		if r.cpuProfile || r.memProfile || r.perf {
 			// Create a directory for any profile files to live in.
-			resultsProfilesDir := filepath.Join(resultsDir, fmt.Sprintf("%s.debug", cfg.Name))
+			resultsProfilesDir := r.runProfilesDir(b, cfg)
 			mkdirAll(resultsProfilesDir)
 
 			// We need to pass arguments to the benchmark binary to generate
diff --git a/sweet/cmd/sweet/integration_test.go b/sweet/cmd/sweet/integration_test.go
index c106f4c..544a654 100644
--- a/sweet/cmd/sweet/integration_test.go
+++ b/sweet/cmd/sweet/integration_test.go
@@ -20,6 +20,15 @@
 )
 
 func TestSweetEndToEnd(t *testing.T) {
+	t.Run("standard", func(t *testing.T) {
+		testSweetEndToEnd(t, false)
+	})
+	t.Run("pgo", func(t *testing.T) {
+		testSweetEndToEnd(t, true)
+	})
+}
+
+func testSweetEndToEnd(t *testing.T, pgo bool) {
 	if runtime.GOOS != "linux" || runtime.GOARCH != "amd64" {
 		t.Skip("Sweet is currently only fully supported on linux/amd64")
 	}
@@ -39,6 +48,17 @@
 		Env:  common.NewEnvFromEnviron(),
 	}
 
+	if pgo {
+		cmd := exec.Command(goTool.Tool, "help", "build")
+		out, err := cmd.Output()
+		if err != nil {
+			t.Fatalf("error running go help build: %v", err)
+		}
+		if !strings.Contains(string(out), "-pgo") {
+			t.Skip("toolchain missing -pgo support")
+		}
+	}
+
 	// Build sweet.
 	wd, err := os.Getwd()
 	if err != nil {
@@ -103,7 +123,8 @@
 
 	var outputMu sync.Mutex
 	runShard := func(shard, resultsDir, workDir string) {
-		runCmd := exec.Command(sweetBin, "run",
+		args := []string{
+			"run",
 			"-run", shard,
 			"-shell",
 			"-count", "1",
@@ -112,8 +133,12 @@
 			"-results", resultsDir,
 			"-work-dir", workDir,
 			"-short",
-			cfgPath,
-		)
+		}
+		if pgo {
+			args = append(args, "-pgo")
+		}
+		args = append(args, cfgPath)
+		runCmd := exec.Command(sweetBin, args...)
 		output, runErr := runCmd.CombinedOutput()
 
 		outputMu.Lock()
diff --git a/sweet/cmd/sweet/run.go b/sweet/cmd/sweet/run.go
index 04ce9ed..f683633 100644
--- a/sweet/cmd/sweet/run.go
+++ b/sweet/cmd/sweet/run.go
@@ -12,6 +12,7 @@
 	"io/fs"
 	"os"
 	"path/filepath"
+	"regexp"
 	"sort"
 	"strings"
 	"unicode/utf8"
@@ -19,8 +20,10 @@
 	"golang.org/x/benchmarks/sweet/cli/bootstrap"
 	"golang.org/x/benchmarks/sweet/common"
 	"golang.org/x/benchmarks/sweet/common/log"
+	sprofile "golang.org/x/benchmarks/sweet/common/profile"
 
 	"github.com/BurntSushi/toml"
+	"github.com/google/pprof/profile"
 )
 
 type csvFlag []string
@@ -41,6 +44,8 @@
 `
 )
 
+const pgoCountDefaultMax = 5
+
 type runCfg struct {
 	count       int
 	resultsDir  string
@@ -53,6 +58,8 @@
 	memProfile  bool
 	perf        bool
 	perfFlags   string
+	pgo         bool
+	pgoCount    int
 	short       bool
 
 	assetsFS fs.FS
@@ -67,6 +74,14 @@
 	}
 }
 
+func (r *runCfg) benchmarkResultsDir(b *benchmark) string {
+	return filepath.Join(r.resultsDir, b.name)
+}
+
+func (r *runCfg) runProfilesDir(b *benchmark, c *common.Config) string {
+	return filepath.Join(r.benchmarkResultsDir(b), fmt.Sprintf("%s.debug", c.Name))
+}
+
 type runCmd struct {
 	runCfg
 	quiet       bool
@@ -135,6 +150,8 @@
 	f.BoolVar(&c.runCfg.memProfile, "memprofile", false, "whether to dump a memory profile for each benchmark (ensures all executions do the same amount of work")
 	f.BoolVar(&c.runCfg.perf, "perf", false, "whether to run each benchmark under Linux perf and dump the results")
 	f.StringVar(&c.runCfg.perfFlags, "perf-flags", "", "the flags to pass to Linux perf if -perf is set")
+	f.BoolVar(&c.pgo, "pgo", false, "perform PGO testing; for each config, collect profiles from a baseline run which are used to feed into a generated PGO config")
+	f.IntVar(&c.runCfg.pgoCount, "pgo-count", 0, "the number of times to run profiling runs for -pgo; defaults to the value of -count if <=5, or 5 if higher")
 	f.IntVar(&c.runCfg.count, "count", 25, "the number of times to run each benchmark")
 
 	f.BoolVar(&c.quiet, "quiet", false, "whether to suppress activity output on stderr (no effect on -shell)")
@@ -153,6 +170,13 @@
 	log.SetCommandTrace(c.printCmd)
 	log.SetActivityLog(!c.quiet)
 
+	if c.runCfg.pgoCount == 0 {
+		c.runCfg.pgoCount = c.runCfg.count
+		if c.runCfg.pgoCount > pgoCountDefaultMax {
+			c.runCfg.pgoCount = pgoCountDefaultMax
+		}
+	}
+
 	var err error
 	if c.workDir == "" {
 		// Create a temporary work tree for running the benchmarks.
@@ -271,6 +295,14 @@
 			if config.ExecEnv.Env == nil {
 				config.ExecEnv.Env = common.NewEnvFromEnviron()
 			}
+			if config.PGOFiles == nil {
+				config.PGOFiles = make(map[string]string)
+			}
+			for k := range config.PGOFiles {
+				if _, ok := allBenchmarksMap[k]; !ok {
+					return fmt.Errorf("config %q in %q pgofiles references unknown benchmark %q", config.Name, configFile, k)
+				}
+			}
 			configs = append(configs, config)
 		}
 	}
@@ -308,6 +340,14 @@
 		}
 	}
 
+	// Collect profiles from baseline runs and create new PGO'd configs.
+	if c.pgo {
+		configs, err = c.preparePGO(configs, benchmarks)
+		if err != nil {
+			return fmt.Errorf("error preparing PGO profiles: %w", err)
+		}
+	}
+
 	// Execute each benchmark for all configs.
 	var errEncountered bool
 	for _, b := range benchmarks {
@@ -325,6 +365,87 @@
 	return nil
 }
 
+func (c *runCmd) preparePGO(configs []*common.Config, benchmarks []*benchmark) ([]*common.Config, error) {
+	profileConfigs := make([]*common.Config, 0, len(configs))
+	for _, c := range configs {
+		cc := c.Copy()
+		cc.Name += ".profile"
+		profileConfigs = append(profileConfigs, cc)
+	}
+
+	profileRunCfg := c.runCfg
+	profileRunCfg.cpuProfile = true
+	profileRunCfg.count = profileRunCfg.pgoCount
+
+	log.Printf("Running profile collection runs")
+
+	// Execute benchmarks to collect profiles.
+	var errEncountered bool
+	for _, b := range benchmarks {
+		if err := b.execute(profileConfigs, &profileRunCfg); err != nil {
+			if c.stopOnError {
+				return nil, err
+			}
+			errEncountered = true
+			log.Error(err)
+		}
+	}
+	if errEncountered {
+		return nil, fmt.Errorf("failed to execute profile collection benchmarks, see log for details")
+	}
+
+	// Merge all the profiles and add new PGO configs.
+	newConfigs := configs
+	for i := range configs {
+		origConfig := configs[i]
+		profileConfig := profileConfigs[i]
+		pgoConfig := origConfig.Copy()
+		pgoConfig.Name += ".pgo"
+		pgoConfig.PGOFiles = make(map[string]string)
+
+		for _, b := range benchmarks {
+			p, err := mergeCPUProfiles(profileRunCfg.runProfilesDir(b, profileConfig))
+			if err != nil {
+				return nil, fmt.Errorf("error merging profiles for %s/%s: %w", b.name, profileConfig.Name, err)
+			}
+			pgoConfig.PGOFiles[b.name] = p
+		}
+
+		newConfigs = append(newConfigs, pgoConfig)
+	}
+
+	return newConfigs, nil
+}
+
+var cpuProfileRe = regexp.MustCompile(`^.*\.cpu[0-9]+$`)
+
+func mergeCPUProfiles(dir string) (string, error) {
+	profiles, err := sprofile.ReadDir(dir, func(name string) bool {
+		return cpuProfileRe.FindString(name) != ""
+	})
+	if err != nil {
+		return "", fmt.Errorf("error reading dir %q: %w", dir, err)
+	}
+	if len(profiles) == 0 {
+		return "", fmt.Errorf("no profiles found in %q", dir)
+	}
+
+	p, err := profile.Merge(profiles)
+	if err != nil {
+		return "", fmt.Errorf("error merging profiles: %w", err)
+	}
+
+	out := filepath.Join(dir, "merged.cpu")
+	f, err := os.Create(out)
+	defer f.Close()
+
+	if err := p.Write(f); err != nil {
+		return "", fmt.Errorf("error writing merged profile: %w", err)
+	}
+
+	return out, nil
+}
+
 func canonicalizePath(path, base string) string {
 	if filepath.IsAbs(path) {
 		return path
diff --git a/sweet/common/config.go b/sweet/common/config.go
index 5fefdda..6b56820 100644
--- a/sweet/common/config.go
+++ b/sweet/common/config.go
@@ -43,10 +43,11 @@
 }
 
 type Config struct {
-	Name     string    `toml:"name"`
-	GoRoot   string    `toml:"goroot"`
-	BuildEnv ConfigEnv `toml:"envbuild"`
-	ExecEnv  ConfigEnv `toml:"envexec"`
+	Name     string            `toml:"name"`
+	GoRoot   string            `toml:"goroot"`
+	BuildEnv ConfigEnv         `toml:"envbuild"`
+	ExecEnv  ConfigEnv         `toml:"envexec"`
+	PGOFiles map[string]string `toml:"pgofiles"`
 }
 
 func (c *Config) GoTool() *Go {
@@ -58,6 +59,14 @@
 	}
 }
 
+// Copy returns a deep copy of Config.
+func (c *Config) Copy() *Config {
+	// Currently, all fields in Config are immutable, so a simply copy is
+	// sufficient.
+	cc := *c
+	return &cc
+}
+
 func ConfigFileMarshalTOML(c *ConfigFile) ([]byte, error) {
 	// Unfortunately because the github.com/BurntSushi/toml
 	// package at v1.0.0 doesn't correctly support Marshaler
@@ -67,10 +76,11 @@
 	// on Config and use dummy types that have a straightforward
 	// mapping that *does* work.
 	type config struct {
-		Name     string   `toml:"name"`
-		GoRoot   string   `toml:"goroot"`
-		BuildEnv []string `toml:"envbuild"`
-		ExecEnv  []string `toml:"envexec"`
+		Name     string            `toml:"name"`
+		GoRoot   string            `toml:"goroot"`
+		BuildEnv []string          `toml:"envbuild"`
+		ExecEnv  []string          `toml:"envexec"`
+		PGOFiles map[string]string `toml:"pgofiles"`
 	}
 	type configFile struct {
 		Configs []*config `toml:"config"`
@@ -82,6 +92,7 @@
 		cfg.GoRoot = c.GoRoot
 		cfg.BuildEnv = c.BuildEnv.Collapse()
 		cfg.ExecEnv = c.ExecEnv.Collapse()
+		cfg.PGOFiles = c.PGOFiles
 
 		cfgs.Configs = append(cfgs.Configs, &cfg)
 	}
diff --git a/sweet/common/profile/profile.go b/sweet/common/profile/profile.go
new file mode 100644
index 0000000..d448997
--- /dev/null
+++ b/sweet/common/profile/profile.go
@@ -0,0 +1,51 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package profile supports working with pprof profiles.
+package profile
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/google/pprof/profile"
+)
+
+func Read(filename string) (*profile.Profile, error) {
+	f, err := os.Open(filename)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	return profile.Parse(f)
+}
+
+// ReadDir reads all profiles in dir whose name matches match(name).
+func ReadDir(dir string, match func(string) bool) ([]*profile.Profile, error) {
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return nil, err
+	}
+	var profiles []*profile.Profile
+	for _, entry := range entries {
+		name := entry.Name()
+		path := filepath.Join(dir, name)
+		if info, err := entry.Info(); err != nil {
+			return nil, err
+		} else if info.Size() == 0 {
+			// Skip zero-sized files, otherwise the pprof package
+			// will call it a parsing error.
+			continue
+		}
+		if match(name) {
+			p, err := Read(path)
+			if err != nil {
+				return nil, err
+			}
+			profiles = append(profiles, p)
+			continue
+		}
+	}
+	return profiles, nil
+}