blob: 149cb7de6317b269014ef4d801dde1ebd7cbbe68 [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package benchproc
import (
"fmt"
"hash/maphash"
"strings"
"sync"
"golang.org/x/perf/benchfmt"
"golang.org/x/perf/benchproc/internal/parse"
)
// TODO: If we support comparison operators in filter expressions,
// does it make sense to unify the orders understood by projections
// with the comparison orders supported in filters? One danger is that
// the default order for projections is observation order, but if you
// filter on key<val, you probably want that to be numeric by default
// (it's not clear you ever want a comparison on observation order).
// A ProjectionParser parses one or more related projection expressions.
type ProjectionParser struct {
configKeys map[string]bool // Specific .config keys (excluded from .config)
fullnameKeys []string // Specific sub-name keys (excluded from .fullname)
haveConfig bool // .config was projected
haveFullname bool // .fullname was projected
// Fields below here are constructed when the first Result is
// processed.
fullExtractor extractor
}
// Parse parses a single projection expression, such as ".name,/size".
// A projection expression describes how to extract fields of a
// benchfmt.Result into a Key and how to order the resulting Keys. See
// "go doc golang.org/x/perf/benchproc/syntax" for a description of
// projection syntax.
//
// A projection expression may also imply a filter, for example if
// there's a fixed order like "/size@(1MiB)". Parse will add any filters
// to "filter".
//
// If an application calls Parse multiple times on the same
// ProjectionParser, these form a mutually-exclusive group of
// projections in which specific keys in any projection are excluded
// from group keys in any other projection. The group keys are
// ".config" and ".fullname". For example, given two projections
// ".config" and "commit,date", the specific file configuration keys
// "commit" and "date" are excluded from the group key ".config".
// The result is the same regardless of the order these expressions
// are parsed in.
func (p *ProjectionParser) Parse(projection string, filter *Filter) (*Projection, error) {
if p.configKeys == nil {
p.configKeys = make(map[string]bool)
}
proj := newProjection()
// Parse the projection.
parts, err := parse.ParseProjection(projection)
if err != nil {
return nil, err
}
var filterParts []filterFn
for _, part := range parts {
f, err := p.makeProjection(proj, projection, part)
if err != nil {
return nil, err
}
if f != nil {
filterParts = append(filterParts, f)
}
}
// Now that we've ensured the projection is valid, add any
// filter parts to the filter.
if len(filterParts) > 0 {
if filter == nil {
panic(fmt.Sprintf("projection expression %s contains a filter, but Parse was passed a nil *Filter", projection))
}
filterParts = append(filterParts, filter.match)
filter.match = filterOp(parse.OpAnd, filterParts)
}
return proj, nil
}
// ParseWithUnit is like Parse, but the returned Projection has an
// additional field called ".unit" that extracts the unit of each
// individual benchfmt.Value in a benchfmt.Result. It returns the
// Projection and the ".unit" Field.
//
// Typically, callers need to break out individual benchmark values on
// some dimension of a set of Projections. Adding a .unit field makes
// this easy.
//
// Callers should use the ProjectValues method of the returned
// Projection rather than the Project method to project each value
// rather than the whole benchfmt.Result.
func (p *ProjectionParser) ParseWithUnit(projection string, filter *Filter) (*Projection, *Field, error) {
proj, err := p.Parse(projection, filter)
if err != nil {
return nil, nil, err
}
field := proj.addField(proj.root, ".unit")
field.order = make(map[string]int)
field.cmp = func(a, b string) int {
return field.order[a] - field.order[b]
}
proj.unitField = field
return proj, field, nil
}
// Residue returns a projection for any field not yet projected by any
// projection parsed by p. The resulting Projection does not have a
// meaningful order.
//
// For example, following calls to p.Parse("goos") and
// p.Parse(".fullname"), Reside would return a Projection with fields
// for all file configuration fields except goos.
//
// The intended use of this is to report when a user may have
// over-aggregated results. Specifically, track the residues of all of
// the benchfmt.Results that are aggregated together (e.g., into a
// single table cell). If there's more than one distinct residue, report
// that those results differed in some field. Typically this is used
// with NonSingularFields to report exactly which fields differ.
func (p *ProjectionParser) Residue() *Projection {
s := newProjection()
// The .config and .fullname groups together cover the
// projection space. If they haven't already been specified,
// then these groups (with any specific keys excluded) exactly
// form the remainder.
if !p.haveConfig {
p.makeProjection(s, "", parse.Field{Key: ".config", Order: "first"})
}
if !p.haveFullname {
p.makeProjection(s, "", parse.Field{Key: ".fullname", Order: "first"})
}
return s
}
func (p *ProjectionParser) makeProjection(s *Projection, q string, proj parse.Field) (filterFn, error) {
// Construct the order function.
var initField func(field *Field)
var filter filterFn
makeFilter := func(ext extractor) {}
if proj.Order == "fixed" {
fixedMap := make(map[string]int, len(proj.Fixed))
for i, s := range proj.Fixed {
fixedMap[s] = i
}
initField = func(field *Field) {
field.cmp = func(a, b string) int {
return fixedMap[a] - fixedMap[b]
}
}
makeFilter = func(ext extractor) {
filter = func(res *benchfmt.Result) (mask, bool) {
_, ok := fixedMap[string(ext(res))]
return nil, ok
}
}
} else if proj.Order == "first" {
initField = func(field *Field) {
field.order = make(map[string]int)
field.cmp = func(a, b string) int {
return field.order[a] - field.order[b]
}
}
} else if cmp, ok := builtinOrders[proj.Order]; ok {
initField = func(field *Field) {
field.cmp = cmp
}
} else {
return nil, &parse.SyntaxError{q, proj.OrderOff, fmt.Sprintf("unknown order %q", proj.Order)}
}
var project func(*benchfmt.Result, *[]string)
switch proj.Key {
case ".config":
// File configuration, excluding any more
// specific file keys.
if proj.Order == "fixed" {
// Fixed orders don't make sense for a whole tuple.
return nil, &parse.SyntaxError{q, proj.OrderOff, "fixed order not allowed for .config"}
}
p.haveConfig = true
group := s.addGroup(s.root, ".config")
seen := make(map[string]*Field)
project = func(r *benchfmt.Result, row *[]string) {
for _, cfg := range r.Config {
if !cfg.File {
continue
}
// Have we already seen this key? If so, use its already
// assigned field index.
field, ok := seen[cfg.Key]
if !ok {
// This closure doesn't get called until we've
// parsed all projections, so p.configKeys is fully
// populated from all parsed projections.
if p.configKeys[cfg.Key] {
// This key was explicitly specified in another
// projection, so omit it from .config.
continue
}
// Create a new field for this new key.
field = s.addField(group, cfg.Key)
initField(field)
seen[cfg.Key] = field
}
(*row)[field.idx] = s.intern(cfg.Value)
}
}
case ".fullname":
// Full benchmark name, including name config.
// We want to exclude any more specific keys,
// including keys from later projections, so
// we delay constructing the extractor until
// we process the first Result.
p.haveFullname = true
field := s.addField(s.root, ".fullname")
initField(field)
makeFilter(extractFull)
project = func(r *benchfmt.Result, row *[]string) {
if p.fullExtractor == nil {
p.fullExtractor = newExtractorFullName(p.fullnameKeys)
}
val := p.fullExtractor(r)
(*row)[field.idx] = s.intern(val)
}
case ".unit":
return nil, &parse.SyntaxError{q, proj.KeyOff, ".unit is only allowed in filters"}
default:
// This is a specific sub-name or file key. Add it
// to the excludes.
if proj.Key == ".name" || strings.HasPrefix(proj.Key, "/") {
p.fullnameKeys = append(p.fullnameKeys, proj.Key)
} else {
p.configKeys[proj.Key] = true
}
ext, err := newExtractor(proj.Key)
if err != nil {
return nil, &parse.SyntaxError{q, proj.KeyOff, err.Error()}
}
field := s.addField(s.root, proj.Key)
initField(field)
makeFilter(ext)
project = func(r *benchfmt.Result, row *[]string) {
val := ext(r)
(*row)[field.idx] = s.intern(val)
}
}
s.project = append(s.project, project)
return filter, nil
}
// A Projection extracts some subset of the fields of a benchfmt.Result
// into a Key.
//
// A Projection also implies a sort order over Keys that is
// lexicographic over the fields of the Projection. The sort order of
// each individual field is specified by the projection expression and
// defaults to the order in which values of that field were first
// observed.
type Projection struct {
root *Field
nFields int
// unitField, if non-nil, is the ".unit" field used to project
// the values of a benchmark result.
unitField *Field
// project is a set of functions that project a Result into
// row.
//
// These take a pointer to row because these functions may
// grow the set of fields, so the row slice may grow.
project []func(r *benchfmt.Result, row *[]string)
// row is the buffer used to construct a projection.
row []string
// flatCache is a cache of the flattened sort fields in tuple
// comparison order.
flatCache []*Field
flatCacheOnce *sync.Once
// interns is used to intern the []byte to string conversion. It's
// keyed by string because we can't key a map on []byte, but the
// compiler elides the string allocation in interns[string(x)], so
// lookups are still cheap. These strings are always referenced in
// keys, so this doesn't cause any over-retention.
interns map[string]string
// keys are the interned Keys of this Projection.
keys map[uint64][]*keyNode
}
func newProjection() *Projection {
var p Projection
p.root = &Field{idx: -1}
p.flatCacheOnce = new(sync.Once)
p.interns = make(map[string]string)
p.keys = make(map[uint64][]*keyNode)
return &p
}
func (p *Projection) addField(group *Field, name string) *Field {
if group.idx != -1 {
panic("field's parent is not a group")
}
// Assign this field an index.
field := &Field{Name: name, proj: p, idx: p.nFields}
p.nFields++
group.Sub = append(group.Sub, field)
// Clear the flat cache.
if p.flatCache != nil {
p.flatCache = nil
p.flatCacheOnce = new(sync.Once)
}
// Add to the row buffer.
p.row = append(p.row, "")
return field
}
func (p *Projection) addGroup(group *Field, name string) *Field {
field := &Field{Name: name, IsTuple: true, proj: p, idx: -1}
group.Sub = append(group.Sub, field)
return field
}
// Fields returns the fields of p. These correspond exactly to the
// fields in the Projection's projection expression.
//
// The caller must not modify the returned slice.
func (p *Projection) Fields() []*Field {
return p.root.Sub
}
// FlattenedFields is like Fields, but expands tuple Fields
// (specifically, ".config") into their sub-Fields. This is also the
// sequence of Fields used for sorting Keys returned from this
// Projection.
//
// The caller must not modify the returned slice.
func (p *Projection) FlattenedFields() []*Field {
// This can reasonably be called in parallel after all results have
// been projected, so we make sure it's thread-safe.
p.flatCacheOnce.Do(func() {
p.flatCache = []*Field{}
var walk func(f *Field)
walk = func(f *Field) {
if f.idx != -1 {
p.flatCache = append(p.flatCache, f)
return
}
for _, sub := range f.Sub {
walk(sub)
}
}
walk(p.root)
})
return p.flatCache
}
// A Field is a single field of a Projection.
//
// For example, in the projection ".name,/gomaxprocs", ".name" and
// "/gomaxprocs" are both Fields.
//
// A Field may be a group field with sub-Fields.
type Field struct {
Name string
// IsTuple indicates that this Field is a tuple that does not itself
// have a string value.
IsTuple bool
// Sub is the sequence of sub-Fields for a group field.
Sub []*Field
proj *Projection
// idx gives the index of this field's values in a keyNode.
//
// Indexes are assigned sequentially as new sub-Fields are added to
// group Fields. This allows the set of Fields to grow without
// invalidating existing Keys.
//
// idx is -1 for Fields that are not directly stored in a keyNode,
// such as the root Field and ".config".
idx int
// cmp is the comparison function for values of this field. It
// returns <0 if a < b, >0 if a > b, or 0 if a == b or a and b
// are unorderable.
cmp func(a, b string) int
// order, if non-nil, records the observation order of this
// field.
order map[string]int
}
// String returns the name of Field f.
func (f Field) String() string {
return f.Name
}
var keySeed = maphash.MakeSeed()
// Project extracts fields from benchmark Result r according to
// Projection s and returns them as a Key.
//
// Two Keys produced by Project will be == if and only if their
// projected fields have the same values. Notably, this means Keys can
// be used as Go map keys, which is useful for grouping benchmark
// results.
//
// Calling Project may add new sub-Fields to group Fields in this
// Projection. For example, if the Projection has a ".config" field and
// r has a never-before-seen file configuration key, this will add a new
// sub-Field to the ".config" Field.
//
// If this Projection includes a .units field, it will be left as "" in
// the resulting Key. The caller should use ProjectValues instead.
func (p *Projection) Project(r *benchfmt.Result) Key {
p.populateRow(r)
return p.internRow()
}
// ProjectValues is like Project, but for each benchmark value of
// r.Values individually. The returned slice corresponds to the
// r.Values slice.
//
// If this Projection includes a .unit field, it will differ between
// these Keys. If not, then all of the Keys will be identical
// because the benchmark values vary only on .unit.
func (p *Projection) ProjectValues(r *benchfmt.Result) []Key {
p.populateRow(r)
out := make([]Key, len(r.Values))
if p.unitField == nil {
// There's no .unit, so the Keys will all be the same.
key := p.internRow()
for i := range out {
out[i] = key
}
return out
}
// Vary the .unit field.
for i, val := range r.Values {
p.row[p.unitField.idx] = val.Unit
out[i] = p.internRow()
}
return out
}
func (p *Projection) populateRow(r *benchfmt.Result) {
// Clear the row buffer.
for i := range p.row {
p.row[i] = ""
}
// Run the projection functions to fill in row.
for _, proj := range p.project {
// proj may add fields and grow row.
proj(r, &p.row)
}
}
func (p *Projection) internRow() Key {
// Hash the row. This must be invariant to unused trailing fields: the
// field set can grow, and if those new fields are later cleared,
// we want Keys from before the growth to equal Keys from after the growth.
row := p.row
for len(row) > 0 && row[len(row)-1] == "" {
row = row[:len(row)-1]
}
var h maphash.Hash
h.SetSeed(keySeed)
for _, val := range row {
h.WriteString(val)
}
hash := h.Sum64()
// Check if we already have this key.
keys := p.keys[hash]
for _, key := range keys {
if key.equalRow(row) {
return Key{key}
}
}
// Update observation orders.
for _, field := range p.Fields() {
if field.order == nil {
// Not tracking observation order for this field.
continue
}
var val string
if field.idx < len(row) {
val = row[field.idx]
}
if _, ok := field.order[val]; !ok {
field.order[val] = len(field.order)
}
}
// Save the key.
key := &keyNode{p, append([]string(nil), row...)}
p.keys[hash] = append(p.keys[hash], key)
return Key{key}
}
func (p *Projection) intern(b []byte) string {
if str, ok := p.interns[string(b)]; ok {
return str
}
str := string(b)
p.interns[str] = str
return str
}