benchfmt: add support for unit metadata
This adds support for tracking unit metadata in Result, reading it in
the Reader, and writing it back out in the Writer.
Updates golang/go#43744.
Change-Id: Id3311340b6e6992c149f9bca44c2e1e442b53eb4
Reviewed-on: https://go-review.googlesource.com/c/perf/+/357549
Trust: Austin Clements <austin@google.com>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
diff --git a/benchfmt/files.go b/benchfmt/files.go
index d23630a..973788d 100644
--- a/benchfmt/files.go
+++ b/benchfmt/files.go
@@ -177,3 +177,9 @@
func (f *Files) Err() error {
return f.err
}
+
+// Units returns the accumulated unit metadata.
+// See Reader.Units.
+func (f *Files) Units() map[UnitMetadataKey]*UnitMetadata {
+ return f.reader.Units()
+}
diff --git a/benchfmt/reader.go b/benchfmt/reader.go
index 91464fb..32b0ec6 100644
--- a/benchfmt/reader.go
+++ b/benchfmt/reader.go
@@ -29,8 +29,15 @@
s *bufio.Scanner
err error // current I/O error
- result Result
- resultErr *SyntaxError
+ // q is the queue of records to return before processing the next
+ // input line. qPos is the index of the current record in q. We
+ // track the index explicitly rather than slicing q so that we can
+ // reuse the q slice when we reach the end.
+ q []Record
+ qPos int
+
+ result Result
+ units UnitMetadataMap
interns map[string]string
}
@@ -68,6 +75,7 @@
// Reset resets the reader to begin reading from a new input.
// It also resets all accumulated configuration values.
+// It does NOT reset unit metadata because it carries across files.
//
// initConfig is an alternating sequence of keys and values.
// Reset will install these as the initial internal configuration
@@ -78,10 +86,17 @@
fileName = "<unknown>"
}
r.err = nil
- r.resultErr = noResult
if r.interns == nil {
r.interns = make(map[string]string)
}
+ if r.units == nil {
+ r.units = make(map[UnitMetadataKey]*UnitMetadata)
+ }
+
+ // Wipe the queue in case the user hasn't consumed everything from
+ // this file.
+ r.qPos = 0
+ r.q = r.q[:0]
// Wipe the Result.
r.result.Config = r.result.Config[:0]
@@ -103,7 +118,10 @@
}
}
-var benchmarkPrefix = []byte("Benchmark")
+var (
+ benchmarkPrefix = []byte("Benchmark")
+ unitPrefix = []byte("Unit")
+)
// Scan advances the reader to the next result and reports whether a
// result was read.
@@ -115,7 +133,19 @@
return false
}
- for r.s.Scan() {
+ // If there's anything in the queue from an earlier line, just pop
+ // the queue and return without consuming any more input.
+ if r.qPos+1 < len(r.q) {
+ r.qPos++
+ return true
+ }
+ // Otherwise, we've drained the queue and need to parse more input
+ // to refill it. Reset it to 0 so we can reuse the space.
+ r.qPos = 0
+ r.q = r.q[:0]
+
+ // Process lines until we add something to the queue or hit EOF.
+ for len(r.q) == 0 && r.s.Scan() {
r.result.line++
// We do everything in byte buffers to avoid allocation.
line := r.s.Bytes()
@@ -125,8 +155,20 @@
// At this point we commit to this being a
// benchmark line. If it's malformed, we treat
// that as an error.
- r.resultErr = r.parseBenchmarkLine(line)
- return true
+ if err := r.parseBenchmarkLine(line); err != nil {
+ r.q = append(r.q, err)
+ } else {
+ r.q = append(r.q, &r.result)
+ }
+ continue
+ }
+ if len(line) > 0 && line[0] == 'U' {
+ if nLine, ok := r.isUnitLine(line); ok {
+ // Parse unit metadata line. This queues up its own
+ // records and errors.
+ r.parseUnitLine(nLine)
+ continue
+ }
}
if key, val, ok := parseKeyValueLine(line); ok {
// Intern key, since there tend to be few
@@ -143,6 +185,12 @@
// Ignore the line.
}
+ if len(r.q) > 0 {
+ // We queued something up to return.
+ return true
+ }
+
+ // We hit EOF. Check for IO errors.
if err := r.s.Err(); err != nil {
r.err = fmt.Errorf("%s:%d: %w", r.result.fileName, r.result.line, err)
return false
@@ -257,6 +305,68 @@
return nil
}
+// isUnitLine tests whether line is a unit metadata line. If it is, it
+// returns the line after the "Unit" literal and true.
+func (r *Reader) isUnitLine(line []byte) (rest []byte, ok bool) {
+ var f []byte
+ // Is this a unit metadata line?
+ f, line = splitField(line)
+ if bytes.Equal(f, unitPrefix) {
+ return line, true
+ }
+ return nil, false
+}
+
+// parseUnitLine parses line as a unit metadata line, starting
+// after "Unit". It updates r.q.
+// If there are syntax errors on the line, it will attempt to parse
+// what it can and return a non-nil error.
+func (r *Reader) parseUnitLine(line []byte) {
+ var f []byte
+ // isUnitLine already consumed the literal "Unit".
+ // Consume the next field, which is the unit.
+ f, line = splitField(line)
+ if len(f) == 0 {
+ r.q = append(r.q, r.newSyntaxError("missing unit"))
+ return
+ }
+ unit := r.intern(f)
+
+ // The metadata map is indexed by tidied units because we want to
+ // support lookups by tidy units and there's no way to "untidy" a
+ // unit.
+ _, tidyUnit := benchunit.Tidy(1, unit)
+
+ // Consume key=value pairs.
+ for {
+ f, line = splitField(line)
+ if len(f) == 0 {
+ break
+ }
+ eq := bytes.IndexByte(f, '=')
+ if eq <= 0 {
+ r.q = append(r.q, r.newSyntaxError("expected key=value"))
+ continue
+ }
+ key := UnitMetadataKey{tidyUnit, r.intern(f[:eq])}
+ value := r.intern(f[eq+1:])
+
+ if have, ok := r.units[key]; ok {
+ if have.Value == value {
+ // We already have this unit metadata. Ignore.
+ continue
+ }
+ // Report incompatible unit metadata.
+ r.q = append(r.q, r.newSyntaxError(fmt.Sprintf("metadata %s of unit %s already set to %s", key.Key, unit, have.Value)))
+ continue
+ }
+
+ metadata := &UnitMetadata{key, unit, value, r.result.fileName, r.result.line}
+ r.units[key] = metadata
+ r.q = append(r.q, metadata)
+ }
+}
+
func (r *Reader) intern(x []byte) string {
const maxIntern = 1024
if s, ok := r.interns[string(x)]; ok {
@@ -289,9 +399,10 @@
var _ Record = (*Result)(nil)
var _ Record = (*SyntaxError)(nil)
+var _ Record = (*UnitMetadata)(nil)
// Result returns the record that was just read by Scan. This is either
-// a *Result or a *SyntaxError indicating a parse error.
+// a *Result, a *UnitMetadata, or a *SyntaxError indicating a parse error.
// It may return more types in the future.
//
// Parse errors are non-fatal, so the caller can continue to call
@@ -300,10 +411,11 @@
// If this returns a *Result, the caller should not retain the Result,
// as it will be overwritten by the next call to Scan.
func (r *Reader) Result() Record {
- if r.resultErr != nil {
- return r.resultErr
+ if r.qPos >= len(r.q) {
+ // This should only happen if Scan has never been called.
+ return noResult
}
- return &r.result
+ return r.q[r.qPos]
}
// Err returns the first non-EOF I/O error that was encountered by the
@@ -312,6 +424,15 @@
return r.err
}
+// Units returns the accumulated unit metadata.
+//
+// Callers that want to consume the entire stream of benchmark results
+// and then process units can use this instead of monitoring
+// *UnitMetadata Records.
+func (r *Reader) Units() UnitMetadataMap {
+ return r.units
+}
+
// Parsing helpers.
//
// These are designed to leverage common fast paths. The ASCII fast
diff --git a/benchfmt/reader_test.go b/benchfmt/reader_test.go
index 8ea6243..9636dff 100644
--- a/benchfmt/reader_test.go
+++ b/benchfmt/reader_test.go
@@ -17,7 +17,7 @@
"time"
)
-func parseAll(t *testing.T, data string, setup ...func(r *Reader, sr io.Reader)) []Record {
+func parseAll(t *testing.T, data string, setup ...func(r *Reader, sr io.Reader)) ([]Record, *Reader) {
sr := strings.NewReader(data)
r := NewReader(sr, "test")
for _, f := range setup {
@@ -32,7 +32,7 @@
res.fileName = ""
res.line = 0
out = append(out, res)
- case *SyntaxError:
+ case *SyntaxError, *UnitMetadata:
out = append(out, rec)
default:
t.Fatalf("unexpected result type %T", rec)
@@ -41,7 +41,7 @@
if err := r.Err(); err != nil {
t.Fatal("parsing failed: ", err)
}
- return out
+ return out, r
}
func printRecord(w io.Writer, r Record) {
@@ -55,6 +55,8 @@
fmt.Fprintf(w, " %v %s", val.Value, val.Unit)
}
fmt.Fprintf(w, "\n")
+ case *UnitMetadata:
+ fmt.Fprintf(w, "Unit: %+v\n", r)
case *SyntaxError:
fmt.Fprintf(w, "SyntaxError: %s\n", r)
default:
@@ -186,6 +188,10 @@
BenchmarkMissingUnit 100 1
BenchmarkMissingUnit2 100 1 ns/op 2
also not a benchmark
+Unit
+Unit ns/op blah
+Unit ns/op a=1
+Unit ns/op a=2
`,
[]Record{
&SyntaxError{"test", 2, "missing iteration count"},
@@ -195,6 +201,10 @@
&SyntaxError{"test", 6, "parsing measurement: invalid syntax"},
&SyntaxError{"test", 7, "missing units"},
&SyntaxError{"test", 8, "missing units"},
+ &SyntaxError{"test", 10, "missing unit"},
+ &SyntaxError{"test", 11, "expected key=value"},
+ &UnitMetadata{UnitMetadataKey{"sec/op", "a"}, "ns/op", "1", "test", 12},
+ &SyntaxError{"test", 13, "metadata a of unit ns/op already set to 1"},
},
},
{
@@ -221,16 +231,36 @@
v(1, "ns/op").res,
},
},
+ {
+ "unit metadata",
+ `Unit ns/op a=1 b=2
+Unit ns/op c=3 error d=4
+# Repeated unit should report nothing
+Unit ns/op d=4
+# Starts like a unit line but actually isn't
+Unitx
+BenchmarkOne 100 1 ns/op
+`,
+ []Record{
+ &UnitMetadata{UnitMetadataKey{"sec/op", "a"}, "ns/op", "1", "test", 1},
+ &UnitMetadata{UnitMetadataKey{"sec/op", "b"}, "ns/op", "2", "test", 1},
+ &UnitMetadata{UnitMetadataKey{"sec/op", "c"}, "ns/op", "3", "test", 2},
+ &SyntaxError{"test", 2, "expected key=value"},
+ &UnitMetadata{UnitMetadataKey{"sec/op", "d"}, "ns/op", "4", "test", 2},
+ r("One", 100).
+ v(1, "ns/op").res,
+ },
+ },
} {
t.Run(test.name, func(t *testing.T) {
- got := parseAll(t, test.input)
+ got, _ := parseAll(t, test.input)
compareRecords(t, got, test.want)
})
}
}
func TestReaderInternalConfig(t *testing.T) {
- got := parseAll(t, `
+ got, _ := parseAll(t, `
# Test initial internal config
Benchmark1 100 1 ns/op
# Overwrite internal config with file config
diff --git a/benchfmt/units.go b/benchfmt/units.go
new file mode 100644
index 0000000..1bc1f1e
--- /dev/null
+++ b/benchfmt/units.go
@@ -0,0 +1,99 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchfmt
+
+import (
+ "golang.org/x/perf/benchmath"
+ "golang.org/x/perf/benchunit"
+)
+
+// Unit metadata is a single piece of unit metadata.
+//
+// Unit metadata gives information that's useful to interpreting
+// values in a given unit. The following metadata keys are predefined:
+//
+// better={higher,lower} indicates whether higher or lower values of
+// this unit are better (indicate an improvement).
+//
+// assume={nothing,exact} indicates what statistical assumption to
+// make when considering distributions of values.
+// `nothing` means to make no statistical assumptions (e.g., use
+// non-parametric methods) and `exact` means to assume measurements are
+// exact (repeated measurement does not increase confidence).
+// The default is `nothing`.
+type UnitMetadata struct {
+ UnitMetadataKey
+
+ // OrigUnit is the original, untidied unit as written in the input.
+ OrigUnit string
+
+ Value string
+
+ fileName string
+ line int
+}
+
+func (u *UnitMetadata) Pos() (fileName string, line int) {
+ return u.fileName, u.line
+}
+
+// UnitMetadataKey identifies a single piece of unit metadata by unit
+// and metadata name.
+type UnitMetadataKey struct {
+ Unit string
+ Key string // Metadata key (e.g., "better" or "assume")
+}
+
+// UnitMetadataMap stores the accumulated metadata for several units.
+// This is indexed by tidied units, while the values store the original
+// units from the benchmark file.
+type UnitMetadataMap map[UnitMetadataKey]*UnitMetadata
+
+// Get returns the unit metadata for the specified unit and metadata key.
+// It tidies unit if necessary.
+func (m UnitMetadataMap) Get(unit, key string) *UnitMetadata {
+ _, tidyUnit := benchunit.Tidy(1, unit)
+ return m[UnitMetadataKey{tidyUnit, key}]
+}
+
+// GetAssumption returns the appropriate statistical Assumption to make
+// about distributions of values in the given unit.
+func (m UnitMetadataMap) GetAssumption(unit string) benchmath.Assumption {
+ dist := m.Get(unit, "assume")
+ if dist != nil && dist.Value == "exact" {
+ return benchmath.AssumeExact
+ }
+ // The default is to assume nothing.
+ return benchmath.AssumeNothing
+}
+
+// GetBetter returns whether higher or lower values of the given unit
+// indicate an improvement. It returns +1 if higher values are better,
+// -1 if lower values are better, or 0 if unknown.
+func (m UnitMetadataMap) GetBetter(unit string) int {
+ better := m.Get(unit, "better")
+ if better != nil {
+ switch better.Value {
+ case "higher":
+ return 1
+ case "lower":
+ return -1
+ }
+ return 0
+ }
+ // Fall back to some built-in defaults.
+ switch unit {
+ case "ns/op", "sec/op":
+ // This measures "duration", so lower is better.
+ return -1
+ case "MB/s", "B/s":
+ // This measures "speed", so higher is better.
+ return 1
+ case "B/op", "allocs/op":
+ // These measure amount of allocation, so lower is better.
+ return -1
+ }
+ return 0
+}
diff --git a/benchfmt/units_test.go b/benchfmt/units_test.go
new file mode 100644
index 0000000..fa56963
--- /dev/null
+++ b/benchfmt/units_test.go
@@ -0,0 +1,99 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package benchfmt
+
+import (
+ "testing"
+
+ "golang.org/x/perf/benchmath"
+)
+
+func TestReaderUnits(t *testing.T) {
+ _, reader := parseAll(t, `
+Unit ns/op a=1 b=2
+Unit MB/s c=3 c=3
+`)
+ units := reader.Units()
+ if len(units) != 3 {
+ t.Fatalf("expected 3 unit metadata, got %d:\n%v", len(units), units)
+ }
+ check := func(unit, tidyUnit, key, value string) {
+ t.Helper()
+ // The map is indexed by tidy units.
+ mkey := UnitMetadataKey{tidyUnit, key}
+ m, ok := units[mkey]
+ if !ok {
+ t.Errorf("for %s:%s, want %s, got none", tidyUnit, key, value)
+ return
+ }
+ want := UnitMetadata{UnitMetadataKey: mkey, OrigUnit: unit, Value: value}
+ if m.UnitMetadataKey != want.UnitMetadataKey ||
+ m.OrigUnit != want.OrigUnit ||
+ m.Value != want.Value {
+ t.Errorf("for %s:%s, want %+v, got %+v", unit, key, want, m)
+ }
+ // Check that Get works for both tidied and untidied units.
+ if got := units.Get(unit, key); got != m {
+ t.Errorf("for Get(%v, %v), want %+v, got %+v", unit, key, m, got)
+ }
+ if got := units.Get(tidyUnit, key); got != m {
+ t.Errorf("for Get(%v, %v), want %+v, got %+v", tidyUnit, key, m, got)
+ }
+ }
+ check("ns/op", "sec/op", "a", "1")
+ check("ns/op", "sec/op", "b", "2")
+ check("MB/s", "B/s", "c", "3")
+}
+
+func TestUnitsGetAssumption(t *testing.T) {
+ _, reader := parseAll(t, `
+Unit a assume=nothing
+Unit ns/frob assume=exact
+Unit c assume=blah`)
+ units := reader.Units()
+
+ check := func(unit string, want benchmath.Assumption) {
+ got := units.GetAssumption(unit)
+ if got != want {
+ t.Errorf("for unit %s, want assumption %s, got %s", unit, want, got)
+ }
+ }
+ check("a", benchmath.AssumeNothing)
+ check("ns/frob", benchmath.AssumeExact)
+ check("sec/frob", benchmath.AssumeExact)
+ check("c", benchmath.AssumeNothing)
+}
+
+func TestUnitsGetBetter(t *testing.T) {
+ _, reader := parseAll(t, `
+Unit a better=higher
+Unit b better=lower`)
+ units := reader.Units()
+
+ check := func(unit string, want int) {
+ got := units.GetBetter(unit)
+ if got != want {
+ t.Errorf("for unit %s, want better=%+d, got %+d", unit, want, got)
+ }
+ }
+ check("a", 1)
+ check("b", -1)
+ check("c", 0)
+
+ check("ns/op", -1)
+ check("sec/op", -1)
+
+ check("MB/s", 1)
+ check("B/s", 1)
+
+ check("B/op", -1)
+ check("allocs/op", -1)
+
+ // Check that we can override the built-ins
+ _, reader = parseAll(t, "Unit ns/op better=higher")
+ units = reader.Units()
+ check("ns/op", 1)
+ check("sec/op", 1)
+}
diff --git a/benchfmt/writer.go b/benchfmt/writer.go
index 24b38b7..2c5ccc4 100644
--- a/benchfmt/writer.go
+++ b/benchfmt/writer.go
@@ -33,15 +33,24 @@
func (w *Writer) Write(rec Record) error {
switch rec := rec.(type) {
case *Result:
- return w.writeResult(rec)
+ w.writeResult(rec)
+ case *UnitMetadata:
+ w.writeUnitMetadata(rec)
case *SyntaxError:
// Ignore
return nil
+ default:
+ return fmt.Errorf("unknown Record type %T", rec)
}
- return fmt.Errorf("unknown Record type %T", rec)
+
+ // Flush the buffer out to the io.Writer. Write to the buffer
+ // can't fail, so we only have to check if this fails.
+ _, err := w.w.Write(w.buf.Bytes())
+ w.buf.Reset()
+ return err
}
-func (w *Writer) writeResult(res *Result) error {
+func (w *Writer) writeResult(res *Result) {
// If any file config changed, write out the changes.
if len(w.fileConfig) != len(res.Config) {
w.writeFileConfig(res)
@@ -66,12 +75,6 @@
w.buf.WriteByte('\n')
w.first = false
-
- // Flush the buffer out to the io.Writer. Write to the buffer
- // can't fail, so we only have to check if this fails.
- _, err := w.w.Write(w.buf.Bytes())
- w.buf.Reset()
- return err
}
func (w *Writer) writeFileConfig(res *Result) {
@@ -127,3 +130,7 @@
w.buf.WriteByte('\n')
}
+
+func (w *Writer) writeUnitMetadata(m *UnitMetadata) {
+ fmt.Fprintf(&w.buf, "Unit %s %s=%s\n", m.OrigUnit, m.Key, m.Value)
+}
diff --git a/benchfmt/writer_test.go b/benchfmt/writer_test.go
index 1798e5c..de0ffc0 100644
--- a/benchfmt/writer_test.go
+++ b/benchfmt/writer_test.go
@@ -12,6 +12,9 @@
func TestWriter(t *testing.T) {
const input = `BenchmarkOne 1 1 ns/op
+Unit ns/op a=1
+Unit ns/op b=2
+Unit MB/s c=3
key: val
key1: val1