// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package benchfmt
import (
"bufio"
"bytes"
"fmt"
"io"
"math"
"unicode"
"unicode/utf8"
"golang.org/x/perf/benchfmt/internal/bytesconv"
"golang.org/x/perf/benchunit"
)
// A Reader reads the Go benchmark format.
//
// Its API is modeled on bufio.Scanner. To minimize allocation, a
// Reader retains ownership of everything it creates; a caller should
// copy anything it needs to retain.
//
// To construct a new Reader, either call NewReader, or call Reset on
// a zeroed Reader.
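//
// A typical loop over the records (a sketch; f is any io.Reader and
// "bench.txt" stands in for the input's name) looks like:
//
//	r := NewReader(f, "bench.txt")
//	for r.Scan() {
//		switch rec := r.Result().(type) {
//		case *Result:
//			// Process rec, copying anything that must be
//			// retained past the next call to Scan.
//		case *SyntaxError:
//			// Non-fatal parse error. Report it and continue.
//			fmt.Println(rec)
//		}
//	}
//	if err := r.Err(); err != nil {
//		// Handle the non-EOF I/O error.
//	}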
type Reader struct {
s *bufio.Scanner
err error // current I/O error
// q is the queue of records to return before processing the next
// input line. qPos is the index of the current record in q. We
// track the index explicitly rather than slicing q so that we can
// reuse the q slice when we reach the end.
q []Record
qPos int
result Result
units UnitMetadataMap
interns map[string]string
}
// A SyntaxError represents a syntax error on a particular line of a
// benchmark results file.
type SyntaxError struct {
FileName string
Line int
Msg string
}
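// Pos returns the file name and line number of the syntax error.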
func (e *SyntaxError) Pos() (fileName string, line int) {
return e.FileName, e.Line
}
func (s *SyntaxError) Error() string {
return fmt.Sprintf("%s:%d: %s", s.FileName, s.Line, s.Msg)
}
var noResult = &SyntaxError{"", 0, "Reader.Scan has not been called"}
// NewReader constructs a reader to parse the Go benchmark format from r.
// fileName is used in error messages; it is purely diagnostic.
func NewReader(r io.Reader, fileName string) *Reader {
reader := new(Reader)
reader.Reset(r, fileName)
return reader
}
// newSyntaxError returns a *SyntaxError at the Reader's current position.
func (r *Reader) newSyntaxError(msg string) *SyntaxError {
return &SyntaxError{r.result.fileName, r.result.line, msg}
}
// Reset resets the reader to begin reading from a new input.
// It also resets all accumulated configuration values.
// It does NOT reset unit metadata because it carries across files.
//
// initConfig is an alternating sequence of keys and values.
// Reset will install these as the initial file configuration
// before any results are read from the input file.
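//
// For example (an illustrative sketch, where f is the next input and
// "goos" is an arbitrary key):
//
//	r.Reset(f, "run2.txt", "goos", "linux")
//
// begins reading run2.txt with the file configuration "goos: linux"
// already in effect.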
func (r *Reader) Reset(ior io.Reader, fileName string, initConfig ...string) {
r.s = bufio.NewScanner(ior)
if fileName == "" {
fileName = "<unknown>"
}
r.err = nil
if r.interns == nil {
r.interns = make(map[string]string)
}
if r.units == nil {
r.units = make(map[UnitMetadataKey]*UnitMetadata)
}
	// Wipe the queue in case the caller hasn't consumed everything from
	// the previous input.
r.qPos = 0
r.q = r.q[:0]
// Wipe the Result.
r.result.Config = r.result.Config[:0]
r.result.Name = r.result.Name[:0]
r.result.Iters = 0
r.result.Values = r.result.Values[:0]
for k := range r.result.configPos {
delete(r.result.configPos, k)
}
r.result.fileName = fileName
r.result.line = 0
// Set up initial configuration.
if len(initConfig)%2 != 0 {
panic("len(initConfig) must be a multiple of 2")
}
for i := 0; i < len(initConfig); i += 2 {
r.result.SetConfig(initConfig[i], initConfig[i+1])
}
}
var (
benchmarkPrefix = []byte("Benchmark")
unitPrefix = []byte("Unit")
)
// Scan advances the reader to the next result and reports whether a
// result was read.
// The caller should use the Result method to get the result.
// If Scan reaches EOF or an I/O error occurs, it returns false,
// in which case the caller should use the Err method to check for errors.
func (r *Reader) Scan() bool {
if r.err != nil {
return false
}
// If there's anything in the queue from an earlier line, just pop
// the queue and return without consuming any more input.
if r.qPos+1 < len(r.q) {
r.qPos++
return true
}
// Otherwise, we've drained the queue and need to parse more input
// to refill it. Reset it to 0 so we can reuse the space.
r.qPos = 0
r.q = r.q[:0]
// Process lines until we add something to the queue or hit EOF.
for len(r.q) == 0 && r.s.Scan() {
r.result.line++
// We do everything in byte buffers to avoid allocation.
line := r.s.Bytes()
// Most lines are benchmark lines, and we can check
// for that very quickly, so start with that.
if bytes.HasPrefix(line, benchmarkPrefix) {
// At this point we commit to this being a
// benchmark line. If it's malformed, we treat
// that as an error.
if err := r.parseBenchmarkLine(line); err != nil {
r.q = append(r.q, err)
} else {
r.q = append(r.q, &r.result)
}
continue
}
if len(line) > 0 && line[0] == 'U' {
if nLine, ok := r.isUnitLine(line); ok {
// Parse unit metadata line. This queues up its own
// records and errors.
r.parseUnitLine(nLine)
continue
}
}
if key, val, ok := parseKeyValueLine(line); ok {
// Intern key, since there tend to be few
// unique keys.
keyStr := r.intern(key)
if len(val) == 0 {
r.result.deleteConfig(keyStr)
} else {
cfg := r.result.ensureConfig(keyStr, true)
cfg.Value = append(cfg.Value[:0], val...)
}
continue
}
// Ignore the line.
}
if len(r.q) > 0 {
// We queued something up to return.
return true
}
	// We hit EOF. Check for I/O errors.
if err := r.s.Err(); err != nil {
r.err = fmt.Errorf("%s:%d: %w", r.result.fileName, r.result.line, err)
return false
}
r.err = nil
return false
}
// parseKeyValueLine attempts to parse line as a key: val pair,
// with ok reporting whether the line could be parsed.
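//
// For example, a file configuration line like "goos: linux" yields
// key "goos" and val "linux".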
func parseKeyValueLine(line []byte) (key, val []byte, ok bool) {
for i := 0; i < len(line); {
r, n := utf8.DecodeRune(line[i:])
// key begins with a lower case character ...
if i == 0 && !unicode.IsLower(r) {
return
}
// and contains no space characters nor upper case
// characters.
if unicode.IsSpace(r) || unicode.IsUpper(r) {
return
}
if i > 0 && r == ':' {
key, val = line[:i], line[i+1:]
break
}
i += n
}
if len(key) == 0 {
return
}
// Value can be omitted entirely, in which case the colon must
// still be present, but need not be followed by a space.
if len(val) == 0 {
ok = true
return
}
// One or more ASCII space or tab characters separate "key:"
// from "value."
for len(val) > 0 && (val[0] == ' ' || val[0] == '\t') {
val = val[1:]
ok = true
}
return
}
// parseBenchmarkLine parses line as a benchmark result and updates r.result.
// The caller must have already checked that line begins with "Benchmark".
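//
// For example (illustrative), a benchmark line looks like
//
//	BenchmarkDecode-8   1000000   1234 ns/op   456 B/op
//
// that is, a name, an iteration count, and one or more value/unit pairs.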
func (r *Reader) parseBenchmarkLine(line []byte) *SyntaxError {
var f []byte
var err error
// Skip "Benchmark"
line = line[len("Benchmark"):]
// Read the name.
r.result.Name, line = splitField(line)
// Read the iteration count.
f, line = splitField(line)
if len(f) == 0 {
return r.newSyntaxError("missing iteration count")
}
r.result.Iters, err = bytesconv.Atoi(f)
switch err := err.(type) {
case nil:
// ok
case *bytesconv.NumError:
return r.newSyntaxError("parsing iteration count: " + err.Err.Error())
default:
return r.newSyntaxError(err.Error())
}
// Read value/unit pairs.
r.result.Values = r.result.Values[:0]
for {
f, line = splitField(line)
if len(f) == 0 {
if len(r.result.Values) > 0 {
break
}
return r.newSyntaxError("missing measurements")
}
val, err := atof(f)
switch err := err.(type) {
case nil:
// ok
case *bytesconv.NumError:
return r.newSyntaxError("parsing measurement: " + err.Err.Error())
default:
return r.newSyntaxError(err.Error())
}
f, line = splitField(line)
if len(f) == 0 {
return r.newSyntaxError("missing units")
}
unit := r.intern(f)
// Tidy the value.
tidyVal, tidyUnit := benchunit.Tidy(val, unit)
var v Value
if tidyVal == val {
v = Value{Value: val, Unit: unit}
} else {
v = Value{Value: tidyVal, Unit: tidyUnit, OrigValue: val, OrigUnit: unit}
}
r.result.Values = append(r.result.Values, v)
}
return nil
}
// isUnitLine tests whether line is a unit metadata line. If it is, it
// returns the line after the "Unit" literal and true.
func (r *Reader) isUnitLine(line []byte) (rest []byte, ok bool) {
var f []byte
// Is this a unit metadata line?
f, line = splitField(line)
if bytes.Equal(f, unitPrefix) {
return line, true
}
return nil, false
}
// parseUnitLine parses line as a unit metadata line, starting
// after "Unit". It updates r.q.
// If there are syntax errors on the line, it attempts to parse what it
// can and appends one or more *SyntaxErrors to the queue.
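//
// For example (illustrative), a unit metadata line such as
//
//	Unit ns/op better=lower
//
// (of which this function sees only the part after "Unit") records the
// metadata key "better" with value "lower" for the unit ns/op.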
func (r *Reader) parseUnitLine(line []byte) {
var f []byte
// isUnitLine already consumed the literal "Unit".
// Consume the next field, which is the unit.
f, line = splitField(line)
if len(f) == 0 {
r.q = append(r.q, r.newSyntaxError("missing unit"))
return
}
unit := r.intern(f)
// The metadata map is indexed by tidied units because we want to
// support lookups by tidy units and there's no way to "untidy" a
// unit.
_, tidyUnit := benchunit.Tidy(1, unit)
// Consume key=value pairs.
for {
f, line = splitField(line)
if len(f) == 0 {
break
}
eq := bytes.IndexByte(f, '=')
if eq <= 0 {
r.q = append(r.q, r.newSyntaxError("expected key=value"))
continue
}
key := UnitMetadataKey{tidyUnit, r.intern(f[:eq])}
value := r.intern(f[eq+1:])
if have, ok := r.units[key]; ok {
if have.Value == value {
// We already have this unit metadata. Ignore.
continue
}
// Report incompatible unit metadata.
r.q = append(r.q, r.newSyntaxError(fmt.Sprintf("metadata %s of unit %s already set to %s", key.Key, unit, have.Value)))
continue
}
metadata := &UnitMetadata{key, unit, value, r.result.fileName, r.result.line}
r.units[key] = metadata
r.q = append(r.q, metadata)
}
}
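// intern returns a string equal to x, reusing a previously interned
// string when possible so that repeated keys and units do not allocate
// a new string on every occurrence.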
func (r *Reader) intern(x []byte) string {
const maxIntern = 1024
if s, ok := r.interns[string(x)]; ok {
return s
}
if len(r.interns) >= maxIntern {
// Evict a random item from the interns table.
// Map iteration order is unspecified, but both
		// the gc and libgo runtimes provide random
// iteration order. The choice of item to evict doesn't
// affect correctness, so we do the simple thing.
for k := range r.interns {
delete(r.interns, k)
break
}
}
s := string(x)
r.interns[s] = s
return s
}
// A Record is a single record read from a benchmark file. It may be a
// *Result or a *SyntaxError.
type Record interface {
// Pos returns the position of this record as a file name and a
// 1-based line number within that file. If this record was not read
// from a file, it returns "", 0.
Pos() (fileName string, line int)
}
var _ Record = (*Result)(nil)
var _ Record = (*SyntaxError)(nil)
var _ Record = (*UnitMetadata)(nil)
// Result returns the record that was just read by Scan. This is either
// a *Result, a *UnitMetadata, or a *SyntaxError indicating a parse error.
// It may return more types in the future.
//
// Parse errors are non-fatal, so the caller can continue to call
// Scan.
//
// If this returns a *Result, the caller should not retain the Result,
// as it will be overwritten by the next call to Scan.
func (r *Reader) Result() Record {
if r.qPos >= len(r.q) {
// This should only happen if Scan has never been called.
return noResult
}
return r.q[r.qPos]
}
// Err returns the first non-EOF I/O error that was encountered by the
// Reader.
func (r *Reader) Err() error {
return r.err
}
// Units returns the accumulated unit metadata.
//
// Callers that want to consume the entire stream of benchmark results
// and then process units can use this instead of monitoring
// *UnitMetadata Records.
func (r *Reader) Units() UnitMetadataMap {
return r.units
}
// Parsing helpers.
//
// These are designed to leverage common fast paths. The ASCII fast
// path is especially important, and more than doubles the performance
// of the parser.
// atof is a wrapper for bytesconv.ParseFloat that optimizes for
// numbers that are usually integers.
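//
// For example, atof([]byte("1000000")) takes the integer fast path,
// while atof([]byte("3.25")) falls back to bytesconv.ParseFloat.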
func atof(x []byte) (float64, error) {
// Try parsing as an integer.
var val int64
for _, ch := range x {
digit := ch - '0'
if digit >= 10 {
goto fail
}
if val > (math.MaxInt64-10)/10 {
goto fail // avoid int64 overflow
}
val = (val * 10) + int64(digit)
}
return float64(val), nil
fail:
// The fast path failed. Parse it as a float.
return bytesconv.ParseFloat(x, 64)
}
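// isSpace is a bitmap over the ASCII whitespace characters recognized
// below: each such byte value c is less than 64, so bit 1<<c marks it
// and (isSpace>>c)&1 tests membership. For other ASCII bytes the shift
// count is at most 127, and a uint64 shifted by 64 or more in Go yields
// 0, so the test is simply false.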
const isSpace uint64 = 1<<'\t' | 1<<'\n' | 1<<'\v' | 1<<'\f' | 1<<'\r' | 1<<' '
// splitField consumes and returns non-whitespace in x as field,
// consumes whitespace following the field, and then returns the
// remaining bytes of x.
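//
// For example, splitField([]byte("1000 ns/op")) returns field "1000"
// and rest "ns/op".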
func splitField(x []byte) (field, rest []byte) {
// Collect non-whitespace into field.
var i int
for i = 0; i < len(x); {
if x[i] < utf8.RuneSelf {
// Fast path for ASCII
if (isSpace>>x[i])&1 != 0 {
rest = x[i+1:]
break
}
i++
} else {
// Slow path for Unicode
r, n := utf8.DecodeRune(x[i:])
if unicode.IsSpace(r) {
rest = x[i+n:]
break
}
i += n
}
}
field = x[:i]
// Strip whitespace from rest.
for len(rest) > 0 {
if rest[0] < utf8.RuneSelf {
if (isSpace>>rest[0])&1 == 0 {
break
}
rest = rest[1:]
} else {
r, n := utf8.DecodeRune(rest)
if !unicode.IsSpace(r) {
break
}
rest = rest[n:]
}
}
return
}