| // Copyright 2024 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Code generated by "gen.bash" from internal/trace/v2; DO NOT EDIT. |
| |
| //go:build go1.21 |
| |
| // Package oldtrace implements a parser for Go execution traces from versions |
| // 1.11–1.21. |
| // |
| // The package started as a copy of Go 1.19's internal/trace, but has been |
| // optimized to be faster while using less memory and fewer allocations. It has |
| // been further modified for the specific purpose of converting traces to the |
| // new 1.22+ format. |
| package oldtrace |
| |
| import ( |
| "bytes" |
| "encoding/binary" |
| "errors" |
| "fmt" |
| "golang.org/x/exp/trace/internal/event" |
| "golang.org/x/exp/trace/internal/version" |
| "io" |
| "math" |
| "sort" |
| ) |
| |
| // Timestamp represents a count of nanoseconds since the beginning of the trace. |
| // Timestamps can only be meaningfully compared with other timestamps from the |
| // same trace. |
| type Timestamp int64 |
| |
| // Event describes one event in the trace. |
| type Event struct { |
| // The Event type is carefully laid out to optimize its size and to avoid |
| // pointers, the latter so that the garbage collector won't have to scan any |
| // memory of our millions of events. |
| |
| Ts Timestamp // timestamp in nanoseconds |
| G uint64 // G on which the event happened |
| Args [4]uint64 // event-type-specific arguments |
| StkID uint32 // unique stack ID |
| P int32 // P on which the event happened (can be a real P or one of TimerP, NetpollP, SyscallP) |
| Type event.Type // one of Ev* |
| } |
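| |
| // Assuming event.Type is a single byte, the fields above add up to 57 bytes, |
| // which the compiler pads to 64 bytes on 64-bit platforms. That is how a |
| // bucket of eventsBucketSize (524288) events comes out to the 32 MiB noted |
| // at eventsBucketSize below. |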
| |
| // Frame is a frame in stack traces. |
| type Frame struct { |
| PC uint64 |
| // string ID of the function name |
| Fn uint64 |
| // string ID of the file name |
| File uint64 |
| Line int |
| } |
| |
| const ( |
| // Special P identifiers: |
| FakeP = 1000000 + iota |
| TimerP // contains timer unblocks |
| NetpollP // contains network unblocks |
| SyscallP // contains returns from syscalls |
| GCP // contains GC state |
| ProfileP // contains recording of CPU profile samples |
| ) |
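| |
| // With FakeP = 1000000 + iota, the special Ps take the concrete values |
| // TimerP = 1000001, NetpollP = 1000002, SyscallP = 1000003, GCP = 1000004 |
| // and ProfileP = 1000005, all comfortably above any real P id. |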
| |
| // Trace is the result of Parse. |
| type Trace struct { |
| Version version.Version |
| |
| // Events is the sorted list of Events in the trace. |
| Events Events |
| // Stacks is the stack traces (stored as slices of PCs), keyed by stack IDs |
| // from the trace. |
| Stacks map[uint32][]uint64 |
| PCs map[uint64]Frame |
| Strings map[uint64]string |
| InlineStrings []string |
| } |
| |
| // batchOffset records the byte offset of, and number of events in, a batch. A |
| // batch is a sequence of events emitted by a P. Events within a single batch |
| // are sorted by time. |
| type batchOffset struct { |
| offset int |
| numEvents int |
| } |
| |
| type parser struct { |
| ver version.Version |
| data []byte |
| off int |
| |
| strings map[uint64]string |
| inlineStrings []string |
| inlineStringsMapping map[string]int |
| // map from Ps to their batch offsets |
| batchOffsets map[int32][]batchOffset |
| stacks map[uint32][]uint64 |
| stacksData []uint64 |
| ticksPerSec int64 |
| pcs map[uint64]Frame |
| cpuSamples []Event |
| timerGoids map[uint64]bool |
| |
| // state for readRawEvent |
| args []uint64 |
| |
| // state for parseEvent |
| lastTs Timestamp |
| lastG uint64 |
| // map from Ps to the last Gs that ran on them |
| lastGs map[int32]uint64 |
| lastP int32 |
| } |
| |
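| // discard advances the read offset by n bytes and reports whether it could do |
| // so; it fails if n overflows int or if fewer than n bytes remain. |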
| func (p *parser) discard(n uint64) bool { |
| if n > math.MaxInt { |
| return false |
| } |
| if noff := p.off + int(n); noff < p.off || noff > len(p.data) { |
| return false |
| } else { |
| p.off = noff |
| } |
| return true |
| } |
| |
| func newParser(r io.Reader, ver version.Version) (*parser, error) { |
| var buf []byte |
| if seeker, ok := r.(io.Seeker); ok { |
| // Determine the size of the reader so that we can allocate a buffer |
| // without having to grow it later. |
| cur, err := seeker.Seek(0, io.SeekCurrent) |
| if err != nil { |
| return nil, err |
| } |
| end, err := seeker.Seek(0, io.SeekEnd) |
| if err != nil { |
| return nil, err |
| } |
| _, err = seeker.Seek(cur, io.SeekStart) |
| if err != nil { |
| return nil, err |
| } |
| |
| buf = make([]byte, end-cur) |
| _, err = io.ReadFull(r, buf) |
| if err != nil { |
| return nil, err |
| } |
| } else { |
| var err error |
| buf, err = io.ReadAll(r) |
| if err != nil { |
| return nil, err |
| } |
| } |
| return &parser{data: buf, ver: ver, timerGoids: make(map[uint64]bool)}, nil |
| } |
| |
| // Parse parses Go execution traces from versions 1.11–1.21. The provided reader |
| // will be read to completion and the entire trace will be materialized in |
| // memory. That is, this function does not allow incremental parsing. |
| // |
| // The reader must be positioned just after the trace header, and vers must be |
| // the version of the trace. This can be achieved by using |
| // version.ReadHeader. |
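| // |
| // A minimal usage sketch (hypothetical; error handling is elided, and |
| // version.ReadHeader is assumed to return the trace's version): |
| // |
| //	f, _ := os.Open("trace.out") |
| //	defer f.Close() |
| //	ver, _ := version.ReadHeader(f) |
| //	tr, err := oldtrace.Parse(f, ver) |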
| func Parse(r io.Reader, vers version.Version) (Trace, error) { |
| // We accept the version as an argument because golang.org/x/exp/trace will have |
| // already read the version to determine which parser to use. |
| p, err := newParser(r, vers) |
| if err != nil { |
| return Trace{}, err |
| } |
| return p.parse() |
| } |
| |
| // parse parses, post-processes and verifies the trace. |
| func (p *parser) parse() (Trace, error) { |
| defer func() { |
| p.data = nil |
| }() |
| |
| // We parse a trace by running the following steps in order: |
| // |
| // 1. In the initial pass we collect information about batches (their |
| // locations and sizes). We also parse CPU profiling samples in this |
| // step, simply to reduce the number of full passes that we need. |
| // |
| // 2. In the second pass we parse batches and merge them into a globally |
| // ordered event stream. This uses the batch information from the first |
| // pass to quickly find batches. |
| // |
| // 3. After all events have been parsed we convert their timestamps from CPU |
| // ticks to wall time. Furthermore, we move timers and syscalls to |
| // dedicated, fake Ps. |
| // |
| // 4. Finally, we validate the trace. |
| |
| p.strings = make(map[uint64]string) |
| p.batchOffsets = make(map[int32][]batchOffset) |
| p.lastGs = make(map[int32]uint64) |
| p.stacks = make(map[uint32][]uint64) |
| p.pcs = make(map[uint64]Frame) |
| p.inlineStringsMapping = make(map[string]int) |
| |
| if err := p.collectBatchesAndCPUSamples(); err != nil { |
| return Trace{}, err |
| } |
| |
| events, err := p.parseEventBatches() |
| if err != nil { |
| return Trace{}, err |
| } |
| |
| if p.ticksPerSec == 0 { |
| return Trace{}, errors.New("no EvFrequency event") |
| } |
| |
| if events.Len() > 0 { |
| // Translate cpu ticks to real time. |
| minTs := events.Ptr(0).Ts |
| // Use floating point to avoid integer overflows. |
| freq := 1e9 / float64(p.ticksPerSec) |
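| // For example, with a 2.5 GHz tick source, ticksPerSec is 2.5e9, freq |
| // is 0.4, and a delta of 1000 ticks becomes 400ns. |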
| for i := 0; i < events.Len(); i++ { |
| ev := events.Ptr(i) |
| ev.Ts = Timestamp(float64(ev.Ts-minTs) * freq) |
| // Move timers and syscalls to separate fake Ps. |
| if p.timerGoids[ev.G] && ev.Type == EvGoUnblock { |
| ev.P = TimerP |
| } |
| if ev.Type == EvGoSysExit { |
| ev.P = SyscallP |
| } |
| } |
| } |
| |
| if err := p.postProcessTrace(events); err != nil { |
| return Trace{}, err |
| } |
| |
| res := Trace{ |
| Version: p.ver, |
| Events: events, |
| Stacks: p.stacks, |
| Strings: p.strings, |
| InlineStrings: p.inlineStrings, |
| PCs: p.pcs, |
| } |
| return res, nil |
| } |
| |
| // rawEvent is a helper type used during parsing. |
| type rawEvent struct { |
| typ event.Type |
| args []uint64 |
| sargs []string |
| |
| // if typ == EvBatch, these fields describe the batch. |
| batchPid int32 |
| batchOffset int |
| } |
| |
| type proc struct { |
| pid int32 |
| // the remaining events in the current batch |
| events []Event |
| // buffer for reading batches into, aliased by proc.events |
| buf []Event |
| |
| // there are no more batches left |
| done bool |
| } |
| |
| const eventsBucketSize = 524288 // 32 MiB of events |
| |
| // Events stores the events of a trace in a growable sequence of fixed-size |
| // buckets, growing one bucket of eventsBucketSize events at a time. Compared |
| // to a single flat slice, this avoids the O(n) copying cost when append has |
| // to grow the backing array, and it additionally allows consumers to drop |
| // references to parts of the data, freeing memory piecewise. |
| type Events struct { |
| n int |
| buckets []*[eventsBucketSize]Event |
| off int |
| } |
| |
| // grow grows the slice by one and returns a pointer to the new element, without |
| // overwriting it. |
| func (l *Events) grow() *Event { |
| a, b := l.index(l.n) |
| if a >= len(l.buckets) { |
| l.buckets = append(l.buckets, new([eventsBucketSize]Event)) |
| } |
| ptr := &l.buckets[a][b] |
| l.n++ |
| return ptr |
| } |
| |
| // append appends v to the slice and returns a pointer to the new element. |
| func (l *Events) append(v Event) *Event { |
| ptr := l.grow() |
| *ptr = v |
| return ptr |
| } |
| |
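| // Ptr returns a pointer to the i'th remaining event, where i is relative to |
| // the front of the sequence, i.e. it counts events not yet consumed by Pop. |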
| func (l *Events) Ptr(i int) *Event { |
| a, b := l.index(i + l.off) |
| return &l.buckets[a][b] |
| } |
| |
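| // index translates a flat event index into a bucket number and an offset |
| // within that bucket. For example, with eventsBucketSize = 524288 (2^19), |
| // index(524290) returns (1, 2). |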
| func (l *Events) index(i int) (int, int) { |
| // Doing the division on uint instead of int compiles this function to a |
| // shift and an AND (for power of 2 bucket sizes), versus a whole bunch of |
| // instructions for int. |
| return int(uint(i) / eventsBucketSize), int(uint(i) % eventsBucketSize) |
| } |
| |
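| // Len returns the number of events that haven't been consumed by Pop. |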
| func (l *Events) Len() int { |
| return l.n - l.off |
| } |
| |
| func (l *Events) Less(i, j int) bool { |
| return l.Ptr(i).Ts < l.Ptr(j).Ts |
| } |
| |
| func (l *Events) Swap(i, j int) { |
| *l.Ptr(i), *l.Ptr(j) = *l.Ptr(j), *l.Ptr(i) |
| } |
| |
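| // Pop consumes and returns a pointer to the event at the front of the |
| // sequence, reporting whether one was available. Fully consumed buckets are |
| // set to nil so the garbage collector can reclaim them. |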
| func (l *Events) Pop() (*Event, bool) { |
| if l.off == l.n { |
| return nil, false |
| } |
| a, b := l.index(l.off) |
| ptr := &l.buckets[a][b] |
| l.off++ |
| if b == eventsBucketSize-1 || l.off == l.n { |
| // We've consumed the last event from the bucket, so drop the bucket and |
| // allow GC to collect it. |
| l.buckets[a] = nil |
| } |
| return ptr, true |
| } |
| |
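| // All returns a function iterator over the remaining events, in order. The |
| // returned function follows the range-over-func convention: it calls yield |
| // for each event and stops early if yield returns false. |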
| func (l *Events) All() func(yield func(ev *Event) bool) { |
| return func(yield func(ev *Event) bool) { |
| for i := 0; i < l.Len(); i++ { |
| a, b := l.index(i + l.off) |
| ptr := &l.buckets[a][b] |
| if !yield(ptr) { |
| return |
| } |
| } |
| } |
| } |
| |
| // parseEventBatches reads per-P event batches and merges them into a single, |
| // consistent stream. The high-level idea is as follows. Events within an |
| // individual batch are in correct order, because they are emitted by a single |
| // P. So we need to produce a correct interleaving of the batches. To do this |
| // we take the first unmerged event from each batch (the frontier). Then we |
| // choose the subset that is "ready" to be merged, that is, events for which |
| // all dependencies have already been merged. Then we choose the event with |
| // the lowest timestamp from that subset, merge it and repeat. This approach |
| // ensures that we form a consistent stream even if timestamps are incorrect |
| // (a condition observed on some machines). |
| func (p *parser) parseEventBatches() (Events, error) { |
| // The ordering of CPU profile sample events in the data stream is based on |
| // when each run of the signal handler was able to acquire the spinlock, |
| // with original timestamps corresponding to when ReadTrace pulled the data |
| // off of the profBuf queue. Re-sort them by the timestamp we captured |
| // inside the signal handler. |
| sort.Sort((*eventList)(&p.cpuSamples)) |
| |
| allProcs := make([]proc, 0, len(p.batchOffsets)) |
| for pid := range p.batchOffsets { |
| allProcs = append(allProcs, proc{pid: pid}) |
| } |
| allProcs = append(allProcs, proc{pid: ProfileP, events: p.cpuSamples}) |
| |
| events := Events{} |
| |
| // Merge events as long as at least one P has more events |
| gs := make(map[uint64]gState) |
| // Note: technically we don't need a priority queue here. We're only ever |
| // interested in the earliest eligible event, which means we just have to |
| // track the smallest element. However, in practice, the priority queue |
| // performs better, because for each event we only have to compute its state |
| // transition once, not on each iteration. If it was eligible before, it'll |
| // already be in the queue. Furthermore, on average, we only have one P to |
| // look at in each iteration, because all other Ps are already in the queue. |
| var frontier orderEventList |
| |
| availableProcs := make([]*proc, len(allProcs)) |
| for i := range allProcs { |
| availableProcs[i] = &allProcs[i] |
| } |
| for { |
| pidLoop: |
| for i := 0; i < len(availableProcs); i++ { |
| proc := availableProcs[i] |
| |
| for len(proc.events) == 0 { |
| // Call loadBatch in a loop because sometimes batches are empty |
| evs, err := p.loadBatch(proc.pid, proc.buf[:0]) |
| proc.buf = evs[:0] |
| if err == io.EOF { |
| // This P has no more events |
| proc.done = true |
| availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i] |
| availableProcs = availableProcs[:len(availableProcs)-1] |
| // We swapped the element at i with another proc, so look at |
| // the index again |
| i-- |
| continue pidLoop |
| } else if err != nil { |
| return Events{}, err |
| } else { |
| proc.events = evs |
| } |
| } |
| |
| ev := &proc.events[0] |
| g, init, _ := stateTransition(ev) |
| |
| // TODO(dh): This implementation matches the behavior of the |
| // upstream 'go tool trace', and works in practice, but has run into |
| // the following inconsistency during fuzzing: what happens if |
| // multiple Ps have events for the same G? While building the |
| // frontier we will check all of the events against the current |
| // state of the G. However, when we process the frontier, the state |
| // of the G changes, and a transition that was valid while building |
| // the frontier may no longer be valid when processing the frontier. |
| // Is this something that can happen for real, valid traces, or is |
| // this only possible with corrupt data? |
| if !transitionReady(g, gs[g], init) { |
| continue |
| } |
| proc.events = proc.events[1:] |
| availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i] |
| availableProcs = availableProcs[:len(availableProcs)-1] |
| frontier.Push(orderEvent{*ev, proc}) |
| |
| // We swapped the element at i with another proc, so look at the |
| // index again |
| i-- |
| } |
| |
| if len(frontier) == 0 { |
| for i := range allProcs { |
| if !allProcs[i].done { |
| return Events{}, fmt.Errorf("no consistent ordering of events possible") |
| } |
| } |
| break |
| } |
| f := frontier.Pop() |
| |
| // We're computing the state transition twice, once when computing the |
| // frontier, and now to apply the transition. This is fine because |
| // stateTransition is a pure function. Computing it again is cheaper |
| // than storing large items in the frontier. |
| g, init, next := stateTransition(&f.ev) |
| |
| // Get rid of "Local" events, they are intended merely for ordering. |
| switch f.ev.Type { |
| case EvGoStartLocal: |
| f.ev.Type = EvGoStart |
| case EvGoUnblockLocal: |
| f.ev.Type = EvGoUnblock |
| case EvGoSysExitLocal: |
| f.ev.Type = EvGoSysExit |
| } |
| events.append(f.ev) |
| |
| if err := transition(gs, g, init, next); err != nil { |
| return Events{}, err |
| } |
| availableProcs = append(availableProcs, f.proc) |
| } |
| |
| // At this point we have a consistent stream of events. Make sure time |
| // stamps respect the ordering. The tests will skip (not fail) the test case |
| // if they see this error. |
| if !sort.IsSorted(&events) { |
| return Events{}, ErrTimeOrder |
| } |
| |
| // The last part is giving correct timestamps to EvGoSysExit events. The |
| // problem with EvGoSysExit is that the actual syscall exit timestamp |
| // (ev.Args[2]) is potentially acquired long before event emission. So far |
| // we've used the timestamp of event emission (ev.Ts). We could not set ev.Ts |
| // = ev.Args[2] earlier, because it would produce seemingly broken timestamps |
| // (a misplaced event). We also can't simply update the timestamp and re-sort |
| // the events, because if timestamps are broken we will misplace the event |
| // and later report a logically broken trace (instead of reporting broken |
| // timestamps). |
| lastSysBlock := make(map[uint64]Timestamp) |
| for i := 0; i < events.Len(); i++ { |
| ev := events.Ptr(i) |
| switch ev.Type { |
| case EvGoSysBlock, EvGoInSyscall: |
| lastSysBlock[ev.G] = ev.Ts |
| case EvGoSysExit: |
| ts := Timestamp(ev.Args[2]) |
| if ts == 0 { |
| continue |
| } |
| block := lastSysBlock[ev.G] |
| if block == 0 { |
| return Events{}, fmt.Errorf("stray syscall exit") |
| } |
| if ts < block { |
| return Events{}, ErrTimeOrder |
| } |
| ev.Ts = ts |
| } |
| } |
| sort.Stable(&events) |
| |
| return events, nil |
| } |
| |
| // collectBatchesAndCPUSamples records the offsets of batches and parses CPU samples. |
| func (p *parser) collectBatchesAndCPUSamples() error { |
| // Read events. |
| var raw rawEvent |
| var curP int32 |
| for n := uint64(0); ; n++ { |
| err := p.readRawEvent(skipArgs|skipStrings, &raw) |
| if err == io.EOF { |
| break |
| } |
| if err != nil { |
| return err |
| } |
| if raw.typ == EvNone { |
| continue |
| } |
| |
| if raw.typ == EvBatch { |
| bo := batchOffset{offset: raw.batchOffset} |
| p.batchOffsets[raw.batchPid] = append(p.batchOffsets[raw.batchPid], bo) |
| curP = raw.batchPid |
| } |
| |
| batches := p.batchOffsets[curP] |
| if len(batches) == 0 { |
| return fmt.Errorf("read event %d with current P of %d, but P has no batches yet", |
| raw.typ, curP) |
| } |
| batches[len(batches)-1].numEvents++ |
| |
| if raw.typ == EvCPUSample { |
| e := Event{Type: raw.typ} |
| |
| argOffset := 1 |
| narg := raw.argNum() |
| if len(raw.args) != narg { |
| return fmt.Errorf("CPU sample has wrong number of arguments: want %d, got %d", narg, len(raw.args)) |
| } |
| for i := argOffset; i < narg; i++ { |
| if i == narg-1 { |
| e.StkID = uint32(raw.args[i]) |
| } else { |
| e.Args[i-argOffset] = raw.args[i] |
| } |
| } |
| |
| e.Ts = Timestamp(e.Args[0]) |
| e.P = int32(e.Args[1]) |
| e.G = e.Args[2] |
| e.Args[0] = 0 |
| |
| // Most events are written out by the active P at the exact moment |
| // they describe. CPU profile samples are different because they're |
| // written to the tracing log after some delay, by a separate worker |
| // goroutine, into a separate buffer. |
| // |
| // We keep these in their own batch until all of the batches are |
| // merged in timestamp order. We also (right before the merge) |
| // re-sort these events by the timestamp captured in the profiling |
| // signal handler. |
| // |
| // Note that we're not concerned about the memory usage of storing |
| // all CPU samples during the indexing phase. There are orders of |
| // magnitude fewer CPU samples than runtime events. |
| p.cpuSamples = append(p.cpuSamples, e) |
| } |
| } |
| |
| return nil |
| } |
| |
| const ( |
| skipArgs = 1 << iota |
| skipStrings |
| ) |
| |
| func (p *parser) readByte() (byte, bool) { |
| if p.off < len(p.data) && p.off >= 0 { |
| b := p.data[p.off] |
| p.off++ |
| return b, true |
| } else { |
| return 0, false |
| } |
| } |
| |
| func (p *parser) readFull(n int) ([]byte, error) { |
| if p.off >= len(p.data) || p.off < 0 || p.off+n > len(p.data) { |
| // p.off < 0 is impossible but makes BCE happy. |
| // |
| // We fail outright if there's not enough data; we don't care about |
| // partial results. |
| return nil, io.ErrUnexpectedEOF |
| } |
| buf := p.data[p.off : p.off+n] |
| p.off += n |
| return buf, nil |
| } |
| |
| // readRawEvent reads a raw event into ev. The slices in ev are only valid until |
| // the next call to readRawEvent, even when storing to a different location. |
| func (p *parser) readRawEvent(flags uint, ev *rawEvent) error { |
| // The number of arguments is encoded using two bits and can thus only |
| // represent the values 0–3. The value 3 (on the wire) indicates that |
| // arguments are prefixed by their byte length, to encode >=3 arguments. |
| const inlineArgs = 3 |
| |
| // Read event type and number of arguments (1 byte). |
| b, ok := p.readByte() |
| if !ok { |
| return io.EOF |
| } |
| typ := event.Type(b << 2 >> 2) |
| // Most events have a timestamp before the actual arguments, so we add 1 and |
| // parse it like it's the first argument. EvString has a special format and |
| // the number of arguments doesn't matter. EvBatch writes '1' as the number |
| // of arguments, but actually has two: a pid and a timestamp. However, the |
| // timestamp is the second argument, not the first; adding 1 happens to come |
| // up with the correct number, but it doesn't matter, because EvBatch has |
| // custom logic for parsing. |
| // |
| // Note that because we're adding 1, inlineArgs == 3 describes the largest |
| // number of logical arguments that isn't length-prefixed, even though the |
| // value 3 on the wire indicates length-prefixing. For us, that becomes narg |
| // == 4. |
| narg := b>>6 + 1 |
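| // For example, a header byte of 0x45 (0b01_000101) decodes to event type 5 |
| // (EvProcStart) with narg = 1+1 = 2: a timestamp and the thread id. |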
| if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > p.ver { |
| return fmt.Errorf("unknown event type %d", typ) |
| } |
| |
| switch typ { |
| case EvString: |
| if flags&skipStrings != 0 { |
| // String dictionary entry [ID, length, string]. |
| if _, err := p.readVal(); err != nil { |
| return errMalformedVarint |
| } |
| ln, err := p.readVal() |
| if err != nil { |
| return err |
| } |
| if !p.discard(ln) { |
| return fmt.Errorf("failed to read trace: %w", io.EOF) |
| } |
| } else { |
| // String dictionary entry [ID, length, string]. |
| id, err := p.readVal() |
| if err != nil { |
| return err |
| } |
| if id == 0 { |
| return errors.New("string has invalid id 0") |
| } |
| if p.strings[id] != "" { |
| return fmt.Errorf("string has duplicate id %d", id) |
| } |
| var ln uint64 |
| ln, err = p.readVal() |
| if err != nil { |
| return err |
| } |
| if ln == 0 { |
| return errors.New("string has invalid length 0") |
| } |
| if ln > 1e6 { |
| return fmt.Errorf("string has too large length %d", ln) |
| } |
| buf, err := p.readFull(int(ln)) |
| if err != nil { |
| return fmt.Errorf("failed to read trace: %w", err) |
| } |
| p.strings[id] = string(buf) |
| } |
| |
| ev.typ = EvNone |
| return nil |
| case EvBatch: |
| if want := byte(2); narg != want { |
| return fmt.Errorf("EvBatch has wrong number of arguments: got %d, want %d", narg, want) |
| } |
| |
| // -1 because we've already read the first byte of the batch |
| off := p.off - 1 |
| |
| pid, err := p.readVal() |
| if err != nil { |
| return err |
| } |
| if pid != math.MaxUint64 && pid > math.MaxInt32 { |
| return fmt.Errorf("processor ID %d is larger than maximum of %d", pid, uint64(math.MaxUint)) |
| } |
| |
| var pid32 int32 |
| if pid == math.MaxUint64 { |
| pid32 = -1 |
| } else { |
| pid32 = int32(pid) |
| } |
| |
| v, err := p.readVal() |
| if err != nil { |
| return err |
| } |
| |
| *ev = rawEvent{ |
| typ: EvBatch, |
| args: p.args[:0], |
| batchPid: pid32, |
| batchOffset: off, |
| } |
| ev.args = append(ev.args, pid, v) |
| return nil |
| default: |
| *ev = rawEvent{typ: typ, args: p.args[:0]} |
| if narg <= inlineArgs { |
| if flags&skipArgs == 0 { |
| for i := 0; i < int(narg); i++ { |
| v, err := p.readVal() |
| if err != nil { |
| return fmt.Errorf("failed to read event %d argument: %w", typ, err) |
| } |
| ev.args = append(ev.args, v) |
| } |
| } else { |
| for i := 0; i < int(narg); i++ { |
| if _, err := p.readVal(); err != nil { |
| return fmt.Errorf("failed to read event %d argument: %w", typ, errMalformedVarint) |
| } |
| } |
| } |
| } else { |
| // More than inlineArgs args; the first value is the length of the event |
| // in bytes. |
| v, err := p.readVal() |
| if err != nil { |
| return fmt.Errorf("failed to read event %d argument: %w", typ, err) |
| } |
| |
| if limit := uint64(2048); v > limit { |
| // At the time of Go 1.19, v seems to be at most 128. Set 2048 as a |
| // generous upper limit to guard against malformed traces. |
| return fmt.Errorf("failed to read event %d argument: length-prefixed argument too big: %d bytes, limit is %d", typ, v, limit) |
| } |
| |
| if flags&skipArgs == 0 || typ == EvCPUSample { |
| buf, err := p.readFull(int(v)) |
| if err != nil { |
| return fmt.Errorf("failed to read trace: %w", err) |
| } |
| for len(buf) > 0 { |
| var v uint64 |
| v, buf, err = readValFrom(buf) |
| if err != nil { |
| return err |
| } |
| ev.args = append(ev.args, v) |
| } |
| } else { |
| // Skip over arguments |
| if !p.discard(v) { |
| return fmt.Errorf("failed to read trace: %w", io.EOF) |
| } |
| } |
| if typ == EvUserLog { |
| // EvUserLog records are followed by a value string |
| if flags&skipArgs == 0 { |
| // Read string |
| s, err := p.readStr() |
| if err != nil { |
| return err |
| } |
| ev.sargs = append(ev.sargs, s) |
| } else { |
| // Skip string |
| v, err := p.readVal() |
| if err != nil { |
| return err |
| } |
| if !p.discard(v) { |
| return io.EOF |
| } |
| } |
| } |
| } |
| |
| p.args = ev.args[:0] |
| return nil |
| } |
| } |
| |
| // loadBatch loads the next batch for pid and appends its contents to events. |
| func (p *parser) loadBatch(pid int32, events []Event) ([]Event, error) { |
| offsets := p.batchOffsets[pid] |
| if len(offsets) == 0 { |
| return nil, io.EOF |
| } |
| n := offsets[0].numEvents |
| offset := offsets[0].offset |
| offsets = offsets[1:] |
| p.batchOffsets[pid] = offsets |
| |
| p.off = offset |
| |
| if cap(events) < n { |
| events = make([]Event, 0, n) |
| } |
| |
| gotHeader := false |
| var raw rawEvent |
| var ev Event |
| for { |
| err := p.readRawEvent(0, &raw) |
| if err == io.EOF { |
| break |
| } |
| if err != nil { |
| return nil, err |
| } |
| if raw.typ == EvNone || raw.typ == EvCPUSample { |
| continue |
| } |
| if raw.typ == EvBatch { |
| if gotHeader { |
| break |
| } else { |
| gotHeader = true |
| } |
| } |
| |
| err = p.parseEvent(&raw, &ev) |
| if err != nil { |
| return nil, err |
| } |
| if ev.Type != EvNone { |
| events = append(events, ev) |
| } |
| } |
| |
| return events, nil |
| } |
| |
| func (p *parser) readStr() (s string, err error) { |
| sz, err := p.readVal() |
| if err != nil { |
| return "", err |
| } |
| if sz == 0 { |
| return "", nil |
| } |
| if sz > 1e6 { |
| return "", fmt.Errorf("string is too large (len=%d)", sz) |
| } |
| buf, err := p.readFull(int(sz)) |
| if err != nil { |
| return "", fmt.Errorf("failed to read trace: %w", err) |
| } |
| return string(buf), nil |
| } |
| |
| // parseEvent transforms raw events into events. |
| // It analyzes and verifies per-event-type arguments. |
| func (p *parser) parseEvent(raw *rawEvent, ev *Event) error { |
| desc := &EventDescriptions[raw.typ] |
| if desc.Name == "" { |
| return fmt.Errorf("missing description for event type %d", raw.typ) |
| } |
| narg := raw.argNum() |
| if len(raw.args) != narg { |
| return fmt.Errorf("%s has wrong number of arguments: want %d, got %d", desc.Name, narg, len(raw.args)) |
| } |
| switch raw.typ { |
| case EvBatch: |
| p.lastGs[p.lastP] = p.lastG |
| if raw.args[0] != math.MaxUint64 && raw.args[0] > math.MaxInt32 { |
| return fmt.Errorf("processor ID %d is larger than maximum of %d", raw.args[0], uint64(math.MaxInt32)) |
| } |
| if raw.args[0] == math.MaxUint64 { |
| p.lastP = -1 |
| } else { |
| p.lastP = int32(raw.args[0]) |
| } |
| p.lastG = p.lastGs[p.lastP] |
| p.lastTs = Timestamp(raw.args[1]) |
| case EvFrequency: |
| p.ticksPerSec = int64(raw.args[0]) |
| if p.ticksPerSec <= 0 { |
| // The most likely cause for this is tick skew on different CPUs. |
| // For example, solaris/amd64 seems to have wildly different |
| // ticks on different CPUs. |
| return ErrTimeOrder |
| } |
| case EvTimerGoroutine: |
| p.timerGoids[raw.args[0]] = true |
| case EvStack: |
| if len(raw.args) < 2 { |
| return fmt.Errorf("EvStack has wrong number of arguments: want at least 2, got %d", len(raw.args)) |
| } |
| size := raw.args[1] |
| if size > 1000 { |
| return fmt.Errorf("EvStack has bad number of frames: %d", size) |
| } |
| want := 2 + 4*size |
| if uint64(len(raw.args)) != want { |
| return fmt.Errorf("EvStack has wrong number of arguments: want %d, got %d", want, len(raw.args)) |
| } |
| id := uint32(raw.args[0]) |
| if id != 0 && size > 0 { |
| stk := p.allocateStack(size) |
| for i := 0; i < int(size); i++ { |
| pc := raw.args[2+i*4+0] |
| fn := raw.args[2+i*4+1] |
| file := raw.args[2+i*4+2] |
| line := raw.args[2+i*4+3] |
| stk[i] = pc |
| |
| if _, ok := p.pcs[pc]; !ok { |
| p.pcs[pc] = Frame{PC: pc, Fn: fn, File: file, Line: int(line)} |
| } |
| } |
| p.stacks[id] = stk |
| } |
| case EvCPUSample: |
| // These events get parsed during the indexing step and don't strictly |
| // belong to the batch. |
| default: |
| *ev = Event{Type: raw.typ, P: p.lastP, G: p.lastG} |
| argOffset := 1 |
| ev.Ts = p.lastTs + Timestamp(raw.args[0]) |
| p.lastTs = ev.Ts |
| for i := argOffset; i < narg; i++ { |
| if i == narg-1 && desc.Stack { |
| ev.StkID = uint32(raw.args[i]) |
| } else { |
| ev.Args[i-argOffset] = raw.args[i] |
| } |
| } |
| switch raw.typ { |
| case EvGoStart, EvGoStartLocal, EvGoStartLabel: |
| p.lastG = ev.Args[0] |
| ev.G = p.lastG |
| case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt, |
| EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv, |
| EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, |
| EvGoSysBlock, EvGoBlockGC: |
| p.lastG = 0 |
| case EvGoSysExit, EvGoWaiting, EvGoInSyscall: |
| ev.G = ev.Args[0] |
| case EvUserTaskCreate: |
| // ev.Args 0: taskID, 1: parentID, 2: nameID |
| case EvUserRegion: |
| // ev.Args 0: taskID, 1: mode, 2: nameID |
| case EvUserLog: |
| // ev.Args 0: taskID, 1: keyID, 2: stackID, 3: messageID |
| // raw.sargs 0: message |
| |
| if id, ok := p.inlineStringsMapping[raw.sargs[0]]; ok { |
| ev.Args[3] = uint64(id) |
| } else { |
| id := len(p.inlineStrings) |
| p.inlineStringsMapping[raw.sargs[0]] = id |
| p.inlineStrings = append(p.inlineStrings, raw.sargs[0]) |
| ev.Args[3] = uint64(id) |
| } |
| } |
| |
| return nil |
| } |
| |
| ev.Type = EvNone |
| return nil |
| } |
| |
| // ErrTimeOrder is returned by Parse when the trace contains |
| // time stamps that do not respect actual event ordering. |
| var ErrTimeOrder = errors.New("time stamps out of order") |
| |
| // postProcessTrace does inter-event verification and information restoration. |
| // The resulting trace is guaranteed to be consistent |
| // (for example, a P does not run two Gs at the same time, or a G is indeed |
| // blocked before an unblock event). |
| func (p *parser) postProcessTrace(events Events) error { |
| const ( |
| gDead = iota |
| gRunnable |
| gRunning |
| gWaiting |
| ) |
| type gdesc struct { |
| state int |
| ev *Event |
| evStart *Event |
| evCreate *Event |
| evMarkAssist *Event |
| } |
| type pdesc struct { |
| running bool |
| g uint64 |
| evSweep *Event |
| } |
| |
| gs := make(map[uint64]gdesc) |
| ps := make(map[int32]pdesc) |
| tasks := make(map[uint64]*Event) // task id to task creation events |
| activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions |
| gs[0] = gdesc{state: gRunning} |
| var evGC, evSTW *Event |
| |
| checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error { |
| name := EventDescriptions[ev.Type].Name |
| if g.state != gRunning { |
| return fmt.Errorf("g %d is not running while %s (time %d)", ev.G, name, ev.Ts) |
| } |
| if p.g != ev.G { |
| return fmt.Errorf("p %d is not running g %d while %s (time %d)", ev.P, ev.G, name, ev.Ts) |
| } |
| if !allowG0 && ev.G == 0 { |
| return fmt.Errorf("g 0 did %s (time %d)", name, ev.Ts) |
| } |
| return nil |
| } |
| |
| for evIdx := 0; evIdx < events.Len(); evIdx++ { |
| ev := events.Ptr(evIdx) |
| |
| switch ev.Type { |
| case EvProcStart: |
| p := ps[ev.P] |
| if p.running { |
| return fmt.Errorf("p %d is running before start (time %d)", ev.P, ev.Ts) |
| } |
| p.running = true |
| |
| ps[ev.P] = p |
| case EvProcStop: |
| p := ps[ev.P] |
| if !p.running { |
| return fmt.Errorf("p %d is not running before stop (time %d)", ev.P, ev.Ts) |
| } |
| if p.g != 0 { |
| return fmt.Errorf("p %d is running a goroutine %d during stop (time %d)", ev.P, p.g, ev.Ts) |
| } |
| p.running = false |
| |
| ps[ev.P] = p |
| case EvGCStart: |
| if evGC != nil { |
| return fmt.Errorf("previous GC is not ended before a new one (time %d)", ev.Ts) |
| } |
| evGC = ev |
| // Attribute this to the global GC state. |
| ev.P = GCP |
| case EvGCDone: |
| if evGC == nil { |
| return fmt.Errorf("bogus GC end (time %d)", ev.Ts) |
| } |
| evGC = nil |
| case EvSTWStart: |
| evp := &evSTW |
| if *evp != nil { |
| return fmt.Errorf("previous STW is not ended before a new one (time %d)", ev.Ts) |
| } |
| *evp = ev |
| case EvSTWDone: |
| evp := &evSTW |
| if *evp == nil { |
| return fmt.Errorf("bogus STW end (time %d)", ev.Ts) |
| } |
| *evp = nil |
| case EvGCSweepStart: |
| p := ps[ev.P] |
| if p.evSweep != nil { |
| return fmt.Errorf("previous sweeping is not ended before a new one (time %d)", ev.Ts) |
| } |
| p.evSweep = ev |
| |
| ps[ev.P] = p |
| case EvGCMarkAssistStart: |
| g := gs[ev.G] |
| if g.evMarkAssist != nil { |
| return fmt.Errorf("previous mark assist is not ended before a new one (time %d)", ev.Ts) |
| } |
| g.evMarkAssist = ev |
| |
| gs[ev.G] = g |
| case EvGCMarkAssistDone: |
| // Unlike most events, mark assists can be in progress when tracing |
| // starts, so we can't report an error here. |
| g := gs[ev.G] |
| if g.evMarkAssist != nil { |
| g.evMarkAssist = nil |
| } |
| |
| gs[ev.G] = g |
| case EvGCSweepDone: |
| p := ps[ev.P] |
| if p.evSweep == nil { |
| return fmt.Errorf("bogus sweeping end (time %d)", ev.Ts) |
| } |
| p.evSweep = nil |
| |
| ps[ev.P] = p |
| case EvGoWaiting: |
| g := gs[ev.G] |
| if g.state != gRunnable { |
| return fmt.Errorf("g %d is not runnable before EvGoWaiting (time %d)", ev.G, ev.Ts) |
| } |
| g.state = gWaiting |
| g.ev = ev |
| |
| gs[ev.G] = g |
| case EvGoInSyscall: |
| g := gs[ev.G] |
| if g.state != gRunnable { |
| return fmt.Errorf("g %d is not runnable before EvGoInSyscall (time %d)", ev.G, ev.Ts) |
| } |
| g.state = gWaiting |
| g.ev = ev |
| |
| gs[ev.G] = g |
| case EvGoCreate: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if err := checkRunning(p, g, ev, true); err != nil { |
| return err |
| } |
| if _, ok := gs[ev.Args[0]]; ok { |
| return fmt.Errorf("g %d already exists (time %d)", ev.Args[0], ev.Ts) |
| } |
| gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev} |
| |
| case EvGoStart, EvGoStartLabel: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if g.state != gRunnable { |
| return fmt.Errorf("g %d is not runnable before start (time %d)", ev.G, ev.Ts) |
| } |
| if p.g != 0 { |
| return fmt.Errorf("p %d is already running g %d while start g %d (time %d)", ev.P, p.g, ev.G, ev.Ts) |
| } |
| g.state = gRunning |
| g.evStart = ev |
| p.g = ev.G |
| if g.evCreate != nil { |
| ev.StkID = uint32(g.evCreate.Args[1]) |
| g.evCreate = nil |
| } |
| |
| if g.ev != nil { |
| g.ev = nil |
| } |
| |
| gs[ev.G] = g |
| ps[ev.P] = p |
| case EvGoEnd, EvGoStop: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if err := checkRunning(p, g, ev, false); err != nil { |
| return err |
| } |
| g.evStart = nil |
| g.state = gDead |
| p.g = 0 |
| |
| if ev.Type == EvGoEnd { // flush all active regions |
| delete(activeRegions, ev.G) |
| } |
| |
| gs[ev.G] = g |
| ps[ev.P] = p |
| case EvGoSched, EvGoPreempt: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if err := checkRunning(p, g, ev, false); err != nil { |
| return err |
| } |
| g.state = gRunnable |
| g.evStart = nil |
| p.g = 0 |
| g.ev = ev |
| |
| gs[ev.G] = g |
| ps[ev.P] = p |
| case EvGoUnblock: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if g.state != gRunning { |
| return fmt.Errorf("g %d is not running while unpark (time %d)", ev.G, ev.Ts) |
| } |
| if ev.P != TimerP && p.g != ev.G { |
| return fmt.Errorf("p %d is not running g %d while unpark (time %d)", ev.P, ev.G, ev.Ts) |
| } |
| g1 := gs[ev.Args[0]] |
| if g1.state != gWaiting { |
| return fmt.Errorf("g %d is not waiting before unpark (time %d)", ev.Args[0], ev.Ts) |
| } |
| if g1.ev != nil && g1.ev.Type == EvGoBlockNet { |
| ev.P = NetpollP |
| } |
| g1.state = gRunnable |
| g1.ev = ev |
| gs[ev.Args[0]] = g1 |
| |
| case EvGoSysCall: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if err := checkRunning(p, g, ev, false); err != nil { |
| return err |
| } |
| g.ev = ev |
| |
| gs[ev.G] = g |
| case EvGoSysBlock: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if err := checkRunning(p, g, ev, false); err != nil { |
| return err |
| } |
| g.state = gWaiting |
| g.evStart = nil |
| p.g = 0 |
| |
| gs[ev.G] = g |
| ps[ev.P] = p |
| case EvGoSysExit: |
| g := gs[ev.G] |
| if g.state != gWaiting { |
| return fmt.Errorf("g %d is not waiting during syscall exit (time %d)", ev.G, ev.Ts) |
| } |
| g.state = gRunnable |
| g.ev = ev |
| |
| gs[ev.G] = g |
| case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv, |
| EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC: |
| g := gs[ev.G] |
| p := ps[ev.P] |
| if err := checkRunning(p, g, ev, false); err != nil { |
| return err |
| } |
| g.state = gWaiting |
| g.ev = ev |
| g.evStart = nil |
| p.g = 0 |
| |
| gs[ev.G] = g |
| ps[ev.P] = p |
| case EvUserTaskCreate: |
| taskid := ev.Args[0] |
| if prevEv, ok := tasks[taskid]; ok { |
| return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv) |
| } |
| tasks[ev.Args[0]] = ev |
| |
| case EvUserTaskEnd: |
| taskid := ev.Args[0] |
| delete(tasks, taskid) |
| |
| case EvUserRegion: |
| mode := ev.Args[1] |
| regions := activeRegions[ev.G] |
| if mode == 0 { // region start |
| activeRegions[ev.G] = append(regions, ev) // push |
| } else if mode == 1 { // region end |
| n := len(regions) |
| if n > 0 { // matching region start event is in the trace. |
| s := regions[n-1] |
| if s.Args[0] != ev.Args[0] || s.Args[2] != ev.Args[2] { // task id, region name mismatch |
| return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s) |
| } |
| |
| if n > 1 { |
| activeRegions[ev.G] = regions[:n-1] |
| } else { |
| delete(activeRegions, ev.G) |
| } |
| } |
| } else { |
| return fmt.Errorf("invalid user region mode: %q", ev) |
| } |
| } |
| |
| if ev.StkID != 0 && len(p.stacks[ev.StkID]) == 0 { |
| // Make sure events don't refer to stacks that don't exist or to |
| // stacks with zero frames. Neither of these should be possible, but |
| // better safe than sorry. |
| ev.StkID = 0 |
| } |
| |
| } |
| |
| // TODO(mknyszek): restore stacks for EvGoStart events. |
| return nil |
| } |
| |
| var errMalformedVarint = errors.New("malformed base-128 varint") |
| |
| // readVal reads an unsigned base-128 varint from the parser's data. |
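| // |
| // For example, the bytes 0xAC 0x02 decode to the value 300: each byte |
| // contributes seven low-order bits, least-significant group first, and a |
| // set high bit marks a continuation. |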
| func (p *parser) readVal() (uint64, error) { |
| v, n := binary.Uvarint(p.data[p.off:]) |
| if n <= 0 { |
| return 0, errMalformedVarint |
| } |
| p.off += n |
| return v, nil |
| } |
| |
| func readValFrom(buf []byte) (v uint64, rem []byte, err error) { |
| v, n := binary.Uvarint(buf) |
| if n <= 0 { |
| return 0, nil, errMalformedVarint |
| } |
| return v, buf[n:], nil |
| } |
| |
| func (ev *Event) String() string { |
| desc := &EventDescriptions[ev.Type] |
| w := new(bytes.Buffer) |
| fmt.Fprintf(w, "%d %s p=%d g=%d stk=%d", ev.Ts, desc.Name, ev.P, ev.G, ev.StkID) |
| for i, a := range desc.Args { |
| fmt.Fprintf(w, " %s=%d", a, ev.Args[i]) |
| } |
| return w.String() |
| } |
| |
| // argNum returns the total number of args for the event, accounting for |
| // timestamps, sequence numbers and differences between trace format versions. |
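| // |
| // For example, EvGoUnblock has two declared args (g, seq) plus a stack |
| // argument and a timestamp, so argNum returns 4. |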
| func (raw *rawEvent) argNum() int { |
| desc := &EventDescriptions[raw.typ] |
| if raw.typ == EvStack { |
| return len(raw.args) |
| } |
| narg := len(desc.Args) |
| if desc.Stack { |
| narg++ |
| } |
| switch raw.typ { |
| case EvBatch, EvFrequency, EvTimerGoroutine: |
| return narg |
| } |
| narg++ // timestamp |
| return narg |
| } |
| |
| // Event types in the trace. |
| // Verbatim copy from src/runtime/trace.go with the "trace" prefix removed. |
| const ( |
| EvNone event.Type = 0 // unused |
| EvBatch event.Type = 1 // start of per-P batch of events [pid, timestamp] |
| EvFrequency event.Type = 2 // contains tracer timer frequency [frequency (ticks per second)] |
| EvStack event.Type = 3 // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}] |
| EvGomaxprocs event.Type = 4 // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id] |
| EvProcStart event.Type = 5 // start of P [timestamp, thread id] |
| EvProcStop event.Type = 6 // stop of P [timestamp] |
| EvGCStart event.Type = 7 // GC start [timestamp, seq, stack id] |
| EvGCDone event.Type = 8 // GC done [timestamp] |
| EvSTWStart event.Type = 9 // STW start [timestamp, kind] |
| EvSTWDone event.Type = 10 // STW done [timestamp] |
| EvGCSweepStart event.Type = 11 // GC sweep start [timestamp, stack id] |
| EvGCSweepDone event.Type = 12 // GC sweep done [timestamp, swept, reclaimed] |
| EvGoCreate event.Type = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id] |
| EvGoStart event.Type = 14 // goroutine starts running [timestamp, goroutine id, seq] |
| EvGoEnd event.Type = 15 // goroutine ends [timestamp] |
| EvGoStop event.Type = 16 // goroutine stops (like in select{}) [timestamp, stack] |
| EvGoSched event.Type = 17 // goroutine calls Gosched [timestamp, stack] |
| EvGoPreempt event.Type = 18 // goroutine is preempted [timestamp, stack] |
| EvGoSleep event.Type = 19 // goroutine calls Sleep [timestamp, stack] |
| EvGoBlock event.Type = 20 // goroutine blocks [timestamp, stack] |
| EvGoUnblock event.Type = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack] |
| EvGoBlockSend event.Type = 22 // goroutine blocks on chan send [timestamp, stack] |
| EvGoBlockRecv event.Type = 23 // goroutine blocks on chan recv [timestamp, stack] |
| EvGoBlockSelect event.Type = 24 // goroutine blocks on select [timestamp, stack] |
| EvGoBlockSync event.Type = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack] |
| EvGoBlockCond event.Type = 26 // goroutine blocks on Cond [timestamp, stack] |
| EvGoBlockNet event.Type = 27 // goroutine blocks on network [timestamp, stack] |
| EvGoSysCall event.Type = 28 // syscall enter [timestamp, stack] |
| EvGoSysExit event.Type = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp] |
| EvGoSysBlock event.Type = 30 // syscall blocks [timestamp] |
| EvGoWaiting event.Type = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id] |
| EvGoInSyscall event.Type = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id] |
| EvHeapAlloc event.Type = 33 // gcController.heapLive change [timestamp, heap live bytes] |
| EvHeapGoal event.Type = 34 // gcController.heapGoal change [timestamp, heap goal bytes] |
| EvTimerGoroutine event.Type = 35 // denotes timer goroutine [timer goroutine id] |
| EvFutileWakeup event.Type = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp] |
| EvString event.Type = 37 // string dictionary entry [ID, length, string] |
| EvGoStartLocal event.Type = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id] |
| EvGoUnblockLocal event.Type = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack] |
| EvGoSysExitLocal event.Type = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp] |
| EvGoStartLabel event.Type = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id] |
| EvGoBlockGC event.Type = 42 // goroutine blocks on GC assist [timestamp, stack] |
| EvGCMarkAssistStart event.Type = 43 // GC mark assist start [timestamp, stack] |
| EvGCMarkAssistDone event.Type = 44 // GC mark assist done [timestamp] |
| EvUserTaskCreate event.Type = 45 // trace.NewTask [timestamp, internal task id, internal parent id, stack, name string] |
| EvUserTaskEnd event.Type = 46 // end of task [timestamp, internal task id, stack] |
| EvUserRegion event.Type = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string] |
| EvUserLog event.Type = 48 // trace.Log [timestamp, internal id, key string id, stack, value string] |
| EvCPUSample event.Type = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id] |
| EvCount event.Type = 50 |
| ) |
| |
| var EventDescriptions = [256]struct { |
| Name string |
| minVersion version.Version |
| Stack bool |
| Args []string |
| SArgs []string // string arguments |
| }{ |
| EvNone: {"None", 5, false, []string{}, nil}, |
| EvBatch: {"Batch", 5, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"} |
| EvFrequency: {"Frequency", 5, false, []string{"freq"}, nil}, // in 1.5 format it was {"freq", "unused"} |
| EvStack: {"Stack", 5, false, []string{"id", "siz"}, nil}, |
| EvGomaxprocs: {"Gomaxprocs", 5, true, []string{"procs"}, nil}, |
| EvProcStart: {"ProcStart", 5, false, []string{"thread"}, nil}, |
| EvProcStop: {"ProcStop", 5, false, []string{}, nil}, |
| EvGCStart: {"GCStart", 5, true, []string{"seq"}, nil}, // in 1.5 format it was {} |
| EvGCDone: {"GCDone", 5, false, []string{}, nil}, |
| EvSTWStart: {"GCSTWStart", 5, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0}) |
| EvSTWDone: {"GCSTWDone", 5, false, []string{}, nil}, |
| EvGCSweepStart: {"GCSweepStart", 5, true, []string{}, nil}, |
| EvGCSweepDone: {"GCSweepDone", 5, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {} |
| EvGoCreate: {"GoCreate", 5, true, []string{"g", "stack"}, nil}, |
| EvGoStart: {"GoStart", 5, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"} |
| EvGoEnd: {"GoEnd", 5, false, []string{}, nil}, |
| EvGoStop: {"GoStop", 5, true, []string{}, nil}, |
| EvGoSched: {"GoSched", 5, true, []string{}, nil}, |
| EvGoPreempt: {"GoPreempt", 5, true, []string{}, nil}, |
| EvGoSleep: {"GoSleep", 5, true, []string{}, nil}, |
| EvGoBlock: {"GoBlock", 5, true, []string{}, nil}, |
| EvGoUnblock: {"GoUnblock", 5, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"} |
| EvGoBlockSend: {"GoBlockSend", 5, true, []string{}, nil}, |
| EvGoBlockRecv: {"GoBlockRecv", 5, true, []string{}, nil}, |
| EvGoBlockSelect: {"GoBlockSelect", 5, true, []string{}, nil}, |
| EvGoBlockSync: {"GoBlockSync", 5, true, []string{}, nil}, |
| EvGoBlockCond: {"GoBlockCond", 5, true, []string{}, nil}, |
| EvGoBlockNet: {"GoBlockNet", 5, true, []string{}, nil}, |
| EvGoSysCall: {"GoSysCall", 5, true, []string{}, nil}, |
| EvGoSysExit: {"GoSysExit", 5, false, []string{"g", "seq", "ts"}, nil}, |
| EvGoSysBlock: {"GoSysBlock", 5, false, []string{}, nil}, |
| EvGoWaiting: {"GoWaiting", 5, false, []string{"g"}, nil}, |
| EvGoInSyscall: {"GoInSyscall", 5, false, []string{"g"}, nil}, |
| EvHeapAlloc: {"HeapAlloc", 5, false, []string{"mem"}, nil}, |
| EvHeapGoal: {"HeapGoal", 5, false, []string{"mem"}, nil}, |
| EvTimerGoroutine: {"TimerGoroutine", 5, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"} |
| EvFutileWakeup: {"FutileWakeup", 5, false, []string{}, nil}, |
| EvString: {"String", 7, false, []string{}, nil}, |
| EvGoStartLocal: {"GoStartLocal", 7, false, []string{"g"}, nil}, |
| EvGoUnblockLocal: {"GoUnblockLocal", 7, true, []string{"g"}, nil}, |
| EvGoSysExitLocal: {"GoSysExitLocal", 7, false, []string{"g", "ts"}, nil}, |
| EvGoStartLabel: {"GoStartLabel", 8, false, []string{"g", "seq", "labelid"}, []string{"label"}}, |
| EvGoBlockGC: {"GoBlockGC", 8, true, []string{}, nil}, |
| EvGCMarkAssistStart: {"GCMarkAssistStart", 9, true, []string{}, nil}, |
| EvGCMarkAssistDone: {"GCMarkAssistDone", 9, false, []string{}, nil}, |
| EvUserTaskCreate: {"UserTaskCreate", 11, true, []string{"taskid", "pid", "typeid"}, []string{"name"}}, |
| EvUserTaskEnd: {"UserTaskEnd", 11, true, []string{"taskid"}, nil}, |
| EvUserRegion: {"UserRegion", 11, true, []string{"taskid", "mode", "typeid"}, []string{"name"}}, |
| EvUserLog: {"UserLog", 11, true, []string{"id", "keyid"}, []string{"category", "message"}}, |
| EvCPUSample: {"CPUSample", 19, true, []string{"ts", "p", "g"}, nil}, |
| } |
| |
| //gcassert:inline |
| func (p *parser) allocateStack(size uint64) []uint64 { |
| if size == 0 { |
| return nil |
| } |
| |
| // Stacks are plentiful but small. For our "Staticcheck on std" trace with |
| // 11e6 events, we have roughly 500,000 stacks, using 200 MiB of memory. To |
| // avoid making 500,000 small allocations we allocate backing arrays 1 MiB |
| // at a time. |
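| // (1024 * 128 uint64s at 8 bytes apiece is exactly 1 MiB per backing array.) |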
| out := p.stacksData |
| if uint64(len(out)) < size { |
| out = make([]uint64, 1024*128) |
| } |
| p.stacksData = out[size:] |
| return out[:size:size] |
| } |
| |
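| // STWReason translates the kind argument of an EvSTWStart event into an |
| // STWReason, accounting for the version-specific meaning of the argument: |
| // before 1.21 only kinds 0 (mark termination) and 1 (sweep termination) |
| // exist, and in 1.21 the kind directly indexes the STWReason values below. |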
| func (tr *Trace) STWReason(kindID uint64) STWReason { |
| if tr.Version < 21 { |
| if kindID == 0 || kindID == 1 { |
| return STWReason(kindID + 1) |
| } else { |
| return STWUnknown |
| } |
| } else if tr.Version == 21 { |
| if kindID < NumSTWReasons { |
| return STWReason(kindID) |
| } else { |
| return STWUnknown |
| } |
| } else { |
| return STWUnknown |
| } |
| } |
| |
| type STWReason int |
| |
| const ( |
| STWUnknown STWReason = 0 |
| STWGCMarkTermination STWReason = 1 |
| STWGCSweepTermination STWReason = 2 |
| STWWriteHeapDump STWReason = 3 |
| STWGoroutineProfile STWReason = 4 |
| STWGoroutineProfileCleanup STWReason = 5 |
| STWAllGoroutinesStackTrace STWReason = 6 |
| STWReadMemStats STWReason = 7 |
| STWAllThreadsSyscall STWReason = 8 |
| STWGOMAXPROCS STWReason = 9 |
| STWStartTrace STWReason = 10 |
| STWStopTrace STWReason = 11 |
| STWCountPagesInUse STWReason = 12 |
| STWReadMetricsSlow STWReason = 13 |
| STWReadMemStatsSlow STWReason = 14 |
| STWPageCachePagesLeaked STWReason = 15 |
| STWResetDebugLog STWReason = 16 |
| |
| NumSTWReasons = 17 |
| ) |