trace: regenerate trace from go@6133c1e

[git-generate]
cd trace
./gen.bash $HOME/work/go

Change-Id: I5ed79135627101fced5a2c78f2dd1d8504e19918
Reviewed-on: https://go-review.googlesource.com/c/exp/+/571616
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
diff --git a/trace/cmd/gotraceraw/main.go b/trace/cmd/gotraceraw/main.go
index 840548f..c544141 100644
--- a/trace/cmd/gotraceraw/main.go
+++ b/trace/cmd/gotraceraw/main.go
@@ -73,11 +73,9 @@
 		}
 		if err != nil {
 			log.Fatal(err)
-			break
 		}
 		if err := tw.WriteEvent(ev); err != nil {
 			log.Fatal(err)
-			break
 		}
 	}
 }
diff --git a/trace/generation.go b/trace/generation.go
index 28ec773..4ff535a 100644
--- a/trace/generation.go
+++ b/trace/generation.go
@@ -332,7 +332,7 @@
 // sample contained therein to the provided samples list.
 func addCPUSamples(samples []cpuSample, b batch) ([]cpuSample, error) {
 	if !b.isCPUSamplesBatch() {
-		return nil, fmt.Errorf("internal error: addStrings called on non-string batch")
+		return nil, fmt.Errorf("internal error: addCPUSamples called on non-CPU-sample batch")
 	}
 	r := bytes.NewReader(b.data)
 	hdr, err := r.ReadByte() // Consume the EvCPUSamples byte.
diff --git a/trace/internal/event/go122/event.go b/trace/internal/event/go122/event.go
index 9d0977e..a77fb8b 100644
--- a/trace/internal/event/go122/event.go
+++ b/trace/internal/event/go122/event.go
@@ -45,7 +45,7 @@
 	EvGoSyscallBegin      // syscall enter [timestamp, P seq, stack ID]
 	EvGoSyscallEnd        // syscall exit [timestamp]
 	EvGoSyscallEndBlocked // syscall exit and it blocked at some point [timestamp]
-	EvGoStatus            // goroutine status at the start of a generation [timestamp, goroutine ID, status]
+	EvGoStatus            // goroutine status at the start of a generation [timestamp, goroutine ID, thread ID, status]
 
 	// STW.
 	EvSTWBegin // STW start [timestamp, kind]
@@ -70,7 +70,7 @@
 	EvUserTaskEnd     // end of a task [timestamp, internal task ID, stack ID]
 	EvUserRegionBegin // trace.{Start,With}Region [timestamp, internal task ID, name string ID, stack ID]
 	EvUserRegionEnd   // trace.{End,With}Region [timestamp, internal task ID, name string ID, stack ID]
-	EvUserLog         // trace.Log [timestamp, internal task ID, key string ID, stack, value string ID]
+	EvUserLog         // trace.Log [timestamp, internal task ID, key string ID, value string ID, stack]
 )
 
 // EventString returns the name of a Go 1.22 event.
@@ -112,7 +112,7 @@
 	},
 	EvCPUSample: {
 		Name: "CPUSample",
-		Args: []string{"time", "p", "g", "m", "stack"},
+		Args: []string{"time", "m", "p", "g", "stack"},
 		// N.B. There's clearly a timestamp here, but these Events
 		// are special in that they don't appear in the regular
 		// M streams.
diff --git a/trace/internal/oldtrace/order.go b/trace/internal/oldtrace/order.go
new file mode 100644
index 0000000..78f08180
--- /dev/null
+++ b/trace/internal/oldtrace/order.go
@@ -0,0 +1,190 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by "gen.bash" from internal/trace/v2; DO NOT EDIT.
+
+//go:build go1.21
+
+package oldtrace
+
+import "errors"
+
+type orderEvent struct {
+	ev   Event
+	proc *proc
+}
+
+type gStatus int
+
+type gState struct {
+	seq    uint64
+	status gStatus
+}
+
+const (
+	gDead gStatus = iota
+	gRunnable
+	gRunning
+	gWaiting
+
+	unordered = ^uint64(0)
+	garbage   = ^uint64(0) - 1
+	noseq     = ^uint64(0)
+	seqinc    = ^uint64(0) - 1
+)
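+
+// unordered and garbage are sentinel values for the goroutine ordering key
+// returned by stateTransition; garbage stands in for the GC, which is ordered
+// like a pseudo-goroutine. noseq and seqinc are sentinel sequence numbers.
+// The pairs deliberately share bit patterns, as they occur in disjoint
+// contexts (goroutine IDs versus sequence numbers).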
+
+// stateTransition returns goroutine state (sequence and status) when the event
+// becomes ready for merging (init) and the goroutine state after the event (next).
+func stateTransition(ev *Event) (g uint64, init, next gState) {
+	// Note that we have an explicit return in each case, as that produces slightly better code (tested on Go 1.19).
+
+	switch ev.Type {
+	case EvGoCreate:
+		g = ev.Args[0]
+		init = gState{0, gDead}
+		next = gState{1, gRunnable}
+		return
+	case EvGoWaiting, EvGoInSyscall:
+		g = ev.G
+		init = gState{1, gRunnable}
+		next = gState{2, gWaiting}
+		return
+	case EvGoStart, EvGoStartLabel:
+		g = ev.G
+		init = gState{ev.Args[1], gRunnable}
+		next = gState{ev.Args[1] + 1, gRunning}
+		return
+	case EvGoStartLocal:
+		// noseq means that this event is ready for merging as soon as the
+		// frontier reaches it (EvGoStartLocal is emitted on the same P as the
+		// corresponding EvGoCreate/EvGoUnblock, and thus the latter is already
+		// merged).
+		// seqinc is a stub for cases when an event increments the g sequence,
+		// but since we don't know the current seq we also don't know the next
+		// seq.
+		g = ev.G
+		init = gState{noseq, gRunnable}
+		next = gState{seqinc, gRunning}
+		return
+	case EvGoBlock, EvGoBlockSend, EvGoBlockRecv, EvGoBlockSelect,
+		EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoSleep,
+		EvGoSysBlock, EvGoBlockGC:
+		g = ev.G
+		init = gState{noseq, gRunning}
+		next = gState{noseq, gWaiting}
+		return
+	case EvGoSched, EvGoPreempt:
+		g = ev.G
+		init = gState{noseq, gRunning}
+		next = gState{noseq, gRunnable}
+		return
+	case EvGoUnblock, EvGoSysExit:
+		g = ev.Args[0]
+		init = gState{ev.Args[1], gWaiting}
+		next = gState{ev.Args[1] + 1, gRunnable}
+		return
+	case EvGoUnblockLocal, EvGoSysExitLocal:
+		g = ev.Args[0]
+		init = gState{noseq, gWaiting}
+		next = gState{seqinc, gRunnable}
+		return
+	case EvGCStart:
+		g = garbage
+		init = gState{ev.Args[0], gDead}
+		next = gState{ev.Args[0] + 1, gDead}
+		return
+	default:
+		// no ordering requirements
+		g = unordered
+		return
+	}
+}
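+
+// For example (illustrative): an EvGoUnblock event with Args[0] == 7 (the
+// unblocked goroutine) and Args[1] == 3 (its sequence number) becomes ready
+// to merge once goroutine 7 reaches state {seq: 3, status: gWaiting}; merging
+// it advances goroutine 7 to {seq: 4, status: gRunnable}.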
+
+func transitionReady(g uint64, curr, init gState) bool {
+	return g == unordered || (init.seq == noseq || init.seq == curr.seq) && init.status == curr.status
+}
+
+func transition(gs map[uint64]gState, g uint64, init, next gState) error {
+	if g == unordered {
+		return nil
+	}
+	curr := gs[g]
+	if !transitionReady(g, curr, init) {
+		// See comment near the call to transition, where we're building the frontier, for details on how this could
+		// possibly happen.
+		return errors.New("encountered impossible goroutine state transition")
+	}
+	switch next.seq {
+	case noseq:
+		next.seq = curr.seq
+	case seqinc:
+		next.seq = curr.seq + 1
+	}
+	gs[g] = next
+	return nil
+}
+
+type orderEventList []orderEvent
+
+func (l *orderEventList) Less(i, j int) bool {
+	return (*l)[i].ev.Ts < (*l)[j].ev.Ts
+}
+
+type eventList []Event
+
+func (l *eventList) Len() int {
+	return len(*l)
+}
+
+func (l *eventList) Less(i, j int) bool {
+	return (*l)[i].Ts < (*l)[j].Ts
+}
+
+func (l *eventList) Swap(i, j int) {
+	(*l)[i], (*l)[j] = (*l)[j], (*l)[i]
+}
+
+func (h *orderEventList) Push(x orderEvent) {
+	*h = append(*h, x)
+	heapUp(h, len(*h)-1)
+}
+
+func (h *orderEventList) Pop() orderEvent {
+	n := len(*h) - 1
+	(*h)[0], (*h)[n] = (*h)[n], (*h)[0]
+	heapDown(h, 0, n)
+	x := (*h)[len(*h)-1]
+	*h = (*h)[:len(*h)-1]
+	return x
+}
+
+func heapUp(h *orderEventList, j int) {
+	for {
+		i := (j - 1) / 2 // parent
+		if i == j || !h.Less(j, i) {
+			break
+		}
+		(*h)[i], (*h)[j] = (*h)[j], (*h)[i]
+		j = i
+	}
+}
+
+func heapDown(h *orderEventList, i0, n int) bool {
+	i := i0
+	for {
+		j1 := 2*i + 1
+		if j1 >= n || j1 < 0 { // j1 < 0 after int overflow
+			break
+		}
+		j := j1 // left child
+		if j2 := j1 + 1; j2 < n && h.Less(j2, j1) {
+			j = j2 // = 2*i + 2  // right child
+		}
+		if !h.Less(j, i) {
+			break
+		}
+		(*h)[i], (*h)[j] = (*h)[j], (*h)[i]
+		i = j
+	}
+	return i > i0
+}
diff --git a/trace/internal/oldtrace/parser.go b/trace/internal/oldtrace/parser.go
new file mode 100644
index 0000000..8ab3773
--- /dev/null
+++ b/trace/internal/oldtrace/parser.go
@@ -0,0 +1,1544 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by "gen.bash" from internal/trace/v2; DO NOT EDIT.
+
+//go:build go1.21
+
+// Package oldtrace implements a parser for Go execution traces from versions
+// 1.11–1.21.
+//
+// The package started as a copy of Go 1.19's internal/trace, but has been
+// optimized to be faster while using less memory and fewer allocations. It has
+// been further modified for the specific purpose of converting traces to the
+// new 1.22+ format.
+package oldtrace
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"golang.org/x/exp/trace/internal/event"
+	"golang.org/x/exp/trace/internal/version"
+	"io"
+	"math"
+	"sort"
+)
+
+// Timestamp represents a count of nanoseconds since the beginning of the
+// trace. Timestamps can only be meaningfully compared with other timestamps
+// from the same trace.
+type Timestamp int64
+
+// Event describes one event in the trace.
+type Event struct {
+	// The Event type is carefully laid out to optimize its size and to avoid
+	// pointers, the latter so that the garbage collector won't have to scan any
+	// memory of our millions of events.
+
+	Ts    Timestamp  // timestamp in nanoseconds
+	G     uint64     // G on which the event happened
+	Args  [4]uint64  // event-type-specific arguments
+	StkID uint32     // unique stack ID
+	P     int32      // P on which the event happened (can be a real P or one of TimerP, NetpollP, SyscallP)
+	Type  event.Type // one of Ev*
+}
+
+// Frame is a frame in stack traces.
+type Frame struct {
+	PC uint64
+	// string ID of the function name
+	Fn uint64
+	// string ID of the file name
+	File uint64
+	Line int
+}
+
+const (
+	// Special P identifiers:
+	FakeP    = 1000000 + iota
+	TimerP   // contains timer unblocks
+	NetpollP // contains network unblocks
+	SyscallP // contains returns from syscalls
+	GCP      // contains GC state
+	ProfileP // contains recording of CPU profile samples
+)
+
+// Trace is the result of Parse.
+type Trace struct {
+	Version version.Version
+
+	// Events is the sorted list of Events in the trace.
+	Events Events
+	// Stacks contains the stack traces (stored as slices of PCs), keyed by
+	// stack IDs from the trace.
+	Stacks        map[uint32][]uint64
+	PCs           map[uint64]Frame
+	Strings       map[uint64]string
+	InlineStrings []string
+}
+
+// batchOffset records the byte offset of, and number of events in, a batch. A
+// batch is a sequence of events emitted by a P. Events within a single batch
+// are sorted by time.
+type batchOffset struct {
+	offset    int
+	numEvents int
+}
+
+type parser struct {
+	ver  version.Version
+	data []byte
+	off  int
+
+	strings              map[uint64]string
+	inlineStrings        []string
+	inlineStringsMapping map[string]int
+	// map from Ps to their batch offsets
+	batchOffsets map[int32][]batchOffset
+	stacks       map[uint32][]uint64
+	stacksData   []uint64
+	ticksPerSec  int64
+	pcs          map[uint64]Frame
+	cpuSamples   []Event
+	timerGoids   map[uint64]bool
+
+	// state for readRawEvent
+	args []uint64
+
+	// state for parseEvent
+	lastTs Timestamp
+	lastG  uint64
+	// map from Ps to the last Gs that ran on them
+	lastGs map[int32]uint64
+	lastP  int32
+}
+
+func (p *parser) discard(n uint64) bool {
+	if n > math.MaxInt {
+		return false
+	}
+	if noff := p.off + int(n); noff < p.off || noff > len(p.data) {
+		return false
+	} else {
+		p.off = noff
+	}
+	return true
+}
+
+func newParser(r io.Reader, ver version.Version) (*parser, error) {
+	var buf []byte
+	if seeker, ok := r.(io.Seeker); ok {
+		// Determine the size of the reader so that we can allocate a buffer
+		// without having to grow it later.
+		cur, err := seeker.Seek(0, io.SeekCurrent)
+		if err != nil {
+			return nil, err
+		}
+		end, err := seeker.Seek(0, io.SeekEnd)
+		if err != nil {
+			return nil, err
+		}
+		_, err = seeker.Seek(cur, io.SeekStart)
+		if err != nil {
+			return nil, err
+		}
+
+		buf = make([]byte, end-cur)
+		_, err = io.ReadFull(r, buf)
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		var err error
+		buf, err = io.ReadAll(r)
+		if err != nil {
+			return nil, err
+		}
+	}
+	return &parser{data: buf, ver: ver, timerGoids: make(map[uint64]bool)}, nil
+}
+
+// Parse parses Go execution traces from versions 1.11–1.21. The provided reader
+// will be read to completion and the entire trace will be materialized in
+// memory. That is, this function does not allow incremental parsing.
+//
+// The reader has to be positioned just after the trace header and vers needs to
+// be the version of the trace. This can be achieved by using
+// version.ReadHeader.
+func Parse(r io.Reader, vers version.Version) (Trace, error) {
+	// We accept the version as an argument because golang.org/x/exp/trace will have
+	// already read the version to determine which parser to use.
+	p, err := newParser(r, vers)
+	if err != nil {
+		return Trace{}, err
+	}
+	return p.parse()
+}
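+
+// A minimal usage sketch (illustrative; it assumes r is an io.Reader
+// positioned at the start of a trace file and that version.ReadHeader
+// consumes exactly the header):
+//
+//	ver, err := version.ReadHeader(r)
+//	if err != nil {
+//		// handle err
+//	}
+//	tr, err := Parse(r, ver)
+//	if err != nil {
+//		// handle err
+//	}
+//	_ = tr.Events.Len()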
+
+// parse parses, post-processes and verifies the trace.
+func (p *parser) parse() (Trace, error) {
+	defer func() {
+		p.data = nil
+	}()
+
+	// We parse a trace by running the following steps in order:
+	//
+	// 1. In the initial pass we collect information about batches (their
+	//    locations and sizes). We also parse CPU profiling samples in this
+	//    step, simply to reduce the number of full passes that we need.
+	//
+	// 2. In the second pass we parse batches and merge them into a globally
+	//    ordered event stream. This uses the batch information from the first
+	//    pass to quickly find batches.
+	//
+	// 3. After all events have been parsed we convert their timestamps from CPU
+	//    ticks to wall time. Furthermore we move timers and syscalls to
+	//    dedicated, fake Ps.
+	//
+	// 4. Finally, we validate the trace.
+
+	p.strings = make(map[uint64]string)
+	p.batchOffsets = make(map[int32][]batchOffset)
+	p.lastGs = make(map[int32]uint64)
+	p.stacks = make(map[uint32][]uint64)
+	p.pcs = make(map[uint64]Frame)
+	p.inlineStringsMapping = make(map[string]int)
+
+	if err := p.collectBatchesAndCPUSamples(); err != nil {
+		return Trace{}, err
+	}
+
+	events, err := p.parseEventBatches()
+	if err != nil {
+		return Trace{}, err
+	}
+
+	if p.ticksPerSec == 0 {
+		return Trace{}, errors.New("no EvFrequency event")
+	}
+
+	if events.Len() > 0 {
+		// Translate cpu ticks to real time.
+		minTs := events.Ptr(0).Ts
+		// Use floating point to avoid integer overflows.
+		freq := 1e9 / float64(p.ticksPerSec)
+		for i := 0; i < events.Len(); i++ {
+			ev := events.Ptr(i)
+			ev.Ts = Timestamp(float64(ev.Ts-minTs) * freq)
+			// Move timers and syscalls to separate fake Ps.
+			if p.timerGoids[ev.G] && ev.Type == EvGoUnblock {
+				ev.P = TimerP
+			}
+			if ev.Type == EvGoSysExit {
+				ev.P = SyscallP
+			}
+		}
+	}
+
+	if err := p.postProcessTrace(events); err != nil {
+		return Trace{}, err
+	}
+
+	res := Trace{
+		Version:       p.ver,
+		Events:        events,
+		Stacks:        p.stacks,
+		Strings:       p.strings,
+		InlineStrings: p.inlineStrings,
+		PCs:           p.pcs,
+	}
+	return res, nil
+}
+
+// rawEvent is a helper type used during parsing.
+type rawEvent struct {
+	typ   event.Type
+	args  []uint64
+	sargs []string
+
+	// if typ == EvBatch, these fields describe the batch.
+	batchPid    int32
+	batchOffset int
+}
+
+type proc struct {
+	pid int32
+	// the remaining events in the current batch
+	events []Event
+	// buffer for reading batches into, aliased by proc.events
+	buf []Event
+
+	// there are no more batches left
+	done bool
+}
+
+const eventsBucketSize = 524288 // 524288 events at 64 bytes per Event = 32 MiB per bucket
+
+type Events struct {
+	// Events is a slice of slices that grows one slice of size eventsBucketSize
+	// at a time. This avoids the O(n) cost of slice growth in append, and
+	// additionally allows consumers to drop references to parts of the data,
+	// freeing memory piecewise.
+	n       int
+	buckets []*[eventsBucketSize]Event
+	off     int
+}
+
+// grow grows the slice by one and returns a pointer to the new element, without
+// overwriting it.
+func (l *Events) grow() *Event {
+	a, b := l.index(l.n)
+	if a >= len(l.buckets) {
+		l.buckets = append(l.buckets, new([eventsBucketSize]Event))
+	}
+	ptr := &l.buckets[a][b]
+	l.n++
+	return ptr
+}
+
+// append appends v to the slice and returns a pointer to the new element.
+func (l *Events) append(v Event) *Event {
+	ptr := l.grow()
+	*ptr = v
+	return ptr
+}
+
+func (l *Events) Ptr(i int) *Event {
+	a, b := l.index(i + l.off)
+	return &l.buckets[a][b]
+}
+
+func (l *Events) index(i int) (int, int) {
+	// Doing the division on uint instead of int compiles this function to a
+	// shift and an AND (for power of 2 bucket sizes), versus a whole bunch of
+	// instructions for int.
+	return int(uint(i) / eventsBucketSize), int(uint(i) % eventsBucketSize)
+}
+
+func (l *Events) Len() int {
+	return l.n - l.off
+}
+
+func (l *Events) Less(i, j int) bool {
+	return l.Ptr(i).Ts < l.Ptr(j).Ts
+}
+
+func (l *Events) Swap(i, j int) {
+	*l.Ptr(i), *l.Ptr(j) = *l.Ptr(j), *l.Ptr(i)
+}
+
+func (l *Events) Pop() (*Event, bool) {
+	if l.off == l.n {
+		return nil, false
+	}
+	a, b := l.index(l.off)
+	ptr := &l.buckets[a][b]
+	l.off++
+	if b == eventsBucketSize-1 || l.off == l.n {
+		// We've consumed the last event from the bucket, so drop the bucket and
+		// allow GC to collect it.
+		l.buckets[a] = nil
+	}
+	return ptr, true
+}
+
+func (l *Events) All() func(yield func(ev *Event) bool) {
+	return func(yield func(ev *Event) bool) {
+		for i := 0; i < l.Len(); i++ {
+			a, b := l.index(i + l.off)
+			ptr := &l.buckets[a][b]
+			if !yield(ptr) {
+				return
+			}
+		}
+	}
+}
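+
+// Illustrative use of All (a sketch, not part of the generated API): stream
+// every event without retaining whole buckets.
+//
+//	evs.All()(func(ev *Event) bool {
+//		_ = ev.Ts // process the event here
+//		return true
+//	})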
+
+// parseEventBatches reads per-P event batches and merges them into a single,
+// consistent stream. The high-level idea is as follows. Events within an
+// individual batch are in the correct order, because they are emitted by a
+// single P. So we need to produce a correct interleaving of the batches. To do
+// this we take the first unmerged event from each batch (the frontier). Then
+// we choose the subset that is "ready" to be merged, that is, events for which
+// all dependencies have already been merged. Then we choose the event with the
+// lowest timestamp from the subset, merge it, and repeat. This approach
+// ensures that we form a consistent stream even if timestamps are incorrect (a
+// condition observed on some machines).
+func (p *parser) parseEventBatches() (Events, error) {
+	// The ordering of CPU profile sample events in the data stream is based on
+	// when each run of the signal handler was able to acquire the spinlock,
+	// with original timestamps corresponding to when ReadTrace pulled the data
+	// off of the profBuf queue. Re-sort them by the timestamp we captured
+	// inside the signal handler.
+	sort.Sort((*eventList)(&p.cpuSamples))
+
+	allProcs := make([]proc, 0, len(p.batchOffsets))
+	for pid := range p.batchOffsets {
+		allProcs = append(allProcs, proc{pid: pid})
+	}
+	allProcs = append(allProcs, proc{pid: ProfileP, events: p.cpuSamples})
+
+	events := Events{}
+
+	// Merge events as long as at least one P has more events
+	gs := make(map[uint64]gState)
+	// Note: technically we don't need a priority queue here. We're only ever
+	// interested in the earliest eligible event, which means we just have to
+	// track the smallest element. However, in practice, the priority queue
+	// performs better, because for each event we only have to compute its state
+	// transition once, not on each iteration. If it was eligible before, it'll
+	// already be in the queue. Furthermore, on average, we only have one P to
+	// look at in each iteration, because all other Ps are already in the queue.
+	var frontier orderEventList
+
+	availableProcs := make([]*proc, len(allProcs))
+	for i := range allProcs {
+		availableProcs[i] = &allProcs[i]
+	}
+	for {
+	pidLoop:
+		for i := 0; i < len(availableProcs); i++ {
+			proc := availableProcs[i]
+
+			for len(proc.events) == 0 {
+				// Call loadBatch in a loop because sometimes batches are empty
+				evs, err := p.loadBatch(proc.pid, proc.buf[:0])
+				proc.buf = evs[:0]
+				if err == io.EOF {
+					// This P has no more events
+					proc.done = true
+					availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
+					availableProcs = availableProcs[:len(availableProcs)-1]
+					// We swapped the element at i with another proc, so look at
+					// the index again
+					i--
+					continue pidLoop
+				} else if err != nil {
+					return Events{}, err
+				} else {
+					proc.events = evs
+				}
+			}
+
+			ev := &proc.events[0]
+			g, init, _ := stateTransition(ev)
+
+			// TODO(dh): This implementation matches the behavior of the
+			// upstream 'go tool trace', and works in practice, but has run into
+			// the following inconsistency during fuzzing: what happens if
+			// multiple Ps have events for the same G? While building the
+			// frontier we will check all of the events against the current
+			// state of the G. However, when we process the frontier, the state
+			// of the G changes, and a transition that was valid while building
+			// the frontier may no longer be valid when processing the frontier.
+			// Is this something that can happen for real, valid traces, or is
+			// this only possible with corrupt data?
+			if !transitionReady(g, gs[g], init) {
+				continue
+			}
+			proc.events = proc.events[1:]
+			availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
+			availableProcs = availableProcs[:len(availableProcs)-1]
+			frontier.Push(orderEvent{*ev, proc})
+
+			// We swapped the element at i with another proc, so look at the
+			// index again
+			i--
+		}
+
+		if len(frontier) == 0 {
+			for i := range allProcs {
+				if !allProcs[i].done {
+					return Events{}, fmt.Errorf("no consistent ordering of events possible")
+				}
+			}
+			break
+		}
+		f := frontier.Pop()
+
+		// We're computing the state transition twice, once when computing the
+		// frontier, and now to apply the transition. This is fine because
+		// stateTransition is a pure function. Computing it again is cheaper
+		// than storing large items in the frontier.
+		g, init, next := stateTransition(&f.ev)
+
+		// Get rid of "Local" events, they are intended merely for ordering.
+		switch f.ev.Type {
+		case EvGoStartLocal:
+			f.ev.Type = EvGoStart
+		case EvGoUnblockLocal:
+			f.ev.Type = EvGoUnblock
+		case EvGoSysExitLocal:
+			f.ev.Type = EvGoSysExit
+		}
+		events.append(f.ev)
+
+		if err := transition(gs, g, init, next); err != nil {
+			return Events{}, err
+		}
+		availableProcs = append(availableProcs, f.proc)
+	}
+
+	// At this point we have a consistent stream of events. Make sure
+	// timestamps respect the ordering. The tests will skip (not fail) the test
+	// case if they see this error.
+	if !sort.IsSorted(&events) {
+		return Events{}, ErrTimeOrder
+	}
+
+	// The last part is giving correct timestamps to EvGoSysExit events. The
+	// problem with EvGoSysExit is that the actual syscall exit timestamp
+	// (ev.Args[2]) is potentially acquired long before event emission. So far
+	// we've used the timestamp of event emission (ev.Ts). We could not set
+	// ev.Ts = ev.Args[2] earlier, because it would produce seemingly broken
+	// timestamps (misplaced events). We also can't simply update the timestamp
+	// and re-sort events, because if timestamps are broken we will misplace
+	// the event and later report a logically broken trace (instead of
+	// reporting broken timestamps).
+	lastSysBlock := make(map[uint64]Timestamp)
+	for i := 0; i < events.Len(); i++ {
+		ev := events.Ptr(i)
+		switch ev.Type {
+		case EvGoSysBlock, EvGoInSyscall:
+			lastSysBlock[ev.G] = ev.Ts
+		case EvGoSysExit:
+			ts := Timestamp(ev.Args[2])
+			if ts == 0 {
+				continue
+			}
+			block := lastSysBlock[ev.G]
+			if block == 0 {
+				return Events{}, fmt.Errorf("stray syscall exit")
+			}
+			if ts < block {
+				return Events{}, ErrTimeOrder
+			}
+			ev.Ts = ts
+		}
+	}
+	sort.Stable(&events)
+
+	return events, nil
+}
+
+// collectBatchesAndCPUSamples records the offsets of batches and parses CPU samples.
+func (p *parser) collectBatchesAndCPUSamples() error {
+	// Read events.
+	var raw rawEvent
+	var curP int32
+	for n := uint64(0); ; n++ {
+		err := p.readRawEvent(skipArgs|skipStrings, &raw)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return err
+		}
+		if raw.typ == EvNone {
+			continue
+		}
+
+		if raw.typ == EvBatch {
+			bo := batchOffset{offset: raw.batchOffset}
+			p.batchOffsets[raw.batchPid] = append(p.batchOffsets[raw.batchPid], bo)
+			curP = raw.batchPid
+		}
+
+		batches := p.batchOffsets[curP]
+		if len(batches) == 0 {
+			return fmt.Errorf("read event %d with current P of %d, but P has no batches yet",
+				raw.typ, curP)
+		}
+		batches[len(batches)-1].numEvents++
+
+		if raw.typ == EvCPUSample {
+			e := Event{Type: raw.typ}
+
+			argOffset := 1
+			narg := raw.argNum()
+			if len(raw.args) != narg {
+				return fmt.Errorf("CPU sample has wrong number of arguments: want %d, got %d", narg, len(raw.args))
+			}
+			for i := argOffset; i < narg; i++ {
+				if i == narg-1 {
+					e.StkID = uint32(raw.args[i])
+				} else {
+					e.Args[i-argOffset] = raw.args[i]
+				}
+			}
+
+			e.Ts = Timestamp(e.Args[0])
+			e.P = int32(e.Args[1])
+			e.G = e.Args[2]
+			e.Args[0] = 0
+
+			// Most events are written out by the active P at the exact moment
+			// they describe. CPU profile samples are different because they're
+			// written to the tracing log after some delay, by a separate worker
+			// goroutine, into a separate buffer.
+			//
+			// We keep these in their own batch until all of the batches are
+			// merged in timestamp order. We also (right before the merge)
+			// re-sort these events by the timestamp captured in the profiling
+			// signal handler.
+			//
+			// Note that we're not concerned about the memory usage of storing
+			// all CPU samples during the indexing phase. There are orders of
+			// magnitude fewer CPU samples than runtime events.
+			p.cpuSamples = append(p.cpuSamples, e)
+		}
+	}
+
+	return nil
+}
+
+const (
+	skipArgs = 1 << iota
+	skipStrings
+)
+
+func (p *parser) readByte() (byte, bool) {
+	if p.off < len(p.data) && p.off >= 0 {
+		b := p.data[p.off]
+		p.off++
+		return b, true
+	} else {
+		return 0, false
+	}
+}
+
+func (p *parser) readFull(n int) ([]byte, error) {
+	if p.off >= len(p.data) || p.off < 0 || p.off+n > len(p.data) {
+		// p.off < 0 is impossible but makes BCE happy.
+		//
+		// We do fail outright if there's not enough data; we don't care about
+		// partial results.
+		return nil, io.ErrUnexpectedEOF
+	}
+	buf := p.data[p.off : p.off+n]
+	p.off += n
+	return buf, nil
+}
+
+// readRawEvent reads a raw event into ev. The slices in ev are only valid until
+// the next call to readRawEvent, even when storing to a different location.
+func (p *parser) readRawEvent(flags uint, ev *rawEvent) error {
+	// The number of arguments is encoded using two bits and can thus only
+	// represent the values 0–3. The value 3 (on the wire) indicates that
+	// arguments are prefixed by their byte length, to encode >=3 arguments.
+	const inlineArgs = 3
+
+	// Read event type and number of arguments (1 byte).
+	b, ok := p.readByte()
+	if !ok {
+		return io.EOF
+	}
+	typ := event.Type(b << 2 >> 2)
+	// Most events have a timestamp before the actual arguments, so we add 1
+	// and parse it like it's the first argument. EvString has a special format
+	// and the number of arguments doesn't matter. EvBatch writes '1' as the
+	// number of arguments, but actually has two: a pid and a timestamp. Here
+	// the timestamp is the second argument, not the first; adding 1 happens to
+	// come up with the correct number, but that doesn't matter, because EvBatch
+	// has custom logic for parsing.
+	//
+	// Note that because we're adding 1, inlineArgs == 3 describes the largest
+	// number of logical arguments that isn't length-prefixed, even though the
+	// value 3 on the wire indicates length-prefixing. For us, that becomes narg
+	// == 4.
+	narg := b>>6 + 1
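+	// For example (illustrative, not taken from a real trace): a header byte
+	// of 0x45 == 0b01_000101 yields typ == 5 (EvProcStart) and b>>6 == 1, so
+	// narg == 2: the timestamp plus the thread ID.
+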
+	if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > p.ver {
+		return fmt.Errorf("unknown event type %d", typ)
+	}
+
+	switch typ {
+	case EvString:
+		if flags&skipStrings != 0 {
+			// String dictionary entry [ID, length, string].
+			if _, err := p.readVal(); err != nil {
+				return errMalformedVarint
+			}
+			ln, err := p.readVal()
+			if err != nil {
+				return err
+			}
+			if !p.discard(ln) {
+				return fmt.Errorf("failed to read trace: %w", io.EOF)
+			}
+		} else {
+			// String dictionary entry [ID, length, string].
+			id, err := p.readVal()
+			if err != nil {
+				return err
+			}
+			if id == 0 {
+				return errors.New("string has invalid id 0")
+			}
+			if p.strings[id] != "" {
+				return fmt.Errorf("string has duplicate id %d", id)
+			}
+			var ln uint64
+			ln, err = p.readVal()
+			if err != nil {
+				return err
+			}
+			if ln == 0 {
+				return errors.New("string has invalid length 0")
+			}
+			if ln > 1e6 {
+				return fmt.Errorf("string has too large length %d", ln)
+			}
+			buf, err := p.readFull(int(ln))
+			if err != nil {
+				return fmt.Errorf("failed to read trace: %w", err)
+			}
+			p.strings[id] = string(buf)
+		}
+
+		ev.typ = EvNone
+		return nil
+	case EvBatch:
+		if want := byte(2); narg != want {
+			return fmt.Errorf("EvBatch has wrong number of arguments: got %d, want %d", narg, want)
+		}
+
+		// -1 because we've already read the first byte of the batch
+		off := p.off - 1
+
+		pid, err := p.readVal()
+		if err != nil {
+			return err
+		}
+		if pid != math.MaxUint64 && pid > math.MaxInt32 {
+			return fmt.Errorf("processor ID %d is larger than maximum of %d", pid, uint64(math.MaxInt32))
+		}
+
+		var pid32 int32
+		if pid == math.MaxUint64 {
+			pid32 = -1
+		} else {
+			pid32 = int32(pid)
+		}
+
+		v, err := p.readVal()
+		if err != nil {
+			return err
+		}
+
+		*ev = rawEvent{
+			typ:         EvBatch,
+			args:        p.args[:0],
+			batchPid:    pid32,
+			batchOffset: off,
+		}
+		ev.args = append(ev.args, pid, v)
+		return nil
+	default:
+		*ev = rawEvent{typ: typ, args: p.args[:0]}
+		if narg <= inlineArgs {
+			if flags&skipArgs == 0 {
+				for i := 0; i < int(narg); i++ {
+					v, err := p.readVal()
+					if err != nil {
+						return fmt.Errorf("failed to read event %d argument: %w", typ, err)
+					}
+					ev.args = append(ev.args, v)
+				}
+			} else {
+				for i := 0; i < int(narg); i++ {
+					if _, err := p.readVal(); err != nil {
+						return fmt.Errorf("failed to read event %d argument: %w", typ, errMalformedVarint)
+					}
+				}
+			}
+		} else {
+			// More than inlineArgs args, the first value is length of the event
+			// in bytes.
+			v, err := p.readVal()
+			if err != nil {
+				return fmt.Errorf("failed to read event %d argument: %w", typ, err)
+			}
+
+			if limit := uint64(2048); v > limit {
+				// At the time of Go 1.19, v seems to be at most 128. Set 2048
+				// as a generous upper limit and guard against malformed traces.
+				return fmt.Errorf("failed to read event %d argument: length-prefixed argument too big: %d bytes, limit is %d", typ, v, limit)
+			}
+
+			if flags&skipArgs == 0 || typ == EvCPUSample {
+				buf, err := p.readFull(int(v))
+				if err != nil {
+					return fmt.Errorf("failed to read trace: %w", err)
+				}
+				for len(buf) > 0 {
+					var v uint64
+					v, buf, err = readValFrom(buf)
+					if err != nil {
+						return err
+					}
+					ev.args = append(ev.args, v)
+				}
+			} else {
+				// Skip over arguments
+				if !p.discard(v) {
+					return fmt.Errorf("failed to read trace: %w", io.EOF)
+				}
+			}
+			if typ == EvUserLog {
+				// EvUserLog records are followed by a value string
+				if flags&skipArgs == 0 {
+					// Read string
+					s, err := p.readStr()
+					if err != nil {
+						return err
+					}
+					ev.sargs = append(ev.sargs, s)
+				} else {
+					// Skip string
+					v, err := p.readVal()
+					if err != nil {
+						return err
+					}
+					if !p.discard(v) {
+						return io.EOF
+					}
+				}
+			}
+		}
+
+		p.args = ev.args[:0]
+		return nil
+	}
+}
+
+// loadBatch loads the next batch for pid and appends its contents to events.
+func (p *parser) loadBatch(pid int32, events []Event) ([]Event, error) {
+	offsets := p.batchOffsets[pid]
+	if len(offsets) == 0 {
+		return nil, io.EOF
+	}
+	n := offsets[0].numEvents
+	offset := offsets[0].offset
+	offsets = offsets[1:]
+	p.batchOffsets[pid] = offsets
+
+	p.off = offset
+
+	if cap(events) < n {
+		events = make([]Event, 0, n)
+	}
+
+	gotHeader := false
+	var raw rawEvent
+	var ev Event
+	for {
+		err := p.readRawEvent(0, &raw)
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+		if raw.typ == EvNone || raw.typ == EvCPUSample {
+			continue
+		}
+		if raw.typ == EvBatch {
+			if gotHeader {
+				break
+			} else {
+				gotHeader = true
+			}
+		}
+
+		err = p.parseEvent(&raw, &ev)
+		if err != nil {
+			return nil, err
+		}
+		if ev.Type != EvNone {
+			events = append(events, ev)
+		}
+	}
+
+	return events, nil
+}
+
+func (p *parser) readStr() (s string, err error) {
+	sz, err := p.readVal()
+	if err != nil {
+		return "", err
+	}
+	if sz == 0 {
+		return "", nil
+	}
+	if sz > 1e6 {
+		return "", fmt.Errorf("string is too large (len=%d)", sz)
+	}
+	buf, err := p.readFull(int(sz))
+	if err != nil {
+		return "", fmt.Errorf("failed to read trace: %w", err)
+	}
+	return string(buf), nil
+}
+
+// parseEvent transforms raw events into events.
+// It analyzes and verifies per-event-type arguments.
+func (p *parser) parseEvent(raw *rawEvent, ev *Event) error {
+	desc := &EventDescriptions[raw.typ]
+	if desc.Name == "" {
+		return fmt.Errorf("missing description for event type %d", raw.typ)
+	}
+	narg := raw.argNum()
+	if len(raw.args) != narg {
+		return fmt.Errorf("%s has wrong number of arguments: want %d, got %d", desc.Name, narg, len(raw.args))
+	}
+	switch raw.typ {
+	case EvBatch:
+		p.lastGs[p.lastP] = p.lastG
+		if raw.args[0] != math.MaxUint64 && raw.args[0] > math.MaxInt32 {
+			return fmt.Errorf("processor ID %d is larger than maximum of %d", raw.args[0], uint64(math.MaxInt32))
+		}
+		if raw.args[0] == math.MaxUint64 {
+			p.lastP = -1
+		} else {
+			p.lastP = int32(raw.args[0])
+		}
+		p.lastG = p.lastGs[p.lastP]
+		p.lastTs = Timestamp(raw.args[1])
+	case EvFrequency:
+		p.ticksPerSec = int64(raw.args[0])
+		if p.ticksPerSec <= 0 {
+			// The most likely cause for this is tick skew on different CPUs.
+			// For example, solaris/amd64 seems to have wildly different
+			// ticks on different CPUs.
+			return ErrTimeOrder
+		}
+	case EvTimerGoroutine:
+		p.timerGoids[raw.args[0]] = true
+	case EvStack:
+		if len(raw.args) < 2 {
+			return fmt.Errorf("EvStack has wrong number of arguments: want at least 2, got %d", len(raw.args))
+		}
+		size := raw.args[1]
+		if size > 1000 {
+			return fmt.Errorf("EvStack has bad number of frames: %d", size)
+		}
+		want := 2 + 4*size
+		if uint64(len(raw.args)) != want {
+			return fmt.Errorf("EvStack has wrong number of arguments: want %d, got %d", want, len(raw.args))
+		}
+		id := uint32(raw.args[0])
+		if id != 0 && size > 0 {
+			stk := p.allocateStack(size)
+			for i := 0; i < int(size); i++ {
+				pc := raw.args[2+i*4+0]
+				fn := raw.args[2+i*4+1]
+				file := raw.args[2+i*4+2]
+				line := raw.args[2+i*4+3]
+				stk[i] = pc
+
+				if _, ok := p.pcs[pc]; !ok {
+					p.pcs[pc] = Frame{PC: pc, Fn: fn, File: file, Line: int(line)}
+				}
+			}
+			p.stacks[id] = stk
+		}
+	case EvCPUSample:
+		// These events get parsed during the indexing step and don't strictly
+		// belong to the batch.
+	default:
+		*ev = Event{Type: raw.typ, P: p.lastP, G: p.lastG}
+		var argOffset int
+		ev.Ts = p.lastTs + Timestamp(raw.args[0])
+		argOffset = 1
+		p.lastTs = ev.Ts
+		for i := argOffset; i < narg; i++ {
+			if i == narg-1 && desc.Stack {
+				ev.StkID = uint32(raw.args[i])
+			} else {
+				ev.Args[i-argOffset] = raw.args[i]
+			}
+		}
+		switch raw.typ {
+		case EvGoStart, EvGoStartLocal, EvGoStartLabel:
+			p.lastG = ev.Args[0]
+			ev.G = p.lastG
+		case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
+			EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
+			EvGoSysBlock, EvGoBlockGC:
+			p.lastG = 0
+		case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
+			ev.G = ev.Args[0]
+		case EvUserTaskCreate:
+			// e.Args 0: taskID, 1: parentID, 2: nameID
+		case EvUserRegion:
+			// e.Args 0: taskID, 1: mode, 2: nameID
+		case EvUserLog:
+			// e.Args 0: taskID, 1: keyID, 2: stackID, 3: messageID
+			// raw.sargs 0: message
+
+			if id, ok := p.inlineStringsMapping[raw.sargs[0]]; ok {
+				ev.Args[3] = uint64(id)
+			} else {
+				id := len(p.inlineStrings)
+				p.inlineStringsMapping[raw.sargs[0]] = id
+				p.inlineStrings = append(p.inlineStrings, raw.sargs[0])
+				ev.Args[3] = uint64(id)
+			}
+		}
+
+		return nil
+	}
+
+	ev.Type = EvNone
+	return nil
+}
+
+// ErrTimeOrder is returned by Parse when the trace contains
+// timestamps that do not respect actual event ordering.
+var ErrTimeOrder = errors.New("time stamps out of order")
+
+// postProcessTrace does inter-event verification and information restoration.
+// The resulting trace is guaranteed to be consistent
+// (for example, a P does not run two Gs at the same time, or a G is indeed
+// blocked before an unblock event).
+func (p *parser) postProcessTrace(events Events) error {
+	const (
+		gDead = iota
+		gRunnable
+		gRunning
+		gWaiting
+	)
+	type gdesc struct {
+		state        int
+		ev           *Event
+		evStart      *Event
+		evCreate     *Event
+		evMarkAssist *Event
+	}
+	type pdesc struct {
+		running bool
+		g       uint64
+		evSweep *Event
+	}
+
+	gs := make(map[uint64]gdesc)
+	ps := make(map[int32]pdesc)
+	tasks := make(map[uint64]*Event)           // task id to task creation events
+	activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions
+	gs[0] = gdesc{state: gRunning}
+	var evGC, evSTW *Event
+
+	checkRunning := func(p pdesc, g gdesc, ev *Event, allowG0 bool) error {
+		name := EventDescriptions[ev.Type].Name
+		if g.state != gRunning {
+			return fmt.Errorf("g %d is not running while %s (time %d)", ev.G, name, ev.Ts)
+		}
+		if p.g != ev.G {
+			return fmt.Errorf("p %d is not running g %d while %s (time %d)", ev.P, ev.G, name, ev.Ts)
+		}
+		if !allowG0 && ev.G == 0 {
+			return fmt.Errorf("g 0 did %s (time %d)", name, ev.Ts)
+		}
+		return nil
+	}
+
+	for evIdx := 0; evIdx < events.Len(); evIdx++ {
+		ev := events.Ptr(evIdx)
+
+		switch ev.Type {
+		case EvProcStart:
+			p := ps[ev.P]
+			if p.running {
+				return fmt.Errorf("p %d is running before start (time %d)", ev.P, ev.Ts)
+			}
+			p.running = true
+
+			ps[ev.P] = p
+		case EvProcStop:
+			p := ps[ev.P]
+			if !p.running {
+				return fmt.Errorf("p %d is not running before stop (time %d)", ev.P, ev.Ts)
+			}
+			if p.g != 0 {
+				return fmt.Errorf("p %d is running a goroutine %d during stop (time %d)", ev.P, p.g, ev.Ts)
+			}
+			p.running = false
+
+			ps[ev.P] = p
+		case EvGCStart:
+			if evGC != nil {
+				return fmt.Errorf("previous GC is not ended before a new one (time %d)", ev.Ts)
+			}
+			evGC = ev
+			// Attribute this to the global GC state.
+			ev.P = GCP
+		case EvGCDone:
+			if evGC == nil {
+				return fmt.Errorf("bogus GC end (time %d)", ev.Ts)
+			}
+			evGC = nil
+		case EvSTWStart:
+			evp := &evSTW
+			if *evp != nil {
+				return fmt.Errorf("previous STW is not ended before a new one (time %d)", ev.Ts)
+			}
+			*evp = ev
+		case EvSTWDone:
+			evp := &evSTW
+			if *evp == nil {
+				return fmt.Errorf("bogus STW end (time %d)", ev.Ts)
+			}
+			*evp = nil
+		case EvGCSweepStart:
+			p := ps[ev.P]
+			if p.evSweep != nil {
+				return fmt.Errorf("previous sweeping is not ended before a new one (time %d)", ev.Ts)
+			}
+			p.evSweep = ev
+
+			ps[ev.P] = p
+		case EvGCMarkAssistStart:
+			g := gs[ev.G]
+			if g.evMarkAssist != nil {
+				return fmt.Errorf("previous mark assist is not ended before a new one (time %d)", ev.Ts)
+			}
+			g.evMarkAssist = ev
+
+			gs[ev.G] = g
+		case EvGCMarkAssistDone:
+			// Unlike most events, mark assists can already be in progress
+			// when tracing starts, so we can't report an error here.
+			g := gs[ev.G]
+			if g.evMarkAssist != nil {
+				g.evMarkAssist = nil
+			}
+
+			gs[ev.G] = g
+		case EvGCSweepDone:
+			p := ps[ev.P]
+			if p.evSweep == nil {
+				return fmt.Errorf("bogus sweeping end (time %d)", ev.Ts)
+			}
+			p.evSweep = nil
+
+			ps[ev.P] = p
+		case EvGoWaiting:
+			g := gs[ev.G]
+			if g.state != gRunnable {
+				return fmt.Errorf("g %d is not runnable before EvGoWaiting (time %d)", ev.G, ev.Ts)
+			}
+			g.state = gWaiting
+			g.ev = ev
+
+			gs[ev.G] = g
+		case EvGoInSyscall:
+			g := gs[ev.G]
+			if g.state != gRunnable {
+				return fmt.Errorf("g %d is not runnable before EvGoInSyscall (time %d)", ev.G, ev.Ts)
+			}
+			g.state = gWaiting
+			g.ev = ev
+
+			gs[ev.G] = g
+		case EvGoCreate:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if err := checkRunning(p, g, ev, true); err != nil {
+				return err
+			}
+			if _, ok := gs[ev.Args[0]]; ok {
+				return fmt.Errorf("g %d already exists (time %d)", ev.Args[0], ev.Ts)
+			}
+			gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev}
+
+		case EvGoStart, EvGoStartLabel:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if g.state != gRunnable {
+				return fmt.Errorf("g %d is not runnable before start (time %d)", ev.G, ev.Ts)
+			}
+			if p.g != 0 {
+				return fmt.Errorf("p %d is already running g %d while start g %d (time %d)", ev.P, p.g, ev.G, ev.Ts)
+			}
+			g.state = gRunning
+			g.evStart = ev
+			p.g = ev.G
+			if g.evCreate != nil {
+				ev.StkID = uint32(g.evCreate.Args[1])
+				g.evCreate = nil
+			}
+
+			if g.ev != nil {
+				g.ev = nil
+			}
+
+			gs[ev.G] = g
+			ps[ev.P] = p
+		case EvGoEnd, EvGoStop:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.evStart = nil
+			g.state = gDead
+			p.g = 0
+
+			if ev.Type == EvGoEnd { // flush all active regions
+				delete(activeRegions, ev.G)
+			}
+
+			gs[ev.G] = g
+			ps[ev.P] = p
+		case EvGoSched, EvGoPreempt:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.state = gRunnable
+			g.evStart = nil
+			p.g = 0
+			g.ev = ev
+
+			gs[ev.G] = g
+			ps[ev.P] = p
+		case EvGoUnblock:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if g.state != gRunning {
+				return fmt.Errorf("g %d is not running while unpark (time %d)", ev.G, ev.Ts)
+			}
+			if ev.P != TimerP && p.g != ev.G {
+				return fmt.Errorf("p %d is not running g %d while unpark (time %d)", ev.P, ev.G, ev.Ts)
+			}
+			g1 := gs[ev.Args[0]]
+			if g1.state != gWaiting {
+				return fmt.Errorf("g %d is not waiting before unpark (time %d)", ev.Args[0], ev.Ts)
+			}
+			if g1.ev != nil && g1.ev.Type == EvGoBlockNet {
+				ev.P = NetpollP
+			}
+			g1.state = gRunnable
+			g1.ev = ev
+			gs[ev.Args[0]] = g1
+
+		case EvGoSysCall:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.ev = ev
+
+			gs[ev.G] = g
+		case EvGoSysBlock:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.state = gWaiting
+			g.evStart = nil
+			p.g = 0
+
+			gs[ev.G] = g
+			ps[ev.P] = p
+		case EvGoSysExit:
+			g := gs[ev.G]
+			if g.state != gWaiting {
+				return fmt.Errorf("g %d is not waiting during syscall exit (time %d)", ev.G, ev.Ts)
+			}
+			g.state = gRunnable
+			g.ev = ev
+
+			gs[ev.G] = g
+		case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
+			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC:
+			g := gs[ev.G]
+			p := ps[ev.P]
+			if err := checkRunning(p, g, ev, false); err != nil {
+				return err
+			}
+			g.state = gWaiting
+			g.ev = ev
+			g.evStart = nil
+			p.g = 0
+
+			gs[ev.G] = g
+			ps[ev.P] = p
+		case EvUserTaskCreate:
+			taskid := ev.Args[0]
+			if prevEv, ok := tasks[taskid]; ok {
+				return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prevEv)
+			}
+			tasks[ev.Args[0]] = ev
+
+		case EvUserTaskEnd:
+			taskid := ev.Args[0]
+			delete(tasks, taskid)
+
+		case EvUserRegion:
+			mode := ev.Args[1]
+			regions := activeRegions[ev.G]
+			if mode == 0 { // region start
+				activeRegions[ev.G] = append(regions, ev) // push
+			} else if mode == 1 { // region end
+				n := len(regions)
+				if n > 0 { // matching region start event is in the trace.
+					s := regions[n-1]
+					if s.Args[0] != ev.Args[0] || s.Args[2] != ev.Args[2] { // task id, region name mismatch
+						return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s)
+					}
+
+					if n > 1 {
+						activeRegions[ev.G] = regions[:n-1]
+					} else {
+						delete(activeRegions, ev.G)
+					}
+				}
+			} else {
+				return fmt.Errorf("invalid user region mode: %q", ev)
+			}
+		}
+
+		if ev.StkID != 0 && len(p.stacks[ev.StkID]) == 0 {
+			// Make sure events don't refer to stacks that don't exist or to
+			// stacks with zero frames. Neither of these should be possible, but
+			// better be safe than sorry.
+
+			ev.StkID = 0
+		}
+
+	}
+
+	// TODO(mknyszek): restore stacks for EvGoStart events.
+	return nil
+}
+
+var errMalformedVarint = errors.New("malformed base-128 varint")
+
+// readVal reads an unsigned base-128 value from the parser's data.
+func (p *parser) readVal() (uint64, error) {
+	v, n := binary.Uvarint(p.data[p.off:])
+	if n <= 0 {
+		return 0, errMalformedVarint
+	}
+	p.off += n
+	return v, nil
+}
+
+func readValFrom(buf []byte) (v uint64, rem []byte, err error) {
+	v, n := binary.Uvarint(buf)
+	if n <= 0 {
+		return 0, nil, errMalformedVarint
+	}
+	return v, buf[n:], nil
+}
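+
+// The encoding is the standard unsigned LEB128 varint used by
+// encoding/binary. For example (illustrative), the value 300 is encoded as
+// the two bytes 0xAC 0x02.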
+
+func (ev *Event) String() string {
+	desc := &EventDescriptions[ev.Type]
+	w := new(bytes.Buffer)
+	fmt.Fprintf(w, "%d %s p=%d g=%d stk=%d", ev.Ts, desc.Name, ev.P, ev.G, ev.StkID)
+	for i, a := range desc.Args {
+		fmt.Fprintf(w, " %s=%d", a, ev.Args[i])
+	}
+	return w.String()
+}
+
+// argNum returns the total number of args for the event, accounting for
+// timestamps, sequence numbers, and differences between trace format versions.
+func (raw *rawEvent) argNum() int {
+	desc := &EventDescriptions[raw.typ]
+	if raw.typ == EvStack {
+		return len(raw.args)
+	}
+	narg := len(desc.Args)
+	if desc.Stack {
+		narg++
+	}
+	switch raw.typ {
+	case EvBatch, EvFrequency, EvTimerGoroutine:
+		return narg
+	}
+	narg++ // timestamp
+	return narg
+}
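+
+// For example (illustrative): EvGoUnblock has Args {"g", "seq"} and
+// Stack == true, so argNum returns 2 + 1 (stack) + 1 (timestamp) = 4,
+// matching the wire layout [timestamp, goroutine id, seq, stack].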
+
+// Event types in the trace.
+// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed.
+const (
+	EvNone              event.Type = 0  // unused
+	EvBatch             event.Type = 1  // start of per-P batch of events [pid, timestamp]
+	EvFrequency         event.Type = 2  // contains tracer timer frequency [frequency (ticks per second)]
+	EvStack             event.Type = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
+	EvGomaxprocs        event.Type = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
+	EvProcStart         event.Type = 5  // start of P [timestamp, thread id]
+	EvProcStop          event.Type = 6  // stop of P [timestamp]
+	EvGCStart           event.Type = 7  // GC start [timestamp, seq, stack id]
+	EvGCDone            event.Type = 8  // GC done [timestamp]
+	EvSTWStart          event.Type = 9  // GC mark termination start [timestamp, kind]
+	EvSTWDone           event.Type = 10 // GC mark termination done [timestamp]
+	EvGCSweepStart      event.Type = 11 // GC sweep start [timestamp, stack id]
+	EvGCSweepDone       event.Type = 12 // GC sweep done [timestamp, swept, reclaimed]
+	EvGoCreate          event.Type = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
+	EvGoStart           event.Type = 14 // goroutine starts running [timestamp, goroutine id, seq]
+	EvGoEnd             event.Type = 15 // goroutine ends [timestamp]
+	EvGoStop            event.Type = 16 // goroutine stops (like in select{}) [timestamp, stack]
+	EvGoSched           event.Type = 17 // goroutine calls Gosched [timestamp, stack]
+	EvGoPreempt         event.Type = 18 // goroutine is preempted [timestamp, stack]
+	EvGoSleep           event.Type = 19 // goroutine calls Sleep [timestamp, stack]
+	EvGoBlock           event.Type = 20 // goroutine blocks [timestamp, stack]
+	EvGoUnblock         event.Type = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
+	EvGoBlockSend       event.Type = 22 // goroutine blocks on chan send [timestamp, stack]
+	EvGoBlockRecv       event.Type = 23 // goroutine blocks on chan recv [timestamp, stack]
+	EvGoBlockSelect     event.Type = 24 // goroutine blocks on select [timestamp, stack]
+	EvGoBlockSync       event.Type = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
+	EvGoBlockCond       event.Type = 26 // goroutine blocks on Cond [timestamp, stack]
+	EvGoBlockNet        event.Type = 27 // goroutine blocks on network [timestamp, stack]
+	EvGoSysCall         event.Type = 28 // syscall enter [timestamp, stack]
+	EvGoSysExit         event.Type = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
+	EvGoSysBlock        event.Type = 30 // syscall blocks [timestamp]
+	EvGoWaiting         event.Type = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
+	EvGoInSyscall       event.Type = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
+	EvHeapAlloc         event.Type = 33 // gcController.heapLive change [timestamp, heap live bytes]
+	EvHeapGoal          event.Type = 34 // gcController.heapGoal change [timestamp, heap goal bytes]
+	EvTimerGoroutine    event.Type = 35 // denotes timer goroutine [timer goroutine id]
+	EvFutileWakeup      event.Type = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
+	EvString            event.Type = 37 // string dictionary entry [ID, length, string]
+	EvGoStartLocal      event.Type = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
+	EvGoUnblockLocal    event.Type = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
+	EvGoSysExitLocal    event.Type = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
+	EvGoStartLabel      event.Type = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
+	EvGoBlockGC         event.Type = 42 // goroutine blocks on GC assist [timestamp, stack]
+	EvGCMarkAssistStart event.Type = 43 // GC mark assist start [timestamp, stack]
+	EvGCMarkAssistDone  event.Type = 44 // GC mark assist done [timestamp]
+	EvUserTaskCreate    event.Type = 45 // trace.NewTask [timestamp, internal task id, internal parent id, stack, name string]
+	EvUserTaskEnd       event.Type = 46 // end of task [timestamp, internal task id, stack]
+	EvUserRegion        event.Type = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string]
+	EvUserLog           event.Type = 48 // trace.Log [timestamp, internal id, key string id, stack, value string]
+	EvCPUSample         event.Type = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id]
+	EvCount             event.Type = 50
+)
+
+var EventDescriptions = [256]struct {
+	Name       string
+	minVersion version.Version
+	Stack      bool
+	Args       []string
+	SArgs      []string // string arguments
+}{
+	EvNone:              {"None", 5, false, []string{}, nil},
+	EvBatch:             {"Batch", 5, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"}
+	EvFrequency:         {"Frequency", 5, false, []string{"freq"}, nil},   // in 1.5 format it was {"freq", "unused"}
+	EvStack:             {"Stack", 5, false, []string{"id", "siz"}, nil},
+	EvGomaxprocs:        {"Gomaxprocs", 5, true, []string{"procs"}, nil},
+	EvProcStart:         {"ProcStart", 5, false, []string{"thread"}, nil},
+	EvProcStop:          {"ProcStop", 5, false, []string{}, nil},
+	EvGCStart:           {"GCStart", 5, true, []string{"seq"}, nil}, // in 1.5 format it was {}
+	EvGCDone:            {"GCDone", 5, false, []string{}, nil},
+	EvSTWStart:          {"GCSTWStart", 5, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
+	EvSTWDone:           {"GCSTWDone", 5, false, []string{}, nil},
+	EvGCSweepStart:      {"GCSweepStart", 5, true, []string{}, nil},
+	EvGCSweepDone:       {"GCSweepDone", 5, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
+	EvGoCreate:          {"GoCreate", 5, true, []string{"g", "stack"}, nil},
+	EvGoStart:           {"GoStart", 5, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+	EvGoEnd:             {"GoEnd", 5, false, []string{}, nil},
+	EvGoStop:            {"GoStop", 5, true, []string{}, nil},
+	EvGoSched:           {"GoSched", 5, true, []string{}, nil},
+	EvGoPreempt:         {"GoPreempt", 5, true, []string{}, nil},
+	EvGoSleep:           {"GoSleep", 5, true, []string{}, nil},
+	EvGoBlock:           {"GoBlock", 5, true, []string{}, nil},
+	EvGoUnblock:         {"GoUnblock", 5, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
+	EvGoBlockSend:       {"GoBlockSend", 5, true, []string{}, nil},
+	EvGoBlockRecv:       {"GoBlockRecv", 5, true, []string{}, nil},
+	EvGoBlockSelect:     {"GoBlockSelect", 5, true, []string{}, nil},
+	EvGoBlockSync:       {"GoBlockSync", 5, true, []string{}, nil},
+	EvGoBlockCond:       {"GoBlockCond", 5, true, []string{}, nil},
+	EvGoBlockNet:        {"GoBlockNet", 5, true, []string{}, nil},
+	EvGoSysCall:         {"GoSysCall", 5, true, []string{}, nil},
+	EvGoSysExit:         {"GoSysExit", 5, false, []string{"g", "seq", "ts"}, nil},
+	EvGoSysBlock:        {"GoSysBlock", 5, false, []string{}, nil},
+	EvGoWaiting:         {"GoWaiting", 5, false, []string{"g"}, nil},
+	EvGoInSyscall:       {"GoInSyscall", 5, false, []string{"g"}, nil},
+	EvHeapAlloc:         {"HeapAlloc", 5, false, []string{"mem"}, nil},
+	EvHeapGoal:          {"HeapGoal", 5, false, []string{"mem"}, nil},
+	EvTimerGoroutine:    {"TimerGoroutine", 5, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"}
+	EvFutileWakeup:      {"FutileWakeup", 5, false, []string{}, nil},
+	EvString:            {"String", 7, false, []string{}, nil},
+	EvGoStartLocal:      {"GoStartLocal", 7, false, []string{"g"}, nil},
+	EvGoUnblockLocal:    {"GoUnblockLocal", 7, true, []string{"g"}, nil},
+	EvGoSysExitLocal:    {"GoSysExitLocal", 7, false, []string{"g", "ts"}, nil},
+	EvGoStartLabel:      {"GoStartLabel", 8, false, []string{"g", "seq", "labelid"}, []string{"label"}},
+	EvGoBlockGC:         {"GoBlockGC", 8, true, []string{}, nil},
+	EvGCMarkAssistStart: {"GCMarkAssistStart", 9, true, []string{}, nil},
+	EvGCMarkAssistDone:  {"GCMarkAssistDone", 9, false, []string{}, nil},
+	EvUserTaskCreate:    {"UserTaskCreate", 11, true, []string{"taskid", "pid", "typeid"}, []string{"name"}},
+	EvUserTaskEnd:       {"UserTaskEnd", 11, true, []string{"taskid"}, nil},
+	EvUserRegion:        {"UserRegion", 11, true, []string{"taskid", "mode", "typeid"}, []string{"name"}},
+	EvUserLog:           {"UserLog", 11, true, []string{"id", "keyid"}, []string{"category", "message"}},
+	EvCPUSample:         {"CPUSample", 19, true, []string{"ts", "p", "g"}, nil},
+}
+
+//gcassert:inline
+func (p *parser) allocateStack(size uint64) []uint64 {
+	if size == 0 {
+		return nil
+	}
+
+	// Stacks are plentiful but small. For our "Staticcheck on std" trace with
+	// 11e6 events, we have roughly 500,000 stacks, using 200 MiB of memory. To
+	// avoid making 500,000 small allocations we allocate backing arrays 1 MiB
+	// at a time.
+	out := p.stacksData
+	if uint64(len(out)) < size {
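+		// 131072 words (1024*128) at 8 bytes each is exactly 1 MiB.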
+		out = make([]uint64, 1024*128)
+	}
+	p.stacksData = out[size:]
+	return out[:size:size]
+}
+
+func (tr *Trace) STWReason(kindID uint64) STWReason {
+	if tr.Version < 21 {
+		if kindID == 0 || kindID == 1 {
+			return STWReason(kindID + 1)
+		} else {
+			return STWUnknown
+		}
+	} else if tr.Version == 21 {
+		if kindID < NumSTWReasons {
+			return STWReason(kindID)
+		} else {
+			return STWUnknown
+		}
+	} else {
+		return STWUnknown
+	}
+}
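+
+// For example (illustrative): traces older than Go 1.21 record only two STW
+// kinds, so kindID 0 maps to STWGCMarkTermination and kindID 1 to
+// STWGCSweepTermination.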
+
+type STWReason int
+
+const (
+	STWUnknown                 STWReason = 0
+	STWGCMarkTermination       STWReason = 1
+	STWGCSweepTermination      STWReason = 2
+	STWWriteHeapDump           STWReason = 3
+	STWGoroutineProfile        STWReason = 4
+	STWGoroutineProfileCleanup STWReason = 5
+	STWAllGoroutinesStackTrace STWReason = 6
+	STWReadMemStats            STWReason = 7
+	STWAllThreadsSyscall       STWReason = 8
+	STWGOMAXPROCS              STWReason = 9
+	STWStartTrace              STWReason = 10
+	STWStopTrace               STWReason = 11
+	STWCountPagesInUse         STWReason = 12
+	STWReadMetricsSlow         STWReason = 13
+	STWReadMemStatsSlow        STWReason = 14
+	STWPageCachePagesLeaked    STWReason = 15
+	STWResetDebugLog           STWReason = 16
+
+	NumSTWReasons = 17
+)
diff --git a/trace/internal/testtrace/validation.go b/trace/internal/testtrace/validation.go
index e9bc808..ddbf0b6 100644
--- a/trace/internal/testtrace/validation.go
+++ b/trace/internal/testtrace/validation.go
@@ -25,6 +25,7 @@
 	ranges   map[trace.ResourceID][]string
 	tasks    map[trace.TaskID]string
 	seenSync bool
+	Go121    bool
 }
 
 type schedContext struct {
@@ -164,7 +165,7 @@
 			}
 			// Validate sched context.
 			if new.Executing() {
-				ctx := v.getOrCreateThread(e, ev.Thread())
+				ctx := v.getOrCreateThread(e, ev, ev.Thread())
 				if ctx != nil {
 					if ctx.G != trace.NoGoroutine && ctx.G != id {
 						e.Errorf("tried to run goroutine %d when one was already executing (%d) on thread %d", id, ctx.G, ev.Thread())
@@ -217,7 +218,7 @@
 			}
 			// Validate sched context.
 			if new.Executing() {
-				ctx := v.getOrCreateThread(e, ev.Thread())
+				ctx := v.getOrCreateThread(e, ev, ev.Thread())
 				if ctx != nil {
 					if ctx.P != trace.NoProc && ctx.P != id {
 						e.Errorf("tried to run proc %d when one was already executing (%d) on thread %d", id, ctx.P, ev.Thread())
@@ -320,8 +321,25 @@
 	v.ranges[r] = slices.Delete(ranges, i, i+1)
 }
 
-func (v *Validator) getOrCreateThread(e *errAccumulator, m trace.ThreadID) *schedContext {
-	if m == trace.NoThread {
+func (v *Validator) getOrCreateThread(e *errAccumulator, ev trace.Event, m trace.ThreadID) *schedContext {
+	lenient := func() bool {
+		// Be lenient about GoUndetermined -> GoSyscall transitions if they
+		// originate from an old trace. These transitions lack thread
+		// information in trace formats older than 1.22.
+		if !v.Go121 {
+			return false
+		}
+		if ev.Kind() != trace.EventStateTransition {
+			return false
+		}
+		tr := ev.StateTransition()
+		if tr.Resource.Kind != trace.ResourceGoroutine {
+			return false
+		}
+		from, to := tr.Goroutine()
+		return from == trace.GoUndetermined && to == trace.GoSyscall
+	}
+	if m == trace.NoThread && !lenient() {
 		e.Errorf("must have thread, but thread ID is none")
 		return nil
 	}
diff --git a/trace/internal/version/version.go b/trace/internal/version/version.go
index eeb7cab..047ca0c 100644
--- a/trace/internal/version/version.go
+++ b/trace/internal/version/version.go
@@ -20,11 +20,20 @@
 type Version uint32
 
 const (
+	Go111   Version = 11
+	Go119   Version = 19
+	Go121   Version = 21
 	Go122   Version = 22
 	Current         = Go122
 )
 
 var versions = map[Version][]event.Spec{
+	// Go 1.11–1.21 use a different parser and are listed here only for the
+	// sake of Version.Valid.
+	Go111: nil,
+	Go119: nil,
+	Go121: nil,
+
 	Go122: go122.Specs(),
 }
 
diff --git a/trace/oldtrace.go b/trace/oldtrace.go
new file mode 100644
index 0000000..c698709
--- /dev/null
+++ b/trace/oldtrace.go
@@ -0,0 +1,571 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by "gen.bash" from internal/trace/v2; DO NOT EDIT.
+
+//go:build go1.21
+
+// This file implements conversion from old (Go 1.11–Go 1.21) traces to the Go
+// 1.22 format.
+//
+// Most events have direct equivalents in 1.22, at worst requiring arguments to
+// be reordered. Some events, such as GoWaiting, need to look ahead for follow-up
+// events to determine the correct translation. GoSyscall, which is an
+// instantaneous event, gets turned into a 1 ns long pair of
+// GoSyscallBegin+GoSyscallEnd, unless we observe a GoSysBlock, in which case
+// we emit a GoSyscallBegin+GoSyscallEndBlocked pair with the correct duration
+// (i.e. starting at the original GoSyscall).
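+//
+// For example (a sketch of the synthesized events, not literal output), a
+// non-blocking syscall at timestamp T becomes
+//
+//	GoSyscallBegin(time=T), GoSyscallEnd(time=T+1)
+//
+// while a syscall for which we later observe a GoSysBlock becomes
+//
+//	GoSyscallBegin(time=T), ..., GoSyscallEndBlocked(at the GoSysExit time)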
+//
+// The resulting trace treats the old trace as a single, large generation,
+// sharing a single evTable for all events.
+//
+// We use a new (compared to what was used for 'go tool trace' in earlier
+// versions of Go) parser for old traces that is optimized for speed, low memory
+// usage, and minimal GC pressure. It allocates events in batches so that even
+// though we have to load the entire trace into memory, the conversion process
+// shouldn't result in a doubling of memory usage, even if all converted events
+// are kept alive, as we free batches once we're done with them.
+//
+// The conversion process is lossless.
+
+package trace
+
+import (
+	"errors"
+	"fmt"
+	"golang.org/x/exp/trace/internal/event"
+	"golang.org/x/exp/trace/internal/event/go122"
+	"golang.org/x/exp/trace/internal/oldtrace"
+	"io"
+)
+
+type oldTraceConverter struct {
+	trace          oldtrace.Trace
+	evt            *evTable
+	preInit        bool
+	createdPreInit map[GoID]struct{}
+	events         oldtrace.Events
+	extra          []Event
+	extraArr       [3]Event
+	tasks          map[TaskID]taskState
+	seenProcs      map[ProcID]struct{}
+	lastTs         Time
+	procMs         map[ProcID]ThreadID
+	lastStwReason  uint64
+
+	inlineToStringID  []uint64
+	builtinToStringID []uint64
+}
+
+const (
+	// Block reasons
+	sForever = iota
+	sPreempted
+	sGosched
+	sSleep
+	sChanSend
+	sChanRecv
+	sNetwork
+	sSync
+	sSyncCond
+	sSelect
+	sEmpty
+	sMarkAssistWait
+
+	// STW kinds
+	sSTWUnknown
+	sSTWGCMarkTermination
+	sSTWGCSweepTermination
+	sSTWWriteHeapDump
+	sSTWGoroutineProfile
+	sSTWGoroutineProfileCleanup
+	sSTWAllGoroutinesStackTrace
+	sSTWReadMemStats
+	sSTWAllThreadsSyscall
+	sSTWGOMAXPROCS
+	sSTWStartTrace
+	sSTWStopTrace
+	sSTWCountPagesInUse
+	sSTWReadMetricsSlow
+	sSTWReadMemStatsSlow
+	sSTWPageCachePagesLeaked
+	sSTWResetDebugLog
+
+	sLast
+)
+
+func (it *oldTraceConverter) init(pr oldtrace.Trace) error {
+	it.trace = pr
+	it.preInit = true
+	it.createdPreInit = make(map[GoID]struct{})
+	it.evt = &evTable{pcs: make(map[uint64]frame)}
+	it.events = pr.Events
+	it.extra = it.extraArr[:0]
+	it.tasks = make(map[TaskID]taskState)
+	it.seenProcs = make(map[ProcID]struct{})
+	it.procMs = make(map[ProcID]ThreadID)
+	it.lastTs = -1
+
+	evt := it.evt
+
+	// Convert from oldtrace's Strings map to our dataTable.
+	var max uint64
+	for id, s := range pr.Strings {
+		evt.strings.insert(stringID(id), s)
+		if id > max {
+			max = id
+		}
+	}
+	pr.Strings = nil
+
+	// Add all strings used for UserLog. In the old trace format, these were
+	// stored inline and didn't have IDs. We generate IDs for them.
+	if max+uint64(len(pr.InlineStrings)) < max {
+		return errors.New("trace contains too many strings")
+	}
+	var addErr error
+	add := func(id stringID, s string) {
+		if err := evt.strings.insert(id, s); err != nil && addErr == nil {
+			addErr = err
+		}
+	}
+	for id, s := range pr.InlineStrings {
+		nid := max + 1 + uint64(id)
+		it.inlineToStringID = append(it.inlineToStringID, nid)
+		add(stringID(nid), s)
+	}
+	max += uint64(len(pr.InlineStrings))
+	pr.InlineStrings = nil
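+
+	// At this point the string ID space is laid out as follows: IDs up to
+	// the original max belong to strings from the trace proper, the next
+	// len(InlineStrings) IDs belong to the inline strings mapped above, and
+	// the builtin strings added below take the sLast IDs after that.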
+
+	// Add strings that the converter emits explicitly.
+	if max+uint64(sLast) < max {
+		return errors.New("trace contains too many strings")
+	}
+	it.builtinToStringID = make([]uint64, sLast)
+	addBuiltin := func(c int, s string) {
+		nid := max + 1 + uint64(c)
+		it.builtinToStringID[c] = nid
+		add(stringID(nid), s)
+	}
+	addBuiltin(sForever, "forever")
+	addBuiltin(sPreempted, "preempted")
+	addBuiltin(sGosched, "runtime.Gosched")
+	addBuiltin(sSleep, "sleep")
+	addBuiltin(sChanSend, "chan send")
+	addBuiltin(sChanRecv, "chan receive")
+	addBuiltin(sNetwork, "network")
+	addBuiltin(sSync, "sync")
+	addBuiltin(sSyncCond, "sync.(*Cond).Wait")
+	addBuiltin(sSelect, "select")
+	addBuiltin(sEmpty, "")
+	addBuiltin(sMarkAssistWait, "GC mark assist wait for work")
+	addBuiltin(sSTWUnknown, "")
+	addBuiltin(sSTWGCMarkTermination, "GC mark termination")
+	addBuiltin(sSTWGCSweepTermination, "GC sweep termination")
+	addBuiltin(sSTWWriteHeapDump, "write heap dump")
+	addBuiltin(sSTWGoroutineProfile, "goroutine profile")
+	addBuiltin(sSTWGoroutineProfileCleanup, "goroutine profile cleanup")
+	addBuiltin(sSTWAllGoroutinesStackTrace, "all goroutine stack trace")
+	addBuiltin(sSTWReadMemStats, "read mem stats")
+	addBuiltin(sSTWAllThreadsSyscall, "AllThreadsSyscall")
+	addBuiltin(sSTWGOMAXPROCS, "GOMAXPROCS")
+	addBuiltin(sSTWStartTrace, "start trace")
+	addBuiltin(sSTWStopTrace, "stop trace")
+	addBuiltin(sSTWCountPagesInUse, "CountPagesInUse (test)")
+	addBuiltin(sSTWReadMetricsSlow, "ReadMetricsSlow (test)")
+	addBuiltin(sSTWReadMemStatsSlow, "ReadMemStatsSlow (test)")
+	addBuiltin(sSTWPageCachePagesLeaked, "PageCachePagesLeaked (test)")
+	addBuiltin(sSTWResetDebugLog, "ResetDebugLog (test)")
+
+	if addErr != nil {
+		// This should be impossible but let's be safe.
+		return fmt.Errorf("couldn't add strings: %w", addErr)
+	}
+
+	it.evt.strings.compactify()
+
+	// Convert stacks.
+	for id, stk := range pr.Stacks {
+		evt.stacks.insert(stackID(id), stack{pcs: stk})
+	}
+
+	// OPT(dh): if we could share the frame type between this package and
+	// oldtrace we wouldn't have to copy the map.
+	for pc, f := range pr.PCs {
+		evt.pcs[pc] = frame{
+			pc:     pc,
+			funcID: stringID(f.Fn),
+			fileID: stringID(f.File),
+			line:   uint64(f.Line),
+		}
+	}
+	pr.Stacks = nil
+	pr.PCs = nil
+	evt.stacks.compactify()
+	return nil
+}
+
+// next returns the next event, io.EOF if there are no more events, or a
+// descriptive error for invalid events.
+func (it *oldTraceConverter) next() (Event, error) {
+	if len(it.extra) > 0 {
+		ev := it.extra[0]
+		it.extra = it.extra[1:]
+
+		if len(it.extra) == 0 {
+			it.extra = it.extraArr[:0]
+		}
+		// Two events aren't allowed to fall on the same timestamp in the new
+		// API, but this may happen when we produce EvGoStatus events.
+		if ev.base.time <= it.lastTs {
+			ev.base.time = it.lastTs + 1
+		}
+		it.lastTs = ev.base.time
+		return ev, nil
+	}
+
+	oev, ok := it.events.Pop()
+	if !ok {
+		return Event{}, io.EOF
+	}
+
+	ev, err := it.convertEvent(oev)
+
+	if err == errSkip {
+		return it.next()
+	} else if err != nil {
+		return Event{}, err
+	}
+
+	// Two events aren't allowed to fall on the same timestamp in the new
+	// API, but this may happen when we produce EvGoStatus events.
+	if ev.base.time <= it.lastTs {
+		ev.base.time = it.lastTs + 1
+	}
+	it.lastTs = ev.base.time
+	return ev, nil
+}
+
+var errSkip = errors.New("skip event")
+
+// convertEvent converts an event from the old trace format to zero or more
+// events in the new format. Most events translate one-to-one. Some events
+// don't result in an event right away, in which case convertEvent returns
+// errSkip. Some events result in more than one new event; in this case,
+// convertEvent returns the first event and stores additional events in
+// it.extra. When encountering events that oldtrace shouldn't be able to
+// emit, convertEvent returns a descriptive error.
+func (it *oldTraceConverter) convertEvent(ev *oldtrace.Event) (Event, error) {
+	var mappedType event.Type
+	mappedArgs := ev.Args
+
+	switch ev.Type {
+	case oldtrace.EvGomaxprocs:
+		mappedType = go122.EvProcsChange
+		if it.preInit {
+			// The first EvGomaxprocs signals the end of trace initialization.
+			// At this point we've seen all goroutines that already existed
+			// when tracing began.
+			it.preInit = false
+			for gid := range it.createdPreInit {
+				// These are goroutines that already existed when tracing
+				// started but for which we received neither GoWaiting,
+				// GoInSyscall, nor GoStart, i.e. goroutines in the _Gidle
+				// or _Grunnable states.
+				it.extra = append(it.extra, Event{
+					ctx: schedCtx{
+						// The goroutine isn't executing, so don't attribute
+						// this GoStatus event to it in the scheduling context.
+						G: NoGoroutine,
+						P: NoProc,
+						M: NoThread,
+					},
+					table: it.evt,
+					base: baseEvent{
+						typ:  go122.EvGoStatus,
+						time: Time(ev.Ts),
+						args: [4]uint64{uint64(gid), ^uint64(0), uint64(go122.GoRunnable)},
+					},
+				})
+			}
+			it.createdPreInit = nil
+			return Event{}, errSkip
+		}
+	case oldtrace.EvProcStart:
+		it.procMs[ProcID(ev.P)] = ThreadID(ev.Args[0])
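+		// The first event we see for each P is translated into a ProcStatus
+		// event so that the P's state is known up front, mirroring the
+		// status events the new format emits at the start of a generation;
+		// subsequent events for the P translate directly.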
+		if _, ok := it.seenProcs[ProcID(ev.P)]; ok {
+			mappedType = go122.EvProcStart
+			mappedArgs = [4]uint64{uint64(ev.P)}
+		} else {
+			it.seenProcs[ProcID(ev.P)] = struct{}{}
+			mappedType = go122.EvProcStatus
+			mappedArgs = [4]uint64{uint64(ev.P), uint64(go122.ProcRunning)}
+		}
+	case oldtrace.EvProcStop:
+		if _, ok := it.seenProcs[ProcID(ev.P)]; ok {
+			mappedType = go122.EvProcStop
+			mappedArgs = [4]uint64{uint64(ev.P)}
+		} else {
+			it.seenProcs[ProcID(ev.P)] = struct{}{}
+			mappedType = go122.EvProcStatus
+			mappedArgs = [4]uint64{uint64(ev.P), uint64(go122.ProcIdle)}
+		}
+	case oldtrace.EvGCStart:
+		mappedType = go122.EvGCBegin
+	case oldtrace.EvGCDone:
+		mappedType = go122.EvGCEnd
+	case oldtrace.EvSTWStart:
+		sid := it.builtinToStringID[sSTWUnknown+it.trace.STWReason(ev.Args[0])]
+		it.lastStwReason = sid
+		mappedType = go122.EvSTWBegin
+		mappedArgs = [4]uint64{uint64(sid)}
+	case oldtrace.EvSTWDone:
+		mappedType = go122.EvSTWEnd
+		mappedArgs = [4]uint64{it.lastStwReason}
+	case oldtrace.EvGCSweepStart:
+		mappedType = go122.EvGCSweepBegin
+	case oldtrace.EvGCSweepDone:
+		mappedType = go122.EvGCSweepEnd
+	case oldtrace.EvGoCreate:
+		if it.preInit {
+			it.createdPreInit[GoID(ev.Args[0])] = struct{}{}
+			return Event{}, errSkip
+		}
+		mappedType = go122.EvGoCreate
+	case oldtrace.EvGoStart:
+		if it.preInit {
+			mappedType = go122.EvGoStatus
+			mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoRunning)}
+			delete(it.createdPreInit, GoID(ev.Args[0]))
+		} else {
+			mappedType = go122.EvGoStart
+		}
+	case oldtrace.EvGoStartLabel:
+		it.extra = []Event{{
+			ctx: schedCtx{
+				G: GoID(ev.G),
+				P: ProcID(ev.P),
+				M: it.procMs[ProcID(ev.P)],
+			},
+			table: it.evt,
+			base: baseEvent{
+				typ:  go122.EvGoLabel,
+				time: Time(ev.Ts),
+				args: [4]uint64{ev.Args[2]},
+			},
+		}}
+		return Event{
+			ctx: schedCtx{
+				G: GoID(ev.G),
+				P: ProcID(ev.P),
+				M: it.procMs[ProcID(ev.P)],
+			},
+			table: it.evt,
+			base: baseEvent{
+				typ:  go122.EvGoStart,
+				time: Time(ev.Ts),
+				args: ev.Args,
+			},
+		}, nil
+	case oldtrace.EvGoEnd:
+		mappedType = go122.EvGoDestroy
+	case oldtrace.EvGoStop:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sForever]), uint64(ev.StkID)}
+	case oldtrace.EvGoSched:
+		mappedType = go122.EvGoStop
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sGosched]), uint64(ev.StkID)}
+	case oldtrace.EvGoPreempt:
+		mappedType = go122.EvGoStop
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sPreempted]), uint64(ev.StkID)}
+	case oldtrace.EvGoSleep:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSleep]), uint64(ev.StkID)}
+	case oldtrace.EvGoBlock:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sEmpty]), uint64(ev.StkID)}
+	case oldtrace.EvGoUnblock:
+		mappedType = go122.EvGoUnblock
+	case oldtrace.EvGoBlockSend:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sChanSend]), uint64(ev.StkID)}
+	case oldtrace.EvGoBlockRecv:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sChanRecv]), uint64(ev.StkID)}
+	case oldtrace.EvGoBlockSelect:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSelect]), uint64(ev.StkID)}
+	case oldtrace.EvGoBlockSync:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSync]), uint64(ev.StkID)}
+	case oldtrace.EvGoBlockCond:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sSyncCond]), uint64(ev.StkID)}
+	case oldtrace.EvGoBlockNet:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sNetwork]), uint64(ev.StkID)}
+	case oldtrace.EvGoBlockGC:
+		mappedType = go122.EvGoBlock
+		mappedArgs = [4]uint64{uint64(it.builtinToStringID[sMarkAssistWait]), uint64(ev.StkID)}
+	case oldtrace.EvGoSysCall:
+		// Look for the next event for the same G to determine if the syscall
+		// blocked.
+		blocked := false
+		it.events.All()(func(nev *oldtrace.Event) bool {
+			if nev.G != ev.G {
+				return true
+			}
+			// After an EvGoSysCall, the next event on the same G will either be
+			// EvGoSysBlock to denote a blocking syscall, or some other event
+			// (or the end of the trace) if the syscall didn't block.
+			if nev.Type == oldtrace.EvGoSysBlock {
+				blocked = true
+			}
+			return false
+		})
+		if blocked {
+			mappedType = go122.EvGoSyscallBegin
+			mappedArgs = [4]uint64{1: uint64(ev.StkID)}
+		} else {
+			// Convert the old instantaneous syscall event into a pair of
+			// syscall-begin and syscall-end events, giving the pair the
+			// shortest possible duration, 1 ns.
+			out1 := Event{
+				ctx: schedCtx{
+					G: GoID(ev.G),
+					P: ProcID(ev.P),
+					M: it.procMs[ProcID(ev.P)],
+				},
+				table: it.evt,
+				base: baseEvent{
+					typ:  go122.EvGoSyscallBegin,
+					time: Time(ev.Ts),
+					args: [4]uint64{1: uint64(ev.StkID)},
+				},
+			}
+
+			out2 := Event{
+				ctx:   out1.ctx,
+				table: it.evt,
+				base: baseEvent{
+					typ:  go122.EvGoSyscallEnd,
+					time: Time(ev.Ts + 1),
+					args: [4]uint64{},
+				},
+			}
+
+			it.extra = append(it.extra, out2)
+			return out1, nil
+		}
+
+	case oldtrace.EvGoSysExit:
+		mappedType = go122.EvGoSyscallEndBlocked
+	case oldtrace.EvGoSysBlock:
+		return Event{}, errSkip
+	case oldtrace.EvGoWaiting:
+		mappedType = go122.EvGoStatus
+		mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoWaiting)}
+		delete(it.createdPreInit, GoID(ev.Args[0]))
+	case oldtrace.EvGoInSyscall:
+		mappedType = go122.EvGoStatus
+		// In the new tracer, GoStatus with GoSyscall knows what thread the
+		// syscall is on. In the old tracer, EvGoInSyscall doesn't contain that
+		// information and all we can do here is specify NoThread.
+		mappedArgs = [4]uint64{ev.Args[0], ^uint64(0), uint64(go122.GoSyscall)}
+		delete(it.createdPreInit, GoID(ev.Args[0]))
+	case oldtrace.EvHeapAlloc:
+		mappedType = go122.EvHeapAlloc
+	case oldtrace.EvHeapGoal:
+		mappedType = go122.EvHeapGoal
+	case oldtrace.EvGCMarkAssistStart:
+		mappedType = go122.EvGCMarkAssistBegin
+	case oldtrace.EvGCMarkAssistDone:
+		mappedType = go122.EvGCMarkAssistEnd
+	case oldtrace.EvUserTaskCreate:
+		mappedType = go122.EvUserTaskBegin
+		parent := ev.Args[1]
+		if parent == 0 {
+			parent = uint64(NoTask)
+		}
+		mappedArgs = [4]uint64{ev.Args[0], parent, ev.Args[2], uint64(ev.StkID)}
+		name, _ := it.evt.strings.get(stringID(ev.Args[2]))
+		it.tasks[TaskID(ev.Args[0])] = taskState{name: name, parentID: TaskID(parent)}
+	case oldtrace.EvUserTaskEnd:
+		mappedType = go122.EvUserTaskEnd
+		// Event.Task expects the parent and name to be smuggled in extra args
+		// and as extra strings.
+		ts, ok := it.tasks[TaskID(ev.Args[0])]
+		if ok {
+			delete(it.tasks, TaskID(ev.Args[0]))
+			mappedArgs = [4]uint64{
+				ev.Args[0],
+				ev.Args[1],
+				uint64(ts.parentID),
+				uint64(it.evt.addExtraString(ts.name)),
+			}
+		} else {
+			mappedArgs = [4]uint64{ev.Args[0], ev.Args[1], uint64(NoTask), uint64(it.evt.addExtraString(""))}
+		}
+	case oldtrace.EvUserRegion:
+		switch ev.Args[1] {
+		case 0: // start
+			mappedType = go122.EvUserRegionBegin
+		case 1: // end
+			mappedType = go122.EvUserRegionEnd
+		}
+		mappedArgs = [4]uint64{ev.Args[0], ev.Args[2], uint64(ev.StkID)}
+	case oldtrace.EvUserLog:
+		mappedType = go122.EvUserLog
+		mappedArgs = [4]uint64{ev.Args[0], ev.Args[1], it.inlineToStringID[ev.Args[3]], uint64(ev.StkID)}
+	case oldtrace.EvCPUSample:
+		mappedType = go122.EvCPUSample
+		// When emitted by the Go 1.22 tracer, CPU samples have 5 arguments:
+		// timestamp, M, P, G, stack. However, after they get turned into
+		// Events, the arguments are stack, M, P, G.
+		//
+		// In the old trace format (Go 1.21 and earlier), CPU samples did not
+		// have Ms, so the M argument here is ^uint64(0), i.e. "no thread".
+		mappedArgs = [4]uint64{uint64(ev.StkID), ^uint64(0), uint64(ev.P), ev.G}
+	default:
+		return Event{}, fmt.Errorf("unexpected event type %v", ev.Type)
+	}
+
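+	// Note: the go122 spec's argument list includes the timestamp at index
+	// 0, which baseEvent args don't store, hence the -1 when placing the
+	// stack ID below.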
+	if oldtrace.EventDescriptions[ev.Type].Stack {
+		if stackIDs := go122.Specs()[mappedType].StackIDs; len(stackIDs) > 0 {
+			mappedArgs[stackIDs[0]-1] = uint64(ev.StkID)
+		}
+	}
+
+	m := NoThread
+	if ev.P != -1 && ev.Type != oldtrace.EvCPUSample {
+		if t, ok := it.procMs[ProcID(ev.P)]; ok {
+			m = ThreadID(t)
+		}
+	}
+	if ev.Type == oldtrace.EvProcStop {
+		delete(it.procMs, ProcID(ev.P))
+	}
+	g := GoID(ev.G)
+	if g == 0 {
+		g = NoGoroutine
+	}
+	out := Event{
+		ctx: schedCtx{
+			G: g,
+			P: ProcID(ev.P),
+			M: m,
+		},
+		table: it.evt,
+		base: baseEvent{
+			typ:  mappedType,
+			time: Time(ev.Ts),
+			args: mappedArgs,
+		},
+	}
+	return out, nil
+}
+
+// convertOldFormat takes a fully loaded trace in the old trace format and
+// returns an iterator over events in the new format.
+func convertOldFormat(pr oldtrace.Trace) (*oldTraceConverter, error) {
+	it := &oldTraceConverter{}
+	if err := it.init(pr); err != nil {
+		return nil, err
+	}
+	return it, nil
+}
diff --git a/trace/order.go b/trace/order.go
index e0f77c6..0e2f600 100644
--- a/trace/order.go
+++ b/trace/order.go
@@ -19,6 +19,11 @@
 
 // ordering emulates Go scheduler state for both validation and
 // for putting events in the right order.
+//
+// The interface to ordering consists of two methods: Advance
+// and Next. Advance is called to try to advance an event and
+// add completed events to the ordering. Next is used to pick
+// off events in the ordering.
 type ordering struct {
 	gStates     map[GoID]*gState
 	pStates     map[ProcID]*pState // TODO: The keys are dense, so this can be a slice.
@@ -27,29 +32,10 @@
 	gcSeq       uint64
 	gcState     gcState
 	initialGen  uint64
-
-	// Some events like GoDestroySyscall produce two events instead of one.
-	// extraEvent is this extra space. advance must not be called unless
-	// the extraEvent has been consumed with consumeExtraEvent.
-	//
-	// TODO(mknyszek): Replace this with a more formal queue.
-	extraEvent Event
+	queue       queue[Event]
 }
 
-// consumeExtraEvent consumes the extra event.
-func (o *ordering) consumeExtraEvent() Event {
-	if o.extraEvent.Kind() == EventBad {
-		return Event{}
-	}
-	r := o.extraEvent
-	o.extraEvent = Event{}
-	return r
-}
-
-// advance checks if it's valid to proceed with ev which came from thread m.
-//
-// Returns the schedCtx at the point of the event, whether it's OK to advance
-// with this event, and any error encountered in validation.
+// Advance checks if it's valid to proceed with ev, which came from thread m.
 //
 // It assumes the gen value passed to it is monotonically increasing across calls.
 //
@@ -57,7 +43,11 @@
 // If it's not valid to advance with ev, but no error was encountered, the caller
 // should attempt to advance with other candidate events from other threads. If the
 // caller runs out of candidates, the trace is invalid.
-func (o *ordering) advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (schedCtx, bool, error) {
+//
+// If this returns true, Next is guaranteed to return a complete event. However,
+// multiple events may be added to the ordering, so the caller should (but is not
+// required to) continue to call Next until it is exhausted.
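+//
+// A sketch of the intended calling pattern (hypothetical caller):
+//
+//	ok, err := o.Advance(ev, evt, m, gen)
+//	if err != nil {
+//		return err
+//	}
+//	if ok {
+//		for {
+//			e, ok := o.Next()
+//			if !ok {
+//				break
+//			}
+//			// ... emit e ...
+//		}
+//	}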
+func (o *ordering) Advance(ev *baseEvent, evt *evTable, m ThreadID, gen uint64) (bool, error) {
 	if o.initialGen == 0 {
 		// Set the initial gen if necessary.
 		o.initialGen = gen
@@ -67,13 +57,15 @@
 	curCtx.M = m
 	newCtx.M = m
 
+	var ms *mState
 	if m == NoThread {
 		curCtx.P = NoProc
 		curCtx.G = NoGoroutine
 		newCtx = curCtx
 	} else {
 		// Pull out or create the mState for this event.
-		ms, ok := o.mStates[m]
+		var ok bool
+		ms, ok = o.mStates[m]
 		if !ok {
 			ms = &mState{
 				g: NoGoroutine,
@@ -84,11 +76,11 @@
 		curCtx.P = ms.p
 		curCtx.G = ms.g
 		newCtx = curCtx
-		defer func() {
-			// Update the mState for this event.
-			ms.p = newCtx.P
-			ms.g = newCtx.G
-		}()
+	}
+
+	// Generates an event from the current context.
+	currentEvent := func() Event {
+		return Event{table: evt, ctx: curCtx, base: *ev}
 	}
 
 	switch typ := ev.typ; typ {
@@ -97,7 +89,7 @@
 		pid := ProcID(ev.args[0])
 		status := go122.ProcStatus(ev.args[1])
 		if int(status) >= len(go122ProcStatus2ProcState) {
-			return curCtx, false, fmt.Errorf("invalid status for proc %d: %d", pid, status)
+			return false, fmt.Errorf("invalid status for proc %d: %d", pid, status)
 		}
 		oldState := go122ProcStatus2ProcState[status]
 		if s, ok := o.pStates[pid]; ok {
@@ -114,7 +106,7 @@
 				oldState = ProcIdle
 				ev.args[1] = uint64(go122.ProcSyscallAbandoned)
 			} else if s.status != status {
-				return curCtx, false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
+				return false, fmt.Errorf("inconsistent status for proc %d: old %v vs. new %v", pid, s.status, status)
 			}
 			s.seq = makeSeq(gen, 0) // Reset seq.
 		} else {
@@ -149,10 +141,10 @@
 				}
 			}
 			if !found {
-				return curCtx, false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
+				return false, fmt.Errorf("failed to find sched context for proc %d that's about to be stolen", pid)
 			}
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvProcStart:
 		pid := ProcID(ev.args[0])
 		seq := makeSeq(gen, ev.args[1])
@@ -167,19 +159,19 @@
 			// got to the right point in the trace.
 			//
 			// Note that we also don't advance here if we have a P and we're in a syscall.
-			return curCtx, false, nil
+			return false, nil
 		}
 		// We can advance this P. Check some invariants.
 		//
 		// We might have a goroutine if a goroutine is exiting a syscall.
 		reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustNotHave, Goroutine: event.MayHave}
 		if err := validateCtx(curCtx, reqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		state.status = go122.ProcRunning
 		state.seq = seq
 		newCtx.P = pid
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvProcStop:
 		// We must be able to advance this P.
 		//
@@ -192,18 +184,18 @@
 		// ProcStop doesn't need a sequence number.
 		state, ok := o.pStates[curCtx.P]
 		if !ok {
-			return curCtx, false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(typ), curCtx.P)
+			return false, fmt.Errorf("event %s for proc (%v) that doesn't exist", go122.EventString(typ), curCtx.P)
 		}
 		if state.status != go122.ProcRunning && state.status != go122.ProcSyscall {
-			return curCtx, false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(typ), go122.ProcRunning, go122.ProcSyscall)
+			return false, fmt.Errorf("%s event for proc that's not %s or %s", go122.EventString(typ), go122.ProcRunning, go122.ProcSyscall)
 		}
 		reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
 		if err := validateCtx(curCtx, reqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		state.status = go122.ProcIdle
 		newCtx.P = NoProc
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvProcSteal:
 		pid := ProcID(ev.args[0])
 		seq := makeSeq(gen, ev.args[1])
@@ -212,12 +204,12 @@
 			// We can't make an inference as to whether this is bad. We could just be seeing
 			// a ProcStart on a different M before the proc's state was emitted, or before we
 			// got to the right point in the trace.
-			return curCtx, false, nil
+			return false, nil
 		}
 		// We can advance this P. Check some invariants.
 		reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MayHave}
 		if err := validateCtx(curCtx, reqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		// Smuggle in the P state that let us advance so we can surface information to the event.
 		// Specifically, we need to make sure that the event is interpreted not as a transition of
@@ -235,7 +227,8 @@
 		// If we've lost information then don't try to do anything with the M.
 		// It may have moved on and we can't be sure.
 		if oldStatus == go122.ProcSyscallAbandoned {
-			return curCtx, true, nil
+			o.queue.push(currentEvent())
+			break
 		}
 
 		// Validate that the M we're stealing from is what we expect.
@@ -244,21 +237,22 @@
 		if mid == curCtx.M {
 			// We're stealing from ourselves. This behaves like a ProcStop.
 			if curCtx.P != pid {
-				return curCtx, false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
+				return false, fmt.Errorf("tried to self-steal proc %d (thread %d), but got proc %d instead", pid, mid, curCtx.P)
 			}
 			newCtx.P = NoProc
-			return curCtx, true, nil
+			o.queue.push(currentEvent())
+			break
 		}
 
 		// We're stealing from some other M.
 		mState, ok := o.mStates[mid]
 		if !ok {
-			return curCtx, false, fmt.Errorf("stole proc from non-existent thread %d", mid)
+			return false, fmt.Errorf("stole proc from non-existent thread %d", mid)
 		}
 
 		// Make sure we're actually stealing the right P.
 		if mState.p != pid {
-			return curCtx, false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
+			return false, fmt.Errorf("tried to steal proc %d from thread %d, but got proc %d instead", pid, mid, mState.p)
 		}
 
 		// Tell the M it has no P so it can proceed.
@@ -268,7 +262,7 @@
 		// GoSyscallEndBlocked cannot advance until the corresponding
 		// M loses its P.
 		mState.p = NoProc
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 
 	// Handle goroutines.
 	case go122.EvGoStatus:
@@ -277,12 +271,12 @@
 		status := go122.GoStatus(ev.args[2])
 
 		if int(status) >= len(go122GoStatus2GoState) {
-			return curCtx, false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status)
+			return false, fmt.Errorf("invalid status for goroutine %d: %d", gid, status)
 		}
 		oldState := go122GoStatus2GoState[status]
 		if s, ok := o.gStates[gid]; ok {
 			if s.status != status {
-				return curCtx, false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
+				return false, fmt.Errorf("inconsistent status for goroutine %d: old %v vs. new %v", gid, s.status, status)
 			}
 			s.seq = makeSeq(gen, 0) // Reset seq.
 		} else if gen == o.initialGen {
@@ -290,7 +284,7 @@
 			o.gStates[gid] = &gState{id: gid, status: status, seq: makeSeq(gen, 0)}
 			oldState = GoUndetermined
 		} else {
-			return curCtx, false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
+			return false, fmt.Errorf("found goroutine status for new goroutine after the first generation: id=%v status=%v", gid, status)
 		}
 		ev.extra(version.Go122)[0] = uint64(oldState) // Smuggle in the old state for StateTransition.
 
@@ -300,7 +294,7 @@
 			newCtx.G = gid
 		case go122.GoSyscall:
 			if mid == NoThread {
-				return curCtx, false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
+				return false, fmt.Errorf("found goroutine %d in syscall without a thread", gid)
 			}
 			// Is the syscall on this thread? If so, bind it to the context.
 			// Otherwise, we're talking about a G sitting in a syscall on an M.
@@ -311,7 +305,7 @@
 					// binding occur already. Even if the G was blocked in a syscall
 					// for multiple generations since trace start, we would have seen
 					// a previous GoStatus event that bound the goroutine to an M.
-					return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G)
+					return false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, curCtx.G)
 				}
 				newCtx.G = gid
 				break
@@ -327,7 +321,7 @@
 				// goroutine go into a syscall on this thread at some point.
 				if ms.g != gid {
 					// But the G on the M doesn't match. Something's wrong.
-					return curCtx, false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
+					return false, fmt.Errorf("inconsistent thread for syscalling goroutine %d: thread has goroutine %d", gid, ms.g)
 				}
 				// This case is just a Syscall->Syscall event, which needs to
 				// appear as having the G currently bound to this M.
@@ -343,38 +337,38 @@
 			// Update the current context to the M we're talking about.
 			curCtx.M = mid
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoCreate:
 		// Goroutines must be created on a running P, but may or may not be created
 		// by a running goroutine.
 		reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}
 		if err := validateCtx(curCtx, reqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		// If we have a goroutine, it must be running.
 		if state, ok := o.gStates[curCtx.G]; ok && state.status != go122.GoRunning {
-			return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+			return false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
 		}
 		// This goroutine created another. Add a state for it.
 		newgid := GoID(ev.args[0])
 		if _, ok := o.gStates[newgid]; ok {
-			return curCtx, false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
+			return false, fmt.Errorf("tried to create goroutine (%v) that already exists", newgid)
 		}
 		o.gStates[newgid] = &gState{id: newgid, status: go122.GoRunnable, seq: makeSeq(gen, 0)}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoDestroy, go122.EvGoStop, go122.EvGoBlock:
 		// These are goroutine events that all require an active running
 		// goroutine on some thread. They must *always* be advance-able,
 		// since running goroutines are bound to their M.
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		state, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+			return false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
 		}
 		if state.status != go122.GoRunning {
-			return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+			return false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
 		}
 		// Handle each case slightly differently; we just group them together
 		// because they have shared preconditions.
@@ -392,7 +386,7 @@
 			state.status = go122.GoWaiting
 			newCtx.G = NoGoroutine
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoStart:
 		gid := GoID(ev.args[0])
 		seq := makeSeq(gen, ev.args[1])
@@ -401,17 +395,17 @@
 			// We can't make an inference as to whether this is bad. We could just be seeing
 			// a GoStart on a different M before the goroutine was created, before it had its
 			// state emitted, or before we got to the right point in the trace yet.
-			return curCtx, false, nil
+			return false, nil
 		}
 		// We can advance this goroutine. Check some invariants.
 		reqs := event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MustNotHave}
 		if err := validateCtx(curCtx, reqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		state.status = go122.GoRunning
 		state.seq = seq
 		newCtx.G = gid
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoUnblock:
 		// N.B. These both reference the goroutine to unblock, not the current goroutine.
 		gid := GoID(ev.args[0])
@@ -421,31 +415,31 @@
 			// We can't make an inference as to whether this is bad. We could just be seeing
 			// a GoUnblock on a different M before the goroutine was created and blocked itself,
 			// before it had its state emitted, or before we got to the right point in the trace yet.
-			return curCtx, false, nil
+			return false, nil
 		}
 		state.status = go122.GoRunnable
 		state.seq = seq
 		// N.B. No context to validate. Basically anything can unblock
 		// a goroutine (e.g. sysmon).
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoSyscallBegin:
 		// Entering a syscall requires an active running goroutine with a
 		// proc on some thread. It is always advancable.
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		state, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+			return false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
 		}
 		if state.status != go122.GoRunning {
-			return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+			return false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
 		}
 		// Goroutine entered a syscall. It's still running on this P and M.
 		state.status = go122.GoSyscall
 		pState, ok := o.pStates[curCtx.P]
 		if !ok {
-			return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
+			return false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
 		}
 		pState.status = go122.ProcSyscall
 		// Validate the P sequence number on the event and advance it.
@@ -461,36 +455,36 @@
 		// to back off and see if any other events will advance. This is a running P.
 		pSeq := makeSeq(gen, ev.args[0])
 		if !pSeq.succeeds(pState.seq) {
-			return curCtx, false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(typ), pState.seq, pSeq)
+			return false, fmt.Errorf("failed to advance %s: can't make sequence: %s -> %s", go122.EventString(typ), pState.seq, pSeq)
 		}
 		pState.seq = pSeq
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoSyscallEnd:
 		// This event is always advance-able because it happens on the same
 		// thread that EvGoSyscallStart happened, and the goroutine can't leave
 		// that thread until its done.
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		state, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+			return false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
 		}
 		if state.status != go122.GoSyscall {
-			return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+			return false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
 		}
 		state.status = go122.GoRunning
 
 		// Transfer the P back to running from syscall.
 		pState, ok := o.pStates[curCtx.P]
 		if !ok {
-			return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
+			return false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
 		}
 		if pState.status != go122.ProcSyscall {
-			return curCtx, false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status)
+			return false, fmt.Errorf("expected proc %d in state %v, but got %v instead", curCtx.P, go122.ProcSyscall, pState.status)
 		}
 		pState.status = go122.ProcRunning
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoSyscallEndBlocked:
 		// This event becomes advanceable when its P is not in a syscall state
 		// (lack of a P altogether is also acceptable for advancing).
@@ -505,43 +499,43 @@
 		if curCtx.P != NoProc {
 			pState, ok := o.pStates[curCtx.P]
 			if !ok {
-				return curCtx, false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
+				return false, fmt.Errorf("uninitialized proc %d found during %s", curCtx.P, go122.EventString(typ))
 			}
 			if pState.status == go122.ProcSyscall {
-				return curCtx, false, nil
+				return false, nil
 			}
 		}
 		// As mentioned above, we may have a P here if we ProcStart
 		// before this event.
 		if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		state, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+			return false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
 		}
 		if state.status != go122.GoSyscall {
-			return curCtx, false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
+			return false, fmt.Errorf("%s event for goroutine that's not %s", go122.EventString(typ), GoRunning)
 		}
 		newCtx.G = NoGoroutine
 		state.status = go122.GoRunnable
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoCreateSyscall:
 		// This event indicates that a goroutine is effectively
 		// being created out of a cgo callback. Such a goroutine
 		// is 'created' in the syscall state.
 		if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustNotHave}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		// This goroutine is effectively being created. Add a state for it.
 		newgid := GoID(ev.args[0])
 		if _, ok := o.gStates[newgid]; ok {
-			return curCtx, false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
+			return false, fmt.Errorf("tried to create goroutine (%v) in syscall that already exists", newgid)
 		}
 		o.gStates[newgid] = &gState{id: newgid, status: go122.GoSyscall, seq: makeSeq(gen, 0)}
 		// Goroutine is executing. Bind it to the context.
 		newCtx.G = newgid
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGoDestroySyscall:
 		// This event indicates that a goroutine created for a
 		// cgo callback is disappearing, either because the callback
@@ -560,15 +554,15 @@
 		// eagerly by the runtime, and it might get stolen back later
 		// (or never again, if the program is going to exit).
 		if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MayHave, Goroutine: event.MustHave}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		// Check to make sure the goroutine exists in the right state.
 		state, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
+			return false, fmt.Errorf("event %s for goroutine (%v) that doesn't exist", go122.EventString(typ), curCtx.G)
 		}
 		if state.status != go122.GoSyscall {
-			return curCtx, false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(typ), GoSyscall)
+			return false, fmt.Errorf("%s event for goroutine that's not %v", go122.EventString(typ), GoSyscall)
 		}
 		// This goroutine is exiting itself.
 		delete(o.gStates, curCtx.G)
@@ -578,17 +572,17 @@
 		if curCtx.P != NoProc {
 			pState, ok := o.pStates[curCtx.P]
 			if !ok {
-				return curCtx, false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, go122.EventString(typ))
+				return false, fmt.Errorf("found invalid proc %d during %s", curCtx.P, go122.EventString(typ))
 			}
 			if pState.status != go122.ProcSyscall {
-				return curCtx, false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, go122.EventString(typ))
+				return false, fmt.Errorf("proc %d in unexpected state %s during %s", curCtx.P, pState.status, go122.EventString(typ))
 			}
 			// See the go122-create-syscall-reuse-thread-id test case for more details.
 			pState.status = go122.ProcSyscallAbandoned
 			newCtx.P = NoProc
 
 			// Queue an extra self-ProcSteal event.
-			o.extraEvent = Event{
+			extra := Event{
 				table: evt,
 				ctx:   curCtx,
 				base: baseEvent{
@@ -596,10 +590,11 @@
 					time: ev.time,
 				},
 			}
-			o.extraEvent.base.args[0] = uint64(curCtx.P)
-			o.extraEvent.base.extra(version.Go122)[0] = uint64(go122.ProcSyscall)
+			extra.base.args[0] = uint64(curCtx.P)
+			extra.base.extra(version.Go122)[0] = uint64(go122.ProcSyscall)
+			o.queue.push(extra)
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 
 	// Handle tasks. Tasks are interesting because:
 	// - There's no Begin event required to reference a task.
@@ -611,7 +606,7 @@
 	case go122.EvUserTaskBegin:
 		id := TaskID(ev.args[0])
 		if _, ok := o.activeTasks[id]; ok {
-			return curCtx, false, fmt.Errorf("task ID conflict: %d", id)
+			return false, fmt.Errorf("task ID conflict: %d", id)
 		}
 		// Get the parent ID, but don't validate it. There's no guarantee
 		// we actually have information on whether it's active.
@@ -629,10 +624,13 @@
 		nameID := stringID(ev.args[2])
 		name, ok := evt.strings.get(nameID)
 		if !ok {
-			return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
+			return false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
 		}
 		o.activeTasks[id] = taskState{name: name, parentID: parentID}
-		return curCtx, true, validateCtx(curCtx, event.UserGoReqs)
+		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+			return false, err
+		}
+		o.queue.push(currentEvent())
 	case go122.EvUserTaskEnd:
 		id := TaskID(ev.args[0])
 		if ts, ok := o.activeTasks[id]; ok {
@@ -647,45 +645,48 @@
 			ev.extra(version.Go122)[0] = uint64(NoTask)
 			ev.extra(version.Go122)[1] = uint64(evt.addExtraString(""))
 		}
-		return curCtx, true, validateCtx(curCtx, event.UserGoReqs)
+		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
+			return false, err
+		}
+		o.queue.push(currentEvent())
 
 	// Handle user regions.
 	case go122.EvUserRegionBegin:
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		tid := TaskID(ev.args[0])
 		nameID := stringID(ev.args[1])
 		name, ok := evt.strings.get(nameID)
 		if !ok {
-			return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
+			return false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
 		}
 		gState, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
+			return false, fmt.Errorf("encountered EvUserRegionBegin without known state for current goroutine %d", curCtx.G)
 		}
 		if err := gState.beginRegion(userRegion{tid, name}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvUserRegionEnd:
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		tid := TaskID(ev.args[0])
 		nameID := stringID(ev.args[1])
 		name, ok := evt.strings.get(nameID)
 		if !ok {
-			return curCtx, false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
+			return false, fmt.Errorf("invalid string ID %v for %v event", nameID, typ)
 		}
 		gState, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
+			return false, fmt.Errorf("encountered EvUserRegionEnd without known state for current goroutine %d", curCtx.G)
 		}
 		if err := gState.endRegion(userRegion{tid, name}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 
 	// Handle the GC mark phase.
 	//
@@ -698,86 +699,88 @@
 		seq := ev.args[0]
 		if gen == o.initialGen {
 			if o.gcState != gcUndetermined {
-				return curCtx, false, fmt.Errorf("GCActive in the first generation isn't first GC event")
+				return false, fmt.Errorf("GCActive in the first generation isn't first GC event")
 			}
 			o.gcSeq = seq
 			o.gcState = gcRunning
-			return curCtx, true, nil
+			o.queue.push(currentEvent())
+			break
 		}
 		if seq != o.gcSeq+1 {
 			// This is not the right GC cycle.
-			return curCtx, false, nil
+			return false, nil
 		}
 		if o.gcState != gcRunning {
-			return curCtx, false, fmt.Errorf("encountered GCActive while GC was not in progress")
+			return false, fmt.Errorf("encountered GCActive while GC was not in progress")
 		}
 		o.gcSeq = seq
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGCBegin:
 		seq := ev.args[0]
 		if o.gcState == gcUndetermined {
 			o.gcSeq = seq
 			o.gcState = gcRunning
-			return curCtx, true, nil
+			o.queue.push(currentEvent())
+			break
 		}
 		if seq != o.gcSeq+1 {
 			// This is not the right GC cycle.
-			return curCtx, false, nil
+			return false, nil
 		}
 		if o.gcState == gcRunning {
-			return curCtx, false, fmt.Errorf("encountered GCBegin while GC was already in progress")
+			return false, fmt.Errorf("encountered GCBegin while GC was already in progress")
 		}
 		o.gcSeq = seq
 		o.gcState = gcRunning
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGCEnd:
 		seq := ev.args[0]
 		if seq != o.gcSeq+1 {
 			// This is not the right GC cycle.
-			return curCtx, false, nil
+			return false, nil
 		}
 		if o.gcState == gcNotRunning {
-			return curCtx, false, fmt.Errorf("encountered GCEnd when GC was not in progress")
+			return false, fmt.Errorf("encountered GCEnd when GC was not in progress")
 		}
 		if o.gcState == gcUndetermined {
-			return curCtx, false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
+			return false, fmt.Errorf("encountered GCEnd when GC was in an undetermined state")
 		}
 		o.gcSeq = seq
 		o.gcState = gcNotRunning
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 
 	// Handle simple instantaneous events that require a G.
 	case go122.EvGoLabel, go122.EvProcsChange, go122.EvUserLog:
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 
 	// Handle allocation states, which don't require a G.
 	case go122.EvHeapAlloc, go122.EvHeapGoal:
 		if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 
 	// Handle sweep, which is bound to a P and doesn't require a G.
 	case go122.EvGCSweepBegin:
 		if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		if err := o.pStates[curCtx.P].beginRange(makeRangeType(typ, 0)); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGCSweepActive:
 		pid := ProcID(ev.args[0])
 		// N.B. In practice Ps can't block while they're sweeping, so this can only
@@ -786,26 +789,26 @@
 		// in the middle of a sweep.
 		pState, ok := o.pStates[pid]
 		if !ok {
-			return curCtx, false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid)
+			return false, fmt.Errorf("encountered GCSweepActive for unknown proc %d", pid)
 		}
 		if err := pState.activeRange(makeRangeType(typ, 0), gen == o.initialGen); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGCSweepEnd:
 		if err := validateCtx(curCtx, event.SchedReqs{Thread: event.MustHave, Proc: event.MustHave, Goroutine: event.MayHave}); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		_, err := o.pStates[curCtx.P].endRange(typ)
 		if err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 
 	// Handle special goroutine-bound event ranges.
 	case go122.EvSTWBegin, go122.EvGCMarkAssistBegin:
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		desc := stringID(0)
 		if typ == go122.EvSTWBegin {
@@ -813,12 +816,12 @@
 		}
 		gState, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", typ, curCtx.G)
+			return false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", typ, curCtx.G)
 		}
 		if err := gState.beginRange(makeRangeType(typ, desc)); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvGCMarkAssistActive:
 		gid := GoID(ev.args[0])
 		// N.B. Like GoStatus, this can happen at any time, because it can
@@ -826,32 +829,44 @@
 		// current scheduler context.
 		gState, ok := o.gStates[gid]
 		if !ok {
-			return curCtx, false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, go122.EventString(typ))
+			return false, fmt.Errorf("uninitialized goroutine %d found during %s", gid, go122.EventString(typ))
 		}
 		if err := gState.activeRange(makeRangeType(typ, 0), gen == o.initialGen); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
 	case go122.EvSTWEnd, go122.EvGCMarkAssistEnd:
 		if err := validateCtx(curCtx, event.UserGoReqs); err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		gState, ok := o.gStates[curCtx.G]
 		if !ok {
-			return curCtx, false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", typ, curCtx.G)
+			return false, fmt.Errorf("encountered event of type %d without known state for current goroutine %d", typ, curCtx.G)
 		}
 		desc, err := gState.endRange(typ)
 		if err != nil {
-			return curCtx, false, err
+			return false, err
 		}
 		if typ == go122.EvSTWEnd {
 			// Smuggle the kind into the event.
 			// Don't use ev.extra here so we have symmetry with STWBegin.
 			ev.args[0] = uint64(desc)
 		}
-		return curCtx, true, nil
+		o.queue.push(currentEvent())
+	default:
+		return false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
 	}
-	return curCtx, false, fmt.Errorf("bad event type found while ordering: %v", ev.typ)
+	if ms != nil {
+		// Update the mState for this event.
+		ms.p = newCtx.P
+		ms.g = newCtx.G
+	}
+	return true, nil
+}
+
+// Next returns the next event in the ordering.
+func (o *ordering) Next() (Event, bool) {
+	return o.queue.pop()
 }
 
 // schedCtx represents the scheduling resources associated with an event.
@@ -1096,3 +1111,51 @@
 	// parentID is the parent ID of the active task.
 	parentID TaskID
 }
+
+// queue implements a growable ring buffer with a queue API.
+type queue[T any] struct {
+	start, end int
+	buf        []T
+}
+
+// push adds a new element to the back of the queue.
+func (q *queue[T]) push(value T) {
+	if q.end-q.start == len(q.buf) {
+		q.grow()
+	}
+	q.buf[q.end%len(q.buf)] = value
+	q.end++
+}
+
+// grow increases the size of the queue.
+func (q *queue[T]) grow() {
+	if len(q.buf) == 0 {
+		q.buf = make([]T, 2)
+		return
+	}
+
+	// Create new buf and copy data over.
+	newBuf := make([]T, len(q.buf)*2)
+	pivot := q.start % len(q.buf)
+	first, last := q.buf[pivot:], q.buf[:pivot]
+	copy(newBuf[:len(first)], first)
+	copy(newBuf[len(first):], last)
+
+	// Update the queue state.
+	q.start = 0
+	q.end = len(q.buf)
+	q.buf = newBuf
+}
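+
+// For example (illustrative): with len(buf) == 4, start == 6, and end == 10,
+// pivot is 2, so buf[2:] holds the logically first elements; grow copies
+// them to the front of newBuf, follows them with buf[:2], and rebases the
+// indices to start == 0, end == 4.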
+
+// pop removes an element from the front of the queue. If the queue is
+// empty, it returns the zero value of T and false.
+func (q *queue[T]) pop() (T, bool) {
+	if q.end-q.start == 0 {
+		return *new(T), false
+	}
+	elem := &q.buf[q.start%len(q.buf)]
+	value := *elem
+	*elem = *new(T) // Clear the entry before returning, so we don't hold onto old tables.
+	q.start++
+	return value, true
+}
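
Note on the refactor above: ordering.advance used to return the advanced
event's schedCtx directly, with any extra events produced during parsing
exposed separately via consumeExtraEvent. Advance now only reports
success and pushes the resulting events onto the internal queue, which
Next drains in order, so a single call to Advance may yield zero, one,
or several ordered events. A minimal sketch of a caller driving this
pair, assuming the Advance signature used in reader.go below (the
drainAll helper is illustrative, not part of this change):

    // drainAll is a hypothetical helper: feed one raw event to the
    // ordering, then collect everything it made available.
    func drainAll(o *ordering, ev *baseEvent, tbl *evTable, m ThreadID, gen uint64) ([]Event, error) {
        ok, err := o.Advance(ev, tbl, m, gen)
        if err != nil {
            return nil, err
        }
        if !ok {
            // Not ready yet: retry this event after events from other
            // Ms have advanced the ordering (reader.go below does this
            // via the frontier heap).
            return nil, nil
        }
        var out []Event
        for {
            e, ok := o.Next()
            if !ok {
                break
            }
            out = append(out, e)
        }
        return out, nil
    }
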
diff --git a/trace/order_test.go b/trace/order_test.go
new file mode 100644
index 0000000..ee9e3c6
--- /dev/null
+++ b/trace/order_test.go
@@ -0,0 +1,38 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by "gen.bash" from internal/trace/v2; DO NOT EDIT.
+
+//go:build go1.21
+
+package trace
+
+import "testing"
+
+func TestQueue(t *testing.T) {
+	var q queue[int]
+	check := func(name string, exp []int) {
+		for _, v := range exp {
+			q.push(v)
+		}
+		for i, want := range exp {
+			if got, ok := q.pop(); !ok {
+				t.Fatalf("check %q: expected to be able to pop after %d pops", name, i+1)
+			} else if got != want {
+				t.Fatalf("check %q: expected value %d after on pop %d, got %d", name, want, i+1, got)
+			}
+		}
+		if _, ok := q.pop(); ok {
+			t.Fatalf("check %q: did not expect to be able to pop more values", name)
+		}
+		if _, ok := q.pop(); ok {
+			t.Fatalf("check %q: did not expect to be able to pop more values a second time", name)
+		}
+	}
+	check("one element", []int{4})
+	check("two elements", []int{64, 12})
+	check("six elements", []int{55, 16423, 2352, 644, 12874, 9372})
+	check("one element again", []int{7})
+	check("two elements again", []int{77, 6336})
+}
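
The checks above always drain the queue completely between pushes. To
see the wrap-around copy in grow at work, interleave a pop between
pushes so that start points into the middle of the buffer when it
fills. The following supplementary test is a sketch (hypothetical, not
part of this change); the buffer states in the comments follow the
queue implementation above:

    func TestQueueInterleaved(t *testing.T) {
        var q queue[int]
        q.push(1) // first grow: capacity 2
        q.push(2) // buf = [1 2]
        if v, ok := q.pop(); !ok || v != 1 {
            t.Fatalf("pop: got %d, %t; want 1, true", v, ok)
        }
        q.push(3) // end wraps around: buf = [3 2], start = 1
        q.push(4) // full with pivot 1: grow copies buf[1:], then buf[:1]
        for i, want := range []int{2, 3, 4} {
            if got, ok := q.pop(); !ok || got != want {
                t.Fatalf("pop %d: got %d, %t; want %d, true", i+1, got, ok, want)
            }
        }
        if _, ok := q.pop(); ok {
            t.Fatal("expected queue to be empty")
        }
    }
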
diff --git a/trace/reader.go b/trace/reader.go
index d7311c9..088565e 100644
--- a/trace/reader.go
+++ b/trace/reader.go
@@ -16,6 +16,7 @@
 	"strings"
 
 	"golang.org/x/exp/trace/internal/event/go122"
+	"golang.org/x/exp/trace/internal/oldtrace"
 	"golang.org/x/exp/trace/internal/version"
 )
 
@@ -29,6 +30,8 @@
 	cpuSamples  []cpuSample
 	order       ordering
 	emittedSync bool
+
+	go121Events *oldTraceConverter
 }
 
 // NewReader creates a new trace reader.
@@ -38,20 +41,30 @@
 	if err != nil {
 		return nil, err
 	}
-	if v != version.Go122 {
+	switch v {
+	case version.Go111, version.Go119, version.Go121:
+		tr, err := oldtrace.Parse(br, v)
+		if err != nil {
+			return nil, err
+		}
+		return &Reader{
+			go121Events: convertOldFormat(tr),
+		}, nil
+	case version.Go122:
+		return &Reader{
+			r: br,
+			order: ordering{
+				mStates:     make(map[ThreadID]*mState),
+				pStates:     make(map[ProcID]*pState),
+				gStates:     make(map[GoID]*gState),
+				activeTasks: make(map[TaskID]taskState),
+			},
+			// Don't emit a sync event when we first go to emit events.
+			emittedSync: true,
+		}, nil
+	default:
 		return nil, fmt.Errorf("unknown or unsupported version go 1.%d", v)
 	}
-	return &Reader{
-		r: br,
-		order: ordering{
-			mStates:     make(map[ThreadID]*mState),
-			pStates:     make(map[ProcID]*pState),
-			gStates:     make(map[GoID]*gState),
-			activeTasks: make(map[TaskID]taskState),
-		},
-		// Don't emit a sync event when we first go to emit events.
-		emittedSync: true,
-	}, nil
 }
 
 // ReadEvent reads a single event from the stream.
@@ -59,6 +72,15 @@
 // If the stream has been exhausted, it returns an invalid
 // event and io.EOF.
 func (r *Reader) ReadEvent() (e Event, err error) {
+	if r.go121Events != nil {
+		ev, err := r.go121Events.next()
+		if err != nil {
+			// TODO: do we have to emit an EventSync when the trace is done?
+			return Event{}, err
+		}
+		return ev, nil
+	}
+
 	// Go 1.22+ trace parsing algorithm.
 	//
 	// (1) Read in all the batches for the next generation from the stream.
@@ -89,8 +111,8 @@
 		r.lastTs = e.base.time
 	}()
 
-	// Consume any extra events produced during parsing.
-	if ev := r.order.consumeExtraEvent(); ev.Kind() != EventBad {
+	// Consume any events in the ordering first.
+	if ev, ok := r.order.Next(); ok {
 		return ev, nil
 	}
 
@@ -134,13 +156,17 @@
 		// Reset emittedSync.
 		r.emittedSync = false
 	}
-	refresh := func(i int) error {
+	tryAdvance := func(i int) (bool, error) {
 		bc := r.frontier[i]
 
+		if ok, err := r.order.Advance(&bc.ev, r.gen.evTable, bc.m, r.gen.gen); !ok || err != nil {
+			return ok, err
+		}
+
 		// Refresh the cursor's event.
 		ok, err := bc.nextEvent(r.gen.batches[bc.m], r.gen.freq)
 		if err != nil {
-			return err
+			return false, err
 		}
 		if ok {
 			// If we successfully refreshed, update the heap.
@@ -149,7 +175,7 @@
 			// There's nothing else to read. Delete this cursor from the frontier.
 			r.frontier = heapRemove(r.frontier, i)
 		}
-		return nil
+		return true, nil
 	}
 	// Inject a CPU sample if it comes next.
 	if len(r.cpuSamples) != 0 {
@@ -164,28 +190,35 @@
 	if len(r.frontier) == 0 {
 		return Event{}, fmt.Errorf("broken trace: frontier is empty:\n[gen=%d]\n\n%s\n%s\n", r.gen.gen, dumpFrontier(r.frontier), dumpOrdering(&r.order))
 	}
-	bc := r.frontier[0]
-	if ctx, ok, err := r.order.advance(&bc.ev, r.gen.evTable, bc.m, r.gen.gen); err != nil {
+	if ok, err := tryAdvance(0); err != nil {
 		return Event{}, err
-	} else if ok {
-		e := Event{table: r.gen.evTable, ctx: ctx, base: bc.ev}
-		return e, refresh(0)
-	}
-	// Sort the min-heap. A sorted min-heap is still a min-heap,
-	// but now we can iterate over the rest and try to advance in
-	// order. This path should be rare.
-	slices.SortFunc(r.frontier, (*batchCursor).compare)
-	// Try to advance the rest of the frontier, in timestamp order.
-	for i := 1; i < len(r.frontier); i++ {
-		bc := r.frontier[i]
-		if ctx, ok, err := r.order.advance(&bc.ev, r.gen.evTable, bc.m, r.gen.gen); err != nil {
-			return Event{}, err
-		} else if ok {
-			e := Event{table: r.gen.evTable, ctx: ctx, base: bc.ev}
-			return e, refresh(i)
+	} else if !ok {
+		// Try to advance the rest of the frontier, in timestamp order.
+		//
+		// To do this, sort the min-heap. A sorted min-heap is still a
+		// min-heap, but now we can iterate over the rest and try to
+		// advance in order. This path should be rare.
+		slices.SortFunc(r.frontier, (*batchCursor).compare)
+		success := false
+		for i := 1; i < len(r.frontier); i++ {
+			if ok, err = tryAdvance(i); err != nil {
+				return Event{}, err
+			} else if ok {
+				success = true
+				break
+			}
+		}
+		if !success {
+			return Event{}, fmt.Errorf("broken trace: failed to advance: frontier:\n[gen=%d]\n\n%s\n%s\n", r.gen.gen, dumpFrontier(r.frontier), dumpOrdering(&r.order))
 		}
 	}
-	return Event{}, fmt.Errorf("broken trace: failed to advance: frontier:\n[gen=%d]\n\n%s\n%s\n", r.gen.gen, dumpFrontier(r.frontier), dumpOrdering(&r.order))
+
+	// Pick off the next event on the queue. At this point, one must exist.
+	ev, ok := r.order.Next()
+	if !ok {
+		panic("invariant violation: advance successful, but queue is empty")
+	}
+	return ev, nil
 }
 
 func dumpFrontier(frontier []*batchCursor) string {
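
With the version switch in NewReader above, pre-1.22 traces are parsed
by the oldtrace package and converted event by event, so callers see a
single API regardless of format. A minimal consumer, as a sketch (the
trace.out path is hypothetical; printing via Event.String is just for
illustration):

    package main

    import (
        "fmt"
        "io"
        "log"
        "os"

        "golang.org/x/exp/trace"
    )

    func main() {
        // Hypothetical input: a trace from Go 1.11, 1.19, 1.21, or 1.22.
        f, err := os.Open("trace.out")
        if err != nil {
            log.Fatal(err)
        }
        defer f.Close()

        r, err := trace.NewReader(f)
        if err != nil {
            log.Fatal(err)
        }
        for {
            ev, err := r.ReadEvent()
            if err == io.EOF {
                break
            }
            if err != nil {
                log.Fatal(err)
            }
            fmt.Println(ev.String())
        }
    }
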
diff --git a/trace/testdata/fuzz/FuzzReader/4055b17cae1a3443 b/trace/testdata/fuzz/FuzzReader/4055b17cae1a3443
new file mode 100644
index 0000000..ea5417c
--- /dev/null
+++ b/trace/testdata/fuzz/FuzzReader/4055b17cae1a3443
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("go 1.11 trace\x00\x00\x00A00\x020$0")