blob: bdb4454b6e1cc8d4c46f7ceead5bf9e2f01a77cc [file] [log] [blame] [edit]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pprof
import (
"bytes"
"compress/gzip"
"fmt"
"io"
"os"
"runtime"
"strconv"
"time"
"unsafe"
)
// lostProfileEvent is the function to which lost profiling
// events are attributed.
// (The name shows up in the pprof graphs.)
func lostProfileEvent() { lostProfileEvent() }
// funcPC returns the PC for the func value f.
func funcPC(f interface{}) uintptr {
return *(*[2]*uintptr)(unsafe.Pointer(&f))[1]
}
// A profileBuilder writes a profile incrementally from a
// stream of profile samples delivered by the runtime.
type profileBuilder struct {
start time.Time
end time.Time
havePeriod bool
period int64
m profMap
// encoding state
w io.Writer
zw *gzip.Writer
pb protobuf
strings []string
stringMap map[string]int
locs map[uintptr]locInfo // list of locInfo starting with the given PC.
funcs map[string]int // Package path-qualified function name to Function.ID
mem []memMap
deck pcDeck
}
type memMap struct {
// initialized as reading mapping
start uintptr
end uintptr
offset uint64
file, buildID string
funcs symbolizeFlag
fake bool // map entry was faked; /proc/self/maps wasn't available
}
// symbolizeFlag keeps track of symbolization result.
// 0 : no symbol lookup was performed
// 1<<0 (lookupTried) : symbol lookup was performed
// 1<<1 (lookupFailed): symbol lookup was performed but failed
type symbolizeFlag uint8
const (
lookupTried symbolizeFlag = 1 << iota
lookupFailed symbolizeFlag = 1 << iota
)
const (
// message Profile
tagProfile_SampleType = 1 // repeated ValueType
tagProfile_Sample = 2 // repeated Sample
tagProfile_Mapping = 3 // repeated Mapping
tagProfile_Location = 4 // repeated Location
tagProfile_Function = 5 // repeated Function
tagProfile_StringTable = 6 // repeated string
tagProfile_DropFrames = 7 // int64 (string table index)
tagProfile_KeepFrames = 8 // int64 (string table index)
tagProfile_TimeNanos = 9 // int64
tagProfile_DurationNanos = 10 // int64
tagProfile_PeriodType = 11 // ValueType (really optional string???)
tagProfile_Period = 12 // int64
tagProfile_Comment = 13 // repeated int64
tagProfile_DefaultSampleType = 14 // int64
// message ValueType
tagValueType_Type = 1 // int64 (string table index)
tagValueType_Unit = 2 // int64 (string table index)
// message Sample
tagSample_Location = 1 // repeated uint64
tagSample_Value = 2 // repeated int64
tagSample_Label = 3 // repeated Label
// message Label
tagLabel_Key = 1 // int64 (string table index)
tagLabel_Str = 2 // int64 (string table index)
tagLabel_Num = 3 // int64
// message Mapping
tagMapping_ID = 1 // uint64
tagMapping_Start = 2 // uint64
tagMapping_Limit = 3 // uint64
tagMapping_Offset = 4 // uint64
tagMapping_Filename = 5 // int64 (string table index)
tagMapping_BuildID = 6 // int64 (string table index)
tagMapping_HasFunctions = 7 // bool
tagMapping_HasFilenames = 8 // bool
tagMapping_HasLineNumbers = 9 // bool
tagMapping_HasInlineFrames = 10 // bool
// message Location
tagLocation_ID = 1 // uint64
tagLocation_MappingID = 2 // uint64
tagLocation_Address = 3 // uint64
tagLocation_Line = 4 // repeated Line
// message Line
tagLine_FunctionID = 1 // uint64
tagLine_Line = 2 // int64
// message Function
tagFunction_ID = 1 // uint64
tagFunction_Name = 2 // int64 (string table index)
tagFunction_SystemName = 3 // int64 (string table index)
tagFunction_Filename = 4 // int64 (string table index)
tagFunction_StartLine = 5 // int64
)
// stringIndex adds s to the string table if not already present
// and returns the index of s in the string table.
func (b *profileBuilder) stringIndex(s string) int64 {
id, ok := b.stringMap[s]
if !ok {
id = len(b.strings)
b.strings = append(b.strings, s)
b.stringMap[s] = id
}
return int64(id)
}
func (b *profileBuilder) flush() {
const dataFlush = 4096
if b.pb.nest == 0 && len(b.pb.data) > dataFlush {
b.zw.Write(b.pb.data)
b.pb.data = b.pb.data[:0]
}
}
// pbValueType encodes a ValueType message to b.pb.
func (b *profileBuilder) pbValueType(tag int, typ, unit string) {
start := b.pb.startMessage()
b.pb.int64(tagValueType_Type, b.stringIndex(typ))
b.pb.int64(tagValueType_Unit, b.stringIndex(unit))
b.pb.endMessage(tag, start)
}
// pbSample encodes a Sample message to b.pb.
func (b *profileBuilder) pbSample(values []int64, locs []uint64, labels func()) {
start := b.pb.startMessage()
b.pb.int64s(tagSample_Value, values)
b.pb.uint64s(tagSample_Location, locs)
if labels != nil {
labels()
}
b.pb.endMessage(tagProfile_Sample, start)
b.flush()
}
// pbLabel encodes a Label message to b.pb.
func (b *profileBuilder) pbLabel(tag int, key, str string, num int64) {
start := b.pb.startMessage()
b.pb.int64Opt(tagLabel_Key, b.stringIndex(key))
b.pb.int64Opt(tagLabel_Str, b.stringIndex(str))
b.pb.int64Opt(tagLabel_Num, num)
b.pb.endMessage(tag, start)
}
// pbLine encodes a Line message to b.pb.
func (b *profileBuilder) pbLine(tag int, funcID uint64, line int64) {
start := b.pb.startMessage()
b.pb.uint64Opt(tagLine_FunctionID, funcID)
b.pb.int64Opt(tagLine_Line, line)
b.pb.endMessage(tag, start)
}
// pbMapping encodes a Mapping message to b.pb.
func (b *profileBuilder) pbMapping(tag int, id, base, limit, offset uint64, file, buildID string, hasFuncs bool) {
start := b.pb.startMessage()
b.pb.uint64Opt(tagMapping_ID, id)
b.pb.uint64Opt(tagMapping_Start, base)
b.pb.uint64Opt(tagMapping_Limit, limit)
b.pb.uint64Opt(tagMapping_Offset, offset)
b.pb.int64Opt(tagMapping_Filename, b.stringIndex(file))
b.pb.int64Opt(tagMapping_BuildID, b.stringIndex(buildID))
// TODO: we set HasFunctions if all symbols from samples were symbolized (hasFuncs).
// Decide what to do about HasInlineFrames and HasLineNumbers.
// Also, another approach to handle the mapping entry with
// incomplete symbolization results is to dupliace the mapping
// entry (but with different Has* fields values) and use
// different entries for symbolized locations and unsymbolized locations.
if hasFuncs {
b.pb.bool(tagMapping_HasFunctions, true)
}
b.pb.endMessage(tag, start)
}
func allFrames(addr uintptr) ([]runtime.Frame, symbolizeFlag) {
// Expand this one address using CallersFrames so we can cache
// each expansion. In general, CallersFrames takes a whole
// stack, but in this case we know there will be no skips in
// the stack and we have return PCs anyway.
frames := runtime.CallersFrames([]uintptr{addr})
frame, more := frames.Next()
if frame.Function == "runtime.goexit" {
// Short-circuit if we see runtime.goexit so the loop
// below doesn't allocate a useless empty location.
return nil, 0
}
symbolizeResult := lookupTried
if frame.PC == 0 || frame.Function == "" || frame.File == "" || frame.Line == 0 {
symbolizeResult |= lookupFailed
}
if frame.PC == 0 {
// If we failed to resolve the frame, at least make up
// a reasonable call PC. This mostly happens in tests.
frame.PC = addr - 1
}
ret := []runtime.Frame{frame}
for frame.Function != "runtime.goexit" && more == true {
frame, more = frames.Next()
ret = append(ret, frame)
}
return ret, symbolizeResult
}
type locInfo struct {
// location id assigned by the profileBuilder
id uint64
// sequence of PCs, including the fake PCs returned by the traceback
// to represent inlined functions
// https://github.com/golang/go/blob/d6f2f833c93a41ec1c68e49804b8387a06b131c5/src/runtime/traceback.go#L347-L368
pcs []uintptr
}
// newProfileBuilder returns a new profileBuilder.
// CPU profiling data obtained from the runtime can be added
// by calling b.addCPUData, and then the eventual profile
// can be obtained by calling b.finish.
func newProfileBuilder(w io.Writer) *profileBuilder {
zw, _ := gzip.NewWriterLevel(w, gzip.BestSpeed)
b := &profileBuilder{
w: w,
zw: zw,
start: time.Now(),
strings: []string{""},
stringMap: map[string]int{"": 0},
locs: map[uintptr]locInfo{},
funcs: map[string]int{},
}
b.readMapping()
return b
}
// addCPUData adds the CPU profiling data to the profile.
// The data must be a whole number of records,
// as delivered by the runtime.
func (b *profileBuilder) addCPUData(data []uint64, tags []unsafe.Pointer) error {
if !b.havePeriod {
// first record is period
if len(data) < 3 {
return fmt.Errorf("truncated profile")
}
if data[0] != 3 || data[2] == 0 {
return fmt.Errorf("malformed profile")
}
// data[2] is sampling rate in Hz. Convert to sampling
// period in nanoseconds.
b.period = 1e9 / int64(data[2])
b.havePeriod = true
data = data[3:]
}
// Parse CPU samples from the profile.
// Each sample is 3+n uint64s:
// data[0] = 3+n
// data[1] = time stamp (ignored)
// data[2] = count
// data[3:3+n] = stack
// If the count is 0 and the stack has length 1,
// that's an overflow record inserted by the runtime
// to indicate that stack[0] samples were lost.
// Otherwise the count is usually 1,
// but in a few special cases like lost non-Go samples
// there can be larger counts.
// Because many samples with the same stack arrive,
// we want to deduplicate immediately, which we do
// using the b.m profMap.
for len(data) > 0 {
if len(data) < 3 || data[0] > uint64(len(data)) {
return fmt.Errorf("truncated profile")
}
if data[0] < 3 || tags != nil && len(tags) < 1 {
return fmt.Errorf("malformed profile")
}
count := data[2]
stk := data[3:data[0]]
data = data[data[0]:]
var tag unsafe.Pointer
if tags != nil {
tag = tags[0]
tags = tags[1:]
}
if count == 0 && len(stk) == 1 {
// overflow record
count = uint64(stk[0])
stk = []uint64{
// gentraceback guarantees that PCs in the
// stack can be unconditionally decremented and
// still be valid, so we must do the same.
uint64(funcPC(lostProfileEvent) + 1),
}
}
b.m.lookup(stk, tag).count += int64(count)
}
return nil
}
// build completes and returns the constructed profile.
func (b *profileBuilder) build() {
b.end = time.Now()
b.pb.int64Opt(tagProfile_TimeNanos, b.start.UnixNano())
if b.havePeriod { // must be CPU profile
b.pbValueType(tagProfile_SampleType, "samples", "count")
b.pbValueType(tagProfile_SampleType, "cpu", "nanoseconds")
b.pb.int64Opt(tagProfile_DurationNanos, b.end.Sub(b.start).Nanoseconds())
b.pbValueType(tagProfile_PeriodType, "cpu", "nanoseconds")
b.pb.int64Opt(tagProfile_Period, b.period)
}
values := []int64{0, 0}
var locs []uint64
for e := b.m.all; e != nil; e = e.nextAll {
values[0] = e.count
values[1] = e.count * b.period
var labels func()
if e.tag != nil {
labels = func() {
for k, v := range *(*labelMap)(e.tag) {
b.pbLabel(tagSample_Label, k, v, 0)
}
}
}
locs = b.appendLocsForStack(locs[:0], e.stk)
b.pbSample(values, locs, labels)
}
for i, m := range b.mem {
hasFunctions := m.funcs == lookupTried // lookupTried but not lookupFailed
b.pbMapping(tagProfile_Mapping, uint64(i+1), uint64(m.start), uint64(m.end), m.offset, m.file, m.buildID, hasFunctions)
}
// TODO: Anything for tagProfile_DropFrames?
// TODO: Anything for tagProfile_KeepFrames?
b.pb.strings(tagProfile_StringTable, b.strings)
b.zw.Write(b.pb.data)
b.zw.Close()
}
// appendLocsForStack appends the location IDs for the given stack trace to the given
// location ID slice, locs. The addresses in the stack are return PCs or 1 + the PC of
// an inline marker as the runtime traceback function returns.
//
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
b.deck.reset()
// The last frame might be truncated. Recover lost inline frames.
stk = runtime_expandFinalInlineFrame(stk)
for len(stk) > 0 {
addr := stk[0]
if l, ok := b.locs[addr]; ok {
// first record the location if there is any pending accumulated info.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
// then, record the cached location.
locs = append(locs, l.id)
// Skip the matching pcs.
//
// Even if stk was truncated due to the stack depth
// limit, expandFinalInlineFrame above has already
// fixed the truncation, ensuring it is long enough.
stk = stk[len(l.pcs):]
continue
}
frames, symbolizeResult := allFrames(addr)
if len(frames) == 0 { // runtime.goexit.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
stk = stk[1:]
continue
}
if added := b.deck.tryAdd(addr, frames, symbolizeResult); added {
stk = stk[1:]
continue
}
// add failed because this addr is not inlined with the
// existing PCs in the deck. Flush the deck and retry handling
// this pc.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
// check cache again - previous emitLocation added a new entry
if l, ok := b.locs[addr]; ok {
locs = append(locs, l.id)
stk = stk[len(l.pcs):] // skip the matching pcs.
} else {
b.deck.tryAdd(addr, frames, symbolizeResult) // must succeed.
stk = stk[1:]
}
}
if id := b.emitLocation(); id > 0 { // emit remaining location.
locs = append(locs, id)
}
return locs
}
// pcDeck is a helper to detect a sequence of inlined functions from
// a stack trace returned by the runtime.
//
// The stack traces returned by runtime's trackback functions are fully
// expanded (at least for Go functions) and include the fake pcs representing
// inlined functions. The profile proto expects the inlined functions to be
// encoded in one Location message.
// https://github.com/google/pprof/blob/5e965273ee43930341d897407202dd5e10e952cb/proto/profile.proto#L177-L184
//
// Runtime does not directly expose whether a frame is for an inlined function
// and looking up debug info is not ideal, so we use a heuristic to filter
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
// Frame's Func is nil (note: also true for non-Go functions), and
// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be directly recursive).
//
// As reading and processing the pcs in a stack trace one by one (from leaf to the root),
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
// until we observe the entry function frame.
type pcDeck struct {
pcs []uintptr
frames []runtime.Frame
symbolizeResult symbolizeFlag
}
func (d *pcDeck) reset() {
d.pcs = d.pcs[:0]
d.frames = d.frames[:0]
d.symbolizeResult = 0
}
// tryAdd tries to add the pc and Frames expanded from it (most likely one,
// since the stack trace is already fully expanded) and the symbolizeResult
// to the deck. If it fails the caller needs to flush the deck and retry.
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
if existing := len(d.pcs); existing > 0 {
// 'd.frames' are all expanded from one 'pc' and represent all
// inlined functions so we check only the last one.
newFrame := frames[0]
last := d.frames[existing-1]
if last.Func != nil { // the last frame can't be inlined. Flush.
return false
}
if last.Entry == 0 || newFrame.Entry == 0 { // Possibly not a Go function. Don't try to merge.
return false
}
if last.Entry != newFrame.Entry { // newFrame is for a different function.
return false
}
if last.Function == newFrame.Function { // maybe recursion.
return false
}
}
d.pcs = append(d.pcs, pc)
d.frames = append(d.frames, frames...)
d.symbolizeResult |= symbolizeResult
return true
}
// emitLocation emits the new location and function information recorded in the deck
// and returns the location ID encoded in the profile protobuf.
// It emits to b.pb, so there must be no message encoding in progress.
// It resets the deck.
func (b *profileBuilder) emitLocation() uint64 {
if len(b.deck.pcs) == 0 {
return 0
}
defer b.deck.reset()
addr := b.deck.pcs[0]
firstFrame := b.deck.frames[0]
// We can't write out functions while in the middle of the
// Location message, so record new functions we encounter and
// write them out after the Location.
type newFunc struct {
id uint64
name, file string
}
newFuncs := make([]newFunc, 0, 8)
id := uint64(len(b.locs)) + 1
b.locs[addr] = locInfo{id: id, pcs: append([]uintptr{}, b.deck.pcs...)}
start := b.pb.startMessage()
b.pb.uint64Opt(tagLocation_ID, id)
b.pb.uint64Opt(tagLocation_Address, uint64(firstFrame.PC))
for _, frame := range b.deck.frames {
// Write out each line in frame expansion.
funcID := uint64(b.funcs[frame.Function])
if funcID == 0 {
funcID = uint64(len(b.funcs)) + 1
b.funcs[frame.Function] = int(funcID)
newFuncs = append(newFuncs, newFunc{funcID, frame.Function, frame.File})
}
b.pbLine(tagLocation_Line, funcID, int64(frame.Line))
}
for i := range b.mem {
if b.mem[i].start <= addr && addr < b.mem[i].end || b.mem[i].fake {
b.pb.uint64Opt(tagLocation_MappingID, uint64(i+1))
m := b.mem[i]
m.funcs |= b.deck.symbolizeResult
b.mem[i] = m
break
}
}
b.pb.endMessage(tagProfile_Location, start)
// Write out functions we found during frame expansion.
for _, fn := range newFuncs {
start := b.pb.startMessage()
b.pb.uint64Opt(tagFunction_ID, fn.id)
b.pb.int64Opt(tagFunction_Name, b.stringIndex(fn.name))
b.pb.int64Opt(tagFunction_SystemName, b.stringIndex(fn.name))
b.pb.int64Opt(tagFunction_Filename, b.stringIndex(fn.file))
b.pb.endMessage(tagProfile_Function, start)
}
b.flush()
return id
}
// readMapping reads /proc/self/maps and writes mappings to b.pb.
// It saves the address ranges of the mappings in b.mem for use
// when emitting locations.
func (b *profileBuilder) readMapping() {
data, _ := os.ReadFile("/proc/self/maps")
parseProcSelfMaps(data, b.addMapping)
if len(b.mem) == 0 { // pprof expects a map entry, so fake one.
b.addMappingEntry(0, 0, 0, "", "", true)
// TODO(hyangah): make addMapping return *memMap or
// take a memMap struct, and get rid of addMappingEntry
// that takes a bunch of positional arguments.
}
}
func parseProcSelfMaps(data []byte, addMapping func(lo, hi, offset uint64, file, buildID string)) {
// $ cat /proc/self/maps
// 00400000-0040b000 r-xp 00000000 fc:01 787766 /bin/cat
// 0060a000-0060b000 r--p 0000a000 fc:01 787766 /bin/cat
// 0060b000-0060c000 rw-p 0000b000 fc:01 787766 /bin/cat
// 014ab000-014cc000 rw-p 00000000 00:00 0 [heap]
// 7f7d76af8000-7f7d7797c000 r--p 00000000 fc:01 1318064 /usr/lib/locale/locale-archive
// 7f7d7797c000-7f7d77b36000 r-xp 00000000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77b36000-7f7d77d36000 ---p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77d36000-7f7d77d3a000 r--p 001ba000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77d3a000-7f7d77d3c000 rw-p 001be000 fc:01 1180226 /lib/x86_64-linux-gnu/libc-2.19.so
// 7f7d77d3c000-7f7d77d41000 rw-p 00000000 00:00 0
// 7f7d77d41000-7f7d77d64000 r-xp 00000000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
// 7f7d77f3f000-7f7d77f42000 rw-p 00000000 00:00 0
// 7f7d77f61000-7f7d77f63000 rw-p 00000000 00:00 0
// 7f7d77f63000-7f7d77f64000 r--p 00022000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
// 7f7d77f64000-7f7d77f65000 rw-p 00023000 fc:01 1180217 /lib/x86_64-linux-gnu/ld-2.19.so
// 7f7d77f65000-7f7d77f66000 rw-p 00000000 00:00 0
// 7ffc342a2000-7ffc342c3000 rw-p 00000000 00:00 0 [stack]
// 7ffc34343000-7ffc34345000 r-xp 00000000 00:00 0 [vdso]
// ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
var line []byte
// next removes and returns the next field in the line.
// It also removes from line any spaces following the field.
next := func() []byte {
j := bytes.IndexByte(line, ' ')
if j < 0 {
f := line
line = nil
return f
}
f := line[:j]
line = line[j+1:]
for len(line) > 0 && line[0] == ' ' {
line = line[1:]
}
return f
}
for len(data) > 0 {
i := bytes.IndexByte(data, '\n')
if i < 0 {
line, data = data, nil
} else {
line, data = data[:i], data[i+1:]
}
addr := next()
i = bytes.IndexByte(addr, '-')
if i < 0 {
continue
}
lo, err := strconv.ParseUint(string(addr[:i]), 16, 64)
if err != nil {
continue
}
hi, err := strconv.ParseUint(string(addr[i+1:]), 16, 64)
if err != nil {
continue
}
perm := next()
if len(perm) < 4 || perm[2] != 'x' {
// Only interested in executable mappings.
continue
}
offset, err := strconv.ParseUint(string(next()), 16, 64)
if err != nil {
continue
}
next() // dev
inode := next() // inode
if line == nil {
continue
}
file := string(line)
// Trim deleted file marker.
deletedStr := " (deleted)"
deletedLen := len(deletedStr)
if len(file) >= deletedLen && file[len(file)-deletedLen:] == deletedStr {
file = file[:len(file)-deletedLen]
}
if len(inode) == 1 && inode[0] == '0' && file == "" {
// Huge-page text mappings list the initial fragment of
// mapped but unpopulated memory as being inode 0.
// Don't report that part.
// But [vdso] and [vsyscall] are inode 0, so let non-empty file names through.
continue
}
// TODO: pprof's remapMappingIDs makes two adjustments:
// 1. If there is an /anon_hugepage mapping first and it is
// consecutive to a next mapping, drop the /anon_hugepage.
// 2. If start-offset = 0x400000, change start to 0x400000 and offset to 0.
// There's no indication why either of these is needed.
// Let's try not doing these and see what breaks.
// If we do need them, they would go here, before we
// enter the mappings into b.mem in the first place.
buildID, _ := elfBuildID(file)
addMapping(lo, hi, offset, file, buildID)
}
}
func (b *profileBuilder) addMapping(lo, hi, offset uint64, file, buildID string) {
b.addMappingEntry(lo, hi, offset, file, buildID, false)
}
func (b *profileBuilder) addMappingEntry(lo, hi, offset uint64, file, buildID string, fake bool) {
b.mem = append(b.mem, memMap{
start: uintptr(lo),
end: uintptr(hi),
offset: offset,
file: file,
buildID: buildID,
fake: fake,
})
}