blob: 7d377db54c39b7678e5c122eace058cb9699a9ae [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Annotate Crefs in Prog with C types by parsing gcc debug output.
// Conversion of debug output to Go types.
package main
import (
"bytes";
"debug/dwarf";
"debug/elf";
"debug/macho";
"fmt";
"go/ast";
"go/token";
"os";
"strconv";
"strings";
)
func (p *Prog) loadDebugInfo() {
// Construct a slice of unique names from p.Crefs.
m := make(map[string]int);
for _, c := range p.Crefs {
m[c.Name] = -1
}
names := make([]string, 0, len(m));
for name, _ := range m {
i := len(names);
names = names[0 : i+1];
names[i] = name;
m[name] = i;
}
// Coerce gcc into telling us whether each name is
// a type, a value, or undeclared. We compile a function
// containing the line:
// name;
// If name is a type, gcc will print:
// x.c:2: warning: useless type name in empty declaration
// If name is a value, gcc will print
// x.c:2: warning: statement with no effect
// If name is undeclared, gcc will print
// x.c:2: error: 'name' undeclared (first use in this function)
// A line number directive causes the line number to
// correspond to the index in the names array.
var b bytes.Buffer;
b.WriteString(p.Preamble);
b.WriteString("void f(void) {\n");
b.WriteString("#line 0 \"cgo-test\"\n");
for _, n := range names {
b.WriteString(n);
b.WriteString(";\n");
}
b.WriteString("}\n");
kind := make(map[string]string);
_, stderr := p.gccDebug(b.Bytes());
if stderr == "" {
fatal("gcc produced no output")
}
for _, line := range strings.Split(stderr, "\n", 0) {
if len(line) < 9 || line[0:9] != "cgo-test:" {
continue
}
line = line[9:];
colon := strings.Index(line, ":");
if colon < 0 {
continue
}
i, err := strconv.Atoi(line[0:colon]);
if err != nil {
continue
}
what := "";
switch {
default:
continue
case strings.Index(line, ": useless type name in empty declaration") >= 0:
what = "type"
case strings.Index(line, ": statement with no effect") >= 0:
what = "value"
case strings.Index(line, "undeclared") >= 0:
what = "error"
}
if old, ok := kind[names[i]]; ok && old != what {
error(noPos, "inconsistent gcc output about C.%s", names[i])
}
kind[names[i]] = what;
}
for _, n := range names {
if _, ok := kind[n]; !ok {
error(noPos, "could not determine kind of name for C.%s", n)
}
}
if nerrors > 0 {
fatal("failed to interpret gcc output:\n%s", stderr)
}
// Extract the types from the DWARF section of an object
// from a well-formed C program. Gcc only generates DWARF info
// for symbols in the object file, so it is not enough to print the
// preamble and hope the symbols we care about will be there.
// Instead, emit
// typeof(names[i]) *__cgo__i;
// for each entry in names and then dereference the type we
// learn for __cgo__i.
b.Reset();
b.WriteString(p.Preamble);
for i, n := range names {
fmt.Fprintf(&b, "typeof(%s) *__cgo__%d;\n", n, i)
}
d, stderr := p.gccDebug(b.Bytes());
if d == nil {
fatal("gcc failed:\n%s\non input:\n%s", stderr, b.Bytes())
}
// Scan DWARF info for top-level TagVariable entries with AttrName __cgo__i.
types := make([]dwarf.Type, len(names));
r := d.Reader();
for {
e, err := r.Next();
if err != nil {
fatal("reading DWARF entry: %s", err)
}
if e == nil {
break
}
if e.Tag != dwarf.TagVariable {
goto Continue
}
name, _ := e.Val(dwarf.AttrName).(string);
typOff, _ := e.Val(dwarf.AttrType).(dwarf.Offset);
if name == "" || typOff == 0 {
fatal("malformed DWARF TagVariable entry")
}
if !strings.HasPrefix(name, "__cgo__") {
goto Continue
}
typ, err := d.Type(typOff);
if err != nil {
fatal("loading DWARF type: %s", err)
}
t, ok := typ.(*dwarf.PtrType);
if !ok || t == nil {
fatal("internal error: %s has non-pointer type", name)
}
i, err := strconv.Atoi(name[7:]);
if err != nil {
fatal("malformed __cgo__ name: %s", name)
}
types[i] = t.Type;
Continue:
if e.Tag != dwarf.TagCompileUnit {
r.SkipChildren()
}
}
// Record types and typedef information in Crefs.
var conv typeConv;
conv.Init(p.PtrSize);
for _, c := range p.Crefs {
i := m[c.Name];
c.TypeName = kind[c.Name] == "type";
f, fok := types[i].(*dwarf.FuncType);
if c.Context == "call" && !c.TypeName && fok {
c.FuncType = conv.FuncType(f)
} else {
c.Type = conv.Type(types[i])
}
}
p.Typedef = conv.typedef;
}
func concat(a, b []string) []string {
c := make([]string, len(a)+len(b));
for i, s := range a {
c[i] = s
}
for i, s := range b {
c[i+len(a)] = s
}
return c;
}
// gccDebug runs gcc -gdwarf-2 over the C program stdin and
// returns the corresponding DWARF data and any messages
// printed to standard error.
func (p *Prog) gccDebug(stdin []byte) (*dwarf.Data, string) {
machine := "-m32";
if p.PtrSize == 8 {
machine = "-m64"
}
tmp := "_cgo_.o";
base := []string{
"gcc",
machine,
"-Wall", // many warnings
"-Werror", // warnings are errors
"-o" + tmp, // write object to tmp
"-gdwarf-2", // generate DWARF v2 debugging symbols
"-c", // do not link
"-xc", // input language is C
"-", // read input from standard input
};
_, stderr, ok := run(stdin, concat(base, p.GccOptions));
if !ok {
return nil, string(stderr)
}
// Try to parse f as ELF and Mach-O and hope one works.
var f interface {
DWARF() (*dwarf.Data, os.Error);
}
var err os.Error;
if f, err = elf.Open(tmp); err != nil {
if f, err = macho.Open(tmp); err != nil {
fatal("cannot parse gcc output %s as ELF or Mach-O object", tmp)
}
}
d, err := f.DWARF();
if err != nil {
fatal("cannot load DWARF debug information from %s: %s", tmp, err)
}
return d, "";
}
// A typeConv is a translator from dwarf types to Go types
// with equivalent memory layout.
type typeConv struct {
// Cache of already-translated or in-progress types.
m map[dwarf.Type]*Type;
typedef map[string]ast.Expr;
// Predeclared types.
byte ast.Expr; // denotes padding
int8, int16, int32, int64 ast.Expr;
uint8, uint16, uint32, uint64, uintptr ast.Expr;
float32, float64 ast.Expr;
void ast.Expr;
unsafePointer ast.Expr;
string ast.Expr;
ptrSize int64;
tagGen int;
}
func (c *typeConv) Init(ptrSize int64) {
c.ptrSize = ptrSize;
c.m = make(map[dwarf.Type]*Type);
c.typedef = make(map[string]ast.Expr);
c.byte = c.Ident("byte");
c.int8 = c.Ident("int8");
c.int16 = c.Ident("int16");
c.int32 = c.Ident("int32");
c.int64 = c.Ident("int64");
c.uint8 = c.Ident("uint8");
c.uint16 = c.Ident("uint16");
c.uint32 = c.Ident("uint32");
c.uint64 = c.Ident("uint64");
c.uintptr = c.Ident("uintptr");
c.float32 = c.Ident("float32");
c.float64 = c.Ident("float64");
c.unsafePointer = c.Ident("unsafe.Pointer");
c.void = c.Ident("void");
c.string = c.Ident("string");
}
// base strips away qualifiers and typedefs to get the underlying type
func base(dt dwarf.Type) dwarf.Type {
for {
if d, ok := dt.(*dwarf.QualType); ok {
dt = d.Type;
continue;
}
if d, ok := dt.(*dwarf.TypedefType); ok {
dt = d.Type;
continue;
}
break;
}
return dt;
}
// Map from dwarf text names to aliases we use in package "C".
var cnameMap = map[string]string{
"long int": "long",
"long unsigned int": "ulong",
"unsigned int": "uint",
"short unsigned int": "ushort",
"short int": "short",
"long long int": "longlong",
"long long unsigned int": "ulonglong",
"signed char": "schar",
}
// Type returns a *Type with the same memory layout as
// dtype when used as the type of a variable or a struct field.
func (c *typeConv) Type(dtype dwarf.Type) *Type {
if t, ok := c.m[dtype]; ok {
if t.Go == nil {
fatal("type conversion loop at %s", dtype)
}
return t;
}
t := new(Type);
t.Size = dtype.Size();
t.Align = -1;
t.C = dtype.Common().Name;
c.m[dtype] = t;
if t.Size < 0 {
// Unsized types are [0]byte
t.Size = 0;
t.Go = c.Opaque(0);
if t.C == "" {
t.C = "void"
}
return t;
}
switch dt := dtype.(type) {
default:
fatal("unexpected type: %s", dtype)
case *dwarf.AddrType:
if t.Size != c.ptrSize {
fatal("unexpected: %d-byte address type - %s", t.Size, dtype)
}
t.Go = c.uintptr;
t.Align = t.Size;
case *dwarf.ArrayType:
if dt.StrideBitSize > 0 {
// Cannot represent bit-sized elements in Go.
t.Go = c.Opaque(t.Size);
break;
}
gt := &ast.ArrayType{
Len: c.intExpr(dt.Count),
};
t.Go = gt; // publish before recursive call
sub := c.Type(dt.Type);
t.Align = sub.Align;
gt.Elt = sub.Go;
t.C = fmt.Sprintf("typeof(%s[%d])", sub.C, dt.Count);
case *dwarf.CharType:
if t.Size != 1 {
fatal("unexpected: %d-byte char type - %s", t.Size, dtype)
}
t.Go = c.int8;
t.Align = 1;
case *dwarf.EnumType:
switch t.Size {
default:
fatal("unexpected: %d-byte enum type - %s", t.Size, dtype)
case 1:
t.Go = c.uint8
case 2:
t.Go = c.uint16
case 4:
t.Go = c.uint32
case 8:
t.Go = c.uint64
}
if t.Align = t.Size; t.Align >= c.ptrSize {
t.Align = c.ptrSize
}
t.C = "enum " + dt.EnumName;
case *dwarf.FloatType:
switch t.Size {
default:
fatal("unexpected: %d-byte float type - %s", t.Size, dtype)
case 4:
t.Go = c.float32
case 8:
t.Go = c.float64
}
if t.Align = t.Size; t.Align >= c.ptrSize {
t.Align = c.ptrSize
}
case *dwarf.FuncType:
// No attempt at translation: would enable calls
// directly between worlds, but we need to moderate those.
t.Go = c.uintptr;
t.Align = c.ptrSize;
case *dwarf.IntType:
if dt.BitSize > 0 {
fatal("unexpected: %d-bit int type - %s", dt.BitSize, dtype)
}
switch t.Size {
default:
fatal("unexpected: %d-byte int type - %s", t.Size, dtype)
case 1:
t.Go = c.int8
case 2:
t.Go = c.int16
case 4:
t.Go = c.int32
case 8:
t.Go = c.int64
}
if t.Align = t.Size; t.Align >= c.ptrSize {
t.Align = c.ptrSize
}
case *dwarf.PtrType:
t.Align = c.ptrSize;
// Translate void* as unsafe.Pointer
if _, ok := base(dt.Type).(*dwarf.VoidType); ok {
t.Go = c.unsafePointer;
t.C = "void*";
break;
}
gt := &ast.StarExpr{};
t.Go = gt; // publish before recursive call
sub := c.Type(dt.Type);
gt.X = sub.Go;
t.C = sub.C + "*";
case *dwarf.QualType:
// Ignore qualifier.
t = c.Type(dt.Type);
c.m[dtype] = t;
return t;
case *dwarf.StructType:
// Convert to Go struct, being careful about alignment.
// Have to give it a name to simulate C "struct foo" references.
tag := dt.StructName;
if tag == "" {
tag = "__" + strconv.Itoa(c.tagGen);
c.tagGen++;
} else if t.C == "" {
t.C = dt.Kind + " " + tag
}
name := c.Ident("_C" + dt.Kind + "_" + tag);
t.Go = name; // publish before recursive calls
switch dt.Kind {
case "union", "class":
c.typedef[name.Value] = c.Opaque(t.Size);
if t.C == "" {
t.C = fmt.Sprintf("typeof(unsigned char[%d])", t.Size)
}
case "struct":
g, csyntax, align := c.Struct(dt);
if t.C == "" {
t.C = csyntax
}
t.Align = align;
c.typedef[name.Value] = g;
}
case *dwarf.TypedefType:
// Record typedef for printing.
if dt.Name == "_GoString_" {
// Special C name for Go string type.
// Knows string layout used by compilers: pointer plus length,
// which rounds up to 2 pointers after alignment.
t.Go = c.string;
t.Size = c.ptrSize * 2;
t.Align = c.ptrSize;
break;
}
name := c.Ident("_C_" + dt.Name);
t.Go = name; // publish before recursive call
sub := c.Type(dt.Type);
t.Size = sub.Size;
t.Align = sub.Align;
if _, ok := c.typedef[name.Value]; !ok {
c.typedef[name.Value] = sub.Go
}
case *dwarf.UcharType:
if t.Size != 1 {
fatal("unexpected: %d-byte uchar type - %s", t.Size, dtype)
}
t.Go = c.uint8;
t.Align = 1;
case *dwarf.UintType:
if dt.BitSize > 0 {
fatal("unexpected: %d-bit uint type - %s", dt.BitSize, dtype)
}
switch t.Size {
default:
fatal("unexpected: %d-byte uint type - %s", t.Size, dtype)
case 1:
t.Go = c.uint8
case 2:
t.Go = c.uint16
case 4:
t.Go = c.uint32
case 8:
t.Go = c.uint64
}
if t.Align = t.Size; t.Align >= c.ptrSize {
t.Align = c.ptrSize
}
case *dwarf.VoidType:
t.Go = c.void;
t.C = "void";
}
switch dtype.(type) {
case *dwarf.AddrType, *dwarf.CharType, *dwarf.IntType, *dwarf.FloatType, *dwarf.UcharType, *dwarf.UintType:
s := dtype.Common().Name;
if s != "" {
if ss, ok := cnameMap[s]; ok {
s = ss
}
s = strings.Join(strings.Split(s, " ", 0), ""); // strip spaces
name := c.Ident("_C_" + s);
c.typedef[name.Value] = t.Go;
t.Go = name;
}
}
if t.C == "" {
fatal("internal error: did not create C name for %s", dtype)
}
return t;
}
// FuncArg returns a Go type with the same memory layout as
// dtype when used as the type of a C function argument.
func (c *typeConv) FuncArg(dtype dwarf.Type) *Type {
t := c.Type(dtype);
switch dt := dtype.(type) {
case *dwarf.ArrayType:
// Arrays are passed implicitly as pointers in C.
// In Go, we must be explicit.
return &Type{
Size: c.ptrSize,
Align: c.ptrSize,
Go: &ast.StarExpr{X: t.Go},
C: t.C + "*",
}
case *dwarf.TypedefType:
// C has much more relaxed rules than Go for
// implicit type conversions. When the parameter
// is type T defined as *X, simulate a little of the
// laxness of C by making the argument *X instead of T.
if ptr, ok := base(dt.Type).(*dwarf.PtrType); ok {
// Unless the typedef happens to point to void* since
// Go has special rules around using unsafe.Pointer.
if _, void := base(ptr.Type).(*dwarf.VoidType); !void {
return c.Type(ptr)
}
}
}
return t;
}
// FuncType returns the Go type analogous to dtype.
// There is no guarantee about matching memory layout.
func (c *typeConv) FuncType(dtype *dwarf.FuncType) *FuncType {
p := make([]*Type, len(dtype.ParamType));
gp := make([]*ast.Field, len(dtype.ParamType));
for i, f := range dtype.ParamType {
// gcc's DWARF generator outputs a single DotDotDotType parameter for
// function pointers that specify no parameters (e.g. void
// (*__cgo_0)()). Treat this special case as void. This case is
// invalid according to ISO C anyway (i.e. void (*__cgo_1)(...) is not
// legal).
if _, ok := f.(*dwarf.DotDotDotType); ok && i == 0 {
p, gp = nil, nil;
break;
}
p[i] = c.FuncArg(f);
gp[i] = &ast.Field{Type: p[i].Go};
}
var r *Type;
var gr []*ast.Field;
if _, ok := dtype.ReturnType.(*dwarf.VoidType); !ok && dtype.ReturnType != nil {
r = c.Type(dtype.ReturnType);
gr = []*ast.Field{&ast.Field{Type: r.Go}};
}
return &FuncType{
Params: p,
Result: r,
Go: &ast.FuncType{
Params: gp,
Results: gr,
},
};
}
// Identifier
func (c *typeConv) Ident(s string) *ast.Ident { return &ast.Ident{Value: s} }
// Opaque type of n bytes.
func (c *typeConv) Opaque(n int64) ast.Expr {
return &ast.ArrayType{
Len: c.intExpr(n),
Elt: c.byte,
}
}
// Expr for integer n.
func (c *typeConv) intExpr(n int64) ast.Expr {
return &ast.BasicLit{
Kind: token.INT,
Value: strings.Bytes(strconv.Itoa64(n)),
}
}
// Add padding of given size to fld.
func (c *typeConv) pad(fld []*ast.Field, size int64) []*ast.Field {
n := len(fld);
fld = fld[0 : n+1];
fld[n] = &ast.Field{Names: []*ast.Ident{c.Ident("_")}, Type: c.Opaque(size)};
return fld;
}
// Struct conversion
func (c *typeConv) Struct(dt *dwarf.StructType) (expr *ast.StructType, csyntax string, align int64) {
csyntax = "struct { ";
fld := make([]*ast.Field, 0, 2*len(dt.Field)+1); // enough for padding around every field
off := int64(0);
// Mangle struct fields that happen to be named Go keywords into
// _{keyword}. Create a map from C ident -> Go ident. The Go ident will
// be mangled. Any existing identifier that already has the same name on
// the C-side will cause the Go-mangled version to be prefixed with _.
// (e.g. in a struct with fields '_type' and 'type', the latter would be
// rendered as '__type' in Go).
ident := make(map[string]string);
used := make(map[string]bool);
for _, f := range dt.Field {
ident[f.Name] = f.Name;
used[f.Name] = true;
}
for cid, goid := range ident {
if token.Lookup(strings.Bytes(goid)).IsKeyword() {
// Avoid keyword
goid = "_" + goid;
// Also avoid existing fields
for _, exist := used[goid]; exist; _, exist = used[goid] {
goid = "_" + goid
}
used[goid] = true;
ident[cid] = goid;
}
}
for _, f := range dt.Field {
if f.ByteOffset > off {
fld = c.pad(fld, f.ByteOffset-off);
off = f.ByteOffset;
}
t := c.Type(f.Type);
n := len(fld);
fld = fld[0 : n+1];
fld[n] = &ast.Field{Names: []*ast.Ident{c.Ident(ident[f.Name])}, Type: t.Go};
off += t.Size;
csyntax += t.C + " " + f.Name + "; ";
if t.Align > align {
align = t.Align
}
}
if off < dt.ByteSize {
fld = c.pad(fld, dt.ByteSize-off);
off = dt.ByteSize;
}
if off != dt.ByteSize {
fatal("struct size calculation error")
}
csyntax += "}";
expr = &ast.StructType{Fields: fld};
return;
}