// blob: 610f87954efb1afac36610253b340fc4183b751a [file] [log] [blame]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Writing and reading of Go object files.
//
// Originally, Go object files were Plan 9 object files, but no longer.
// Now they are more like standard object files, in that each symbol is defined
// by an associated memory image (bytes) and a list of relocations to apply
// during linking. We do not (yet?) use a standard file format, however.
// For now, the format is chosen to be as simple as possible to read and write.
// It may change for reasons of efficiency, or we may even switch to a
// standard file format if there are compelling benefits to doing so.
// See golang.org/s/go13linker for more background.
//
// The file format is:
//
// - magic header: "\x00\x00go13ld"
// - byte 1 - version number
// - sequence of strings giving dependencies (imported packages)
// - empty string (marks end of sequence)
// - sequence of defined symbols
// - byte 0xff (marks end of sequence; this is the first byte of the footer below, not an extra byte)
// - magic footer: "\xff\xffgo13ld"
//
// All integers are stored in a zigzag varint format.
// See golang.org/s/go12symtab for a definition.
//
// Data blocks and strings are both stored as an integer
// followed by that many bytes.
//
// A symbol reference is a string name followed by a version.
// An empty name corresponds to a nil LSym* pointer.
//
// Each symbol is laid out as the following fields (taken from LSym*):
//
// - byte 0xfe (sanity check for synchronization)
// - type [int]
// - name [string]
// - version [int]
// - dupok [int]
// - size [int]
// - gotype [symbol reference]
// - p [data block]
// - nr [int]
// - r [nr relocations, sorted by off]
//
// If type == STEXT, there are a few more fields:
//
// - args [int]
// - locals [int]
// - nosplit [int]
// - leaf [int]
// - nlocal [int]
// - local [nlocal automatics]
// - pcln [pcln table]
//
// Each relocation has the encoding:
//
// - off [int]
// - siz [int]
// - type [int]
// - add [int]
// - xadd [int]
// - sym [symbol reference]
// - xsym [symbol reference]
//
// Each local has the encoding:
//
// - asym [symbol reference]
// - offset [int]
// - type [int]
// - gotype [symbol reference]
//
// The pcln table has the encoding:
//
// - pcsp [data block]
// - pcfile [data block]
// - pcline [data block]
// - npcdata [int]
// - pcdata [npcdata data blocks]
// - nfuncdata [int]
// - funcdata [nfuncdata symbol references]
// - funcdataoff [nfuncdata ints]
// - nfile [int]
// - file [nfile symbol references]
//
// The file layout and meaning of type integers are architecture-independent.
//
// TODO(rsc): The file format is good for a first pass but needs work.
// - There are SymID in the object file that should really just be strings.
// - The actual symbol memory images are interlaced with the symbol
// metadata. They should be separated, to reduce the I/O required to
// load just the metadata.
// - The symbol references should be shortened, either with a symbol
// table or by using a simple backward index to an earlier mentioned symbol.
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <link.h>
#include "../cmd/ld/textflag.h"
// Forward declarations for the writing half of this file.
// writesym emits one symbol; the wr* helpers emit the primitive
// encodings (varint, length-prefixed data, symbol reference).
static void writesym(Link*, Biobuf*, LSym*);
static void wrint(Biobuf*, int64);
static void wrstring(Biobuf*, char*);
static void wrpath(Link *, Biobuf*, char*);
static void wrdata(Biobuf*, void*, int);
static void wrsym(Biobuf*, LSym*);
static void wrpathsym(Link *ctxt, Biobuf *b, LSym *s);
// Forward declarations for the reading half of this file.
// readsym consumes one symbol; the rd* helpers decode the primitive
// encodings written by the corresponding wr* helpers.
static void readsym(Link*, Biobuf*, char*, char*);
static int64 rdint(Biobuf*);
static char *rdstring(Biobuf*);
static void rddata(Biobuf*, uchar**, int*);
static LSym *rdsym(Link*, Biobuf*, char*);
// The Go and C compilers, and the assembler, call writeobj to write
// out a Go object file. The linker does not call this; the linker
// does not write out object files.
//
// writeobj proceeds in three phases:
//  1. walk every Prog reachable from ctxt->plist and sort it into a
//     list of function symbols (text..etext) or data symbols (data..edata),
//     attaching ordinary instructions to the function being defined;
//  2. run the per-function passes (mkfwd, linkpatch, follow,
//     addstacksplit, assemble, linkpcln) on each function symbol;
//  3. write the header, the dependency list, all symbols (functions
//     first, then data), and the footer to b.
void
writeobj(Link *ctxt, Biobuf *b)
{
int flag;
Hist *h;
LSym *s, *text, *etext, *curtext, *data, *edata;
Plist *pl;
Prog *p, *plink;
Auto *a;
// Build list of symbols, and assign instructions to lists.
// Ignore ctxt->plist boundaries. There are no guarantees there,
// and the C compilers and assemblers just use one big list.
text = nil;
curtext = nil;
data = nil;
etext = nil;
edata = nil;
for(pl = ctxt->plist; pl != nil; pl = pl->link) {
for(p = pl->firstpc; p != nil; p = plink) {
// Remember the successor before unlinking p: instructions that
// belong to a function are re-linked onto that function below.
plink = p->link;
p->link = nil;
// END pseudo-instructions are dropped.
if(p->as == ctxt->arch->AEND)
continue;
if(p->as == ctxt->arch->ATYPE) {
// Assume each TYPE instruction describes
// a different local variable or parameter,
// so no dedup.
// Using only the TYPE instructions means
// that we discard location information about local variables
// in C and assembly functions; that information is inferred
// from ordinary references, because there are no TYPE
// instructions there. Without the type information, gdb can't
// use the locations, so we don't bother to save them.
// If something else could use them, we could arrange to
// preserve them.
// A TYPE outside of any function is ignored.
if(curtext == nil)
continue;
// Prepend a new Auto to the current function's list of automatics.
a = emallocz(sizeof *a);
a->asym = p->from.sym;
a->aoffset = p->from.offset;
a->type = ctxt->arch->symtype(&p->from);
a->gotype = p->from.gotype;
a->link = curtext->autom;
curtext->autom = a;
continue;
}
if(p->as == ctxt->arch->AGLOBL) {
s = p->from.sym;
// A second GLOBL for the same symbol is only reported, but a symbol
// already placed on a list is fatal.
if(s->seenglobl++)
print("duplicate %P\n", p);
if(s->onlist)
sysfatal("symbol %s listed multiple times", s->name);
s->onlist = 1;
// Append s to the data list.
if(data == nil)
data = s;
else
edata->next = s;
s->next = nil;
s->size = p->to.offset;
if(s->type == 0 || s->type == SXREF)
s->type = SBSS;
// The flag word is carried in p->reg when thechar is '5'
// and in p->from.scale otherwise.
if(ctxt->arch->thechar == '5')
flag = p->reg;
else
flag = p->from.scale;
if(flag & DUPOK)
s->dupok = 1;
// RODATA takes precedence over NOPTR when both are set.
if(flag & RODATA)
s->type = SRODATA;
else if(flag & NOPTR)
s->type = SNOPTRBSS;
edata = s;
continue;
}
if(p->as == ctxt->arch->ADATA) {
// savedata is defined elsewhere; presumably it records the DATA
// initializer in the symbol's memory image - confirm there.
savedata(ctxt, p->from.sym, p, "<input>");
continue;
}
if(p->as == ctxt->arch->ATEXT) {
s = p->from.sym;
if(s == nil) {
// func _() { }
// With no current function, the instructions that follow are dropped.
curtext = nil;
continue;
}
if(s->text != nil)
sysfatal("duplicate TEXT for %s", s->name);
if(s->onlist)
sysfatal("symbol %s listed multiple times", s->name);
s->onlist = 1;
// Append s to the text list.
if(text == nil)
text = s;
else
etext->next = s;
etext = s;
// Same flag location rule as for GLOBL above.
if(ctxt->arch->thechar == '5')
flag = p->reg;
else
flag = p->from.scale;
if(flag & DUPOK)
s->dupok = 1;
if(flag & NOSPLIT)
s->nosplit = 1;
s->next = nil;
s->type = STEXT;
// The TEXT instruction itself is the first (and so far last)
// instruction of the function.
s->text = p;
s->etext = p;
curtext = s;
continue;
}
// Any other instruction: append it to the current function,
// or drop it if no function is being defined.
if(curtext == nil)
continue;
s = curtext;
s->etext->link = p;
s->etext = p;
}
}
// Turn functions into machine code images.
for(s = text; s != nil; s = s->next) {
mkfwd(s);
linkpatch(ctxt, s);
ctxt->arch->follow(ctxt, s);
ctxt->arch->addstacksplit(ctxt, s);
ctxt->arch->assemble(ctxt, s);
linkpcln(ctxt, s);
}
// Emit header.
Bputc(b, 0);
Bputc(b, 0);
Bprint(b, "go13ld");
Bputc(b, 1); // version
// Emit autolib.
// Only Hist entries with a negative offset are written; the list is
// terminated by an empty string.
for(h = ctxt->hist; h != nil; h = h->link)
if(h->offset < 0)
wrstring(b, h->name);
wrstring(b, "");
// Emit symbols.
for(s = text; s != nil; s = s->next)
writesym(ctxt, b, s);
for(s = data; s != nil; s = s->next)
writesym(ctxt, b, s);
// Emit footer.
// The first 0xff doubles as the end-of-symbols marker that
// ldobjfile peeks for.
Bputc(b, 0xff);
Bputc(b, 0xff);
Bprint(b, "go13ld");
}
// writesym writes the symbol s to b using the per-symbol layout
// described in the comment at the top of this file: the 0xfe sync
// byte, the common fields, the relocations and, for STEXT symbols,
// the function-specific fields, automatics and pcln table.
//
// When ctxt->debugasm is set, a human-readable dump of the symbol
// (attributes, instructions, hex dump of the data, relocations) is
// first printed to ctxt->bso.
static void
writesym(Link *ctxt, Biobuf *b, LSym *s)
{
	Reloc *r;
	int i, j, c, n;
	Pcln *pc;
	Prog *p;
	Auto *a;
	char *name;

	if(ctxt->debugasm) {
		Bprint(ctxt->bso, "%s ", s->name);
		if(s->version)
			Bprint(ctxt->bso, "v=%d ", s->version);
		if(s->type)
			Bprint(ctxt->bso, "t=%d ", s->type);
		if(s->dupok)
			Bprint(ctxt->bso, "dupok ");
		if(s->nosplit)
			Bprint(ctxt->bso, "nosplit ");
		Bprint(ctxt->bso, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
		if(s->type == STEXT) {
			Bprint(ctxt->bso, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
			if(s->leaf)
				Bprint(ctxt->bso, " leaf");
		}
		Bprint(ctxt->bso, "\n");
		for(p=s->text; p != nil; p = p->link)
			Bprint(ctxt->bso, "\t%#06ux %P\n", (int)p->pc, p);
		// Hex dump, 16 bytes per row, followed by a printable-ASCII column.
		for(i=0; i<s->np; ) {
			Bprint(ctxt->bso, "\t%#06ux", i);
			for(j=i; j<i+16 && j<s->np; j++)
				Bprint(ctxt->bso, " %02ux", s->p[j]);
			// Each byte above occupies three columns (" %02ux"), so a
			// short final row is padded with three spaces per missing
			// byte to keep the ASCII column aligned.
			for(; j<i+16; j++)
				Bprint(ctxt->bso, "   ");
			Bprint(ctxt->bso, " ");
			for(j=i; j<i+16 && j<s->np; j++) {
				c = s->p[j];
				if(' ' <= c && c <= 0x7e)
					Bprint(ctxt->bso, "%c", c);
				else
					Bprint(ctxt->bso, ".");
			}
			Bprint(ctxt->bso, "\n");
			i += 16;
		}
		for(i=0; i<s->nr; i++) {
			r = &s->r[i];
			// A relocation may have no target symbol.
			name = "";
			if(r->sym != nil)
				name = r->sym->name;
			Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, name, (vlong)r->add);
		}
	}

	// Common fields.
	Bputc(b, 0xfe);
	wrint(b, s->type);
	wrstring(b, s->name);
	wrint(b, s->version);
	wrint(b, s->dupok);
	wrint(b, s->size);
	wrsym(b, s->gotype);
	wrdata(b, s->p, s->np);

	// Relocations.
	wrint(b, s->nr);
	for(i=0; i<s->nr; i++) {
		r = &s->r[i];
		wrint(b, r->off);
		wrint(b, r->siz);
		wrint(b, r->type);
		wrint(b, r->add);
		wrint(b, r->xadd);
		wrsym(b, r->sym);
		wrsym(b, r->xsym);
	}

	if(s->type == STEXT) {
		wrint(b, s->args);
		wrint(b, s->locals);
		wrint(b, s->nosplit);
		wrint(b, s->leaf);

		// Automatics: count first, then the entries. The
		// architecture-specific D_AUTO/D_PARAM values are mapped to
		// A_AUTO/A_PARAM before being written.
		n = 0;
		for(a = s->autom; a != nil; a = a->link)
			n++;
		wrint(b, n);
		for(a = s->autom; a != nil; a = a->link) {
			wrsym(b, a->asym);
			wrint(b, a->aoffset);
			if(a->type == ctxt->arch->D_AUTO)
				wrint(b, A_AUTO);
			else if(a->type == ctxt->arch->D_PARAM)
				wrint(b, A_PARAM);
			else
				sysfatal("%s: invalid local variable type %d", s->name, a->type);
			wrsym(b, a->gotype);
		}

		// pcln table.
		pc = s->pcln;
		wrdata(b, pc->pcsp.p, pc->pcsp.n);
		wrdata(b, pc->pcfile.p, pc->pcfile.n);
		wrdata(b, pc->pcline.p, pc->pcline.n);
		wrint(b, pc->npcdata);
		for(i=0; i<pc->npcdata; i++)
			wrdata(b, pc->pcdata[i].p, pc->pcdata[i].n);
		wrint(b, pc->nfuncdata);
		for(i=0; i<pc->nfuncdata; i++)
			wrsym(b, pc->funcdata[i]);
		for(i=0; i<pc->nfuncdata; i++)
			wrint(b, pc->funcdataoff[i]);
		wrint(b, pc->nfile);
		// File names go through wrpathsym so that wrpath can
		// rewrite their path separators.
		for(i=0; i<pc->nfile; i++)
			wrpathsym(ctxt, b, pc->file[i]);
	}
}
// wrint writes sval to b as a zigzag-encoded varint: the sign is
// folded into the low bit, then the value is emitted seven bits at a
// time, least significant group first, with the high bit of each
// byte set on all but the final byte.
static void
wrint(Biobuf *b, int64 sval)
{
	uchar out[10];
	uint64 rest;
	int len;

	// Zigzag step: (sval << 1) xor'ed with the sign replicated into every bit.
	rest = ((uint64)sval<<1) ^ (uint64)(int64)(sval>>63);

	// A 64-bit value needs at most ten 7-bit groups.
	len = 0;
	while(rest >= 0x80) {
		out[len++] = rest | 0x80;
		rest >>= 7;
	}
	out[len++] = rest;
	Bwrite(b, out, len);
}
// wrstring writes the NUL-terminated string s to b as a data block:
// its length followed by its bytes (the terminator is not written).
static void
wrstring(Biobuf *b, char *s)
{
	int len;

	len = strlen(s);
	wrdata(b, s, len);
}
// wrpath writes the path p in the same encoding as wrstring.
// When ctxt->windows is set, every '\\' in p is written as '/'.
static void
wrpath(Link *ctxt, Biobuf *b, char *p)
{
	char *q, *end;
	int n, ch;

	// Nothing to translate: emit the path unchanged.
	if(!ctxt->windows || strchr(p, '\\') == nil) {
		wrstring(b, p);
		return;
	}

	// Same length prefix as wrstring; the translation is byte-for-byte,
	// so the length does not change.
	n = strlen(p);
	wrint(b, n);
	end = p + n;
	for(q = p; q < end; q++) {
		ch = *q;
		if(ch == '\\')
			ch = '/';
		Bputc(b, ch);
	}
}
// wrdata writes a data block to b: the byte count n as an integer,
// followed by the n bytes starting at v.
static void
wrdata(Biobuf *b, void *v, int n)
{
	// Length prefix.
	wrint(b, (int64)n);
	// Payload.
	Bwrite(b, v, n);
}
// wrpathsym writes a symbol reference (name, then version) to b,
// passing the name through wrpath. A nil symbol is written as an
// empty name followed by version 0.
static void
wrpathsym(Link *ctxt, Biobuf *b, LSym *s)
{
	if(s != nil) {
		wrpath(ctxt, b, s->name);
		wrint(b, s->version);
		return;
	}

	// nil reference: zero-length name, then version 0.
	wrint(b, 0);
	wrint(b, 0);
}
// wrsym writes a symbol reference to b: the symbol's name as a string
// followed by its version. A nil symbol is written as an empty name
// followed by version 0.
static void
wrsym(Biobuf *b, LSym *s)
{
	if(s != nil) {
		wrstring(b, s->name);
		wrint(b, s->version);
		return;
	}

	// nil reference: zero-length name, then version 0.
	wrint(b, 0);
	wrint(b, 0);
}
// The 8-byte sequences that ldobjfile expects at the start and at the
// end of an object file. Only the first 8 bytes of each array are
// compared; the string literal's trailing NUL is not part of the format.
static char startmagic[] = "\x00\x00go13ld";
static char endmagic[] = "\xff\xffgo13ld";
// ldobjfile reads one object file from f, starting at the current
// offset and expected to span exactly len bytes. pkg is passed on to
// addlib and to the symbol readers; pn is used to identify the input
// in error messages. Each call bumps ctxt->version, the version that
// symbols with a non-zero on-disk version are assigned while reading.
// Any format violation is fatal.
void
ldobjfile(Link *ctxt, Biobuf *f, char *pkg, int64 len, char *pn)
{
	uchar magic[8];
	int64 base, end;
	char *dep;
	int ver, peek;

	base = Boffset(f);
	ctxt->version++;

	// Header: magic, then the version byte.
	// The buffer is zeroed first so that a short read fails the comparison.
	memset(magic, 0, sizeof magic);
	Bread(f, magic, sizeof magic);
	if(memcmp(magic, startmagic, sizeof magic) != 0)
		sysfatal("%s: invalid file start %x %x %x %x %x %x %x %x", pn, magic[0], magic[1], magic[2], magic[3], magic[4], magic[5], magic[6], magic[7]);
	ver = Bgetc(f);
	if(ver != 1)
		sysfatal("%s: invalid file version number %d", pn, ver);

	// Dependency list, terminated by an empty string.
	for(dep = rdstring(f); dep[0] != 0; dep = rdstring(f))
		addlib(ctxt, pkg, pn, dep);

	// Symbols, until the next byte is 0xff. That byte is only peeked
	// at: it is pushed back and consumed as part of the footer.
	peek = Bgetc(f);
	Bungetc(f);
	while(peek != 0xff) {
		readsym(ctxt, f, pkg, pn);
		peek = Bgetc(f);
		Bungetc(f);
	}

	// Footer, and a check that exactly len bytes were consumed.
	memset(magic, 0, sizeof magic);
	Bread(f, magic, sizeof magic);
	if(memcmp(magic, endmagic, sizeof magic) != 0)
		sysfatal("%s: invalid file end", pn);
	end = Boffset(f);
	if(end != base+len)
		sysfatal("%s: unexpected end at %lld, want %lld", pn, (vlong)end, (vlong)(base+len));
}
// readsym reads one symbol from f (the inverse of writesym) and
// installs it in ctxt. pkg is used to expand symbol names and is
// recorded as the symbol's file; pn identifies the input in error
// messages.
//
// If the symbol was already defined with data, the new definition is
// read into a throwaway ".dup" symbol so that the stream stays in
// sync without disturbing the existing definition. Redefinition is
// fatal unless the existing symbol is SBSS/SNOPTRBSS or either
// definition is marked dupok.
//
// When ctxt->debugasm is set, a human-readable dump of the symbol is
// printed to ctxt->bso after it has been read.
static void
readsym(Link *ctxt, Biobuf *f, char *pkg, char *pn)
{
	int i, j, c, t, v, n, size, dupok;
	static int ndup;
	char *name, *rname;
	Reloc *r;
	LSym *s, *dup;
	Pcln *pc;
	Auto *a;

	if(Bgetc(f) != 0xfe)
		sysfatal("readsym out of sync");
	t = rdint(f);
	name = expandpkg(rdstring(f), pkg);
	v = rdint(f);
	if(v != 0 && v != 1)
		sysfatal("invalid symbol version %d", v);
	dupok = rdint(f);
	size = rdint(f);

	// A non-zero on-disk version means "local to this object file";
	// it is replaced by the version assigned to the file being read.
	if(v != 0)
		v = ctxt->version;
	s = linklookup(ctxt, name, v);
	dup = nil;
	if(s->type != 0 && s->type != SXREF) {
		if(s->type != SBSS && s->type != SNOPTRBSS && !dupok && !s->dupok)
			sysfatal("duplicate symbol %s (types %d and %d) in %s and %s", s->name, s->type, t, s->file, pn);
		if(s->np > 0) {
			dup = s;
			s = linknewsym(ctxt, ".dup", ndup++); // scratch
		}
	}
	s->file = pkg;
	s->dupok = dupok;
	if(t == SXREF)
		sysfatal("bad sxref");
	if(t == 0)
		sysfatal("missing type for %s in %s", name, pn);
	s->type = t;
	if(s->size < size)
		s->size = size;
	s->gotype = rdsym(ctxt, f, pkg);
	rddata(f, &s->p, &s->np);
	s->maxp = s->np;
	n = rdint(f);
	if(n > 0) {
		s->r = emallocz(n * sizeof s->r[0]);
		s->nr = n;
		s->maxr = n;
		for(i=0; i<n; i++) {
			r = &s->r[i];
			r->off = rdint(f);
			r->siz = rdint(f);
			r->type = rdint(f);
			r->add = rdint(f);
			r->xadd = rdint(f);
			r->sym = rdsym(ctxt, f, pkg);
			r->xsym = rdsym(ctxt, f, pkg);
		}
	}

	// When dup != nil, s is the ".dup" scratch symbol, so the name that
	// identifies a gclocals symbol is dup->name, not s->name.
	// "gclocals·" is 10 bytes long in UTF-8.
	if(s->np > 0 && dup != nil && dup->np > 0 && strncmp(dup->name, "gclocals·", 10) == 0) {
		// content-addressed garbage collection liveness bitmap symbol.
		// double check for hash collisions.
		if(s->np != dup->np || memcmp(s->p, dup->p, s->np) != 0)
			sysfatal("dupok hash collision for %s in %s and %s", dup->name, dup->file, pn);
	}

	if(s->type == STEXT) {
		s->args = rdint(f);
		s->locals = rdint(f);
		s->nosplit = rdint(f);
		s->leaf = rdint(f);

		// Automatics; each one is prepended, so the resulting list is
		// in the reverse of file order.
		n = rdint(f);
		for(i=0; i<n; i++) {
			a = emallocz(sizeof *a);
			a->asym = rdsym(ctxt, f, pkg);
			a->aoffset = rdint(f);
			a->type = rdint(f);
			a->gotype = rdsym(ctxt, f, pkg);
			a->link = s->autom;
			s->autom = a;
		}

		// pcln table.
		s->pcln = emallocz(sizeof *s->pcln);
		pc = s->pcln;
		rddata(f, &pc->pcsp.p, &pc->pcsp.n);
		rddata(f, &pc->pcfile.p, &pc->pcfile.n);
		rddata(f, &pc->pcline.p, &pc->pcline.n);
		n = rdint(f);
		pc->pcdata = emallocz(n * sizeof pc->pcdata[0]);
		pc->npcdata = n;
		for(i=0; i<n; i++)
			rddata(f, &pc->pcdata[i].p, &pc->pcdata[i].n);
		n = rdint(f);
		pc->funcdata = emallocz(n * sizeof pc->funcdata[0]);
		pc->funcdataoff = emallocz(n * sizeof pc->funcdataoff[0]);
		pc->nfuncdata = n;
		for(i=0; i<n; i++)
			pc->funcdata[i] = rdsym(ctxt, f, pkg);
		for(i=0; i<n; i++)
			pc->funcdataoff[i] = rdint(f);
		n = rdint(f);
		pc->file = emallocz(n * sizeof pc->file[0]);
		pc->nfile = n;
		for(i=0; i<n; i++)
			pc->file[i] = rdsym(ctxt, f, pkg);

		// Only a first definition joins the context's text list;
		// a duplicate was read into the scratch symbol and is not linked in.
		if(dup == nil) {
			if(s->onlist)
				sysfatal("symbol %s listed multiple times", s->name);
			s->onlist = 1;
			if(ctxt->etextp)
				ctxt->etextp->next = s;
			else
				ctxt->textp = s;
			ctxt->etextp = s;
		}
	}

	if(ctxt->debugasm) {
		Bprint(ctxt->bso, "%s ", s->name);
		if(s->version)
			Bprint(ctxt->bso, "v=%d ", s->version);
		if(s->type)
			Bprint(ctxt->bso, "t=%d ", s->type);
		if(s->dupok)
			Bprint(ctxt->bso, "dupok ");
		if(s->nosplit)
			Bprint(ctxt->bso, "nosplit ");
		Bprint(ctxt->bso, "size=%lld value=%lld", (vlong)s->size, (vlong)s->value);
		if(s->type == STEXT)
			Bprint(ctxt->bso, " args=%#llux locals=%#llux", (uvlong)s->args, (uvlong)s->locals);
		Bprint(ctxt->bso, "\n");
		// Hex dump, 16 bytes per row, followed by a printable-ASCII column.
		for(i=0; i<s->np; ) {
			Bprint(ctxt->bso, "\t%#06ux", i);
			for(j=i; j<i+16 && j<s->np; j++)
				Bprint(ctxt->bso, " %02ux", s->p[j]);
			// Each byte above occupies three columns (" %02ux"), so a
			// short final row is padded with three spaces per missing
			// byte to keep the ASCII column aligned.
			for(; j<i+16; j++)
				Bprint(ctxt->bso, "   ");
			Bprint(ctxt->bso, " ");
			for(j=i; j<i+16 && j<s->np; j++) {
				c = s->p[j];
				if(' ' <= c && c <= 0x7e)
					Bprint(ctxt->bso, "%c", c);
				else
					Bprint(ctxt->bso, ".");
			}
			Bprint(ctxt->bso, "\n");
			i += 16;
		}
		for(i=0; i<s->nr; i++) {
			r = &s->r[i];
			// rdsym returns nil for an empty symbol reference, so a
			// relocation may have no target symbol.
			rname = "";
			if(r->sym != nil)
				rname = r->sym->name;
			Bprint(ctxt->bso, "\trel %d+%d t=%d %s+%lld\n", (int)r->off, r->siz, r->type, rname, (vlong)r->add);
		}
	}
}
// rdint reads one zigzag-encoded varint from f (the inverse of wrint)
// and returns the decoded signed value. An encoding that has not
// terminated by the time the shift reaches 64 bits is fatal.
static int64
rdint(Biobuf *f)
{
	uint64 bits;
	int ch, nshift;

	bits = 0;
	nshift = 0;
	do {
		if(nshift >= 64)
			sysfatal("corrupt input");
		ch = Bgetc(f);
		// Low seven bits carry payload; the high bit says "more follows".
		bits |= (uint64)(ch & 0x7F) << nshift;
		nshift += 7;
	} while(ch & 0x80);

	// Undo the zigzag step: the low bit is the sign, the rest the magnitude.
	return (int64)(bits>>1) ^ ((int64)((uint64)bits<<63)>>63);
}
// rdstring reads a length-prefixed string from f and returns it as a
// freshly allocated, NUL-terminated C string.
// A negative or oversized length, or a short read, is fatal: both
// mean the input is corrupt or truncated.
static char*
rdstring(Biobuf *f)
{
	int64 len;
	int n;
	char *p;

	len = rdint(f);
	// Reject lengths that do not fit in an int once the terminator is added.
	if(len < 0 || len >= 0x7fffffff)
		sysfatal("corrupt input");
	n = len;
	// emallocz zero-fills, which supplies the terminating NUL.
	p = emallocz(n+1);
	if(n > 0 && Bread(f, p, n) != n)
		sysfatal("corrupt input");
	return p;
}
// rddata reads a length-prefixed data block from f. It stores the
// byte count in *np and a freshly allocated buffer holding the bytes
// in *pp.
// A negative or oversized length, or a short read, is fatal: both
// mean the input is corrupt or truncated.
static void
rddata(Biobuf *f, uchar **pp, int *np)
{
	int64 len;
	int n;

	len = rdint(f);
	// Reject lengths that cannot be represented in the int count.
	if(len < 0 || len > 0x7fffffff)
		sysfatal("corrupt input");
	n = len;
	*np = n;
	*pp = emallocz(n);
	if(n > 0 && Bread(f, *pp, n) != n)
		sysfatal("corrupt input");
}
// rdsym reads a symbol reference (name, then version) from f and
// returns the corresponding symbol, looked up in ctxt after expanding
// the name with pkg. An empty name yields nil. A non-zero on-disk
// version is replaced by ctxt->version.
//
// Version-0 symbols named "$f32.<hex>" or "$f64.<hex>" that have no
// type yet are materialized here: they become SRODATA symbols whose
// contents are the 32- or 64-bit value parsed from the hex digits.
//
// A negative or oversized name length, or a short read, is fatal:
// both mean the input is corrupt or truncated.
static LSym*
rdsym(Link *ctxt, Biobuf *f, char *pkg)
{
	int64 len;
	int n, v;
	char *p;
	LSym *s;

	len = rdint(f);
	// Reject lengths that do not fit in an int once the terminator is added.
	if(len < 0 || len >= 0x7fffffff)
		sysfatal("corrupt input");
	n = len;
	if(n == 0) {
		// Empty name: skip the version that follows and report "no symbol".
		rdint(f);
		return nil;
	}
	// emallocz zero-fills, which supplies the terminating NUL.
	p = emallocz(n+1);
	if(Bread(f, p, n) != n)
		sysfatal("corrupt input");
	v = rdint(f);
	if(v != 0)
		v = ctxt->version;
	s = linklookup(ctxt, expandpkg(p, pkg), v);

	if(v == 0 && s->name[0] == '$' && s->type == 0) {
		if(strncmp(s->name, "$f32.", 5) == 0) {
			int32 i32;
			i32 = strtoul(s->name+5, nil, 16);
			s->type = SRODATA;
			adduint32(ctxt, s, i32);
			s->reachable = 0;
		} else if(strncmp(s->name, "$f64.", 5) == 0) {
			int64 i64;
			i64 = strtoull(s->name+5, nil, 16);
			s->type = SRODATA;
			adduint64(ctxt, s, i64);
			s->reachable = 0;
		}
	}
	return s;
}