cmd/internal/obj/x86: fix duffcopy/duffzero for GOEXPERIMENT=framepointer
Change-Id: I99aee6dff97a4abcaf5a9cddb505ba90b65667ea
Reviewed-on: https://go-review.googlesource.com/7728
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go
index 4842bd6..feca295 100644
--- a/src/cmd/internal/obj/x86/asm6.go
+++ b/src/cmd/internal/obj/x86/asm6.go
@@ -164,6 +164,7 @@
Zbr
Zcall
Zcallcon
+ Zcallduff
Zcallind
Zcallindreg
Zib_
@@ -528,7 +529,7 @@
}
var yduff = []ytab{
- {Ynone, Ynone, Yi32, Zcall, 1},
+ {Ynone, Ynone, Yi32, Zcallduff, 1},
}
var yjmp = []ytab{
@@ -2913,6 +2914,16 @@
return z
}
+var bpduff1 = []byte{
+ 0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP): store the current BP at -16(SP)
+ 0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP: point BP at that saved slot
+}
+
+var bpduff2 = []byte{
+ 0x90, // NOP (single-byte x86 no-op); NOTE(review): why this padding byte is needed is not stated here - confirm
+ 0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP: load BP from 0(BP), undoing the save done before the call
+}
+
func doasm(ctxt *obj.Link, p *obj.Prog) {
ctxt.Curp = p // TODO
@@ -3436,12 +3447,23 @@
r.Sym = p.To.Sym
put4(ctxt, 0)
- case Zcall:
+ case Zcall, Zcallduff:
if p.To.Sym == nil {
ctxt.Diag("call without target")
log.Fatalf("bad code")
}
+ if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
+ // Maintain BP around call, since duffcopy/duffzero can't do it
+ // (the call jumps into the middle of the function).
+ // This makes it possible to see call sites for duffcopy/duffzero in
+ // BP-based profiling tools like Linux perf (which is the
+ // whole point of obj.Framepointer_enabled).
+ // MOVQ BP, -16(SP)
+ // LEAQ -16(SP), BP
+ copy(ctxt.Andptr, bpduff1)
+ ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
+ }
ctxt.Andptr[0] = byte(op)
ctxt.Andptr = ctxt.Andptr[1:]
r = obj.Addrel(ctxt.Cursym)
@@ -3452,7 +3474,14 @@
r.Siz = 4
put4(ctxt, 0)
- // TODO: jump across functions needs reloc
+ if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
+ // Pop BP pushed above.
+ // MOVQ 0(BP), BP
+ copy(ctxt.Andptr, bpduff2)
+ ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
+ }
+
+ // TODO: jump across functions needs reloc
case Zbr,
Zjmp,
Zloop:
@@ -4339,7 +4368,14 @@
if ctxt.Rexflag != 0 {
r.Off++
}
- if r.Type == obj.R_PCREL || r.Type == obj.R_CALL {
+ if r.Type == obj.R_PCREL {
+ // PC-relative addressing is relative to the end of the instruction,
+ // but the relocations applied by the linker are relative to the end
+ // of the relocation. Because immediate instruction
+ // arguments can follow the PC-relative memory reference in the
+ // instruction encoding, the two may not coincide. In this case,
+ // adjust addend so that linker can keep relocating relative to the
+ // end of the relocation.
r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
}
}