cmd/internal/obj/s390x: mark unsafe points
For async preemption, we will be using REGTMP as a temporary
register in injected call on S390X, which will clobber it. So any
code that uses REGTMP is not safe for async preemption.
In the assembler backend, we expand a Prog to multiple machine
instructions and use REGTMP as a temporary register if necessary.
These need to be marked unsafe. Unlike ARM64 and MIPS,
instructions on S390X are variable length so we don't use the
length as a condition. Instead, we set a bit on the Prog whenever
REGTMP is used.
Change-Id: Ie5d14068a950f4c7cea51dff2c4a8bdc19ec9348
Reviewed-on: https://go-review.googlesource.com/c/go/+/204105
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/internal/obj/s390x/a.out.go b/src/cmd/internal/obj/s390x/a.out.go
index 6768be0..3e83072 100644
--- a/src/cmd/internal/obj/s390x/a.out.go
+++ b/src/cmd/internal/obj/s390x/a.out.go
@@ -186,6 +186,7 @@
// mark flags
LEAF = 1 << iota
BRANCH
+ USETMP // generated code of this Prog uses REGTMP
)
const ( // comments from func aclass in asmz.go
diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go
index 0402e8c..1bb79a2 100644
--- a/src/cmd/internal/obj/s390x/asmz.go
+++ b/src/cmd/internal/obj/s390x/asmz.go
@@ -490,6 +490,25 @@
}
c.cursym.Grow(c.cursym.Size)
copy(c.cursym.P, buffer)
+
+ // Mark nonpreemptible instruction sequences.
+ // We use REGTMP as a scratch register during call injection,
+ // so instruction sequences that use REGTMP are unsafe to
+ // preempt asynchronously.
+ obj.MarkUnsafePoints(c.ctxt, c.cursym.Func.Text, c.newprog, c.isUnsafePoint)
+}
+
+// Return whether p is an unsafe point.
+func (c *ctxtz) isUnsafePoint(p *obj.Prog) bool {
+ if p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP {
+ return true
+ }
+ for _, a := range p.RestArgs {
+ if a.Reg == REGTMP {
+ return true
+ }
+ }
+ return p.Mark&USETMP != 0
}
func isint32(v int64) bool {
@@ -2679,6 +2698,11 @@
return Always
}
+func regtmp(p *obj.Prog) uint32 {
+ p.Mark |= USETMP
+ return REGTMP
+}
+
func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
o := c.oplook(p)
@@ -2686,6 +2710,9 @@
return
}
+ // If REGTMP is used in generated code, we need to set USETMP on p.Mark.
+ // So we use regtmp(p) for REGTMP.
+
switch o.i {
default:
c.ctxt.Diag("unknown index %d", o.i)
@@ -2778,19 +2805,19 @@
case ADIVW, ADIVWU, ADIVD, ADIVDU:
if p.As == ADIVWU || p.As == ADIVDU {
- zRI(op_LGHI, REGTMP, 0, asm)
+ zRI(op_LGHI, regtmp(p), 0, asm)
}
zRRE(op_LGR, REGTMP2, uint32(r), asm)
- zRRE(opcode, REGTMP, uint32(p.From.Reg), asm)
+ zRRE(opcode, regtmp(p), uint32(p.From.Reg), asm)
zRRE(op_LGR, uint32(p.To.Reg), REGTMP2, asm)
case AMODW, AMODWU, AMODD, AMODDU:
if p.As == AMODWU || p.As == AMODDU {
- zRI(op_LGHI, REGTMP, 0, asm)
+ zRI(op_LGHI, regtmp(p), 0, asm)
}
zRRE(op_LGR, REGTMP2, uint32(r), asm)
- zRRE(opcode, REGTMP, uint32(p.From.Reg), asm)
- zRRE(op_LGR, uint32(p.To.Reg), REGTMP, asm)
+ zRRE(opcode, regtmp(p), uint32(p.From.Reg), asm)
+ zRRE(op_LGR, uint32(p.To.Reg), regtmp(p), asm)
}
@@ -2835,20 +2862,20 @@
r = p.To.Reg
}
zRRE(op_LGR, REGTMP2, uint32(r), asm)
- zRRE(op_MLGR, REGTMP, uint32(p.From.Reg), asm)
+ zRRE(op_MLGR, regtmp(p), uint32(p.From.Reg), asm)
switch p.As {
case AMULHDU:
// Unsigned: move result into correct register.
- zRRE(op_LGR, uint32(p.To.Reg), REGTMP, asm)
+ zRRE(op_LGR, uint32(p.To.Reg), regtmp(p), asm)
case AMULHD:
// Signed: need to convert result.
// See Hacker's Delight 8-3.
zRSY(op_SRAG, REGTMP2, uint32(p.From.Reg), 0, 63, asm)
zRRE(op_NGR, REGTMP2, uint32(r), asm)
- zRRE(op_SGR, REGTMP, REGTMP2, asm)
+ zRRE(op_SGR, regtmp(p), REGTMP2, asm)
zRSY(op_SRAG, REGTMP2, uint32(r), 0, 63, asm)
zRRE(op_NGR, REGTMP2, uint32(p.From.Reg), asm)
- zRRF(op_SGRK, REGTMP2, 0, uint32(p.To.Reg), REGTMP, asm)
+ zRRF(op_SGRK, REGTMP2, 0, uint32(p.To.Reg), regtmp(p), asm)
}
case 5: // syscall
@@ -2950,9 +2977,9 @@
if r == int(p.To.Reg) {
zRRE(op_SLBGR, uint32(p.To.Reg), uint32(p.From.Reg), asm)
} else if p.From.Reg == p.To.Reg {
- zRRE(op_LGR, REGTMP, uint32(p.From.Reg), asm)
+ zRRE(op_LGR, regtmp(p), uint32(p.From.Reg), asm)
zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm)
- zRRE(op_SLBGR, uint32(p.To.Reg), REGTMP, asm)
+ zRRE(op_SLBGR, uint32(p.To.Reg), regtmp(p), asm)
} else {
zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm)
zRRE(op_SLBGR, uint32(p.To.Reg), uint32(p.From.Reg), asm)
@@ -2994,11 +3021,11 @@
}
x2 := p.From.Index
if -DISP20/2 > d2 || d2 >= DISP20/2 {
- zRIL(_a, op_LGFI, REGTMP, uint32(d2), asm)
+ zRIL(_a, op_LGFI, regtmp(p), uint32(d2), asm)
if x2 != 0 {
- zRX(op_LA, REGTMP, REGTMP, uint32(x2), 0, asm)
+ zRX(op_LA, regtmp(p), regtmp(p), uint32(x2), 0, asm)
}
- x2 = REGTMP
+ x2 = int16(regtmp(p))
d2 = 0
}
var opx, opxy uint32
@@ -3128,8 +3155,8 @@
}
switch p.As {
case ASUB:
- zRIL(_a, op_LGFI, uint32(REGTMP), uint32(v), asm)
- zRRF(op_SLGRK, uint32(REGTMP), 0, uint32(p.To.Reg), uint32(r), asm)
+ zRIL(_a, op_LGFI, uint32(regtmp(p)), uint32(v), asm)
+ zRRF(op_SLGRK, uint32(regtmp(p)), 0, uint32(p.To.Reg), uint32(r), asm)
case ASUBC:
if r != p.To.Reg {
zRRE(op_LGR, uint32(p.To.Reg), uint32(r), asm)
@@ -3193,8 +3220,8 @@
c.ctxt.Diag("%v is not supported", p)
case AAND:
if v >= 0 { // needs zero extend
- zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
- zRRE(op_NGR, uint32(p.To.Reg), REGTMP, asm)
+ zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm)
+ zRRE(op_NGR, uint32(p.To.Reg), regtmp(p), asm)
} else if int64(int16(v)) == v {
zRI(op_NILL, uint32(p.To.Reg), uint32(v), asm)
} else { // r.To.Reg & 0xffffffff00000000 & uint32(v)
@@ -3202,8 +3229,8 @@
}
case AOR:
if int64(uint32(v)) != v { // needs sign extend
- zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
- zRRE(op_OGR, uint32(p.To.Reg), REGTMP, asm)
+ zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm)
+ zRRE(op_OGR, uint32(p.To.Reg), regtmp(p), asm)
} else if int64(uint16(v)) == v {
zRI(op_OILL, uint32(p.To.Reg), uint32(v), asm)
} else {
@@ -3211,8 +3238,8 @@
}
case AXOR:
if int64(uint32(v)) != v { // needs sign extend
- zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
- zRRE(op_XGR, uint32(p.To.Reg), REGTMP, asm)
+ zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm)
+ zRRE(op_XGR, uint32(p.To.Reg), regtmp(p), asm)
} else {
zRIL(_a, op_XILF, uint32(p.To.Reg), uint32(v), asm)
}
@@ -3264,8 +3291,8 @@
} else if v >= -DISP20/2 && v < DISP20/2 {
zRXY(op_LAY, uint32(p.To.Reg), uint32(r), uint32(i), uint32(v), asm)
} else {
- zRIL(_a, op_LGFI, REGTMP, uint32(v), asm)
- zRX(op_LA, uint32(p.To.Reg), uint32(r), REGTMP, uint32(i), asm)
+ zRIL(_a, op_LGFI, regtmp(p), uint32(v), asm)
+ zRX(op_LA, uint32(p.To.Reg), uint32(r), regtmp(p), uint32(i), asm)
}
case 31: // dword
@@ -3359,11 +3386,11 @@
}
x2 := p.To.Index
if d2 < -DISP20/2 || d2 >= DISP20/2 {
- zRIL(_a, op_LGFI, REGTMP, uint32(d2), asm)
+ zRIL(_a, op_LGFI, regtmp(p), uint32(d2), asm)
if x2 != 0 {
- zRX(op_LA, REGTMP, REGTMP, uint32(x2), 0, asm)
+ zRX(op_LA, regtmp(p), regtmp(p), uint32(x2), 0, asm)
}
- x2 = REGTMP
+ x2 = int16(regtmp(p))
d2 = 0
}
// Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction.
@@ -3381,11 +3408,11 @@
}
x2 := p.From.Index
if d2 < -DISP20/2 || d2 >= DISP20/2 {
- zRIL(_a, op_LGFI, REGTMP, uint32(d2), asm)
+ zRIL(_a, op_LGFI, regtmp(p), uint32(d2), asm)
if x2 != 0 {
- zRX(op_LA, REGTMP, REGTMP, uint32(x2), 0, asm)
+ zRX(op_LA, regtmp(p), regtmp(p), uint32(x2), 0, asm)
}
- x2 = REGTMP
+ x2 = int16(regtmp(p))
d2 = 0
}
// Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction.
@@ -3539,21 +3566,21 @@
opcode = op_MVI
}
if d < 0 || d >= DISP12 {
- if r == REGTMP {
+ if r == int16(regtmp(p)) {
c.ctxt.Diag("displacement must be in range [0, 4096) to use %v", r)
}
if d >= -DISP20/2 && d < DISP20/2 {
if opcode == op_MVI {
opcode = op_MVIY
} else {
- zRXY(op_LAY, uint32(REGTMP), 0, uint32(r), uint32(d), asm)
- r = REGTMP
+ zRXY(op_LAY, uint32(regtmp(p)), 0, uint32(r), uint32(d), asm)
+ r = int16(regtmp(p))
d = 0
}
} else {
- zRIL(_a, op_LGFI, REGTMP, uint32(d), asm)
- zRX(op_LA, REGTMP, REGTMP, uint32(r), 0, asm)
- r = REGTMP
+ zRIL(_a, op_LGFI, regtmp(p), uint32(d), asm)
+ zRX(op_LA, regtmp(p), regtmp(p), uint32(r), 0, asm)
+ r = int16(regtmp(p))
d = 0
}
}
@@ -3576,19 +3603,19 @@
case AMOVH, AMOVHZ: // The zero extension doesn't affect store instructions
zRIL(_b, op_STHRL, uint32(p.From.Reg), 0, asm)
case AMOVB, AMOVBZ: // The zero extension doesn't affect store instructions
- zRIL(_b, op_LARL, REGTMP, 0, asm)
+ zRIL(_b, op_LARL, regtmp(p), 0, asm)
adj := uint32(0) // adjustment needed for odd addresses
if i2&1 != 0 {
i2 -= 1
adj = 1
}
- zRX(op_STC, uint32(p.From.Reg), 0, REGTMP, adj, asm)
+ zRX(op_STC, uint32(p.From.Reg), 0, regtmp(p), adj, asm)
case AFMOVD:
- zRIL(_b, op_LARL, REGTMP, 0, asm)
- zRX(op_STD, uint32(p.From.Reg), 0, REGTMP, 0, asm)
+ zRIL(_b, op_LARL, regtmp(p), 0, asm)
+ zRX(op_STD, uint32(p.From.Reg), 0, regtmp(p), 0, asm)
case AFMOVS:
- zRIL(_b, op_LARL, REGTMP, 0, asm)
- zRX(op_STE, uint32(p.From.Reg), 0, REGTMP, 0, asm)
+ zRIL(_b, op_LARL, regtmp(p), 0, asm)
+ zRX(op_STE, uint32(p.From.Reg), 0, regtmp(p), 0, asm)
}
c.addrilreloc(p.To.Sym, int64(i2))
@@ -3597,8 +3624,8 @@
switch p.As {
case AMOVD:
if i2&1 != 0 {
- zRIL(_b, op_LARL, REGTMP, 0, asm)
- zRXY(op_LG, uint32(p.To.Reg), REGTMP, 0, 1, asm)
+ zRIL(_b, op_LARL, regtmp(p), 0, asm)
+ zRXY(op_LG, uint32(p.To.Reg), regtmp(p), 0, 1, asm)
i2 -= 1
} else {
zRIL(_b, op_LGRL, uint32(p.To.Reg), 0, asm)
@@ -3612,7 +3639,7 @@
case AMOVHZ:
zRIL(_b, op_LLGHRL, uint32(p.To.Reg), 0, asm)
case AMOVB, AMOVBZ:
- zRIL(_b, op_LARL, REGTMP, 0, asm)
+ zRIL(_b, op_LARL, regtmp(p), 0, asm)
adj := uint32(0) // adjustment needed for odd addresses
if i2&1 != 0 {
i2 -= 1
@@ -3620,16 +3647,16 @@
}
switch p.As {
case AMOVB:
- zRXY(op_LGB, uint32(p.To.Reg), 0, REGTMP, adj, asm)
+ zRXY(op_LGB, uint32(p.To.Reg), 0, regtmp(p), adj, asm)
case AMOVBZ:
- zRXY(op_LLGC, uint32(p.To.Reg), 0, REGTMP, adj, asm)
+ zRXY(op_LLGC, uint32(p.To.Reg), 0, regtmp(p), adj, asm)
}
case AFMOVD:
- zRIL(_a, op_LARL, REGTMP, 0, asm)
- zRX(op_LD, uint32(p.To.Reg), 0, REGTMP, 0, asm)
+ zRIL(_a, op_LARL, regtmp(p), 0, asm)
+ zRX(op_LD, uint32(p.To.Reg), 0, regtmp(p), 0, asm)
case AFMOVS:
- zRIL(_a, op_LARL, REGTMP, 0, asm)
- zRX(op_LE, uint32(p.To.Reg), 0, REGTMP, 0, asm)
+ zRIL(_a, op_LARL, regtmp(p), 0, asm)
+ zRX(op_LE, uint32(p.To.Reg), 0, regtmp(p), 0, asm)
}
c.addrilreloc(p.From.Sym, int64(i2))
@@ -3744,19 +3771,19 @@
d1 := c.regoff(&p.To)
d2 := c.regoff(p.GetFrom3())
if d1 < 0 || d1 >= DISP12 {
- if b2 == REGTMP {
- c.ctxt.Diag("REGTMP conflict")
+ if b2 == int16(regtmp(p)) {
+ c.ctxt.Diag("regtmp(p) conflict")
}
- if b1 != REGTMP {
- zRRE(op_LGR, REGTMP, uint32(b1), asm)
+ if b1 != int16(regtmp(p)) {
+ zRRE(op_LGR, regtmp(p), uint32(b1), asm)
}
- zRIL(_a, op_AGFI, REGTMP, uint32(d1), asm)
+ zRIL(_a, op_AGFI, regtmp(p), uint32(d1), asm)
if d1 == d2 && b1 == b2 {
d2 = 0
- b2 = REGTMP
+ b2 = int16(regtmp(p))
}
d1 = 0
- b1 = REGTMP
+ b1 = int16(regtmp(p))
}
if d2 < 0 || d2 >= DISP12 {
if b1 == REGTMP2 {
@@ -3962,8 +3989,8 @@
rel.Add = 2 + int64(rel.Siz)
case 94: // TLS local exec model
- zRIL(_b, op_LARL, REGTMP, (sizeRIL+sizeRXY+sizeRI)>>1, asm)
- zRXY(op_LG, uint32(p.To.Reg), REGTMP, 0, 0, asm)
+ zRIL(_b, op_LARL, regtmp(p), (sizeRIL+sizeRXY+sizeRI)>>1, asm)
+ zRXY(op_LG, uint32(p.To.Reg), regtmp(p), 0, 0, asm)
zRI(op_BRC, 0xF, (sizeRI+8)>>1, asm)
*asm = append(*asm, 0, 0, 0, 0, 0, 0, 0, 0)
rel := obj.Addrel(c.cursym)
@@ -3985,7 +4012,7 @@
// --------------------------------------------------------------
// R_390_TLS_IEENT
- zRIL(_b, op_LARL, REGTMP, 0, asm)
+ zRIL(_b, op_LARL, regtmp(p), 0, asm)
ieent := obj.Addrel(c.cursym)
ieent.Off = int32(c.pc + 2)
ieent.Siz = 4
@@ -3994,7 +4021,7 @@
ieent.Add = 2 + int64(ieent.Siz)
// R_390_TLS_LOAD
- zRXY(op_LGF, uint32(p.To.Reg), REGTMP, 0, 0, asm)
+ zRXY(op_LGF, uint32(p.To.Reg), regtmp(p), 0, 0, asm)
// TODO(mundaym): add R_390_TLS_LOAD relocation here
// not strictly required but might allow the linker to optimize
@@ -4011,14 +4038,14 @@
for length > 0 {
if offset < 0 || offset >= DISP12 {
if offset >= -DISP20/2 && offset < DISP20/2 {
- zRXY(op_LAY, REGTMP, uint32(reg), 0, uint32(offset), asm)
+ zRXY(op_LAY, regtmp(p), uint32(reg), 0, uint32(offset), asm)
} else {
- if reg != REGTMP {
- zRRE(op_LGR, REGTMP, uint32(reg), asm)
+ if reg != int16(regtmp(p)) {
+ zRRE(op_LGR, regtmp(p), uint32(reg), asm)
}
- zRIL(_a, op_AGFI, REGTMP, uint32(offset), asm)
+ zRIL(_a, op_AGFI, regtmp(p), uint32(offset), asm)
}
- reg = REGTMP
+ reg = int16(regtmp(p))
offset = 0
}
size := length
@@ -4052,11 +4079,11 @@
reg = REGSP
}
if offset < -DISP20/2 || offset >= DISP20/2 {
- if reg != REGTMP {
- zRRE(op_LGR, REGTMP, uint32(reg), asm)
+ if reg != int16(regtmp(p)) {
+ zRRE(op_LGR, regtmp(p), uint32(reg), asm)
}
- zRIL(_a, op_AGFI, REGTMP, uint32(offset), asm)
- reg = REGTMP
+ zRIL(_a, op_AGFI, regtmp(p), uint32(offset), asm)
+ reg = int16(regtmp(p))
offset = 0
}
switch p.As {
@@ -4079,11 +4106,11 @@
reg = REGSP
}
if offset < -DISP20/2 || offset >= DISP20/2 {
- if reg != REGTMP {
- zRRE(op_LGR, REGTMP, uint32(reg), asm)
+ if reg != int16(regtmp(p)) {
+ zRRE(op_LGR, regtmp(p), uint32(reg), asm)
}
- zRIL(_a, op_AGFI, REGTMP, uint32(offset), asm)
- reg = REGTMP
+ zRIL(_a, op_AGFI, regtmp(p), uint32(offset), asm)
+ reg = int16(regtmp(p))
offset = 0
}
switch p.As {
diff --git a/src/cmd/internal/obj/s390x/objz.go b/src/cmd/internal/obj/s390x/objz.go
index 0e0d7a2..9e4f2d4 100644
--- a/src/cmd/internal/obj/s390x/objz.go
+++ b/src/cmd/internal/obj/s390x/objz.go
@@ -344,7 +344,11 @@
// Store link register before decrementing SP, so if a signal comes
// during the execution of the function prologue, the traceback
// code will not see a half-updated stack frame.
- q = obj.Appendp(p, c.newprog)
+ // This sequence is not async preemptible, as if we open a frame
+ // at the current SP, it will clobber the saved LR.
+ q = c.ctxt.StartUnsafePoint(p, c.newprog)
+
+ q = obj.Appendp(q, c.newprog)
q.As = AMOVD
q.From.Type = obj.TYPE_REG
q.From.Reg = REG_LR
@@ -360,6 +364,8 @@
q.To.Type = obj.TYPE_REG
q.To.Reg = REGSP
q.Spadj = autosize
+
+ q = c.ctxt.EndUnsafePoint(q, c.newprog, -1)
} else if c.cursym.Func.Text.Mark&LEAF == 0 {
// A very few functions that do not return to their caller
// (e.g. gogo) are not identified as leaves but still have