cmd/internal/obj/riscv: implement prologue and epilogue

Based on the riscv-go port.

Updates #27532

Change-Id: If552225552bf8d27c29b08de31146dd34986a3a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/204630
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go
index f2ff4ff..53cd860 100644
--- a/src/cmd/internal/obj/riscv/obj.go
+++ b/src/cmd/internal/obj/riscv/obj.go
@@ -504,6 +504,24 @@
 	}
 }
 
+// containsCall reports whether the symbol contains a CALL (or equivalent)
+// instruction. Must be called after progedit.
+func containsCall(sym *obj.LSym) bool {
+	// CALLs are CALL or JAL(R) with link register LR. JAL keeps its link
+	// register in From (To is the branch target), so check both operands.
+	for p := sym.Func.Text; p != nil; p = p.Link {
+		switch p.As {
+		case obj.ACALL:
+			return true
+		case AJAL, AJALR:
+			if (p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR) || (p.To.Type == obj.TYPE_REG && p.To.Reg == REG_LR) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
 // setPCs sets the Pc field in all instructions reachable from p.
 // It uses pc as the initial value.
 func setPCs(p *obj.Prog, pc int64) {
@@ -551,11 +569,20 @@
 	}
 }
 
+// preprocess generates prologue and epilogue code, computes PC-relative branch
+// and jump offsets, and resolves pseudo-registers.
+//
+// preprocess is called once per linker symbol.
+//
+// When preprocess finishes, all instructions in the symbol are either
+// concrete, real RISC-V instructions or directive pseudo-ops like TEXT,
+// PCDATA, and FUNCDATA.
 func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 	if cursym.Func.Text == nil || cursym.Func.Text.Link == nil {
 		return
 	}
 
+	// Generate the prologue.
 	text := cursym.Func.Text
 	if text.As != obj.ATEXT {
 		ctxt.Diag("preprocess: found symbol that does not start with TEXT directive")
@@ -577,10 +604,126 @@
 		}
 	}
 
+	if !containsCall(cursym) {
+		text.From.Sym.Set(obj.AttrLeaf, true)
+		if stacksize == 0 {
+			// A leaf function with no locals has no frame.
+			text.From.Sym.Set(obj.AttrNoFrame, true)
+		}
+	}
+
+	// Save LR unless there is no frame.
+	if !text.From.Sym.NoFrame() {
+		stacksize += ctxt.FixedFrameSize()
+	}
+
 	cursym.Func.Args = text.To.Val.(int32)
 	cursym.Func.Locals = int32(stacksize)
 
-	// TODO(jsing): Implement.
+	prologue := text
+
+	if !cursym.Func.Text.From.Sym.NoSplit() {
+		prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check
+	}
+
+	if stacksize != 0 {
+		prologue = ctxt.StartUnsafePoint(prologue, newprog)
+
+		// Actually save LR.
+		prologue = obj.Appendp(prologue, newprog)
+		prologue.As = AMOV
+		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize}
+
+		// Insert stack adjustment.
+		prologue = obj.Appendp(prologue, newprog)
+		prologue.As = AADDI
+		prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize}
+		prologue.Reg = REG_SP
+		prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
+		prologue.Spadj = int32(stacksize)
+
+		prologue = ctxt.EndUnsafePoint(prologue, newprog, -1)
+	}
+
+	if cursym.Func.Text.From.Sym.Wrapper() {
+		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
+		//
+		//   MOV g_panic(g), X11
+		//   BNE X11, ZERO, adjust
+		// end:
+		//   NOP
+		// ...rest of function..
+		// adjust:
+		//   MOV panic_argp(X11), X12
+		//   ADD $(autosize+FIXED_FRAME), SP, X13
+		//   BNE X12, X13, end
+		//   ADD $FIXED_FRAME, SP, X12
+		//   MOV X12, panic_argp(X11)
+		//   JMP end
+		//
+		// The NOP is needed to give the jumps somewhere to land.
+
+		ldpanic := obj.Appendp(prologue, newprog)
+
+		ldpanic.As = AMOV
+		ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic
+		ldpanic.Reg = 0
+		ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X11}
+
+		bneadj := obj.Appendp(ldpanic, newprog)
+		bneadj.As = ABNE
+		bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X11}
+		bneadj.Reg = REG_ZERO
+		bneadj.To.Type = obj.TYPE_BRANCH
+
+		endadj := obj.Appendp(bneadj, newprog)
+		endadj.As = obj.ANOP
+
+		last := endadj
+		for last.Link != nil {
+			last = last.Link
+		}
+
+		getargp := obj.Appendp(last, newprog)
+		getargp.As = AMOV
+		getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X11, Offset: 0} // Panic.argp
+		getargp.Reg = 0
+		getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
+
+		bneadj.Pcond = getargp
+
+		calcargp := obj.Appendp(getargp, newprog)
+		calcargp.As = AADDI
+		calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.FixedFrameSize()}
+		calcargp.Reg = REG_SP
+		calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X13}
+
+		testargp := obj.Appendp(calcargp, newprog)
+		testargp.As = ABNE
+		testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
+		testargp.Reg = REG_X13
+		testargp.To.Type = obj.TYPE_BRANCH
+		testargp.Pcond = endadj
+
+		adjargp := obj.Appendp(testargp, newprog)
+		adjargp.As = AADDI
+		adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)}
+		adjargp.Reg = REG_SP
+		adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
+
+		setargp := obj.Appendp(adjargp, newprog)
+		setargp.As = AMOV
+		setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X12}
+		setargp.Reg = 0
+		setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X11, Offset: 0} // Panic.argp
+
+		godone := obj.Appendp(setargp, newprog)
+		godone.As = AJAL
+		godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
+		godone.To.Type = obj.TYPE_BRANCH
+		godone.Pcond = endadj
+	}
 
 	// Update stack-based offsets.
 	for p := cursym.Func.Text; p != nil; p = p.Link {
@@ -588,8 +731,7 @@
 		stackOffset(&p.To, stacksize)
 	}
 
-	// Additional instruction rewriting. Any rewrites that change the number
-	// of instructions must occur here (before jump target resolution).
+	// Additional instruction rewriting.
 	for p := cursym.Func.Text; p != nil; p = p.Link {
 		switch p.As {
 		case obj.AGETCALLERPC:
@@ -621,6 +763,46 @@
 				}
 			}
 
+		case obj.ARET:
+			// Replace RET with epilogue.
+			retJMP := p.To.Sym
+
+			if stacksize != 0 {
+				// Restore LR.
+				p.As = AMOV
+				p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
+				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
+				p = obj.Appendp(p, newprog)
+
+				p.As = AADDI
+				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize}
+				p.Reg = REG_SP
+				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
+				p.Spadj = int32(-stacksize)
+				p = obj.Appendp(p, newprog)
+			}
+
+			if retJMP != nil {
+				p.As = obj.ARET
+				p.To.Sym = retJMP
+				p = jalrToSym(ctxt, p, newprog, REG_ZERO)
+			} else {
+				p.As = AJALR
+				p.From.Type = obj.TYPE_CONST
+				p.From.Offset = 0
+				p.Reg = REG_LR
+				p.To.Type = obj.TYPE_REG
+				p.To.Reg = REG_ZERO
+			}
+
+			// "Add back" the stack removed in the previous instruction.
+			//
+			// This is to avoid confusing pctospadj, which sums
+			// Spadj from function entry to each PC, and shouldn't
+			// count adjustments from earlier epilogues, since they
+			// won't affect later PCs.
+			p.Spadj = int32(stacksize)
+
 		// Replace FNE[SD] with FEQ[SD] and NOT.
 		case AFNES:
 			if p.To.Type != obj.TYPE_REG {
@@ -864,6 +1046,152 @@
 	}
 }
 
+// stacksplit appends the stack split check after p and returns the last Prog
+// it appended (p itself when framesize is 0). The check compares SP with the
+// stack guard loaded from g and calls a runtime.morestack variant if it fails.
+func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog {
+	// Leaf function with no frame is effectively NOSPLIT.
+	if framesize == 0 {
+		return p
+	}
+
+	// MOV	g_stackguard(g), X10
+	p = obj.Appendp(p, newprog)
+	p.As = AMOV
+	p.From.Type = obj.TYPE_MEM
+	p.From.Reg = REGG
+	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
+	if cursym.CFunc() {
+		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
+	}
+	p.To.Type = obj.TYPE_REG
+	p.To.Reg = REG_X10
+
+	var to_done, to_more *obj.Prog // branches whose targets are filled in below
+
+	if framesize <= objabi.StackSmall {
+		// small stack: SP < stackguard
+		//	BLTU	stackguard, SP, done
+		p = obj.Appendp(p, newprog)
+		p.As = ABLTU
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_X10
+		p.Reg = REG_SP
+		p.To.Type = obj.TYPE_BRANCH
+		to_done = p
+	} else if framesize <= objabi.StackBig {
+		// large stack: SP-framesize < stackguard-StackSmall
+		//	ADD	$-(framesize-StackSmall), SP, X11
+		//	BLTU	stackguard, X11, done
+		p = obj.Appendp(p, newprog)
+		// TODO(sorear): logic inconsistent with comment, but both match all non-x86 arches
+		p.As = AADDI
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = -(int64(framesize) - objabi.StackSmall)
+		p.Reg = REG_SP
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = REG_X11
+
+		p = obj.Appendp(p, newprog)
+		p.As = ABLTU
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_X10
+		p.Reg = REG_X11
+		p.To.Type = obj.TYPE_BRANCH
+		to_done = p
+	} else {
+		// Such a large stack we need to protect against wraparound if SP is close to zero:
+		//	SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
+		// The +StackGuard on both sides is required to keep the left side positive: SP is allowed to be slightly below stackguard. See stack.h.
+		//
+		// Preemption sets stackguard to StackPreempt, a very large value.
+		// That breaks the math above, so we have to check for that explicitly.
+		//	// stackguard is X10
+		//	MOV	$StackPreempt, X11
+		//	BEQ	X10, X11, more
+		//	ADD	$StackGuard, SP, X11
+		//	SUB	X10, X11
+		//	MOV	$(framesize+(StackGuard-StackSmall)), X10
+		//	BLTU	X10, X11, done
+		p = obj.Appendp(p, newprog)
+		p.As = AMOV
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = objabi.StackPreempt
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = REG_X11
+
+		p = obj.Appendp(p, newprog)
+		to_more = p
+		p.As = ABEQ
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_X10
+		p.Reg = REG_X11
+		p.To.Type = obj.TYPE_BRANCH
+
+		p = obj.Appendp(p, newprog)
+		p.As = AADDI
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = int64(objabi.StackGuard)
+		p.Reg = REG_SP
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = REG_X11
+
+		p = obj.Appendp(p, newprog)
+		p.As = ASUB
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_X10
+		p.Reg = REG_X11
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = REG_X11
+
+		p = obj.Appendp(p, newprog)
+		p.As = AMOV
+		p.From.Type = obj.TYPE_CONST
+		p.From.Offset = int64(framesize) + int64(objabi.StackGuard) - objabi.StackSmall
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = REG_X10
+
+		p = obj.Appendp(p, newprog)
+		p.As = ABLTU
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = REG_X10
+		p.Reg = REG_X11
+		p.To.Type = obj.TYPE_BRANCH
+		to_done = p
+	}
+
+	p = ctxt.EmitEntryLiveness(cursym, p, newprog)
+
+	// CALL runtime.morestack(SB)
+	p = obj.Appendp(p, newprog)
+	p.As = obj.ACALL
+	p.To.Type = obj.TYPE_BRANCH
+	if cursym.CFunc() {
+		p.To.Sym = ctxt.Lookup("runtime.morestackc")
+	} else if !cursym.Func.Text.From.Sym.NeedCtxt() {
+		p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt")
+	} else {
+		p.To.Sym = ctxt.Lookup("runtime.morestack")
+	}
+	if to_more != nil {
+		to_more.Pcond = p // the StackPreempt check jumps straight to the call
+	}
+	p = jalrToSym(ctxt, p, newprog, REG_X5)
+
+	// JMP start
+	p = obj.Appendp(p, newprog)
+	p.As = AJAL
+	p.To = obj.Addr{Type: obj.TYPE_BRANCH}
+	p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
+	p.Pcond = cursym.Func.Text.Link // the Prog that follows the function's first Prog
+
+	p = obj.Appendp(p, newprog)
+	p.As = obj.ANOP // zero-width placeholder that serves as to_done's jump target
+	to_done.Pcond = p
+
+	return p
+}
+
 // signExtend sign extends val starting at bit bit.
 func signExtend(val int64, bit uint) int64 {
 	return val << (64 - bit) >> (64 - bit)