runtime: buffered write barrier for mips64

Updates #22460.

Change-Id: I9718bff3a346e765601cfd1890417bdfa0f7b9d8
Reviewed-on: https://go-review.googlesource.com/92700
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 5b3d7d6..c624f1d 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -408,7 +408,7 @@
 	}
 
 	switch objabi.GOARCH {
-	case "amd64", "amd64p32", "386", "arm", "arm64":
+	case "amd64", "amd64p32", "386", "arm", "arm64", "mips64", "mips64le":
 	default:
 		// Other architectures don't support the buffered
 		// write barrier yet.
diff --git a/src/cmd/compile/internal/mips64/ssa.go b/src/cmd/compile/internal/mips64/ssa.go
index 291a162..ff2f612 100644
--- a/src/cmd/compile/internal/mips64/ssa.go
+++ b/src/cmd/compile/internal/mips64/ssa.go
@@ -483,6 +483,11 @@
 		gc.Patch(p6, p2)
 	case ssa.OpMIPS64CALLstatic, ssa.OpMIPS64CALLclosure, ssa.OpMIPS64CALLinter:
 		s.Call(v)
+	case ssa.OpMIPS64LoweredWB:
+		p := s.Prog(obj.ACALL)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = v.Aux.(*obj.LSym)
 	case ssa.OpMIPS64LoweredAtomicLoad32, ssa.OpMIPS64LoweredAtomicLoad64:
 		as := mips.AMOVV
 		if v.Op == ssa.OpMIPS64LoweredAtomicLoad32 {
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64.rules b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
index a95b1fc..49e2160 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS64.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
@@ -450,6 +450,9 @@
 
 (If cond yes no) -> (NE cond yes no)
 
+// Write barrier.
+(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
+
 // Optimizations
 
 // Absorb boolean tests into block
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go b/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go
index 592a85e..b13ebb4 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go
@@ -408,6 +408,12 @@
 		// LoweredGetCallerSP returns the SP of the caller of the current function.
 		{name: "LoweredGetCallerSP", reg: gp01, rematerializeable: true},
 
+		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
+		// It saves all GP registers if necessary,
+		// but clobbers R31 (LR) because it's a call
+		// and R23 (REGTMP).
+		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("R20"), buildReg("R21")}, clobbers: (callerSave &^ gpg) | buildReg("R31")}, clobberFlags: true, aux: "Sym", symEffect: "None"},
+
 		// MOVDconvert converts between pointers and integers.
 		// We have a special op for this so as to not confuse GC
 		// (particularly stack maps).  It takes a memory arg so it
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index b8aa0aa..d99abaa 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1341,6 +1341,7 @@
 	OpMIPS64FPFlagFalse
 	OpMIPS64LoweredGetClosurePtr
 	OpMIPS64LoweredGetCallerSP
+	OpMIPS64LoweredWB
 	OpMIPS64MOVVconvert
 
 	OpPPC64ADD
@@ -17227,6 +17228,20 @@
 		},
 	},
 	{
+		name:         "LoweredWB",
+		auxType:      auxSym,
+		argLen:       3,
+		clobberFlags: true,
+		symEffect:    SymNone,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1048576}, // R20
+				{1, 2097152}, // R21
+			},
+			clobbers: 4611686018293170176, // R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 HI LO
+		},
+	},
+	{
 		name:   "MOVVconvert",
 		argLen: 2,
 		asm:    mips.AMOVV,
diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go
index 21265e3..8d718a6 100644
--- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go
+++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go
@@ -581,6 +581,8 @@
 		return rewriteValueMIPS64_OpTrunc64to32_0(v)
 	case OpTrunc64to8:
 		return rewriteValueMIPS64_OpTrunc64to8_0(v)
+	case OpWB:
+		return rewriteValueMIPS64_OpWB_0(v)
 	case OpXor16:
 		return rewriteValueMIPS64_OpXor16_0(v)
 	case OpXor32:
@@ -9872,6 +9874,24 @@
 		return true
 	}
 }
+func rewriteValueMIPS64_OpWB_0(v *Value) bool {
+	// match: (WB {fn} destptr srcptr mem)
+	// cond:
+	// result: (LoweredWB {fn} destptr srcptr mem)
+	for {
+		fn := v.Aux
+		_ = v.Args[2]
+		destptr := v.Args[0]
+		srcptr := v.Args[1]
+		mem := v.Args[2]
+		v.reset(OpMIPS64LoweredWB)
+		v.Aux = fn
+		v.AddArg(destptr)
+		v.AddArg(srcptr)
+		v.AddArg(mem)
+		return true
+	}
+}
 func rewriteValueMIPS64_OpXor16_0(v *Value) bool {
 	// match: (Xor16 x y)
 	// cond:
diff --git a/src/cmd/vet/all/whitelist/mips64x.txt b/src/cmd/vet/all/whitelist/mips64x.txt
index 5354d21..45efdc6 100644
--- a/src/cmd/vet/all/whitelist/mips64x.txt
+++ b/src/cmd/vet/all/whitelist/mips64x.txt
@@ -4,3 +4,5 @@
 runtime/duff_mips64x.s: [GOARCH] duffzero: function duffzero missing Go declaration
 runtime/tls_mips64x.s: [GOARCH] save_g: function save_g missing Go declaration
 runtime/tls_mips64x.s: [GOARCH] load_g: function load_g missing Go declaration
+
+runtime/asm_ARCHSUFF.s: [GOARCH] gcWriteBarrier: function gcWriteBarrier missing Go declaration
diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s
index 7d8d97d..f59421f 100644
--- a/src/runtime/asm_mips64x.s
+++ b/src/runtime/asm_mips64x.s
@@ -776,3 +776,101 @@
 	MOVW	$1, R1
 	MOVB	R1, ret+0(FP)
 	RET
+
+// gcWriteBarrier performs a heap pointer write and informs the GC.
+//
+// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
+// - R20 is the destination of the write
+// - R21 is the value being written at R20.
+// It clobbers R23 (the linker temp register).
+// The act of CALLing gcWriteBarrier will clobber R31 (LR).
+// It does not clobber any other general-purpose registers,
+// but may clobber others (e.g., floating point registers).
+TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$192
+	// Save the registers clobbered by the fast path.
+	MOVV	R1, 184(R29)
+	MOVV	R2, 192(R29)
+	MOVV	g_m(g), R1
+	MOVV	m_p(R1), R1
+	MOVV	(p_wbBuf+wbBuf_next)(R1), R2
+	// Increment wbBuf.next position.
+	ADDV	$16, R2
+	MOVV	R2, (p_wbBuf+wbBuf_next)(R1)
+	MOVV	(p_wbBuf+wbBuf_end)(R1), R1
+	MOVV	R1, R23		// R23 is linker temp register
+	// Record the write.
+	MOVV	R21, -16(R2)	// Record value
+	MOVV	(R20), R1	// TODO: This turns bad writes into bad reads.
+	MOVV	R1, -8(R2)	// Record *slot
+	// Is the buffer full?
+	BEQ	R2, R23, flush
+ret:
+	MOVV	184(R29), R1
+	MOVV	192(R29), R2
+	// Do the write.
+	MOVV	R21, (R20)
+	RET
+
+flush:
+	// Save all general purpose registers since these could be
+	// clobbered by wbBufFlush and were not saved by the caller.
+	MOVV	R20, 8(R29)	// Also first argument to wbBufFlush
+	MOVV	R21, 16(R29)	// Also second argument to wbBufFlush
+	// R1 already saved
+	// R2 already saved
+	MOVV	R3, 24(R29)
+	MOVV	R4, 32(R29)
+	MOVV	R5, 40(R29)
+	MOVV	R6, 48(R29)
+	MOVV	R7, 56(R29)
+	MOVV	R8, 64(R29)
+	MOVV	R9, 72(R29)
+	MOVV	R10, 80(R29)
+	MOVV	R11, 88(R29)
+	MOVV	R12, 96(R29)
+	MOVV	R13, 104(R29)
+	MOVV	R14, 112(R29)
+	MOVV	R15, 120(R29)
+	MOVV	R16, 128(R29)
+	MOVV	R17, 136(R29)
+	MOVV	R18, 144(R29)
+	MOVV	R19, 152(R29)
+	// R20 already saved
+	// R21 already saved.
+	MOVV	R22, 160(R29)
+	// R23 is tmp register.
+	MOVV	R24, 168(R29)
+	MOVV	R25, 176(R29)
+	// R26 is reserved by kernel.
+	// R27 is reserved by kernel.
+	// R28 is REGSB (not modified by Go code).
+	// R29 is SP.
+	// R30 is g.
+	// R31 is LR, which was saved by the prologue.
+
+	// This takes arguments R20 and R21.
+	CALL	runtime·wbBufFlush(SB)
+
+	MOVV	8(R29), R20
+	MOVV	16(R29), R21
+	MOVV	24(R29), R3
+	MOVV	32(R29), R4
+	MOVV	40(R29), R5
+	MOVV	48(R29), R6
+	MOVV	56(R29), R7
+	MOVV	64(R29), R8
+	MOVV	72(R29), R9
+	MOVV	80(R29), R10
+	MOVV	88(R29), R11
+	MOVV	96(R29), R12
+	MOVV	104(R29), R13
+	MOVV	112(R29), R14
+	MOVV	120(R29), R15
+	MOVV	128(R29), R16
+	MOVV	136(R29), R17
+	MOVV	144(R29), R18
+	MOVV	152(R29), R19
+	MOVV	160(R29), R22
+	MOVV	168(R29), R24
+	MOVV	176(R29), R25
+	JMP	ret