cmd/dist,cmd/go,runtime: add support for cgo on linux/riscv64

Fixes #36641

Change-Id: I51868d83ce341d78d33b221d184c5a5110c60d14
Reviewed-on: https://go-review.googlesource.com/c/go/+/263598
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/misc/cgo/test/testdata/issue9400/asm_riscv64.s b/misc/cgo/test/testdata/issue9400/asm_riscv64.s
new file mode 100644
index 0000000..20fcc00
--- /dev/null
+++ b/misc/cgo/test/testdata/issue9400/asm_riscv64.s
@@ -0,0 +1,31 @@
+// Copyright 2020 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build riscv64
+// +build !gccgo
+
+#include "textflag.h"
+
+TEXT ·RewindAndSetgid(SB),NOSPLIT|NOFRAME,$0-0
+	// Rewind stack pointer so anything that happens on the stack
+	// will clobber the test pattern created by the caller
+	ADD	$(1024*8), X2
+
+	// Ask signaller to setgid
+	MOV	$1, X5
+	FENCE
+	MOVW	X5, ·Baton(SB)
+	FENCE
+
+	// Wait for setgid completion
+loop:
+	FENCE
+	MOVW	·Baton(SB), X5
+	OR	X6, X6, X6	// hint that we're in a spin loop
+	BNE	ZERO, X5, loop
+	FENCE
+
+	// Restore stack
+	ADD	$(-1024*8), X2
+	RET
diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go
index d822a83..20cb04d 100644
--- a/src/cmd/dist/build.go
+++ b/src/cmd/dist/build.go
@@ -1549,7 +1549,7 @@
 	"linux/mipsle":    true,
 	"linux/mips64":    true,
 	"linux/mips64le":  true,
-	"linux/riscv64":   false, // Issue 36641
+	"linux/riscv64":   true,
 	"linux/s390x":     true,
 	"linux/sparc64":   true,
 	"android/386":     true,
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go
index 9c25392..d12a52b 100644
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -953,7 +953,7 @@
 	// Internally linking cgo is incomplete on some architectures.
 	// https://golang.org/issue/10373
 	// https://golang.org/issue/14449
-	if goarch == "mips64" || goarch == "mips64le" || goarch == "mips" || goarch == "mipsle" {
+	if goarch == "mips64" || goarch == "mips64le" || goarch == "mips" || goarch == "mipsle" || goarch == "riscv64" {
 		return false
 	}
 	if goos == "aix" {
@@ -1117,8 +1117,8 @@
 		"android-arm", "android-arm64",
 		"dragonfly-amd64",
 		"freebsd-386", "freebsd-amd64", "freebsd-arm",
-		"linux-386", "linux-amd64", "linux-arm", "linux-ppc64le", "linux-s390x",
-		"netbsd-386", "netbsd-amd64", "linux-arm64":
+		"linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-ppc64le", "linux-riscv64", "linux-s390x",
+		"netbsd-386", "netbsd-amd64":
 
 		cmd := t.addCmd(dt, "misc/cgo/test", t.goTest())
 		cmd.Env = append(os.Environ(), "GOFLAGS=-ldflags=-linkmode=external")
diff --git a/src/cmd/go/testdata/script/build_plugin_non_main.txt b/src/cmd/go/testdata/script/build_plugin_non_main.txt
index dba80c2..3c82dce 100644
--- a/src/cmd/go/testdata/script/build_plugin_non_main.txt
+++ b/src/cmd/go/testdata/script/build_plugin_non_main.txt
@@ -1,5 +1,6 @@
-# Plugins are only supported on linux,cgo and darwin,cgo.
+# Plugins are only supported on linux,cgo (!riscv64) and darwin,cgo.
 [!linux] [!darwin] skip
+[linux] [riscv64] skip
 [!cgo] skip
 
 go build -n testdep
diff --git a/src/cmd/nm/nm_cgo_test.go b/src/cmd/nm/nm_cgo_test.go
index 9a257e0..e0414e6 100644
--- a/src/cmd/nm/nm_cgo_test.go
+++ b/src/cmd/nm/nm_cgo_test.go
@@ -24,7 +24,7 @@
 		}
 	case "linux":
 		switch runtime.GOARCH {
-		case "arm64", "mips64", "mips64le", "mips", "mipsle", "ppc64", "ppc64le":
+		case "arm64", "mips64", "mips64le", "mips", "mipsle", "ppc64", "ppc64le", "riscv64":
 			return false
 		}
 	case "openbsd":
diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s
index caaf42a..01b42dc 100644
--- a/src/runtime/asm_riscv64.s
+++ b/src/runtime/asm_riscv64.s
@@ -9,10 +9,9 @@
 // func rt0_go()
 TEXT runtime·rt0_go(SB),NOSPLIT,$0
 	// X2 = stack; A0 = argc; A1 = argv
-
 	ADD	$-24, X2
-	MOV	A0, 8(X2) // argc
-	MOV	A1, 16(X2) // argv
+	MOV	A0, 8(X2)	// argc
+	MOV	A1, 16(X2)	// argv
 
 	// create istack out of the given (operating system) stack.
 	// _cgo_init may update stackguard.
@@ -28,10 +27,10 @@
 	MOV	_cgo_init(SB), T0
 	BEQ	T0, ZERO, nocgo
 
-	MOV	ZERO, A3	// arg 3: not used
-	MOV	ZERO, A2	// arg 2: not used
+	MOV	ZERO, A3		// arg 3: not used
+	MOV	ZERO, A2		// arg 2: not used
 	MOV	$setg_gcc<>(SB), A1	// arg 1: setg
-	MOV	g, A0	// arg 0: G
+	MOV	g, A0			// arg 0: G
 	JALR	RA, T0
 
 nocgo:
@@ -313,10 +312,62 @@
 	CALL	runtime·badctxt(SB)
 	RET
 
+// Save state of caller into g->sched. Smashes X31.
+TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
+	MOV	X1, (g_sched+gobuf_pc)(g)
+	MOV	X2, (g_sched+gobuf_sp)(g)
+	MOV	ZERO, (g_sched+gobuf_lr)(g)
+	MOV	ZERO, (g_sched+gobuf_ret)(g)
+	// Assert ctxt is zero. See func save.
+	MOV	(g_sched+gobuf_ctxt)(g), X31
+	BEQ	ZERO, X31, 2(PC)
+	CALL	runtime·badctxt(SB)
+	RET
+
 // func asmcgocall(fn, arg unsafe.Pointer) int32
+// Call fn(arg) on the scheduler stack,
+// aligned appropriately for the gcc ABI.
+// See cgocall.go for more details.
 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
-	// TODO(jsing): Add support for cgo - issue #36641.
-	WORD $0		// crash
+	MOV	fn+0(FP), X5
+	MOV	arg+8(FP), X10
+
+	MOV	X2, X8	// save original stack pointer
+	MOV	g, X9
+
+	// Figure out if we need to switch to m->g0 stack.
+	// We get called to create new OS threads too, and those
+	// come in on the m->g0 stack already.
+	MOV	g_m(g), X6
+	MOV	m_g0(X6), X7
+	BEQ	X7, g, g0
+
+	CALL	gosave<>(SB)
+	MOV	X7, g
+	CALL	runtime·save_g(SB)
+	MOV	(g_sched+gobuf_sp)(g), X2
+
+	// Now on a scheduling stack (a pthread-created stack).
+g0:
+	// Save room for two of our pointers.
+	ADD	$-16, X2
+	MOV	X9, 0(X2)	// save old g on stack
+	MOV	(g_stack+stack_hi)(X9), X9
+	SUB	X8, X9, X8
+	MOV	X8, 8(X2)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
+
+	JALR	RA, (X5)
+
+	// Restore g, stack pointer. X10 is return value.
+	MOV	0(X2), g
+	CALL	runtime·save_g(SB)
+	MOV	(g_stack+stack_hi)(g), X5
+	MOV	8(X2), X6
+	SUB	X6, X5, X6
+	MOV	X6, X2
+
+	MOVW	X10, ret+16(FP)
+	RET
 
 // func asminit()
 TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
@@ -444,6 +495,21 @@
 CALLFN(·call536870912, 536870912)
 CALLFN(·call1073741824, 1073741824)
 
+// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
+// Must obey the gcc calling convention.
+TEXT _cgo_topofstack(SB),NOSPLIT,$8
+	// g (X27) and REG_TMP (X31) might be clobbered by load_g.
+	// X27 is callee-save in the gcc calling convention, so save it.
+	MOV	g, savedX27-8(SP)
+
+	CALL	runtime·load_g(SB)
+	MOV	g_m(g), X5
+	MOV	m_curg(X5), X5
+	MOV	(g_stack+stack_hi)(X5), X10 // return value in X10
+
+	MOV	savedX27-8(SP), g
+	RET
+
 // func goexit(neverCallThisFunction)
 // The top-most function running on a goroutine
 // returns to goexit+PCQuantum.
@@ -453,11 +519,111 @@
 	// traceback from goexit1 must hit code range of goexit
 	MOV	ZERO, ZERO	// NOP
 
-// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
+// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
 // See cgocall.go for more details.
-TEXT ·cgocallback(SB),NOSPLIT,$0-24
-	// TODO(jsing): Add support for cgo - issue #36641.
-	WORD $0		// crash
+TEXT ·cgocallback(SB),NOSPLIT,$24-24
+	NO_LOCAL_POINTERS
+
+	// Load m and g from thread-local storage.
+	MOVBU	runtime·iscgo(SB), X5
+	BEQ	ZERO, X5, nocgo
+	CALL	runtime·load_g(SB)
+nocgo:
+
+	// If g is nil, Go did not create the current thread.
+	// Call needm to obtain one for temporary use.
+	// In this case, we're running on the thread stack, so there's
+	// lots of space, but the linker doesn't know. Hide the call from
+	// the linker analysis by using an indirect call.
+	BEQ	ZERO, g, needm
+
+	MOV	g_m(g), X5
+	MOV	X5, savedm-8(SP)
+	JMP	havem
+
+needm:
+	MOV	g, savedm-8(SP) // g is zero, so is m.
+	MOV	$runtime·needm(SB), X6
+	JALR	RA, X6
+
+	// Set m->sched.sp = SP, so that if a panic happens
+	// during the function we are about to execute, it will
+	// have a valid SP to run on the g0 stack.
+	// The next few lines (after the havem label)
+	// will save this SP onto the stack and then write
+	// the same SP back to m->sched.sp. That seems redundant,
+	// but if an unrecovered panic happens, unwindm will
+	// restore the g->sched.sp from the stack location
+	// and then systemstack will try to use it. If we don't set it here,
+	// that restored SP will be uninitialized (typically 0) and
+	// will not be usable.
+	MOV	g_m(g), X5
+	MOV	m_g0(X5), X6
+	MOV	X2, (g_sched+gobuf_sp)(X6)
+
+havem:
+	// Now there's a valid m, and we're running on its m->g0.
+	// Save current m->g0->sched.sp on stack and then set it to SP.
+	// Save current sp in m->g0->sched.sp in preparation for
+	// switch back to m->curg stack.
+	// NOTE: unwindm knows that the saved g->sched.sp is at 8(X2) aka savedsp-24(SP).
+	MOV	m_g0(X5), X6
+	MOV	(g_sched+gobuf_sp)(X6), X7
+	MOV	X7, savedsp-24(SP)	// must match frame size
+	MOV	X2, (g_sched+gobuf_sp)(X6)
+
+	// Switch to m->curg stack and call runtime.cgocallbackg.
+	// Because we are taking over the execution of m->curg
+	// but *not* resuming what had been running, we need to
+	// save that information (m->curg->sched) so we can restore it.
+	// We can restore m->curg->sched.sp easily, because calling
+	// runtime.cgocallbackg leaves SP unchanged upon return.
+	// To save m->curg->sched.pc, we push it onto the curg stack and
+	// open a frame the same size as cgocallback's g0 frame.
+	// Once we switch to the curg stack, the pushed PC will appear
+	// to be the return PC of cgocallback, so that the traceback
+	// will seamlessly trace back into the earlier calls.
+	MOV	m_curg(X5), g
+	CALL	runtime·save_g(SB)
+	MOV	(g_sched+gobuf_sp)(g), X6 // prepare stack as X6
+	MOV	(g_sched+gobuf_pc)(g), X7
+	MOV	X7, -(24+8)(X6)		// "saved LR"; must match frame size
+	// Gather our arguments into registers.
+	MOV	fn+0(FP), X7
+	MOV	frame+8(FP), X8
+	MOV	ctxt+16(FP), X9
+	MOV	$-(24+8)(X6), X2	// switch stack; must match frame size
+	MOV	X7, 8(X2)
+	MOV	X8, 16(X2)
+	MOV	X9, 24(X2)
+	CALL	runtime·cgocallbackg(SB)
+
+	// Restore g->sched (== m->curg->sched) from saved values.
+	MOV	0(X2), X7
+	MOV	X7, (g_sched+gobuf_pc)(g)
+	MOV	$(24+8)(X2), X6		// must match frame size
+	MOV	X6, (g_sched+gobuf_sp)(g)
+
+	// Switch back to m->g0's stack and restore m->g0->sched.sp.
+	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
+	// so we do not have to restore it.)
+	MOV	g_m(g), X5
+	MOV	m_g0(X5), g
+	CALL	runtime·save_g(SB)
+	MOV	(g_sched+gobuf_sp)(g), X2
+	MOV	savedsp-24(SP), X6	// must match frame size
+	MOV	X6, (g_sched+gobuf_sp)(g)
+
+	// If the m on entry was nil, we called needm above to borrow an m
+	// for the duration of the call. Since the call is over, return it with dropm.
+	MOV	savedm-8(SP), X5
+	BNE	ZERO, X5, droppedm
+	MOV	$runtime·dropm(SB), X6
+	JALR	RA, X6
+droppedm:
+
+	// Done!
+	RET
 
 TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
 	EBREAK
diff --git a/src/runtime/cgo/asm_riscv64.s b/src/runtime/cgo/asm_riscv64.s
new file mode 100644
index 0000000..b4ddbb0
--- /dev/null
+++ b/src/runtime/cgo/asm_riscv64.s
@@ -0,0 +1,84 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build riscv64
+
+#include "textflag.h"
+
+// Called by C code generated by cmd/cgo.
+// func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr)
+// Saves C callee-saved registers and calls cgocallback with three arguments.
+// fn is the PC of a func(a unsafe.Pointer) function.
+TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0
+	/*
+	 * Push arguments for fn (X10, X11, X13), along with all callee-save
+	 * registers. Note that at procedure entry the first argument is at
+	 * 8(X2).
+	 */
+	ADD	$(-8*31), X2
+	MOV	X10, (8*1)(X2) // fn unsafe.Pointer
+	MOV	X11, (8*2)(X2) // a unsafe.Pointer
+	MOV	X13, (8*3)(X2) // ctxt uintptr
+	MOV	X8, (8*4)(X2)
+	MOV	X9, (8*5)(X2)
+	MOV	X18, (8*6)(X2)
+	MOV	X19, (8*7)(X2)
+	MOV	X20, (8*8)(X2)
+	MOV	X21, (8*9)(X2)
+	MOV	X22, (8*10)(X2)
+	MOV	X23, (8*11)(X2)
+	MOV	X24, (8*12)(X2)
+	MOV	X25, (8*13)(X2)
+	MOV	X26, (8*14)(X2)
+	MOV	g, (8*15)(X2)
+	MOV	X3, (8*16)(X2)
+	MOV	X4, (8*17)(X2)
+	MOV	X1, (8*18)(X2)
+	MOVD	F8, (8*19)(X2)
+	MOVD	F9, (8*20)(X2)
+	MOVD	F18, (8*21)(X2)
+	MOVD	F19, (8*22)(X2)
+	MOVD	F20, (8*23)(X2)
+	MOVD	F21, (8*24)(X2)
+	MOVD	F22, (8*25)(X2)
+	MOVD	F23, (8*26)(X2)
+	MOVD	F24, (8*27)(X2)
+	MOVD	F25, (8*28)(X2)
+	MOVD	F26, (8*29)(X2)
+	MOVD	F27, (8*30)(X2)
+
+	// Initialize Go ABI environment
+	CALL	runtime·load_g(SB)
+	CALL	runtime·cgocallback(SB)
+
+	MOV	(8*4)(X2), X8
+	MOV	(8*5)(X2), X9
+	MOV	(8*6)(X2), X18
+	MOV	(8*7)(X2), X19
+	MOV	(8*8)(X2), X20
+	MOV	(8*9)(X2), X21
+	MOV	(8*10)(X2), X22
+	MOV	(8*11)(X2), X23
+	MOV	(8*12)(X2), X24
+	MOV	(8*13)(X2), X25
+	MOV	(8*14)(X2), X26
+	MOV	(8*15)(X2), g
+	MOV	(8*16)(X2), X3
+	MOV	(8*17)(X2), X4
+	MOV	(8*18)(X2), X1
+	MOVD	(8*19)(X2), F8
+	MOVD	(8*20)(X2), F9
+	MOVD	(8*21)(X2), F18
+	MOVD	(8*22)(X2), F19
+	MOVD	(8*23)(X2), F20
+	MOVD	(8*24)(X2), F21
+	MOVD	(8*25)(X2), F22
+	MOVD	(8*26)(X2), F23
+	MOVD	(8*27)(X2), F24
+	MOVD	(8*28)(X2), F25
+	MOVD	(8*29)(X2), F26
+	MOVD	(8*30)(X2), F27
+	ADD	$(8*31), X2
+
+	RET
diff --git a/src/runtime/cgo/gcc_linux_riscv64.c b/src/runtime/cgo/gcc_linux_riscv64.c
new file mode 100644
index 0000000..22b76c2
--- /dev/null
+++ b/src/runtime/cgo/gcc_linux_riscv64.c
@@ -0,0 +1,74 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <pthread.h>
+#include <string.h>
+#include <signal.h>
+#include "libcgo.h"
+#include "libcgo_unix.h"
+
+static void *threadentry(void*);
+
+void (*x_cgo_inittls)(void **tlsg, void **tlsbase);
+static void (*setg_gcc)(void*);
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+	pthread_attr_t attr;
+	sigset_t ign, oset;
+	pthread_t p;
+	size_t size;
+	int err;
+
+	sigfillset(&ign);
+	pthread_sigmask(SIG_SETMASK, &ign, &oset);
+
+	// Not sure why the memset is necessary here,
+	// but without it, we get a bogus stack size
+	// out of pthread_attr_getstacksize.  C'est la Linux.
+	memset(&attr, 0, sizeof attr);
+	pthread_attr_init(&attr);
+	size = 0;
+	pthread_attr_getstacksize(&attr, &size);
+	// Leave stacklo=0 and set stackhi=size; mstart will do the rest.
+	ts->g->stackhi = size;
+	err = _cgo_try_pthread_create(&p, &attr, threadentry, ts);
+
+	pthread_sigmask(SIG_SETMASK, &oset, nil);
+
+	if (err != 0) {
+		fatalf("pthread_create failed: %s", strerror(err));
+	}
+}
+
+extern void crosscall1(void (*fn)(void), void (*setg_gcc)(void*), void *g);
+static void*
+threadentry(void *v)
+{
+	ThreadStart ts;
+
+	ts = *(ThreadStart*)v;
+	free(v);
+
+	crosscall1(ts.fn, setg_gcc, (void*)ts.g);
+	return nil;
+}
+
+void
+x_cgo_init(G *g, void (*setg)(void*), void **tlsg, void **tlsbase)
+{
+	pthread_attr_t attr;
+	size_t size;
+
+	setg_gcc = setg;
+	pthread_attr_init(&attr);
+	pthread_attr_getstacksize(&attr, &size);
+	g->stacklo = (uintptr)&attr - size + 4096;
+	pthread_attr_destroy(&attr);
+
+	if (x_cgo_inittls) {
+		x_cgo_inittls(tlsg, tlsbase);
+	}
+}
diff --git a/src/runtime/cgo/gcc_riscv64.S b/src/runtime/cgo/gcc_riscv64.S
new file mode 100644
index 0000000..f429dc6
--- /dev/null
+++ b/src/runtime/cgo/gcc_riscv64.S
@@ -0,0 +1,80 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+ * void crosscall1(void (*fn)(void), void (*setg_gcc)(void *g), void *g)
+ *
+ * Calling into the gc tool chain, where all registers are caller save.
+ * Called from standard RISCV ELF psABI, where x8-x9, x18-x27, f8-f9 and
+ * f18-f27 are callee-save, so they must be saved explicitly, along with
+ * x1 (LR).
+ */
+.globl crosscall1
+crosscall1:
+	sd	x1, -200(sp)
+	addi	sp, sp, -200
+	sd	x8, 8(sp)
+	sd	x9, 16(sp)
+	sd	x18, 24(sp)
+	sd	x19, 32(sp)
+	sd	x20, 40(sp)
+	sd	x21, 48(sp)
+	sd	x22, 56(sp)
+	sd	x23, 64(sp)
+	sd	x24, 72(sp)
+	sd	x25, 80(sp)
+	sd	x26, 88(sp)
+	sd	x27, 96(sp)
+	fsd	f8, 104(sp)
+	fsd	f9, 112(sp)
+	fsd	f18, 120(sp)
+	fsd	f19, 128(sp)
+	fsd	f20, 136(sp)
+	fsd	f21, 144(sp)
+	fsd	f22, 152(sp)
+	fsd	f23, 160(sp)
+	fsd	f24, 168(sp)
+	fsd	f25, 176(sp)
+	fsd	f26, 184(sp)
+	fsd	f27, 192(sp)
+
+	// a0 = *fn, a1 = *setg_gcc, a2 = *g
+	mv	s1, a0
+	mv	s0, a1
+	mv	a0, a2
+	jalr	ra, s0	// call setg_gcc (clobbers x30 aka g)
+	jalr	ra, s1	// call fn
+
+	ld	x1, 0(sp)
+	ld	x8, 8(sp)
+	ld	x9, 16(sp)
+	ld	x18, 24(sp)
+	ld	x19, 32(sp)
+	ld	x20, 40(sp)
+	ld	x21, 48(sp)
+	ld	x22, 56(sp)
+	ld	x23, 64(sp)
+	ld	x24, 72(sp)
+	ld	x25, 80(sp)
+	ld	x26, 88(sp)
+	ld	x27, 96(sp)
+	fld	f8, 104(sp)
+	fld	f9, 112(sp)
+	fld	f18, 120(sp)
+	fld	f19, 128(sp)
+	fld	f20, 136(sp)
+	fld	f21, 144(sp)
+	fld	f22, 152(sp)
+	fld	f23, 160(sp)
+	fld	f24, 168(sp)
+	fld	f25, 176(sp)
+	fld	f26, 184(sp)
+	fld	f27, 192(sp)
+	addi	sp, sp, 200
+
+	jr	ra
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index 9bca279..20cacd60 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -309,7 +309,7 @@
 		switch GOARCH {
 		default:
 			throw("unwindm not implemented")
-		case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle":
+		case "386", "amd64", "arm", "ppc64", "ppc64le", "mips64", "mips64le", "s390x", "mips", "mipsle", "riscv64":
 			sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + sys.MinFrameSize))
 		case "arm64":
 			sched.sp = *(*uintptr)(unsafe.Pointer(sched.sp + 16))
diff --git a/src/runtime/tls_riscv64.s b/src/runtime/tls_riscv64.s
index 8386980..22b550b 100644
--- a/src/runtime/tls_riscv64.s
+++ b/src/runtime/tls_riscv64.s
@@ -9,10 +9,22 @@
 
 // If !iscgo, this is a no-op.
 //
-// NOTE: mcall() assumes this clobbers only R23 (REGTMP).
-// FIXME: cgo
+// NOTE: mcall() assumes this clobbers only X31 (REG_TMP).
 TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
+	MOVB	runtime·iscgo(SB), X31
+	BEQ	X0, X31, nocgo
+
+	MOV	runtime·tls_g(SB), X31
+	ADD	X4, X31		// add offset to thread pointer (X4)
+	MOV	g, (X31)
+
+nocgo:
 	RET
 
 TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
+	MOV	runtime·tls_g(SB), X31
+	ADD	X4, X31		// add offset to thread pointer (X4)
+	MOV	(X31), g
 	RET
+
+GLOBL runtime·tls_g(SB), TLSBSS, $8