runtime: support preemption on windows/{386,amd64}
This implements preemptM on Windows using SuspendThread and
ResumeThread.
Unlike on POSIX platforms, preemptM on Windows happens synchronously.
This means we need to make a few other tweaks to suspendG:
1. We need to CAS the G back to _Grunning before doing the preemptM,
or there's a good chance we'll just catch the G spinning on its
status in the runtime, which won't be preemptible.
2. We need to rate-limit preemptM attempts. Otherwise, if the first
attempt catches the G at a non-preemptible point, the busy loop in
suspendG may hammer it so hard that it never makes it past that
non-preemptible point.
Updates #10958, #24543.
Change-Id: Ie53b098811096f7e45d864afd292dc9e999ce226
Reviewed-on: https://go-review.googlesource.com/c/go/+/204340
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 00f4c6e..cf5837c 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -6,6 +6,7 @@
import (
"runtime/internal/atomic"
+ "runtime/internal/sys"
"unsafe"
)
@@ -32,6 +33,7 @@
//go:cgo_import_dynamic runtime._GetSystemDirectoryA GetSystemDirectoryA%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
+//go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
@@ -79,6 +81,7 @@
_GetSystemInfo,
_GetSystemTimeAsFileTime,
_GetThreadContext,
+ _SetThreadContext,
_LoadLibraryW,
_LoadLibraryA,
_PostQueuedCompletionStatus,
@@ -539,6 +542,11 @@
//go:nosplit
func exit(code int32) {
+ // Disallow thread suspension for preemption. Otherwise,
+ // ExitProcess and SuspendThread can race: SuspendThread
+ // queues a suspension request for this thread, ExitProcess
+ // kills the suspending thread, and then this thread suspends.
+ lock(&suspendLock)
atomic.Store(&exiting, 1)
stdcall1(_ExitProcess, uintptr(code))
}
@@ -1003,21 +1011,24 @@
r.contextflags = _CONTEXT_CONTROL
stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(r)))
- var gp *g
- switch GOARCH {
- default:
- panic("unsupported architecture")
- case "arm":
- tls := &mp.tls[0]
- gp = **((***g)(unsafe.Pointer(tls)))
- case "386", "amd64":
- tls := &mp.tls[0]
- gp = *((**g)(unsafe.Pointer(tls)))
- }
+ gp := gFromTLS(mp)
sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
}
+func gFromTLS(mp *m) *g {
+ switch GOARCH {
+ case "arm":
+ tls := &mp.tls[0]
+ return **((***g)(unsafe.Pointer(tls)))
+ case "386", "amd64":
+ tls := &mp.tls[0]
+ return *((**g)(unsafe.Pointer(tls)))
+ }
+ throw("unsupported architecture")
+ return nil
+}
+
func profileloop1(param uintptr) uint32 {
stdcall2(_SetThreadPriority, currentThread, _THREAD_PRIORITY_HIGHEST)
@@ -1081,10 +1092,95 @@
atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
}
-const preemptMSupported = false
+const preemptMSupported = GOARCH != "arm"
+
+// suspendLock protects simultaneous SuspendThread operations from
+// suspending each other.
+var suspendLock mutex
func preemptM(mp *m) {
- // Not currently supported.
- //
- // TODO: Use SuspendThread/GetThreadContext/ResumeThread
+ if GOARCH == "arm" {
+ // TODO: Implement call injection
+ return
+ }
+
+ if mp == getg().m {
+ throw("self-preempt")
+ }
+
+ // Acquire our own handle to mp's thread.
+ lock(&mp.threadLock)
+ if mp.thread == 0 {
+ // The M hasn't been minit'd yet (or was just unminit'd).
+ unlock(&mp.threadLock)
+ atomic.Xadd(&mp.preemptGen, 1)
+ return
+ }
+ var thread uintptr
+ stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
+ unlock(&mp.threadLock)
+
+ // Prepare thread context buffer.
+ var c *context
+ cbuf := make([]byte, unsafe.Sizeof(*c)+15)
+ // Align Context to 16 bytes.
+ c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
+ c.contextflags = _CONTEXT_CONTROL
+
+ // Serialize thread suspension. SuspendThread is asynchronous,
+ // so it's otherwise possible for two threads to suspend each
+ // other and deadlock. We must hold this lock until after
+ // GetThreadContext, since that blocks until the thread is
+ // actually suspended.
+ lock(&suspendLock)
+
+ // Suspend the thread.
+ if int32(stdcall1(_SuspendThread, thread)) == -1 {
+ unlock(&suspendLock)
+ stdcall1(_CloseHandle, thread)
+ // The thread no longer exists. This shouldn't be
+ // possible, but just acknowledge the request.
+ atomic.Xadd(&mp.preemptGen, 1)
+ return
+ }
+
+ // We have to be very careful between this point and once
+ // we've shown mp is at an async safe-point. This is like a
+ // signal handler in the sense that mp could have been doing
+ // anything when we stopped it, including holding arbitrary
+ // locks.
+
+ // We have to get the thread context before inspecting the M
+ // because SuspendThread only requests a suspend.
+ // GetThreadContext actually blocks until it's suspended.
+ stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+
+ unlock(&suspendLock)
+
+ // Does it want a preemption and is it safe to preempt?
+ gp := gFromTLS(mp)
+ if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()) {
+ // Inject call to asyncPreempt
+ targetPC := funcPC(asyncPreempt)
+ switch GOARCH {
+ default:
+ throw("unsupported architecture")
+ case "386", "amd64":
+ // Make it look like the thread called targetPC.
+ pc := c.ip()
+ sp := c.sp()
+ sp -= sys.PtrSize
+ *(*uintptr)(unsafe.Pointer(sp)) = pc
+ c.set_sp(sp)
+ c.set_ip(targetPC)
+ }
+
+ stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
+ }
+
+ // Acknowledge the preemption.
+ atomic.Xadd(&mp.preemptGen, 1)
+
+ stdcall1(_ResumeThread, thread)
+ stdcall1(_CloseHandle, thread)
}
diff --git a/src/runtime/preempt.go b/src/runtime/preempt.go
index f154614..60e1bce 100644
--- a/src/runtime/preempt.go
+++ b/src/runtime/preempt.go
@@ -118,6 +118,7 @@
stopped := false
var asyncM *m
var asyncGen uint32
+ var nextPreemptM int64
for i := 0; ; i++ {
switch s := readgstatus(gp); s {
default:
@@ -205,14 +206,32 @@
gp.preempt = true
gp.stackguard0 = stackPreempt
- // Send asynchronous preemption.
- asyncM = gp.m
- asyncGen = atomic.Load(&asyncM.preemptGen)
- if preemptMSupported && debug.asyncpreemptoff == 0 {
- preemptM(asyncM)
- }
+ // Prepare for asynchronous preemption.
+ asyncM2 := gp.m
+ asyncGen2 := atomic.Load(&asyncM2.preemptGen)
+ needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
+ asyncM = asyncM2
+ asyncGen = asyncGen2
casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+
+ // Send asynchronous preemption. We do this
+ // after CASing the G back to _Grunning
+ // because preemptM may be synchronous and we
+ // don't want to catch the G just spinning on
+ // its status.
+ if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
+ // Rate limit preemptM calls. This is
+ // particularly important on Windows
+ // where preemptM is actually
+ // synchronous and the spin loop here
+ // can lead to live-lock.
+ now := nanotime()
+ if now >= nextPreemptM {
+ nextPreemptM = now + yieldDelay/2
+ preemptM(asyncM)
+ }
+ }
}
// TODO: Don't busy wait. This loop should really only