src/runtime/netpoll.go - go - Git at Google

 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 //go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris || windows

 package runtime

 import (
 	"runtime/internal/atomic"
 	"unsafe"
 )

 // Integrated network poller (platform-independent part).
 // A particular implementation (epoll/kqueue/port/AIX/Windows)
 // must define the following functions:
 //
 // func netpollinit()
 //     Initialize the poller. Only called once.
 //
 // func netpollopen(fd uintptr, pd *pollDesc) int32
 //     Arm edge-triggered notifications for fd. The pd argument is to pass
 //     back to netpollready when fd is ready. Return an errno value.
 //
 // func netpollclose(fd uintptr) int32
 //     Disable notifications for fd. Return an errno value.
 //
 // func netpoll(delta int64) gList
 //     Poll the network. If delta < 0, block indefinitely. If delta == 0,
 //     poll without blocking. If delta > 0, block for up to delta nanoseconds.
 //     Return a list of goroutines built by calling netpollready.
 //
 // func netpollBreak()
 //     Wake up the network poller, assumed to be blocked in netpoll.
 //
 // func netpollIsPollDescriptor(fd uintptr) bool
 //     Reports whether fd is a file descriptor used by the poller.

 // Error codes returned by runtime_pollReset and runtime_pollWait.
 // The numeric values are shared with internal/poll/fd_poll_runtime.go
 // and must stay identical to the ones defined there.
 const (
 	// pollNoError: no error.
 	pollNoError = 0
 	// pollErrClosing: descriptor is closed.
 	pollErrClosing = 1
 	// pollErrTimeout: I/O timeout.
 	pollErrTimeout = 2
 	// pollErrNotPollable: general error polling descriptor.
 	pollErrNotPollable = 3
 )

 // pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
 // goroutines respectively. The semaphore can be in the following states:
 // pdReady - io readiness notification is pending;
 //           a goroutine consumes the notification by changing the state to nil.
 // pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
 //          the goroutine commits to park by changing the state to G pointer,
 //          or, alternatively, concurrent io notification changes the state to pdReady,
 //          or, alternatively, concurrent timeout/close changes the state to nil.
 // G pointer - the goroutine is blocked on the semaphore;
 //             io notification or timeout/close changes the state to pdReady or nil respectively
 //             and unparks the goroutine.
 // nil - none of the above.
 const (
 	// pdReady marks a pending io readiness notification.
 	pdReady uintptr = 1
 	// pdWait marks a goroutine that is preparing to park but has not parked yet.
 	pdWait uintptr = 2
 )

 // pollBlockSize is the number of bytes pollCache.alloc divides by the
 // size of a pollDesc to decide how many descriptors to allocate at once.
 const pollBlockSize = 4 << 10

 // Network poller descriptor.
 //
 // A pollDesc tracks one file descriptor registered with the poller.
 // Descriptors are handed out by pollCache.alloc, which carves them out of
 // persistentalloc memory, and are recycled through pollCache.free.
 //
 // No heap pointers.
 //
 //go:notinheap
 type pollDesc struct {
 	link *pollDesc // in pollcache, protected by pollcache.lock
 	fd   uintptr   // constant for pollDesc usage lifetime

 	// atomicInfo holds bits from closing, rd, and wd,
 	// which are only ever written while holding the lock,
 	// summarized for use by netpollcheckerr,
 	// which cannot acquire the lock.
 	// After writing these fields under lock in a way that
 	// might change the summary, code must call publishInfo
 	// before releasing the lock.
 	// Code that changes fields and then calls netpollunblock
 	// (while still holding the lock) must call publishInfo
 	// before calling netpollunblock, because publishInfo is what
 	// stops netpollblock from blocking anew
 	// (by changing the result of netpollcheckerr).
 	// atomicInfo also holds the eventErr bit,
 	// recording whether a poll event on the fd got an error;
 	// atomicInfo is the only source of truth for that bit.
 	atomicInfo atomic.Uint32 // atomic pollInfo

 	// rg, wg are accessed atomically and hold g pointers.
 	// (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
 	// Each is a binary semaphore holding one of: 0 (nil), pdReady, pdWait,
 	// or the address of the goroutine parked on it.
 	rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
 	wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil

 	lock    mutex // protects the following fields
 	closing bool      // set by poll_runtime_pollUnblock, cleared by poll_runtime_pollOpen
 	user    uint32    // user settable cookie
 	rseq    uintptr   // protects from stale read timers
 	rt      timer     // read deadline timer (set if rt.f != nil)
 	rd      int64     // read deadline (a nanotime in the future, 0 when unset, -1 when expired)
 	wseq    uintptr   // protects from stale write timers
 	wt      timer     // write deadline timer (set if wt.f != nil)
 	wd      int64     // write deadline (a nanotime in the future, 0 when unset, -1 when expired)
 	self    *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
 }

 // pollInfo is the bit set consulted by netpollcheckerr. It is stored
 // atomically in pollDesc.atomicInfo and mostly mirrors state that is
 // manipulated under pollDesc.lock. The exception is pollEventErr,
 // which exists only in the pollInfo.
 type pollInfo uint32

 // Bits of a pollInfo.
 const (
 	pollClosing              = 1 << iota // descriptor is closing
 	pollEventErr                         // a poll event on the fd got an error
 	pollExpiredReadDeadline              // read deadline has expired
 	pollExpiredWriteDeadline             // write deadline has expired
 )

 // closing reports whether the pollClosing bit is set.
 func (i pollInfo) closing() bool {
 	return i&pollClosing == pollClosing
 }

 // eventErr reports whether the pollEventErr bit is set.
 func (i pollInfo) eventErr() bool {
 	return i&pollEventErr == pollEventErr
 }

 // expiredReadDeadline reports whether the pollExpiredReadDeadline bit is set.
 func (i pollInfo) expiredReadDeadline() bool {
 	return i&pollExpiredReadDeadline == pollExpiredReadDeadline
 }

 // expiredWriteDeadline reports whether the pollExpiredWriteDeadline bit is set.
 func (i pollInfo) expiredWriteDeadline() bool {
 	return i&pollExpiredWriteDeadline == pollExpiredWriteDeadline
 }

 // info atomically loads the pollInfo summary published for pd.
 func (pd *pollDesc) info() pollInfo {
 	bits := pd.atomicInfo.Load()
 	return pollInfo(bits)
 }

 // publishInfo recomputes the lock-derived bits of pd.atomicInfo
 // (the value returned by pd.info) from pd.closing, pd.rd, and pd.wd.
 // The caller must hold pd.lock, and must call publishInfo after any
 // change that can alter those bits: a change to closing, or rd or wd
 // moving between negative (expired) and non-negative.
 // The pollEventErr bit is carried over unchanged.
 func (pd *pollDesc) publishInfo() {
 	var summary uint32
 	if pd.closing {
 		summary |= pollClosing
 	}
 	if pd.rd < 0 {
 		summary |= pollExpiredReadDeadline
 	}
 	if pd.wd < 0 {
 		summary |= pollExpiredWriteDeadline
 	}

 	// Replace everything except the pollEventErr bit, retrying if the
 	// word changed between the load and the compare-and-swap.
 	for {
 		old := pd.atomicInfo.Load()
 		if pd.atomicInfo.CompareAndSwap(old, (old&pollEventErr)|summary) {
 			return
 		}
 	}
 }

 // setEventErr makes pd.info().eventErr() report b.
 // All other bits of pd.atomicInfo are left untouched.
 func (pd *pollDesc) setEventErr(b bool) {
 	for {
 		old := pd.atomicInfo.Load()
 		if (old&pollEventErr != 0) == b {
 			// The bit already has the requested value.
 			return
 		}
 		// Flip just the pollEventErr bit; retry if the word changed meanwhile.
 		if pd.atomicInfo.CompareAndSwap(old, old^pollEventErr) {
 			return
 		}
 	}
 }

 // pollCache is a free list of pollDesc values, linked through pollDesc.link.
 type pollCache struct {
 	lock  mutex     // protects first and the link fields of the listed descriptors
 	first *pollDesc // head of the free list, nil when the list is empty
 	// PollDesc objects must be type-stable,
 	// because we can get ready notification from epoll/kqueue
 	// after the descriptor is closed/reused.
 	// Stale notifications are detected using seq variable,
 	// seq is incremented when deadlines are changed or descriptor is reused.
 }

 var (
 	// netpollInitLock serializes the one-time call to netpollinit
 	// made by netpollGenericInit.
 	netpollInitLock mutex
 	// netpollInited is stored as 1 once netpollinit has run; see netpollinited.
 	netpollInited   uint32

 	// pollcache is the free list that pollDesc values are allocated from.
 	pollcache      pollCache
 	// netpollWaiters is incremented by netpollblockcommit and
 	// decremented by netpollgoready.
 	netpollWaiters uint32
 )

 // poll_runtime_pollServerInit, which is internal/poll.runtime_pollServerInit,
 // initializes the network poller by calling netpollGenericInit.
 //go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
 func poll_runtime_pollServerInit() {
 	netpollGenericInit()
 }

 // netpollGenericInit runs netpollinit the first time it is called and
 // records that fact in netpollInited. Later calls return immediately.
 func netpollGenericInit() {
 	if atomic.Load(&netpollInited) != 0 {
 		return
 	}
 	lockInit(&netpollInitLock, lockRankNetpollInit)
 	lock(&netpollInitLock)
 	// Check again under the lock: another caller may have finished
 	// initialization while we were acquiring it.
 	if netpollInited == 0 {
 		netpollinit()
 		atomic.Store(&netpollInited, 1)
 	}
 	unlock(&netpollInitLock)
 }

 // netpollinited reports whether netpollGenericInit has completed
 // the call to netpollinit.
 func netpollinited() bool {
 	inited := atomic.Load(&netpollInited)
 	return inited != 0
 }

 // poll_runtime_isPollServerDescriptor, which is
 // internal/poll.runtime_isPollServerDescriptor, reports whether fd is a
 // descriptor being used by netpoll.
 //go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor
 func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
 	return netpollIsPollDescriptor(fd)
 }

 // poll_runtime_pollOpen, which is internal/poll.runtime_pollOpen,
 // takes a pollDesc from pollcache, resets it for fd, and registers fd
 // with the platform poller through netpollopen.
 // On success it returns the descriptor and 0. If netpollopen fails, the
 // descriptor goes back to pollcache and the result is nil plus the errno.
 //go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
 func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
 	pd := pollcache.alloc()
 	lock(&pd.lock)
 	// A descriptor coming off the free list may hold 0 or a leftover
 	// pdReady, but never pdWait or a parked goroutine.
 	wg := pd.wg.Load()
 	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on free polldesc")
 	}
 	rg := pd.rg.Load()
 	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on free polldesc")
 	}
 	pd.fd = fd
 	pd.closing = false
 	pd.setEventErr(false)
 	// Bump the sequence numbers so that timers armed for a previous
 	// use of this descriptor are recognized as stale.
 	pd.rseq++
 	pd.rg.Store(0)
 	pd.rd = 0
 	pd.wseq++
 	pd.wg.Store(0)
 	pd.wd = 0
 	// self is the storage makeArg points interfaces at.
 	pd.self = pd
 	// closing, rd, and wd were all rewritten above, so republish the summary.
 	pd.publishInfo()
 	unlock(&pd.lock)

 	errno := netpollopen(fd, pd)
 	if errno != 0 {
 		pollcache.free(pd)
 		return nil, int(errno)
 	}
 	return pd, 0
 }

 // poll_runtime_pollClose, which is internal/poll.runtime_pollClose,
 // unregisters pd.fd from the platform poller and returns pd to pollcache.
 // pd must already be marked closing (poll_runtime_pollUnblock sets that
 // flag) and must have no goroutine waiting on it; otherwise it throws.
 //go:linkname poll_runtime_pollClose internal/poll.runtime_pollClose
 func poll_runtime_pollClose(pd *pollDesc) {
 	// NOTE(review): closing is read here without holding pd.lock;
 	// presumably callers guarantee pollUnblock has completed — confirm.
 	if !pd.closing {
 		throw("runtime: close polldesc w/o unblock")
 	}
 	// Only 0 or pdReady may remain in the semaphores at this point.
 	wg := pd.wg.Load()
 	if wg != 0 && wg != pdReady {
 		throw("runtime: blocked write on closing polldesc")
 	}
 	rg := pd.rg.Load()
 	if rg != 0 && rg != pdReady {
 		throw("runtime: blocked read on closing polldesc")
 	}
 	// The errno returned by netpollclose is not inspected.
 	netpollclose(pd.fd)
 	pollcache.free(pd)
 }

 // free pushes pd onto the front of the cache's free list.
 func (c *pollCache) free(pd *pollDesc) {
 	lock(&c.lock)
 	// Link pd in front of the current head, then make it the new head.
 	pd.link, c.first = c.first, pd
 	unlock(&c.lock)
 }

 // poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
 // gets a descriptor ready for polling in mode ('r' or 'w') by clearing
 // the semaphore for that mode.
 // If the descriptor is already in an error state the semaphore is left
 // alone and the corresponding error code (defined above) is returned.
 //go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
 func poll_runtime_pollReset(pd *pollDesc, mode int) int {
 	if code := netpollcheckerr(pd, int32(mode)); code != pollNoError {
 		return code
 	}
 	switch mode {
 	case 'r':
 		pd.rg.Store(0)
 	case 'w':
 		pd.wg.Store(0)
 	}
 	return pollNoError
 }

 // poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
 // blocks until the descriptor is ready in the given mode ('r' for
 // reading, 'w' for writing) or until netpollcheckerr reports an error.
 // The result is one of the poll error codes defined above.
 //go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
 func poll_runtime_pollWait(pd *pollDesc, mode int) int {
 	if code := netpollcheckerr(pd, int32(mode)); code != pollNoError {
 		return code
 	}
 	// As for now only Solaris, illumos, and AIX use level-triggered IO.
 	if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
 		netpollarm(pd, mode)
 	}
 	for {
 		if netpollblock(pd, int32(mode), false) {
 			return pollNoError
 		}
 		// Woken without IO readiness: report the error if there is one.
 		if code := netpollcheckerr(pd, int32(mode)); code != pollNoError {
 			return code
 		}
 		// No error to report. This can happen if a timeout fired and
 		// unblocked us, but the timeout was reset before we got to run.
 		// Pretend it has not happened and wait again.
 	}
 }

 // poll_runtime_pollWaitCanceled, which is internal/poll.runtime_pollWaitCanceled,
 // is used only on windows after a failed attempt to cancel a pending
 // async IO operation. It waits for ioready and ignores closing or timeouts.
 //go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
 func poll_runtime_pollWaitCanceled(pd *pollDesc, mode int) {
 	for {
 		// waitio=true: block even if the descriptor is in an error state.
 		if netpollblock(pd, int32(mode), true) {
 			return
 		}
 	}
 }

 // poll_runtime_pollSetDeadline, which is internal/poll.runtime_pollSetDeadline,
 // updates the read and/or write deadline of pd according to mode,
 // which is 'r', 'w', or 'r'+'w'.
 // A positive d has nanotime() added to it before being stored; a zero or
 // negative d is stored as is. A stored deadline of zero leaves the timer
 // unarmed, and a negative one marks the deadline as expired and wakes any
 // goroutine currently blocked in that mode.
 // The call does nothing if pd is already closing.
 //go:linkname poll_runtime_pollSetDeadline internal/poll.runtime_pollSetDeadline
 func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
 	lock(&pd.lock)
 	if pd.closing {
 		unlock(&pd.lock)
 		return
 	}
 	// Remember the previous deadlines, and whether they were a single
 	// shared read+write deadline, to detect what actually changes.
 	rd0, wd0 := pd.rd, pd.wd
 	combo0 := rd0 > 0 && rd0 == wd0
 	if d > 0 {
 		d += nanotime()
 		if d <= 0 {
 			// If the user has a deadline in the future, but the delay calculation
 			// overflows, then set the deadline to the maximum possible value.
 			d = 1<<63 - 1
 		}
 	}
 	if mode == 'r' || mode == 'r'+'w' {
 		pd.rd = d
 	}
 	if mode == 'w' || mode == 'r'+'w' {
 		pd.wd = d
 	}
 	pd.publishInfo()
 	// When both deadlines are set and equal, the read timer alone serves
 	// both directions (netpollDeadline) and the write timer is not armed.
 	combo := pd.rd > 0 && pd.rd == pd.wd
 	rtf := netpollReadDeadline
 	if combo {
 		rtf = netpollDeadline
 	}
 	if pd.rt.f == nil {
 		// No read timer armed yet: arm one if there is a deadline.
 		if pd.rd > 0 {
 			pd.rt.f = rtf
 			// Copy current seq into the timer arg.
 			// Timer func will check the seq against current descriptor seq,
 			// if they differ the descriptor was reused or timers were reset.
 			pd.rt.arg = pd.makeArg()
 			pd.rt.seq = pd.rseq
 			resettimer(&pd.rt, pd.rd)
 		}
 	} else if pd.rd != rd0 || combo != combo0 {
 		// A read timer is armed and its deadline or function changed:
 		// re-arm it, or delete it if the deadline is no longer in the future.
 		pd.rseq++ // invalidate current timers
 		if pd.rd > 0 {
 			modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq)
 		} else {
 			deltimer(&pd.rt)
 			pd.rt.f = nil
 		}
 	}
 	if pd.wt.f == nil {
 		// No write timer armed yet: arm one unless the read timer covers it.
 		if pd.wd > 0 && !combo {
 			pd.wt.f = netpollWriteDeadline
 			pd.wt.arg = pd.makeArg()
 			pd.wt.seq = pd.wseq
 			resettimer(&pd.wt, pd.wd)
 		}
 	} else if pd.wd != wd0 || combo != combo0 {
 		pd.wseq++ // invalidate current timers
 		if pd.wd > 0 && !combo {
 			modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq)
 		} else {
 			deltimer(&pd.wt)
 			pd.wt.f = nil
 		}
 	}
 	// If we set the new deadline in the past, unblock currently pending IO if any.
 	// Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
 	var rg, wg *g
 	if pd.rd < 0 {
 		rg = netpollunblock(pd, 'r', false)
 	}
 	if pd.wd < 0 {
 		wg = netpollunblock(pd, 'w', false)
 	}
 	unlock(&pd.lock)
 	// The goroutines are made runnable only after pd.lock is released.
 	if rg != nil {
 		netpollgoready(rg, 3)
 	}
 	if wg != nil {
 		netpollgoready(wg, 3)
 	}
 }

 // poll_runtime_pollUnblock, which is internal/poll.runtime_pollUnblock,
 // marks pd as closing, wakes any goroutines blocked on it for reading or
 // writing, and deletes its deadline timers.
 // It throws if pd is already closing.
 //go:linkname poll_runtime_pollUnblock internal/poll.runtime_pollUnblock
 func poll_runtime_pollUnblock(pd *pollDesc) {
 	lock(&pd.lock)
 	if pd.closing {
 		throw("runtime: unblock on closing polldesc")
 	}
 	pd.closing = true
 	// Bump both sequence numbers so that timers already armed are
 	// treated as stale by netpolldeadlineimpl.
 	pd.rseq++
 	pd.wseq++
 	var rg, wg *g
 	// Publish the closing bit before unblocking, so that a woken
 	// goroutine sees the error in netpollcheckerr instead of blocking anew.
 	pd.publishInfo()
 	rg = netpollunblock(pd, 'r', false)
 	wg = netpollunblock(pd, 'w', false)
 	if pd.rt.f != nil {
 		deltimer(&pd.rt)
 		pd.rt.f = nil
 	}
 	if pd.wt.f != nil {
 		deltimer(&pd.wt)
 		pd.wt.f = nil
 	}
 	unlock(&pd.lock)
 	// The goroutines are made runnable only after pd.lock is released.
 	if rg != nil {
 		netpollgoready(rg, 3)
 	}
 	if wg != nil {
 		netpollgoready(wg, 3)
 	}
 }

 // netpollready is called by the platform-specific netpoll function
 // to declare that the fd associated with pd is ready for I/O.
 // mode is 'r', 'w', or 'r'+'w', saying whether the fd is ready for
 // reading, writing, or both. Any goroutine released as a result is
 // pushed onto toRun, the list that netpoll returns.
 //
 // This may run while the world is stopped, so write barriers are not allowed.
 //
 // NOTE(review): unlike netpollgoready, this path does not adjust
 // netpollWaiters — confirm that this is intended.
 //go:nowritebarrier
 func netpollready(toRun *gList, pd *pollDesc, mode int32) {
 	readReady := mode == 'r' || mode == 'r'+'w'
 	writeReady := mode == 'w' || mode == 'r'+'w'

 	var reader, writer *g
 	if readReady {
 		reader = netpollunblock(pd, 'r', true)
 	}
 	if writeReady {
 		writer = netpollunblock(pd, 'w', true)
 	}

 	// Queue the reader ahead of the writer.
 	if reader != nil {
 		toRun.push(reader)
 	}
 	if writer != nil {
 		toRun.push(writer)
 	}
 }

 // netpollcheckerr maps the published pollInfo of pd to a poll error code
 // for the given mode ('r' or 'w'). Closing takes precedence over an
 // expired deadline, which takes precedence over an event error.
 // It reads only pd.info() and does not take pd.lock.
 func netpollcheckerr(pd *pollDesc, mode int32) int {
 	state := pd.info()
 	switch {
 	case state.closing():
 		return pollErrClosing
 	case mode == 'r' && state.expiredReadDeadline(),
 		mode == 'w' && state.expiredWriteDeadline():
 		return pollErrTimeout
 	case mode == 'r' && state.eventErr():
 		// Report an event scanning error only on a read event.
 		// An error on a write event will be captured in a subsequent
 		// write call that is able to report a more specific error.
 		return pollErrNotPollable
 	}
 	return pollNoError
 }

 // netpollblockcommit tries to move the semaphore at gpp from pdWait to
 // the address of gp, and reports whether it succeeded.
 // netpollblock passes it to gopark together with the semaphore pointer.
 func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
 	if !atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp))) {
 		return false
 	}
 	// Bump the count of goroutines waiting for the poller.
 	// The scheduler uses this to decide whether to block
 	// waiting for the poller if there is nothing else to do.
 	atomic.Xadd(&netpollWaiters, 1)
 	return true
 }

 // netpollgoready decrements netpollWaiters and then makes gp runnable
 // through goready. traceskip is forwarded to goready plus one, for this
 // function's own frame.
 func netpollgoready(gp *g, traceskip int) {
 	atomic.Xadd(&netpollWaiters, -1)
 	goready(gp, traceskip+1)
 }

 // netpollblock parks the calling goroutine on the read semaphore of pd,
 // or on the write semaphore when mode is 'w', until it is released.
 // returns true if IO is ready, or false if timedout or closed
 // waitio - wait only for completed IO, ignore errors
 // Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
 // can hold only a single waiting goroutine for each mode.
 func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
 	gpp := &pd.rg
 	if mode == 'w' {
 		gpp = &pd.wg
 	}

 	// set the gpp semaphore to pdWait
 	for {
 		// Consume notification if already ready.
 		if gpp.CompareAndSwap(pdReady, 0) {
 			return true
 		}
 		if gpp.CompareAndSwap(0, pdWait) {
 			break
 		}

 		// Double check that this isn't corrupt; otherwise we'd loop
 		// forever.
 		// Neither CAS succeeded, so the only legitimate explanation is
 		// that the value flipped between 0 and pdReady in the meantime.
 		if v := gpp.Load(); v != pdReady && v != 0 {
 			throw("runtime: double wait")
 		}
 	}

 	// need to recheck error states after setting gpp to pdWait
 	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
 	// do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
 	if waitio || netpollcheckerr(pd, mode) == pollNoError {
 		// netpollblockcommit attempts the pdWait -> G pointer transition.
 		// Presumably gopark parks only if it reports success — see gopark.
 		gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
 	}
 	// be careful to not lose concurrent pdReady notification
 	old := gpp.Swap(0)
 	// Anything above pdWait can only be a G pointer, which must not
 	// still be stored once this goroutine is running again.
 	if old > pdWait {
 		throw("runtime: corrupted polldesc")
 	}
 	return old == pdReady
 }

 // netpollunblock releases the read semaphore of pd, or the write
 // semaphore when mode is 'w'.
 // With ioready set the semaphore is left at pdReady, otherwise at 0.
 // It returns the goroutine that was parked on the semaphore, if any.
 // The result is nil when the semaphore held 0, pdReady, or pdWait.
 // The caller is responsible for making the returned goroutine runnable.
 func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
 	gpp := &pd.rg
 	if mode == 'w' {
 		gpp = &pd.wg
 	}

 	for {
 		old := gpp.Load()
 		if old == pdReady {
 			// A notification is already pending; nothing to change.
 			return nil
 		}
 		if old == 0 && !ioready {
 			// Only set pdReady for ioready. runtime_pollWait
 			// will check for timeout/cancel before waiting.
 			return nil
 		}
 		// new stays 0 for timeout/close and becomes pdReady for ioready.
 		var new uintptr
 		if ioready {
 			new = pdReady
 		}
 		if gpp.CompareAndSwap(old, new) {
 			if old == pdWait {
 				// The waiter had not parked yet, so there is no goroutine to wake.
 				old = 0
 			}
 			return (*g)(unsafe.Pointer(old))
 		}
 		// The semaphore changed under us; reload and try again.
 	}
 }

 // netpolldeadlineimpl is the common body of the deadline timer functions.
 // seq is the sequence number the timer was armed with. read and write
 // select which deadlines the timer covers.
 // If the timer is not stale, each covered deadline is marked expired
 // (set to -1) and any goroutine blocked in that mode is woken.
 func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
 	lock(&pd.lock)
 	// Seq arg is seq when the timer was set.
 	// If it's stale, ignore the timer event.
 	// Timers that cover reading are checked against rseq, write-only
 	// timers against wseq.
 	currentSeq := pd.rseq
 	if !read {
 		currentSeq = pd.wseq
 	}
 	if seq != currentSeq {
 		// The descriptor was reused or timers were reset.
 		unlock(&pd.lock)
 		return
 	}
 	var rg *g
 	if read {
 		if pd.rd <= 0 || pd.rt.f == nil {
 			throw("runtime: inconsistent read deadline")
 		}
 		pd.rd = -1
 		// Publish the expired bit before unblocking.
 		pd.publishInfo()
 		rg = netpollunblock(pd, 'r', false)
 	}
 	var wg *g
 	if write {
 		// && binds tighter than ||: an unset write timer is acceptable
 		// only when read is also true, i.e. for the combined timer.
 		if pd.wd <= 0 || pd.wt.f == nil && !read {
 			throw("runtime: inconsistent write deadline")
 		}
 		pd.wd = -1
 		pd.publishInfo()
 		wg = netpollunblock(pd, 'w', false)
 	}
 	unlock(&pd.lock)
 	// The goroutines are made runnable only after pd.lock is released.
 	if rg != nil {
 		netpollgoready(rg, 0)
 	}
 	if wg != nil {
 		netpollgoready(wg, 0)
 	}
 }

 // netpollDeadline is the timer function used when the read and write
 // deadlines are equal; it expires both. arg must hold a *pollDesc.
 func netpollDeadline(arg any, seq uintptr) {
 	netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
 }

 // netpollReadDeadline is the timer function that expires only the read
 // deadline. arg must hold a *pollDesc.
 func netpollReadDeadline(arg any, seq uintptr) {
 	netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
 }

 // netpollWriteDeadline is the timer function that expires only the write
 // deadline. arg must hold a *pollDesc.
 func netpollWriteDeadline(arg any, seq uintptr) {
 	netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
 }

 // alloc pops a pollDesc off the cache's free list and returns it.
 // When the list is empty it is first refilled from a newly allocated
 // chunk of about pollBlockSize bytes.
 func (c *pollCache) alloc() *pollDesc {
 	lock(&c.lock)
 	if c.first == nil {
 		const pdSize = unsafe.Sizeof(pollDesc{})
 		// Number of descriptors that fit in one chunk, but at least one.
 		n := pollBlockSize / pdSize
 		if n == 0 {
 			n = 1
 		}
 		// Must be in non-GC memory because can be referenced
 		// only from epoll/kqueue internals.
 		mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
 		// Thread every descriptor in the chunk onto the free list.
 		for i := uintptr(0); i < n; i++ {
 			pd := (*pollDesc)(add(mem, i*pdSize))
 			pd.link = c.first
 			c.first = pd
 		}
 	}
 	pd := c.first
 	c.first = pd.link
 	// The descriptor's lock is (re)initialized on every allocation.
 	lockInit(&pd.lock, lockRankPollDesc)
 	unlock(&c.lock)
 	return pd
 }

 // makeArg converts pd to an interface{}.
 // makeArg does not do any allocation. Normally, such
 // a conversion requires an allocation because pointers to
 // go:notinheap types (which pollDesc is) must be stored
 // in interfaces indirectly. See issue 42076.
 //
 // The interface is built by hand: its type word is pdType and its data
 // word is the address of pd.self, which poll_runtime_pollOpen sets to pd.
 func (pd *pollDesc) makeArg() (i any) {
 	// View the result's storage as an eface to fill in both words directly.
 	x := (*eface)(unsafe.Pointer(&i))
 	x._type = pdType
 	x.data = unsafe.Pointer(&pd.self)
 	return
 }

 var (
 	// pdEface is an interface holding a nil *pollDesc; it exists only
 	// so that pdType can be read out of it.
 	pdEface any    = (*pollDesc)(nil)
 	// pdType is the type word of pdEface, which makeArg stores into
 	// the interfaces it builds.
 	pdType  *_type = efaceOf(&pdEface)._type
 )
	// Copyright 2013 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	//go:build aix || darwin || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris || windows

	package runtime

	import (
	"runtime/internal/atomic"
	"unsafe"
	)

	// Integrated network poller (platform-independent part).
	// A particular implementation (epoll/kqueue/port/AIX/Windows)
	// must define the following functions:
	//
	// func netpollinit()
	// Initialize the poller. Only called once.
	//
	// func netpollopen(fd uintptr, pd *pollDesc) int32
	// Arm edge-triggered notifications for fd. The pd argument is to pass
	// back to netpollready when fd is ready. Return an errno value.
	//
	// func netpollclose(fd uintptr) int32
	// Disable notifications for fd. Return an errno value.
	//
	// func netpoll(delta int64) gList
	// Poll the network. If delta < 0, block indefinitely. If delta == 0,
	// poll without blocking. If delta > 0, block for up to delta nanoseconds.
	// Return a list of goroutines built by calling netpollready.
	//
	// func netpollBreak()
	// Wake up the network poller, assumed to be blocked in netpoll.
	//
	// func netpollIsPollDescriptor(fd uintptr) bool
	// Reports whether fd is a file descriptor used by the poller.

	// Error codes returned by runtime_pollReset and runtime_pollWait.
	// These must match the values in internal/poll/fd_poll_runtime.go.
	const (
	pollNoError = 0 // no error
	pollErrClosing = 1 // descriptor is closed
	pollErrTimeout = 2 // I/O timeout
	pollErrNotPollable = 3 // general error polling descriptor
	)

	// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
	// goroutines respectively. The semaphore can be in the following states:
	// pdReady - io readiness notification is pending;
	// a goroutine consumes the notification by changing the state to nil.
	// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
	// the goroutine commits to park by changing the state to G pointer,
	// or, alternatively, concurrent io notification changes the state to pdReady,
	// or, alternatively, concurrent timeout/close changes the state to nil.
	// G pointer - the goroutine is blocked on the semaphore;
	// io notification or timeout/close changes the state to pdReady or nil respectively
	// and unparks the goroutine.
	// nil - none of the above.
	const (
	pdReady uintptr = 1
	pdWait uintptr = 2
	)

	const pollBlockSize = 4 * 1024

	// Network poller descriptor.
	//
	// No heap pointers.
	//
	//go:notinheap
	type pollDesc struct {
	link *pollDesc // in pollcache, protected by pollcache.lock
	fd uintptr // constant for pollDesc usage lifetime

	// atomicInfo holds bits from closing, rd, and wd,
	// which are only ever written while holding the lock,
	// summarized for use by netpollcheckerr,
	// which cannot acquire the lock.
	// After writing these fields under lock in a way that
	// might change the summary, code must call publishInfo
	// before releasing the lock.
	// Code that changes fields and then calls netpollunblock
	// (while still holding the lock) must call publishInfo
	// before calling netpollunblock, because publishInfo is what
	// stops netpollblock from blocking anew
	// (by changing the result of netpollcheckerr).
	// atomicInfo also holds the eventErr bit,
	// recording whether a poll event on the fd got an error;
	// atomicInfo is the only source of truth for that bit.
	atomicInfo atomic.Uint32 // atomic pollInfo

	// rg, wg are accessed atomically and hold g pointers.
	// (Using atomic.Uintptr here is similar to using guintptr elsewhere.)
	rg atomic.Uintptr // pdReady, pdWait, G waiting for read or nil
	wg atomic.Uintptr // pdReady, pdWait, G waiting for write or nil

	lock mutex // protects the following fields
	closing bool
	user uint32 // user settable cookie
	rseq uintptr // protects from stale read timers
	rt timer // read deadline timer (set if rt.f != nil)
	rd int64 // read deadline (a nanotime in the future, -1 when expired)
	wseq uintptr // protects from stale write timers
	wt timer // write deadline timer
	wd int64 // write deadline (a nanotime in the future, -1 when expired)
	self *pollDesc // storage for indirect interface. See (*pollDesc).makeArg.
	}

	// pollInfo is the bits needed by netpollcheckerr, stored atomically,
	// mostly duplicating state that is manipulated under lock in pollDesc.
	// The one exception is the pollEventErr bit, which is maintained only
	// in the pollInfo.
	type pollInfo uint32

	const (
	pollClosing = 1 << iota
	pollEventErr
	pollExpiredReadDeadline
	pollExpiredWriteDeadline
	)

	func (i pollInfo) closing() bool { return i&pollClosing != 0 }
	func (i pollInfo) eventErr() bool { return i&pollEventErr != 0 }
	func (i pollInfo) expiredReadDeadline() bool { return i&pollExpiredReadDeadline != 0 }
	func (i pollInfo) expiredWriteDeadline() bool { return i&pollExpiredWriteDeadline != 0 }

	// info returns the pollInfo corresponding to pd.
	func (pd *pollDesc) info() pollInfo {
	return pollInfo(pd.atomicInfo.Load())
	}

	// publishInfo updates pd.atomicInfo (returned by pd.info)
	// using the other values in pd.
	// It must be called while holding pd.lock,
	// and it must be called after changing anything
	// that might affect the info bits.
	// In practice this means after changing closing
	// or changing rd or wd from < 0 to >= 0.
	func (pd *pollDesc) publishInfo() {
	var info uint32
	if pd.closing {
	info |= pollClosing
	}
	if pd.rd < 0 {
	info |= pollExpiredReadDeadline
	}
	if pd.wd < 0 {
	info |= pollExpiredWriteDeadline
	}

	// Set all of x except the pollEventErr bit.
	x := pd.atomicInfo.Load()
	for !pd.atomicInfo.CompareAndSwap(x, (x&pollEventErr)|info) {
	x = pd.atomicInfo.Load()
	}
	}

	// setEventErr sets the result of pd.info().eventErr() to b.
	func (pd *pollDesc) setEventErr(b bool) {
	x := pd.atomicInfo.Load()
	for (x&pollEventErr != 0) != b && !pd.atomicInfo.CompareAndSwap(x, x^pollEventErr) {
	x = pd.atomicInfo.Load()
	}
	}

	type pollCache struct {
	lock mutex
	first *pollDesc
	// PollDesc objects must be type-stable,
	// because we can get ready notification from epoll/kqueue
	// after the descriptor is closed/reused.
	// Stale notifications are detected using seq variable,
	// seq is incremented when deadlines are changed or descriptor is reused.
	}

	var (
	netpollInitLock mutex
	netpollInited uint32

	pollcache pollCache
	netpollWaiters uint32
	)

	//go:linkname poll_runtime_pollServerInit internal/poll.runtime_pollServerInit
	func poll_runtime_pollServerInit() {
	netpollGenericInit()
	}

	func netpollGenericInit() {
	if atomic.Load(&netpollInited) == 0 {
	lockInit(&netpollInitLock, lockRankNetpollInit)
	lock(&netpollInitLock)
	if netpollInited == 0 {
	netpollinit()
	atomic.Store(&netpollInited, 1)
	}
	unlock(&netpollInitLock)
	}
	}

	func netpollinited() bool {
	return atomic.Load(&netpollInited) != 0
	}

	//go:linkname poll_runtime_isPollServerDescriptor internal/poll.runtime_isPollServerDescriptor

	// poll_runtime_isPollServerDescriptor reports whether fd is a
	// descriptor being used by netpoll.
	func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
	return netpollIsPollDescriptor(fd)
	}

	//go:linkname poll_runtime_pollOpen internal/poll.runtime_pollOpen
	func poll_runtime_pollOpen(fd uintptr) (*pollDesc, int) {
	pd := pollcache.alloc()
	lock(&pd.lock)
	wg := pd.wg.Load()
	if wg != 0 && wg != pdReady {
	throw("runtime: blocked write on free polldesc")
	}
	rg := pd.rg.Load()
	if rg != 0 && rg != pdReady {
	throw("runtime: blocked read on free polldesc")
	}
	pd.fd = fd
	pd.closing = false
	pd.setEventErr(false)
	pd.rseq++
	pd.rg.Store(0)
	pd.rd = 0
	pd.wseq++
	pd.wg.Store(0)
	pd.wd = 0
	pd.self = pd
	pd.publishInfo()
	unlock(&pd.lock)

	errno := netpollopen(fd, pd)
	if errno != 0 {
	pollcache.free(pd)
	return nil, int(errno)
	}
	return pd, 0
	}

	//go:linkname poll_runtime_pollClose internal/poll.runtime_pollClose
	func poll_runtime_pollClose(pd *pollDesc) {
	if !pd.closing {
	throw("runtime: close polldesc w/o unblock")
	}
	wg := pd.wg.Load()
	if wg != 0 && wg != pdReady {
	throw("runtime: blocked write on closing polldesc")
	}
	rg := pd.rg.Load()
	if rg != 0 && rg != pdReady {
	throw("runtime: blocked read on closing polldesc")
	}
	netpollclose(pd.fd)
	pollcache.free(pd)
	}

	func (c *pollCache) free(pd *pollDesc) {
	lock(&c.lock)
	pd.link = c.first
	c.first = pd
	unlock(&c.lock)
	}

	// poll_runtime_pollReset, which is internal/poll.runtime_pollReset,
	// prepares a descriptor for polling in mode, which is 'r' or 'w'.
	// This returns an error code; the codes are defined above.
	//go:linkname poll_runtime_pollReset internal/poll.runtime_pollReset
	func poll_runtime_pollReset(pd *pollDesc, mode int) int {
	errcode := netpollcheckerr(pd, int32(mode))
	if errcode != pollNoError {
	return errcode
	}
	if mode == 'r' {
	pd.rg.Store(0)
	} else if mode == 'w' {
	pd.wg.Store(0)
	}
	return pollNoError
	}

	// poll_runtime_pollWait, which is internal/poll.runtime_pollWait,
	// waits for a descriptor to be ready for reading or writing,
	// according to mode, which is 'r' or 'w'.
	// This returns an error code; the codes are defined above.
	//go:linkname poll_runtime_pollWait internal/poll.runtime_pollWait
	func poll_runtime_pollWait(pd *pollDesc, mode int) int {
	errcode := netpollcheckerr(pd, int32(mode))
	if errcode != pollNoError {
	return errcode
	}
	// As for now only Solaris, illumos, and AIX use level-triggered IO.
	if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" {
	netpollarm(pd, mode)
	}
	for !netpollblock(pd, int32(mode), false) {
	errcode = netpollcheckerr(pd, int32(mode))
	if errcode != pollNoError {
	return errcode
	}
	// Can happen if timeout has fired and unblocked us,
	// but before we had a chance to run, timeout has been reset.
	// Pretend it has not happened and retry.
	}
	return pollNoError
	}

	//go:linkname poll_runtime_pollWaitCanceled internal/poll.runtime_pollWaitCanceled
	func poll_runtime_pollWaitCanceled(pd *pollDesc, mode int) {
	// This function is used only on windows after a failed attempt to cancel
	// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
	for !netpollblock(pd, int32(mode), true) {
	}
	}

	//go:linkname poll_runtime_pollSetDeadline internal/poll.runtime_pollSetDeadline
	func poll_runtime_pollSetDeadline(pd *pollDesc, d int64, mode int) {
	lock(&pd.lock)
	if pd.closing {
	unlock(&pd.lock)
	return
	}
	rd0, wd0 := pd.rd, pd.wd
	combo0 := rd0 > 0 && rd0 == wd0
	if d > 0 {
	d += nanotime()
	if d <= 0 {
	// If the user has a deadline in the future, but the delay calculation
	// overflows, then set the deadline to the maximum possible value.
	d = 1<<63 - 1
	}
	}
	if mode == 'r' || mode == 'r'+'w' {
	pd.rd = d
	}
	if mode == 'w' || mode == 'r'+'w' {
	pd.wd = d
	}
	pd.publishInfo()
	combo := pd.rd > 0 && pd.rd == pd.wd
	rtf := netpollReadDeadline
	if combo {
	rtf = netpollDeadline
	}
	if pd.rt.f == nil {
	if pd.rd > 0 {
	pd.rt.f = rtf
	// Copy current seq into the timer arg.
	// Timer func will check the seq against current descriptor seq,
	// if they differ the descriptor was reused or timers were reset.
	pd.rt.arg = pd.makeArg()
	pd.rt.seq = pd.rseq
	resettimer(&pd.rt, pd.rd)
	}
	} else if pd.rd != rd0 || combo != combo0 {
	pd.rseq++ // invalidate current timers
	if pd.rd > 0 {
	modtimer(&pd.rt, pd.rd, 0, rtf, pd.makeArg(), pd.rseq)
	} else {
	deltimer(&pd.rt)
	pd.rt.f = nil
	}
	}
	if pd.wt.f == nil {
	if pd.wd > 0 && !combo {
	pd.wt.f = netpollWriteDeadline
	pd.wt.arg = pd.makeArg()
	pd.wt.seq = pd.wseq
	resettimer(&pd.wt, pd.wd)
	}
	} else if pd.wd != wd0 || combo != combo0 {
	pd.wseq++ // invalidate current timers
	if pd.wd > 0 && !combo {
	modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd.makeArg(), pd.wseq)
	} else {
	deltimer(&pd.wt)
	pd.wt.f = nil
	}
	}
	// If we set the new deadline in the past, unblock currently pending IO if any.
	// Note that pd.publishInfo has already been called, above, immediately after modifying rd and wd.
	var rg, wg *g
	if pd.rd < 0 {
	rg = netpollunblock(pd, 'r', false)
	}
	if pd.wd < 0 {
	wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	if rg != nil {
	netpollgoready(rg, 3)
	}
	if wg != nil {
	netpollgoready(wg, 3)
	}
	}

	// poll_runtime_pollUnblock marks pd as closing and wakes any goroutine
	// currently parked on it for reading or for writing. It throws if pd has
	// already been marked closing.
	//
	//go:linkname poll_runtime_pollUnblock internal/poll.runtime_pollUnblock
	func poll_runtime_pollUnblock(pd *pollDesc) {
		lock(&pd.lock)
		if pd.closing {
			throw("runtime: unblock on closing polldesc")
		}
		pd.closing = true
		// Advance both sequence numbers so that a deadline timer armed with
		// an earlier value is treated as stale when it fires.
		pd.rseq++
		pd.wseq++
		// Publish the closing state before looking at the waiters.
		pd.publishInfo()
		reader := netpollunblock(pd, 'r', false)
		writer := netpollunblock(pd, 'w', false)
		// Tear down whichever deadline timers are still installed.
		if pd.rt.f != nil {
			deltimer(&pd.rt)
			pd.rt.f = nil
		}
		if pd.wt.f != nil {
			deltimer(&pd.wt)
			pd.wt.f = nil
		}
		unlock(&pd.lock)
		// The goroutines are made runnable only after pd.lock is released.
		if reader != nil {
			netpollgoready(reader, 3)
		}
		if writer != nil {
			netpollgoready(writer, 3)
		}
	}

	// netpollready is called by the platform-specific netpoll function.
	// It declares that the fd associated with pd is ready for I/O.
	// The toRun argument is used to build a list of goroutines to return
	// from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
	// whether the fd is ready for reading or writing or both.
	//
	// This may run while the world is stopped, so write barriers are not allowed.
	//go:nowritebarrier
	func netpollready(toRun gList, pd pollDesc, mode int32) {
	var rg, wg *g
	if mode == 'r' \|\| mode == 'r'+'w' {
	rg = netpollunblock(pd, 'r', true)
	}
	if mode == 'w' \|\| mode == 'r'+'w' {
	wg = netpollunblock(pd, 'w', true)
	}
	if rg != nil {
	toRun.push(rg)
	}
	if wg != nil {
	toRun.push(wg)
	}
	}

	func netpollcheckerr(pd *pollDesc, mode int32) int {
	info := pd.info()
	if info.closing() {
	return pollErrClosing
	}
	if (mode == 'r' && info.expiredReadDeadline()) \|\| (mode == 'w' && info.expiredWriteDeadline()) {
	return pollErrTimeout
	}
	// Report an event scanning error only on a read event.
	// An error on a write event will be captured in a subsequent
	// write call that is able to report a more specific error.
	if mode == 'r' && info.eventErr() {
	return pollErrNotPollable
	}
	return pollNoError
	}

	// netpollblockcommit is passed to gopark by netpollblock. It tries to
	// replace pdWait in the semaphore word at gpp with a pointer to gp and
	// reports whether that compare-and-swap succeeded.
	func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
		if !atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp))) {
			return false
		}
		// Bump the count of goroutines waiting for the poller.
		// The scheduler uses this to decide whether to block
		// waiting for the poller if there is nothing else to do.
		atomic.Xadd(&netpollWaiters, 1)
		return true
	}

	// netpollgoready decrements the count of goroutines waiting for the
	// poller and then marks gp ready to run. traceskip is increased by one
	// before being handed to goready.
	func netpollgoready(gp *g, traceskip int) {
		atomic.Xadd(&netpollWaiters, -1)
		skip := traceskip + 1
		goready(gp, skip)
	}

	// returns true if IO is ready, or false if timedout or closed
	// waitio - wait only for completed IO, ignore errors
	// Concurrent calls to netpollblock in the same mode are forbidden, as pollDesc
	// can hold only a single waiting goroutine for each mode.
	func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
	gpp := &pd.rg
	if mode == 'w' {
	gpp = &pd.wg
	}

	// set the gpp semaphore to pdWait
	for {
	// Consume notification if already ready.
	if gpp.CompareAndSwap(pdReady, 0) {
	return true
	}
	if gpp.CompareAndSwap(0, pdWait) {
	break
	}

	// Double check that this isn't corrupt; otherwise we'd loop
	// forever.
	if v := gpp.Load(); v != pdReady && v != 0 {
	throw("runtime: double wait")
	}
	}

	// need to recheck error states after setting gpp to pdWait
	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, publishInfo, load of rg/wg
	if waitio \|\| netpollcheckerr(pd, mode) == pollNoError {
	gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
	}
	// be careful to not lose concurrent pdReady notification
	old := gpp.Swap(0)
	if old > pdWait {
	throw("runtime: corrupted polldesc")
	}
	return old == pdReady
	}

	// netpollunblock updates pd's read semaphore (rg), or its write semaphore
	// (wg) when mode is 'w', and returns the goroutine that was parked on it,
	// if any.
	//
	// When ioready is true the semaphore is set to pdReady; otherwise it is
	// reset to 0. The result is nil when the semaphore was already pdReady,
	// when it was 0 and ioready is false, or when the previous value was 0 or
	// pdWait (no goroutine pointer had been stored).
	func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
		gpp := &pd.rg
		if mode == 'w' {
			gpp = &pd.wg
		}

		for {
			old := gpp.Load()
			if old == pdReady {
				return nil
			}
			if old == 0 && !ioready {
				// Only set pdReady for ioready. runtime_pollWait
				// will check for timeout/cancel before waiting.
				return nil
			}
			// next stays 0 unless this is a readiness notification.
			var next uintptr
			if ioready {
				next = pdReady
			}
			// Retry from the top if the semaphore changed underneath us.
			if gpp.CompareAndSwap(old, next) {
				if old == pdWait {
					old = 0
				}
				return (*g)(unsafe.Pointer(old))
			}
		}
	}

	func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
	lock(&pd.lock)
	// Seq arg is seq when the timer was set.
	// If it's stale, ignore the timer event.
	currentSeq := pd.rseq
	if !read {
	currentSeq = pd.wseq
	}
	if seq != currentSeq {
	// The descriptor was reused or timers were reset.
	unlock(&pd.lock)
	return
	}
	var rg *g
	if read {
	if pd.rd <= 0 \|\| pd.rt.f == nil {
	throw("runtime: inconsistent read deadline")
	}
	pd.rd = -1
	pd.publishInfo()
	rg = netpollunblock(pd, 'r', false)
	}
	var wg *g
	if write {
	if pd.wd <= 0 \|\| pd.wt.f == nil && !read {
	throw("runtime: inconsistent write deadline")
	}
	pd.wd = -1
	pd.publishInfo()
	wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	if rg != nil {
	netpollgoready(rg, 0)
	}
	if wg != nil {
	netpollgoready(wg, 0)
	}
	}

	func netpollDeadline(arg any, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
	}

	func netpollReadDeadline(arg any, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
	}

	func netpollWriteDeadline(arg any, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
	}

	// alloc takes a pollDesc off the cache's free list and returns it with
	// its lock initialized. When the list is empty it is first refilled with
	// a block of descriptors obtained from persistentalloc.
	//
	// The receiver and the result are pointers: the cache's lock and free
	// list must be shared rather than copied, and the list links pollDesc
	// values by address.
	func (c *pollCache) alloc() *pollDesc {
		lock(&c.lock)
		if c.first == nil {
			const pdSize = unsafe.Sizeof(pollDesc{})
			n := pollBlockSize / pdSize
			// Always allocate at least one descriptor.
			if n == 0 {
				n = 1
			}
			// Must be in non-GC memory because can be referenced
			// only from epoll/kqueue internals.
			mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
			// Carve the block into n descriptors and thread them onto
			// the free list.
			for i := uintptr(0); i < n; i++ {
				pd := (*pollDesc)(add(mem, i*pdSize))
				pd.link = c.first
				c.first = pd
			}
		}
		pd := c.first
		c.first = pd.link
		lockInit(&pd.lock, lockRankPollDesc)
		unlock(&c.lock)
		return pd
	}

	// makeArg converts pd to an interface{}.
	// makeArg does not do any allocation. Normally, such
	// a conversion requires an allocation because pointers to
	// go:notinheap types (which pollDesc is) must be stored
	// in interfaces indirectly. See issue 42076.
	//
	// The result is built by hand: its type word is set to pdType and its
	// data word to the address of pd.self.
	func (pd *pollDesc) makeArg() (i any) {
		iface := (*eface)(unsafe.Pointer(&i))
		iface._type = pdType
		iface.data = unsafe.Pointer(&pd.self)
		return i
	}

	// pdEface is an interface value holding a nil *pollDesc. It exists so
	// that the type word for *pollDesc can be read out of it.
	var pdEface any = (*pollDesc)(nil)

	// pdType is the type word extracted from pdEface; makeArg stores it in
	// the interface values it constructs.
	var pdType *_type = efaceOf(&pdEface)._type