blob: 5373052e655933442be34e3623752cf83e1ebbf1 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build unix || (js && wasm)
package poll
import (
"internal/syscall/unix"
"io"
"sync/atomic"
"syscall"
)
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
// Lock sysfd and serialize access to Read and Write methods.
fdmu fdMutex
// System file descriptor. Immutable until Close.
Sysfd int
// I/O poller.
pd pollDesc
// Writev cache.
iovecs *[]syscall.Iovec
// Semaphore signaled when file is closed.
csema uint32
// Non-zero if this file has been set to blocking mode.
isBlocking uint32
// Whether this is a streaming descriptor, as opposed to a
// packet-based descriptor like a UDP socket. Immutable.
IsStream bool
// Whether a zero byte read indicates EOF. This is false for a
// message based socket connection.
ZeroReadIsEOF bool
// Whether this is a file rather than a network socket.
isFile bool
}
// Init initializes the FD. The Sysfd field should already be set.
// This can be called multiple times on a single FD.
// The net argument is a network name from the net package (e.g., "tcp"),
// or "file".
// Set pollable to true if fd should be managed by runtime netpoll.
func (fd *FD) Init(net string, pollable bool) error {
// We don't actually care about the various network types.
if net == "file" {
fd.isFile = true
}
if !pollable {
fd.isBlocking = 1
return nil
}
err := fd.pd.init(fd)
if err != nil {
// If we could not initialize the runtime poller,
// assume we are using blocking mode.
fd.isBlocking = 1
}
return err
}
// Destroy closes the file descriptor. This is called when there are
// no remaining references.
func (fd *FD) destroy() error {
// Poller may want to unregister fd in readiness notification mechanism,
// so this must be executed before CloseFunc.
fd.pd.close()
// We don't use ignoringEINTR here because POSIX does not define
// whether the descriptor is closed if close returns EINTR.
// If the descriptor is indeed closed, using a loop would race
// with some other goroutine opening a new descriptor.
// (The Linux kernel guarantees that it is closed on an EINTR error.)
err := CloseFunc(fd.Sysfd)
fd.Sysfd = -1
runtime_Semrelease(&fd.csema)
return err
}
// Close closes the FD. The underlying file descriptor is closed by the
// destroy method when there are no remaining references.
func (fd *FD) Close() error {
if !fd.fdmu.increfAndClose() {
return errClosing(fd.isFile)
}
// Unblock any I/O. Once it all unblocks and returns,
// so that it cannot be referring to fd.sysfd anymore,
// the final decref will close fd.sysfd. This should happen
// fairly quickly, since all the I/O is non-blocking, and any
// attempts to block in the pollDesc will return errClosing(fd.isFile).
fd.pd.evict()
// The call to decref will call destroy if there are no other
// references.
err := fd.decref()
// Wait until the descriptor is closed. If this was the only
// reference, it is already closed. Only wait if the file has
// not been set to blocking mode, as otherwise any current I/O
// may be blocking, and that would block the Close.
// No need for an atomic read of isBlocking, increfAndClose means
// we have exclusive access to fd.
if fd.isBlocking == 0 {
runtime_Semacquire(&fd.csema)
}
return err
}
// SetBlocking puts the file into blocking mode.
func (fd *FD) SetBlocking() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
// Atomic store so that concurrent calls to SetBlocking
// do not cause a race condition. isBlocking only ever goes
// from 0 to 1 so there is no real race here.
atomic.StoreUint32(&fd.isBlocking, 1)
return syscall.SetNonblock(fd.Sysfd, false)
}
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
// Read implements io.Reader.
func (fd *FD) Read(p []byte) (int, error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if len(p) == 0 {
// If the caller wanted a zero byte read, return immediately
// without trying (but after acquiring the readLock).
// Otherwise syscall.Read returns 0, nil which looks like
// io.EOF.
// TODO(bradfitz): make it wait for readability? (Issue 15735)
return 0, nil
}
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, err
}
if fd.IsStream && len(p) > maxRW {
p = p[:maxRW]
}
for {
n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
if err != nil {
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, err
}
}
// Pread wraps the pread system call.
func (fd *FD) Pread(p []byte, off int64) (int, error) {
// Call incref, not readLock, because since pread specifies the
// offset it is independent from other reads.
// Similarly, using the poller doesn't make sense for pread.
if err := fd.incref(); err != nil {
return 0, err
}
if fd.IsStream && len(p) > maxRW {
p = p[:maxRW]
}
var (
n int
err error
)
for {
n, err = syscall.Pread(fd.Sysfd, p, off)
if err != syscall.EINTR {
break
}
}
if err != nil {
n = 0
}
fd.decref()
err = fd.eofError(n, err)
return n, err
}
// ReadFrom wraps the recvfrom network call.
func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
if err := fd.readLock(); err != nil {
return 0, nil, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, nil, err
}
for {
n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
if err != nil {
if err == syscall.EINTR {
continue
}
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, sa, err
}
}
// ReadFromInet4 wraps the recvfrom network call for IPv4.
func (fd *FD) ReadFromInet4(p []byte, from *syscall.SockaddrInet4) (int, error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, err
}
for {
n, err := unix.RecvfromInet4(fd.Sysfd, p, 0, from)
if err != nil {
if err == syscall.EINTR {
continue
}
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, err
}
}
// ReadFromInet6 wraps the recvfrom network call for IPv6.
func (fd *FD) ReadFromInet6(p []byte, from *syscall.SockaddrInet6) (int, error) {
if err := fd.readLock(); err != nil {
return 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, err
}
for {
n, err := unix.RecvfromInet6(fd.Sysfd, p, 0, from)
if err != nil {
if err == syscall.EINTR {
continue
}
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, err
}
}
// ReadMsg wraps the recvmsg network call.
func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
if err := fd.readLock(); err != nil {
return 0, 0, 0, nil, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, 0, 0, nil, err
}
for {
n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags)
if err != nil {
if err == syscall.EINTR {
continue
}
// TODO(dfc) should n and oobn be set to 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, oobn, sysflags, sa, err
}
}
// ReadMsgInet4 is ReadMsg, but specialized for syscall.SockaddrInet4.
func (fd *FD) ReadMsgInet4(p []byte, oob []byte, flags int, sa4 *syscall.SockaddrInet4) (int, int, int, error) {
if err := fd.readLock(); err != nil {
return 0, 0, 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, 0, 0, err
}
for {
n, oobn, sysflags, err := unix.RecvmsgInet4(fd.Sysfd, p, oob, flags, sa4)
if err != nil {
if err == syscall.EINTR {
continue
}
// TODO(dfc) should n and oobn be set to 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, oobn, sysflags, err
}
}
// ReadMsgInet6 is ReadMsg, but specialized for syscall.SockaddrInet6.
func (fd *FD) ReadMsgInet6(p []byte, oob []byte, flags int, sa6 *syscall.SockaddrInet6) (int, int, int, error) {
if err := fd.readLock(); err != nil {
return 0, 0, 0, err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return 0, 0, 0, err
}
for {
n, oobn, sysflags, err := unix.RecvmsgInet6(fd.Sysfd, p, oob, flags, sa6)
if err != nil {
if err == syscall.EINTR {
continue
}
// TODO(dfc) should n and oobn be set to 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
err = fd.eofError(n, err)
return n, oobn, sysflags, err
}
}
// Write implements io.Writer.
func (fd *FD) Write(p []byte) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
var nn int
for {
max := len(p)
if fd.IsStream && max-nn > maxRW {
max = nn + maxRW
}
n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
if n > 0 {
nn += n
}
if nn == len(p) {
return nn, err
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return nn, err
}
if n == 0 {
return nn, io.ErrUnexpectedEOF
}
}
}
// Pwrite wraps the pwrite system call.
func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
// Call incref, not writeLock, because since pwrite specifies the
// offset it is independent from other writes.
// Similarly, using the poller doesn't make sense for pwrite.
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
var nn int
for {
max := len(p)
if fd.IsStream && max-nn > maxRW {
max = nn + maxRW
}
n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
if err == syscall.EINTR {
continue
}
if n > 0 {
nn += n
}
if nn == len(p) {
return nn, err
}
if err != nil {
return nn, err
}
if n == 0 {
return nn, io.ErrUnexpectedEOF
}
}
}
// WriteToInet4 wraps the sendto network call for IPv4 addresses.
func (fd *FD) WriteToInet4(p []byte, sa *syscall.SockaddrInet4) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
for {
err := unix.SendtoInet4(fd.Sysfd, p, 0, sa)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return 0, err
}
return len(p), nil
}
}
// WriteToInet6 wraps the sendto network call for IPv6 addresses.
func (fd *FD) WriteToInet6(p []byte, sa *syscall.SockaddrInet6) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
for {
err := unix.SendtoInet6(fd.Sysfd, p, 0, sa)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return 0, err
}
return len(p), nil
}
}
// WriteTo wraps the sendto network call.
func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, err
}
for {
err := syscall.Sendto(fd.Sysfd, p, 0, sa)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return 0, err
}
return len(p), nil
}
}
// WriteMsg wraps the sendmsg network call.
func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
if err := fd.writeLock(); err != nil {
return 0, 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, 0, err
}
for {
n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return n, 0, err
}
return n, len(oob), err
}
}
// WriteMsgInet4 is WriteMsg specialized for syscall.SockaddrInet4.
func (fd *FD) WriteMsgInet4(p []byte, oob []byte, sa *syscall.SockaddrInet4) (int, int, error) {
if err := fd.writeLock(); err != nil {
return 0, 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, 0, err
}
for {
n, err := unix.SendmsgNInet4(fd.Sysfd, p, oob, sa, 0)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return n, 0, err
}
return n, len(oob), err
}
}
// WriteMsgInet6 is WriteMsg specialized for syscall.SockaddrInet6.
func (fd *FD) WriteMsgInet6(p []byte, oob []byte, sa *syscall.SockaddrInet6) (int, int, error) {
if err := fd.writeLock(); err != nil {
return 0, 0, err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return 0, 0, err
}
for {
n, err := unix.SendmsgNInet6(fd.Sysfd, p, oob, sa, 0)
if err == syscall.EINTR {
continue
}
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitWrite(fd.isFile); err == nil {
continue
}
}
if err != nil {
return n, 0, err
}
return n, len(oob), err
}
}
// Accept wraps the accept network call.
func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
if err := fd.readLock(); err != nil {
return -1, nil, "", err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return -1, nil, "", err
}
for {
s, rsa, errcall, err := accept(fd.Sysfd)
if err == nil {
return s, rsa, "", err
}
switch err {
case syscall.EINTR:
continue
case syscall.EAGAIN:
if fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
case syscall.ECONNABORTED:
// This means that a socket on the listen
// queue was closed before we Accept()ed it;
// it's a silly error, so try again.
continue
}
return -1, nil, errcall, err
}
}
// Seek wraps syscall.Seek.
func (fd *FD) Seek(offset int64, whence int) (int64, error) {
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
return syscall.Seek(fd.Sysfd, offset, whence)
}
// ReadDirent wraps syscall.ReadDirent.
// We treat this like an ordinary system call rather than a call
// that tries to fill the buffer.
func (fd *FD) ReadDirent(buf []byte) (int, error) {
if err := fd.incref(); err != nil {
return 0, err
}
defer fd.decref()
for {
n, err := ignoringEINTRIO(syscall.ReadDirent, fd.Sysfd, buf)
if err != nil {
n = 0
if err == syscall.EAGAIN && fd.pd.pollable() {
if err = fd.pd.waitRead(fd.isFile); err == nil {
continue
}
}
}
// Do not call eofError; caller does not expect to see io.EOF.
return n, err
}
}
// Fchmod wraps syscall.Fchmod.
func (fd *FD) Fchmod(mode uint32) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
return syscall.Fchmod(fd.Sysfd, mode)
})
}
// Fchdir wraps syscall.Fchdir.
func (fd *FD) Fchdir() error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return syscall.Fchdir(fd.Sysfd)
}
// Fstat wraps syscall.Fstat
func (fd *FD) Fstat(s *syscall.Stat_t) error {
if err := fd.incref(); err != nil {
return err
}
defer fd.decref()
return ignoringEINTR(func() error {
return syscall.Fstat(fd.Sysfd, s)
})
}
// dupCloexecUnsupported indicates whether F_DUPFD_CLOEXEC is supported by the kernel.
var dupCloexecUnsupported atomic.Bool
// DupCloseOnExec dups fd and marks it close-on-exec.
func DupCloseOnExec(fd int) (int, string, error) {
if syscall.F_DUPFD_CLOEXEC != 0 && !dupCloexecUnsupported.Load() {
r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
if e1 == nil {
return r0, "", nil
}
switch e1.(syscall.Errno) {
case syscall.EINVAL, syscall.ENOSYS:
// Old kernel, or js/wasm (which returns
// ENOSYS). Fall back to the portable way from
// now on.
dupCloexecUnsupported.Store(true)
default:
return -1, "fcntl", e1
}
}
return dupCloseOnExecOld(fd)
}
// dupCloseOnExecOld is the traditional way to dup an fd and
// set its O_CLOEXEC bit, using two system calls.
func dupCloseOnExecOld(fd int) (int, string, error) {
syscall.ForkLock.RLock()
defer syscall.ForkLock.RUnlock()
newfd, err := syscall.Dup(fd)
if err != nil {
return -1, "dup", err
}
syscall.CloseOnExec(newfd)
return newfd, "", nil
}
// Dup duplicates the file descriptor.
func (fd *FD) Dup() (int, string, error) {
if err := fd.incref(); err != nil {
return -1, "", err
}
defer fd.decref()
return DupCloseOnExec(fd.Sysfd)
}
// On Unix variants only, expose the IO event for the net code.
// WaitWrite waits until data can be read from fd.
func (fd *FD) WaitWrite() error {
return fd.pd.waitWrite(fd.isFile)
}
// WriteOnce is for testing only. It makes a single write call.
func (fd *FD) WriteOnce(p []byte) (int, error) {
if err := fd.writeLock(); err != nil {
return 0, err
}
defer fd.writeUnlock()
return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
}
// RawRead invokes the user-defined function f for a read operation.
func (fd *FD) RawRead(f func(uintptr) bool) error {
if err := fd.readLock(); err != nil {
return err
}
defer fd.readUnlock()
if err := fd.pd.prepareRead(fd.isFile); err != nil {
return err
}
for {
if f(uintptr(fd.Sysfd)) {
return nil
}
if err := fd.pd.waitRead(fd.isFile); err != nil {
return err
}
}
}
// RawWrite invokes the user-defined function f for a write operation.
func (fd *FD) RawWrite(f func(uintptr) bool) error {
if err := fd.writeLock(); err != nil {
return err
}
defer fd.writeUnlock()
if err := fd.pd.prepareWrite(fd.isFile); err != nil {
return err
}
for {
if f(uintptr(fd.Sysfd)) {
return nil
}
if err := fd.pd.waitWrite(fd.isFile); err != nil {
return err
}
}
}
// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
for {
n, err := fn(fd, p)
if err != syscall.EINTR {
return n, err
}
}
}