| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // +build aix darwin dragonfly freebsd hurd js,wasm linux netbsd openbsd solaris |
| |
| package poll |
| |
| import ( |
| "io" |
| "sync/atomic" |
| "syscall" |
| ) |
| |
| // FD is a file descriptor. The net and os packages use this type as a |
| // field of a larger type representing a network connection or OS file. |
| type FD struct { |
| // Lock sysfd and serialize access to Read and Write methods. |
| fdmu fdMutex |
| |
| // System file descriptor. Immutable until Close. |
| Sysfd int |
| |
| // I/O poller. |
| pd pollDesc |
| |
| // Writev cache. |
| iovecs *[]syscall.Iovec |
| |
| // Semaphore signaled when file is closed. |
| csema uint32 |
| |
| // Non-zero if this file has been set to blocking mode. |
| isBlocking uint32 |
| |
| // Whether this is a streaming descriptor, as opposed to a |
| // packet-based descriptor like a UDP socket. Immutable. |
| IsStream bool |
| |
| // Whether a zero byte read indicates EOF. This is false for a |
| // message based socket connection. |
| ZeroReadIsEOF bool |
| |
| // Whether this is a file rather than a network socket. |
| isFile bool |
| } |
| |
| // Init initializes the FD. The Sysfd field should already be set. |
| // This can be called multiple times on a single FD. |
| // The net argument is a network name from the net package (e.g., "tcp"), |
| // or "file". |
| // Set pollable to true if fd should be managed by runtime netpoll. |
| func (fd *FD) Init(net string, pollable bool) error { |
| // We don't actually care about the various network types. |
| if net == "file" { |
| fd.isFile = true |
| } |
| if !pollable { |
| fd.isBlocking = 1 |
| return nil |
| } |
| err := fd.pd.init(fd) |
| if err != nil { |
| // If we could not initialize the runtime poller, |
| // assume we are using blocking mode. |
| fd.isBlocking = 1 |
| } |
| return err |
| } |
| |
| // Destroy closes the file descriptor. This is called when there are |
| // no remaining references. |
| func (fd *FD) destroy() error { |
| // Poller may want to unregister fd in readiness notification mechanism, |
| // so this must be executed before CloseFunc. |
| fd.pd.close() |
| err := CloseFunc(fd.Sysfd) |
| fd.Sysfd = -1 |
| runtime_Semrelease(&fd.csema) |
| return err |
| } |
| |
| // Close closes the FD. The underlying file descriptor is closed by the |
| // destroy method when there are no remaining references. |
| func (fd *FD) Close() error { |
| if !fd.fdmu.increfAndClose() { |
| return errClosing(fd.isFile) |
| } |
| |
| // Unblock any I/O. Once it all unblocks and returns, |
| // so that it cannot be referring to fd.sysfd anymore, |
| // the final decref will close fd.sysfd. This should happen |
| // fairly quickly, since all the I/O is non-blocking, and any |
| // attempts to block in the pollDesc will return errClosing(fd.isFile). |
| fd.pd.evict() |
| |
| // The call to decref will call destroy if there are no other |
| // references. |
| err := fd.decref() |
| |
| // Wait until the descriptor is closed. If this was the only |
| // reference, it is already closed. Only wait if the file has |
| // not been set to blocking mode, as otherwise any current I/O |
| // may be blocking, and that would block the Close. |
| // No need for an atomic read of isBlocking, increfAndClose means |
| // we have exclusive access to fd. |
| if fd.isBlocking == 0 { |
| runtime_Semacquire(&fd.csema) |
| } |
| |
| return err |
| } |
| |
| // SetBlocking puts the file into blocking mode. |
| func (fd *FD) SetBlocking() error { |
| if err := fd.incref(); err != nil { |
| return err |
| } |
| defer fd.decref() |
| // Atomic store so that concurrent calls to SetBlocking |
| // do not cause a race condition. isBlocking only ever goes |
| // from 0 to 1 so there is no real race here. |
| atomic.StoreUint32(&fd.isBlocking, 1) |
| return syscall.SetNonblock(fd.Sysfd, false) |
| } |
| |
| // Darwin and FreeBSD can't read or write 2GB+ files at a time, |
| // even on 64-bit systems. |
| // The same is true of socket implementations on many systems. |
| // See golang.org/issue/7812 and golang.org/issue/16266. |
| // Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned. |
| const maxRW = 1 << 30 |
| |
| // Read implements io.Reader. |
| func (fd *FD) Read(p []byte) (int, error) { |
| if err := fd.readLock(); err != nil { |
| return 0, err |
| } |
| defer fd.readUnlock() |
| if len(p) == 0 { |
| // If the caller wanted a zero byte read, return immediately |
| // without trying (but after acquiring the readLock). |
| // Otherwise syscall.Read returns 0, nil which looks like |
| // io.EOF. |
| // TODO(bradfitz): make it wait for readability? (Issue 15735) |
| return 0, nil |
| } |
| if err := fd.pd.prepareRead(fd.isFile); err != nil { |
| return 0, err |
| } |
| if fd.IsStream && len(p) > maxRW { |
| p = p[:maxRW] |
| } |
| for { |
| n, err := ignoringEINTR(syscall.Read, fd.Sysfd, p) |
| if err != nil { |
| n = 0 |
| if err == syscall.EAGAIN && fd.pd.pollable() { |
| if err = fd.pd.waitRead(fd.isFile); err == nil { |
| continue |
| } |
| } |
| } |
| err = fd.eofError(n, err) |
| return n, err |
| } |
| } |
| |
| // Pread wraps the pread system call. |
| func (fd *FD) Pread(p []byte, off int64) (int, error) { |
| // Call incref, not readLock, because since pread specifies the |
| // offset it is independent from other reads. |
| // Similarly, using the poller doesn't make sense for pread. |
| if err := fd.incref(); err != nil { |
| return 0, err |
| } |
| if fd.IsStream && len(p) > maxRW { |
| p = p[:maxRW] |
| } |
| var ( |
| n int |
| err error |
| ) |
| for { |
| n, err = syscall.Pread(fd.Sysfd, p, off) |
| if err != syscall.EINTR { |
| break |
| } |
| } |
| if err != nil { |
| n = 0 |
| } |
| fd.decref() |
| err = fd.eofError(n, err) |
| return n, err |
| } |
| |
| // ReadFrom wraps the recvfrom network call. |
| func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) { |
| if err := fd.readLock(); err != nil { |
| return 0, nil, err |
| } |
| defer fd.readUnlock() |
| if err := fd.pd.prepareRead(fd.isFile); err != nil { |
| return 0, nil, err |
| } |
| for { |
| n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0) |
| if err != nil { |
| if err == syscall.EINTR { |
| continue |
| } |
| n = 0 |
| if err == syscall.EAGAIN && fd.pd.pollable() { |
| if err = fd.pd.waitRead(fd.isFile); err == nil { |
| continue |
| } |
| } |
| } |
| err = fd.eofError(n, err) |
| return n, sa, err |
| } |
| } |
| |
| // ReadMsg wraps the recvmsg network call. |
| func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, error) { |
| if err := fd.readLock(); err != nil { |
| return 0, 0, 0, nil, err |
| } |
| defer fd.readUnlock() |
| if err := fd.pd.prepareRead(fd.isFile); err != nil { |
| return 0, 0, 0, nil, err |
| } |
| for { |
| n, oobn, flags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, 0) |
| if err != nil { |
| if err == syscall.EINTR { |
| continue |
| } |
| // TODO(dfc) should n and oobn be set to 0 |
| if err == syscall.EAGAIN && fd.pd.pollable() { |
| if err = fd.pd.waitRead(fd.isFile); err == nil { |
| continue |
| } |
| } |
| } |
| err = fd.eofError(n, err) |
| return n, oobn, flags, sa, err |
| } |
| } |
| |
| // Write implements io.Writer. |
| func (fd *FD) Write(p []byte) (int, error) { |
| if err := fd.writeLock(); err != nil { |
| return 0, err |
| } |
| defer fd.writeUnlock() |
| if err := fd.pd.prepareWrite(fd.isFile); err != nil { |
| return 0, err |
| } |
| var nn int |
| for { |
| max := len(p) |
| if fd.IsStream && max-nn > maxRW { |
| max = nn + maxRW |
| } |
| n, err := ignoringEINTR(syscall.Write, fd.Sysfd, p[nn:max]) |
| if n > 0 { |
| nn += n |
| } |
| if nn == len(p) { |
| return nn, err |
| } |
| if err == syscall.EAGAIN && fd.pd.pollable() { |
| if err = fd.pd.waitWrite(fd.isFile); err == nil { |
| continue |
| } |
| } |
| if err != nil { |
| return nn, err |
| } |
| if n == 0 { |
| return nn, io.ErrUnexpectedEOF |
| } |
| } |
| } |
| |
| // Pwrite wraps the pwrite system call. |
| func (fd *FD) Pwrite(p []byte, off int64) (int, error) { |
| // Call incref, not writeLock, because since pwrite specifies the |
| // offset it is independent from other writes. |
| // Similarly, using the poller doesn't make sense for pwrite. |
| if err := fd.incref(); err != nil { |
| return 0, err |
| } |
| defer fd.decref() |
| var nn int |
| for { |
| max := len(p) |
| if fd.IsStream && max-nn > maxRW { |
| max = nn + maxRW |
| } |
| n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn)) |
| if err == syscall.EINTR { |
| continue |
| } |
| if n > 0 { |
| nn += n |
| } |
| if nn == len(p) { |
| return nn, err |
| } |
| if err != nil { |
| return nn, err |
| } |
| if n == 0 { |
| return nn, io.ErrUnexpectedEOF |
| } |
| } |
| } |
| |
| // WriteTo wraps the sendto network call. |
| func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) { |
| if err := fd.writeLock(); err != nil { |
| return 0, err |
| } |
| defer fd.writeUnlock() |
| if err := fd.pd.prepareWrite(fd.isFile); err != nil { |
| return 0, err |
| } |
| for { |
| err := syscall.Sendto(fd.Sysfd, p, 0, sa) |
| if err == syscall.EINTR { |
| continue |
| } |
| if err == syscall.EAGAIN && fd.pd.pollable() { |
| if err = fd.pd.waitWrite(fd.isFile); err == nil { |
| continue |
| } |
| } |
| if err != nil { |
| return 0, err |
| } |
| return len(p), nil |
| } |
| } |
| |
| // WriteMsg wraps the sendmsg network call. |
| func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) { |
| if err := fd.writeLock(); err != nil { |
| return 0, 0, err |
| } |
| defer fd.writeUnlock() |
| if err := fd.pd.prepareWrite(fd.isFile); err != nil { |
| return 0, 0, err |
| } |
| for { |
| n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0) |
| if err == syscall.EINTR { |
| continue |
| } |
| if err == syscall.EAGAIN && fd.pd.pollable() { |
| if err = fd.pd.waitWrite(fd.isFile); err == nil { |
| continue |
| } |
| } |
| if err != nil { |
| return n, 0, err |
| } |
| return n, len(oob), err |
| } |
| } |
| |
| // Accept wraps the accept network call. |
| func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) { |
| if err := fd.readLock(); err != nil { |
| return -1, nil, "", err |
| } |
| defer fd.readUnlock() |
| |
| if err := fd.pd.prepareRead(fd.isFile); err != nil { |
| return -1, nil, "", err |
| } |
| for { |
| s, rsa, errcall, err := accept(fd.Sysfd) |
| if err == nil { |
| return s, rsa, "", err |
| } |
| switch err { |
| case syscall.EINTR: |
| continue |
| case syscall.EAGAIN: |
| if fd.pd.pollable() { |
| if err = fd.pd.waitRead(fd.isFile); err == nil { |
| continue |
| } |
| } |
| case syscall.ECONNABORTED: |
| // This means that a socket on the listen |
| // queue was closed before we Accept()ed it; |
| // it's a silly error, so try again. |
| continue |
| } |
| return -1, nil, errcall, err |
| } |
| } |
| |
| // Seek wraps syscall.Seek. |
| func (fd *FD) Seek(offset int64, whence int) (int64, error) { |
| if err := fd.incref(); err != nil { |
| return 0, err |
| } |
| defer fd.decref() |
| return syscall.Seek(fd.Sysfd, offset, whence) |
| } |
| |
| // ReadDirent wraps syscall.ReadDirent. |
| // We treat this like an ordinary system call rather than a call |
| // that tries to fill the buffer. |
| func (fd *FD) ReadDirent(buf []byte) (int, error) { |
| if err := fd.incref(); err != nil { |
| return 0, err |
| } |
| defer fd.decref() |
| for { |
| n, err := ignoringEINTR(syscall.ReadDirent, fd.Sysfd, buf) |
| if err != nil { |
| n = 0 |
| if err == syscall.EAGAIN && fd.pd.pollable() { |
| if err = fd.pd.waitRead(fd.isFile); err == nil { |
| continue |
| } |
| } |
| } |
| // Do not call eofError; caller does not expect to see io.EOF. |
| return n, err |
| } |
| } |
| |
| // Fchdir wraps syscall.Fchdir. |
| func (fd *FD) Fchdir() error { |
| if err := fd.incref(); err != nil { |
| return err |
| } |
| defer fd.decref() |
| return syscall.Fchdir(fd.Sysfd) |
| } |
| |
| // Fstat wraps syscall.Fstat |
| func (fd *FD) Fstat(s *syscall.Stat_t) error { |
| if err := fd.incref(); err != nil { |
| return err |
| } |
| defer fd.decref() |
| return syscall.Fstat(fd.Sysfd, s) |
| } |
| |
| // tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used. |
| // If the kernel doesn't support it, this is set to 0. |
| var tryDupCloexec = int32(1) |
| |
| // DupCloseOnExec dups fd and marks it close-on-exec. |
| func DupCloseOnExec(fd int) (int, string, error) { |
| if syscall.F_DUPFD_CLOEXEC != 0 && atomic.LoadInt32(&tryDupCloexec) == 1 { |
| r0, e1 := fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0) |
| if e1 == nil { |
| return r0, "", nil |
| } |
| switch e1.(syscall.Errno) { |
| case syscall.EINVAL, syscall.ENOSYS: |
| // Old kernel, or js/wasm (which returns |
| // ENOSYS). Fall back to the portable way from |
| // now on. |
| atomic.StoreInt32(&tryDupCloexec, 0) |
| default: |
| return -1, "fcntl", e1 |
| } |
| } |
| return dupCloseOnExecOld(fd) |
| } |
| |
| // dupCloseOnExecOld is the traditional way to dup an fd and |
| // set its O_CLOEXEC bit, using two system calls. |
| func dupCloseOnExecOld(fd int) (int, string, error) { |
| syscall.ForkLock.RLock() |
| defer syscall.ForkLock.RUnlock() |
| newfd, err := syscall.Dup(fd) |
| if err != nil { |
| return -1, "dup", err |
| } |
| syscall.CloseOnExec(newfd) |
| return newfd, "", nil |
| } |
| |
| // Dup duplicates the file descriptor. |
| func (fd *FD) Dup() (int, string, error) { |
| if err := fd.incref(); err != nil { |
| return -1, "", err |
| } |
| defer fd.decref() |
| return DupCloseOnExec(fd.Sysfd) |
| } |
| |
| // On Unix variants only, expose the IO event for the net code. |
| |
| // WaitWrite waits until data can be read from fd. |
| func (fd *FD) WaitWrite() error { |
| return fd.pd.waitWrite(fd.isFile) |
| } |
| |
| // WriteOnce is for testing only. It makes a single write call. |
| func (fd *FD) WriteOnce(p []byte) (int, error) { |
| if err := fd.writeLock(); err != nil { |
| return 0, err |
| } |
| defer fd.writeUnlock() |
| return ignoringEINTR(syscall.Write, fd.Sysfd, p) |
| } |
| |
| // RawRead invokes the user-defined function f for a read operation. |
| func (fd *FD) RawRead(f func(uintptr) bool) error { |
| if err := fd.readLock(); err != nil { |
| return err |
| } |
| defer fd.readUnlock() |
| if err := fd.pd.prepareRead(fd.isFile); err != nil { |
| return err |
| } |
| for { |
| if f(uintptr(fd.Sysfd)) { |
| return nil |
| } |
| if err := fd.pd.waitRead(fd.isFile); err != nil { |
| return err |
| } |
| } |
| } |
| |
| // RawWrite invokes the user-defined function f for a write operation. |
| func (fd *FD) RawWrite(f func(uintptr) bool) error { |
| if err := fd.writeLock(); err != nil { |
| return err |
| } |
| defer fd.writeUnlock() |
| if err := fd.pd.prepareWrite(fd.isFile); err != nil { |
| return err |
| } |
| for { |
| if f(uintptr(fd.Sysfd)) { |
| return nil |
| } |
| if err := fd.pd.waitWrite(fd.isFile); err != nil { |
| return err |
| } |
| } |
| } |
| |
| // ignoringEINTR makes a function call and repeats it if it returns |
| // an EINTR error. This appears to be required even though we install |
| // all signal handlers with SA_RESTART: see #22838, #38033, #38836. |
| // Also #20400 and #36644 are issues in which a signal handler is |
| // installed without setting SA_RESTART. None of these are the common case, |
| // but there are enough of them that it seems that we can't avoid |
| // an EINTR loop. |
| func ignoringEINTR(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) { |
| for { |
| n, err := fn(fd, p) |
| if err != syscall.EINTR { |
| return n, err |
| } |
| } |
| } |