blob: 7b3d16d02da4c27a164d7142b92765f27a24226d [file] [log] [blame]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris windows
package net
#include "runtime.h"
#include "defs_GOOS_GOARCH.h"
#include "arch_GOARCH.h"
#include "malloc.h"
// Integrated network poller (platform-independent part).
// A particular implementation (epoll/kqueue) must define the following functions:
// void runtime·netpollinit(void); // to initialize the poller
// int32 runtime·netpollopen(uintptr fd, PollDesc *pd); // to arm edge-triggered notifications
// and associate fd with pd.
// An implementation must call the following function to denote that the pd is ready.
// void runtime·netpollready(G **gpp, PollDesc *pd, int32 mode);
// PollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
// READY - io readiness notification is pending;
// a goroutine consumes the notification by changing the state to nil.
// WAIT - a goroutine prepares to park on the semaphore, but not yet parked;
// the goroutine commits to park by changing the state to G pointer,
// or, alternatively, concurrent io notification changes the state to READY,
// or, alternatively, concurrent timeout/close changes the state to nil.
// G pointer - the goroutine is blocked on the semaphore;
// io notification or timeout/close changes the state to READY or nil respectively
// and unparks the goroutine.
// nil - nothing of the above.
#define READY ((G*)1)
#define WAIT ((G*)2)
enum
{
PollBlockSize = 4*1024,
};
struct PollDesc
{
PollDesc* link; // in pollcache, protected by pollcache.Lock
// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
// This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO rediness notification)
// proceed w/o taking the lock. So closing, rg, rd, wg and wd are manipulated
// in a lock-free way by all operations.
Lock; // protectes the following fields
uintptr fd;
bool closing;
uintptr seq; // protects from stale timers and ready notifications
G* rg; // READY, WAIT, G waiting for read or nil
Timer rt; // read deadline timer (set if rt.fv != nil)
int64 rd; // read deadline
G* wg; // READY, WAIT, G waiting for write or nil
Timer wt; // write deadline timer
int64 wd; // write deadline
void* user; // user settable cookie
};
static struct
{
Lock;
PollDesc* first;
// PollDesc objects must be type-stable,
// because we can get ready notification from epoll/kqueue
// after the descriptor is closed/reused.
// Stale notifications are detected using seq variable,
// seq is incremented when deadlines are changed or descriptor is reused.
} pollcache;
static bool netpollblock(PollDesc*, int32, bool);
static G* netpollunblock(PollDesc*, int32, bool);
static void deadline(int64, Eface);
static void readDeadline(int64, Eface);
static void writeDeadline(int64, Eface);
static PollDesc* allocPollDesc(void);
static intgo checkerr(PollDesc *pd, int32 mode);
static FuncVal deadlineFn = {(void(*)(void))deadline};
static FuncVal readDeadlineFn = {(void(*)(void))readDeadline};
static FuncVal writeDeadlineFn = {(void(*)(void))writeDeadline};
// runtimeNano returns the current value of the runtime clock in nanoseconds.
func runtimeNano() (ns int64) {
ns = runtime·nanotime();
}
func runtime_pollServerInit() {
runtime·netpollinit();
}
func runtime_pollOpen(fd uintptr) (pd *PollDesc, errno int) {
pd = allocPollDesc();
runtime·lock(pd);
if(pd->wg != nil && pd->wg != READY)
runtime·throw("runtime_pollOpen: blocked write on free descriptor");
if(pd->rg != nil && pd->rg != READY)
runtime·throw("runtime_pollOpen: blocked read on free descriptor");
pd->fd = fd;
pd->closing = false;
pd->seq++;
pd->rg = nil;
pd->rd = 0;
pd->wg = nil;
pd->wd = 0;
runtime·unlock(pd);
errno = runtime·netpollopen(fd, pd);
}
func runtime_pollClose(pd *PollDesc) {
if(!pd->closing)
runtime·throw("runtime_pollClose: close w/o unblock");
if(pd->wg != nil && pd->wg != READY)
runtime·throw("runtime_pollClose: blocked write on closing descriptor");
if(pd->rg != nil && pd->rg != READY)
runtime·throw("runtime_pollClose: blocked read on closing descriptor");
runtime·netpollclose(pd->fd);
runtime·lock(&pollcache);
pd->link = pollcache.first;
pollcache.first = pd;
runtime·unlock(&pollcache);
}
func runtime_pollReset(pd *PollDesc, mode int) (err int) {
err = checkerr(pd, mode);
if(err)
goto ret;
if(mode == 'r')
pd->rg = nil;
else if(mode == 'w')
pd->wg = nil;
ret:
}
func runtime_pollWait(pd *PollDesc, mode int) (err int) {
err = checkerr(pd, mode);
if(err == 0) {
// As for now only Solaris uses level-triggered IO.
if(Solaris)
runtime·netpollarm(pd, mode);
while(!netpollblock(pd, mode, false)) {
err = checkerr(pd, mode);
if(err != 0)
break;
// Can happen if timeout has fired and unblocked us,
// but before we had a chance to run, timeout has been reset.
// Pretend it has not happened and retry.
}
}
}
func runtime_pollWaitCanceled(pd *PollDesc, mode int) {
// This function is used only on windows after a failed attempt to cancel
// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
while(!netpollblock(pd, mode, true))
;
}
func runtime_pollSetDeadline(pd *PollDesc, d int64, mode int) {
G *rg, *wg;
runtime·lock(pd);
if(pd->closing) {
runtime·unlock(pd);
return;
}
pd->seq++; // invalidate current timers
// Reset current timers.
if(pd->rt.fv) {
runtime·deltimer(&pd->rt);
pd->rt.fv = nil;
}
if(pd->wt.fv) {
runtime·deltimer(&pd->wt);
pd->wt.fv = nil;
}
// Setup new timers.
if(d != 0 && d <= runtime·nanotime())
d = -1;
if(mode == 'r' || mode == 'r'+'w')
pd->rd = d;
if(mode == 'w' || mode == 'r'+'w')
pd->wd = d;
if(pd->rd > 0 && pd->rd == pd->wd) {
pd->rt.fv = &deadlineFn;
pd->rt.when = pd->rd;
// Copy current seq into the timer arg.
// Timer func will check the seq against current descriptor seq,
// if they differ the descriptor was reused or timers were reset.
pd->rt.arg.type = (Type*)pd->seq;
pd->rt.arg.data = pd;
runtime·addtimer(&pd->rt);
} else {
if(pd->rd > 0) {
pd->rt.fv = &readDeadlineFn;
pd->rt.when = pd->rd;
pd->rt.arg.type = (Type*)pd->seq;
pd->rt.arg.data = pd;
runtime·addtimer(&pd->rt);
}
if(pd->wd > 0) {
pd->wt.fv = &writeDeadlineFn;
pd->wt.when = pd->wd;
pd->wt.arg.type = (Type*)pd->seq;
pd->wt.arg.data = pd;
runtime·addtimer(&pd->wt);
}
}
// If we set the new deadline in the past, unblock currently pending IO if any.
rg = nil;
runtime·atomicstorep(&wg, nil); // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
if(pd->rd < 0)
rg = netpollunblock(pd, 'r', false);
if(pd->wd < 0)
wg = netpollunblock(pd, 'w', false);
runtime·unlock(pd);
if(rg)
runtime·ready(rg);
if(wg)
runtime·ready(wg);
}
func runtime_pollUnblock(pd *PollDesc) {
G *rg, *wg;
runtime·lock(pd);
if(pd->closing)
runtime·throw("runtime_pollUnblock: already closing");
pd->closing = true;
pd->seq++;
runtime·atomicstorep(&rg, nil); // full memory barrier between store to closing and read of rg/wg in netpollunblock
rg = netpollunblock(pd, 'r', false);
wg = netpollunblock(pd, 'w', false);
if(pd->rt.fv) {
runtime·deltimer(&pd->rt);
pd->rt.fv = nil;
}
if(pd->wt.fv) {
runtime·deltimer(&pd->wt);
pd->wt.fv = nil;
}
runtime·unlock(pd);
if(rg)
runtime·ready(rg);
if(wg)
runtime·ready(wg);
}
uintptr
runtime·netpollfd(PollDesc *pd)
{
return pd->fd;
}
void**
runtime·netpolluser(PollDesc *pd)
{
return &pd->user;
}
bool
runtime·netpollclosing(PollDesc *pd)
{
return pd->closing;
}
void
runtime·netpolllock(PollDesc *pd)
{
runtime·lock(pd);
}
void
runtime·netpollunlock(PollDesc *pd)
{
runtime·unlock(pd);
}
// make pd ready, newly runnable goroutines (if any) are enqueued info gpp list
void
runtime·netpollready(G **gpp, PollDesc *pd, int32 mode)
{
G *rg, *wg;
rg = wg = nil;
if(mode == 'r' || mode == 'r'+'w')
rg = netpollunblock(pd, 'r', true);
if(mode == 'w' || mode == 'r'+'w')
wg = netpollunblock(pd, 'w', true);
if(rg) {
rg->schedlink = *gpp;
*gpp = rg;
}
if(wg) {
wg->schedlink = *gpp;
*gpp = wg;
}
}
static intgo
checkerr(PollDesc *pd, int32 mode)
{
if(pd->closing)
return 1; // errClosing
if((mode == 'r' && pd->rd < 0) || (mode == 'w' && pd->wd < 0))
return 2; // errTimeout
return 0;
}
static bool
blockcommit(G *gp, G **gpp)
{
return runtime·casp(gpp, WAIT, gp);
}
// returns true if IO is ready, or false if timedout or closed
// waitio - wait only for completed IO, ignore errors
static bool
netpollblock(PollDesc *pd, int32 mode, bool waitio)
{
G **gpp, *old;
gpp = &pd->rg;
if(mode == 'w')
gpp = &pd->wg;
// set the gpp semaphore to WAIT
for(;;) {
old = *gpp;
if(old == READY) {
*gpp = nil;
return true;
}
if(old != nil)
runtime·throw("netpollblock: double wait");
if(runtime·casp(gpp, nil, WAIT))
break;
}
// need to recheck error states after setting gpp to WAIT
// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
if(waitio || checkerr(pd, mode) == 0)
runtime·park((bool(*)(G*, void*))blockcommit, gpp, "IO wait");
// be careful to not lose concurrent READY notification
old = runtime·xchgp(gpp, nil);
if(old > WAIT)
runtime·throw("netpollblock: corrupted state");
return old == READY;
}
static G*
netpollunblock(PollDesc *pd, int32 mode, bool ioready)
{
G **gpp, *old, *new;
gpp = &pd->rg;
if(mode == 'w')
gpp = &pd->wg;
for(;;) {
old = *gpp;
if(old == READY)
return nil;
if(old == nil && !ioready) {
// Only set READY for ioready. runtime_pollWait
// will check for timeout/cancel before waiting.
return nil;
}
new = nil;
if(ioready)
new = READY;
if(runtime·casp(gpp, old, new))
break;
}
if(old > WAIT)
return old; // must be G*
return nil;
}
static void
deadlineimpl(int64 now, Eface arg, bool read, bool write)
{
PollDesc *pd;
uint32 seq;
G *rg, *wg;
USED(now);
pd = (PollDesc*)arg.data;
// This is the seq when the timer was set.
// If it's stale, ignore the timer event.
seq = (uintptr)arg.type;
rg = wg = nil;
runtime·lock(pd);
if(seq != pd->seq) {
// The descriptor was reused or timers were reset.
runtime·unlock(pd);
return;
}
if(read) {
if(pd->rd <= 0 || pd->rt.fv == nil)
runtime·throw("deadlineimpl: inconsistent read deadline");
pd->rd = -1;
runtime·atomicstorep(&pd->rt.fv, nil); // full memory barrier between store to rd and load of rg in netpollunblock
rg = netpollunblock(pd, 'r', false);
}
if(write) {
if(pd->wd <= 0 || (pd->wt.fv == nil && !read))
runtime·throw("deadlineimpl: inconsistent write deadline");
pd->wd = -1;
runtime·atomicstorep(&pd->wt.fv, nil); // full memory barrier between store to wd and load of wg in netpollunblock
wg = netpollunblock(pd, 'w', false);
}
runtime·unlock(pd);
if(rg)
runtime·ready(rg);
if(wg)
runtime·ready(wg);
}
static void
deadline(int64 now, Eface arg)
{
deadlineimpl(now, arg, true, true);
}
static void
readDeadline(int64 now, Eface arg)
{
deadlineimpl(now, arg, true, false);
}
static void
writeDeadline(int64 now, Eface arg)
{
deadlineimpl(now, arg, false, true);
}
static PollDesc*
allocPollDesc(void)
{
PollDesc *pd;
uint32 i, n;
runtime·lock(&pollcache);
if(pollcache.first == nil) {
n = PollBlockSize/sizeof(*pd);
if(n == 0)
n = 1;
// Must be in non-GC memory because can be referenced
// only from epoll/kqueue internals.
pd = runtime·persistentalloc(n*sizeof(*pd), 0, &mstats.other_sys);
for(i = 0; i < n; i++) {
pd[i].link = pollcache.first;
pollcache.first = &pd[i];
}
}
pd = pollcache.first;
pollcache.first = pd->link;
runtime·unlock(&pollcache);
return pd;
}