* implement Linux epoll for polling i/o
* isolate OS-specific polling goop in Pollster type
* move generic poll loop out of fd_darwin.go into fd.go

R=r
DELTA=782  (448 added, 281 deleted, 53 changed)
OCL=16108
CL=16119
diff --git a/src/lib/net/Makefile b/src/lib/net/Makefile
index 568377d..afbfd1e 100644
--- a/src/lib/net/Makefile
+++ b/src/lib/net/Makefile
@@ -3,7 +3,7 @@
 # license that can be found in the LICENSE file.
 
 # DO NOT EDIT.  Automatically generated by gobuild.
-# gobuild -m net fd_darwin.go net.go net_darwin.go ip.go
+# gobuild -m net fd_darwin.go fd.go net.go net_darwin.go ip.go
 O=6
 GC=$(O)g
 CC=$(O)c -w
@@ -38,9 +38,12 @@
 	net_$(GOOS).$O\
 
 O3=\
+	fd.$O\
+
+O4=\
 	net.$O\
 
-$(PKG): a1 a2 a3
+$(PKG): a1 a2 a3 a4
 a1:	$(O1)
 	$(AR) grc $(PKG) $(O1)
 	rm -f $(O1)
@@ -50,8 +53,12 @@
 a3:	$(O3)
 	$(AR) grc $(PKG) $(O3)
 	rm -f $(O3)
+a4:	$(O4)
+	$(AR) grc $(PKG) $(O4)
+	rm -f $(O4)
 
 $(O1): nuke
 $(O2): a1
 $(O3): a2
+$(O4): a3
 
diff --git a/src/lib/net/fd.go b/src/lib/net/fd.go
new file mode 100644
index 0000000..a93dac0
--- /dev/null
+++ b/src/lib/net/fd.go
@@ -0,0 +1,287 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(rsc): All the prints in this file should go to standard error.
+
+package net
+
+import (
+	"net";
+	"once";
+	"os";
+	"syscall";
+)
+
+// FD is a network file descriptor.  It is only intended to be used internally,
+// but must be exported so the other files implementing package net can see it.
+export type FD struct {
+	// immutable until Close
+	fd int64;	// OS descriptor number; Close sets it to -1
+	osfd *os.FD;	// os wrapper around fd, used for Read, Write and Close
+	cr *chan *FD;	// Run sends the FD here when the descriptor is reported readable
+	cw *chan *FD;	// Run sends the FD here when the descriptor is reported writable
+
+	// owned by fd wait server
+	ncr, ncw int;	// wakeups owed on cr and cw: raised in AddFD, drained in Run
+}
+
+// SetNonblock sets O_NONBLOCK on fd, making reads and writes return EAGAIN instead of blocking.
+func SetNonblock(fd int64) *os.Error {
+	flags, e := syscall.fcntl(fd, syscall.F_GETFL, 0)	// fetch the current status flags
+	if e != 0 {
+		return os.ErrnoToError(e)
+	}
+	flags, e = syscall.fcntl(fd, syscall.F_SETFL, flags | syscall.O_NONBLOCK)	// write them back with O_NONBLOCK added
+	if e != 0 {
+		return os.ErrnoToError(e)
+	}
+	return nil
+}
+
+
+// A PollServer helps FDs determine when to retry a non-blocking
+// read or write after they get EAGAIN.  When an FD needs to wait,
+// send the fd on s.cr (for a read) or s.cw (for a write) to pass the
+// request to the poll server.  Then receive on fd.cr/fd.cw.
+// When the PollServer finds that i/o on FD should be possible
+// again, it will send fd on fd.cr/fd.cw to wake any waiting processes.
+// This protocol is implemented as s.WaitRead() and s.WaitWrite().
+//
+// There is one subtlety: when sending on s.cr/s.cw, the
+// poll server is probably in a system call, waiting for an fd
+// to become ready.  It's not looking at the request channels.
+// To resolve this, the poll server waits not just on the FDs it has
+// been given but also its own pipe.  After sending on the
+// buffered channel s.cr/s.cw, WaitRead/WaitWrite writes a
+// byte to the pipe, causing the PollServer's poll system call to
+// return.  In response to the pipe being readable, the PollServer
+// re-polls its request channels.
+//
+// Note that the ordering is "send request" and then "wake up server".
+// If the operations were reversed, there would be a race: the poll
+// server might wake up and look at the request channel, see that it
+// was empty, and go back to sleep, all before the requester managed
+// to send the request.  Because the send must complete before the wakeup,
+// the request channel must be buffered.  A buffer of size 1 is sufficient
+// for any request load.  If many processes are trying to submit requests,
+// one will succeed, the PollServer will read the request, and then the
+// channel will be empty for the next process's request.  A larger buffer
+// might help batch requests.
+
+type PollServer struct {
+	cr, cw *chan *FD;	// read and write wait requests; buffered >= 1
+	pr, pw *os.FD;	// wakeup pipe: pr is polled by Run, pw is written by Wakeup
+	pending *map[int64] *FD;	// waiting FDs, keyed by fd<<1 for read and fd<<1+1 for write
+	poll *Pollster;	// low-level OS hooks
+}
+func (s *PollServer) Run();	// forward declaration: NewPollServer starts Run before its definition
+
+func NewPollServer() (s *PollServer, err *os.Error) {	// builds the wakeup pipe and Pollster, then starts Run
+	s = new(PollServer);
+	s.cr = new(chan *FD, 1);	// buffered so a request send can complete before the wakeup
+	s.cw = new(chan *FD, 1);
+	if s.pr, s.pw, err = os.Pipe(); err != nil {	// the wakeup pipe
+		return nil, err
+	}
+	if err = SetNonblock(s.pr.fd); err != nil {
+	Error:	// shared failure exit: close both pipe ends and report err
+		s.pr.Close();
+		s.pw.Close()
+		return nil, err
+	}
+	if err = SetNonblock(s.pw.fd); err != nil {
+		goto Error
+	}
+	if s.poll, err = NewPollster(); err != nil {
+		goto Error
+	}
+	if err = s.poll.AddFD(s.pr.fd, 'r', true); err != nil {	// repeat=true: keep watching the pipe after each event
+		s.poll.Close()	// the Pollster exists by now, so release it before the shared exit
+		goto Error
+	}
+	s.pending = new(map[int64] *FD)
+	go s.Run()	// the poll loop runs in its own goroutine
+	return s, nil
+}
+
+func (s *PollServer) AddFD(fd *FD, mode int) {	// registers fd for one wakeup in mode 'r' or 'w'
+	if err := s.poll.AddFD(fd.fd, mode, false); err != nil {	// repeat=false: a one-shot wait
+		print("PollServer AddFD: ", err.String(), "\n")
+		return	// NOTE(review): nothing is signalled on fd.cr/fd.cw here, so the requester appears to stay blocked
+	}
+
+	key := fd.fd << 1	// pending key: even for read, odd for write
+	if mode == 'r' {
+		fd.ncr++
+	} else {
+		fd.ncw++
+		key++
+	}
+	s.pending[key] = fd
+}
+
+func (s *PollServer) LookupFD(fd int64, mode int) *FD {	// removes and returns the FD pending on (fd, mode), or nil
+	key := fd << 1	// same key scheme as AddFD: even for read, odd for write
+	if mode == 'w' {
+		key++
+	}
+	netfd, ok := s.pending[key]
+	if !ok {
+		return nil
+	}
+	s.pending[key] = nil, false	// delete the entry from the map
+	return netfd
+}
+
+func (s *PollServer) Run() {	// poll loop; returns only when WaitFD reports an error
+	var scratch [100]byte;
+	for {
+		fd, mode, err := s.poll.WaitFD();
+		if err != nil {
+			print("PollServer WaitFD: ", err.String(), "\n")
+			return
+		}
+		if fd == s.pr.fd {
+			// Drain our wakeup pipe.
+			for nn, e := s.pr.Read(&scratch); nn > 0; {
+				nn, e = s.pr.Read(&scratch)
+			}
+
+			// Register queued requests; each loop ends when a receive reports !ok.
+			for fd, ok := <-s.cr; ok; fd, ok = <-s.cr {
+				s.AddFD(fd, 'r')
+			}
+			for fd, ok := <-s.cw; ok; fd, ok = <-s.cw {
+				s.AddFD(fd, 'w')
+			}
+		} else {
+			netfd := s.LookupFD(fd, mode)
+			if netfd == nil {
+				print("PollServer: unexpected wakeup for fd=", fd, " mode=", string(mode), "\n")	// netfd is nil here, so report the raw descriptor
+				continue
+			}
+			if mode == 'r' {
+				for netfd.ncr > 0 {	// one send per wakeup owed to readers
+					netfd.ncr--
+					netfd.cr <- netfd
+				}
+			} else {
+				for netfd.ncw > 0 {	// one send per wakeup owed to writers
+					netfd.ncw--
+					netfd.cw <- netfd
+				}
+			}
+		}
+	}
+}
+
+func (s *PollServer) Wakeup() {	// makes the wakeup pipe readable so the poll loop returns
+	var b [1]byte;
+	s.pw.Write(&b)	// one zero byte; the result of the write is ignored
+}
+
+func (s *PollServer) WaitRead(fd *FD) {	// blocks until Run reports fd readable
+	s.cr <- fd;	// send the request before the wakeup (see the ordering note on PollServer)
+	s.Wakeup();
+	<-fd.cr	// Run signals readers on fd.cr
+}
+
+func (s *PollServer) WaitWrite(fd *FD) {	// blocks until Run reports fd writable
+	s.cw <- fd;	// queue on the write channel so Run registers the fd in mode 'w'
+	s.Wakeup();
+	<-fd.cw	// Run signals writers on fd.cw, not fd.cr
+}
+
+
+// Network FD methods.
+// All the network FDs use a single PollServer.
+
+var pollserver *PollServer
+
+func StartServer() {	// creates the package-wide poll server; NewFD runs it through once.Do
+	p, err := NewPollServer()
+	if err != nil {
+		print("Start PollServer: ", err.String(), "\n")	// NOTE(review): failure is only printed; p is nil here, so pollserver stays nil
+	}
+	pollserver = p
+}
+
+export func NewFD(fd int64) (f *FD, err *os.Error) {	// wraps fd in an FD after making it non-blocking
+	if pollserver == nil {
+		once.Do(&StartServer);	// start the shared poll server on first use
+	}
+	if err = SetNonblock(fd); err != nil {
+		return nil, err
+	}
+	f = new(FD);
+	f.fd = fd;
+	f.osfd = os.NewFD(fd);
+	f.cr = new(chan *FD, 1);	// wakeup channels are created with a buffer of 1
+	f.cw = new(chan *FD, 1);
+	return f, nil
+}
+
+func (fd *FD) Close() *os.Error {	// closes the descriptor; EINVAL if fd is nil or already closed
+	if fd == nil || fd.osfd == nil {
+		return os.EINVAL
+	}
+	e := fd.osfd.Close();
+	fd.osfd = nil;	// marks the FD closed for the nil-osfd guards in Read, Write, Accept and Close
+	fd.fd = -1
+	return e	// the result of the underlying os.FD Close
+}
+
+func (fd *FD) Read(p *[]byte) (n int, err *os.Error) {	// reads into p, waiting on the poll server at each EAGAIN
+	if fd == nil || fd.osfd == nil {
+		return -1, os.EINVAL
+	}
+	n, err = fd.osfd.Read(p)
+	for err == os.EAGAIN {	// not ready yet: block until the poll server signals, then retry
+		pollserver.WaitRead(fd);
+		n, err = fd.osfd.Read(p)
+	}
+	return n, err	// result of the last underlying Read
+}
+
+func (fd *FD) Write(p *[]byte) (n int, err *os.Error) {	// writes until p is consumed, an error occurs, or a write returns 0
+	if fd == nil || fd.osfd == nil {
+		return -1, os.EINVAL
+	}
+	err = nil;
+	nn := 0	// bytes written so far
+	for nn < len(p) && err == nil {
+		n, err = fd.osfd.Write(p[nn:len(p)]);	// write the unsent tail of p
+		for err == os.EAGAIN {	// not ready yet: wait on the poll server, then retry
+			pollserver.WaitWrite(fd);
+			n, err = fd.osfd.Write(p[nn:len(p)])
+		}
+		if n > 0 {
+			nn += n
+		}
+		if n == 0 {	// no progress: stop instead of looping again
+			break
+		}
+	}
+	return nn, err	// total bytes written and the last error seen, if any
+}
+
+func (fd *FD) Accept(sa *syscall.Sockaddr) (nfd *FD, err *os.Error) {	// accepts a connection and wraps it in a new FD
+	if fd == nil || fd.osfd == nil {
+		return nil, os.EINVAL
+	}
+	s, e := syscall.accept(fd.fd, sa)
+	for e == syscall.EAGAIN {	// nothing to accept yet: wait as a reader, then retry
+		pollserver.WaitRead(fd);
+		s, e = syscall.accept(fd.fd, sa)
+	}
+	if e != 0 {
+		return nil, os.ErrnoToError(e)
+	}
+	if nfd, err = NewFD(s); err != nil {
+		syscall.close(s)	// do not leak the accepted descriptor when wrapping fails
+		return nil, err
+	}
+	return nfd, nil
+}
+
diff --git a/src/lib/net/fd_darwin.go b/src/lib/net/fd_darwin.go
index 23d085c..16f0d4e 100644
--- a/src/lib/net/fd_darwin.go
+++ b/src/lib/net/fd_darwin.go
@@ -2,161 +2,54 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// TODO(rsc): All the prints in this file should go to standard error.
+// Waiting for FDs via kqueue/kevent.
 
 package net
 
 import (
 	"net";
-	"once";
 	"os";
 	"syscall";
 )
 
-const Debug = false
-
-// Network file descriptor.  Only intended to be used internally,
-// but have to export to make it available in other files implementing package net.
-export type FD struct {
-	fd int64;
-	cr *chan *FD;
-	cw *chan *FD;
-
-	// owned by fd wait server
-	ncr, ncw int;
-	next *FD;
+export type Pollster struct {	// kqueue-based poller
+	kq int64;	// descriptor returned by syscall.kqueue
+	eventbuf [10]syscall.Kevent;	// storage that kevent fills in WaitFD
+	events *[]syscall.Kevent;	// slice of eventbuf holding events not yet returned by WaitFD
 }
 
-func WaitRead(fd *FD);
-func WaitWrite(fd *FD);
-func StartServer();
-
-func MakeNonblocking(fd int64) *os.Error {
-	if Debug { print("MakeNonBlocking ", fd, "\n") }
-	flags, e := syscall.fcntl(fd, syscall.F_GETFL, 0)
-	if e != 0 {
-		return os.ErrnoToError(e)
-	}
-	flags, e = syscall.fcntl(fd, syscall.F_SETFL, flags | syscall.O_NONBLOCK)
-	if e != 0 {
-		return os.ErrnoToError(e)
-	}
-	return nil
-}
-
-export func NewFD(fd int64) (f *FD, err *os.Error) {
-	once.Do(&StartServer);
-	if err = MakeNonblocking(fd); err != nil {
-		return nil, err
-	}
-	f = new(FD);
-	f.fd = fd;
-	f.cr = new(chan *FD);
-	f.cw = new(chan *FD);
-	return f, nil
-}
-
-func (fd *FD) Close() *os.Error {
-	if fd == nil {
-		return os.EINVAL
-	}
-	r1, e := syscall.close(fd.fd);
-	if e != 0 {
-		return os.ErrnoToError(e)
-	}
-	return nil
-}
-
-func (fd *FD) Read(p *[]byte) (n int, err *os.Error) {
-	if fd == nil {
-		return -1, os.EINVAL
-	}
-L:	nn, e := syscall.read(fd.fd, &p[0], int64(len(p)))
-	switch {
-	case e == syscall.EAGAIN:
-		WaitRead(fd)
-		goto L
-	case e != 0:
-		return -1, os.ErrnoToError(e)
-	}
-	return int(nn), nil
-}
-
-func (fd *FD) Write(p *[]byte) (n int, err *os.Error) {
-	if fd == nil {
-		return -1, os.EINVAL
-	}
-	total := len(p)
-	for len(p) > 0 {
-	L:	nn, e := syscall.write(fd.fd, &p[0], int64(len(p)))
-		switch {
-		case e == syscall.EAGAIN:
-			WaitWrite(fd)
-			goto L
-		case e != 0:
-			return total - len(p), os.ErrnoToError(e)
-		}
-		p = p[nn:len(p)]
-	}
-	return total, nil
-}
-
-func (fd *FD) Accept(sa *syscall.Sockaddr) (nfd *FD, err *os.Error) {
-	if fd == nil {
-		return nil, os.EINVAL
-	}
-L:	s, e := syscall.accept(fd.fd, sa)
-	switch {
-	case e == syscall.EAGAIN:
-		WaitRead(fd)
-		goto L
-	case e != 0:
+export func NewPollster() (p *Pollster, err *os.Error) {	// creates a Pollster backed by a new kqueue
+	p = new(Pollster);
+	var e int64;
+	if p.kq, e = syscall.kqueue(); e != 0 {	// a non-zero errno is returned as an *os.Error
 		return nil, os.ErrnoToError(e)
 	}
-	nfd, err = NewFD(s)
-	if err != nil {
-		syscall.close(s)
-		return nil, err
-	}
-	return nfd, nil
+	p.events = (&p.eventbuf)[0:0]	// start with no buffered events
+	return p, nil
 }
 
-
-// Waiting for FDs via kqueue(2).
-type Kstate struct {
-	cr *chan *FD;
-	cw *chan *FD;
-	pr *os.FD;
-	pw *os.FD;
-	pend *map[int64] *FD;
-	kq int64;
-}
-
-var kstate Kstate;
-
-func KqueueAdd(fd int64, mode byte, repeat bool) *os.Error {
-	if Debug { print("Kqueue add ", fd, " ", mode, " ", repeat, "\n") }
+func (p *Pollster) AddFD(fd int64, mode int, repeat bool) *os.Error {
 	var kmode int16;
 	if mode == 'r' {
 		kmode = syscall.EVFILT_READ
 	} else {
 		kmode = syscall.EVFILT_WRITE
 	}
-
 	var events [1]syscall.Kevent;
 	ev := &events[0];
 	ev.ident = fd;
 	ev.filter = kmode;
 
 	// EV_ADD - add event to kqueue list
-	// EV_RECEIPT - generate fake EV_ERROR as result of add
+	// EV_RECEIPT - generate fake EV_ERROR as result of add,
+	//	rather than waiting for real event
 	// EV_ONESHOT - delete the event the first time it triggers
 	ev.flags = syscall.EV_ADD | syscall.EV_RECEIPT
 	if !repeat {
 		ev.flags |= syscall.EV_ONESHOT
 	}
 
-	n, e := syscall.kevent(kstate.kq, &events, &events, nil);
+	n, e := syscall.kevent(p.kq, &events, &events, nil);
 	if e != 0 {
 		return os.ErrnoToError(e)
 	}
@@ -169,166 +62,29 @@
 	return nil
 }
 
-func KqueueAddFD(fd *FD, mode byte) *os.Error {
-	if e := KqueueAdd(fd.fd, 'r', false); e != nil {
-		return e
+func (p *Pollster) WaitFD() (fd int64, mode int, err *os.Error) {	// returns the next ready (fd, 'r' or 'w'), refilling from kevent when empty
+	for len(p.events) == 0 {	// refill only when every buffered event has been handed out
+		nn, e := syscall.kevent(p.kq, nil, &p.eventbuf, nil)
+		if e != 0 {
+			if e == syscall.EAGAIN || e == syscall.EINTR {	// retry on these two errnos
+				continue
+			}
+			return -1, 0, os.ErrnoToError(e)
+		}
+		p.events = (&p.eventbuf)[0:nn]	// expose the nn events kevent reported
 	}
-	id := fd.fd << 1
-	if mode == 'r' {
-		fd.ncr++
-	} else {
-		id++
-		fd.ncw++
-	}
-	kstate.pend[id] = fd
-	return nil
-}
-
-func KqueueGet(events *[]syscall.Kevent) (n int, err *os.Error) {
-	var nn, e int64;
-	if nn, e = syscall.kevent(kstate.kq, nil, events, nil); e != 0 {
-		return -1, os.ErrnoToError(e)
-	}
-	return int(nn),  nil
-}
-
-func KqueueLookup(ev *syscall.Kevent) (fd *FD, mode byte) {
-	id := ev.ident << 1
+	ev := &p.events[0];	// take the first buffered event
+	p.events = p.events[1:len(p.events)];	// and drop it from the buffer
+	fd = ev.ident;
 	if ev.filter == syscall.EVFILT_READ {
 		mode = 'r'
 	} else {
-		id++
 		mode = 'w'
 	}
-	var ok bool
-	if fd, ok = kstate.pend[id]; !ok {
-		return nil, 0
-	}
-	kstate.pend[id] = nil, false
-	return fd, mode
+	return fd, mode, nil	// any filter other than EVFILT_READ is reported as 'w'
 }
 
-func Serve() {
-	var r, e int64;
-	k := &kstate;
-
-	if Debug { print("Kqueue server running\n") }
-	var events [10]syscall.Kevent;
-	var scratch [100]byte;
-	for {
-		var n int
-		var err *os.Error;
-		if n, err = KqueueGet(&events); err != nil {
-			print("kqueue get: ", err.String(), "\n")
-			return
-		}
-		if Debug { print("Kqueue server get ", n, "\n") }
-		for i := 0; i < n; i++ {
-			ev := &events[i]
-			if ev.ident == k.pr.fd {
-				if Debug { print("Kqueue server wakeup\n") }
-				// Drain our wakeup pipe
-				for {
-					nn, e := k.pr.Read(&scratch)
-					if Debug { print("Read ", k.pr.fd, " ", nn, " ", e.String(), "\n") }
-					if nn <= 0 {
-						break
-					}
-				}
-
-				if Debug { print("Kqueue server drain channels\n") }
-				// Then read from channels.
-				for {
-					fd, ok := <-k.cr
-					if !ok {
-						break
-					}
-					KqueueAddFD(fd, 'r')
-				}
-				for {
-					fd, ok := <-k.cw
-					if !ok {
-						break
-					}
-					KqueueAddFD(fd, 'w')
-				}
-				if Debug { print("Kqueue server awake\n") }
-				continue
-			}
-
-			// Otherwise, wakeup the right FD.
-			fd, mode := KqueueLookup(ev);
-			if fd == nil {
-				print("kqueue: unexpected wakeup for fd=", ev.ident, " filter=", ev.filter, "\n")
-				continue
-			}
-			if mode == 'r' {
-				if Debug { print("Kqueue server r fd=", fd.fd, " ncr=", fd.ncr, "\n") }
-				for fd.ncr > 0 {
-					fd.ncr--
-					fd.cr <- fd
-				}
-			} else {
-				if Debug { print("Kqueue server w fd=", fd.fd, " ncw=", fd.ncw, "\n") }
-				for fd.ncw > 0 {
-					fd.ncw--
-					fd.cw <- fd
-				}
-			}
-		}
-	}
-}
-
-func StartServer() {
-	k := &kstate;
-
-	k.cr = new(chan *FD, 1);
-	k.cw = new(chan *FD, 1);
-	k.pend = new(map[int64] *FD)
-
-	var err *os.Error
-	if k.pr, k.pw, err = os.Pipe(); err != nil {
-		print("kqueue pipe: ", err.String(), "\n")
-		return
-	}
-
-	if err := MakeNonblocking(k.pr.fd); err != nil {
-		print("make nonblocking pr: ", err.String(), "\n")
-		return
-	}
-	if err := MakeNonblocking(k.pw.fd); err != nil {
-		print("make nonblocking pw: ", err.String(), "\n")
-		return
-	}
-
-	var e int64
-	if k.kq, e = syscall.kqueue(); e != 0 {
-		err := os.ErrnoToError(e);
-		print("kqueue: ", err.String(), "\n")
-		return
-	}
-
-	if err := KqueueAdd(k.pr.fd, 'r', true); err != nil {
-		print("kqueue add pipe: ", err.String(), "\n")
-		return
-	}
-
-	go Serve()
-}
-
-func WakeupServer() {
-	var b [1]byte;
-	kstate.pw.Write(&b);
-}
-
-func WaitRead(fd *FD) {
-	kstate.cr <- fd;
-	WakeupServer();
-	<-fd.cr
-}
-
-func WaitWrite(fd *FD) {
-	kstate.cw <- fd;
-	WakeupServer();
-	<-fd.cw
+func (p *Pollster) Close() *os.Error {	// closes the kqueue descriptor
+	r, e := syscall.close(p.kq)	// r is not used
+	return os.ErrnoToError(e)	// NOTE(review): presumably nil when e is 0 - confirm in package os
 }
diff --git a/src/lib/net/fd_linux.go b/src/lib/net/fd_linux.go
index dc7ba2d..e20ca94 100644
--- a/src/lib/net/fd_linux.go
+++ b/src/lib/net/fd_linux.go
@@ -2,78 +2,135 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Network file descriptors.
+// Waiting for FDs via epoll(7).
 
 package net
 
 import (
+	"net";
 	"os";
 	"syscall";
-	"net"
 )
 
-/* BUG 6g has trouble with this.
+const (	// epoll event masks used for the two wait modes
+	Read = syscall.EPOLLIN | syscall.EPOLLRDHUP;	// requested for mode 'r' in AddFD
+	Write = syscall.EPOLLOUT	// requested for any other mode in AddFD
+)
 
-export type FD os.FD;
+export type Pollster struct {
+	epfd int64;
 
-export func NewFD(fd int64) (nfd *FD, err *os.Error) {
-	ofd := os.NewFD(fd)
-	return ofd, nil
+	// Events we're already waiting for
+	events *map[int64] uint;
 }
 
-func (fd *FD) Close() *os.Error {
-	var ofd *os.FD = fd
-	return ofd.Close()
-}
+export func NewPollster() (p *Pollster, err *os.Error) {
+	p = new(Pollster);
+	var e int64;
 
-func (fd *FD) Read(p *[]byte) (n int, err *os.Error) {
-	var ofd *os.FD = fd;
-	n, err = ofd.Read(p)
-	return n, err
-}
-
-func (fd *FD) Write(p *[]byte) (n int, err *os.Error) {
-	var ofd *os.FD = fd;
-	n, err = ofd.Write(p)
-	return n, err
-}
-
-*/
-
-// TODO: Replace with epoll.
-
-export type FD struct {
-	fd int64;
-	osfd *os.FD;
-}
-
-export func NewFD(fd int64) (nfd *FD, err *os.Error) {
-	nfd = new(FD);
-	nfd.osfd = os.NewFD(fd);
-	nfd.fd = fd
-	return nfd, nil
-}
-
-func (fd *FD) Close() *os.Error {
-	return fd.osfd.Close()
-}
-
-func (fd *FD) Read(p *[]byte) (n int, err *os.Error) {
-	n, err = fd.osfd.Read(p)
-	return n, err
-}
-
-func (fd *FD) Write(p *[]byte) (n int, err *os.Error) {
-	n, err = fd.osfd.Write(p)
-	return n, err
-}
-
-func (fd *FD) Accept(sa *syscall.Sockaddr) (nfd *FD, err *os.Error) {
-	s, e := syscall.accept(fd.fd, sa);
-	if e != 0 {
+	// The arg to epoll_create is a hint to the kernel
+	// about the number of FDs we will care about.
+	// We don't know.
+	if p.epfd, e = syscall.epoll_create(16); e != 0 {
 		return nil, os.ErrnoToError(e)
 	}
-	nfd, err = NewFD(s)
-	return nfd, err
+	p.events = new(map[int64] uint)
+	return p, nil
 }
 
+func (p *Pollster) AddFD(fd int64, mode int, repeat bool) *os.Error {	// adds a read or write wait for fd to the epoll set
+	var ev syscall.EpollEvent
+	var already bool;
+	ev.fd = int32(fd);
+	ev.events, already = p.events[fd]	// start from the bits already registered for fd, if any
+	if !repeat {
+		ev.events |= syscall.EPOLLONESHOT
+	}
+	if mode == 'r' {
+		ev.events |= Read
+	} else {
+		ev.events |= Write
+	}
+
+	var op int64
+	if already {	// fd is already in the map: modify its registration rather than add a new one
+		op = syscall.EPOLL_CTL_MOD
+	} else {
+		op = syscall.EPOLL_CTL_ADD
+	}
+	if e := syscall.epoll_ctl(p.epfd, op, fd, &ev); e != 0 {
+		return os.ErrnoToError(e)
+	}
+	p.events[fd] = ev.events	// record the new mask only after epoll_ctl succeeded
+	return nil
+}
+
+func (p *Pollster) StopWaiting(fd int64, bits uint) {	// clears bits from fd's one-shot registration; called from WaitFD
+	events, already := p.events[fd]
+	if !already {
+		print("Epoll unexpected fd=", fd, "\n")
+		return
+	}
+
+	// If syscall.EPOLLONESHOT is not set, the wait
+	// is a repeating wait, so don't change it.
+	if events & syscall.EPOLLONESHOT == 0 {
+		return
+	}
+
+	// Disable the given bits.
+	// If we're still waiting for other events, modify the fd
+	// event in the kernel.  Otherwise, delete it.
+	events &= ^bits
+	if int32(events) & ^syscall.EPOLLONESHOT != 0 {	// something besides the one-shot flag remains
+		var ev syscall.EpollEvent;
+		ev.fd = int32(fd);
+		ev.events = events;
+		if e := syscall.epoll_ctl(p.epfd, syscall.EPOLL_CTL_MOD, fd, &ev); e != 0 {
+			print("Epoll modify fd=", fd, ": ", os.ErrnoToError(e).String(), "\n")
+		}
+		p.events[fd] = events	// recorded even when the epoll_ctl above failed
+	} else {
+		if e := syscall.epoll_ctl(p.epfd, syscall.EPOLL_CTL_DEL, fd, nil); e != 0 {
+			print("Epoll delete fd=", fd, ": ", os.ErrnoToError(e).String(), "\n")
+		}
+		p.events[fd] = 0, false	// remove fd from the map
+	}
+}
+
+func (p *Pollster) WaitFD() (fd int64, mode int, err *os.Error) {	// waits for one epoll event and reports it as (fd, 'r' or 'w')
+	// Get an event.
+	var evarray [1]syscall.EpollEvent;
+	ev := &evarray[0];
+	n, e := syscall.epoll_wait(p.epfd, &evarray, -1)	// -1 is presumably the timeout (wait indefinitely) - confirm in syscall
+	for e == syscall.EAGAIN || e == syscall.EINTR {	// retry on these two errnos
+		n, e = syscall.epoll_wait(p.epfd, &evarray, -1)
+	}
+	if e != 0 {
+		return -1, 0, os.ErrnoToError(e)
+	}
+	fd = int64(ev.fd)
+
+	if ev.events & Write != 0 {	// Write is tested first, so an event with both bits set is reported as 'w'
+		p.StopWaiting(fd, Write)
+		return fd, 'w', nil
+	}
+	if ev.events & Read != 0 {
+		p.StopWaiting(fd, Read)
+		return fd, 'r', nil
+	}
+
+	// Other events are error conditions - wake whoever is waiting.
+	events, already := p.events[fd]
+	if events & Write != 0 {	// a write wait is registered: report 'w'
+		p.StopWaiting(fd, Write)
+		return fd, 'w', nil
+	}
+	p.StopWaiting(fd, Read)	// otherwise report 'r'
+	return fd, 'r', nil
+}
+
+func (p *Pollster) Close() *os.Error {	// closes the epoll descriptor
+	r, e := syscall.close(p.epfd)	// r is not used
+	return os.ErrnoToError(e)	// NOTE(review): presumably nil when e is 0 - confirm in package os
+}
diff --git a/src/lib/net/net_linux.go b/src/lib/net/net_linux.go
index bc0b8f6..17ffa5a 100644
--- a/src/lib/net/net_linux.go
+++ b/src/lib/net/net_linux.go
@@ -25,11 +25,21 @@
 	return syscall.SockaddrInet4ToSockaddr(sa), nil
 }
 
+var IPv6zero [16]byte;
+
 export func IPv6ToSockaddr(p *[]byte, port int) (sa1 *syscall.Sockaddr, err *os.Error) {
 	p = ToIPv6(p)
 	if p == nil || port < 0 || port > 0xFFFF {
 		return nil, os.EINVAL
 	}
+
+	// IPv4 callers use 0.0.0.0 to mean "announce on any available address".
+	// In IPv6 mode, Linux treats that as meaning "announce on 0.0.0.0",
+	// which it refuses to do.  Rewrite to the IPv6 all zeros.
+	if p4 := ToIPv4(p); p4 != nil && p4[0] == 0 && p4[1] == 0 && p4[2] == 0 && p4[3] == 0 {
+		p = &IPv6zero;
+	}
+
 	sa := new(syscall.SockaddrInet6);
 	sa.family = syscall.AF_INET6;
 	sa.port[0] = byte(port>>8);