internal/socket: reuse buffers in recv/sendMsgs
Use a pool to reuse internally allocated temporary buffers, i.e. the
mmsghdrs structures and the associated iovecs and sockaddr buffers.
The temporary buffers obtained from the pool are re-allocated when their
sizes are not sufficient for the current call.
The (hopefully reasonable) assumption is that RecvMsgs and SendMsgs are
called with a somewhat uniform number of Messages/Buffers. If not, the
buffers in the pool will likely be re-allocated a few times until they
reach the size required for the "biggest" caller.
This pooling can significantly reduce the temporary allocations made in
recv/sendMsgs. While the effectiveness of this change clearly depends
on the usage pattern, pressure on the GC, etc., some improvements can
already be observed in the micro-benchmark BenchmarkUDP: the number and
size of allocations per iteration decreases significantly and there is a
small but noticable improvement in time per iteration.
```
name old time/op new time/op delta
UDP/Iter-1-8 5.50µs ± 2% 5.46µs ± 2% ~ (p=0.055 n=20+20)
UDP/Batch-1-8 5.65µs ± 3% 5.56µs ± 4% -1.68% (p=0.011 n=20+20)
UDP/Iter-2-8 11.0µs ± 3% 11.0µs ± 3% ~ (p=0.645 n=20+20)
UDP/Batch-2-8 8.40µs ± 3% 8.24µs ± 5% -1.87% (p=0.012 n=20+20)
UDP/Iter-4-8 22.0µs ± 1% 21.9µs ± 3% ~ (p=0.437 n=17+20)
UDP/Batch-4-8 13.3µs ± 2% 12.7µs ± 2% -4.69% (p=0.000 n=20+20)
UDP/Iter-8-8 44.2µs ± 3% 44.0µs ± 2% ~ (p=0.551 n=20+20)
UDP/Batch-8-8 24.2µs ± 4% 23.1µs ± 4% -4.65% (p=0.000 n=20+20)
UDP/Iter-16-8 87.9µs ± 4% 88.1µs ± 3% ~ (p=0.708 n=19+20)
UDP/Batch-16-8 45.6µs ± 4% 44.1µs ± 5% -3.12% (p=0.000 n=20+20)
UDP/Iter-32-8 175µs ± 4% 176µs ± 4% ~ (p=0.087 n=20+20)
UDP/Batch-32-8 87.9µs ± 1% 84.5µs ± 6% -3.78% (p=0.000 n=19+19)
UDP/Iter-64-8 353µs ± 3% 352µs ± 3% ~ (p=0.414 n=20+20)
UDP/Batch-64-8 172µs ± 4% 172µs ±11% ~ (p=0.157 n=20+20)
UDP/Iter-128-8 705µs ± 3% 699µs ± 4% ~ (p=0.142 n=20+20)
UDP/Batch-128-8 345µs ± 2% 343µs ± 6% ~ (p=0.134 n=20+20)
UDP/Iter-256-8 1.41ms ± 3% 1.41ms ± 4% ~ (p=0.758 n=20+20)
UDP/Batch-256-8 692µs ± 4% 685µs ± 4% ~ (p=0.114 n=20+20)
UDP/Iter-512-8 2.82ms ± 2% 2.81ms ± 2% ~ (p=0.820 n=20+20)
UDP/Batch-512-8 1.27ms ± 3% 0.75ms ± 6% -40.62% (p=0.000 n=20+20)
name old alloc/op new alloc/op delta
UDP/Iter-1-8 408B ± 0% 424B ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-1-8 440B ± 0% 232B ± 0% -47.27% (p=0.000 n=20+20)
UDP/Iter-2-8 816B ± 0% 848B ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-2-8 696B ± 0% 280B ± 0% -59.77% (p=0.000 n=20+20)
UDP/Iter-4-8 1.63kB ± 0% 1.70kB ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-4-8 1.22kB ± 0% 0.38kB ± 0% -68.42% (p=0.000 n=20+20)
UDP/Iter-8-8 3.26kB ± 0% 3.39kB ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-8-8 2.26kB ± 0% 0.59kB ± 0% -73.76% (p=0.000 n=20+20)
UDP/Iter-16-8 6.53kB ± 0% 6.78kB ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-16-8 4.34kB ± 0% 1.01kB ± 0% -76.75% (p=0.000 n=20+20)
UDP/Iter-32-8 13.1kB ± 0% 13.6kB ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-32-8 8.50kB ± 0% 1.84kB ± 0% -78.34% (p=0.000 n=20+18)
UDP/Iter-64-8 26.1kB ± 0% 27.1kB ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-64-8 16.8kB ± 0% 3.5kB ± 0% -79.16% (p=0.000 n=20+20)
UDP/Iter-128-8 52.2kB ± 0% 54.3kB ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-128-8 33.5kB ± 0% 6.8kB ± 0% -79.56% (p=0.000 n=20+16)
UDP/Iter-256-8 104kB ± 0% 109kB ± 0% +3.92% (p=0.000 n=20+17)
UDP/Batch-256-8 66.7kB ± 0% 13.5kB ± 0% -79.77% (p=0.000 n=20+20)
UDP/Iter-512-8 209kB ± 0% 217kB ± 0% +3.92% (p=0.000 n=20+20)
UDP/Batch-512-8 121kB ± 0% 15kB ± 0% -87.89% (p=0.000 n=20+20)
name old allocs/op new allocs/op delta
UDP/Iter-1-8 14.0 ± 0% 14.0 ± 0% ~ (all equal)
UDP/Batch-1-8 14.0 ± 0% 8.0 ± 0% -42.86% (p=0.000 n=20+20)
UDP/Iter-2-8 28.0 ± 0% 28.0 ± 0% ~ (all equal)
UDP/Batch-2-8 20.0 ± 0% 10.0 ± 0% -50.00% (p=0.000 n=20+20)
UDP/Iter-4-8 56.0 ± 0% 56.0 ± 0% ~ (all equal)
UDP/Batch-4-8 32.0 ± 0% 14.0 ± 0% -56.25% (p=0.000 n=20+20)
UDP/Iter-8-8 112 ± 0% 112 ± 0% ~ (all equal)
UDP/Batch-8-8 56.0 ± 0% 22.0 ± 0% -60.71% (p=0.000 n=20+20)
UDP/Iter-16-8 224 ± 0% 224 ± 0% ~ (all equal)
UDP/Batch-16-8 104 ± 0% 38 ± 0% -63.46% (p=0.000 n=20+20)
UDP/Iter-32-8 448 ± 0% 448 ± 0% ~ (all equal)
UDP/Batch-32-8 200 ± 0% 70 ± 0% -65.00% (p=0.000 n=20+20)
UDP/Iter-64-8 896 ± 0% 896 ± 0% ~ (all equal)
UDP/Batch-64-8 392 ± 0% 134 ± 0% -65.82% (p=0.000 n=20+20)
UDP/Iter-128-8 1.79k ± 0% 1.79k ± 0% ~ (all equal)
UDP/Batch-128-8 776 ± 0% 262 ± 0% -66.24% (p=0.000 n=20+20)
UDP/Iter-256-8 3.58k ± 0% 3.58k ± 0% ~ (all equal)
UDP/Batch-256-8 1.54k ± 0% 0.52k ± 0% -66.45% (p=0.000 n=20+20)
UDP/Iter-512-8 7.17k ± 0% 7.17k ± 0% ~ (all equal)
UDP/Batch-512-8 2.61k ± 0% 0.56k ± 0% -78.48% (p=0.000 n=20+20)
```
Fixes golang/go#26838
Change-Id: Id34e362737455cd48df5bc751426be49fbc28094
GitHub-Last-Rev: 4e33d507bb0325fd2cd8b58b586081de1acdf2ad
GitHub-Pull-Request: golang/net#102
Reviewed-on: https://go-review.googlesource.com/c/net/+/315589
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Tobias Klauser <tobias.klauser@gmail.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/internal/socket/mmsghdr_unix.go b/internal/socket/mmsghdr_unix.go
index 5025a0f..42c4a0d 100644
--- a/internal/socket/mmsghdr_unix.go
+++ b/internal/socket/mmsghdr_unix.go
@@ -7,25 +7,13 @@
package socket
-import "net"
+import (
+ "net"
+ "sync"
+)
type mmsghdrs []mmsghdr
-func (hs mmsghdrs) pack(ms []Message, parseFn func([]byte, string) (net.Addr, error), marshalFn func(net.Addr) []byte) error {
- for i := range hs {
- vs := make([]iovec, len(ms[i].Buffers))
- var sa []byte
- if parseFn != nil {
- sa = make([]byte, sizeofSockaddrInet6)
- }
- if marshalFn != nil {
- sa = marshalFn(ms[i].Addr)
- }
- hs[i].Hdr.pack(vs, ms[i].Buffers, ms[i].OOB, sa)
- }
- return nil
-}
-
func (hs mmsghdrs) unpack(ms []Message, parseFn func([]byte, string) (net.Addr, error), hint string) error {
for i := range hs {
ms[i].N = int(hs[i].Len)
@@ -41,3 +29,84 @@
}
return nil
}
+
+// mmsghdrsPacker packs Message-slices into mmsghdrs (re-)using pre-allocated buffers.
+type mmsghdrsPacker struct {
+ // hs are the pre-allocated mmsghdrs.
+ hs mmsghdrs
+ // sockaddrs is the pre-allocated buffer for the Hdr.Name buffers.
+ // We use one large buffer for all messages and slice it up.
+ sockaddrs []byte
+ // vs are the pre-allocated iovecs.
+ // We allocate one large buffer for all messages and slice it up. This allows to reuse the buffer
+ // if the number of buffers per message is distributed differently between calls.
+ vs []iovec
+}
+
+func (p *mmsghdrsPacker) prepare(ms []Message) {
+ n := len(ms)
+ if n <= cap(p.hs) {
+ p.hs = p.hs[:n]
+ } else {
+ p.hs = make(mmsghdrs, n)
+ }
+ if n*sizeofSockaddrInet6 <= cap(p.sockaddrs) {
+ p.sockaddrs = p.sockaddrs[:n*sizeofSockaddrInet6]
+ } else {
+ p.sockaddrs = make([]byte, n*sizeofSockaddrInet6)
+ }
+
+ nb := 0
+ for _, m := range ms {
+ nb += len(m.Buffers)
+ }
+ if nb <= cap(p.vs) {
+ p.vs = p.vs[:nb]
+ } else {
+ p.vs = make([]iovec, nb)
+ }
+}
+
+func (p *mmsghdrsPacker) pack(ms []Message, parseFn func([]byte, string) (net.Addr, error), marshalFn func(net.Addr, []byte) int) mmsghdrs {
+ p.prepare(ms)
+ hs := p.hs
+ vsRest := p.vs
+ saRest := p.sockaddrs
+ for i := range hs {
+ nvs := len(ms[i].Buffers)
+ vs := vsRest[:nvs]
+ vsRest = vsRest[nvs:]
+
+ var sa []byte
+ if parseFn != nil {
+ sa = saRest[:sizeofSockaddrInet6]
+ saRest = saRest[sizeofSockaddrInet6:]
+ } else if marshalFn != nil {
+ n := marshalFn(ms[i].Addr, saRest)
+ sa = saRest[:n]
+ saRest = saRest[n:]
+ }
+ hs[i].Hdr.pack(vs, ms[i].Buffers, ms[i].OOB, sa)
+ }
+ return hs
+}
+
+var defaultMmsghdrsPool = mmsghdrsPool{
+ p: sync.Pool{
+ New: func() interface{} {
+ return new(mmsghdrsPacker)
+ },
+ },
+}
+
+type mmsghdrsPool struct {
+ p sync.Pool
+}
+
+func (p *mmsghdrsPool) Get() *mmsghdrsPacker {
+ return p.p.Get().(*mmsghdrsPacker)
+}
+
+func (p *mmsghdrsPool) Put(packer *mmsghdrsPacker) {
+ p.p.Put(packer)
+}
diff --git a/internal/socket/rawconn_mmsg.go b/internal/socket/rawconn_mmsg.go
index 5d90de1..d80a15c 100644
--- a/internal/socket/rawconn_mmsg.go
+++ b/internal/socket/rawconn_mmsg.go
@@ -17,14 +17,13 @@
for i := range ms {
ms[i].raceWrite()
}
- hs := make(mmsghdrs, len(ms))
+ packer := defaultMmsghdrsPool.Get()
+ defer defaultMmsghdrsPool.Put(packer)
var parseFn func([]byte, string) (net.Addr, error)
if c.network != "tcp" {
parseFn = parseInetAddr
}
- if err := hs.pack(ms, parseFn, nil); err != nil {
- return 0, err
- }
+ hs := packer.pack(ms, parseFn, nil)
var operr error
var n int
fn := func(s uintptr) bool {
@@ -50,14 +49,13 @@
for i := range ms {
ms[i].raceRead()
}
- hs := make(mmsghdrs, len(ms))
- var marshalFn func(net.Addr) []byte
+ packer := defaultMmsghdrsPool.Get()
+ defer defaultMmsghdrsPool.Put(packer)
+ var marshalFn func(net.Addr, []byte) int
if c.network != "tcp" {
marshalFn = marshalInetAddr
}
- if err := hs.pack(ms, nil, marshalFn); err != nil {
- return 0, err
- }
+ hs := packer.pack(ms, nil, marshalFn)
var operr error
var n int
fn := func(s uintptr) bool {
diff --git a/internal/socket/rawconn_msg.go b/internal/socket/rawconn_msg.go
index dfed9a8..2e2d61b 100644
--- a/internal/socket/rawconn_msg.go
+++ b/internal/socket/rawconn_msg.go
@@ -55,7 +55,9 @@
vs := make([]iovec, len(m.Buffers))
var sa []byte
if m.Addr != nil {
- sa = marshalInetAddr(m.Addr)
+ var a [sizeofSockaddrInet6]byte
+ n := marshalInetAddr(m.Addr, a[:])
+ sa = a[:n]
}
h.pack(vs, m.Buffers, m.OOB, sa)
var operr error
diff --git a/internal/socket/sys_posix.go b/internal/socket/sys_posix.go
index d8dda77..42b8f23 100644
--- a/internal/socket/sys_posix.go
+++ b/internal/socket/sys_posix.go
@@ -17,22 +17,24 @@
"time"
)
-func marshalInetAddr(a net.Addr) []byte {
+// marshalInetAddr writes a in sockaddr format into the buffer b.
+// The buffer must be sufficiently large (sizeofSockaddrInet4/6).
+// Returns the number of bytes written.
+func marshalInetAddr(a net.Addr, b []byte) int {
switch a := a.(type) {
case *net.TCPAddr:
- return marshalSockaddr(a.IP, a.Port, a.Zone)
+ return marshalSockaddr(a.IP, a.Port, a.Zone, b)
case *net.UDPAddr:
- return marshalSockaddr(a.IP, a.Port, a.Zone)
+ return marshalSockaddr(a.IP, a.Port, a.Zone, b)
case *net.IPAddr:
- return marshalSockaddr(a.IP, 0, a.Zone)
+ return marshalSockaddr(a.IP, 0, a.Zone, b)
default:
- return nil
+ return 0
}
}
-func marshalSockaddr(ip net.IP, port int, zone string) []byte {
+func marshalSockaddr(ip net.IP, port int, zone string, b []byte) int {
if ip4 := ip.To4(); ip4 != nil {
- b := make([]byte, sizeofSockaddrInet4)
switch runtime.GOOS {
case "android", "illumos", "linux", "solaris", "windows":
NativeEndian.PutUint16(b[:2], uint16(sysAF_INET))
@@ -42,10 +44,9 @@
}
binary.BigEndian.PutUint16(b[2:4], uint16(port))
copy(b[4:8], ip4)
- return b
+ return sizeofSockaddrInet4
}
if ip6 := ip.To16(); ip6 != nil && ip.To4() == nil {
- b := make([]byte, sizeofSockaddrInet6)
switch runtime.GOOS {
case "android", "illumos", "linux", "solaris", "windows":
NativeEndian.PutUint16(b[:2], uint16(sysAF_INET6))
@@ -58,9 +59,9 @@
if zone != "" {
NativeEndian.PutUint32(b[24:28], uint32(zoneCache.index(zone)))
}
- return b
+ return sizeofSockaddrInet6
}
- return nil
+ return 0
}
func parseInetAddr(b []byte, network string) (net.Addr, error) {