draw: use a sync.Pool for kernel scaling's temporary buffers.
benchmark old ns/op new ns/op delta
BenchmarkScaleBLLargeDown 257715146 260286012 +1.00%
BenchmarkScaleCRLargeDown 426797448 430078734 +0.77%
BenchmarkScaleBLDown 4449939 4222542 -5.11%
BenchmarkScaleCRDown 8160446 8010056 -1.84%
BenchmarkScaleBLUp 22290312 21044122 -5.59%
BenchmarkScaleCRUp 33010722 32021468 -3.00%
BenchmarkScaleCRSrcGray 13307961 13020192 -2.16%
BenchmarkScaleCRSrcNRGBA 40567431 40801939 +0.58%
BenchmarkScaleCRSrcRGBA 39892971 40240558 +0.87%
BenchmarkScaleCRSrcYCbCr 59020222 59686699 +1.13%
benchmark old allocs new allocs delta
BenchmarkScaleBLLargeDown 1 1 +0.00%
BenchmarkScaleCRLargeDown 1 2 +100.00%
BenchmarkScaleBLDown 1 0 -100.00%
BenchmarkScaleCRDown 1 0 -100.00%
BenchmarkScaleBLUp 1 0 -100.00%
BenchmarkScaleCRUp 1 0 -100.00%
BenchmarkScaleCRSrcGray 1 0 -100.00%
BenchmarkScaleCRSrcNRGBA 1 0 -100.00%
BenchmarkScaleCRSrcRGBA 1 0 -100.00%
BenchmarkScaleCRSrcYCbCr 1 0 -100.00%
benchmark old bytes new bytes delta
BenchmarkScaleBLLargeDown 14745600 2949200 -80.00%
BenchmarkScaleCRLargeDown 14745600 4915333 -66.67%
BenchmarkScaleBLDown 1523712 5079 -99.67%
BenchmarkScaleCRDown 1523712 7619 -99.50%
BenchmarkScaleBLUp 10117120 101175 -99.00%
BenchmarkScaleCRUp 10117120 202350 -98.00%
BenchmarkScaleCRSrcGray 4915200 49156 -99.00%
BenchmarkScaleCRSrcNRGBA 4915200 163853 -96.67%
BenchmarkScaleCRSrcRGBA 4915200 163853 -96.67%
BenchmarkScaleCRSrcYCbCr 4915200 245780 -95.00%
The increase in BenchmarkScale??LargeDown number of allocs I think is an
accounting error due to the low number of iterations: a low denominator.
I suspect that there are one or two extra allocs up front for using the
sync.Pool, but one fewer alloc per iteration. The number of iterations
is only 5 for BL and 3 for CR, for the default timeout. If I increase
the -test.benchtime value to 5s, then the reported average (allocs/op)
drop from 2 to 0, so the delta should actually be -100% instead of +0 or
+100%.
Change-Id: I21d9bb0086bdb25517b6a430e8a21bdf3db026f6
Reviewed-on: https://go-review.googlesource.com/8150
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 1acae2e..6efdafa 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -936,8 +936,14 @@
// Create a temporary buffer:
// scaleX distributes the source image's columns over the temporary image.
// scaleY distributes the temporary image's rows over the destination image.
- // TODO: is it worth having a sync.Pool for this temporary buffer?
- tmp := make([][4]float64, z.dw*z.sh)
+ var tmp [][4]float64
+ if z.pool.New != nil {
+ tmpp := z.pool.Get().(*[][4]float64)
+ defer z.pool.Put(tmpp)
+ tmp = *tmpp
+ } else {
+ tmp = z.makeTmpBuf()
+ }
// sr is the source pixels. If it extends beyond the src bounds,
// we cannot use the type-specific fast paths, as they access
diff --git a/draw/impl.go b/draw/impl.go
index 6278c92..a1de1cd 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -3036,8 +3036,14 @@
// Create a temporary buffer:
// scaleX distributes the source image's columns over the temporary image.
// scaleY distributes the temporary image's rows over the destination image.
- // TODO: is it worth having a sync.Pool for this temporary buffer?
- tmp := make([][4]float64, z.dw*z.sh)
+ var tmp [][4]float64
+ if z.pool.New != nil {
+ tmpp := z.pool.Get().(*[][4]float64)
+ defer z.pool.Put(tmpp)
+ tmp = *tmpp
+ } else {
+ tmp = z.makeTmpBuf()
+ }
// sr is the source pixels. If it extends beyond the src bounds,
// we cannot use the type-specific fast paths, as they access
diff --git a/draw/scale.go b/draw/scale.go
index 1e95971..2e481b1 100644
--- a/draw/scale.go
+++ b/draw/scale.go
@@ -10,6 +10,7 @@
"image"
"image/color"
"math"
+ "sync"
"golang.org/x/image/math/f64"
)
@@ -88,13 +89,17 @@
// Scale implements the Scaler interface.
func (q *Kernel) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
- q.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy()).Scale(dst, dr, src, sr, opts)
+ q.newScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy(), false).Scale(dst, dr, src, sr, opts)
}
// NewScaler returns a Scaler that is optimized for scaling multiple times with
// the same fixed destination and source width and height.
func (q *Kernel) NewScaler(dw, dh, sw, sh int) Scaler {
- return &kernelScaler{
+ return q.newScaler(dw, dh, sw, sh, true)
+}
+
+func (q *Kernel) newScaler(dw, dh, sw, sh int, usePool bool) Scaler {
+ z := &kernelScaler{
kernel: q,
dw: int32(dw),
dh: int32(dh),
@@ -103,6 +108,13 @@
horizontal: newDistrib(q, int32(dw), int32(sw)),
vertical: newDistrib(q, int32(dh), int32(sh)),
}
+ if usePool {
+ z.pool.New = func() interface{} {
+ tmp := z.makeTmpBuf()
+ return &tmp
+ }
+ }
+ return z
}
var (
@@ -152,6 +164,11 @@
kernel *Kernel
dw, dh, sw, sh int32
horizontal, vertical distrib
+ pool sync.Pool
+}
+
+func (z *kernelScaler) makeTmpBuf() [][4]float64 {
+ return make([][4]float64, z.dw*z.sh)
}
// source is a range of contribs, their inverse total weight, and that ITW
diff --git a/draw/scale_test.go b/draw/scale_test.go
index bad9c5f..9cce5b1 100644
--- a/draw/scale_test.go
+++ b/draw/scale_test.go
@@ -393,6 +393,7 @@
scaler = n.NewScaler(dr.Dx(), dr.Dy(), sr.Dx(), sr.Dy())
}
+ b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
scaler.Scale(dst, dr, src, sr, nil)
@@ -408,6 +409,7 @@
sr := src.Bounds()
m := transformMatrix(40, 10)
+ b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
q.Transform(dst, m, src, sr, nil)