draw: optimize Kernel.Transform.
benchmark old ns/op new ns/op delta
BenchmarkTformCRSrcGray 6111610 5344117 -12.56%
BenchmarkTformCRSrcNRGBA 62070281 59295178 -4.47%
BenchmarkTformCRSrcRGBA 13840290 10612547 -23.32%
BenchmarkTformCRSrcUniform 591637 587621 -0.68%
BenchmarkTformCRSrcYCbCr 72219184 69404747 -3.90%
As of current origin/master, Gray and RGBA have fast paths but the other src
image types do not. Those other types have more overhead ("fat") per pixel, so
their relative improvement is smaller.
Change-Id: Ibbae91cd3cb3c139efb1dcc8fda1cb6432505189
Reviewed-on: https://go-review.googlesource.com/7794
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 4048e19..79ef527 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -958,7 +958,8 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky - iy]
for kx := ix; kx < jx; kx++ {
- p += $srcf[kx, ky] * xWeights[kx - ix] * yWeight
+ w := xWeights[kx - ix] * yWeight
+ p += $srcf[kx, ky] * w
}
}
$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]
diff --git a/draw/impl.go b/draw/impl.go
index 06b8432..89e7cd0 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -2719,9 +2719,10 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pi := src.PixOffset(kx, ky)
pru := uint32(src.Pix[pi]) * 0x101
- pr += float64(pru) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
}
}
out := uint8(fffftou(pr) >> 8)
@@ -2812,15 +2813,16 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pi := src.PixOffset(kx, ky)
pau := uint32(src.Pix[pi+3]) * 0x101
pru := uint32(src.Pix[pi+0]) * pau / 0xff
pgu := uint32(src.Pix[pi+1]) * pau / 0xff
pbu := uint32(src.Pix[pi+2]) * pau / 0xff
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -2910,15 +2912,16 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pi := src.PixOffset(kx, ky)
pru := uint32(src.Pix[pi+0]) * 0x101
pgu := uint32(src.Pix[pi+1]) * 0x101
pbu := uint32(src.Pix[pi+2]) * 0x101
pau := uint32(src.Pix[pi+3]) * 0x101
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3008,11 +3011,12 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3102,11 +3106,12 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3196,11 +3201,12 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3290,11 +3296,12 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3384,11 +3391,12 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3478,11 +3486,12 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3573,11 +3582,12 @@
for ky := iy; ky < jy; ky++ {
yWeight := yWeights[ky-iy]
for kx := ix; kx < jx; kx++ {
+ w := xWeights[kx-ix] * yWeight
pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * xWeights[kx-ix] * yWeight
- pg += float64(pgu) * xWeights[kx-ix] * yWeight
- pb += float64(pbu) * xWeights[kx-ix] * yWeight
- pa += float64(pau) * xWeights[kx-ix] * yWeight
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
}
}
dstColorRGBA64.R = fffftou(pr)