draw: optimize Kernel.Transform by skipping multiplications by zero weights.
benchmark old ns/op new ns/op delta
BenchmarkTformCRSrcGray 5096041 4820642 -5.40%
BenchmarkTformCRSrcNRGBA 10476578 8414331 -19.68%
BenchmarkTformCRSrcRGBA 10361135 7954413 -23.23%
BenchmarkTformCRSrcYCbCr 11952218 9824899 -17.80%
Change-Id: I8b4cfe68ecae85e447ae65ceecf185261445a8a2
Reviewed-on: https://go-review.googlesource.com/7991
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index b670521..3ff3d84 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -1131,10 +1131,12 @@
var pr, pg, pb, pa float64 $tweakVarP
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky - iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx - ix] * yWeight
- p += $srcf[kx, ky] * w
+ if yWeight := yWeights[ky - iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx - ix] * yWeight; w != 0 {
+ p += $srcf[kx, ky] * w
+ }
+ }
}
}
$outputf[dr.Min.X + int(dx), dr.Min.Y + int(dy), fffftou, p, 1]
diff --git a/draw/impl.go b/draw/impl.go
index 9b4ca07..f144531 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -3706,12 +3706,14 @@
var pr float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
- pru := uint32(src.Pix[pi]) * 0x101
- pr += float64(pru) * w
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
+ pru := uint32(src.Pix[pi]) * 0x101
+ pr += float64(pru) * w
+ }
+ }
}
}
out := uint8(fffftou(pr) >> 8)
@@ -3803,18 +3805,20 @@
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
- pau := uint32(src.Pix[pi+3]) * 0x101
- pru := uint32(src.Pix[pi+0]) * pau / 0xff
- pgu := uint32(src.Pix[pi+1]) * pau / 0xff
- pbu := uint32(src.Pix[pi+2]) * pau / 0xff
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
- pa += float64(pau) * w
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
+ pau := uint32(src.Pix[pi+3]) * 0x101
+ pru := uint32(src.Pix[pi+0]) * pau / 0xff
+ pgu := uint32(src.Pix[pi+1]) * pau / 0xff
+ pbu := uint32(src.Pix[pi+2]) * pau / 0xff
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
+ }
+ }
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -3905,18 +3909,20 @@
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
- pru := uint32(src.Pix[pi+0]) * 0x101
- pgu := uint32(src.Pix[pi+1]) * 0x101
- pbu := uint32(src.Pix[pi+2]) * 0x101
- pau := uint32(src.Pix[pi+3]) * 0x101
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
- pa += float64(pau) * w
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
+ pru := uint32(src.Pix[pi+0]) * 0x101
+ pgu := uint32(src.Pix[pi+1]) * 0x101
+ pbu := uint32(src.Pix[pi+2]) * 0x101
+ pau := uint32(src.Pix[pi+3]) * 0x101
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
+ }
+ }
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -4007,41 +4013,43 @@
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
- pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X)
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
+ pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X)
- // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
- pyy1 := int(src.Y[pi])<<16 + 1<<15
- pcb1 := int(src.Cb[pj]) - 128
- pcr1 := int(src.Cr[pj]) - 128
- pr8 := (pyy1 + 91881*pcr1) >> 16
- pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
- pb8 := (pyy1 + 116130*pcb1) >> 16
- if pr8 < 0 {
- pr8 = 0
- } else if pr8 > 0xff {
- pr8 = 0xff
- }
- if pg8 < 0 {
- pg8 = 0
- } else if pg8 > 0xff {
- pg8 = 0xff
- }
- if pb8 < 0 {
- pb8 = 0
- } else if pb8 > 0xff {
- pb8 = 0xff
- }
+ // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
+ pyy1 := int(src.Y[pi])<<16 + 1<<15
+ pcb1 := int(src.Cb[pj]) - 128
+ pcr1 := int(src.Cr[pj]) - 128
+ pr8 := (pyy1 + 91881*pcr1) >> 16
+ pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
+ pb8 := (pyy1 + 116130*pcb1) >> 16
+ if pr8 < 0 {
+ pr8 = 0
+ } else if pr8 > 0xff {
+ pr8 = 0xff
+ }
+ if pg8 < 0 {
+ pg8 = 0
+ } else if pg8 > 0xff {
+ pg8 = 0xff
+ }
+ if pb8 < 0 {
+ pb8 = 0
+ } else if pb8 > 0xff {
+ pb8 = 0xff
+ }
- pru := uint32(pr8) * 0x101
- pgu := uint32(pg8) * 0x101
- pbu := uint32(pb8) * 0x101
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
+ pru := uint32(pr8) * 0x101
+ pgu := uint32(pg8) * 0x101
+ pbu := uint32(pb8) * 0x101
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ }
+ }
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -4132,41 +4140,43 @@
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
- pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
+ pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
- // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
- pyy1 := int(src.Y[pi])<<16 + 1<<15
- pcb1 := int(src.Cb[pj]) - 128
- pcr1 := int(src.Cr[pj]) - 128
- pr8 := (pyy1 + 91881*pcr1) >> 16
- pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
- pb8 := (pyy1 + 116130*pcb1) >> 16
- if pr8 < 0 {
- pr8 = 0
- } else if pr8 > 0xff {
- pr8 = 0xff
- }
- if pg8 < 0 {
- pg8 = 0
- } else if pg8 > 0xff {
- pg8 = 0xff
- }
- if pb8 < 0 {
- pb8 = 0
- } else if pb8 > 0xff {
- pb8 = 0xff
- }
+ // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
+ pyy1 := int(src.Y[pi])<<16 + 1<<15
+ pcb1 := int(src.Cb[pj]) - 128
+ pcr1 := int(src.Cr[pj]) - 128
+ pr8 := (pyy1 + 91881*pcr1) >> 16
+ pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
+ pb8 := (pyy1 + 116130*pcb1) >> 16
+ if pr8 < 0 {
+ pr8 = 0
+ } else if pr8 > 0xff {
+ pr8 = 0xff
+ }
+ if pg8 < 0 {
+ pg8 = 0
+ } else if pg8 > 0xff {
+ pg8 = 0xff
+ }
+ if pb8 < 0 {
+ pb8 = 0
+ } else if pb8 > 0xff {
+ pb8 = 0xff
+ }
- pru := uint32(pr8) * 0x101
- pgu := uint32(pg8) * 0x101
- pbu := uint32(pb8) * 0x101
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
+ pru := uint32(pr8) * 0x101
+ pgu := uint32(pg8) * 0x101
+ pbu := uint32(pb8) * 0x101
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ }
+ }
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -4257,41 +4267,43 @@
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
- pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
+ pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
- // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
- pyy1 := int(src.Y[pi])<<16 + 1<<15
- pcb1 := int(src.Cb[pj]) - 128
- pcr1 := int(src.Cr[pj]) - 128
- pr8 := (pyy1 + 91881*pcr1) >> 16
- pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
- pb8 := (pyy1 + 116130*pcb1) >> 16
- if pr8 < 0 {
- pr8 = 0
- } else if pr8 > 0xff {
- pr8 = 0xff
- }
- if pg8 < 0 {
- pg8 = 0
- } else if pg8 > 0xff {
- pg8 = 0xff
- }
- if pb8 < 0 {
- pb8 = 0
- } else if pb8 > 0xff {
- pb8 = 0xff
- }
+ // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
+ pyy1 := int(src.Y[pi])<<16 + 1<<15
+ pcb1 := int(src.Cb[pj]) - 128
+ pcr1 := int(src.Cr[pj]) - 128
+ pr8 := (pyy1 + 91881*pcr1) >> 16
+ pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
+ pb8 := (pyy1 + 116130*pcb1) >> 16
+ if pr8 < 0 {
+ pr8 = 0
+ } else if pr8 > 0xff {
+ pr8 = 0xff
+ }
+ if pg8 < 0 {
+ pg8 = 0
+ } else if pg8 > 0xff {
+ pg8 = 0xff
+ }
+ if pb8 < 0 {
+ pb8 = 0
+ } else if pb8 > 0xff {
+ pb8 = 0xff
+ }
- pru := uint32(pr8) * 0x101
- pgu := uint32(pg8) * 0x101
- pbu := uint32(pb8) * 0x101
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
+ pru := uint32(pr8) * 0x101
+ pgu := uint32(pg8) * 0x101
+ pbu := uint32(pb8) * 0x101
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ }
+ }
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -4382,41 +4394,43 @@
var pr, pg, pb float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
- pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X)
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
+ pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X)
- // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
- pyy1 := int(src.Y[pi])<<16 + 1<<15
- pcb1 := int(src.Cb[pj]) - 128
- pcr1 := int(src.Cr[pj]) - 128
- pr8 := (pyy1 + 91881*pcr1) >> 16
- pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
- pb8 := (pyy1 + 116130*pcb1) >> 16
- if pr8 < 0 {
- pr8 = 0
- } else if pr8 > 0xff {
- pr8 = 0xff
- }
- if pg8 < 0 {
- pg8 = 0
- } else if pg8 > 0xff {
- pg8 = 0xff
- }
- if pb8 < 0 {
- pb8 = 0
- } else if pb8 > 0xff {
- pb8 = 0xff
- }
+ // This is an inline version of image/color/ycbcr.go's func YCbCrToRGB.
+ pyy1 := int(src.Y[pi])<<16 + 1<<15
+ pcb1 := int(src.Cb[pj]) - 128
+ pcr1 := int(src.Cr[pj]) - 128
+ pr8 := (pyy1 + 91881*pcr1) >> 16
+ pg8 := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 16
+ pb8 := (pyy1 + 116130*pcb1) >> 16
+ if pr8 < 0 {
+ pr8 = 0
+ } else if pr8 > 0xff {
+ pr8 = 0xff
+ }
+ if pg8 < 0 {
+ pg8 = 0
+ } else if pg8 > 0xff {
+ pg8 = 0xff
+ }
+ if pb8 < 0 {
+ pb8 = 0
+ } else if pb8 > 0xff {
+ pb8 = 0xff
+ }
- pru := uint32(pr8) * 0x101
- pgu := uint32(pg8) * 0x101
- pbu := uint32(pb8) * 0x101
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
+ pru := uint32(pr8) * 0x101
+ pgu := uint32(pg8) * 0x101
+ pbu := uint32(pb8) * 0x101
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ }
+ }
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -4507,14 +4521,16 @@
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
- pa += float64(pau) * w
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
+ }
+ }
}
}
dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
@@ -4606,14 +4622,16 @@
var pr, pg, pb, pa float64
for ky := iy; ky < jy; ky++ {
- yWeight := yWeights[ky-iy]
- for kx := ix; kx < jx; kx++ {
- w := xWeights[kx-ix] * yWeight
- pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
- pr += float64(pru) * w
- pg += float64(pgu) * w
- pb += float64(pbu) * w
- pa += float64(pau) * w
+ if yWeight := yWeights[ky-iy]; yWeight != 0 {
+ for kx := ix; kx < jx; kx++ {
+ if w := xWeights[kx-ix] * yWeight; w != 0 {
+ pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
+ pr += float64(pru) * w
+ pg += float64(pgu) * w
+ pb += float64(pbu) * w
+ pa += float64(pau) * w
+ }
+ }
}
}
dstColorRGBA64.R = fffftou(pr)