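draw: RGBA dst fast path for scaling.
When dst is an *image.RGBA, the generated scalers now write the 8-bit
channel values directly into dst.Pix at a running pixel offset, instead
of filling a color.RGBA64 and calling dst.Set for every pixel.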
benchmark old ns/op new ns/op delta
BenchmarkScaleLargeDownNN 6124873 3348203 -45.33%
BenchmarkScaleLargeDownAB 15608417 12626534 -19.10%
BenchmarkScaleLargeDownBL 1503354937 1482605150 -1.38%
BenchmarkScaleLargeDownCR 2987623786 2937846270 -1.67%
BenchmarkScaleDownNN 1793478 935896 -47.82%
BenchmarkScaleDownAB 4277596 3405613 -20.38%
BenchmarkScaleDownBL 29932226 29268085 -2.22%
BenchmarkScaleDownCR 57563042 57322266 -0.42%
BenchmarkScaleUpNN 89694138 46216098 -48.47%
BenchmarkScaleUpAB 212318283 169267373 -20.28%
BenchmarkScaleUpBL 120899444 80215032 -33.65%
BenchmarkScaleUpCR 181116518 140140247 -22.62%
BenchmarkScaleSrcNRGBA 13229017 10620746 -19.72%
BenchmarkScaleSrcRGBA 12993292 10155919 -21.84%
BenchmarkScaleSrcUniform 3964808 1146947 -71.07%
BenchmarkScaleSrcYCbCr 15871184 12779895 -19.48%
Change-Id: I7d92bd9f4c20692c5a52ea31019fe3852e657535
Reviewed-on: https://go-review.googlesource.com/6230
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 387b879..7b7bfc3 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -111,7 +111,11 @@
func expn(w *bytes.Buffer, code string, d *data) {
for _, line := range strings.Split(code, "\n") {
- fmt.Fprintln(w, expnLine(line, d))
+ line = expnLine(line, d)
+ if line == ";" {
+ continue
+ }
+ fmt.Fprintln(w, line)
}
}
@@ -161,12 +165,31 @@
case "switchS":
return expnSwitch("anyDType", false, suffix)
- case "dstColorDecl":
- if d.dType == "Image" || d.dType == "*image.RGBA" { // TODO: separate code for concrete types.
- return "dstColorRGBA64 := &color.RGBA64{}\n" +
+ case "preOuter":
+ switch d.dType {
+ default:
+ return ";"
+ case "Image":
+ return "" +
+ "dstColorRGBA64 := &color.RGBA64{}\n" +
"dstColor := color.Color(dstColorRGBA64)"
}
- return ";"
+
+ case "preInner":
+ switch d.dType {
+ default:
+ return ";"
+ case "*image.RGBA":
+ return "d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))"
+ }
+
+ case "preKernelInner":
+ switch d.dType {
+ default:
+ return ";"
+ case "*image.RGBA":
+ return "d := dst.PixOffset(dp.X+int(dx), dp.Y+dr.Min.Y)"
+ }
case "blend":
args, _ := splitArgs(suffix)
@@ -192,7 +215,7 @@
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
- case "Image", "*image.RGBA": // TODO: separate code for concrete types.
+ case "Image":
return fmt.Sprintf(""+
"dstColorRGBA64.R = uint16(%sr)\n"+
"dstColorRGBA64.G = uint16(%sg)\n"+
@@ -202,6 +225,15 @@
args[2], args[2], args[2], args[2],
args[0], args[1],
)
+ case "*image.RGBA":
+ return fmt.Sprintf(""+
+ "dst.Pix[d+0] = uint8(uint32(%sr) >> 8)\n"+
+ "dst.Pix[d+1] = uint8(uint32(%sg) >> 8)\n"+
+ "dst.Pix[d+2] = uint8(uint32(%sb) >> 8)\n"+
+ "dst.Pix[d+3] = uint8(uint32(%sa) >> 8)\n"+
+ "d += 4",
+ args[2], args[2], args[2], args[2],
+ )
}
case "outputf":
@@ -212,7 +244,7 @@
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
- case "Image", "*image.RGBA": // TODO: separate code for concrete types.
+ case "Image":
return fmt.Sprintf(""+
"dstColorRGBA64.R = ftou(%sr * %s)\n"+
"dstColorRGBA64.G = ftou(%sg * %s)\n"+
@@ -222,6 +254,15 @@
args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
args[0], args[1],
)
+ case "*image.RGBA":
+ return fmt.Sprintf(""+
+ "dst.Pix[d+0] = uint8(ftou(%sr * %s) >> 8)\n"+
+ "dst.Pix[d+1] = uint8(ftou(%sg * %s) >> 8)\n"+
+ "dst.Pix[d+2] = uint8(ftou(%sb * %s) >> 8)\n"+
+ "dst.Pix[d+3] = uint8(ftou(%sa * %s) >> 8)\n"+
+ "d += dst.Stride",
+ args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
+ )
}
case "srcf", "srcu":
@@ -263,6 +304,12 @@
}
return strings.TrimSpace(buf.String())
+
+ case "tweakDy":
+ if d.dType == "*image.RGBA" {
+ return strings.Replace(suffix, "for dy, s", "for _, s", 1)
+ }
+ return suffix
}
return ""
}
@@ -358,9 +405,10 @@
codeNNLeaf = `
func (z *nnScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) {
- $dstColorDecl
+ $preOuter
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ $preInner
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
p := $srcu[sx, sy]
@@ -374,7 +422,7 @@
func (z *ablScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, dr image.Rectangle, src $sType, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw)
- $dstColorDecl
+ $preOuter
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
@@ -388,6 +436,7 @@
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
+ $preInner
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
@@ -457,9 +506,10 @@
codeKernelLeafY = `
func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dp image.Point, dr image.Rectangle, tmp [][4]float64) {
- $dstColorDecl
+ $preOuter
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
- for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] {
+ $preKernelInner
+ $tweakDy for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] {
var pr, pg, pb, pa float64
for _, c := range z.vertical.contribs[s.i:s.j] {
p := &tmp[c.coord*z.dw+dx]
diff --git a/draw/impl.go b/draw/impl.go
index 0cfee91..c284c4c 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -39,86 +39,81 @@
}
func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
- dstColorRGBA64.R = uint16(pr)
- dstColorRGBA64.G = uint16(pg)
- dstColorRGBA64.B = uint16(pb)
- dstColorRGBA64.A = uint16(pa)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(pr) >> 8)
+ dst.Pix[d+1] = uint8(uint32(pg) >> 8)
+ dst.Pix[d+2] = uint8(uint32(pb) >> 8)
+ dst.Pix[d+3] = uint8(uint32(pa) >> 8)
+ d += 4
}
}
}
func (z *nnScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) {
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
- dstColorRGBA64.R = uint16(pr)
- dstColorRGBA64.G = uint16(pg)
- dstColorRGBA64.B = uint16(pb)
- dstColorRGBA64.A = uint16(pa)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(pr) >> 8)
+ dst.Pix[d+1] = uint8(uint32(pg) >> 8)
+ dst.Pix[d+2] = uint8(uint32(pb) >> 8)
+ dst.Pix[d+3] = uint8(uint32(pa) >> 8)
+ d += 4
}
}
}
func (z *nnScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) {
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
- dstColorRGBA64.R = uint16(pr)
- dstColorRGBA64.G = uint16(pg)
- dstColorRGBA64.B = uint16(pb)
- dstColorRGBA64.A = uint16(pa)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(pr) >> 8)
+ dst.Pix[d+1] = uint8(uint32(pg) >> 8)
+ dst.Pix[d+2] = uint8(uint32(pb) >> 8)
+ dst.Pix[d+3] = uint8(uint32(pa) >> 8)
+ d += 4
}
}
}
func (z *nnScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) {
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
- dstColorRGBA64.R = uint16(pr)
- dstColorRGBA64.G = uint16(pg)
- dstColorRGBA64.B = uint16(pb)
- dstColorRGBA64.A = uint16(pa)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(pr) >> 8)
+ dst.Pix[d+1] = uint8(uint32(pg) >> 8)
+ dst.Pix[d+2] = uint8(uint32(pb) >> 8)
+ dst.Pix[d+3] = uint8(uint32(pa) >> 8)
+ d += 4
}
}
}
func (z *nnScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) {
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
- dstColorRGBA64.R = uint16(pr)
- dstColorRGBA64.G = uint16(pg)
- dstColorRGBA64.B = uint16(pb)
- dstColorRGBA64.A = uint16(pa)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(pr) >> 8)
+ dst.Pix[d+1] = uint8(uint32(pg) >> 8)
+ dst.Pix[d+2] = uint8(uint32(pb) >> 8)
+ dst.Pix[d+3] = uint8(uint32(pa) >> 8)
+ d += 4
}
}
}
@@ -174,8 +169,6 @@
func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw)
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
@@ -189,6 +182,7 @@
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
@@ -234,11 +228,11 @@
s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a
- dstColorRGBA64.R = uint16(s11r)
- dstColorRGBA64.G = uint16(s11g)
- dstColorRGBA64.B = uint16(s11b)
- dstColorRGBA64.A = uint16(s11a)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
+ dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
+ dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
+ dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
+ d += 4
}
}
}
@@ -246,8 +240,6 @@
func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.RGBA, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw)
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
@@ -261,6 +253,7 @@
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
@@ -306,11 +299,11 @@
s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a
- dstColorRGBA64.R = uint16(s11r)
- dstColorRGBA64.G = uint16(s11g)
- dstColorRGBA64.B = uint16(s11b)
- dstColorRGBA64.A = uint16(s11a)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
+ dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
+ dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
+ dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
+ d += 4
}
}
}
@@ -318,8 +311,6 @@
func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Uniform, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw)
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
@@ -333,6 +324,7 @@
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
@@ -378,11 +370,11 @@
s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a
- dstColorRGBA64.R = uint16(s11r)
- dstColorRGBA64.G = uint16(s11g)
- dstColorRGBA64.B = uint16(s11b)
- dstColorRGBA64.A = uint16(s11a)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
+ dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
+ dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
+ dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
+ d += 4
}
}
}
@@ -390,8 +382,6 @@
func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.YCbCr, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw)
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
@@ -405,6 +395,7 @@
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
@@ -450,11 +441,11 @@
s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a
- dstColorRGBA64.R = uint16(s11r)
- dstColorRGBA64.G = uint16(s11g)
- dstColorRGBA64.B = uint16(s11b)
- dstColorRGBA64.A = uint16(s11a)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
+ dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
+ dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
+ dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
+ d += 4
}
}
}
@@ -462,8 +453,6 @@
func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, dr image.Rectangle, src image.Image, sp image.Point) {
yscale := float64(z.sh) / float64(z.dh)
xscale := float64(z.sw) / float64(z.dw)
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
sy := (float64(dy)+0.5)*yscale - 0.5
sy0 := int32(sy)
@@ -477,6 +466,7 @@
sy1 = sy0
yFrac0, yFrac1 = 1, 0
}
+ d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy))
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
sx := (float64(dx)+0.5)*xscale - 0.5
sx0 := int32(sx)
@@ -522,11 +512,11 @@
s11g = yFrac1*s10g + yFrac0*s11g
s11b = yFrac1*s10b + yFrac0*s11b
s11a = yFrac1*s10a + yFrac0*s11a
- dstColorRGBA64.R = uint16(s11r)
- dstColorRGBA64.G = uint16(s11g)
- dstColorRGBA64.B = uint16(s11b)
- dstColorRGBA64.A = uint16(s11a)
- dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
+ dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
+ dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
+ dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
+ d += 4
}
}
}
@@ -753,10 +743,9 @@
}
func (z *kernelScaler) scaleY_RGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, tmp [][4]float64) {
- dstColorRGBA64 := &color.RGBA64{}
- dstColor := color.Color(dstColorRGBA64)
for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
- for dy, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] {
+ d := dst.PixOffset(dp.X+int(dx), dp.Y+dr.Min.Y)
+ for _, s := range z.vertical.sources[dr.Min.Y:dr.Max.Y] {
var pr, pg, pb, pa float64
for _, c := range z.vertical.contribs[s.i:s.j] {
p := &tmp[c.coord*z.dw+dx]
@@ -765,11 +754,11 @@
pb += p[2] * c.weight
pa += p[3] * c.weight
}
- dstColorRGBA64.R = ftou(pr * s.invTotalWeight)
- dstColorRGBA64.G = ftou(pg * s.invTotalWeight)
- dstColorRGBA64.B = ftou(pb * s.invTotalWeight)
- dstColorRGBA64.A = ftou(pa * s.invTotalWeight)
- dst.Set(dp.X+int(dx), dp.Y+int(dr.Min.Y+dy), dstColor)
+ dst.Pix[d+0] = uint8(ftou(pr*s.invTotalWeight) >> 8)
+ dst.Pix[d+1] = uint8(ftou(pg*s.invTotalWeight) >> 8)
+ dst.Pix[d+2] = uint8(ftou(pb*s.invTotalWeight) >> 8)
+ dst.Pix[d+3] = uint8(ftou(pa*s.invTotalWeight) >> 8)
+ d += dst.Stride
}
}
}