draw: generate impl.go type switches.
This is just the type switches to concrete-type-specific methods.
Generating RGBA- or YCbCr-specific code instead of calling the
general-purpose At and Set methods will be follow-up changes.
benchmark old ns/op new ns/op delta
BenchmarkScaleLargeDownNN 5771212 5766160 -0.09%
BenchmarkScaleLargeDownAB 14057354 14023083 -0.24%
BenchmarkScaleLargeDownBL 1349968635 1347746478 -0.16%
BenchmarkScaleLargeDownCR 2687753329 2678909022 -0.33%
BenchmarkScaleDownNN 1802408 1808723 +0.35%
BenchmarkScaleDownAB 4352978 4284878 -1.56%
BenchmarkScaleDownBL 29760913 29522171 -0.80%
BenchmarkScaleDownCR 57666917 56334005 -2.31%
BenchmarkScaleUpNN 88886450 89235790 +0.39%
BenchmarkScaleUpAB 214687758 213797857 -0.41%
BenchmarkScaleUpBL 119588096 119846371 +0.22%
BenchmarkScaleUpCR 179784111 180662030 +0.49%
BenchmarkScaleSrcUniform 4128528 3855385 -6.62%
BenchmarkScaleSrcNRGBA 13275457 13375039 +0.75%
BenchmarkScaleSrcRGBA 13333672 12812895 -3.91%
BenchmarkScaleSrcYCbCr 14321945 14139071 -1.28%
Change-Id: I28025c3b49186a12d20ee0182a4c8ff9b036d0ae
Reviewed-on: https://go-review.googlesource.com/5712
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 4ba769a..8fef308 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -43,13 +43,39 @@
}
}
-// dsTypes are the space-separated (dst image type, src image type) pairs to
-// generate scale_DType_SType implementations for. The last element in the
-// slice should be the fallback pair "Image image.Image".
-//
-// TODO: add more concrete types: *image.RGBA, *image.YCbCr, etc.
-var dsTypes = []string{
- "Image image.Image",
+var (
+ // dsTypes are the (dst image type, src image type) pairs to generate
+ // scale_DType_SType implementations for. The last element in the slice
+ // should be the fallback pair ("Image", "image.Image").
+ //
+ // TODO: add *image.CMYK src type after Go 1.5 is released.
+ dsTypes = []struct{ dType, sType string }{
+ {"*image.RGBA", "*image.NRGBA"},
+ {"*image.RGBA", "*image.RGBA"},
+ {"*image.RGBA", "*image.Uniform"},
+ {"*image.RGBA", "*image.YCbCr"},
+ {"*image.RGBA", "image.Image"},
+ {"Image", "image.Image"},
+ }
+ dTypes, sTypes []string
+ sTypesForDType = map[string][]string{}
+)
+
+func init() {
+ dTypesSeen := map[string]bool{}
+ sTypesSeen := map[string]bool{}
+ for _, t := range dsTypes {
+ if !sTypesSeen[t.sType] {
+ sTypesSeen[t.sType] = true
+ sTypes = append(sTypes, t.sType)
+ }
+ if !dTypesSeen[t.dType] {
+ dTypesSeen[t.dType] = true
+ dTypes = append(dTypes, t.dType)
+ }
+ sTypesForDType[t.dType] = append(sTypesForDType[t.dType], t.sType)
+ }
+ sTypesForDType["anyDType"] = sTypes
}
type data struct {
@@ -60,12 +86,10 @@
func gen(w *bytes.Buffer, receiver string, code string) {
expn(w, codeRoot, &data{receiver: receiver})
-
- for _, dsType := range dsTypes {
- dType, sType := split(dsType, " ")
+ for _, t := range dsTypes {
expn(w, code, &data{
- dType: dType,
- sType: sType,
+ dType: t.dType,
+ sType: t.sType,
receiver: receiver,
})
}
@@ -73,55 +97,51 @@
func genKernel(w *bytes.Buffer) {
expn(w, codeKernelRoot, &data{})
-
- dTypesSeen := map[string]bool{}
- sTypesSeen := map[string]bool{}
- for _, dsType := range dsTypes {
- dType, sType := split(dsType, " ")
- if !sTypesSeen[sType] {
- sTypesSeen[sType] = true
- expn(w, codeKernelLeafX, &data{
- sType: sType,
- })
- }
- if !dTypesSeen[dType] {
- dTypesSeen[dType] = true
- expn(w, codeKernelLeafY, &data{
- dType: dType,
- })
- }
+ for _, sType := range sTypes {
+ expn(w, codeKernelLeafX, &data{
+ sType: sType,
+ })
+ }
+ for _, dType := range dTypes {
+ expn(w, codeKernelLeafY, &data{
+ dType: dType,
+ })
}
}
func expn(w *bytes.Buffer, code string, d *data) {
for _, line := range strings.Split(code, "\n") {
- for {
- i := strings.IndexByte(line, '$')
- if i < 0 {
- break
- }
- prefix, s := line[:i], line[i+1:]
-
- i = len(s)
- for j, c := range s {
- if !('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
- i = j
- break
- }
- }
- dollar, suffix := s[:i], s[i:]
-
- e := expnLine(prefix, dollar, suffix, d)
- if e == "" {
- log.Fatalf("couldn't expand %q", line)
- }
- line = e
- }
- fmt.Fprintln(w, line)
+ fmt.Fprintln(w, expnLine(line, d))
}
}
-func expnLine(prefix, dollar, suffix string, d *data) string {
+func expnLine(line string, d *data) string {
+ for {
+ i := strings.IndexByte(line, '$')
+ if i < 0 {
+ break
+ }
+ prefix, s := line[:i], line[i+1:]
+
+ i = len(s)
+ for j, c := range s {
+ if !('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
+ i = j
+ break
+ }
+ }
+ dollar, suffix := s[:i], s[i:]
+
+ e := expnDollar(prefix, dollar, suffix, d)
+ if e == "" {
+ log.Fatalf("couldn't expand %q", line)
+ }
+ line = e
+ }
+ return line
+}
+
+func expnDollar(prefix, dollar, suffix string, d *data) string {
switch dollar {
case "dType":
return prefix + d.dType + suffix
@@ -134,8 +154,15 @@
case "receiver":
return prefix + d.receiver + suffix
+ case "switch":
+ return expnSwitch("", true, suffix)
+ case "switchD":
+ return expnSwitch("", false, suffix)
+ case "switchS":
+ return expnSwitch("anyDType", false, suffix)
+
case "dstColorDecl":
- if d.dType == "Image" {
+ if d.dType == "Image" || d.dType == "*image.RGBA" { // TODO: separate code for concrete types.
return "dstColorRGBA64 := &color.RGBA64{}\n" +
"dstColor := color.Color(dstColorRGBA64)"
}
@@ -165,7 +192,7 @@
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
- case "Image":
+ case "Image", "*image.RGBA": // TODO: separate code for concrete types.
return fmt.Sprintf(""+
"dstColorRGBA64.R = uint16(%sr)\n"+
"dstColorRGBA64.G = uint16(%sg)\n"+
@@ -185,7 +212,7 @@
switch d.dType {
default:
log.Fatalf("bad dType %q", d.dType)
- case "Image":
+ case "Image", "*image.RGBA": // TODO: separate code for concrete types.
return fmt.Sprintf(""+
"dstColorRGBA64.R = ftou(%sr * %s)\n"+
"dstColorRGBA64.G = ftou(%sg * %s)\n"+
@@ -216,7 +243,7 @@
switch d.sType {
default:
log.Fatalf("bad sType %q", d.sType)
- case "image.Image":
+ case "image.Image", "*image.NRGBA", "*image.RGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
"src.At(sp.X + int(%s), sp.Y+int(%s)).RGBA()\n",
lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp, args[0], args[1])
@@ -240,6 +267,37 @@
return ""
}
+func expnSwitch(dType string, expandBoth bool, template string) string {
+ switchVar := "dst"
+ if dType != "" {
+ switchVar = "src"
+ }
+ lines := []string{fmt.Sprintf("switch %s := %s.(type) {", switchVar, switchVar)}
+
+ fallback, values := "Image", dTypes
+ if dType != "" {
+ fallback, values = "image.Image", sTypesForDType[dType]
+ }
+ for _, v := range values {
+ if v == fallback {
+ lines = append(lines, "default:")
+ } else {
+ lines = append(lines, fmt.Sprintf("case %s:", v))
+ }
+
+ if dType != "" {
+ lines = append(lines, expnLine(template, &data{dType: dType, sType: v}))
+ } else if !expandBoth {
+ lines = append(lines, expnLine(template, &data{dType: v}))
+ } else {
+ lines = append(lines, expnSwitch(v, false, template))
+ }
+ }
+
+ lines = append(lines, "}")
+ return strings.Join(lines, "\n")
+}
+
func split(s, sep string) (string, string) {
if i := strings.Index(s, sep); i >= 0 {
return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+len(sep):])
@@ -289,8 +347,7 @@
if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
return
}
- // TODO: generate type switches for the different dsTypes.
- z.scale_Image_Image(dst, dp, src, sp)
+ $switch z.scale_$dTypeRN_$sTypeRN(dst, dp, src, sp)
}
`
@@ -362,9 +419,8 @@
// scaleY distributes the temporary image's rows over the destination image.
// TODO: is it worth having a sync.Pool for this temporary buffer?
tmp := make([][4]float64, z.dw*z.sh)
- // TODO: generate type switches for the different dTypes and sTypes.
- z.scaleX_Image(tmp, src, sp)
- z.scaleY_Image(dst, dp, tmp)
+ $switchS z.scaleX_$sTypeRN(tmp, src, sp)
+ $switchD z.scaleY_$dTypeRN(dst, dp, tmp)
}
`
diff --git a/draw/impl.go b/draw/impl.go
index cf164aa..2c18717 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -11,8 +11,111 @@
if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
return
}
- // TODO: generate type switches for the different dsTypes.
- z.scale_Image_Image(dst, dp, src, sp)
+ switch dst := dst.(type) {
+ case *image.RGBA:
+ switch src := src.(type) {
+ case *image.NRGBA:
+ z.scale_RGBA_NRGBA(dst, dp, src, sp)
+ case *image.RGBA:
+ z.scale_RGBA_RGBA(dst, dp, src, sp)
+ case *image.Uniform:
+ z.scale_RGBA_Uniform(dst, dp, src, sp)
+ case *image.YCbCr:
+ z.scale_RGBA_YCbCr(dst, dp, src, sp)
+ default:
+ z.scale_RGBA_Image(dst, dp, src, sp)
+ }
+ default:
+ switch src := src.(type) {
+ default:
+ z.scale_Image_Image(dst, dp, src, sp)
+ }
+ }
+}
+
+func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, src *image.NRGBA, sp image.Point) {
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
+ pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
+ dstColorRGBA64.R = uint16(pr)
+ dstColorRGBA64.G = uint16(pg)
+ dstColorRGBA64.B = uint16(pb)
+ dstColorRGBA64.A = uint16(pa)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *nnScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, src *image.RGBA, sp image.Point) {
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
+ pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
+ dstColorRGBA64.R = uint16(pr)
+ dstColorRGBA64.G = uint16(pg)
+ dstColorRGBA64.B = uint16(pb)
+ dstColorRGBA64.A = uint16(pa)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *nnScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, src *image.Uniform, sp image.Point) {
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
+ pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
+ dstColorRGBA64.R = uint16(pr)
+ dstColorRGBA64.G = uint16(pg)
+ dstColorRGBA64.B = uint16(pb)
+ dstColorRGBA64.A = uint16(pa)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *nnScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, src *image.YCbCr, sp image.Point) {
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
+ pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
+ dstColorRGBA64.R = uint16(pr)
+ dstColorRGBA64.G = uint16(pg)
+ dstColorRGBA64.B = uint16(pb)
+ dstColorRGBA64.A = uint16(pa)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *nnScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, src image.Image, sp image.Point) {
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
+ pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA()
+ dstColorRGBA64.R = uint16(pr)
+ dstColorRGBA64.G = uint16(pg)
+ dstColorRGBA64.B = uint16(pb)
+ dstColorRGBA64.A = uint16(pa)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
}
func (z *nnScaler) scale_Image_Image(dst Image, dp image.Point, src image.Image, sp image.Point) {
@@ -36,8 +139,386 @@
if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
return
}
- // TODO: generate type switches for the different dsTypes.
- z.scale_Image_Image(dst, dp, src, sp)
+ switch dst := dst.(type) {
+ case *image.RGBA:
+ switch src := src.(type) {
+ case *image.NRGBA:
+ z.scale_RGBA_NRGBA(dst, dp, src, sp)
+ case *image.RGBA:
+ z.scale_RGBA_RGBA(dst, dp, src, sp)
+ case *image.Uniform:
+ z.scale_RGBA_Uniform(dst, dp, src, sp)
+ case *image.YCbCr:
+ z.scale_RGBA_YCbCr(dst, dp, src, sp)
+ default:
+ z.scale_RGBA_Image(dst, dp, src, sp)
+ }
+ default:
+ switch src := src.(type) {
+ default:
+ z.scale_Image_Image(dst, dp, src, sp)
+ }
+ }
+}
+
+func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, src *image.NRGBA, sp image.Point) {
+ yscale := float64(z.sh) / float64(z.dh)
+ xscale := float64(z.sw) / float64(z.dw)
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (float64(dy)+0.5)*yscale - 0.5
+ sy0 := int32(sy)
+ yFrac0 := sy - float64(sy0)
+ yFrac1 := 1 - yFrac0
+ sy1 := sy0 + 1
+ if sy < 0 {
+ sy0, sy1 = 0, 0
+ yFrac0, yFrac1 = 0, 1
+ } else if sy1 >= z.sh {
+ sy1 = sy0
+ yFrac0, yFrac1 = 1, 0
+ }
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (float64(dx)+0.5)*xscale - 0.5
+ sx0 := int32(sx)
+ xFrac0 := sx - float64(sx0)
+ xFrac1 := 1 - xFrac0
+ sx1 := sx0 + 1
+ if sx < 0 {
+ sx0, sx1 = 0, 0
+ xFrac0, xFrac1 = 0, 1
+ } else if sx1 >= z.sw {
+ sx1 = sx0
+ xFrac0, xFrac1 = 1, 0
+ }
+ s00ru, s00gu, s00bu, s00au := src.At(sp.X+int(sx0), sp.Y+int(sy0)).RGBA()
+ s00r := float64(s00ru)
+ s00g := float64(s00gu)
+ s00b := float64(s00bu)
+ s00a := float64(s00au)
+ s10ru, s10gu, s10bu, s10au := src.At(sp.X+int(sx1), sp.Y+int(sy0)).RGBA()
+ s10r := float64(s10ru)
+ s10g := float64(s10gu)
+ s10b := float64(s10bu)
+ s10a := float64(s10au)
+ s10r = xFrac1*s00r + xFrac0*s10r
+ s10g = xFrac1*s00g + xFrac0*s10g
+ s10b = xFrac1*s00b + xFrac0*s10b
+ s10a = xFrac1*s00a + xFrac0*s10a
+ s01ru, s01gu, s01bu, s01au := src.At(sp.X+int(sx0), sp.Y+int(sy1)).RGBA()
+ s01r := float64(s01ru)
+ s01g := float64(s01gu)
+ s01b := float64(s01bu)
+ s01a := float64(s01au)
+ s11ru, s11gu, s11bu, s11au := src.At(sp.X+int(sx1), sp.Y+int(sy1)).RGBA()
+ s11r := float64(s11ru)
+ s11g := float64(s11gu)
+ s11b := float64(s11bu)
+ s11a := float64(s11au)
+ s11r = xFrac1*s01r + xFrac0*s11r
+ s11g = xFrac1*s01g + xFrac0*s11g
+ s11b = xFrac1*s01b + xFrac0*s11b
+ s11a = xFrac1*s01a + xFrac0*s11a
+ s11r = yFrac1*s10r + yFrac0*s11r
+ s11g = yFrac1*s10g + yFrac0*s11g
+ s11b = yFrac1*s10b + yFrac0*s11b
+ s11a = yFrac1*s10a + yFrac0*s11a
+ dstColorRGBA64.R = uint16(s11r)
+ dstColorRGBA64.G = uint16(s11g)
+ dstColorRGBA64.B = uint16(s11b)
+ dstColorRGBA64.A = uint16(s11a)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *ablScaler) scale_RGBA_RGBA(dst *image.RGBA, dp image.Point, src *image.RGBA, sp image.Point) {
+ yscale := float64(z.sh) / float64(z.dh)
+ xscale := float64(z.sw) / float64(z.dw)
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (float64(dy)+0.5)*yscale - 0.5
+ sy0 := int32(sy)
+ yFrac0 := sy - float64(sy0)
+ yFrac1 := 1 - yFrac0
+ sy1 := sy0 + 1
+ if sy < 0 {
+ sy0, sy1 = 0, 0
+ yFrac0, yFrac1 = 0, 1
+ } else if sy1 >= z.sh {
+ sy1 = sy0
+ yFrac0, yFrac1 = 1, 0
+ }
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (float64(dx)+0.5)*xscale - 0.5
+ sx0 := int32(sx)
+ xFrac0 := sx - float64(sx0)
+ xFrac1 := 1 - xFrac0
+ sx1 := sx0 + 1
+ if sx < 0 {
+ sx0, sx1 = 0, 0
+ xFrac0, xFrac1 = 0, 1
+ } else if sx1 >= z.sw {
+ sx1 = sx0
+ xFrac0, xFrac1 = 1, 0
+ }
+ s00ru, s00gu, s00bu, s00au := src.At(sp.X+int(sx0), sp.Y+int(sy0)).RGBA()
+ s00r := float64(s00ru)
+ s00g := float64(s00gu)
+ s00b := float64(s00bu)
+ s00a := float64(s00au)
+ s10ru, s10gu, s10bu, s10au := src.At(sp.X+int(sx1), sp.Y+int(sy0)).RGBA()
+ s10r := float64(s10ru)
+ s10g := float64(s10gu)
+ s10b := float64(s10bu)
+ s10a := float64(s10au)
+ s10r = xFrac1*s00r + xFrac0*s10r
+ s10g = xFrac1*s00g + xFrac0*s10g
+ s10b = xFrac1*s00b + xFrac0*s10b
+ s10a = xFrac1*s00a + xFrac0*s10a
+ s01ru, s01gu, s01bu, s01au := src.At(sp.X+int(sx0), sp.Y+int(sy1)).RGBA()
+ s01r := float64(s01ru)
+ s01g := float64(s01gu)
+ s01b := float64(s01bu)
+ s01a := float64(s01au)
+ s11ru, s11gu, s11bu, s11au := src.At(sp.X+int(sx1), sp.Y+int(sy1)).RGBA()
+ s11r := float64(s11ru)
+ s11g := float64(s11gu)
+ s11b := float64(s11bu)
+ s11a := float64(s11au)
+ s11r = xFrac1*s01r + xFrac0*s11r
+ s11g = xFrac1*s01g + xFrac0*s11g
+ s11b = xFrac1*s01b + xFrac0*s11b
+ s11a = xFrac1*s01a + xFrac0*s11a
+ s11r = yFrac1*s10r + yFrac0*s11r
+ s11g = yFrac1*s10g + yFrac0*s11g
+ s11b = yFrac1*s10b + yFrac0*s11b
+ s11a = yFrac1*s10a + yFrac0*s11a
+ dstColorRGBA64.R = uint16(s11r)
+ dstColorRGBA64.G = uint16(s11g)
+ dstColorRGBA64.B = uint16(s11b)
+ dstColorRGBA64.A = uint16(s11a)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *ablScaler) scale_RGBA_Uniform(dst *image.RGBA, dp image.Point, src *image.Uniform, sp image.Point) {
+ yscale := float64(z.sh) / float64(z.dh)
+ xscale := float64(z.sw) / float64(z.dw)
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (float64(dy)+0.5)*yscale - 0.5
+ sy0 := int32(sy)
+ yFrac0 := sy - float64(sy0)
+ yFrac1 := 1 - yFrac0
+ sy1 := sy0 + 1
+ if sy < 0 {
+ sy0, sy1 = 0, 0
+ yFrac0, yFrac1 = 0, 1
+ } else if sy1 >= z.sh {
+ sy1 = sy0
+ yFrac0, yFrac1 = 1, 0
+ }
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (float64(dx)+0.5)*xscale - 0.5
+ sx0 := int32(sx)
+ xFrac0 := sx - float64(sx0)
+ xFrac1 := 1 - xFrac0
+ sx1 := sx0 + 1
+ if sx < 0 {
+ sx0, sx1 = 0, 0
+ xFrac0, xFrac1 = 0, 1
+ } else if sx1 >= z.sw {
+ sx1 = sx0
+ xFrac0, xFrac1 = 1, 0
+ }
+ s00ru, s00gu, s00bu, s00au := src.At(sp.X+int(sx0), sp.Y+int(sy0)).RGBA()
+ s00r := float64(s00ru)
+ s00g := float64(s00gu)
+ s00b := float64(s00bu)
+ s00a := float64(s00au)
+ s10ru, s10gu, s10bu, s10au := src.At(sp.X+int(sx1), sp.Y+int(sy0)).RGBA()
+ s10r := float64(s10ru)
+ s10g := float64(s10gu)
+ s10b := float64(s10bu)
+ s10a := float64(s10au)
+ s10r = xFrac1*s00r + xFrac0*s10r
+ s10g = xFrac1*s00g + xFrac0*s10g
+ s10b = xFrac1*s00b + xFrac0*s10b
+ s10a = xFrac1*s00a + xFrac0*s10a
+ s01ru, s01gu, s01bu, s01au := src.At(sp.X+int(sx0), sp.Y+int(sy1)).RGBA()
+ s01r := float64(s01ru)
+ s01g := float64(s01gu)
+ s01b := float64(s01bu)
+ s01a := float64(s01au)
+ s11ru, s11gu, s11bu, s11au := src.At(sp.X+int(sx1), sp.Y+int(sy1)).RGBA()
+ s11r := float64(s11ru)
+ s11g := float64(s11gu)
+ s11b := float64(s11bu)
+ s11a := float64(s11au)
+ s11r = xFrac1*s01r + xFrac0*s11r
+ s11g = xFrac1*s01g + xFrac0*s11g
+ s11b = xFrac1*s01b + xFrac0*s11b
+ s11a = xFrac1*s01a + xFrac0*s11a
+ s11r = yFrac1*s10r + yFrac0*s11r
+ s11g = yFrac1*s10g + yFrac0*s11g
+ s11b = yFrac1*s10b + yFrac0*s11b
+ s11a = yFrac1*s10a + yFrac0*s11a
+ dstColorRGBA64.R = uint16(s11r)
+ dstColorRGBA64.G = uint16(s11g)
+ dstColorRGBA64.B = uint16(s11b)
+ dstColorRGBA64.A = uint16(s11a)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *ablScaler) scale_RGBA_YCbCr(dst *image.RGBA, dp image.Point, src *image.YCbCr, sp image.Point) {
+ yscale := float64(z.sh) / float64(z.dh)
+ xscale := float64(z.sw) / float64(z.dw)
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (float64(dy)+0.5)*yscale - 0.5
+ sy0 := int32(sy)
+ yFrac0 := sy - float64(sy0)
+ yFrac1 := 1 - yFrac0
+ sy1 := sy0 + 1
+ if sy < 0 {
+ sy0, sy1 = 0, 0
+ yFrac0, yFrac1 = 0, 1
+ } else if sy1 >= z.sh {
+ sy1 = sy0
+ yFrac0, yFrac1 = 1, 0
+ }
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (float64(dx)+0.5)*xscale - 0.5
+ sx0 := int32(sx)
+ xFrac0 := sx - float64(sx0)
+ xFrac1 := 1 - xFrac0
+ sx1 := sx0 + 1
+ if sx < 0 {
+ sx0, sx1 = 0, 0
+ xFrac0, xFrac1 = 0, 1
+ } else if sx1 >= z.sw {
+ sx1 = sx0
+ xFrac0, xFrac1 = 1, 0
+ }
+ s00ru, s00gu, s00bu, s00au := src.At(sp.X+int(sx0), sp.Y+int(sy0)).RGBA()
+ s00r := float64(s00ru)
+ s00g := float64(s00gu)
+ s00b := float64(s00bu)
+ s00a := float64(s00au)
+ s10ru, s10gu, s10bu, s10au := src.At(sp.X+int(sx1), sp.Y+int(sy0)).RGBA()
+ s10r := float64(s10ru)
+ s10g := float64(s10gu)
+ s10b := float64(s10bu)
+ s10a := float64(s10au)
+ s10r = xFrac1*s00r + xFrac0*s10r
+ s10g = xFrac1*s00g + xFrac0*s10g
+ s10b = xFrac1*s00b + xFrac0*s10b
+ s10a = xFrac1*s00a + xFrac0*s10a
+ s01ru, s01gu, s01bu, s01au := src.At(sp.X+int(sx0), sp.Y+int(sy1)).RGBA()
+ s01r := float64(s01ru)
+ s01g := float64(s01gu)
+ s01b := float64(s01bu)
+ s01a := float64(s01au)
+ s11ru, s11gu, s11bu, s11au := src.At(sp.X+int(sx1), sp.Y+int(sy1)).RGBA()
+ s11r := float64(s11ru)
+ s11g := float64(s11gu)
+ s11b := float64(s11bu)
+ s11a := float64(s11au)
+ s11r = xFrac1*s01r + xFrac0*s11r
+ s11g = xFrac1*s01g + xFrac0*s11g
+ s11b = xFrac1*s01b + xFrac0*s11b
+ s11a = xFrac1*s01a + xFrac0*s11a
+ s11r = yFrac1*s10r + yFrac0*s11r
+ s11g = yFrac1*s10g + yFrac0*s11g
+ s11b = yFrac1*s10b + yFrac0*s11b
+ s11a = yFrac1*s10a + yFrac0*s11a
+ dstColorRGBA64.R = uint16(s11r)
+ dstColorRGBA64.G = uint16(s11g)
+ dstColorRGBA64.B = uint16(s11b)
+ dstColorRGBA64.A = uint16(s11a)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
+}
+
+func (z *ablScaler) scale_RGBA_Image(dst *image.RGBA, dp image.Point, src image.Image, sp image.Point) {
+ yscale := float64(z.sh) / float64(z.dh)
+ xscale := float64(z.sw) / float64(z.dw)
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for dy := int32(0); dy < z.dh; dy++ {
+ sy := (float64(dy)+0.5)*yscale - 0.5
+ sy0 := int32(sy)
+ yFrac0 := sy - float64(sy0)
+ yFrac1 := 1 - yFrac0
+ sy1 := sy0 + 1
+ if sy < 0 {
+ sy0, sy1 = 0, 0
+ yFrac0, yFrac1 = 0, 1
+ } else if sy1 >= z.sh {
+ sy1 = sy0
+ yFrac0, yFrac1 = 1, 0
+ }
+ for dx := int32(0); dx < z.dw; dx++ {
+ sx := (float64(dx)+0.5)*xscale - 0.5
+ sx0 := int32(sx)
+ xFrac0 := sx - float64(sx0)
+ xFrac1 := 1 - xFrac0
+ sx1 := sx0 + 1
+ if sx < 0 {
+ sx0, sx1 = 0, 0
+ xFrac0, xFrac1 = 0, 1
+ } else if sx1 >= z.sw {
+ sx1 = sx0
+ xFrac0, xFrac1 = 1, 0
+ }
+ s00ru, s00gu, s00bu, s00au := src.At(sp.X+int(sx0), sp.Y+int(sy0)).RGBA()
+ s00r := float64(s00ru)
+ s00g := float64(s00gu)
+ s00b := float64(s00bu)
+ s00a := float64(s00au)
+ s10ru, s10gu, s10bu, s10au := src.At(sp.X+int(sx1), sp.Y+int(sy0)).RGBA()
+ s10r := float64(s10ru)
+ s10g := float64(s10gu)
+ s10b := float64(s10bu)
+ s10a := float64(s10au)
+ s10r = xFrac1*s00r + xFrac0*s10r
+ s10g = xFrac1*s00g + xFrac0*s10g
+ s10b = xFrac1*s00b + xFrac0*s10b
+ s10a = xFrac1*s00a + xFrac0*s10a
+ s01ru, s01gu, s01bu, s01au := src.At(sp.X+int(sx0), sp.Y+int(sy1)).RGBA()
+ s01r := float64(s01ru)
+ s01g := float64(s01gu)
+ s01b := float64(s01bu)
+ s01a := float64(s01au)
+ s11ru, s11gu, s11bu, s11au := src.At(sp.X+int(sx1), sp.Y+int(sy1)).RGBA()
+ s11r := float64(s11ru)
+ s11g := float64(s11gu)
+ s11b := float64(s11bu)
+ s11a := float64(s11au)
+ s11r = xFrac1*s01r + xFrac0*s11r
+ s11g = xFrac1*s01g + xFrac0*s11g
+ s11b = xFrac1*s01b + xFrac0*s11b
+ s11a = xFrac1*s01a + xFrac0*s11a
+ s11r = yFrac1*s10r + yFrac0*s11r
+ s11g = yFrac1*s10g + yFrac0*s11g
+ s11b = yFrac1*s10b + yFrac0*s11b
+ s11a = yFrac1*s10a + yFrac0*s11a
+ dstColorRGBA64.R = uint16(s11r)
+ dstColorRGBA64.G = uint16(s11g)
+ dstColorRGBA64.B = uint16(s11b)
+ dstColorRGBA64.A = uint16(s11a)
+ dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor)
+ }
+ }
}
func (z *ablScaler) scale_Image_Image(dst Image, dp image.Point, src image.Image, sp image.Point) {
@@ -121,9 +602,116 @@
// scaleY distributes the temporary image's rows over the destination image.
// TODO: is it worth having a sync.Pool for this temporary buffer?
tmp := make([][4]float64, z.dw*z.sh)
- // TODO: generate type switches for the different dTypes and sTypes.
- z.scaleX_Image(tmp, src, sp)
- z.scaleY_Image(dst, dp, tmp)
+ switch src := src.(type) {
+ case *image.NRGBA:
+ z.scaleX_NRGBA(tmp, src, sp)
+ case *image.RGBA:
+ z.scaleX_RGBA(tmp, src, sp)
+ case *image.Uniform:
+ z.scaleX_Uniform(tmp, src, sp)
+ case *image.YCbCr:
+ z.scaleX_YCbCr(tmp, src, sp)
+ default:
+ z.scaleX_Image(tmp, src, sp)
+ }
+ switch dst := dst.(type) {
+ case *image.RGBA:
+ z.scaleY_RGBA(dst, dp, tmp)
+ default:
+ z.scaleY_Image(dst, dp, tmp)
+ }
+}
+
+func (z *kernelScaler) scaleX_NRGBA(tmp [][4]float64, src *image.NRGBA, sp image.Point) {
+ t := 0
+ for y := int32(0); y < z.sh; y++ {
+ for _, s := range z.horizontal.sources {
+ var pr, pg, pb, pa float64
+ for _, c := range z.horizontal.contribs[s.i:s.j] {
+ pru, pgu, pbu, pau := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA()
+ pr += float64(pru) * c.weight
+ pg += float64(pgu) * c.weight
+ pb += float64(pbu) * c.weight
+ pa += float64(pau) * c.weight
+ }
+ tmp[t] = [4]float64{
+ pr * s.invTotalWeightFFFF,
+ pg * s.invTotalWeightFFFF,
+ pb * s.invTotalWeightFFFF,
+ pa * s.invTotalWeightFFFF,
+ }
+ t++
+ }
+ }
+}
+
+func (z *kernelScaler) scaleX_RGBA(tmp [][4]float64, src *image.RGBA, sp image.Point) {
+ t := 0
+ for y := int32(0); y < z.sh; y++ {
+ for _, s := range z.horizontal.sources {
+ var pr, pg, pb, pa float64
+ for _, c := range z.horizontal.contribs[s.i:s.j] {
+ pru, pgu, pbu, pau := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA()
+ pr += float64(pru) * c.weight
+ pg += float64(pgu) * c.weight
+ pb += float64(pbu) * c.weight
+ pa += float64(pau) * c.weight
+ }
+ tmp[t] = [4]float64{
+ pr * s.invTotalWeightFFFF,
+ pg * s.invTotalWeightFFFF,
+ pb * s.invTotalWeightFFFF,
+ pa * s.invTotalWeightFFFF,
+ }
+ t++
+ }
+ }
+}
+
+func (z *kernelScaler) scaleX_Uniform(tmp [][4]float64, src *image.Uniform, sp image.Point) {
+ t := 0
+ for y := int32(0); y < z.sh; y++ {
+ for _, s := range z.horizontal.sources {
+ var pr, pg, pb, pa float64
+ for _, c := range z.horizontal.contribs[s.i:s.j] {
+ pru, pgu, pbu, pau := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA()
+ pr += float64(pru) * c.weight
+ pg += float64(pgu) * c.weight
+ pb += float64(pbu) * c.weight
+ pa += float64(pau) * c.weight
+ }
+ tmp[t] = [4]float64{
+ pr * s.invTotalWeightFFFF,
+ pg * s.invTotalWeightFFFF,
+ pb * s.invTotalWeightFFFF,
+ pa * s.invTotalWeightFFFF,
+ }
+ t++
+ }
+ }
+}
+
+func (z *kernelScaler) scaleX_YCbCr(tmp [][4]float64, src *image.YCbCr, sp image.Point) {
+ t := 0
+ for y := int32(0); y < z.sh; y++ {
+ for _, s := range z.horizontal.sources {
+ var pr, pg, pb, pa float64
+ for _, c := range z.horizontal.contribs[s.i:s.j] {
+ pru, pgu, pbu, pau := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA()
+ pr += float64(pru) * c.weight
+ pg += float64(pgu) * c.weight
+ pb += float64(pbu) * c.weight
+ pa += float64(pau) * c.weight
+ }
+ tmp[t] = [4]float64{
+ pr * s.invTotalWeightFFFF,
+ pg * s.invTotalWeightFFFF,
+ pb * s.invTotalWeightFFFF,
+ pa * s.invTotalWeightFFFF,
+ }
+ t++
+ }
+ }
}
func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sp image.Point) {
@@ -149,6 +737,28 @@
}
}
+func (z *kernelScaler) scaleY_RGBA(dst *image.RGBA, dp image.Point, tmp [][4]float64) {
+ dstColorRGBA64 := &color.RGBA64{}
+ dstColor := color.Color(dstColorRGBA64)
+ for x := int32(0); x < z.dw; x++ {
+ for y, s := range z.vertical.sources {
+ var pr, pg, pb, pa float64
+ for _, c := range z.vertical.contribs[s.i:s.j] {
+ p := &tmp[c.coord*z.dw+x]
+ pr += p[0] * c.weight
+ pg += p[1] * c.weight
+ pb += p[2] * c.weight
+ pa += p[3] * c.weight
+ }
+ dstColorRGBA64.R = ftou(pr * s.invTotalWeight)
+ dstColorRGBA64.G = ftou(pg * s.invTotalWeight)
+ dstColorRGBA64.B = ftou(pb * s.invTotalWeight)
+ dstColorRGBA64.A = ftou(pa * s.invTotalWeight)
+ dst.Set(dp.X+int(x), dp.Y+int(y), dstColor)
+ }
+ }
+}
+
func (z *kernelScaler) scaleY_Image(dst Image, dp image.Point, tmp [][4]float64) {
dstColorRGBA64 := &color.RGBA64{}
dstColor := color.Color(dstColorRGBA64)
diff --git a/draw/scale_test.go b/draw/scale_test.go
index bc99393..6d4baac 100644
--- a/draw/scale_test.go
+++ b/draw/scale_test.go
@@ -81,26 +81,52 @@
func TestScaleDown(t *testing.T) { testScale(t, 100, 100, "down", "280x360.jpeg") }
func TestScaleUp(t *testing.T) { testScale(t, 75, 100, "up", "14x18.png") }
-func benchScale(b *testing.B, largeSrc bool, w int, h int, q Interpolator) {
- var src image.Image
- if largeSrc {
- // 3072 x 2304 is over 7 million pixels at 4:3, comparable to a
- // 2015 smart-phone camera's output.
- src = image.NewYCbCr(image.Rect(0, 0, 3072, 2304), image.YCbCrSubsampleRatio420)
- } else {
- // tux.png is a 386 x 395 image.
- f, err := os.Open("../testdata/tux.png")
- if err != nil {
- b.Fatalf("Open: %v", err)
- }
- defer f.Close()
- src, err = png.Decode(f)
- if err != nil {
- b.Fatalf("Decode: %v", err)
- }
- }
+// TODO: test that scaling concrete types like *image.RGBA and *image.YCbCr
+// give the same results as scaling those images wrapped in another Image or
+// image.Image type that would skip the fast-path type switch.
+func srcNRGBA() (image.Image, error) {
+ return image.NewNRGBA(image.Rect(0, 0, 1024, 768)), nil
+}
+
+func srcRGBA() (image.Image, error) {
+ return image.NewRGBA(image.Rect(0, 0, 1024, 768)), nil
+}
+
+func srcUniform() (image.Image, error) {
+ return image.White, nil
+}
+
+func srcYCbCr() (image.Image, error) {
+ return image.NewYCbCr(image.Rect(0, 0, 1024, 768), image.YCbCrSubsampleRatio420), nil
+}
+
+func srcYCbCrLarge() (image.Image, error) {
+ // 3072 x 2304 is over 7 million pixels at 4:3, comparable to a
+ // 2015 smart-phone camera's output.
+ return image.NewYCbCr(image.Rect(0, 0, 3072, 2304), image.YCbCrSubsampleRatio420), nil
+}
+
+func srcTux() (image.Image, error) {
+ // tux.png is a 386 x 395 image.
+ f, err := os.Open("../testdata/tux.png")
+ if err != nil {
+ return nil, fmt.Errorf("Open: %v", err)
+ }
+ defer f.Close()
+ src, err := png.Decode(f)
+ if err != nil {
+ return nil, fmt.Errorf("Decode: %v", err)
+ }
+ return src, nil
+}
+
+func benchScale(b *testing.B, srcf func() (image.Image, error), w int, h int, q Interpolator) {
dst := image.NewRGBA(image.Rect(0, 0, w, h))
+ src, err := srcf()
+ if err != nil {
+ b.Fatal(err)
+ }
dr, sr := dst.Bounds(), src.Bounds()
scaler := q.NewScaler(int32(dr.Dx()), int32(dr.Dy()), int32(sr.Dx()), int32(sr.Dy()))
@@ -110,15 +136,22 @@
}
}
-func BenchmarkScaleLargeDownNN(b *testing.B) { benchScale(b, true, 200, 150, NearestNeighbor) }
-func BenchmarkScaleLargeDownAB(b *testing.B) { benchScale(b, true, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleLargeDownBL(b *testing.B) { benchScale(b, true, 200, 150, BiLinear) }
-func BenchmarkScaleLargeDownCR(b *testing.B) { benchScale(b, true, 200, 150, CatmullRom) }
-func BenchmarkScaleDownNN(b *testing.B) { benchScale(b, false, 120, 80, NearestNeighbor) }
-func BenchmarkScaleDownAB(b *testing.B) { benchScale(b, false, 120, 80, ApproxBiLinear) }
-func BenchmarkScaleDownBL(b *testing.B) { benchScale(b, false, 120, 80, BiLinear) }
-func BenchmarkScaleDownCR(b *testing.B) { benchScale(b, false, 120, 80, CatmullRom) }
-func BenchmarkScaleUpNN(b *testing.B) { benchScale(b, false, 800, 600, NearestNeighbor) }
-func BenchmarkScaleUpAB(b *testing.B) { benchScale(b, false, 800, 600, ApproxBiLinear) }
-func BenchmarkScaleUpBL(b *testing.B) { benchScale(b, false, 800, 600, BiLinear) }
-func BenchmarkScaleUpCR(b *testing.B) { benchScale(b, false, 800, 600, CatmullRom) }
+func BenchmarkScaleLargeDownNN(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, NearestNeighbor) }
+func BenchmarkScaleLargeDownAB(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleLargeDownBL(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, BiLinear) }
+func BenchmarkScaleLargeDownCR(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, CatmullRom) }
+
+func BenchmarkScaleDownNN(b *testing.B) { benchScale(b, srcTux, 120, 80, NearestNeighbor) }
+func BenchmarkScaleDownAB(b *testing.B) { benchScale(b, srcTux, 120, 80, ApproxBiLinear) }
+func BenchmarkScaleDownBL(b *testing.B) { benchScale(b, srcTux, 120, 80, BiLinear) }
+func BenchmarkScaleDownCR(b *testing.B) { benchScale(b, srcTux, 120, 80, CatmullRom) }
+
+func BenchmarkScaleUpNN(b *testing.B) { benchScale(b, srcTux, 800, 600, NearestNeighbor) }
+func BenchmarkScaleUpAB(b *testing.B) { benchScale(b, srcTux, 800, 600, ApproxBiLinear) }
+func BenchmarkScaleUpBL(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
+func BenchmarkScaleUpCR(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }
+
+func BenchmarkScaleSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleSrcRGBA(b *testing.B) { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) }