draw: autogenerate impl.go.

Fast paths for concrete dst and src image types will be added in follow-up
changes.

Change-Id: Ib23e3cc46d9c7a52ebd5eddfbad09177b0b421af
Reviewed-on: https://go-review.googlesource.com/5417
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
new file mode 100644
index 0000000..4ba769a
--- /dev/null
+++ b/draw/gen.go
@@ -0,0 +1,410 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"go/format"
+	"io/ioutil"
+	"log"
+	"os"
+	"strings"
+)
+
+var debug = flag.Bool("debug", false, "") // -debug prints the unformatted output to stdout instead of writing impl.go
+
+func main() { // expands the code templates below and writes the result to impl.go
+	flag.Parse()
+
+	w := new(bytes.Buffer) // collects the generated, not yet formatted, source
+	w.WriteString("// generated by \"go run gen.go\". DO NOT EDIT.\n\n" +
+		"package draw\n\nimport (\n\"image\"\n\"image/color\"\n)\n")
+
+	gen(w, "nnScaler", codeNNLeaf)
+	gen(w, "ablScaler", codeABLLeaf)
+	genKernel(w)
+
+	if *debug { // dump the raw expansion as-is; format.Source is skipped on this path
+		os.Stdout.Write(w.Bytes())
+		return
+	}
+	out, err := format.Source(w.Bytes()) // gofmt-style formatting; errors if the expansion does not parse
+	if err != nil {
+		log.Fatal(err)
+	}
+	if err := ioutil.WriteFile("impl.go", out, 0660); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// dsTypes lists the "dstType srcType" pairs, separated by a space, for which
+// gen emits scale_DType_SType and genKernel emits scaleX_SType / scaleY_DType.
+// The last element should be the fallback pair "Image image.Image".
+//
+// TODO: add more concrete types: *image.RGBA, *image.YCbCr, etc.
+var dsTypes = []string{
+	"Image image.Image",
+}
+
+type data struct { // values that expnLine substitutes for $-placeholders
+	dType    string // destination image type, e.g. "Image"
+	sType    string // source image type, e.g. "image.Image"
+	receiver string // scaler type used as the method receiver, e.g. "nnScaler"
+}
+
+func gen(w *bytes.Buffer, receiver string, code string) { // emits receiver's Scale method, then code once per dsTypes pair
+	expn(w, codeRoot, &data{receiver: receiver})
+
+	for _, dsType := range dsTypes {
+		dType, sType := split(dsType, " ") // e.g. "Image image.Image" -> "Image", "image.Image"
+		expn(w, code, &data{
+			dType:    dType,
+			sType:    sType,
+			receiver: receiver,
+		})
+	}
+}
+
+func genKernel(w *bytes.Buffer) { // emits kernelScaler's Scale method plus its scaleX_* and scaleY_* helpers
+	expn(w, codeKernelRoot, &data{})
+
+	dTypesSeen := map[string]bool{} // scaleY_* only uses dType, so emit it once per distinct dType
+	sTypesSeen := map[string]bool{} // scaleX_* only uses sType, so emit it once per distinct sType
+	for _, dsType := range dsTypes {
+		dType, sType := split(dsType, " ")
+		if !sTypesSeen[sType] {
+			sTypesSeen[sType] = true
+			expn(w, codeKernelLeafX, &data{
+				sType: sType,
+			})
+		}
+		if !dTypesSeen[dType] {
+			dTypesSeen[dType] = true
+			expn(w, codeKernelLeafY, &data{
+				dType: dType,
+			})
+		}
+	}
+}
+
+func expn(w *bytes.Buffer, code string, d *data) { // writes code to w line by line, expanding every $name placeholder using d
+	for _, line := range strings.Split(code, "\n") {
+		for { // keep expanding the leftmost '$' until none is left
+			i := strings.IndexByte(line, '$')
+			if i < 0 {
+				break
+			}
+			prefix, s := line[:i], line[i+1:]
+
+			i = len(s)
+			for j, c := range s { // the placeholder name is the run of ASCII letters right after '$'
+				if !('A' <= c && c <= 'Z' || 'a' <= c && c <= 'z') {
+					i = j
+					break
+				}
+			}
+			dollar, suffix := s[:i], s[i:]
+
+			e := expnLine(prefix, dollar, suffix, d)
+			if e == "" { // expnLine yields "" for an unknown name or malformed arguments
+				log.Fatalf("couldn't expand %q", line)
+			}
+			line = e // may span several lines and may still contain further placeholders
+		}
+		fmt.Fprintln(w, line)
+	}
+}
+
+func expnLine(prefix, dollar, suffix string, d *data) string { // returns the replacement for the line prefix+"$"+dollar+suffix, or "" on failure
+	switch dollar {
+	case "dType":
+		return prefix + d.dType + suffix
+	case "dTypeRN":
+		return prefix + relName(d.dType) + suffix
+	case "sType":
+		return prefix + d.sType + suffix
+	case "sTypeRN":
+		return prefix + relName(d.sType) + suffix
+	case "receiver":
+		return prefix + d.receiver + suffix
+
+	case "dstColorDecl": // declares the reusable color value that outputu/outputf write through; prefix and suffix are dropped
+		if d.dType == "Image" {
+			return "dstColorRGBA64 := &color.RGBA64{}\n" +
+				"dstColor := color.Color(dstColorRGBA64)"
+		}
+		return ";" // nothing to declare; non-empty because expn treats "" as a failed expansion
+
+	case "blend": // $blend[a, p, b, q] expands to q = a*p + b*q for each of the r, g, b, a channels
+		args, _ := splitArgs(suffix)
+		if len(args) != 4 {
+			return ""
+		}
+		return fmt.Sprintf(""+
+			"%sr = %s*%sr + %s*%sr\n"+
+			"%sg = %s*%sg + %s*%sg\n"+
+			"%sb = %s*%sb + %s*%sb\n"+
+			"%sa = %s*%sa + %s*%sa",
+			args[3], args[0], args[1], args[2], args[3],
+			args[3], args[0], args[1], args[2], args[3],
+			args[3], args[0], args[1], args[2], args[3],
+			args[3], args[0], args[1], args[2], args[3],
+		)
+
+	case "outputu": // $outputu[x, y, p] stores uint16(pr), uint16(pg), uint16(pb), uint16(pa) at dst pixel (dp.X+x, dp.Y+y)
+		args, _ := splitArgs(suffix)
+		if len(args) != 3 {
+			return ""
+		}
+		switch d.dType {
+		default:
+			log.Fatalf("bad dType %q", d.dType)
+		case "Image":
+			return fmt.Sprintf(""+
+				"dstColorRGBA64.R = uint16(%sr)\n"+
+				"dstColorRGBA64.G = uint16(%sg)\n"+
+				"dstColorRGBA64.B = uint16(%sb)\n"+
+				"dstColorRGBA64.A = uint16(%sa)\n"+
+				"dst.Set(dp.X+int(%s), dp.Y+int(%s), dstColor)",
+				args[2], args[2], args[2], args[2],
+				args[0], args[1],
+			)
+		}
+
+	case "outputf": // $outputf[x, y, p, scale] stores ftou(channel * scale) for each channel of p at dst pixel (dp.X+x, dp.Y+y)
+		args, _ := splitArgs(suffix)
+		if len(args) != 4 {
+			return ""
+		}
+		switch d.dType {
+		default:
+			log.Fatalf("bad dType %q", d.dType)
+		case "Image":
+			return fmt.Sprintf(""+
+				"dstColorRGBA64.R = ftou(%sr * %s)\n"+
+				"dstColorRGBA64.G = ftou(%sg * %s)\n"+
+				"dstColorRGBA64.B = ftou(%sb * %s)\n"+
+				"dstColorRGBA64.A = ftou(%sa * %s)\n"+
+				"dst.Set(dp.X+int(%s), dp.Y+int(%s), dstColor)",
+				args[2], args[3], args[2], args[3], args[2], args[3], args[2], args[3],
+				args[0], args[1],
+			)
+		}
+
+	case "srcf", "srcu": // "lhs := $srcu[x, y]" or "lhs op $srcf[x, y]extra" reads the src pixel at (sp.X+x, sp.Y+y)
+		lhs, eqOp := splitEq(prefix)
+		if lhs == "" {
+			return ""
+		}
+		args, extra := splitArgs(suffix)
+		if len(args) != 2 {
+			return ""
+		}
+
+		tmp := "" // srcu assigns the RGBA() results straight to lhs+"r", lhs+"g", lhs+"b", lhs+"a"
+		if dollar == "srcf" {
+			tmp = "u" // srcf first reads into lhs+"ru" etc., then converts to float64 below
+		}
+
+		buf := new(bytes.Buffer)
+		switch d.sType {
+		default:
+			log.Fatalf("bad sType %q", d.sType)
+		case "image.Image":
+			fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
+				"src.At(sp.X + int(%s), sp.Y+int(%s)).RGBA()\n",
+				lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp, args[0], args[1])
+		}
+
+		if dollar == "srcf" { // apply eqOp per channel; extra (the text after ']') is appended to each right-hand side
+			fmt.Fprintf(buf, ""+
+				"%sr %s float64(%sru)%s\n"+
+				"%sg %s float64(%sgu)%s\n"+
+				"%sb %s float64(%sbu)%s\n"+
+				"%sa %s float64(%sau)%s\n",
+				lhs, eqOp, lhs, extra,
+				lhs, eqOp, lhs, extra,
+				lhs, eqOp, lhs, extra,
+				lhs, eqOp, lhs, extra,
+			)
+		}
+
+		return strings.TrimSpace(buf.String())
+	}
+	return "" // unknown placeholder name
+}
+
+func split(s, sep string) (string, string) { // cuts s at the first sep and trims surrounding space from both halves
+	if i := strings.Index(s, sep); i >= 0 {
+		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+len(sep):])
+	}
+	return "", "" // sep does not occur in s
+}
+
+func splitEq(s string) (lhs, eqOp string) { // returns the text before ":=" or "+=" in s, and which operator was found
+	s = strings.TrimSpace(s)
+	if lhs, _ = split(s, ":="); lhs != "" { // ":=" is tried first
+		return lhs, ":="
+	}
+	if lhs, _ = split(s, "+="); lhs != "" {
+		return lhs, "+="
+	}
+	return "", "" // neither operator found, or nothing precedes it
+}
+
+func splitArgs(s string) (args []string, extra string) { // parses "[a, b, c]rest" into the trimmed args and the text after ']'
+	s = strings.TrimSpace(s)
+	if s == "" || s[0] != '[' {
+		return nil, "" // no bracketed argument list
+	}
+	s = s[1:]
+
+	i := strings.IndexByte(s, ']')
+	if i < 0 {
+		return nil, "" // missing closing ']'
+	}
+	args, extra = strings.Split(s[:i], ","), s[i+1:]
+	for i := range args {
+		args[i] = strings.TrimSpace(args[i])
+	}
+	return args, extra
+}
+
+func relName(s string) string { // returns the part of s after its last '.', e.g. "image.Image" -> "Image"
+	if i := strings.LastIndex(s, "."); i >= 0 {
+		return s[i+1:]
+	}
+	return s // no '.' in s: already unqualified
+}
+
+const ( // code templates expanded by expn; codeRoot is the Scale entry point that gen emits for nnScaler and ablScaler
+	codeRoot = `
+		func (z *$receiver) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {
+			if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
+				return
+			}
+			// TODO: generate type switches for the different dsTypes.
+			z.scale_Image_Image(dst, dp, src, sp)
+		}
+	`
+	// codeNNLeaf is the nnScaler leaf method: each dst pixel is set from a single src pixel.
+	codeNNLeaf = `
+		func (z *nnScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, src $sType, sp image.Point) {
+			$dstColorDecl
+			for dy := int32(0); dy < z.dh; dy++ {
+				sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
+				for dx := int32(0); dx < z.dw; dx++ {
+					sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw))
+					p := $srcu[sx, sy]
+					$outputu[dx, dy, p]
+				}
+			}
+		}
+	`
+	// codeABLLeaf is the ablScaler leaf method: each dst pixel blends the src pixels at (sx0|sx1, sy0|sy1) by xFrac/yFrac.
+	codeABLLeaf = `
+		func (z *ablScaler) scale_$dTypeRN_$sTypeRN(dst $dType, dp image.Point, src $sType, sp image.Point) {
+			yscale := float64(z.sh) / float64(z.dh)
+			xscale := float64(z.sw) / float64(z.dw)
+			$dstColorDecl
+			for dy := int32(0); dy < z.dh; dy++ {
+				sy := (float64(dy)+0.5)*yscale - 0.5
+				sy0 := int32(sy)
+				yFrac0 := sy - float64(sy0)
+				yFrac1 := 1 - yFrac0
+				sy1 := sy0 + 1
+				if sy < 0 {
+					sy0, sy1 = 0, 0
+					yFrac0, yFrac1 = 0, 1
+				} else if sy1 >= z.sh {
+					sy1 = sy0
+					yFrac0, yFrac1 = 1, 0
+				}
+				for dx := int32(0); dx < z.dw; dx++ {
+					sx := (float64(dx)+0.5)*xscale - 0.5
+					sx0 := int32(sx)
+					xFrac0 := sx - float64(sx0)
+					xFrac1 := 1 - xFrac0
+					sx1 := sx0 + 1
+					if sx < 0 {
+						sx0, sx1 = 0, 0
+						xFrac0, xFrac1 = 0, 1
+					} else if sx1 >= z.sw {
+						sx1 = sx0
+						xFrac0, xFrac1 = 1, 0
+					}
+					s00 := $srcf[sx0, sy0]
+					s10 := $srcf[sx1, sy0]
+					$blend[xFrac1, s00, xFrac0, s10]
+					s01 := $srcf[sx0, sy1]
+					s11 := $srcf[sx1, sy1]
+					$blend[xFrac1, s01, xFrac0, s11]
+					$blend[yFrac1, s10, yFrac0, s11]
+					$outputu[dx, dy, s11]
+				}
+			}
+		}
+	`
+	// codeKernelRoot is kernelScaler's Scale method: scaleX fills a float64 tmp buffer, then scaleY writes dst from it.
+	codeKernelRoot = `
+		func (z *kernelScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {
+			if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
+				return
+			}
+			// Create a temporary buffer:
+			// scaleX distributes the source image's columns over the temporary image.
+			// scaleY distributes the temporary image's rows over the destination image.
+			// TODO: is it worth having a sync.Pool for this temporary buffer?
+			tmp := make([][4]float64, z.dw*z.sh)
+			// TODO: generate type switches for the different dTypes and sTypes.
+			z.scaleX_Image(tmp, src, sp)
+			z.scaleY_Image(dst, dp, tmp)
+		}
+	`
+	// codeKernelLeafX fills tmp, one src row at a time, with weighted sums of that row's src pixels.
+	codeKernelLeafX = `
+		func (z *kernelScaler) scaleX_$sTypeRN(tmp [][4]float64, src $sType, sp image.Point) {
+			t := 0
+			for y := int32(0); y < z.sh; y++ {
+				for _, s := range z.horizontal.sources {
+					var pr, pg, pb, pa float64
+					for _, c := range z.horizontal.contribs[s.i:s.j] {
+						p += $srcf[c.coord, y] * c.weight
+					}
+					tmp[t] = [4]float64{
+						pr * s.invTotalWeightFFFF,
+						pg * s.invTotalWeightFFFF,
+						pb * s.invTotalWeightFFFF,
+						pa * s.invTotalWeightFFFF,
+					}
+					t++
+				}
+			}
+		}
+	`
+	// codeKernelLeafY sets each dst pixel to a weighted sum of the tmp entries in that pixel's column.
+	codeKernelLeafY = `
+		func (z *kernelScaler) scaleY_$dTypeRN(dst $dType, dp image.Point, tmp [][4]float64) {
+			$dstColorDecl
+			for x := int32(0); x < z.dw; x++ {
+				for y, s := range z.vertical.sources {
+					var pr, pg, pb, pa float64
+					for _, c := range z.vertical.contribs[s.i:s.j] {
+						p := &tmp[c.coord*z.dw+x]
+						pr += p[0] * c.weight
+						pg += p[1] * c.weight
+						pb += p[2] * c.weight
+						pa += p[3] * c.weight
+					}
+					$outputf[x, y, p, s.invTotalWeight]
+				}
+			}
+		}
+	`
+)
diff --git a/draw/impl.go b/draw/impl.go
index c59ba87..cf164aa 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -1,11 +1,7 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// generated by "go run gen.go". DO NOT EDIT.
 
 package draw
 
-// TODO: autogenerate this file.
-
 import (
 	"image"
 	"image/color"
@@ -15,6 +11,11 @@
 	if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
 		return
 	}
+	// TODO: generate type switches for the different dsTypes.
+	z.scale_Image_Image(dst, dp, src, sp)
+}
+
+func (z *nnScaler) scale_Image_Image(dst Image, dp image.Point, src image.Image, sp image.Point) {
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(0); dy < z.dh; dy++ {
@@ -35,6 +36,11 @@
 	if z.dw <= 0 || z.dh <= 0 || z.sw <= 0 || z.sh <= 0 {
 		return
 	}
+	// TODO: generate type switches for the different dsTypes.
+	z.scale_Image_Image(dst, dp, src, sp)
+}
+
+func (z *ablScaler) scale_Image_Image(dst Image, dp image.Point, src image.Image, sp image.Point) {
 	yscale := float64(z.sh) / float64(z.dh)
 	xscale := float64(z.sw) / float64(z.dw)
 	dstColorRGBA64 := &color.RGBA64{}
@@ -115,51 +121,52 @@
 	// scaleY distributes the temporary image's rows over the destination image.
 	// TODO: is it worth having a sync.Pool for this temporary buffer?
 	tmp := make([][4]float64, z.dw*z.sh)
-	z.scaleX(tmp, src, sp)
-	z.scaleY(dst, dp, tmp)
+	// TODO: generate type switches for the different dTypes and sTypes.
+	z.scaleX_Image(tmp, src, sp)
+	z.scaleY_Image(dst, dp, tmp)
 }
 
-func (z *kernelScaler) scaleX(tmp [][4]float64, src image.Image, sp image.Point) {
+func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sp image.Point) {
 	t := 0
 	for y := int32(0); y < z.sh; y++ {
 		for _, s := range z.horizontal.sources {
-			var r, g, b, a float64
+			var pr, pg, pb, pa float64
 			for _, c := range z.horizontal.contribs[s.i:s.j] {
-				rr, gg, bb, aa := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA()
-				r += float64(rr) * c.weight
-				g += float64(gg) * c.weight
-				b += float64(bb) * c.weight
-				a += float64(aa) * c.weight
+				pru, pgu, pbu, pau := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA()
+				pr += float64(pru) * c.weight
+				pg += float64(pgu) * c.weight
+				pb += float64(pbu) * c.weight
+				pa += float64(pau) * c.weight
 			}
 			tmp[t] = [4]float64{
-				r * s.invTotalWeightFFFF,
-				g * s.invTotalWeightFFFF,
-				b * s.invTotalWeightFFFF,
-				a * s.invTotalWeightFFFF,
+				pr * s.invTotalWeightFFFF,
+				pg * s.invTotalWeightFFFF,
+				pb * s.invTotalWeightFFFF,
+				pa * s.invTotalWeightFFFF,
 			}
 			t++
 		}
 	}
 }
 
-func (z *kernelScaler) scaleY(dst Image, dp image.Point, tmp [][4]float64) {
+func (z *kernelScaler) scaleY_Image(dst Image, dp image.Point, tmp [][4]float64) {
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for x := int32(0); x < z.dw; x++ {
 		for y, s := range z.vertical.sources {
-			var r, g, b, a float64
+			var pr, pg, pb, pa float64
 			for _, c := range z.vertical.contribs[s.i:s.j] {
 				p := &tmp[c.coord*z.dw+x]
-				r += p[0] * c.weight
-				g += p[1] * c.weight
-				b += p[2] * c.weight
-				a += p[3] * c.weight
+				pr += p[0] * c.weight
+				pg += p[1] * c.weight
+				pb += p[2] * c.weight
+				pa += p[3] * c.weight
 			}
-			dstColorRGBA64.R = ftou(r * s.invTotalWeight)
-			dstColorRGBA64.G = ftou(g * s.invTotalWeight)
-			dstColorRGBA64.B = ftou(b * s.invTotalWeight)
-			dstColorRGBA64.A = ftou(a * s.invTotalWeight)
-			dst.Set(dp.X+int(x), dp.Y+y, dstColor)
+			dstColorRGBA64.R = ftou(pr * s.invTotalWeight)
+			dstColorRGBA64.G = ftou(pg * s.invTotalWeight)
+			dstColorRGBA64.B = ftou(pb * s.invTotalWeight)
+			dstColorRGBA64.A = ftou(pa * s.invTotalWeight)
+			dst.Set(dp.X+int(x), dp.Y+int(y), dstColor)
 		}
 	}
 }
diff --git a/draw/scale.go b/draw/scale.go
index b3aaf7e..53ec801 100644
--- a/draw/scale.go
+++ b/draw/scale.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+//go:generate go run gen.go
+
 package draw
 
 // TODO: should Scale and NewScaler also take an Op argument?