draw: generate code paths for image.Gray sources.

Optimizing these code paths is left to follow-up changes.

Change-Id: Ifece6c7d3685bb8f2d20cf8828a121c9ff346434
Reviewed-on: https://go-review.googlesource.com/6238
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 8ffea52..0d98f68 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -50,6 +50,7 @@
 	//
 	// TODO: add *image.CMYK src type after Go 1.5 is released.
 	dsTypes = []struct{ dType, sType string }{
+		{"*image.RGBA", "*image.Gray"},
 		{"*image.RGBA", "*image.NRGBA"},
 		{"*image.RGBA", "*image.RGBA"},
 		{"*image.RGBA", "*image.Uniform"},
@@ -284,7 +285,7 @@
 		switch d.sType {
 		default:
 			log.Fatalf("bad sType %q", d.sType)
-		case "image.Image", "*image.NRGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
+		case "image.Image", "*image.Gray", "*image.NRGBA", "*image.Uniform", "*image.YCbCr": // TODO: separate code for concrete types.
 			fmt.Fprintf(buf, "%sr%s, %sg%s, %sb%s, %sa%s := "+
 				"src.At(sp.X + int(%s), sp.Y+int(%s)).RGBA()\n",
 				lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp,
diff --git a/draw/impl.go b/draw/impl.go
index 3c07d31..9a2fbf5 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -25,6 +25,8 @@
 		switch dst := dst.(type) {
 		case *image.RGBA:
 			switch src := src.(type) {
+			case *image.Gray:
+				z.scale_RGBA_Gray(dst, dp, dr, src, sp)
 			case *image.NRGBA:
 				z.scale_RGBA_NRGBA(dst, dp, dr, src, sp)
 			case *image.RGBA:
@@ -45,6 +47,22 @@
 	}
 }
 
+func (z *nnScaler) scale_RGBA_Gray(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Gray, sp image.Point) { // Writes one RGBA pixel into dst for every (dx, dy) in dr, each taken from a single src pixel.
+	for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
+		sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) // Integer form of (dy+0.5)*sh/dh, truncated: the source row for the centre of destination row dy.
+		d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) // Index into dst.Pix of the first pixel written on this row; dp is added to the dr coordinates.
+		for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
+			sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) // Same mapping as sy, applied to columns with sw/dw.
+			pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() // Generic color.Color path; RGBA() yields 16-bit channel values. sp offsets the sample into src.
+			dst.Pix[d+0] = uint8(uint32(pr) >> 8) // Keep the high 8 bits of each 16-bit channel.
+			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
+			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
+			dst.Pix[d+3] = uint8(uint32(pa) >> 8)
+			d += 4 // Advance to the next 4-byte RGBA pixel.
+		}
+	}
+}
+
 func (z *nnScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
 	for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
 		sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh))
@@ -164,6 +182,8 @@
 		switch dst := dst.(type) {
 		case *image.RGBA:
 			switch src := src.(type) {
+			case *image.Gray:
+				z.scale_RGBA_Gray(dst, dp, dr, src, sp)
 			case *image.NRGBA:
 				z.scale_RGBA_NRGBA(dst, dp, dr, src, sp)
 			case *image.RGBA:
@@ -184,6 +204,77 @@
 	}
 }
 
+func (z *ablScaler) scale_RGBA_Gray(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.Gray, sp image.Point) { // Fills dr in dst by blending up to four neighbouring src pixels per destination pixel, weighted by the fractional sample position.
+	yscale := float64(z.sh) / float64(z.dh) // Ratio of source height to destination height.
+	xscale := float64(z.sw) / float64(z.dw) // Ratio of source width to destination width.
+	for dy := int32(dr.Min.Y); dy < int32(dr.Max.Y); dy++ {
+		sy := (float64(dy)+0.5)*yscale - 0.5 // Source y for the centre of destination row dy, expressed relative to source pixel centres.
+		sy0 := int32(sy) // Conversion truncates toward zero; the sy < 0 case is handled below.
+		yFrac0 := sy - float64(sy0) // Weight later applied to row sy1.
+		yFrac1 := 1 - yFrac0 // Weight later applied to row sy0.
+		sy1 := sy0 + 1
+		if sy < 0 { // Sample lies before row 0: use row 0 only.
+			sy0, sy1 = 0, 0
+			yFrac0, yFrac1 = 0, 1
+		} else if sy1 >= z.sh { // sy1 would fall outside the z.sh rows: reuse sy0 and give it the full weight.
+			sy1 = sy0
+			yFrac0, yFrac1 = 1, 0
+		}
+		d := dst.PixOffset(dp.X+dr.Min.X, dp.Y+int(dy)) // Index into dst.Pix of the first pixel written on this row.
+		for dx := int32(dr.Min.X); dx < int32(dr.Max.X); dx++ {
+			sx := (float64(dx)+0.5)*xscale - 0.5 // Same mapping as sy, applied to columns.
+			sx0 := int32(sx)
+			xFrac0 := sx - float64(sx0) // Weight later applied to column sx1.
+			xFrac1 := 1 - xFrac0 // Weight later applied to column sx0.
+			sx1 := sx0 + 1
+			if sx < 0 { // Sample lies before column 0: use column 0 only.
+				sx0, sx1 = 0, 0
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 >= z.sw { // sx1 would fall outside the z.sw columns: reuse sx0 and give it the full weight.
+				sx1 = sx0
+				xFrac0, xFrac1 = 1, 0
+			}
+			s00ru, s00gu, s00bu, s00au := src.At(sp.X+int(sx0), sp.Y+int(sy0)).RGBA() // Sample at (sx0, sy0); RGBA() yields 16-bit channel values.
+			s00r := float64(s00ru)
+			s00g := float64(s00gu)
+			s00b := float64(s00bu)
+			s00a := float64(s00au)
+			s10ru, s10gu, s10bu, s10au := src.At(sp.X+int(sx1), sp.Y+int(sy0)).RGBA() // Sample at (sx1, sy0).
+			s10r := float64(s10ru)
+			s10g := float64(s10gu)
+			s10b := float64(s10bu)
+			s10a := float64(s10au)
+			s10r = xFrac1*s00r + xFrac0*s10r // s10* now hold the horizontal blend along row sy0.
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			s01ru, s01gu, s01bu, s01au := src.At(sp.X+int(sx0), sp.Y+int(sy1)).RGBA() // Sample at (sx0, sy1).
+			s01r := float64(s01ru)
+			s01g := float64(s01gu)
+			s01b := float64(s01bu)
+			s01a := float64(s01au)
+			s11ru, s11gu, s11bu, s11au := src.At(sp.X+int(sx1), sp.Y+int(sy1)).RGBA() // Sample at (sx1, sy1).
+			s11r := float64(s11ru)
+			s11g := float64(s11gu)
+			s11b := float64(s11bu)
+			s11a := float64(s11au)
+			s11r = xFrac1*s01r + xFrac0*s11r // s11* now hold the horizontal blend along row sy1.
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r // Vertical blend of the two row results gives the final value.
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			dst.Pix[d+0] = uint8(uint32(s11r) >> 8) // Truncate to an integer and keep the high 8 bits of the 16-bit value.
+			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
+			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
+			dst.Pix[d+3] = uint8(uint32(s11a) >> 8)
+			d += 4 // Advance to the next 4-byte RGBA pixel.
+		}
+	}
+}
+
 func (z *ablScaler) scale_RGBA_NRGBA(dst *image.RGBA, dp image.Point, dr image.Rectangle, src *image.NRGBA, sp image.Point) {
 	yscale := float64(z.sh) / float64(z.dh)
 	xscale := float64(z.sw) / float64(z.dw)
@@ -649,6 +740,8 @@
 		z.scaleX_Image(tmp, src, sp)
 	} else {
 		switch src := src.(type) {
+		case *image.Gray:
+			z.scaleX_Gray(tmp, src, sp)
 		case *image.NRGBA:
 			z.scaleX_NRGBA(tmp, src, sp)
 		case *image.RGBA:
@@ -670,6 +763,29 @@
 	}
 }
 
+func (z *kernelScaler) scaleX_Gray(tmp [][4]float64, src *image.Gray, sp image.Point) { // Fills tmp with one weighted RGBA sum per entry of z.horizontal.sources, for each of the z.sh source rows.
+	t := 0 // Next index to write in tmp.
+	for y := int32(0); y < z.sh; y++ {
+		for _, s := range z.horizontal.sources {
+			var pr, pg, pb, pa float64 // Per-channel accumulators for this output entry.
+			for _, c := range z.horizontal.contribs[s.i:s.j] { // s.i and s.j bound this entry's slice of contribs.
+				pru, pgu, pbu, pau := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA() // c.coord is the x offset from sp.X; RGBA() yields 16-bit channel values.
+				pr += float64(pru) * c.weight
+				pg += float64(pgu) * c.weight
+				pb += float64(pbu) * c.weight
+				pa += float64(pau) * c.weight
+			}
+			tmp[t] = [4]float64{ // Each sum is scaled by s.invTotalWeightFFFF — presumably 1/(total weight * 0xffff); confirm at its definition.
+				pr * s.invTotalWeightFFFF,
+				pg * s.invTotalWeightFFFF,
+				pb * s.invTotalWeightFFFF,
+				pa * s.invTotalWeightFFFF,
+			}
+			t++
+		}
+	}
+}
+
 func (z *kernelScaler) scaleX_NRGBA(tmp [][4]float64, src *image.NRGBA, sp image.Point) {
 	t := 0
 	for y := int32(0); y < z.sh; y++ {
diff --git a/draw/scale_test.go b/draw/scale_test.go
index cceaad4..c7c6ef2 100644
--- a/draw/scale_test.go
+++ b/draw/scale_test.go
@@ -158,6 +158,7 @@
 		image.Rect(5, 5, 5, 5),     // Empty.
 	}
 	srcfs := []func(image.Rectangle) (image.Image, error){
+		srcGray,
 		srcNRGBA,
 		srcRGBA,
 		srcUniform,
@@ -197,6 +198,12 @@
 	}
 }
 
+func srcGray(boundsHint image.Rectangle) (image.Image, error) { // Returns a new *image.Gray with bounds boundsHint; the error is always nil.
+	m := image.NewGray(boundsHint)
+	fillPix(rand.New(rand.NewSource(0)), m.Pix) // Seed is fixed at 0; fillPix presumably fills m.Pix with pseudo-random bytes — confirm at its definition.
+	return m, nil
+}
+
 func srcNRGBA(boundsHint image.Rectangle) (image.Image, error) {
 	m := image.NewNRGBA(boundsHint)
 	fillPix(rand.New(rand.NewSource(1)), m.Pix)
@@ -276,6 +283,7 @@
 func BenchmarkScaleUpBL(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
 func BenchmarkScaleUpCR(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }
 
+func BenchmarkScaleSrcGray(b *testing.B)    { benchScale(b, srcGray, 200, 150, ApproxBiLinear) } // Runs benchScale with the srcGray source, passing 200, 150 and ApproxBiLinear.
 func BenchmarkScaleSrcNRGBA(b *testing.B)   { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
 func BenchmarkScaleSrcRGBA(b *testing.B)    { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
 func BenchmarkScaleSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, ApproxBiLinear) }