draw: substitute Src for Over when the source image is completely opaque.

For a completely opaque source (and no source mask), the two Ops produce the same result, but Src is faster.

Change-Id: I2c73a13755047c224c71fb5af786875f02681de9
Reviewed-on: https://go-review.googlesource.com/8640
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 658270e..1e335e0 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -55,6 +55,7 @@
 	// should be the fallback pair ("Image", "image.Image").
 	//
 	// TODO: add *image.CMYK src type after Go 1.5 is released.
+	// An *image.CMYK is also alwaysOpaque.
 	dsTypes = []struct{ dType, sType string }{
 		{"*image.RGBA", "*image.Gray"},
 		{"*image.RGBA", "*image.NRGBA"},
@@ -72,6 +73,13 @@
 		"440",
 	}
 	ops = []string{"Over", "Src"}
+	// alwaysOpaque are those image.Image implementations that are always
+	// opaque. For these types, Over is equivalent to the faster Src, in the
+	// absence of a source mask.
+	alwaysOpaque = map[string]bool{
+		"*image.Gray":  true,
+		"*image.YCbCr": true,
+	}
 )
 
 func init() {
@@ -104,6 +112,9 @@
 	for _, code := range codes {
 		for _, t := range dsTypes {
 			for _, op := range ops {
+				if op == "Over" && alwaysOpaque[t.sType] {
+					continue
+				}
 				expn(w, code, &data{
 					dType:    t.dType,
 					sType:    t.sType,
@@ -132,6 +143,9 @@
 	}
 	for _, t := range dsTypes {
 		for _, op := range ops {
+			if op == "Over" && alwaysOpaque[t.sType] {
+				continue
+			}
 			expn(w, codeKernelTransformLeaf, &data{
 				dType: t.dType,
 				sType: t.sType,
@@ -585,7 +599,7 @@
 
 func expnSwitch(op, dType string, expandBoth bool, template string) string {
 	if op == "" && dType != "anyDType" {
-		lines := []string{"switch opts.op() {"}
+		lines := []string{"switch op {"}
 		for _, op = range ops {
 			lines = append(lines,
 				fmt.Sprintf("case %s:", op),
@@ -607,6 +621,14 @@
 		fallback, values = "image.Image", sTypesForDType[dType]
 	}
 	for _, v := range values {
+		if dType != "" {
+			// v is the sType. Skip those always-opaque sTypes, where Over is
+			// equivalent to Src.
+			if op == "Over" && alwaysOpaque[v] {
+				continue
+			}
+		}
+
 		if v == fallback {
 			lines = append(lines, "default:")
 		} else {
@@ -745,18 +767,22 @@
 			if adr.Empty() || sr.Empty() {
 				return
 			}
+			op := opts.op()
+			if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+				op = Src
+			}
 			// sr is the source pixels. If it extends beyond the src bounds,
 			// we cannot use the type-specific fast paths, as they access
 			// the Pix fields directly without bounds checking.
 			if !sr.In(src.Bounds()) {
-				switch opts.op() {
+				switch op {
 				case Over:
 					z.scale_Image_Image_Over(dst, dr, adr, src, sr)
 				case Src:
 					z.scale_Image_Image_Src(dst, dr, adr, src, sr)
 				}
 			} else if _, ok := src.(*image.Uniform); ok {
-				Draw(dst, dr, src, src.Bounds().Min, opts.op())
+				Draw(dst, dr, src, src.Bounds().Min, op)
 			} else {
 				$switch z.scale_$dTypeRN_$sTypeRN$sratio_$op(dst, dr, adr, src, sr)
 			}
@@ -769,6 +795,10 @@
 			if adr.Empty() || sr.Empty() {
 				return
 			}
+			op := opts.op()
+			if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+				op = Src
+			}
 			d2s := invert(s2d)
 			// bias is a translation of the mapping from dst co-ordinates to
 			// src co-ordinates such that the latter temporarily have
@@ -788,14 +818,14 @@
 			// we cannot use the type-specific fast paths, as they access
 			// the Pix fields directly without bounds checking.
 			if !sr.In(src.Bounds()) {
-				switch opts.op() {
+				switch op {
 				case Over:
 					z.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias)
 				case Src:
 					z.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias)
 				}
 			} else if u, ok := src.(*image.Uniform); ok {
-				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, opts.op())
+				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
 			} else {
 				$switch z.transform_$dTypeRN_$sTypeRN$sratio_$op(dst, dr, adr, &d2s, src, sr, bias)
 			}
@@ -961,9 +991,13 @@
 			if adr.Empty() || sr.Empty() {
 				return
 			}
+			op := opts.op()
+			if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+				op = Src
+			}
 
 			if _, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
-				Draw(dst, dr, src, src.Bounds().Min, opts.op())
+				Draw(dst, dr, src, src.Bounds().Min, op)
 				return
 			}
 
@@ -998,6 +1032,10 @@
 			if adr.Empty() || sr.Empty() {
 				return
 			}
+			op := opts.op()
+			if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+				op = Src
+			}
 			d2s := invert(s2d)
 			// bias is a translation of the mapping from dst co-ordinates to
 			// src co-ordinates such that the latter temporarily have
@@ -1015,7 +1053,7 @@
 			adr = adr.Sub(dr.Min)
 
 			if u, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
-				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, opts.op())
+				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
 				return
 			}
 
@@ -1032,7 +1070,7 @@
 			// we cannot use the type-specific fast paths, as they access
 			// the Pix fields directly without bounds checking.
 			if !sr.In(src.Bounds()) {
-				switch opts.op() {
+				switch op {
 				case Over:
 					q.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				case Src:
diff --git a/draw/impl.go b/draw/impl.go
index efe65d2..8fed0bd 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -16,43 +16,32 @@
 	if adr.Empty() || sr.Empty() {
 		return
 	}
+	op := opts.op()
+	if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+		op = Src
+	}
 	// sr is the source pixels. If it extends beyond the src bounds,
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		switch opts.op() {
+		switch op {
 		case Over:
 			z.scale_Image_Image_Over(dst, dr, adr, src, sr)
 		case Src:
 			z.scale_Image_Image_Src(dst, dr, adr, src, sr)
 		}
 	} else if _, ok := src.(*image.Uniform); ok {
-		Draw(dst, dr, src, src.Bounds().Min, opts.op())
+		Draw(dst, dr, src, src.Bounds().Min, op)
 	} else {
-		switch opts.op() {
+		switch op {
 		case Over:
 			switch dst := dst.(type) {
 			case *image.RGBA:
 				switch src := src.(type) {
-				case *image.Gray:
-					z.scale_RGBA_Gray_Over(dst, dr, adr, src, sr)
 				case *image.NRGBA:
 					z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr)
 				case *image.RGBA:
 					z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr)
-				case *image.YCbCr:
-					switch src.SubsampleRatio {
-					default:
-						z.scale_RGBA_Image_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio444:
-						z.scale_RGBA_YCbCr444_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio422:
-						z.scale_RGBA_YCbCr422_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio420:
-						z.scale_RGBA_YCbCr420_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio440:
-						z.scale_RGBA_YCbCr440_Over(dst, dr, adr, src, sr)
-					}
 				default:
 					z.scale_RGBA_Image_Over(dst, dr, adr, src, sr)
 				}
@@ -105,6 +94,10 @@
 	if adr.Empty() || sr.Empty() {
 		return
 	}
+	op := opts.op()
+	if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+		op = Src
+	}
 	d2s := invert(s2d)
 	// bias is a translation of the mapping from dst co-ordinates to
 	// src co-ordinates such that the latter temporarily have
@@ -124,39 +117,24 @@
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		switch opts.op() {
+		switch op {
 		case Over:
 			z.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias)
 		case Src:
 			z.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias)
 		}
 	} else if u, ok := src.(*image.Uniform); ok {
-		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, opts.op())
+		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
 	} else {
-		switch opts.op() {
+		switch op {
 		case Over:
 			switch dst := dst.(type) {
 			case *image.RGBA:
 				switch src := src.(type) {
-				case *image.Gray:
-					z.transform_RGBA_Gray_Over(dst, dr, adr, &d2s, src, sr, bias)
 				case *image.NRGBA:
 					z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias)
 				case *image.RGBA:
 					z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias)
-				case *image.YCbCr:
-					switch src.SubsampleRatio {
-					default:
-						z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio444:
-						z.transform_RGBA_YCbCr444_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio422:
-						z.transform_RGBA_YCbCr422_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio420:
-						z.transform_RGBA_YCbCr420_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio440:
-						z.transform_RGBA_YCbCr440_Over(dst, dr, adr, &d2s, src, sr, bias)
-					}
 				default:
 					z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias)
 				}
@@ -202,27 +180,6 @@
 	}
 }
 
-func (nnInterpolator) scale_RGBA_Gray_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.Gray, sr image.Rectangle) {
-	dw2 := uint64(dr.Dx()) * 2
-	dh2 := uint64(dr.Dy()) * 2
-	sw := uint64(sr.Dx())
-	sh := uint64(sr.Dy())
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (2*uint64(dy) + 1) * sh / dh2
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (2*uint64(dx) + 1) * sw / dw2
-			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx) - src.Rect.Min.X)
-			pr := uint32(src.Pix[pi]) * 0x101
-			out := uint8(uint32(pr) >> 8)
-			dst.Pix[d+0] = out
-			dst.Pix[d+1] = out
-			dst.Pix[d+2] = out
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (nnInterpolator) scale_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.Gray, sr image.Rectangle) {
 	dw2 := uint64(dr.Dx()) * 2
 	dh2 := uint64(dr.Dy()) * 2
@@ -336,178 +293,6 @@
 	}
 }
 
-func (nnInterpolator) scale_RGBA_YCbCr444_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	dw2 := uint64(dr.Dx()) * 2
-	dh2 := uint64(dr.Dy()) * 2
-	sw := uint64(sr.Dx())
-	sh := uint64(sr.Dy())
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (2*uint64(dy) + 1) * sh / dh2
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (2*uint64(dx) + 1) * sw / dw2
-			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
-			pj := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (nnInterpolator) scale_RGBA_YCbCr422_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	dw2 := uint64(dr.Dx()) * 2
-	dh2 := uint64(dr.Dy()) * 2
-	sw := uint64(sr.Dx())
-	sh := uint64(sr.Dy())
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (2*uint64(dy) + 1) * sh / dh2
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (2*uint64(dx) + 1) * sw / dw2
-			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
-			pj := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (nnInterpolator) scale_RGBA_YCbCr420_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	dw2 := uint64(dr.Dx()) * 2
-	dh2 := uint64(dr.Dy()) * 2
-	sw := uint64(sr.Dx())
-	sh := uint64(sr.Dy())
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (2*uint64(dy) + 1) * sh / dh2
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (2*uint64(dx) + 1) * sw / dw2
-			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
-			pj := ((sr.Min.Y+int(sy))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (nnInterpolator) scale_RGBA_YCbCr440_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	dw2 := uint64(dr.Dx()) * 2
-	dh2 := uint64(dr.Dy()) * 2
-	sw := uint64(sr.Dx())
-	sh := uint64(sr.Dy())
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (2*uint64(dy) + 1) * sh / dh2
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (2*uint64(dx) + 1) * sw / dw2
-			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
-			pj := ((sr.Min.Y+int(sy))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (nnInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
 	dw2 := uint64(dr.Dx()) * 2
 	dh2 := uint64(dr.Dy()) * 2
@@ -760,28 +545,6 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_Gray_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
-			if !(image.Point{sx0, sy0}).In(sr) {
-				continue
-			}
-			pi := (sy0-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X)
-			pr := uint32(src.Pix[pi]) * 0x101
-			out := uint8(uint32(pr) >> 8)
-			dst.Pix[d+0] = out
-			dst.Pix[d+1] = out
-			dst.Pix[d+2] = out
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (nnInterpolator) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
@@ -900,182 +663,6 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_YCbCr444_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
-			if !(image.Point{sx0, sy0}).In(sr) {
-				continue
-			}
-			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			pj := (sy0-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (nnInterpolator) transform_RGBA_YCbCr422_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
-			if !(image.Point{sx0, sy0}).In(sr) {
-				continue
-			}
-			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			pj := (sy0-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (nnInterpolator) transform_RGBA_YCbCr420_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
-			if !(image.Point{sx0, sy0}).In(sr) {
-				continue
-			}
-			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			pj := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (nnInterpolator) transform_RGBA_YCbCr440_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
-			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
-			if !(image.Point{sx0, sy0}).In(sr) {
-				continue
-			}
-			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			pj := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			pyy1 := int(src.Y[pi])<<16 + 1<<15
-			pcb1 := int(src.Cb[pj]) - 128
-			pcr1 := int(src.Cr[pj]) - 128
-			pr := (pyy1 + 91881*pcr1) >> 8
-			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-			pb := (pyy1 + 116130*pcb1) >> 8
-			if pr < 0 {
-				pr = 0
-			} else if pr > 0xffff {
-				pr = 0xffff
-			}
-			if pg < 0 {
-				pg = 0
-			} else if pg > 0xffff {
-				pg = 0xffff
-			}
-			if pb < 0 {
-				pb = 0
-			} else if pb > 0xffff {
-				pb = 0xffff
-			}
-			dst.Pix[d+0] = uint8(uint32(pr) >> 8)
-			dst.Pix[d+1] = uint8(uint32(pg) >> 8)
-			dst.Pix[d+2] = uint8(uint32(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (nnInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
@@ -1342,43 +929,32 @@
 	if adr.Empty() || sr.Empty() {
 		return
 	}
+	op := opts.op()
+	if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+		op = Src
+	}
 	// sr is the source pixels. If it extends beyond the src bounds,
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		switch opts.op() {
+		switch op {
 		case Over:
 			z.scale_Image_Image_Over(dst, dr, adr, src, sr)
 		case Src:
 			z.scale_Image_Image_Src(dst, dr, adr, src, sr)
 		}
 	} else if _, ok := src.(*image.Uniform); ok {
-		Draw(dst, dr, src, src.Bounds().Min, opts.op())
+		Draw(dst, dr, src, src.Bounds().Min, op)
 	} else {
-		switch opts.op() {
+		switch op {
 		case Over:
 			switch dst := dst.(type) {
 			case *image.RGBA:
 				switch src := src.(type) {
-				case *image.Gray:
-					z.scale_RGBA_Gray_Over(dst, dr, adr, src, sr)
 				case *image.NRGBA:
 					z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr)
 				case *image.RGBA:
 					z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr)
-				case *image.YCbCr:
-					switch src.SubsampleRatio {
-					default:
-						z.scale_RGBA_Image_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio444:
-						z.scale_RGBA_YCbCr444_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio422:
-						z.scale_RGBA_YCbCr422_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio420:
-						z.scale_RGBA_YCbCr420_Over(dst, dr, adr, src, sr)
-					case image.YCbCrSubsampleRatio440:
-						z.scale_RGBA_YCbCr440_Over(dst, dr, adr, src, sr)
-					}
 				default:
 					z.scale_RGBA_Image_Over(dst, dr, adr, src, sr)
 				}
@@ -1431,6 +1007,10 @@
 	if adr.Empty() || sr.Empty() {
 		return
 	}
+	op := opts.op()
+	if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+		op = Src
+	}
 	d2s := invert(s2d)
 	// bias is a translation of the mapping from dst co-ordinates to
 	// src co-ordinates such that the latter temporarily have
@@ -1450,39 +1030,24 @@
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		switch opts.op() {
+		switch op {
 		case Over:
 			z.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias)
 		case Src:
 			z.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias)
 		}
 	} else if u, ok := src.(*image.Uniform); ok {
-		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, opts.op())
+		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
 	} else {
-		switch opts.op() {
+		switch op {
 		case Over:
 			switch dst := dst.(type) {
 			case *image.RGBA:
 				switch src := src.(type) {
-				case *image.Gray:
-					z.transform_RGBA_Gray_Over(dst, dr, adr, &d2s, src, sr, bias)
 				case *image.NRGBA:
 					z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias)
 				case *image.RGBA:
 					z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias)
-				case *image.YCbCr:
-					switch src.SubsampleRatio {
-					default:
-						z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio444:
-						z.transform_RGBA_YCbCr444_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio422:
-						z.transform_RGBA_YCbCr422_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio420:
-						z.transform_RGBA_YCbCr420_Over(dst, dr, adr, &d2s, src, sr, bias)
-					case image.YCbCrSubsampleRatio440:
-						z.transform_RGBA_YCbCr440_Over(dst, dr, adr, &d2s, src, sr, bias)
-					}
 				default:
 					z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias)
 				}
@@ -1528,69 +1093,6 @@
 	}
 }
 
-func (ablInterpolator) scale_RGBA_Gray_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.Gray, sr image.Rectangle) {
-	sw := int32(sr.Dx())
-	sh := int32(sr.Dy())
-	yscale := float64(sh) / float64(dr.Dy())
-	xscale := float64(sw) / float64(dr.Dx())
-	swMinus1, shMinus1 := sw-1, sh-1
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
-		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
-		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
-		// sx, below.
-		sy0 := int32(sy)
-		yFrac0 := sy - float64(sy0)
-		yFrac1 := 1 - yFrac0
-		sy1 := sy0 + 1
-		if sy < 0 {
-			sy0, sy1 = 0, 0
-			yFrac0, yFrac1 = 0, 1
-		} else if sy1 > shMinus1 {
-			sy0, sy1 = shMinus1, shMinus1
-			yFrac0, yFrac1 = 1, 0
-		}
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
-			sx0 := int32(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx1 := sx0 + 1
-			if sx < 0 {
-				sx0, sx1 = 0, 0
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 > swMinus1 {
-				sx0, sx1 = swMinus1, swMinus1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s00ru := uint32(src.Pix[s00i]) * 0x101
-			s00r := float64(s00ru)
-			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s10ru := uint32(src.Pix[s10i]) * 0x101
-			s10r := float64(s10ru)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s01ru := uint32(src.Pix[s01i]) * 0x101
-			s01r := float64(s01ru)
-			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s11ru := uint32(src.Pix[s11i]) * 0x101
-			s11r := float64(s11ru)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11r = yFrac1*s10r + yFrac0*s11r
-			out := uint8(uint32(s11r) >> 8)
-			dst.Pix[d+0] = out
-			dst.Pix[d+1] = out
-			dst.Pix[d+2] = out
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (ablInterpolator) scale_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.Gray, sr image.Rectangle) {
 	sw := int32(sr.Dx())
 	sh := int32(sr.Dy())
@@ -2034,694 +1536,6 @@
 	}
 }
 
-func (ablInterpolator) scale_RGBA_YCbCr444_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	sw := int32(sr.Dx())
-	sh := int32(sr.Dy())
-	yscale := float64(sh) / float64(dr.Dy())
-	xscale := float64(sw) / float64(dr.Dx())
-	swMinus1, shMinus1 := sw-1, sh-1
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
-		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
-		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
-		// sx, below.
-		sy0 := int32(sy)
-		yFrac0 := sy - float64(sy0)
-		yFrac1 := 1 - yFrac0
-		sy1 := sy0 + 1
-		if sy < 0 {
-			sy0, sy1 = 0, 0
-			yFrac0, yFrac1 = 0, 1
-		} else if sy1 > shMinus1 {
-			sy0, sy1 = shMinus1, shMinus1
-			yFrac0, yFrac1 = 1, 0
-		}
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
-			sx0 := int32(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx1 := sx0 + 1
-			if sx < 0 {
-				sx0, sx1 = 0, 0
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 > swMinus1 {
-				sx0, sx1 = swMinus1, swMinus1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s00j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s10j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s11j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (ablInterpolator) scale_RGBA_YCbCr422_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	sw := int32(sr.Dx())
-	sh := int32(sr.Dy())
-	yscale := float64(sh) / float64(dr.Dy())
-	xscale := float64(sw) / float64(dr.Dx())
-	swMinus1, shMinus1 := sw-1, sh-1
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
-		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
-		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
-		// sx, below.
-		sy0 := int32(sy)
-		yFrac0 := sy - float64(sy0)
-		yFrac1 := 1 - yFrac0
-		sy1 := sy0 + 1
-		if sy < 0 {
-			sy0, sy1 = 0, 0
-			yFrac0, yFrac1 = 0, 1
-		} else if sy1 > shMinus1 {
-			sy0, sy1 = shMinus1, shMinus1
-			yFrac0, yFrac1 = 1, 0
-		}
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
-			sx0 := int32(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx1 := sx0 + 1
-			if sx < 0 {
-				sx0, sx1 = 0, 0
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 > swMinus1 {
-				sx0, sx1 = swMinus1, swMinus1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s00j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s10j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s11j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (ablInterpolator) scale_RGBA_YCbCr420_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	sw := int32(sr.Dx())
-	sh := int32(sr.Dy())
-	yscale := float64(sh) / float64(dr.Dy())
-	xscale := float64(sw) / float64(dr.Dx())
-	swMinus1, shMinus1 := sw-1, sh-1
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
-		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
-		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
-		// sx, below.
-		sy0 := int32(sy)
-		yFrac0 := sy - float64(sy0)
-		yFrac1 := 1 - yFrac0
-		sy1 := sy0 + 1
-		if sy < 0 {
-			sy0, sy1 = 0, 0
-			yFrac0, yFrac1 = 0, 1
-		} else if sy1 > shMinus1 {
-			sy0, sy1 = shMinus1, shMinus1
-			yFrac0, yFrac1 = 1, 0
-		}
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
-			sx0 := int32(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx1 := sx0 + 1
-			if sx < 0 {
-				sx0, sx1 = 0, 0
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 > swMinus1 {
-				sx0, sx1 = swMinus1, swMinus1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s00j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s10j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s11j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (ablInterpolator) scale_RGBA_YCbCr440_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
-	sw := int32(sr.Dx())
-	sh := int32(sr.Dy())
-	yscale := float64(sh) / float64(dr.Dy())
-	xscale := float64(sw) / float64(dr.Dx())
-	swMinus1, shMinus1 := sw-1, sh-1
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		sy := (float64(dy)+0.5)*yscale - 0.5
-		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
-		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
-		// sx, below.
-		sy0 := int32(sy)
-		yFrac0 := sy - float64(sy0)
-		yFrac1 := 1 - yFrac0
-		sy1 := sy0 + 1
-		if sy < 0 {
-			sy0, sy1 = 0, 0
-			yFrac0, yFrac1 = 0, 1
-		} else if sy1 > shMinus1 {
-			sy0, sy1 = shMinus1, shMinus1
-			yFrac0, yFrac1 = 1, 0
-		}
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			sx := (float64(dx)+0.5)*xscale - 0.5
-			sx0 := int32(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx1 := sx0 + 1
-			if sx < 0 {
-				sx0, sx1 = 0, 0
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 > swMinus1 {
-				sx0, sx1 = swMinus1, swMinus1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s00j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s10j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-			s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-			s11j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (ablInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle) {
 	sw := int32(sr.Dx())
 	sh := int32(sr.Dy())
@@ -3730,70 +2544,6 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_Gray_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			sx -= 0.5
-			sx0 := int(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx0 += bias.X
-			sx1 := sx0 + 1
-			if sx0 < sr.Min.X {
-				sx0, sx1 = sr.Min.X, sr.Min.X
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 >= sr.Max.X {
-				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			sy -= 0.5
-			sy0 := int(sy)
-			yFrac0 := sy - float64(sy0)
-			yFrac1 := 1 - yFrac0
-			sy0 += bias.Y
-			sy1 := sy0 + 1
-			if sy0 < sr.Min.Y {
-				sy0, sy1 = sr.Min.Y, sr.Min.Y
-				yFrac0, yFrac1 = 0, 1
-			} else if sy1 >= sr.Max.Y {
-				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
-				yFrac0, yFrac1 = 1, 0
-			}
-
-			s00i := (sy0-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X)
-			s00ru := uint32(src.Pix[s00i]) * 0x101
-			s00r := float64(s00ru)
-			s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X)
-			s10ru := uint32(src.Pix[s10i]) * 0x101
-			s10r := float64(s10ru)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X)
-			s01ru := uint32(src.Pix[s01i]) * 0x101
-			s01r := float64(s01ru)
-			s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X)
-			s11ru := uint32(src.Pix[s11i]) * 0x101
-			s11r := float64(s11ru)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11r = yFrac1*s10r + yFrac0*s11r
-			out := uint8(uint32(s11r) >> 8)
-			dst.Pix[d+0] = out
-			dst.Pix[d+1] = out
-			dst.Pix[d+2] = out
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (ablInterpolator) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
@@ -4242,698 +2992,6 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_YCbCr444_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			sx -= 0.5
-			sx0 := int(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx0 += bias.X
-			sx1 := sx0 + 1
-			if sx0 < sr.Min.X {
-				sx0, sx1 = sr.Min.X, sr.Min.X
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 >= sr.Max.X {
-				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			sy -= 0.5
-			sy0 := int(sy)
-			yFrac0 := sy - float64(sy0)
-			yFrac1 := 1 - yFrac0
-			sy0 += bias.Y
-			sy1 := sy0 + 1
-			if sy0 < sr.Min.Y {
-				sy0, sy1 = sr.Min.Y, sr.Min.Y
-				yFrac0, yFrac1 = 0, 1
-			} else if sy1 >= sr.Max.Y {
-				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
-				yFrac0, yFrac1 = 1, 0
-			}
-
-			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s00j := (sy0-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s10j := (sy0-src.Rect.Min.Y)*src.CStride + (sx1 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s01j := (sy1-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s11j := (sy1-src.Rect.Min.Y)*src.CStride + (sx1 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (ablInterpolator) transform_RGBA_YCbCr422_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			sx -= 0.5
-			sx0 := int(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx0 += bias.X
-			sx1 := sx0 + 1
-			if sx0 < sr.Min.X {
-				sx0, sx1 = sr.Min.X, sr.Min.X
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 >= sr.Max.X {
-				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			sy -= 0.5
-			sy0 := int(sy)
-			yFrac0 := sy - float64(sy0)
-			yFrac1 := 1 - yFrac0
-			sy0 += bias.Y
-			sy1 := sy0 + 1
-			if sy0 < sr.Min.Y {
-				sy0, sy1 = sr.Min.Y, sr.Min.Y
-				yFrac0, yFrac1 = 0, 1
-			} else if sy1 >= sr.Max.Y {
-				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
-				yFrac0, yFrac1 = 1, 0
-			}
-
-			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s00j := (sy0-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s10j := (sy0-src.Rect.Min.Y)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s01j := (sy1-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s11j := (sy1-src.Rect.Min.Y)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (ablInterpolator) transform_RGBA_YCbCr420_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			sx -= 0.5
-			sx0 := int(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx0 += bias.X
-			sx1 := sx0 + 1
-			if sx0 < sr.Min.X {
-				sx0, sx1 = sr.Min.X, sr.Min.X
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 >= sr.Max.X {
-				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			sy -= 0.5
-			sy0 := int(sy)
-			yFrac0 := sy - float64(sy0)
-			yFrac1 := 1 - yFrac0
-			sy0 += bias.Y
-			sy1 := sy0 + 1
-			if sy0 < sr.Min.Y {
-				sy0, sy1 = sr.Min.Y, sr.Min.Y
-				yFrac0, yFrac1 = 0, 1
-			} else if sy1 >= sr.Max.Y {
-				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
-				yFrac0, yFrac1 = 1, 0
-			}
-
-			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s00j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s10j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s11j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (ablInterpolator) transform_RGBA_YCbCr440_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			sx -= 0.5
-			sx0 := int(sx)
-			xFrac0 := sx - float64(sx0)
-			xFrac1 := 1 - xFrac0
-			sx0 += bias.X
-			sx1 := sx0 + 1
-			if sx0 < sr.Min.X {
-				sx0, sx1 = sr.Min.X, sr.Min.X
-				xFrac0, xFrac1 = 0, 1
-			} else if sx1 >= sr.Max.X {
-				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
-				xFrac0, xFrac1 = 1, 0
-			}
-
-			sy -= 0.5
-			sy0 := int(sy)
-			yFrac0 := sy - float64(sy0)
-			yFrac1 := 1 - yFrac0
-			sy0 += bias.Y
-			sy1 := sy0 + 1
-			if sy0 < sr.Min.Y {
-				sy0, sy1 = sr.Min.Y, sr.Min.Y
-				yFrac0, yFrac1 = 0, 1
-			} else if sy1 >= sr.Max.Y {
-				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
-				yFrac0, yFrac1 = 1, 0
-			}
-
-			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s00j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s00yy1 := int(src.Y[s00i])<<16 + 1<<15
-			s00cb1 := int(src.Cb[s00j]) - 128
-			s00cr1 := int(src.Cr[s00j]) - 128
-			s00ru := (s00yy1 + 91881*s00cr1) >> 8
-			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
-			s00bu := (s00yy1 + 116130*s00cb1) >> 8
-			if s00ru < 0 {
-				s00ru = 0
-			} else if s00ru > 0xffff {
-				s00ru = 0xffff
-			}
-			if s00gu < 0 {
-				s00gu = 0
-			} else if s00gu > 0xffff {
-				s00gu = 0xffff
-			}
-			if s00bu < 0 {
-				s00bu = 0
-			} else if s00bu > 0xffff {
-				s00bu = 0xffff
-			}
-
-			s00r := float64(s00ru)
-			s00g := float64(s00gu)
-			s00b := float64(s00bu)
-			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s10j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + (sx1 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s10yy1 := int(src.Y[s10i])<<16 + 1<<15
-			s10cb1 := int(src.Cb[s10j]) - 128
-			s10cr1 := int(src.Cr[s10j]) - 128
-			s10ru := (s10yy1 + 91881*s10cr1) >> 8
-			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
-			s10bu := (s10yy1 + 116130*s10cb1) >> 8
-			if s10ru < 0 {
-				s10ru = 0
-			} else if s10ru > 0xffff {
-				s10ru = 0xffff
-			}
-			if s10gu < 0 {
-				s10gu = 0
-			} else if s10gu > 0xffff {
-				s10gu = 0xffff
-			}
-			if s10bu < 0 {
-				s10bu = 0
-			} else if s10bu > 0xffff {
-				s10bu = 0xffff
-			}
-
-			s10r := float64(s10ru)
-			s10g := float64(s10gu)
-			s10b := float64(s10bu)
-			s10r = xFrac1*s00r + xFrac0*s10r
-			s10g = xFrac1*s00g + xFrac0*s10g
-			s10b = xFrac1*s00b + xFrac0*s10b
-			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
-			s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s01yy1 := int(src.Y[s01i])<<16 + 1<<15
-			s01cb1 := int(src.Cb[s01j]) - 128
-			s01cr1 := int(src.Cr[s01j]) - 128
-			s01ru := (s01yy1 + 91881*s01cr1) >> 8
-			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
-			s01bu := (s01yy1 + 116130*s01cb1) >> 8
-			if s01ru < 0 {
-				s01ru = 0
-			} else if s01ru > 0xffff {
-				s01ru = 0xffff
-			}
-			if s01gu < 0 {
-				s01gu = 0
-			} else if s01gu > 0xffff {
-				s01gu = 0xffff
-			}
-			if s01bu < 0 {
-				s01bu = 0
-			} else if s01bu > 0xffff {
-				s01bu = 0xffff
-			}
-
-			s01r := float64(s01ru)
-			s01g := float64(s01gu)
-			s01b := float64(s01bu)
-			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
-			s11j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + (sx1 - src.Rect.Min.X)
-
-			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-			s11yy1 := int(src.Y[s11i])<<16 + 1<<15
-			s11cb1 := int(src.Cb[s11j]) - 128
-			s11cr1 := int(src.Cr[s11j]) - 128
-			s11ru := (s11yy1 + 91881*s11cr1) >> 8
-			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
-			s11bu := (s11yy1 + 116130*s11cb1) >> 8
-			if s11ru < 0 {
-				s11ru = 0
-			} else if s11ru > 0xffff {
-				s11ru = 0xffff
-			}
-			if s11gu < 0 {
-				s11gu = 0
-			} else if s11gu > 0xffff {
-				s11gu = 0xffff
-			}
-			if s11bu < 0 {
-				s11bu = 0
-			} else if s11bu > 0xffff {
-				s11bu = 0xffff
-			}
-
-			s11r := float64(s11ru)
-			s11g := float64(s11gu)
-			s11b := float64(s11bu)
-			s11r = xFrac1*s01r + xFrac0*s11r
-			s11g = xFrac1*s01g + xFrac0*s11g
-			s11b = xFrac1*s01b + xFrac0*s11b
-			s11r = yFrac1*s10r + yFrac0*s11r
-			s11g = yFrac1*s10g + yFrac0*s11g
-			s11b = yFrac1*s10b + yFrac0*s11b
-			dst.Pix[d+0] = uint8(uint32(s11r) >> 8)
-			dst.Pix[d+1] = uint8(uint32(s11g) >> 8)
-			dst.Pix[d+2] = uint8(uint32(s11b) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (ablInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
@@ -5960,9 +4018,13 @@
 	if adr.Empty() || sr.Empty() {
 		return
 	}
+	op := opts.op()
+	if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+		op = Src
+	}
 
 	if _, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
-		Draw(dst, dr, src, src.Bounds().Min, opts.op())
+		Draw(dst, dr, src, src.Bounds().Min, op)
 		return
 	}
 
@@ -6009,7 +4071,7 @@
 		}
 	}
 
-	switch opts.op() {
+	switch op {
 	case Over:
 		switch dst := dst.(type) {
 		case *image.RGBA:
@@ -6034,6 +4096,10 @@
 	if adr.Empty() || sr.Empty() {
 		return
 	}
+	op := opts.op()
+	if op == Over && opaque(src) { // TODO: also check that opts.SrcMask == nil.
+		op = Src
+	}
 	d2s := invert(s2d)
 	// bias is a translation of the mapping from dst co-ordinates to
 	// src co-ordinates such that the latter temporarily have
@@ -6051,7 +4117,7 @@
 	adr = adr.Sub(dr.Min)
 
 	if u, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
-		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, opts.op())
+		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
 		return
 	}
 
@@ -6068,37 +4134,22 @@
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		switch opts.op() {
+		switch op {
 		case Over:
 			q.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 		case Src:
 			q.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 		}
 	} else {
-		switch opts.op() {
+		switch op {
 		case Over:
 			switch dst := dst.(type) {
 			case *image.RGBA:
 				switch src := src.(type) {
-				case *image.Gray:
-					q.transform_RGBA_Gray_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				case *image.NRGBA:
 					q.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				case *image.RGBA:
 					q.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
-				case *image.YCbCr:
-					switch src.SubsampleRatio {
-					default:
-						q.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
-					case image.YCbCrSubsampleRatio444:
-						q.transform_RGBA_YCbCr444_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
-					case image.YCbCrSubsampleRatio422:
-						q.transform_RGBA_YCbCr422_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
-					case image.YCbCrSubsampleRatio420:
-						q.transform_RGBA_YCbCr420_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
-					case image.YCbCrSubsampleRatio440:
-						q.transform_RGBA_YCbCr440_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
-					}
 				default:
 					q.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				}
@@ -6517,105 +4568,6 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_Gray_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
-	// When shrinking, broaden the effective kernel support so that we still
-	// visit every source pixel.
-	xHalfWidth, xKernelArgScale := q.Support, 1.0
-	if xscale > 1 {
-		xHalfWidth *= xscale
-		xKernelArgScale = 1 / xscale
-	}
-	yHalfWidth, yKernelArgScale := q.Support, 1.0
-	if yscale > 1 {
-		yHalfWidth *= yscale
-		yKernelArgScale = 1 / yscale
-	}
-
-	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
-	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			// TODO: adjust the bias so that we can use int(f) instead
-			// of math.Floor(f) and math.Ceil(f).
-			sx += float64(bias.X)
-			sx -= 0.5
-			ix := int(math.Floor(sx - xHalfWidth))
-			if ix < sr.Min.X {
-				ix = sr.Min.X
-			}
-			jx := int(math.Ceil(sx + xHalfWidth))
-			if jx > sr.Max.X {
-				jx = sr.Max.X
-			}
-
-			totalXWeight := 0.0
-			for kx := ix; kx < jx; kx++ {
-				xWeight := 0.0
-				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
-					xWeight = q.At(t)
-				}
-				xWeights[kx-ix] = xWeight
-				totalXWeight += xWeight
-			}
-			for x := range xWeights[:jx-ix] {
-				xWeights[x] /= totalXWeight
-			}
-
-			sy += float64(bias.Y)
-			sy -= 0.5
-			iy := int(math.Floor(sy - yHalfWidth))
-			if iy < sr.Min.Y {
-				iy = sr.Min.Y
-			}
-			jy := int(math.Ceil(sy + yHalfWidth))
-			if jy > sr.Max.Y {
-				jy = sr.Max.Y
-			}
-
-			totalYWeight := 0.0
-			for ky := iy; ky < jy; ky++ {
-				yWeight := 0.0
-				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
-					yWeight = q.At(t)
-				}
-				yWeights[ky-iy] = yWeight
-				totalYWeight += yWeight
-			}
-			for y := range yWeights[:jy-iy] {
-				yWeights[y] /= totalYWeight
-			}
-
-			var pr float64
-			for ky := iy; ky < jy; ky++ {
-				if yWeight := yWeights[ky-iy]; yWeight != 0 {
-					for kx := ix; kx < jx; kx++ {
-						if w := xWeights[kx-ix] * yWeight; w != 0 {
-							pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
-							pru := uint32(src.Pix[pi]) * 0x101
-							pr += float64(pru) * w
-						}
-					}
-				}
-			}
-			out := uint8(fffftou(pr) >> 8)
-			dst.Pix[d+0] = out
-			dst.Pix[d+1] = out
-			dst.Pix[d+2] = out
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (q *Kernel) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
@@ -7131,502 +5083,6 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_YCbCr444_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
-	// When shrinking, broaden the effective kernel support so that we still
-	// visit every source pixel.
-	xHalfWidth, xKernelArgScale := q.Support, 1.0
-	if xscale > 1 {
-		xHalfWidth *= xscale
-		xKernelArgScale = 1 / xscale
-	}
-	yHalfWidth, yKernelArgScale := q.Support, 1.0
-	if yscale > 1 {
-		yHalfWidth *= yscale
-		yKernelArgScale = 1 / yscale
-	}
-
-	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
-	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			// TODO: adjust the bias so that we can use int(f) instead
-			// of math.Floor(f) and math.Ceil(f).
-			sx += float64(bias.X)
-			sx -= 0.5
-			ix := int(math.Floor(sx - xHalfWidth))
-			if ix < sr.Min.X {
-				ix = sr.Min.X
-			}
-			jx := int(math.Ceil(sx + xHalfWidth))
-			if jx > sr.Max.X {
-				jx = sr.Max.X
-			}
-
-			totalXWeight := 0.0
-			for kx := ix; kx < jx; kx++ {
-				xWeight := 0.0
-				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
-					xWeight = q.At(t)
-				}
-				xWeights[kx-ix] = xWeight
-				totalXWeight += xWeight
-			}
-			for x := range xWeights[:jx-ix] {
-				xWeights[x] /= totalXWeight
-			}
-
-			sy += float64(bias.Y)
-			sy -= 0.5
-			iy := int(math.Floor(sy - yHalfWidth))
-			if iy < sr.Min.Y {
-				iy = sr.Min.Y
-			}
-			jy := int(math.Ceil(sy + yHalfWidth))
-			if jy > sr.Max.Y {
-				jy = sr.Max.Y
-			}
-
-			totalYWeight := 0.0
-			for ky := iy; ky < jy; ky++ {
-				yWeight := 0.0
-				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
-					yWeight = q.At(t)
-				}
-				yWeights[ky-iy] = yWeight
-				totalYWeight += yWeight
-			}
-			for y := range yWeights[:jy-iy] {
-				yWeights[y] /= totalYWeight
-			}
-
-			var pr, pg, pb float64
-			for ky := iy; ky < jy; ky++ {
-				if yWeight := yWeights[ky-iy]; yWeight != 0 {
-					for kx := ix; kx < jx; kx++ {
-						if w := xWeights[kx-ix] * yWeight; w != 0 {
-							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
-							pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X)
-
-							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-							pyy1 := int(src.Y[pi])<<16 + 1<<15
-							pcb1 := int(src.Cb[pj]) - 128
-							pcr1 := int(src.Cr[pj]) - 128
-							pru := (pyy1 + 91881*pcr1) >> 8
-							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-							pbu := (pyy1 + 116130*pcb1) >> 8
-							if pru < 0 {
-								pru = 0
-							} else if pru > 0xffff {
-								pru = 0xffff
-							}
-							if pgu < 0 {
-								pgu = 0
-							} else if pgu > 0xffff {
-								pgu = 0xffff
-							}
-							if pbu < 0 {
-								pbu = 0
-							} else if pbu > 0xffff {
-								pbu = 0xffff
-							}
-
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-						}
-					}
-				}
-			}
-			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
-			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
-			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (q *Kernel) transform_RGBA_YCbCr422_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
-	// When shrinking, broaden the effective kernel support so that we still
-	// visit every source pixel.
-	xHalfWidth, xKernelArgScale := q.Support, 1.0
-	if xscale > 1 {
-		xHalfWidth *= xscale
-		xKernelArgScale = 1 / xscale
-	}
-	yHalfWidth, yKernelArgScale := q.Support, 1.0
-	if yscale > 1 {
-		yHalfWidth *= yscale
-		yKernelArgScale = 1 / yscale
-	}
-
-	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
-	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			// TODO: adjust the bias so that we can use int(f) instead
-			// of math.Floor(f) and math.Ceil(f).
-			sx += float64(bias.X)
-			sx -= 0.5
-			ix := int(math.Floor(sx - xHalfWidth))
-			if ix < sr.Min.X {
-				ix = sr.Min.X
-			}
-			jx := int(math.Ceil(sx + xHalfWidth))
-			if jx > sr.Max.X {
-				jx = sr.Max.X
-			}
-
-			totalXWeight := 0.0
-			for kx := ix; kx < jx; kx++ {
-				xWeight := 0.0
-				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
-					xWeight = q.At(t)
-				}
-				xWeights[kx-ix] = xWeight
-				totalXWeight += xWeight
-			}
-			for x := range xWeights[:jx-ix] {
-				xWeights[x] /= totalXWeight
-			}
-
-			sy += float64(bias.Y)
-			sy -= 0.5
-			iy := int(math.Floor(sy - yHalfWidth))
-			if iy < sr.Min.Y {
-				iy = sr.Min.Y
-			}
-			jy := int(math.Ceil(sy + yHalfWidth))
-			if jy > sr.Max.Y {
-				jy = sr.Max.Y
-			}
-
-			totalYWeight := 0.0
-			for ky := iy; ky < jy; ky++ {
-				yWeight := 0.0
-				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
-					yWeight = q.At(t)
-				}
-				yWeights[ky-iy] = yWeight
-				totalYWeight += yWeight
-			}
-			for y := range yWeights[:jy-iy] {
-				yWeights[y] /= totalYWeight
-			}
-
-			var pr, pg, pb float64
-			for ky := iy; ky < jy; ky++ {
-				if yWeight := yWeights[ky-iy]; yWeight != 0 {
-					for kx := ix; kx < jx; kx++ {
-						if w := xWeights[kx-ix] * yWeight; w != 0 {
-							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
-							pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
-
-							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-							pyy1 := int(src.Y[pi])<<16 + 1<<15
-							pcb1 := int(src.Cb[pj]) - 128
-							pcr1 := int(src.Cr[pj]) - 128
-							pru := (pyy1 + 91881*pcr1) >> 8
-							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-							pbu := (pyy1 + 116130*pcb1) >> 8
-							if pru < 0 {
-								pru = 0
-							} else if pru > 0xffff {
-								pru = 0xffff
-							}
-							if pgu < 0 {
-								pgu = 0
-							} else if pgu > 0xffff {
-								pgu = 0xffff
-							}
-							if pbu < 0 {
-								pbu = 0
-							} else if pbu > 0xffff {
-								pbu = 0xffff
-							}
-
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-						}
-					}
-				}
-			}
-			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
-			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
-			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (q *Kernel) transform_RGBA_YCbCr420_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
-	// When shrinking, broaden the effective kernel support so that we still
-	// visit every source pixel.
-	xHalfWidth, xKernelArgScale := q.Support, 1.0
-	if xscale > 1 {
-		xHalfWidth *= xscale
-		xKernelArgScale = 1 / xscale
-	}
-	yHalfWidth, yKernelArgScale := q.Support, 1.0
-	if yscale > 1 {
-		yHalfWidth *= yscale
-		yKernelArgScale = 1 / yscale
-	}
-
-	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
-	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			// TODO: adjust the bias so that we can use int(f) instead
-			// of math.Floor(f) and math.Ceil(f).
-			sx += float64(bias.X)
-			sx -= 0.5
-			ix := int(math.Floor(sx - xHalfWidth))
-			if ix < sr.Min.X {
-				ix = sr.Min.X
-			}
-			jx := int(math.Ceil(sx + xHalfWidth))
-			if jx > sr.Max.X {
-				jx = sr.Max.X
-			}
-
-			totalXWeight := 0.0
-			for kx := ix; kx < jx; kx++ {
-				xWeight := 0.0
-				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
-					xWeight = q.At(t)
-				}
-				xWeights[kx-ix] = xWeight
-				totalXWeight += xWeight
-			}
-			for x := range xWeights[:jx-ix] {
-				xWeights[x] /= totalXWeight
-			}
-
-			sy += float64(bias.Y)
-			sy -= 0.5
-			iy := int(math.Floor(sy - yHalfWidth))
-			if iy < sr.Min.Y {
-				iy = sr.Min.Y
-			}
-			jy := int(math.Ceil(sy + yHalfWidth))
-			if jy > sr.Max.Y {
-				jy = sr.Max.Y
-			}
-
-			totalYWeight := 0.0
-			for ky := iy; ky < jy; ky++ {
-				yWeight := 0.0
-				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
-					yWeight = q.At(t)
-				}
-				yWeights[ky-iy] = yWeight
-				totalYWeight += yWeight
-			}
-			for y := range yWeights[:jy-iy] {
-				yWeights[y] /= totalYWeight
-			}
-
-			var pr, pg, pb float64
-			for ky := iy; ky < jy; ky++ {
-				if yWeight := yWeights[ky-iy]; yWeight != 0 {
-					for kx := ix; kx < jx; kx++ {
-						if w := xWeights[kx-ix] * yWeight; w != 0 {
-							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
-							pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)
-
-							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-							pyy1 := int(src.Y[pi])<<16 + 1<<15
-							pcb1 := int(src.Cb[pj]) - 128
-							pcr1 := int(src.Cr[pj]) - 128
-							pru := (pyy1 + 91881*pcr1) >> 8
-							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-							pbu := (pyy1 + 116130*pcb1) >> 8
-							if pru < 0 {
-								pru = 0
-							} else if pru > 0xffff {
-								pru = 0xffff
-							}
-							if pgu < 0 {
-								pgu = 0
-							} else if pgu > 0xffff {
-								pgu = 0xffff
-							}
-							if pbu < 0 {
-								pbu = 0
-							} else if pbu > 0xffff {
-								pbu = 0xffff
-							}
-
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-						}
-					}
-				}
-			}
-			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
-			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
-			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
-func (q *Kernel) transform_RGBA_YCbCr440_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
-	// When shrinking, broaden the effective kernel support so that we still
-	// visit every source pixel.
-	xHalfWidth, xKernelArgScale := q.Support, 1.0
-	if xscale > 1 {
-		xHalfWidth *= xscale
-		xKernelArgScale = 1 / xscale
-	}
-	yHalfWidth, yKernelArgScale := q.Support, 1.0
-	if yscale > 1 {
-		yHalfWidth *= yscale
-		yKernelArgScale = 1 / yscale
-	}
-
-	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
-	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
-
-	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
-		dyf := float64(dr.Min.Y+int(dy)) + 0.5
-		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
-		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
-			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
-			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
-				continue
-			}
-
-			// TODO: adjust the bias so that we can use int(f) instead
-			// of math.Floor(f) and math.Ceil(f).
-			sx += float64(bias.X)
-			sx -= 0.5
-			ix := int(math.Floor(sx - xHalfWidth))
-			if ix < sr.Min.X {
-				ix = sr.Min.X
-			}
-			jx := int(math.Ceil(sx + xHalfWidth))
-			if jx > sr.Max.X {
-				jx = sr.Max.X
-			}
-
-			totalXWeight := 0.0
-			for kx := ix; kx < jx; kx++ {
-				xWeight := 0.0
-				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
-					xWeight = q.At(t)
-				}
-				xWeights[kx-ix] = xWeight
-				totalXWeight += xWeight
-			}
-			for x := range xWeights[:jx-ix] {
-				xWeights[x] /= totalXWeight
-			}
-
-			sy += float64(bias.Y)
-			sy -= 0.5
-			iy := int(math.Floor(sy - yHalfWidth))
-			if iy < sr.Min.Y {
-				iy = sr.Min.Y
-			}
-			jy := int(math.Ceil(sy + yHalfWidth))
-			if jy > sr.Max.Y {
-				jy = sr.Max.Y
-			}
-
-			totalYWeight := 0.0
-			for ky := iy; ky < jy; ky++ {
-				yWeight := 0.0
-				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
-					yWeight = q.At(t)
-				}
-				yWeights[ky-iy] = yWeight
-				totalYWeight += yWeight
-			}
-			for y := range yWeights[:jy-iy] {
-				yWeights[y] /= totalYWeight
-			}
-
-			var pr, pg, pb float64
-			for ky := iy; ky < jy; ky++ {
-				if yWeight := yWeights[ky-iy]; yWeight != 0 {
-					for kx := ix; kx < jx; kx++ {
-						if w := xWeights[kx-ix] * yWeight; w != 0 {
-							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
-							pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X)
-
-							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
-							pyy1 := int(src.Y[pi])<<16 + 1<<15
-							pcb1 := int(src.Cb[pj]) - 128
-							pcr1 := int(src.Cr[pj]) - 128
-							pru := (pyy1 + 91881*pcr1) >> 8
-							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
-							pbu := (pyy1 + 116130*pcb1) >> 8
-							if pru < 0 {
-								pru = 0
-							} else if pru > 0xffff {
-								pru = 0xffff
-							}
-							if pgu < 0 {
-								pgu = 0
-							} else if pgu > 0xffff {
-								pgu = 0xffff
-							}
-							if pbu < 0 {
-								pbu = 0
-							} else if pbu > 0xffff {
-								pbu = 0xffff
-							}
-
-							pr += float64(pru) * w
-							pg += float64(pgu) * w
-							pb += float64(pbu) * w
-						}
-					}
-				}
-			}
-			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
-			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
-			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
-			dst.Pix[d+3] = 0xff
-		}
-	}
-}
-
 func (q *Kernel) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
diff --git a/draw/scale.go b/draw/scale.go
index 39520f4..00ef1d3 100644
--- a/draw/scale.go
+++ b/draw/scale.go
@@ -431,3 +431,10 @@
 		}
 	}
 }
+
+func opaque(m image.Image) bool {
+	o, ok := m.(interface {
+		Opaque() bool
+	})
+	return ok && o.Opaque()
+}