draw: implement dstMask.

Change-Id: Id63695c3cef67fbe79fa1fbe916b85108f401093
Reviewed-on: https://go-review.googlesource.com/9191
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 2d24a54..cb5e12f 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -233,6 +233,7 @@
 			return ";"
 		case "Image":
 			return "" +
+				"dstMask, dmp := opts.DstMask, opts.DstMaskP\n" +
 				"dstColorRGBA64 := &color.RGBA64{}\n" +
 				"dstColor := color.Color(dstColorRGBA64)"
 		}
@@ -336,6 +337,13 @@
 			case "Image":
 				return argf(args, ""+
 					"qr, qg, qb, qa := dst.At($0, $1).RGBA()\n"+
+					"if dstMask != nil {\n"+
+					"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+					"	$2r = $2r * ma / 0xffff\n"+
+					"	$2g = $2g * ma / 0xffff\n"+
+					"	$2b = $2b * ma / 0xffff\n"+
+					"	$2a = $2a * ma / 0xffff\n"+
+					"}\n"+
 					"$2a1 := 0xffff - $2a\n"+
 					"dstColorRGBA64.R = uint16(qr*$2a1/0xffff + $2r)\n"+
 					"dstColorRGBA64.G = uint16(qg*$2a1/0xffff + $2g)\n"+
@@ -361,11 +369,26 @@
 				switch d.sType {
 				default:
 					return argf(args, ""+
-						"dstColorRGBA64.R = uint16($2r)\n"+
-						"dstColorRGBA64.G = uint16($2g)\n"+
-						"dstColorRGBA64.B = uint16($2b)\n"+
-						"dstColorRGBA64.A = uint16($2a)\n"+
-						"dst.Set($0, $1, dstColor)",
+						"if dstMask != nil {\n"+
+						"	qr, qg, qb, qa := dst.At($0, $1).RGBA()\n"+
+						"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+						"	pr = pr * ma / 0xffff\n"+
+						"	pg = pg * ma / 0xffff\n"+
+						"	pb = pb * ma / 0xffff\n"+
+						"	pa = pa * ma / 0xffff\n"+
+						"	$2a1 := 0xffff - ma\n"+ // Note that this is ma, not $2a.
+						"	dstColorRGBA64.R = uint16(qr*$2a1/0xffff + $2r)\n"+
+						"	dstColorRGBA64.G = uint16(qg*$2a1/0xffff + $2g)\n"+
+						"	dstColorRGBA64.B = uint16(qb*$2a1/0xffff + $2b)\n"+
+						"	dstColorRGBA64.A = uint16(qa*$2a1/0xffff + $2a)\n"+
+						"	dst.Set($0, $1, dstColor)\n"+
+						"} else {\n"+
+						"	dstColorRGBA64.R = uint16($2r)\n"+
+						"	dstColorRGBA64.G = uint16($2g)\n"+
+						"	dstColorRGBA64.B = uint16($2b)\n"+
+						"	dstColorRGBA64.A = uint16($2a)\n"+
+						"	dst.Set($0, $1, dstColor)\n"+
+						"}",
 					)
 				case "*image.Gray":
 					return argf(args, ""+
@@ -432,6 +455,13 @@
 					"$3g0 := uint32($2($3g * $4))\n"+
 					"$3b0 := uint32($2($3b * $4))\n"+
 					"$3a0 := uint32($2($3a * $4))\n"+
+					"if dstMask != nil {\n"+
+					"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+					"	$3r0 = $3r0 * ma / 0xffff\n"+
+					"	$3g0 = $3g0 * ma / 0xffff\n"+
+					"	$3b0 = $3b0 * ma / 0xffff\n"+
+					"	$3a0 = $3a0 * ma / 0xffff\n"+
+					"}\n"+
 					"$3a1 := 0xffff - $3a0\n"+
 					"dstColorRGBA64.R = uint16(qr*$3a1/0xffff + $3r0)\n"+
 					"dstColorRGBA64.G = uint16(qg*$3a1/0xffff + $3g0)\n"+
@@ -461,11 +491,26 @@
 				switch d.sType {
 				default:
 					ret = argf(args, ""+
-						"dstColorRGBA64.R = $2($3r * $4)\n"+
-						"dstColorRGBA64.G = $2($3g * $4)\n"+
-						"dstColorRGBA64.B = $2($3b * $4)\n"+
-						"dstColorRGBA64.A = $2($3a * $4)\n"+
-						"dst.Set($0, $1, dstColor)",
+						"if dstMask != nil {\n"+
+						"	qr, qg, qb, qa := dst.At($0, $1).RGBA()\n"+
+						"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+						"	pr := uint32($2($3r * $4)) * ma / 0xffff\n"+
+						"	pg := uint32($2($3g * $4)) * ma / 0xffff\n"+
+						"	pb := uint32($2($3b * $4)) * ma / 0xffff\n"+
+						"	pa := uint32($2($3a * $4)) * ma / 0xffff\n"+
+						"	pa1 := 0xffff - ma\n"+ // Note that this is ma, not pa.
+						"	dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)\n"+
+						"	dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)\n"+
+						"	dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)\n"+
+						"	dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)\n"+
+						"	dst.Set($0, $1, dstColor)\n"+
+						"} else {\n"+
+						"	dstColorRGBA64.R = $2($3r * $4)\n"+
+						"	dstColorRGBA64.G = $2($3g * $4)\n"+
+						"	dstColorRGBA64.B = $2($3b * $4)\n"+
+						"	dstColorRGBA64.A = $2($3a * $4)\n"+
+						"	dst.Set($0, $1, dstColor)\n"+
+						"}",
 					)
 				case "*image.Gray":
 					ret = argf(args, ""+
@@ -1130,8 +1175,16 @@
 				$switchS z.scaleX_$sTypeRN$sratio(tmp, src, sr, &o)
 			}
 
-			// TODO: honor o.DstMask.
-			$switchD z.scaleY_$dTypeRN_$op(dst, dr, adr, tmp, &o)
+			if o.DstMask != nil {
+				switch o.Op {
+				case Over:
+					z.scaleY_Image_Over(dst, dr, adr, tmp, &o)
+				case Src:
+					z.scaleY_Image_Src(dst, dr, adr, tmp, &o)
+				}
+			} else {
+				$switchD z.scaleY_$dTypeRN_$op(dst, dr, adr, tmp, &o)
+			}
 		}
 
 		func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
diff --git a/draw/impl.go b/draw/impl.go
index eb7cda3..d64cb77 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -527,6 +527,7 @@
 	dh2 := uint64(dr.Dy()) * 2
 	sw := uint64(sr.Dx())
 	sh := uint64(sr.Dy())
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -535,6 +536,13 @@
 			sx := (2*uint64(dx) + 1) * sw / dw2
 			pr, pg, pb, pa := src.At(sr.Min.X+int(sx), sr.Min.Y+int(sy)).RGBA()
 			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+			}
 			pa1 := 0xffff - pa
 			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
 			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
@@ -550,6 +558,7 @@
 	dh2 := uint64(dr.Dy()) * 2
 	sw := uint64(sr.Dx())
 	sh := uint64(sr.Dy())
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -557,11 +566,26 @@
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			sx := (2*uint64(dx) + 1) * sw / dw2
 			pr, pg, pb, pa := src.At(sr.Min.X+int(sx), sr.Min.Y+int(sy)).RGBA()
-			dstColorRGBA64.R = uint16(pr)
-			dstColorRGBA64.G = uint16(pg)
-			dstColorRGBA64.B = uint16(pb)
-			dstColorRGBA64.A = uint16(pa)
-			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			if dstMask != nil {
+				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
+				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
+				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
+				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			} else {
+				dstColorRGBA64.R = uint16(pr)
+				dstColorRGBA64.G = uint16(pg)
+				dstColorRGBA64.B = uint16(pb)
+				dstColorRGBA64.A = uint16(pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			}
 		}
 	}
 }
@@ -904,6 +928,7 @@
 }
 
 func (nnInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -917,6 +942,13 @@
 			}
 			pr, pg, pb, pa := src.At(sx0, sy0).RGBA()
 			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+			}
 			pa1 := 0xffff - pa
 			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
 			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
@@ -928,6 +960,7 @@
 }
 
 func (nnInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -940,11 +973,26 @@
 				continue
 			}
 			pr, pg, pb, pa := src.At(sx0, sy0).RGBA()
-			dstColorRGBA64.R = uint16(pr)
-			dstColorRGBA64.G = uint16(pg)
-			dstColorRGBA64.B = uint16(pb)
-			dstColorRGBA64.A = uint16(pa)
-			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			if dstMask != nil {
+				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
+				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
+				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
+				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			} else {
+				dstColorRGBA64.R = uint16(pr)
+				dstColorRGBA64.G = uint16(pg)
+				dstColorRGBA64.B = uint16(pb)
+				dstColorRGBA64.A = uint16(pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			}
 		}
 	}
 }
@@ -2470,6 +2518,7 @@
 	yscale := float64(sh) / float64(dr.Dy())
 	xscale := float64(sw) / float64(dr.Dx())
 	swMinus1, shMinus1 := sw-1, sh-1
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 
@@ -2541,6 +2590,13 @@
 			pb := uint32(s11b)
 			pa := uint32(s11a)
 			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+			}
 			pa1 := 0xffff - pa
 			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
 			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
@@ -2557,6 +2613,7 @@
 	yscale := float64(sh) / float64(dr.Dy())
 	xscale := float64(sw) / float64(dr.Dx())
 	swMinus1, shMinus1 := sw-1, sh-1
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 
@@ -2627,11 +2684,26 @@
 			pg := uint32(s11g)
 			pb := uint32(s11b)
 			pa := uint32(s11a)
-			dstColorRGBA64.R = uint16(pr)
-			dstColorRGBA64.G = uint16(pg)
-			dstColorRGBA64.B = uint16(pb)
-			dstColorRGBA64.A = uint16(pa)
-			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			if dstMask != nil {
+				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
+				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
+				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
+				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			} else {
+				dstColorRGBA64.R = uint16(pr)
+				dstColorRGBA64.G = uint16(pg)
+				dstColorRGBA64.B = uint16(pb)
+				dstColorRGBA64.A = uint16(pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			}
 		}
 	}
 }
@@ -3977,6 +4049,7 @@
 }
 
 func (ablInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -4054,6 +4127,13 @@
 			pb := uint32(s11b)
 			pa := uint32(s11a)
 			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+			}
 			pa1 := 0xffff - pa
 			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
 			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
@@ -4065,6 +4145,7 @@
 }
 
 func (ablInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -4141,11 +4222,26 @@
 			pg := uint32(s11g)
 			pb := uint32(s11b)
 			pa := uint32(s11a)
-			dstColorRGBA64.R = uint16(pr)
-			dstColorRGBA64.G = uint16(pg)
-			dstColorRGBA64.B = uint16(pb)
-			dstColorRGBA64.A = uint16(pa)
-			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			if dstMask != nil {
+				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr = pr * ma / 0xffff
+				pg = pg * ma / 0xffff
+				pb = pb * ma / 0xffff
+				pa = pa * ma / 0xffff
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
+				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
+				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
+				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			} else {
+				dstColorRGBA64.R = uint16(pr)
+				dstColorRGBA64.G = uint16(pg)
+				dstColorRGBA64.B = uint16(pb)
+				dstColorRGBA64.A = uint16(pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			}
 		}
 	}
 }
@@ -4221,22 +4317,30 @@
 		}
 	}
 
-	// TODO: honor o.DstMask.
-	switch o.Op {
-	case Over:
-		switch dst := dst.(type) {
-		case *image.RGBA:
-			z.scaleY_RGBA_Over(dst, dr, adr, tmp, &o)
-		default:
+	if o.DstMask != nil {
+		switch o.Op {
+		case Over:
 			z.scaleY_Image_Over(dst, dr, adr, tmp, &o)
-		}
-	case Src:
-		switch dst := dst.(type) {
-		case *image.RGBA:
-			z.scaleY_RGBA_Src(dst, dr, adr, tmp, &o)
-		default:
+		case Src:
 			z.scaleY_Image_Src(dst, dr, adr, tmp, &o)
 		}
+	} else {
+		switch o.Op {
+		case Over:
+			switch dst := dst.(type) {
+			case *image.RGBA:
+				z.scaleY_RGBA_Over(dst, dr, adr, tmp, &o)
+			default:
+				z.scaleY_Image_Over(dst, dr, adr, tmp, &o)
+			}
+		case Src:
+			switch dst := dst.(type) {
+			case *image.RGBA:
+				z.scaleY_RGBA_Src(dst, dr, adr, tmp, &o)
+			default:
+				z.scaleY_Image_Src(dst, dr, adr, tmp, &o)
+			}
+		}
 	}
 }
 
@@ -4710,6 +4814,7 @@
 }
 
 func (z *kernelScaler) scaleY_Image_Over(dst Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
@@ -4738,6 +4843,13 @@
 			pg0 := uint32(ftou(pg * s.invTotalWeight))
 			pb0 := uint32(ftou(pb * s.invTotalWeight))
 			pa0 := uint32(ftou(pa * s.invTotalWeight))
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(adr.Min.Y+dy)).RGBA()
+				pr0 = pr0 * ma / 0xffff
+				pg0 = pg0 * ma / 0xffff
+				pb0 = pb0 * ma / 0xffff
+				pa0 = pa0 * ma / 0xffff
+			}
 			pa1 := 0xffff - pa0
 			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr0)
 			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg0)
@@ -4749,6 +4861,7 @@
 }
 
 func (z *kernelScaler) scaleY_Image_Src(dst Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
@@ -4772,11 +4885,26 @@
 				pb = pa
 			}
 
-			dstColorRGBA64.R = ftou(pr * s.invTotalWeight)
-			dstColorRGBA64.G = ftou(pg * s.invTotalWeight)
-			dstColorRGBA64.B = ftou(pb * s.invTotalWeight)
-			dstColorRGBA64.A = ftou(pa * s.invTotalWeight)
-			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColor)
+			if dstMask != nil {
+				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy)).RGBA()
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(adr.Min.Y+dy)).RGBA()
+				pr := uint32(ftou(pr*s.invTotalWeight)) * ma / 0xffff
+				pg := uint32(ftou(pg*s.invTotalWeight)) * ma / 0xffff
+				pb := uint32(ftou(pb*s.invTotalWeight)) * ma / 0xffff
+				pa := uint32(ftou(pa*s.invTotalWeight)) * ma / 0xffff
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
+				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
+				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
+				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColor)
+			} else {
+				dstColorRGBA64.R = ftou(pr * s.invTotalWeight)
+				dstColorRGBA64.G = ftou(pg * s.invTotalWeight)
+				dstColorRGBA64.B = ftou(pb * s.invTotalWeight)
+				dstColorRGBA64.A = ftou(pa * s.invTotalWeight)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColor)
+			}
 		}
 	}
 }
@@ -6090,6 +6218,7 @@
 	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
 	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
 
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -6182,6 +6311,13 @@
 			pg0 := uint32(fffftou(pg))
 			pb0 := uint32(fffftou(pb))
 			pa0 := uint32(fffftou(pa))
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr0 = pr0 * ma / 0xffff
+				pg0 = pg0 * ma / 0xffff
+				pb0 = pb0 * ma / 0xffff
+				pa0 = pa0 * ma / 0xffff
+			}
 			pa1 := 0xffff - pa0
 			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr0)
 			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg0)
@@ -6209,6 +6345,7 @@
 	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
 	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
 
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
@@ -6296,11 +6433,26 @@
 				pb = pa
 			}
 
-			dstColorRGBA64.R = fffftou(pr)
-			dstColorRGBA64.G = fffftou(pg)
-			dstColorRGBA64.B = fffftou(pb)
-			dstColorRGBA64.A = fffftou(pa)
-			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			if dstMask != nil {
+				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr := uint32(fffftou(pr)) * ma / 0xffff
+				pg := uint32(fffftou(pg)) * ma / 0xffff
+				pb := uint32(fffftou(pb)) * ma / 0xffff
+				pa := uint32(fffftou(pa)) * ma / 0xffff
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
+				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
+				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
+				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			} else {
+				dstColorRGBA64.R = fffftou(pr)
+				dstColorRGBA64.G = fffftou(pg)
+				dstColorRGBA64.B = fffftou(pb)
+				dstColorRGBA64.A = fffftou(pa)
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
+			}
 		}
 	}
 }
diff --git a/draw/scale.go b/draw/scale.go
index 85659cf..248c083 100644
--- a/draw/scale.go
+++ b/draw/scale.go
@@ -24,8 +24,12 @@
 		o = *opts
 	}
 	dr := sr.Add(dp.Sub(sr.Min))
-	// TODO: honor o.DstMask and o.SrcMask.
-	DrawMask(dst, dr, src, sr.Min, nil, image.Point{}, o.Op)
+	if o.DstMask == nil {
+		// TODO: honor o.SrcMask.
+		DrawMask(dst, dr, src, sr.Min, nil, image.Point{}, o.Op)
+	} else {
+		NearestNeighbor.Scale(dst, dr, src, sr, opts)
+	}
 }
 
 // Scaler scales the part of the source image defined by src and sr and writes
@@ -89,7 +93,7 @@
 	DstMaskP image.Point
 	SrcMask  image.Image
 	SrcMaskP image.Point
-	// TODO: actually implement DstMask and SrcMask.
+	// TODO: actually implement SrcMask.
 
 	// TODO: a smooth vs sharp edges option, for arbitrary rotations?
 }
diff --git a/draw/scale_test.go b/draw/scale_test.go
index a1fb5cc..4a79339 100644
--- a/draw/scale_test.go
+++ b/draw/scale_test.go
@@ -316,6 +316,93 @@
 	}
 }
 
+func TestDstMask(t *testing.T) { // Checks that Scale weights its output by DstMask alpha, offset by DstMaskP.
+	dstMask := image.NewRGBA(image.Rect(0, 0, 23, 1)) // pixels not set below stay fully transparent
+	dstMask.SetRGBA(19, 0, color.RGBA{0x00, 0x00, 0x00, 0x7f}) // left of DstMaskP.X; should never be sampled
+	dstMask.SetRGBA(20, 0, color.RGBA{0x00, 0x00, 0x00, 0xff}) // lines up with dst x=0: fully opaque
+	dstMask.SetRGBA(21, 0, color.RGBA{0x00, 0x00, 0x00, 0x3f}) // lines up with dst x=1: roughly 25% coverage
+	dstMask.SetRGBA(22, 0, color.RGBA{0x00, 0x00, 0x00, 0x00}) // lines up with dst x=2: fully transparent
+	red := image.NewRGBA(image.Rect(0, 0, 1, 1)) // 1x1 opaque red source
+	red.SetRGBA(0, 0, color.RGBA{0xff, 0x00, 0x00, 0xff})
+	blue := image.NewUniform(color.RGBA{0x00, 0x00, 0xff, 0xff}) // background that dst is filled with
+	qs := []Interpolator{
+		NearestNeighbor,
+		ApproxBiLinear,
+		CatmullRom,
+	}
+	for _, q := range qs {
+		dst := image.NewRGBA(image.Rect(0, 0, 3, 1))
+		Copy(dst, image.Point{}, blue, dst.Bounds(), nil) // start every run from an all-blue dst
+		q.Scale(dst, dst.Bounds(), red, red.Bounds(), &Options{
+			DstMask:  dstMask,
+			DstMaskP: image.Point{20, 0}, // dst (x, 0) reads mask (x+20, 0)
+		})
+		got := [3]color.RGBA{
+			dst.RGBAAt(0, 0),
+			dst.RGBAAt(1, 0),
+			dst.RGBAAt(2, 0),
+		}
+		want := [3]color.RGBA{
+			{0xff, 0x00, 0x00, 0xff}, // mask 0xff: red replaces blue
+			{0x3f, 0x00, 0xc0, 0xff}, // mask 0x3f: blend of red over blue
+			{0x00, 0x00, 0xff, 0xff}, // mask 0x00: blue left untouched
+		}
+		if got != want {
+			t.Errorf("q=%T:\ngot  %v\nwant %v", q, got, want)
+		}
+	}
+}
+
+func TestRectDstMask(t *testing.T) { // Checks that an image.Rectangle used as DstMask confines Transform's writes to that rectangle.
+	f, err := os.Open("../testdata/testpattern.png")
+	if err != nil {
+		t.Fatalf("Open: %v", err)
+	}
+	defer f.Close()
+	src, _, err := image.Decode(f)
+	if err != nil {
+		t.Fatalf("Decode: %v", err)
+	}
+	m00 := transformMatrix(1, 0, 0) // NOTE(review): presumably scale 1 with no translation; confirm against transformMatrix
+
+	bounds := image.Rect(0, 0, 50, 50)
+	dstOutside := image.NewRGBA(bounds) // gradient background: the expected value for every pixel outside rect
+	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
+		for x := bounds.Min.X; x < bounds.Max.X; x++ {
+			dstOutside.SetRGBA(x, y, color.RGBA{uint8(5 * x), uint8(5 * y), 0x00, 0xff}) // 5*49 = 245, so no uint8 wrap-around
+		}
+	}
+
+	mk := func(q Transformer, dstMask image.Image) *image.RGBA { // transforms src onto a fresh copy of dstOutside
+		m := image.NewRGBA(bounds)
+		Copy(m, bounds.Min, dstOutside, bounds, nil)
+		q.Transform(m, m00, src, src.Bounds(), &Options{DstMask: dstMask}) // a nil dstMask gives the unmasked result
+		return m
+	}
+
+	rect := image.Rect(20, 10, 30, 40) // an image.Rectangle is an image.Image: opaque inside, transparent outside
+	qs := []Interpolator{
+		NearestNeighbor,
+		ApproxBiLinear,
+		CatmullRom,
+	}
+	for _, q := range qs {
+		dstInside := mk(q, nil) // unmasked result: the expected value for every pixel inside rect
+		dst := mk(q, rect)
+		for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
+			for x := bounds.Min.X; x < bounds.Max.X; x++ {
+				which := dstOutside
+				if (image.Point{x, y}).In(rect) {
+					which = dstInside
+				}
+				if got, want := dst.RGBAAt(x, y), which.RGBAAt(x, y); got != want {
+					t.Errorf("q=%T x=%3d y=%3d: got %v, want %v", q, x, y, got, want) // name the interpolator, as TestDstMask does
+				}
+			}
+		}
+	}
+}
+
 // The fooWrapper types wrap the dst or src image to avoid triggering the
 // type-specific fast path implementations.
 type (