draw: eliminate some math.Floor calls in Transform methods.

benchmark                      old ns/op     new ns/op     delta
BenchmarkTformNNSrcRGBA        524533        293230        -44.10%
BenchmarkTformNNSrcUniform     362974        149389        -58.84%
BenchmarkTformABSrcGray        827461        427720        -48.31%
BenchmarkTformABSrcNRGBA       1286930       919391        -28.56%
BenchmarkTformABSrcRGBA        1115444       794334        -28.79%
BenchmarkTformABSrcYCbCr       1732420       1379846       -20.35%
BenchmarkTformCRSrcGray        3629048       3467404       -4.45%
BenchmarkTformCRSrcNRGBA       7569407       7418874       -1.99%
BenchmarkTformCRSrcRGBA        7552459       7432745       -1.59%
BenchmarkTformCRSrcYCbCr       8072351       7854715       -2.70%

Change-Id: I6c01e631d9f88c36ae99d0cd181104ad5ac48db0
Reviewed-on: https://go-review.googlesource.com/7926
Reviewed-by: Rob Pike <r@golang.org>
diff --git a/draw/gen.go b/draw/gen.go
index 75c906f..b670521 100644
--- a/draw/gen.go
+++ b/draw/gen.go
@@ -739,22 +739,36 @@
 
 		func (z $receiver) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
 			dr := transformRect(s2d, &sr)
-			// adr is the affected destination pixels, relative to dr.Min.
-			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
+			// adr is the affected destination pixels.
+			adr := dst.Bounds().Intersect(dr)
 			if adr.Empty() || sr.Empty() {
 				return
 			}
 			d2s := invert(s2d)
+			// bias is a translation of the mapping from dst co-ordinates to
+			// src co-ordinates such that the latter temporarily have
+			// non-negative X and Y co-ordinates. This allows us to write
+			// int(f) instead of int(math.Floor(f)), since "round to zero" and
+			// "round down" are equivalent when f >= 0, but the former is much
+			// cheaper. The X-- and Y-- are because the TransformLeaf methods
+			// have a "sx -= 0.5" adjustment.
+			bias := transformRect(&d2s, &adr).Min
+			bias.X--
+			bias.Y--
+			d2s[2] -= float64(bias.X)
+			d2s[5] -= float64(bias.Y)
+			// Make adr relative to dr.Min.
+			adr = adr.Sub(dr.Min)
 			// sr is the source pixels. If it extends beyond the src bounds,
 			// we cannot use the type-specific fast paths, as they access
 			// the Pix fields directly without bounds checking.
 			if !sr.In(src.Bounds()) {
-				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
+				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias)
 			} else if u, ok := src.(*image.Uniform); ok {
 				// TODO: get the Op from opts.
-				transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
+				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
 			} else {
-				$switch z.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr)
+				$switch z.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, bias)
 			}
 		}
 	`
@@ -779,16 +793,15 @@
 	`
 
 	codeNNTransformLeaf = `
-		func (nnInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
+		func (nnInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point) {
 			$preOuter
 			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 				dyf := float64(dr.Min.Y + int(dy)) + 0.5
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-					sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-					sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+					sx0 := int(d2s[0]*dxf + d2s[1]*dyf + d2s[2]) + bias.X
+					sy0 := int(d2s[3]*dxf + d2s[4]*dyf + d2s[5]) + bias.Y
 					if !(image.Point{sx0, sy0}).In(sr) {
 						continue
 					}
@@ -854,25 +867,24 @@
 	`
 
 	codeABLTransformLeaf = `
-		func (ablInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle) {
+		func (ablInterpolator) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point) {
 			$preOuter
 			for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 				dyf := float64(dr.Min.Y + int(dy)) + 0.5
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+					if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 						continue
 					}
 
 					sx -= 0.5
-					sxf := math.Floor(sx)
-					xFrac0 := sx - sxf
+					sx0 := int(sx)
+					xFrac0 := sx - float64(sx0)
 					xFrac1 := 1 - xFrac0
-					sx0 := int(sxf)
+					sx0 += bias.X
 					sx1 := sx0 + 1
 					if sx0 < sr.Min.X {
 						sx0, sx1 = sr.Min.X, sr.Min.X
@@ -883,10 +895,10 @@
 					}
 
 					sy -= 0.5
-					syf := math.Floor(sy)
-					yFrac0 := sy - syf
+					sy0 := int(sy)
+					yFrac0 := sy - float64(sy0)
 					yFrac1 := 1 - yFrac0
-					sy0 := int(syf)
+					sy0 += bias.Y
 					sy1 := sy0 + 1
 					if sy0 < sr.Min.Y {
 						sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -947,16 +959,30 @@
 
 		func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
 			dr := transformRect(s2d, &sr)
-			// adr is the affected destination pixels, relative to dr.Min.
-			adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
+			// adr is the affected destination pixels.
+			adr := dst.Bounds().Intersect(dr)
 			if adr.Empty() || sr.Empty() {
 				return
 			}
 			d2s := invert(s2d)
+			// bias is a translation of the mapping from dst co-ordinates to
+			// src co-ordinates such that the latter temporarily have
+			// non-negative X and Y co-ordinates. This allows us to write
+			// int(f) instead of int(math.Floor(f)), since "round to zero" and
+			// "round down" are equivalent when f >= 0, but the former is much
+			// cheaper. The X-- and Y-- are because the TransformLeaf methods
+			// have a "sx -= 0.5" adjustment.
+			bias := transformRect(&d2s, &adr).Min
+			bias.X--
+			bias.Y--
+			d2s[2] -= float64(bias.X)
+			d2s[5] -= float64(bias.Y)
+			// Make adr relative to dr.Min.
+			adr = adr.Sub(dr.Min)
 
 			if u, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
 				// TODO: get the Op from opts.
-				transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
+				transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
 				return
 			}
 
@@ -973,9 +999,9 @@
 			// we cannot use the type-specific fast paths, as they access
 			// the Pix fields directly without bounds checking.
 			if !sr.In(src.Bounds()) {
-				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			} else {
-				$switch q.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				$switch q.transform_$dTypeRN_$sTypeRN$sratio(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			}
 		}
 	`
@@ -1024,7 +1050,7 @@
 	`
 
 	codeKernelTransformLeaf = `
-		func (q *Kernel) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, xscale, yscale float64) {
+		func (q *Kernel) transform_$dTypeRN_$sTypeRN$sratio(dst $dType, dr, adr image.Rectangle, d2s *f64.Aff3, src $sType, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 			// When shrinking, broaden the effective kernel support so that we still
 			// visit every source pixel.
 			xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -1047,13 +1073,15 @@
 				$preInner
 				for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ { $tweakDx
 					dxf := float64(dr.Min.X + int(dx)) + 0.5
-					// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 					sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 					sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-					if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+					if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 						continue
 					}
 
+					// TODO: adjust the bias so that we can use int(f) instead
+					// of math.Floor(f) and math.Ceil(f).
+					sx += float64(bias.X)
 					sx -= 0.5
 					ix := int(math.Floor(sx - xHalfWidth))
 					if ix < sr.Min.X {
@@ -1077,6 +1105,7 @@
 						xWeights[x] /= totalXWeight
 					}
 
+					sy += float64(bias.Y)
 					sy -= 0.5
 					iy := int(math.Floor(sy - yHalfWidth))
 					if iy < sr.Min.Y {
diff --git a/draw/impl.go b/draw/impl.go
index 8d053d5..9b4ca07 100644
--- a/draw/impl.go
+++ b/draw/impl.go
@@ -61,50 +61,64 @@
 
 func (z nnInterpolator) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
 	dr := transformRect(s2d, &sr)
-	// adr is the affected destination pixels, relative to dr.Min.
-	adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
+	// adr is the affected destination pixels.
+	adr := dst.Bounds().Intersect(dr)
 	if adr.Empty() || sr.Empty() {
 		return
 	}
 	d2s := invert(s2d)
+	// bias is a translation of the mapping from dst co-ordinates to
+	// src co-ordinates such that the latter temporarily have
+	// non-negative X and Y co-ordinates. This allows us to write
+	// int(f) instead of int(math.Floor(f)), since "round to zero" and
+	// "round down" are equivalent when f >= 0, but the former is much
+	// cheaper. The X-- and Y-- are because the TransformLeaf methods
+	// have a "sx -= 0.5" adjustment.
+	bias := transformRect(&d2s, &adr).Min
+	bias.X--
+	bias.Y--
+	d2s[2] -= float64(bias.X)
+	d2s[5] -= float64(bias.Y)
+	// Make adr relative to dr.Min.
+	adr = adr.Sub(dr.Min)
 	// sr is the source pixels. If it extends beyond the src bounds,
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
+		z.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias)
 	} else if u, ok := src.(*image.Uniform); ok {
 		// TODO: get the Op from opts.
-		transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
+		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
 	} else {
 		switch dst := dst.(type) {
 		case *image.RGBA:
 			switch src := src.(type) {
 			case *image.Gray:
-				z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr, bias)
 			case *image.NRGBA:
-				z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr, bias)
 			case *image.RGBA:
-				z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr, bias)
 			case *image.YCbCr:
 				switch src.SubsampleRatio {
 				default:
-					z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio444:
-					z.transform_RGBA_YCbCr444(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr444(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio422:
-					z.transform_RGBA_YCbCr422(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr422(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio420:
-					z.transform_RGBA_YCbCr420(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr420(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio440:
-					z.transform_RGBA_YCbCr440(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr440(dst, dr, adr, &d2s, src, sr, bias)
 				}
 			default:
-				z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, bias)
 			}
 		default:
 			switch src := src.(type) {
 			default:
-				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
+				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias)
 			}
 		}
 	}
@@ -405,15 +419,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -428,15 +441,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -453,15 +465,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -478,15 +489,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -527,15 +537,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -576,15 +585,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -625,15 +633,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -674,15 +681,14 @@
 	}
 }
 
-func (nnInterpolator) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle) {
+func (nnInterpolator) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -695,16 +701,15 @@
 	}
 }
 
-func (nnInterpolator) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle) {
+func (nnInterpolator) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point) {
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-			sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-			sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 			if !(image.Point{sx0, sy0}).In(sr) {
 				continue
 			}
@@ -769,50 +774,64 @@
 
 func (z ablInterpolator) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
 	dr := transformRect(s2d, &sr)
-	// adr is the affected destination pixels, relative to dr.Min.
-	adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
+	// adr is the affected destination pixels.
+	adr := dst.Bounds().Intersect(dr)
 	if adr.Empty() || sr.Empty() {
 		return
 	}
 	d2s := invert(s2d)
+	// bias is a translation of the mapping from dst co-ordinates to
+	// src co-ordinates such that the latter temporarily have
+	// non-negative X and Y co-ordinates. This allows us to write
+	// int(f) instead of int(math.Floor(f)), since "round to zero" and
+	// "round down" are equivalent when f >= 0, but the former is much
+	// cheaper. The X-- and Y-- are because the TransformLeaf methods
+	// have a "sx -= 0.5" adjustment.
+	bias := transformRect(&d2s, &adr).Min
+	bias.X--
+	bias.Y--
+	d2s[2] -= float64(bias.X)
+	d2s[5] -= float64(bias.Y)
+	// Make adr relative to dr.Min.
+	adr = adr.Sub(dr.Min)
 	// sr is the source pixels. If it extends beyond the src bounds,
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
+		z.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias)
 	} else if u, ok := src.(*image.Uniform); ok {
 		// TODO: get the Op from opts.
-		transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
+		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
 	} else {
 		switch dst := dst.(type) {
 		case *image.RGBA:
 			switch src := src.(type) {
 			case *image.Gray:
-				z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr, bias)
 			case *image.NRGBA:
-				z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr, bias)
 			case *image.RGBA:
-				z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr, bias)
 			case *image.YCbCr:
 				switch src.SubsampleRatio {
 				default:
-					z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio444:
-					z.transform_RGBA_YCbCr444(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr444(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio422:
-					z.transform_RGBA_YCbCr422(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr422(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio420:
-					z.transform_RGBA_YCbCr420(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr420(dst, dr, adr, &d2s, src, sr, bias)
 				case image.YCbCrSubsampleRatio440:
-					z.transform_RGBA_YCbCr440(dst, dr, adr, &d2s, src, sr)
+					z.transform_RGBA_YCbCr440(dst, dr, adr, &d2s, src, sr, bias)
 				}
 			default:
-				z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr)
+				z.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, bias)
 			}
 		default:
 			switch src := src.(type) {
 			default:
-				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr)
+				z.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias)
 			}
 		}
 	}
@@ -1967,24 +1986,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -1995,10 +2013,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -2032,24 +2050,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -2060,10 +2077,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -2129,24 +2146,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -2157,10 +2173,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -2226,24 +2242,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -2254,10 +2269,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -2412,24 +2427,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -2440,10 +2454,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -2598,24 +2612,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -2626,10 +2639,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -2784,24 +2797,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -2812,10 +2824,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -2970,24 +2982,23 @@
 	}
 }
 
-func (ablInterpolator) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle) {
+func (ablInterpolator) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -2998,10 +3009,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -3051,25 +3062,24 @@
 	}
 }
 
-func (ablInterpolator) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle) {
+func (ablInterpolator) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point) {
 	dstColorRGBA64 := &color.RGBA64{}
 	dstColor := color.Color(dstColorRGBA64)
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
 			sx -= 0.5
-			sxf := math.Floor(sx)
-			xFrac0 := sx - sxf
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
 			xFrac1 := 1 - xFrac0
-			sx0 := int(sxf)
+			sx0 += bias.X
 			sx1 := sx0 + 1
 			if sx0 < sr.Min.X {
 				sx0, sx1 = sr.Min.X, sr.Min.X
@@ -3080,10 +3090,10 @@
 			}
 
 			sy -= 0.5
-			syf := math.Floor(sy)
-			yFrac0 := sy - syf
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
 			yFrac1 := 1 - yFrac0
-			sy0 := int(syf)
+			sy0 += bias.Y
 			sy1 := sy0 + 1
 			if sy0 < sr.Min.Y {
 				sy0, sy1 = sr.Min.Y, sr.Min.Y
@@ -3198,16 +3208,30 @@
 
 func (q *Kernel) Transform(dst Image, s2d *f64.Aff3, src image.Image, sr image.Rectangle, opts *Options) {
 	dr := transformRect(s2d, &sr)
-	// adr is the affected destination pixels, relative to dr.Min.
-	adr := dst.Bounds().Intersect(dr).Sub(dr.Min)
+	// adr is the affected destination pixels.
+	adr := dst.Bounds().Intersect(dr)
 	if adr.Empty() || sr.Empty() {
 		return
 	}
 	d2s := invert(s2d)
+	// bias is a translation of the mapping from dst co-ordinates to
+	// src co-ordinates such that the latter temporarily have
+	// non-negative X and Y co-ordinates. This allows us to write
+	// int(f) instead of int(math.Floor(f)), since "round to zero" and
+	// "round down" are equivalent when f >= 0, but the former is much
+	// cheaper. The X-- and Y-- are because the TransformLeaf methods
+	// have a "sx -= 0.5" adjustment.
+	bias := transformRect(&d2s, &adr).Min
+	bias.X--
+	bias.Y--
+	d2s[2] -= float64(bias.X)
+	d2s[5] -= float64(bias.Y)
+	// Make adr relative to dr.Min.
+	adr = adr.Sub(dr.Min)
 
 	if u, ok := src.(*image.Uniform); ok && sr.In(src.Bounds()) {
 		// TODO: get the Op from opts.
-		transform_Uniform(dst, dr, adr, &d2s, u, sr, Src)
+		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, Src)
 		return
 	}
 
@@ -3224,37 +3248,37 @@
 	// we cannot use the type-specific fast paths, as they access
 	// the Pix fields directly without bounds checking.
 	if !sr.In(src.Bounds()) {
-		q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+		q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 	} else {
 		switch dst := dst.(type) {
 		case *image.RGBA:
 			switch src := src.(type) {
 			case *image.Gray:
-				q.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				q.transform_RGBA_Gray(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			case *image.NRGBA:
-				q.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				q.transform_RGBA_NRGBA(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			case *image.RGBA:
-				q.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				q.transform_RGBA_RGBA(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			case *image.YCbCr:
 				switch src.SubsampleRatio {
 				default:
-					q.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+					q.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				case image.YCbCrSubsampleRatio444:
-					q.transform_RGBA_YCbCr444(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+					q.transform_RGBA_YCbCr444(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				case image.YCbCrSubsampleRatio422:
-					q.transform_RGBA_YCbCr422(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+					q.transform_RGBA_YCbCr422(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				case image.YCbCrSubsampleRatio420:
-					q.transform_RGBA_YCbCr420(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+					q.transform_RGBA_YCbCr420(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				case image.YCbCrSubsampleRatio440:
-					q.transform_RGBA_YCbCr440(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+					q.transform_RGBA_YCbCr440(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 				}
 			default:
-				q.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				q.transform_RGBA_Image(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			}
 		default:
 			switch src := src.(type) {
 			default:
-				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, xscale, yscale)
+				q.transform_Image_Image(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale)
 			}
 		}
 	}
@@ -3602,7 +3626,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_Gray(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -3624,13 +3648,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -3654,6 +3680,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -3696,7 +3723,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_NRGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -3718,13 +3745,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -3748,6 +3777,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -3795,7 +3825,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_RGBA(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -3817,13 +3847,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -3847,6 +3879,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -3894,7 +3927,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_YCbCr444(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -3916,13 +3949,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -3946,6 +3981,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -4016,7 +4052,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_YCbCr422(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -4038,13 +4074,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -4068,6 +4106,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -4138,7 +4177,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_YCbCr420(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -4160,13 +4199,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -4190,6 +4231,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -4260,7 +4302,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_YCbCr440(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -4282,13 +4324,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -4312,6 +4356,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -4382,7 +4427,7 @@
 	}
 }
 
-func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_RGBA_Image(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -4404,13 +4449,15 @@
 		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -4434,6 +4481,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
@@ -4477,7 +4525,7 @@
 	}
 }
 
-func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, xscale, yscale float64) {
+func (q *Kernel) transform_Image_Image(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
 	xHalfWidth, xKernelArgScale := q.Support, 1.0
@@ -4500,13 +4548,15 @@
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
 		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 			dxf := float64(dr.Min.X+int(dx)) + 0.5
-			// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
 			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
 			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
-			if !(image.Point{int(math.Floor(sx)), int(math.Floor(sy))}).In(sr) {
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
 				continue
 			}
 
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
 			sx -= 0.5
 			ix := int(math.Floor(sx - xHalfWidth))
 			if ix < sr.Min.X {
@@ -4530,6 +4580,7 @@
 				xWeights[x] /= totalXWeight
 			}
 
+			sy += float64(bias.Y)
 			sy -= 0.5
 			iy := int(math.Floor(sy - yHalfWidth))
 			if iy < sr.Min.Y {
diff --git a/draw/scale.go b/draw/scale.go
index e8daa3f..1e95971 100644
--- a/draw/scale.go
+++ b/draw/scale.go
@@ -353,7 +353,7 @@
 	return dr
 }
 
-func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Uniform, sr image.Rectangle, op Op) {
+func transform_Uniform(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Uniform, sr image.Rectangle, bias image.Point, op Op) {
 	switch dst := dst.(type) {
 	case *image.RGBA:
 		pr, pg, pb, pa := src.C.RGBA()
@@ -367,9 +367,8 @@
 			d := dst.PixOffset(dr.Min.X+adr.Min.X, dr.Min.Y+int(dy))
 			for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
 				dxf := float64(dr.Min.X+int(dx)) + 0.5
-				// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-				sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-				sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+				sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+				sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 				if !(image.Point{sx0, sy0}).In(sr) {
 					continue
 				}
@@ -394,9 +393,8 @@
 			dyf := float64(dr.Min.Y+int(dy)) + 0.5
 			for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
 				dxf := float64(dr.Min.X+int(dx)) + 0.5
-				// TODO: change the src origin so that we can say int(f) instead of int(math.Floor(f)).
-				sx0 := int(math.Floor(d2s[0]*dxf + d2s[1]*dyf + d2s[2]))
-				sy0 := int(math.Floor(d2s[3]*dxf + d2s[4]*dyf + d2s[5]))
+				sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
+				sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
 				if !(image.Point{sx0, sy0}).In(sr) {
 					continue
 				}
diff --git a/draw/scale_test.go b/draw/scale_test.go
index b112643..bad9c5f 100644
--- a/draw/scale_test.go
+++ b/draw/scale_test.go
@@ -414,35 +414,43 @@
 	}
 }
 
-func BenchmarkScaleLargeDownNN(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, NearestNeighbor) }
-func BenchmarkScaleLargeDownAB(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleLargeDownBL(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, BiLinear) }
-func BenchmarkScaleLargeDownCR(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, CatmullRom) }
+func BenchmarkScaleNNLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, NearestNeighbor) }
+func BenchmarkScaleABLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleBLLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, BiLinear) }
+func BenchmarkScaleCRLargeDown(b *testing.B) { benchScale(b, srcYCbCrLarge, 200, 150, CatmullRom) }
 
-func BenchmarkScaleDownNN(b *testing.B) { benchScale(b, srcTux, 120, 80, NearestNeighbor) }
-func BenchmarkScaleDownAB(b *testing.B) { benchScale(b, srcTux, 120, 80, ApproxBiLinear) }
-func BenchmarkScaleDownBL(b *testing.B) { benchScale(b, srcTux, 120, 80, BiLinear) }
-func BenchmarkScaleDownCR(b *testing.B) { benchScale(b, srcTux, 120, 80, CatmullRom) }
+func BenchmarkScaleNNDown(b *testing.B) { benchScale(b, srcTux, 120, 80, NearestNeighbor) }
+func BenchmarkScaleABDown(b *testing.B) { benchScale(b, srcTux, 120, 80, ApproxBiLinear) }
+func BenchmarkScaleBLDown(b *testing.B) { benchScale(b, srcTux, 120, 80, BiLinear) }
+func BenchmarkScaleCRDown(b *testing.B) { benchScale(b, srcTux, 120, 80, CatmullRom) }
 
-func BenchmarkScaleUpNN(b *testing.B) { benchScale(b, srcTux, 800, 600, NearestNeighbor) }
-func BenchmarkScaleUpAB(b *testing.B) { benchScale(b, srcTux, 800, 600, ApproxBiLinear) }
-func BenchmarkScaleUpBL(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
-func BenchmarkScaleUpCR(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }
+func BenchmarkScaleNNUp(b *testing.B) { benchScale(b, srcTux, 800, 600, NearestNeighbor) }
+func BenchmarkScaleABUp(b *testing.B) { benchScale(b, srcTux, 800, 600, ApproxBiLinear) }
+func BenchmarkScaleBLUp(b *testing.B) { benchScale(b, srcTux, 800, 600, BiLinear) }
+func BenchmarkScaleCRUp(b *testing.B) { benchScale(b, srcTux, 800, 600, CatmullRom) }
 
-func BenchmarkScaleSrcGray(b *testing.B)    { benchScale(b, srcGray, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcNRGBA(b *testing.B)   { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcRGBA(b *testing.B)    { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, ApproxBiLinear) }
-func BenchmarkScaleSrcYCbCr(b *testing.B)   { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleNNSrcRGBA(b *testing.B)    { benchScale(b, srcRGBA, 200, 150, NearestNeighbor) }
+func BenchmarkScaleNNSrcUniform(b *testing.B) { benchScale(b, srcUniform, 200, 150, NearestNeighbor) }
 
-func BenchmarkTformABSrcGray(b *testing.B)    { benchTform(b, srcGray, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcNRGBA(b *testing.B)   { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcRGBA(b *testing.B)    { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, ApproxBiLinear) }
-func BenchmarkTformABSrcYCbCr(b *testing.B)   { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+func BenchmarkTformNNSrcRGBA(b *testing.B)    { benchTform(b, srcRGBA, 200, 150, NearestNeighbor) }
+func BenchmarkTformNNSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, NearestNeighbor) }
 
-func BenchmarkTformCRSrcGray(b *testing.B)    { benchTform(b, srcGray, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcNRGBA(b *testing.B)   { benchTform(b, srcNRGBA, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcRGBA(b *testing.B)    { benchTform(b, srcRGBA, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcUniform(b *testing.B) { benchTform(b, srcUniform, 200, 150, CatmullRom) }
-func BenchmarkTformCRSrcYCbCr(b *testing.B)   { benchTform(b, srcYCbCr, 200, 150, CatmullRom) }
+func BenchmarkScaleABSrcGray(b *testing.B)  { benchScale(b, srcGray, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleABSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleABSrcRGBA(b *testing.B)  { benchScale(b, srcRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkScaleABSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+
+func BenchmarkTformABSrcGray(b *testing.B)  { benchTform(b, srcGray, 200, 150, ApproxBiLinear) }
+func BenchmarkTformABSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkTformABSrcRGBA(b *testing.B)  { benchTform(b, srcRGBA, 200, 150, ApproxBiLinear) }
+func BenchmarkTformABSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, ApproxBiLinear) }
+
+func BenchmarkScaleCRSrcGray(b *testing.B)  { benchScale(b, srcGray, 200, 150, CatmullRom) }
+func BenchmarkScaleCRSrcNRGBA(b *testing.B) { benchScale(b, srcNRGBA, 200, 150, CatmullRom) }
+func BenchmarkScaleCRSrcRGBA(b *testing.B)  { benchScale(b, srcRGBA, 200, 150, CatmullRom) }
+func BenchmarkScaleCRSrcYCbCr(b *testing.B) { benchScale(b, srcYCbCr, 200, 150, CatmullRom) }
+
+func BenchmarkTformCRSrcGray(b *testing.B)  { benchTform(b, srcGray, 200, 150, CatmullRom) }
+func BenchmarkTformCRSrcNRGBA(b *testing.B) { benchTform(b, srcNRGBA, 200, 150, CatmullRom) }
+func BenchmarkTformCRSrcRGBA(b *testing.B)  { benchTform(b, srcRGBA, 200, 150, CatmullRom) }
+func BenchmarkTformCRSrcYCbCr(b *testing.B) { benchTform(b, srcYCbCr, 200, 150, CatmullRom) }