draw: add fast paths for RGBA64Image

Go 1.17 added image.RGBA64Image and draw.RGBA64Image interfaces which use color.RGBA64 instead of color.Color in order to avoid heap allocations on pixel operations. Fast paths were added to image/draw for drawing with these images, but not to x/image/draw. This CL adds them.

Fixes golang/go#62423

goos: windows
goarch: amd64
pkg: golang.org/x/image/draw
cpu: AMD Ryzen 9 7900X 12-Core Processor

│ old.bench │ new.bench │
│ sec/op │ sec/op vs base │
SimpleScaleCopy-24 7.425µ ± 0% 7.581µ ± 0% +2.10% (p=0.000 n=20)
SimpleTransformCopy-24 7.473µ ± 1% 7.678µ ± 1% +2.74% (p=0.000 n=20)
SimpleTransformScale-24 440.7µ ± 0% 442.6µ ± 0% +0.44% (p=0.012 n=20)
ScaleNNLargeDown-24 155.3µ ± 4% 156.9µ ± 2% ~ (p=0.495 n=20)
ScaleABLargeDown-24 843.4µ ± 0% 850.7µ ± 1% +0.87% (p=0.000 n=20)
ScaleBLLargeDown-24 102.7m ± 0% 102.7m ± 1% ~ (p=0.779 n=20)
ScaleCRLargeDown-24 165.6m ± 0% 168.7m ± 1% +1.84% (p=0.000 n=20)
ScaleNNDown-24 42.53µ ± 1% 43.00µ ± 1% +1.10% (p=0.000 n=20)
ScaleABDown-24 149.3µ ± 0% 151.5µ ± 1% +1.52% (p=0.000 n=20)
ScaleBLDown-24 1.347m ± 1% 1.367m ± 1% +1.43% (p=0.000 n=20)
ScaleCRDown-24 2.635m ± 1% 2.663m ± 1% +1.09% (p=0.007 n=20)
ScaleNNUp-24 2.108m ± 0% 2.128m ± 2% +0.93% (p=0.023 n=20)
ScaleABUp-24 7.493m ± 0% 7.594m ± 1% +1.35% (p=0.000 n=20)
ScaleBLUp-24 6.105m ± 3% 6.410m ± 3% +5.01% (p=0.001 n=20)
ScaleCRUp-24 10.297m ± 9% 9.640m ± 3% ~ (p=0.121 n=20)
ScaleNNSrcRGBA-24 111.3µ ± 1% 108.6µ ± 2% -2.37% (p=0.000 n=20)
ScaleNNSrcUnif-24 1.410µ ± 1% 1.397µ ± 1% ~ (p=0.103 n=20)
ScaleNNOverRGBA-24 174.0µ ± 1% 170.5µ ± 2% -1.98% (p=0.004 n=20)
ScaleNNOverUnif-24 90.17µ ± 1% 90.12µ ± 1% ~ (p=0.490 n=20)
TformNNSrcRGBA-24 103.4µ ± 1% 105.8µ ± 1% +2.25% (p=0.000 n=20)
TformNNSrcUnif-24 38.61µ ± 1% 40.80µ ± 1% +5.69% (p=0.000 n=20)
TformNNOverRGBA-24 150.3µ ± 0% 155.5µ ± 2% +3.49% (p=0.000 n=20)
TformNNOverUnif-24 35.16µ ± 2% 34.75µ ± 2% -1.18% (p=0.015 n=20)
ScaleABSrcGray-24 154.5µ ± 2% 151.8µ ± 1% -1.69% (p=0.006 n=20)
ScaleABSrcNRGBA-24 482.0µ ± 1% 475.9µ ± 1% -1.26% (p=0.002 n=20)
ScaleABSrcRGBA-24 418.2µ ± 0% 416.0µ ± 1% ~ (p=0.108 n=20)
ScaleABSrcYCbCr-24 849.4µ ± 0% 845.6µ ± 1% -0.45% (p=0.015 n=20)
ScaleABSrcRGBA64-24 1616.6µ ± 2% 467.8µ ± 2% -71.06% (p=0.000 n=20)
ScaleABOverGray-24 152.0µ ± 1% 148.2µ ± 1% -2.51% (p=0.000 n=20)
ScaleABOverNRGBA-24 511.8µ ± 1% 510.2µ ± 1% ~ (p=0.640 n=20)
ScaleABOverRGBA-24 480.1µ ± 1% 475.5µ ± 0% -0.94% (p=0.000 n=20)
ScaleABOverYCbCr-24 861.1µ ± 1% 843.2µ ± 0% -2.08% (p=0.000 n=20)
ScaleABOverRGBA64-24 1723.2µ ± 2% 538.0µ ± 0% -68.78% (p=0.000 n=20)
TformABSrcGray-24 148.6µ ± 1% 142.6µ ± 0% -4.01% (p=0.000 n=20)
TformABSrcNRGBA-24 363.3µ ± 2% 356.2µ ± 0% -1.95% (p=0.000 n=20)
TformABSrcRGBA-24 301.0µ ± 0% 296.5µ ± 0% -1.49% (p=0.000 n=20)
TformABSrcYCbCr-24 415.1µ ± 0% 409.1µ ± 0% -1.45% (p=0.000 n=20)
TformABSrcRGBA64-24 1068.9µ ± 2% 337.7µ ± 1% -68.41% (p=0.000 n=20)
TformABOverGray-24 146.0µ ± 1% 143.2µ ± 0% -1.88% (p=0.000 n=20)
TformABOverNRGBA-24 394.6µ ± 1% 389.0µ ± 0% -1.43% (p=0.000 n=20)
TformABOverRGBA-24 341.0µ ± 1% 338.1µ ± 0% -0.83% (p=0.001 n=20)
TformABOverYCbCr-24 414.4µ ± 1% 410.6µ ± 1% -0.91% (p=0.049 n=20)
TformABOverRGBA64-24 1108.7µ ± 2% 389.4µ ± 1% -64.88% (p=0.000 n=20)
ScaleCRSrcGray-24 4.065m ± 2% 3.979m ± 0% -2.12% (p=0.000 n=20)
ScaleCRSrcNRGBA-24 13.66m ± 1% 13.41m ± 1% -1.79% (p=0.007 n=20)
ScaleCRSrcRGBA-24 9.258m ± 3% 9.091m ± 1% -1.80% (p=0.002 n=20)
ScaleCRSrcYCbCr-24 23.53m ± 0% 22.85m ± 1% -2.89% (p=0.000 n=20)
ScaleCRSrcRGBA64-24 42.76m ± 2% 13.13m ± 1% -69.31% (p=0.000 n=20)
ScaleCROverGray-24 4.110m ± 1% 3.973m ± 1% -3.33% (p=0.000 n=20)
ScaleCROverNRGBA-24 14.74m ± 1% 13.50m ± 0% -8.41% (p=0.000 n=20)
ScaleCROverRGBA-24 9.504m ± 1% 9.301m ± 1% -2.14% (p=0.000 n=20)
ScaleCROverYCbCr-24 23.42m ± 1% 22.86m ± 0% -2.38% (p=0.000 n=20)
ScaleCROverRGBA64-24 43.47m ± 1% 13.07m ± 1% -69.93% (p=0.000 n=20)
TformCRSrcGray-24 1.253m ± 1% 1.225m ± 0% -2.24% (p=0.000 n=20)
TformCRSrcNRGBA-24 2.141m ± 2% 2.050m ± 1% -4.24% (p=0.000 n=20)
TformCRSrcRGBA-24 1.810m ± 1% 1.771m ± 0% -2.15% (p=0.002 n=20)
TformCRSrcYCbCr-24 2.404m ± 1% 2.403m ± 1% ~ (p=0.698 n=20)
TformCRSrcRGBA64-24 5.150m ± 1% 2.197m ± 1% -57.34% (p=0.000 n=20)
TformCROverGray-24 1.251m ± 0% 1.223m ± 0% -2.23% (p=0.000 n=20)
TformCROverNRGBA-24 2.081m ± 1% 2.037m ± 0% -2.13% (p=0.000 n=20)
TformCROverRGBA-24 1.809m ± 1% 1.793m ± 2% ~ (p=0.149 n=20)
TformCROverYCbCr-24 2.444m ± 0% 2.400m ± 1% -1.82% (p=0.000 n=20)
TformCROverRGBA64-24 5.303m ± 2% 2.221m ± 1% -58.12% (p=0.000 n=20)
geomean 930.7µ 804.7µ -13.54%

│ old.bench │ new.bench │
│ B/op │ B/op vs base │
ScaleNNLargeDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABLargeDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleBLLargeDown-24 1.407Mi ± 0% 1.407Mi ± 0% ~ (p=0.283 n=20)
ScaleCRLargeDown-24 2.010Mi ± 0% 2.345Mi ± 14% +16.67% (p=0.000 n=20)
ScaleNNDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleBLDown-24 1.704Ki ± 1% 1.721Ki ± 3% ~ (p=0.109 n=20)
ScaleCRDown-24 3.309Ki ± 2% 3.342Ki ± 2% ~ (p=0.568 n=20)
ScaleNNUp-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABUp-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleBLUp-24 50.96Ki ± 2% 52.73Ki ± 4% +3.47% (p=0.002 n=20)
ScaleCRUp-24 86.54Ki ± 14% 79.09Ki ± 2% ~ (p=0.061 n=20)
ScaleNNSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNSrcUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNOverUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNSrcUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNOverUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcRGBA64-24 937.5Ki ± 0% 0.0Ki ± 0% -100.00% (p=0.000 n=20)
ScaleABOverGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverRGBA64-24 937.5Ki ± 0% 0.0Ki ± 0% -100.00% (p=0.000 n=20)
TformABSrcGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcRGBA64-24 600.8Ki ± 0% 0.0Ki ± 0% -100.00% (p=0.000 n=20)
TformABOverGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverRGBA64-24 600.8Ki ± 0% 0.0Ki ± 0% -100.00% (p=0.000 n=20)
ScaleCRSrcGray-24 16.51Ki ± 1% 15.97Ki ± 1% -3.32% (p=0.000 n=20)
ScaleCRSrcNRGBA-24 59.33Ki ± 5% 57.21Ki ± 4% ~ (p=0.142 n=20)
ScaleCRSrcRGBA-24 37.55Ki ± 2% 36.41Ki ± 2% -3.03% (p=0.001 n=20)
ScaleCRSrcYCbCr-24 98.08Ki ± 2% 98.08Ki ± 2% ~ (p=0.952 n=20)
ScaleCRSrcRGBA64-24 24624.95Ki ± 0% 55.21Ki ± 4% -99.78% (p=0.000 n=20)
ScaleCROverGray-24 16.46Ki ± 1% 15.97Ki ± 1% -2.99% (p=0.000 n=20)
ScaleCROverNRGBA-24 61.62Ki ± 3% 59.70Ki ± 5% -3.10% (p=0.015 n=20)
ScaleCROverRGBA-24 38.76Ki ± 2% 37.55Ki ± 1% -3.12% (p=0.000 n=20)
ScaleCROverYCbCr-24 98.08Ki ± 2% 98.08Ki ± 2% ~ (p=0.232 n=20)
ScaleCROverRGBA64-24 24624.95Ki ± 0% 55.21Ki ± 2% -99.78% (p=0.000 n=20)
TformCRSrcGray-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcNRGBA-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcRGBA-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcYCbCr-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcRGBA64-24 2396479.00 ± 0% 96.00 ± 0% -100.00% (p=0.000 n=20)
TformCROverGray-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCROverNRGBA-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCROverRGBA-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCROverYCbCr-24 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹
TformCROverRGBA64-24 2396479.00 ± 0% 96.00 ± 0% -100.00% (p=0.000 n=20)
geomean ² ? ² ³
¹ all samples are equal
² summaries must be >0 to compute geomean
³ ratios must be >0 to compute geomean

│ old.bench │ new.bench │
│ allocs/op │ allocs/op vs base │
ScaleNNLargeDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABLargeDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleBLLargeDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCRLargeDown-24 0.000 ± 0% 1.000 ± ? ? (p=0.000 n=20)
ScaleNNDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleBLDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCRDown-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNUp-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABUp-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleBLUp-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCRUp-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNSrcUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleNNOverUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNSrcUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformNNOverUnif-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABSrcRGBA64-24 120.0k ± 0% 0.0k ± 0% -100.00% (p=0.000 n=20)
ScaleABOverGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleABOverRGBA64-24 120.0k ± 0% 0.0k ± 0% -100.00% (p=0.000 n=20)
TformABSrcGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABSrcRGBA64-24 76.90k ± 0% 0.00k ± 0% -100.00% (p=0.000 n=20)
TformABOverGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
TformABOverRGBA64-24 76.90k ± 0% 0.00k ± 0% -100.00% (p=0.000 n=20)
ScaleCRSrcGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCRSrcNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCRSrcRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCRSrcYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCRSrcRGBA64-24 3.129M ± 0% 0.000M ± 0% -100.00% (p=0.000 n=20)
ScaleCROverGray-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCROverNRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCROverRGBA-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCROverYCbCr-24 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=20) ¹
ScaleCROverRGBA64-24 3.129M ± 0% 0.000M ± 0% -100.00% (p=0.000 n=20)
TformCRSrcGray-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcNRGBA-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcRGBA-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcYCbCr-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCRSrcRGBA64-24 299549.000 ± 0% 2.000 ± 0% -100.00% (p=0.000 n=20)
TformCROverGray-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCROverNRGBA-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCROverRGBA-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCROverYCbCr-24 2.000 ± 0% 2.000 ± 0% ~ (p=1.000 n=20) ¹
TformCROverRGBA64-24 299549.000 ± 0% 2.000 ± 0% -100.00% (p=0.000 n=20)
geomean ² ? ²
¹ all samples are equal
² summaries must be >0 to compute geomean

Change-Id: I37778e925cce13c4fec65c9e6d57e205440e2a06
Reviewed-on: https://go-review.googlesource.com/c/image/+/525255
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Reviewed-by: Nigel Tao (INACTIVE; USE @golang.org INSTEAD) <nigeltao@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

commit: 3aac9c63df6c65cda99e4890d824b12c2c440b89 [log] [tgz]
author: Branden J Brown <zephyrtronium@gmail.com> Fri Sep 01 20:54:35 2023 -0500
committer: Gopher Robot <gobot@golang.org> Thu Sep 07 14:37:48 2023 +0000
tree: e4fcc6318d9908d9cb7be29289c6e120f4c5d97e
parent: fa10be5a6235278651316f4f0cfc136d80cef613 [diff]
diff --git a/draw/gen.go b/draw/gen.go
index 33678ad..1646932 100644
--- a/draw/gen.go
+++ b/draw/gen.go

@@ -12,7 +12,6 @@
 	"flag"
 	"fmt"
 	"go/format"
-	"io/ioutil"
 	"log"
 	"os"
 	"strings"
@@ -45,7 +44,7 @@
 	if err != nil {
 		log.Fatal(err)
 	}
-	if err := ioutil.WriteFile("impl.go", out, 0660); err != nil {
+	if err := os.WriteFile("impl.go", out, 0660); err != nil {
 		log.Fatal(err)
 	}
 }
@@ -62,7 +61,9 @@
 		{"*image.RGBA", "*image.NRGBA"},
 		{"*image.RGBA", "*image.RGBA"},
 		{"*image.RGBA", "*image.YCbCr"},
+		{"*image.RGBA", "image.RGBA64Image"},
 		{"*image.RGBA", "image.Image"},
+		{"RGBA64Image", "image.RGBA64Image"},
 		{"Image", "image.Image"},
 	}
 	dTypes, sTypes  []string
@@ -234,13 +235,21 @@
 			return ";"
 		case "Image":
 			s := ""
-			if d.sType == "image.Image" {
+			if d.sType == "image.Image" || d.sType == "image.RGBA64Image" {
 				s = "srcMask, smp := opts.SrcMask, opts.SrcMaskP\n"
 			}
 			return s +
 				"dstMask, dmp := opts.DstMask, opts.DstMaskP\n" +
 				"dstColorRGBA64 := &color.RGBA64{}\n" +
 				"dstColor := color.Color(dstColorRGBA64)"
+		case "RGBA64Image":
+			s := ""
+			if d.sType == "image.Image" || d.sType == "image.RGBA64Image" {
+				s = "srcMask, smp := opts.SrcMask, opts.SrcMaskP\n"
+			}
+			return s +
+				"dstMask, dmp := opts.DstMask, opts.DstMaskP\n" +
+				"dstColorRGBA64 := color.RGBA64{}\n"
 		}
 
 	case "preInner":
@@ -255,7 +264,7 @@
 		switch d.sType {
 		default:
 			return ";"
-		case "image.Image":
+		case "image.Image", "image.RGBA64Image":
 			return "srcMask, smp := opts.SrcMask, opts.SrcMaskP"
 		}
 
@@ -334,6 +343,10 @@
 				"$0g := uint32($1g)\n"+
 				"$0b := uint32($1b)",
 			)
+		case "image.RGBA64Image":
+			return argf(args, ""+
+				"$0 := color.RGBA64{uint16($1r), uint16($1g), uint16($1b), uint16($1a)}",
+			)
 		}
 
 	case "outputu":
@@ -364,14 +377,62 @@
 					"dstColorRGBA64.A = uint16(qa*$2a1/0xffff + $2a)\n"+
 					"dst.Set($0, $1, dstColor)",
 				)
+			case "RGBA64Image":
+				switch d.sType {
+				default:
+					return argf(args, ""+
+						"q := dst.RGBA64At($0, $1)\n"+
+						"if dstMask != nil {\n"+
+						"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+						"	$2r = $2r * ma / 0xffff\n"+
+						"	$2g = $2g * ma / 0xffff\n"+
+						"	$2b = $2b * ma / 0xffff\n"+
+						"	$2a = $2a * ma / 0xffff\n"+
+						"}\n"+
+						"$2a1 := 0xffff - $2a\n"+
+						"dstColorRGBA64.R = uint16(uint32(q.R)*$2a1/0xffff + $2r)\n"+
+						"dstColorRGBA64.G = uint16(uint32(q.G)*$2a1/0xffff + $2g)\n"+
+						"dstColorRGBA64.B = uint16(uint32(q.B)*$2a1/0xffff + $2b)\n"+
+						"dstColorRGBA64.A = uint16(uint32(q.A)*$2a1/0xffff + $2a)\n"+
+						"dst.Set($0, $1, dstColorRGBA64)",
+					)
+				case "image.RGBA64Image":
+					return argf(args, ""+
+						"q := dst.RGBA64At($0, $1)\n"+
+						"if dstMask != nil {\n"+
+						"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+						"	$2.R = uint16(uint32($2.R) * ma / 0xffff)\n"+
+						"	$2.G = uint16(uint32($2.G) * ma / 0xffff)\n"+
+						"	$2.B = uint16(uint32($2.B) * ma / 0xffff)\n"+
+						"	$2.A = uint16(uint32($2.A) * ma / 0xffff)\n"+
+						"}\n"+
+						"$2a1 := 0xffff - uint32($2.A)\n"+
+						"dstColorRGBA64.R = uint16(uint32(q.R)*$2a1/0xffff + uint32($2.R))\n"+
+						"dstColorRGBA64.G = uint16(uint32(q.G)*$2a1/0xffff + uint32($2.G))\n"+
+						"dstColorRGBA64.B = uint16(uint32(q.B)*$2a1/0xffff + uint32($2.B))\n"+
+						"dstColorRGBA64.A = uint16(uint32(q.A)*$2a1/0xffff + uint32($2.A))\n"+
+						"dst.Set($0, $1, dstColorRGBA64)",
+					)
+				}
 			case "*image.RGBA":
-				return argf(args, ""+
-					"$2a1 := (0xffff - $2a) * 0x101\n"+
-					"dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*$2a1/0xffff + $2r) >> 8)\n"+
-					"dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*$2a1/0xffff + $2g) >> 8)\n"+
-					"dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*$2a1/0xffff + $2b) >> 8)\n"+
-					"dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*$2a1/0xffff + $2a) >> 8)",
-				)
+				switch d.sType {
+				default:
+					return argf(args, ""+
+						"$2a1 := (0xffff - $2a) * 0x101\n"+
+						"dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*$2a1/0xffff + $2r) >> 8)\n"+
+						"dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*$2a1/0xffff + $2g) >> 8)\n"+
+						"dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*$2a1/0xffff + $2b) >> 8)\n"+
+						"dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*$2a1/0xffff + $2a) >> 8)",
+					)
+				case "image.RGBA64Image":
+					return argf(args, ""+
+						"$2a1 := (0xffff - uint32($2.A)) * 0x101\n"+
+						"dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*$2a1/0xffff + uint32($2.R)) >> 8)\n"+
+						"dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*$2a1/0xffff + uint32($2.G)) >> 8)\n"+
+						"dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*$2a1/0xffff + uint32($2.B)) >> 8)\n"+
+						"dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*$2a1/0xffff + uint32($2.A)) >> 8)",
+					)
+				}
 			}
 
 		case "Src":
@@ -401,6 +462,51 @@
 					"	dst.Set($0, $1, dstColor)\n"+
 					"}",
 				)
+			case "RGBA64Image":
+				switch d.sType {
+				default:
+					return argf(args, ""+
+						"if dstMask != nil {\n"+
+						"	q := dst.RGBA64At($0, $1)\n"+
+						"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+						"	pr = pr * ma / 0xffff\n"+
+						"	pg = pg * ma / 0xffff\n"+
+						"	pb = pb * ma / 0xffff\n"+
+						"	pa = pa * ma / 0xffff\n"+
+						"	$2a1 := 0xffff - ma\n"+ // Note that this is ma, not $2a.
+						"	dstColorRGBA64.R = uint16(uint32(q.R)*$2a1/0xffff + $2r)\n"+
+						"	dstColorRGBA64.G = uint16(uint32(q.G)*$2a1/0xffff + $2g)\n"+
+						"	dstColorRGBA64.B = uint16(uint32(q.B)*$2a1/0xffff + $2b)\n"+
+						"	dstColorRGBA64.A = uint16(uint32(q.A)*$2a1/0xffff + $2a)\n"+
+						"	dst.Set($0, $1, dstColorRGBA64)\n"+
+						"} else {\n"+
+						"	dstColorRGBA64.R = uint16($2r)\n"+
+						"	dstColorRGBA64.G = uint16($2g)\n"+
+						"	dstColorRGBA64.B = uint16($2b)\n"+
+						"	dstColorRGBA64.A = uint16($2a)\n"+
+						"	dst.Set($0, $1, dstColorRGBA64)\n"+
+						"}",
+					)
+				case "image.RGBA64Image":
+					return argf(args, ""+
+						"if dstMask != nil {\n"+
+						"	q := dst.RGBA64At($0, $1)\n"+
+						"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+						"	p.R = uint16(uint32(p.R) * ma / 0xffff)\n"+
+						"	p.G = uint16(uint32(p.G) * ma / 0xffff)\n"+
+						"	p.B = uint16(uint32(p.B) * ma / 0xffff)\n"+
+						"	p.A = uint16(uint32(p.A) * ma / 0xffff)\n"+
+						"	$2a1 := 0xffff - ma\n"+ // Note that this is ma, not $2a.
+						"	dstColorRGBA64.R = uint16(uint32(q.R)*$2a1/0xffff + uint32($2.R))\n"+
+						"	dstColorRGBA64.G = uint16(uint32(q.G)*$2a1/0xffff + uint32($2.G))\n"+
+						"	dstColorRGBA64.B = uint16(uint32(q.B)*$2a1/0xffff + uint32($2.B))\n"+
+						"	dstColorRGBA64.A = uint16(uint32(q.A)*$2a1/0xffff + uint32($2.A))\n"+
+						"	dst.Set($0, $1, dstColorRGBA64)\n"+
+						"} else {\n"+
+						"	dst.Set($0, $1, $2)\n"+
+						"}",
+					)
+				}
 			case "*image.RGBA":
 				switch d.sType {
 				default:
@@ -425,6 +531,13 @@
 						"dst.Pix[d+2] = uint8($2b >> 8)\n"+
 						"dst.Pix[d+3] = 0xff",
 					)
+				case "image.RGBA64Image":
+					return argf(args, ""+
+						"dst.Pix[d+0] = uint8($2.R >> 8)\n"+
+						"dst.Pix[d+1] = uint8($2.G >> 8)\n"+
+						"dst.Pix[d+2] = uint8($2.B >> 8)\n"+
+						"dst.Pix[d+3] = uint8($2.A >> 8)",
+					)
 				}
 			}
 		}
@@ -462,6 +575,27 @@
 					"dstColorRGBA64.A = uint16(qa*$3a1/0xffff + $3a0)\n"+
 					"dst.Set($0, $1, dstColor)",
 				)
+			case "RGBA64Image":
+				ret = argf(args, ""+
+					"q := dst.RGBA64At($0, $1)\n"+
+					"$3r0 := uint32($2($3r * $4))\n"+
+					"$3g0 := uint32($2($3g * $4))\n"+
+					"$3b0 := uint32($2($3b * $4))\n"+
+					"$3a0 := uint32($2($3a * $4))\n"+
+					"if dstMask != nil {\n"+
+					"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+					"	$3r0 = $3r0 * ma / 0xffff\n"+
+					"	$3g0 = $3g0 * ma / 0xffff\n"+
+					"	$3b0 = $3b0 * ma / 0xffff\n"+
+					"	$3a0 = $3a0 * ma / 0xffff\n"+
+					"}\n"+
+					"$3a1 := 0xffff - $3a0\n"+
+					"dstColorRGBA64.R = uint16(uint32(q.R)*$3a1/0xffff + $3r0)\n"+
+					"dstColorRGBA64.G = uint16(uint32(q.G)*$3a1/0xffff + $3g0)\n"+
+					"dstColorRGBA64.B = uint16(uint32(q.B)*$3a1/0xffff + $3b0)\n"+
+					"dstColorRGBA64.A = uint16(uint32(q.A)*$3a1/0xffff + $3a0)\n"+
+					"dst.SetRGBA64($0, $1, dstColorRGBA64)",
+				)
 			case "*image.RGBA":
 				ret = argf(args, ""+
 					"$3r0 := uint32($2($3r * $4))\n"+
@@ -503,6 +637,29 @@
 					"	dst.Set($0, $1, dstColor)\n"+
 					"}",
 				)
+			case "RGBA64Image":
+				ret = argf(args, ""+
+					"if dstMask != nil {\n"+
+					"	q := dst.RGBA64At($0, $1)\n"+
+					"	_, _, _, ma := dstMask.At(dmp.X + $0, dmp.Y + $1).RGBA()\n"+
+					"	pr := uint32($2($3r * $4)) * ma / 0xffff\n"+
+					"	pg := uint32($2($3g * $4)) * ma / 0xffff\n"+
+					"	pb := uint32($2($3b * $4)) * ma / 0xffff\n"+
+					"	pa := uint32($2($3a * $4)) * ma / 0xffff\n"+
+					"	pa1 := 0xffff - ma\n"+ // Note that this is ma, not pa.
+					"	dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + pr)\n"+
+					"	dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + pg)\n"+
+					"	dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + pb)\n"+
+					"	dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + pa)\n"+
+					"	dst.SetRGBA64($0, $1, dstColorRGBA64)\n"+
+					"} else {\n"+
+					"	dstColorRGBA64.R = $2($3r * $4)\n"+
+					"	dstColorRGBA64.G = $2($3g * $4)\n"+
+					"	dstColorRGBA64.B = $2($3b * $4)\n"+
+					"	dstColorRGBA64.A = $2($3a * $4)\n"+
+					"	dst.SetRGBA64($0, $1, dstColorRGBA64)\n"+
+					"}",
+				)
 			case "*image.RGBA":
 				switch d.sType {
 				default:
@@ -560,7 +717,7 @@
 				"%sr%s, %sg%s, %sb%s, %sa%s := src.At(%s, %s).RGBA()\n",
 				lhs, tmp, lhs, tmp, lhs, tmp, lhs, tmp, args[0], args[1],
 			)
-			if d.dType == "" || d.dType == "Image" {
+			if d.dType == "" || d.dType == "Image" || d.dType == "RGBA64Image" {
 				fmt.Fprintf(buf, ""+
 					"if srcMask != nil {\n"+
 					"	_, _, _, ma := srcMask.At(smp.X+%s, smp.Y+%s).RGBA()\n"+
@@ -576,6 +733,24 @@
 					lhs, tmp, lhs, tmp,
 				)
 			}
+		case "image.RGBA64Image":
+			fmt.Fprintf(buf, ""+
+				"%s%s := src.RGBA64At(%s, %s)\n",
+				lhs, tmp, args[0], args[1],
+			)
+			if d.dType == "" || d.dType == "Image" || d.dType == "RGBA64Image" {
+				fmt.Fprintf(buf, ""+
+					"if srcMask != nil {\n"+
+					"	_, _, _, ma := srcMask.At(smp.X+%[1]s, smp.Y+%[2]s).RGBA()\n"+
+					"	%[3]s%[4]s.R = uint16(uint32(%[3]s%[4]s.R) * ma / 0xffff)\n"+
+					"	%[3]s%[4]s.G = uint16(uint32(%[3]s%[4]s.G) * ma / 0xffff)\n"+
+					"	%[3]s%[4]s.B = uint16(uint32(%[3]s%[4]s.B) * ma / 0xffff)\n"+
+					"	%[3]s%[4]s.A = uint16(uint32(%[3]s%[4]s.A) * ma / 0xffff)\n"+
+					"}\n",
+					args[0], args[1],
+					lhs, tmp,
+				)
+			}
 		case "*image.Gray":
 			fmt.Fprintf(buf, ""+
 				"%si := %s\n"+
@@ -647,6 +822,14 @@
 					lhs, eqOp, lhs, extra,
 					lhs, eqOp, lhs, extra,
 				)
+			case "image.RGBA64Image":
+				fmt.Fprintf(buf, ""+
+					"%[1]sr %[2]s float64(%[3]su.R)%[4]s\n"+
+					"%[1]sg %[2]s float64(%[3]su.G)%[4]s\n"+
+					"%[1]sb %[2]s float64(%[3]su.B)%[4]s\n"+
+					"%[1]sa %[2]s float64(%[3]su.A)%[4]s\n",
+					lhs, eqOp, lhs, extra,
+				)
 			}
 		}
 

diff --git a/draw/impl.go b/draw/impl.go
index 75498ad..94ee826 100644
--- a/draw/impl.go
+++ b/draw/impl.go

@@ -59,9 +59,16 @@
 					z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr, &o)
 				case *image.RGBA:
 					z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr, &o)
+				case image.RGBA64Image:
+					z.scale_RGBA_RGBA64Image_Over(dst, dr, adr, src, sr, &o)
 				default:
 					z.scale_RGBA_Image_Over(dst, dr, adr, src, sr, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) {
+				case image.RGBA64Image:
+					z.scale_RGBA64Image_RGBA64Image_Over(dst, dr, adr, src, sr, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -91,9 +98,16 @@
 					case image.YCbCrSubsampleRatio440:
 						z.scale_RGBA_YCbCr440_Src(dst, dr, adr, src, sr, &o)
 					}
+				case image.RGBA64Image:
+					z.scale_RGBA_RGBA64Image_Src(dst, dr, adr, src, sr, &o)
 				default:
 					z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) {
+				case image.RGBA64Image:
+					z.scale_RGBA64Image_RGBA64Image_Src(dst, dr, adr, src, sr, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -170,9 +184,16 @@
 					z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
 				case *image.RGBA:
 					z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
+				case image.RGBA64Image:
+					z.transform_RGBA_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
 				default:
 					z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) {
+				case image.RGBA64Image:
+					z.transform_RGBA64Image_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -202,9 +223,16 @@
 					case image.YCbCrSubsampleRatio440:
 						z.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
 					}
+				case image.RGBA64Image:
+					z.transform_RGBA_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
 				default:
 					z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) {
+				case image.RGBA64Image:
+					z.transform_RGBA64Image_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -502,6 +530,45 @@
 	}
 }
 
+func (nnInterpolator) scale_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) {
+	dw2 := uint64(dr.Dx()) * 2
+	dh2 := uint64(dr.Dy()) * 2
+	sw := uint64(sr.Dx())
+	sh := uint64(sr.Dy())
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (2*uint64(dy) + 1) * sh / dh2
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			sx := (2*uint64(dx) + 1) * sw / dw2
+			p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy))
+			pa1 := (0xffff - uint32(p.A)) * 0x101
+			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8)
+			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8)
+			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8)
+			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8)
+		}
+	}
+}
+
+func (nnInterpolator) scale_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) {
+	dw2 := uint64(dr.Dx()) * 2
+	dh2 := uint64(dr.Dy()) * 2
+	sw := uint64(sr.Dx())
+	sh := uint64(sr.Dy())
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (2*uint64(dy) + 1) * sh / dh2
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			sx := (2*uint64(dx) + 1) * sw / dw2
+			p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy))
+			dst.Pix[d+0] = uint8(p.R >> 8)
+			dst.Pix[d+1] = uint8(p.G >> 8)
+			dst.Pix[d+2] = uint8(p.B >> 8)
+			dst.Pix[d+3] = uint8(p.A >> 8)
+		}
+	}
+}
+
 func (nnInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
 	dw2 := uint64(dr.Dx()) * 2
 	dh2 := uint64(dr.Dy()) * 2
@@ -541,6 +608,86 @@
 	}
 }
 
+func (nnInterpolator) scale_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) {
+	dw2 := uint64(dr.Dx()) * 2
+	dh2 := uint64(dr.Dy()) * 2
+	sw := uint64(sr.Dx())
+	sh := uint64(sr.Dy())
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{}
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (2*uint64(dy) + 1) * sh / dh2
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			sx := (2*uint64(dx) + 1) * sw / dw2
+			p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy))
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx), smp.Y+sr.Min.Y+int(sy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			pa1 := 0xffff - uint32(p.A)
+			dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+			dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+			dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+			dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64)
+		}
+	}
+}
+
+func (nnInterpolator) scale_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) {
+	dw2 := uint64(dr.Dx()) * 2
+	dh2 := uint64(dr.Dy()) * 2
+	sw := uint64(sr.Dx())
+	sh := uint64(sr.Dy())
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{}
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (2*uint64(dy) + 1) * sh / dh2
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			sx := (2*uint64(dx) + 1) * sw / dw2
+			p := src.RGBA64At(sr.Min.X+int(sx), sr.Min.Y+int(sy))
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx), smp.Y+sr.Min.Y+int(sy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			if dstMask != nil {
+				q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+				dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+				dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+				dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64)
+			} else {
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), p)
+			}
+		}
+	}
+}
+
 func (nnInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
 	dw2 := uint64(dr.Dx()) * 2
 	dh2 := uint64(dr.Dy()) * 2
@@ -921,6 +1068,47 @@
 	}
 }
 
+func (nnInterpolator) transform_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { // Over-mode transform into *image.RGBA: each pixel of adr is mapped through d2s to one src pixel, which is composited onto dst.Pix. opts is not read here.
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5 // y coordinate of the destination pixel's center
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 // index into dst.Pix of this row's first pixel; the inner loop advances it by 4 per pixel
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X // apply the affine matrix d2s to the pixel center, truncate, then add bias
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			if !(image.Point{sx0, sy0}).In(sr) { // destination pixels that map outside sr are left untouched
+				continue
+			}
+			p := src.RGBA64At(sx0, sy0)
+			pa1 := (0xffff - uint32(p.A)) * 0x101 // the 0x101 factor widens the 8-bit dst.Pix values to 16 bits inside the blend
+			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8) // >> 8 narrows the 16-bit result back to 8 bits
+			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8)
+			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8)
+			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8)
+		}
+	}
+}
+
+func (nnInterpolator) transform_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { // Src-mode transform into *image.RGBA: each pixel of adr is mapped through d2s to one src pixel, which overwrites dst.Pix. opts is not read here.
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5 // y coordinate of the destination pixel's center
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 // index into dst.Pix of this row's first pixel; the inner loop advances it by 4 per pixel
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X // apply the affine matrix d2s to the pixel center, truncate, then add bias
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			if !(image.Point{sx0, sy0}).In(sr) { // destination pixels that map outside sr are left untouched
+				continue
+			}
+			p := src.RGBA64At(sx0, sy0)
+			dst.Pix[d+0] = uint8(p.R >> 8) // keep the high byte of each 16-bit channel
+			dst.Pix[d+1] = uint8(p.G >> 8)
+			dst.Pix[d+2] = uint8(p.B >> 8)
+			dst.Pix[d+3] = uint8(p.A >> 8)
+		}
+	}
+}
+
 func (nnInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
@@ -962,6 +1150,88 @@
 	}
 }
 
+func (nnInterpolator) transform_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { // Over-mode transform: each pixel of adr is mapped through d2s to one src pixel, which is masked (if masks are set) and composited over the destination pixel.
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{} // scratch value reused for every destination write
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5 // y coordinate of the destination pixel's center
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X // apply the affine matrix d2s to the pixel center, truncate, then add bias
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			if !(image.Point{sx0, sy0}).In(sr) { // destination pixels that map outside sr are left untouched
+				continue
+			}
+			p := src.RGBA64At(sx0, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA() // only the mask's alpha is used
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) // current destination pixel
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			pa1 := 0xffff - uint32(p.A) // weight left for the existing destination pixel after all masking
+			dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+			dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+			dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+			dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) // NOTE(review): Set takes a color.Color; presumably dst.SetRGBA64 would avoid the interface conversion — confirm
+		}
+	}
+}
+
+func (nnInterpolator) transform_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { // Src-mode transform: each pixel of adr is mapped through d2s to one src pixel, which replaces the destination pixel (blended with it only when a dst mask is set).
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{} // scratch value reused for every masked destination write
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5 // y coordinate of the destination pixel's center
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X // apply the affine matrix d2s to the pixel center, truncate, then add bias
+			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
+			if !(image.Point{sx0, sy0}).In(sr) { // destination pixels that map outside sr are left untouched
+				continue
+			}
+			p := src.RGBA64At(sx0, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA() // only the mask's alpha is used
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			if dstMask != nil {
+				q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) // current destination pixel, needed only when a dst mask is set
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+				pa1 := 0xffff - ma // the old destination pixel keeps whatever weight the mask leaves over
+				dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+				dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+				dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+				dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) // NOTE(review): Set takes a color.Color; presumably dst.SetRGBA64 would avoid the interface conversion — confirm
+			} else {
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), p) // no dst mask: the (possibly src-masked) sample replaces the pixel outright
+			}
+		}
+	}
+}
+
 func (nnInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
 	srcMask, smp := opts.SrcMask, opts.SrcMaskP
 	dstMask, dmp := opts.DstMask, opts.DstMaskP
@@ -1097,9 +1367,16 @@
 					z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr, &o)
 				case *image.RGBA:
 					z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr, &o)
+				case image.RGBA64Image:
+					z.scale_RGBA_RGBA64Image_Over(dst, dr, adr, src, sr, &o)
 				default:
 					z.scale_RGBA_Image_Over(dst, dr, adr, src, sr, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) { // NOTE(review): no default arm here, so a src that is not an image.RGBA64Image appears to draw nothing for this dst — confirm against the generic Image path
+				case image.RGBA64Image:
+					z.scale_RGBA64Image_RGBA64Image_Over(dst, dr, adr, src, sr, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -1129,9 +1406,16 @@
 					case image.YCbCrSubsampleRatio440:
 						z.scale_RGBA_YCbCr440_Src(dst, dr, adr, src, sr, &o)
 					}
+				case image.RGBA64Image:
+					z.scale_RGBA_RGBA64Image_Src(dst, dr, adr, src, sr, &o)
 				default:
 					z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) { // NOTE(review): no default arm here, so a src that is not an image.RGBA64Image appears to draw nothing for this dst — confirm against the generic Image path
+				case image.RGBA64Image:
+					z.scale_RGBA64Image_RGBA64Image_Src(dst, dr, adr, src, sr, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -1208,9 +1492,16 @@
 					z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
 				case *image.RGBA:
 					z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
+				case image.RGBA64Image:
+					z.transform_RGBA_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
 				default:
 					z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) { // NOTE(review): no default arm here, so a src that is not an image.RGBA64Image appears to draw nothing for this dst — confirm against the generic Image path
+				case image.RGBA64Image:
+					z.transform_RGBA64Image_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -1240,9 +1531,16 @@
 					case image.YCbCrSubsampleRatio440:
 						z.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
 					}
+				case image.RGBA64Image:
+					z.transform_RGBA_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
 				default:
 					z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) { // NOTE(review): no default arm here, so a src that is not an image.RGBA64Image appears to draw nothing for this dst — confirm against the generic Image path
+				case image.RGBA64Image:
+					z.transform_RGBA64Image_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -2415,6 +2713,167 @@
 	}
 }
 
+func (ablInterpolator) scale_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { // Over-mode bilinear scale into *image.RGBA: each pixel of adr blends up to four neighboring src pixels and composites the result onto dst.Pix. opts is not read here.
+	sw := int32(sr.Dx())
+	sh := int32(sr.Dy())
+	yscale := float64(sh) / float64(dr.Dy()) // source rows per destination row
+	xscale := float64(sw) / float64(dr.Dx()) // source columns per destination column
+	swMinus1, shMinus1 := sw-1, sh-1
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (float64(dy)+0.5)*yscale - 0.5 // fractional source row for the center of destination row dy
+		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
+		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
+		// sx, below.
+		sy0 := int32(sy)
+		yFrac0 := sy - float64(sy0) // weight given to row sy1 in the vertical blend
+		yFrac1 := 1 - yFrac0 // weight given to row sy0
+		sy1 := sy0 + 1
+		if sy < 0 { // before the first row: sample row 0 only
+			sy0, sy1 = 0, 0
+			yFrac0, yFrac1 = 0, 1
+		} else if sy1 > shMinus1 { // past the last row: sample row sh-1 only
+			sy0, sy1 = shMinus1, shMinus1
+			yFrac0, yFrac1 = 1, 0
+		}
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 // index into dst.Pix of this row's first pixel; the inner loop advances it by 4 per pixel
+
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			sx := (float64(dx)+0.5)*xscale - 0.5 // fractional source column, same scheme as sy
+			sx0 := int32(sx)
+			xFrac0 := sx - float64(sx0) // weight given to column sx1
+			xFrac1 := 1 - xFrac0 // weight given to column sx0
+			sx1 := sx0 + 1
+			if sx < 0 { // before the first column: sample column 0 only
+				sx0, sx1 = 0, 0
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 > swMinus1 { // past the last column: sample column sw-1 only
+				sx0, sx1 = swMinus1, swMinus1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) // sample at (sx0, sy0)
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) // sample at (sx1, sy0)
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r // from here s10* hold the horizontal blend along row sy0
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) // sample at (sx0, sy1)
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) // sample at (sx1, sy1)
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r // from here s11* hold the horizontal blend along row sy1
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r // blend the two rows vertically; s11* are now the final sample
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} // the float-to-uint16 conversions truncate
+			pa1 := (0xffff - uint32(p.A)) * 0x101 // the 0x101 factor widens the 8-bit dst.Pix values to 16 bits inside the blend
+			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8) // >> 8 narrows the 16-bit result back to 8 bits
+			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8)
+			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8)
+			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8)
+		}
+	}
+}
+
+func (ablInterpolator) scale_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { // Src-mode bilinear scale into *image.RGBA: each pixel of adr blends up to four neighboring src pixels and overwrites dst.Pix with the result. opts is not read here.
+	sw := int32(sr.Dx())
+	sh := int32(sr.Dy())
+	yscale := float64(sh) / float64(dr.Dy()) // source rows per destination row
+	xscale := float64(sw) / float64(dr.Dx()) // source columns per destination column
+	swMinus1, shMinus1 := sw-1, sh-1
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (float64(dy)+0.5)*yscale - 0.5 // fractional source row for the center of destination row dy
+		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
+		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
+		// sx, below.
+		sy0 := int32(sy)
+		yFrac0 := sy - float64(sy0) // weight given to row sy1 in the vertical blend
+		yFrac1 := 1 - yFrac0 // weight given to row sy0
+		sy1 := sy0 + 1
+		if sy < 0 { // before the first row: sample row 0 only
+			sy0, sy1 = 0, 0
+			yFrac0, yFrac1 = 0, 1
+		} else if sy1 > shMinus1 { // past the last row: sample row sh-1 only
+			sy0, sy1 = shMinus1, shMinus1
+			yFrac0, yFrac1 = 1, 0
+		}
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 // index into dst.Pix of this row's first pixel; the inner loop advances it by 4 per pixel
+
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			sx := (float64(dx)+0.5)*xscale - 0.5 // fractional source column, same scheme as sy
+			sx0 := int32(sx)
+			xFrac0 := sx - float64(sx0) // weight given to column sx1
+			xFrac1 := 1 - xFrac0 // weight given to column sx0
+			sx1 := sx0 + 1
+			if sx < 0 { // before the first column: sample column 0 only
+				sx0, sx1 = 0, 0
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 > swMinus1 { // past the last column: sample column sw-1 only
+				sx0, sx1 = swMinus1, swMinus1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) // sample at (sx0, sy0)
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) // sample at (sx1, sy0)
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r // from here s10* hold the horizontal blend along row sy0
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) // sample at (sx0, sy1)
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) // sample at (sx1, sy1)
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r // from here s11* hold the horizontal blend along row sy1
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r // blend the two rows vertically; s11* are now the final sample
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} // the float-to-uint16 conversions truncate
+			dst.Pix[d+0] = uint8(p.R >> 8) // keep the high byte of each 16-bit channel
+			dst.Pix[d+1] = uint8(p.G >> 8)
+			dst.Pix[d+2] = uint8(p.B >> 8)
+			dst.Pix[d+3] = uint8(p.A >> 8)
+		}
+	}
+}
+
 func (ablInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
 	sw := int32(sr.Dx())
 	sh := int32(sr.Dy())
@@ -2582,6 +3041,248 @@
 	}
 }
 
+func (ablInterpolator) scale_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { // Over-mode bilinear scale: each pixel of adr blends up to four (optionally src-masked) src pixels and composites the result over the destination pixel.
+	sw := int32(sr.Dx())
+	sh := int32(sr.Dy())
+	yscale := float64(sh) / float64(dr.Dy()) // source rows per destination row
+	xscale := float64(sw) / float64(dr.Dx()) // source columns per destination column
+	swMinus1, shMinus1 := sw-1, sh-1
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{} // scratch value reused for every destination write
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (float64(dy)+0.5)*yscale - 0.5 // fractional source row for the center of destination row dy
+		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
+		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
+		// sx, below.
+		sy0 := int32(sy)
+		yFrac0 := sy - float64(sy0) // weight given to row sy1 in the vertical blend
+		yFrac1 := 1 - yFrac0 // weight given to row sy0
+		sy1 := sy0 + 1
+		if sy < 0 { // before the first row: sample row 0 only
+			sy0, sy1 = 0, 0
+			yFrac0, yFrac1 = 0, 1
+		} else if sy1 > shMinus1 { // past the last row: sample row sh-1 only
+			sy0, sy1 = shMinus1, shMinus1
+			yFrac0, yFrac1 = 1, 0
+		}
+
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			sx := (float64(dx)+0.5)*xscale - 0.5 // fractional source column, same scheme as sy
+			sx0 := int32(sx)
+			xFrac0 := sx - float64(sx0) // weight given to column sx1
+			xFrac1 := 1 - xFrac0 // weight given to column sx0
+			sx1 := sx0 + 1
+			if sx < 0 { // before the first column: sample column 0 only
+				sx0, sx1 = 0, 0
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 > swMinus1 { // past the last column: sample column sw-1 only
+				sx0, sx1 = swMinus1, swMinus1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) // sample at (sx0, sy0)
+			if srcMask != nil { // each of the four samples is scaled by the mask alpha at its own position before blending
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy0)).RGBA()
+				s00u.R = uint16(uint32(s00u.R) * ma / 0xffff)
+				s00u.G = uint16(uint32(s00u.G) * ma / 0xffff)
+				s00u.B = uint16(uint32(s00u.B) * ma / 0xffff)
+				s00u.A = uint16(uint32(s00u.A) * ma / 0xffff)
+			}
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) // sample at (sx1, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy0)).RGBA()
+				s10u.R = uint16(uint32(s10u.R) * ma / 0xffff)
+				s10u.G = uint16(uint32(s10u.G) * ma / 0xffff)
+				s10u.B = uint16(uint32(s10u.B) * ma / 0xffff)
+				s10u.A = uint16(uint32(s10u.A) * ma / 0xffff)
+			}
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r // from here s10* hold the horizontal blend along row sy0
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) // sample at (sx0, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
+				s01u.R = uint16(uint32(s01u.R) * ma / 0xffff)
+				s01u.G = uint16(uint32(s01u.G) * ma / 0xffff)
+				s01u.B = uint16(uint32(s01u.B) * ma / 0xffff)
+				s01u.A = uint16(uint32(s01u.A) * ma / 0xffff)
+			}
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) // sample at (sx1, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy1)).RGBA()
+				s11u.R = uint16(uint32(s11u.R) * ma / 0xffff)
+				s11u.G = uint16(uint32(s11u.G) * ma / 0xffff)
+				s11u.B = uint16(uint32(s11u.B) * ma / 0xffff)
+				s11u.A = uint16(uint32(s11u.A) * ma / 0xffff)
+			}
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r // from here s11* hold the horizontal blend along row sy1
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r // blend the two rows vertically; s11* are now the final sample
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} // the float-to-uint16 conversions truncate
+			q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) // current destination pixel
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			pa1 := 0xffff - uint32(p.A) // weight left for the existing destination pixel after all masking
+			dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+			dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+			dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+			dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) // NOTE(review): Set takes a color.Color; presumably dst.SetRGBA64 would avoid the interface conversion — confirm
+		}
+	}
+}
+
+func (ablInterpolator) scale_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, src image.RGBA64Image, sr image.Rectangle, opts *Options) { // Src-mode bilinear scale: each pixel of adr blends up to four (optionally src-masked) src pixels; the result replaces the destination pixel, blended with it only when a dst mask is set.
+	sw := int32(sr.Dx())
+	sh := int32(sr.Dy())
+	yscale := float64(sh) / float64(dr.Dy()) // source rows per destination row
+	xscale := float64(sw) / float64(dr.Dx()) // source columns per destination column
+	swMinus1, shMinus1 := sw-1, sh-1
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{} // scratch value reused for every masked destination write
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		sy := (float64(dy)+0.5)*yscale - 0.5 // fractional source row for the center of destination row dy
+		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
+		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
+		// sx, below.
+		sy0 := int32(sy)
+		yFrac0 := sy - float64(sy0) // weight given to row sy1 in the vertical blend
+		yFrac1 := 1 - yFrac0 // weight given to row sy0
+		sy1 := sy0 + 1
+		if sy < 0 { // before the first row: sample row 0 only
+			sy0, sy1 = 0, 0
+			yFrac0, yFrac1 = 0, 1
+		} else if sy1 > shMinus1 { // past the last row: sample row sh-1 only
+			sy0, sy1 = shMinus1, shMinus1
+			yFrac0, yFrac1 = 1, 0
+		}
+
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			sx := (float64(dx)+0.5)*xscale - 0.5 // fractional source column, same scheme as sy
+			sx0 := int32(sx)
+			xFrac0 := sx - float64(sx0) // weight given to column sx1
+			xFrac1 := 1 - xFrac0 // weight given to column sx0
+			sx1 := sx0 + 1
+			if sx < 0 { // before the first column: sample column 0 only
+				sx0, sx1 = 0, 0
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 > swMinus1 { // past the last column: sample column sw-1 only
+				sx0, sx1 = swMinus1, swMinus1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			s00u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)) // sample at (sx0, sy0)
+			if srcMask != nil { // each of the four samples is scaled by the mask alpha at its own position before blending
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy0)).RGBA()
+				s00u.R = uint16(uint32(s00u.R) * ma / 0xffff)
+				s00u.G = uint16(uint32(s00u.G) * ma / 0xffff)
+				s00u.B = uint16(uint32(s00u.B) * ma / 0xffff)
+				s00u.A = uint16(uint32(s00u.A) * ma / 0xffff)
+			}
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			s10u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)) // sample at (sx1, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy0)).RGBA()
+				s10u.R = uint16(uint32(s10u.R) * ma / 0xffff)
+				s10u.G = uint16(uint32(s10u.G) * ma / 0xffff)
+				s10u.B = uint16(uint32(s10u.B) * ma / 0xffff)
+				s10u.A = uint16(uint32(s10u.A) * ma / 0xffff)
+			}
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r // from here s10* hold the horizontal blend along row sy0
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			s01u := src.RGBA64At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)) // sample at (sx0, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
+				s01u.R = uint16(uint32(s01u.R) * ma / 0xffff)
+				s01u.G = uint16(uint32(s01u.G) * ma / 0xffff)
+				s01u.B = uint16(uint32(s01u.B) * ma / 0xffff)
+				s01u.A = uint16(uint32(s01u.A) * ma / 0xffff)
+			}
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			s11u := src.RGBA64At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)) // sample at (sx1, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy1)).RGBA()
+				s11u.R = uint16(uint32(s11u.R) * ma / 0xffff)
+				s11u.G = uint16(uint32(s11u.G) * ma / 0xffff)
+				s11u.B = uint16(uint32(s11u.B) * ma / 0xffff)
+				s11u.A = uint16(uint32(s11u.A) * ma / 0xffff)
+			}
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r // from here s11* hold the horizontal blend along row sy1
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r // blend the two rows vertically; s11* are now the final sample
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} // the float-to-uint16 conversions truncate
+			if dstMask != nil {
+				q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy)) // current destination pixel, needed only when a dst mask is set
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+				pa1 := 0xffff - ma // the old destination pixel keeps whatever weight the mask leaves over
+				dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+				dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+				dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+				dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64) // NOTE(review): Set takes a color.Color; presumably dst.SetRGBA64 would avoid the interface conversion — confirm
+			} else {
+				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), p) // no dst mask: the blended sample replaces the pixel outright
+			}
+		}
+	}
+}
+
 func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
 	sw := int32(sr.Dx())
 	sh := int32(sr.Dy())
@@ -4007,6 +4708,169 @@
 	}
 }
 
+func (ablInterpolator) transform_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { // Over-mode bilinear transform into *image.RGBA: each pixel of adr is mapped through d2s, up to four neighboring src pixels are blended, and the result is composited onto dst.Pix. opts is not read here.
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5 // y coordinate of the destination pixel's center
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 // index into dst.Pix of this row's first pixel; the inner loop advances it by 4 per pixel
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] // apply the affine matrix d2s to the destination pixel center
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { // destination pixels that map outside sr are left untouched
+				continue
+			}
+
+			sx -= 0.5 // after the shift, int(sx) and the column after it are the two columns to blend
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0) // weight given to column sx1
+			xFrac1 := 1 - xFrac0 // weight given to column sx0
+			sx0 += bias.X
+			sx1 := sx0 + 1
+			if sx0 < sr.Min.X { // left of sr: sample column sr.Min.X only
+				sx0, sx1 = sr.Min.X, sr.Min.X
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 >= sr.Max.X { // right of sr: sample column sr.Max.X-1 only
+				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			sy -= 0.5 // same treatment for rows
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0) // weight given to row sy1
+			yFrac1 := 1 - yFrac0 // weight given to row sy0
+			sy0 += bias.Y
+			sy1 := sy0 + 1
+			if sy0 < sr.Min.Y { // above sr: sample row sr.Min.Y only
+				sy0, sy1 = sr.Min.Y, sr.Min.Y
+				yFrac0, yFrac1 = 0, 1
+			} else if sy1 >= sr.Max.Y { // below sr: sample row sr.Max.Y-1 only
+				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
+				yFrac0, yFrac1 = 1, 0
+			}
+
+			s00u := src.RGBA64At(sx0, sy0) // sample at (sx0, sy0)
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			s10u := src.RGBA64At(sx1, sy0) // sample at (sx1, sy0)
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r // from here s10* hold the horizontal blend along row sy0
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			s01u := src.RGBA64At(sx0, sy1) // sample at (sx0, sy1)
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			s11u := src.RGBA64At(sx1, sy1) // sample at (sx1, sy1)
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r // from here s11* hold the horizontal blend along row sy1
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r // blend the two rows vertically; s11* are now the final sample
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} // the float-to-uint16 conversions truncate
+			pa1 := (0xffff - uint32(p.A)) * 0x101 // the 0x101 factor widens the 8-bit dst.Pix values to 16 bits inside the blend
+			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + uint32(p.R)) >> 8) // >> 8 narrows the 16-bit result back to 8 bits
+			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + uint32(p.G)) >> 8)
+			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + uint32(p.B)) >> 8)
+			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + uint32(p.A)) >> 8)
+		}
+	}
+}
+
+func (ablInterpolator) transform_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) { // Src-mode bilinear transform into *image.RGBA: each pixel of adr is mapped through d2s, up to four neighboring src pixels are blended, and the result overwrites dst.Pix. opts is not read here.
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5 // y coordinate of the destination pixel's center
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4 // index into dst.Pix of this row's first pixel; the inner loop advances it by 4 per pixel
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2] // apply the affine matrix d2s to the destination pixel center
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) { // destination pixels that map outside sr are left untouched
+				continue
+			}
+
+			sx -= 0.5 // after the shift, int(sx) and the column after it are the two columns to blend
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0) // weight given to column sx1
+			xFrac1 := 1 - xFrac0 // weight given to column sx0
+			sx0 += bias.X
+			sx1 := sx0 + 1
+			if sx0 < sr.Min.X { // left of sr: sample column sr.Min.X only
+				sx0, sx1 = sr.Min.X, sr.Min.X
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 >= sr.Max.X { // right of sr: sample column sr.Max.X-1 only
+				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			sy -= 0.5 // same treatment for rows
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0) // weight given to row sy1
+			yFrac1 := 1 - yFrac0 // weight given to row sy0
+			sy0 += bias.Y
+			sy1 := sy0 + 1
+			if sy0 < sr.Min.Y { // above sr: sample row sr.Min.Y only
+				sy0, sy1 = sr.Min.Y, sr.Min.Y
+				yFrac0, yFrac1 = 0, 1
+			} else if sy1 >= sr.Max.Y { // below sr: sample row sr.Max.Y-1 only
+				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
+				yFrac0, yFrac1 = 1, 0
+			}
+
+			s00u := src.RGBA64At(sx0, sy0) // sample at (sx0, sy0)
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			s10u := src.RGBA64At(sx1, sy0) // sample at (sx1, sy0)
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r // from here s10* hold the horizontal blend along row sy0
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			s01u := src.RGBA64At(sx0, sy1) // sample at (sx0, sy1)
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			s11u := src.RGBA64At(sx1, sy1) // sample at (sx1, sy1)
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r // from here s11* hold the horizontal blend along row sy1
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r // blend the two rows vertically; s11* are now the final sample
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)} // the float-to-uint16 conversions truncate
+			dst.Pix[d+0] = uint8(p.R >> 8) // keep the high byte of each 16-bit channel
+			dst.Pix[d+1] = uint8(p.G >> 8)
+			dst.Pix[d+2] = uint8(p.B >> 8)
+			dst.Pix[d+3] = uint8(p.A >> 8)
+		}
+	}
+}
+
 func (ablInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
 	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
 		dyf := float64(dr.Min.Y+int(dy)) + 0.5
@@ -4176,6 +5040,252 @@
 	}
 }
 
+// transform_RGBA64Image_RGBA64Image_Over composites src onto dst with the Over
+// operator when both images expose the RGBA64Image fast-path methods.
+//
+// adr is relative to dr.Min. For each destination pixel in adr the pixel
+// centre is mapped through the d2s affine matrix into source space; pixels
+// whose mapped position (plus bias) lies outside sr are left untouched.
+// Otherwise the four neighbouring source pixels are read with RGBA64At,
+// scaled by the alpha of opts.SrcMask when one is set, blended by their
+// fractional distances, scaled by the alpha of opts.DstMask when one is set,
+// and finally composited over the current destination pixel.
+//
+// The result is stored with SetRGBA64 rather than Set so that the color is
+// never converted to a color.Color interface value on the per-pixel path.
+func (ablInterpolator) transform_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) {
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{}
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			// Map the destination pixel centre into source coordinates.
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
+				continue
+			}
+
+			// Horizontal neighbours sx0/sx1 and their blend fractions. When a
+			// neighbour would fall outside sr both samples collapse onto the
+			// edge column.
+			sx -= 0.5
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
+			xFrac1 := 1 - xFrac0
+			sx0 += bias.X
+			sx1 := sx0 + 1
+			if sx0 < sr.Min.X {
+				sx0, sx1 = sr.Min.X, sr.Min.X
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 >= sr.Max.X {
+				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			// Vertical neighbours sy0/sy1, clamped the same way.
+			sy -= 0.5
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
+			yFrac1 := 1 - yFrac0
+			sy0 += bias.Y
+			sy1 := sy0 + 1
+			if sy0 < sr.Min.Y {
+				sy0, sy1 = sr.Min.Y, sr.Min.Y
+				yFrac0, yFrac1 = 0, 1
+			} else if sy1 >= sr.Max.Y {
+				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
+				yFrac0, yFrac1 = 1, 0
+			}
+
+			// Top-left sample.
+			s00u := src.RGBA64At(sx0, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA()
+				s00u.R = uint16(uint32(s00u.R) * ma / 0xffff)
+				s00u.G = uint16(uint32(s00u.G) * ma / 0xffff)
+				s00u.B = uint16(uint32(s00u.B) * ma / 0xffff)
+				s00u.A = uint16(uint32(s00u.A) * ma / 0xffff)
+			}
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			// Top-right sample, then blend the top row horizontally.
+			s10u := src.RGBA64At(sx1, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy0).RGBA()
+				s10u.R = uint16(uint32(s10u.R) * ma / 0xffff)
+				s10u.G = uint16(uint32(s10u.G) * ma / 0xffff)
+				s10u.B = uint16(uint32(s10u.B) * ma / 0xffff)
+				s10u.A = uint16(uint32(s10u.A) * ma / 0xffff)
+			}
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			// Bottom-left sample.
+			s01u := src.RGBA64At(sx0, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
+				s01u.R = uint16(uint32(s01u.R) * ma / 0xffff)
+				s01u.G = uint16(uint32(s01u.G) * ma / 0xffff)
+				s01u.B = uint16(uint32(s01u.B) * ma / 0xffff)
+				s01u.A = uint16(uint32(s01u.A) * ma / 0xffff)
+			}
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			// Bottom-right sample, blend the bottom row horizontally, then
+			// blend the two rows vertically.
+			s11u := src.RGBA64At(sx1, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy1).RGBA()
+				s11u.R = uint16(uint32(s11u.R) * ma / 0xffff)
+				s11u.G = uint16(uint32(s11u.G) * ma / 0xffff)
+				s11u.B = uint16(uint32(s11u.B) * ma / 0xffff)
+				s11u.A = uint16(uint32(s11u.A) * ma / 0xffff)
+			}
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
+			q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+			}
+			// Over: result = dst*(1-srcAlpha) + src, in 16-bit fixed point.
+			pa1 := 0xffff - uint32(p.A)
+			dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+			dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+			dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+			dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+			// SetRGBA64 takes the concrete color type, so nothing is boxed.
+			dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64)
+		}
+	}
+}
+
+// transform_RGBA64Image_RGBA64Image_Src draws src onto dst with the Src
+// operator when both images expose the RGBA64Image fast-path methods.
+//
+// adr is relative to dr.Min. For each destination pixel in adr the pixel
+// centre is mapped through the d2s affine matrix into source space; pixels
+// whose mapped position (plus bias) lies outside sr are left untouched.
+// Otherwise the four neighbouring source pixels are read with RGBA64At,
+// scaled by the alpha of opts.SrcMask when one is set, and blended by their
+// fractional distances. Without a destination mask the blended color replaces
+// the destination pixel; with opts.DstMask set, the mask alpha ma mixes the
+// blended color (weight ma) with the existing pixel (weight 0xffff-ma).
+//
+// The result is stored with SetRGBA64 rather than Set so that the color is
+// never converted to a color.Color interface value on the per-pixel path.
+func (ablInterpolator) transform_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, opts *Options) {
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{}
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			// Map the destination pixel centre into source coordinates.
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
+				continue
+			}
+
+			// Horizontal neighbours sx0/sx1 and their blend fractions. When a
+			// neighbour would fall outside sr both samples collapse onto the
+			// edge column.
+			sx -= 0.5
+			sx0 := int(sx)
+			xFrac0 := sx - float64(sx0)
+			xFrac1 := 1 - xFrac0
+			sx0 += bias.X
+			sx1 := sx0 + 1
+			if sx0 < sr.Min.X {
+				sx0, sx1 = sr.Min.X, sr.Min.X
+				xFrac0, xFrac1 = 0, 1
+			} else if sx1 >= sr.Max.X {
+				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
+				xFrac0, xFrac1 = 1, 0
+			}
+
+			// Vertical neighbours sy0/sy1, clamped the same way.
+			sy -= 0.5
+			sy0 := int(sy)
+			yFrac0 := sy - float64(sy0)
+			yFrac1 := 1 - yFrac0
+			sy0 += bias.Y
+			sy1 := sy0 + 1
+			if sy0 < sr.Min.Y {
+				sy0, sy1 = sr.Min.Y, sr.Min.Y
+				yFrac0, yFrac1 = 0, 1
+			} else if sy1 >= sr.Max.Y {
+				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
+				yFrac0, yFrac1 = 1, 0
+			}
+
+			// Top-left sample.
+			s00u := src.RGBA64At(sx0, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA()
+				s00u.R = uint16(uint32(s00u.R) * ma / 0xffff)
+				s00u.G = uint16(uint32(s00u.G) * ma / 0xffff)
+				s00u.B = uint16(uint32(s00u.B) * ma / 0xffff)
+				s00u.A = uint16(uint32(s00u.A) * ma / 0xffff)
+			}
+			s00r := float64(s00u.R)
+			s00g := float64(s00u.G)
+			s00b := float64(s00u.B)
+			s00a := float64(s00u.A)
+			// Top-right sample, then blend the top row horizontally.
+			s10u := src.RGBA64At(sx1, sy0)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy0).RGBA()
+				s10u.R = uint16(uint32(s10u.R) * ma / 0xffff)
+				s10u.G = uint16(uint32(s10u.G) * ma / 0xffff)
+				s10u.B = uint16(uint32(s10u.B) * ma / 0xffff)
+				s10u.A = uint16(uint32(s10u.A) * ma / 0xffff)
+			}
+			s10r := float64(s10u.R)
+			s10g := float64(s10u.G)
+			s10b := float64(s10u.B)
+			s10a := float64(s10u.A)
+			s10r = xFrac1*s00r + xFrac0*s10r
+			s10g = xFrac1*s00g + xFrac0*s10g
+			s10b = xFrac1*s00b + xFrac0*s10b
+			s10a = xFrac1*s00a + xFrac0*s10a
+			// Bottom-left sample.
+			s01u := src.RGBA64At(sx0, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
+				s01u.R = uint16(uint32(s01u.R) * ma / 0xffff)
+				s01u.G = uint16(uint32(s01u.G) * ma / 0xffff)
+				s01u.B = uint16(uint32(s01u.B) * ma / 0xffff)
+				s01u.A = uint16(uint32(s01u.A) * ma / 0xffff)
+			}
+			s01r := float64(s01u.R)
+			s01g := float64(s01u.G)
+			s01b := float64(s01u.B)
+			s01a := float64(s01u.A)
+			// Bottom-right sample, blend the bottom row horizontally, then
+			// blend the two rows vertically.
+			s11u := src.RGBA64At(sx1, sy1)
+			if srcMask != nil {
+				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy1).RGBA()
+				s11u.R = uint16(uint32(s11u.R) * ma / 0xffff)
+				s11u.G = uint16(uint32(s11u.G) * ma / 0xffff)
+				s11u.B = uint16(uint32(s11u.B) * ma / 0xffff)
+				s11u.A = uint16(uint32(s11u.A) * ma / 0xffff)
+			}
+			s11r := float64(s11u.R)
+			s11g := float64(s11u.G)
+			s11b := float64(s11u.B)
+			s11a := float64(s11u.A)
+			s11r = xFrac1*s01r + xFrac0*s11r
+			s11g = xFrac1*s01g + xFrac0*s11g
+			s11b = xFrac1*s01b + xFrac0*s11b
+			s11a = xFrac1*s01a + xFrac0*s11a
+			s11r = yFrac1*s10r + yFrac0*s11r
+			s11g = yFrac1*s10g + yFrac0*s11g
+			s11b = yFrac1*s10b + yFrac0*s11b
+			s11a = yFrac1*s10a + yFrac0*s11a
+			p := color.RGBA64{uint16(s11r), uint16(s11g), uint16(s11b), uint16(s11a)}
+			if dstMask != nil {
+				q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				p.R = uint16(uint32(p.R) * ma / 0xffff)
+				p.G = uint16(uint32(p.G) * ma / 0xffff)
+				p.B = uint16(uint32(p.B) * ma / 0xffff)
+				p.A = uint16(uint32(p.A) * ma / 0xffff)
+				// Keep (0xffff-ma)/0xffff of the existing destination pixel.
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + uint32(p.R))
+				dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + uint32(p.G))
+				dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + uint32(p.B))
+				dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + uint32(p.A))
+				// SetRGBA64 takes the concrete color type, so nothing is boxed.
+				dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64)
+			} else {
+				dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), p)
+			}
+		}
+	}
+}
+
 func (ablInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
 	srcMask, smp := opts.SrcMask, opts.SrcMaskP
 	dstMask, dmp := opts.DstMask, opts.DstMaskP
@@ -4500,6 +5610,8 @@
 			case image.YCbCrSubsampleRatio440:
 				z.scaleX_YCbCr440(tmp, src, sr, &o)
 			}
+		case image.RGBA64Image:
+			z.scaleX_RGBA64Image(tmp, src, sr, &o)
 		default:
 			z.scaleX_Image(tmp, src, sr, &o)
 		}
@@ -4518,6 +5630,8 @@
 			switch dst := dst.(type) {
 			case *image.RGBA:
 				z.scaleY_RGBA_Over(dst, dr, adr, tmp, &o)
+			case RGBA64Image:
+				z.scaleY_RGBA64Image_Over(dst, dr, adr, tmp, &o)
 			default:
 				z.scaleY_Image_Over(dst, dr, adr, tmp, &o)
 			}
@@ -4525,6 +5639,8 @@
 			switch dst := dst.(type) {
 			case *image.RGBA:
 				z.scaleY_RGBA_Src(dst, dr, adr, tmp, &o)
+			case RGBA64Image:
+				z.scaleY_RGBA64Image_Src(dst, dr, adr, tmp, &o)
 			default:
 				z.scaleY_Image_Src(dst, dr, adr, tmp, &o)
 			}
@@ -4600,9 +5716,16 @@
 					q.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
 				case *image.RGBA:
 					q.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
+				case image.RGBA64Image:
+					q.transform_RGBA_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
 				default:
 					q.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) {
+				case image.RGBA64Image:
+					q.transform_RGBA64Image_RGBA64Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -4632,9 +5755,16 @@
 					case image.YCbCrSubsampleRatio440:
 						q.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
 					}
+				case image.RGBA64Image:
+					q.transform_RGBA_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
 				default:
 					q.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
 				}
+			case RGBA64Image:
+				switch src := src.(type) {
+				case image.RGBA64Image:
+					q.transform_RGBA64Image_RGBA64Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
+				}
 			default:
 				switch src := src.(type) {
 				default:
@@ -4909,6 +6039,37 @@
 	}
 }
 
+// scaleX_RGBA64Image performs the horizontal pass of the kernel scaler for a
+// source that implements image.RGBA64Image. For each of the z.sh source rows
+// and each entry of z.horizontal.sources it accumulates the weighted source
+// pixels listed in z.horizontal.contribs (scaled by the alpha of opts.SrcMask
+// when one is set) and stores the sum, multiplied by the entry's
+// invTotalWeightFFFF, in the next slot of tmp.
+func (z *kernelScaler) scaleX_RGBA64Image(tmp [][4]float64, src image.RGBA64Image, sr image.Rectangle, opts *Options) {
+	mask, maskOff := opts.SrcMask, opts.SrcMaskP
+	out := 0
+	for row := int32(0); row < z.sh; row++ {
+		srcY := sr.Min.Y + int(row)
+		for _, span := range z.horizontal.sources {
+			var sumR, sumG, sumB, sumA float64
+			for _, contrib := range z.horizontal.contribs[span.i:span.j] {
+				srcX := sr.Min.X + int(contrib.coord)
+				px := src.RGBA64At(srcX, srcY)
+				if mask != nil {
+					// Only the mask's alpha channel is used.
+					_, _, _, ma := mask.At(maskOff.X+srcX, maskOff.Y+srcY).RGBA()
+					px.R = uint16(uint32(px.R) * ma / 0xffff)
+					px.G = uint16(uint32(px.G) * ma / 0xffff)
+					px.B = uint16(uint32(px.B) * ma / 0xffff)
+					px.A = uint16(uint32(px.A) * ma / 0xffff)
+				}
+				sumR += float64(px.R) * contrib.weight
+				sumG += float64(px.G) * contrib.weight
+				sumB += float64(px.B) * contrib.weight
+				sumA += float64(px.A) * contrib.weight
+			}
+			norm := span.invTotalWeightFFFF
+			tmp[out] = [4]float64{sumR * norm, sumG * norm, sumB * norm, sumA * norm}
+			out++
+		}
+	}
+}
+
 func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sr image.Rectangle, opts *Options) {
 	t := 0
 	srcMask, smp := opts.SrcMask, opts.SrcMaskP
@@ -5009,6 +6170,102 @@
 	}
 }
 
+// scaleY_RGBA64Image_Over performs the vertical pass of the kernel scaler and
+// composites the result onto an RGBA64Image destination with the Over
+// operator. tmp holds the output of the horizontal pass, indexed as
+// coord*z.dw+dx. adr is relative to dr.Min. When opts.DstMask is set, the
+// computed color is scaled by the mask's alpha before compositing.
+func (z *kernelScaler) scaleY_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
+	mask, maskOff := opts.DstMask, opts.DstMaskP
+
+	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+		x := dr.Min.X + int(dx)
+		for dy, span := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
+			y := dr.Min.Y + int(adr.Min.Y+dy)
+
+			// Weighted sum of the contributing rows for this column.
+			var sumR, sumG, sumB, sumA float64
+			for _, contrib := range z.vertical.contribs[span.i:span.j] {
+				cell := &tmp[contrib.coord*z.dw+dx]
+				sumR += cell[0] * contrib.weight
+				sumG += cell[1] * contrib.weight
+				sumB += cell[2] * contrib.weight
+				sumA += cell[3] * contrib.weight
+			}
+
+			// Keep the color channels from exceeding alpha.
+			if sumR > sumA {
+				sumR = sumA
+			}
+			if sumG > sumA {
+				sumG = sumA
+			}
+			if sumB > sumA {
+				sumB = sumA
+			}
+
+			under := dst.RGBA64At(x, y)
+			r := uint32(ftou(sumR * span.invTotalWeight))
+			g := uint32(ftou(sumG * span.invTotalWeight))
+			b := uint32(ftou(sumB * span.invTotalWeight))
+			a := uint32(ftou(sumA * span.invTotalWeight))
+			if mask != nil {
+				_, _, _, ma := mask.At(maskOff.X+x, maskOff.Y+y).RGBA()
+				r = r * ma / 0xffff
+				g = g * ma / 0xffff
+				b = b * ma / 0xffff
+				a = a * ma / 0xffff
+			}
+
+			// Over: result = dst*(1-srcAlpha) + src, in 16-bit fixed point.
+			keep := 0xffff - a
+			dst.SetRGBA64(x, y, color.RGBA64{
+				R: uint16(uint32(under.R)*keep/0xffff + r),
+				G: uint16(uint32(under.G)*keep/0xffff + g),
+				B: uint16(uint32(under.B)*keep/0xffff + b),
+				A: uint16(uint32(under.A)*keep/0xffff + a),
+			})
+		}
+	}
+}
+
+// scaleY_RGBA64Image_Src performs the vertical pass of the kernel scaler and
+// writes the result into an RGBA64Image destination with the Src operator.
+// tmp holds the output of the horizontal pass, indexed as coord*z.dw+dx. adr
+// is relative to dr.Min. Without a destination mask the computed color
+// replaces the destination pixel; with opts.DstMask set, the mask alpha ma
+// mixes the computed color (weight ma) with the existing pixel (weight
+// 0xffff-ma).
+func (z *kernelScaler) scaleY_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
+	mask, maskOff := opts.DstMask, opts.DstMaskP
+
+	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+		x := dr.Min.X + int(dx)
+		for dy, span := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
+			y := dr.Min.Y + int(adr.Min.Y+dy)
+
+			// Weighted sum of the contributing rows for this column.
+			var sumR, sumG, sumB, sumA float64
+			for _, contrib := range z.vertical.contribs[span.i:span.j] {
+				cell := &tmp[contrib.coord*z.dw+dx]
+				sumR += cell[0] * contrib.weight
+				sumG += cell[1] * contrib.weight
+				sumB += cell[2] * contrib.weight
+				sumA += cell[3] * contrib.weight
+			}
+
+			// Keep the color channels from exceeding alpha.
+			if sumR > sumA {
+				sumR = sumA
+			}
+			if sumG > sumA {
+				sumG = sumA
+			}
+			if sumB > sumA {
+				sumB = sumA
+			}
+
+			if mask == nil {
+				// No mask: the scaled color replaces the destination pixel.
+				dst.SetRGBA64(x, y, color.RGBA64{
+					R: ftou(sumR * span.invTotalWeight),
+					G: ftou(sumG * span.invTotalWeight),
+					B: ftou(sumB * span.invTotalWeight),
+					A: ftou(sumA * span.invTotalWeight),
+				})
+				continue
+			}
+
+			under := dst.RGBA64At(x, y)
+			_, _, _, ma := mask.At(maskOff.X+x, maskOff.Y+y).RGBA()
+			r := uint32(ftou(sumR*span.invTotalWeight)) * ma / 0xffff
+			g := uint32(ftou(sumG*span.invTotalWeight)) * ma / 0xffff
+			b := uint32(ftou(sumB*span.invTotalWeight)) * ma / 0xffff
+			a := uint32(ftou(sumA*span.invTotalWeight)) * ma / 0xffff
+			// Keep (0xffff-ma)/0xffff of the existing destination pixel.
+			keep := 0xffff - ma
+			dst.SetRGBA64(x, y, color.RGBA64{
+				R: uint16(uint32(under.R)*keep/0xffff + r),
+				G: uint16(uint32(under.G)*keep/0xffff + g),
+				B: uint16(uint32(under.B)*keep/0xffff + b),
+				A: uint16(uint32(under.A)*keep/0xffff + a),
+			})
+		}
+	}
+}
+
 func (z *kernelScaler) scaleY_Image_Over(dst Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
 	dstMask, dmp := opts.DstMask, opts.DstMaskP
 	dstColorRGBA64 := &color.RGBA64{}
@@ -6170,6 +7427,233 @@
 	}
 }
 
+// transform_RGBA_RGBA64Image_Over composites an affine-transformed
+// image.RGBA64Image source onto an *image.RGBA destination with the Over
+// operator, writing straight into dst.Pix.
+//
+// adr is relative to dr.Min. For each destination pixel in adr the pixel
+// centre is mapped through d2s into source space; pixels whose mapped
+// position (plus bias) lies outside sr are left untouched. Otherwise the
+// source pixels within the kernel's half-width of that position (clipped to
+// sr) are combined using separable, normalized kernel weights, and the result
+// is composited over the existing destination bytes.
+//
+// opts is accepted for signature parity but is not read in this body.
+func (q *Kernel) transform_RGBA_RGBA64Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
+	// When shrinking, broaden the effective kernel support so that we still
+	// visit every source pixel.
+	xHalfWidth, xKernelArgScale := q.Support, 1.0
+	if xscale > 1 {
+		xHalfWidth *= xscale
+		xKernelArgScale = 1 / xscale
+	}
+	yHalfWidth, yKernelArgScale := q.Support, 1.0
+	if yscale > 1 {
+		yHalfWidth *= yscale
+		yKernelArgScale = 1 / yscale
+	}
+
+	// Scratch buffers for the per-pixel weights, allocated once and reused
+	// for every destination pixel.
+	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
+	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5
+		// d is the byte offset into dst.Pix of the first pixel of this row;
+		// it advances by 4 (one RGBA pixel) per column.
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
+				continue
+			}
+
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
+			sx -= 0.5
+			// [ix, jx) is the range of source columns under the kernel,
+			// clipped to sr.
+			ix := int(math.Floor(sx - xHalfWidth))
+			if ix < sr.Min.X {
+				ix = sr.Min.X
+			}
+			jx := int(math.Ceil(sx + xHalfWidth))
+			if jx > sr.Max.X {
+				jx = sr.Max.X
+			}
+
+			// Evaluate the kernel per column, then normalize by the total.
+			totalXWeight := 0.0
+			for kx := ix; kx < jx; kx++ {
+				xWeight := 0.0
+				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
+					xWeight = q.At(t)
+				}
+				xWeights[kx-ix] = xWeight
+				totalXWeight += xWeight
+			}
+			for x := range xWeights[:jx-ix] {
+				xWeights[x] /= totalXWeight
+			}
+
+			sy += float64(bias.Y)
+			sy -= 0.5
+			// [iy, jy) is the range of source rows under the kernel, clipped
+			// to sr.
+			iy := int(math.Floor(sy - yHalfWidth))
+			if iy < sr.Min.Y {
+				iy = sr.Min.Y
+			}
+			jy := int(math.Ceil(sy + yHalfWidth))
+			if jy > sr.Max.Y {
+				jy = sr.Max.Y
+			}
+
+			totalYWeight := 0.0
+			for ky := iy; ky < jy; ky++ {
+				yWeight := 0.0
+				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
+					yWeight = q.At(t)
+				}
+				yWeights[ky-iy] = yWeight
+				totalYWeight += yWeight
+			}
+			for y := range yWeights[:jy-iy] {
+				yWeights[y] /= totalYWeight
+			}
+
+			// Accumulate the weighted source pixels, skipping zero weights.
+			var pr, pg, pb, pa float64
+			for ky := iy; ky < jy; ky++ {
+				if yWeight := yWeights[ky-iy]; yWeight != 0 {
+					for kx := ix; kx < jx; kx++ {
+						if w := xWeights[kx-ix] * yWeight; w != 0 {
+							pu := src.RGBA64At(kx, ky)
+							pr += float64(pu.R) * w
+							pg += float64(pu.G) * w
+							pb += float64(pu.B) * w
+							pa += float64(pu.A) * w
+						}
+					}
+				}
+			}
+
+			// Keep the color channels from exceeding alpha.
+			if pr > pa {
+				pr = pa
+			}
+			if pg > pa {
+				pg = pa
+			}
+			if pb > pa {
+				pb = pa
+			}
+
+			// fffftou is defined elsewhere in the package; presumably it
+			// converts a float in the 0..0xffff range to uint16 (confirm).
+			pr0 := uint32(fffftou(pr))
+			pg0 := uint32(fffftou(pg))
+			pb0 := uint32(fffftou(pb))
+			pa0 := uint32(fffftou(pa))
+			// The 0x101 factor widens the 8-bit destination bytes to 16 bits
+			// inside the blend; the final >> 8 narrows the result back.
+			pa1 := (0xffff - uint32(pa0)) * 0x101
+			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr0) >> 8)
+			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg0) >> 8)
+			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb0) >> 8)
+			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa0) >> 8)
+		}
+	}
+}
+
+// transform_RGBA_RGBA64Image_Src draws an affine-transformed
+// image.RGBA64Image source into an *image.RGBA destination with the Src
+// operator, writing straight into dst.Pix.
+//
+// adr is relative to dr.Min. For each destination pixel in adr the pixel
+// centre is mapped through d2s into source space; pixels whose mapped
+// position (plus bias) lies outside sr are left untouched. Otherwise the
+// source pixels within the kernel's half-width of that position (clipped to
+// sr) are combined using separable, normalized kernel weights, and the high
+// byte of each resulting channel replaces the destination byte.
+//
+// opts is accepted for signature parity but is not read in this body.
+func (q *Kernel) transform_RGBA_RGBA64Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
+	// When shrinking, broaden the effective kernel support so that we still
+	// visit every source pixel.
+	xHalfWidth, xKernelArgScale := q.Support, 1.0
+	if xscale > 1 {
+		xHalfWidth *= xscale
+		xKernelArgScale = 1 / xscale
+	}
+	yHalfWidth, yKernelArgScale := q.Support, 1.0
+	if yscale > 1 {
+		yHalfWidth *= yscale
+		yKernelArgScale = 1 / yscale
+	}
+
+	// Scratch buffers for the per-pixel weights, allocated once and reused
+	// for every destination pixel.
+	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
+	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5
+		// d is the byte offset into dst.Pix of the first pixel of this row;
+		// it advances by 4 (one RGBA pixel) per column.
+		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
+				continue
+			}
+
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
+			sx -= 0.5
+			// [ix, jx) is the range of source columns under the kernel,
+			// clipped to sr.
+			ix := int(math.Floor(sx - xHalfWidth))
+			if ix < sr.Min.X {
+				ix = sr.Min.X
+			}
+			jx := int(math.Ceil(sx + xHalfWidth))
+			if jx > sr.Max.X {
+				jx = sr.Max.X
+			}
+
+			// Evaluate the kernel per column, then normalize by the total.
+			totalXWeight := 0.0
+			for kx := ix; kx < jx; kx++ {
+				xWeight := 0.0
+				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
+					xWeight = q.At(t)
+				}
+				xWeights[kx-ix] = xWeight
+				totalXWeight += xWeight
+			}
+			for x := range xWeights[:jx-ix] {
+				xWeights[x] /= totalXWeight
+			}
+
+			sy += float64(bias.Y)
+			sy -= 0.5
+			// [iy, jy) is the range of source rows under the kernel, clipped
+			// to sr.
+			iy := int(math.Floor(sy - yHalfWidth))
+			if iy < sr.Min.Y {
+				iy = sr.Min.Y
+			}
+			jy := int(math.Ceil(sy + yHalfWidth))
+			if jy > sr.Max.Y {
+				jy = sr.Max.Y
+			}
+
+			totalYWeight := 0.0
+			for ky := iy; ky < jy; ky++ {
+				yWeight := 0.0
+				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
+					yWeight = q.At(t)
+				}
+				yWeights[ky-iy] = yWeight
+				totalYWeight += yWeight
+			}
+			for y := range yWeights[:jy-iy] {
+				yWeights[y] /= totalYWeight
+			}
+
+			// Accumulate the weighted source pixels, skipping zero weights.
+			var pr, pg, pb, pa float64
+			for ky := iy; ky < jy; ky++ {
+				if yWeight := yWeights[ky-iy]; yWeight != 0 {
+					for kx := ix; kx < jx; kx++ {
+						if w := xWeights[kx-ix] * yWeight; w != 0 {
+							pu := src.RGBA64At(kx, ky)
+							pr += float64(pu.R) * w
+							pg += float64(pu.G) * w
+							pb += float64(pu.B) * w
+							pa += float64(pu.A) * w
+						}
+					}
+				}
+			}
+
+			// Keep the color channels from exceeding alpha.
+			if pr > pa {
+				pr = pa
+			}
+			if pg > pa {
+				pg = pa
+			}
+			if pb > pa {
+				pb = pa
+			}
+
+			// Src: overwrite the destination with the high byte of each
+			// channel. fffftou is defined elsewhere in the package; presumably
+			// it converts a float in the 0..0xffff range to uint16 (confirm).
+			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
+			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
+			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
+			dst.Pix[d+3] = uint8(fffftou(pa) >> 8)
+		}
+	}
+}
+
 func (q *Kernel) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.
@@ -6397,6 +7881,278 @@
 	}
 }
 
+// transform_RGBA64Image_RGBA64Image_Over composites an affine-transformed
+// image.RGBA64Image source onto an RGBA64Image destination with the Over
+// operator, honouring opts.SrcMask and opts.DstMask when they are set.
+//
+// adr is relative to dr.Min. For each destination pixel in adr the pixel
+// centre is mapped through d2s into source space; pixels whose mapped
+// position (plus bias) lies outside sr are left untouched. Otherwise the
+// source pixels within the kernel's half-width of that position (clipped to
+// sr) are combined using separable, normalized kernel weights, and the result
+// is composited over the pixel read back with RGBA64At and stored with
+// SetRGBA64.
+func (q *Kernel) transform_RGBA64Image_RGBA64Image_Over(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
+	// When shrinking, broaden the effective kernel support so that we still
+	// visit every source pixel.
+	xHalfWidth, xKernelArgScale := q.Support, 1.0
+	if xscale > 1 {
+		xHalfWidth *= xscale
+		xKernelArgScale = 1 / xscale
+	}
+	yHalfWidth, yKernelArgScale := q.Support, 1.0
+	if yscale > 1 {
+		yHalfWidth *= yscale
+		yKernelArgScale = 1 / yscale
+	}
+
+	// Scratch buffers for the per-pixel weights, allocated once and reused
+	// for every destination pixel.
+	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
+	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
+
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{}
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
+				continue
+			}
+
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
+			sx -= 0.5
+			// [ix, jx) is the range of source columns under the kernel,
+			// clipped to sr.
+			ix := int(math.Floor(sx - xHalfWidth))
+			if ix < sr.Min.X {
+				ix = sr.Min.X
+			}
+			jx := int(math.Ceil(sx + xHalfWidth))
+			if jx > sr.Max.X {
+				jx = sr.Max.X
+			}
+
+			// Evaluate the kernel per column, then normalize by the total.
+			totalXWeight := 0.0
+			for kx := ix; kx < jx; kx++ {
+				xWeight := 0.0
+				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
+					xWeight = q.At(t)
+				}
+				xWeights[kx-ix] = xWeight
+				totalXWeight += xWeight
+			}
+			for x := range xWeights[:jx-ix] {
+				xWeights[x] /= totalXWeight
+			}
+
+			sy += float64(bias.Y)
+			sy -= 0.5
+			// [iy, jy) is the range of source rows under the kernel, clipped
+			// to sr.
+			iy := int(math.Floor(sy - yHalfWidth))
+			if iy < sr.Min.Y {
+				iy = sr.Min.Y
+			}
+			jy := int(math.Ceil(sy + yHalfWidth))
+			if jy > sr.Max.Y {
+				jy = sr.Max.Y
+			}
+
+			totalYWeight := 0.0
+			for ky := iy; ky < jy; ky++ {
+				yWeight := 0.0
+				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
+					yWeight = q.At(t)
+				}
+				yWeights[ky-iy] = yWeight
+				totalYWeight += yWeight
+			}
+			for y := range yWeights[:jy-iy] {
+				yWeights[y] /= totalYWeight
+			}
+
+			// Accumulate the weighted (and optionally source-masked) source
+			// pixels, skipping zero weights.
+			var pr, pg, pb, pa float64
+			for ky := iy; ky < jy; ky++ {
+				if yWeight := yWeights[ky-iy]; yWeight != 0 {
+					for kx := ix; kx < jx; kx++ {
+						if w := xWeights[kx-ix] * yWeight; w != 0 {
+							pu := src.RGBA64At(kx, ky)
+							if srcMask != nil {
+								_, _, _, ma := srcMask.At(smp.X+kx, smp.Y+ky).RGBA()
+								pu.R = uint16(uint32(pu.R) * ma / 0xffff)
+								pu.G = uint16(uint32(pu.G) * ma / 0xffff)
+								pu.B = uint16(uint32(pu.B) * ma / 0xffff)
+								pu.A = uint16(uint32(pu.A) * ma / 0xffff)
+							}
+							pr += float64(pu.R) * w
+							pg += float64(pu.G) * w
+							pb += float64(pu.B) * w
+							pa += float64(pu.A) * w
+						}
+					}
+				}
+			}
+
+			// Keep the color channels from exceeding alpha.
+			if pr > pa {
+				pr = pa
+			}
+			if pg > pa {
+				pg = pa
+			}
+			if pb > pa {
+				pb = pa
+			}
+
+			// NOTE: this local q (the current destination pixel) shadows the
+			// *Kernel receiver for the rest of this loop iteration.
+			q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
+			pr0 := uint32(fffftou(pr))
+			pg0 := uint32(fffftou(pg))
+			pb0 := uint32(fffftou(pb))
+			pa0 := uint32(fffftou(pa))
+			if dstMask != nil {
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr0 = pr0 * ma / 0xffff
+				pg0 = pg0 * ma / 0xffff
+				pb0 = pb0 * ma / 0xffff
+				pa0 = pa0 * ma / 0xffff
+			}
+			// Over: result = dst*(1-srcAlpha) + src, in 16-bit fixed point.
+			pa1 := 0xffff - pa0
+			dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + pr0)
+			dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + pg0)
+			dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + pb0)
+			dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + pa0)
+			dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64)
+		}
+	}
+}
+
+// transform_RGBA64Image_RGBA64Image_Src draws an affine-transformed
+// image.RGBA64Image source into an RGBA64Image destination with the Src
+// operator, honouring opts.SrcMask and opts.DstMask when they are set.
+//
+// adr is relative to dr.Min. For each destination pixel in adr the pixel
+// centre is mapped through d2s into source space; pixels whose mapped
+// position (plus bias) lies outside sr are left untouched. Otherwise the
+// source pixels within the kernel's half-width of that position (clipped to
+// sr) are combined using separable, normalized kernel weights. Without a
+// destination mask the result replaces the destination pixel; with one, the
+// mask alpha ma mixes the result (weight ma) with the existing pixel (weight
+// 0xffff-ma). Pixels are stored with SetRGBA64.
+func (q *Kernel) transform_RGBA64Image_RGBA64Image_Src(dst RGBA64Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.RGBA64Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
+	// When shrinking, broaden the effective kernel support so that we still
+	// visit every source pixel.
+	xHalfWidth, xKernelArgScale := q.Support, 1.0
+	if xscale > 1 {
+		xHalfWidth *= xscale
+		xKernelArgScale = 1 / xscale
+	}
+	yHalfWidth, yKernelArgScale := q.Support, 1.0
+	if yscale > 1 {
+		yHalfWidth *= yscale
+		yKernelArgScale = 1 / yscale
+	}
+
+	// Scratch buffers for the per-pixel weights, allocated once and reused
+	// for every destination pixel.
+	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
+	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))
+
+	srcMask, smp := opts.SrcMask, opts.SrcMaskP
+	dstMask, dmp := opts.DstMask, opts.DstMaskP
+	dstColorRGBA64 := color.RGBA64{}
+
+	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
+		dyf := float64(dr.Min.Y+int(dy)) + 0.5
+		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
+			dxf := float64(dr.Min.X+int(dx)) + 0.5
+			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
+			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
+			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
+				continue
+			}
+
+			// TODO: adjust the bias so that we can use int(f) instead
+			// of math.Floor(f) and math.Ceil(f).
+			sx += float64(bias.X)
+			sx -= 0.5
+			// [ix, jx) is the range of source columns under the kernel,
+			// clipped to sr.
+			ix := int(math.Floor(sx - xHalfWidth))
+			if ix < sr.Min.X {
+				ix = sr.Min.X
+			}
+			jx := int(math.Ceil(sx + xHalfWidth))
+			if jx > sr.Max.X {
+				jx = sr.Max.X
+			}
+
+			// Evaluate the kernel per column, then normalize by the total.
+			totalXWeight := 0.0
+			for kx := ix; kx < jx; kx++ {
+				xWeight := 0.0
+				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
+					xWeight = q.At(t)
+				}
+				xWeights[kx-ix] = xWeight
+				totalXWeight += xWeight
+			}
+			for x := range xWeights[:jx-ix] {
+				xWeights[x] /= totalXWeight
+			}
+
+			sy += float64(bias.Y)
+			sy -= 0.5
+			// [iy, jy) is the range of source rows under the kernel, clipped
+			// to sr.
+			iy := int(math.Floor(sy - yHalfWidth))
+			if iy < sr.Min.Y {
+				iy = sr.Min.Y
+			}
+			jy := int(math.Ceil(sy + yHalfWidth))
+			if jy > sr.Max.Y {
+				jy = sr.Max.Y
+			}
+
+			totalYWeight := 0.0
+			for ky := iy; ky < jy; ky++ {
+				yWeight := 0.0
+				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
+					yWeight = q.At(t)
+				}
+				yWeights[ky-iy] = yWeight
+				totalYWeight += yWeight
+			}
+			for y := range yWeights[:jy-iy] {
+				yWeights[y] /= totalYWeight
+			}
+
+			// Accumulate the weighted (and optionally source-masked) source
+			// pixels, skipping zero weights.
+			var pr, pg, pb, pa float64
+			for ky := iy; ky < jy; ky++ {
+				if yWeight := yWeights[ky-iy]; yWeight != 0 {
+					for kx := ix; kx < jx; kx++ {
+						if w := xWeights[kx-ix] * yWeight; w != 0 {
+							pu := src.RGBA64At(kx, ky)
+							if srcMask != nil {
+								_, _, _, ma := srcMask.At(smp.X+kx, smp.Y+ky).RGBA()
+								pu.R = uint16(uint32(pu.R) * ma / 0xffff)
+								pu.G = uint16(uint32(pu.G) * ma / 0xffff)
+								pu.B = uint16(uint32(pu.B) * ma / 0xffff)
+								pu.A = uint16(uint32(pu.A) * ma / 0xffff)
+							}
+							pr += float64(pu.R) * w
+							pg += float64(pu.G) * w
+							pb += float64(pu.B) * w
+							pa += float64(pu.A) * w
+						}
+					}
+				}
+			}
+
+			// Keep the color channels from exceeding alpha.
+			if pr > pa {
+				pr = pa
+			}
+			if pg > pa {
+				pg = pa
+			}
+			if pb > pa {
+				pb = pa
+			}
+
+			if dstMask != nil {
+				// NOTE: inside this branch q shadows the *Kernel receiver, and
+				// pr/pg/pb/pa are redeclared as mask-scaled uint32 values that
+				// shadow the float64 accumulators above.
+				q := dst.RGBA64At(dr.Min.X+int(dx), dr.Min.Y+int(dy))
+				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
+				pr := uint32(fffftou(pr)) * ma / 0xffff
+				pg := uint32(fffftou(pg)) * ma / 0xffff
+				pb := uint32(fffftou(pb)) * ma / 0xffff
+				pa := uint32(fffftou(pa)) * ma / 0xffff
+				// Keep (0xffff-ma)/0xffff of the existing destination pixel.
+				pa1 := 0xffff - ma
+				dstColorRGBA64.R = uint16(uint32(q.R)*pa1/0xffff + pr)
+				dstColorRGBA64.G = uint16(uint32(q.G)*pa1/0xffff + pg)
+				dstColorRGBA64.B = uint16(uint32(q.B)*pa1/0xffff + pb)
+				dstColorRGBA64.A = uint16(uint32(q.A)*pa1/0xffff + pa)
+				dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64)
+			} else {
+				// No mask: the computed color replaces the destination pixel.
+				dstColorRGBA64.R = fffftou(pr)
+				dstColorRGBA64.G = fffftou(pg)
+				dstColorRGBA64.B = fffftou(pb)
+				dstColorRGBA64.A = fffftou(pa)
+				dst.SetRGBA64(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColorRGBA64)
+			}
+		}
+	}
+}
+
 func (q *Kernel) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
 	// When shrinking, broaden the effective kernel support so that we still
 	// visit every source pixel.

diff --git a/draw/scale_test.go b/draw/scale_test.go
index 042a82d..bd6ff36 100644
--- a/draw/scale_test.go
+++ b/draw/scale_test.go

@@ -555,11 +555,68 @@
 	dst := image.NewRGBA(bounds)
 	mask := image.NewRGBA(bounds)
 
-	Copy(dst, image.ZP, src, bounds, Src, &Options{
+	Copy(dst, image.Point{}, src, bounds, Src, &Options{
 		DstMask: mask,
 	})
 }
 
+func TestScaleRGBA64ImageAllocations(t *testing.T) {
+	// The goal of RGBA64Image is to prevent heap allocation of the color
+	// argument by using a non-interface type. Assert that we meet that goal.
+	// This assumes there is no fast path for *image.RGBA64.
+	src := image.NewRGBA64(image.Rect(0, 0, 16, 32))
+	dst := image.NewRGBA64(image.Rect(0, 0, 32, 16))
+	fillPix(rand.New(rand.NewSource(1)), src.Pix, dst.Pix)
+	t.Run("Over", func(t *testing.T) {
+		allocs := testing.AllocsPerRun(10, func() {
+			CatmullRom.Scale(dst, dst.Bounds(), src, src.Bounds(), Over, nil)
+		})
+		// Scale and Transform below allocate on their own, so allocations will
+		// never be zero. The expectation we want to check is that the number
+		// of allocations does not scale linearly with the number of pixels in
+		// the image. We could test that directly, but it's sufficient to test
+		// that we have far fewer allocations than the number of pixels, 512.
+		if allocs > 8 {
+			t.Errorf("too many allocations: %v", allocs)
+		}
+	})
+	t.Run("Src", func(t *testing.T) {
+		allocs := testing.AllocsPerRun(10, func() {
+			CatmullRom.Scale(dst, dst.Bounds(), src, src.Bounds(), Src, nil)
+		})
+		if allocs > 8 {
+			t.Errorf("too many allocations: %v", allocs)
+		}
+	})
+}
+
+func TestTransformRGBA64ImageAllocations(t *testing.T) {
+	// This assumes there is no fast path for *image.RGBA64.
+	src := image.NewRGBA64(image.Rect(0, 0, 16, 32))
+	dst := image.NewRGBA64(image.Rect(0, 0, 32, 16))
+	fillPix(rand.New(rand.NewSource(1)), src.Pix, dst.Pix)
+	mat := f64.Aff3{
+		2, 0, 0,
+		0, 0.5, 0,
+	}
+	t.Run("Over", func(t *testing.T) {
+		allocs := testing.AllocsPerRun(10, func() {
+			CatmullRom.Transform(dst, mat, src, src.Bounds(), Over, nil)
+		})
+		if allocs > 8 {
+			t.Errorf("too many allocations: %v", allocs)
+		}
+	})
+	t.Run("Src", func(t *testing.T) {
+		allocs := testing.AllocsPerRun(10, func() {
+			CatmullRom.Transform(dst, mat, src, src.Bounds(), Src, nil)
+		})
+		if allocs > 8 {
+			t.Errorf("too many allocations: %v", allocs)
+		}
+	})
+}
+
 // The fooWrapper types wrap the dst or src image to avoid triggering the
 // type-specific fast path implementations.
 type (
@@ -602,6 +659,12 @@
 	return m, nil
 }
 
+func srcRGBA64(boundsHint image.Rectangle) (image.Image, error) {
+	m := image.NewRGBA64(boundsHint)
+	fillPix(rand.New(rand.NewSource(4)), m.Pix)
+	return m, nil
+}
+
 func srcLarge(boundsHint image.Rectangle) (image.Image, error) {
 	// 3072 x 2304 is over 7 million pixels at 4:3, comparable to a
 	// 2015 smart-phone camera's output.
@@ -686,42 +749,54 @@
 func BenchmarkTformNNOverRGBA(b *testing.B) { benchTform(b, 200, 150, Over, srcRGBA, NearestNeighbor) }
 func BenchmarkTformNNOverUnif(b *testing.B) { benchTform(b, 200, 150, Over, srcUnif, NearestNeighbor) }
 
-func BenchmarkScaleABSrcGray(b *testing.B)  { benchScale(b, 200, 150, Src, srcGray, ApproxBiLinear) }
-func BenchmarkScaleABSrcNRGBA(b *testing.B) { benchScale(b, 200, 150, Src, srcNRGBA, ApproxBiLinear) }
-func BenchmarkScaleABSrcRGBA(b *testing.B)  { benchScale(b, 200, 150, Src, srcRGBA, ApproxBiLinear) }
-func BenchmarkScaleABSrcYCbCr(b *testing.B) { benchScale(b, 200, 150, Src, srcYCbCr, ApproxBiLinear) }
+func BenchmarkScaleABSrcGray(b *testing.B)   { benchScale(b, 200, 150, Src, srcGray, ApproxBiLinear) }
+func BenchmarkScaleABSrcNRGBA(b *testing.B)  { benchScale(b, 200, 150, Src, srcNRGBA, ApproxBiLinear) }
+func BenchmarkScaleABSrcRGBA(b *testing.B)   { benchScale(b, 200, 150, Src, srcRGBA, ApproxBiLinear) }
+func BenchmarkScaleABSrcYCbCr(b *testing.B)  { benchScale(b, 200, 150, Src, srcYCbCr, ApproxBiLinear) }
+func BenchmarkScaleABSrcRGBA64(b *testing.B) { benchScale(b, 200, 150, Src, srcRGBA64, ApproxBiLinear) }
 
 func BenchmarkScaleABOverGray(b *testing.B)  { benchScale(b, 200, 150, Over, srcGray, ApproxBiLinear) }
 func BenchmarkScaleABOverNRGBA(b *testing.B) { benchScale(b, 200, 150, Over, srcNRGBA, ApproxBiLinear) }
 func BenchmarkScaleABOverRGBA(b *testing.B)  { benchScale(b, 200, 150, Over, srcRGBA, ApproxBiLinear) }
 func BenchmarkScaleABOverYCbCr(b *testing.B) { benchScale(b, 200, 150, Over, srcYCbCr, ApproxBiLinear) }
+func BenchmarkScaleABOverRGBA64(b *testing.B) {
+	benchScale(b, 200, 150, Over, srcRGBA64, ApproxBiLinear)
+}
 
-func BenchmarkTformABSrcGray(b *testing.B)  { benchTform(b, 200, 150, Src, srcGray, ApproxBiLinear) }
-func BenchmarkTformABSrcNRGBA(b *testing.B) { benchTform(b, 200, 150, Src, srcNRGBA, ApproxBiLinear) }
-func BenchmarkTformABSrcRGBA(b *testing.B)  { benchTform(b, 200, 150, Src, srcRGBA, ApproxBiLinear) }
-func BenchmarkTformABSrcYCbCr(b *testing.B) { benchTform(b, 200, 150, Src, srcYCbCr, ApproxBiLinear) }
+func BenchmarkTformABSrcGray(b *testing.B)   { benchTform(b, 200, 150, Src, srcGray, ApproxBiLinear) }
+func BenchmarkTformABSrcNRGBA(b *testing.B)  { benchTform(b, 200, 150, Src, srcNRGBA, ApproxBiLinear) }
+func BenchmarkTformABSrcRGBA(b *testing.B)   { benchTform(b, 200, 150, Src, srcRGBA, ApproxBiLinear) }
+func BenchmarkTformABSrcYCbCr(b *testing.B)  { benchTform(b, 200, 150, Src, srcYCbCr, ApproxBiLinear) }
+func BenchmarkTformABSrcRGBA64(b *testing.B) { benchTform(b, 200, 150, Src, srcRGBA64, ApproxBiLinear) }
 
 func BenchmarkTformABOverGray(b *testing.B)  { benchTform(b, 200, 150, Over, srcGray, ApproxBiLinear) }
 func BenchmarkTformABOverNRGBA(b *testing.B) { benchTform(b, 200, 150, Over, srcNRGBA, ApproxBiLinear) }
 func BenchmarkTformABOverRGBA(b *testing.B)  { benchTform(b, 200, 150, Over, srcRGBA, ApproxBiLinear) }
 func BenchmarkTformABOverYCbCr(b *testing.B) { benchTform(b, 200, 150, Over, srcYCbCr, ApproxBiLinear) }
+func BenchmarkTformABOverRGBA64(b *testing.B) {
+	benchTform(b, 200, 150, Over, srcRGBA64, ApproxBiLinear)
+}
 
-func BenchmarkScaleCRSrcGray(b *testing.B)  { benchScale(b, 200, 150, Src, srcGray, CatmullRom) }
-func BenchmarkScaleCRSrcNRGBA(b *testing.B) { benchScale(b, 200, 150, Src, srcNRGBA, CatmullRom) }
-func BenchmarkScaleCRSrcRGBA(b *testing.B)  { benchScale(b, 200, 150, Src, srcRGBA, CatmullRom) }
-func BenchmarkScaleCRSrcYCbCr(b *testing.B) { benchScale(b, 200, 150, Src, srcYCbCr, CatmullRom) }
+func BenchmarkScaleCRSrcGray(b *testing.B)   { benchScale(b, 200, 150, Src, srcGray, CatmullRom) }
+func BenchmarkScaleCRSrcNRGBA(b *testing.B)  { benchScale(b, 200, 150, Src, srcNRGBA, CatmullRom) }
+func BenchmarkScaleCRSrcRGBA(b *testing.B)   { benchScale(b, 200, 150, Src, srcRGBA, CatmullRom) }
+func BenchmarkScaleCRSrcYCbCr(b *testing.B)  { benchScale(b, 200, 150, Src, srcYCbCr, CatmullRom) }
+func BenchmarkScaleCRSrcRGBA64(b *testing.B) { benchScale(b, 200, 150, Src, srcRGBA64, CatmullRom) }
 
-func BenchmarkScaleCROverGray(b *testing.B)  { benchScale(b, 200, 150, Over, srcGray, CatmullRom) }
-func BenchmarkScaleCROverNRGBA(b *testing.B) { benchScale(b, 200, 150, Over, srcNRGBA, CatmullRom) }
-func BenchmarkScaleCROverRGBA(b *testing.B)  { benchScale(b, 200, 150, Over, srcRGBA, CatmullRom) }
-func BenchmarkScaleCROverYCbCr(b *testing.B) { benchScale(b, 200, 150, Over, srcYCbCr, CatmullRom) }
+func BenchmarkScaleCROverGray(b *testing.B)   { benchScale(b, 200, 150, Over, srcGray, CatmullRom) }
+func BenchmarkScaleCROverNRGBA(b *testing.B)  { benchScale(b, 200, 150, Over, srcNRGBA, CatmullRom) }
+func BenchmarkScaleCROverRGBA(b *testing.B)   { benchScale(b, 200, 150, Over, srcRGBA, CatmullRom) }
+func BenchmarkScaleCROverYCbCr(b *testing.B)  { benchScale(b, 200, 150, Over, srcYCbCr, CatmullRom) }
+func BenchmarkScaleCROverRGBA64(b *testing.B) { benchScale(b, 200, 150, Over, srcRGBA64, CatmullRom) }
 
-func BenchmarkTformCRSrcGray(b *testing.B)  { benchTform(b, 200, 150, Src, srcGray, CatmullRom) }
-func BenchmarkTformCRSrcNRGBA(b *testing.B) { benchTform(b, 200, 150, Src, srcNRGBA, CatmullRom) }
-func BenchmarkTformCRSrcRGBA(b *testing.B)  { benchTform(b, 200, 150, Src, srcRGBA, CatmullRom) }
-func BenchmarkTformCRSrcYCbCr(b *testing.B) { benchTform(b, 200, 150, Src, srcYCbCr, CatmullRom) }
+func BenchmarkTformCRSrcGray(b *testing.B)   { benchTform(b, 200, 150, Src, srcGray, CatmullRom) }
+func BenchmarkTformCRSrcNRGBA(b *testing.B)  { benchTform(b, 200, 150, Src, srcNRGBA, CatmullRom) }
+func BenchmarkTformCRSrcRGBA(b *testing.B)   { benchTform(b, 200, 150, Src, srcRGBA, CatmullRom) }
+func BenchmarkTformCRSrcYCbCr(b *testing.B)  { benchTform(b, 200, 150, Src, srcYCbCr, CatmullRom) }
+func BenchmarkTformCRSrcRGBA64(b *testing.B) { benchTform(b, 200, 150, Src, srcRGBA64, CatmullRom) }
 
-func BenchmarkTformCROverGray(b *testing.B)  { benchTform(b, 200, 150, Over, srcGray, CatmullRom) }
-func BenchmarkTformCROverNRGBA(b *testing.B) { benchTform(b, 200, 150, Over, srcNRGBA, CatmullRom) }
-func BenchmarkTformCROverRGBA(b *testing.B)  { benchTform(b, 200, 150, Over, srcRGBA, CatmullRom) }
-func BenchmarkTformCROverYCbCr(b *testing.B) { benchTform(b, 200, 150, Over, srcYCbCr, CatmullRom) }
+func BenchmarkTformCROverGray(b *testing.B)   { benchTform(b, 200, 150, Over, srcGray, CatmullRom) }
+func BenchmarkTformCROverNRGBA(b *testing.B)  { benchTform(b, 200, 150, Over, srcNRGBA, CatmullRom) }
+func BenchmarkTformCROverRGBA(b *testing.B)   { benchTform(b, 200, 150, Over, srcRGBA, CatmullRom) }
+func BenchmarkTformCROverYCbCr(b *testing.B)  { benchTform(b, 200, 150, Over, srcYCbCr, CatmullRom) }
+func BenchmarkTformCROverRGBA64(b *testing.B) { benchTform(b, 200, 150, Over, srcRGBA64, CatmullRom) }
commit	3aac9c63df6c65cda99e4890d824b12c2c440b89	[log] [tgz]
author	Branden J Brown <zephyrtronium@gmail.com>	Fri Sep 01 20:54:35 2023 -0500
committer	Gopher Robot <gobot@golang.org>	Thu Sep 07 14:37:48 2023 +0000
tree	e4fcc6318d9908d9cb7be29289c6e120f4c5d97e
parent	fa10be5a6235278651316f4f0cfc136d80cef613 [diff]