vector: change ϕ from 10 to 9.

This slight loss in quality allows us to use int32 math exclusively throughout raster_fixed.go, instead of occasionally dropping into int64 math.

The change in ϕ doesn't affect the benchmarks noticeably, but staying in int32 does. The net effect:

name old time/op new time/op delta
GlyphAlpha16Over-8 3.36µs ± 0% 2.99µs ± 0% -10.89% (p=0.000 n=10+9)
GlyphAlpha16Src-8 3.26µs ± 0% 2.89µs ± 1% -11.34% (p=0.000 n=9+10)
GlyphAlpha32Over-8 5.20µs ± 0% 4.53µs ± 0% -12.76% (p=0.000 n=8+10)
GlyphAlpha32Src-8 4.81µs ± 1% 4.14µs ± 0% -13.91% (p=0.000 n=9+9)
GlyphAlpha64Over-8 10.2µs ± 0% 9.0µs ± 1% -11.99% (p=0.000 n=9+10)
GlyphAlpha64Src-8 8.62µs ± 0% 7.42µs ± 1% -13.89% (p=0.000 n=9+10)
GlyphAlpha128Over-8 24.1µs ± 0% 21.8µs ± 0% -9.32% (p=0.000 n=9+9)
GlyphAlpha128Src-8 17.9µs ± 0% 15.6µs ± 0% -12.68% (p=0.000 n=9+10)
GlyphAlpha256Over-8 70.1µs ± 0% 66.3µs ± 1% -5.44% (p=0.000 n=10+10)
GlyphAlpha256Src-8 45.2µs ± 1% 41.2µs ± 1% -8.92% (p=0.000 n=10+10)
GlyphRGBA16Over-8 5.12µs ± 0% 4.75µs ± 0% -7.15% (p=0.000 n=10+9)
GlyphRGBA16Src-8 4.57µs ± 1% 4.20µs ± 0% -8.18% (p=0.000 n=9+8)
GlyphRGBA32Over-8 12.1µs ± 0% 11.4µs ± 0% -5.50% (p=0.000 n=10+9)
GlyphRGBA32Src-8 10.0µs ± 0% 9.3µs ± 1% -6.80% (p=0.000 n=10+9)
GlyphRGBA64Over-8 37.2µs ± 0% 36.0µs ± 0% -3.17% (p=0.000 n=9+8)
GlyphRGBA64Src-8 29.0µs ± 1% 27.9µs ± 1% -4.05% (p=0.000 n=9+10)
GlyphRGBA128Over-8 134µs ± 1% 131µs ± 0% -1.85% (p=0.000 n=9+9)
GlyphRGBA128Src-8 100µs ± 1% 98µs ± 0% -2.27% (p=0.000 n=10+9)
GlyphRGBA256Over-8 506µs ± 0% 503µs ± 0% -0.56% (p=0.000 n=10+8)
GlyphRGBA256Src-8 373µs ± 0% 370µs ± 0% -1.01% (p=0.000 n=10+9)

Change-Id: Ie02afac6fd6fa95f090bf3fe0a5c744799ea9dc5
Reviewed-on: https://go-review.googlesource.com/31532
Reviewed-by: David Crawshaw <crawshaw@golang.org>

commit: 8874bef159af77ab9e746fc3c80b2219b3652045 [log] [tgz]
author: Nigel Tao <nigeltao@golang.org> Thu Oct 20 10:34:23 2016 +1100
committer: Nigel Tao <nigeltao@golang.org> Thu Oct 20 04:12:57 2016 +0000
tree: 43eb47b0aaf328bda952604d51943c76f968ef07
parent: fa54d6fa1c2aaf941da0bc4885015df17693f8ec [diff]
diff --git a/vector/acc_amd64.s b/vector/acc_amd64.s
index 31b9c6e..8d9719c 100644
--- a/vector/acc_amd64.s
+++ b/vector/acc_amd64.s

@@ -143,18 +143,18 @@
 	PADDD X7, X1
 
 	// y = abs(x)
-	// y >>= 4 // Shift by 2*ϕ - 16.
+	// y >>= 2 // Shift by 2*ϕ - 16.
 	// y = min(y, fxAlmost65536)
 	//
 	// pabsd  %xmm1,%xmm2
-	// psrld  $0x4,%xmm2
+	// psrld  $0x2,%xmm2
 	// pminud %xmm5,%xmm2
 	//
 	// Hopefully we'll get these opcode mnemonics into the assembler for Go
 	// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 	// it's similar.
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x04
+	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 
 	// z = convertToInt32(y)
@@ -230,18 +230,18 @@
 	PADDD X7, X1
 
 	// y = abs(x)
-	// y >>= 4 // Shift by 2*ϕ - 16.
+	// y >>= 2 // Shift by 2*ϕ - 16.
 	// y = min(y, fxAlmost65536)
 	//
 	// pabsd  %xmm1,%xmm2
-	// psrld  $0x4,%xmm2
+	// psrld  $0x2,%xmm2
 	// pminud %xmm5,%xmm2
 	//
 	// Hopefully we'll get these opcode mnemonics into the assembler for Go
 	// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 	// it's similar.
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x04
+	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 
 	// z = convertToInt32(y)
@@ -353,18 +353,18 @@
 	PADDD X7, X1
 
 	// y = abs(x)
-	// y >>= 12 // Shift by 2*ϕ - 8.
+	// y >>= 10 // Shift by 2*ϕ - 8.
 	// y = min(y, fxAlmost256)
 	//
 	// pabsd  %xmm1,%xmm2
-	// psrld  $0xc,%xmm2
+	// psrld  $0xa,%xmm2
 	// pminud %xmm5,%xmm2
 	//
 	// Hopefully we'll get these opcode mnemonics into the assembler for Go
 	// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 	// it's similar.
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0c
+	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0a
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 
 	// z = convertToInt32(y)
@@ -397,18 +397,18 @@
 	PADDD X7, X1
 
 	// y = abs(x)
-	// y >>= 12 // Shift by 2*ϕ - 8.
+	// y >>= 10 // Shift by 2*ϕ - 8.
 	// y = min(y, fxAlmost256)
 	//
 	// pabsd  %xmm1,%xmm2
-	// psrld  $0xc,%xmm2
+	// psrld  $0xa,%xmm2
 	// pminud %xmm5,%xmm2
 	//
 	// Hopefully we'll get these opcode mnemonics into the assembler for Go
 	// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 	// it's similar.
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0c
+	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0a
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 
 	// z = convertToInt32(y)
@@ -498,18 +498,18 @@
 	PADDD X7, X1
 
 	// y = abs(x)
-	// y >>= 4 // Shift by 2*ϕ - 16.
+	// y >>= 2 // Shift by 2*ϕ - 16.
 	// y = min(y, fxAlmost65536)
 	//
 	// pabsd  %xmm1,%xmm2
-	// psrld  $0x4,%xmm2
+	// psrld  $0x2,%xmm2
 	// pminud %xmm5,%xmm2
 	//
 	// Hopefully we'll get these opcode mnemonics into the assembler for Go
 	// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 	// it's similar.
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x04
+	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 
 	// z = convertToInt32(y)
@@ -540,18 +540,18 @@
 	PADDD X7, X1
 
 	// y = abs(x)
-	// y >>= 4 // Shift by 2*ϕ - 16.
+	// y >>= 2 // Shift by 2*ϕ - 16.
 	// y = min(y, fxAlmost65536)
 	//
 	// pabsd  %xmm1,%xmm2
-	// psrld  $0x4,%xmm2
+	// psrld  $0x2,%xmm2
 	// pminud %xmm5,%xmm2
 	//
 	// Hopefully we'll get these opcode mnemonics into the assembler for Go
 	// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 	// it's similar.
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x04
+	BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
 	BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 
 	// z = convertToInt32(y)

diff --git a/vector/acc_test.go b/vector/acc_test.go
index db238f7..1dce439 100644
--- a/vector/acc_test.go
+++ b/vector/acc_test.go

@@ -370,18 +370,18 @@
 }
 
 var fxInShort = []uint32{
-	itou(+0x020000), // +0.125, // Running sum: +0.125
-	itou(-0x080000), // -0.500, // Running sum: -0.375
-	itou(+0x040000), // +0.250, // Running sum: -0.125
-	itou(+0x060000), // +0.375, // Running sum: +0.250
-	itou(+0x020000), // +0.125, // Running sum: +0.375
-	itou(+0x000000), // +0.000, // Running sum: +0.375
-	itou(-0x100000), // -1.000, // Running sum: -0.625
-	itou(-0x080000), // -0.500, // Running sum: -1.125
-	itou(+0x040000), // +0.250, // Running sum: -0.875
-	itou(+0x0e0000), // +0.875, // Running sum: +0.000
-	itou(+0x040000), // +0.250, // Running sum: +0.250
-	itou(+0x0c0000), // +0.750, // Running sum: +1.000
+	itou(+0x08000), // +0.125, // Running sum: +0.125
+	itou(-0x20000), // -0.500, // Running sum: -0.375
+	itou(+0x10000), // +0.250, // Running sum: -0.125
+	itou(+0x18000), // +0.375, // Running sum: +0.250
+	itou(+0x08000), // +0.125, // Running sum: +0.375
+	itou(+0x00000), // +0.000, // Running sum: +0.375
+	itou(-0x40000), // -1.000, // Running sum: -0.625
+	itou(-0x20000), // -0.500, // Running sum: -1.125
+	itou(+0x10000), // +0.250, // Running sum: -0.875
+	itou(+0x38000), // +0.875, // Running sum: +0.000
+	itou(+0x10000), // +0.250, // Running sum: +0.250
+	itou(+0x30000), // +0.750, // Running sum: +1.000
 }
 
 var flInShort = []float32{
@@ -518,32 +518,32 @@
 )
 
 var hardCodedFxIn16 = []uint32{
-	0x00000000, 0x00000000, 0xffffa3ee, 0xfff9f0c9, 0xfffaaafc, 0xfffd38ec, 0xffff073f, 0x0001dddf,
-	0x0002589a, 0x0006a22c, 0x0004a6df, 0x000000a0, 0x00000000, 0x00000000, 0xfffdb883, 0xfff4c620,
-	0xfffd815f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00052ec6,
-	0x000ab1ba, 0x00001f7f, 0xffff29b7, 0xfff2ad44, 0xfffe2906, 0x00006c84, 0x0006ce82, 0x00050d7b,
-	0x00010db4, 0xfffd8c05, 0xfff85159, 0xfffccc6d, 0x00000000, 0x00088d28, 0x000772d8, 0xfff8a36a,
-	0xfff75c96, 0x00000000, 0x000a2b80, 0x0005d480, 0x00000000, 0x00000000, 0x00000000, 0xffff4bbf,
-	0xfff2b937, 0xfffdfb0b, 0x0001cc00, 0x000e3400, 0xfffa4980, 0xfffcb680, 0x000008e8, 0x0008966f,
-	0x000060a8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfff72000, 0xfff8e000, 0x00000165,
-	0x000e9134, 0x00016d65, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0xfff8d3be, 0xfff72c42, 0x00000000, 0x000cec0f, 0x000313f1, 0x00000000,
-	0x00000000, 0xfffe84f1, 0xfffbbb8f, 0xfffe3008, 0xfffe311b, 0xffff1e60, 0x00000000, 0xfffd6f10,
-	0xfffcd0f0, 0x00000000, 0x000cec00, 0x00031400, 0xfffe6d8a, 0xfff7d307, 0xfffa38bf, 0xffff86b3,
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000cec00,
-	0x00024dc4, 0xfff3cc79, 0xfffcf9c4, 0x00003ed0, 0x000467df, 0x0004c32f, 0x0001a038, 0x00012964,
-	0x00002883, 0xfffa7bf1, 0xfff9280f, 0x00000000, 0x000cec00, 0xfffa2901, 0xfff8eaff, 0x00004138,
-	0x000aebd5, 0x0004d2f2, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfff8dc00, 0xfff72400,
-	0x00000000, 0x000cec00, 0xfff64800, 0xfffccc00, 0x00039400, 0x000c6c00, 0x00000000, 0x00000000,
-	0x00000000, 0x00000000, 0x00000000, 0xfff8dc00, 0xfff72400, 0x00000000, 0x000cec00, 0xfff3ea8a,
-	0xffff2976, 0x00047cad, 0x000b8353, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0xfff6cb2e, 0xfff934d2, 0x00000000, 0x000cec00, 0xfff68000, 0xfffc9400, 0x0000babf, 0x000cfbcc,
-	0x00024974, 0x00000000, 0x00000000, 0x00000000, 0xfffa12a1, 0xfff61e13, 0xffffcf4d, 0x00000000,
-	0x000c79a0, 0xfffcac8c, 0xfff6d9d4, 0x00000000, 0x00015024, 0x0006d297, 0x000288dc, 0xfffe8e52,
-	0xfffaba3a, 0xfffc0cbd, 0xffffff20, 0x00000000, 0x00000000, 0x000b5c00, 0x000496d7, 0xfff5a25f,
-	0xfffa6acc, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0002abf1, 0x0005195f,
-	0xfff83aae, 0x00000000, 0x00089fb6, 0x0007604a, 0xfffffe47, 0xfffb0173, 0xfff94d6b, 0xfffd7586,
-	0xffff5219, 0x000319cc, 0x0003eed3, 0x0007529f, 0xfffedc08, 0xfff647f6, 0x00000000, 0x000392ce,
+	0x00000000, 0x00000000, 0xffffe91d, 0xfffe7c4a, 0xfffeaa9f, 0xffff4e33, 0xffffc1c5, 0x00007782,
+	0x00009619, 0x0001a857, 0x000129e9, 0x00000028, 0x00000000, 0x00000000, 0xffff6e70, 0xfffd3199,
+	0xffff5ff8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00014b29,
+	0x0002acf3, 0x000007e2, 0xffffca5a, 0xfffcab73, 0xffff8a34, 0x00001b55, 0x0001b334, 0x0001449e,
+	0x0000434d, 0xffff62ec, 0xfffe1443, 0xffff325d, 0x00000000, 0x0002234a, 0x0001dcb6, 0xfffe2948,
+	0xfffdd6b8, 0x00000000, 0x00028cc0, 0x00017340, 0x00000000, 0x00000000, 0x00000000, 0xffffd2d6,
+	0xfffcadd0, 0xffff7f5c, 0x00007400, 0x00038c00, 0xfffe9260, 0xffff2da0, 0x0000023a, 0x0002259b,
+	0x0000182a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffdc600, 0xfffe3a00, 0x00000059,
+	0x0003a44d, 0x00005b59, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0xfffe33f3, 0xfffdcc0d, 0x00000000, 0x00033c02, 0x0000c3fe, 0x00000000,
+	0x00000000, 0xffffa13d, 0xfffeeec8, 0xffff8c02, 0xffff8c48, 0xffffc7b5, 0x00000000, 0xffff5b68,
+	0xffff3498, 0x00000000, 0x00033c00, 0x0000c400, 0xffff9bc4, 0xfffdf4a3, 0xfffe8df3, 0xffffe1a8,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00033c00,
+	0x000092c7, 0xfffcf373, 0xffff3dc7, 0x00000fcc, 0x00011ae7, 0x000130c3, 0x0000680d, 0x00004a59,
+	0x00000a20, 0xfffe9dc4, 0xfffe4a3c, 0x00000000, 0x00033c00, 0xfffe87ef, 0xfffe3c11, 0x0000105e,
+	0x0002b9c4, 0x000135dc, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffe3600, 0xfffdca00,
+	0x00000000, 0x00033c00, 0xfffd9000, 0xffff3400, 0x0000e400, 0x00031c00, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0xfffe3600, 0xfffdca00, 0x00000000, 0x00033c00, 0xfffcf9a5,
+	0xffffca5b, 0x000120e6, 0x0002df1a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0xfffdb195, 0xfffe4e6b, 0x00000000, 0x00033c00, 0xfffd9e00, 0xffff2600, 0x00002f0e, 0x00033ea3,
+	0x0000924d, 0x00000000, 0x00000000, 0x00000000, 0xfffe83b3, 0xfffd881d, 0xfffff431, 0x00000000,
+	0x00031f60, 0xffff297a, 0xfffdb726, 0x00000000, 0x000053a7, 0x0001b506, 0x0000a24b, 0xffffa32d,
+	0xfffead9b, 0xffff0479, 0xffffffc9, 0x00000000, 0x00000000, 0x0002d800, 0x0001249d, 0xfffd67bb,
+	0xfffe9baa, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ac03, 0x0001448b,
+	0xfffe0f70, 0x00000000, 0x000229ea, 0x0001d616, 0xffffff8c, 0xfffebf76, 0xfffe54d9, 0xffff5d9e,
+	0xffffd3eb, 0x0000c65e, 0x0000fc15, 0x0001d491, 0xffffb566, 0xfffd9433, 0x00000000, 0x0000e4ec,
 }
 
 var hardCodedFlIn16 = []float32{
@@ -576,22 +576,22 @@
 }
 
 var fxMask16 = []uint32{
-	0x0000, 0x0000, 0x05c1, 0x66b4, 0xbc04, 0xe876, 0xf802, 0xda24, 0xb49a, 0x4a77, 0x0009, 0x0000, 0x0000,
-	0x0000, 0x2477, 0xd815, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xad13, 0x01f7, 0x0000,
-	0x0d64, 0xe290, 0xffff, 0xf937, 0x8c4f, 0x3b77, 0x2a9c, 0x51dc, 0xccc6, 0xffff, 0xffff, 0x772d, 0x0000,
-	0x75c9, 0xffff, 0xffff, 0x5d47, 0x0000, 0x0000, 0x0000, 0x0000, 0x0b43, 0xdfb0, 0xffff, 0xe33f, 0x0000,
-	0x5b67, 0x8fff, 0x8f71, 0x060a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8dff, 0xffff, 0xffe9, 0x16d6,
-	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x72c4, 0xffff, 0xffff, 0x313e,
-	0x0000, 0x0000, 0x0000, 0x17b0, 0x5bf7, 0x78f7, 0x95e5, 0xa3ff, 0xa3ff, 0xcd0e, 0xffff, 0xffff, 0x313f,
-	0x0000, 0x1927, 0x9bf6, 0xf86a, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x313f,
-	0x0c63, 0xcf9b, 0xffff, 0xfc12, 0xb594, 0x6961, 0x4f5e, 0x3cc7, 0x3a3f, 0x9280, 0xffff, 0xffff, 0x313f,
-	0x8eaf, 0xffff, 0xfbec, 0x4d2e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x723f, 0xffff, 0xffff, 0x313f,
-	0xccbf, 0xffff, 0xc6bf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x723f, 0xffff, 0xffff, 0x313f,
-	0xf297, 0xffff, 0xb834, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x934c, 0xffff, 0xffff, 0x313f,
-	0xc93f, 0xffff, 0xf453, 0x2497, 0x0000, 0x0000, 0x0000, 0x0000, 0x5ed5, 0xfcf4, 0xffff, 0xffff, 0x3865,
-	0x6d9c, 0xffff, 0xffff, 0xeafd, 0x7dd4, 0x5546, 0x6c61, 0xc0bd, 0xfff1, 0xffff, 0xffff, 0xffff, 0x4a3f,
-	0x00d2, 0xa6ac, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xd540, 0x83aa, 0xffff, 0xffff, 0x7604,
-	0x0000, 0x001b, 0x5004, 0xbb2d, 0xe3d5, 0xeeb3, 0xbd16, 0x7e29, 0x08ff, 0x1b3f, 0xb6bf, 0xb6bf, 0x7d92,
+	0x0000, 0x0000, 0x05b8, 0x66a6, 0xbbfe, 0xe871, 0xf800, 0xda20, 0xb499, 0x4a84, 0x0009, 0x0000, 0x0000,
+	0x0000, 0x2463, 0xd7fd, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xad35, 0x01f8, 0x0000,
+	0x0d69, 0xe28c, 0xffff, 0xf92a, 0x8c5d, 0x3b36, 0x2a62, 0x51a7, 0xcc97, 0xffff, 0xffff, 0x772d, 0x0000,
+	0x75ad, 0xffff, 0xffff, 0x5ccf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0b4a, 0xdfd6, 0xffff, 0xe2ff, 0x0000,
+	0x5b67, 0x8fff, 0x8f70, 0x060a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8e7f, 0xffff, 0xffe9, 0x16d6,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7303, 0xffff, 0xffff, 0x30ff,
+	0x0000, 0x0000, 0x0000, 0x17b0, 0x5bfe, 0x78fe, 0x95ec, 0xa3fe, 0xa3fe, 0xcd24, 0xfffe, 0xfffe, 0x30fe,
+	0x0001, 0x190d, 0x9be5, 0xf868, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0x30fe,
+	0x0c4c, 0xcf6f, 0xfffe, 0xfc0b, 0xb551, 0x6920, 0x4f1d, 0x3c87, 0x39ff, 0x928e, 0xffff, 0xffff, 0x30ff,
+	0x8f03, 0xffff, 0xfbe7, 0x4d76, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x727f, 0xffff, 0xffff, 0x30ff,
+	0xccff, 0xffff, 0xc6ff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x727f, 0xffff, 0xffff, 0x30ff,
+	0xf296, 0xffff, 0xb7c6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x939a, 0xffff, 0xffff, 0x30ff,
+	0xc97f, 0xffff, 0xf43c, 0x2493, 0x0000, 0x0000, 0x0000, 0x0000, 0x5f13, 0xfd0c, 0xffff, 0xffff, 0x3827,
+	0x6dc9, 0xffff, 0xffff, 0xeb16, 0x7dd4, 0x5541, 0x6c76, 0xc10f, 0xfff1, 0xffff, 0xffff, 0xffff, 0x49ff,
+	0x00d8, 0xa6e9, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xd4fe, 0x83db, 0xffff, 0xffff, 0x7584,
+	0x0000, 0x001c, 0x503e, 0xbb08, 0xe3a1, 0xeea6, 0xbd0e, 0x7e09, 0x08e5, 0x1b8b, 0xb67f, 0xb67f, 0x7d44,
 }
 
 var flMask16 = []uint32{

diff --git a/vector/gen.go b/vector/gen.go
index 355226a..92be417 100644
--- a/vector/gen.go
+++ b/vector/gen.go

@@ -308,34 +308,34 @@
 
 	fxClampAndScale256 = `
 		// y = abs(x)
-		// y >>= 12 // Shift by 2*ϕ - 8.
+		// y >>= 10 // Shift by 2*ϕ - 8.
 		// y = min(y, fxAlmost256)
 		//
 		// pabsd  %xmm1,%xmm2
-		// psrld  $0xc,%xmm2
+		// psrld  $0xa,%xmm2
 		// pminud %xmm5,%xmm2
 		//
 		// Hopefully we'll get these opcode mnemonics into the assembler for Go
 		// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 		// it's similar.
 		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-		BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0c
+		BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x0a
 		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 		`
 	fxClampAndScale65536 = `
 		// y = abs(x)
-		// y >>= 4 // Shift by 2*ϕ - 16.
+		// y >>= 2 // Shift by 2*ϕ - 16.
 		// y = min(y, fxAlmost65536)
 		//
 		// pabsd  %xmm1,%xmm2
-		// psrld  $0x4,%xmm2
+		// psrld  $0x2,%xmm2
 		// pminud %xmm5,%xmm2
 		//
 		// Hopefully we'll get these opcode mnemonics into the assembler for Go
 		// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
 		// it's similar.
 		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
-		BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x04
+		BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
 		BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
 		`
 	flClampAndScale256 = `

diff --git a/vector/raster_fixed.go b/vector/raster_fixed.go
index fe5f75a..086b9fc 100644
--- a/vector/raster_fixed.go
+++ b/vector/raster_fixed.go

@@ -19,11 +19,7 @@
 	//
 	// When changing this number, also change the assembly code (search for ϕ
 	// in the .s files).
-	//
-	// TODO: drop ϕ from 10 to 9, so that ±1<<(3*ϕ+3) doesn't overflow an int32
-	// and we can therefore use int32 math instead of the slower int64 math in
-	// Rasterizer.fixedLineTo below.
-	ϕ = 10
+	ϕ = 9
 
 	fxOne          int1ϕ = 1 << ϕ
 	fxOneAndAHalf  int1ϕ = 1<<ϕ + 1<<(ϕ-1)
@@ -146,13 +142,10 @@
 					// In ideal math: buf[i] += uint32(d * (fxOne - a0 - am))
 					//
 					// (x1i == x0i+2) and (twoOverS == 2 * (x1 - x0)) implies
-					// that int64(twoOverS) ranges up to +1<<(1*ϕ+2).
-					//
-					// Convert to int64 to avoid overflow. Without that,
-					// TestRasterize30Degrees fails.
-					D := int64(twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared) // D ranges up to ±1<<(2*ϕ+2).
-					D *= int64(d)                                             // D ranges up to ±1<<(3*ϕ+2).
-					D /= int64(twoOverS)
+					// that twoOverS ranges up to +1<<(1*ϕ+2).
+					D := twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared // D ranges up to ±1<<(2*ϕ+2).
+					D *= d                                             // D ranges up to ±1<<(3*ϕ+2).
+					D /= twoOverS
 					buf[i] += uint32(D)
 				}
 			} else {
@@ -194,12 +187,9 @@
 					// Thus, A ranges up to ±1<<(2*ϕ+2). It is possible to
 					// derive a tighter bound, but this bound is sufficient to
 					// reason about overflow.
-					//
-					// Convert to int64 to avoid overflow. Without that,
-					// TestRasterizePolygon fails.
-					D := int64((fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared) // D ranges up to ±1<<(2*ϕ+2).
-					D *= int64(d)                                               // D ranges up to ±1<<(3*ϕ+2).
-					D /= int64(twoOverS)
+					D := (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared // D ranges up to ±1<<(2*ϕ+2).
+					D *= d                                               // D ranges up to ±1<<(3*ϕ+2).
+					D /= twoOverS
 					buf[i] += uint32(D)
 				}
 				dTimesS := uint32((d << (2 * ϕ)) / oneOverS)
@@ -255,15 +245,12 @@
 					// greater than -fxOne<<2, or -1<<(ϕ+2). Thus, B ranges up
 					// to ±1<<(ϕ+2). One final simplification:
 					//	B = x1f<<1 + (1<<(ϕ+2) - fxOneAndAHalf<<1)
-					//
-					// Convert to int64 to avoid overflow. Without that,
-					// TestRasterizePolygon fails.
 					const C = 1<<(ϕ+2) - fxOneAndAHalf<<1
-					D := int64(x1f<<1 + C) // D ranges up to ±1<<(1*ϕ+2).
-					D <<= ϕ                // D ranges up to ±1<<(2*ϕ+2).
-					D -= int64(x1fSquared) // D ranges up to ±1<<(2*ϕ+3).
-					D *= int64(d)          // D ranges up to ±1<<(3*ϕ+3).
-					D /= int64(twoOverS)
+					D := x1f<<1 + C // D ranges up to ±1<<(1*ϕ+2).
+					D <<= ϕ         // D ranges up to ±1<<(2*ϕ+2).
+					D -= x1fSquared // D ranges up to ±1<<(2*ϕ+3).
+					D *= d          // D ranges up to ±1<<(3*ϕ+3).
+					D /= twoOverS
 					buf[i] += uint32(D)
 				}
 			}

diff --git a/vector/vector.go b/vector/vector.go
index 8538bb9..418a956 100644
--- a/vector/vector.go
+++ b/vector/vector.go

@@ -46,9 +46,9 @@
 //
 // The rationale for this particular value is that TestRasterizePolygon in
 // vector_test.go checks the rendering quality of polygon edges at various
-// angles, inscribed in a circle of diameter 2048. It may be that a higher
-// value would still produce acceptable quality, but 2048 seems to work.
-const floatingPointMathThreshold = 2048
+// angles, inscribed in a circle of diameter 512. It may be that a higher value
+// would still produce acceptable quality, but 512 seems to work.
+const floatingPointMathThreshold = 512
 
 func midPoint(p, q f32.Vec2) f32.Vec2 {
 	return f32.Vec2{

diff --git a/vector/vector_test.go b/vector/vector_test.go
index e2cbbdb..f84d040 100644
--- a/vector/vector_test.go
+++ b/vector/vector_test.go

@@ -88,7 +88,7 @@
 
 func TestRasterizePolygon(t *testing.T) {
 	var z Rasterizer
-	for radius := 4; radius <= 1024; radius *= 2 {
+	for radius := 4; radius <= 256; radius *= 2 {
 		for n := 3; n <= 19; n += 4 {
 			z.Reset(2*radius, 2*radius)
 			z.MoveTo(f32.Vec2{
commit	8874bef159af77ab9e746fc3c80b2219b3652045	[log] [tgz]
author	Nigel Tao <nigeltao@golang.org>	Thu Oct 20 10:34:23 2016 +1100
committer	Nigel Tao <nigeltao@golang.org>	Thu Oct 20 04:12:57 2016 +0000
tree	43eb47b0aaf328bda952604d51943c76f968ef07
parent	fa54d6fa1c2aaf941da0bc4885015df17693f8ec [diff]