go.image/vp8: use branch-free abs

This change seems to restore performance to
the level before CL 108140045.

benchmark                         old ns/op    new ns/op    delta
BenchmarkDecodeVP8SimpleFilter       796416       784394   -1.51%
BenchmarkDecodeVP8NormalFilter      4931138      4693078   -4.83%
BenchmarkDecodeVP8L                 7820030      7821796   +0.02%

LGTM=nigeltao, r
R=nigeltao, r
CC=golang-codereviews
https://golang.org/cl/107340047
diff --git a/vp8/filter.go b/vp8/filter.go
index ea6ed56..e34a811 100644
--- a/vp8/filter.go
+++ b/vp8/filter.go
@@ -227,11 +227,19 @@
 	}
 }
 
+// intSize is either 32 or 64.
+const intSize = 32 << (^uint(0) >> 63)
+
 func abs(x int) int {
-	if x < 0 {
-		return -x
-	}
-	return x
+	// m is -1 (all bits set) if x < 0, and 0 otherwise.
+	m := x >> (intSize - 1)
+
+	// In two's complement representation, the negation of any
+	// number (except the smallest one) can be computed by
+	// flipping all the bits and adding 1. This is faster than
+	// code with a branch.
+	// See Hacker's Delight, section 2-4.
+	return (x ^ m) - m
 }
 
 func clamp15(x int) int {