go.image/webp: implement alpha filtering and uncompressed alpha.

This passes the four new alpha_*.webp conformance tests referred to in
https://golang.org/cl/162850043/#msg7

LGTM=pascal.massimino, r
R=r, pascal.massimino
CC=golang-codereviews
https://golang.org/cl/163630043
diff --git a/webp/decode.go b/webp/decode.go
index c688292..faf9ebf 100644
--- a/webp/decode.go
+++ b/webp/decode.go
@@ -70,45 +70,11 @@
 				}
 				return nil, image.Config{}, err
 			}
-			filter := (buf[0] >> 2) & 0x03
-			if filter != 0 {
-				return nil, image.Config{}, errors.New(
-					"webp: VP8X Alpha filtering != 0 is not implemented")
-			}
-			compression := buf[0] & 0x03
-			if compression != 1 {
-				return nil, image.Config{}, errors.New(
-					"webp: VP8X Alpha compression != 1 is not implemented")
-			}
-			// Read the VP8L-compressed alpha values. First, synthesize a 5-byte VP8L header:
-			// a 1-byte magic number, a 14-bit widthMinusOne, a 14-bit heightMinusOne,
-			// a 1-bit (ignored, zero) alphaIsUsed and a 3-bit (zero) version.
-			// TODO(nigeltao): be more efficient than decoding an *image.NRGBA just to
-			// extract the green values to a separately allocated []byte. Fixing this
-			// will require changes to the vp8l package's API.
-			if widthMinusOne > 0x3fff || heightMinusOne > 0x3fff {
-				return nil, image.Config{}, errors.New("webp: invalid format")
-			}
-			buf[0] = 0x2f // VP8L magic number.
-			buf[1] = uint8(widthMinusOne)
-			buf[2] = uint8(widthMinusOne>>8) | uint8(heightMinusOne<<6)
-			buf[3] = uint8(heightMinusOne >> 2)
-			buf[4] = uint8(heightMinusOne >> 10)
-			alphaImage, err := vp8l.Decode(io.MultiReader(
-				bytes.NewReader(buf[:5]),
-				chunkData,
-			))
+			alpha, alphaStride, err = readAlpha(chunkData, widthMinusOne, heightMinusOne, buf[0]&0x03)
 			if err != nil {
 				return nil, image.Config{}, err
 			}
-			// The green values of the inner NRGBA image are the alpha values of the
-			// outer NYCbCrA image.
-			pix := alphaImage.(*image.NRGBA).Pix
-			alpha = make([]byte, len(pix)/4)
-			for i := range alpha {
-				alpha[i] = pix[4*i+1]
-			}
-			alphaStride = int(widthMinusOne) + 1
+			unfilterAlpha(alpha, alphaStride, (buf[0]>>2)&0x03)
 
 		case fccVP8:
 			if wantAlpha {
@@ -182,6 +148,109 @@
 	}
 }
 
+// readAlpha reads the alpha plane of a (widthMinusOne+1) x (heightMinusOne+1) image
+// from chunkData, returning one byte per pixel and the row stride. compression 0 is
+// raw bytes, 1 is a VP8L stream without its header; others give errInvalidFormat.
+func readAlpha(chunkData io.Reader, widthMinusOne, heightMinusOne uint32, compression byte) (
+	alpha []byte, alphaStride int, err error) {
+
+	switch compression {
+	case 0:
+		w, h := int(widthMinusOne)+1, int(heightMinusOne)+1
+		alpha = make([]byte, w*h)
+		if _, err := io.ReadFull(chunkData, alpha); err != nil {
+			return nil, 0, err
+		}
+		return alpha, w, nil
+
+	case 1:
+		// Synthesize the 5-byte VP8L header: a 1-byte magic number, 14-bit widthMinusOne,
+		// 14-bit heightMinusOne, 1-bit (ignored, zero) alphaIsUsed and 3-bit (zero) version.
+		// TODO(nigeltao): be more efficient than decoding an *image.NRGBA just to
+		// extract the green values to a separately allocated []byte. Fixing this
+		// will require changes to the vp8l package's API.
+		if widthMinusOne > 0x3fff || heightMinusOne > 0x3fff {
+			return nil, 0, errInvalidFormat // Dimensions do not fit in 14 bits.
+		}
+		alphaImage, err := vp8l.Decode(io.MultiReader(
+			bytes.NewReader([]byte{
+				0x2f, // VP8L magic number.
+				uint8(widthMinusOne),
+				uint8(widthMinusOne>>8) | uint8(heightMinusOne<<6),
+				uint8(heightMinusOne >> 2),
+				uint8(heightMinusOne >> 10),
+			}),
+			chunkData,
+		))
+		if err != nil {
+			return nil, 0, err
+		}
+		// The inner NRGBA image's green values are the outer image's alpha values.
+		pix := alphaImage.(*image.NRGBA).Pix
+		alpha = make([]byte, len(pix)/4)
+		for i := range alpha {
+			alpha[i] = pix[4*i+1]
+		}
+		return alpha, int(widthMinusOne) + 1, nil
+	}
+	return nil, 0, errInvalidFormat // Unknown compression method.
+}
+
+// unfilterAlpha undoes, in place, the predictive filter that was applied to
+// alpha, which is laid out as rows of alphaStride bytes. filter selects the
+// predictor: 1 is horizontal, 2 is vertical and 3 is gradient. Any other
+// filter value, or an empty alpha or zero alphaStride, leaves alpha untouched.
+func unfilterAlpha(alpha []byte, alphaStride int, filter byte) {
+	if len(alpha) == 0 || alphaStride == 0 || filter < 1 || filter > 3 {
+		return
+	}
+
+	// For all three filters, the first row is predicted from the left pixel.
+	for p := 1; p < alphaStride; p++ {
+		alpha[p] += alpha[p-1]
+	}
+
+	switch filter {
+	case 1: // Horizontal filter.
+		for row := alphaStride; row < len(alpha); row += alphaStride {
+			// The first column is predicted from the pixel above.
+			alpha[row] += alpha[row-alphaStride]
+
+			for p := row + 1; p < row+alphaStride; p++ {
+				alpha[p] += alpha[p-1]
+			}
+		}
+
+	case 2: // Vertical filter.
+		// Every later pixel is predicted from the pixel above.
+		for p := alphaStride; p < len(alpha); p++ {
+			alpha[p] += alpha[p-alphaStride]
+		}
+
+	case 3: // Gradient filter.
+		for row := alphaStride; row < len(alpha); row += alphaStride {
+			// The first column is predicted from the pixel above.
+			alpha[row] += alpha[row-alphaStride]
+
+			// The interior is predicted from the top-left, top and left
+			// pixels as left + top - topLeft, clamped to [0, 255].
+			for p := row + 1; p < row+alphaStride; p++ {
+				topLeft := int(alpha[p-alphaStride-1])
+				top := int(alpha[p-alphaStride])
+				left := int(alpha[p-1])
+				pred := left + top - topLeft
+				switch {
+				case pred < 0:
+					pred = 0
+				case pred > 255:
+					pred = 255
+				}
+				alpha[p] += uint8(pred)
+			}
+		}
+	}
+}
+
 // Decode reads a WEBP image from r and returns it as an image.Image.
 func Decode(r io.Reader) (image.Image, error) {
 	m, _, err := decode(r, false)