Optimize exp/draw/x11 flusher inner loop.

On my laptop, time to prepare and write 800x600 pixels over the
socket falls from 125-ish ms to 80-ish ms.

Thanks to Roger Peppe for the suggestion.

R=r
CC=golang-dev
https://golang.org/cl/1228044
diff --git a/src/pkg/exp/draw/x11/conn.go b/src/pkg/exp/draw/x11/conn.go
index f1a3dca..3579a24 100644
--- a/src/pkg/exp/draw/x11/conn.go
+++ b/src/pkg/exp/draw/x11/conn.go
@@ -91,18 +91,18 @@
 				close(c.flush)
 				return
 			}
+			p := c.img.Pixel[y]
 			for x := 0; x < w; {
 				nx := w - x
 				if nx > len(c.flushBuf1)/4 {
 					nx = len(c.flushBuf1) / 4
 				}
-				for i := 0; i < nx; i++ {
-					r, g, b, _ := c.img.At(x, y).RGBA()
-					c.flushBuf1[4*i+0] = uint8(b >> 24)
-					c.flushBuf1[4*i+1] = uint8(g >> 24)
-					c.flushBuf1[4*i+2] = uint8(r >> 24)
-					x++
+				for i, rgba := range p[x : x+nx] {
+					c.flushBuf1[4*i+0] = rgba.B
+					c.flushBuf1[4*i+1] = rgba.G
+					c.flushBuf1[4*i+2] = rgba.R
 				}
+				x += nx
 				_, err := c.w.Write(c.flushBuf1[0 : 4*nx])
 				if err != nil {
 					close(c.flush)