runtime: make the GC bitmap a byte array
Half the code in the garbage collector accesses the bitmap
as an array of bytes instead of as an array of uintptrs.
This is tricky to do correctly in a portable fashion;
it breaks on big-endian systems.
Make the bitmap a byte array.
This also simplifies markallocated, scanblock and span sweep along the way,
as we don't need to recalculate the bitmap position for each word.

LGTM=khr
R=golang-codereviews, khr
CC=golang-codereviews, rlh, rsc
https://golang.org/cl/125250043
diff --git a/src/pkg/runtime/malloc.go b/src/pkg/runtime/malloc.go
index 152b3b6..8ee4607 100644
--- a/src/pkg/runtime/malloc.go
+++ b/src/pkg/runtime/malloc.go
@@ -22,8 +22,8 @@
 	pageSize  = 1 << pageShift
 	pageMask  = pageSize - 1
 
-	wordsPerBitmapWord = ptrSize * 8 / 4
 	gcBits             = 4
+	wordsPerBitmapByte = 8 / gcBits
 	bitsPerPointer     = 2
 	bitsMask           = 1<<bitsPerPointer - 1
 	pointersPerByte    = 8 / bitsPerPointer
@@ -211,8 +211,8 @@
 	{
 		arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
 		off := (uintptr(x) - arena_start) / ptrSize
-		xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-		shift := (off % wordsPerBitmapWord) * gcBits
+		xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+		shift := (off % wordsPerBitmapByte) * gcBits
 		if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
 			println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
 			gothrow("bad bits in markallocated")
@@ -260,8 +260,7 @@
 			ptrmask = (*uint8)(unsafe.Pointer(&typ.gc[0])) // embed mask
 		}
 		if size == 2*ptrSize {
-			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-			*xbitsb = *ptrmask | bitBoundary
+			*xbits = *ptrmask | bitBoundary
 			goto marked
 		}
 		te = uintptr(typ.size) / ptrSize
@@ -283,19 +282,12 @@
 				v &^= uint8(bitPtrMask << 4)
 			}
 
-			off := (uintptr(x) + i - arena_start) / ptrSize
-			xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-			shift := (off % wordsPerBitmapWord) * gcBits
-			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-			*xbitsb = v
+			*xbits = v
+			xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
 		}
 		if size0%(2*ptrSize) == 0 && size0 < size {
 			// Mark the word after last object's word as bitsDead.
-			off := (uintptr(x) + size0 - arena_start) / ptrSize
-			xbits := (*uintptr)(unsafe.Pointer(arena_start - off/wordsPerBitmapWord*ptrSize - ptrSize))
-			shift := (off % wordsPerBitmapWord) * gcBits
-			xbitsb := (*uint8)(add(unsafe.Pointer(xbits), shift/8))
-			*xbitsb = bitsDead << 2
+			*xbits = bitsDead << 2
 		}
 	}
 marked: