runtime: use 1-bit pointer bitmaps in type representation

The type information in reflect.Type and the GC programs is now
1 bit per word, down from 2 bits.
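
For illustration, the 1-bit packing used by the GC program data bytes
looks roughly like this (a sketch only, not the compiler or linker
code; see proggendata in the diff for the real thing):

	// packBits packs per-word pointer bits (0 = scalar, 1 = pointer)
	// into bytes, low bit first, one bit per word.
	func packBits(words []uint8) []byte {
		out := make([]byte, (len(words)+7)/8)
		for i, d := range words {
			out[i/8] |= (d & 1) << (uint(i) % 8)
		}
		return out
	}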

The in-memory unrolled type bitmap representation is now
1 bit per word, down from 4 bits.
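
Concretely, the unrolled mask for a type of width t.Width needs
(nptr+7)/8 bytes, where nptr is the size in pointer-sized words, and
bit i says whether word i holds a pointer. A minimal sketch of reading
that encoding (the helper names are made up, not part of this CL):

	// maskBytes returns the size in bytes of a 1-bit-per-word mask
	// covering an object of the given size.
	func maskBytes(size, ptrSize int64) int64 {
		nptr := (size + ptrSize - 1) / ptrSize
		return (nptr + 7) / 8
	}

	// wordIsPointer reports whether word i of the object is a pointer,
	// per the 1-bit encoding: bit i%8 of byte i/8.
	func wordIsPointer(mask []byte, i int64) bool {
		return mask[i/8]>>(uint(i)%8)&1 != 0
	}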

The conversion from the unrolled (now 1-bit) bitmap to the
heap bitmap (still 4-bit) is not optimized. A followup CL will
work on that, after the heap bitmap has been converted to 2-bit.
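
Conceptually the conversion is just a width expansion: read one bit per
word from the unrolled mask and write one 4-bit entry per word into the
heap bitmap. A hypothetical sketch, assuming a destination that stores
two words per byte with the pointer bit in the low bit of each nibble
(not the runtime's actual heap bitmap layout or code):

	// expandTo4Bit expands a 1-bit-per-word mask to 4 bits per word,
	// two words per output byte. Illustrative only.
	func expandTo4Bit(mask []byte, nptr int64) []byte {
		out := make([]byte, (nptr+1)/2)
		for i := int64(0); i < nptr; i++ {
			bit := mask[i/8] >> (uint(i) % 8) & 1
			out[i/2] |= bit << (uint(i%2) * 4)
		}
		return out
	}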

The typeDead optimization, in which a special value denotes
that there are no more pointers anywhere in the object, is lost
in this CL. A followup CL will bring it back in the final form of
heapBitsSetType.
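
For context, typeDead is a sentinel meaning "no pointers in this word
or any later word of the object", which lets the scanner stop early.
Schematically (names and values here are illustrative, not the
runtime's code):

	// Per-word type values, illustrative only.
	const (
		typeDead    = 0 // no pointers from here to the end of the object
		typeScalar  = 1
		typePointer = 2
	)

	// scanObject visits every pointer word and stops at typeDead.
	func scanObject(info []uint8, visit func(word int)) {
		for i, t := range info {
			if t == typeDead {
				break
			}
			if t == typePointer {
				visit(i)
			}
		}
	}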

Change-Id: If61e67950c16a293b0b516a6fd9a1c755b6d5549
Reviewed-on: https://go-review.googlesource.com/9702
Reviewed-by: Austin Clements <austin@google.com>
diff --git a/src/cmd/internal/gc/reflect.go b/src/cmd/internal/gc/reflect.go
index 9979fe8..6ff9df2 100644
--- a/src/cmd/internal/gc/reflect.go
+++ b/src/cmd/internal/gc/reflect.go
@@ -1430,11 +1430,7 @@
 	// Calculate size of the unrolled GC mask.
 	nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
 
-	size := nptr
-	if size%2 != 0 {
-		size *= 2 // repeated
-	}
-	size = size * obj.GcBits / 8 // 4 bits per word
+	size := (nptr + 7) / 8
 
 	// Decide whether to use unrolled GC mask or GC program.
 	// We could use a more elaborate condition, but this seems to work well in practice.
@@ -1445,7 +1441,7 @@
 	return size > int64(2*Widthptr)
 }
 
-// Generates sparse GC bitmask (4 bits per word).
+// Generates GC bitmask (1 bit per word).
 func gengcmask(t *Type, gcmask []byte) {
 	for i := int64(0); i < 16; i++ {
 		gcmask[i] = 0
@@ -1454,40 +1450,14 @@
 		return
 	}
 
-	// Generate compact mask as stacks use.
-	xoffset := int64(0)
-
 	vec := bvalloc(2 * int32(Widthptr) * 8)
+	xoffset := int64(0)
 	onebitwalktype1(t, &xoffset, vec)
 
-	// Unfold the mask for the GC bitmap format:
-	// 4 bits per word, 2 high bits encode pointer info.
-	pos := gcmask
-
 	nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
-	half := false
-
-	// If number of words is odd, repeat the mask.
-	// This makes simpler handling of arrays in runtime.
-	var i int64
-	var bits uint8
-	for j := int64(0); j <= (nptr % 2); j++ {
-		for i = 0; i < nptr; i++ {
-			// convert 0=scalar / 1=pointer to GC bit encoding
-			if bvget(vec, int32(i)) == 0 {
-				bits = obj.BitsScalar
-			} else {
-				bits = obj.BitsPointer
-			}
-			bits <<= 2
-			if half {
-				bits <<= 4
-			}
-			pos[0] |= byte(bits)
-			half = !half
-			if !half {
-				pos = pos[1:]
-			}
+	for i := int64(0); i < nptr; i++ {
+		if bvget(vec, int32(i)) == 1 {
+			gcmask[i/8] |= 1 << (uint(i) % 8)
 		}
 	}
 }
@@ -1496,7 +1466,7 @@
 type ProgGen struct {
 	s        *Sym
 	datasize int32
-	data     [256 / obj.PointersPerByte]uint8
+	data     [256 / 8]uint8
 	ot       int64
 }
 
@@ -1504,7 +1474,7 @@
 	g.s = s
 	g.datasize = 0
 	g.ot = 0
-	g.data = [256 / obj.PointersPerByte]uint8{}
+	g.data = [256 / 8]uint8{}
 }
 
 func proggenemit(g *ProgGen, v uint8) {
@@ -1518,16 +1488,16 @@
 	}
 	proggenemit(g, obj.InsData)
 	proggenemit(g, uint8(g.datasize))
-	s := (g.datasize + obj.PointersPerByte - 1) / obj.PointersPerByte
+	s := (g.datasize + 7) / 8
 	for i := int32(0); i < s; i++ {
 		proggenemit(g, g.data[i])
 	}
 	g.datasize = 0
-	g.data = [256 / obj.PointersPerByte]uint8{}
+	g.data = [256 / 8]uint8{}
 }
 
 func proggendata(g *ProgGen, d uint8) {
-	g.data[g.datasize/obj.PointersPerByte] |= d << uint((g.datasize%obj.PointersPerByte)*obj.BitsPerPointer)
+	g.data[g.datasize/8] |= d << uint(g.datasize%8)
 	g.datasize++
 	if g.datasize == 255 {
 		proggendataflush(g)
@@ -1538,7 +1508,7 @@
 func proggenskip(g *ProgGen, off int64, v int64) {
 	for i := off; i < off+v; i++ {
 		if (i % int64(Widthptr)) == 0 {
-			proggendata(g, obj.BitsScalar)
+			proggendata(g, 0)
 		}
 	}
 }
@@ -1566,12 +1536,7 @@
 // Generates GC program for large types.
 func gengcprog(t *Type, pgc0 **Sym, pgc1 **Sym) {
 	nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
-	size := nptr
-	if size%2 != 0 {
-		size *= 2 // repeated twice
-	}
-	size = size * obj.PointersPerByte / 8 // 4 bits per word
-	size++                                // unroll flag in the beginning, used by runtime (see runtime.markallocated)
+	size := nptr + 1 // unroll flag in the beginning, used by runtime (see runtime.markallocated)
 
 	// emity space in BSS for unrolled program
 	*pgc0 = nil
@@ -1623,26 +1588,25 @@
 		TFUNC,
 		TCHAN,
 		TMAP:
-		proggendata(g, obj.BitsPointer)
+		proggendata(g, 1)
 		*xoffset += t.Width
 
 	case TSTRING:
-		proggendata(g, obj.BitsPointer)
-		proggendata(g, obj.BitsScalar)
+		proggendata(g, 1)
+		proggendata(g, 0)
 		*xoffset += t.Width
 
 		// Assuming IfacePointerOnly=1.
 	case TINTER:
-		proggendata(g, obj.BitsPointer)
-
-		proggendata(g, obj.BitsPointer)
+		proggendata(g, 1)
+		proggendata(g, 1)
 		*xoffset += t.Width
 
 	case TARRAY:
 		if Isslice(t) {
-			proggendata(g, obj.BitsPointer)
-			proggendata(g, obj.BitsScalar)
-			proggendata(g, obj.BitsScalar)
+			proggendata(g, 1)
+			proggendata(g, 0)
+			proggendata(g, 0)
 		} else {
 			t1 := t.Type
 			if t1.Width == 0 {
@@ -1656,7 +1620,7 @@
 				n := t.Width
 				n -= -*xoffset & (int64(Widthptr) - 1) // skip to next ptr boundary
 				proggenarray(g, (n+int64(Widthptr)-1)/int64(Widthptr))
-				proggendata(g, obj.BitsScalar)
+				proggendata(g, 0)
 				proggenarrayend(g)
 				*xoffset -= (n+int64(Widthptr)-1)/int64(Widthptr)*int64(Widthptr) - t.Width
 			} else {
diff --git a/src/cmd/internal/ld/data.go b/src/cmd/internal/ld/data.go
index 3194bd5..676c885 100644
--- a/src/cmd/internal/ld/data.go
+++ b/src/cmd/internal/ld/data.go
@@ -1032,7 +1032,7 @@
 type ProgGen struct {
 	s        *LSym
 	datasize int32
-	data     [256 / obj.PointersPerByte]uint8
+	data     [256 / 8]uint8
 	pos      int64
 }
 
@@ -1040,7 +1040,7 @@
 	g.s = s
 	g.datasize = 0
 	g.pos = 0
-	g.data = [256 / obj.PointersPerByte]uint8{}
+	g.data = [256 / 8]uint8{}
 }
 
 func proggenemit(g *ProgGen, v uint8) {
@@ -1054,16 +1054,16 @@
 	}
 	proggenemit(g, obj.InsData)
 	proggenemit(g, uint8(g.datasize))
-	s := (g.datasize + obj.PointersPerByte - 1) / obj.PointersPerByte
+	s := (g.datasize + 7) / 8
 	for i := int32(0); i < s; i++ {
 		proggenemit(g, g.data[i])
 	}
 	g.datasize = 0
-	g.data = [256 / obj.PointersPerByte]uint8{}
+	g.data = [256 / 8]uint8{}
 }
 
 func proggendata(g *ProgGen, d uint8) {
-	g.data[g.datasize/obj.PointersPerByte] |= d << uint((g.datasize%obj.PointersPerByte)*obj.BitsPerPointer)
+	g.data[g.datasize/8] |= d << uint(g.datasize%8)
 	g.datasize++
 	if g.datasize == 255 {
 		proggendataflush(g)
@@ -1074,7 +1074,7 @@
 func proggenskip(g *ProgGen, off int64, v int64) {
 	for i := off; i < off+v; i++ {
 		if (i % int64(Thearch.Ptrsize)) == 0 {
-			proggendata(g, obj.BitsScalar)
+			proggendata(g, 0)
 		}
 	}
 }
@@ -1119,35 +1119,18 @@
 	// Leave debugging the SDATA issue for the Go rewrite.
 
 	if s.Gotype == nil && s.Size >= int64(Thearch.Ptrsize) && s.Name[0] != '.' {
-		// conservative scan
 		Diag("missing Go type information for global symbol: %s size %d", s.Name, int(s.Size))
+		return
+	}
 
-		if (s.Size%int64(Thearch.Ptrsize) != 0) || (g.pos%int64(Thearch.Ptrsize) != 0) {
-			Diag("proggenaddsym: unaligned conservative symbol %s: size=%d pos=%d", s.Name, s.Size, g.pos)
-		}
-		size := (s.Size + int64(Thearch.Ptrsize) - 1) / int64(Thearch.Ptrsize) * int64(Thearch.Ptrsize)
-		if size < int64(32*Thearch.Ptrsize) {
-			// Emit small symbols as data.
-			for i := int64(0); i < size/int64(Thearch.Ptrsize); i++ {
-				proggendata(g, obj.BitsPointer)
-			}
-		} else {
-			// Emit large symbols as array.
-			proggenarray(g, size/int64(Thearch.Ptrsize))
-
-			proggendata(g, obj.BitsPointer)
-			proggenarrayend(g)
-		}
-
-		g.pos = s.Value + size
-	} else if s.Gotype == nil || decodetype_noptr(s.Gotype) != 0 || s.Size < int64(Thearch.Ptrsize) || s.Name[0] == '.' {
+	if s.Gotype == nil || decodetype_noptr(s.Gotype) != 0 || s.Size < int64(Thearch.Ptrsize) || s.Name[0] == '.' {
 		// no scan
 		if s.Size < int64(32*Thearch.Ptrsize) {
 			// Emit small symbols as data.
 			// This case also handles unaligned and tiny symbols, so tread carefully.
 			for i := s.Value; i < s.Value+s.Size; i++ {
 				if (i % int64(Thearch.Ptrsize)) == 0 {
-					proggendata(g, obj.BitsScalar)
+					proggendata(g, 0)
 				}
 			}
 		} else {
@@ -1156,7 +1139,7 @@
 				Diag("proggenaddsym: unaligned noscan symbol %s: size=%d pos=%d", s.Name, s.Size, g.pos)
 			}
 			proggenarray(g, s.Size/int64(Thearch.Ptrsize))
-			proggendata(g, obj.BitsScalar)
+			proggendata(g, 0)
 			proggenarrayend(g)
 		}
 
@@ -1183,7 +1166,8 @@
 			Diag("proggenaddsym: unaligned gcmask symbol %s: size=%d pos=%d", s.Name, s.Size, g.pos)
 		}
 		for i := int64(0); i < size; i += int64(Thearch.Ptrsize) {
-			proggendata(g, uint8((mask[i/int64(Thearch.Ptrsize)/2]>>uint64((i/int64(Thearch.Ptrsize)%2)*4+2))&obj.BitsMask))
+			word := uint(i / int64(Thearch.Ptrsize))
+			proggendata(g, (mask[word/8]>>(word%8))&1)
 		}
 		g.pos = s.Value + size
 	}
diff --git a/src/cmd/internal/obj/mgc0.go b/src/cmd/internal/obj/mgc0.go
index 2407dea..a385d60 100644
--- a/src/cmd/internal/obj/mgc0.go
+++ b/src/cmd/internal/obj/mgc0.go
@@ -22,16 +22,6 @@
 // Used by cmd/gc.
 
 const (
-	GcBits          = 4
-	BitsPerPointer  = 2
-	BitsDead        = 0
-	BitsScalar      = 1
-	BitsPointer     = 2
-	BitsMask        = 3
-	PointersPerByte = 8 / BitsPerPointer
-)
-
-const (
 	InsData = 1 + iota
 	InsArray
 	InsArrayEnd