Merge branch 'master' into dev.garbage

Change-Id: I36274cf72b8e1908efc8e375cab7880d7b0b3f43
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index da39dac..837f3ea 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -111,7 +111,7 @@
 
 	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
 	_TinySize      = 16
-	_TinySizeClass = 2
+	_TinySizeClass = int8(2)
 
 	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
 	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
@@ -512,8 +512,8 @@
 // weight allocation. If it is a heavy weight allocation the caller must
 // determine whether a new GC cycle needs to be started or if the GC is active
 // whether this goroutine needs to assist the GC.
-func (c *mcache) nextFree(sizeclass uint8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
-	s = c.alloc[sizeclass]
+func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bool) {
+	s = c.alloc[spc]
 	shouldhelpgc = false
 	freeIndex := s.nextFreeIndex()
 	if freeIndex == s.nelems {
@@ -523,10 +523,10 @@
 			throw("s.allocCount != s.nelems && freeIndex == s.nelems")
 		}
 		systemstack(func() {
-			c.refill(int32(sizeclass))
+			c.refill(spc)
 		})
 		shouldhelpgc = true
-		s = c.alloc[sizeclass]
+		s = c.alloc[spc]
 
 		freeIndex = s.nextFreeIndex()
 	}
@@ -650,10 +650,10 @@
 				return x
 			}
 			// Allocate a new maxTinySize block.
-			span := c.alloc[tinySizeClass]
+			span := c.alloc[tinySpanClass]
 			v := nextFreeFast(span)
 			if v == 0 {
-				v, _, shouldhelpgc = c.nextFree(tinySizeClass)
+				v, _, shouldhelpgc = c.nextFree(tinySpanClass)
 			}
 			x = unsafe.Pointer(v)
 			(*[2]uint64)(x)[0] = 0
@@ -673,10 +673,11 @@
 				sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]
 			}
 			size = uintptr(class_to_size[sizeclass])
-			span := c.alloc[sizeclass]
+			spc := makeSpanClass(sizeclass, noscan)
+			span := c.alloc[spc]
 			v := nextFreeFast(span)
 			if v == 0 {
-				v, span, shouldhelpgc = c.nextFree(sizeclass)
+				v, span, shouldhelpgc = c.nextFree(spc)
 			}
 			x = unsafe.Pointer(v)
 			if needzero && span.needzero != 0 {
@@ -687,7 +688,7 @@
 		var s *mspan
 		shouldhelpgc = true
 		systemstack(func() {
-			s = largeAlloc(size, needzero)
+			s = largeAlloc(size, needzero, noscan)
 		})
 		s.freeindex = 1
 		s.allocCount = 1
@@ -696,9 +697,7 @@
 	}
 
 	var scanSize uintptr
-	if noscan {
-		heapBitsSetTypeNoScan(uintptr(x))
-	} else {
+	if !noscan {
 		// If allocating a defer+arg block, now that we've picked a malloc size
 		// large enough to hold everything, cut the "asked for" size down to
 		// just the defer header, so that the GC bitmap will record the arg block
@@ -776,7 +775,7 @@
 	return x
 }
 
-func largeAlloc(size uintptr, needzero bool) *mspan {
+func largeAlloc(size uintptr, needzero bool, noscan bool) *mspan {
 	// print("largeAlloc size=", size, "\n")
 
 	if size+_PageSize < size {
@@ -792,7 +791,7 @@
 	// pays the debt down to npage pages.
 	deductSweepCredit(npages*_PageSize, npages)
 
-	s := mheap_.alloc(npages, 0, true, needzero)
+	s := mheap_.alloc(npages, makeSpanClass(0, noscan), true, needzero)
 	if s == nil {
 		throw("out of memory")
 	}
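
largeAlloc now carries the noscan flag through to the heap: large spans get makeSpanClass(0, noscan), i.e. span class 0 (scan) or 1 (noscan), so existing sizeclass == 0 checks keep identifying large spans after decoding. A minimal, illustrative-only sketch of those two values (the spanClass encoding itself is added in mheap.go further down):

	package main

	import "fmt"

	func main() {
		// With the spanClass encoding introduced in mheap.go below
		// (sizeclass<<1 | noscan bit), large allocations use only two classes.
		const largeScan, largeNoscan = 0<<1 | 0, 0<<1 | 1
		fmt.Println(largeScan, largeNoscan)       // 0 1
		fmt.Println(largeScan>>1, largeNoscan>>1) // both 0: sizeclass() still sees a large span
	}
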
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 89d8a4c..3011e07 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -45,6 +45,11 @@
 // not checkmarked, and is the dead encoding.
 // These properties must be preserved when modifying the encoding.
 //
+// The bitmap for noscan spans is not maintained. Code must ensure
+// that an object is scannable before consulting its bitmap by
+// checking either the noscan bit in the span or by consulting its
+// type's information.
+//
 // Checkmarks
 //
 // In a concurrent garbage collector, one worries about failing to mark
@@ -509,16 +514,6 @@
 	return h.bits()&bitPointer != 0
 }
 
-// hasPointers reports whether the given object has any pointers.
-// It must be told how large the object at h is for efficiency.
-// h must describe the initial word of the object.
-func (h heapBits) hasPointers(size uintptr) bool {
-	if size == sys.PtrSize { // 1-word objects are always pointers
-		return true
-	}
-	return (*h.bitp>>h.shift)&bitScan != 0
-}
-
 // isCheckmarked reports whether the heap bits have the checkmarked bit set.
 // It must be told how large the object at h is, because the encoding of the
 // checkmark bit varies by size.
@@ -1363,13 +1358,6 @@
 	}
 }
 
-// heapBitsSetTypeNoScan marks x as noscan by setting the first word
-// of x in the heap bitmap to scalar/dead.
-func heapBitsSetTypeNoScan(x uintptr) {
-	h := heapBitsForAddr(uintptr(x))
-	*h.bitp &^= (bitPointer | bitScan) << h.shift
-}
-
 var debugPtrmask struct {
 	lock mutex
 	data *byte
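
With hasPointers and heapBitsSetTypeNoScan gone, the "does this object contain pointers?" question is answered from the span's class rather than from heap bitmap bits, which, per the new comment above, are no longer maintained for noscan spans. A schematic, self-contained sketch of the new check used in mgcmark.go below (the span struct here is a toy stand-in, not the runtime's mspan):

	package main

	import "fmt"

	type spanClass uint8

	func (sc spanClass) noscan() bool { return sc&1 != 0 }

	// Toy stand-in for the relevant mspan field.
	type span struct{ spanclass spanClass }

	func main() {
		// Previously: !hbits.hasPointers(size) consulted the heap bitmap.
		// Now: the scan/noscan decision comes straight from the span class.
		for _, s := range []span{{spanclass: 10}, {spanclass: 11}} {
			if s.spanclass.noscan() {
				fmt.Println(s.spanclass, "-> fast-track to black, no scan")
			} else {
				fmt.Println(s.spanclass, "-> enqueue for scanning")
			}
		}
	}
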
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index c483310..96fb273 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -33,7 +33,8 @@
 	local_tinyallocs uintptr // number of tiny allocs not counted in other stats
 
 	// The rest is not accessed on every malloc.
-	alloc [_NumSizeClasses]*mspan // spans to allocate from
+
+	alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass
 
 	stackcache [_NumStackOrders]stackfreelist
 
@@ -77,7 +78,7 @@
 	lock(&mheap_.lock)
 	c := (*mcache)(mheap_.cachealloc.alloc())
 	unlock(&mheap_.lock)
-	for i := 0; i < _NumSizeClasses; i++ {
+	for i := range c.alloc {
 		c.alloc[i] = &emptymspan
 	}
 	c.next_sample = nextSample()
@@ -103,12 +104,12 @@
 
 // Gets a span that has a free object in it and assigns it
 // to be the cached span for the given sizeclass. Returns this span.
-func (c *mcache) refill(sizeclass int32) *mspan {
+func (c *mcache) refill(spc spanClass) *mspan {
 	_g_ := getg()
 
 	_g_.m.locks++
 	// Return the current cached span to the central lists.
-	s := c.alloc[sizeclass]
+	s := c.alloc[spc]
 
 	if uintptr(s.allocCount) != s.nelems {
 		throw("refill of span with free space remaining")
@@ -119,7 +120,7 @@
 	}
 
 	// Get a new cached span from the central lists.
-	s = mheap_.central[sizeclass].mcentral.cacheSpan()
+	s = mheap_.central[spc].mcentral.cacheSpan()
 	if s == nil {
 		throw("out of memory")
 	}
@@ -128,13 +129,13 @@
 		throw("span has no free space")
 	}
 
-	c.alloc[sizeclass] = s
+	c.alloc[spc] = s
 	_g_.m.locks--
 	return s
 }
 
 func (c *mcache) releaseAll() {
-	for i := 0; i < _NumSizeClasses; i++ {
+	for i := range c.alloc {
 		s := c.alloc[i]
 		if s != &emptymspan {
 			mheap_.central[i].mcentral.uncacheSpan(s)
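
mcache.alloc (like mheap.central, changed below) now holds numSpanClasses = _NumSizeClasses << 1 entries, which is why the loops switch to "for i := range c.alloc" instead of counting to _NumSizeClasses. Each size class occupies a pair of slots: an even index for scan spans and an odd one for noscan spans. A small sketch of that layout, using an illustrative class count rather than the runtime's:

	package main

	import "fmt"

	type spanClass uint8

	func (sc spanClass) sizeclass() int8 { return int8(sc >> 1) }
	func (sc spanClass) noscan() bool    { return sc&1 != 0 }

	const numSizeClasses = 4 // illustrative only; the runtime's _NumSizeClasses is larger
	const numSpanClasses = numSizeClasses << 1

	func main() {
		// alloc[i] is the cached span for spanClass(i): even = scan, odd = noscan.
		for i := 0; i < numSpanClasses; i++ {
			spc := spanClass(i)
			fmt.Printf("alloc[%d]: sizeclass %d, noscan %v\n", i, spc.sizeclass(), spc.noscan())
		}
	}
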
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
index ddcf81e..7a14980 100644
--- a/src/runtime/mcentral.go
+++ b/src/runtime/mcentral.go
@@ -19,14 +19,14 @@
 //go:notinheap
 type mcentral struct {
 	lock      mutex
-	sizeclass int32
+	spanclass spanClass
 	nonempty  mSpanList // list of spans with a free object, ie a nonempty free list
 	empty     mSpanList // list of spans with no free objects (or cached in an mcache)
 }
 
 // Initialize a single central free list.
-func (c *mcentral) init(sizeclass int32) {
-	c.sizeclass = sizeclass
+func (c *mcentral) init(spc spanClass) {
+	c.spanclass = spc
 	c.nonempty.init()
 	c.empty.init()
 }
@@ -34,7 +34,7 @@
 // Allocate a span to use in an MCache.
 func (c *mcentral) cacheSpan() *mspan {
 	// Deduct credit for this span allocation and sweep if necessary.
-	spanBytes := uintptr(class_to_allocnpages[c.sizeclass]) * _PageSize
+	spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize
 	deductSweepCredit(spanBytes, 0)
 
 	lock(&c.lock)
@@ -205,11 +205,11 @@
 
 // grow allocates a new empty span from the heap and initializes it for c's size class.
 func (c *mcentral) grow() *mspan {
-	npages := uintptr(class_to_allocnpages[c.sizeclass])
-	size := uintptr(class_to_size[c.sizeclass])
+	npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
+	size := uintptr(class_to_size[c.spanclass.sizeclass()])
 	n := (npages << _PageShift) / size
 
-	s := mheap_.alloc(npages, c.sizeclass, false, true)
+	s := mheap_.alloc(npages, c.spanclass, false, true)
 	if s == nil {
 		return nil
 	}
diff --git a/src/runtime/mfinal.go b/src/runtime/mfinal.go
index 7e191d4..44414bc 100644
--- a/src/runtime/mfinal.go
+++ b/src/runtime/mfinal.go
@@ -441,7 +441,7 @@
 	}
 
 	n = s.elemsize
-	if s.sizeclass != 0 {
+	if s.spanclass.sizeclass() != 0 {
 		x = add(x, (uintptr(v)-uintptr(x))/n*n)
 	}
 	return
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
index 64a2f3a..0117044b 100644
--- a/src/runtime/mgc.go
+++ b/src/runtime/mgc.go
@@ -244,6 +244,7 @@
 	pad     [3]byte // compiler uses 32-bit load for "enabled" field
 	needed  bool    // whether we need a write barrier for current GC phase
 	cgo     bool    // whether we need a write barrier for a cgo check
+	roc     bool    // whether we need a write barrier for the ROC algorithm
 	alignme uint64  // guarantee alignment so that compiler can use a 32 or 64-bit load
 }
 
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 85130bf..c01fe64 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -1268,7 +1268,7 @@
 			// paths), in which case we must *not* enqueue
 			// oblets since their bitmaps will be
 			// uninitialized.
-			if !hbits.hasPointers(n) {
+			if s.spanclass.noscan() {
 				// Bypass the whole scan.
 				gcw.bytesMarked += uint64(n)
 				return
@@ -1396,7 +1396,7 @@
 		atomic.Or8(mbits.bytep, mbits.mask)
 		// If this is a noscan object, fast-track it to black
 		// instead of greying it.
-		if !hbits.hasPointers(span.elemsize) {
+		if span.spanclass.noscan() {
 			gcw.bytesMarked += uint64(span.elemsize)
 			return
 		}
@@ -1429,7 +1429,7 @@
 		print(" s=nil\n")
 		return
 	}
-	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, " s.state=")
+	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
 	if 0 <= s.state && int(s.state) < len(mSpanStateNames) {
 		print(mSpanStateNames[s.state], "\n")
 	} else {
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index fb5c488..722a8a5 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -183,7 +183,7 @@
 
 	atomic.Xadd64(&mheap_.pagesSwept, int64(s.npages))
 
-	cl := s.sizeclass
+	spc := s.spanclass
 	size := s.elemsize
 	res := false
 	nfree := 0
@@ -277,7 +277,7 @@
 
 	// Count the number of free objects in this span.
 	nfree = s.countFree()
-	if cl == 0 && nfree != 0 {
+	if spc.sizeclass() == 0 && nfree != 0 {
 		s.needzero = 1
 		freeToHeap = true
 	}
@@ -318,9 +318,9 @@
 		atomic.Store(&s.sweepgen, sweepgen)
 	}
 
-	if nfreed > 0 && cl != 0 {
-		c.local_nsmallfree[cl] += uintptr(nfreed)
-		res = mheap_.central[cl].mcentral.freeSpan(s, preserve, wasempty)
+	if nfreed > 0 && spc.sizeclass() != 0 {
+		c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed)
+		res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty)
 		// MCentral_FreeSpan updates sweepgen
 	} else if freeToHeap {
 		// Free large span to heap
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index ef62eff..4cf08e4 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -98,7 +98,8 @@
 	// the padding makes sure that the MCentrals are
 	// spaced CacheLineSize bytes apart, so that each MCentral.lock
 	// gets its own cache line.
-	central [_NumSizeClasses]struct {
+	// central is indexed by spanClass.
+	central [numSpanClasses]struct {
 		mcentral mcentral
 		pad      [sys.CacheLineSize]byte
 	}
@@ -237,7 +238,7 @@
 	divMul      uint16     // for divide by elemsize - divMagic.mul
 	baseMask    uint16     // if non-0, elemsize is a power of 2, & this will get object allocation base
 	allocCount  uint16     // capacity - number of objects in freelist
-	sizeclass   uint8      // size class
+	spanclass   spanClass  // size class and noscan (uint8)
 	incache     bool       // being used by an mcache
 	state       mSpanState // mspaninuse etc
 	needzero    uint8      // needs to be zeroed before allocation
@@ -292,6 +293,31 @@
 	h.allspans = append(h.allspans, s)
 }
 
+// A spanClass represents the size class and noscan-ness of a span.
+//
+// Each size class has a noscan spanClass and a scan spanClass. The
+// noscan spanClass contains only noscan objects, which do not contain
+// pointers and thus do not need to be scanned by the garbage
+// collector.
+type spanClass uint8
+
+const (
+	numSpanClasses = _NumSizeClasses << 1
+	tinySpanClass  = spanClass(tinySizeClass<<1 | 1)
+)
+
+func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
+	return spanClass(sizeclass<<1) | spanClass(bool2int(noscan))
+}
+
+func (sc spanClass) sizeclass() int8 {
+	return int8(sc >> 1)
+}
+
+func (sc spanClass) noscan() bool {
+	return sc&1 != 0
+}
+
 // inheap reports whether b is a pointer into a (potentially dead) heap object.
 // It returns false for pointers into stack spans.
 // Non-preemptible because it is used by write barriers.
@@ -376,7 +402,7 @@
 	}
 
 	p := s.base()
-	if s.sizeclass == 0 {
+	if s.spanclass.sizeclass() == 0 {
 		// Large object.
 		if base != nil {
 			*base = p
@@ -424,7 +450,7 @@
 	h.freelarge.init()
 	h.busylarge.init()
 	for i := range h.central {
-		h.central[i].mcentral.init(int32(i))
+		h.central[i].mcentral.init(spanClass(i))
 	}
 
 	sp := (*slice)(unsafe.Pointer(&h.spans))
@@ -533,7 +559,7 @@
 
 // Allocate a new span of npage pages from the heap for GC'd memory
 // and record its size class in the HeapMap and HeapMapCache.
-func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
+func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 	_g_ := getg()
 	if _g_ != _g_.m.g0 {
 		throw("_mheap_alloc not on g0 stack")
@@ -567,8 +593,8 @@
 		h.sweepSpans[h.sweepgen/2%2].push(s) // Add to swept in-use list.
 		s.state = _MSpanInUse
 		s.allocCount = 0
-		s.sizeclass = uint8(sizeclass)
-		if sizeclass == 0 {
+		s.spanclass = spanclass
+		if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
 			s.elemsize = s.npages << _PageShift
 			s.divShift = 0
 			s.divMul = 0
@@ -618,13 +644,13 @@
 	return s
 }
 
-func (h *mheap) alloc(npage uintptr, sizeclass int32, large bool, needzero bool) *mspan {
+func (h *mheap) alloc(npage uintptr, spanclass spanClass, large bool, needzero bool) *mspan {
 	// Don't do any operations that lock the heap on the G stack.
 	// It might trigger stack growth, and the stack growth code needs
 	// to be able to allocate heap.
 	var s *mspan
 	systemstack(func() {
-		s = h.alloc_m(npage, sizeclass, large)
+		s = h.alloc_m(npage, spanclass, large)
 	})
 
 	if s != nil {
@@ -1020,7 +1046,7 @@
 	span.startAddr = base
 	span.npages = npages
 	span.allocCount = 0
-	span.sizeclass = 0
+	span.spanclass = 0
 	span.incache = false
 	span.elemsize = 0
 	span.state = _MSpanDead
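
The spanClass helpers above pack the size class and the noscan bit into one byte; tinySpanClass is size class 2 with the noscan bit set, since the tiny allocator only serves pointer-free objects. A standalone sketch mirroring those helpers (with a plain branch in place of the runtime's bool2int) and a couple of worked values:

	package main

	import "fmt"

	// Mirrors the helpers added to src/runtime/mheap.go: bit 0 is the noscan
	// flag, the remaining bits are the size class.
	type spanClass uint8

	func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
		c := spanClass(sizeclass << 1)
		if noscan { // the runtime uses bool2int to avoid this branch
			c |= 1
		}
		return c
	}

	func (sc spanClass) sizeclass() int8 { return int8(sc >> 1) }
	func (sc spanClass) noscan() bool    { return sc&1 != 0 }

	func main() {
		fmt.Println(makeSpanClass(2, true)) // 5, i.e. tinySpanClass = tinySizeClass<<1 | 1
		spc := makeSpanClass(5, false)
		fmt.Println(spc, spc.sizeclass(), spc.noscan()) // 10 5 false
		spc = makeSpanClass(5, true)
		fmt.Println(spc, spc.sizeclass(), spc.noscan()) // 11 5 true
	}
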
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index 41b9005..aaf16ac 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -553,12 +553,12 @@
 		if s.state != mSpanInUse {
 			continue
 		}
-		if s.sizeclass == 0 {
+		if sizeclass := s.spanclass.sizeclass(); sizeclass == 0 {
 			memstats.nmalloc++
 			memstats.alloc += uint64(s.elemsize)
 		} else {
 			memstats.nmalloc += uint64(s.allocCount)
-			memstats.by_size[s.sizeclass].nmalloc += uint64(s.allocCount)
+			memstats.by_size[sizeclass].nmalloc += uint64(s.allocCount)
 			memstats.alloc += uint64(s.allocCount) * uint64(s.elemsize)
 		}
 	}
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
index 40c0e85..088c00e 100644
--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -324,6 +324,7 @@
 	gcrescanstacks    int32
 	gcstoptheworld    int32
 	gctrace           int32
+	gcroc             int32
 	invalidptr        int32
 	sbrk              int32
 	scavenge          int32
@@ -344,6 +345,7 @@
 	{"gcrescanstacks", &debug.gcrescanstacks},
 	{"gcstoptheworld", &debug.gcstoptheworld},
 	{"gctrace", &debug.gctrace},
+	{"gcroc", &debug.gcroc},
 	{"invalidptr", &debug.invalidptr},
 	{"sbrk", &debug.sbrk},
 	{"scavenge", &debug.scavenge},
@@ -409,6 +411,11 @@
 		writeBarrier.cgo = true
 		writeBarrier.enabled = true
 	}
+	// For the roc algorithm we turn on the write barrier at all times
+	if debug.gcroc >= 1 {
+		writeBarrier.roc = true
+		writeBarrier.enabled = true
+	}
 }
 
 //go:linkname setTraceback runtime/debug.SetTraceback
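gcroc is parsed from GODEBUG via the dbgvars table like the other entries, so it is set from the environment; any value >= 1 forces the write barrier on for the ROC algorithm. A hedged usage example (the program name is a placeholder, and gcroc can be combined with other GODEBUG options as usual):

	GODEBUG=gcroc=1 ./yourprogram
	GODEBUG=gcroc=1,gctrace=1 ./yourprogram
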
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index 107f260..ff2b27e 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -295,3 +295,10 @@
 
 func memequal_varlen(a, b unsafe.Pointer) bool
 func eqstring(s1, s2 string) bool
+
+// bool2int returns 0 if x is false or 1 if x is true.
+func bool2int(x bool) int {
+	// Avoid branches. In the SSA compiler, this compiles to
+	// exactly what you would want it to.
+	return int(uint8(*(*uint8)(unsafe.Pointer(&x))))
+}
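
bool2int relies on the gc toolchain representing a bool as a single byte holding 0 or 1, which is why the unsafe read can replace a branch. A small standalone check of that behavior, mirroring the helper (with the redundant uint8 conversion dropped):

	package main

	import (
		"fmt"
		"unsafe"
	)

	// Mirror of runtime.bool2int: read the byte backing the bool directly
	// instead of branching on its value.
	func bool2int(x bool) int {
		return int(*(*uint8)(unsafe.Pointer(&x)))
	}

	func main() {
		fmt.Println(bool2int(false), bool2int(true)) // 0 1
	}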