runtime: factor out bitmap, finalizer code from malloc/mgc

The code in mfinal.go is moved from malloc*.go and mgc*.go
and is substantially unchanged.

The code in mbitmap.go is also moved from those files, but
cleaned up so that it can be called from those files (in most cases
the code being moved was not already a standalone function).
I also renamed the constants and wrote comments describing
the format. The result is a significant cleanup and isolation of
the bitmap code, but, roughly speaking, it should be treated
and reviewed as new code.

The other files changed only as much as necessary to support
this code movement.

This CL does NOT change the semantics of the heap or type
bitmaps at all, although there are now some obvious opportunities
to do so in follow-up CLs.
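
For a sense of the new call surface, the inline bitmap-marking block
in mallocgc collapses to a single call into the moved code (a minimal
sketch pulled from the diff below; dataSize is the "asked for" size,
which may be smaller than the rounded-up allocation size for
defer+arg blocks):

	// Record typ's pointer layout for the new object at x in the
	// heap bitmap. Only the first dataSize bytes describe typ;
	// anything beyond is recorded as if it were unused space at
	// the end of the malloc block.
	heapBitsSetType(uintptr(x), size, dataSize, typ)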

Change-Id: I41b8d5de87ad1d3cd322709931ab25e659dbb21d
Reviewed-on: https://go-review.googlesource.com/2991
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 90cf736..223220a 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -20,14 +20,6 @@
 	pageSize  = _PageSize
 	pageMask  = _PageMask
 
-	bitsPerPointer  = _BitsPerPointer
-	bitsMask        = _BitsMask
-	pointersPerByte = _PointersPerByte
-	maxGCMask       = _MaxGCMask
-	bitsDead        = _BitsDead
-	bitsPointer     = _BitsPointer
-	bitsScalar      = _BitsScalar
-
 	mSpanInUse = _MSpanInUse
 
 	concurrentSweep = _ConcurrentSweep
@@ -63,27 +55,18 @@
 	if size == 0 {
 		return unsafe.Pointer(&zerobase)
 	}
-	size0 := size
+	dataSize := size
 
 	if flags&flagNoScan == 0 && typ == nil {
 		throw("malloc missing type")
 	}
 
-	// This function must be atomic wrt GC, but for performance reasons
-	// we don't acquirem/releasem on fast path. The code below does not have
-	// split stack checks, so it can't be preempted by GC.
-	// Functions like roundup/add are inlined. And systemstack/racemalloc are nosplit.
-	// If debugMalloc = true, these assumptions are checked below.
-	if debugMalloc {
-		mp := acquirem()
-		if mp.mallocing != 0 {
-			throw("malloc deadlock")
-		}
-		mp.mallocing = 1
-		if mp.curg != nil {
-			mp.curg.stackguard0 = ^uintptr(0xfff) | 0xbad
-		}
+	// Set mp.mallocing to keep from being preempted by GC.
+	mp := acquirem()
+	if mp.mallocing != 0 {
+		throw("malloc deadlock")
 	}
+	mp.mallocing = 1
 
 	c := gomcache()
 	var s *mspan
@@ -133,20 +116,8 @@
 				x = add(c.tiny, off)
 				c.tinyoffset = off + size
 				c.local_tinyallocs++
-				if debugMalloc {
-					mp := acquirem()
-					if mp.mallocing == 0 {
-						throw("bad malloc")
-					}
-					mp.mallocing = 0
-					if mp.curg != nil {
-						mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
-					}
-					// Note: one releasem for the acquirem just above.
-					// The other for the acquirem at start of malloc.
-					releasem(mp)
-					releasem(mp)
-				}
+				mp.mallocing = 0
+				releasem(mp)
 				return x
 			}
 			// Allocate a new maxTinySize block.
@@ -214,108 +185,20 @@
 	}
 
 	if flags&flagNoScan != 0 {
-		// All objects are pre-marked as noscan.
-		goto marked
+		// All objects are pre-marked as noscan. Nothing to do.
+	} else {
+		// If allocating a defer+arg block, now that we've picked a malloc size
+		// large enough to hold everything, cut the "asked for" size down to
+		// just the defer header, so that the GC bitmap will record the arg block
+		// as containing nothing at all (as if it were unused space at the end of
+		// a malloc block caused by size rounding).
+		// The defer arg areas are scanned as part of scanstack.
+		if typ == deferType {
+			dataSize = unsafe.Sizeof(_defer{})
+		}
+		heapBitsSetType(uintptr(x), size, dataSize, typ)
 	}
 
-	// If allocating a defer+arg block, now that we've picked a malloc size
-	// large enough to hold everything, cut the "asked for" size down to
-	// just the defer header, so that the GC bitmap will record the arg block
-	// as containing nothing at all (as if it were unused space at the end of
-	// a malloc block caused by size rounding).
-	// The defer arg areas are scanned as part of scanstack.
-	if typ == deferType {
-		size0 = unsafe.Sizeof(_defer{})
-	}
-
-	// From here till marked label marking the object as allocated
-	// and storing type info in the GC bitmap.
-	{
-		arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
-		off := (uintptr(x) - arena_start) / ptrSize
-		xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
-		shift := (off % wordsPerBitmapByte) * gcBits
-		if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
-			println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
-			throw("bad bits in markallocated")
-		}
-
-		var ti, te uintptr
-		var ptrmask *uint8
-		if size == ptrSize {
-			// It's one word and it has pointers, it must be a pointer.
-			// The bitmap byte is shared with the one-word object
-			// next to it, and concurrent GC might be marking that
-			// object, so we must use an atomic update.
-			atomicor8(xbits, (bitsPointer<<2)<<shift)
-			goto marked
-		}
-		if typ.kind&kindGCProg != 0 {
-			nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
-			masksize := nptr
-			if masksize%2 != 0 {
-				masksize *= 2 // repeated
-			}
-			masksize = masksize * pointersPerByte / 8 // 4 bits per word
-			masksize++                                // unroll flag in the beginning
-			if masksize > maxGCMask && typ.gc[1] != 0 {
-				// write barriers have not been updated to deal with this case yet.
-				throw("maxGCMask too small for now")
-				// If the mask is too large, unroll the program directly
-				// into the GC bitmap. It's 7 times slower than copying
-				// from the pre-unrolled mask, but saves 1/16 of type size
-				// memory for the mask.
-				systemstack(func() {
-					unrollgcproginplace_m(x, typ, size, size0)
-				})
-				goto marked
-			}
-			ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
-			// Check whether the program is already unrolled
-			// by checking if the unroll flag byte is set
-			maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
-			if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
-				systemstack(func() {
-					unrollgcprog_m(typ)
-				})
-			}
-			ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
-		} else {
-			ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
-		}
-		if size == 2*ptrSize {
-			*xbits = *ptrmask | bitBoundary
-			goto marked
-		}
-		te = uintptr(typ.size) / ptrSize
-		// If the type occupies odd number of words, its mask is repeated.
-		if te%2 == 0 {
-			te /= 2
-		}
-		// Copy pointer bitmask into the bitmap.
-		for i := uintptr(0); i < size0; i += 2 * ptrSize {
-			v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
-			ti++
-			if ti == te {
-				ti = 0
-			}
-			if i == 0 {
-				v |= bitBoundary
-			}
-			if i+ptrSize == size0 {
-				v &^= uint8(bitPtrMask << 4)
-			}
-
-			*xbits = v
-			xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
-		}
-		if size0%(2*ptrSize) == 0 && size0 < size {
-			// Mark the word after last object's word as bitsDead.
-			*xbits = bitsDead << 2
-		}
-	}
-marked:
-
 	// GCmarkterminate allocates black
 	// All slots hold nil so no scanning is needed.
 	// This may be racing with GC so do it atomically if there can be
@@ -334,20 +217,8 @@
 		racemalloc(x, size)
 	}
 
-	if debugMalloc {
-		mp := acquirem()
-		if mp.mallocing == 0 {
-			throw("bad malloc")
-		}
-		mp.mallocing = 0
-		if mp.curg != nil {
-			mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
-		}
-		// Note: one releasem for the acquirem just above.
-		// The other for the acquirem at start of malloc.
-		releasem(mp)
-		releasem(mp)
-	}
+	mp.mallocing = 0
+	releasem(mp)
 
 	if debug.allocfreetrace != 0 {
 		tracealloc(x, size, typ)
@@ -377,36 +248,6 @@
 	return x
 }
 
-func loadPtrMask(typ *_type) []uint8 {
-	var ptrmask *uint8
-	nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
-	if typ.kind&kindGCProg != 0 {
-		masksize := nptr
-		if masksize%2 != 0 {
-			masksize *= 2 // repeated
-		}
-		masksize = masksize * pointersPerByte / 8 // 4 bits per word
-		masksize++                                // unroll flag in the beginning
-		if masksize > maxGCMask && typ.gc[1] != 0 {
-			// write barriers have not been updated to deal with this case yet.
-			throw("maxGCMask too small for now")
-		}
-		ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
-		// Check whether the program is already unrolled
-		// by checking if the unroll flag byte is set
-		maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
-		if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
-			systemstack(func() {
-				unrollgcprog_m(typ)
-			})
-		}
-		ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
-	} else {
-		ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
-	}
-	return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
-}
-
 // implementation of new builtin
 func newobject(typ *_type) unsafe.Pointer {
 	flags := uint32(0)
@@ -724,288 +565,11 @@
 var noptrbss struct{}
 var enoptrbss struct{}
 
-// SetFinalizer sets the finalizer associated with x to f.
-// When the garbage collector finds an unreachable block
-// with an associated finalizer, it clears the association and runs
-// f(x) in a separate goroutine.  This makes x reachable again, but
-// now without an associated finalizer.  Assuming that SetFinalizer
-// is not called again, the next time the garbage collector sees
-// that x is unreachable, it will free x.
-//
-// SetFinalizer(x, nil) clears any finalizer associated with x.
-//
-// The argument x must be a pointer to an object allocated by
-// calling new or by taking the address of a composite literal.
-// The argument f must be a function that takes a single argument
-// to which x's type can be assigned, and can have arbitrary ignored return
-// values. If either of these is not true, SetFinalizer aborts the
-// program.
-//
-// Finalizers are run in dependency order: if A points at B, both have
-// finalizers, and they are otherwise unreachable, only the finalizer
-// for A runs; once A is freed, the finalizer for B can run.
-// If a cyclic structure includes a block with a finalizer, that
-// cycle is not guaranteed to be garbage collected and the finalizer
-// is not guaranteed to run, because there is no ordering that
-// respects the dependencies.
-//
-// The finalizer for x is scheduled to run at some arbitrary time after
-// x becomes unreachable.
-// There is no guarantee that finalizers will run before a program exits,
-// so typically they are useful only for releasing non-memory resources
-// associated with an object during a long-running program.
-// For example, an os.File object could use a finalizer to close the
-// associated operating system file descriptor when a program discards
-// an os.File without calling Close, but it would be a mistake
-// to depend on a finalizer to flush an in-memory I/O buffer such as a
-// bufio.Writer, because the buffer would not be flushed at program exit.
-//
-// It is not guaranteed that a finalizer will run if the size of *x is
-// zero bytes.
-//
-// It is not guaranteed that a finalizer will run for objects allocated
-// in initializers for package-level variables. Such objects may be
-// linker-allocated, not heap-allocated.
-//
-// A single goroutine runs all finalizers for a program, sequentially.
-// If a finalizer must run for a long time, it should do so by starting
-// a new goroutine.
-func SetFinalizer(obj interface{}, finalizer interface{}) {
-	e := (*eface)(unsafe.Pointer(&obj))
-	etyp := e._type
-	if etyp == nil {
-		throw("runtime.SetFinalizer: first argument is nil")
-	}
-	if etyp.kind&kindMask != kindPtr {
-		throw("runtime.SetFinalizer: first argument is " + *etyp._string + ", not pointer")
-	}
-	ot := (*ptrtype)(unsafe.Pointer(etyp))
-	if ot.elem == nil {
-		throw("nil elem type!")
-	}
-
-	// find the containing object
-	_, base, _ := findObject(e.data)
-
-	if base == nil {
-		// 0-length objects are okay.
-		if e.data == unsafe.Pointer(&zerobase) {
-			return
-		}
-
-		// Global initializers might be linker-allocated.
-		//	var Foo = &Object{}
-		//	func main() {
-		//		runtime.SetFinalizer(Foo, nil)
-		//	}
-		// The relevant segments are: noptrdata, data, bss, noptrbss.
-		// We cannot assume they are in any order or even contiguous,
-		// due to external linking.
-		if uintptr(unsafe.Pointer(&noptrdata)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&enoptrdata)) ||
-			uintptr(unsafe.Pointer(&data)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&edata)) ||
-			uintptr(unsafe.Pointer(&bss)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&ebss)) ||
-			uintptr(unsafe.Pointer(&noptrbss)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&enoptrbss)) {
-			return
-		}
-		throw("runtime.SetFinalizer: pointer not in allocated block")
-	}
-
-	if e.data != base {
-		// As an implementation detail we allow to set finalizers for an inner byte
-		// of an object if it could come from tiny alloc (see mallocgc for details).
-		if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize {
-			throw("runtime.SetFinalizer: pointer not at beginning of allocated block")
-		}
-	}
-
-	f := (*eface)(unsafe.Pointer(&finalizer))
-	ftyp := f._type
-	if ftyp == nil {
-		// switch to system stack and remove finalizer
-		systemstack(func() {
-			removefinalizer(e.data)
-		})
-		return
-	}
-
-	if ftyp.kind&kindMask != kindFunc {
-		throw("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function")
-	}
-	ft := (*functype)(unsafe.Pointer(ftyp))
-	ins := *(*[]*_type)(unsafe.Pointer(&ft.in))
-	if ft.dotdotdot || len(ins) != 1 {
-		throw("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
-	}
-	fint := ins[0]
-	switch {
-	case fint == etyp:
-		// ok - same type
-		goto okarg
-	case fint.kind&kindMask == kindPtr:
-		if (fint.x == nil || fint.x.name == nil || etyp.x == nil || etyp.x.name == nil) && (*ptrtype)(unsafe.Pointer(fint)).elem == ot.elem {
-			// ok - not same type, but both pointers,
-			// one or the other is unnamed, and same element type, so assignable.
-			goto okarg
-		}
-	case fint.kind&kindMask == kindInterface:
-		ityp := (*interfacetype)(unsafe.Pointer(fint))
-		if len(ityp.mhdr) == 0 {
-			// ok - satisfies empty interface
-			goto okarg
-		}
-		if assertE2I2(ityp, obj, nil) {
-			goto okarg
-		}
-	}
-	throw("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
-okarg:
-	// compute size needed for return parameters
-	nret := uintptr(0)
-	for _, t := range *(*[]*_type)(unsafe.Pointer(&ft.out)) {
-		nret = round(nret, uintptr(t.align)) + uintptr(t.size)
-	}
-	nret = round(nret, ptrSize)
-
-	// make sure we have a finalizer goroutine
-	createfing()
-
-	systemstack(func() {
-		if !addfinalizer(e.data, (*funcval)(f.data), nret, fint, ot) {
-			throw("runtime.SetFinalizer: finalizer already set")
-		}
-	})
-}
-
 // round n up to a multiple of a.  a must be a power of 2.
 func round(n, a uintptr) uintptr {
 	return (n + a - 1) &^ (a - 1)
 }
 
-// Look up pointer v in heap.  Return the span containing the object,
-// the start of the object, and the size of the object.  If the object
-// does not exist, return nil, nil, 0.
-func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
-	c := gomcache()
-	c.local_nlookup++
-	if ptrSize == 4 && c.local_nlookup >= 1<<30 {
-		// purge cache stats to prevent overflow
-		lock(&mheap_.lock)
-		purgecachedstats(c)
-		unlock(&mheap_.lock)
-	}
-
-	// find span
-	arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
-	arena_used := uintptr(unsafe.Pointer(mheap_.arena_used))
-	if uintptr(v) < arena_start || uintptr(v) >= arena_used {
-		return
-	}
-	p := uintptr(v) >> pageShift
-	q := p - arena_start>>pageShift
-	s = *(**mspan)(add(unsafe.Pointer(mheap_.spans), q*ptrSize))
-	if s == nil {
-		return
-	}
-	x = unsafe.Pointer(uintptr(s.start) << pageShift)
-
-	if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse {
-		s = nil
-		x = nil
-		return
-	}
-
-	n = uintptr(s.elemsize)
-	if s.sizeclass != 0 {
-		x = add(x, (uintptr(v)-uintptr(x))/n*n)
-	}
-	return
-}
-
-var fingCreate uint32
-
-func createfing() {
-	// start the finalizer goroutine exactly once
-	if fingCreate == 0 && cas(&fingCreate, 0, 1) {
-		go runfinq()
-	}
-}
-
-// This is the goroutine that runs all of the finalizers
-func runfinq() {
-	var (
-		frame    unsafe.Pointer
-		framecap uintptr
-	)
-
-	for {
-		lock(&finlock)
-		fb := finq
-		finq = nil
-		if fb == nil {
-			gp := getg()
-			fing = gp
-			fingwait = true
-			gp.issystem = true
-			goparkunlock(&finlock, "finalizer wait")
-			gp.issystem = false
-			continue
-		}
-		unlock(&finlock)
-		if raceenabled {
-			racefingo()
-		}
-		for fb != nil {
-			for i := int32(0); i < fb.cnt; i++ {
-				f := (*finalizer)(add(unsafe.Pointer(&fb.fin), uintptr(i)*unsafe.Sizeof(finalizer{})))
-
-				framesz := unsafe.Sizeof((interface{})(nil)) + uintptr(f.nret)
-				if framecap < framesz {
-					// The frame does not contain pointers interesting for GC,
-					// all not yet finalized objects are stored in finq.
-					// If we do not mark it as FlagNoScan,
-					// the last finalized object is not collected.
-					frame = mallocgc(framesz, nil, flagNoScan)
-					framecap = framesz
-				}
-
-				if f.fint == nil {
-					throw("missing type in runfinq")
-				}
-				switch f.fint.kind & kindMask {
-				case kindPtr:
-					// direct use of pointer
-					*(*unsafe.Pointer)(frame) = f.arg
-				case kindInterface:
-					ityp := (*interfacetype)(unsafe.Pointer(f.fint))
-					// set up with empty interface
-					(*eface)(frame)._type = &f.ot.typ
-					(*eface)(frame).data = f.arg
-					if len(ityp.mhdr) != 0 {
-						// convert to interface with methods
-						// this conversion is guaranteed to succeed - we checked in SetFinalizer
-						assertE2I(ityp, *(*interface{})(frame), (*fInterface)(frame))
-					}
-				default:
-					throw("bad kind in runfinq")
-				}
-				reflectcall(nil, unsafe.Pointer(f.fn), frame, uint32(framesz), uint32(framesz))
-
-				// drop finalizer queue references to finalized object
-				f.fn = nil
-				f.arg = nil
-				f.ot = nil
-			}
-			fb.cnt = 0
-			next := fb.next
-			lock(&finlock)
-			fb.next = finc
-			finc = fb
-			unlock(&finlock)
-			fb = next
-		}
-	}
-}
-
 var persistent struct {
 	lock mutex
 	base unsafe.Pointer