[dev.power64] runtime: make all bitmaps arrays of bytes
The "simpler faster garbage collector" is full of little-endian assumptions.
Instead of trying to correct all the mistakes, just give in and make
everything use bytes.
LGTM=minux
R=minux
CC=dvyukov, golang-codereviews
https://golang.org/cl/124400043
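
The little-endian assumption is easiest to see in the heap bitmap writes in mgc0.c below, which keep addressing a uintptr-sized bitmap word one byte at a time and therefore gain the new byteEndian XOR (BigEndian*7 on 64-bit, BigEndian*3 on 32-bit). The following is a minimal standalone sketch of that indexing trick, not runtime code; the hard-coded BigEndian value and the simplified store are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the patch's byteEndian constant: 0 on little-endian hosts,
 * sizeof(word)-1 (7 on 64-bit, 3 on 32-bit) on big-endian ones. */
enum { BigEndian = 0 };  /* assumption: little-endian host; set to 1 on big-endian */
enum { byteEndian = BigEndian * (sizeof(uintptr_t) - 1) };

int main(void) {
	uintptr_t word = 0;     /* one word of the heap bitmap */
	unsigned shift = 8;     /* bit offset of the entry being written */

	/* Byte-granularity store, as markallocated does after this CL.
	 * Without the ^byteEndian, the store would land in the wrong byte
	 * of the word on a big-endian machine. */
	((unsigned char *)&word)[(shift / 8) ^ byteEndian] = 0x5;

	/* Word-granularity read, as the scanner still does. */
	printf("bits at shift %u: %#x\n", shift, (unsigned)(word >> shift & 0xff));
	return 0;
}

The per-frame BitVector and StackMap data take the other route in this CL: they become plain byte arrays, so readers index bytes directly and never reinterpret a wider word, which is endian-neutral by construction.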
diff --git a/src/pkg/runtime/heapdump.c b/src/pkg/runtime/heapdump.c
index eec34f2..f29cf01 100644
--- a/src/pkg/runtime/heapdump.c
+++ b/src/pkg/runtime/heapdump.c
@@ -252,7 +252,7 @@
uintptr i;
for(i = 0; i < bv->n; i += BitsPerPointer) {
- switch(bv->data[i/32] >> i%32 & 3) {
+ switch(bv->bytedata[i/8] >> i%8 & 3) {
case BitsDead:
case BitsScalar:
break;
@@ -261,7 +261,7 @@
dumpint(offset + i / BitsPerPointer * PtrSize);
break;
case BitsMultiWord:
- switch(bv->data[(i+BitsPerPointer)/32] >> (i+BitsPerPointer)%32 & 3) {
+ switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
case BitsString:
dumpint(FieldKindString);
dumpint(offset + i / BitsPerPointer * PtrSize);
@@ -497,13 +497,13 @@
dumpint(TagData);
dumpint((uintptr)data);
dumpmemrange(data, edata - data);
- dumpfields((BitVector){(edata - data)*8, (uint32*)gcdata});
+	dumpfields((BitVector){(edata - data)*8, (byte*)gcdata}); /* WRONG! gcdata is not a bitmap */
// bss segment
dumpint(TagBss);
dumpint((uintptr)bss);
dumpmemrange(bss, ebss - bss);
- dumpfields((BitVector){(ebss - bss)*8, (uint32*)gcbss});
+ dumpfields((BitVector){(ebss - bss)*8, (byte*)gcbss}); /* WRONG! gcbss is not a bitmap */
// MSpan.types
allspans = runtime·mheap.allspans;
@@ -795,9 +795,9 @@
uintptr i;
for(i = 0; i < bv->n; i += BitsPerPointer) {
- if((bv->data[i/32] >> i%32 & 3) != BitsMultiWord)
+ if((bv->bytedata[i/8] >> i%8 & 3) != BitsMultiWord)
continue;
- switch(bv->data[(i+BitsPerPointer)/32] >> (i+BitsPerPointer)%32 & 3) {
+ switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
case BitsString:
case BitsIface:
i += BitsPerPointer;
@@ -843,5 +843,5 @@
tmpbuf[i*BitsPerPointer/8] &= ~(3<<((i*BitsPerPointer)%8));
tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8);
}
- return (BitVector){i*BitsPerPointer, (uint32*)tmpbuf};
+ return (BitVector){i*BitsPerPointer, (byte*)tmpbuf};
}
diff --git a/src/pkg/runtime/malloc.go b/src/pkg/runtime/malloc.go
index 68baa80..df03079 100644
--- a/src/pkg/runtime/malloc.go
+++ b/src/pkg/runtime/malloc.go
@@ -4,9 +4,7 @@
package runtime
-import (
- "unsafe"
-)
+import "unsafe"
const (
flagNoScan = 1 << 0 // GC doesn't have to scan object
@@ -278,7 +276,10 @@
// force = 1 - do GC regardless of current heap usage
// force = 2 - go GC and eager sweep
func gogc(force int32) {
- if GOARCH == "power64" || GOARCH == "power64le" || memstats.enablegc == 0 {
+ if false && (GOARCH == "power64" || GOARCH == "power64le") {
+ return
+ }
+ if memstats.enablegc == 0 {
return
}
diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h
index 1e26509..4198841 100644
--- a/src/pkg/runtime/malloc.h
+++ b/src/pkg/runtime/malloc.h
@@ -568,14 +568,14 @@
struct BitVector
{
int32 n; // # of bits
- uint32 *data;
+ uint8 *bytedata;
};
typedef struct StackMap StackMap;
struct StackMap
{
int32 n; // number of bitmaps
int32 nbit; // number of bits in each bitmap
- uint32 data[];
+ uint8 bytedata[];
};
// Returns pointer map data for the given stackmap index
// (the index is encoded in PCDATA_StackMapIndex).
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index 03622db..b1a8943 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -64,7 +64,7 @@
enum {
Debug = 0,
- ConcurrentSweep = 1,
+ ConcurrentSweep = 0,
PreciseScan = 1,
WorkbufSize = 4*1024,
@@ -75,6 +75,12 @@
RootSpans = 3,
RootFlushCaches = 4,
RootCount = 5,
+
+#ifdef _64BIT
+ byteEndian = BigEndian*7,
+#else
+ byteEndian = BigEndian*3,
+#endif
};
#define ScanConservatively ((byte*)1)
@@ -669,7 +675,7 @@
{
if(n < 0 || n >= stackmap->n)
runtime·throw("stackmapdata: index out of range");
- return (BitVector){stackmap->nbit, stackmap->data + n*((stackmap->nbit+31)/32)};
+ return (BitVector){stackmap->nbit, stackmap->bytedata + 4*n*((stackmap->nbit+31)/32)};
}
// Scan a stack frame: local variables and function arguments/results.
@@ -727,7 +733,7 @@
}
bv = runtime·stackmapdata(stackmap, pcdata);
size = (bv.n * PtrSize) / BitsPerPointer;
- scanblock(frame->varp - size, bv.n/BitsPerPointer*PtrSize, (byte*)bv.data);
+ scanblock(frame->varp - size, bv.n/BitsPerPointer*PtrSize, (byte*)bv.bytedata);
}
// Scan arguments.
@@ -735,7 +741,7 @@
stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
if(stackmap != nil) {
bv = runtime·stackmapdata(stackmap, pcdata);
- scanblock(frame->argp, bv.n/BitsPerPointer*PtrSize, (byte*)bv.data);
+ scanblock(frame->argp, bv.n/BitsPerPointer*PtrSize, (byte*)bv.bytedata);
} else {
if(Debug > 2)
runtime·printf("frame %s conservative args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
@@ -1292,6 +1298,8 @@
struct gc_args a;
int32 i;
+//if(thechar == '9') return;
+
if(sizeof(Workbuf) != WorkbufSize)
runtime·throw("runtime: size of Workbuf is suboptimal");
// The gc is turned off (via enablegc) until
@@ -1305,10 +1313,6 @@
if(!mstats.enablegc || g == g->m->g0 || g->m->locks > 0 || runtime·panicking)
return;
- if(thechar == '9') {
- runtime·gcpercent = -1;
- return;
- }
if(runtime·gcpercent == GcpercentUnknown) { // first time through
runtime·lock(&runtime·mheap);
if(runtime·gcpercent == GcpercentUnknown)
@@ -1777,8 +1781,8 @@
b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = (off % wordsPerBitmapWord) * gcBits;
if((shift%8)==0)
- ((byte*)b)[shift/8] = 0;
- ((byte*)b)[shift/8] |= v<<((shift%8)+2);
+ ((byte*)b)[(shift/8)^byteEndian] = 0;
+ ((byte*)b)[(shift/8)^byteEndian] |= v<<((shift%8)+2);
pos += PtrSize;
} else if(sparse) {
// 4-bits per word
@@ -1873,7 +1877,7 @@
static Lock lock;
byte *mask, *prog;
uintptr pos;
- uint32 x;
+ uintptr x;
runtime·lock(&lock);
mask = (byte*)typ->gc[0];
@@ -1888,9 +1892,11 @@
prog = (byte*)typ->gc[1];
unrollgcprog1(mask, prog, &pos, false, true);
}
+
// atomic way to say mask[0] = 1
- x = ((uint32*)mask)[0];
- runtime·atomicstore((uint32*)mask, x|1);
+ x = typ->gc[0];
+ ((byte*)&x)[0] = 1;
+ runtime·atomicstorep((void**)mask, (void*)x);
}
runtime·unlock(&lock);
}
@@ -1898,7 +1904,7 @@
void
runtime·markallocated(void *v, uintptr size, uintptr size0, Type *typ, bool scan)
{
- uintptr *b, off, shift, i, ti, te, nptr, masksize;
+ uintptr *b, off, shift, i, ti, te, nptr, masksize, maskword;
byte *arena_start, x;
bool *ptrmask;
@@ -1907,7 +1913,7 @@
b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = (off % wordsPerBitmapWord) * gcBits;
if(Debug && (((*b)>>shift)&bitMask) != bitBoundary) {
- runtime·printf("runtime: bad bits in markallocated (%p) b=%p[%p]\n", v, b, *b);
+ runtime·printf("runtime: bad bits in markallocated (%p) b=%p[%p] off=%p shift=%d\n", v, b, *b, off, (int32)shift);
runtime·throw("bad bits in markallocated");
}
@@ -1916,7 +1922,7 @@
if(size == PtrSize)
*b = (*b & ~((bitBoundary|bitPtrMask)<<shift)) | ((bitAllocated+(BitsDead<<2))<<shift);
else
- ((byte*)b)[shift/8] = bitAllocated+(BitsDead<<2);
+ ((byte*)b)[(shift/8)^byteEndian] = bitAllocated+(BitsDead<<2);
return;
}
if(size == PtrSize) {
@@ -1944,13 +1950,14 @@
}
ptrmask = (byte*)typ->gc[0];
// check whether the program is already unrolled
- if((runtime·atomicload((uint32*)ptrmask)&0xff) == 0)
+ maskword = (uintptr)runtime·atomicloadp((void*)&typ->gc[0]);
+ if(((byte*)&maskword)[0] == 0)
unrollgcprog(typ);
ptrmask++; // skip the unroll flag byte
} else
ptrmask = (byte*)&typ->gc[0]; // embed mask
if(size == 2*PtrSize) {
- ((byte*)b)[shift/8] = ptrmask[0] | bitAllocated;
+ ((byte*)b)[(shift/8)^byteEndian] = ptrmask[0] | bitAllocated;
return;
}
te = typ->size/PtrSize;
@@ -1959,7 +1966,7 @@
te /= 2;
}
if(size == 2*PtrSize) {
- ((byte*)b)[shift/8] = (BitsPointer<<2) | (BitsPointer<<6) | bitAllocated;
+ ((byte*)b)[(shift/8)^byteEndian] = (BitsPointer<<2) | (BitsPointer<<6) | bitAllocated;
return;
}
// Copy pointer bitmask into the bitmap.
@@ -1977,14 +1984,14 @@
x |= bitAllocated;
if(i+PtrSize == size0)
x &= ~(bitPtrMask<<4);
- ((byte*)b)[shift/8] = x;
+ ((byte*)b)[(shift/8)^byteEndian] = x;
}
if(size0 == i && size0 < size) {
// mark the word after last object's word as BitsDead
off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
b = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
shift = (off % wordsPerBitmapWord) * gcBits;
- ((byte*)b)[shift/8] = 0;
+ ((byte*)b)[(shift/8)^byteEndian] = 0;
}
}
@@ -2174,7 +2181,7 @@
*mask = runtime·mallocgc(*len, nil, 0);
for(i = 0; i < n; i += PtrSize) {
off = (p+i-frame.varp+size)/PtrSize;
- bits = (bv.data[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
+ bits = (bv.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
(*mask)[i/PtrSize] = bits;
}
}
diff --git a/src/pkg/runtime/mgc0.h b/src/pkg/runtime/mgc0.h
index 3b1c5ba..99271a5 100644
--- a/src/pkg/runtime/mgc0.h
+++ b/src/pkg/runtime/mgc0.h
@@ -7,6 +7,13 @@
enum {
ScanStackByFrames = 1,
+ // TODO(rsc): Half the code in the garbage collector
+ // now accesses the bitmap as an array of bytes
+ // instead of as an array of uintptrs.
+ // This is tricky to do correctly in a portable fashion.
+ // (It breaks on big-endian systems.)
+ // Should we just make the bitmap a byte array?
+
// Four bits per word (see #defines below).
wordsPerBitmapWord = sizeof(void*)*8/4,
gcBits = 4,
diff --git a/src/pkg/runtime/stack.c b/src/pkg/runtime/stack.c
index 49ecd6c..88f2440 100644
--- a/src/pkg/runtime/stack.c
+++ b/src/pkg/runtime/stack.c
@@ -543,8 +543,8 @@
num = bv->n / BitsPerPointer;
for(i = 0; i < num; i++) {
if(StackDebug >= 4)
- runtime·printf(" %p:%s:%p\n", &scanp[i], mapnames[bv->data[i / (32 / BitsPerPointer)] >> (i * BitsPerPointer & 31) & 3], scanp[i]);
- switch(bv->data[i / (32 / BitsPerPointer)] >> (i * BitsPerPointer & 31) & 3) {
+ runtime·printf(" %p:%s:%p\n", &scanp[i], mapnames[bv->bytedata[i / (8 / BitsPerPointer)] >> (i * BitsPerPointer & 7) & 3], scanp[i]);
+ switch(bv->bytedata[i / (8 / BitsPerPointer)] >> (i * BitsPerPointer & 7) & 3) {
case BitsDead:
if(runtime·debug.gcdead)
scanp[i] = (byte*)PoisonStack;
@@ -567,7 +567,7 @@
}
break;
case BitsMultiWord:
- switch(bv->data[(i+1) / (32 / BitsPerPointer)] >> ((i+1) * BitsPerPointer & 31) & 3) {
+ switch(bv->bytedata[(i+1) / (8 / BitsPerPointer)] >> ((i+1) * BitsPerPointer & 7) & 3) {
case BitsString:
// string referents are never on the stack, never need to be adjusted
i++; // skip len
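
As a companion sketch, this is roughly what the byte-based BitVector access pattern in stack.c and heapdump.c amounts to after the change; the bvget helper, the struct layout shown here, and the sample data are illustrative stand-ins, not the runtime definitions.

#include <stdint.h>
#include <stdio.h>

enum { BitsPerPointer = 2, BitsMask = 3 };

/* Hypothetical stand-in for the runtime's BitVector after this CL:
 * n bits backed by a byte array, so extracting a 2-bit entry never
 * depends on the byte order of a wider load. */
typedef struct {
	int32_t n;          /* number of bits */
	uint8_t *bytedata;
} BitVector;

static int bvget(BitVector *bv, int i) {  /* i-th 2-bit entry */
	return bv->bytedata[i / (8 / BitsPerPointer)] >> (i * BitsPerPointer & 7) & BitsMask;
}

int main(void) {
	uint8_t data[] = { 0xE4 };                 /* entries 3..0 = 0b11 10 01 00 */
	BitVector bv = { 4 * BitsPerPointer, data };
	for (int i = 0; i < bv.n / BitsPerPointer; i++)
		printf("entry %d = %d\n", i, bvget(&bv, i));   /* prints 0 1 2 3 */
	return 0;
}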