runtime: fix misaligned 64-bit atomic
Fixes #4869.
Fixes #5007.
Update #5005.

R=golang-dev, 0xe2.0x9a.0x9b, bradfitz
CC=golang-dev
https://golang.org/cl/7534044
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index 010f9cd..6ec9706 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -1757,6 +1757,8 @@
 	// a problem in the past.
 	if((((uintptr)&work.empty) & 7) != 0)
 		runtime·throw("runtime: gc work buffer is misaligned");
+	if((((uintptr)&work.full) & 7) != 0)
+		runtime·throw("runtime: gc work buffer is misaligned");
 
 	// The gc is turned off (via enablegc) until
 	// the bootstrap has completed.
diff --git a/src/pkg/runtime/parfor.c b/src/pkg/runtime/parfor.c
index aa5537d..a4468c2 100644
--- a/src/pkg/runtime/parfor.c
+++ b/src/pkg/runtime/parfor.c
@@ -46,6 +46,7 @@
 runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
 {
 	uint32 i, begin, end;
+	uint64 *pos;
 
 	if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
 		runtime·printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
@@ -67,7 +68,10 @@
 	for(i=0; i<nthr; i++) {
 		begin = (uint64)n*i / nthr;
 		end = (uint64)n*(i+1) / nthr;
-		desc->thr[i].pos = (uint64)begin | (((uint64)end)<<32);
+		pos = &desc->thr[i].pos;
+		if(((uintptr)pos & 7) != 0)
+			runtime·throw("parforsetup: pos is not aligned");
+		*pos = (uint64)begin | (((uint64)end)<<32);
 	}
 }
 
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index 8ae6e6a..d9afd5b 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -483,6 +483,7 @@
 	bool wait;			// if true, wait while all threads finish processing,
 					// otherwise parfor may return while other threads are still working
 	ParForThread *thr;		// array of thread descriptors
+	uint32 pad;			// to align ParForThread.pos for 64-bit atomic operations
 	// stats
 	uint64 nsteal;
 	uint64 nstealcnt;