// src/pkg/runtime/linux/thread.c - go - Git at Google

 // Copyright 2009 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 #include "runtime.h"
 #include "defs.h"
 #include "os.h"
 #include "stack.h"

 extern SigTab runtime·sigtab[];

 // Linux futex.
 //
 //	futexsleep(uint32 *addr, uint32 val)
 //	futexwakeup(uint32 *addr)
 //
 // Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
 // Futexwakeup wakes up one thread sleeping on addr.
 // Futexsleep is allowed to wake up spuriously.

 // Futex operation codes passed as the second argument of runtime·futex,
 // followed by the two errno values mentioned in the futexwakeup commentary.
 enum
 {
 	FUTEX_WAIT = 0,	// operation used by futexsleep
 	FUTEX_WAKE = 1,	// operation used by futexwakeup

 	EINTR = 4,
 	EAGAIN = 11,
 };

 // TODO(rsc): I tried using 1<<40 here but futex woke up (-ETIMEDOUT).
 // I wonder if the timespec that gets to the kernel
 // actually has two 32-bit numbers in it, so that
 // a 64-bit 1<<40 ends up being 0 seconds,
 // 1<<8 nanoseconds.
 // Timeout handed to runtime·futex by futexsleep on every FUTEX_WAIT.
 // It is deliberately huge so that, in practice, sleepers only return
 // when woken (or spuriously).
 static Timespec longtime =
 {
 	1<<30,	// 34 years
 	0
 };

 // futexsleep puts the calling thread to sleep on addr, but only if
 // *addr still equals val when the check is made; the check and the
 // sleep happen atomically:
 //	if(*addr == val) sleep
 // The call may return without any matching futexwakeup (spurious
 // wakeup), so callers must re-examine *addr after it returns.
 static void
 futexsleep(uint32 *addr, uint32 val)
 {
 	// Some Linux kernels have a bug where futex of
 	// FUTEX_WAIT returns an internal error code
 	// as an errno.  Libpthread ignores the return value
 	// here, and so can we: since spurious wakeups are
 	// allowed, an early return is harmless.
 	runtime·futex(addr, FUTEX_WAIT, val, &longtime, nil, 0);
 }

 // futexwakeup wakes a thread sleeping on addr, if there is one.
 // A negative result from runtime·futex is not expected: it is
 // printed and the thread then faults on purpose.
 static void
 futexwakeup(uint32 *addr)
 {
 	int64 rc;

 	rc = runtime·futex(addr, FUTEX_WAKE, 1, nil, nil, 0);
 	if(rc < 0){
 		// It is not known whether futex wakeup can fail with
 		// EAGAIN or EINTR; if it can, looping and calling
 		// futex again would be a safe alternative to dying.
 		runtime·prints("futexwakeup addr=");
 		runtime·printpointer(addr);
 		runtime·prints(" returned ");
 		runtime·printint(rc);
 		runtime·prints("\n");

 		// Store through a bogus, recognizable address;
 		// presumably intended to crash the thread here.
 		*(int32*)0x1006 = 0x1006;
 	}
 }


 // Lock and unlock.
 //
 // The lock state is a single 32-bit word that holds
 // a 31-bit count of threads waiting for the lock
 // and a single bit (the low bit) saying whether the lock is held.
 // The uncontended case runs entirely in user space.
 // When contention is detected, we defer to the kernel (futex).
 //
 // A reminder: compare-and-swap runtime·cas(addr, old, new) does
 //	if(*addr == old) { *addr = new; return 1; }
 //	else return 0;
 // but atomically.

 // futexlock acquires l, sleeping in the kernel while another thread
 // holds it.  l->key carries the held flag in its low bit and the
 // number of waiters in the remaining bits, so a waiter is added or
 // removed by adjusting the word by 2.
 static void
 futexlock(Lock *l)
 {
 	uint32 key;

 	for(;;){
 		key = l->key;

 		if((key&1) == 0){
 			// Held bit is clear: try to set it.  If the cas
 			// succeeds the lock is ours; if it fails the word
 			// changed underneath us, so start over.
 			if(runtime·cas(&l->key, key, key|1))
 				return;
 			continue;
 		}

 		// Lock is held: try to count ourselves as a waiter.
 		if(!runtime·cas(&l->key, key, key+2))
 			continue;

 		// We are counted; sleep in the kernel.
 		//
 		// The obvious lock/unlock race is avoided because the
 		// kernel refuses to put us to sleep if l->key has
 		// changed and is no longer key+2.
 		//
 		// (All we really need is that the low bit is still set,
 		// and there is a futex variant that could check just
 		// that, but let's not get carried away.)
 		futexsleep(&l->key, key+2);

 		// Awake again: take ourselves back out of the waiter count.
 		for(;;){
 			key = l->key;
 			if(key < 2)
 				runtime·throw("bad lock key");
 			if(runtime·cas(&l->key, key, key-2))
 				break;
 		}

 		// Loop around and compete for the lock again.
 	}
 }

 // futexunlock releases l and, if the lock word showed any waiters
 // at the moment of release, wakes one of them.  Unlocking a lock
 // whose held bit is clear is a fatal error.
 static void
 futexunlock(Lock *l)
 {
 	uint32 old;

 	// Clear the held bit with a cas loop, keeping the value
 	// that was replaced so the waiter count can be inspected.
 	for(;;){
 		old = l->key;
 		if((old&1) == 0)
 			runtime·throw("unlock of unlocked lock");
 		if(runtime·cas(&l->key, old, old&~1))
 			break;
 	}

 	// Any bit above the low one means the waiter count was nonzero.
 	if((old & ~1) != 0)
 		futexwakeup(&l->key);
 }

 // runtime·lock acquires l on behalf of the current m and bumps
 // m->locks, the count of locks this m holds.  A negative count on
 // entry is treated as corruption.
 void
 runtime·lock(Lock *l)
 {
 	if(m->locks < 0){
 		runtime·throw("lock count");
 	}
 	m->locks += 1;
 	futexlock(l);
 }

 // runtime·unlock drops the current m's lock count and then releases l.
 // The count going negative means there were more unlocks than locks.
 void
 runtime·unlock(Lock *l)
 {
 	if(--m->locks < 0){
 		runtime·throw("lock count");
 	}
 	futexunlock(l);
 }

 // runtime·destroylock is a no-op in this implementation; the
 // parameter is left unnamed because it is never used.
 void
 runtime·destroylock(Lock*)
 {
 }


 // One-time notifications.
 //
 // Since the lock/unlock implementation already
 // takes care of sleeping in the kernel, we just reuse it.
 // (But it's a weird use, so it gets its own interface.)
 //
 // We use a lock to represent the event:
 // unlocked == event has happened.
 // Thus the lock starts out locked, and to wait for the
 // event you try to lock the lock.  To signal the event,
 // you unlock the lock.

 // runtime·noteclear resets n to the "event has not happened" state:
 // the lock word is zeroed and the lock is then taken, so it stays
 // held until runtime·notewakeup releases it.
 void
 runtime·noteclear(Note *n)
 {
 	n->lock.key = 0;	// memset(n, 0, sizeof *n)
 	futexlock(&n->lock);
 }

 // runtime·notewakeup signals the event by unlocking the note's lock.
 void
 runtime·notewakeup(Note *n)
 {
 	futexunlock(&n->lock);
 }

 // runtime·notesleep waits for the event by acquiring the note's lock,
 // which only succeeds once the lock has been released.
 void
 runtime·notesleep(Note *n)
 {
 	futexlock(&n->lock);
 	futexunlock(&n->lock);	// Let other sleepers find out too.
 }


 // Clone, the Linux rfork.
 //
 // Flag bits for the first argument of runtime·clone.  Only CLONE_VM,
 // CLONE_FS, CLONE_FILES, CLONE_SIGHAND and CLONE_THREAD are used by
 // runtime·newosproc; the others are listed but not referenced in the
 // code shown here.
 enum
 {
 	CLONE_VM = 0x100,
 	CLONE_FS = 0x200,
 	CLONE_FILES = 0x400,
 	CLONE_SIGHAND = 0x800,
 	CLONE_PTRACE = 0x2000,
 	CLONE_VFORK = 0x4000,
 	CLONE_PARENT = 0x8000,
 	CLONE_THREAD = 0x10000,
 	CLONE_NEWNS = 0x20000,
 	CLONE_SYSVSEM = 0x40000,
 	CLONE_SETTLS = 0x80000,
 	CLONE_PARENT_SETTID = 0x100000,
 	CLONE_CHILD_CLEARTID = 0x200000,
 	CLONE_UNTRACED = 0x800000,
 	CLONE_CHILD_SETTID = 0x1000000,
 	CLONE_STOPPED = 0x2000000,
 	CLONE_NEWUTS = 0x4000000,
 	CLONE_NEWIPC = 0x8000000,
 };

 // runtime·newosproc starts a new OS thread with runtime·clone.
 // stk, m, g and fn are handed to runtime·clone unchanged.  If the
 // clone fails, the thread count and errno are printed and the
 // runtime throws.
 void
 runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void))
 {
 	int32 flags;
 	int32 ret;

 	m->tls[0] = m->id;	// so 386 asm can find it

 	// note: strace gets confused if we use CLONE_PTRACE here.
 	flags = CLONE_VM;		// share memory
 	flags |= CLONE_FS;		// share cwd, etc
 	flags |= CLONE_FILES;		// share fd table
 	flags |= CLONE_SIGHAND;		// share sig handler table
 	flags |= CLONE_THREAD;		// revisit - okay for now

 	// Disabled debugging trace.
 	if(0){
 		runtime·printf("newosproc stk=%p m=%p g=%p fn=%p clone=%p id=%d/%d ostk=%p\n",
 			stk, m, g, fn, runtime·clone, m->id, m->tls[0], &m);
 	}

 	ret = runtime·clone(flags, stk, m, g, fn);
 	if(ret < 0){
 		// A negative return carries the negated errno.
 		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret);
 		runtime·throw("runtime.newosproc");
 	}
 }

 // runtime·osinit is empty: this file does no OS-level initialization here.
 void
 runtime·osinit(void)
 {
 }

 // runtime·goenvs delegates entirely to runtime·goenvs_unix.
 void
 runtime·goenvs(void)
 {
 	runtime·goenvs_unix();
 }

 // Called to initialize a new m (including the bootstrap m).
 // Allocates the m's signal-handling g with runtime·malg and passes
 // an address derived from that g's stackguard, together with the
 // same 32K size, to runtime·signalstack.
 void
 runtime·minit(void)
 {
 	// Initialize signal handling.
 	m->gsignal = runtime·malg(32*1024);	// OS X wants >=8K, Linux >=2K
 	// stackguard - StackGuard is presumably the low end of the
 	// stack just allocated; confirm against runtime·malg.
 	runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024);
 }

 // runtime·sigpanic turns the signal recorded in g->sig (with details
 // in g->sigcode0 and g->sigcode1) into a runtime·panicstring call,
 // or into a runtime·throw for faults that do not look like nil
 // dereferences.
 //
 // NOTE(review): none of the cases ends in break; this presumably
 // relies on runtime·panicstring and runtime·throw never returning.
 void
 runtime·sigpanic(void)
 {
 	switch(g->sig) {
 	case SIGBUS:
 		// A bus address error below 0x1000 is reported as a nil dereference.
 		if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000)
 			runtime·panicstring("invalid memory address or nil pointer dereference");
 		// g->sigcode1 is printed as the fault address.
 		runtime·printf("unexpected fault address %p\n", g->sigcode1);
 		runtime·throw("fault");
 	case SIGSEGV:
 		// Same low-address rule, accepted for code 0, SEGV_MAPERR or SEGV_ACCERR.
 		if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000)
 			runtime·panicstring("invalid memory address or nil pointer dereference");
 		runtime·printf("unexpected fault address %p\n", g->sigcode1);
 		runtime·throw("fault");
 	case SIGFPE:
 		// Integer cases get specific messages; every other code
 		// is reported as a floating point error.
 		switch(g->sigcode0) {
 		case FPE_INTDIV:
 			runtime·panicstring("integer divide by zero");
 		case FPE_INTOVF:
 			runtime·panicstring("integer overflow");
 		}
 		runtime·panicstring("floating point error");
 	}
 	// Any other signal panics with its name from runtime·sigtab.
 	runtime·panicstring(runtime·sigtab[g->sig].name);
 }
	// Copyright 2009 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	#include "runtime.h"
	#include "defs.h"
	#include "os.h"
	#include "stack.h"

	extern SigTab runtime·sigtab[];

	// Linux futex.
	//
	// futexsleep(uint32 *addr, uint32 val)
	// futexwakeup(uint32 *addr)
	//
	// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
	// Futexwakeup wakes up one thread sleeping on addr.
	// Futexsleep is allowed to wake up spuriously.

	// Futex operation codes passed as the second argument of runtime·futex,
	// followed by the two errno values mentioned in the futexwakeup commentary.
	enum
	{
	FUTEX_WAIT = 0, // operation used by futexsleep
	FUTEX_WAKE = 1, // operation used by futexwakeup

	EINTR = 4,
	EAGAIN = 11,
	};

	// TODO(rsc): I tried using 1<<40 here but futex woke up (-ETIMEDOUT).
	// I wonder if the timespec that gets to the kernel
	// actually has two 32-bit numbers in it, so that
	// a 64-bit 1<<40 ends up being 0 seconds,
	// 1<<8 nanoseconds.
	// Timeout handed to runtime·futex by futexsleep on every FUTEX_WAIT.
	static Timespec longtime =
	{
	1<<30, // 34 years
	0
	};

	// futexsleep puts the calling thread to sleep on addr, but only if
	// *addr still equals val when the check is made; the check and the
	// sleep happen atomically:
	// if(*addr == val) sleep
	// The call may return without any matching futexwakeup (spurious
	// wakeup), so callers must re-examine *addr after it returns.
	static void
	futexsleep(uint32 *addr, uint32 val)
	{
	// Some Linux kernels have a bug where futex of
	// FUTEX_WAIT returns an internal error code
	// as an errno. Libpthread ignores the return value
	// here, and so can we: since spurious wakeups are
	// allowed, an early return is harmless.
	runtime·futex(addr, FUTEX_WAIT, val, &longtime, nil, 0);
	}

	// If any procs are sleeping on addr, wake one up.
	// A negative result from runtime·futex is not expected: it is
	// printed and the thread then faults on purpose.
	static void
	futexwakeup(uint32 *addr)
	{
		int64 ret;

		ret = runtime·futex(addr, FUTEX_WAKE, 1, nil, nil, 0);

		if(ret >= 0)
			return;

		// I don't know that futex wakeup can return
		// EAGAIN or EINTR, but if it does, it would be
		// safe to loop and call futex again.

		runtime·prints("futexwakeup addr=");
		runtime·printpointer(addr);
		runtime·prints(" returned ");
		runtime·printint(ret);
		runtime·prints("\n");

		// Store through a bogus, recognizable address.  The
		// dereference is required: assigning to the bare cast
		// (int32)0x1006 is not an lvalue and does not compile.
		*(int32*)0x1006 = 0x1006;
	}


	// Lock and unlock.
	//
	// The lock state is a single 32-bit word that holds
	// a 31-bit count of threads waiting for the lock
	// and a single bit (the low bit) saying whether the lock is held.
	// The uncontended case runs entirely in user space.
	// When contention is detected, we defer to the kernel (futex).
	//
	// A reminder: compare-and-swap runtime·cas(addr, old, new) does
	// if(*addr == old) { *addr = new; return 1; }
	// else return 0;
	// but atomically.

	// futexlock acquires l, sleeping in the kernel while another thread
	// holds it.  l->key carries the held flag in its low bit and the
	// number of waiters in the remaining bits, so a waiter is added or
	// removed by adjusting the word by 2.
	static void
	futexlock(Lock *l)
	{
		uint32 v;

	again:
		v = l->key;
		if((v&1) == 0){
			if(runtime·cas(&l->key, v, v|1)){
				// Lock wasn't held; we grabbed it.
				return;
			}
			goto again;
		}

		// Lock was held; try to add ourselves to the waiter count.
		if(!runtime·cas(&l->key, v, v+2))
			goto again;

		// We're accounted for, now sleep in the kernel.
		//
		// We avoid the obvious lock/unlock race because
		// the kernel won't put us to sleep if l->key has
		// changed underfoot and is no longer v+2.
		//
		// (We only really care that (v&1) == 1 (the lock is held),
		// and in fact there is a futex variant that could
		// accommodate that check, but let's not get carried away.)
		futexsleep(&l->key, v+2);

		// We're awake: remove ourselves from the count.
		for(;;){
			v = l->key;
			if(v < 2)
				runtime·throw("bad lock key");
			if(runtime·cas(&l->key, v, v-2))
				break;
		}

		// Try for the lock again.
		goto again;
	}

	// futexunlock releases l and, if the lock word showed any waiters
	// at the moment of release, wakes one of them.  Unlocking a lock
	// whose held bit is clear is a fatal error.
	static void
	futexunlock(Lock *l)
	{
		uint32 prev;

		// Clear the held bit with a cas loop, keeping the value
		// that was replaced so the waiter count can be inspected.
		for(;;){
			prev = l->key;
			if((prev&1) == 0)
				runtime·throw("unlock of unlocked lock");
			if(runtime·cas(&l->key, prev, prev&~1))
				break;
		}

		// Any bit above the low one means the waiter count was nonzero.
		if((prev & ~1) != 0)
			futexwakeup(&l->key);
	}

	// runtime·lock acquires l on behalf of the current m and bumps
	// m->locks, the count of locks this m holds.  A negative count on
	// entry is treated as corruption.
	void
	runtime·lock(Lock *l)
	{
		if(m->locks < 0){
			runtime·throw("lock count");
		}
		m->locks += 1;
		futexlock(l);
	}

	// runtime·unlock drops the current m's lock count and then releases l.
	// The count going negative means there were more unlocks than locks.
	void
	runtime·unlock(Lock *l)
	{
		if(--m->locks < 0){
			runtime·throw("lock count");
		}
		futexunlock(l);
	}

	// runtime·destroylock is a no-op in this implementation; the
	// parameter is left unnamed because it is never used.
	void
	runtime·destroylock(Lock*)
	{
	}


	// One-time notifications.
	//
	// Since the lock/unlock implementation already
	// takes care of sleeping in the kernel, we just reuse it.
	// (But it's a weird use, so it gets its own interface.)
	//
	// We use a lock to represent the event:
	// unlocked == event has happened.
	// Thus the lock starts out locked, and to wait for the
	// event you try to lock the lock. To signal the event,
	// you unlock the lock.

	// runtime·noteclear resets n to the "event has not happened" state:
	// the lock word is zeroed and the lock is then taken, so it stays
	// held until runtime·notewakeup releases it.
	void
	runtime·noteclear(Note *n)
	{
	n->lock.key = 0; // memset(n, 0, sizeof *n)
	futexlock(&n->lock);
	}

	// runtime·notewakeup signals the event by unlocking the note's lock.
	void
	runtime·notewakeup(Note *n)
	{
	futexunlock(&n->lock);
	}

	// runtime·notesleep waits for the event by acquiring the note's lock,
	// which only succeeds once the lock has been released.
	void
	runtime·notesleep(Note *n)
	{
	futexlock(&n->lock);
	futexunlock(&n->lock); // Let other sleepers find out too.
	}


	// Clone, the Linux rfork.
	//
	// Flag bits for the first argument of runtime·clone.  Only CLONE_VM,
	// CLONE_FS, CLONE_FILES, CLONE_SIGHAND and CLONE_THREAD are used by
	// runtime·newosproc; the others are listed but not referenced in the
	// code shown here.
	enum
	{
	CLONE_VM = 0x100,
	CLONE_FS = 0x200,
	CLONE_FILES = 0x400,
	CLONE_SIGHAND = 0x800,
	CLONE_PTRACE = 0x2000,
	CLONE_VFORK = 0x4000,
	CLONE_PARENT = 0x8000,
	CLONE_THREAD = 0x10000,
	CLONE_NEWNS = 0x20000,
	CLONE_SYSVSEM = 0x40000,
	CLONE_SETTLS = 0x80000,
	CLONE_PARENT_SETTID = 0x100000,
	CLONE_CHILD_CLEARTID = 0x200000,
	CLONE_UNTRACED = 0x800000,
	CLONE_CHILD_SETTID = 0x1000000,
	CLONE_STOPPED = 0x2000000,
	CLONE_NEWUTS = 0x4000000,
	CLONE_NEWIPC = 0x8000000,
	};

	// runtime·newosproc starts a new OS thread with runtime·clone.
	// stk, m, g and fn are handed to runtime·clone unchanged.  If the
	// clone fails, the thread count and errno are printed and the
	// runtime throws.
	//
	// m, g, stk and fn are all pointers: the body dereferences m
	// (m->tls, m->id), so the pointer declarators are required.
	void
	runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void))
	{
		int32 ret;
		int32 flags;

		/*
		 * note: strace gets confused if we use CLONE_PTRACE here.
		 */
		flags = CLONE_VM	/* share memory */
			| CLONE_FS	/* share cwd, etc */
			| CLONE_FILES	/* share fd table */
			| CLONE_SIGHAND	/* share sig handler table */
			| CLONE_THREAD	/* revisit - okay for now */
			;

		m->tls[0] = m->id;	// so 386 asm can find it

		// Disabled debugging trace.
		if(0){
			runtime·printf("newosproc stk=%p m=%p g=%p fn=%p clone=%p id=%d/%d ostk=%p\n",
				stk, m, g, fn, runtime·clone, m->id, m->tls[0], &m);
		}

		// A negative return carries the negated errno.
		if((ret = runtime·clone(flags, stk, m, g, fn)) < 0) {
			runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret);
			runtime·throw("runtime.newosproc");
		}
	}

	// runtime·osinit is empty: this file does no OS-level initialization here.
	void
	runtime·osinit(void)
	{
	}

	// runtime·goenvs delegates entirely to runtime·goenvs_unix.
	void
	runtime·goenvs(void)
	{
	runtime·goenvs_unix();
	}

	// Called to initialize a new m (including the bootstrap m).
	// Allocates the m's signal-handling g with runtime·malg and passes
	// an address derived from that g's stackguard, together with the
	// same 32K size, to runtime·signalstack.
	void
	runtime·minit(void)
	{
	// Initialize signal handling.
	m->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K
	// stackguard - StackGuard is presumably the low end of the
	// stack just allocated; confirm against runtime·malg.
	runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024);
	}

	void
	runtime·sigpanic(void)
	{
	switch(g->sig) {
	case SIGBUS:
	if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000)
	runtime·panicstring("invalid memory address or nil pointer dereference");
	runtime·printf("unexpected fault address %p\n", g->sigcode1);
	runtime·throw("fault");
	case SIGSEGV:
	if((g->sigcode0 == 0 \|\| g->sigcode0 == SEGV_MAPERR \|\| g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000)
	runtime·panicstring("invalid memory address or nil pointer dereference");
	runtime·printf("unexpected fault address %p\n", g->sigcode1);
	runtime·throw("fault");
	case SIGFPE:
	switch(g->sigcode0) {
	case FPE_INTDIV:
	runtime·panicstring("integer divide by zero");
	case FPE_INTOVF:
	runtime·panicstring("integer overflow");
	}
	runtime·panicstring("floating point error");
	}
	runtime·panicstring(runtime·sigtab[g->sig].name);
	}