| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #include "runtime.h" |
| #include "defs.h" |
| #include "os.h" |
| #include "stack.h" |
| |
| extern SigTab runtime·sigtab[]; |
| static int32 proccount; |
| |
| int32 runtime·open(uint8*, int32, int32); |
| int32 runtime·close(int32); |
| int32 runtime·read(int32, void*, int32); |
| |
// Linux futex.
//
//	futexsleep(uint32 *addr, uint32 val)
//	futexwakeup(uint32 *addr, uint32 cnt)
//
// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
// Futexwakeup wakes up at most cnt threads sleeping on addr.
// Futexsleep is allowed to wake up spuriously.
| |
// Values stored in a Lock's key word.
enum
{
	MUTEX_UNLOCKED = 0,
	MUTEX_LOCKED = 1,
	MUTEX_SLEEPING = 2,
};

// Spin tuning used by futexlock before it goes to sleep.
enum
{
	ACTIVE_SPIN = 4,	// busy-spin rounds on multiprocessors
	ACTIVE_SPIN_CNT = 30,	// argument given to runtime·procyield each round
	PASSIVE_SPIN = 1,	// rounds that give up the CPU via runtime·osyield
};

// Operation codes passed to runtime·futex.
enum
{
	FUTEX_WAIT = 0,
	FUTEX_WAKE = 1,
};

// Error numbers referred to in the futexwakeup commentary.
enum
{
	EINTR = 4,
	EAGAIN = 11,
};
| |
| // TODO(rsc): I tried using 1<<40 here but futex woke up (-ETIMEDOUT). |
| // I wonder if the timespec that gets to the kernel |
| // actually has two 32-bit numbers in it, so that |
| // a 64-bit 1<<40 ends up being 0 seconds, |
| // 1<<8 nanoseconds. |
| static Timespec longtime = |
| { |
| 1<<30, // 34 years |
| 0 |
| }; |
| |
| // Atomically, |
| // if(*addr == val) sleep |
| // Might be woken up spuriously; that's allowed. |
| static void |
| futexsleep(uint32 *addr, uint32 val) |
| { |
| // Some Linux kernels have a bug where futex of |
| // FUTEX_WAIT returns an internal error code |
| // as an errno. Libpthread ignores the return value |
| // here, and so can we: as it says a few lines up, |
| // spurious wakeups are allowed. |
| runtime·futex(addr, FUTEX_WAIT, val, &longtime, nil, 0); |
| } |
| |
| // If any procs are sleeping on addr, wake up at most cnt. |
| static void |
| futexwakeup(uint32 *addr, uint32 cnt) |
| { |
| int64 ret; |
| |
| ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0); |
| |
| if(ret >= 0) |
| return; |
| |
| // I don't know that futex wakeup can return |
| // EAGAIN or EINTR, but if it does, it would be |
| // safe to loop and call futex again. |
| runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret); |
| *(int32*)0x1006 = 0x1006; |
| } |
| |
| static int32 |
| getproccount(void) |
| { |
| int32 fd, rd, cnt, cpustrlen; |
| byte *cpustr, *pos, *bufpos; |
| byte buf[256]; |
| |
| fd = runtime·open((byte*)"/proc/stat", O_RDONLY|O_CLOEXEC, 0); |
| if(fd == -1) |
| return 1; |
| cnt = 0; |
| bufpos = buf; |
| cpustr = (byte*)"\ncpu"; |
| cpustrlen = runtime·findnull(cpustr); |
| for(;;) { |
| rd = runtime·read(fd, bufpos, sizeof(buf)-cpustrlen); |
| if(rd == -1) |
| break; |
| bufpos[rd] = 0; |
| for(pos=buf; pos=runtime·strstr(pos, cpustr); cnt++, pos++) { |
| } |
| if(rd < cpustrlen) |
| break; |
| runtime·memmove(buf, bufpos+rd-cpustrlen+1, cpustrlen-1); |
| bufpos = buf+cpustrlen-1; |
| } |
| runtime·close(fd); |
| return cnt ? cnt : 1; |
| } |
| |
| // Possible lock states are MUTEX_UNLOCKED, MUTEX_LOCKED and MUTEX_SLEEPING. |
| // MUTEX_SLEEPING means that there is presumably at least one sleeping thread. |
| // Note that there can be spinning threads during all states - they do not |
| // affect mutex's state. |
| static void |
| futexlock(Lock *l) |
| { |
| uint32 i, v, wait, spin; |
| |
| // Speculative grab for lock. |
| v = runtime·xchg(&l->key, MUTEX_LOCKED); |
| if(v == MUTEX_UNLOCKED) |
| return; |
| |
| // wait is either MUTEX_LOCKED or MUTEX_SLEEPING |
| // depending on whether there is a thread sleeping |
| // on this mutex. If we ever change l->key from |
| // MUTEX_SLEEPING to some other value, we must be |
| // careful to change it back to MUTEX_SLEEPING before |
| // returning, to ensure that the sleeping thread gets |
| // its wakeup call. |
| wait = v; |
| |
| if(proccount == 0) |
| proccount = getproccount(); |
| |
| // On uniprocessor's, no point spinning. |
| // On multiprocessors, spin for ACTIVE_SPIN attempts. |
| spin = 0; |
| if(proccount > 1) |
| spin = ACTIVE_SPIN; |
| |
| for(;;) { |
| // Try for lock, spinning. |
| for(i = 0; i < spin; i++) { |
| while(l->key == MUTEX_UNLOCKED) |
| if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait)) |
| return; |
| runtime·procyield(ACTIVE_SPIN_CNT); |
| } |
| |
| // Try for lock, rescheduling. |
| for(i=0; i < PASSIVE_SPIN; i++) { |
| while(l->key == MUTEX_UNLOCKED) |
| if(runtime·cas(&l->key, MUTEX_UNLOCKED, wait)) |
| return; |
| runtime·osyield(); |
| } |
| |
| // Sleep. |
| v = runtime·xchg(&l->key, MUTEX_SLEEPING); |
| if(v == MUTEX_UNLOCKED) |
| return; |
| wait = MUTEX_SLEEPING; |
| futexsleep(&l->key, MUTEX_SLEEPING); |
| } |
| } |
| |
| static void |
| futexunlock(Lock *l) |
| { |
| uint32 v; |
| |
| v = runtime·xchg(&l->key, MUTEX_UNLOCKED); |
| if(v == MUTEX_UNLOCKED) |
| runtime·throw("unlock of unlocked lock"); |
| if(v == MUTEX_SLEEPING) |
| futexwakeup(&l->key, 1); |
| } |
| |
| void |
| runtime·lock(Lock *l) |
| { |
| if(m->locks++ < 0) |
| runtime·throw("runtime·lock: lock count"); |
| futexlock(l); |
| } |
| |
| void |
| runtime·unlock(Lock *l) |
| { |
| if(--m->locks < 0) |
| runtime·throw("runtime·unlock: lock count"); |
| futexunlock(l); |
| } |
| |
| |
| // One-time notifications. |
| void |
| runtime·noteclear(Note *n) |
| { |
| n->state = 0; |
| } |
| |
| void |
| runtime·notewakeup(Note *n) |
| { |
| runtime·xchg(&n->state, 1); |
| futexwakeup(&n->state, 1<<30); |
| } |
| |
| void |
| runtime·notesleep(Note *n) |
| { |
| while(runtime·atomicload(&n->state) == 0) |
| futexsleep(&n->state, 0); |
| } |
| |
| |
// Clone, the Linux rfork.
// Flag bits for runtime·clone; see runtime·newosproc for the subset
// actually used.
enum
{
	CLONE_VM             = 0x00000100,
	CLONE_FS             = 0x00000200,
	CLONE_FILES          = 0x00000400,
	CLONE_SIGHAND        = 0x00000800,
	CLONE_PTRACE         = 0x00002000,
	CLONE_VFORK          = 0x00004000,
	CLONE_PARENT         = 0x00008000,
	CLONE_THREAD         = 0x00010000,
	CLONE_NEWNS          = 0x00020000,
	CLONE_SYSVSEM        = 0x00040000,
	CLONE_SETTLS         = 0x00080000,
	CLONE_PARENT_SETTID  = 0x00100000,
	CLONE_CHILD_CLEARTID = 0x00200000,
	CLONE_UNTRACED       = 0x00800000,
	CLONE_CHILD_SETTID   = 0x01000000,
	CLONE_STOPPED        = 0x02000000,
	CLONE_NEWUTS         = 0x04000000,
	CLONE_NEWIPC         = 0x08000000,
};
| |
| void |
| runtime·newosproc(M *m, G *g, void *stk, void (*fn)(void)) |
| { |
| int32 ret; |
| int32 flags; |
| |
| /* |
| * note: strace gets confused if we use CLONE_PTRACE here. |
| */ |
| flags = CLONE_VM /* share memory */ |
| | CLONE_FS /* share cwd, etc */ |
| | CLONE_FILES /* share fd table */ |
| | CLONE_SIGHAND /* share sig handler table */ |
| | CLONE_THREAD /* revisit - okay for now */ |
| ; |
| |
| m->tls[0] = m->id; // so 386 asm can find it |
| if(0){ |
| runtime·printf("newosproc stk=%p m=%p g=%p fn=%p clone=%p id=%d/%d ostk=%p\n", |
| stk, m, g, fn, runtime·clone, m->id, m->tls[0], &m); |
| } |
| |
| if((ret = runtime·clone(flags, stk, m, g, fn)) < 0) { |
| runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret); |
| runtime·throw("runtime.newosproc"); |
| } |
| } |
| |
// runtime·osinit is the OS-specific initialization hook; this
// implementation has nothing to do.
void
runtime·osinit(void)
{
}
| |
// runtime·goenvs sets up the environment by delegating entirely to
// the shared Unix implementation.
void
runtime·goenvs(void)
{
	runtime·goenvs_unix();
}
| |
| // Called to initialize a new m (including the bootstrap m). |
| void |
| runtime·minit(void) |
| { |
| // Initialize signal handling. |
| m->gsignal = runtime·malg(32*1024); // OS X wants >=8K, Linux >=2K |
| runtime·signalstack(m->gsignal->stackguard - StackGuard, 32*1024); |
| } |
| |
| void |
| runtime·sigpanic(void) |
| { |
| switch(g->sig) { |
| case SIGBUS: |
| if(g->sigcode0 == BUS_ADRERR && g->sigcode1 < 0x1000) |
| runtime·panicstring("invalid memory address or nil pointer dereference"); |
| runtime·printf("unexpected fault address %p\n", g->sigcode1); |
| runtime·throw("fault"); |
| case SIGSEGV: |
| if((g->sigcode0 == 0 || g->sigcode0 == SEGV_MAPERR || g->sigcode0 == SEGV_ACCERR) && g->sigcode1 < 0x1000) |
| runtime·panicstring("invalid memory address or nil pointer dereference"); |
| runtime·printf("unexpected fault address %p\n", g->sigcode1); |
| runtime·throw("fault"); |
| case SIGFPE: |
| switch(g->sigcode0) { |
| case FPE_INTDIV: |
| runtime·panicstring("integer divide by zero"); |
| case FPE_INTOVF: |
| runtime·panicstring("integer overflow"); |
| } |
| runtime·panicstring("floating point error"); |
| } |
| runtime·panicstring(runtime·sigtab[g->sig].name); |
| } |