summaryrefslogtreecommitdiff
path: root/src/pkg/runtime/linux/thread.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/runtime/linux/thread.c')
-rw-r--r--src/pkg/runtime/linux/thread.c282
1 files changed, 282 insertions, 0 deletions
diff --git a/src/pkg/runtime/linux/thread.c b/src/pkg/runtime/linux/thread.c
new file mode 100644
index 000000000..cc9ba161b
--- /dev/null
+++ b/src/pkg/runtime/linux/thread.c
@@ -0,0 +1,282 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "defs.h"
+#include "signals.h"
+#include "os.h"
+
+// Linux futex.
+//
+// futexsleep(uint32 *addr, uint32 val)
+// futexwakeup(uint32 *addr)
+//
+// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
+// Futexwakeup wakes up one thread sleeping on addr.
+// Futexsleep is allowed to wake up spuriously.
+
+// Operation codes passed as the second argument to futex().
+enum
+{
+	FUTEX_WAIT	= 0,
+	FUTEX_WAKE	= 1,
+};
+
+// Error numbers.  futex() results are compared against the negated
+// values; futexsleep treats -EINTR and -EAGAIN as harmless.
+enum
+{
+	EINTR	= 4,
+	EAGAIN	= 11,
+};
+
+// longtime is the timeout futexsleep hands to FUTEX_WAIT: effectively forever.
+// TODO(rsc): I tried using 1<<40 here but futex woke up (-ETIMEDOUT).
+// I wonder if the timespec that gets to the kernel actually has
+// two 32-bit numbers in it, so that a 64-bit 1<<40 ends up being
+// 0 seconds, 1<<8 nanoseconds.
+static Timespec longtime =
+{
+ 1<<30, // 34 years, counted in seconds
+ 0 // presumably the nanoseconds field; Timespec is declared elsewhere
+};
+
+// futexsleep asks the kernel to put the caller to sleep on addr,
+// but only if *addr still equals val; the comparison and the sleep
+// are performed atomically by the futex call.
+// Returning without having slept, or waking up for no reason, is
+// allowed, so callers must re-check their own state afterward.
+static void
+futexsleep(uint32 *addr, uint32 val)
+{
+	int64 status;
+
+	status = futex(addr, FUTEX_WAIT, val, &longtime, nil, 0);
+
+	// Success, EAGAIN and EINTR are all acceptable outcomes.
+	// Any other negative result is reported and then the
+	// store below is executed (presumably to force a fault
+	// at a recognizable address).
+	if(status < 0 && status != -EAGAIN && status != -EINTR){
+		prints("futexsleep addr=");
+		sys·printpointer(addr);
+		prints(" val=");
+		sys·printint(val);
+		prints(" returned ");
+		sys·printint(status);
+		prints("\n");
+		*(int32*)0x1005 = 0x1005;
+	}
+}
+
+// futexwakeup issues a FUTEX_WAKE on addr with a count of 1, so that
+// if any procs are sleeping there, one of them gets woken.
+static void
+futexwakeup(uint32 *addr)
+{
+	int64 status;
+
+	status = futex(addr, FUTEX_WAKE, 1, nil, nil, 0);
+
+	// It is not known whether a wakeup can come back with
+	// EAGAIN or EINTR.  If it can, looping and calling futex
+	// again would be safe; for now every negative result is
+	// reported and followed by the store below (presumably
+	// to force a fault at a recognizable address).
+	if(status < 0){
+		prints("futexwakeup addr=");
+		sys·printpointer(addr);
+		prints(" returned ");
+		sys·printint(status);
+		prints("\n");
+		*(int32*)0x1006 = 0x1006;
+	}
+}
+
+
+// Lock and unlock.
+//
+// The lock state is a single 32-bit word that holds
+// a 31-bit count of threads waiting for the lock
+// and a single bit (the low bit) saying whether the lock is held.
+// The uncontended case runs entirely in user space.
+// When contention is detected, we defer to the kernel (futex).
+//
+// A reminder: compare-and-swap cas(addr, old, new) does
+// if(*addr == old) { *addr = new; return 1; }
+// else return 0;
+// but atomically.
+
+// futexlock acquires l, sleeping in the kernel while some other
+// thread holds it.
+//
+// Bit 0 of l->key is the "held" flag and the remaining bits count
+// the waiters, so registering or unregistering a waiter changes the
+// word by 2.
+static void
+futexlock(Lock *l)
+{
+	uint32 cur;
+
+	for(;;){
+		cur = l->key;
+
+		if((cur&1) == 0){
+			// Looks free: try to set the held bit.  If the
+			// word changed underneath us, start over.
+			if(cas(&l->key, cur, cur|1))
+				return;
+			continue;
+		}
+
+		// Held by someone else: register as a waiter first.
+		if(!cas(&l->key, cur, cur+2))
+			continue;
+
+		// We are counted; now sleep in the kernel.
+		//
+		// The obvious race against a concurrent unlock is
+		// avoided because the kernel refuses to put us to
+		// sleep once l->key is no longer cur+2.
+		//
+		// All we really need is for the held bit to still be
+		// set, and a futex variant could accommodate exactly
+		// that check, but let's not get carried away.
+		futexsleep(&l->key, cur+2);
+
+		// Awake again: take ourselves back out of the count.
+		for(;;){
+			cur = l->key;
+			if(cur < 2)
+				throw("bad lock key");
+			if(cas(&l->key, cur, cur-2))
+				break;
+		}
+
+		// Go around and compete for the lock once more.
+	}
+}
+
+// futexunlock releases l and, if the waiter count was nonzero at the
+// moment of release, wakes one sleeper.  Unlocking a lock whose held
+// bit is clear is reported through throw.
+static void
+futexunlock(Lock *l)
+{
+	uint32 seen;
+
+	// Atomically fetch the word and clear the held bit,
+	// retrying whenever another thread changes it first.
+	do{
+		seen = l->key;
+		if((seen&1) == 0)
+			throw("unlock of unlocked lock");
+	}while(!cas(&l->key, seen, seen&~1));
+
+	// Everything above bit 0 is the waiter count.
+	if((seen>>1) != 0)
+		futexwakeup(&l->key);
+}
+
+// lock acquires l on behalf of the current m, bumping m->locks
+// before doing so.  A negative count on entry is reported via throw.
+void
+lock(Lock *l)
+{
+	if(m->locks < 0){
+		throw("lock count");
+	}
+	m->locks += 1;
+
+	futexlock(l);
+}
+
+// unlock releases l on behalf of the current m.  m->locks is
+// decremented first; if that drives it negative, throw is called.
+void
+unlock(Lock *l)
+{
+	if(--m->locks < 0){
+		throw("lock count");
+	}
+
+	futexunlock(l);
+}
+
+
+// One-time notifications.
+//
+// Since the lock/unlock implementation already
+// takes care of sleeping in the kernel, we just reuse it.
+// (But it's a weird use, so it gets its own interface.)
+//
+// We use a lock to represent the event:
+// unlocked == event has happened.
+// Thus the lock starts out locked, and to wait for the
+// event you try to lock the lock. To signal the event,
+// you unlock the lock.
+
+// noteclear resets n to the "event has not happened" state:
+// the embedded lock is zeroed and then immediately acquired,
+// so that it starts out locked.
+void
+noteclear(Note *n)
+{
+	Lock *l;
+
+	l = &n->lock;
+	l->key = 0;	// memset(n, 0, sizeof *n)
+	futexlock(l);
+}
+
+// notewakeup signals the event represented by n by releasing
+// the lock embedded in it.
+void
+notewakeup(Note *n)
+{
+	Lock *l;
+
+	l = &n->lock;
+	futexunlock(l);
+}
+
+// notesleep waits for the event represented by n by acquiring the
+// lock embedded in it, then releases that lock again right away.
+void
+notesleep(Note *n)
+{
+	Lock *l;
+
+	l = &n->lock;
+	futexlock(l);
+
+	// Let other sleepers find out too.
+	futexunlock(l);
+}
+
+
+// Clone, the Linux rfork.
+// Flag bits for the first argument of clone(); each is a single bit.
+enum
+{
+	CLONE_VM		= 1<<8,		// 0x100
+	CLONE_FS		= 1<<9,		// 0x200
+	CLONE_FILES		= 1<<10,	// 0x400
+	CLONE_SIGHAND		= 1<<11,	// 0x800
+	CLONE_PTRACE		= 1<<13,	// 0x2000
+	CLONE_VFORK		= 1<<14,	// 0x4000
+	CLONE_PARENT		= 1<<15,	// 0x8000
+	CLONE_THREAD		= 1<<16,	// 0x10000
+	CLONE_NEWNS		= 1<<17,	// 0x20000
+	CLONE_SYSVSEM		= 1<<18,	// 0x40000
+	CLONE_SETTLS		= 1<<19,	// 0x80000
+	CLONE_PARENT_SETTID	= 1<<20,	// 0x100000
+	CLONE_CHILD_CLEARTID	= 1<<21,	// 0x200000
+	CLONE_UNTRACED		= 1<<23,	// 0x800000
+	CLONE_CHILD_SETTID	= 1<<24,	// 0x1000000
+	CLONE_STOPPED		= 1<<25,	// 0x2000000
+	CLONE_NEWUTS		= 1<<26,	// 0x4000000
+	CLONE_NEWIPC		= 1<<27,	// 0x8000000
+};
+
+// newosproc calls clone() with stk, m, g and fn and a fixed set of
+// sharing flags.  If clone reports failure (a negative result), the
+// store to address 123 below is executed.
+void
+newosproc(M *m, G *g, void *stk, void (*fn)(void))
+{
+	int64 status;
+	int32 flags;
+
+	// Note: strace gets confused if CLONE_PTRACE is included here.
+	flags = 0;
+	flags |= CLONE_PARENT;	// getppid doesn't change in child
+	flags |= CLONE_VM;	// share memory
+	flags |= CLONE_FS;	// share cwd, etc
+	flags |= CLONE_FILES;	// share fd table
+	flags |= CLONE_SIGHAND;	// share sig handler table
+	flags |= CLONE_THREAD;	// revisit - okay for now
+
+	// Debugging trace of the arguments; compiled out.
+	if(0){
+		prints("newosproc stk=");
+		sys·printpointer(stk);
+		prints(" m=");
+		sys·printpointer(m);
+		prints(" g=");
+		sys·printpointer(g);
+		prints(" fn=");
+		sys·printpointer(fn);
+		prints(" clone=");
+		sys·printpointer(clone);
+		prints("\n");
+	}
+
+	status = clone(flags, stk, m, g, fn);
+	if(status < 0){
+		*(int32*)123 = 123;
+	}
+}
+
+// osinit is the OS-specific initialization hook.
+// This implementation has nothing to do.
+void
+osinit(void)
+{
+	// Intentionally empty.
+}
+
+// minit is called to initialize a new m (including the bootstrap m).
+// It allocates the m's signal-handling g and passes that g's
+// stackguard, together with the stack size, to signalstack.
+void
+minit(void)
+{
+	enum
+	{
+		// OS X wants >=8K, Linux >=2K
+		SigStackSize = 32*1024,
+	};
+
+	// Initialize signal handling.
+	m->gsignal = malg(SigStackSize);
+	signalstack(m->gsignal->stackguard, SigStackSize);
+}