10 files changed, 2624 insertions, 0 deletions
diff --git a/src/pkg/runtime/arm/arch.h b/src/pkg/runtime/arm/arch.h
new file mode 100644
index 000000000..3ddb626dd
--- /dev/null
+++ b/src/pkg/runtime/arm/arch.h
@@ -0,0 +1,3 @@
+// thechar is the single-character identifier of this architecture.
+// NOTE(review): '5' presumably matches the ARM toolchain naming (asm.s
+// refers to the "5c" compiler) — confirm.
+enum {
+	thechar = '5'
+};
diff --git a/src/pkg/runtime/arm/asm.s b/src/pkg/runtime/arm/asm.s
new file mode 100644
index 000000000..63153658f
--- /dev/null
+++ b/src/pkg/runtime/arm/asm.s
@@ -0,0 +1,316 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "arm/asm.h"
+
+// Process entry point. Saves argc/argv, loads the addresses of
+// runtime·g0 and runtime·m0 into the g and m registers, gives g0 a
+// stack carved out of the OS-provided stack, runs runtime·check, args,
+// osinit and schedinit, queues runtime·mainstart through
+// runtime·newproc and finally calls runtime·mstart.
+// using frame size $-4 means do not save LR on stack.
+TEXT _rt0_arm(SB),7,$-4
+	MOVW	$0xcafebabe, R12	// NOTE(review): looks like a debugging marker; nothing visible here reads it
+
+	// copy arguments forward on an even stack
+	// use R13 instead of SP to avoid linker rewriting the offsets
+	MOVW	0(R13), R0		// argc
+	MOVW	$4(R13), R1		// argv
+	SUB	$64, R13		// plenty of scratch
+	AND	$~7, R13		// round R13 down to a multiple of 8
+	MOVW	R0, 60(R13)		// save argc, argv away
+	MOVW	R1, 64(R13)
+
+	// set up m and g registers
+	// g is R10, m is R9
+	MOVW	$runtime·g0(SB), g
+	MOVW	$runtime·m0(SB), m
+
+	// save m->g0 = g0
+	MOVW	g, m_g0(m)
+
+	// create istack out of the OS stack
+	// stackguard = R13 - 8192 + 104, stackbase = current R13
+	MOVW	$(-8192+104)(R13), R0
+	MOVW	R0, g_stackguard(g)	// (w 104b guard)
+	MOVW	R13, g_stackbase(g)
+	BL	runtime·emptyfunc(SB)	// fault if stack check is wrong
+
+	BL	runtime·check(SB)
+
+	// saved argc, argv
+	// reload them from the scratch slots and place them at 4(R13)/8(R13)
+	// for the call to runtime·args
+	MOVW	60(R13), R0
+	MOVW	R0, 4(R13)
+	MOVW	64(R13), R1
+	MOVW	R1, 8(R13)
+	BL	runtime·args(SB)
+	BL	runtime·osinit(SB)
+	BL	runtime·schedinit(SB)
+
+	// create a new goroutine to start program
+	// each MOVW.W below pre-decrements R13 by 4 and stores, i.e. a push
+	MOVW	$runtime·mainstart(SB), R0
+	MOVW.W	R0, -4(R13)	// push address of runtime·mainstart
+	MOVW	$8, R0
+	MOVW.W	R0, -4(R13)	// push 8; NOTE(review): presumably newproc's size argument — confirm
+	MOVW	$0, R0
+	MOVW.W	R0, -4(R13)	// push $0 as guard
+	BL	runtime·newproc(SB)
+	MOVW	$12(R13), R13	// pop args and LR
+
+	// start this M
+	BL	runtime·mstart(SB)
+
+	// Reached only if runtime·mstart returns: store to address 1000 to crash.
+	MOVW	$1234, R0
+	MOVW	$1000, R1
+	MOVW	R0, (R1)	// fail hard
+	B	runtime·_dep_dummy(SB)	// Never reached
+
+
+// Function queued by _rt0_arm via runtime·newproc: runs main·init,
+// runtime·initdone and main·main, then calls runtime·exit with a zero
+// argument. If runtime·exit returns, it crashes deliberately.
+TEXT runtime·mainstart(SB),7,$4
+	BL	main·init(SB)
+	BL	runtime·initdone(SB)
+	EOR	R0, R0			// R0 = 0
+	MOVW	R0, 0(R13)		// zero the word at 0(R13) before calling main·main
+	BL	main·main(SB)
+	MOVW	$0, R0
+	MOVW	R0, 4(SP)		// zero argument for runtime·exit
+	BL	runtime·exit(SB)
+	MOVW	$1234, R0		// exit returned: store to address 1001 to crash
+	MOVW	$1001, R1
+	MOVW	R0, (R1)	// fail hard
+	RET
+
+// TODO(kaib): remove these once i actually understand how the linker removes symbols
+// pull in dummy dependencies
+// This routine exists only so that the symbols below are referenced; the
+// sole branch to it (end of _rt0_arm) is marked "Never reached". Each
+// symbol needs to be referenced just once.
+TEXT runtime·_dep_dummy(SB),7,$0
+	BL	_div(SB)
+	BL	_divu(SB)
+	BL	_mod(SB)
+	BL	_modu(SB)
+	BL	_sfloat(SB)
+
+// runtime·breakpoint is currently a no-op on ARM: it returns immediately.
+TEXT runtime·breakpoint(SB),7,$0
+	// no breakpoint yet; let program exit
+	RET
+
+/*
+ *  go-routine
+ */
+
+// void gosave(Gobuf*)
+// save state in Gobuf; setjmp
+// Records the current SP, the return address (LR) and the current g in
+// the Gobuf passed as the first argument.
+TEXT runtime·gosave(SB), 7, $-4
+	MOVW	0(FP), R0		// gobuf
+	MOVW	SP, gobuf_sp(R0)	// gobuf->sp = SP
+	MOVW	LR, gobuf_pc(R0)	// gobuf->pc = our return address
+	MOVW	g, gobuf_g(R0)		// gobuf->g = current g
+	RET
+
+// void gogo(Gobuf*, uintptr)
+// restore state from Gobuf; longjmp
+// Loads g, SP and PC from the Gobuf; the second argument is left in R0
+// when execution continues at gobuf->pc. Does not return to its caller.
+TEXT runtime·gogo(SB), 7, $-4
+	MOVW	0(FP), R1		// gobuf
+	MOVW	4(FP), R0		// return 2nd arg
+	MOVW	gobuf_g(R1), g
+	MOVW	0(g), R2		// make sure g != nil
+	MOVW	gobuf_sp(R1), SP	// restore SP
+	MOVW	gobuf_pc(R1), PC	// loading PC performs the jump
+
+// void gogocall(Gobuf*, void (*fn)(void))
+// restore state from Gobuf but then call fn.
+// (call fn, returning to state in Gobuf)
+// using frame size $-4 means do not save LR on stack.
+// NOTE(review): a third word, 8(FP), is loaded into R2 although the
+// prototype above lists only two arguments and R2 is not used in this
+// routine — confirm whether fn relies on it.
+TEXT runtime·gogocall(SB), 7, $-4
+	MOVW	0(FP), R0		// gobuf
+	MOVW	4(FP), R1		// fn
+	MOVW	8(FP), R2		// fp offset
+	MOVW	gobuf_g(R0), g
+	MOVW	0(g), R3		// make sure g != nil
+	MOVW	gobuf_sp(R0), SP	// restore SP
+	MOVW	gobuf_pc(R0), LR	// fn will return to gobuf->pc
+	MOVW	R1, PC			// jump to fn
+
+// void mcall(void (*fn)(G*))
+// Switch to m->g0's stack, call fn(g).
+// Fn must never return.  It should gogo(&g->sched)
+// to keep running g.
+TEXT runtime·mcall(SB), 7, $-4
+	MOVW	fn+0(FP), R0
+
+	// Save caller state in g->gobuf.
+	MOVW	SP, (g_sched+gobuf_sp)(g)
+	MOVW	LR, (g_sched+gobuf_pc)(g)
+	MOVW	g, (g_sched+gobuf_g)(g)
+
+	// Switch to m->g0 & its stack, call fn.
+	MOVW	g, R1			// R1 = the g we are leaving
+	MOVW	m_g0(m), g
+	CMP	g, R1
+	BL.EQ	runtime·badmcall(SB)	// already running on g0
+	MOVW	(g_sched+gobuf_sp)(g), SP
+	SUB	$8, SP			// make room on g0's stack
+	MOVW	R1, 4(SP)		// fn's argument: the previous g
+	BL	(R0)
+	BL	runtime·badmcall2(SB)	// fn returned, which it must not do
+	RET
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// R1 frame size
+// R2 arg size
+// R3 prolog's LR
+// NB. we do not save R0 because we've forced 5c to pass all arguments
+// on the stack.
+// using frame size $-4 means do not save LR on stack.
+// Records the request in m (moreframesize, moreargsize, morebuf,
+// moreargp, morepc), switches to m->g0 and its saved stack pointer, and
+// branches to runtime·newstack.
+TEXT runtime·morestack(SB),7,$-4
+	// Cannot grow scheduler stack (m->g0).
+	MOVW	m_g0(m), R4
+	CMP	g, R4
+	BL.EQ	runtime·abort(SB)
+
+	// Save in m.
+	MOVW	R1, m_moreframesize(m)
+	MOVW	R2, m_moreargsize(m)
+
+	// Called from f.
+	// Set m->morebuf to f's caller.
+	MOVW	R3, (m_morebuf+gobuf_pc)(m)	// f's caller's PC
+	MOVW	SP, (m_morebuf+gobuf_sp)(m)	// f's caller's SP
+	MOVW	$4(SP), R3			// f's argument pointer
+	MOVW	R3, m_moreargp(m)	
+	MOVW	g, (m_morebuf+gobuf_g)(m)
+
+	// Set m->morepc to f's PC.
+	MOVW	LR, m_morepc(m)
+
+	// Call newstack on m->g0's stack.
+	MOVW	m_g0(m), g
+	MOVW	(g_sched+gobuf_sp)(g), SP
+	B	runtime·newstack(SB)
+
+// Called from reflection library.  Mimics morestack,
+// reuses stack growth code to create a frame
+// with the desired args running the desired function.
+//
+// func call(fn *byte, arg *byte, argsize uint32).
+//
+// Fills in the same m fields that runtime·morestack sets (morebuf,
+// morepc, moreargp, moreargsize, moreframesize) and then branches to
+// runtime·newstack on m->g0's stack.
+TEXT reflect·call(SB), 7, $-4
+	// Save our caller's state as the PC and SP to
+	// restore when returning from f.
+	MOVW	LR, (m_morebuf+gobuf_pc)(m)	// our caller's PC
+	MOVW	SP, (m_morebuf+gobuf_sp)(m)	// our caller's SP
+	MOVW	g,  (m_morebuf+gobuf_g)(m)
+
+	// Set up morestack arguments to call f on a new stack.
+	// We set f's frame size to 1, as a hint to newstack
+	// that this is a call from reflect·call.
+	// If it turns out that f needs a larger frame than
+	// the default stack, f's usual stack growth prolog will
+	// allocate a new segment (and recopy the arguments).
+	MOVW	4(SP), R0			// fn
+	MOVW	8(SP), R1			// arg frame
+	MOVW	12(SP), R2			// arg size
+
+	MOVW	R0, m_morepc(m)			// f's PC
+	MOVW	R1, m_moreargp(m)		// f's argument pointer
+	MOVW	R2, m_moreargsize(m)		// f's argument size
+	MOVW	$1, R3
+	MOVW	R3, m_moreframesize(m)		// f's frame size
+
+	// Call newstack on m->g0's stack.
+	MOVW	m_g0(m), g
+	MOVW	(g_sched+gobuf_sp)(g), SP
+	B	runtime·newstack(SB)
+
+// Return point when leaving stack.
+// using frame size $-4 means do not save LR on stack.
+// Stashes R0 in m->cret, switches to m->g0 and its saved stack pointer,
+// and branches to runtime·oldstack.
+TEXT runtime·lessstack(SB), 7, $-4
+	// Save return value in m->cret
+	MOVW	R0, m_cret(m)
+
+	// Call oldstack on m->g0's stack.
+	MOVW	m_g0(m), g
+	MOVW	(g_sched+gobuf_sp)(g), SP
+	B	runtime·oldstack(SB)
+
+// void jmpdefer(fn, sp);
+// called from deferreturn.
+// 1. grab stored LR for caller
+// 2. sub 4 bytes to get back to BL deferreturn
+// 3. B to fn
+// The effect is that when fn returns, it returns to the BL deferreturn
+// instruction, so deferreturn is called again.
+TEXT runtime·jmpdefer(SB), 7, $0
+	MOVW	0(SP), LR		// step 1: stored LR
+	MOVW	$-4(LR), LR	// BL deferreturn
+	MOVW	fn+0(FP), R0
+	MOVW	argp+4(FP), SP
+	MOVW	$-4(SP), SP	// SP is 4 below argp, due to saved LR
+	B		(R0)
+
+// Not implemented on ARM here: branches straight to runtime·cgounimpl.
+TEXT	runtime·asmcgocall(SB),7,$0
+	B	runtime·cgounimpl(SB)
+
+// Not implemented on ARM here: branches straight to runtime·cgounimpl.
+TEXT	runtime·cgocallback(SB),7,$0
+	B	runtime·cgounimpl(SB)
+
+// runtime·memclr zeroes n bytes (second argument) starting at the
+// pointer given as first argument, by calling runtime·memset with a
+// fill value of 0. The pointer is handed to memset in R0; the fill
+// value and count are stored in the frame. m and g are saved around the
+// call because memset overwrites those registers.
+TEXT runtime·memclr(SB),7,$20
+	MOVW	0(FP), R0	// destination pointer, passed to memset in R0
+	MOVW	$0, R1		// c = 0
+	MOVW	R1, -16(SP)
+	MOVW	4(FP), R1	// n
+	MOVW	R1, -12(SP)
+	MOVW	m, -8(SP)	// Save m and g
+	MOVW	g, -4(SP)
+	BL	runtime·memset(SB)
+	MOVW	-8(SP), m	// Restore m and g, memset clobbers them
+	MOVW	-4(SP), g
+	RET
+
+// Returns, in R0, the word stored at 0(SP).
+// NOTE(review): presumably the LR saved by the calling function's
+// prolog, i.e. that function's return address — confirm.
+TEXT runtime·getcallerpc(SB),7,$-4
+	MOVW	0(SP), R0
+	RET
+
+// Stores its second argument (x) into the word at 0(SP), the same slot
+// that runtime·getcallerpc reads.
+TEXT runtime·setcallerpc(SB),7,$-4
+	MOVW	x+4(FP), R0
+	MOVW	R0, 0(SP)
+	RET
+
+// Returns, in R0, the value of its first argument minus 4.
+TEXT runtime·getcallersp(SB),7,$-4
+	MOVW	0(FP), R0
+	MOVW	$-4(R0), R0
+	RET
+
+// Does nothing and returns. _rt0_arm calls it right after setting up
+// g0's stack bounds so that a wrong stack check faults early.
+TEXT runtime·emptyfunc(SB),0,$0
+	RET
+
+// Crashes the program by loading from address 0. There is no RET: the
+// load is expected to fault.
+TEXT runtime·abort(SB),7,$-4
+	MOVW	$0, R0
+	MOVW	(R0), R1
+
+// bool armcas(int32 *val, int32 old, int32 new)
+// Atomically:
+//	if(*val == old){
+//		*val = new;
+//		return 1;
+//	}else
+//		return 0;
+//
+// To implement runtime·cas in ../$GOOS/arm/sys.s
+// using the native instructions, use:
+//
+//	TEXT runtime·cas(SB),7,$0
+//		B	runtime·armcas(SB)
+//
+// Implemented as an LDREX/STREX retry loop; the result is returned in R0.
+// NOTE(review): no explicit memory barrier instruction appears in this
+// sequence — confirm that is acceptable for the targeted cores.
+TEXT runtime·armcas(SB),7,$0
+	MOVW	valptr+0(FP), R1
+	MOVW	old+4(FP), R2
+	MOVW	new+8(FP), R3
+casl:
+	LDREX	(R1), R0		// R0 = *val (exclusive load)
+	CMP		R0, R2
+	BNE		casfail			// *val != old
+	STREX	R3, (R1), R0		// try *val = new; R0 = status
+	CMP		$0, R0
+	BNE		casl			// non-zero status: store did not happen, retry
+	MOVW	$1, R0
+	RET
+casfail:
+	MOVW	$0, R0
+	RET
diff --git a/src/pkg/runtime/arm/atomic.c b/src/pkg/runtime/arm/atomic.c
new file mode 100644
index 000000000..52e4059ae
--- /dev/null
+++ b/src/pkg/runtime/arm/atomic.c
@@ -0,0 +1,83 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// runtime·xadd atomically adds delta to *val and returns the updated
+// value. It re-reads *val and retries runtime·cas until the swap succeeds.
+#pragma textflag 7
+uint32
+runtime·xadd(uint32 volatile *val, int32 delta)
+{
+	uint32 seen, want;
+
+	do {
+		seen = *val;
+		want = seen + delta;
+	} while(!runtime·cas(val, seen, want));
+	return want;
+}
+
+// runtime·xchg atomically stores v into *addr and returns the value
+// that was replaced, retrying runtime·cas until it succeeds.
+#pragma textflag 7
+uint32
+runtime·xchg(uint32 volatile* addr, uint32 v)
+{
+	uint32 prev;
+
+	do {
+		prev = *addr;
+	} while(!runtime·cas(addr, prev, v));
+	return prev;
+}
+
+// runtime·procyield busy-waits by counting a volatile counter from 0 up
+// to cnt.
+#pragma textflag 7
+void
+runtime·procyield(uint32 cnt)
+{
+	uint32 volatile spins;
+
+	spins = 0;
+	while(spins < cnt)
+		spins++;
+}
+
+// runtime·atomicload returns *addr, obtained through an atomic add of 0.
+#pragma textflag 7
+uint32
+runtime·atomicload(uint32 volatile* addr)
+{
+	uint32 cur;
+
+	cur = runtime·xadd(addr, 0);
+	return cur;
+}
+
+// runtime·atomicloadp returns the pointer stored at *addr, obtained
+// through an atomic add of 0 on the same word viewed as a uint32.
+#pragma textflag 7
+void*
+runtime·atomicloadp(void* volatile* addr)
+{
+	uint32 word;
+
+	word = runtime·xadd((uint32 volatile*)addr, 0);
+	return (void*)word;
+}
+
+// runtime·atomicstorep stores the pointer v into *addr, retrying
+// runtime·casp against the freshly read old value until it succeeds.
+#pragma textflag 7
+void
+runtime·atomicstorep(void* volatile* addr, void* v)
+{
+	void *prev;
+
+	do {
+		prev = *addr;
+	} while(!runtime·casp(addr, prev, v));
+}
+
+// runtime·atomicstore stores v into *addr, retrying runtime·cas against
+// the freshly read old value until it succeeds.
+#pragma textflag 7
+void
+runtime·atomicstore(uint32 volatile* addr, uint32 v)
+{
+	uint32 prev;
+
+	do {
+		prev = *addr;
+	} while(!runtime·cas(addr, prev, v));
+}
+\ No newline at end of file
diff --git a/src/pkg/runtime/arm/closure.c b/src/pkg/runtime/arm/closure.c
new file mode 100644
index 000000000..119e91b61
--- /dev/null
+++ b/src/pkg/runtime/arm/closure.c
@@ -0,0 +1,129 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+/*
+	There are two bits of magic:
+	- The signature of the compiler generated function uses two stack frames
+	as arguments (callerpc separates these frames)
+	- size determines how many arguments runtime.closure actually has
+	starting at arg0.
+
+	Example closure with 3 captured variables:
+	func closure(siz int32,
+	fn func(arg0, arg1, arg2 *ptr, callerpc uintptr, xxx) yyy,
+		arg0, arg1, arg2 *ptr) (func(xxx) yyy)
+
+	Code generated:
+	src R0
+	dst R1
+	end R3
+	tmp R4
+	frame = siz+4
+
+//skip loop for 0 size closures
+		MOVW.W	R14,-frame(R13)
+
+		MOVW	$vars(PC), R0
+		MOVW	$4(SP), R1
+		MOVW	$siz(R0), R3
+loop:		MOVW.P	4(R0), R4
+		MOVW.P	R4, 4(R1)
+		CMP		R0, R3
+		BNE		loop
+
+		MOVW	8(PC), R0
+		BL		(R0)			// 2 words
+		MOVW.P	frame(R13),R15
+fptr:		WORD	*fn
+vars:		WORD	arg0
+		WORD	arg1
+		WORD	arg2
+*/
+
+extern void runtime·cacheflush(byte* start, byte* end);
+
+// runtime·closure builds a closure at run time: it allocates a block
+// holding a short machine-code stub followed by a copy of the siz bytes
+// of captured arguments that start at arg0, and stores the block's
+// address in the result slot located right after those arguments.
+// When called, the stub copies the captured words onto the stack
+// (skipped when siz is 0), calls fn, and returns.
+// siz must be a non-negative multiple of 4 and at most 100.
+#pragma textflag 7
+void
+runtime·closure(int32 siz, byte *fn, byte *arg0)
+{
+	byte *p, *q, **ret;
+	uint32 *pc;
+	int32 n;
+
+	if(siz < 0 || siz%4 != 0)
+		runtime·throw("bad closure size");
+
+	// the result slot follows the siz bytes of captured arguments
+	ret = (byte**)((byte*)&arg0 + siz);
+
+	if(siz > 100) {
+		// TODO(kaib): implement stack growth preamble?
+		runtime·throw("closure too big");
+	}
+
+	// size of new fn.
+	// must match code laid out below.
+	// 6 words are always emitted (5 instructions + the fn word);
+	// the copy loop adds 7 more instructions.
+	if (siz > 0)
+		n = 6 * 4 + 7 * 4;
+	else
+		n = 6 * 4;
+
+	// store args aligned after code, so gc can find them.
+	n += siz;
+
+	p = runtime·mal(n);
+	*ret = p;
+	q = p + n - siz;	// q = start of the copied arguments ("vars")
+
+	pc = (uint32*)p;
+
+	//	MOVW.W	R14,-frame(R13)
+	*pc++ = 0xe52de000 | (siz + 4);
+
+	if(siz > 0) {
+		runtime·memmove(q, (byte*)&arg0, siz);
+
+		//	MOVW	$vars(PC), R0
+		// (the -8 accounts for PC reading as the instruction address + 8)
+		*pc = 0xe28f0000 | (int32)(q - (byte*)pc - 8);
+		pc++;
+
+		//	MOVW	$4(SP), R1
+		*pc++ = 0xe28d1004;
+
+		//	MOVW	$siz(R0), R3
+		*pc++ = 0xe2803000 | siz;
+
+		//	MOVW.P	4(R0), R4
+		*pc++ = 0xe4904004;
+		//	MOVW.P	R4, 4(R1)
+		*pc++ = 0xe4814004;
+		//	CMP		R0, R3
+		*pc++ = 0xe1530000;
+		//	BNE		loop
+		*pc++ = 0x1afffffb;
+	}
+
+	//	MOVW	fptr(PC), R0
+	// fptr is the word just before vars, i.e. at q - 4
+	*pc = 0xe59f0008 | (int32)((q - 4) -(byte*) pc - 8);
+	pc++;
+
+	//	BL		(R0)
+	// emitted as two instructions: LR = PC, then PC = R0
+	*pc++ = 0xe28fe000;
+	*pc++ = 0xe280f000;
+
+	//	MOVW.P	frame(R13),R15
+	*pc++ = 0xe49df000 | (siz + 4);
+
+	//	WORD	*fn
+	*pc++ = (uint32)fn;
+
+	p = (byte*)pc;
+
+	// the emitted code must not run into the argument area
+	if(p > q)
+		runtime·throw("bad math in sys.closure");
+
+	runtime·cacheflush(*ret, q+siz);
+}
+
diff --git a/src/pkg/runtime/arm/memmove.s b/src/pkg/runtime/arm/memmove.s
new file mode 100644
index 000000000..5c0e57404
--- /dev/null
+++ b/src/pkg/runtime/arm/memmove.s
@@ -0,0 +1,255 @@
+// Inferno's libkern/memmove-arm.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s
+//
+//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
+//         Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Register numbers used through the R(n) notation below.
+TS = 0					/* to start; advances in the forward copy */
+TE = 1					/* to end; retreats in the backward copy */
+FROM = 2				/* source pointer */
+N = 3					/* byte count */
+TMP = 3					/* N and TMP don't overlap */
+TMP1 = 4				/* scratch word for the 4-byte loops */
+
+// TODO(kaib): This can be done with the existing registers if LR is re-used. Same for memset.
+//
+// memmove(to, from, n): copies n bytes from `from` to `to` and returns
+// `to` in R0. When to <= from (unsigned) the copy runs forward,
+// otherwise it runs backward from the end of both buffers. Each
+// direction first byte-copies until the destination is 4-byte aligned,
+// then uses 32-byte and 4-byte loops if the source is aligned too, or
+// the shifting loops at _bunaligned/_funaligned if it is not, and
+// finishes byte by byte.
+// R4-R11 are used as scratch, which includes R9 (m) and R10 (g); both
+// are saved in the frame on entry and restored at _return.
+TEXT runtime·memmove(SB), 7, $8
+	// save m (R9) and g (R10)
+	MOVW	R9, 4(R13)
+	MOVW	R10, 8(R13)
+
+_memmove:
+	MOVW	to+0(FP), R(TS)
+	MOVW	from+4(FP), R(FROM)
+	MOVW	n+8(FP), R(N)
+
+	ADD	R(N), R(TS), R(TE)	/* to end pointer */
+
+	CMP	R(FROM), R(TS)
+	BLS	_forward		/* to <= from: copy forward */
+
+_back:
+	ADD	R(N), R(FROM)		/* from end pointer */
+	CMP	$4, R(N)		/* need at least 4 bytes to copy */
+	BLT	_b1tail
+
+_b4align:				/* align destination on 4 */
+	AND.S	$3, R(TE), R(TMP)
+	BEQ	_b4aligned
+
+	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
+	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
+	B	_b4align
+
+_b4aligned:				/* is source now aligned? */
+	AND.S	$3, R(FROM), R(TMP)	/* R(TMP) = from & 3, used by _bunaligned */
+	BNE	_bunaligned
+
+	ADD	$31, R(TS), R(TMP)	/* do 32-byte chunks if possible */
+_b32loop:
+	CMP	R(TMP), R(TE)
+	BLS	_b4tail
+
+	MOVM.DB.W (R(FROM)), [R4-R11]
+	MOVM.DB.W [R4-R11], (R(TE))
+	B	_b32loop
+
+_b4tail:				/* do remaining words if possible */
+	ADD	$3, R(TS), R(TMP)
+_b4loop:
+	CMP	R(TMP), R(TE)
+	BLS	_b1tail
+
+	MOVW.W	-4(R(FROM)), R(TMP1)	/* pre-indexed */
+	MOVW.W	R(TMP1), -4(R(TE))	/* pre-indexed */
+	B	_b4loop
+
+_b1tail:				/* remaining bytes */
+	CMP	R(TE), R(TS)
+	BEQ	_return
+
+	MOVBU.W	-1(R(FROM)), R(TMP)	/* pre-indexed */
+	MOVBU.W	R(TMP), -1(R(TE))	/* pre-indexed */
+	B	_b1tail
+
+_forward:
+	CMP	$4, R(N)		/* need at least 4 bytes to copy */
+	BLT	_f1tail
+
+_f4align:				/* align destination on 4 */
+	AND.S	$3, R(TS), R(TMP)
+	BEQ	_f4aligned
+
+	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
+	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
+	B	_f4align
+
+_f4aligned:				/* is source now aligned? */
+	AND.S	$3, R(FROM), R(TMP)	/* R(TMP) = from & 3, used by _funaligned */
+	BNE	_funaligned
+
+	SUB	$31, R(TE), R(TMP)	/* do 32-byte chunks if possible */
+_f32loop:
+	CMP	R(TMP), R(TS)
+	BHS	_f4tail
+
+	MOVM.IA.W (R(FROM)), [R4-R11]
+	MOVM.IA.W [R4-R11], (R(TS))
+	B	_f32loop
+
+_f4tail:
+	SUB	$3, R(TE), R(TMP)	/* do remaining words if possible */
+_f4loop:
+	CMP	R(TMP), R(TS)
+	BHS	_f1tail
+
+	MOVW.P	4(R(FROM)), R(TMP1)	/* implicit write back */
+	MOVW.P	R(TMP1), 4(R(TS))	/* implicit write back; same register as the load above */
+	B	_f4loop
+
+_f1tail:
+	CMP	R(TS), R(TE)
+	BEQ	_return
+
+	MOVBU.P	1(R(FROM)), R(TMP)	/* implicit write back */
+	MOVBU.P	R(TMP), 1(R(TS))	/* implicit write back */
+	B	_f1tail
+
+_return:
+	// restore m (R9) and g (R10)
+	MOVW	4(R13), R9
+	MOVW	8(R13), R10
+	MOVW	to+0(FP), R0		// return value: the original destination
+	RET
+
+// Register numbers for the unaligned-source loops.
+RSHIFT = 4
+LSHIFT = 5
+OFFSET = 6
+
+// Block read (BRn) and block write (BWn) registers for the backward
+// loop. Each BWn shares its register with BR(n+1).
+BR0 = 7
+BW0 = 8
+BR1 = 8
+BW1 = 9
+BR2 = 9
+BW2 = 10
+BR3 = 10
+BW3 = 11
+
+// Backward copy when the destination end is 4-byte aligned but the
+// source is not. On entry R(TMP) holds from & 3 (1, 2 or 3), which
+// selects the shift pair and the byte offset used to fix up R(FROM)
+// afterwards. The source is read as aligned words, and each output word
+// is assembled from two neighbouring input words by shifting and ORing.
+// Whatever is left is handled byte by byte at _b1tail.
+_bunaligned:
+	CMP	$2, R(TMP)		/* is R(TMP) < 2 ? */
+
+	MOVW.LT	$8, R(RSHIFT)		/* (R(n)<<24)|(R(n-1)>>8) */
+	MOVW.LT	$24, R(LSHIFT)
+	MOVW.LT	$1, R(OFFSET)
+
+	MOVW.EQ	$16, R(RSHIFT)		/* (R(n)<<16)|(R(n-1)>>16) */
+	MOVW.EQ	$16, R(LSHIFT)
+	MOVW.EQ	$2, R(OFFSET)
+
+	MOVW.GT	$24, R(RSHIFT)		/* (R(n)<<8)|(R(n-1)>>24) */
+	MOVW.GT	$8, R(LSHIFT)
+	MOVW.GT	$3, R(OFFSET)
+
+	ADD	$16, R(TS), R(TMP)	/* do 16-byte chunks if possible */
+	CMP	R(TMP), R(TE)
+	BLS	_b1tail
+
+	AND	$~0x03, R(FROM)		/* align source */
+	MOVW	(R(FROM)), R(BR0)	/* prime first block register */
+
+_bu16loop:
+	CMP	R(TMP), R(TE)
+	BLS	_bu1tail
+
+	MOVW	R(BR0)<<R(LSHIFT), R(BW3)	/* carry-over from the previous block */
+	MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)]
+	ORR	R(BR3)>>R(RSHIFT), R(BW3)
+
+	MOVW	R(BR3)<<R(LSHIFT), R(BW2)
+	ORR	R(BR2)>>R(RSHIFT), R(BW2)
+
+	MOVW	R(BR2)<<R(LSHIFT), R(BW1)
+	ORR	R(BR1)>>R(RSHIFT), R(BW1)
+
+	MOVW	R(BR1)<<R(LSHIFT), R(BW0)
+	ORR	R(BR0)>>R(RSHIFT), R(BW0)
+
+	MOVM.DB.W [R(BW0)-R(BW3)], (R(TE))
+	B	_bu16loop
+
+_bu1tail:
+	ADD	R(OFFSET), R(FROM)	/* undo the source alignment before the byte loop */
+	B	_b1tail
+
+// Block write (FWn) and block read (FRn) registers for the forward
+// loop. Each FRn shares its register with FW(n+1).
+FW0 = 7
+FR0 = 8
+FW1 = 8
+FR1 = 9
+FW2 = 9
+FR2 = 10
+FW3 = 10
+FR3 = 11
+
+// Forward copy when the destination is 4-byte aligned but the source is
+// not. On entry R(TMP) holds from & 3 (1, 2 or 3), which selects the
+// shift pair and the byte offset used to fix up R(FROM) afterwards. The
+// source is read as aligned words, and each output word is assembled
+// from two neighbouring input words by shifting and ORing. Whatever is
+// left is handled byte by byte at _f1tail.
+_funaligned:
+	CMP	$2, R(TMP)
+
+	MOVW.LT	$8, R(RSHIFT)		/* (R(n+1)<<24)|(R(n)>>8) */
+	MOVW.LT	$24, R(LSHIFT)
+	MOVW.LT	$3, R(OFFSET)
+
+	MOVW.EQ	$16, R(RSHIFT)		/* (R(n+1)<<16)|(R(n)>>16) */
+	MOVW.EQ	$16, R(LSHIFT)
+	MOVW.EQ	$2, R(OFFSET)
+
+	MOVW.GT	$24, R(RSHIFT)		/* (R(n+1)<<8)|(R(n)>>24) */
+	MOVW.GT	$8, R(LSHIFT)
+	MOVW.GT	$1, R(OFFSET)
+
+	SUB	$16, R(TE), R(TMP)	/* do 16-byte chunks if possible */
+	CMP	R(TMP), R(TS)
+	BHS	_f1tail
+
+	AND	$~0x03, R(FROM)		/* align source */
+	MOVW.P	4(R(FROM)), R(FR3)	/* prime last block register, implicit write back */
+
+_fu16loop:
+	CMP	R(TMP), R(TS)
+	BHS	_fu1tail
+
+	MOVW	R(FR3)>>R(RSHIFT), R(FW0)	/* carry-over from the previous block */
+	MOVM.IA.W (R(FROM)), [R(FR0)-R(FR3)]
+	ORR	R(FR0)<<R(LSHIFT), R(FW0)
+
+	MOVW	R(FR0)>>R(RSHIFT), R(FW1)
+	ORR	R(FR1)<<R(LSHIFT), R(FW1)
+
+	MOVW	R(FR1)>>R(RSHIFT), R(FW2)
+	ORR	R(FR2)<<R(LSHIFT), R(FW2)
+
+	MOVW	R(FR2)>>R(RSHIFT), R(FW3)
+	ORR	R(FR3)<<R(LSHIFT), R(FW3)
+
+	MOVM.IA.W [R(FW0)-R(FW3)], (R(TS))
+	B	_fu16loop
+
+_fu1tail:
+	SUB	R(OFFSET), R(FROM)	/* undo the source alignment before the byte loop */
+	B	_f1tail
diff --git a/src/pkg/runtime/arm/memset.s b/src/pkg/runtime/arm/memset.s
new file mode 100644
index 000000000..974b8da7a
--- /dev/null
+++ b/src/pkg/runtime/arm/memset.s
@@ -0,0 +1,94 @@
+// Inferno's libkern/memset-arm.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/memset-arm.s
+//
+//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
+//         Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Register numbers used through the R(n) notation below.
+TO = 1					/* current destination pointer */
+TOE = 2					/* destination end pointer */
+N = 3					/* byte count */
+TMP = 3					/* N and TMP don't overlap */
+
+// TODO(kaib): memset clobbers R9 and R10 (m and g). This makes the
+// registers unpredictable if (when) memset SIGSEGV's. Fix it by
+// moving the R4-R11 register bank.
+//
+// Fills n bytes starting at the pointer passed in R0 with the low byte
+// of data. For n >= 4 the byte is replicated into a word, the
+// destination is aligned to 4 bytes, and the bulk is written in 32-byte
+// and then 4-byte steps; the rest is written byte by byte.
+// R4-R11 are overwritten and not restored here.
+TEXT runtime·memset(SB), $0
+	MOVW	R0, R(TO)		/* destination arrives in R0 */
+	MOVW	data+4(FP), R(4)
+	MOVW	n+8(FP), R(N)
+
+	ADD	R(N), R(TO), R(TOE)	/* to end pointer */
+
+	CMP	$4, R(N)		/* need at least 4 bytes to copy */
+	BLT	_1tail
+
+	AND	$0xFF, R(4)		/* it's a byte */
+	SLL	$8, R(4), R(TMP)	/* replicate to a word */
+	ORR	R(TMP), R(4)
+	SLL	$16, R(4), R(TMP)
+	ORR	R(TMP), R(4)
+
+_4align:				/* align on 4 */
+	AND.S	$3, R(TO), R(TMP)
+	BEQ	_4aligned
+
+	MOVBU.P	R(4), 1(R(TO))		/* implicit write back */
+	B	_4align
+
+_4aligned:
+	SUB	$31, R(TOE), R(TMP)	/* do 32-byte chunks if possible */
+	CMP	R(TMP), R(TO)
+	BHS	_4tail
+
+	MOVW	R4, R5			/* replicate */
+	MOVW	R4, R6
+	MOVW	R4, R7
+	MOVW	R4, R8
+	MOVW	R4, R9
+	MOVW	R4, R10
+	MOVW	R4, R11
+
+_f32loop:
+	CMP	R(TMP), R(TO)
+	BHS	_4tail
+
+	MOVM.IA.W [R4-R11], (R(TO))	/* store 8 words, advancing R(TO) */
+	B	_f32loop
+
+_4tail:
+	SUB	$3, R(TOE), R(TMP)	/* do remaining words if possible */
+_4loop:
+	CMP	R(TMP), R(TO)
+	BHS	_1tail
+
+	MOVW.P	R(4), 4(R(TO))		/* implicit write back */
+	B	_4loop
+
+_1tail:
+	CMP	R(TO), R(TOE)
+	BEQ	_return
+
+	MOVBU.P	R(4), 1(R(TO))		/* implicit write back */
+	B	_1tail
+
+_return:
+	RET
diff --git a/src/pkg/runtime/arm/softfloat.c b/src/pkg/runtime/arm/softfloat.c
new file mode 100644
index 000000000..0a071dada
--- /dev/null
+++ b/src/pkg/runtime/arm/softfloat.c
@@ -0,0 +1,525 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Software floating point interpretation of ARM 7500 FP instructions.
+// The interpretation is not bit compatible with the 7500.
+// It uses true little-endian doubles, while the 7500 used mixed-endian.
+
+#include "runtime.h"
+
+#define CPSR 14
+#define FLAGS_N (1 << 31)
+#define FLAGS_Z (1 << 30)
+#define FLAGS_C (1 << 29)
+#define FLAGS_V (1 << 28)
+
+void	runtime·abort(void);
+void	math·sqrtGoC(uint64, uint64*);
+
+static	uint32	trace = 0;
+
+// fabort prints "Unsupported floating point instruction" and calls
+// runtime·abort.
+// NOTE(review): the if (1) wrapper looks deliberate (perhaps to keep the
+// compiler from treating the function as non-returning) — confirm
+// before simplifying.
+static void
+fabort(void)
+{
+	if (1) {
+		runtime·printf("Unsupported floating point instruction\n");
+		runtime·abort();
+	}
+}
+
+// putf stores the 32-bit value val in the low word of software FP
+// register reg.
+static void
+putf(uint32 reg, uint32 val)
+{
+	uint32 *slot;
+
+	slot = &m->freglo[reg];
+	*slot = val;
+}
+
+// putd stores the 64-bit value val in software FP register reg: the low
+// 32 bits go to freglo, the high 32 bits to freghi.
+static void
+putd(uint32 reg, uint64 val)
+{
+	uint32 lo, hi;
+
+	lo = (uint32)val;
+	hi = (uint32)(val>>32);
+	m->freglo[reg] = lo;
+	m->freghi[reg] = hi;
+}
+
+// getd returns the 64-bit contents of software FP register reg,
+// combining freghi (upper 32 bits) and freglo (lower 32 bits).
+static uint64
+getd(uint32 reg)
+{
+	uint64 lo, hi;
+
+	lo = m->freglo[reg];
+	hi = m->freghi[reg];
+	return lo | (hi<<32);
+}
+
+// fprint prints the high and low words of software FP registers 0-15,
+// one register per line.
+static void
+fprint(void)
+{
+	uint32 r;
+
+	r = 0;
+	while(r < 16) {
+		runtime·printf("\tf%d:\t%X %X\n", r, m->freghi[r], m->freglo[r]);
+		r++;
+	}
+}
+
+// d2f converts the 64-bit float bit pattern d to a 32-bit float bit
+// pattern using runtime·f64to32c.
+static uint32
+d2f(uint64 d)
+{
+	uint32 single;
+
+	runtime·f64to32c(d, &single);
+	return single;
+}
+
+// f2d converts the 32-bit float bit pattern f to a 64-bit float bit
+// pattern using runtime·f32to64c.
+static uint64
+f2d(uint32 f)
+{
+	uint64 wide;
+
+	runtime·f32to64c(f, &wide);
+	return wide;
+}
+
+// fstatus maps the outcome of a floating point comparison to condition
+// flag bits:
+//	unordered (nan)	C|V
+//	less (cmp < 0)	N
+//	greater (cmp > 0)	C
+//	equal		Z|C
+static uint32
+fstatus(bool nan, int32 cmp)
+{
+	if(nan)
+		return FLAGS_C | FLAGS_V;
+	if(cmp < 0)
+		return FLAGS_N;
+	if(cmp > 0)
+		return FLAGS_C;
+	return FLAGS_Z | FLAGS_C;
+}
+
+// returns number of words that the fp instruction
+// is occupying, 0 if next instruction isn't float.
+//
+// stepflt interprets the single instruction at pc. regs is the saved
+// CPU register array (indexed by register number, with the status word
+// at index CPSR); the software FP registers live in m->freglo and
+// m->freghi and the FP comparison flags in m->fflag.
+// A few integer instructions that can appear inside FP sequences are
+// simulated as well (PC-relative load into R11, add SP to R11, and an
+// unconditional branch); for the branch the return value is the branch
+// distance in words rather than 1.
+// Instructions in the 0xee/0xed groups that are not recognised print a
+// message and call fabort.
+static uint32
+stepflt(uint32 *pc, uint32 *regs)
+{
+	uint32 i, regd, regm, regn;
+	int32 delta;
+	uint32 *addr;
+	uint64 uval;
+	int64 sval;
+	bool nan, ok;
+	int32 cmp;
+
+	i = *pc;
+
+	if(trace)
+		runtime·printf("stepflt %p %x\n", pc, i);
+
+	// special cases
+	if((i&0xfffff000) == 0xe59fb000) {
+		// load r11 from pc-relative address.
+		// might be part of a floating point move
+		// (or might not, but no harm in simulating
+		// one instruction too many).
+		addr = (uint32*)((uint8*)pc + (i&0xfff) + 8);
+		regs[11] = addr[0];
+
+		if(trace)
+			runtime·printf("*** cpu R[%d] = *(%p) %x\n",
+				11, addr, regs[11]);
+		return 1;
+	}
+	if(i == 0xe08bb00d) {
+		// add sp to r11.
+		// might be part of a large stack offset address
+		// (or might not, but again no harm done).
+		regs[11] += regs[13];
+
+		if(trace)
+			runtime·printf("*** cpu R[%d] += R[%d] %x\n",
+				11, 13, regs[11]);
+		return 1;
+	}
+	if(i == 0xeef1fa10) {
+		// copy the saved FP comparison flags into the top four
+		// bits of the saved status word.
+		regs[CPSR] = (regs[CPSR]&0x0fffffff) | m->fflag;
+
+		if(trace)
+			runtime·printf("*** fpsr R[CPSR] = F[CPSR] %x\n", regs[CPSR]);
+		return 1;
+	}
+	if((i&0xff000000) == 0xea000000) {
+		// unconditional branch
+		// can happen in the middle of floating point
+		// if the linker decides it is time to lay down
+		// a sequence of instruction stream constants.
+		delta = i&0xffffff;
+		delta = (delta<<8) >> 8;	// sign extend
+
+		if(trace)
+			runtime·printf("*** cpu PC += %x\n", (delta+2)*4);
+		return delta+2;
+	}
+
+	goto stage1;
+
+stage1:	// load/store regn is cpureg, regm is 8bit offset
+	regd = i>>12 & 0xf;
+	regn = i>>16 & 0xf;
+	regm = (i & 0xff) << 2;	// PLUS or MINUS ??
+	// Bit 23 is set in all four case patterns below and is part of the
+	// switch mask, so only those encodings match here and the offset is
+	// always added.
+
+	switch(i & 0xfff00f00) {
+	default:
+		goto stage2;
+
+	case 0xed900a00:	// single load
+		addr = (uint32*)(regs[regn] + regm);
+		m->freglo[regd] = addr[0];
+
+		if(trace)
+			runtime·printf("*** load F[%d] = %x\n",
+				regd, m->freglo[regd]);
+		break;
+
+	case 0xed900b00:	// double load
+		addr = (uint32*)(regs[regn] + regm);
+		m->freglo[regd] = addr[0];
+		m->freghi[regd] = addr[1];
+
+		if(trace)
+			runtime·printf("*** load D[%d] = %x-%x\n",
+				regd, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xed800a00:	// single store
+		addr = (uint32*)(regs[regn] + regm);
+		addr[0] = m->freglo[regd];
+
+		if(trace)
+			runtime·printf("*** *(%p) = %x\n",
+				addr, addr[0]);
+		break;
+
+	case 0xed800b00:	// double store
+		addr = (uint32*)(regs[regn] + regm);
+		addr[0] = m->freglo[regd];
+		addr[1] = m->freghi[regd];
+
+		if(trace)
+			runtime·printf("*** *(%p) = %x-%x\n",
+				addr, addr[1], addr[0]);
+		break;
+	}
+	return 1;
+
+stage2:	// regd, regm, regn are 4bit variables
+	regm = i>>0 & 0xf;
+	switch(i & 0xfff00ff0) {
+	default:
+		goto stage3;
+
+	case 0xf3000110:	// veor
+		m->freglo[regd] = m->freglo[regm]^m->freglo[regn];
+		m->freghi[regd] = m->freghi[regm]^m->freghi[regn];
+
+		if(trace)
+			runtime·printf("*** veor D[%d] = %x-%x\n",
+				regd, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb00b00:	// D[regd] = const(regn,regm)
+		// regn:regm form an 8-bit immediate that is expanded into
+		// the high word of a double; the low word is zero.
+		regn = (regn<<4) | regm;
+		regm = 0x40000000UL;
+		if(regn & 0x80)
+			regm |= 0x80000000UL;
+		if(regn & 0x40)
+			regm ^= 0x7fc00000UL;
+		regm |= (regn & 0x3f) << 16;
+		m->freglo[regd] = 0;
+		m->freghi[regd] = regm;
+
+		if(trace)
+			runtime·printf("*** immed D[%d] = %x-%x\n",
+				regd, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb00a00:	// F[regd] = const(regn,regm)
+		// same expansion, into a single precision word.
+		regn = (regn<<4) | regm;
+		regm = 0x40000000UL;
+		if(regn & 0x80)
+			regm |= 0x80000000UL;
+		if(regn & 0x40)
+			regm ^= 0x7e000000UL;
+		regm |= (regn & 0x3f) << 19;
+		m->freglo[regd] = regm;
+
+		// NOTE(review): the trace label says D[] although this is
+		// the single precision form.
+		if(trace)
+			runtime·printf("*** immed D[%d] = %x\n",
+				regd, m->freglo[regd]);
+		break;
+
+	case 0xee300b00:	// D[regd] = D[regn]+D[regm]
+		runtime·fadd64c(getd(regn), getd(regm), &uval);
+		putd(regd, uval);
+
+		if(trace)
+			runtime·printf("*** add D[%d] = D[%d]+D[%d] %x-%x\n",
+				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xee300a00:	// F[regd] = F[regn]+F[regm]
+		// single precision arithmetic is done by widening to double,
+		// operating, and narrowing the result.
+		runtime·fadd64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+		m->freglo[regd] = d2f(uval);
+
+		if(trace)
+			runtime·printf("*** add F[%d] = F[%d]+F[%d] %x\n",
+				regd, regn, regm, m->freglo[regd]);
+		break;
+
+	case 0xee300b40:	// D[regd] = D[regn]-D[regm]
+		runtime·fsub64c(getd(regn), getd(regm), &uval);
+		putd(regd, uval);
+
+		if(trace)
+			runtime·printf("*** sub D[%d] = D[%d]-D[%d] %x-%x\n",
+				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xee300a40:	// F[regd] = F[regn]-F[regm]
+		runtime·fsub64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+		m->freglo[regd] = d2f(uval);
+
+		if(trace)
+			runtime·printf("*** sub F[%d] = F[%d]-F[%d] %x\n",
+				regd, regn, regm, m->freglo[regd]);
+		break;
+
+	case 0xee200b00:	// D[regd] = D[regn]*D[regm]
+		runtime·fmul64c(getd(regn), getd(regm), &uval);
+		putd(regd, uval);
+
+		if(trace)
+			runtime·printf("*** mul D[%d] = D[%d]*D[%d] %x-%x\n",
+				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xee200a00:	// F[regd] = F[regn]*F[regm]
+		runtime·fmul64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+		m->freglo[regd] = d2f(uval);
+
+		if(trace)
+			runtime·printf("*** mul F[%d] = F[%d]*F[%d] %x\n",
+				regd, regn, regm, m->freglo[regd]);
+		break;
+
+	case 0xee800b00:	// D[regd] = D[regn]/D[regm]
+		runtime·fdiv64c(getd(regn), getd(regm), &uval);
+		putd(regd, uval);
+
+		if(trace)
+			runtime·printf("*** div D[%d] = D[%d]/D[%d] %x-%x\n",
+				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xee800a00:	// F[regd] = F[regn]/F[regm]
+		runtime·fdiv64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
+		m->freglo[regd] = d2f(uval);
+
+		if(trace)
+			runtime·printf("*** div F[%d] = F[%d]/F[%d] %x\n",
+				regd, regn, regm, m->freglo[regd]);
+		break;
+
+	case 0xee000b10:	// S[regn] = R[regd] (MOVW) (regm ignored)
+		m->freglo[regn] = regs[regd];
+
+		if(trace)
+			runtime·printf("*** cpy S[%d] = R[%d] %x\n",
+				regn, regd, m->freglo[regn]);
+		break;
+
+	case 0xee100b10:	// R[regd] = S[regn] (MOVW) (regm ignored)
+		regs[regd] = m->freglo[regn];
+
+		if(trace)
+			runtime·printf("*** cpy R[%d] = S[%d] %x\n",
+				regd, regn, regs[regd]);
+		break;
+	}
+	return 1;
+
+stage3:	// regd, regm are 4bit variables
+	switch(i & 0xffff0ff0) {
+	default:
+		goto done;
+
+	case 0xeeb00a40:	// F[regd] = F[regm] (MOVF)
+		m->freglo[regd] = m->freglo[regm];
+
+		if(trace)
+			runtime·printf("*** F[%d] = F[%d] %x\n",
+				regd, regm, m->freglo[regd]);
+		break;
+
+	case 0xeeb00b40:	// D[regd] = D[regm] (MOVD)
+		m->freglo[regd] = m->freglo[regm];
+		m->freghi[regd] = m->freghi[regm];
+
+		if(trace)
+			runtime·printf("*** D[%d] = D[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb10bc0:	// D[regd] = sqrt D[regm]
+		math·sqrtGoC(getd(regm), &uval);
+		putd(regd, uval);
+
+		if(trace)
+			runtime·printf("*** D[%d] = sqrt D[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb40bc0:	// D[regd] :: D[regm] (CMPD)
+		runtime·fcmp64c(getd(regd), getd(regm), &cmp, &nan);
+		m->fflag = fstatus(nan, cmp);
+
+		if(trace)
+			runtime·printf("*** cmp D[%d]::D[%d] %x\n",
+				regd, regm, m->fflag);
+		break;
+
+	case 0xeeb40ac0:	// F[regd] :: F[regm] (CMPF)
+		runtime·fcmp64c(f2d(m->freglo[regd]), f2d(m->freglo[regm]), &cmp, &nan);
+		m->fflag = fstatus(nan, cmp);
+
+		if(trace)
+			runtime·printf("*** cmp F[%d]::F[%d] %x\n",
+				regd, regm, m->fflag);
+		break;
+
+	case 0xeeb70ac0:	// D[regd] = F[regm] (MOVFD)
+		putd(regd, f2d(m->freglo[regm]));
+
+		if(trace)
+			runtime·printf("*** f2d D[%d]=F[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb70bc0:	// F[regd] = D[regm] (MOVDF)
+		m->freglo[regd] = d2f(getd(regm));
+
+		// NOTE(review): only freglo is written here; the freghi
+		// word printed by the trace is left over from earlier.
+		if(trace)
+			runtime·printf("*** d2f F[%d]=D[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	// The four float-to-integer cases below store 0 when the
+	// conversion fails or the result does not fit in 32 bits.
+	case 0xeebd0ac0:	// S[regd] = F[regm] (MOVFW)
+		runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
+		if(!ok || (int32)sval != sval)
+			sval = 0;
+		m->freglo[regd] = sval;
+
+		if(trace)
+			runtime·printf("*** fix S[%d]=F[%d] %x\n",
+				regd, regm, m->freglo[regd]);
+		break;
+
+	case 0xeebc0ac0:	// S[regd] = F[regm] (MOVFW.U)
+		runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
+		if(!ok || (uint32)sval != sval)
+			sval = 0;
+		m->freglo[regd] = sval;
+
+		if(trace)
+			runtime·printf("*** fix unsigned S[%d]=F[%d] %x\n",
+				regd, regm, m->freglo[regd]);
+		break;
+
+	case 0xeebd0bc0:	// S[regd] = D[regm] (MOVDW)
+		runtime·f64tointc(getd(regm), &sval, &ok);
+		if(!ok || (int32)sval != sval)
+			sval = 0;
+		m->freglo[regd] = sval;
+
+		if(trace)
+			runtime·printf("*** fix S[%d]=D[%d] %x\n",
+				regd, regm, m->freglo[regd]);
+		break;
+
+	case 0xeebc0bc0:	// S[regd] = D[regm] (MOVDW.U)
+		runtime·f64tointc(getd(regm), &sval, &ok);
+		if(!ok || (uint32)sval != sval)
+			sval = 0;
+		m->freglo[regd] = sval;
+
+		if(trace)
+			runtime·printf("*** fix unsigned S[%d]=D[%d] %x\n",
+				regd, regm, m->freglo[regd]);
+		break;
+
+	case 0xeeb80ac0:	// F[regd] = S[regm] (MOVWF)
+		// negative inputs are converted as -cmp and the sign bit
+		// of the result is flipped afterwards.
+		cmp = m->freglo[regm];
+		if(cmp < 0) {
+			runtime·fintto64c(-cmp, &uval);
+			putf(regd, d2f(uval));
+			m->freglo[regd] ^= 0x80000000;
+		} else {
+			runtime·fintto64c(cmp, &uval);
+			putf(regd, d2f(uval));
+		}
+
+		// NOTE(review): the trace label says D[] and prints freghi
+		// although only the single precision word is written.
+		if(trace)
+			runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb80a40:	// F[regd] = S[regm] (MOVWF.U)
+		runtime·fintto64c(m->freglo[regm], &uval);
+		putf(regd, d2f(uval));
+
+		if(trace)
+			runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb80bc0:	// D[regd] = S[regm] (MOVWD)
+		// negative inputs are converted as -cmp and the sign bit
+		// of the result is flipped afterwards.
+		cmp = m->freglo[regm];
+		if(cmp < 0) {
+			runtime·fintto64c(-cmp, &uval);
+			putd(regd, uval);
+			m->freghi[regd] ^= 0x80000000;
+		} else {
+			runtime·fintto64c(cmp, &uval);
+			putd(regd, uval);
+		}
+
+		if(trace)
+			runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+
+	case 0xeeb80b40:	// D[regd] = S[regm] (MOVWD.U)
+		runtime·fintto64c(m->freglo[regm], &uval);
+		putd(regd, uval);
+
+		if(trace)
+			runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
+				regd, regm, m->freghi[regd], m->freglo[regd]);
+		break;
+	}
+	return 1;
+
+done:
+	// Not handled above. Instructions in the 0xee/0xed groups are
+	// reported and aborted on; anything else ends the FP sequence.
+	if((i&0xff000000) == 0xee000000 ||
+	   (i&0xff000000) == 0xed000000) {
+		runtime·printf("stepflt %p %x\n", pc, i);
+		fabort();
+	}
+	return 0;
+}
+
+// runtime·_sfloat2 interprets the run of instructions starting at lr
+// with stepflt, passing &r0 as the base of the CPU register array, and
+// returns the address of the first instruction stepflt does not handle.
+// The first instruction must be handled; otherwise a message is printed
+// and fabort is called.
+#pragma textflag 7
+uint32*
+runtime·_sfloat2(uint32 *lr, uint32 r0)
+{
+	uint32 nwords;
+
+	nwords = stepflt(lr, &r0);
+	if(nwords == 0) {
+		runtime·printf("sfloat2 %p %x\n", lr, *lr);
+		fabort(); // not ok to fail first instruction
+	}
+
+	for(;;) {
+		lr += nwords;
+		nwords = stepflt(lr, &r0);
+		if(nwords == 0)
+			break;
+	}
+	return lr;
+}
diff --git a/src/pkg/runtime/arm/traceback.c b/src/pkg/runtime/arm/traceback.c
new file mode 100644
index 000000000..5628b8349
--- /dev/null
+++ b/src/pkg/runtime/arm/traceback.c
@@ -0,0 +1,213 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "malloc.h"
+
+// Prototypes for routines that are only referenced by address in this file:
+// gentraceback compares f->entry against several of them to recognize
+// frames that need special unwinding.
+void runtime·deferproc(void);
+void runtime·newproc(void);
+void runtime·newstack(void);
+void runtime·morestack(void);
+void runtime·sigpanic(void);
+// Software integer divide/modulo helpers (defined in vlop.s).
+void _div(void);
+void _mod(void);
+void _divu(void);
+void _modu(void);
+
+// gentraceback walks the stack of g, starting from program counter pc0,
+// stack pointer sp and link register value lr0.
+// The first skip frames that are found are ignored.
+// If pcbuf is non-nil, the PC of each remaining frame is stored in pcbuf;
+// if pcbuf is nil, each frame is printed instead.
+// At most max frames are recorded or printed, and the walk gives up after
+// 100 loop iterations.  The return value is the number of frames
+// recorded or printed.
+int32
+runtime·gentraceback(byte *pc0, byte *sp, byte *lr0, G *g, int32 skip, uintptr *pcbuf, int32 max)
+{
+	int32 i, n, iter;
+	uintptr pc, lr, tracepc, x;
+	byte *fp, *p;
+	bool waspanic;
+	Stktop *stk;
+	Func *f;
+	
+	pc = (uintptr)pc0;
+	lr = (uintptr)lr0;
+	fp = nil;
+	waspanic = false;
+
+	// If the PC is goexit, the goroutine hasn't started yet.
+	if(pc == (uintptr)runtime·goexit) {
+		pc = (uintptr)g->entry;
+		lr = (uintptr)runtime·goexit;
+	}
+
+	// If the PC is zero, it's likely a nil function call.
+	// Start in the caller's frame.
+	if(pc == 0) {
+		pc = lr;
+		lr = 0;
+	}
+
+	n = 0;
+	stk = (Stktop*)g->stackbase;
+	for(iter = 0; iter < 100 && n < max; iter++) {	// iter avoids looping forever
+		// Typically:
+		//	pc is the PC of the running function.
+		//	sp is the stack pointer at that program counter.
+		//	fp is the frame pointer (caller's stack pointer) at that program counter, or nil if unknown.
+		//	stk is the stack containing sp.
+		//	The caller's program counter is lr, unless lr is zero, in which case it is *(uintptr*)sp.
+		
+		if(pc == (uintptr)runtime·lessstack) {
+			// Hit top of stack segment.  Unwind to next segment.
+			pc = (uintptr)stk->gobuf.pc;
+			sp = stk->gobuf.sp;
+			lr = 0;
+			fp = nil;
+			if(pcbuf == nil)
+				runtime·printf("----- stack segment boundary -----\n");
+			stk = (Stktop*)stk->stackbase;
+			continue;
+		}
+		
+		if(pc <= 0x1000 || (f = runtime·findfunc(pc)) == nil) {
+			// Dangerous, but worthwhile: see if this is a closure by
+			// decoding the instruction stream.
+			//
+			// We check p < p+4 to avoid wrapping and faulting if
+			// we have lost track of where we are.
+			p = (byte*)pc;
+			if((pc&3) == 0 && p < p+4 &&
+			   runtime·mheap.arena_start < p &&
+			   p+4 < runtime·mheap.arena_used) {
+			   	x = *(uintptr*)p;
+				// The low 12 bits of the matched word are used as the frame size.
+				if((x&0xfffff000) == 0xe49df000) {
+					// End of closure:
+					// MOVW.P frame(R13), R15
+					pc = *(uintptr*)sp;
+					lr = 0;
+					sp += x & 0xfff;
+					fp = nil;
+					continue;
+				}
+				// NOTE(review): 0xe52de000 is presumably the closure's
+				// first instruction, which saves LR with pre-decrement — confirm.
+				if((x&0xfffff000) == 0xe52de000 && lr == (uintptr)runtime·goexit) {
+					// Beginning of closure.
+					// Closure at top of stack, not yet started.
+					p += 5*4;
+					if((x&0xfff) != 4) {
+						// argument copying
+						p += 7*4;
+					}
+					// The word at p is taken as the address of the real function.
+					if((byte*)pc < p && p < p+4 && p+4 < runtime·mheap.arena_used) {
+						pc = *(uintptr*)p;
+						fp = nil;
+						continue;
+					}
+				}
+			}
+			// Unknown PC and not a recognizable closure: stop the walk.
+			break;
+		}
+		
+		// Found an actual function.
+		if(lr == 0)
+			lr = *(uintptr*)sp;
+		if(fp == nil) {
+			fp = sp;
+			// At the entry PC the frame size is not added to sp.
+			if(pc > f->entry && f->frame >= 0)
+				fp += f->frame;
+		}
+
+		// Use up the skip count first; after that either record the PC
+		// or print the frame.
+		if(skip > 0)
+			skip--;
+		else if(pcbuf != nil)
+			pcbuf[n++] = pc;
+		else {
+			// Print during crash.
+			//	main+0xf /home/rsc/go/src/runtime/x.go:23
+			//		main(0x1, 0x2, 0x3)
+			runtime·printf("[%p] %S", fp, f->name);
+			if(pc > f->entry)
+				runtime·printf("+%p", (uintptr)(pc - f->entry));
+			tracepc = pc;	// back up to CALL instruction for funcline.
+			// No back-up for the first printed frame, at a function's
+			// entry, or when the previous frame was sigpanic.
+			if(n > 0 && pc > f->entry && !waspanic)
+				tracepc -= sizeof(uintptr);
+			runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc));
+			runtime·printf("\t%S(", f->name);
+			// Print at most the first five argument words.
+			for(i = 0; i < f->args; i++) {
+				if(i != 0)
+					runtime·prints(", ");
+				runtime·printhex(((uintptr*)fp)[1+i]);
+				if(i >= 4) {
+					runtime·prints(", ...");
+					break;
+				}
+			}
+			runtime·prints(")\n");
+			n++;
+		}
+		
+		waspanic = f->entry == (uintptr)runtime·sigpanic;
+
+		// When printing from g0 and the frame is newstack, continue the
+		// trace on m->curg using the state saved in m (morepc, moreargp, morebuf).
+		if(pcbuf == nil && f->entry == (uintptr)runtime·newstack && g == m->g0) {
+			runtime·printf("----- newstack called from goroutine %d -----\n", m->curg->goid);
+			pc = (uintptr)m->morepc;
+			sp = (byte*)m->moreargp - sizeof(void*);
+			lr = (uintptr)m->morebuf.pc;
+			fp = m->morebuf.sp;
+			g = m->curg;
+			stk = (Stktop*)g->stackbase;
+			continue;
+		}
+		
+		// Likewise for lessstack on g0: resume from the gobuf stored at the
+		// top of m->curg's current stack segment.
+		if(pcbuf == nil && f->entry == (uintptr)runtime·lessstack && g == m->g0) {
+			runtime·printf("----- lessstack called from goroutine %d -----\n", m->curg->goid);
+			g = m->curg;
+			stk = (Stktop*)g->stackbase;
+			sp = stk->gobuf.sp;
+			pc = (uintptr)stk->gobuf.pc;
+			fp = nil;
+			lr = 0;
+			continue;
+		}	
+		
+		// Unwind to next frame.
+		pc = lr;
+		lr = 0;
+		sp = fp;
+		fp = nil;
+		
+		// If this was div or divu or mod or modu, the caller had
+		// an extra 8 bytes on its stack.  Adjust sp.
+		if(f->entry == (uintptr)_div || f->entry == (uintptr)_divu || f->entry == (uintptr)_mod || f->entry == (uintptr)_modu)
+			sp += 8;
+		
+		// If this was deferproc or newproc, the caller had an extra 12.
+		if(f->entry == (uintptr)runtime·deferproc || f->entry == (uintptr)runtime·newproc)
+			sp += 12;
+	}
+	
+	// When printing, finish with the location recorded in g->gopc,
+	// if it is set and maps to a known function.
+	if(pcbuf == nil && (pc = g->gopc) != 0 && (f = runtime·findfunc(pc)) != nil) {
+		runtime·printf("----- goroutine created by -----\n%S", f->name);
+		if(pc > f->entry)
+			runtime·printf("+%p", (uintptr)(pc - f->entry));
+		tracepc = pc;	// back up to CALL instruction for funcline.
+		if(n > 0 && pc > f->entry)
+			tracepc -= sizeof(uintptr);
+		runtime·printf(" %S:%d\n", f->src, runtime·funcline(f, tracepc));
+	}
+
+	return n;		
+}
+
+// traceback prints a stack trace for g starting at pc0/sp/lr.
+// It calls gentraceback with no skipped frames, a nil pcbuf
+// (which selects printing) and a limit of 100 frames.
+void
+runtime·traceback(byte *pc0, byte *sp, byte *lr, G *g)
+{
+	runtime·gentraceback(pc0, sp, lr, g, 0, nil, 100);
+}
+
+// callers collects PCs from the current goroutine's stack into pcbuf,
+// starting from the PC and SP of the function that called callers.
+// skip and m are handed to gentraceback as the number of frames to
+// ignore and the capacity of pcbuf; its result is returned unchanged.
+int32
+runtime·callers(int32 skip, uintptr *pcbuf, int32 m)
+{
+	byte *callerpc, *callersp;
+
+	callersp = runtime·getcallersp(&skip);
+	callerpc = runtime·getcallerpc(&skip);
+
+	return runtime·gentraceback(callerpc, callersp, 0, g, skip, pcbuf, m);
+}
diff --git a/src/pkg/runtime/arm/vlop.s b/src/pkg/runtime/arm/vlop.s
new file mode 100644
index 000000000..fc679f0ee
--- /dev/null
+++ b/src/pkg/runtime/arm/vlop.s
@@ -0,0 +1,190 @@
+// Inferno's libkern/vlop-arm.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/vlop-arm.s
+//
+//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
+//         Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/*
+ * Multiply instructions assembled by hand and emitted as raw WORDs.
+ * Each macro ORs register numbers into fixed bit positions:
+ * 14<<28 fills the top four bits, S goes to bit 20, 9<<4 marks the
+ * multiply group.  (Per the ARM encoding, 14 in the top bits is the
+ * "always" condition and S requests a flags update.)
+ */
+#define UMULL(Rs,Rm,Rhi,Rlo,S)  WORD	 $((14<<28)|(4<<21)|(S<<20)|(Rhi<<16)|(Rlo<<12)|(Rs<<8)|(9<<4)|Rm)
+#define UMLAL(Rs,Rm,Rhi,Rlo,S)  WORD	 $((14<<28)|(5<<21)|(S<<20)|(Rhi<<16)|(Rlo<<12)|(Rs<<8)|(9<<4)|Rm)
+#define MUL(Rs,Rm,Rd,S) WORD	 $((14<<28)|(0<<21)|(S<<20)|(Rd<<16)|(Rs<<8)|(9<<4)|Rm)
+/* register number that _mulv addresses its result through, as R(arg) */
+arg=0
+
+/* replaced use of R10 by R11 because the former can be the data segment base register */
+
+/*
+ * _mulv: 64-bit multiply, keeping the low 64 bits of the product.
+ * 0(FP) is loaded into R0 and used as the address of the result;
+ * the operands are l0/h0 at 4(FP)/8(FP) and l1/h1 at 12(FP)/16(FP).
+ *	R7:R6 = l0*l1		(full 64-bit product)
+ *	R7   += h0*l1 + l0*h1	(cross terms, low 32 bits only)
+ */
+TEXT _mulv(SB), $0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R2	/* l0 */
+	MOVW	8(FP), R11	/* h0 */
+	MOVW	12(FP), R4	/* l1 */
+	MOVW	16(FP), R5	/* h1 */
+	UMULL(4, 2, 7, 6, 0)	/* R7:R6 = R2*R4 */
+	MUL(11, 4, 8, 0)	/* R8 = R4*R11 */
+	ADD	R8, R7
+	MUL(2, 5, 8, 0)		/* R8 = R5*R2 */
+	ADD	R8, R7
+	MOVW	R6, 0(R(arg))	/* low word of result */
+	MOVW	R7, 4(R(arg))	/* high word of result */
+	RET
+
+
+/*
+ * Register numbers used by the divide routines below:
+ * Q holds the numerator on entry to div<> and the quotient on exit,
+ * N accumulates the remainder, D is the denominator,
+ * CC is the remaining-bits counter, and TMP carries the
+ * numerator in and the result out.
+ */
+Q	= 0
+N	= 1
+D	= 2
+CC	= 3
+TMP	= 11
+
+/*
+ * save<>: common entry code for _div/_mod/_divu/_modu.
+ * Spills R(Q), R(N), R(D), R(CC) to 0(FP)..12(FP) so rest<> can
+ * restore them, moves the numerator from R(TMP) into R(Q), loads
+ * the denominator from 20(FP) into R(D), and calls
+ * runtime·panicdivide if the denominator is zero.
+ * NOTE(review): with a $0 frame, FP presumably addresses the caller's
+ * frame (the $16 area of _div etc.) — confirm against the linker's rules.
+ */
+TEXT save<>(SB), 7, $0
+	MOVW	R(Q), 0(FP)
+	MOVW	R(N), 4(FP)
+	MOVW	R(D), 8(FP)
+	MOVW	R(CC), 12(FP)
+
+	MOVW	R(TMP), R(Q)		/* numerator */
+	MOVW	20(FP), R(D)		/* denominator */
+	CMP	$0, R(D)
+	BNE	s1
+	BL	runtime·panicdivide(SB)
+/*	  MOVW	-1(R(D)), R(TMP)	/* divide by zero fault */
+s1:	 RET
+
+/*
+ * rest<>: common exit code, reached from out.
+ * Reloads R(Q), R(N), R(D), R(CC) from the slots written by save<>,
+ * then loads a return address from 0(R13), pops 20 bytes and jumps to it.
+ * NOTE(review): 0(R13) is presumably the LR saved by the $16-frame
+ * routine (_div etc.), and 20 = 16 bytes of frame + 4 for that LR,
+ * so control goes back to that routine's caller — confirm.
+ */
+TEXT rest<>(SB), 7, $0
+	MOVW	0(FP), R(Q)
+	MOVW	4(FP), R(N)
+	MOVW	8(FP), R(D)
+	MOVW	12(FP), R(CC)
+/*
+ * return to caller
+ * of rest<>
+ */
+	MOVW	0(R13), R14
+	ADD	$20, R13
+	B	(R14)
+
+/*
+ * div<>: unsigned 32-bit shift-and-subtract division.
+ * In:  R(Q) = numerator, R(D) = denominator.
+ * Out: R(Q) = quotient, R(N) = remainder.
+ * R(CC) counts the numerator bits still to be processed.
+ */
+TEXT div<>(SB), 7, $0
+	MOVW	$32, R(CC)
+/*
+ * skip zeros 8-at-a-time
+ */
+e1:
+	AND.S	$(0xff<<24),R(Q), R(N)
+	BNE	e2
+	SLL	$8, R(Q)
+	SUB.S	$8, R(CC)
+	BNE	e1
+	RET			/* numerator was zero; R(Q) and R(N) are both 0 */
+e2:
+	MOVW	$0, R(N)
+
+loop:
+/*
+ * shift R(N||Q) left one
+ */
+	SLL	$1, R(N)
+	CMP	$0, R(Q)
+	ORR.LT  $1, R(N)	/* top bit of R(Q) set: carry it into R(N) */
+	SLL	$1, R(Q)
+
+/*
+ * compare numerator to denominator
+ * if not less (HS), subtract and set quotient bit
+ */
+	CMP	R(D), R(N)
+	ORR.HS  $1, R(Q)
+	SUB.HS  R(D), R(N)
+	SUB.S	$1, R(CC)
+	BNE	loop
+	RET
+
+/*
+ * _div: signed 32-bit division.
+ * After save<>, R(Q) is the numerator and R(D) the denominator.
+ * Negative operands are negated, div<> does the unsigned divide, and
+ * the quotient is negated when exactly one operand was negative.
+ * The result is left in R(TMP) before branching to out.
+ */
+TEXT _div(SB), 7, $16
+	BL	save<>(SB)
+	CMP	$0, R(Q)
+	BGE	d1			/* numerator >= 0 */
+	RSB	$0, R(Q), R(Q)
+	CMP	$0, R(D)
+	BGE	d2			/* only the numerator was negative */
+	RSB	$0, R(D), R(D)
+d0:
+	BL	div<>(SB)			/* none/both neg */
+	MOVW	R(Q), R(TMP)
+	B	out
+d1:
+	CMP	$0, R(D)
+	BGE	d0
+	RSB	$0, R(D), R(D)
+d2:
+	BL	div<>(SB)			/* one neg */
+	RSB	$0, R(Q), R(TMP)
+	B	out
+
+/*
+ * _mod: signed 32-bit remainder.
+ * The denominator is made non-negative; a negative numerator is
+ * negated before div<> and the remainder negated afterwards, so the
+ * result takes the sign of the numerator.
+ * The result is left in R(TMP) before branching to out.
+ */
+TEXT _mod(SB), 7, $16
+	BL	save<>(SB)
+	CMP	$0, R(D)
+	RSB.LT	$0, R(D), R(D)
+	CMP	$0, R(Q)
+	BGE	m1
+	RSB	$0, R(Q), R(Q)
+	BL	div<>(SB)			/* neg numerator */
+	RSB	$0, R(N), R(TMP)
+	B	out
+m1:
+	BL	div<>(SB)			/* pos numerator */
+	MOVW	R(N), R(TMP)
+	B	out
+
+/*
+ * _divu: unsigned 32-bit division.
+ * Runs div<> on the operands set up by save<> and returns the
+ * quotient, R(Q), in R(TMP).
+ */
+TEXT _divu(SB), 7, $16
+	BL	save<>(SB)
+	BL	div<>(SB)
+	MOVW	R(Q), R(TMP)
+	B	out
+
+/*
+ * _modu: unsigned 32-bit remainder.
+ * Runs div<> on the operands set up by save<> and returns the
+ * remainder, R(N), in R(TMP).
+ */
+TEXT _modu(SB), 7, $16
+	BL	save<>(SB)
+	BL	div<>(SB)
+	MOVW	R(N), R(TMP)
+	B	out
+
+/*
+ * out: shared exit path for _div, _mod, _divu and _modu.
+ * rest<> restores the scratch registers and jumps through an address
+ * it loads from the stack, so the B after the BL is presumably never
+ * reached — TODO confirm.
+ */
+out:
+	BL	rest<>(SB)
+	B	out
+
+// trampoline for _sfloat2. passes LR as arg0 and
+// saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
+// be changed by _sfloat2.
+//
+// Frame layout as written by the code below:
+//	4(R13)		LR on entry (first argument to _sfloat2)
+//	8(R13)		R0 (second argument, and the saved R0)
+//	12(R13)...	R1-R12
+//	60(R13)		R13 + 68
+//	64(R13)		CPSR
+TEXT _sfloat(SB), 7, $64 // 4 arg + 14*4 saved regs + cpsr
+	MOVW	R14, 4(R13)
+	MOVW	R0, 8(R13)
+	MOVW	$12(R13), R0
+	MOVM.IA.W	[R1-R12], (R0)
+	MOVW	$68(R13), R1 // correct for frame size
+	MOVW	R1, 60(R13)
+	WORD	$0xe10f1000 // mrs r1, cpsr
+	MOVW	R1, 64(R13)
+	BL	runtime·_sfloat2(SB)
+	// R0 is the address returned by _sfloat2; storing it at 0(R13)
+	// presumably replaces the saved return address so that RET resumes
+	// there — TODO confirm.
+	MOVW	R0, 0(R13)
+	MOVW	64(R13), R1
+	WORD	$0xe128f001	// msr cpsr_f, r1
+	MOVW	$12(R13), R0
+	MOVM.IA.W	(R0), [R1-R12]
+	MOVW	8(R13), R0
+	RET
+			
+
diff --git a/src/pkg/runtime/arm/vlrt.c b/src/pkg/runtime/arm/vlrt.c
new file mode 100644
index 000000000..50f33710b
--- /dev/null
+++ b/src/pkg/runtime/arm/vlrt.c
@@ -0,0 +1,816 @@
+// Inferno's libkern/vlrt-arm.c
+// http://code.google.com/p/inferno-os/source/browse/libkern/vlrt-arm.c
+//
+//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
+//         Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// declared here to avoid include of runtime.h
+void	runtime·panicstring(char*);
+
+// Short names for the unsigned/signed integer types used below.
+typedef unsigned long   ulong;
+typedef unsigned int    uint;
+typedef unsigned short  ushort;
+typedef unsigned char   uchar;
+typedef signed char     schar;
+
+// SIGN(n) is a mask with only bit n-1 set; SIGN(32) is the top bit
+// of a 32-bit word.
+#define SIGN(n) (1UL<<(n-1))
+
+// panicdivide reports an integer division by zero through
+// runtime·panicstring.  It is called from dodiv below and from
+// save<> in vlop.s.
+void
+runtime·panicdivide(void)
+{
+	runtime·panicstring("integer divide by zero");
+}
+
+// Vlong is a 64-bit integer represented as two 32-bit words,
+// lo first in memory.  The second anonymous struct overlays the same
+// storage with four 16-bit fields.
+typedef struct  Vlong   Vlong;
+struct  Vlong
+{
+	union
+	{
+		struct
+		{
+			ulong   lo;	// low 32 bits
+			ulong   hi;	// high 32 bits
+		};
+		struct
+		{
+			// 16-bit views of lo and hi; presumably "ls"/"ms" mean
+			// least/most significant short — TODO confirm.
+			ushort lols;
+			ushort loms;
+			ushort hils;
+			ushort hims;
+		};
+	};
+};
+
+// Called by _vasop when it is given an unknown type code.
+void    runtime·abort(void);
+
+/* _addv stores the 64-bit sum a+b in *r. */
+void
+_addv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong sumlo, sumhi;
+
+	sumhi = a.hi + b.hi;
+	sumlo = a.lo + b.lo;
+	/* the low word wrapped around, so a carry is owed to the high word */
+	if(sumlo < a.lo)
+		sumhi += 1;
+	r->hi = sumhi;
+	r->lo = sumlo;
+}
+
+/* _subv stores the 64-bit difference a-b in *r. */
+void
+_subv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong difflo, diffhi;
+
+	diffhi = a.hi - b.hi;
+	difflo = a.lo - b.lo;
+	/* the low word wrapped around, so borrow from the high word */
+	if(difflo > a.lo)
+		diffhi -= 1;
+	r->hi = diffhi;
+	r->lo = difflo;
+}
+
+/*
+ * _d2v converts the double d to a 64-bit integer stored in *y.
+ * It takes the 52 stored mantissa bits plus the implicit leading 1,
+ * shifts them right or left according to the exponent, and then
+ * negates the result if the sign bit of d is set.  Bits shifted out
+ * on the right are discarded.
+ */
+void
+_d2v(Vlong *y, double d)
+{
+	union { double d; struct Vlong; } x;
+	ulong xhi, xlo, ylo, yhi;
+	int sh;
+
+	x.d = d;
+
+	/* xhi:xlo = mantissa with the implicit 1 at bit 20 of xhi */
+	xhi = (x.hi & 0xfffff) | 0x100000;
+	xlo = x.lo;
+	/* right-shift count derived from the 11-bit exponent field; negative means shift left */
+	sh = 1075 - ((x.hi >> 20) & 0x7ff);
+
+	ylo = 0;
+	yhi = 0;
+	if(sh >= 0) {
+		/* v = (hi||lo) >> sh */
+		if(sh < 32) {
+			if(sh == 0) {
+				ylo = xlo;
+				yhi = xhi;
+			} else {
+				ylo = (xlo >> sh) | (xhi << (32-sh));
+				yhi = xhi >> sh;
+			}
+		} else {
+			if(sh == 32) {
+				ylo = xhi;
+			} else
+			if(sh < 64) {
+				ylo = xhi >> (sh-32);
+			}
+			/* sh >= 64: every bit is shifted out, result stays 0 */
+		}
+	} else {
+		/* v = (hi||lo) << -sh */
+		sh = -sh;
+		/* a 53-bit mantissa has room for at most 11 left shifts in 64 bits */
+		if(sh <= 11) {
+			ylo = xlo << sh;
+			yhi = (xhi << sh) | (xlo >> (32-sh));
+		} else {
+			/* overflow */
+			yhi = d;        /* causes something awful */
+		}
+	}
+	/* sign bit set: negate the 64-bit magnitude */
+	if(x.hi & SIGN(32)) {
+		if(ylo != 0) {
+			ylo = -ylo;
+			yhi = ~yhi;
+		} else
+			yhi = -yhi;
+	}
+
+	y->hi = yhi;
+	y->lo = ylo;
+}
+
+/* _f2v converts the float f to a 64-bit integer in *y by way of _d2v. */
+void
+_f2v(Vlong *y, float f)
+{
+	double wide;
+
+	wide = f;
+	_d2v(y, wide);
+}
+
+// float64toint64 converts d with _d2v, writing the result into the
+// parameter y itself.
+// NOTE(review): y presumably occupies the result slot of the Go-callable
+// signature, which is why nothing is returned — confirm against the caller.
+void
+runtime·float64toint64(double d, Vlong y)
+{
+	_d2v(&y, d);
+}
+
+// float64touint64 converts d with _d2v (the same routine used for the
+// signed conversion), writing the result into the parameter y itself.
+// NOTE(review): y presumably occupies the result slot of the Go-callable
+// signature — confirm against the caller.
+void
+runtime·float64touint64(double d, Vlong y)
+{
+	_d2v(&y, d);
+}
+
+/*
+ * _ul2d converts the unsigned 32-bit value u to a double.
+ * Values with the top bit set are converted in two pieces:
+ * the top bit is cleared and 2^31 is added back as a constant.
+ */
+double
+_ul2d(ulong u)
+{
+	// compensate for bug in c
+	if((u & SIGN(32)) == 0)
+		return u;
+
+	u ^= SIGN(32);
+	return 2147483648. + u;
+}
+
+/* _v2d converts the signed 64-bit value x to a double. */
+double
+_v2d(Vlong x)
+{
+	/* non-negative: combine the two halves directly */
+	if((x.hi & SIGN(32)) == 0)
+		return x.hi*4294967296. + _ul2d(x.lo);
+
+	/* negative: negate to get the magnitude, convert, negate the double */
+	if(x.lo == 0)
+		x.hi = -x.hi;
+	else {
+		x.lo = -x.lo;
+		x.hi = ~x.hi;
+	}
+	return -(_ul2d(x.hi)*4294967296. + _ul2d(x.lo));
+}
+
+/* _v2f converts the signed 64-bit value x to a float by way of _v2d. */
+float
+_v2f(Vlong x)
+{
+	double wide;
+
+	wide = _v2d(x);
+	return wide;
+}
+
+// int64tofloat64 converts the signed 64-bit y with _v2d and assigns
+// the result to the parameter d.
+// NOTE(review): d presumably occupies the result slot of the Go-callable
+// signature, so the store to it is the return value — confirm.
+void
+runtime·int64tofloat64(Vlong y, double d)
+{
+	d = _v2d(y);
+}
+
+// uint64tofloat64 converts the unsigned 64-bit y to a double as
+// hi*2^32 + lo, using _ul2d for each half, and assigns the result
+// to the parameter d.
+// NOTE(review): d presumably occupies the result slot of the Go-callable
+// signature — confirm.
+void
+runtime·uint64tofloat64(Vlong y, double d)
+{
+	d = _ul2d(y.hi)*4294967296. + _ul2d(y.lo);
+}
+
+/*
+ * dodiv performs unsigned 64-bit division of num by den using
+ * shift-and-subtract.  The quotient is stored in *q and the remainder
+ * in *r; either pointer may be 0 to discard that result.
+ * A zero denominator calls runtime·panicdivide.
+ */
+static void
+dodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
+{
+	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
+	int i;
+
+	numhi = num.hi;
+	numlo = num.lo;
+	denhi = den.hi;
+	denlo = den.lo;
+
+	/*
+	 * get a divide by zero
+	 */
+	if(denlo==0 && denhi==0) {
+		runtime·panicdivide();
+	}
+
+	/*
+	 * set up the divisor and find the number of iterations needed
+	 */
+	/* quohi:quolo is used here as the bound the divisor is shifted up to;
+	 * it is capped at 2^63 when the numerator's top bit is set */
+	if(numhi >= SIGN(32)) {
+		quohi = SIGN(32);
+		quolo = 0;
+	} else {
+		quohi = numhi;
+		quolo = numlo;
+	}
+	i = 0;
+	/* shift the divisor left until it is no longer below the bound */
+	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
+		denhi = (denhi<<1) | (denlo>>31);
+		denlo <<= 1;
+		i++;
+	}
+
+	quohi = 0;
+	quolo = 0;
+	/* i+1 rounds: shift the quotient left, subtract the divisor if it
+	 * fits and set the new quotient bit, then shift the divisor right */
+	for(; i >= 0; i--) {
+		quohi = (quohi<<1) | (quolo>>31);
+		quolo <<= 1;
+		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
+			t = numlo;
+			numlo -= denlo;
+			/* low word wrapped: borrow from the high word */
+			if(numlo > t)
+				numhi--;
+			numhi -= denhi;
+			quolo |= 1;
+		}
+		denlo = (denlo>>1) | (denhi<<31);
+		denhi >>= 1;
+	}
+
+	if(q) {
+		q->lo = quolo;
+		q->hi = quohi;
+	}
+	/* what is left of the numerator is the remainder */
+	if(r) {
+		r->lo = numlo;
+		r->hi = numhi;
+	}
+}
+
+/* _divvu stores the unsigned 64-bit quotient n/d in *q. */
+void
+_divvu(Vlong *q, Vlong n, Vlong d)
+{
+	/* operands that need more than 32 bits take the general path */
+	if(n.hi != 0 || d.hi != 0) {
+		dodiv(n, d, q, 0);
+		return;
+	}
+
+	/* both fit in 32 bits: a single 32-bit unsigned divide suffices */
+	q->hi = 0;
+	q->lo = n.lo / d.lo;
+}
+
+// uint64div computes the unsigned quotient n/d with _divvu, writing
+// the result into the parameter q itself.
+// NOTE(review): q presumably occupies the result slot of the Go-callable
+// signature — confirm against the caller.
+void
+runtime·uint64div(Vlong n, Vlong d, Vlong q)
+{
+	_divvu(&q, n, d);
+}
+
+/* _modvu stores the unsigned 64-bit remainder n%d in *r. */
+void
+_modvu(Vlong *r, Vlong n, Vlong d)
+{
+	/* operands that need more than 32 bits take the general path */
+	if(n.hi != 0 || d.hi != 0) {
+		dodiv(n, d, 0, r);
+		return;
+	}
+
+	/* both fit in 32 bits: a single 32-bit unsigned modulo suffices */
+	r->hi = 0;
+	r->lo = n.lo % d.lo;
+}
+
+// uint64mod computes the unsigned remainder n%d with _modvu, writing
+// the result into the parameter q itself.
+// NOTE(review): q presumably occupies the result slot of the Go-callable
+// signature — confirm against the caller.
+void
+runtime·uint64mod(Vlong n, Vlong d, Vlong q)
+{
+	_modvu(&q, n, d);
+}
+
+/* vneg negates the 64-bit value *v in place (two's complement). */
+static void
+vneg(Vlong *v)
+{
+
+	if(v->lo != 0) {
+		/* negating a non-zero low word borrows from the high word */
+		v->lo = -v->lo;
+		v->hi = ~v->hi;
+		return;
+	}
+	v->hi = -v->hi;
+}
+
+/* _divv stores the signed 64-bit quotient n/d in *q. */
+void
+_divv(Vlong *q, Vlong n, Vlong d)
+{
+	long nsign, dsign;
+
+	if(n.hi != (((long)n.lo)>>31) || d.hi != (((long)d.lo)>>31)) {
+		/*
+		 * at least one operand is not a sign-extended 32-bit value:
+		 * divide the magnitudes and fix the sign afterwards.
+		 */
+		nsign = n.hi >> 31;
+		if(nsign)
+			vneg(&n);
+		dsign = d.hi >> 31;
+		if(dsign)
+			vneg(&d);
+		dodiv(n, d, q, 0);
+		if(nsign != dsign)
+			vneg(q);
+		return;
+	}
+
+	if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
+		// special case: 32-bit -0x80000000 / -1 causes wrong sign
+		q->lo = 0x80000000;
+		q->hi = 0;
+		return;
+	}
+
+	/* both operands fit in 32 bits: divide and sign-extend the result */
+	q->lo = (long)n.lo / (long)d.lo;
+	q->hi = ((long)q->lo) >> 31;
+}
+
+// int64div computes the signed quotient n/d with _divv, writing
+// the result into the parameter q itself.
+// NOTE(review): q presumably occupies the result slot of the Go-callable
+// signature — confirm against the caller.
+void
+runtime·int64div(Vlong n, Vlong d, Vlong q)
+{
+	_divv(&q, n, d);
+}
+
+/*
+ * _modv stores the signed 64-bit remainder n%d in *r.
+ * On the general path the remainder takes the sign of n.
+ */
+void
+_modv(Vlong *r, Vlong n, Vlong d)
+{
+	long nsign, dsign;
+
+	if(n.hi != (((long)n.lo)>>31) || d.hi != (((long)d.lo)>>31)) {
+		/*
+		 * at least one operand is not a sign-extended 32-bit value:
+		 * work on the magnitudes and restore the numerator's sign.
+		 */
+		nsign = n.hi >> 31;
+		if(nsign)
+			vneg(&n);
+		dsign = d.hi >> 31;
+		if(dsign)
+			vneg(&d);
+		dodiv(n, d, 0, r);
+		if(nsign)
+			vneg(r);
+		return;
+	}
+
+	/* both operands fit in 32 bits: take the remainder and sign-extend */
+	r->lo = (long)n.lo % (long)d.lo;
+	r->hi = ((long)r->lo) >> 31;
+}
+
+// int64mod computes the signed remainder n%d with _modv, writing
+// the result into the parameter q itself.
+// NOTE(review): q presumably occupies the result slot of the Go-callable
+// signature — confirm against the caller.
+void
+runtime·int64mod(Vlong n, Vlong d, Vlong q)
+{
+	_modv(&q, n, d);
+}
+
+/*
+ * _rshav stores a >> b in *r as an arithmetic (sign-filling) shift.
+ * b <= 0 copies a unchanged; b >= 64 yields all sign bits.
+ */
+void
+_rshav(Vlong *r, Vlong a, int b)
+{
+	long hi;
+
+	hi = a.hi;
+	if(b <= 0) {
+		r->hi = hi;
+		r->lo = a.lo;
+	} else if(b < 32) {
+		r->hi = hi >> b;
+		r->lo = (hi << (32-b)) | (a.lo >> b);
+	} else if(b < 64) {
+		r->hi = hi>>31;
+		r->lo = hi >> (b-32);
+	} else {
+		/* this is illegal re C standard */
+		r->hi = hi>>31;
+		r->lo = hi>>31;
+	}
+}
+
+/*
+ * _rshlv stores a >> b in *r as a logical (zero-filling) shift.
+ * b <= 0 copies a unchanged; b >= 64 yields zero.
+ */
+void
+_rshlv(Vlong *r, Vlong a, int b)
+{
+	ulong hi;
+
+	hi = a.hi;
+	if(b <= 0) {
+		r->hi = hi;
+		r->lo = a.lo;
+	} else if(b < 32) {
+		r->hi = hi >> b;
+		r->lo = (hi << (32-b)) | (a.lo >> b);
+	} else if(b < 64) {
+		r->hi = 0;
+		r->lo = hi >> (b-32);
+	} else {
+		/* this is illegal re C standard */
+		r->hi = 0;
+		r->lo = 0;
+	}
+}
+
+/*
+ * _lshv stores a << b in *r.
+ * b <= 0 copies a unchanged; b >= 64 yields zero.
+ */
+void
+_lshv(Vlong *r, Vlong a, int b)
+{
+	ulong lo;
+
+	lo = a.lo;
+	if(b <= 0) {
+		r->lo = lo;
+		r->hi = a.hi;
+	} else if(b < 32) {
+		r->lo = lo << b;
+		r->hi = (lo >> (32-b)) | (a.hi << b);
+	} else if(b < 64) {
+		r->lo = 0;
+		r->hi = lo << (b-32);
+	} else {
+		/* this is illegal re C standard */
+		r->lo = 0;
+		r->hi = 0;
+	}
+}
+
+/* _andv stores the bitwise AND of a and b in *r. */
+void
+_andv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong lo, hi;
+
+	lo = a.lo & b.lo;
+	hi = a.hi & b.hi;
+	r->lo = lo;
+	r->hi = hi;
+}
+
+/* _orv stores the bitwise OR of a and b in *r. */
+void
+_orv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong lo, hi;
+
+	lo = a.lo | b.lo;
+	hi = a.hi | b.hi;
+	r->lo = lo;
+	r->hi = hi;
+}
+
+/* _xorv stores the bitwise exclusive OR of a and b in *r. */
+void
+_xorv(Vlong *r, Vlong a, Vlong b)
+{
+	ulong lo, hi;
+
+	lo = a.lo ^ b.lo;
+	hi = a.hi ^ b.hi;
+	r->lo = lo;
+	r->hi = hi;
+}
+
+/* _vpp is post-increment: *l receives the old value of *r, then *r is incremented. */
+void
+_vpp(Vlong *l, Vlong *r)
+{
+	ulong oldlo;
+
+	oldlo = r->lo;
+	l->hi = r->hi;
+	l->lo = oldlo;
+	r->lo = oldlo + 1;
+	/* low word wrapped to zero: carry into the high word */
+	if(r->lo == 0)
+		r->hi++;
+}
+
+/* _vmm is post-decrement: *l receives the old value of *r, then *r is decremented. */
+void
+_vmm(Vlong *l, Vlong *r)
+{
+	ulong oldlo;
+
+	oldlo = r->lo;
+	l->hi = r->hi;
+	l->lo = oldlo;
+	/* low word is about to wrap: borrow from the high word */
+	if(oldlo == 0)
+		r->hi--;
+	r->lo = oldlo - 1;
+}
+
+/* _ppv is pre-increment: *r is incremented, then copied to *l. */
+void
+_ppv(Vlong *l, Vlong *r)
+{
+
+	/* low word wrapped to zero: carry into the high word */
+	if(++r->lo == 0)
+		++r->hi;
+	l->lo = r->lo;
+	l->hi = r->hi;
+}
+
+/* _mmv is pre-decrement: *r is decremented, then copied to *l. */
+void
+_mmv(Vlong *l, Vlong *r)
+{
+
+	/* low word was zero before the decrement: borrow from the high word */
+	if(r->lo-- == 0)
+		r->hi--;
+	l->lo = r->lo;
+	l->hi = r->hi;
+}
+
+/*
+ * _vasop implements "lv op= rv" where rv is 64 bits wide and lv
+ * points to an object whose type is selected by the type code:
+ *	1 schar, 2 uchar, 3 short, 4 ushort, 5 long, 6 ulong,
+ *	7 vlong, 8 uvlong, 9 int, 10 uint.
+ * The object at lv is widened to 64 bits, fn(&u, widened, rv) is
+ * called, the low part of u is stored back through lv (the whole of
+ * u for the 64-bit types), and u is also stored in *ret.
+ * Any other type code calls runtime·abort.
+ *
+ * Signed sources are widened with an arithmetic shift of (long)t.lo.
+ * Shifting the unsigned t.lo directly would set hi to 0 or 1 rather
+ * than 0 or 0xffffffff and corrupt negative operands.
+ */
+void
+_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
+{
+	Vlong t, u;
+
+	u = *ret;
+	switch(type) {
+	default:
+		runtime·abort();
+		break;
+
+	case 1: /* schar */
+		t.lo = *(schar*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(schar*)lv = u.lo;
+		break;
+
+	case 2: /* uchar */
+		t.lo = *(uchar*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uchar*)lv = u.lo;
+		break;
+
+	case 3: /* short */
+		t.lo = *(short*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(short*)lv = u.lo;
+		break;
+
+	case 4: /* ushort */
+		t.lo = *(ushort*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ushort*)lv = u.lo;
+		break;
+
+	case 9: /* int */
+		t.lo = *(int*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(int*)lv = u.lo;
+		break;
+
+	case 10:        /* uint */
+		t.lo = *(uint*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(uint*)lv = u.lo;
+		break;
+
+	case 5: /* long */
+		t.lo = *(long*)lv;
+		t.hi = (long)t.lo >> 31;
+		fn(&u, t, rv);
+		*(long*)lv = u.lo;
+		break;
+
+	case 6: /* ulong */
+		t.lo = *(ulong*)lv;
+		t.hi = 0;
+		fn(&u, t, rv);
+		*(ulong*)lv = u.lo;
+		break;
+
+	case 7: /* vlong */
+	case 8: /* uvlong */
+		fn(&u, *(Vlong*)lv, rv);
+		*(Vlong*)lv = u;
+		break;
+	}
+	*ret = u;
+}
+
+/* _p2v widens the pointer p to 64 bits with a zero high word. */
+void
+_p2v(Vlong *ret, void *p)
+{
+	ret->hi = 0;
+	ret->lo = (ulong)p;
+}
+
+/* _sl2v sign-extends the long sl to 64 bits. */
+void
+_sl2v(Vlong *ret, long sl)
+{
+	ret->hi = sl >> 31;
+	ret->lo = sl;
+}
+
+/* _ul2v zero-extends the unsigned long ul to 64 bits. */
+void
+_ul2v(Vlong *ret, ulong ul)
+{
+	ret->hi = 0;
+	ret->lo = ul;
+}
+
+/* _si2v sign-extends the int si to 64 bits. */
+void
+_si2v(Vlong *ret, int si)
+{
+	long v;
+
+	v = si;
+	ret->hi = v >> 31;
+	ret->lo = v;
+}
+
+/* _ui2v zero-extends the unsigned int ui to 64 bits. */
+void
+_ui2v(Vlong *ret, uint ui)
+{
+	ret->hi = 0;
+	ret->lo = ui;
+}
+
+/* _sh2v sign-extends the low 16 bits of sh to 64 bits. */
+void
+_sh2v(Vlong *ret, long sh)
+{
+	long v;
+
+	/* shift up and back down to replicate bit 15 */
+	v = (sh << 16) >> 16;
+	ret->hi = v >> 31;
+	ret->lo = v;
+}
+
+/* _uh2v zero-extends the low 16 bits of ul to 64 bits. */
+void
+_uh2v(Vlong *ret, ulong ul)
+{
+	ret->hi = 0;
+	ret->lo = ul & 0xffff;
+}
+
+/* _sc2v sign-extends the low 8 bits of uc to 64 bits. */
+void
+_sc2v(Vlong *ret, long uc)
+{
+	long v;
+
+	/* shift up and back down to replicate bit 7 */
+	v = (uc << 24) >> 24;
+	ret->hi = v >> 31;
+	ret->lo = v;
+}
+
+/* _uc2v zero-extends the low 8 bits of ul to 64 bits. */
+void
+_uc2v(Vlong *ret, ulong ul)
+{
+	ret->hi = 0;
+	ret->lo = ul & 0xff;
+}
+
+/* _v2sc returns the low 8 bits of rv, sign-extended. */
+long
+_v2sc(Vlong rv)
+{
+	long v;
+
+	v = rv.lo & 0xff;
+	v <<= 24;
+	return v >> 24;
+}
+
+/* _v2uc returns the low 8 bits of rv, zero-extended. */
+long
+_v2uc(Vlong rv)
+{
+	ulong low;
+
+	low = rv.lo & 0xff;
+	return low;
+}
+
+/* _v2sh returns the low 16 bits of rv, sign-extended. */
+long
+_v2sh(Vlong rv)
+{
+	long v;
+
+	v = rv.lo & 0xffff;
+	v <<= 16;
+	return v >> 16;
+}
+
+/* _v2uh returns the low 16 bits of rv, zero-extended. */
+long
+_v2uh(Vlong rv)
+{
+	ulong low;
+
+	low = rv.lo & 0xffff;
+	return low;
+}
+
+/* _v2sl truncates rv to its low 32 bits (conversion to long). */
+long
+_v2sl(Vlong rv)
+{
+	long low;
+
+	low = rv.lo;
+	return low;
+}
+
+/* _v2ul truncates rv to its low 32 bits (conversion to ulong). */
+long
+_v2ul(Vlong rv)
+{
+	long low;
+
+	low = rv.lo;
+	return low;
+}
+
+/* _v2si truncates rv to its low 32 bits (conversion to int). */
+long
+_v2si(Vlong rv)
+{
+	long low;
+
+	low = rv.lo;
+	return low;
+}
+
+/* _v2ui truncates rv to its low 32 bits (conversion to uint). */
+long
+_v2ui(Vlong rv)
+{
+	long low;
+
+	low = rv.lo;
+	return low;
+}
+
+/* _testv reports whether rv is non-zero. */
+int
+_testv(Vlong rv)
+{
+	if(rv.lo != 0)
+		return 1;
+	return rv.hi != 0;
+}
+
+/* _eqv reports whether lv == rv. */
+int
+_eqv(Vlong lv, Vlong rv)
+{
+	if(lv.lo != rv.lo)
+		return 0;
+	return lv.hi == rv.hi;
+}
+
+/* _nev reports whether lv != rv. */
+int
+_nev(Vlong lv, Vlong rv)
+{
+	if(lv.lo != rv.lo)
+		return 1;
+	return lv.hi != rv.hi;
+}
+
+/* _ltv reports whether lv < rv as signed 64-bit values. */
+int
+_ltv(Vlong lv, Vlong rv)
+{
+	/* equal high words: the unsigned low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo < rv.lo;
+	return (long)lv.hi < (long)rv.hi;
+}
+
+/* _lev reports whether lv <= rv as signed 64-bit values. */
+int
+_lev(Vlong lv, Vlong rv)
+{
+	/* equal high words: the unsigned low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo <= rv.lo;
+	return (long)lv.hi < (long)rv.hi;
+}
+
+/* _gtv reports whether lv > rv as signed 64-bit values. */
+int
+_gtv(Vlong lv, Vlong rv)
+{
+	/* equal high words: the unsigned low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo > rv.lo;
+	return (long)lv.hi > (long)rv.hi;
+}
+
+/* _gev reports whether lv >= rv as signed 64-bit values. */
+int
+_gev(Vlong lv, Vlong rv)
+{
+	/* equal high words: the unsigned low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo >= rv.lo;
+	return (long)lv.hi > (long)rv.hi;
+}
+
+/* _lov reports whether lv < rv as unsigned 64-bit values. */
+int
+_lov(Vlong lv, Vlong rv)
+{
+	/* equal high words: the low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo < rv.lo;
+	return lv.hi < rv.hi;
+}
+
+/* _lsv reports whether lv <= rv as unsigned 64-bit values. */
+int
+_lsv(Vlong lv, Vlong rv)
+{
+	/* equal high words: the low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo <= rv.lo;
+	return lv.hi < rv.hi;
+}
+
+/* _hiv reports whether lv > rv as unsigned 64-bit values. */
+int
+_hiv(Vlong lv, Vlong rv)
+{
+	/* equal high words: the low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo > rv.lo;
+	return lv.hi > rv.hi;
+}
+
+/* _hsv reports whether lv >= rv as unsigned 64-bit values. */
+int
+_hsv(Vlong lv, Vlong rv)
+{
+	/* equal high words: the low words decide */
+	if(lv.hi == rv.hi)
+		return lv.lo >= rv.lo;
+	return lv.hi > rv.hi;
+}