Runtime is now starting up with a dummy C program as target:
- morestack and gosave/gogo/gogocall support
- memclr and memset from inferno
- bugfixes in _rt0_arm
R=rsc
APPROVED=rsc
DELTA=304 (174 added, 36 deleted, 94 changed)
OCL=30636
CL=30642
diff --git a/src/cmd/5l/noop.c b/src/cmd/5l/noop.c
index f4de0a0..19fc567 100644
--- a/src/cmd/5l/noop.c
+++ b/src/cmd/5l/noop.c
@@ -141,8 +141,8 @@
}
}
// TODO(kaib): make lack of morestack an error
-// if(pmorestack == P)
-// diag("sys·morestack not defined");
+// if(pmorestack == P)
+// diag("sys·morestack not defined");
curframe = 0;
curbecome = 0;
@@ -356,27 +356,26 @@
p->link = q1;
} else if (autosize < StackBig) {
// split stack check for small functions
- // MOVW (REGG), R1
+ // MOVW g_stackguard(g), R1
// CMP R1, $-autosize(SP)
- // MOVW.W.LT R14,$-autosize(SP)
- // MOVW.W.GE R14,$-4(SP)
- // MOVW.GE $(args << 24 | autosize), R1
- // BL.GE callmorestack(SB)
+ // MOVW.LT $args, R2
+ // MOVW.LT R14, R3
+ // BL.LT sys·morestackx(SB) // modifies LR
+ // MOVW.W R14,$-autosize(SP)
- // TODO(kaib): double check we allocate autosize after
- // stack has been split
- // TODO(kaib): add error in case autosize doesn't pack
// TODO(kaib): add more trampolines
// TODO(kaib): put stackguard in register
// TODO(kaib): add support for -K and underflow detection
- p = appendp(p); // load G.stackguard into R1
+ // MOVW g_stackguard(g), R1
+ p = appendp(p);
p->as = AMOVW;
p->from.type = D_OREG;
p->from.reg = REGG;
p->to.type = D_REG;
p->to.reg = 1;
+ // CMP R1, $-autosize(SP)
p = appendp(p);
p->as = ACMP;
p->from.type = D_REG;
@@ -384,42 +383,41 @@
p->from.offset = -autosize;
p->reg = REGSP;
+ // MOVW.LT $args, R2
p = appendp(p);
p->as = AMOVW;
- p->scond = C_SCOND_GE | C_WBIT;
- p->from.type = D_REG;
- p->from.reg = REGLINK;
- p->to.type = D_OREG;
- p->to.offset = -autosize;
- p->to.reg = REGSP;
-
- p = appendp(p);
- p->as = AMOVW;
- p->scond = C_SCOND_LT | C_WBIT;
- p->from.type = D_REG;
- p->from.reg = REGLINK;
- p->to.type = D_OREG;
- p->to.offset = -4;
- p->to.reg = REGSP;
-
- p = appendp(p); // packs args and autosize
- p->as = AMOVW;
p->scond = C_SCOND_LT;
p->from.type = D_CONST;
- // top 8 bits are arg count, lower 24 bits number of 4 byte
- // words
- p->from.offset =
- (curtext->to.offset2 & ~7) << 21 |
- (autosize & ~7) >> 3;
+ p->from.offset = curtext->to.offset2 & ~7;
p->to.type = D_REG;
- p->to.reg = 1;
+ p->to.reg = 2;
+ // MOVW.LT R14, R3
+ p = appendp(p);
+ p->as = AMOVW;
+ p->scond = C_SCOND_LT;
+ p->from.type = D_REG;
+ p->from.reg = REGLINK;
+ p->to.type = D_REG;
+ p->to.reg = 3;
+
+ // BL.LT sys·morestackx(SB) // modifies LR
p = appendp(p);
p->as = ABL;
p->scond = C_SCOND_LT;
p->to.type = D_BRANCH;
p->to.sym = symmorestack;
p->cond = pmorestack;
+
+ // MOVW.W R14,$-autosize(SP)
+ p = appendp(p);
+ p->as = AMOVW;
+ p->scond |= C_WBIT;
+ p->from.type = D_REG;
+ p->from.reg = REGLINK;
+ p->to.type = D_OREG;
+ p->to.offset = -autosize;
+ p->to.reg = REGSP;
} else { // > StackBig
// MOVW.W R14,$-4(SP)
// MOVW $(args << 24 | autosize), R1
diff --git a/src/pkg/runtime/Makefile b/src/pkg/runtime/Makefile
index 984c1f5..281dca9 100644
--- a/src/pkg/runtime/Makefile
+++ b/src/pkg/runtime/Makefile
@@ -10,7 +10,10 @@
# Setup CFLAGS. Add -D_64BIT on 64-bit platforms (sorry).
CFLAGS_64=-D_64BIT
-CFLAGS=-I$(GOOS) -I$(GOOS)/$(GOARCH) -wF $(CFLAGS_$(SIZE))
+# TODO(kaib): fix register allocation to honor extern register so we
+# can enable optimizations again.
+CFLAGS_arm=-N
+CFLAGS=-I$(GOOS) -I$(GOOS)/$(GOARCH) -wF $(CFLAGS_$(SIZE)) $(CFLAGS_$(GOARCH))
# Set O to right letter.
O_386=8
@@ -33,6 +36,7 @@
# arm-specific object files
OFILES_arm=\
+ memset.$O\
vlop.$O\
vlrt.$O\
diff --git a/src/pkg/runtime/arm/asm.s b/src/pkg/runtime/arm/asm.s
index 5e68b72..39ac99e 100644
--- a/src/pkg/runtime/arm/asm.s
+++ b/src/pkg/runtime/arm/asm.s
@@ -2,51 +2,55 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-TEXT _rt0_arm(SB),7,$0
+#include "arm/asm.h"
+
+// using frame size $-4 means do not save LR on stack.
+TEXT _rt0_arm(SB),7,$-4
MOVW $setR12(SB), R12
// copy arguments forward on an even stack
- MOVW 0(SP), R0 // argc
- MOVW 4(SP), R1 // argv
- SUB $128, SP // plenty of scratch
- AND $~7, SP
- MOVW R0, 120(SP) // save argc, argv away
- MOVW R1, 124(SP)
+ // use R13 instead of SP to avoid linker rewriting the offsets
+ MOVW 0(R13), R0 // argc
+ MOVW $4(R13), R1 // argv
+ SUB $128, R13 // plenty of scratch
+ AND $~7, R13
+ MOVW R0, 120(R13) // save argc, argv away
+ MOVW R1, 124(R13)
// set up m and g registers
// g is R10, m is R9
- MOVW $g0(SB), R10
- MOVW $m0(SB), R9
+ MOVW $g0(SB), g
+ MOVW $m0(SB), m
// save m->g0 = g0
- MOVW R10, 0(R9)
+ MOVW g, m_g0(m)
// create istack out of the OS stack
- MOVW $(-8192+104)(SP), R0
- MOVW R0, 0(R10) // 0(g) is stack limit (w 104b guard)
- MOVW SP, 4(R10) // 4(g) is base
+ MOVW $(-8192+104)(R13), R0
+ MOVW R0, g_stackguard(g) // (w 104b guard)
+ MOVW R13, g_stackbase(g)
BL emptyfunc(SB) // fault if stack check is wrong
BL check(SB)
// saved argc, argv
- MOVW 120(SP), R0
- MOVW R0, 0(SP)
- MOVW 124(SP), R0
- MOVW R0, 4(SP)
+ MOVW 120(R13), R0
+ MOVW R0, 4(R13)
+ MOVW 124(R13), R1
+ MOVW R1, 8(R13)
BL args(SB)
BL osinit(SB)
BL schedinit(SB)
// create a new goroutine to start program
MOVW $mainstart(SB), R0
- MOVW.W R0, -4(SP)
+ MOVW.W R0, -4(R13)
MOVW $8, R0
- MOVW.W R0, -4(SP)
+ MOVW.W R0, -4(R13)
MOVW $0, R0
- MOVW.W R0, -4(SP) // push $0 as guard
+ MOVW.W R0, -4(R13) // push $0 as guard
BL sys·newproc(SB)
- MOVW $12(SP), SP // pop args and LR
+ MOVW $12(R13), R13 // pop args and LR
// start this M
BL mstart(SB)
@@ -70,73 +74,106 @@
// TODO(kaib): remove these once linker works properly
// pull in dummy dependencies
TEXT _dep_dummy(SB),7,$0
- BL sys·morestack(SB)
- BL sys·morestackx(SB)
BL _div(SB)
BL _divu(SB)
BL _mod(SB)
BL _modu(SB)
BL _modu(SB)
-
TEXT breakpoint(SB),7,$0
BL abort(SB)
// BYTE $0xcc
// RET
-// go-routine
-TEXT gogo(SB), 7, $0
- BL abort(SB)
-// MOVL 4(SP), AX // gobuf
-// MOVL 0(AX), SP // restore SP
-// MOVL 4(AX), AX
-// MOVL AX, 0(SP) // put PC on the stack
-// MOVL $1, AX
-// RET
+/*
+ * go-routine
+ */
+// uintptr gosave(Gobuf*)
+// save state in Gobuf; setjmp
TEXT gosave(SB), 7, $0
+ MOVW SP, gobuf_sp(R0)
+ MOVW LR, gobuf_pc(R0)
+ MOVW g, gobuf_g(R0)
+ MOVW $0, R0 // return 0
+ RET
+
+// void gogo(Gobuf*, uintptr)
+// restore state from Gobuf; longjmp
+TEXT gogo(SB), 7, $0
+ MOVW R0, R1 // gobuf
+ MOVW 8(SP), R0 // return 2nd arg
+ MOVW gobuf_g(R1), g
+ MOVW 0(g), R2 // make sure g != nil
+ MOVW gobuf_sp(R1), SP // restore SP
+ MOVW gobuf_pc(R1), PC
+
+// void gogocall(Gobuf*, void (*fn)(void))
+// restore state from Gobuf but then call fn.
+// (call fn, returning to state in Gobuf)
+// TODO(kaib): add R0 to gobuf so it can be restored properly
+// using frame size $-4 means do not save LR on stack.
+TEXT gogocall(SB), 7, $-4
+ MOVW 8(SP), R1 // fn
+ MOVW gobuf_g(R0), g
+ MOVW 0(g), R2 // make sure g != nil
+ MOVW gobuf_sp(R0), SP // restore SP
+ MOVW gobuf_pc(R0), LR
+ MOVW R1, PC
+
+/*
+ * support for morestack
+ */
+
+// Called during function prolog when more stack is needed.
+// R1 frame size
+// R2 arg size
+// R3 prolog's LR
+// using frame size $-4 means do not save LR on stack.
+TEXT sys·morestack(SB),7,$-4
+ // Cannot grow scheduler stack (m->g0).
+ MOVW m_g0(m), R4
+ CMP g, R4
+ BNE 2(PC)
BL abort(SB)
-// MOVL 4(SP), AX // gobuf
-// MOVL SP, 0(AX) // save SP
-// MOVL 0(SP), BX
-// MOVL BX, 4(AX) // save PC
-// MOVL $0, AX // return 0
-// RET
-// support for morestack
+ // Save in m.
+ MOVW R1, m_moreframe(m)
+ MOVW R2, m_moreargs(m)
-// return point when leaving new stack.
-// save R0, jmp to lesstack to switch back
-TEXT retfromnewstack(SB),7,$0
- MOVW R0,12(R9) // m->cret
- B lessstack(SB)
+ // Called from f.
+ // Set m->morebuf to f's caller.
+ MOVW R3, (m_morebuf+gobuf_pc)(m) // f's caller's PC
+ MOVW SP, (m_morebuf+gobuf_sp)(m) // f's caller's SP
+ MOVW g, (m_morebuf+gobuf_g)(m)
-// gogo, returning 2nd arg instead of 1
-TEXT gogoret(SB), 7, $0
- MOVW 8(SP), R0 // return 2nd arg
- MOVW 4(SP), R1 // gobuf
- MOVW 0(R1), SP // restore SP
- MOVW 4(R1), PC // restore PC
+ // Set m->morepc to f's PC.
+ MOVW LR, m_morepc(m)
-TEXT setspgoto(SB), 7, $0
- MOVW 4(SP), R0 // SP
- MOVW 8(SP), R1 // fn to call
- MOVW 12(SP), R2 // fn to return into
- MOVW R2, R14 // restore LR
- MOVW R0, SP
- MOVW R1, PC // goto
+ // Call newstack on m's scheduling stack.
+ MOVW m_g0(m), g
+ MOVW (m_sched+gobuf_sp)(m), SP
+ B newstack(SB)
+
+// Return point when leaving stack.
+// using frame size $-4 means do not save LR on stack.
+TEXT sys·lessstack(SB), 7, $-4
+ // Save return value in m->cret
+ MOVW R0, m_cret(m)
+
+ // Call oldstack on m's scheduling stack.
+ MOVW m_g0(m), g
+ MOVW (m_sched+gobuf_sp)(m), SP
+ B oldstack(SB)
// Optimization to make inline stack splitting code smaller
// R0 is original first argument
-// R1 is arg_num << 24 | autosize >> 3
-TEXT sys·morestackx(SB), 7, $0
- MOVW R0, 4(SP) // Save arg0
- MOVW R1<<8, R2
- MOVW R2>>5, R2
- MOVW R2, 4(R10) // autooffset into g
- MOVW R1>>24, R2
- MOVW R2<<3, R2
- MOVW R2, 8(R10) // argsize into g
+// R2 is argsize
+// R3 is LR for f (f's caller's PC)
+// using frame size $-4 means do not save LR on stack.
+TEXT sys·morestackx(SB), 7, $-4
+ MOVW R0, 0(FP) // Save arg0
+ MOVW $0, R1 // set frame size
B sys·morestack(SB)
// bool cas(int32 *val, int32 old, int32 new)
@@ -180,17 +217,18 @@
// SUBL $5, (SP) // return to CALL again
// JMP AX // but first run the deferred function
-TEXT sys·memclr(SB),7,$0
- BL abort(SB)
-// MOVL 4(SP), DI // arg 1 addr
-// MOVL 8(SP), CX // arg 2 count
-// ADDL $3, CX
-// SHRL $2, CX
-// MOVL $0, AX
-// CLD
-// REP
-// STOSL
-// RET
+TEXT sys·memclr(SB),7,$20
+// R0 = addr and passes implicitly to memset
+ MOVW $0, R1 // c = 0
+ MOVW R1, -16(SP)
+ MOVW 4(FP), R1 // n
+ MOVW R1, -12(SP)
+ MOVW m, -8(SP) // Save m and g
+ MOVW g, -4(SP)
+ BL memset(SB)
+ MOVW -8(SP), m // Restore m and g, memset clobbers them
+ MOVW -4(SP), g
+ RET
TEXT sys·getcallerpc+0(SB),7,$0
BL abort(SB)
diff --git a/src/pkg/runtime/arm/memset.s b/src/pkg/runtime/arm/memset.s
new file mode 100644
index 0000000..cce9453
--- /dev/null
+++ b/src/pkg/runtime/arm/memset.s
@@ -0,0 +1,94 @@
+// Inferno's libkern/memset-arm.s
+// http://code.google.com/p/inferno-os/source/browse/libkern/memset-arm.s
+//
+// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
+// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
+// Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+TO = 1
+TOE = 2
+N = 3
+TMP = 3 /* N and TMP don't overlap */
+
+// TODO(kaib): memset clobbers R9 and R10 (m and g). This makes the
+// registers unpredictable if (when) memset SIGSEGV's. Fix it by
+// moving the R4-R11 register bank.
+TEXT memset(SB), $0
+ MOVW R0, R(TO)
+ MOVW data+4(FP), R(4)
+ MOVW n+8(FP), R(N)
+
+ ADD R(N), R(TO), R(TOE) /* to end pointer */
+
+ CMP $4, R(N) /* need at least 4 bytes to copy */
+ BLT _1tail
+
+ AND $0xFF, R(4) /* it's a byte */
+ SLL $8, R(4), R(TMP) /* replicate to a word */
+ ORR R(TMP), R(4)
+ SLL $16, R(4), R(TMP)
+ ORR R(TMP), R(4)
+
+_4align: /* align on 4 */
+ AND.S $3, R(TO), R(TMP)
+ BEQ _4aligned
+
+ MOVBU.P R(4), 1(R(TO)) /* implicit write back */
+ B _4align
+
+_4aligned:
+ SUB $31, R(TOE), R(TMP) /* do 32-byte chunks if possible */
+ CMP R(TMP), R(TO)
+ BHS _4tail
+
+ MOVW R4, R5 /* replicate */
+ MOVW R4, R6
+ MOVW R4, R7
+ MOVW R4, R8
+ MOVW R4, R9
+ MOVW R4, R10
+ MOVW R4, R11
+
+_f32loop:
+ CMP R(TMP), R(TO)
+ BHS _4tail
+
+ MOVM.IA.W [R4-R11], (R(TO))
+ B _f32loop
+
+_4tail:
+ SUB $3, R(TOE), R(TMP) /* do remaining words if possible */
+_4loop:
+ CMP R(TMP), R(TO)
+ BHS _1tail
+
+ MOVW.P R(4), 4(R(TO)) /* implicit write back */
+ B _4loop
+
+_1tail:
+ CMP R(TO), R(TOE)
+ BEQ _return
+
+ MOVBU.P R(4), 1(R(TO)) /* implicit write back */
+ B _1tail
+
+_return:
+ RET
diff --git a/src/pkg/runtime/linux/arm/sys.s b/src/pkg/runtime/linux/arm/sys.s
index 25e64a3..c61d08f 100644
--- a/src/pkg/runtime/linux/arm/sys.s
+++ b/src/pkg/runtime/linux/arm/sys.s
@@ -6,23 +6,28 @@
// System calls and other sys.stuff for arm, Linux
//
+#define SYS_BASE 0x00900000
+#define SYS_exit (SYS_BASE + 1)
+#define SYS_write (SYS_BASE + 4)
+#define SYS_mmap2 (SYS_BASE + 192)
+
TEXT write(SB),7,$0
MOVW 8(SP), R1
MOVW 12(SP), R2
- SWI $0x00900004 // syscall write
+ SWI $SYS_write
RET
TEXT exit(SB),7,$0
- SWI $0x00900001 // exit value in R0
-
-TEXT sys·write(SB),7,$0
- MOVW 8(SP), R1
- MOVW 12(SP), R2
- SWI $0x00900004 // syscall write
- RET
+ // Exit value already in R0
+ SWI $SYS_exit
TEXT sys·mmap(SB),7,$0
- BL abort(SB)
+ MOVW 4(FP), R1
+ MOVW 8(FP), R2
+ MOVW 12(FP), R3
+ MOVW 16(FP), R4
+ MOVW 20(FP), R5
+ SWI $SYS_mmap2
RET
// int64 futex(int32 *uaddr, int32 op, int32 val,