// src/runtime/sys_windows_arm.s - go - Git at Google

 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 #include "go_asm.h"
 #include "go_tls.h"
 #include "textflag.h"

 // void runtime·asmstdcall(void *c);
 //
 // Calls the Windows function described by the libcall structure that R0
 // points to. Fields are accessed by offset: 0 fn, 4 n (argument count),
 // 8 args (pointer to the argument words), 12 r1, 16 second result word
 // (presumably r2 -- confirm against the libcall definition), 20 err.
 // Up to four arguments are passed in R0-R3; the rest are copied onto the
 // stack. The thread's last-error slot (offset 0x34 from the pointer read
 // by MRC) is zeroed before the call and copied to libcall->err afterwards.
 // R4 and R5 are saved on entry and restored on exit.
 TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4, R5, R14], (R13)	// push {r4, r5, lr}
 	MOVW	R0, R4			// put libcall * in r4
 	MOVW	R13, R5			// save stack pointer in r5

 	// SetLastError(0)
 	MOVW	$0, R0
 	MRC	15, 0, R1, C13, C0, 2
 	MOVW	R0, 0x34(R1)

 	MOVW	8(R4), R12	// libcall->args

 	// Do we have more than 4 arguments?
 	MOVW	4(R4), R0	// libcall->n
 	SUB.S	$4, R0, R2
 	BLE	loadregs

 	// Reserve stack space for remaining args
 	SUB	R2<<2, R13
 	BIC	$0x7, R13	// alignment for ABI

 	// R0: count of arguments
 	// R1:
 	// R2: loop counter, from 0 to (n-4)
 	// R3: scratch
 	// R4: pointer to libcall struct
 	// R12: libcall->args
 	MOVW	$0, R2
 stackargs:
 	ADD	$4, R2, R3		// r3 = args[4 + i]
 	MOVW	R3<<2(R12), R3
 	MOVW	R3, R2<<2(R13)		// stack[i] = r3

 	ADD	$1, R2			// i++
 	SUB	$4, R0, R3		// while (i < (n - 4))
 	CMP	R3, R2
 	BLT	stackargs

 loadregs:
 	// Load args[3]..args[0] into R3..R0, each only if n is large enough.
 	// R0 is loaded last because it still holds n for the comparisons.
 	CMP	$3, R0
 	MOVW.GT 12(R12), R3

 	CMP	$2, R0
 	MOVW.GT 8(R12), R2

 	CMP	$1, R0
 	MOVW.GT 4(R12), R1

 	CMP	$0, R0
 	MOVW.GT 0(R12), R0

 	// Needed on the n <= 4 path, where R13 has not been aligned yet.
 	BIC	$0x7, R13		// alignment for ABI
 	MOVW	0(R4), R12		// branch to libcall->fn
 	BL	(R12)

 	MOVW	R5, R13			// free stack space
 	MOVW	R0, 12(R4)		// save return value to libcall->r1
 	MOVW	R1, 16(R4)		// second result word (offset 16)

 	// GetLastError
 	MRC	15, 0, R1, C13, C0, 2
 	MOVW	0x34(R1), R0
 	MOVW	R0, 20(R4)		// store in libcall->err

 	MOVM.IA.W (R13), [R4, R5, R15]	// pop {r4, r5, pc}

 // badsignal2 writes runtime·badsignalmsg (runtime·badsignallen bytes) to the
 // handle returned by GetStdHandle(-12), using WriteFile, and then returns
 // to its caller. It is called from sigtramp when there is no current g.
 // Two stack words are reserved: 0(R13) holds the fifth WriteFile argument
 // (lpOverlapped = 0) and 4(R13) receives the number of bytes written.
 TEXT runtime·badsignal2(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4, R14], (R13)	// push {r4, lr}
 	MOVW	R13, R4			// save original stack pointer
 	SUB	$8, R13			// space for 2 variables
 	BIC	$0x7, R13		// alignment for ABI

 	// stderr
 	MOVW	runtime·_GetStdHandle(SB), R1
 	MOVW	$-12, R0
 	BL	(R1)

 	// R0 still holds the handle returned above; it is the first argument.
 	MOVW	$runtime·badsignalmsg(SB), R1	// lpBuffer
 	MOVW	$runtime·badsignallen(SB), R2	// lpNumberOfBytesToWrite
 	MOVW	(R2), R2
 	ADD	$0x4, R13, R3		// lpNumberOfBytesWritten
 	MOVW	$0, R12			// lpOverlapped
 	MOVW	R12, (R13)

 	MOVW	runtime·_WriteFile(SB), R12
 	BL	(R12)

 	MOVW	R4, R13			// restore SP
 	MOVM.IA.W (R13), [R4, R15]	// pop {r4, pc}

 // getlasterror returns the calling thread's last-error value: the word at
 // offset 0x34 from the pointer produced by the MRC instruction (the same
 // slot asmstdcall clears and reads around a call).
 TEXT runtime·getlasterror(SB),NOSPLIT,$0
 	MRC	15, 0, R0, C13, C0, 2	// R0 = per-thread block pointer
 	MOVW	0x34(R0), R0		// R0 = last-error value
 	MOVW	R0, ret+0(FP)
 	RET

 // setlasterror stores R0 into the calling thread's last-error slot
 // (offset 0x34, the slot getlasterror reads).
 // NOTE(review): the value is taken from R0 as it is on entry; nothing is
 // loaded from the argument frame, unlike getlasterror, which writes its
 // result to ret+0(FP). Confirm that callers really pass the error in R0.
 TEXT runtime·setlasterror(SB),NOSPLIT|NOFRAME,$0
 	MRC	15, 0, R1, C13, C0, 2	// R1 = per-thread block pointer
 	MOVW	R0, 0x34(R1)		// last-error = R0
 	RET

 // Called by Windows as a Vectored Exception Handler (VEH).
 // First argument is pointer to struct containing
 // exception record and context pointers.
 // Handler function is stored in R1
 // Return 0 for 'not handled', -1 for handled.
 // int32_t sigtramp(
 //     PEXCEPTION_POINTERS ExceptionInfo,
 //     func *GoExceptionHandler);
 //
 // Frame layout after the prologue (offsets from R13):
 //    0  link-register slot used for traceback
 //    4  arg0 (ExceptionRecord)
 //    8  arg1 (ContextRecord)
 //   12  arg2 (original g)
 //   16  return value of the Go handler
 //   20  saved g
 //   24  saved SP
 //   28  pushed registers {r0, r4-r11, lr} (40 bytes)
 //
 // If there is no current g, the bad-signal message is printed and the
 // process aborts; the code below cannot run without a g.
 TEXT runtime·sigtramp(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R0, R4-R11, R14], (R13)	// push {r0, r4-r11, lr} (SP-=40)
 	SUB	$(8+20), R13		// reserve space for g, sp, and
 					// parameters/retval to go call

 	MOVW	R0, R6			// Save param0
 	MOVW	R1, R7			// Save param1

 	BL      runtime·load_g(SB)
 	CMP	$0, g			// is there a current g?
 	BNE	hasg

 	// No g: report it and stop. Continuing would dereference the nil g
 	// below and fault again inside the exception handler.
 	BL	runtime·badsignal2(SB)
 	B	runtime·abort(SB)

 hasg:
 	// save g and SP in case of stack switch
 	MOVW	R13, 24(R13)
 	MOVW	g, 20(R13)

 	// do we need to switch to the g0 stack?
 	MOVW	g, R5			// R5 = g
 	MOVW	g_m(R5), R2		// R2 = m
 	MOVW	m_g0(R2), R4		// R4 = g0
 	CMP	R5, R4			// if curg == g0
 	BEQ	g0

 	// switch to g0 stack
 	MOVW	R4, g				// g = g0
 	MOVW	(g_sched+gobuf_sp)(g), R3	// R3 = g->gobuf.sp
 	BL      runtime·save_g(SB)

 	// traceback will think that we've done PUSH and SUB
 	// on this stack, so subtract them here to match.
 	// (we need room for sighandler arguments anyway).
 	// and re-save old SP for restoring later.
 	SUB	$(40+8+20), R3
 	MOVW	R13, 24(R3)		// save old stack pointer
 	MOVW	R3, R13			// switch stack

 g0:
 	MOVW	0(R6), R2	// R2 = ExceptionPointers->ExceptionRecord
 	MOVW	4(R6), R3	// R3 = ExceptionPointers->ContextRecord

 	// make it look like mstart called us on g0, to stop traceback
 	MOVW    $runtime·mstart(SB), R4

 	MOVW	R4, 0(R13)	// Save link register for traceback
 	MOVW	R2, 4(R13)	// Move arg0 (ExceptionRecord) into position
 	MOVW	R3, 8(R13)	// Move arg1 (ContextRecord) into position
 	MOVW	R5, 12(R13)	// Move arg2 (original g) into position
 	BL	(R7)		// Call the go routine
 	MOVW	16(R13), R4	// Fetch return value from stack

 	// Compute the value of the g0 stack pointer after deallocating
 	// this frame, then allocating 8 bytes. We may need to store
 	// the resume SP and PC on the g0 stack to work around
 	// control flow guard when we resume from the exception.
 	ADD	$(40+20), R13, R12

 	// switch back to original stack and g
 	// (when no switch happened, 24(R13) holds R13 itself)
 	MOVW	24(R13), R13
 	MOVW	20(R13), g
 	BL      runtime·save_g(SB)

 done:
 	MOVW	R4, R0				// move retval into position
 	ADD	$(8 + 20), R13			// free locals
 	// The R0 pushed on entry (ExceptionInfo) is popped into R3.
 	MOVM.IA.W (R13), [R3, R4-R11, R14]	// pop {r3, r4-r11, lr}

 	// if return value is CONTINUE_SEARCH, do not set up control
 	// flow guard workaround
 	CMP	$0, R0
 	BEQ	return

 	// Check if we need to set up the control flow guard workaround.
 	// On Windows/ARM, the stack pointer must lie within system
 	// stack limits when we resume from exception.
 	// Store the resume SP and PC on the g0 stack,
 	// and return to returntramp on the g0 stack. returntramp
 	// pops the saved PC and SP from the g0 stack, resuming execution
 	// at the desired location.
 	// If returntramp has already been set up by a previous exception
 	// handler, don't clobber the stored SP and PC on the stack.
 	MOVW	4(R3), R3			// PEXCEPTION_POINTERS->Context
 	MOVW	0x40(R3), R2			// load PC from context record
 	MOVW	$runtime·returntramp(SB), R1
 	CMP	R1, R2
 	B.EQ	return				// do not clobber saved SP/PC

 	// Save resume SP and PC on g0 stack
 	MOVW	0x38(R3), R2			// load SP from context record
 	MOVW	R2, 0(R12)			// Store resume SP on g0 stack
 	MOVW	0x40(R3), R2			// load PC from context record
 	MOVW	R2, 4(R12)			// Store resume PC on g0 stack

 	// Set up context record to return to returntramp on g0 stack
 	MOVW	R12, 0x38(R3)			// save g0 stack pointer
 						// in context record
 	MOVW	$runtime·returntramp(SB), R2	// save resume address
 	MOVW	R2, 0x40(R3)			// in context record

 return:
 	B	(R14)				// return

 //
 // Trampoline to resume execution from exception handler.
 // This is part of the control flow guard workaround.
 // It switches stacks and jumps to the continuation address.
 //
 // On entry R13 points at the two words sigtramp stored on the g0 stack:
 // the resume SP at 0(R13) and the resume PC at 4(R13). A single load
 // multiple installs both.
 TEXT runtime·returntramp(SB),NOSPLIT|NOFRAME,$0
 	MOVM.IA	(R13), [R13, R15]		// ldm sp, [sp, pc]

 // exceptiontramp enters sigtramp with runtime·exceptionhandler as the
 // handler (R1). R0 is passed through untouched.
 TEXT runtime·exceptiontramp(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$runtime·exceptionhandler(SB), R1
 	B	runtime·sigtramp(SB)

 // firstcontinuetramp enters sigtramp with runtime·firstcontinuehandler as
 // the handler (R1). R0 is passed through untouched.
 TEXT runtime·firstcontinuetramp(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$runtime·firstcontinuehandler(SB), R1
 	B	runtime·sigtramp(SB)

 // lastcontinuetramp enters sigtramp with runtime·lastcontinuehandler as
 // the handler (R1). R0 is passed through untouched.
 TEXT runtime·lastcontinuetramp(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$runtime·lastcontinuehandler(SB), R1
 	B	runtime·sigtramp(SB)

 // ctrlhandler enters externalthreadhandler with runtime·ctrlhandler1 as
 // the function to run (R1). R0 is passed through as its argument.
 TEXT runtime·ctrlhandler(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$runtime·ctrlhandler1(SB), R1
 	B	runtime·externalthreadhandler(SB)

 // profileloop enters externalthreadhandler with runtime·profileloop1 as
 // the function to run (R1). R0 is passed through as its argument.
 TEXT runtime·profileloop(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$runtime·profileloop1(SB), R1
 	B	runtime·externalthreadhandler(SB)

 // int32 externalthreadhandler(uint32 arg, int (*func)(uint32))
 //
 // Runs func(arg) with a temporary m and g built on the current stack,
 // then clears g and returns func's result in R0.
 //
 // stack layout (offsets from R13 after the prologue):
 //   +----------------+
 //   | callee-save    |
 //   | registers      |
 //   +----------------+
 //   | m              |
 //   +----------------+
 // 20| g              |
 //   +----------------+
 // 16| func ptr (r1)  |
 //   +----------------+
 // 12| argument (r0)  |
 //---+----------------+
 // 8 | param1 / retval|
 //   +----------------+
 // 4 | param0         |
 //   +----------------+
 // 0 | link reg slot  |
 //   +----------------+
 //
 // Outgoing arguments start at 4(R13) and results follow them, as in
 // sigtramp. For func(uint32) the single result is therefore at 8(R13).
 TEXT runtime·externalthreadhandler(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4-R11, R14], (R13)		// push {r4-r11, lr}
 	SUB	$(m__size + g__size + 20), R13	// space for locals
 	MOVW	R0, 12(R13)
 	MOVW	R1, 16(R13)

 	// zero out m and g structures
 	ADD	$20, R13, R0			// compute pointer to g
 	MOVW	R0, 4(R13)
 	MOVW	$(m__size + g__size), R0
 	MOVW	R0, 8(R13)
 	BL	runtime·memclrNoHeapPointers(SB)

 	// initialize m and g structures
 	ADD	$20, R13, R2			// R2 = g
 	ADD	$(20 + g__size), R13, R3	// R3 = m
 	MOVW	R2, m_g0(R3)			// m->g0 = g
 	MOVW	R3, g_m(R2)			// g->m = m
 	MOVW	R2, m_curg(R3)			// m->curg = g

 	MOVW	R2, g
 	BL	runtime·save_g(SB)

 	// set up stackguard stuff
 	MOVW	R13, R0
 	MOVW	R0, (g_stack+stack_hi)(g)
 	SUB	$(32*1024), R0
 	MOVW	R0, (g_stack+stack_lo)(g)
 	MOVW	R0, g_stackguard0(g)
 	MOVW	R0, g_stackguard1(g)

 	// move argument into position and call function
 	MOVW	12(R13), R0
 	MOVW	R0, 4(R13)
 	MOVW	16(R13), R1
 	BL	(R1)

 	// clear g
 	MOVW	$0, g
 	BL	runtime·save_g(SB)

 	// The result slot follows the single argument at 4(R13).
 	MOVW	8(R13), R0			// load return value
 	ADD	$(m__size + g__size + 20), R13	// free locals
 	MOVM.IA.W (R13), [R4-R11, R15]		// pop {r4-r11, pc}

 // cbctxts is a 4-byte word; callbackasm1 loads it and indexes it as an
 // array of pointers to wincallbackcontext structures.
 GLOBL runtime·cbctxts(SB), NOPTR, $4

 // callbackasm1 is the common body for callbacks entered from the
 // trampolines in zcallback_windows.s, which supply the callback index in
 // R12. It spills R0-R3 to the stack, looks up cbctxts[R12], aborts if the
 // recorded argsize exceeds 16 bytes, and calls cgocallback_gofunc with
 // (fn, arg, argsize). The word that follows the arguments is returned in R0.
 //
 // Locals (offsets from R13 after the prologue):
 //    4  fn       (wincallbackcontext gobody)
 //    8  arg      (pointer to the spilled R0-R3 at 16)
 //   12  argsize  (argument bytes + 4 for the return value)
 //   16  spilled R0-R3, followed by the return value slot
 TEXT runtime·callbackasm1(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4-R11, R14], (R13)	// push {r4-r11, lr}
 	SUB	$36, R13		// space for locals

 	// save callback arguments to stack. We currently support up to 4 arguments
 	ADD	$16, R13, R4
 	MOVM.IA	[R0-R3], (R4)

 	// load cbctxts[i]. The trampoline in zcallback_windows.s puts the callback
 	// index in R12
 	MOVW	runtime·cbctxts(SB), R4
 	MOVW	R12<<2(R4), R4		// R4 holds pointer to wincallbackcontext structure

 	// extract callback context
 	MOVW	wincallbackcontext_argsize(R4), R5
 	MOVW	wincallbackcontext_gobody(R4), R4

 	// we currently support up to 4 arguments
 	CMP	$(4 * 4), R5
 	BL.GT	runtime·abort(SB)

 	// extend argsize by size of return value
 	ADD	$4, R5

 	// Build 'type args struct'
 	MOVW	R4, 4(R13)		// fn
 	ADD	$16, R13, R0		// arg (points to r0-r3, ret on stack)
 	MOVW	R0, 8(R13)
 	MOVW	R5, 12(R13)		// argsize

 	BL	runtime·load_g(SB)
 	BL	runtime·cgocallback_gofunc(SB)

 	ADD	$16, R13, R0		// load arg
 	MOVW	12(R13), R1		// load argsize
 	SUB	$4, R1			// offset to return value
 	MOVW	R1<<0(R0), R0		// load return value

 	ADD	$36, R13		// free locals
 	MOVM.IA.W (R13), [R4-R11, R15]	// pop {r4-r11, pc}

 // uint32 tstart_stdcall(M *newm);
 //
 // Thread entry point. R0 holds the new m. Installs m->g0 as g, links
 // g->m back to the m, does per-thread TLS setup, describes the current OS
 // stack (64 KB below the current SP) in g's stack bounds, and calls
 // mstart. If mstart returns, the thread exits with 0.
 TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4-R11, R14], (R13)		// push {r4-r11, lr}

 	MOVW	m_g0(R0), g
 	MOVW	R0, g_m(g)
 	BL	runtime·save_g(SB)

 	// do per-thread TLS initialization
 	BL	runtime·init_thread_tls(SB)

 	// Layout new m scheduler stack on os stack.
 	MOVW	R13, R0
 	MOVW	R0, g_stack+stack_hi(g)
 	SUB	$(64*1024), R0
 	MOVW	R0, (g_stack+stack_lo)(g)
 	MOVW	R0, g_stackguard0(g)
 	MOVW	R0, g_stackguard1(g)

 	BL	runtime·emptyfunc(SB)	// fault if stack check is wrong
 	BL	runtime·mstart(SB)

 	// Exit the thread.
 	MOVW	$0, R0
 	MOVM.IA.W (R13), [R4-R11, R15]		// pop {r4-r11, pc}

 // onosstack calls fn on OS stack.
 // adapted from asm_arm.s : systemstack
 // func onosstack(fn unsafe.Pointer, arg uint32)
 //
 // fn is called with arg in R0. If g is nil, gsignal or g0, fn is
 // tail-called on the current stack. If g is m->curg, the current state is
 // saved in g->sched, execution moves to the g0 stack for the call, and
 // then returns to the original g and stack. Any other g is reported via
 // badsystemstack followed by abort.
 TEXT runtime·onosstack(SB),NOSPLIT,$0
 	MOVW	fn+0(FP), R5		// R5 = fn
 	MOVW	arg+4(FP), R6		// R6 = arg

 	// This function can be called when there is no g,
 	// for example, when we are handling a callback on a non-go thread.
 	// In this case we're already on the system stack.
 	CMP	$0, g
 	BEQ	noswitch

 	MOVW	g_m(g), R1		// R1 = m

 	MOVW	m_gsignal(R1), R2	// R2 = gsignal
 	CMP	g, R2
 	B.EQ	noswitch

 	MOVW	m_g0(R1), R2		// R2 = g0
 	CMP	g, R2
 	B.EQ	noswitch

 	MOVW	m_curg(R1), R3
 	CMP	g, R3
 	B.EQ	switch

 	// Bad: g is not gsignal, not g0, not curg. What is it?
 	// Hide call from linker nosplit analysis.
 	MOVW	$runtime·badsystemstack(SB), R0
 	BL	(R0)
 	B	runtime·abort(SB)

 switch:
 	// save our state in g->sched. Pretend to
 	// be systemstack_switch if the G stack is scanned.
 	MOVW	$runtime·systemstack_switch(SB), R3
 	ADD	$4, R3, R3 // get past push {lr}
 	MOVW	R3, (g_sched+gobuf_pc)(g)
 	MOVW	R13, (g_sched+gobuf_sp)(g)
 	MOVW	LR, (g_sched+gobuf_lr)(g)
 	MOVW	g, (g_sched+gobuf_g)(g)

 	// switch to g0 (R2 still holds g0 from the check above)
 	MOVW	R2, g
 	MOVW	(g_sched+gobuf_sp)(R2), R3
 	// make it look like mstart called systemstack on g0, to stop traceback
 	SUB	$4, R3, R3
 	MOVW	$runtime·mstart(SB), R4
 	MOVW	R4, 0(R3)
 	MOVW	R3, R13

 	// call target function
 	MOVW	R6, R0		// arg
 	BL	(R5)

 	// switch back to g
 	MOVW	g_m(g), R1
 	MOVW	m_curg(R1), g
 	MOVW	(g_sched+gobuf_sp)(g), R13
 	MOVW	$0, R3
 	MOVW	R3, (g_sched+gobuf_sp)(g)	// clear the saved SP
 	RET

 noswitch:
 	// Using a tail call here cleans up tracebacks since we won't stop
 	// at an intermediate systemstack.
 	MOVW.P	4(R13), R14	// restore LR
 	MOVW	R6, R0		// arg
 	B	(R5)

 // Runs on OS stack. Duration (in 100ns units) is in R0.
 //
 // Calls NtWaitForSingleObject(handle = -1, alertable = FALSE, pTime) where
 // pTime points at a 64-bit value on the stack whose low word is -R0 and
 // whose high word is -1, i.e. the negated duration.
 // NOTE(review): a duration of 0 yields low word 0 with high word -1, which
 // is not a negated zero -- confirm that callers never pass 0.
 TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4, R14], (R13)	// push {r4, lr}
 	MOVW	R13, R4			// Save SP
 	SUB	$8, R13			// R13 = R13 - 8
 	BIC	$0x7, R13		// Align SP for ABI
 	RSB	$0, R0, R3		// R3 = -R0
 	MOVW	$0, R1			// R1 = FALSE (alertable)
 	MOVW	$-1, R0			// R0 = handle
 	MOVW	R13, R2			// R2 = pTime
 	MOVW	R3, 0(R2)		// time_lo
 	MOVW	R0, 4(R2)		// time_hi (reuses the -1 in R0)
 	MOVW	runtime·_NtWaitForSingleObject(SB), R3
 	BL	(R3)
 	MOVW	R4, R13			// Restore SP
 	MOVM.IA.W (R13), [R4, R15]	// pop {R4, pc}

 // Runs on OS stack.
 // Calls the function stored in runtime·_SwitchToThread with the stack
 // aligned to 8 bytes, then restores the original SP. Whatever the callee
 // leaves in R0 is returned to the caller unchanged.
 TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4, R14], (R13)  	// push {R4, lr}
 	MOVW    R13, R4			// save stack pointer
 	BIC	$0x7, R13		// alignment for ABI
 	MOVW	runtime·_SwitchToThread(SB), R0
 	BL	(R0)
 	MOVW 	R4, R13			// restore stack pointer
 	MOVM.IA.W (R13), [R4, R15]	// pop {R4, pc}

 // publicationBarrier takes no arguments and returns nothing; it is a
 // direct tail call to runtime·armPublicationBarrier.
 TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
 	B	runtime·armPublicationBarrier(SB)

 // never called (cgo not supported)
 // The body stores to address 0xabcd, presumably so that an unexpected call
 // faults at a recognisable address rather than continuing.
 TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0
 	MOVW	$0xabcd, R0
 	MOVW	R0, (R0)
 	RET

 // See http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/
 // Must read hi1, then lo, then hi2. The snapshot is valid if hi1 == hi2.
 // _INTERRUPT_TIME and _SYSTEM_TIME are fixed addresses of three-word time
 // values; time_lo, time_hi1 and time_hi2 are the word offsets within them.
 #define _INTERRUPT_TIME 0x7ffe0008
 #define _SYSTEM_TIME 0x7ffe0014
 #define time_lo 0
 #define time_hi1 4
 #define time_hi2 8

 // nanotime returns a 64-bit time in ret_lo/ret_hi.
 // If runtime·useQPCTime is non-zero it tail-calls runtime·nanotimeQPC.
 // Otherwise it reads the 64-bit value at _INTERRUPT_TIME (retrying until
 // both high words agree) and multiplies it by 100.
 TEXT runtime·nanotime(SB),NOSPLIT,$0-8
 	MOVW	$0, R0
 	MOVB	runtime·useQPCTime(SB), R0
 	CMP	$0, R0
 	BNE	useQPC
 	MOVW	$_INTERRUPT_TIME, R3
 loop:
 	MOVW	time_hi1(R3), R1
 	MOVW	time_lo(R3), R0
 	MOVW	time_hi2(R3), R2
 	CMP	R1, R2
 	BNE	loop

 	// wintime = R1:R0, multiply by 100
 	MOVW	$100, R2
 	MULLU	R0, R2, (R4, R3)    // R4:R3 = R1:R0 * R2
 	MULA	R1, R2, R4, R4      // add the high-word product into R4

 	// wintime*100 = R4:R3
 	MOVW	R3, ret_lo+0(FP)
 	MOVW	R4, ret_hi+4(FP)
 	RET
 useQPC:
 	B	runtime·nanotimeQPC(SB)		// tail call
 	RET

 // time·now returns three results in a 20-byte frame:
 //   sec  (int64) at 0(FP)
 //   nsec (int32) at 8(FP)
 //   mono (int64) at 12(FP)
 //
 // mono is the value at _INTERRUPT_TIME multiplied by 100. sec and nsec come
 // from the value at _SYSTEM_TIME, rebased to the Unix epoch, converted to
 // nanoseconds and split by 10**9 using a scaled reciprocal.
 //
 // If runtime·useQPCTime is non-zero, the work is handed to runtime·nowQPC.
 // The target must fill the same 20-byte result frame; nanotimeQPC only
 // produces an 8-byte result (see nanotime), so it is not suitable here.
 // NOTE(review): nowQPC is expected to be declared in the Go part of the
 // runtime with results (sec int64, nsec int32, mono int64) -- confirm.
 TEXT time·now(SB),NOSPLIT,$0-20
 	MOVW    $0, R0
 	MOVB    runtime·useQPCTime(SB), R0
 	CMP	$0, R0
 	BNE	useQPC
 	MOVW	$_INTERRUPT_TIME, R3
 loop:
 	MOVW	time_hi1(R3), R1
 	MOVW	time_lo(R3), R0
 	MOVW	time_hi2(R3), R2
 	CMP	R1, R2
 	BNE	loop

 	// wintime = R1:R0, multiply by 100
 	MOVW	$100, R2
 	MULLU	R0, R2, (R4, R3)    // R4:R3 = R1:R0 * R2
 	MULA	R1, R2, R4, R4

 	// wintime*100 = R4:R3
 	MOVW	R3, mono+12(FP)
 	MOVW	R4, mono+16(FP)

 	MOVW	$_SYSTEM_TIME, R3
 wall:
 	MOVW	time_hi1(R3), R1
 	MOVW	time_lo(R3), R0
 	MOVW	time_hi2(R3), R2
 	CMP	R1, R2
 	BNE	wall

 	// w = R1:R0 in 100ns units
 	// convert to Unix epoch (but still 100ns units)
 	#define delta 116444736000000000
 	SUB.S   $(delta & 0xFFFFFFFF), R0
 	SBC     $(delta >> 32), R1

 	// Convert to nSec
 	MOVW    $100, R2
 	MULLU   R0, R2, (R4, R3)    // R4:R3 = R1:R0 * R2
 	MULA    R1, R2, R4, R4
 	// w = R2:R1 in nSec
 	MOVW    R3, R1	      // R4:R3 -> R2:R1
 	MOVW    R4, R2

 	// multiply nanoseconds by reciprocal of 10**9 (scaled by 2**61)
 	// to get seconds (96 bit scaled result)
 	MOVW	$0x89705f41, R3		// 2**61 * 10**-9
 	MULLU	R1,R3,(R6,R5)		// R7:R6:R5 = R2:R1 * R3
 	MOVW	$0,R7
 	MULALU	R2,R3,(R7,R6)

 	// unscale by discarding low 32 bits, shifting the rest by 29
 	MOVW	R6>>29,R6		// R7:R6 = (R7:R6:R5 >> 61)
 	ORR	R7<<3,R6
 	MOVW	R7>>29,R7

 	// subtract (10**9 * sec) from nsec to get nanosecond remainder
 	MOVW	$1000000000, R5	// 10**9
 	MULLU	R6,R5,(R9,R8)   // R9:R8 = R7:R6 * R5
 	MULA	R7,R5,R9,R9
 	SUB.S	R8,R1		// R2:R1 -= R9:R8
 	SBC	R9,R2

 	// because reciprocal was a truncated repeating fraction, quotient
 	// may be slightly too small -- adjust to make remainder < 10**9
 	CMP	R5,R1	// if remainder >= 10**9 (unsigned)
 	SUB.HS	R5,R1   //    remainder -= 10**9
 	ADD.HS	$1,R6	//    sec += 1

 	MOVW	R6,sec_lo+0(FP)
 	MOVW	R7,sec_hi+4(FP)
 	MOVW	R1,nsec+8(FP)
 	RET
 useQPC:
 	// The callee writes sec, nsec and mono straight into our result frame.
 	B	runtime·nowQPC(SB)		// tail call
 	RET

 // save_g saves the g register (R10) into thread local memory
 // so that we can call externally compiled
 // ARM code that will overwrite those registers.
 // NOTE: runtime.gogo assumes that R1 is preserved by this function.
 //       runtime.mcall assumes this function only clobbers R0 and R11.
 // Returns with g in R0.
 // Save the value in the _TEB->TlsSlots array.
 // Effectively implements TlsSetValue().
 // tls_g stores the TLS slot allocated TlsAlloc().
 TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0
 	MRC	15, 0, R0, C13, C0, 2	// R0 = _TEB
 	ADD	$0xe10, R0		// R0 = &_TEB->TlsSlots[0]
 	MOVW 	$runtime·tls_g(SB), R11
 	MOVW	(R11), R11		// R11 = slot index
 	MOVW	g, R11<<2(R0)		// TlsSlots[slot] = g
 	MOVW	g, R0	// preserve R0 across call to setg<>
 	RET

 // load_g loads the g register from thread-local memory,
 // for use after calling externally compiled
 // ARM code that overwrote those registers.
 // Get the value from the _TEB->TlsSlots array.
 // Effectively implements TlsGetValue().
 // Overwrites R0; g is used as the only other scratch register.
 TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0
 	MRC	15, 0, R0, C13, C0, 2	// R0 = _TEB
 	ADD	$0xe10, R0		// R0 = &_TEB->TlsSlots[0]
 	MOVW 	$runtime·tls_g(SB), g
 	MOVW	(g), g			// g = slot index
 	MOVW	g<<2(R0), g		// g = TlsSlots[slot]
 	RET

 // This is called from rt0_go, which runs on the system stack
 // using the initial stack allocated by the OS.
 // It calls back into standard C using the BL below.
 // To do that, the stack pointer must be 8-byte-aligned.
 //
 // Allocates a TLS slot with TlsAlloc, aborts unless the slot index is
 // below 64, records it in runtime·tls_g, and then runs init_thread_tls
 // for the current thread.
 TEXT runtime·_initcgo(SB),NOSPLIT|NOFRAME,$0
 	MOVM.DB.W [R4, R14], (R13)	// push {r4, lr}

 	// Ensure stack is 8-byte aligned before calling C code
 	MOVW	R13, R4
 	BIC	$0x7, R13

 	// Allocate a TLS slot to hold g across calls to external code
 	MOVW 	$runtime·_TlsAlloc(SB), R0
 	MOVW	(R0), R0
 	BL	(R0)

 	// Assert that slot is less than 64 so we can use _TEB->TlsSlots.
 	// The comparison is unsigned so that an all-ones failure value
 	// from TlsAlloc is rejected as well instead of being read as -1.
 	CMP	$64, R0
 	MOVW	$runtime·abort(SB), R1
 	BL.HS	(R1)

 	// Save Slot into tls_g
 	MOVW 	$runtime·tls_g(SB), R1
 	MOVW	R0, (R1)

 	BL	runtime·init_thread_tls(SB)

 	MOVW	R4, R13
 	MOVM.IA.W (R13), [R4, R15]	// pop {r4, pc}

 // void init_thread_tls()
 //
 // Does per-thread TLS initialization. Saves a pointer to the TLS slot
 // holding G, in the current m.
 //
 //     g->m->tls[0] = &_TEB->TlsSlots[tls_g]
 //
 // The purpose of this is to enable the profiling handler to get the
 // current g associated with the thread. We cannot use m->curg because curg
 // only holds the current user g. If the thread is executing system code or
 // external code, m->curg will be NULL. The thread's TLS slot always holds
 // the current g, so save a reference to this location so the profiling
 // handler can get the real g from the thread's m.
 //
 // Requires g to be set on entry (g->m is read).
 //
 // Clobbers R0-R3
 TEXT runtime·init_thread_tls(SB),NOSPLIT|NOFRAME,$0
 	// compute &_TEB->TlsSlots[tls_g]
 	MRC	15, 0, R0, C13, C0, 2	// R0 = _TEB
 	ADD	$0xe10, R0		// R0 = &_TEB->TlsSlots[0]
 	MOVW 	$runtime·tls_g(SB), R1
 	MOVW	(R1), R1		// R1 = slot index
 	MOVW	R1<<2, R1		// R1 = byte offset of the slot
 	ADD	R1, R0

 	// save in g->m->tls[0]
 	MOVW	g_m(g), R1
 	MOVW	R0, m_tls(R1)
 	RET

 // Holds the TLS Slot, which was allocated by TlsAlloc()
 // Written once by _initcgo; read by save_g, load_g and init_thread_tls to
 // index the _TEB->TlsSlots array.
 GLOBL runtime·tls_g+0(SB), NOPTR, $4
	// Copyright 2018 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	#include "go_asm.h"
	#include "go_tls.h"
	#include "textflag.h"

	// void runtime·asmstdcall(void *c);
	//
	// Calls the Windows function described by the libcall structure in R0
	// (0 fn, 4 n, 8 args, 12 r1, 16 second result word, 20 err). The first
	// four arguments go in R0-R3 and the rest on the stack. The thread's
	// last-error slot is cleared before the call and saved to err after it.
	TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4, R5, R14], (R13) // push {r4, r5, lr}
	MOVW R0, R4 // put libcall * in r4
	MOVW R13, R5 // save stack pointer in r5

	// SetLastError(0)
	MOVW $0, R0
	MRC 15, 0, R1, C13, C0, 2
	MOVW R0, 0x34(R1)

	MOVW 8(R4), R12 // libcall->args

	// Do we have more than 4 arguments?
	MOVW 4(R4), R0 // libcall->n
	SUB.S $4, R0, R2
	BLE loadregs

	// Reserve stack space for remaining args
	SUB R2<<2, R13
	BIC $0x7, R13 // alignment for ABI

	// R0: count of arguments
	// R1:
	// R2: loop counter, from 0 to (n-4)
	// R3: scratch
	// R4: pointer to libcall struct
	// R12: libcall->args
	MOVW $0, R2
	stackargs:
	ADD $4, R2, R3 // r3 = args[4 + i]
	MOVW R3<<2(R12), R3
	MOVW R3, R2<<2(R13) // stack[i] = r3

	ADD $1, R2 // i++
	SUB $4, R0, R3 // while (i < (n - 4))
	CMP R3, R2
	BLT stackargs

	loadregs:
	// R0 is loaded last because it still holds n for the comparisons.
	CMP $3, R0
	MOVW.GT 12(R12), R3

	CMP $2, R0
	MOVW.GT 8(R12), R2

	CMP $1, R0
	MOVW.GT 4(R12), R1

	CMP $0, R0
	MOVW.GT 0(R12), R0

	BIC $0x7, R13 // alignment for ABI
	MOVW 0(R4), R12 // branch to libcall->fn
	BL (R12)

	MOVW R5, R13 // free stack space
	MOVW R0, 12(R4) // save return value to libcall->r1
	MOVW R1, 16(R4) // second result word (offset 16)

	// GetLastError
	MRC 15, 0, R1, C13, C0, 2
	MOVW 0x34(R1), R0
	MOVW R0, 20(R4) // store in libcall->err

	MOVM.IA.W (R13), [R4, R5, R15] // pop {r4, r5, pc}

	// badsignal2 writes runtime·badsignalmsg to the handle returned by
	// GetStdHandle(-12) using WriteFile, then returns to its caller.
	// 0(R13) holds lpOverlapped (0); 4(R13) receives the byte count.
	TEXT runtime·badsignal2(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4, R14], (R13) // push {r4, lr}
	MOVW R13, R4 // save original stack pointer
	SUB $8, R13 // space for 2 variables
	BIC $0x7, R13 // alignment for ABI

	// stderr
	MOVW runtime·_GetStdHandle(SB), R1
	MOVW $-12, R0
	BL (R1)

	// R0 still holds the handle returned above; it is the first argument.
	MOVW $runtime·badsignalmsg(SB), R1 // lpBuffer
	MOVW $runtime·badsignallen(SB), R2 // lpNumberOfBytesToWrite
	MOVW (R2), R2
	ADD $0x4, R13, R3 // lpNumberOfBytesWritten
	MOVW $0, R12 // lpOverlapped
	MOVW R12, (R13)

	MOVW runtime·_WriteFile(SB), R12
	BL (R12)

	MOVW R4, R13 // restore SP
	MOVM.IA.W (R13), [R4, R15] // pop {r4, pc}

	// getlasterror returns the calling thread's last-error value: the word
	// at offset 0x34 from the pointer produced by the MRC instruction.
	TEXT runtime·getlasterror(SB),NOSPLIT,$0
	MRC 15, 0, R0, C13, C0, 2 // R0 = per-thread block pointer
	MOVW 0x34(R0), R0 // R0 = last-error value
	MOVW R0, ret+0(FP)
	RET

	// setlasterror stores R0 into the calling thread's last-error slot
	// (offset 0x34 from the pointer produced by the MRC instruction).
	// NOTE(review): R0 is used as it is on entry; nothing is loaded from
	// the argument frame. Confirm that callers pass the error in R0.
	TEXT runtime·setlasterror(SB),NOSPLIT|NOFRAME,$0
	MRC 15, 0, R1, C13, C0, 2
	MOVW R0, 0x34(R1)
	RET

	// Called by Windows as a Vectored Exception Handler (VEH).
	// First argument is pointer to struct containing
	// exception record and context pointers.
	// Handler function is stored in R1
	// Return 0 for 'not handled', -1 for handled.
	// int32_t sigtramp(
	//     PEXCEPTION_POINTERS ExceptionInfo,
	//     func *GoExceptionHandler);
	//
	// Frame after the prologue (offsets from R13): 0 link-register slot,
	// 4/8/12 handler arguments, 16 handler result, 20 saved g, 24 saved SP,
	// then the 40 bytes of pushed registers.
	// If there is no current g, the bad-signal message is printed and the
	// process aborts; the code below cannot run without a g.
	TEXT runtime·sigtramp(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R0, R4-R11, R14], (R13) // push {r0, r4-r11, lr} (SP-=40)
	SUB $(8+20), R13 // reserve space for g, sp, and
	// parameters/retval to go call

	MOVW R0, R6 // Save param0
	MOVW R1, R7 // Save param1

	BL runtime·load_g(SB)
	CMP $0, g // is there a current g?
	BNE hasg

	// No g: report it and stop. Continuing would dereference the nil g
	// below and fault again inside the exception handler.
	BL runtime·badsignal2(SB)
	B runtime·abort(SB)

	hasg:
	// save g and SP in case of stack switch
	MOVW R13, 24(R13)
	MOVW g, 20(R13)

	// do we need to switch to the g0 stack?
	MOVW g, R5 // R5 = g
	MOVW g_m(R5), R2 // R2 = m
	MOVW m_g0(R2), R4 // R4 = g0
	CMP R5, R4 // if curg == g0
	BEQ g0

	// switch to g0 stack
	MOVW R4, g // g = g0
	MOVW (g_sched+gobuf_sp)(g), R3 // R3 = g->gobuf.sp
	BL runtime·save_g(SB)

	// traceback will think that we've done PUSH and SUB
	// on this stack, so subtract them here to match.
	// (we need room for sighandler arguments anyway).
	// and re-save old SP for restoring later.
	SUB $(40+8+20), R3
	MOVW R13, 24(R3) // save old stack pointer
	MOVW R3, R13 // switch stack

	g0:
	MOVW 0(R6), R2 // R2 = ExceptionPointers->ExceptionRecord
	MOVW 4(R6), R3 // R3 = ExceptionPointers->ContextRecord

	// make it look like mstart called us on g0, to stop traceback
	MOVW $runtime·mstart(SB), R4

	MOVW R4, 0(R13) // Save link register for traceback
	MOVW R2, 4(R13) // Move arg0 (ExceptionRecord) into position
	MOVW R3, 8(R13) // Move arg1 (ContextRecord) into position
	MOVW R5, 12(R13) // Move arg2 (original g) into position
	BL (R7) // Call the go routine
	MOVW 16(R13), R4 // Fetch return value from stack

	// Compute the value of the g0 stack pointer after deallocating
	// this frame, then allocating 8 bytes. We may need to store
	// the resume SP and PC on the g0 stack to work around
	// control flow guard when we resume from the exception.
	ADD $(40+20), R13, R12

	// switch back to original stack and g
	// (when no switch happened, 24(R13) holds R13 itself)
	MOVW 24(R13), R13
	MOVW 20(R13), g
	BL runtime·save_g(SB)

	done:
	MOVW R4, R0 // move retval into position
	ADD $(8 + 20), R13 // free locals
	// The R0 pushed on entry (ExceptionInfo) is popped into R3.
	MOVM.IA.W (R13), [R3, R4-R11, R14] // pop {r3, r4-r11, lr}

	// if return value is CONTINUE_SEARCH, do not set up control
	// flow guard workaround
	CMP $0, R0
	BEQ return

	// Check if we need to set up the control flow guard workaround.
	// On Windows/ARM, the stack pointer must lie within system
	// stack limits when we resume from exception.
	// Store the resume SP and PC on the g0 stack,
	// and return to returntramp on the g0 stack. returntramp
	// pops the saved PC and SP from the g0 stack, resuming execution
	// at the desired location.
	// If returntramp has already been set up by a previous exception
	// handler, don't clobber the stored SP and PC on the stack.
	MOVW 4(R3), R3 // PEXCEPTION_POINTERS->Context
	MOVW 0x40(R3), R2 // load PC from context record
	MOVW $runtime·returntramp(SB), R1
	CMP R1, R2
	B.EQ return // do not clobber saved SP/PC

	// Save resume SP and PC on g0 stack
	MOVW 0x38(R3), R2 // load SP from context record
	MOVW R2, 0(R12) // Store resume SP on g0 stack
	MOVW 0x40(R3), R2 // load PC from context record
	MOVW R2, 4(R12) // Store resume PC on g0 stack

	// Set up context record to return to returntramp on g0 stack
	MOVW R12, 0x38(R3) // save g0 stack pointer
	// in context record
	MOVW $runtime·returntramp(SB), R2 // save resume address
	MOVW R2, 0x40(R3) // in context record

	return:
	B (R14) // return

	//
	// Trampoline to resume execution from exception handler.
	// This is part of the control flow guard workaround.
	// It switches stacks and jumps to the continuation address.
	//
	// On entry R13 points at the resume SP (0(R13)) and resume PC (4(R13))
	// that sigtramp stored; one load multiple installs both.
	TEXT runtime·returntramp(SB),NOSPLIT|NOFRAME,$0
	MOVM.IA (R13), [R13, R15] // ldm sp, [sp, pc]

	// exceptiontramp enters sigtramp with runtime·exceptionhandler as the
	// handler (R1). R0 is passed through untouched.
	TEXT runtime·exceptiontramp(SB),NOSPLIT|NOFRAME,$0
	MOVW $runtime·exceptionhandler(SB), R1
	B runtime·sigtramp(SB)

	// firstcontinuetramp enters sigtramp with runtime·firstcontinuehandler
	// as the handler (R1). R0 is passed through untouched.
	TEXT runtime·firstcontinuetramp(SB),NOSPLIT|NOFRAME,$0
	MOVW $runtime·firstcontinuehandler(SB), R1
	B runtime·sigtramp(SB)

	// lastcontinuetramp enters sigtramp with runtime·lastcontinuehandler
	// as the handler (R1). R0 is passed through untouched.
	TEXT runtime·lastcontinuetramp(SB),NOSPLIT|NOFRAME,$0
	MOVW $runtime·lastcontinuehandler(SB), R1
	B runtime·sigtramp(SB)

	// ctrlhandler enters externalthreadhandler with runtime·ctrlhandler1 as
	// the function to run (R1). R0 is passed through as its argument.
	TEXT runtime·ctrlhandler(SB),NOSPLIT|NOFRAME,$0
	MOVW $runtime·ctrlhandler1(SB), R1
	B runtime·externalthreadhandler(SB)

	// profileloop enters externalthreadhandler with runtime·profileloop1 as
	// the function to run (R1). R0 is passed through as its argument.
	TEXT runtime·profileloop(SB),NOSPLIT|NOFRAME,$0
	MOVW $runtime·profileloop1(SB), R1
	B runtime·externalthreadhandler(SB)

	// int32 externalthreadhandler(uint32 arg, int (*func)(uint32))
	//
	// Runs func(arg) with a temporary m and g built on the current stack,
	// then clears g and returns func's result in R0.
	//
	// stack layout (offsets from R13 after the prologue):
	//   +----------------+
	//   | callee-save    |
	//   | registers      |
	//   +----------------+
	//   | m              |
	//   +----------------+
	// 20| g              |
	//   +----------------+
	// 16| func ptr (r1)  |
	//   +----------------+
	// 12| argument (r0)  |
	//---+----------------+
	// 8 | param1 / retval|
	//   +----------------+
	// 4 | param0         |
	//   +----------------+
	// 0 | link reg slot  |
	//   +----------------+
	//
	// Outgoing arguments start at 4(R13) and results follow them, as in
	// sigtramp. For func(uint32) the single result is therefore at 8(R13).
	TEXT runtime·externalthreadhandler(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr}
	SUB $(m__size + g__size + 20), R13 // space for locals
	MOVW R0, 12(R13)
	MOVW R1, 16(R13)

	// zero out m and g structures
	ADD $20, R13, R0 // compute pointer to g
	MOVW R0, 4(R13)
	MOVW $(m__size + g__size), R0
	MOVW R0, 8(R13)
	BL runtime·memclrNoHeapPointers(SB)

	// initialize m and g structures
	ADD $20, R13, R2 // R2 = g
	ADD $(20 + g__size), R13, R3 // R3 = m
	MOVW R2, m_g0(R3) // m->g0 = g
	MOVW R3, g_m(R2) // g->m = m
	MOVW R2, m_curg(R3) // m->curg = g

	MOVW R2, g
	BL runtime·save_g(SB)

	// set up stackguard stuff
	MOVW R13, R0
	MOVW R0, (g_stack+stack_hi)(g)
	SUB $(32*1024), R0
	MOVW R0, (g_stack+stack_lo)(g)
	MOVW R0, g_stackguard0(g)
	MOVW R0, g_stackguard1(g)

	// move argument into position and call function
	MOVW 12(R13), R0
	MOVW R0, 4(R13)
	MOVW 16(R13), R1
	BL (R1)

	// clear g
	MOVW $0, g
	BL runtime·save_g(SB)

	// The result slot follows the single argument at 4(R13).
	MOVW 8(R13), R0 // load return value
	ADD $(m__size + g__size + 20), R13 // free locals
	MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc}

	// cbctxts is a 4-byte word; callbackasm1 loads it and indexes it as an
	// array of pointers to wincallbackcontext structures.
	GLOBL runtime·cbctxts(SB), NOPTR, $4

	// callbackasm1 is the common body for callbacks entered from the
	// trampolines in zcallback_windows.s, which supply the callback index
	// in R12. It spills R0-R3, looks up cbctxts[R12], aborts if argsize
	// exceeds 16 bytes, and calls cgocallback_gofunc(fn, arg, argsize).
	// The word that follows the arguments is returned in R0.
	TEXT runtime·callbackasm1(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr}
	SUB $36, R13 // space for locals

	// save callback arguments to stack. We currently support up to 4 arguments
	ADD $16, R13, R4
	MOVM.IA [R0-R3], (R4)

	// load cbctxts[i]. The trampoline in zcallback_windows.s puts the callback
	// index in R12
	MOVW runtime·cbctxts(SB), R4
	MOVW R12<<2(R4), R4 // R4 holds pointer to wincallbackcontext structure

	// extract callback context
	MOVW wincallbackcontext_argsize(R4), R5
	MOVW wincallbackcontext_gobody(R4), R4

	// we currently support up to 4 arguments
	CMP $(4 * 4), R5
	BL.GT runtime·abort(SB)

	// extend argsize by size of return value
	ADD $4, R5

	// Build 'type args struct'
	MOVW R4, 4(R13) // fn
	ADD $16, R13, R0 // arg (points to r0-r3, ret on stack)
	MOVW R0, 8(R13)
	MOVW R5, 12(R13) // argsize

	BL runtime·load_g(SB)
	BL runtime·cgocallback_gofunc(SB)

	ADD $16, R13, R0 // load arg
	MOVW 12(R13), R1 // load argsize
	SUB $4, R1 // offset to return value
	MOVW R1<<0(R0), R0 // load return value

	ADD $36, R13 // free locals
	MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc}

	// uint32 tstart_stdcall(M *newm);
	//
	// Thread entry point. R0 holds the new m. Installs m->g0 as g, links
	// g->m back to the m, does per-thread TLS setup, describes the current
	// OS stack (64 KB below the current SP) in g's stack bounds, and calls
	// mstart. If mstart returns, the thread exits with 0.
	TEXT runtime·tstart_stdcall(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4-R11, R14], (R13) // push {r4-r11, lr}

	MOVW m_g0(R0), g
	MOVW R0, g_m(g)
	BL runtime·save_g(SB)

	// do per-thread TLS initialization
	BL runtime·init_thread_tls(SB)

	// Layout new m scheduler stack on os stack.
	MOVW R13, R0
	MOVW R0, g_stack+stack_hi(g)
	SUB $(64*1024), R0
	MOVW R0, (g_stack+stack_lo)(g)
	MOVW R0, g_stackguard0(g)
	MOVW R0, g_stackguard1(g)

	BL runtime·emptyfunc(SB) // fault if stack check is wrong
	BL runtime·mstart(SB)

	// Exit the thread.
	MOVW $0, R0
	MOVM.IA.W (R13), [R4-R11, R15] // pop {r4-r11, pc}

	// onosstack calls fn on OS stack.
	// adapted from asm_arm.s : systemstack
	// func onosstack(fn unsafe.Pointer, arg uint32)
	//
	// fn is called with arg in R0. If g is nil, gsignal or g0, fn is
	// tail-called on the current stack. If g is m->curg, the current state
	// is saved in g->sched, execution moves to the g0 stack for the call,
	// and then returns to the original g and stack. Any other g is reported
	// via badsystemstack followed by abort.
	TEXT runtime·onosstack(SB),NOSPLIT,$0
	MOVW fn+0(FP), R5 // R5 = fn
	MOVW arg+4(FP), R6 // R6 = arg

	// This function can be called when there is no g,
	// for example, when we are handling a callback on a non-go thread.
	// In this case we're already on the system stack.
	CMP $0, g
	BEQ noswitch

	MOVW g_m(g), R1 // R1 = m

	MOVW m_gsignal(R1), R2 // R2 = gsignal
	CMP g, R2
	B.EQ noswitch

	MOVW m_g0(R1), R2 // R2 = g0
	CMP g, R2
	B.EQ noswitch

	MOVW m_curg(R1), R3
	CMP g, R3
	B.EQ switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVW $runtime·badsystemstack(SB), R0
	BL (R0)
	B runtime·abort(SB)

	switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVW $runtime·systemstack_switch(SB), R3
	ADD $4, R3, R3 // get past push {lr}
	MOVW R3, (g_sched+gobuf_pc)(g)
	MOVW R13, (g_sched+gobuf_sp)(g)
	MOVW LR, (g_sched+gobuf_lr)(g)
	MOVW g, (g_sched+gobuf_g)(g)

	// switch to g0 (R2 still holds g0 from the check above)
	MOVW R2, g
	MOVW (g_sched+gobuf_sp)(R2), R3
	// make it look like mstart called systemstack on g0, to stop traceback
	SUB $4, R3, R3
	MOVW $runtime·mstart(SB), R4
	MOVW R4, 0(R3)
	MOVW R3, R13

	// call target function
	MOVW R6, R0 // arg
	BL (R5)

	// switch back to g
	MOVW g_m(g), R1
	MOVW m_curg(R1), g
	MOVW (g_sched+gobuf_sp)(g), R13
	MOVW $0, R3
	MOVW R3, (g_sched+gobuf_sp)(g) // clear the saved SP
	RET

	noswitch:
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVW.P 4(R13), R14 // restore LR
	MOVW R6, R0 // arg
	B (R5)

	// Runs on OS stack. Duration (in 100ns units) is in R0.
	//
	// Calls NtWaitForSingleObject(handle = -1, alertable = FALSE, pTime)
	// where pTime points at a 64-bit stack value whose low word is -R0 and
	// whose high word is -1, i.e. the negated duration.
	TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4, R14], (R13) // push {r4, lr}
	MOVW R13, R4 // Save SP
	SUB $8, R13 // R13 = R13 - 8
	BIC $0x7, R13 // Align SP for ABI
	RSB $0, R0, R3 // R3 = -R0
	MOVW $0, R1 // R1 = FALSE (alertable)
	MOVW $-1, R0 // R0 = handle
	MOVW R13, R2 // R2 = pTime
	MOVW R3, 0(R2) // time_lo
	MOVW R0, 4(R2) // time_hi (reuses the -1 in R0)
	MOVW runtime·_NtWaitForSingleObject(SB), R3
	BL (R3)
	MOVW R4, R13 // Restore SP
	MOVM.IA.W (R13), [R4, R15] // pop {R4, pc}

	// switchtothread calls the function whose address is stored in
	// runtime·_SwitchToThread, with the stack pointer 8-byte aligned.
	// Runs on OS stack.
	TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4, R14], (R13) // push {R4, lr}
	MOVW R13, R4 // keep the unaligned stack pointer in callee-saved R4
	BIC $0x7, R13 // alignment for ABI
	MOVW runtime·_SwitchToThread(SB), R0
	BL (R0)
	MOVW R4, R13 // restore stack pointer
	MOVM.IA.W (R13), [R4, R15] // pop {R4, pc}

	// publicationBarrier branches straight to runtime·armPublicationBarrier,
	// which returns directly to our caller.
	TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
	B runtime·armPublicationBarrier(SB)

	// never called (cgo not supported)
	// If it is ever reached, it stores to the address 0xabcd.
	// NOTE(review): presumably a deliberate fault at a recognizable address — confirm.
	TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0
	MOVW $0xabcd, R0
	MOVW R0, (R0)
	RET

	// See http://www.dcl.hpi.uni-potsdam.de/research/WRK/2007/08/getting-os-information-the-kuser_shared_data-structure/
	// Must read hi1, then lo, then hi2. The snapshot is valid if hi1 == hi2.
	//
	// Fixed addresses of the two time records read below.
	#define _INTERRUPT_TIME 0x7ffe0008
	#define _SYSTEM_TIME 0x7ffe0014
	// Byte offsets of the three 32-bit words within each time record.
	#define time_lo 0
	#define time_hi1 4
	#define time_hi2 8

	// nanotime returns a 64-bit nanosecond count: the 100ns tick counter at
	// _INTERRUPT_TIME multiplied by 100. When runtime·useQPCTime is nonzero
	// the work is handed to runtime·nanotimeQPC instead.
	TEXT runtime·nanotime(SB),NOSPLIT,$0-8
	MOVW $0, R0
	MOVB runtime·useQPCTime(SB), R0
	CMP $0, R0
	BNE viaQPC

	// Snapshot the counter: read hi1, lo, hi2 in that order and retry
	// until both high words agree.
	MOVW $_INTERRUPT_TIME, R4
	snapshot:
	MOVW time_hi1(R4), R2
	MOVW time_lo(R4), R1
	MOVW time_hi2(R4), R3
	CMP R2, R3
	BNE snapshot

	// ticks = R2:R1; result = ticks * 100, kept in R4:R0
	MOVW $100, R3
	MULLU R1, R3, (R4, R0) // R4:R0 = lo * 100
	MULA R2, R3, R4, R4 // R4 += hi * 100 (low 32 bits)

	MOVW R0, ret_lo+0(FP)
	MOVW R4, ret_hi+4(FP)
	RET

	viaQPC:
	B runtime·nanotimeQPC(SB) // tail call
	RET

	// time·now returns (sec int64, nsec int32, mono int64).
	//
	// mono is the 100ns counter at _INTERRUPT_TIME multiplied by 100.
	// sec/nsec come from the 100ns counter at _SYSTEM_TIME, rebased to the
	// Unix epoch, converted to nanoseconds and split by a reciprocal multiply.
	// When runtime·useQPCTime is nonzero, the whole result is produced by
	// runtime·nowQPC, which has the same result layout as this function.
	TEXT time·now(SB),NOSPLIT,$0-20
	MOVW $0, R0
	MOVB runtime·useQPCTime(SB), R0
	CMP $0, R0
	BNE useQPC
	MOVW $_INTERRUPT_TIME, R3
	loop:
	MOVW time_hi1(R3), R1
	MOVW time_lo(R3), R0
	MOVW time_hi2(R3), R2
	CMP R1, R2
	BNE loop

	// wintime = R1:R0, multiply by 100
	MOVW $100, R2
	MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2
	MULA R1, R2, R4, R4

	// wintime*100 = R4:R3
	MOVW R3, mono+12(FP)
	MOVW R4, mono+16(FP)

	MOVW $_SYSTEM_TIME, R3
	wall:
	MOVW time_hi1(R3), R1
	MOVW time_lo(R3), R0
	MOVW time_hi2(R3), R2
	CMP R1, R2
	BNE wall

	// w = R1:R0 in 100ns units
	// convert to Unix epoch (but still 100ns units)
	#define delta 116444736000000000
	SUB.S $(delta & 0xFFFFFFFF), R0
	SBC $(delta >> 32), R1

	// Convert to nSec
	MOVW $100, R2
	MULLU R0, R2, (R4, R3) // R4:R3 = R1:R0 * R2
	MULA R1, R2, R4, R4
	// w = R2:R1 in nSec
	MOVW R3, R1 // R4:R3 -> R2:R1
	MOVW R4, R2

	// multiply nanoseconds by reciprocal of 10**9 (scaled by 2**61)
	// to get seconds (96 bit scaled result)
	MOVW $0x89705f41, R3 // 2**61 * 10**-9
	MULLU R1,R3,(R6,R5) // R7:R6:R5 = R2:R1 * R3
	MOVW $0,R7
	MULALU R2,R3,(R7,R6)

	// unscale by discarding low 32 bits, shifting the rest by 29
	MOVW R6>>29,R6 // R7:R6 = (R7:R6:R5 >> 61)
	ORR R7<<3,R6
	MOVW R7>>29,R7

	// subtract (10**9 * sec) from nsec to get nanosecond remainder
	MOVW $1000000000, R5 // 10**9
	MULLU R6,R5,(R9,R8) // R9:R8 = R7:R6 * R5
	MULA R7,R5,R9,R9
	SUB.S R8,R1 // R2:R1 -= R9:R8
	SBC R9,R2

	// because reciprocal was a truncated repeating fraction, quotient
	// may be slightly too small -- adjust to make remainder < 10**9
	CMP R5,R1 // if remainder >= 10**9 (unsigned)
	SUB.HS R5,R1 // remainder -= 10**9
	ADD.HS $1,R6 // sec += 1

	MOVW R6,sec_lo+0(FP)
	MOVW R7,sec_hi+4(FP)
	MOVW R1,nsec+8(FP)
	RET
	useQPC:
	// nanotimeQPC returns a single int64 and would leave nsec and mono
	// unset in this 20-byte result frame; nowQPC fills in all three results.
	B runtime·nowQPC(SB) // tail call
	RET

	// save_g saves the g register (R10) into thread local memory
	// so that we can call externally compiled
	// ARM code that will overwrite those registers.
	// NOTE: runtime.gogo assumes that R1 is preserved by this function.
	// runtime.mcall assumes this function only clobbers R0 and R11.
	// Returns with g in R0.
	// Save the value in the _TEB->TlsSlots array.
	// Effectively implements TlsSetValue().
	// tls_g stores the TLS slot allocated by TlsAlloc().
	TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0
	MRC 15, 0, R0, C13, C0, 2 // R0 = _TEB
	ADD $0xe10, R0 // R0 = &_TEB->TlsSlots[0]
	MOVW $runtime·tls_g(SB), R11
	MOVW (R11), R11 // R11 = slot index
	MOVW g, R11<<2(R0) // TlsSlots[tls_g] = g
	MOVW g, R0 // return g in R0
	RET

	// load_g loads the g register from thread-local memory,
	// for use after calling externally compiled
	// ARM code that overwrote those registers.
	// Get the value from the _TEB->TlsSlots array.
	// Effectively implements TlsGetValue().
	// Clobbers R0; g itself is used as scratch for the slot index.
	TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0
	MRC 15, 0, R0, C13, C0, 2 // R0 = _TEB
	ADD $0xe10, R0 // R0 = &_TEB->TlsSlots[0]
	MOVW $runtime·tls_g(SB), g
	MOVW (g), g // g = slot index (temporarily)
	MOVW g<<2(R0), g // g = TlsSlots[tls_g]
	RET

	// This is called from rt0_go, which runs on the system stack
	// using the initial stack allocated by the OS.
	// It calls back into standard C using the BL below.
	// To do that, the stack pointer must be 8-byte-aligned.
	//
	// Allocates the TLS slot used for g, records it in tls_g, and then runs
	// init_thread_tls for the current thread. Aborts if the slot index cannot
	// be used to index _TEB->TlsSlots.
	TEXT runtime·_initcgo(SB),NOSPLIT|NOFRAME,$0
	MOVM.DB.W [R4, R14], (R13) // push {r4, lr}

	// Ensure stack is 8-byte aligned before calling C code
	MOVW R13, R4
	BIC $0x7, R13

	// Allocate a TLS slot to hold g across calls to external code
	MOVW $runtime·_TlsAlloc(SB), R0
	MOVW (R0), R0
	BL (R0)

	// Assert that slot is less than 64 so we can use _TEB->TlsSlots.
	// The comparison is unsigned so that the TlsAlloc failure value
	// (0xFFFFFFFF, which is negative when treated as signed) also aborts.
	CMP $64, R0
	MOVW $runtime·abort(SB), R1
	BL.HS (R1)

	// Save Slot into tls_g
	MOVW $runtime·tls_g(SB), R1
	MOVW R0, (R1)

	BL runtime·init_thread_tls(SB)

	MOVW R4, R13
	MOVM.IA.W (R13), [R4, R15] // pop {r4, pc}

	// void init_thread_tls()
	//
	// Does per-thread TLS initialization. Saves a pointer to the TLS slot
	// holding G, in the current m.
	//
	// g->m->tls[0] = &_TEB->TlsSlots[tls_g]
	//
	// The purpose of this is to enable the profiling handler to get the
	// current g associated with the thread. We cannot use m->curg because curg
	// only holds the current user g. If the thread is executing system code or
	// external code, m->curg will be NULL. The thread's TLS slot always holds
	// the current g, so save a reference to this location so the profiling
	// handler can get the real g from the thread's m.
	//
	// Clobbers R0-R3
	TEXT runtime·init_thread_tls(SB),NOSPLIT|NOFRAME,$0
	// compute &_TEB->TlsSlots[tls_g]
	MRC 15, 0, R0, C13, C0, 2 // R0 = _TEB
	ADD $0xe10, R0 // R0 = &_TEB->TlsSlots[0]
	MOVW $runtime·tls_g(SB), R1
	MOVW (R1), R1 // R1 = slot index
	MOVW R1<<2, R1 // R1 = byte offset of the slot (4 bytes per entry)
	ADD R1, R0

	// save in g->m->tls[0]
	MOVW g_m(g), R1
	MOVW R0, m_tls(R1)
	RET

	// Holds the TLS slot index, which was allocated by TlsAlloc().
	// 4 bytes, marked NOPTR.
	GLOBL runtime·tls_g+0(SB), NOPTR, $4