blob: 6408da42e96d9e3506604cef1311c831539819ee [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
Russ Cox55889402011-12-19 15:51:13 -05005#include "zasm_GOOS_GOARCH.h"
Rob Pike8e82a672008-06-30 11:50:36 -07006
TEXT _rt0_amd64(SB),7,$-8
	// On entry DI holds argc and SI holds argv.
	// Reserve four 8-byte slots on a 16-byte-aligned stack:
	// 0(SP) and 8(SP) are filled in further down as the arguments of
	// runtime·args; 16(SP) and 24(SP) keep argc and argv until then.
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP	// 2args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// Describe the stack we were started on as g0's stack: the guard
	// sits 64kB (less 104 bytes) below the current SP, the base is SP.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX
	MOVQ	BX, g_stackguard(DI)
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	SP, g_stackbase(DI)

	// Query the processor.  CPUID leaf 0 leaves the highest supported
	// leaf in AX; when that is 0 there is nothing further to ask.
	// Otherwise record CX and DX as returned by leaf 1.
	MOVQ	$0, AX
	CPUID
	CMPQ	AX, $0
	JE	nocpuinfo
	MOVQ	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// If _cgo_init is non-nil, call it with g0 and setmg_gcc.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// DI still holds g0 (CPUID does not touch DI).
	MOVQ	DI, CX		// Win64 uses CX for first parameter
	MOVQ	$setmg_gcc<>(SB), SI
	CALL	AX
	// _cgo_init may have rewritten g0's stackguard0; copy it to stackguard.
	MOVQ	$runtime·g0(SB), CX
	MOVQ	g_stackguard0(CX), AX
	MOVQ	AX, g_stackguard(CX)
	// After _cgo_init, only Windows continues into the TLS setup below.
	CMPL	runtime·iswindows(SB), $0
	JEQ	ok

needtls:
	// skip TLS setup on Plan 9
	CMPL	runtime·isplan9(SB), $1
	JEQ	ok

	LEAQ	runtime·tls0(SB), DI
	CALL	runtime·settls(SB)

	// Self-test: a store through the TLS g slot must be visible in tls0.
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·tls0(SB), AX
	CMPQ	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort: store to address 0
ok:
	// Install g0 and m0 as the current g and m.
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX
	MOVQ	AX, m(BX)

	// m0.g0 = g0
	MOVQ	CX, m_g0(AX)

	CLD			// convention is D is always left cleared
	CALL	runtime·check(SB)

	// Move the saved argc/argv into the outgoing argument slots.
	MOVL	16(SP), AX	// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX	// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·hashinit(SB)
	CALL	runtime·schedinit(SB)

	// Create the goroutine that will run the program:
	// newproc(0, &runtime·main·f).
	PUSHQ	$runtime·main·f(SB)	// entry
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	// mstart is not expected to return; fault if it does.
	MOVL	$0xf1, 0xf1	// crash
	RET
100
// runtime·main·f is an 8-byte word holding the address of runtime·main.
// _rt0_amd64 pushes its address as the entry argument of runtime·newproc.
DATA	runtime·main·f+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·main·f(SB),8,$8
103
// runtime·breakpoint emits a single 0xcc byte (the one-byte
// breakpoint opcode) followed by a return.
TEXT runtime·breakpoint(SB),7,$0
	BYTE	$0xcc
	RET
107
// runtime·asminit has no per-thread initialization to do here;
// it returns immediately.
TEXT runtime·asminit(SB),7,$0
	RET
111
/*
 *  go-routine
 */
Rob Piked3204ef2008-06-30 14:39:47 -0700115
// void gosave(Gobuf*)
// Record the caller's SP, the caller's PC and the current g
// in the Gobuf (the setjmp half of gosave/gogo).
TEXT runtime·gosave(SB), 7, $0
	MOVQ	buf+0(FP), AX		// AX = gobuf
	LEAQ	buf+0(FP), BX		// caller's SP: the address just above our return PC
	MOVQ	BX, gobuf_sp(AX)
	MOVQ	0(SP), BX		// caller's PC: our return address
	MOVQ	BX, gobuf_pc(AX)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	BX, gobuf_g(AX)
	RET
128
// void gogo(Gobuf*, uintptr)
// Resume the context stored in the Gobuf (the longjmp half of
// gosave/gogo).  The second argument is left in AX when control
// arrives at the saved PC.
TEXT runtime·gogo(SB), 7, $0
	MOVQ	val+8(FP), AX		// return 2nd arg
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil: the load faults on a nil g
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_pc(BX), BX
	JMP	BX
141
// void gogocall(Gobuf*, void (*fn)(void), uintptr r0)
// Switch to the g and stack recorded in the Gobuf, then enter fn so
// that fn's return lands on the PC saved in the Gobuf.
// The third argument is passed along in DX.
TEXT runtime·gogocall(SB), 7, $0
	MOVQ	r0+16(FP), DX		// context
	MOVQ	fn+8(FP), AX		// fn
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DI
	get_tls(CX)
	MOVQ	DI, g(CX)
	MOVQ	0(DI), CX		// make sure g != nil: the load faults on a nil g
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_pc(BX), BX
	PUSHQ	BX			// saved PC becomes fn's return address
	JMP	AX
	POPQ	BX	// not reached
158
// void gogocallfn(Gobuf*, FuncVal*)
// Like gogocall, but the function is given as a FuncVal: the code
// address is read from the FuncVal's first word, and the FuncVal
// pointer itself stays in DX.
TEXT runtime·gogocallfn(SB), 7, $0
	MOVQ	fn+8(FP), DX		// fn
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), AX
	get_tls(CX)
	MOVQ	AX, g(CX)
	MOVQ	0(AX), CX		// make sure g != nil: the load faults on a nil g
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_pc(BX), BX
	PUSHQ	BX			// saved PC becomes the callee's return address
	MOVQ	0(DX), BX		// code pointer out of the FuncVal
	JMP	BX
	POPQ	BX	// not reached
175
// void mcall(void (*fn)(G*))
// Switch to m->g0's stack, call fn(g).
// Fn must never return.  It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), 7, $0
	MOVQ	fn+0(FP), DI

	// Save the caller's PC, SP and g in g->sched.
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	8(SP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	m(CX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// already on m->g0?  then this is a bad mcall
	JNE	mcall_switch
	CALL	runtime·badmcall(SB)
mcall_switch:
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX		// argument for fn: the g we came from
	CALL	DI
	POPQ	AX
	CALL	runtime·badmcall2(SB)	// fn returned, which it must not
	RET
204
/*
 * support for morestack
 */
208
// Called during function prolog when more stack is needed.
// Caller has already done get_tls(CX); MOVQ m(CX), BX.
// On entry 0(SP) is the return address into f (the function whose
// prolog called us) and 8(SP) is the return address into f's caller.
TEXT runtime·morestack(SB),7,$0
	// Cannot grow scheduler stack (m->g0).
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	morestack_notg0
	INT	$3
morestack_notg0:

	// Keep DX in m->cret across the stack switch.
	MOVQ	DX, m_cret(BX)

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	AX, m_moreargp(BX)	// the same address is recorded as the argument pointer
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set m->morepc to f's PC.
	MOVQ	0(SP), AX
	MOVQ	AX, m_morepc(BX)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BP
	MOVQ	BP, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·newstack(SB)
	MOVQ	$0, 0x1003	// crash if newstack returns
	RET
242
// Called from reflection library.  Mimics morestack,
// reuses stack growth code to create a frame
// with the desired args running the desired function.
//
// func call(fn *byte, arg *byte, argsize uint32).
TEXT reflect·call(SB), 7, $0
	get_tls(CX)
	MOVQ	m(CX), BX

	// m->morebuf = our caller's PC, SP and g: the state to
	// restore when returning from f.
	MOVQ	0(SP), AX	// our caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	8(SP), AX	// our caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	g(CX), AX
	MOVQ	AX, (m_morebuf+gobuf_g)(BX)

	// Set up morestack arguments to call f on a new stack.
	// We set f's frame size to 1, as a hint to newstack
	// that this is a call from reflect·call.
	// If it turns out that f needs a larger frame than
	// the default stack, f's usual stack growth prolog will
	// allocate a new segment (and recopy the arguments).
	MOVQ	fn+0(FP), AX		// fn
	MOVQ	arg+8(FP), DX		// arg frame
	MOVL	argsize+16(FP), CX	// arg size (this overwrites the TLS base in CX)

	MOVQ	AX, m_morepc(BX)	// f's PC
	MOVQ	DX, m_moreargp(BX)	// argument frame pointer
	MOVL	CX, m_moreargsize(BX)	// f's argument size
	MOVL	$1, m_moreframesize(BX)	// f's frame size

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BP
	get_tls(CX)			// reload: CX was reused for the arg size
	MOVQ	BP, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·newstack(SB)
	MOVQ	$0, 0x1103	// crash if newstack returns
	RET
284
// Return point when leaving stack.
// Saves AX in m->cret, then runs runtime·oldstack on m->g0's stack.
TEXT runtime·lessstack(SB), 7, $0
	// Save return value in m->cret
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVQ	AX, m_cret(BX)

	// Call oldstack on m->g0's stack.
	MOVQ	m_g0(BX), BP
	MOVQ	BP, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·oldstack(SB)
	MOVQ	$0, 0x1004	// crash if oldstack returns
	RET
299
// morestack trampolines
//
// morestack00 stores a 64-bit zero at m_moreframesize and then
// continues in runtime·morestack.
// NOTE(review): the 64-bit store presumably also covers the 32-bit
// m_moreargsize field next to it - confirm against the M layout.
TEXT runtime·morestack00(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVQ	$0, AX
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX
308
// morestack01 moves the incoming AX into the upper 32 bits of a
// 64-bit word (low half zero), stores it at m_moreframesize and
// continues in runtime·morestack.
// NOTE(review): AX is presumably loaded by the calling prolog, and the
// upper half presumably lands in m_moreargsize - confirm.
TEXT runtime·morestack01(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	SHLQ	$32, AX
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX
316
// morestack10 keeps only the low 32 bits of the incoming AX (upper
// half cleared), stores the 64-bit result at m_moreframesize and
// continues in runtime·morestack.
// NOTE(review): AX is presumably loaded by the calling prolog - confirm.
TEXT runtime·morestack10(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVLQZX	AX, AX
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX
324
// morestack11 stores the incoming 64-bit AX unchanged at
// m_moreframesize and continues in runtime·morestack.
// NOTE(review): AX is presumably loaded by the calling prolog - confirm.
TEXT runtime·morestack11(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX
331
// subcases of morestack01
// with const of 8,16,...48
//
// Each pushes const/8 and jumps to morestack<>, which pops the value
// and shifts it left by 35, i.e. places const in the upper 32 bits.
// morestack8: const = 8.
TEXT runtime·morestack8(SB),7,$0
	PUSHQ	$1
	MOVQ	$morestack<>(SB), AX
	JMP	AX
338
// morestack16: pushes 2 (= 16/8) for morestack<> to pop and scale.
TEXT runtime·morestack16(SB),7,$0
	PUSHQ	$2
	MOVQ	$morestack<>(SB), AX
	JMP	AX
343
// morestack24: pushes 3 (= 24/8) for morestack<> to pop and scale.
TEXT runtime·morestack24(SB),7,$0
	PUSHQ	$3
	MOVQ	$morestack<>(SB), AX
	JMP	AX
348
// morestack32: pushes 4 (= 32/8) for morestack<> to pop and scale.
TEXT runtime·morestack32(SB),7,$0
	PUSHQ	$4
	MOVQ	$morestack<>(SB), AX
	JMP	AX
353
// morestack40: pushes 5 (= 40/8) for morestack<> to pop and scale.
TEXT runtime·morestack40(SB),7,$0
	PUSHQ	$5
	MOVQ	$morestack<>(SB), AX
	JMP	AX
358
// morestack48: pushes 6 (= 48/8) for morestack<> to pop and scale.
TEXT runtime·morestack48(SB),7,$0
	PUSHQ	$6
	MOVQ	$morestack<>(SB), AX
	JMP	AX
363
// Shared tail of runtime·morestack8 ... runtime·morestack48.
// Pops the small integer n pushed by the trampoline, shifts it left by
// 35 (so the upper 32 bits hold n*8 and the low 32 bits are zero),
// stores the word at m_moreframesize and continues in runtime·morestack.
TEXT morestack<>(SB),7,$0
	get_tls(CX)
	MOVQ	m(CX), BX
	POPQ	AX
	SHLQ	$35, AX
	MOVQ	AX, m_moreframesize(BX)
	MOVQ	$runtime·morestack(SB), AX
	JMP	AX
372
// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·cas(SB), 7, $0
	MOVQ	val+0(FP), BX
	MOVL	old+8(FP), AX		// CMPXCHG compares against AX
	MOVL	new+12(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	JZ	cas_swapped		// ZF set: the exchange happened
	MOVL	$0, AX
	RET
cas_swapped:
	MOVL	$1, AX
	RET
Ken Thompson1e1cc4e2009-01-27 12:03:53 -0800391
// bool runtime·cas64(uint64 *val, uint64 *old, uint64 new)
// Atomically:
//	if(*val == *old){
//		*val = new;
//		return 1;
//	} else {
//		*old = *val
//		return 0;
//	}
TEXT runtime·cas64(SB), 7, $0
	MOVQ	val+0(FP), BX
	MOVQ	old+8(FP), BP
	MOVQ	0(BP), AX		// expected value; CMPXCHG compares against AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	JZ	cas64_ok
	// Mismatch: CMPXCHG left the value it observed in AX; report it via *old.
	MOVQ	AX, 0(BP)
	MOVL	$0, AX
	RET
cas64_ok:
	MOVL	$1, AX
	RET
415
// bool casp(void **val, void *old, void *new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else
//		return 0;
TEXT runtime·casp(SB), 7, $0
	MOVQ	val+0(FP), BX
	MOVQ	old+8(FP), AX		// CMPXCHG compares against AX
	MOVQ	new+16(FP), CX
	LOCK
	CMPXCHGQ	CX, 0(BX)
	JZ	casp_swapped		// ZF set: the exchange happened
	MOVL	$0, AX
	RET
casp_swapped:
	MOVL	$1, AX
	RET
434
// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), 7, $0
	MOVQ	val+0(FP), BX
	MOVL	delta+8(FP), AX
	MOVL	AX, CX			// keep delta: XADD replaces AX with the old *val
	LOCK
	XADDL	AX, 0(BX)
	ADDL	CX, AX			// old value + delta = new value
	RET
447
// 64-bit counterpart of runtime·xadd: atomically adds the 64-bit
// second argument to the word the first argument points at and
// returns the new value in AX.
TEXT runtime·xadd64(SB), 7, $0
	MOVQ	val+0(FP), BX
	MOVQ	delta+8(FP), AX
	MOVQ	AX, CX			// keep delta: XADD replaces AX with the old *val
	LOCK
	XADDQ	AX, 0(BX)
	ADDQ	CX, AX			// old value + delta = new value
	RET
456
// Atomically stores the 32-bit second argument into the word the
// first argument points at and returns the previous contents in AX.
// (XCHG with a memory operand is locked without a LOCK prefix.)
TEXT runtime·xchg(SB), 7, $0
	MOVQ	val+0(FP), BX
	MOVL	new+8(FP), AX
	XCHGL	AX, 0(BX)
	RET
462
// 64-bit counterpart of runtime·xchg: atomically stores the second
// argument into the word the first argument points at and returns
// the previous contents in AX.
TEXT runtime·xchg64(SB), 7, $0
	MOVQ	val+0(FP), BX
	MOVQ	new+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET
468
// Spin for the number of iterations given by the 32-bit first
// argument, executing one PAUSE per iteration.
// A count of 0 returns at once; without the guard the counter would
// wrap to 0xFFFFFFFF on the first SUBL and spin about 2^32 times.
TEXT runtime·procyield(SB),7,$0
	MOVL	cycles+0(FP), AX
	TESTL	AX, AX
	JZ	procyield_done
procyield_again:
	PAUSE
	SUBL	$1, AX
	JNZ	procyield_again
procyield_done:
	RET
476
// Atomically stores the pointer-sized second argument into the
// location the first argument points at.  The store is done with
// XCHGQ, which is locked when one operand is in memory.
TEXT runtime·atomicstorep(SB), 7, $0
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET
482
// Atomically stores the 32-bit second argument into the location the
// first argument points at.  The store is done with XCHGL, which is
// locked when one operand is in memory.
TEXT runtime·atomicstore(SB), 7, $0
	MOVQ	ptr+0(FP), BX
	MOVL	val+8(FP), AX
	XCHGL	AX, 0(BX)
	RET
488
// Atomically stores the 64-bit second argument into the location the
// first argument points at.  The store is done with XCHGQ, which is
// locked when one operand is in memory.
TEXT runtime·atomicstore64(SB), 7, $0
	MOVQ	ptr+0(FP), BX
	MOVQ	val+8(FP), AX
	XCHGQ	AX, 0(BX)
	RET
494
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
//
// fn is dereferenced once to obtain the code address; the fn pointer
// itself stays in DX.
// NOTE(review): 5 is presumably the length of the CALL instruction
// that invoked deferreturn, so that the call is executed again.
TEXT runtime·jmpdefer(SB), 7, $0
	MOVQ	fn+0(FP), DX		// fn
	MOVQ	callersp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP		// caller sp after CALL
	SUBQ	$5, (SP)		// return to CALL again
	MOVQ	0(DX), BX
	JMP	BX			// but first run the deferred function
Russ Cox133a1582009-10-03 10:37:12 -0700507
// Dummy function to use in saved gobuf.PC,
// to match SP pointing at a return address.
// The gobuf.PC is unused by the contortions here
// but setting it to return will make the traceback code work.
// The body is nothing but a RET.
TEXT return<>(SB),7,$0
	RET
514
// asmcgocall(void(*fn)(void*), void *arg)
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.c for more details.
TEXT runtime·asmcgocall(SB),7,$0
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX
	MOVQ	SP, DX			// remember the incoming SP

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	m(CX), BP
	MOVQ	m_g0(BP), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	asmcgocall_ong0
	// Not on g0: record SP, return<> as PC and g in g->sched,
	// then make g0 current and switch to its saved stack pointer.
	MOVQ	SP, (g_sched+gobuf_sp)(DI)
	MOVQ	$return<>(SB), (g_sched+gobuf_pc)(DI)
	MOVQ	DI, (g_sched+gobuf_g)(DI)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

asmcgocall_ong0:
	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	DX, 40(SP)	// save SP
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI
	MOVQ	DI, g(CX)
	MOVQ	40(SP), SP
	RET
556
// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
// The address of the fn argument slot is what gets passed on, so the
// callee sees a pointer to a word that holds fn.
TEXT runtime·cgocallback(SB),7,$24
	LEAQ	fn+0(FP), AX		// &fn
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET
570
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),7,$24
	// If m is nil, Go did not create the current thread.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	//
	// Either way the m found on entry (0 if none) is pushed, so the
	// end of this function can tell whether it has to call dropm.
	get_tls(CX)
#ifdef GOOS_windows
	// On Windows the TLS base itself may be 0: treat that as "no m".
	CMPQ	CX, $0
	JNE	3(PC)
	PUSHQ	$0
	JMP	needm
#endif
	MOVQ	m(CX), BP
	PUSHQ	BP
	CMPQ	BP, $0
	JNE	havem
needm:
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	get_tls(CX)
	MOVQ	m(CX), BP

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	MOVQ	m_g0(BP), SI
	PUSHQ	(g_sched+gobuf_sp)(SI)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg
	// with the three arguments.  Because we are taking over
	// the execution of m->curg but *not* resuming what had
	// been running, we need to save that information (m->curg->sched)
	// so that we can restore it when we're done.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because we defined cgocallbackg to have
	// a frame size of 24, the same amount that we use below),
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	MOVQ	fn+0(FP), AX
	MOVQ	frame+8(FP), BX
	MOVQ	framesize+16(FP), DX

	MOVQ	m_curg(BP), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI

	// Push gobuf.pc
	MOVQ	(g_sched+gobuf_pc)(SI), BP
	SUBQ	$8, DI
	MOVQ	BP, 0(DI)

	// Push arguments to cgocallbackg.
	// Frame size here must match the frame size above
	// to trick traceback routines into doing the right thing.
	SUBQ	$24, DI
	MOVQ	AX, 0(DI)
	MOVQ	BX, 8(DI)
	MOVQ	DX, 16(DI)

	// Switch stack and make the call.
	MOVQ	DI, SP
	CALL	runtime·cgocallbackg(SB)

	// Restore g->sched (== m->curg->sched) from saved values:
	// the pushed PC sits just above the 24-byte argument area,
	// and the original SP is one word above that.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	24(SP), BP
	MOVQ	BP, (g_sched+gobuf_pc)(SI)
	LEAQ	(24+8)(SP), DI
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	m(CX), BP
	MOVQ	m_g0(BP), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	POPQ	(g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	POPQ	BP
	CMPQ	BP, $0
	JNE	cgocallback_done
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

cgocallback_done:
	// Done!
	RET
671
// void setmg(M*, G*); set m and g. for use by needm.
// On Windows the TLS base word at 0x28(GS) is set up first: it is
// cleared (and nothing else is done) when the M argument is nil,
// otherwise it is pointed at the M's tls field.
TEXT runtime·setmg(SB), 7, $0
	MOVQ	mm+0(FP), AX
#ifdef GOOS_windows
	CMPQ	AX, $0
	JNE	settls
	MOVQ	$0, 0x28(GS)
	RET
settls:
	LEAQ	m_tls(AX), AX
	MOVQ	AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ	mm+0(FP), AX		// reload: AX was reused above on Windows
	MOVQ	AX, m(CX)
	MOVQ	gg+8(FP), BX
	MOVQ	BX, g(CX)
	RET
690
// void setmg_gcc(M*, G*); set m and g called from gcc.
// The two arguments arrive in DI and SI rather than on the stack.
TEXT setmg_gcc<>(SB),7,$0
	get_tls(AX)
	MOVQ	DI, m(AX)
	MOVQ	SI, g(AX)
	RET
697
// check that SP lies strictly between the current g's bounds:
//	g->stackguard < SP < g->stackbase
// (unsigned comparisons); execute INT $3 for each bound that is violated.
TEXT runtime·stackcheck(SB), 7, $0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	g_stackbase(AX), SP
	JHI	stackcheck_belowbase	// stackbase > SP
	INT	$3
stackcheck_belowbase:
	CMPQ	SP, g_stackguard(AX)
	JHI	stackcheck_ok		// SP > stackguard
	INT	$3
stackcheck_ok:
	RET
709
// Zero a region of memory: first argument is the start address,
// second the byte count.  The bulk is cleared eight bytes at a time
// (count/8 STOSQ), the remaining count%8 bytes one at a time.
TEXT runtime·memclr(SB),7,$0
	MOVQ	ptr+0(FP), DI		// arg 1 addr
	MOVQ	n+8(FP), CX		// arg 2 count
	MOVQ	CX, BX
	ANDQ	$7, BX			// BX = count % 8, the tail bytes
	SHRQ	$3, CX			// CX = count / 8, the quadwords
	MOVQ	$0, AX
	CLD				// store forward
	REP
	STOSQ
	MOVQ	BX, CX
	REP
	STOSB
	RET
724
// Takes the address of the caller's first argument and returns the
// word stored immediately below it, i.e. the caller's return PC.
TEXT runtime·getcallerpc(SB),7,$0
	MOVQ	x+0(FP), AX		// addr of first arg
	MOVQ	-8(AX), AX		// get calling pc
	RET
729
// Takes the address of the caller's first argument and a new PC, and
// overwrites the word immediately below that address (the caller's
// return PC) with the new PC.
TEXT runtime·setcallerpc(SB),7,$0
	MOVQ	x+0(FP), AX		// addr of first arg
	MOVQ	newpc+8(FP), BX
	MOVQ	BX, -8(AX)		// set calling pc
	RET
735
// Returns its first argument unchanged in AX.
// NOTE(review): presumably called with the address of the caller's
// first argument, which makes the result the caller's SP - confirm.
TEXT runtime·getcallersp(SB),7,$0
	MOVQ	sp+0(FP), AX
	RET
739
// int64 runtime·cputicks(void)
// Reads the time-stamp counter.  RDTSC delivers it as two 32-bit
// halves, high in DX and low in AX; they are merged into AX.
TEXT runtime·cputicks(SB),7,$0
	RDTSC
	SHLQ	$32, DX
	ADDQ	DX, AX
	RET
746
// Reports two values through its argument frame: the hardware SP at
// entry is written to sp+0(FP), and the current g's stackguard to
// limit+8(FP).
TEXT runtime·stackguard(SB),7,$0
	MOVQ	SP, DX
	MOVQ	DX, sp+0(FP)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_stackguard(BX), DX
	MOVQ	DX, limit+8(FP)
	RET
755
// runtime·tls0: 64 bytes of storage whose address _rt0_amd64 hands to
// runtime·settls.
GLOBL	runtime·tls0(SB), $64
Keith Randalla5d40242013-03-12 10:47:44 -0700757
// hash function using AES hardware instructions
// Arguments: pointer to the hash value, size, pointer to the data.
// They are loaded into the registers runtime·aeshashbody expects
// (DX, CX, AX) and control continues there.
TEXT runtime·aeshash(SB),7,$0
	MOVQ	h+0(FP), DX		// ptr to hash value
	MOVQ	s+8(FP), CX		// size
	MOVQ	p+16(FP), AX		// ptr to data
	JMP	runtime·aeshashbody(SB)
764
// String variant of runtime·aeshash.  The third argument points at a
// string header; its second word is used as the length and its first
// word as the data pointer.  The second argument is not read.
TEXT runtime·aeshashstr(SB),7,$0
	MOVQ	h+0(FP), DX		// ptr to hash value
	MOVQ	p+16(FP), AX		// ptr to string struct
	MOVQ	8(AX), CX		// length of string
	MOVQ	(AX), AX		// string data
	JMP	runtime·aeshashbody(SB)
771
// runtime·aeshashbody: shared core of the AES-based hash functions.
// Registers on entry:
//	AX = pointer to the data
//	CX = length of the data in bytes
//	DX = pointer to a 64-bit word: seed on input, hash on output
// X0 accumulates the hash, X1 carries the current block of input, and
// X2/X3 hold the first and second 16 bytes of runtime·aeskeysched.
// masks and shifts are 16-byte-per-entry tables indexed by byte count.
TEXT runtime·aeshashbody(SB),7,$0
	MOVO	runtime·aeskeysched+0(SB), X2
	MOVO	runtime·aeskeysched+16(SB), X3
	MOVQ	(DX), X0		// low 64 bits of X0 = seed
	PINSRQ	$1, CX, X0		// high 64 bits of X0 = length
	CMPQ	CX, $16
	JB	aes_short

	// Consume 16-byte blocks for as long as more than 16 bytes remain.
aes_block:
	CMPQ	CX, $16
	JBE	aes_tail
	MOVOU	(AX), X1
	ADDQ	$16, AX
	SUBQ	$16, CX
	AESENC	X2, X0
	AESENC	X1, X0
	JMP	aes_block

	// 1-16 bytes are left. Load the 16 bytes that end at the last
	// input byte. This can overlap the block loaded just before, so
	// some bytes get hashed twice, which is harmless.
aes_tail:
	MOVOU	-16(AX)(CX*1), X1
	JMP	aes_mix

	// Whole input is 0-15 bytes long.
aes_short:
	TESTQ	CX, CX
	JE	aes_finish		// empty input: nothing to mix in
	CMPB	AX, $0xf0
	JA	aes_pageedge

	// Low address byte is at most 0xf0, so a 16-byte load from AX
	// cannot cross a page boundary. Load directly and clear the bytes
	// beyond the length.
	MOVOU	(AX), X1
	ADDQ	CX, CX			// 2*len, scaled by 8 below: 16 bytes per table entry
	PAND	masks(SB)(CX*8), X1
	JMP	aes_mix

	// Address ends in 1111xxxx and may sit right before a page
	// boundary. Load the 16 bytes ending at the last input byte, then
	// use pshufb to slide the wanted bytes down to the bottom.
aes_pageedge:
	MOVOU	-16(AX)(CX*1), X1
	ADDQ	CX, CX
	PSHUFB	shifts(SB)(CX*8), X1

	// Fold the final (possibly partial) block into the hash.
aes_mix:
	AESENC	X3, X0
	AESENC	X1, X0

	// Three closing rounds, then store the low 64 bits as the result.
aes_finish:
	AESENC	X2, X0
	AESENC	X3, X0
	AESENC	X2, X0
	MOVQ	X0, (DX)
	RET
829
// runtime·aeshash32: AES-based hash of a 32-bit value.
//	8(SP):  pointer to the hash value (seed on input, hash on output)
//	24(SP): pointer to the data
TEXT runtime·aeshash32(SB),7,$0
	MOVQ	24(SP), AX		// AX = pointer to the data
	MOVQ	8(SP), DX		// DX = pointer to the hash value
	MOVQ	(DX), X0		// seed fills the low 64 bits
	PINSRD	$2, (AX), X0		// data goes into 32-bit lane 2
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVQ	X0, (DX)		// low 64 bits are the hash
	RET
840
// runtime·aeshash64: AES-based hash of a 64-bit value.
//	8(SP):  pointer to the hash value (seed on input, hash on output)
//	24(SP): pointer to the data
TEXT runtime·aeshash64(SB),7,$0
	MOVQ	24(SP), AX		// AX = pointer to the data
	MOVQ	8(SP), DX		// DX = pointer to the hash value
	MOVQ	(DX), X0		// seed fills the low 64 bits
	PINSRQ	$1, (AX), X0		// data fills the high 64 bits
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVQ	X0, (DX)		// low 64 bits are the hash
	RET
851
// masks: byte masks that strip unwanted data from the high part of a
// 16-byte register. There are 16 entries of 16 bytes each, low
// quadword first. Entry n has its n lowest bytes set to 0xff and every
// other byte zero, so PAND with entry n keeps only the low n bytes.
TEXT masks(SB),7,$0
	// n = 0
	QUAD	$0x0000000000000000
	QUAD	$0x0000000000000000
	// n = 1
	QUAD	$0x00000000000000ff
	QUAD	$0x0000000000000000
	// n = 2
	QUAD	$0x000000000000ffff
	QUAD	$0x0000000000000000
	// n = 3
	QUAD	$0x0000000000ffffff
	QUAD	$0x0000000000000000
	// n = 4
	QUAD	$0x00000000ffffffff
	QUAD	$0x0000000000000000
	// n = 5
	QUAD	$0x000000ffffffffff
	QUAD	$0x0000000000000000
	// n = 6
	QUAD	$0x0000ffffffffffff
	QUAD	$0x0000000000000000
	// n = 7
	QUAD	$0x00ffffffffffffff
	QUAD	$0x0000000000000000
	// n = 8
	QUAD	$0xffffffffffffffff
	QUAD	$0x0000000000000000
	// n = 9
	QUAD	$0xffffffffffffffff
	QUAD	$0x00000000000000ff
	// n = 10
	QUAD	$0xffffffffffffffff
	QUAD	$0x000000000000ffff
	// n = 11
	QUAD	$0xffffffffffffffff
	QUAD	$0x0000000000ffffff
	// n = 12
	QUAD	$0xffffffffffffffff
	QUAD	$0x00000000ffffffff
	// n = 13
	QUAD	$0xffffffffffffffff
	QUAD	$0x000000ffffffffff
	// n = 14
	QUAD	$0xffffffffffffffff
	QUAD	$0x0000ffffffffffff
	// n = 15
	QUAD	$0xffffffffffffffff
	QUAD	$0x00ffffffffffffff
886
// shifts: control vectors for pshufb. There are 16 entries of 16 bytes
// each, low quadword first, indexed by how many bytes to move. For
// n >= 1, entry n copies the top n bytes of the register down to bytes
// 0..n-1; a control byte of 0xff clears the corresponding destination
// byte. Entry 0 is all zero; aeshashbody handles a zero length before
// it reaches the table lookup.
TEXT shifts(SB),7,$0
	// n = 0
	QUAD	$0x0000000000000000
	QUAD	$0x0000000000000000
	// n = 1
	QUAD	$0xffffffffffffff0f
	QUAD	$0xffffffffffffffff
	// n = 2
	QUAD	$0xffffffffffff0f0e
	QUAD	$0xffffffffffffffff
	// n = 3
	QUAD	$0xffffffffff0f0e0d
	QUAD	$0xffffffffffffffff
	// n = 4
	QUAD	$0xffffffff0f0e0d0c
	QUAD	$0xffffffffffffffff
	// n = 5
	QUAD	$0xffffff0f0e0d0c0b
	QUAD	$0xffffffffffffffff
	// n = 6
	QUAD	$0xffff0f0e0d0c0b0a
	QUAD	$0xffffffffffffffff
	// n = 7
	QUAD	$0xff0f0e0d0c0b0a09
	QUAD	$0xffffffffffffffff
	// n = 8
	QUAD	$0x0f0e0d0c0b0a0908
	QUAD	$0xffffffffffffffff
	// n = 9
	QUAD	$0x0e0d0c0b0a090807
	QUAD	$0xffffffffffffff0f
	// n = 10
	QUAD	$0x0d0c0b0a09080706
	QUAD	$0xffffffffffff0f0e
	// n = 11
	QUAD	$0x0c0b0a0908070605
	QUAD	$0xffffffffff0f0e0d
	// n = 12
	QUAD	$0x0b0a090807060504
	QUAD	$0xffffffff0f0e0d0c
	// n = 13
	QUAD	$0x0a09080706050403
	QUAD	$0xffffff0f0e0d0c0b
	// n = 14
	QUAD	$0x0908070605040302
	QUAD	$0xffff0f0e0d0c0b0a
	// n = 15
	QUAD	$0x0807060504030201
	QUAD	$0xff0f0e0d0c0b0a09
Keith Randall3d5daa22013-04-02 16:26:15 -0700923
// runtime·memeq: compare count bytes at a and b. Loads the arguments
// into the registers runtime·memeqbody expects and jumps there, so
// memeqbody's AX result (1 equal, 0 not equal) returns to our caller.
TEXT runtime·memeq(SB),7,$0
	MOVQ	count+16(FP), BX
	MOVQ	b+8(FP), DI
	MOVQ	a+0(FP), SI
	JMP	runtime·memeqbody(SB)
929
930
// bytes·Equal: report whether two byte slices hold the same bytes.
// Slices of different length are unequal without looking at the data;
// otherwise runtime·memeqbody compares the contents. The low byte of
// AX is stored to ret+48(FP).
TEXT bytes·Equal(SB),7,$0
	XORQ	AX, AX			// result starts out as "not equal"
	MOVQ	a_len+8(FP), BX
	MOVQ	b_len+32(FP), CX
	CMPQ	BX, CX
	JNE	bytes_equal_ret		// length mismatch: keep AX = 0
	MOVQ	b+24(FP), DI
	MOVQ	a+0(FP), SI
	CALL	runtime·memeqbody(SB)	// wants SI = a, DI = b, BX = count
bytes_equal_ret:
	MOVB	AX, ret+48(FP)
	RET
943
// runtime·memeqbody: test two buffers for byte-wise equality.
// Registers on entry:
//	SI = a
//	DI = b
//	BX = count
// Result: AX = 1 when all count bytes match, 0 otherwise.
TEXT runtime·memeqbody(SB),7,$0
	XORQ	AX, AX			// every early RET below reports "not equal"

	CMPQ	BX, $8
	JB	meq_tiny

	// 64 bytes per pass, compared in xmm registers.
meq_loop64:
	CMPQ	BX, $64
	JB	meq_loop8
	MOVOU	(SI), X0
	MOVOU	16(SI), X2
	MOVOU	32(SI), X4
	MOVOU	48(SI), X6
	MOVOU	(DI), X1
	MOVOU	16(DI), X3
	MOVOU	32(DI), X5
	MOVOU	48(DI), X7
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0			// a byte is 0xff only if equal in all four blocks
	PMOVMSKB X0, DX
	CMPL	DX, $0xffff		// all 16 mask bits set means no mismatch
	JEQ	meq_loop64
	RET

	// 8 bytes per pass, compared in 64-bit registers.
meq_loop8:
	CMPQ	BX, $8
	JBE	meq_tail
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	SUBQ	$8, BX
	ADDQ	$8, SI
	ADDQ	$8, DI
	CMPQ	CX, DX
	JEQ	meq_loop8
	RET

	// 0-8 bytes remain. Compare the 8 bytes that end at the end of
	// each buffer; any overlap covers bytes already found equal.
meq_tail:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
	SETEQ	AX
	RET

	// Fewer than 8 bytes in total.
meq_tiny:
	TESTQ	BX, BX
	JEQ	meq_result		// count 0: ZF is set, so SETEQ yields 1

	LEAQ	0(BX*8), CX
	NEGQ	CX			// as a shift count this is 64 - 8*count

	CMPB	SI, $0xf8
	JA	meq_a_edge

	// An 8-byte load at SI cannot cross a page boundary.
	MOVQ	(SI), SI
	JMP	meq_a_ready
meq_a_edge:
	// Address ends in 11111xxx. Load the 8 bytes ending at the last
	// wanted byte and shift the wanted bytes down to the bottom.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
meq_a_ready:

	// Same treatment for DI.
	CMPB	DI, $0xf8
	JA	meq_b_edge
	MOVQ	(DI), DI
	JMP	meq_b_ready
meq_b_edge:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
meq_b_ready:

	SUBQ	SI, DI
	SHLQ	CX, DI			// drop bytes beyond count; ZF set iff the rest matched
meq_result:
	SETEQ	AX
	RET
Keith Randallb3946dc2013-05-14 16:05:51 -07001035
1036
// runtime·cmpstring: three-way comparison of two strings. Unpacks both
// (pointer, length) pairs into the registers runtime·cmpbody expects
// and stores cmpbody's AX result (1/0/-1) to res+32(FP).
TEXT runtime·cmpstring(SB),7,$0
	MOVQ	s2+16(FP), DI		// DI = data of s2
	MOVQ	s2+24(FP), DX		// DX = length of s2
	MOVQ	s1+0(FP), SI		// SI = data of s1
	MOVQ	s1+8(FP), BX		// BX = length of s1
	CALL	runtime·cmpbody(SB)
	MOVQ	AX, res+32(FP)
	RET
1045
// bytes·Compare: three-way comparison of two byte slices. Picks the
// data pointer and length of each slice out of the frame, hands them
// to runtime·cmpbody, and stores its AX result (1/0/-1) to res+48(FP).
TEXT bytes·Compare(SB),7,$0
	MOVQ	s2+24(FP), DI		// DI = data of s2
	MOVQ	s2+32(FP), DX		// DX = length of s2
	MOVQ	s1+0(FP), SI		// SI = data of s1
	MOVQ	s1+8(FP), BX		// BX = length of s1
	CALL	runtime·cmpbody(SB)
	MOVQ	AX, res+48(FP)
	RET
1054
// runtime·cmpbody: three-way comparison of two byte sequences.
// input:
//	SI = a
//	DI = b
//	BX = alen
//	DX = blen
// output:
//	AX = 1 if a > b, 0 if a == b, -1 if a < b
// Bytes are compared as unsigned values. When the first
// min(alen, blen) bytes match, the lengths decide the result.
TEXT runtime·cmpbody(SB),7,$0
	CMPQ	SI, DI
	JEQ	cmp_bylen		// same address: only the lengths can differ
	MOVQ	DX, BP
	CMPQ	BX, DX
	CMOVQLT	BX, BP			// BP = min(alen, blen) = number of bytes to compare
	CMPQ	BP, $8
	JB	cmp_short

	// 16 bytes per pass while more than 16 remain.
cmp_16:
	CMPQ	BP, $16
	JBE	cmp_tail16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX		// flip "equal" bits into "differs" bits
	JNE	cmp_found16		// at least one byte differs
	SUBQ	$16, BP
	ADDQ	$16, SI
	ADDQ	$16, DI
	JMP	cmp_16

	// AX = bit mask of the byte positions that differ.
cmp_found16:
	BSFQ	AX, BX			// BX = index of the first differing byte
	MOVB	(SI)(BX*1), CX
	XORQ	AX, AX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX			// 1 if a's byte is the larger one (unsigned)
	LEAQ	-1(AX*2), AX		// 1/0 becomes +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8.
cmp_tail16:
	CMPQ	BP, $8
	JBE	cmp_tail8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	cmp_found8
cmp_tail8:
	// Compare the 8 bytes that end at the last byte to compare.
	MOVQ	-8(SI)(BP*1), AX
	MOVQ	-8(DI)(BP*1), CX
	CMPQ	AX, CX
	JEQ	cmp_bylen

	// AX and CX hold 8-byte pieces of a and b that differ.
cmp_found8:
	BSWAPQ	AX			// byte-reverse so the earliest byte is most significant
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX			// index of the highest differing bit
	SHRQ	CX, AX			// bring a's bit at that index to bit 0
	ANDQ	$1, AX			// isolate it
	LEAQ	-1(AX*2), AX		// 1/0 becomes +1/-1
	RET

	// 0-7 bytes to compare.
cmp_short:
	LEAQ	(BP*8), CX		// bytes to compare, expressed in bits
	NEGQ	CX			// as a shift count this is 64 - bits
	JEQ	cmp_bylen		// zero bytes to compare

	// Place the bytes of a in the high bytes of SI.
	CMPB	SI, $0xf8
	JA	cmp_a_edge
	MOVQ	(SI), SI		// low address byte <= 0xf8: load stays within the page
	JMP	cmp_a_ready
cmp_a_edge:
	MOVQ	-8(SI)(BP*1), SI	// load ending at the last wanted byte
	SHRQ	CX, SI			// wanted bytes down to the bottom
cmp_a_ready:
	SHLQ	CX, SI			// wanted bytes up to the top, everything else dropped

	// Place the bytes of b in the high bytes of DI.
	CMPB	DI, $0xf8
	JA	cmp_b_edge
	MOVQ	(DI), DI
	JMP	cmp_b_ready
cmp_b_edge:
	MOVQ	-8(DI)(BP*1), DI
	SHRQ	CX, DI
cmp_b_ready:
	SHLQ	CX, DI

	BSWAPQ	SI			// byte-reverse so the earliest byte is most significant
	BSWAPQ	DI
	XORQ	SI, DI			// bits that differ
	JEQ	cmp_bylen
	BSRQ	DI, CX			// index of the highest differing bit
	SHRQ	CX, SI			// bring a's bit at that index to bit 0
	ANDQ	$1, SI			// isolate it
	LEAQ	-1(SI*2), AX		// 1/0 becomes +1/-1
	RET

	// All compared bytes match: the longer input is the greater one.
cmp_bylen:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// gives 1, 0 or -1
	RET
1165 RET