blob: 5238e594370601987aedbdc951abd03f752e3d0a [file] [log] [blame]
Russ Cox0d3a0432009-03-30 00:01:07 -07001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Russ Cox55889402011-12-19 15:51:13 -05005#include "zasm_GOOS_GOARCH.h"
Russ Cox9ddfb642013-07-16 16:24:09 -04006#include "funcdata.h"
Russ Cox8522a472009-06-17 15:15:55 -07007
// _rt0_go is the runtime bootstrap. It saves argc/argv on a 16-byte-aligned
// stack, gives g0 default stack bounds, records CPUID feature bits, sets up
// thread-local storage (through _cgo_init when it is non-nil, otherwise
// through runtime·ldt0setup), installs m0 and g0 in TLS, runs the runtime
// initializers, queues runtime·main as a new goroutine and starts this M.
Russ Cox6120ef02013-06-11 16:49:24 -04008TEXT _rt0_go(SB),7,$0
Russ Cox0d3a0432009-03-30 00:01:07 -07009 // copy arguments forward on an even stack
Russ Coxdfc22e292013-03-07 19:57:10 -080010 MOVL argc+0(FP), AX
11 MOVL argv+4(FP), BX
Russ Cox0d3a0432009-03-30 00:01:07 -070012 SUBL $128, SP // plenty of scratch
Russ Cox133a1582009-10-03 10:37:12 -070013 ANDL $~15, SP
Russ Cox0d3a0432009-03-30 00:01:07 -070014 MOVL AX, 120(SP) // save argc, argv away
15 MOVL BX, 124(SP)
16
Dmitriy Vyukov428062d2011-12-07 16:53:17 +030017 // set default stack bounds.
Russ Coxf8d49b52013-02-28 16:24:38 -050018 // _cgo_init may update stackguard.
Dmitriy Vyukov428062d2011-12-07 16:53:17 +030019 MOVL $runtime·g0(SB), BP
	// default guard: 64KB below the current SP, plus 104 bytes
20 LEAL (-64*1024+104)(SP), BX
21 MOVL BX, g_stackguard(BP)
Dmitriy Vyukovf5becf42013-06-03 12:28:24 +040022 MOVL BX, g_stackguard0(BP)
Dmitriy Vyukov428062d2011-12-07 16:53:17 +030023 MOVL SP, g_stackbase(BP)
24
Keith Randalla5d40242013-03-12 10:47:44 -070025 // find out information about the processor we're on
	// CPUID with AX=0 first; if it leaves AX == 0 the AX=1 query is skipped
	// and runtime·cpuid_ecx/edx are left untouched.
26 MOVL $0, AX
27 CPUID
28 CMPL AX, $0
29 JE nocpuinfo
30 MOVL $1, AX
31 CPUID
32 MOVL CX, runtime·cpuid_ecx(SB)
33 MOVL DX, runtime·cpuid_edx(SB)
34nocpuinfo:
35
Russ Coxf8d49b52013-02-28 16:24:38 -050036 // if there is an _cgo_init, call it to let it
Russ Cox133a1582009-10-03 10:37:12 -070037 // initialize and to set up GS. if not,
38 // we set up GS ourselves.
Russ Coxf8d49b52013-02-28 16:24:38 -050039 MOVL _cgo_init(SB), AX
Russ Cox133a1582009-10-03 10:37:12 -070040 TESTL AX, AX
Dmitriy Vyukovfbfed492011-11-09 23:11:48 +030041 JZ needtls
	// stack arguments for the indirect call: 0(SP) = g0 (held in BP),
	// 4(SP) = address of setmg_gcc
Russ Cox6a70f9d2013-03-25 18:14:02 -040042 MOVL $setmg_gcc<>(SB), BX
43 MOVL BX, 4(SP)
Russ Cox3b85b722013-03-11 00:51:42 -040044 MOVL BP, 0(SP)
Russ Cox133a1582009-10-03 10:37:12 -070045 CALL AX
Dmitriy Vyukovf5becf42013-06-03 12:28:24 +040046 // update stackguard after _cgo_init
47 MOVL $runtime·g0(SB), CX
48 MOVL g_stackguard0(CX), AX
49 MOVL AX, g_stackguard(CX)
Russ Coxf8d49b52013-02-28 16:24:38 -050050 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
Wei Guangjing1aa2d882011-01-20 10:22:20 -050051 CMPL runtime·iswindows(SB), $0
52 JEQ ok
Dmitriy Vyukovfbfed492011-11-09 23:11:48 +030053needtls:
Yuval Pavel Zholkover2aa2ceb2011-07-25 12:25:41 -040054 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
55 CMPL runtime·isplan9(SB), $1
56 JEQ ok
57
Russ Cox1b14bdb2009-09-22 16:28:32 -070058 // set up %gs
Russ Cox68b42552010-11-04 14:00:19 -040059 CALL runtime·ldt0setup(SB)
Russ Cox0d3a0432009-03-30 00:01:07 -070060
Russ Cox0d3a0432009-03-30 00:01:07 -070061 // store through it, to make sure it works
	// write 0x123 through the TLS g slot and expect to read it back from
	// the first word of runtime·tls0; on mismatch, fault by storing to address 0.
Hector Chu6bfe5f52010-01-06 17:58:55 -080062 get_tls(BX)
63 MOVL $0x123, g(BX)
Russ Cox68b42552010-11-04 14:00:19 -040064 MOVL runtime·tls0(SB), AX
Russ Cox0d3a0432009-03-30 00:01:07 -070065 CMPL AX, $0x123
66 JEQ ok
Russ Cox133a1582009-10-03 10:37:12 -070067 MOVL AX, 0 // abort
Russ Cox0d3a0432009-03-30 00:01:07 -070068ok:
Russ Cox0d3a0432009-03-30 00:01:07 -070069 // set up m and g "registers"
Hector Chu6bfe5f52010-01-06 17:58:55 -080070 get_tls(BX)
Russ Cox68b42552010-11-04 14:00:19 -040071 LEAL runtime·g0(SB), CX
Hector Chu6bfe5f52010-01-06 17:58:55 -080072 MOVL CX, g(BX)
Russ Cox68b42552010-11-04 14:00:19 -040073 LEAL runtime·m0(SB), AX
Hector Chu6bfe5f52010-01-06 17:58:55 -080074 MOVL AX, m(BX)
Russ Cox0d3a0432009-03-30 00:01:07 -070075
76 // save m->g0 = g0
Russ Cox8522a472009-06-17 15:15:55 -070077 MOVL CX, m_g0(AX)
Russ Cox0d3a0432009-03-30 00:01:07 -070078
Russ Cox68b42552010-11-04 14:00:19 -040079 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
Russ Cox0d3a0432009-03-30 00:01:07 -070080
81 // convention is D is always cleared
82 CLD
83
Russ Cox68b42552010-11-04 14:00:19 -040084 CALL runtime·check(SB)
Russ Cox0d3a0432009-03-30 00:01:07 -070085
86 // saved argc, argv
	// copy them from the scratch slots to 0(SP)/4(SP) as the arguments of runtime·args
87 MOVL 120(SP), AX
88 MOVL AX, 0(SP)
89 MOVL 124(SP), AX
90 MOVL AX, 4(SP)
Russ Cox68b42552010-11-04 14:00:19 -040091 CALL runtime·args(SB)
92 CALL runtime·osinit(SB)
Keith Randalla5d40242013-03-12 10:47:44 -070093 CALL runtime·hashinit(SB)
Russ Cox68b42552010-11-04 14:00:19 -040094 CALL runtime·schedinit(SB)
Russ Cox0d3a0432009-03-30 00:01:07 -070095
96 // create a new goroutine to start program
	// the two words are pushed last-first, so at the CALL 0(SP) = arg size (0)
	// and 4(SP) = &runtime·main·f; both are popped again afterwards.
Russ Cox1903ad72013-02-21 17:01:13 -050097 PUSHL $runtime·main·f(SB) // entry
Russ Cox8522a472009-06-17 15:15:55 -070098 PUSHL $0 // arg size
Russ Cox9ddfb642013-07-16 16:24:09 -040099 ARGSIZE(8)
Russ Cox68b42552010-11-04 14:00:19 -0400100 CALL runtime·newproc(SB)
Russ Cox9ddfb642013-07-16 16:24:09 -0400101 ARGSIZE(-1)
Russ Cox0d3a0432009-03-30 00:01:07 -0700102 POPL AX
103 POPL AX
104
105 // start this M
Russ Cox68b42552010-11-04 14:00:19 -0400106 CALL runtime·mstart(SB)
Russ Cox0d3a0432009-03-30 00:01:07 -0700107
	// only reached if runtime·mstart returns
108 INT $3
109 RET
110
// runtime·main·f is a single 4-byte word holding the address of runtime·main;
// its address is the "entry" value pushed for runtime·newproc above.
Russ Cox1903ad72013-02-21 17:01:13 -0500111DATA runtime·main·f+0(SB)/4,$runtime·main(SB)
112GLOBL runtime·main·f(SB),8,$4
113
// void runtime·breakpoint(void)
// Raise a breakpoint trap (INT 3), then return to the caller if
// execution is resumed.
TEXT runtime·breakpoint(SB),7,$0-0
	INT	$0x3
	RET
117
// void runtime·asminit(void)
// Load the x87 control word 0x27F.
// Linux and MinGW start the FPU in extended double precision, while other
// operating systems use double precision. Switch to double precision so
// every platform agrees, and so results match hardware that only has double.
TEXT runtime·asminit(SB),7,$0-0
	PUSHL	$0x27F			// new control word, placed on the stack for FLDCW
	FLDCW	(SP)
	POPL	AX			// drop the temporary word (clobbers AX)
	RET
127
Russ Cox8522a472009-06-17 15:15:55 -0700128/*
129 * go-routine
130 */
Russ Cox0d3a0432009-03-30 00:01:07 -0700131
// void gosave(Gobuf*)
// setjmp-style save: record the caller's SP and PC and the current g
// in the Gobuf, and zero its ret and ctxt fields.
TEXT runtime·gosave(SB), 7, $0-4
	MOVL	buf+0(FP), AX		// AX = Gobuf to fill in
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	MOVL	0(SP), BX		// our return address = caller's PC
	MOVL	BX, gobuf_pc(AX)
	LEAL	buf+0(FP), BX		// address of the first argument = caller's SP
	MOVL	BX, gobuf_sp(AX)
	get_tls(CX)
	MOVL	g(CX), BX		// current g
	MOVL	BX, gobuf_g(AX)
	RET
146
Ian Lance Taylor06272482013-06-12 15:05:10 -0700147// void gogo(Gobuf*)
Russ Cox8522a472009-06-17 15:15:55 -0700148// restore state from Gobuf; longjmp
// Loads g, SP, AX (gobuf.ret) and DX (gobuf.ctxt) from the Gobuf, zeroes the
// Gobuf's sp/ret/ctxt fields, and jumps to the saved pc. Control does not
// come back to the instruction after the CALL of gogo.
Russ Cox9ddfb642013-07-16 16:24:09 -0400149TEXT runtime·gogo(SB), 7, $0-4
Russ Cox8522a472009-06-17 15:15:55 -0700150 MOVL 4(SP), BX // gobuf
151 MOVL gobuf_g(BX), DX
	// dummy load through DX: faults here if gobuf.g is nil; CX is overwritten next
152 MOVL 0(DX), CX // make sure g != nil
Hector Chu6bfe5f52010-01-06 17:58:55 -0800153 get_tls(CX)
154 MOVL DX, g(CX)
Russ Cox8522a472009-06-17 15:15:55 -0700155 MOVL gobuf_sp(BX), SP // restore SP
Russ Coxd67e7e32013-06-12 15:22:26 -0400156 MOVL gobuf_ret(BX), AX
157 MOVL gobuf_ctxt(BX), DX
158 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
159 MOVL $0, gobuf_ret(BX)
160 MOVL $0, gobuf_ctxt(BX)
	// BX (the Gobuf pointer) is reused to hold the jump target
Russ Cox8522a472009-06-17 15:15:55 -0700161 MOVL gobuf_pc(BX), BX
Russ Cox0d3a0432009-03-30 00:01:07 -0700162 JMP BX
Russ Cox8522a472009-06-17 15:15:55 -0700163
Russ Coxf9ca3b52011-03-07 10:37:42 -0500164// void mcall(void (*fn)(G*))
165// Switch to m->g0's stack, call fn(g).
Russ Cox370276a2011-04-27 23:21:12 -0400166// Fn must never return. It should gogo(&g->sched)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500167// to keep running g.
// Saves the caller's PC, SP and g in g->sched, makes m->g0 the current g,
// switches SP to m->g0->sched.sp, pushes the old g as fn's argument and
// calls fn. Calls runtime·badmcall when already running on m->g0 and
// runtime·badmcall2 if fn returns.
Russ Cox9ddfb642013-07-16 16:24:09 -0400168TEXT runtime·mcall(SB), 7, $0-4
Russ Coxf9ca3b52011-03-07 10:37:42 -0500169 MOVL fn+0(FP), DI
170
171 get_tls(CX)
Russ Cox528534c2013-06-05 07:16:53 -0400172 MOVL g(CX), AX // save state in g->sched
Russ Coxf9ca3b52011-03-07 10:37:42 -0500173 MOVL 0(SP), BX // caller's PC
174 MOVL BX, (g_sched+gobuf_pc)(AX)
175 LEAL 4(SP), BX // caller's SP
176 MOVL BX, (g_sched+gobuf_sp)(AX)
177 MOVL AX, (g_sched+gobuf_g)(AX)
178
179 // switch to m->g0 & its stack, call fn
180 MOVL m(CX), BX
181 MOVL m_g0(BX), SI
182 CMPL SI, AX // if g == m->g0 call badmcall
183 JNE 2(PC)
184 CALL runtime·badmcall(SB)
185 MOVL SI, g(CX) // g = m->g0
Russ Cox528534c2013-06-05 07:16:53 -0400186 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
	// AX still holds the g we switched away from; it is fn's single argument
Russ Coxf9ca3b52011-03-07 10:37:42 -0500187 PUSHL AX
188 CALL DI
	// fn must never return; if it does, report it through badmcall2
189 POPL AX
190 CALL runtime·badmcall2(SB)
191 RET
192
Russ Cox8522a472009-06-17 15:15:55 -0700193/*
194 * support for morestack
195 */
196
197// Called during function prolog when more stack is needed.
Russ Cox58f12ff2013-07-18 16:53:45 -0400198//
199// The traceback routines see morestack on a g0 as being
200// the top of a stack (for example, morestack calling newstack
201// calling the scheduler calling newm calling gc), so we must
202// record an argument size. For that purpose, it has no arguments.
// Register inputs read below: DI = frame size and AX = argument size (both
// stored into m), DX = context word (stored into g->sched.ctxt).
// Records f's caller in m->morebuf and f itself in g->sched, then switches
// to m->g0's stack and calls runtime·newstack, which must not return.
203TEXT runtime·morestack(SB),7,$0-0
Russ Cox8522a472009-06-17 15:15:55 -0700204 // Cannot grow scheduler stack (m->g0).
Hector Chu6bfe5f52010-01-06 17:58:55 -0800205 get_tls(CX)
206 MOVL m(CX), BX
Russ Cox8522a472009-06-17 15:15:55 -0700207 MOVL m_g0(BX), SI
Hector Chu6bfe5f52010-01-06 17:58:55 -0800208 CMPL g(CX), SI
Russ Cox8522a472009-06-17 15:15:55 -0700209 JNE 2(PC)
210 INT $3
211
Russ Cox6066fdc2013-02-22 10:47:54 -0500212 // frame size in DI
Russ Cox8522a472009-06-17 15:15:55 -0700213 // arg size in AX
214 // Save in m.
Russ Cox6066fdc2013-02-22 10:47:54 -0500215 MOVL DI, m_moreframesize(BX)
Russ Cox141a4a12011-01-14 14:05:20 -0500216 MOVL AX, m_moreargsize(BX)
Russ Cox8522a472009-06-17 15:15:55 -0700217
218 // Called from f.
219 // Set m->morebuf to f's caller.
	// 0(SP) is the return address into f, 4(SP) the return address into f's caller
220 MOVL 4(SP), DI // f's caller's PC
221 MOVL DI, (m_morebuf+gobuf_pc)(BX)
222 LEAL 8(SP), CX // f's caller's SP
223 MOVL CX, (m_morebuf+gobuf_sp)(BX)
Russ Cox141a4a12011-01-14 14:05:20 -0500224 MOVL CX, m_moreargp(BX)
	// CX was used as scratch above, so the TLS base is fetched again
Hector Chu6bfe5f52010-01-06 17:58:55 -0800225 get_tls(CX)
226 MOVL g(CX), SI
Russ Cox8522a472009-06-17 15:15:55 -0700227 MOVL SI, (m_morebuf+gobuf_g)(BX)
228
Russ Cox6fa3c892013-06-27 11:32:01 -0400229 // Set g->sched to context in f.
230 MOVL 0(SP), AX // f's PC
231 MOVL AX, (g_sched+gobuf_pc)(SI)
232 MOVL SI, (g_sched+gobuf_g)(SI)
233 LEAL 4(SP), AX // f's SP
234 MOVL AX, (g_sched+gobuf_sp)(SI)
235 MOVL DX, (g_sched+gobuf_ctxt)(SI)
Russ Cox8522a472009-06-17 15:15:55 -0700236
Russ Coxf9ca3b52011-03-07 10:37:42 -0500237 // Call newstack on m->g0's stack.
Russ Cox8522a472009-06-17 15:15:55 -0700238 MOVL m_g0(BX), BP
Hector Chu6bfe5f52010-01-06 17:58:55 -0800239 MOVL BP, g(CX)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500240 MOVL (g_sched+gobuf_sp)(BP), AX
Russ Cox7e14bd82010-12-07 17:19:36 -0500241 MOVL -4(AX), BX // fault if CALL would, before smashing SP
242 MOVL AX, SP
Russ Cox68b42552010-11-04 14:00:19 -0400243 CALL runtime·newstack(SB)
Russ Cox8522a472009-06-17 15:15:55 -0700244 MOVL $0, 0x1003 // crash if newstack returns
Russ Cox0d3a0432009-03-30 00:01:07 -0700245 RET
246
Russ Coxbba278a2009-07-08 18:16:09 -0700247// Called from reflection library. Mimics morestack,
248// reuses stack growth code to create a frame
249// with the desired args running the desired function.
250//
251// func call(fn *byte, arg *byte, argsize uint32).
// Records the caller in m->morebuf and this function's own entry in
// g->sched, stores fn / arg pointer / arg size / frame size 1 in m, then
// switches to m->g0's stack and calls runtime·newstack, which must not return.
Russ Cox9ddfb642013-07-16 16:24:09 -0400252TEXT reflect·call(SB), 7, $0-12
Hector Chu6bfe5f52010-01-06 17:58:55 -0800253 get_tls(CX)
254 MOVL m(CX), BX
Russ Coxbba278a2009-07-08 18:16:09 -0700255
256 // Save our caller's state as the PC and SP to
257 // restore when returning from f.
258 MOVL 0(SP), AX // our caller's PC
259 MOVL AX, (m_morebuf+gobuf_pc)(BX)
260 LEAL 4(SP), AX // our caller's SP
261 MOVL AX, (m_morebuf+gobuf_sp)(BX)
Hector Chu6bfe5f52010-01-06 17:58:55 -0800262 MOVL g(CX), AX
Russ Coxbba278a2009-07-08 18:16:09 -0700263 MOVL AX, (m_morebuf+gobuf_g)(BX)
264
Russ Coxf0d73fb2013-06-27 16:51:06 -0400265 // Save our own state as the PC and SP to restore
266 // if this goroutine needs to be restarted.
	// AX still holds the current g here
267 MOVL $reflect·call(SB), (g_sched+gobuf_pc)(AX)
268 MOVL SP, (g_sched+gobuf_sp)(AX)
269
Russ Coxbba278a2009-07-08 18:16:09 -0700270 // Set up morestack arguments to call f on a new stack.
Russ Cox83727cc2010-03-29 21:48:22 -0700271 // We set f's frame size to 1, as a hint to newstack
272 // that this is a call from reflect·call.
273 // If it turns out that f needs a larger frame than
274 // the default stack, f's usual stack growth prolog will
275 // allocate a new segment (and recopy the arguments).
Russ Coxbba278a2009-07-08 18:16:09 -0700276 MOVL 4(SP), AX // fn
277 MOVL 8(SP), DX // arg frame
278 MOVL 12(SP), CX // arg size
279
Russ Cox6fa3c892013-06-27 11:32:01 -0400280 MOVL AX, m_cret(BX) // f's PC
Russ Cox141a4a12011-01-14 14:05:20 -0500281 MOVL DX, m_moreargp(BX) // f's argument pointer
282 MOVL CX, m_moreargsize(BX) // f's argument size
283 MOVL $1, m_moreframesize(BX) // f's frame size
Russ Coxbba278a2009-07-08 18:16:09 -0700284
Russ Coxf9ca3b52011-03-07 10:37:42 -0500285 // Call newstack on m->g0's stack.
Russ Coxbba278a2009-07-08 18:16:09 -0700286 MOVL m_g0(BX), BP
	// CX held the arg size above, so the TLS base is fetched again
Hector Chu6bfe5f52010-01-06 17:58:55 -0800287 get_tls(CX)
288 MOVL BP, g(CX)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500289 MOVL (g_sched+gobuf_sp)(BP), SP
Russ Cox68b42552010-11-04 14:00:19 -0400290 CALL runtime·newstack(SB)
Russ Coxbba278a2009-07-08 18:16:09 -0700291 MOVL $0, 0x1103 // crash if newstack returns
292 RET
293
294
Russ Cox8522a472009-06-17 15:15:55 -0700295// Return point when leaving stack.
Russ Cox58f12ff2013-07-18 16:53:45 -0400296//
297// Lessstack can appear in stack traces for the same reason
298// as morestack; in that context, it has 0 arguments.
// Stores AX in m->cret, makes m->g0 the current g, switches SP to
// m->g0->sched.sp and calls runtime·oldstack, which must not return.
299TEXT runtime·lessstack(SB), 7, $0-0
Russ Cox8522a472009-06-17 15:15:55 -0700300 // Save return value in m->cret
Hector Chu6bfe5f52010-01-06 17:58:55 -0800301 get_tls(CX)
302 MOVL m(CX), BX
Russ Cox8522a472009-06-17 15:15:55 -0700303 MOVL AX, m_cret(BX)
304
Russ Coxf9ca3b52011-03-07 10:37:42 -0500305 // Call oldstack on m->g0's stack.
306 MOVL m_g0(BX), BP
307 MOVL BP, g(CX)
308 MOVL (g_sched+gobuf_sp)(BP), SP
Russ Cox68b42552010-11-04 14:00:19 -0400309 CALL runtime·oldstack(SB)
Russ Cox8522a472009-06-17 15:15:55 -0700310 MOVL $0, 0x1004 // crash if oldstack returns
311 RET
312
313
// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·cas(SB), 7, $0-12
	MOVL	val+0(FP), BX
	MOVL	old+4(FP), AX		// CMPXCHGL compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	JNZ	cas_fail		// ZF clear: *val differed from old
	MOVL	$1, AX
	RET
cas_fail:
	MOVL	$0, AX
	RET
332
// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
TEXT runtime·cas64(SB), 7, $0-20
	MOVL	val+0(FP), BP
	MOVL	old_lo+4(FP), AX	// DX:AX = old
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX	// CX:BX = new
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	JZ	cas64_swapped		// ZF set: the exchange happened
	MOVL	$0, AX
	RET
cas64_swapped:
	MOVL	$1, AX
	RET
355
// bool casp(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·casp(SB), 7, $0-12
	MOVL	p+0(FP), BX
	MOVL	old+4(FP), AX		// CMPXCHGL compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	JNZ	casp_fail		// ZF clear: *p differed from old
	MOVL	$1, AX
	RET
casp_fail:
	MOVL	$0, AX
	RET
374
// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), 7, $0-8
	MOVL	val+0(FP), BX
	MOVL	delta+4(FP), CX		// CX keeps delta for the final sum
	MOVL	CX, AX
	LOCK
	XADDL	AX, 0(BX)		// *val += delta; AX = previous *val
	ADDL	CX, AX			// AX = previous value + delta = new value
	RET
387
// Exchange the second argument with the 32-bit word the first argument
// points at; the previous contents of that word are left in AX.
TEXT runtime·xchg(SB), 7, $0-8
	MOVL	new+4(FP), AX
	MOVL	val+0(FP), BX
	XCHGL	AX, 0(BX)
	RET
393
// void runtime·procyield(uint32 cycles)
// Execute PAUSE `cycles` times.
// The function reads one 4-byte argument, so the TEXT directive declares
// 4 bytes of arguments (it previously declared 0).
// NOTE(review): a count of 0 wraps around in the SUBL/JNZ loop below
// instead of returning at once; callers are presumed to pass a positive count.
TEXT runtime·procyield(SB),7,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET
401
// Store the second argument into the 32-bit word the first argument points
// at, using XCHGL so that the store is done by an exchange instruction.
TEXT runtime·atomicstorep(SB), 7, $0-8
	MOVL	val+4(FP), AX
	MOVL	ptr+0(FP), BX
	XCHGL	AX, 0(BX)
	RET
407
// Store the second argument into the 32-bit word the first argument points
// at, using XCHGL so that the store is done by an exchange instruction.
TEXT runtime·atomicstore(SB), 7, $0-8
	MOVL	val+4(FP), AX
	MOVL	ptr+0(FP), BX
	XCHGL	AX, 0(BX)
	RET
413
// uint64 atomicload64(uint64 volatile* addr);
// which, with the result passed through a pointer, is really
// void atomicload64(uint64 *res, uint64 volatile *addr);
// The 8 bytes are moved with a single MMX load and a single MMX store.
TEXT runtime·atomicload64(SB), 7, $0-8
	MOVL	addr+4(FP), AX
	MOVL	res+0(FP), BX
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET
427
// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
// The 8-byte value is loaded straight from the argument area into MM0 and
// written to *addr with a single MMX store, followed by a locked
// instruction that acts as a memory fence.
TEXT runtime·atomicstore64(SB), 7, $0-12
	MOVL	addr+0(FP), AX
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// Adding 0 to the word at the top of the stack changes nothing, but the
	// LOCK prefix provides the required memory fencing. MFENCE would also
	// work, but it was introduced only on the Pentium4 (SSE2).
	MOVL	$0, AX
	LOCK
	XADDL	AX, 0(SP)
	RET
444
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller: SP is reset to the slot holding the caller's return address
// 2. subtract 5 bytes from that return address, so returning re-executes the CALL
// 3. jump to the code pointer stored in the first word of fn (DX keeps fn)
TEXT runtime·jmpdefer(SB), 7, $0
	MOVL	fn+0(FP), DX		// fn
	MOVL	callersp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP		// caller sp after CALL
	SUBL	$5, (SP)		// return to CALL again
	MOVL	0(DX), BX		// code pointer in the first word of fn
	JMP	BX			// but first run the deferred function
Russ Cox0d3a0432009-03-30 00:01:07 -0700457
Russ Coxd67e7e32013-06-12 15:22:26 -0400458// Save state of caller into g->sched.
// Stores into the current g's sched: sp = address of this function's first
// argument slot, pc = the word just below that slot (the return address),
// ret = 0, ctxt = 0. sched.g is not written here. AX and BX are preserved
// through the PUSHL/POPL pairs.
459TEXT gosave<>(SB),7,$0
460 PUSHL AX
461 PUSHL BX
462 get_tls(BX)
463 MOVL g(BX), BX
464 LEAL arg+0(FP), AX
465 MOVL AX, (g_sched+gobuf_sp)(BX)
466 MOVL -4(AX), AX
467 MOVL AX, (g_sched+gobuf_pc)(BX)
468 MOVL $0, (g_sched+gobuf_ret)(BX)
469 MOVL $0, (g_sched+gobuf_ctxt)(BX)
470 POPL BX
471 POPL AX
Russ Coxf9ca3b52011-03-07 10:37:42 -0500472 RET
473
474// asmcgocall(void(*fn)(void*), void *arg)
475// Call fn(arg) on the scheduler stack,
476// aligned appropriately for the gcc ABI.
477// See cgocall.c for more details.
// Register roles below: AX = fn, BX = arg, DX = SP on entry, DI = g on entry,
// SI = m->g0. DI and DX are parked in the new stack frame across the call
// and used afterwards to restore g and SP.
Russ Cox9ddfb642013-07-16 16:24:09 -0400478TEXT runtime·asmcgocall(SB),7,$0-8
Russ Coxf9ca3b52011-03-07 10:37:42 -0500479 MOVL fn+0(FP), AX
480 MOVL arg+4(FP), BX
481 MOVL SP, DX
482
483 // Figure out if we need to switch to m->g0 stack.
484 // We get called to create new OS threads too, and those
485 // come in on the m->g0 stack already.
486 get_tls(CX)
487 MOVL m(CX), BP
488 MOVL m_g0(BP), SI
489 MOVL g(CX), DI
490 CMPL SI, DI
	// already on g0: skip the save of g->sched and the g/SP switch
Russ Coxd67e7e32013-06-12 15:22:26 -0400491 JEQ 4(PC)
492 CALL gosave<>(SB)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500493 MOVL SI, g(CX)
494 MOVL (g_sched+gobuf_sp)(SI), SP
495
496 // Now on a scheduling stack (a pthread-created stack).
497 SUBL $32, SP
498 ANDL $~15, SP // alignment, perhaps unnecessary
499 MOVL DI, 8(SP) // save g
500 MOVL DX, 4(SP) // save SP
501 MOVL BX, 0(SP) // first argument in x86-32 ABI
502 CALL AX
503
504 // Restore registers, g, stack pointer.
505 get_tls(CX)
506 MOVL 8(SP), DI
507 MOVL DI, g(CX)
508 MOVL 4(SP), SP
509 RET
510
// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Wrap fn as a Go func value by passing the address of the fn argument
// slot, then forward all three arguments to cgocallback_gofunc through an
// indirect call.
TEXT runtime·cgocallback(SB),7,$12-12
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	LEAL	fn+0(FP), AX		// &fn is passed as the FuncVal*
	MOVL	AX, 0(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET
524
525// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
526// See cgocall.c for more details.
// Outline: obtain an m if the thread has none (needm), save m->g0->sched.sp,
// switch to m->curg's stack with the saved pc and the three arguments laid
// out on it, call runtime·cgocallbackg, restore m->curg->sched, switch back
// to m->g0's stack, and release a borrowed m with dropm.
Russ Cox9ddfb642013-07-16 16:24:09 -0400527TEXT runtime·cgocallback_gofunc(SB),7,$12-12
Russ Cox6c976392013-02-20 17:48:23 -0500528 // If m is nil, Go did not create the current thread.
529 // Call needm to obtain one for temporary use.
530 // In this case, we're running on the thread stack, so there's
531 // lots of space, but the linker doesn't know. Hide the call from
532 // the linker analysis by using an indirect call through AX.
533 get_tls(CX)
534#ifdef GOOS_windows
	// no TLS base at all: push 0 in place of the saved m and go get one
535 CMPL CX, $0
536 JNE 3(PC)
537 PUSHL $0
538 JMP needm
539#endif
540 MOVL m(CX), BP
	// remember the m found on entry (0 if none); the epilogue pops it to
	// decide whether dropm must be called
541 PUSHL BP
542 CMPL BP, $0
543 JNE havem
544needm:
545 MOVL $runtime·needm(SB), AX
546 CALL AX
Russ Coxf9ca3b52011-03-07 10:37:42 -0500547 get_tls(CX)
548 MOVL m(CX), BP
Russ Cox9b732382012-03-08 12:12:40 -0500549
Russ Cox6c976392013-02-20 17:48:23 -0500550havem:
551 // Now there's a valid m, and we're running on its m->g0.
552 // Save current m->g0->sched.sp on stack and then set it to SP.
553 // Save current sp in m->g0->sched.sp in preparation for
554 // switch back to m->curg stack.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500555 MOVL m_g0(BP), SI
556 PUSHL (g_sched+gobuf_sp)(SI)
557 MOVL SP, (g_sched+gobuf_sp)(SI)
558
Alex Brainman72e83482011-08-18 12:17:09 -0400559 // Switch to m->curg stack and call runtime.cgocallbackg
Russ Coxf9ca3b52011-03-07 10:37:42 -0500560 // with the three arguments. Because we are taking over
561 // the execution of m->curg but *not* resuming what had
Russ Cox528534c2013-06-05 07:16:53 -0400562 // been running, we need to save that information (m->curg->sched)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500563 // so that we can restore it when we're done.
Russ Cox528534c2013-06-05 07:16:53 -0400564 // We can restore m->curg->sched.sp easily, because calling
Alex Brainman72e83482011-08-18 12:17:09 -0400565 // runtime.cgocallbackg leaves SP unchanged upon return.
Russ Cox528534c2013-06-05 07:16:53 -0400566 // To save m->curg->sched.pc, we push it onto the stack.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500567 // This has the added benefit that it looks to the traceback
Alex Brainman72e83482011-08-18 12:17:09 -0400568 // routine like cgocallbackg is going to return to that
569 // PC (because we defined cgocallbackg to have
Russ Coxf9ca3b52011-03-07 10:37:42 -0500570 // a frame size of 12, the same amount that we use below),
571 // so that the traceback will seamlessly trace back into
572 // the earlier calls.
Russ Cox6c976392013-02-20 17:48:23 -0500573 MOVL fn+0(FP), AX
574 MOVL frame+4(FP), BX
575 MOVL framesize+8(FP), DX
576
Russ Coxf9ca3b52011-03-07 10:37:42 -0500577 MOVL m_curg(BP), SI
578 MOVL SI, g(CX)
579 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
580
581 // Push gobuf.pc
582 MOVL (g_sched+gobuf_pc)(SI), BP
583 SUBL $4, DI
584 MOVL BP, 0(DI)
585
586 // Push arguments to cgocallbackg.
Russ Cox5d363c62013-07-16 09:41:38 -0400587 // Frame size here must match the frame size above plus the pushes
Russ Coxf9ca3b52011-03-07 10:37:42 -0500588 // to trick traceback routines into doing the right thing.
	// 20 bytes are reserved but only the low 12 are written; the saved pc
	// therefore ends up at 20(DI), which is where it is read back after the call
Russ Cox5d363c62013-07-16 09:41:38 -0400589 SUBL $20, DI
Russ Coxf9ca3b52011-03-07 10:37:42 -0500590 MOVL AX, 0(DI)
591 MOVL BX, 4(DI)
592 MOVL DX, 8(DI)
593
594 // Switch stack and make the call.
595 MOVL DI, SP
596 CALL runtime·cgocallbackg(SB)
597
Russ Cox528534c2013-06-05 07:16:53 -0400598 // Restore g->sched (== m->curg->sched) from saved values.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500599 get_tls(CX)
600 MOVL g(CX), SI
Russ Cox5d363c62013-07-16 09:41:38 -0400601 MOVL 20(SP), BP
Russ Coxf9ca3b52011-03-07 10:37:42 -0500602 MOVL BP, (g_sched+gobuf_pc)(SI)
Russ Cox5d363c62013-07-16 09:41:38 -0400603 LEAL (20+4)(SP), DI
Russ Coxf9ca3b52011-03-07 10:37:42 -0500604 MOVL DI, (g_sched+gobuf_sp)(SI)
605
606 // Switch back to m->g0's stack and restore m->g0->sched.sp.
607 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
608 // so we do not have to restore it.)
609 MOVL m(CX), BP
610 MOVL m_g0(BP), SI
611 MOVL SI, g(CX)
612 MOVL (g_sched+gobuf_sp)(SI), SP
613 POPL (g_sched+gobuf_sp)(SI)
Russ Cox6c976392013-02-20 17:48:23 -0500614
615 // If the m on entry was nil, we called needm above to borrow an m
616 // for the duration of the call. Since the call is over, return it with dropm.
617 POPL BP
618 CMPL BP, $0
619 JNE 3(PC)
620 MOVL $runtime·dropm(SB), AX
621 CALL AX
Russ Coxf9ca3b52011-03-07 10:37:42 -0500622
623 // Done!
624 RET
625
// void setmg(M*, G*); set m and g. for use by needm.
// On Windows the TLS base at 0x14(FS) is set up first: it is cleared when
// m is nil (in which case nothing else is stored and we return), otherwise
// it is pointed at m->tls.
TEXT runtime·setmg(SB), 7, $0-8
#ifdef GOOS_windows
	MOVL	mm+0(FP), AX
	CMPL	AX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	// m is loaded once, after get_tls; the extra load that used to sit
	// before get_tls was dead because AX was reloaded before its only use.
	get_tls(CX)
	MOVL	mm+0(FP), AX
	MOVL	AX, m(CX)
	MOVL	gg+4(FP), BX
	MOVL	BX, g(CX)
	RET
645
// void setmg_gcc(M*, G*); set m and g. for use by gcc
// Stores both arguments into the TLS m and g slots, using AX for the TLS
// base and DX as scratch.
TEXT setmg_gcc<>(SB), 7, $0
	get_tls(AX)
	MOVL	mm+0(FP), DX
	MOVL	DX, m(AX)
	MOVL	gg+4(FP), DX
	MOVL	DX, g(AX)
	RET
654
// check that SP lies strictly between g->stackguard and g->stackbase,
// i.e. g->stackguard < SP < g->stackbase; trap with INT 3 for each bound
// that is violated.
TEXT runtime·stackcheck(SB), 7, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	g_stackbase(AX), SP
	JHI	stackcheck_belowbase	// stackbase > SP: upper bound holds
	INT	$3
stackcheck_belowbase:
	CMPL	SP, g_stackguard(AX)
	JHI	stackcheck_aboveguard	// SP > stackguard: lower bound holds
	INT	$3
stackcheck_aboveguard:
	RET
666
// Zero `count` bytes starting at `addr`: count/4 dwords with REP STOSL,
// then the remaining count%4 bytes with REP STOSB.
TEXT runtime·memclr(SB),7,$0-8
	MOVL	$0, AX			// value stored by STOSL/STOSB
	CLD				// string stores move upward
	MOVL	addr+0(FP), DI		// arg 1 addr
	MOVL	count+4(FP), CX		// arg 2 count
	MOVL	CX, BX
	ANDL	$3, BX			// BX = trailing bytes (count % 4)
	SHRL	$2, CX			// CX = whole dwords (count / 4)
	REP
	STOSL
	MOVL	BX, CX
	REP
	STOSB
	RET
681
// The argument is the address of the caller's first argument; the word
// just below that address is the caller's return PC, which is returned in AX.
TEXT runtime·getcallerpc(SB),7,$0-4
	MOVL	x+0(FP), AX		// addr of first arg
	MOVL	-4(AX), AX		// get calling pc
	RET
686
// The first argument is the address of the caller's first argument; the
// second argument is written into the word just below that address,
// replacing the caller's return PC.
TEXT runtime·setcallerpc(SB),7,$0-8
	MOVL	newpc+4(FP), BX		// replacement pc
	MOVL	x+0(FP), AX		// addr of first arg
	MOVL	BX, -4(AX)		// set calling pc
	RET
692
// Return the single argument unchanged in AX.
// NOTE(review): callers presumably pass the address of their own first
// argument, which makes the result their caller's SP — confirm at call sites.
TEXT runtime·getcallersp(SB), 7, $0-4
	MOVL	sp+0(FP), AX
	RET
696
// int64 runtime·cputicks(void), so really
// void runtime·cputicks(int64 *ticks)
// Store the 64-bit RDTSC result (low word from AX, high word from DX)
// at *ticks.
TEXT runtime·cputicks(SB),7,$0-4
	MOVL	ret+0(FP), DI		// DI = where to store the result
	RDTSC				// DX:AX = time-stamp counter
	MOVL	AX, 0(DI)
	MOVL	DX, 4(DI)
	RET
705
// Call runtime·setldt(7, &runtime·tls0, 32).
// Entry 7 is requested because ldt 1 would be fine on Linux, but on OS X
// 7 is as low as we can go. The entry number is only a hint: setldt will
// set up GS with whatever entry it actually used.
TEXT runtime·ldt0setup(SB),7,$16-0
	LEAL	runtime·tls0(SB), AX
	MOVL	AX, 4(SP)		// address of the tls array
	MOVL	$32, 8(SP)		// sizeof(tls array)
	MOVL	$7, 0(SP)		// requested ldt entry
	CALL	runtime·setldt(SB)
	RET
716
// Function with an empty body. Unlike its neighbours it is declared with
// text flag 0 rather than 7.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET
719
// void runtime·abort(void)
// Raise a breakpoint trap and never return.
// If execution is resumed past the trap (for example by a debugger), spin
// in place: without this, control would run off the end of the function
// into whatever code follows it in the text segment.
TEXT runtime·abort(SB),7,$0-0
	INT	$0x3
abort_spin:
	JMP	abort_spin
Russ Cox133a1582009-10-03 10:37:12 -0700722
// Write two results into the argument area: the current value of SP at
// sp+0(FP) and the current g's stackguard at limit+4(FP).
TEXT runtime·stackguard(SB),7,$0-8
	MOVL	SP, DX
	MOVL	DX, sp+0(FP)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_stackguard(BX), DX
	MOVL	DX, limit+4(FP)
	RET
731
// runtime·tls0: 32-byte block; ldt0setup passes its address and size (32)
// to setldt, and _rt0_go reads its first word back to verify the TLS setup.
Russ Cox68b42552010-11-04 14:00:19 -0400732GLOBL runtime·tls0(SB), $32
Keith Randalla5d40242013-03-12 10:47:44 -0700733
// hash function using AES hardware instructions
// Loads the three stack arguments into the registers aeshashbody expects
// (DX = ptr to hash value, CX = size, AX = ptr to data) and tail-jumps to it.
TEXT runtime·aeshash(SB),7,$0-12
	MOVL	p+8(FP), AX		// ptr to data
	MOVL	s+4(FP), CX		// size
	MOVL	h+0(FP), DX		// ptr to hash value
	JMP	runtime·aeshashbody(SB)
740
// String variant of aeshash: the third argument points at a string
// struct whose first word is the data pointer and second word the length.
// The second stack argument is not read. Tail-jumps to aeshashbody.
TEXT runtime·aeshashstr(SB),7,$0-12
	MOVL	p+8(FP), AX		// ptr to string struct
	MOVL	h+0(FP), DX		// ptr to hash value
	MOVL	4(AX), CX		// length of string
	MOVL	(AX), AX		// string data (overwrites the struct pointer)
	JMP	runtime·aeshashbody(SB)
747
748// AX: data
749// CX: length
750// DX: ptr to seed input / hash output
// Shared body reached by JMP from aeshash and aeshashstr.
// X0 accumulates the hash (seeded from (DX) and the length), X1 holds the
// current data block, X2/X3 hold the two 16-byte halves of runtime·aeskeysched.
// The low 32 bits of X0 are written back to (DX) on exit.
Russ Cox9ddfb642013-07-16 16:24:09 -0400751TEXT runtime·aeshashbody(SB),7,$0-12
Keith Randalla5d40242013-03-12 10:47:44 -0700752 MOVL (DX), X0 // seed to low 32 bits of xmm0
753 PINSRD $1, CX, X0 // size to next 32 bits of xmm0
Keith Randalldb53d972013-03-20 14:34:26 -0700754 MOVO runtime·aeskeysched+0(SB), X2
755 MOVO runtime·aeskeysched+16(SB), X3
Keith Randallee669722013-05-15 09:40:14 -0700756 CMPL CX, $16
757 JB aessmall
Keith Randalla5d40242013-03-12 10:47:44 -0700758aesloop:
	// consume full 16-byte blocks while more than 16 bytes remain
759 CMPL CX, $16
Keith Randallee669722013-05-15 09:40:14 -0700760 JBE aesloopend
Keith Randalla5d40242013-03-12 10:47:44 -0700761 MOVOU (AX), X1
762 AESENC X2, X0
763 AESENC X1, X0
764 SUBL $16, CX
765 ADDL $16, AX
766 JMP aesloop
Keith Randallee669722013-05-15 09:40:14 -0700767// 1-16 bytes remaining
Keith Randalla5d40242013-03-12 10:47:44 -0700768aesloopend:
Keith Randallee669722013-05-15 09:40:14 -0700769 // This load may overlap with the previous load above.
770 // We'll hash some bytes twice, but that's ok.
771 MOVOU -16(AX)(CX*1), X1
772 JMP partial
773// 0-15 bytes
774aessmall:
Keith Randalla5d40242013-03-12 10:47:44 -0700775 TESTL CX, CX
Keith Randallee669722013-05-15 09:40:14 -0700776 JE finalize // 0 bytes
Keith Randalla5d40242013-03-12 10:47:44 -0700777
	// only the low byte of the data address is compared against 0xf0
Keith Randallee669722013-05-15 09:40:14 -0700778 CMPB AX, $0xf0
779 JA highpartial
Keith Randalla5d40242013-03-12 10:47:44 -0700780
Keith Randallee669722013-05-15 09:40:14 -0700781 // 16 bytes loaded at this address won't cross
782 // a page boundary, so we can load it directly.
Keith Randalla5d40242013-03-12 10:47:44 -0700783 MOVOU (AX), X1
	// CX is doubled so that the CX*8 scale below steps through the
	// masks<> table in 16-byte entries, one entry per byte count
784 ADDL CX, CX
Russ Cox9ddfb642013-07-16 16:24:09 -0400785 PAND masks<>(SB)(CX*8), X1
Keith Randalla5d40242013-03-12 10:47:44 -0700786 JMP partial
787highpartial:
Keith Randallee669722013-05-15 09:40:14 -0700788 // address ends in 1111xxxx. Might be up against
Keith Randalla5d40242013-03-12 10:47:44 -0700789 // a page boundary, so load ending at last byte.
790 // Then shift bytes down using pshufb.
791 MOVOU -16(AX)(CX*1), X1
	// same doubling trick: 16-byte entries of the shifts<> table
792 ADDL CX, CX
Russ Cox9ddfb642013-07-16 16:24:09 -0400793 PSHUFB shifts<>(SB)(CX*8), X1
Keith Randalla5d40242013-03-12 10:47:44 -0700794partial:
795 // incorporate partial block into hash
796 AESENC X3, X0
797 AESENC X1, X0
798finalize:
799 // finalize hash
800 AESENC X2, X0
801 AESENC X3, X0
802 AESENC X2, X0
803 MOVL X0, (DX)
804 RET
805
// runtime·aeshash32 hashes one 4-byte value.
// Stack arguments: h (+0) pointer to the hash word (seed in, result out),
// a (+8) pointer to the data; the middle argument slot is not read.
// State layout before mixing: dword 0 = seed, dword 1 = data.
TEXT runtime·aeshash32(SB),7,$0-12
	MOVL	a+8(FP), AX		// AX = &data
	MOVL	h+0(FP), DX		// DX = &hash
	MOVL	(DX), X0		// dword 0 = seed
	PINSRD	$1, (AX), X0		// dword 1 = the 32-bit value
	// Three rounds alternating the two halves of the key schedule.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVL	X0, (DX)		// low dword is the resulting hash
	RET
816
// runtime·aeshash64 hashes one 8-byte value.
// Stack arguments: h (+0) pointer to the hash word (seed in, result out),
// a (+8) pointer to the data; the middle argument slot is not read.
// State layout before mixing: low 8 bytes = data, dword 2 = seed.
TEXT runtime·aeshash64(SB),7,$0-12
	MOVL	a+8(FP), AX		// AX = &data
	MOVL	h+0(FP), DX		// DX = &hash
	MOVQ	(AX), X0		// low qword = the 64-bit value
	PINSRD	$2, (DX), X0		// dword 2 = seed
	// Three rounds alternating the two halves of the key schedule.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVL	X0, (DX)		// low dword is the resulting hash
	RET
827
// masks<> is a table of sixteen 16-byte AND masks. Row n lives at offset
// n*16 and has its low n bytes set to 0xff and the other 16-n bytes zero, so
// PAND with row n keeps the first n bytes of a register and clears the
// high part. aeshashbody indexes it with the byte count of a short input.

// row 0: keep nothing
DATA	masks<>+0x00(SB)/4, $0x00000000
DATA	masks<>+0x04(SB)/4, $0x00000000
DATA	masks<>+0x08(SB)/4, $0x00000000
DATA	masks<>+0x0c(SB)/4, $0x00000000

// row 1: keep 1 byte
DATA	masks<>+0x10(SB)/4, $0x000000ff
DATA	masks<>+0x14(SB)/4, $0x00000000
DATA	masks<>+0x18(SB)/4, $0x00000000
DATA	masks<>+0x1c(SB)/4, $0x00000000

// row 2: keep 2 bytes
DATA	masks<>+0x20(SB)/4, $0x0000ffff
DATA	masks<>+0x24(SB)/4, $0x00000000
DATA	masks<>+0x28(SB)/4, $0x00000000
DATA	masks<>+0x2c(SB)/4, $0x00000000

// row 3: keep 3 bytes
DATA	masks<>+0x30(SB)/4, $0x00ffffff
DATA	masks<>+0x34(SB)/4, $0x00000000
DATA	masks<>+0x38(SB)/4, $0x00000000
DATA	masks<>+0x3c(SB)/4, $0x00000000

// row 4: keep 4 bytes
DATA	masks<>+0x40(SB)/4, $0xffffffff
DATA	masks<>+0x44(SB)/4, $0x00000000
DATA	masks<>+0x48(SB)/4, $0x00000000
DATA	masks<>+0x4c(SB)/4, $0x00000000

// row 5: keep 5 bytes
DATA	masks<>+0x50(SB)/4, $0xffffffff
DATA	masks<>+0x54(SB)/4, $0x000000ff
DATA	masks<>+0x58(SB)/4, $0x00000000
DATA	masks<>+0x5c(SB)/4, $0x00000000

// row 6: keep 6 bytes
DATA	masks<>+0x60(SB)/4, $0xffffffff
DATA	masks<>+0x64(SB)/4, $0x0000ffff
DATA	masks<>+0x68(SB)/4, $0x00000000
DATA	masks<>+0x6c(SB)/4, $0x00000000

// row 7: keep 7 bytes
DATA	masks<>+0x70(SB)/4, $0xffffffff
DATA	masks<>+0x74(SB)/4, $0x00ffffff
DATA	masks<>+0x78(SB)/4, $0x00000000
DATA	masks<>+0x7c(SB)/4, $0x00000000

// row 8: keep 8 bytes
DATA	masks<>+0x80(SB)/4, $0xffffffff
DATA	masks<>+0x84(SB)/4, $0xffffffff
DATA	masks<>+0x88(SB)/4, $0x00000000
DATA	masks<>+0x8c(SB)/4, $0x00000000

// row 9: keep 9 bytes
DATA	masks<>+0x90(SB)/4, $0xffffffff
DATA	masks<>+0x94(SB)/4, $0xffffffff
DATA	masks<>+0x98(SB)/4, $0x000000ff
DATA	masks<>+0x9c(SB)/4, $0x00000000

// row 10: keep 10 bytes
DATA	masks<>+0xa0(SB)/4, $0xffffffff
DATA	masks<>+0xa4(SB)/4, $0xffffffff
DATA	masks<>+0xa8(SB)/4, $0x0000ffff
DATA	masks<>+0xac(SB)/4, $0x00000000

// row 11: keep 11 bytes
DATA	masks<>+0xb0(SB)/4, $0xffffffff
DATA	masks<>+0xb4(SB)/4, $0xffffffff
DATA	masks<>+0xb8(SB)/4, $0x00ffffff
DATA	masks<>+0xbc(SB)/4, $0x00000000

// row 12: keep 12 bytes
DATA	masks<>+0xc0(SB)/4, $0xffffffff
DATA	masks<>+0xc4(SB)/4, $0xffffffff
DATA	masks<>+0xc8(SB)/4, $0xffffffff
DATA	masks<>+0xcc(SB)/4, $0x00000000

// row 13: keep 13 bytes
DATA	masks<>+0xd0(SB)/4, $0xffffffff
DATA	masks<>+0xd4(SB)/4, $0xffffffff
DATA	masks<>+0xd8(SB)/4, $0xffffffff
DATA	masks<>+0xdc(SB)/4, $0x000000ff

// row 14: keep 14 bytes
DATA	masks<>+0xe0(SB)/4, $0xffffffff
DATA	masks<>+0xe4(SB)/4, $0xffffffff
DATA	masks<>+0xe8(SB)/4, $0xffffffff
DATA	masks<>+0xec(SB)/4, $0x0000ffff

// row 15: keep 15 bytes
DATA	masks<>+0xf0(SB)/4, $0xffffffff
DATA	masks<>+0xf4(SB)/4, $0xffffffff
DATA	masks<>+0xf8(SB)/4, $0xffffffff
DATA	masks<>+0xfc(SB)/4, $0x00ffffff

// 16 rows * 16 bytes; flag 8 is RODATA in the Go toolchain's symbol flags.
GLOBL	masks<>(SB),8,$256
Keith Randalla5d40242013-03-12 10:47:44 -0700910
// shifts<> is a table of sixteen 16-byte PSHUFB control vectors. Row n lives
// at offset n*16 and moves source bytes 16-n..15 (the high n bytes of the
// register) down to destination bytes 0..n-1; every other control byte is
// 0xff, for which PSHUFB writes zero. The index is the number of bytes to
// move. Row 0 is all zero bytes; aeshashbody never selects it because a
// zero-length input skips the partial-block path.

// row 0: unused by aeshashbody
DATA	shifts<>+0x00(SB)/4, $0x00000000
DATA	shifts<>+0x04(SB)/4, $0x00000000
DATA	shifts<>+0x08(SB)/4, $0x00000000
DATA	shifts<>+0x0c(SB)/4, $0x00000000

// row 1: byte 15 -> byte 0
DATA	shifts<>+0x10(SB)/4, $0xffffff0f
DATA	shifts<>+0x14(SB)/4, $0xffffffff
DATA	shifts<>+0x18(SB)/4, $0xffffffff
DATA	shifts<>+0x1c(SB)/4, $0xffffffff

// row 2: bytes 14..15 -> bytes 0..1
DATA	shifts<>+0x20(SB)/4, $0xffff0f0e
DATA	shifts<>+0x24(SB)/4, $0xffffffff
DATA	shifts<>+0x28(SB)/4, $0xffffffff
DATA	shifts<>+0x2c(SB)/4, $0xffffffff

// row 3: bytes 13..15 -> bytes 0..2
DATA	shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA	shifts<>+0x34(SB)/4, $0xffffffff
DATA	shifts<>+0x38(SB)/4, $0xffffffff
DATA	shifts<>+0x3c(SB)/4, $0xffffffff

// row 4: bytes 12..15 -> bytes 0..3
DATA	shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0x44(SB)/4, $0xffffffff
DATA	shifts<>+0x48(SB)/4, $0xffffffff
DATA	shifts<>+0x4c(SB)/4, $0xffffffff

// row 5: bytes 11..15 -> bytes 0..4
DATA	shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0x54(SB)/4, $0xffffff0f
DATA	shifts<>+0x58(SB)/4, $0xffffffff
DATA	shifts<>+0x5c(SB)/4, $0xffffffff

// row 6: bytes 10..15 -> bytes 0..5
DATA	shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0x64(SB)/4, $0xffff0f0e
DATA	shifts<>+0x68(SB)/4, $0xffffffff
DATA	shifts<>+0x6c(SB)/4, $0xffffffff

// row 7: bytes 9..15 -> bytes 0..6
DATA	shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA	shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA	shifts<>+0x78(SB)/4, $0xffffffff
DATA	shifts<>+0x7c(SB)/4, $0xffffffff

// row 8: bytes 8..15 -> bytes 0..7
DATA	shifts<>+0x80(SB)/4, $0x0b0a0908
DATA	shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0x88(SB)/4, $0xffffffff
DATA	shifts<>+0x8c(SB)/4, $0xffffffff

// row 9: bytes 7..15 -> bytes 0..8
DATA	shifts<>+0x90(SB)/4, $0x0a090807
DATA	shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0x98(SB)/4, $0xffffff0f
DATA	shifts<>+0x9c(SB)/4, $0xffffffff

// row 10: bytes 6..15 -> bytes 0..9
DATA	shifts<>+0xa0(SB)/4, $0x09080706
DATA	shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA	shifts<>+0xac(SB)/4, $0xffffffff

// row 11: bytes 5..15 -> bytes 0..10
DATA	shifts<>+0xb0(SB)/4, $0x08070605
DATA	shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA	shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA	shifts<>+0xbc(SB)/4, $0xffffffff

// row 12: bytes 4..15 -> bytes 0..11
DATA	shifts<>+0xc0(SB)/4, $0x07060504
DATA	shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA	shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0xcc(SB)/4, $0xffffffff

// row 13: bytes 3..15 -> bytes 0..12
DATA	shifts<>+0xd0(SB)/4, $0x06050403
DATA	shifts<>+0xd4(SB)/4, $0x0a090807
DATA	shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0xdc(SB)/4, $0xffffff0f

// row 14: bytes 2..15 -> bytes 0..13
DATA	shifts<>+0xe0(SB)/4, $0x05040302
DATA	shifts<>+0xe4(SB)/4, $0x09080706
DATA	shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0xec(SB)/4, $0xffff0f0e

// row 15: bytes 1..15 -> bytes 0..14
DATA	shifts<>+0xf0(SB)/4, $0x04030201
DATA	shifts<>+0xf4(SB)/4, $0x08070605
DATA	shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA	shifts<>+0xfc(SB)/4, $0xff0f0e0d

// 16 rows * 16 bytes; flag 8 is RODATA in the Go toolchain's symbol flags.
GLOBL	shifts<>(SB),8,$256
995
// runtime·memeq(a, b, count) reports whether two count-byte memory ranges
// hold the same bytes. It only loads the arguments into the registers
// runtime·memeqbody expects (SI, DI, BX) and jumps there; memeqbody's RET
// returns directly to our caller with the answer it left in AX.
TEXT runtime·memeq(SB),7,$0-12
	MOVL	count+8(FP), BX
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	JMP	runtime·memeqbody(SB)
1001
// bytes·Equal(a, b []byte) bool.
// Slice headers are 12 bytes each (pointer, length, capacity), so a is at
// +0, b at +12 and the one-byte result at +24. Slices of different length
// are unequal without looking at the data; otherwise runtime·memeqbody
// decides and leaves 1 or 0 in AX.
TEXT bytes·Equal(SB),7,$0-25
	XORL	AX, AX			// result defaults to false
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JNE	equal_store		// lengths differ: store the 0 already in AX
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	CALL	runtime·memeqbody(SB)	// SI, DI = data, BX = length; AX = result
equal_store:
	MOVB	AX, ret+24(FP)
	RET
1014
// runtime·memeqbody compares two byte ranges of the same length.
// Register inputs:
//	SI = first range (a)
//	DI = second range (b)
//	BX = number of bytes in each range
// Result: AX = 1 when every byte matches, 0 otherwise.
// SI, DI, BX, CX and DX are overwritten, as are X0-X7 on the SSE2 path.
TEXT runtime·memeqbody(SB),7,$0-0
	XORL	AX, AX			// default answer: not equal

	CMPL	BX, $4
	JB	meq_tiny

	// 64 bytes per pass using the XMM registers, if the CPU has SSE2.
meq_loop64:
	CMPL	BX, $64
	JB	meq_loop4
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	meq_loop4
	MOVOU	(SI), X0
	MOVOU	16(SI), X2
	MOVOU	32(SI), X4
	MOVOU	48(SI), X6
	MOVOU	(DI), X1
	MOVOU	16(DI), X3
	MOVOU	32(DI), X5
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0			// equal bytes become 0xff, others 0x00
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0			// AND the four 16-byte results together
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// 0xffff iff all 64 bytes matched
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	meq_loop64
	RET				// mismatch: AX is still 0

	// 4 bytes per pass using a 32-bit register, while more than 4 remain.
meq_loop4:
	CMPL	BX, $4
	JBE	meq_last4
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	meq_loop4
	RET				// mismatch: AX is still 0

	// 0..4 bytes remain. The ranges were at least 4 bytes long, so compare
	// the final 4 bytes of each; they may overlap bytes already checked.
meq_last4:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	AX
	RET

	// The ranges are shorter than 4 bytes.
meq_tiny:
	TESTL	BX, BX
	JEQ	meq_result		// empty ranges: ZF is set, so the answer is 1

	LEAL	0(BX*8), CX
	NEGL	CX			// shift count; its low 5 bits are 32 - 8*count

	MOVL	SI, DX			// copy so the low address byte can be tested
	CMPB	DX, $0xfc
	JA	meq_a_end

	// A 4-byte load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	meq_a_loaded
meq_a_end:
	// Address ends in 111111xx. Load the 4 bytes ending at the last
	// wanted byte, then shift the wanted bytes down to the bottom.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
meq_a_loaded:

	// Same procedure for the second range.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	meq_b_end
	MOVL	(DI), DI
	JMP	meq_b_loaded
meq_b_end:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
meq_b_loaded:

	SUBL	SI, DI			// difference of the two loaded words
	SHLL	CX, DI			// drop bytes beyond count; ZF set iff the rest match
meq_result:
	SETEQ	AX
	RET
Keith Randallb3946dc2013-05-14 16:05:51 -07001110
// runtime·cmpstring(s1, s2 string) int.
// String headers are 8 bytes each (pointer, length): s1 at +0, s2 at +8,
// result at +16. The pointers and lengths are handed to runtime·cmpbody,
// whose -1/0/+1 answer in AX is stored as the result.
TEXT runtime·cmpstring(SB),7,$0-20
	MOVL	s2+12(FP), DX		// DX = len(s2)
	MOVL	s2+8(FP), DI		// DI = s2 data
	MOVL	s1+4(FP), BX		// BX = len(s1)
	MOVL	s1+0(FP), SI		// SI = s1 data
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+16(FP)
	RET
1119
// bytes·Compare(s1, s2 []byte) int.
// Slice headers are 12 bytes each (pointer, length, capacity): s1 at +0,
// s2 at +12, result at +24. The pointers and lengths are handed to
// runtime·cmpbody, whose -1/0/+1 answer in AX is stored as the result.
TEXT bytes·Compare(SB),7,$0-28
	MOVL	s2+16(FP), DX		// DX = len(s2)
	MOVL	s2+12(FP), DI		// DI = s2 data
	MOVL	s1+4(FP), BX		// BX = len(s1)
	MOVL	s1+0(FP), SI		// SI = s1 data
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET
1128
// runtime·cmpbody is a three-way lexicographic comparison of two byte
// sequences, with bytes treated as unsigned.
// Register inputs:
//	SI = a		BX = alen
//	DI = b		DX = blen
// Result:
//	AX = +1 if a > b, 0 if a == b, -1 if a < b
// BP, CX, SI and DI are overwritten; BX is overwritten on the 16-byte
// mismatch path, and X0/X1 when the SSE2 loop runs.
TEXT runtime·cmpbody(SB),7,$0-0
	CMPL	SI, DI
	JEQ	cmp_allsame		// same start address: only the lengths matter
	CMPL	BX, DX
	MOVL	DX, BP
	CMOVLLT	BX, BP			// BP = min(alen, blen) = bytes to compare
	CMPL	BP, $4
	JB	cmp_small
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	cmp_loop4

	// 16 bytes per pass using the XMM registers.
cmp_loop16:
	CMPL	BP, $16
	JB	cmp_loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, AX			// bit i set <=> byte i is equal
	XORL	$0xffff, AX		// invert: bit i set <=> byte i differs
	JNE	cmp_diff16		// at least one byte is not equal
	SUBL	$16, BP
	ADDL	$16, SI
	ADDL	$16, DI
	JMP	cmp_loop16

	// AX holds the mismatch mask for the 16 bytes at SI/DI.
cmp_diff16:
	BSFL	AX, BX			// BX = index of the first differing byte
	XORL	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX			// 1 if a's byte is the larger one
	LEAL	-1(AX*2), AX		// 1/0 => +1/-1
	RET

	// 4 bytes per pass while more than 4 remain.
cmp_loop4:
	CMPL	BP, $4
	JBE	cmp_last4
	MOVL	(SI), AX
	MOVL	(DI), CX
	CMPL	AX, CX
	JNE	cmp_diff4
	SUBL	$4, BP
	ADDL	$4, SI
	ADDL	$4, DI
	JMP	cmp_loop4

	// 0..4 bytes remain; compare the last 4 bytes of the common part,
	// which may overlap bytes that were already found equal.
cmp_last4:
	MOVL	-4(SI)(BP*1), AX
	MOVL	-4(DI)(BP*1), CX
	CMPL	AX, CX
	JEQ	cmp_allsame

	// AX (from a) and CX (from b) are 4-byte words that differ.
cmp_diff4:
	BSWAPL	AX			// make the first byte in memory most significant
	BSWAPL	CX
	XORL	AX, CX			// set bits mark where the words differ
	BSRL	CX, CX			// index of the highest differing bit
	SHRL	CX, AX			// bring a's bit at that index down to bit 0
	ANDL	$1, AX			// 1 => a is larger there, 0 => b is
	LEAL	-1(AX*2), AX		// 1/0 => +1/-1
	RET

	// 0-3 bytes in common.
cmp_small:
	LEAL	(BP*8), CX
	NEGL	CX			// shift count; low 5 bits are 32 - 8*BP
	JEQ	cmp_allsame		// nothing in common to compare

	// Load a's bytes.
	CMPB	SI, $0xfc
	JA	cmp_si_high
	MOVL	(SI), SI		// 4-byte load at SI stays on its page
	JMP	cmp_si_finish
cmp_si_high:
	// Address ends in 111111xx: load the 4 bytes ending at the last
	// wanted byte and shift the wanted bytes down to the bottom.
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
cmp_si_finish:
	SHLL	CX, SI			// wanted bytes to the top, zeros below

	// Same for b's bytes.
	CMPB	DI, $0xfc
	JA	cmp_di_high
	MOVL	(DI), DI
	JMP	cmp_di_finish
cmp_di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
cmp_di_finish:
	SHLL	CX, DI

	BSWAPL	SI			// reverse the byte order of both words
	BSWAPL	DI
	XORL	SI, DI			// set bits mark where the words differ
	JEQ	cmp_allsame
	BSRL	DI, CX			// index of the highest differing bit
	SHRL	CX, SI			// bring a's bit at that index down to bit 0
	ANDL	$1, SI			// 1 => a is larger there, 0 => b is
	LEAL	-1(SI*2), AX		// 1/0 => +1/-1
	RET

	// All the bytes in common are the same, so the result is decided
	// by comparing the lengths.
cmp_allsame:
	XORL	CX, CX
	XORL	AX, AX
	CMPL	BX, DX
	SETEQ	CX			// 1 if alen == blen
	SETGT	AX			// 1 if alen > blen
	LEAL	-1(CX)(AX*2), AX	// 2*AX + CX - 1 => +1, 0 or -1
	RET