blob: 79fb4e9f961df67bce3776b557b69d2c4e11c809 [file] [log] [blame]
Russ Cox0d3a0432009-03-30 00:01:07 -07001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Russ Cox55889402011-12-19 15:51:13 -05005#include "zasm_GOOS_GOARCH.h"
Russ Cox9ddfb642013-07-16 16:24:09 -04006#include "funcdata.h"
Russ Cox8522a472009-06-17 15:15:55 -07007
// _rt0_go bootstraps the process: it saves argc/argv, gives g0 provisional
// stack bounds, records CPUID feature bits, establishes TLS (through
// _cgo_init when it is non-nil, otherwise through ldt0setup), installs
// g0/m0, runs the runtime initialisers, queues runtime·main·f with newproc
// and finally calls mstart.
TEXT _rt0_go(SB),7,$0
	// Move argc/argv to the top of an aligned scratch area.
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// scratch space
	ANDL	$~15, SP		// round SP down to a 16-byte boundary
	MOVL	AX, 120(SP)		// saved argc
	MOVL	BX, 124(SP)		// saved argv

	// Provisional stack bounds for g0.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard(BP)
	MOVL	BX, g_stackguard0(BP)
	MOVL	SP, g_stackbase(BP)

	// Probe the processor: CPUID leaf 0 first; if it reports a nonzero
	// AX, run leaf 1 and record its ECX/EDX words.
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo
	MOVL	$1, AX
	CPUID
	MOVL	CX, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)
nocpuinfo:

	// If _cgo_init is set, call it with (g0, setmg_gcc) so that it can
	// initialise and set up GS. Otherwise GS is set up below.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	BP, 0(SP)		// first argument: g0
	MOVL	$setmg_gcc<>(SB), BX
	MOVL	BX, 4(SP)		// second argument: setmg_gcc
	CALL	AX
	// Mirror the (possibly updated) stackguard0 into stackguard.
	MOVL	$runtime·g0(SB), CX
	MOVL	g_stackguard0(CX), AX
	MOVL	AX, g_stackguard(CX)
	// After _cgo_init, only Windows still runs ldt0setup and the TLS test.
	CMPL	runtime·iswindows(SB), $0
	JEQ	ok
needtls:
	// Plan 9 skips ldt0setup and the TLS test in all cases.
	CMPL	runtime·isplan9(SB), $1
	JEQ	ok

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// Sanity check: a store through the TLS slot must show up in tls0.
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·tls0(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0			// abort: store to address 0
ok:
	// Install the g and m "registers" (TLS slots).
	get_tls(BX)
	LEAL	runtime·g0(SB), CX
	MOVL	CX, g(BX)
	LEAL	runtime·m0(SB), AX
	MOVL	AX, m(BX)

	// m0->g0 = g0
	MOVL	CX, m_g0(AX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// Hand the saved argc/argv to runtime·args, then run the initialisers.
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·hashinit(SB)
	CALL	runtime·schedinit(SB)

	// Create the goroutine that starts the program.
	PUSHL	$runtime·main·f(SB)	// entry
	PUSHL	$0			// arg size
	ARGSIZE(8)
	CALL	runtime·newproc(SB)
	ARGSIZE(-1)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3			// trap if mstart ever returns
	RET
110
// runtime·main·f is a 4-byte word holding the address of runtime·main;
// _rt0_go pushes its address as the entry argument to newproc.
// NOTE(review): GLOBL flag 8 is presumably "read-only data" — confirm
// against the toolchain's flag definitions.
DATA	runtime·main·f+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·main·f(SB),8,$4
113
// breakpoint raises a breakpoint trap (INT 3) and then returns.
TEXT runtime·breakpoint(SB),7,$0-0
	INT	$3
	RET
117
// asminit loads 0x27F into the x87 FPU control word.
TEXT runtime·asminit(SB),7,$0-0
	// Linux and MinGW start the FPU in extended double precision,
	// while other operating systems use double precision.
	// Switch to double precision so that all systems agree, and to
	// match hardware that only has double.
	PUSHL	$0x27F			// control word, via a temporary stack slot
	FLDCW	0(SP)
	POPL	AX			// discard the temporary
	RET
127
Russ Cox8522a472009-06-17 15:15:55 -0700128/*
129 * go-routine
130 */
Russ Cox0d3a0432009-03-30 00:01:07 -0700131
// void gosave(Gobuf*)
// Record the caller's PC, SP and current g in the Gobuf and clear its
// ret/ctxt fields; the setjmp half of the gosave/gogo pair.
TEXT runtime·gosave(SB), 7, $0-4
	MOVL	buf+0(FP), AX		// AX = gobuf
	MOVL	0(SP), BX		// return address = caller's PC
	MOVL	BX, gobuf_pc(AX)
	LEAL	buf+0(FP), BX		// address just above the return PC = caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	$0, gobuf_ret(AX)
	MOVL	$0, gobuf_ctxt(AX)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)		// gobuf.g = current g
	RET
146
// void gogo(Gobuf*)
// Resume the state held in the Gobuf: make gobuf.g current, switch to
// gobuf.sp, load gobuf.ret into AX and gobuf.ctxt into DX, then jump to
// gobuf.pc. The longjmp half of the gosave/gogo pair.
TEXT runtime·gogo(SB), 7, $0-4
	MOVL	buf+0(FP), BX		// BX = gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// touch *g so that a nil g faults here
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	$0, gobuf_sp(BX)	// cleared to help the garbage collector
	MOVL	gobuf_ret(BX), AX
	MOVL	$0, gobuf_ret(BX)
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX
Russ Cox8522a472009-06-17 15:15:55 -0700163
// void mcall(void (*fn)(G*))
// Save the caller's PC/SP in g->sched, switch to m->g0 and its stack,
// and call fn(g). fn must never return; it should gogo(&g->sched) to
// keep running g. If fn does return, badmcall2 is called.
TEXT runtime·mcall(SB), 7, $0-4
	MOVL	fn+0(FP), DI

	// Save the caller's state in g->sched.
	get_tls(CX)
	MOVL	g(CX), AX		// AX = g
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX		// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Switch to m->g0 and its stack, then call fn.
	MOVL	m(CX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX			// already on m->g0?
	JNE	mcall_switch
	CALL	runtime·badmcall(SB)
mcall_switch:
	MOVL	SI, g(CX)		// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX			// argument: the g we came from
	CALL	DI
	POPL	AX
	CALL	runtime·badmcall2(SB)	// reached only if fn returned
	RET
192
Russ Cox8522a472009-06-17 15:15:55 -0700193/*
194 * support for morestack
195 */
196
// Called during function prolog when more stack is needed.
//
// The traceback routines can see morestack on a g0 as the top of a
// stack (for example, morestack calling newstack calling the scheduler
// calling newm calling gc), so an argument size must be recorded.
// For that purpose it is declared with no arguments.
//
// Inputs: DI = frame size, AX = arg size, DX = context (stored in
// g->sched.ctxt).
TEXT runtime·morestack(SB),7,$0-0
	// The scheduler stack (m->g0) cannot be grown: trap if g == m->g0.
	get_tls(CX)
	MOVL	m(CX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	morestack_notg0
	INT	$3
morestack_notg0:

	// Stash the requested sizes in m.
	MOVL	AX, m_moreargsize(BX)	// arg size
	MOVL	DI, m_moreframesize(BX)	// frame size

	// We were called from f.
	// m->morebuf describes f's caller.
	MOVL	4(SP), DI		// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX		// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	MOVL	CX, m_moreargp(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// g->sched describes the context in f.
	MOVL	0(SP), AX		// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX		// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Run newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX		// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003		// crash if newstack returns
	RET
246
// Called from the reflection library. Mimics morestack and reuses the
// stack growth code to create a frame with the desired args running
// the desired function.
//
// func call(fn *byte, arg *byte, argsize uint32).
TEXT reflect·call(SB), 7, $0-12
	get_tls(CX)
	MOVL	m(CX), BX

	// m->morebuf: our caller's PC and SP, to be restored when f returns.
	MOVL	0(SP), AX		// our caller's PC
	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
	LEAL	fn+0(FP), AX		// our caller's SP
	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
	MOVL	g(CX), AX
	MOVL	AX, (m_morebuf+gobuf_g)(BX)

	// g->sched: our own PC and SP, used if this goroutine
	// needs to be restarted.
	MOVL	$reflect·call(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)

	// Fill in the morestack arguments so that f runs on a new stack.
	// The frame size is set to 1 as a hint to newstack that this
	// request comes from reflect·call. If f turns out to need a
	// larger frame than the default stack, f's usual stack growth
	// prolog will allocate a new segment (and recopy the arguments).
	MOVL	fn+0(FP), AX		// fn
	MOVL	arg+4(FP), DX		// arg frame
	MOVL	argsize+8(FP), CX	// arg size

	MOVL	AX, m_cret(BX)		// f's PC
	MOVL	DX, m_moreargp(BX)	// f's argument pointer
	MOVL	CX, m_moreargsize(BX)	// f's argument size
	MOVL	$1, m_moreframesize(BX)	// f's frame size

	// Run newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	get_tls(CX)
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1103		// crash if newstack returns
	RET
293
294
// Return point when leaving a stack segment.
//
// lessstack can show up in stack traces for the same reason as
// morestack; in that context it has 0 arguments.
TEXT runtime·lessstack(SB), 7, $0-0
	// Preserve the return value (AX) in m->cret.
	get_tls(CX)
	MOVL	m(CX), BX
	MOVL	AX, m_cret(BX)

	// Run oldstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), SP
	CALL	runtime·oldstack(SB)
	MOVL	$0, 0x1004		// crash if oldstack returns
	RET
312
313
// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·cas(SB), 7, $0-12
	MOVL	val+0(FP), BX
	MOVL	old+4(FP), AX		// CMPXCHG compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	JNZ	cas_fail		// ZF clear: *val != old, nothing stored
	MOVL	$1, AX
	RET
cas_fail:
	MOVL	$0, AX
	RET
332
// bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
// Atomically:
//	if(*val == old){
//		*val = new;
//		return 1;
//	} else {
//		return 0;
//	}
// old and new are passed by value: each occupies two 32-bit argument words.
TEXT runtime·cas64(SB), 7, $0-20
	MOVL	val+0(FP), BP
	MOVL	old_lo+4(FP), AX	// DX:AX = old (CMPXCHG8B comparand)
	MOVL	old_hi+8(FP), DX
	MOVL	new_lo+12(FP), BX	// CX:BX = new (CMPXCHG8B replacement)
	MOVL	new_hi+16(FP), CX
	LOCK
	CMPXCHG8B	0(BP)
	JNZ	cas64_fail		// ZF clear: *val != old, nothing stored
	MOVL	$1, AX
	RET
cas64_fail:
	MOVL	$0, AX
	RET
355
// bool casp(void **p, void *old, void *new)
// Atomically:
//	if(*p == old){
//		*p = new;
//		return 1;
//	}else
//		return 0;
TEXT runtime·casp(SB), 7, $0-12
	MOVL	p+0(FP), BX
	MOVL	old+4(FP), AX		// CMPXCHG compares against AX
	MOVL	new+8(FP), CX
	LOCK
	CMPXCHGL	CX, 0(BX)
	JNZ	casp_fail		// ZF clear: *p != old, nothing stored
	MOVL	$1, AX
	RET
casp_fail:
	MOVL	$0, AX
	RET
374
// uint32 xadd(uint32 volatile *val, int32 delta)
// Atomically:
//	*val += delta;
//	return *val;
TEXT runtime·xadd(SB), 7, $0-8
	MOVL	val+0(FP), BX
	MOVL	delta+4(FP), CX
	MOVL	CX, AX			// AX and CX both hold delta
	LOCK
	XADDL	AX, 0(BX)		// *val += delta; AX = previous *val
	ADDL	CX, AX			// previous value + delta = new value
	RET
387
// xchg stores its second argument word at the address given by the first
// and returns, in AX, the value previously held there.
// XCHG with a memory operand is locked by the processor, so no LOCK
// prefix is needed.
TEXT runtime·xchg(SB), 7, $0-8
	MOVL	new+4(FP), AX
	MOVL	addr+0(FP), BX
	XCHGL	AX, 0(BX)
	RET
393
// procyield executes PAUSE once per unit of its single 32-bit argument.
// The argument is read from the first argument slot, so the TEXT
// directive declares 4 bytes of arguments, consistent with the other
// routines in this file.
// A count of 0 wraps around on the first SUBL and spins 2^32 times;
// callers are presumably expected to pass a positive count — TODO confirm.
TEXT runtime·procyield(SB),7,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET
401
// atomicstorep stores its second argument word at the address given by
// the first, using XCHG (locked by the processor for memory operands).
TEXT runtime·atomicstorep(SB), 7, $0-8
	MOVL	val+4(FP), AX
	MOVL	ptr+0(FP), BX
	XCHGL	AX, 0(BX)
	RET
407
// atomicstore stores its second argument word at the address given by
// the first, using XCHG (locked by the processor for memory operands).
TEXT runtime·atomicstore(SB), 7, $0-8
	MOVL	val+4(FP), AX
	MOVL	ptr+0(FP), BX
	XCHGL	AX, 0(BX)
	RET
413
// uint64 atomicload64(uint64 volatile* addr);
// which, with the result passed as a hidden pointer, is really
// void atomicload64(uint64 *res, uint64 volatile *addr);
// The 64-bit value is copied through MM0 with a single MOVQ load and
// a single MOVQ store, emitted as raw bytes.
TEXT runtime·atomicload64(SB), 7, $0-8
	MOVL	addr+4(FP), AX
	MOVL	res+0(FP), BX
	// MOVQ (%EAX), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
	// MOVQ %MM0, 0(%EBX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x03
	// EMMS
	BYTE $0x0F; BYTE $0x77
	RET
427
// void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
// The value is moved from the argument area to *addr through MM0 with
// one MOVQ load and one MOVQ store, followed by a locked no-op that
// acts as a memory fence.
TEXT runtime·atomicstore64(SB), 7, $0-12
	MOVL	addr+0(FP), AX
	// MOVQ and EMMS were introduced on the Pentium MMX.
	// MOVQ 0x8(%ESP), %MM0
	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
	// MOVQ %MM0, (%EAX)
	BYTE $0x0f; BYTE $0x7f; BYTE $0x00
	// EMMS
	BYTE $0x0F; BYTE $0x77
	// Adding zero to the word at the top of the stack changes nothing,
	// but the LOCK prefix supplies the required memory fencing.
	// MFENCE would also work, but it only exists from the Pentium 4 (SSE2) on.
	MOVL	$0, AX
	LOCK
	XADDL	AX, (SP)
	RET
444
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller: SP is reset to the slot holding the caller's
//    return address (sp - 4)
// 2. subtract 5 bytes from that return address, so that returning
//    re-executes the CALL that got us here
// 3. jump to the code pointer stored at 0(fn), leaving fn in DX
TEXT runtime·jmpdefer(SB), 7, $0
	MOVL	fn+0(FP), DX		// fn
	MOVL	callersp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP		// caller sp after CALL
	SUBL	$5, (SP)		// return to CALL again
	MOVL	0(DX), BX		// code pointer stored in *fn
	JMP	BX			// but first run the deferred function
Russ Cox0d3a0432009-03-30 00:01:07 -0700457
// Save the caller's SP and PC into g->sched and clear sched.ret and
// sched.ctxt. AX and BX are preserved across the call.
TEXT gosave<>(SB),7,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX		// BX = g
	LEAL	arg+0(FP), AX		// caller's SP
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX		// return address just below it = caller's PC
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	MOVL	$0, (g_sched+gobuf_ctxt)(BX)
	POPL	BX
	POPL	AX
	RET
473
// asmcgocall(void(*fn)(void*), void *arg)
// Run fn(arg) on the scheduler stack, with SP aligned
// appropriately for the gcc ABI.
// See cgocall.c for more details.
TEXT runtime·asmcgocall(SB),7,$0-8
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX
	MOVL	SP, DX			// incoming SP, restored before returning

	// Decide whether a switch to the m->g0 stack is needed.
	// This routine is also used when creating new OS threads,
	// and those callers arrive on the m->g0 stack already.
	get_tls(CX)
	MOVL	m(CX), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	asmcgocall_ong0
	CALL	gosave<>(SB)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
asmcgocall_ong0:

	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP		// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)		// save g
	MOVL	DX, 4(SP)		// save SP
	MOVL	BX, 0(SP)		// first argument in x86-32 ABI
	CALL	AX

	// Put back g and the stack pointer saved above.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	DI, g(CX)
	MOVL	4(SP), SP
	RET
510
// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
// Turn fn into a Go func value (by taking the address of the argument
// slot that holds it) and forward all three arguments to
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),7,$12-12
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	LEAL	fn+0(FP), AX		// &fn serves as the FuncVal*
	MOVL	AX, 0(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET
524
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
// See cgocall.c for more details.
TEXT runtime·cgocallback_gofunc(SB),7,$12-12
	// A nil m means Go did not create the current thread; needm is
	// then called to borrow an m for temporary use.
	// In that case we are running on the thread stack, so there is
	// plenty of space, but the linker does not know that. The call is
	// hidden from the linker's analysis by going indirectly through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)			// no TLS base: skip the load, leaving BP == 0
#endif
	MOVL	m(CX), BP
	MOVL	BP, DX			// saved copy of oldm
	CMPL	BP, $0
	JNE	havem
needm:
	MOVL	DX, 0(SP)		// keep oldm across the call
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	m(CX), BP

havem:
	// A valid m exists now, and we are running on its m->g0.
	// Save the current m->g0->sched.sp on the stack and then point it
	// at SP, in preparation for the switch back to the m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	// On Windows, the SEH is at 4(SP) and 8(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to the m->curg stack and call runtime.cgocallbackg.
	// We take over the execution of m->curg without resuming what had
	// been running there, so m->curg->sched has to be saved for later
	// restoration.
	// m->curg->sched.sp is easy to restore, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// m->curg->sched.pc is saved by pushing it onto the stack.
	// As a bonus, the traceback routine then sees cgocallbackg as
	// returning to that PC (the frame allocated below has the same
	// size as cgocallback_gofunc's declared frame), so tracebacks
	// continue seamlessly into the earlier calls.
	//
	// In the new goroutine, 0(SP) holds the saved oldm (DX) register.
	// 4(SP) and 8(SP) are unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	0(SP), DX

	// Restore g->sched (== m->curg->sched) from the saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so it does not need restoring.)
	MOVL	m(CX), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If m was nil on entry, needm borrowed one for the duration of
	// the call. The call is over, so hand it back with dropm.
	CMPL	DX, $0
	JNE	cgocallback_done
	MOVL	$runtime·dropm(SB), AX
	CALL	AX
cgocallback_done:

	// Done!
	RET
616
// void setmg(M*, G*); set m and g. for use by needm.
// On Windows the TLS base at 0x14(FS) is updated first: it is cleared
// (and the routine returns) when m is nil, otherwise it is pointed at
// m->tls before m and g are stored.
TEXT runtime·setmg(SB), 7, $0-8
#ifdef GOOS_windows
	MOVL	mm+0(FP), AX
	CMPL	AX, $0
	JNE	settls
	MOVL	$0, 0x14(FS)
	RET
settls:
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	mm+0(FP), AX		// loaded once, after get_tls
	MOVL	AX, m(CX)
	MOVL	gg+4(FP), BX
	MOVL	BX, g(CX)
	RET
636
// void setmg_gcc(M*, G*); set m and g. for use by gcc
// Only AX and DX are used as scratch registers.
TEXT setmg_gcc<>(SB), 7, $0
	get_tls(AX)
	MOVL	mm+0(FP), DX
	MOVL	DX, m(AX)
	MOVL	gg+4(FP), DX
	MOVL	DX, g(AX)
	RET
645
// check that SP lies strictly between g->stackguard and g->stackbase,
// i.e. g->stackguard < SP < g->stackbase (unsigned compares);
// trap with INT 3 otherwise.
TEXT runtime·stackcheck(SB), 7, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	g_stackbase(AX), SP
	JHI	stackcheck_belowbase	// stackbase > SP: upper bound holds
	INT	$3
stackcheck_belowbase:
	CMPL	SP, g_stackguard(AX)
	JHI	stackcheck_aboveguard	// SP > stackguard: lower bound holds
	INT	$3
stackcheck_aboveguard:
	RET
657
// memclr zeroes count bytes starting at addr: first count/4 32-bit
// words with REP STOSL, then the remaining count%4 bytes with REP STOSB.
TEXT runtime·memclr(SB),7,$0-8
	MOVL	$0, AX			// fill value
	MOVL	addr+0(FP), DI		// arg 1 addr
	MOVL	count+4(FP), CX		// arg 2 count
	MOVL	CX, BX
	ANDL	$3, BX			// BX = trailing byte count
	SHRL	$2, CX			// CX = whole-word count
	CLD
	REP
	STOSL
	MOVL	BX, CX
	REP
	STOSB
	RET
672
// getcallerpc takes the address of its caller's first argument and
// returns the word stored just below it, i.e. the return PC of that call.
TEXT runtime·getcallerpc(SB),7,$0-4
	MOVL	x+0(FP), AX		// addr of first arg
	MOVL	-4(AX), AX		// word below it: calling pc
	RET
677
// setcallerpc takes the address of its caller's first argument and a
// new PC, and overwrites the return PC stored just below that address.
TEXT runtime·setcallerpc(SB),7,$0-8
	MOVL	newpc+4(FP), BX
	MOVL	x+0(FP), AX		// addr of first arg
	MOVL	BX, -4(AX)		// set calling pc
	RET
683
// getcallersp returns its single argument word unchanged in AX.
TEXT runtime·getcallersp(SB), 7, $0-4
	MOVL	sp+0(FP), AX
	RET
687
// int64 runtime·cputicks(void), which with the result passed as a
// hidden pointer is really
// void runtime·cputicks(int64 *ticks)
// Stores the RDTSC counter (DX:AX) at *ticks.
TEXT runtime·cputicks(SB),7,$0-4
	RDTSC
	MOVL	ret+0(FP), DI
	MOVL	DX, 4(DI)		// high word
	MOVL	AX, 0(DI)		// low word
	RET
696
// ldt0setup calls setldt(7, &tls0, 32) to point an LDT entry at tls0.
TEXT runtime·ldt0setup(SB),7,$16-0
	// Entry 7 is requested: ldt 1 would be fine on Linux, but on OS X
	// 7 is as low as we can go. The entry number is only a hint;
	// setldt will set up GS with whatever entry it actually used.
	LEAL	runtime·tls0(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$7, 0(SP)
	MOVL	$32, 8(SP)		// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET
707
// emptyfunc does nothing. Unlike its neighbours it is declared with
// TEXT flags 0 rather than 7; _rt0_go calls it to "fault if stack check
// is wrong".
TEXT runtime·emptyfunc(SB),0,$0-0
	RET
710
// abort raises a breakpoint trap.
// NOTE(review): there is no RET (or loop) after the INT, so if execution
// resumes past the trap it runs into whatever the linker places next —
// confirm this is intended.
TEXT runtime·abort(SB),7,$0-0
	INT	$0x3
Russ Cox133a1582009-10-03 10:37:12 -0700713
// stackguard writes two results into its argument slots: the current SP
// into the first word and g->stackguard into the second.
TEXT runtime·stackguard(SB),7,$0-8
	MOVL	SP, DX
	MOVL	DX, sp+0(FP)		// result 1: current SP
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_stackguard(BX), DX
	MOVL	DX, limit+4(FP)		// result 2: g->stackguard
	RET
722
// tls0 is a 32-byte block; ldt0setup passes its address and size to setldt.
GLOBL	runtime·tls0(SB), $32
Keith Randalla5d40242013-03-12 10:47:44 -0700724
// hash function using AES hardware instructions
// aeshash loads its three argument words into the registers expected
// by aeshashbody (DX = hash pointer, CX = size, AX = data pointer) and
// tail-jumps to it.
TEXT runtime·aeshash(SB),7,$0-12
	MOVL	p+8(FP), AX		// ptr to data
	MOVL	s+4(FP), CX		// size
	MOVL	h+0(FP), DX		// ptr to hash value
	JMP	runtime·aeshashbody(SB)
731
// aeshashstr is the string variant of aeshash: the third argument points
// to a string header, from which the data pointer (offset 0) and length
// (offset 4) are loaded before tail-jumping to aeshashbody. The second
// argument word is not read.
TEXT runtime·aeshashstr(SB),7,$0-12
	MOVL	h+0(FP), DX		// ptr to hash value
	MOVL	p+8(FP), AX		// ptr to string struct
	MOVL	4(AX), CX		// length of string
	MOVL	(AX), AX		// string data
	JMP	runtime·aeshashbody(SB)
738
// Shared body of the AES-based hash functions, entered by JMP.
// AX: data
// CX: length
// DX: ptr to seed input / hash output
// X0 accumulates the hash; X2 and X3 hold the two 16-byte halves of
// runtime·aeskeysched.
TEXT runtime·aeshashbody(SB),7,$0-12
	MOVL	(DX), X0		// seed into the low 32 bits of xmm0
	PINSRD	$1, CX, X0		// size into the next 32 bits of xmm0
	MOVO	runtime·aeskeysched+0(SB), X2
	MOVO	runtime·aeskeysched+16(SB), X3
	CMPL	CX, $16
	JB	aessmall
aesloop:
	// Consume full 16-byte blocks while more than 16 bytes remain.
	CMPL	CX, $16
	JBE	aesloopend
	MOVOU	(AX), X1
	AESENC	X2, X0
	AESENC	X1, X0
	SUBL	$16, CX
	ADDL	$16, AX
	JMP	aesloop
// 1-16 bytes remaining
aesloopend:
	// Load the final 16 bytes of the input. This may overlap the
	// previous load, so some bytes get hashed twice, which is fine.
	MOVOU	-16(AX)(CX*1), X1
	JMP	partial
// 0-15 bytes
aessmall:
	TESTL	CX, CX
	JE	finalize		// 0 bytes

	CMPB	AX, $0xf0		// low byte of the data address
	JA	highpartial

	// A 16-byte load from this address cannot cross a page boundary,
	// so load directly and mask off the bytes beyond the length.
	// CX is doubled so that CX*8 indexes 16-byte table entries.
	MOVOU	(AX), X1
	ADDL	CX, CX
	PAND	masks<>(SB)(CX*8), X1
	JMP	partial
highpartial:
	// The low address byte is above 0xf0, so the data might sit right
	// up against a page boundary. Load the 16 bytes that end at the
	// last data byte instead, then shift the bytes down using pshufb.
	MOVOU	-16(AX)(CX*1), X1
	ADDL	CX, CX
	PSHUFB	shifts<>(SB)(CX*8), X1
partial:
	// incorporate partial block into hash
	AESENC	X3, X0
	AESENC	X1, X0
finalize:
	// finalize hash
	AESENC	X2, X0
	AESENC	X3, X0
	AESENC	X2, X0
	MOVL	X0, (DX)		// low 32 bits of xmm0 become the hash output
	RET
796
// aeshash32 hashes a 4-byte value.
// The first argument points at the seed/hash word and the third at the
// data. Seed goes in bits 0-31 of X0 and the data word in bits 32-63;
// three AESENC rounds against the key schedule follow, and the low
// 32 bits are stored back as the hash.
TEXT runtime·aeshash32(SB),7,$0-12
	MOVL	p+8(FP), AX	// ptr to data
	MOVL	h+0(FP), DX	// ptr to hash value
	MOVL	(DX), X0	// seed
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVL	X0, (DX)
	RET
807
// aeshash64 hashes an 8-byte value.
// The first argument points at the seed/hash word and the third at the
// data. The data fills bits 0-63 of X0 and the seed goes in bits 64-95;
// three AESENC rounds against the key schedule follow, and the low
// 32 bits are stored back as the hash.
TEXT runtime·aeshash64(SB),7,$0-12
	MOVL	p+8(FP), AX	// ptr to data
	MOVL	h+0(FP), DX	// ptr to hash value
	MOVQ	(AX), X0	// data
	PINSRD	$2, (DX), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+0(SB), X0
	MOVL	X0, (DX)
	RET
818
Keith Randalla5d40242013-03-12 10:47:44 -0700819// simple mask to get rid of data in the high part of the register.
Russ Cox9ddfb642013-07-16 16:24:09 -0400820DATA masks<>+0x00(SB)/4, $0x00000000
821DATA masks<>+0x04(SB)/4, $0x00000000
822DATA masks<>+0x08(SB)/4, $0x00000000
823DATA masks<>+0x0c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700824
Russ Cox9ddfb642013-07-16 16:24:09 -0400825DATA masks<>+0x10(SB)/4, $0x000000ff
826DATA masks<>+0x14(SB)/4, $0x00000000
827DATA masks<>+0x18(SB)/4, $0x00000000
828DATA masks<>+0x1c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700829
Russ Cox9ddfb642013-07-16 16:24:09 -0400830DATA masks<>+0x20(SB)/4, $0x0000ffff
831DATA masks<>+0x24(SB)/4, $0x00000000
832DATA masks<>+0x28(SB)/4, $0x00000000
833DATA masks<>+0x2c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700834
Russ Cox9ddfb642013-07-16 16:24:09 -0400835DATA masks<>+0x30(SB)/4, $0x00ffffff
836DATA masks<>+0x34(SB)/4, $0x00000000
837DATA masks<>+0x38(SB)/4, $0x00000000
838DATA masks<>+0x3c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700839
Russ Cox9ddfb642013-07-16 16:24:09 -0400840DATA masks<>+0x40(SB)/4, $0xffffffff
841DATA masks<>+0x44(SB)/4, $0x00000000
842DATA masks<>+0x48(SB)/4, $0x00000000
843DATA masks<>+0x4c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700844
Russ Cox9ddfb642013-07-16 16:24:09 -0400845DATA masks<>+0x50(SB)/4, $0xffffffff
846DATA masks<>+0x54(SB)/4, $0x000000ff
847DATA masks<>+0x58(SB)/4, $0x00000000
848DATA masks<>+0x5c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700849
Russ Cox9ddfb642013-07-16 16:24:09 -0400850DATA masks<>+0x60(SB)/4, $0xffffffff
851DATA masks<>+0x64(SB)/4, $0x0000ffff
852DATA masks<>+0x68(SB)/4, $0x00000000
853DATA masks<>+0x6c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700854
Russ Cox9ddfb642013-07-16 16:24:09 -0400855DATA masks<>+0x70(SB)/4, $0xffffffff
856DATA masks<>+0x74(SB)/4, $0x00ffffff
857DATA masks<>+0x78(SB)/4, $0x00000000
858DATA masks<>+0x7c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700859
Russ Cox9ddfb642013-07-16 16:24:09 -0400860DATA masks<>+0x80(SB)/4, $0xffffffff
861DATA masks<>+0x84(SB)/4, $0xffffffff
862DATA masks<>+0x88(SB)/4, $0x00000000
863DATA masks<>+0x8c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700864
Russ Cox9ddfb642013-07-16 16:24:09 -0400865DATA masks<>+0x90(SB)/4, $0xffffffff
866DATA masks<>+0x94(SB)/4, $0xffffffff
867DATA masks<>+0x98(SB)/4, $0x000000ff
868DATA masks<>+0x9c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700869
Russ Cox9ddfb642013-07-16 16:24:09 -0400870DATA masks<>+0xa0(SB)/4, $0xffffffff
871DATA masks<>+0xa4(SB)/4, $0xffffffff
872DATA masks<>+0xa8(SB)/4, $0x0000ffff
873DATA masks<>+0xac(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700874
Russ Cox9ddfb642013-07-16 16:24:09 -0400875DATA masks<>+0xb0(SB)/4, $0xffffffff
876DATA masks<>+0xb4(SB)/4, $0xffffffff
877DATA masks<>+0xb8(SB)/4, $0x00ffffff
878DATA masks<>+0xbc(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700879
Russ Cox9ddfb642013-07-16 16:24:09 -0400880DATA masks<>+0xc0(SB)/4, $0xffffffff
881DATA masks<>+0xc4(SB)/4, $0xffffffff
882DATA masks<>+0xc8(SB)/4, $0xffffffff
883DATA masks<>+0xcc(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -0700884
Russ Cox9ddfb642013-07-16 16:24:09 -0400885DATA masks<>+0xd0(SB)/4, $0xffffffff
886DATA masks<>+0xd4(SB)/4, $0xffffffff
887DATA masks<>+0xd8(SB)/4, $0xffffffff
888DATA masks<>+0xdc(SB)/4, $0x000000ff
Keith Randalla5d40242013-03-12 10:47:44 -0700889
Russ Cox9ddfb642013-07-16 16:24:09 -0400890DATA masks<>+0xe0(SB)/4, $0xffffffff
891DATA masks<>+0xe4(SB)/4, $0xffffffff
892DATA masks<>+0xe8(SB)/4, $0xffffffff
893DATA masks<>+0xec(SB)/4, $0x0000ffff
Keith Randalla5d40242013-03-12 10:47:44 -0700894
Russ Cox9ddfb642013-07-16 16:24:09 -0400895DATA masks<>+0xf0(SB)/4, $0xffffffff
896DATA masks<>+0xf4(SB)/4, $0xffffffff
897DATA masks<>+0xf8(SB)/4, $0xffffffff
898DATA masks<>+0xfc(SB)/4, $0x00ffffff
Keith Randalla5d40242013-03-12 10:47:44 -0700899
Russ Cox9ddfb642013-07-16 16:24:09 -0400900GLOBL masks<>(SB),8,$256
Keith Randalla5d40242013-03-12 10:47:44 -0700901
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// The table holds sixteen 16-byte entries; entry n (at byte offset 16*n)
// selects source bytes 16-n..15 into destination bytes 0..n-1, and uses
// 0xff (which makes pshufb write zero) for every other destination byte.
// Entry 0 is all zeros; aeshashbody never indexes it because a zero
// length skips the partial-block step.

// move 0 bytes (unused)
DATA	shifts<>+0x00(SB)/4, $0x00000000
DATA	shifts<>+0x04(SB)/4, $0x00000000
DATA	shifts<>+0x08(SB)/4, $0x00000000
DATA	shifts<>+0x0c(SB)/4, $0x00000000

// move 1 byte
DATA	shifts<>+0x10(SB)/4, $0xffffff0f
DATA	shifts<>+0x14(SB)/4, $0xffffffff
DATA	shifts<>+0x18(SB)/4, $0xffffffff
DATA	shifts<>+0x1c(SB)/4, $0xffffffff

// move 2 bytes
DATA	shifts<>+0x20(SB)/4, $0xffff0f0e
DATA	shifts<>+0x24(SB)/4, $0xffffffff
DATA	shifts<>+0x28(SB)/4, $0xffffffff
DATA	shifts<>+0x2c(SB)/4, $0xffffffff

// move 3 bytes
DATA	shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA	shifts<>+0x34(SB)/4, $0xffffffff
DATA	shifts<>+0x38(SB)/4, $0xffffffff
DATA	shifts<>+0x3c(SB)/4, $0xffffffff

// move 4 bytes
DATA	shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0x44(SB)/4, $0xffffffff
DATA	shifts<>+0x48(SB)/4, $0xffffffff
DATA	shifts<>+0x4c(SB)/4, $0xffffffff

// move 5 bytes
DATA	shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0x54(SB)/4, $0xffffff0f
DATA	shifts<>+0x58(SB)/4, $0xffffffff
DATA	shifts<>+0x5c(SB)/4, $0xffffffff

// move 6 bytes
DATA	shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0x64(SB)/4, $0xffff0f0e
DATA	shifts<>+0x68(SB)/4, $0xffffffff
DATA	shifts<>+0x6c(SB)/4, $0xffffffff

// move 7 bytes
DATA	shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA	shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA	shifts<>+0x78(SB)/4, $0xffffffff
DATA	shifts<>+0x7c(SB)/4, $0xffffffff

// move 8 bytes
DATA	shifts<>+0x80(SB)/4, $0x0b0a0908
DATA	shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0x88(SB)/4, $0xffffffff
DATA	shifts<>+0x8c(SB)/4, $0xffffffff

// move 9 bytes
DATA	shifts<>+0x90(SB)/4, $0x0a090807
DATA	shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0x98(SB)/4, $0xffffff0f
DATA	shifts<>+0x9c(SB)/4, $0xffffffff

// move 10 bytes
DATA	shifts<>+0xa0(SB)/4, $0x09080706
DATA	shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA	shifts<>+0xac(SB)/4, $0xffffffff

// move 11 bytes
DATA	shifts<>+0xb0(SB)/4, $0x08070605
DATA	shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA	shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA	shifts<>+0xbc(SB)/4, $0xffffffff

// move 12 bytes
DATA	shifts<>+0xc0(SB)/4, $0x07060504
DATA	shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA	shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA	shifts<>+0xcc(SB)/4, $0xffffffff

// move 13 bytes
DATA	shifts<>+0xd0(SB)/4, $0x06050403
DATA	shifts<>+0xd4(SB)/4, $0x0a090807
DATA	shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA	shifts<>+0xdc(SB)/4, $0xffffff0f

// move 14 bytes
DATA	shifts<>+0xe0(SB)/4, $0x05040302
DATA	shifts<>+0xe4(SB)/4, $0x09080706
DATA	shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA	shifts<>+0xec(SB)/4, $0xffff0f0e

// move 15 bytes
DATA	shifts<>+0xf0(SB)/4, $0x04030201
DATA	shifts<>+0xf4(SB)/4, $0x08070605
DATA	shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA	shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL	shifts<>(SB),8,$256
986
// memeq loads its three stack arguments (a, b, count) into the registers
// memeqbody expects (SI, DI, BX) and tail-jumps to it. memeqbody's
// result in AX is therefore what the caller of memeq sees.
TEXT runtime·memeq(SB),7,$0-12
	MOVL	count+8(FP), BX
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	JMP	runtime·memeqbody(SB)
992
// Equal implements bytes.Equal for two byte slices a and b.
// Slices of different length are unequal without looking at their
// contents; otherwise memeqbody compares a_len bytes. The low byte of AX
// (0 or 1) is stored to ret+24(FP).
TEXT bytes·Equal(SB),7,$0-25
	XORL	AX, AX			// result stays 0 if the lengths differ
	MOVL	b_len+16(FP), CX
	MOVL	a_len+4(FP), BX
	CMPL	BX, CX
	JNE	bytes_equal_done
	MOVL	b+12(FP), DI
	MOVL	a+0(FP), SI
	CALL	runtime·memeqbody(SB)
bytes_equal_done:
	MOVB	AX, ret+24(FP)
	RET
1005
// memeqbody compares two memory regions of equal length.
// It is reached by JMP from runtime·memeq and by CALL from bytes·Equal.
//
// input:
//	SI = a
//	DI = b
//	BX = count
// output:
//	AX = 1 if the regions are equal, 0 otherwise
// Also overwrites BX, CX, DX, SI, DI and X0-X7.
TEXT runtime·memeqbody(SB),7,$0-0
	XORL	AX, AX			// result is 0 on every early-exit mismatch

	CMPL	BX, $4
	JB	memeq_small

	// 64 bytes at a time using xmm registers
memeq_loop64:
	CMPL	BX, $64
	JB	memeq_loop4
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	memeq_loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0			// fold the four per-byte results into X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB	X0, DX		// one mask bit per byte; 0xffff = all 64 bytes equal
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	memeq_loop64
	RET				// mismatch, AX is still 0

	// 4 bytes at a time using 32-bit register
memeq_loop4:
	CMPL	BX, $4
	JBE	memeq_tail4
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	memeq_loop4
	RET				// mismatch, AX is still 0

	// remaining 0-4 bytes: compare the 4 bytes that end at the last
	// byte of each region (these may overlap bytes already compared).
memeq_tail4:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	AX
	RET

	// fewer than 4 bytes in total
memeq_small:
	TESTL	BX, BX
	JEQ	memeq_result		// count == 0: ZF is set, so the result is 1

	// CX = -8*count. Shifts only use the low 5 bits of CX, so the
	// effective shift amount below is 32 - 8*count.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	memeq_si_high

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	memeq_si_done
memeq_si_high:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
memeq_si_done:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	memeq_di_high
	MOVL	(DI), DI
	JMP	memeq_di_done
memeq_di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
memeq_di_done:

	// The difference, with the bytes beyond count shifted out, is zero
	// exactly when the low count bytes agree.
	SUBL	SI, DI
	SHLL	CX, DI
memeq_result:
	SETEQ	AX
	RET
Keith Randallb3946dc2013-05-14 16:05:51 -07001101
// cmpstring compares two strings s1 and s2.
// It loads each string's data pointer and length into the registers
// cmpbody expects (SI/BX for s1, DI/DX for s2), calls cmpbody, and stores
// the result from AX into res+16(FP).
TEXT runtime·cmpstring(SB),7,$0-20
	MOVL	s2+12(FP), DX
	MOVL	s2+8(FP), DI
	MOVL	s1+4(FP), BX
	MOVL	s1+0(FP), SI
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+16(FP)
	RET
1110
// Compare implements bytes.Compare for two byte slices s1 and s2.
// It loads each slice's data pointer and length into the registers
// cmpbody expects (SI/BX for s1, DI/DX for s2), calls cmpbody, and stores
// the result from AX into res+24(FP).
TEXT bytes·Compare(SB),7,$0-28
	MOVL	s2+16(FP), DX
	MOVL	s2+12(FP), DI
	MOVL	s1+4(FP), BX
	MOVL	s1+0(FP), SI
	CALL	runtime·cmpbody(SB)
	MOVL	AX, res+24(FP)
	RET
1119
// cmpbody is the three-way comparison core shared by runtime·cmpstring
// and bytes·Compare.
//
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
//
// The first min(alen, blen) bytes are compared; if they all match, the
// result is decided by comparing the lengths.
// Also overwrites BP, CX, SI, DI, X0, X1 (and BX on the 16-byte mismatch path).
TEXT runtime·cmpbody(SB),7,$0-0
	CMPL	SI, DI
	JEQ	cmp_bylength		// same address: only the lengths can differ
	MOVL	DX, BP
	CMPL	BX, DX
	CMOVLLT	BX, BP			// BP = min(alen, blen)
	CMPL	BP, $4
	JB	cmp_under4
	TESTL	$0x4000000, runtime·cpuid_edx(SB)	// check for sse2
	JE	cmp_loop4

	// 16 bytes at a time using xmm registers
cmp_loop16:
	CMPL	BP, $16
	JB	cmp_loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB	X1, AX
	XORL	$0xffff, AX		// convert EQ to NE
	JNE	cmp_mismatch16		// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	cmp_loop16

cmp_mismatch16:
	BSFL	AX, BX			// index of first byte that differs
	XORL	AX, AX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	AX
	LEAL	-1(AX*2), AX		// convert 1/0 to +1/-1
	RET

	// 4 bytes at a time using 32-bit registers
cmp_loop4:
	CMPL	BP, $4
	JBE	cmp_tail4
	MOVL	(SI), AX
	MOVL	(DI), CX
	CMPL	AX, CX
	JNE	cmp_mismatch4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	cmp_loop4

	// 0-4 bytes left: compare the 4 bytes ending at the last common byte
	// (these may overlap bytes already compared).
cmp_tail4:
	MOVL	-4(SI)(BP*1), AX
	MOVL	-4(DI)(BP*1), CX
	CMPL	AX, CX
	JEQ	cmp_bylength

cmp_mismatch4:
	BSWAPL	AX			// reverse order of bytes
	BSWAPL	CX
	XORL	AX, CX			// find bit differences
	BSRL	CX, CX			// index of highest bit difference
	SHRL	CX, AX			// move a's bit to bottom
	ANDL	$1, AX			// mask bit
	LEAL	-1(AX*2), AX		// 1/0 => +1/-1
	RET

	// 0-3 bytes in common
cmp_under4:
	// CX = -8*BP. Shifts only use the low 5 bits of CX, so the
	// effective shift amount below is 32 - 8*BP.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	cmp_bylength		// no bytes in common

	// load si
	CMPB	SI, $0xfc
	JA	cmp_a_high
	MOVL	(SI), SI
	JMP	cmp_a_loaded
cmp_a_high:
	// low address byte is above 0xfc: load the 4 bytes ending at the
	// last common byte instead, then move them down.
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
cmp_a_loaded:
	SHLL	CX, SI			// wanted bytes to the top, rest shifted out

	// same for di
	CMPB	DI, $0xfc
	JA	cmp_b_high
	MOVL	(DI), DI
	JMP	cmp_b_loaded
cmp_b_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
cmp_b_loaded:
	SHLL	CX, DI

	BSWAPL	SI			// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI			// find bit differences
	JEQ	cmp_bylength
	BSRL	DI, CX			// index of highest bit difference
	SHRL	CX, SI			// move a's bit to bottom
	ANDL	$1, SI			// mask bit
	LEAL	-1(SI*2), AX		// 1/0 => +1/-1
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
cmp_bylength:
	XORL	AX, AX
	XORL	CX, CX
	CMPL	BX, DX
	SETGT	AX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAL	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET
1235 RET