blob: ef82756397bbd85c05decd9d2432be8a2d511edb [file] [log] [blame]
Russ Cox0d3a0432009-03-30 00:01:07 -07001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Russ Cox15ced2d2014-11-11 17:06:22 -05005#include "go_asm.h"
6#include "go_tls.h"
Russ Cox9ddfb642013-07-16 16:24:09 -04007#include "funcdata.h"
Russ Coxcb040d52014-09-04 23:05:18 -04008#include "textflag.h"
Russ Cox8522a472009-06-17 15:15:55 -07009
// _rt0_386 is the shared program entry point for most 386 systems when
// the binary is an ordinary -buildmode=exe program linked internally.
// The kernel enters here with the argument count on the stack followed
// by the C-style argv array.
//
// The $8 frame supplies two outgoing slots. argc is copied into 0(SP)
// and the address of argv into 4(SP), which is where runtime·rt0_go
// picks them up after the JMP.
TEXT _rt0_386(SB),NOSPLIT,$8
	LEAL	12(SP), BX	// argv
	MOVL	8(SP), AX	// argc
	MOVL	BX, 4(SP)
	MOVL	AX, 0(SP)
	JMP	runtime·rt0_go(SB)
20
// _rt0_386_lib is the shared 386 startup code for
// -buildmode=c-archive and -buildmode=c-shared. The linker arranges for
// it to run as a global constructor (c-archive) or when the shared
// library is loaded (c-shared). argc and argv are expected on the stack
// following the usual C ABI.
//
// It records argc/argv in file-local globals, runs runtime·libpreinit
// synchronously, and then starts _rt0_386_lib_go on a new thread:
// through _cgo_sys_thread_create when that pointer is non-zero,
// otherwise through runtime·newosproc0.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	// Set up a frame and save BX, SI and DI; all four pushes are
	// undone in reverse order before RET.
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// Stash the incoming arguments for _rt0_386_lib_go.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Two words of outgoing-argument space, shared by both paths below.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// BP is reused to remember the unaligned SP: the copy of SP placed
	// in BP on entry was clobbered by the libpreinit call.
	MOVL	SP, BP
	ANDL	$~15, SP

	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)	// first argument: function to run
	MOVL	$0, 4(SP)	// second argument: zero

	CALL	AX

	MOVL	BP, SP	// drop the alignment adjustment

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP	// release the outgoing-argument space
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET
77
// _rt0_386_lib_go initializes the Go runtime.
// _rt0_386_lib starts it on a separate thread. It reloads the argc and
// argv values saved by _rt0_386_lib into 0(SP) and 4(SP) and then jumps
// to runtime·rt0_go.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)	// argc
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)	// argv
	JMP	runtime·rt0_go(SB)
86
// Storage for the argc and argv values captured by _rt0_386_lib and
// read back by _rt0_386_lib_go. Each is a 4-byte, zero-initialized,
// file-local (<>) symbol marked NOPTR.
DATA	_rt0_386_lib_argc<>(SB)/4, $0
GLOBL	_rt0_386_lib_argc<>(SB),NOPTR, $4
DATA	_rt0_386_lib_argv<>(SB)/4, $0
GLOBL	_rt0_386_lib_argv<>(SB),NOPTR, $4
91
// runtime·rt0_go is the common tail of process startup. It is reached
// by JMP (never CALL) with argc in 0(SP) and argv in 4(SP).
//
// In order, it:
//   - carves out an aligned scratch area and saves argc/argv in it,
//   - gives g0 provisional stack bounds,
//   - probes the processor with CPUID and records feature flags,
//   - sets up TLS, via _cgo_init when present and otherwise via
//     runtime·ldt0setup,
//   - links g0 and m0 together,
//   - runs check, args, osinit and schedinit,
//   - hands runtime·mainPC to newproc and enters mstart.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX	// argc
	MOVL	4(SP), BX	// argv
	SUBL	$128, SP	// plenty of scratch
	ANDL	$~15, SP
	MOVL	AX, 120(SP)	// save argc, argv away
	MOVL	BX, 124(SP)

	// Set default stack bounds on g0.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// Find out information about the processor we're on.
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// First see if the CPUID instruction is supported: flip the ID bit
	// (bit 21) in a copy of EFLAGS, load it, read EFLAGS back and
	// check whether that bit now differs from the saved original.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)	// flip ID bit
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX	// AX = bits that differ from the saved EFLAGS
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid
#endif

bad_proc: // show that the program requires MMX.
	// write(2, bad_proc_msg, 0x3d), then exit(1).
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI	// keep the EAX=0 result; compared against 7 at eax7
	TESTL	AX, AX
	JZ	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI	// Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support; without it the program cannot run.
	TESTL	$(1<<23), DX	// MMX
	JZ	bad_proc

	TESTL	$(1<<26), DX	// SSE2
	SETNE	runtime·support_sse2(SB)

	// The remaining EAX=1 bits come from the CX copy held in DI.
	TESTL	$(1<<9), DI	// SSSE3
	SETNE	runtime·support_ssse3(SB)

	TESTL	$(1<<19), DI	// SSE4.1
	SETNE	runtime·support_sse41(SB)

	TESTL	$(1<<20), DI	// SSE4.2
	SETNE	runtime·support_sse42(SB)

	TESTL	$(1<<23), DI	// POPCNT
	SETNE	runtime·support_popcnt(SB)

	TESTL	$(1<<25), DI	// AES
	SETNE	runtime·support_aes(SB)

	TESTL	$(1<<27), DI	// OSXSAVE
	SETNE	runtime·support_osxsave(SB)

	// If OS support for XMM and YMM is not present
	// support_avx will be set back to false later.
	TESTL	$(1<<28), DI	// AVX
	SETNE	runtime·support_avx(SB)

eax7:
	// Load EAX=7/ECX=0 cpuid flags, but only when the value returned
	// by the EAX=0 query (saved in SI) is at least 7.
	CMPL	SI, $7
	JLT	osavx
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID

	TESTL	$(1<<3), BX	// BMI1
	SETNE	runtime·support_bmi1(SB)

	// If OS support for XMM and YMM is not present
	// support_avx2 will be set back to false later.
	TESTL	$(1<<5), BX	// AVX2
	SETNE	runtime·support_avx2(SB)

	TESTL	$(1<<8), BX	// BMI2
	SETNE	runtime·support_bmi2(SB)

	TESTL	$(1<<9), BX	// ERMS
	SETNE	runtime·support_erms(SB)

osavx:
	// nacl does not support XGETBV to test
	// for XMM and YMM OS support.
#ifndef GOOS_nacl
	CMPB	runtime·support_osxsave(SB), $1
	JNE	noavx
	MOVL	$0, CX
	// For XGETBV, OSXSAVE bit is required and sufficient
	XGETBV
	ANDL	$6, AX
	CMPL	AX, $6	// Check for OS support of XMM and YMM registers.
	JE	nocpuinfo
#endif
noavx:
	// No OS support for the wide registers: withdraw AVX and AVX2.
	MOVB	$0, runtime·support_avx(SB)
	MOVB	$0, runtime·support_avx2(SB)

nocpuinfo:
	// If there is a _cgo_init, call it to let it
	// initialize and to set up GS. If not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// second argument: setg_gcc
	MOVL	BP, 0(SP)	// first argument: BP, loaded with $runtime·g0 above
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// Store through it, to make sure it works: write a marker via the
	// TLS slot and read it back from m0's tls field.
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// Pass the saved argc, argv to runtime·args.
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	// Trap if mstart ever comes back.
	INT	$3
	RET
311
// bad_proc_msg is the 0x3d-byte (61-byte) read-only message, including
// its trailing newline, that the bad_proc path of runtime·rt0_go passes
// to runtime·write. It is laid out as seven 8-byte pieces, one 4-byte
// piece and the newline.
DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d
322
// runtime·mainPC is a 4-byte read-only word holding the address of
// runtime·main. rt0_go pushes the address of this word as the entry
// argument to runtime·newproc.
DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$4
Russ Cox1903ad72013-02-21 17:01:13 -0500325
// runtime·breakpoint executes a single INT $3 (the x86 breakpoint
// trap) and returns. It takes no arguments and has no results.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET
329
// runtime·asminit loads the x87 FPU control word from
// runtime·controlWord64.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Switch to double precision so that all of them behave alike,
	// and so that results match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET
337
Russ Cox8522a472009-06-17 15:15:55 -0700338/*
339 * go-routine
340 */
Russ Cox0d3a0432009-03-30 00:01:07 -0700341
// void gosave(Gobuf*)
// Save the caller's state in the Gobuf; the setjmp half of the
// gosave/gogo pair. Records the caller's SP and PC, zeroes ret, checks
// that ctxt is already zero, and stores the current g.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX	// gobuf
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, gobuf_pc(AX)
	LEAL	buf+0(FP), BX	// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	ctxtok
	CALL	runtime·badctxt(SB)
ctxtok:
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET
360
// void gogo(Gobuf*)
// Restore state from the Gobuf and resume there; the longjmp half of
// the gosave/gogo pair. The 8-byte frame holds the two arguments for
// the write-barrier call made when ctxt is non-nil.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX	// gobuf

	// If ctxt is not nil, invoke deletion barrier before overwriting.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	nilctxt
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)	// address of the ctxt slot
	MOVL	$0, 4(SP)	// value about to be written
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX	// reload gobuf after the call

nilctxt:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX	// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)	// install gobuf.g as the current g
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX
Russ Cox8522a472009-06-17 15:15:55 -0700389
// func mcall(fn func(*g))
// Switch to m->g0's stack and call fn(g), where g is the goroutine
// that made the call.
// fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	// Save the caller's state in g->sched.
	get_tls(DX)
	MOVL	g(DX), AX
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	switchtog0
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
switchtog0:
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX	// argument to fn: the g we came from
	MOVL	DI, DX	// DX = fn, as the call's context register
	MOVL	0(DI), DI	// code pointer is the first word of fn
	CALL	DI
	// fn returned, which it must not do.
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET
423
// systemstack_switch is a placeholder routine whose address systemstack
// records as the saved PC at the bottom of the G stack. It has to be a
// different routine from the one found at the top of the system stack,
// because that one terminates the stack walk (see topofstack()).
// The body does nothing but return.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET
431
// func systemstack(fn func())
// Run fn on the system stack.
//
// If the current g is m->gsignal or m->g0, fn is called directly on the
// current stack. If it is m->curg, the goroutine's state is saved in
// g->sched, execution moves to g0's stack for the call, and afterwards
// switches back to m->curg. Any other g is reported through
// runtime·badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	// Trap if badsystemstack ever returns. Without this, control
	// would fall into the switch path below with AX holding a code
	// address rather than g, and the stores to g->sched would go
	// through that address.
	INT	$3

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX	// DX = fn, as the call's context register
	MOVL	0(DI), DI	// code pointer is the first word of fn
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX
	MOVL	0(DI), DI
	CALL	DI
	RET
494
Russ Cox8522a472009-06-17 15:15:55 -0700495/*
496 * support for morestack
497 */
498
// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// DX is pushed as the ctxt argument to runtime·newstack; this routine
// never writes DX before that push.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX	// BX = m
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	notg0
	CALL	runtime·badmorestackg0(SB)
	INT	$3

notg0:
	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	notgsignal
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

notgsignal:
	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)	// CX was just used as scratch; reload TLS
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// newstack will fill gobuf.ctxt.

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	PUSHL	DX	// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	POPL	DX	// keep balance check happy
	RET
552
// runtime·morestack_noctxt zeroes DX, the register runtime·morestack
// pushes as its ctxt argument, and then tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)
556
// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// Frames cannot be variable-sized, so a small family of functions with
// constant frame sizes is used instead, which encodes a few bits of the
// size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH(NAME, MAXSIZE) jumps to NAME when the unsigned value in CX
// is at most MAXSIZE; otherwise it falls through to whatever follows
// the macro. AX is overwritten when the jump is taken.
#define DISPATCH(NAME,MAXSIZE)	\
	CMPL	CX, $MAXSIZE;	\
	JA	3(PC);	\
	MOVL	$NAME(SB), AX;	\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
Keith Randall9cd57062013-08-02 13:03:14 -0700569
// reflect·call is a trampoline that forwards to ·reflectcall. It jumps
// without touching the stack, so the arguments are seen unchanged.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)
572
// ·reflectcall loads argsize from argsize+12(FP) and jumps to the first
// runtime·callN in the list below whose N is at least argsize (unsigned
// comparison). When argsize exceeds 1073741824, it jumps to
// runtime·badreflectcall instead.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// Larger than every available frame.
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX
604
// CALLFN(NAME, MAXSIZE) defines one fixed-frame call function: a
// WRAPPER function NAME with a MAXSIZE-byte frame and 20 bytes of
// arguments. It copies argsize bytes from argptr into its own frame,
// calls the code pointer stored in the first word of f, and then hands
// callRet<> the registers describing the bytes from retoffset onward
// so that they can be copied back.
#define CALLFN(NAME,MAXSIZE)	\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;	\
	NO_LOCAL_POINTERS;	\
	/* copy arguments to stack */	\
	MOVL	argptr+8(FP), SI;	\
	MOVL	argsize+12(FP), CX;	\
	MOVL	SP, DI;	\
	REP;MOVSB;	\
	/* call function */	\
	MOVL	f+4(FP), DX;	\
	MOVL	(DX), AX;	\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;	\
	/* copy return values back */	\
	MOVL	argtype+0(FP), DX;	\
	MOVL	argptr+8(FP), DI;	\
	MOVL	argsize+12(FP), CX;	\
	MOVL	retoffset+16(FP), BX;	\
	MOVL	SP, SI;	\
	/* skip the first retoffset bytes on both sides */	\
	ADDL	BX, DI;	\
	ADDL	BX, SI;	\
	SUBL	BX, CX;	\
	CALL	callRet<>(SB);	\
	RET
629
// callRet copies return values back at the end of call*. It is a
// separate function so that it can have its own 16-byte frame for the
// four arguments to reflectcallmove. It does not follow the Go ABI;
// its inputs arrive in registers and are stored into that frame in
// this order: DX, DI, SI, CX.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET
641
// Instantiate one fixed-frame call function per power-of-two size from
// 16 bytes up to 1073741824 bytes. These are the targets ·reflectcall
// dispatches to.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
Russ Coxbba278a2009-07-08 18:16:09 -0700669
// runtime·procyield executes PAUSE `cycles` times and returns.
//
// cycles is read as a 32-bit value from cycles+0(FP), so the frame
// declares 4 bytes of arguments ($0-4), in line with the other
// one-argument functions in this file.
//
// The counter is decremented before it is tested, so cycles == 0 wraps
// around instead of returning immediately; callers must pass a
// non-zero count.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET
677
// ·publicationBarrier has an empty body: it returns immediately.
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// x86 already keeps stores in order, so nothing needs to be
	// executed here; the call serves only as a compile barrier.
	RET
682
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. move the caller's return address back by 5 bytes (the length of a
//    CALL with a 32 bit displacement) so that the CALL is executed
//    again. When building for shared libraries the adjustment is 16
//    bytes: 5 for the CALL & displacement to __x86.get_pc_thunk.cx,
//    6 for the LEAL that loads the offset into BX, and 5 for the
//    call & displacement itself.
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX	// code pointer is the first word of fn
	JMP	BX	// but first run the deferred function
Russ Cox0d3a0432009-03-30 00:01:07 -0700702
// gosave<> saves the state of its caller into the current g's sched
// buffer: the caller's SP and PC are recorded, ret is zeroed, and ctxt
// is checked to be zero already. AX and BX are pushed on entry and
// popped before returning.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX	// BX = current g
	LEAL	arg+0(FP), AX	// caller's SP
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX	// return address stored just below it
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	ctxtok
	CALL	runtime·badctxt(SB)
ctxtok:
	POPL	BX
	POPL	AX
	RET
722
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// The value fn leaves in AX is stored as the result.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX	// remember the SP we entered with

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI	// SI = m->g0
	MOVL	g(CX), DI	// DI = current g
	CMPL	SI, DI
	JEQ	ong0
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

ong0:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI	// saved g
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI	// stack_hi - saved depth = original SP
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
769
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700770// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
Russ Cox3d2dfc52013-02-22 16:08:56 -0500771// Turn the fn into a Go func (by taking its address) and call
772// cgocallback_gofunc.
// The four arguments are copied into this function's 16-byte outgoing
// frame; the first one passed on is the address of the fn argument slot,
// not fn itself.
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700773TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
Russ Cox3d2dfc52013-02-22 16:08:56 -0500774 LEAL fn+0(FP), AX // &fn: a pointer to a word holding the code address
775 MOVL AX, 0(SP)
776 MOVL frame+4(FP), AX
777 MOVL AX, 4(SP)
778 MOVL framesize+8(FP), AX
779 MOVL AX, 8(SP)
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700780 MOVL ctxt+12(FP), AX
781 MOVL AX, 12(SP)
Russ Cox3d2dfc52013-02-22 16:08:56 -0500782 MOVL $runtime·cgocallback_gofunc(SB), AX // call indirectly through AX
783 CALL AX
784 RET
785
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700786// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
Alex Brainman9d968cb2015-04-27 17:32:23 +1000787// See cgocall.go for more details.
// Outline: obtain an m if the thread has none (needm), save g0's sched.sp
// at 0(SP), switch to m->curg's stack, call runtime·cgocallbackg, restore
// curg's sched.pc/sp, switch back to g0, and release a borrowed m (dropm).
// DX carries the "m that existed on entry" value across the whole routine;
// it is zero when needm was used.
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700788TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
Russ Coxe844f532014-09-12 07:46:11 -0400789 NO_LOCAL_POINTERS
790
Russ Cox89f185f2014-06-26 11:54:39 -0400791 // If g is nil, Go did not create the current thread.
Russ Cox6c976392013-02-20 17:48:23 -0500792 // Call needm to obtain one for temporary use.
793 // In this case, we're running on the thread stack, so there's
794 // lots of space, but the linker doesn't know. Hide the call from
795 // the linker analysis by using an indirect call through AX.
796 get_tls(CX)
797#ifdef GOOS_windows
	// On Windows the TLS base itself may be zero; in that case leave BP = 0
	// and skip the load of g below.
Russ Coxdba623b2013-07-23 18:40:02 -0400798 MOVL $0, BP
Russ Cox6c976392013-02-20 17:48:23 -0500799 CMPL CX, $0
Russ Cox89f185f2014-06-26 11:54:39 -0400800 JEQ 2(PC) // TODO
Russ Cox6c976392013-02-20 17:48:23 -0500801#endif
Russ Cox89f185f2014-06-26 11:54:39 -0400802 MOVL g(CX), BP
Russ Cox6c976392013-02-20 17:48:23 -0500803 CMPL BP, $0
Russ Cox89f185f2014-06-26 11:54:39 -0400804 JEQ needm
805 MOVL g_m(BP), BP
806 MOVL BP, DX // saved copy of oldm
807 JMP havem
Russ Cox6c976392013-02-20 17:48:23 -0500808needm:
Russ Cox89f185f2014-06-26 11:54:39 -0400809 MOVL $0, 0(SP)
Russ Cox6c976392013-02-20 17:48:23 -0500810 MOVL $runtime·needm(SB), AX
811 CALL AX
Russ Coxf0112822013-07-24 09:01:57 -0400812 MOVL 0(SP), DX // reload the slot zeroed above; presumably still 0, marking "no m on entry"
Russ Coxf9ca3b52011-03-07 10:37:42 -0500813 get_tls(CX)
Russ Cox89f185f2014-06-26 11:54:39 -0400814 MOVL g(CX), BP
815 MOVL g_m(BP), BP
Russ Cox9b732382012-03-08 12:12:40 -0500816
Russ Coxc4efaac2014-10-28 21:53:09 -0400817 // Set m->g0->sched.sp = SP, so that if a panic happens
818 // during the function we are about to execute, it will
819 // have a valid SP to run on the g0 stack.
820 // The next few lines (after the havem label)
821 // will save this SP onto the stack and then write
822 // the same SP back to m->g0->sched.sp. That seems redundant,
823 // but if an unrecovered panic happens, unwindm will
824 // restore the g->sched.sp from the stack location
Russ Cox656be312014-11-12 14:54:31 -0500825 // and then systemstack will try to use it. If we don't set it here,
Russ Coxc4efaac2014-10-28 21:53:09 -0400826 // that restored SP will be uninitialized (typically 0) and
827 // will not be usable.
828 MOVL m_g0(BP), SI
829 MOVL SP, (g_sched+gobuf_sp)(SI)
830
Russ Cox6c976392013-02-20 17:48:23 -0500831havem:
832 // Now there's a valid m, and we're running on its m->g0.
833 // Save current m->g0->sched.sp on stack and then set it to SP.
834 // Save current sp in m->g0->sched.sp in preparation for
835 // switch back to m->curg stack.
Russ Coxdba623b2013-07-23 18:40:02 -0400836 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
Russ Coxf9ca3b52011-03-07 10:37:42 -0500837 MOVL m_g0(BP), SI
Russ Coxdba623b2013-07-23 18:40:02 -0400838 MOVL (g_sched+gobuf_sp)(SI), AX
839 MOVL AX, 0(SP)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500840 MOVL SP, (g_sched+gobuf_sp)(SI)
841
Russ Coxdba623b2013-07-23 18:40:02 -0400842 // Switch to m->curg stack and call runtime.cgocallbackg.
843 // Because we are taking over the execution of m->curg
844 // but *not* resuming what had been running, we need to
845 // save that information (m->curg->sched) so we can restore it.
Russ Cox528534c2013-06-05 07:16:53 -0400846 // We can restore m->curg->sched.sp easily, because calling
Alex Brainman72e83482011-08-18 12:17:09 -0400847 // runtime.cgocallbackg leaves SP unchanged upon return.
Russ Cox528534c2013-06-05 07:16:53 -0400848 // To save m->curg->sched.pc, we push it onto the stack.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500849 // This has the added benefit that it looks to the traceback
Alex Brainman72e83482011-08-18 12:17:09 -0400850 // routine like cgocallbackg is going to return to that
Russ Coxdba623b2013-07-23 18:40:02 -0400851 // PC (because the frame we allocate below has the same
852 // size as cgocallback_gofunc's frame declared above)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500853 // so that the traceback will seamlessly trace back into
854 // the earlier calls.
Russ Coxdba623b2013-07-23 18:40:02 -0400855 //
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700856 // In the new goroutine, 4(SP) holds the saved oldm (DX) register.
857 // 8(SP) is unused.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500858 MOVL m_curg(BP), SI
859 MOVL SI, g(CX)
Russ Coxdba623b2013-07-23 18:40:02 -0400860 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
Russ Coxf9ca3b52011-03-07 10:37:42 -0500861 MOVL (g_sched+gobuf_pc)(SI), BP
Russ Coxdba623b2013-07-23 18:40:02 -0400862 MOVL BP, -4(DI) // saved curg PC goes where a return address would be
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700863 MOVL ctxt+12(FP), CX // read ctxt before SP changes
Russ Coxf0112822013-07-24 09:01:57 -0400864 LEAL -(4+12)(DI), SP // 4 bytes for the saved PC plus the 12-byte frame
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700865 MOVL DX, 4(SP)
866 MOVL CX, 0(SP)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500867 CALL runtime·cgocallbackg(SB)
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700868 MOVL 4(SP), DX // recover oldm saved before the call
Russ Coxf9ca3b52011-03-07 10:37:42 -0500869
Russ Cox528534c2013-06-05 07:16:53 -0400870 // Restore g->sched (== m->curg->sched) from saved values.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500871 get_tls(CX)
872 MOVL g(CX), SI
Russ Coxf0112822013-07-24 09:01:57 -0400873 MOVL 12(SP), BP // the PC stored at -4(DI) above
Russ Coxf9ca3b52011-03-07 10:37:42 -0500874 MOVL BP, (g_sched+gobuf_pc)(SI)
Russ Coxf0112822013-07-24 09:01:57 -0400875 LEAL (12+4)(SP), DI // undo the LEAL above to get the original sched.sp
Russ Coxf9ca3b52011-03-07 10:37:42 -0500876 MOVL DI, (g_sched+gobuf_sp)(SI)
877
878 // Switch back to m->g0's stack and restore m->g0->sched.sp.
879 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
880 // so we do not have to restore it.)
Russ Cox89f185f2014-06-26 11:54:39 -0400881 MOVL g(CX), BP
882 MOVL g_m(BP), BP
Russ Coxf9ca3b52011-03-07 10:37:42 -0500883 MOVL m_g0(BP), SI
884 MOVL SI, g(CX)
885 MOVL (g_sched+gobuf_sp)(SI), SP
Russ Coxdba623b2013-07-23 18:40:02 -0400886 MOVL 0(SP), AX // the old g0 sched.sp saved at havem
887 MOVL AX, (g_sched+gobuf_sp)(SI)
Russ Cox6c976392013-02-20 17:48:23 -0500888
889 // If the m on entry was nil, we called needm above to borrow an m
890 // for the duration of the call. Since the call is over, return it with dropm.
Russ Coxf0112822013-07-24 09:01:57 -0400891 CMPL DX, $0
Russ Cox6c976392013-02-20 17:48:23 -0500892 JNE 3(PC) // had an m on entry: skip the dropm call
893 MOVL $runtime·dropm(SB), AX
894 CALL AX
Russ Coxf9ca3b52011-03-07 10:37:42 -0500895
896 // Done!
897 RET
898
Russ Cox89f185f2014-06-26 11:54:39 -0400899// void setg(G*); set g. for use by needm.
// On Windows the word at 0x14(FS) is updated first: it is cleared when the
// argument is nil (and the routine returns without touching g), otherwise
// it is pointed at the tls array of the new g's m.
Russ Cox25f6b022014-08-27 11:32:17 -0400900TEXT runtime·setg(SB), NOSPLIT, $0-4
Russ Cox89f185f2014-06-26 11:54:39 -0400901 MOVL gg+0(FP), BX
Russ Cox6c976392013-02-20 17:48:23 -0500902#ifdef GOOS_windows
Russ Cox89f185f2014-06-26 11:54:39 -0400903 CMPL BX, $0
Russ Cox6c976392013-02-20 17:48:23 -0500904 JNE settls
905 MOVL $0, 0x14(FS)
906 RET
907settls:
Russ Cox89f185f2014-06-26 11:54:39 -0400908 MOVL g_m(BX), AX
Russ Cox6c976392013-02-20 17:48:23 -0500909 LEAL m_tls(AX), AX // AX = &gg->m->tls
910 MOVL AX, 0x14(FS)
911#endif
Russ Cox6c976392013-02-20 17:48:23 -0500912 get_tls(CX)
Russ Cox6c976392013-02-20 17:48:23 -0500913 MOVL BX, g(CX) // store the new g in its TLS slot
914 RET
915
Russ Cox89f185f2014-06-26 11:54:39 -0400916// void setg_gcc(G*); set g. for use by gcc
// Stores the single stack argument into the g TLS slot; uses only AX and DX.
917TEXT setg_gcc<>(SB), NOSPLIT, $0
Russ Cox6a70f9d2013-03-25 18:14:02 -0400918 get_tls(AX)
Russ Cox89f185f2014-06-26 11:54:39 -0400919 MOVL gg+0(FP), DX
920 MOVL DX, g(AX)
Russ Cox6a70f9d2013-03-25 18:14:02 -0400921 RET
922
Russ Cox8ac35be2014-09-09 14:02:37 -0400923// check that SP is strictly between g->stack.lo and g->stack.hi
// (g->stack.lo < SP < g->stack.hi, compared as unsigned values).
// Executes INT $3 when either bound is violated.
Keith Randall5a546962013-08-07 10:23:24 -0700924TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
Russ Coxf9ca3b52011-03-07 10:37:42 -0500925 get_tls(CX)
926 MOVL g(CX), AX
Russ Cox15b76ad2014-09-09 13:39:57 -0400927 CMPL (g_stack+stack_hi)(AX), SP
Russ Coxf9ca3b52011-03-07 10:37:42 -0500928 JHI 2(PC) // stack.hi > SP: upper bound holds, skip the trap
929 INT $3
Russ Cox15b76ad2014-09-09 13:39:57 -0400930 CMPL SP, (g_stack+stack_lo)(AX)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500931 JHI 2(PC) // SP > stack.lo: lower bound holds, skip the trap
932 INT $3
933 RET
934
Dmitry Vyukov6e70fdd2015-02-17 14:25:49 +0300935// func cputicks() int64
// Returns the RDTSC counter. When runtime·support_sse2 is 1 a fence is
// issued first: LFENCE if runtime·lfenceBeforeRdtsc is 1, otherwise MFENCE.
// Without SSE2 no fence is executed.
Russ Cox25f6b022014-08-27 11:32:17 -0400936TEXT runtime·cputicks(SB),NOSPLIT,$0-8
Martin Möhrmann5a6c5802017-04-27 08:30:27 +0200937 CMPB runtime·support_sse2(SB), $1
938 JNE done // no SSE2: skip both fence encodings
Dmitry Vyukov6e70fdd2015-02-17 14:25:49 +0300939 CMPB runtime·lfenceBeforeRdtsc(SB), $1
940 JNE mfence
941 BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
942 JMP done
943mfence:
944 BYTE $0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
945done:
Shenghou Ma6392b432012-02-06 12:49:28 -0500946 RDTSC // counter in DX:AX
Russ Cox25f6b022014-08-27 11:32:17 -0400947 MOVL AX, ret_lo+0(FP)
948 MOVL DX, ret_hi+4(FP)
Damian Gryski8e765da2012-02-02 14:09:27 -0500949 RET
950
// ldt0setup calls runtime·setldt with three stack arguments:
// entry hint 7, the address of m0.tls, and a size of 32 bytes.
Keith Randall5a546962013-08-07 10:23:24 -0700951TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
Matthew Dempsky7bb38f62015-11-12 15:35:50 -0800952 // set up ldt 7 to point at m0.tls
Russ Cox0d3a0432009-03-30 00:01:07 -0700953 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
Russ Cox1b14bdb2009-09-22 16:28:32 -0700954 // the entry number is just a hint. setldt will set up GS with what it used.
Russ Cox0d3a0432009-03-30 00:01:07 -0700955 MOVL $7, 0(SP)
Matthew Dempsky7bb38f62015-11-12 15:35:50 -0800956 LEAL runtime·m0+m_tls(SB), AX // AX = &m0.tls
Russ Cox0d3a0432009-03-30 00:01:07 -0700957 MOVL AX, 4(SP)
958 MOVL $32, 8(SP) // sizeof(tls array)
Russ Cox68b42552010-11-04 14:00:19 -0400959 CALL runtime·setldt(SB)
Russ Cox0d3a0432009-03-30 00:01:07 -0700960 RET
961
// emptyfunc does nothing and returns immediately. It is declared with
// text flags 0, i.e. without the NOSPLIT flag used by its neighbours.
Russ Cox9ddfb642013-07-16 16:24:09 -0400962TEXT runtime·emptyfunc(SB),0,$0-0
Russ Cox0d3a0432009-03-30 00:01:07 -0700963 RET
964
// hash function using AES hardware instructions
// Entry point for hashing s bytes at p. It only loads the registers that
// aeshashbody expects (AX = data, BX = length, DX = address of the result)
// and tail-jumps there; the seed h is read by aeshashbody from 4(FP).
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	LEAL	ret+12(FP), DX	// where the hash is stored
	MOVL	s+8(FP), BX	// size
	MOVL	p+0(FP), AX	// ptr to data
	JMP	runtime·aeshashbody(SB)
971
// aeshashstr hashes the bytes of the string whose header p points to.
// The header is unpacked into AX (data) and BX (length), DX receives the
// address of the result slot, and control tail-jumps to aeshashbody.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	LEAL	ret+8(FP), DX	// where the hash is stored
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data (overwrites the header pointer last)
	JMP	runtime·aeshashbody(SB)
978
979// AX: data
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300980// BX: length
Keith Randalld5e4c402015-01-06 16:42:48 -0800981// DX: address to put return value
// Reached by JMP from aeshash/aeshashstr, so h+4(FP) below names the seed
// argument in the entry point's frame. The low 32 bits of the final XMM
// state are stored at (DX). Dispatches on length: 0, 1-15, 16, 17-32,
// 33-64, and 65+ bytes.
982TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
Keith Randall91059de2015-08-31 16:26:12 -0700983 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300984 PINSRW $4, BX, X0 // 16 bits of length
Keith Randall91059de2015-08-31 16:26:12 -0700985 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
986 MOVO X0, X1 // save unscrambled seed
987 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
988 AESENC X0, X0 // scramble seed
989
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300990 CMPL BX, $16
Keith Randall7a4a64e2014-12-10 14:20:17 -0800991 JB aes0to15
992 JE aes16
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300993 CMPL BX, $32
Keith Randall7a4a64e2014-12-10 14:20:17 -0800994 JBE aes17to32
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300995 CMPL BX, $64
Keith Randall7a4a64e2014-12-10 14:20:17 -0800996 JBE aes33to64
997 JMP aes65plus
998
999aes0to15:
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001000 TESTL BX, BX
Keith Randall7a4a64e2014-12-10 14:20:17 -08001001 JE aes0
Keith Randalla5d40242013-03-12 10:47:44 -07001002
	// AX is advanced by 16 here; the loads below compensate with
	// negative displacements. Bits 4-11 of data+16 being zero sends us to
	// endofpage instead of doing a direct 16-byte load.
Keith Randall7a4a64e2014-12-10 14:20:17 -08001003 ADDL $16, AX
1004 TESTW $0xff0, AX
1005 JE endofpage
Keith Randalla5d40242013-03-12 10:47:44 -07001006
Keith Randallee669722013-05-15 09:40:14 -07001007 // 16 bytes loaded at this address won't cross
1008 // a page boundary, so we can load it directly.
Keith Randall91059de2015-08-31 16:26:12 -07001009 MOVOU -16(AX), X1
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001010 ADDL BX, BX // BX = 2*len, so BX*8 below steps in 16-byte table entries
1011 PAND masks<>(SB)(BX*8), X1 // keep only the low len bytes
Keith Randall7a4a64e2014-12-10 14:20:17 -08001012
Keith Randall91059de2015-08-31 16:26:12 -07001013final1:
1014 AESENC X0, X1 // scramble input, xor in seed
1015 AESENC X1, X1 // scramble combo 2 times
1016 AESENC X1, X1
1017 MOVL X1, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -08001018 RET
1019
1020endofpage:
Brad Fitzpatrick5fea2cc2016-03-01 23:21:55 +00001021 // address ends in 1111xxxx. Might be up against
Keith Randalla5d40242013-03-12 10:47:44 -07001022 // a page boundary, so load ending at last byte.
1023 // Then shift bytes down using pshufb.
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001024 MOVOU -32(AX)(BX*1), X1 // AX is data+16 here, so this reads data+len-16 .. data+len-1
1025 ADDL BX, BX // BX = 2*len for 16-byte table indexing
1026 PSHUFB shifts<>(SB)(BX*8), X1
Keith Randall91059de2015-08-31 16:26:12 -07001027 JMP final1
Keith Randall7a4a64e2014-12-10 14:20:17 -08001028
1029aes0:
Keith Randall731bdc52015-09-01 12:53:15 -07001030 // Return scrambled input seed
Keith Randall91059de2015-08-31 16:26:12 -07001031 AESENC X0, X0
1032 MOVL X0, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -08001033 RET
1034
1035aes16:
Keith Randall91059de2015-08-31 16:26:12 -07001036 MOVOU (AX), X1
1037 JMP final1
Keith Randall7a4a64e2014-12-10 14:20:17 -08001038
1039aes17to32:
Keith Randall91059de2015-08-31 16:26:12 -07001040 // make second starting seed
1041 PXOR runtime·aeskeysched+16(SB), X1
1042 AESENC X1, X1
1043
Keith Randall7a4a64e2014-12-10 14:20:17 -08001044 // load data to be hashed
	// (first 16 bytes and last 16 bytes; the two loads overlap when len < 32)
Keith Randall91059de2015-08-31 16:26:12 -07001045 MOVOU (AX), X2
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001046 MOVOU -16(AX)(BX*1), X3
Keith Randall7a4a64e2014-12-10 14:20:17 -08001047
1048 // scramble 3 times
Keith Randall91059de2015-08-31 16:26:12 -07001049 AESENC X0, X2
1050 AESENC X1, X3
1051 AESENC X2, X2
1052 AESENC X3, X3
1053 AESENC X2, X2
1054 AESENC X3, X3
Keith Randall7a4a64e2014-12-10 14:20:17 -08001055
1056 // combine results
Keith Randall91059de2015-08-31 16:26:12 -07001057 PXOR X3, X2
1058 MOVL X2, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -08001059 RET
1060
1061aes33to64:
Keith Randall91059de2015-08-31 16:26:12 -07001062 // make 3 more starting seeds
1063 MOVO X1, X2
1064 MOVO X1, X3
1065 PXOR runtime·aeskeysched+16(SB), X1
1066 PXOR runtime·aeskeysched+32(SB), X2
1067 PXOR runtime·aeskeysched+48(SB), X3
1068 AESENC X1, X1
1069 AESENC X2, X2
1070 AESENC X3, X3
Keith Randall7a4a64e2014-12-10 14:20:17 -08001071
	// first 32 bytes and last 32 bytes (overlapping when len < 64)
Keith Randall91059de2015-08-31 16:26:12 -07001072 MOVOU (AX), X4
1073 MOVOU 16(AX), X5
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001074 MOVOU -32(AX)(BX*1), X6
1075 MOVOU -16(AX)(BX*1), X7
Keith Randall91059de2015-08-31 16:26:12 -07001076
1077 AESENC X0, X4
1078 AESENC X1, X5
1079 AESENC X2, X6
1080 AESENC X3, X7
1081
1082 AESENC X4, X4
1083 AESENC X5, X5
1084 AESENC X6, X6
1085 AESENC X7, X7
1086
1087 AESENC X4, X4
1088 AESENC X5, X5
1089 AESENC X6, X6
1090 AESENC X7, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001091
	// fold the four lanes into X4
Keith Randall91059de2015-08-31 16:26:12 -07001092 PXOR X6, X4
1093 PXOR X7, X5
1094 PXOR X5, X4
1095 MOVL X4, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -08001096 RET
1097
1098aes65plus:
Keith Randall91059de2015-08-31 16:26:12 -07001099 // make 3 more starting seeds
1100 MOVO X1, X2
1101 MOVO X1, X3
1102 PXOR runtime·aeskeysched+16(SB), X1
1103 PXOR runtime·aeskeysched+32(SB), X2
1104 PXOR runtime·aeskeysched+48(SB), X3
1105 AESENC X1, X1
1106 AESENC X2, X2
1107 AESENC X3, X3
1108
Keith Randall7a4a64e2014-12-10 14:20:17 -08001109 // start with last (possibly overlapping) block
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001110 MOVOU -64(AX)(BX*1), X4
1111 MOVOU -48(AX)(BX*1), X5
1112 MOVOU -32(AX)(BX*1), X6
1113 MOVOU -16(AX)(BX*1), X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001114
1115 // scramble state once
Keith Randall91059de2015-08-31 16:26:12 -07001116 AESENC X0, X4
1117 AESENC X1, X5
1118 AESENC X2, X6
1119 AESENC X3, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001120
1121 // compute number of remaining 64-byte blocks
	// BX = (len-1)/64, which is at least 1 because len >= 65 here
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001122 DECL BX
1123 SHRL $6, BX
Keith Randall7a4a64e2014-12-10 14:20:17 -08001124
1125aesloop:
1126 // scramble state, xor in a block
Keith Randall91059de2015-08-31 16:26:12 -07001127 MOVOU (AX), X0
1128 MOVOU 16(AX), X1
1129 MOVOU 32(AX), X2
1130 MOVOU 48(AX), X3
1131 AESENC X0, X4
1132 AESENC X1, X5
1133 AESENC X2, X6
1134 AESENC X3, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001135
1136 // scramble state
Keith Randall91059de2015-08-31 16:26:12 -07001137 AESENC X4, X4
1138 AESENC X5, X5
1139 AESENC X6, X6
1140 AESENC X7, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001141
1142 ADDL $64, AX
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001143 DECL BX
Keith Randall7a4a64e2014-12-10 14:20:17 -08001144 JNE aesloop
1145
1146 // 2 more scrambles to finish
Keith Randall91059de2015-08-31 16:26:12 -07001147 AESENC X4, X4
1148 AESENC X5, X5
1149 AESENC X6, X6
1150 AESENC X7, X7
1151
1152 AESENC X4, X4
1153 AESENC X5, X5
1154 AESENC X6, X6
1155 AESENC X7, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001156
	// fold the four lanes into X4
Keith Randall91059de2015-08-31 16:26:12 -07001157 PXOR X6, X4
1158 PXOR X7, X5
1159 PXOR X5, X4
1160 MOVL X4, (DX)
Keith Randalla5d40242013-03-12 10:47:44 -07001161 RET
1162
// aeshash32 hashes the 4 bytes at p with seed h: the seed goes in dword 0
// of X0, the data in dword 1, followed by three AESENC rounds keyed from
// runtime·aeskeysched. The low 32 bits of X0 are returned.
Keith Randalld5e4c402015-01-06 16:42:48 -08001163TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
Keith Randalla2a97682014-07-31 15:07:05 -07001164 MOVL p+0(FP), AX // ptr to data
Keith Randalld5e4c402015-01-06 16:42:48 -08001165 MOVL h+4(FP), X0 // seed
Keith Randalla5d40242013-03-12 10:47:44 -07001166 PINSRD $1, (AX), X0 // data
Keith Randalldb53d972013-03-20 14:34:26 -07001167 AESENC runtime·aeskeysched+0(SB), X0
1168 AESENC runtime·aeskeysched+16(SB), X0
Keith Randall7a4a64e2014-12-10 14:20:17 -08001169 AESENC runtime·aeskeysched+32(SB), X0
Keith Randalld5e4c402015-01-06 16:42:48 -08001170 MOVL X0, ret+8(FP)
Keith Randalla5d40242013-03-12 10:47:44 -07001171 RET
1172
// aeshash64 hashes the 8 bytes at p with seed h: the data fills the low
// quadword of X0, the seed goes in dword 2, followed by three AESENC rounds
// keyed from runtime·aeskeysched. The low 32 bits of X0 are returned.
Keith Randalld5e4c402015-01-06 16:42:48 -08001173TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
Keith Randalla2a97682014-07-31 15:07:05 -07001174 MOVL p+0(FP), AX // ptr to data
Keith Randalla5d40242013-03-12 10:47:44 -07001175 MOVQ (AX), X0 // data
Keith Randalld5e4c402015-01-06 16:42:48 -08001176 PINSRD $2, h+4(FP), X0 // seed
Keith Randalldb53d972013-03-20 14:34:26 -07001177 AESENC runtime·aeskeysched+0(SB), X0
1178 AESENC runtime·aeskeysched+16(SB), X0
Keith Randall7a4a64e2014-12-10 14:20:17 -08001179 AESENC runtime·aeskeysched+32(SB), X0
Keith Randalld5e4c402015-01-06 16:42:48 -08001180 MOVL X0, ret+8(FP)
Keith Randalla5d40242013-03-12 10:47:44 -07001181 RET
1182
Keith Randalla5d40242013-03-12 10:47:44 -07001183// simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each. Entry i (at offset i*0x10) has its low i
// bytes set to 0xff and the rest zero, so ANDing with it keeps the first i
// bytes of a 16-byte load. Indexed by length in aeshashbody.
Russ Cox9ddfb642013-07-16 16:24:09 -04001184DATA masks<>+0x00(SB)/4, $0x00000000
1185DATA masks<>+0x04(SB)/4, $0x00000000
1186DATA masks<>+0x08(SB)/4, $0x00000000
1187DATA masks<>+0x0c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001188
Russ Cox9ddfb642013-07-16 16:24:09 -04001189DATA masks<>+0x10(SB)/4, $0x000000ff
1190DATA masks<>+0x14(SB)/4, $0x00000000
1191DATA masks<>+0x18(SB)/4, $0x00000000
1192DATA masks<>+0x1c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001193
Russ Cox9ddfb642013-07-16 16:24:09 -04001194DATA masks<>+0x20(SB)/4, $0x0000ffff
1195DATA masks<>+0x24(SB)/4, $0x00000000
1196DATA masks<>+0x28(SB)/4, $0x00000000
1197DATA masks<>+0x2c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001198
Russ Cox9ddfb642013-07-16 16:24:09 -04001199DATA masks<>+0x30(SB)/4, $0x00ffffff
1200DATA masks<>+0x34(SB)/4, $0x00000000
1201DATA masks<>+0x38(SB)/4, $0x00000000
1202DATA masks<>+0x3c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001203
Russ Cox9ddfb642013-07-16 16:24:09 -04001204DATA masks<>+0x40(SB)/4, $0xffffffff
1205DATA masks<>+0x44(SB)/4, $0x00000000
1206DATA masks<>+0x48(SB)/4, $0x00000000
1207DATA masks<>+0x4c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001208
Russ Cox9ddfb642013-07-16 16:24:09 -04001209DATA masks<>+0x50(SB)/4, $0xffffffff
1210DATA masks<>+0x54(SB)/4, $0x000000ff
1211DATA masks<>+0x58(SB)/4, $0x00000000
1212DATA masks<>+0x5c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001213
Russ Cox9ddfb642013-07-16 16:24:09 -04001214DATA masks<>+0x60(SB)/4, $0xffffffff
1215DATA masks<>+0x64(SB)/4, $0x0000ffff
1216DATA masks<>+0x68(SB)/4, $0x00000000
1217DATA masks<>+0x6c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001218
Russ Cox9ddfb642013-07-16 16:24:09 -04001219DATA masks<>+0x70(SB)/4, $0xffffffff
1220DATA masks<>+0x74(SB)/4, $0x00ffffff
1221DATA masks<>+0x78(SB)/4, $0x00000000
1222DATA masks<>+0x7c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001223
Russ Cox9ddfb642013-07-16 16:24:09 -04001224DATA masks<>+0x80(SB)/4, $0xffffffff
1225DATA masks<>+0x84(SB)/4, $0xffffffff
1226DATA masks<>+0x88(SB)/4, $0x00000000
1227DATA masks<>+0x8c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001228
Russ Cox9ddfb642013-07-16 16:24:09 -04001229DATA masks<>+0x90(SB)/4, $0xffffffff
1230DATA masks<>+0x94(SB)/4, $0xffffffff
1231DATA masks<>+0x98(SB)/4, $0x000000ff
1232DATA masks<>+0x9c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001233
Russ Cox9ddfb642013-07-16 16:24:09 -04001234DATA masks<>+0xa0(SB)/4, $0xffffffff
1235DATA masks<>+0xa4(SB)/4, $0xffffffff
1236DATA masks<>+0xa8(SB)/4, $0x0000ffff
1237DATA masks<>+0xac(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001238
Russ Cox9ddfb642013-07-16 16:24:09 -04001239DATA masks<>+0xb0(SB)/4, $0xffffffff
1240DATA masks<>+0xb4(SB)/4, $0xffffffff
1241DATA masks<>+0xb8(SB)/4, $0x00ffffff
1242DATA masks<>+0xbc(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001243
Russ Cox9ddfb642013-07-16 16:24:09 -04001244DATA masks<>+0xc0(SB)/4, $0xffffffff
1245DATA masks<>+0xc4(SB)/4, $0xffffffff
1246DATA masks<>+0xc8(SB)/4, $0xffffffff
1247DATA masks<>+0xcc(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001248
Russ Cox9ddfb642013-07-16 16:24:09 -04001249DATA masks<>+0xd0(SB)/4, $0xffffffff
1250DATA masks<>+0xd4(SB)/4, $0xffffffff
1251DATA masks<>+0xd8(SB)/4, $0xffffffff
1252DATA masks<>+0xdc(SB)/4, $0x000000ff
Keith Randalla5d40242013-03-12 10:47:44 -07001253
Russ Cox9ddfb642013-07-16 16:24:09 -04001254DATA masks<>+0xe0(SB)/4, $0xffffffff
1255DATA masks<>+0xe4(SB)/4, $0xffffffff
1256DATA masks<>+0xe8(SB)/4, $0xffffffff
1257DATA masks<>+0xec(SB)/4, $0x0000ffff
Keith Randalla5d40242013-03-12 10:47:44 -07001258
Russ Cox9ddfb642013-07-16 16:24:09 -04001259DATA masks<>+0xf0(SB)/4, $0xffffffff
1260DATA masks<>+0xf4(SB)/4, $0xffffffff
1261DATA masks<>+0xf8(SB)/4, $0xffffffff
1262DATA masks<>+0xfc(SB)/4, $0x00ffffff
Keith Randalla5d40242013-03-12 10:47:44 -07001263
// 256 bytes, read-only. checkASM verifies the table is 16-byte aligned.
Keith Randall5a546962013-08-07 10:23:24 -07001264GLOBL masks<>(SB),RODATA,$256
Keith Randalla5d40242013-03-12 10:47:44 -07001265
Brad Fitzpatrick5fea2cc2016-03-01 23:21:55 +00001266// these are arguments to pshufb. They move data down from
Russ Cox9ddfb642013-07-16 16:24:09 -04001267// the high bytes of the register to the low bytes of the register.
1268// index is how many bytes to move.
// Entry i (at offset i*0x10) selects source bytes 16-i .. 15 into
// destination bytes 0 .. i-1. The remaining selector bytes are 0xff, whose
// set high bit makes PSHUFB write zero to that destination byte.
// Entry 0 is all zeros and is not reached from aeshashbody's length-0 path.
1269DATA shifts<>+0x00(SB)/4, $0x00000000
1270DATA shifts<>+0x04(SB)/4, $0x00000000
1271DATA shifts<>+0x08(SB)/4, $0x00000000
1272DATA shifts<>+0x0c(SB)/4, $0x00000000
1273
1274DATA shifts<>+0x10(SB)/4, $0xffffff0f
1275DATA shifts<>+0x14(SB)/4, $0xffffffff
1276DATA shifts<>+0x18(SB)/4, $0xffffffff
1277DATA shifts<>+0x1c(SB)/4, $0xffffffff
1278
1279DATA shifts<>+0x20(SB)/4, $0xffff0f0e
1280DATA shifts<>+0x24(SB)/4, $0xffffffff
1281DATA shifts<>+0x28(SB)/4, $0xffffffff
1282DATA shifts<>+0x2c(SB)/4, $0xffffffff
1283
1284DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
1285DATA shifts<>+0x34(SB)/4, $0xffffffff
1286DATA shifts<>+0x38(SB)/4, $0xffffffff
1287DATA shifts<>+0x3c(SB)/4, $0xffffffff
1288
1289DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
1290DATA shifts<>+0x44(SB)/4, $0xffffffff
1291DATA shifts<>+0x48(SB)/4, $0xffffffff
1292DATA shifts<>+0x4c(SB)/4, $0xffffffff
1293
1294DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
1295DATA shifts<>+0x54(SB)/4, $0xffffff0f
1296DATA shifts<>+0x58(SB)/4, $0xffffffff
1297DATA shifts<>+0x5c(SB)/4, $0xffffffff
1298
1299DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
1300DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1301DATA shifts<>+0x68(SB)/4, $0xffffffff
1302DATA shifts<>+0x6c(SB)/4, $0xffffffff
1303
1304DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
1305DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1306DATA shifts<>+0x78(SB)/4, $0xffffffff
1307DATA shifts<>+0x7c(SB)/4, $0xffffffff
1308
1309DATA shifts<>+0x80(SB)/4, $0x0b0a0908
1310DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1311DATA shifts<>+0x88(SB)/4, $0xffffffff
1312DATA shifts<>+0x8c(SB)/4, $0xffffffff
1313
1314DATA shifts<>+0x90(SB)/4, $0x0a090807
1315DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1316DATA shifts<>+0x98(SB)/4, $0xffffff0f
1317DATA shifts<>+0x9c(SB)/4, $0xffffffff
1318
1319DATA shifts<>+0xa0(SB)/4, $0x09080706
1320DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1321DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1322DATA shifts<>+0xac(SB)/4, $0xffffffff
1323
1324DATA shifts<>+0xb0(SB)/4, $0x08070605
1325DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1326DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1327DATA shifts<>+0xbc(SB)/4, $0xffffffff
1328
1329DATA shifts<>+0xc0(SB)/4, $0x07060504
1330DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1331DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1332DATA shifts<>+0xcc(SB)/4, $0xffffffff
1333
1334DATA shifts<>+0xd0(SB)/4, $0x06050403
1335DATA shifts<>+0xd4(SB)/4, $0x0a090807
1336DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1337DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1338
1339DATA shifts<>+0xe0(SB)/4, $0x05040302
1340DATA shifts<>+0xe4(SB)/4, $0x09080706
1341DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1342DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1343
1344DATA shifts<>+0xf0(SB)/4, $0x04030201
1345DATA shifts<>+0xf4(SB)/4, $0x08070605
1346DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1347DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1348
// 256 bytes, read-only. checkASM verifies the table is 16-byte aligned.
Keith Randall5a546962013-08-07 10:23:24 -07001349GLOBL shifts<>(SB),RODATA,$256
Russ Cox9ddfb642013-07-16 16:24:09 -04001350
// checkASM reports whether the masks<> and shifts<> tables both start on a
// 16-byte boundary. The result byte is 1 when neither address has any of
// its low four bits set.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVL	$shifts<>(SB), BX
	MOVL	$masks<>(SB), AX
	ORL	BX, AX		// a misaligned low bit in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET
1359
// memequal(p, q unsafe.Pointer, size uintptr) bool
// Identical pointers compare equal without touching memory. Otherwise the
// work is handed to memeqbody with SI = a, DI = b, BX = size and
// AX = address of the result byte.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	CMPL	SI, DI
	JNE	compare
	MOVB	$1, ret+12(FP)	// same address: trivially equal
	RET
compare:
	LEAL	ret+12(FP), AX
	MOVL	size+8(FP), BX
	JMP	runtime·memeqbody(SB)
Keith Randall0c6b55e2014-07-16 14:16:19 -07001372
// memequal_varlen(a, b unsafe.Pointer) bool
// Like memequal, but the length is not an argument: it is read from
// offset 4 of the closure that DX points to on entry.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	b+4(FP), DI
	MOVL	a+0(FP), SI
	CMPL	SI, DI
	JNE	compare
	MOVB	$1, ret+8(FP)	// same address: trivially equal
	RET
compare:
	LEAL	ret+8(FP), AX
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	JMP	runtime·memeqbody(SB)
1385
// bytes·Equal compares two byte slices. Slices of different length are
// unequal immediately; otherwise memeqbody compares the contents with
// SI = a's data, DI = b's data, BX = length and AX = address of the result.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	b_len+16(FP), CX
	MOVL	a_len+4(FP), BX
	CMPL	BX, CX
	JEQ	samelen
	MOVB	$0, ret+24(FP)	// lengths differ
	RET
samelen:
	LEAL	ret+24(FP), AX
	MOVL	b+12(FP), DI
	MOVL	a+0(FP), SI
	JMP	runtime·memeqbody(SB)
1398
1399// a in SI
1400// b in DI
1401// count in BX
Keith Randallc526f3a2015-04-21 14:22:41 -07001402// address of result byte in AX
// Writes 1 to (AX) when the two regions are equal and 0 otherwise.
// Three strategies: 64 bytes per step with SSE2 when available, 4 bytes
// per step with a 32-bit register, and a masked single load for 1-3 bytes.
Keith Randall5a546962013-08-07 10:23:24 -07001403TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
Keith Randall3d5daa22013-04-02 16:26:15 -07001404 CMPL BX, $4
1405 JB small
1406
1407 // 64 bytes at a time using xmm registers
1408hugeloop:
1409 CMPL BX, $64
1410 JB bigloop
Martin Möhrmann5a6c5802017-04-27 08:30:27 +02001411 CMPB runtime·support_sse2(SB), $1
1412 JNE bigloop
Keith Randall3d5daa22013-04-02 16:26:15 -07001413 MOVOU (SI), X0
1414 MOVOU (DI), X1
1415 MOVOU 16(SI), X2
1416 MOVOU 16(DI), X3
1417 MOVOU 32(SI), X4
1418 MOVOU 32(DI), X5
1419 MOVOU 48(SI), X6
1420 MOVOU 48(DI), X7
1421 PCMPEQB X1, X0
1422 PCMPEQB X3, X2
1423 PCMPEQB X5, X4
1424 PCMPEQB X7, X6
1425 PAND X2, X0
1426 PAND X6, X4
1427 PAND X4, X0
1428 PMOVMSKB X0, DX // one bit per byte lane; 0xffff means all 64 bytes matched
1429 ADDL $64, SI
1430 ADDL $64, DI
1431 SUBL $64, BX
1432 CMPL DX, $0xffff
1433 JEQ hugeloop
Keith Randallc526f3a2015-04-21 14:22:41 -07001434 MOVB $0, (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001435 RET
1436
1437 // 4 bytes at a time using 32-bit register
1438bigloop:
1439 CMPL BX, $4
1440 JBE leftover
1441 MOVL (SI), CX
1442 MOVL (DI), DX
1443 ADDL $4, SI
1444 ADDL $4, DI
1445 SUBL $4, BX
1446 CMPL CX, DX
1447 JEQ bigloop
Keith Randallc526f3a2015-04-21 14:22:41 -07001448 MOVB $0, (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001449 RET
1450
1451 // remaining 0-4 bytes
	// Compare the 4 bytes that end at the end of the data. When fewer than
	// 4 bytes remain this re-reads bytes that were already compared.
1452leftover:
1453 MOVL -4(SI)(BX*1), CX
1454 MOVL -4(DI)(BX*1), DX
1455 CMPL CX, DX
Keith Randallc526f3a2015-04-21 14:22:41 -07001456 SETEQ (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001457 RET
1458
1459small:
1460 CMPL BX, $0
1461 JEQ equal // zero length: ZF is set by the CMPL, so SETEQ stores 1
1462
	// CX = -8*count. Only the low 5 bits of CL are used as a shift count,
	// giving a shift of 32 - 8*count bits for count in 1..3.
1463 LEAL 0(BX*8), CX
1464 NEGL CX
1465
1466 MOVL SI, DX
1467 CMPB DX, $0xfc // low address byte above 0xfc: a 4-byte load could run past the page
1468 JA si_high
1469
1470 // load at SI won't cross a page boundary.
1471 MOVL (SI), SI
1472 JMP si_finish
1473si_high:
Brad Fitzpatrick5fea2cc2016-03-01 23:21:55 +00001474 // address ends in 111111xx. Load up to bytes we want, move to correct position.
Keith Randall3d5daa22013-04-02 16:26:15 -07001475 MOVL -4(SI)(BX*1), SI
1476 SHRL CX, SI
1477si_finish:
1478
1479 // same for DI.
1480 MOVL DI, DX
1481 CMPB DX, $0xfc
1482 JA di_high
1483 MOVL (DI), DI
1484 JMP di_finish
1485di_high:
1486 MOVL -4(DI)(BX*1), DI
1487 SHRL CX, DI
1488di_finish:
1489
	// The wanted bytes sit in the low end of SI and DI. Shifting the
	// difference left discards the unwanted high bytes; ZF is set when
	// the remaining bytes are all equal.
1490 SUBL SI, DI
1491 SHLL CX, DI
1492equal:
Keith Randallc526f3a2015-04-21 14:22:41 -07001493 SETEQ (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001494 RET
Keith Randallb3946dc2013-05-14 16:05:51 -07001495
// runtime·cmpstring compares the strings s1 and s2.
// Frame layout: s1 at 0(FP), s2 at 8(FP), result word at 16(FP).
// It only loads the register arguments of runtime·cmpbody and jumps there;
// cmpbody stores 1, 0 or -1 through AX and returns to our caller.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	LEAL	ret+16(FP), AX		// AX = where cmpbody stores the result
	MOVL	s1_len+4(FP), BX	// BX = alen
	MOVL	s2_len+12(FP), DX	// DX = blen
	MOVL	s1_base+0(FP), SI	// SI = a
	MOVL	s2_base+8(FP), DI	// DI = b
	JMP	runtime·cmpbody(SB)
Keith Randallb3946dc2013-05-14 16:05:51 -07001503
// bytes·Compare compares the byte slices a and b.
// Frame layout: a at 0(FP), b at 12(FP), result word at 24(FP).
// It only loads the register arguments of runtime·cmpbody and jumps there;
// cmpbody stores 1, 0 or -1 through AX and returns to our caller.
//
// The operand names follow the a/b parameters of bytes.Compare and use the
// _base/_len suffixes, so each name states which slice word is read.
// The assembler only uses the offsets; the names are documentation.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	a_base+0(FP), SI	// SI = a
	MOVL	a_len+4(FP), BX		// BX = alen
	MOVL	b_base+12(FP), DI	// DI = b
	MOVL	b_len+16(FP), DX	// DX = blen
	LEAL	ret+24(FP), AX		// AX = where cmpbody stores the result
	JMP	runtime·cmpbody(SB)
Keith Randallb3946dc2013-05-14 16:05:51 -07001511
// bytes·IndexByte stores in ret the index of the first byte equal to c in
// the slice s, or -1 if there is none.
// Frame layout: s at 0(FP), c at 12(FP), result word at 16(FP).
//
// When the length is zero, REPN SCASB performs no iterations and leaves the
// flags as the caller left them, so either branch below may be taken. Both
// store -1: on the found path DI still equals SI, giving 0 - 1.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVB	c+12(FP), AL		// byte to look for
	MOVL	s_len+4(FP), CX		// scan at most CX bytes
	MOVL	s+0(FP), SI
	MOVL	SI, DI			// DI advances; SI keeps the start
	CLD; REPN; SCASB
	JNE	notfound
	// The scan stopped on a match with DI one past the matching byte.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET
notfound:
	MOVL	$-1, ret+16(FP)
	RET
1525
// strings·IndexByte stores in ret the index of the first byte equal to c in
// the string s, or -1 if there is none.
// Frame layout: s at 0(FP), c at 8(FP), result word at 12(FP).
//
// When the length is zero, REPN SCASB performs no iterations and leaves the
// flags as the caller left them, so either branch below may be taken. Both
// store -1: on the found path DI still equals SI, giving 0 - 1.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVB	c+8(FP), AL		// byte to look for
	MOVL	s_len+4(FP), CX		// scan at most CX bytes
	MOVL	s+0(FP), SI
	MOVL	SI, DI			// DI advances; SI keeps the start
	CLD; REPN; SCASB
	JNE	notfound
	// The scan stopped on a match with DI one past the matching byte.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET
notfound:
	MOVL	$-1, ret+12(FP)
	RET
1539
// cmpbody is the three-way byte comparison shared by the compare wrappers.
// Callers reach it with JMP, so its RET returns to their caller.
//
// Inputs (registers):
//	SI = a
//	DI = b
//	BX = alen
//	DX = blen
//	AX = address of the result word, set to +1 (a > b), 0 (a == b)
//	     or -1 (a < b)
// Bytes compare as unsigned values; if the common prefix is identical the
// longer operand is the greater one.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX		// DX = blen-alen, kept for the length tie-break
	JLE	havemin		// blen <= alen: BP already holds the minimum
	MOVL	BX, BP
havemin:
	// BP = min(alen, blen) = number of bytes still to compare
	CMPL	SI, DI
	JEQ	samebytes	// same address: only the lengths can differ
	CMPL	BP, $4
	JB	under4
	CMPB	runtime·support_sse2(SB), $1
	JNE	by4

	// 16 bytes at a time using xmm registers
by16:
	CMPL	BP, $16
	JB	by4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX		// one bit per byte, set where the bytes agree
	XORL	$0xffff, BX	// flip: set where the bytes differ
	JNE	mismatch16
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	by16

mismatch16:
	BSFL	BX, BX		// offset of the first differing byte
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX		// 1 if a's byte is above b's (unsigned)
	LEAL	-1(DX*2), DX	// 1/0 => +1/-1
	MOVL	DX, (AX)
	RET

	// 4 bytes at a time using 32-bit registers
by4:
	CMPL	BP, $4
	JBE	last4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	mismatch4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	by4

	// 0-4 bytes remain. Compare the 4 bytes that end at the end of the
	// common prefix; they may overlap bytes already found equal.
last4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	samebytes

	// BX (from a) and CX (from b) are 4-byte words that differ.
mismatch4:
	BSWAPL	BX		// make the first byte the most significant
	BSWAPL	CX
	XORL	BX, CX		// set bits mark the differences
	BSRL	CX, CX		// position of the most significant difference
	SHRL	CX, BX		// bring a's bit at that position to bit 0
	ANDL	$1, BX
	LEAL	-1(BX*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
under4:
	// CX = -8*BP; shifts use only the low 5 bits, i.e. 32-8*BP.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	samebytes	// no bytes in common

	// Load a. If the low address byte is above 0xfc, load the 4 bytes
	// that end at the end of a instead and shift the wanted bytes down.
	CMPB	SI, $0xfc
	JA	a_tail
	MOVL	(SI), SI
	JMP	a_loaded
a_tail:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
a_loaded:
	SHLL	CX, SI		// wanted bytes to the top, zeros below

	// Same for b.
	CMPB	DI, $0xfc
	JA	b_tail
	MOVL	(DI), DI
	JMP	b_loaded
b_tail:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
b_loaded:
	SHLL	CX, DI

	BSWAPL	SI		// make the first byte the most significant
	BSWAPL	DI
	XORL	SI, DI		// set bits mark the differences
	JEQ	samebytes
	BSRL	DI, CX		// position of the most significant difference
	SHRL	CX, SI		// bring a's bit at that position to bit 0
	ANDL	$1, SI
	LEAL	-1(SI*2), BX	// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// Every byte of the common prefix matches, so the lengths decide.
	// BX and CX are cleared before TESTL because SETcc writes only the
	// low byte and XORL would overwrite the flags.
samebytes:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX		// DX = blen-alen
	SETLT	BX		// 1 if alen > blen
	SETEQ	CX		// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// +1, 0 or -1
	MOVL	BX, (AX)
	RET
Keith Randall6c7cbf02014-04-01 12:51:02 -07001660
// return0 sets AX to 0 and returns. It takes no arguments and uses no
// stack frame.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
Keith Randall1b6807b2014-09-25 07:59:01 -07001664
// _cgo_topofstack returns g->m->curg.stack.hi in AX.
// It is called from cgo wrappers and so must obey the gcc calling
// convention. The code below writes only AX and the register handed to
// get_tls (CX).
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX			// AX = g
	MOVL	g_m(AX), AX			// AX = g->m
	MOVL	m_curg(AX), AX			// AX = g->m->curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
Russ Coxa5a07332014-10-29 20:37:44 -04001674
// goexit is where the top-most function running on a goroutine returns to:
// its return address is goexit+PCQuantum, which skips the leading one-byte
// NOP and lands on the CALL (presumably PCQuantum is 1 on 386; it is
// defined elsewhere).
// The NOPs are emitted with BYTE so that exactly one byte is produced.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// The return address pushed by the CALL must still lie inside
	// goexit so that a traceback from goexit1 finds this function.
	BYTE	$0x90	// NOP
Russ Cox15ced2d2014-11-11 17:06:22 -05001682
// addmoduledata appends a module's moduledata to the linked list of
// moduledata objects.
//
// It is called from .init_array by a linker-generated function, so it
// follows the platform ABI for register preservation: it touches only AX,
// CX (implicitly) and DX. It does not follow the ABI for arguments: the
// pointer to the moduledata arrives in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail
	MOVL	AX, moduledata_next(DX)		// tail.next = new module
	MOVL	AX, runtime·lastmoduledatap(SB)	// new module becomes the tail
	RET
Keith Randalldf2f8132016-07-21 10:37:59 -07001693
// uint32tofloat64 converts the uint32 argument a to a float64 using the x87
// unit. Frame layout: a at 0(FP), 8-byte result at 4(FP); 8 bytes of locals.
//
// The argument is widened to a 64-bit integer on the stack by pairing it
// with a zero high word, so the 64-bit integer load sees a non-negative
// value even when bit 31 of a is set.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	$0, 4(SP)		// high word = 0
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)		// low word = a
	FMOVV	0(SP), F0		// load the 64-bit integer at 0(SP)
	FMOVDP	F0, ret+4(FP)		// store as float64 and pop
	RET
1701
// float64touint32 converts the float64 argument a to a uint32 using the x87
// unit. Frame layout: 8-byte a at 0(FP), result word at 8(FP).
// Locals: saved FPU control word at 0(SP), 64-bit integer at 4(SP).
//
// The value is stored as a 64-bit integer while the control word
// runtime·controlWord64trunc is in effect (presumably round-toward-zero,
// going by its name; it is defined elsewhere), and the low 32 bits of that
// integer are returned. The caller's control word is restored afterwards.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FSTCW	0(SP)				// save the current control word
	FMOVD	a+0(FP), F0
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)			// store as 64-bit integer and pop
	FLDCW	0(SP)				// restore the saved control word
	MOVL	4(SP), AX			// low 32 bits of the integer
	MOVL	AX, ret+8(FP)
	RET