blob: 80a145187c884bf18f14dd68d19d9319bb2bbce7 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
Russ Cox15ced2d2014-11-11 17:06:22 -05005#include "go_asm.h"
6#include "go_tls.h"
Russ Cox9ddfb642013-07-16 16:24:09 -04007#include "funcdata.h"
Russ Coxcb040d52014-09-04 23:05:18 -04008#include "textflag.h"
Russ Cox8522a472009-06-17 15:15:55 -07009
// _rt0_386 is common startup code for most 386 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
//
// The $8 frame supplies the two slots 0(SP) and 4(SP). argc and the
// address of argv are copied into them before jumping to
// runtime·rt0_go, which loads its inputs from those same two offsets.
TEXT _rt0_386(SB),NOSPLIT,$8
	MOVL	8(SP), AX	// argc
	LEAL	12(SP), BX	// argv
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	JMP	runtime·rt0_go(SB)
20
// _rt0_386_lib is common startup code for most 386 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed on the stack following the
// usual C ABI.
//
// It stores argc/argv in the file-local globals _rt0_386_lib_argc<> and
// _rt0_386_lib_argv<>, runs runtime·libpreinit synchronously, and then
// starts _rt0_386_lib_go on a new thread: through the function pointer
// in _cgo_sys_thread_create when it is non-zero, otherwise through
// runtime·newosproc0. BP, BX, SI and DI are pushed on entry and popped
// again before the RET.
TEXT _rt0_386_lib(SB),NOSPLIT,$0
	PUSHL	BP
	MOVL	SP, BP
	PUSHL	BX
	PUSHL	SI
	PUSHL	DI

	// BP was set to SP right after pushing the old BP, so 0(BP) is the
	// saved BP, 4(BP) the return address, and 8(BP)/12(BP) the first two
	// stack arguments.
	MOVL	8(BP), AX
	MOVL	AX, _rt0_386_lib_argc<>(SB)
	MOVL	12(BP), AX
	MOVL	AX, _rt0_386_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Reserve two 4-byte outgoing argument slots; released at restore.
	SUBL	$8, SP

	// Create a new thread to do the runtime initialization.
	MOVL	_cgo_sys_thread_create(SB), AX
	TESTL	AX, AX
	JZ	nocgo

	// Align stack to call C function.
	// We moved SP to BP above, but BP was clobbered by the libpreinit call.
	MOVL	SP, BP		// remember the unaligned SP so it can be restored below
	ANDL	$~15, SP

	MOVL	$_rt0_386_lib_go(SB), BX
	MOVL	BX, 0(SP)	// first argument: function for the new thread
	MOVL	$0, 4(SP)	// second argument: zero

	CALL	AX

	MOVL	BP, SP		// undo the alignment adjustment

	JMP	restore

nocgo:
	MOVL	$0x800000, 0(SP)	// stacksize = 8192KB
	MOVL	$_rt0_386_lib_go(SB), AX
	MOVL	AX, 4(SP)		// fn
	CALL	runtime·newosproc0(SB)

restore:
	ADDL	$8, SP		// drop the two argument slots reserved above
	POPL	DI
	POPL	SI
	POPL	BX
	POPL	BP
	RET
77
// _rt0_386_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_386_lib.
//
// It reloads the argc/argv values that _rt0_386_lib stored in
// _rt0_386_lib_argc<> and _rt0_386_lib_argv<>, places them in the two
// slots of its $8 frame (0(SP) and 4(SP)), and jumps to runtime·rt0_go.
TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
	MOVL	_rt0_386_lib_argc<>(SB), AX
	MOVL	AX, 0(SP)	// argc
	MOVL	_rt0_386_lib_argv<>(SB), AX
	MOVL	AX, 4(SP)	// argv
	JMP	runtime·rt0_go(SB)
86
// File-local 4-byte cells, initialized to zero, that carry argc and
// argv from _rt0_386_lib to _rt0_386_lib_go. Both are declared NOPTR.
DATA _rt0_386_lib_argc<>(SB)/4, $0
GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
DATA _rt0_386_lib_argv<>(SB)/4, $0
GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
91
// runtime·rt0_go is the common bootstrap path. The _rt0_386* entry
// points JMP here (they do not CALL) with argc at 0(SP) and argv at
// 4(SP).
//
// Phases, in order:
//   1. Move argc/argv to scratch slots on a 16-byte aligned stack.
//   2. Give runtime·g0 default stack bounds.
//   3. Probe the CPU with CPUID and record feature flags in the
//      runtime·support_* variables; exit with a message if CPUID or
//      MMX is missing.
//   4. Set up thread-local storage, either by calling _cgo_init (when
//      non-zero) or via runtime·ldt0setup.
//   5. Install g0 in TLS and link g0 and m0 to each other.
//   6. Call check, args, osinit and schedinit, queue runtime·main with
//      newproc, and start this M with mstart.
//
// The code after the mstart call (INT $3) is only reached if mstart
// returns.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// Copy arguments forward on an even stack.
	// Users of this function jump to it, they don't call it.
	MOVL	0(SP), AX		// argc
	MOVL	4(SP), BX		// argv
	SUBL	$128, SP		// plenty of scratch
	ANDL	$~15, SP		// round SP down to a multiple of 16
	MOVL	AX, 120(SP)		// save argc, argv away
	MOVL	BX, 124(SP)

	// set default stack bounds.
	// _cgo_init may update stackguard.
	MOVL	$runtime·g0(SB), BP	// BP = &g0; still live when _cgo_init is called below
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// find out information about the processor we're on
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	has_cpuid
#else
	// first see if CPUID instruction is supported.
	// Two copies of EFLAGS are pushed: the top one is modified and
	// loaded back, the lower one is kept as the reference value and
	// finally used to restore EFLAGS.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP) // flip ID bit
	POPFL
	PUSHFL
	POPL	AX		// AX = EFLAGS after the attempted flip
	XORL	0(SP), AX	// AX = bits that differ from the original EFLAGS
	POPFL	// restore EFLAGS
	TESTL	$(1<<21), AX
	JNE	has_cpuid	// ID bit could be toggled
#endif

bad_proc: // show that the program requires MMX.
	// write(2, bad_proc_msg, 0x3d); exit(1)
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

has_cpuid:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI		// SI = AX result of leaf 0; compared against 7 at eax7 below
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)

	// Check for MMX support
	TESTL	$(1<<23), DX // MMX
	JZ	bad_proc

	TESTL	$(1<<26), DX // SSE2
	SETNE	runtime·support_sse2(SB)

	// The remaining leaf-1 tests use DI, the copy of CX made above.
	TESTL	$(1<<9), DI // SSSE3
	SETNE	runtime·support_ssse3(SB)

	TESTL	$(1<<19), DI // SSE4.1
	SETNE	runtime·support_sse41(SB)

	TESTL	$(1<<20), DI // SSE4.2
	SETNE	runtime·support_sse42(SB)

	TESTL	$(1<<23), DI // POPCNT
	SETNE	runtime·support_popcnt(SB)

	TESTL	$(1<<25), DI // AES
	SETNE	runtime·support_aes(SB)

	TESTL	$(1<<27), DI // OSXSAVE
	SETNE	runtime·support_osxsave(SB)

	// If OS support for XMM and YMM is not present
	// support_avx will be set back to false later.
	TESTL	$(1<<28), DI // AVX
	SETNE	runtime·support_avx(SB)

eax7:
	// Load EAX=7/ECX=0 cpuid flags
	CMPL	SI, $7
	JLT	osavx		// leaf-0 AX value below 7: skip the leaf 7 query
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID

	TESTL	$(1<<3), BX // BMI1
	SETNE	runtime·support_bmi1(SB)

	// If OS support for XMM and YMM is not present
	// support_avx2 will be set back to false later.
	TESTL	$(1<<5), BX // AVX2
	SETNE	runtime·support_avx2(SB)

	TESTL	$(1<<8), BX // BMI2
	SETNE	runtime·support_bmi2(SB)

	TESTL	$(1<<9), BX // ERMS
	SETNE	runtime·support_erms(SB)

osavx:
	// nacl does not support XGETBV to test
	// for XMM and YMM OS support.
#ifndef GOOS_nacl
	CMPB	runtime·support_osxsave(SB), $1
	JNE	noavx
	MOVL	$0, CX
	// For XGETBV, OSXSAVE bit is required and sufficient
	XGETBV
	ANDL	$6, AX
	CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
	JE	nocpuinfo	// both bits set: keep support_avx/support_avx2 as detected
#endif
noavx:
	MOVB	$0, runtime·support_avx(SB)
	MOVB	$0, runtime·support_avx2(SB)

nocpuinfo:
	// if there is an _cgo_init, call it to let it
	// initialize and to set up GS. if not,
	// we set up GS ourselves.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	needtls
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)	// second argument: address of setg_gcc<>
	MOVL	BP, 0(SP)	// first argument: BP, loaded with &g0 above
	CALL	AX

	// update stackguard after _cgo_init
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
	JMP	ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
	JMP	ok
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	ok
	MOVL	AX, 0	// abort
ok:
	// set up m and g "registers"
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVL	DX, m_g0(AX)
	// save g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong

	// convention is D is always cleared
	CLD

	CALL	runtime·check(SB)

	// saved argc, argv
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0	// arg size
	CALL	runtime·newproc(SB)
	POPL	AX	// discard the two words pushed for newproc
	POPL	AX

	// start this M
	CALL	runtime·mstart(SB)

	INT	$3
	RET
311
// bad_proc_msg is the text written to file descriptor 2 by the bad_proc
// path of runtime·rt0_go. It is 0x3d (61) bytes long: seven 8-byte
// pieces, one 4-byte piece and a trailing newline. The same length is
// passed to runtime·write there, so the two must be kept in sync.
DATA bad_proc_msg<>+0x00(SB)/8, $"This pro"
DATA bad_proc_msg<>+0x08(SB)/8, $"gram can"
DATA bad_proc_msg<>+0x10(SB)/8, $" only be"
DATA bad_proc_msg<>+0x18(SB)/8, $" run on "
DATA bad_proc_msg<>+0x20(SB)/8, $"processo"
DATA bad_proc_msg<>+0x28(SB)/8, $"rs with "
DATA bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
DATA bad_proc_msg<>+0x38(SB)/4, $"ort."
DATA bad_proc_msg<>+0x3c(SB)/1, $0xa
GLOBL bad_proc_msg<>(SB), RODATA, $0x3d
322
// runtime·mainPC is a read-only 4-byte cell holding the address of
// runtime·main. runtime·rt0_go pushes the address of this cell as the
// "entry" argument to runtime·newproc.
DATA runtime·mainPC+0(SB)/4,$runtime·main(SB)
GLOBL runtime·mainPC(SB),RODATA,$4
Russ Cox1903ad72013-02-21 17:01:13 -0500325
// runtime·breakpoint executes a single INT $3 instruction and returns.
// It takes no arguments and has no results.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	INT	$3
	RET
329
// runtime·asminit loads the x87 FPU control word from
// runtime·controlWord64. It takes no arguments and has no results.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// Linux and MinGW start the FPU in extended double precision.
	// Other operating systems use double precision.
	// Change to double precision to match them,
	// and to match other hardware that only has double.
	FLDCW	runtime·controlWord64(SB)
	RET
337
/*
 *  go-routine
 */
Russ Cox0d3a0432009-03-30 00:01:07 -0700341
// void gosave(Gobuf*)
// save state in Gobuf; setjmp
//
// Records in *buf the caller's SP (the address of the argument slot),
// the caller's PC (the return address at 0(SP)) and the current g read
// from TLS, and clears buf.ret. buf.ctxt is expected to be zero already;
// runtime·badctxt is called if it is not.
// Uses AX, BX and CX as scratch.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// gobuf
	LEAL	buf+0(FP), BX		// caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// Assert ctxt is zero. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	2(PC)			// ctxt == 0: skip the badctxt call
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET
360
// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
//
// Installs buf.g as the current g in TLS, switches SP to buf.sp, loads
// buf.ret into AX and buf.ctxt into DX, zeroes the sp/ret/ctxt fields of
// the Gobuf, and jumps to buf.pc. Control does not come back here.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// gobuf
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// restore SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX	// BX no longer needed as the Gobuf pointer
	JMP	BX
Russ Cox8522a472009-06-17 15:15:55 -0700377
// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
//
// The caller's PC and SP and the current g are saved in g->sched before
// the switch. If the current g already is m->g0, control is transferred
// to runtime·badmcall instead. If fn does return, control is transferred
// to runtime·badmcall2.
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX	// save state in g->sched
	MOVL	0(SP), BX	// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX	// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to m->g0 & its stack, call fn
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)	// different: skip the two-instruction badmcall jump
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
	MOVL	SI, g(DX)	// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX		// argument for fn: the g we switched away from
	MOVL	DI, DX		// DX = fn (the func value)
	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX
	JMP	AX
	RET
411
// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
//
// The body is a bare RET; runtime·systemstack stores this symbol's
// address as the saved PC in g->sched before switching stacks.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET
419
// func systemstack(fn func())
//
// Runs fn on the g0 stack. Three cases, decided by comparing the
// current g with fields of its m:
//   - g == m->gsignal or g == m->g0: fn is tail-called on the current
//     stack (noswitch).
//   - g == m->curg: state is saved in g->sched, the stack is switched to
//     g0's, fn is called, and the stack is switched back (switch).
//   - anything else: runtime·badsystemstack is called.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)	// DX still holds g0 from the comparison above
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX		// DX = fn (the func value)
	MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been restored
	RET

noswitch:
	// already on system stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVL	DI, DX
	MOVL	0(DI), DI
	JMP	DI
Keith Randall4aa50432014-07-30 09:01:52 -0700483
/*
 * support for morestack
 */
487
// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// On entry 0(SP) is the return address into f (the function that needs
// more stack) and 4(SP) is f's own return address. DX is stored as
// g->sched.ctxt. m->morebuf receives f's caller's PC/SP and g; g->sched
// receives f's PC/SP and g. The routine then switches to m->g0's stack
// and calls runtime·newstack, which is not expected to return.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX	// BX = m for the rest of the routine
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)		// g != g0: skip the two-instruction failure path
	CALL	runtime·badmorestackg0(SB)
	INT	$3

	// Cannot grow signal stack.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	3(PC)		// g != gsignal: skip the two-instruction failure path
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

	// Called from f.
	// Set m->morebuf to f's caller.
	MOVL	4(SP), DI	// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX	// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)		// CX was just used as scratch; reload the TLS base
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVL	0(SP), AX	// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX	// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	MOVL	DX, (g_sched+gobuf_ctxt)(SI)

	// Call newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
	MOVL	AX, SP
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003	// crash if newstack returns
	RET
539
// runtime·morestack_noctxt zeroes DX and then jumps to
// runtime·morestack, which stores DX into g->sched.ctxt.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)
543
// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH(NAME, MAXSIZE) expects the argument size in CX. If CX is at
// most MAXSIZE (unsigned compare) it jumps to NAME through AX; otherwise
// the JA skips the MOVL/JMP pair and execution falls through to whatever
// follows the macro. Clobbers AX when the jump is taken.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
Keith Randall9cd57062013-08-02 13:03:14 -0700556
// reflect·call is an entry point in package reflect's namespace that
// jumps straight to ·reflectcall in this package. No frame is set up, so
// the arguments on the stack are passed through untouched.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)
559
// ·reflectcall loads argsize into CX and jumps to the first
// runtime·callN routine whose N is at least argsize. The sizes are the
// powers of two from 16 up to 1073741824. If argsize exceeds the largest
// size, it jumps to runtime·badreflectcall. The jump is a tail call: the
// chosen routine sees the same 20 bytes of arguments.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// argsize is larger than every supported frame size.
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX
591
// CALLFN(NAME, MAXSIZE) defines a WRAPPER function NAME with a
// MAXSIZE-byte frame and the 20-byte argument layout
// (argtype+0, f+4, argptr+8, argsize+12, retoffset+16). The generated
// function:
//   1. copies argsize bytes from argptr to the bottom of its own frame;
//   2. calls the code pointer stored in the first word of f, with f
//      itself in DX;
//   3. hands callRet<> the registers it expects: DX = argtype,
//      DI = argptr+retoffset, SI = SP+retoffset, CX = argsize-retoffset.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy return values back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET
616
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// Register inputs, stored in this order into the four outgoing argument
// slots for runtime·reflectcallmove: DX, DI, SI, CX.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET
628
// One fixed-frame call routine per size handled by ·reflectcall's
// DISPATCH chain; the two lists must stay in step.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
Russ Coxbba278a2009-07-08 18:16:09 -0700656
// runtime·procyield executes PAUSE in a loop, decrementing the 32-bit
// count loaded from cycles+0(FP) once per iteration and stopping when
// the count reaches zero. The decrement happens after the first PAUSE,
// so a count of 0 wraps around rather than returning immediately.
// Clobbers AX.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET
664
// ·publicationBarrier emits no instructions other than RET.
TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET
669
// void jmpdefer(fn, sp);
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
// 3. jmp to the argument
//
// Arguments: fv is the func value to run (left in DX for the callee),
// argp is the caller's SP. The return address that is rewound lives at
// argp-4. This routine never returns to its own caller.
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX	// fn
	MOVL	argp+4(FP), BX	// caller sp
	LEAL	-4(BX), SP	// caller sp after CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)	// return to CALL again
#else
	SUBL	$5, (SP)	// return to CALL again
#endif
	MOVL	0(DX), BX	// BX = code pointer stored in the first word of fn
	JMP	BX	// but first run the deferred function
Russ Cox0d3a0432009-03-30 00:01:07 -0700689
// Save state of caller into g->sched.
//
// File-local helper (distinct from runtime·gosave). It stores the
// caller's SP and return PC into the current g's sched buffer and
// clears sched.ret. sched.ctxt is expected to be zero already;
// runtime·badctxt is called if it is not. AX and BX are pushed on entry
// and popped before returning.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX
	LEAL	arg+0(FP), AX		// caller's SP
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX		// word just below it: the return PC
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// Assert ctxt is zero. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	2(PC)			// ctxt == 0: skip the badctxt call
	CALL	runtime·badctxt(SB)
	POPL	BX
	POPL	AX
	RET
709
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// Register use up to the call: AX = fn, BX = arg, DX = SP on entry,
// SI = m->g0, DI = current g. If the current g is not m->g0, the caller's
// state is saved with gosave<> and the stack is switched to g0's. Across
// the call, the original g and its stack depth (stack.hi - entry SP) are
// kept in the new frame at 8(SP) and 4(SP). The value left in AX by fn
// is stored as the result.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	noswitch
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

noswitch:
	// Now on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP	// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)	// save g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVL	BX, 0(SP)	// first argument in x86-32 ABI
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI	// DI = saved g
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI	// SI = stack.hi - saved depth = SP to return on
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
756
// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Turns fn into a Go func value (by passing the address of the
// argument slot that holds it) and forwards all four arguments
// to cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	// The four outgoing slots are independent; fill them last to first.
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	LEAL	fn+0(FP), AX	// address of fn, not its value
	MOVL	AX, 0(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET
772
// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
	NO_LOCAL_POINTERS

	// A nil g means the current thread was not created by Go.
	// In that case borrow an m for temporary use by calling needm.
	// We are then running on the thread stack, which has plenty of
	// room, but the linker cannot know that. Keep the call out of
	// the linker's analysis by making it indirectly through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BP
	CMPL	CX, $0
	JEQ	2(PC)	// TODO
#endif
	MOVL	g(CX), BP
	CMPL	BP, $0
	JEQ	borrow_m
	MOVL	g_m(BP), BP
	MOVL	BP, DX	// saved copy of oldm
	JMP	have_m
borrow_m:
	MOVL	$0, 0(SP)
	MOVL	$runtime·needm(SB), AX
	CALL	AX
	MOVL	0(SP), DX
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP

	// Set m->sched.sp = SP so that a panic raised during the
	// function we are about to run has a valid SP for running on
	// the g0 stack.
	// The code right after the have_m label saves this SP onto the
	// stack and then writes the same SP back to m->sched.sp. That
	// looks redundant, but on an unrecovered panic unwindm restores
	// g->sched.sp from that stack location and systemstack then
	// tries to use it. Without the store below, the restored SP
	// would be uninitialized (typically 0) and unusable.
	MOVL	m_g0(BP), SI
	MOVL	SP, (g_sched+gobuf_sp)(SI)

have_m:
	// There is now a valid m and we are running on its m->g0.
	// Save the current m->g0->sched.sp on the stack, then set it
	// to SP in preparation for the switch back to the m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVL	m_g0(BP), SI
	MOVL	(g_sched+gobuf_sp)(SI), AX
	MOVL	AX, 0(SP)
	MOVL	SP, (g_sched+gobuf_sp)(SI)

	// Switch to the m->curg stack and call runtime.cgocallbackg.
	// We are taking over execution of m->curg without resuming
	// whatever it had been running, so m->curg->sched has to be
	// saved in order to restore it afterwards.
	// m->curg->sched.sp is easy to restore, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// m->curg->sched.pc is saved by pushing it onto the stack.
	// As a bonus, the traceback routine then sees cgocallbackg as
	// about to return to that PC (the frame allocated below has the
	// same size as cgocallback_gofunc's frame declared above), so
	// tracebacks continue seamlessly into the earlier calls.
	//
	// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
	// 8(SP) is unused.
	MOVL	m_curg(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVL	(g_sched+gobuf_pc)(SI), BP
	MOVL	BP, -4(DI)
	MOVL	ctxt+12(FP), CX
	LEAL	-(4+12)(DI), SP
	MOVL	DX, 4(SP)
	MOVL	CX, 0(SP)
	CALL	runtime·cgocallbackg(SB)
	MOVL	4(SP), DX

	// Restore g->sched (== m->curg->sched) from the saved values.
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	12(SP), BP
	MOVL	BP, (g_sched+gobuf_pc)(SI)
	LEAL	(12+4)(SP), DI
	MOVL	DI, (g_sched+gobuf_sp)(SI)

	// Go back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so there is nothing to restore for it.)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP
	MOVL	0(SP), AX
	MOVL	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, needm was called above to borrow an m
	// for the duration of the call. The call is over, so hand it back
	// with dropm.
	CMPL	DX, $0
	JNE	keep_m
	MOVL	$runtime·dropm(SB), AX
	CALL	AX

keep_m:
	// Done!
	RET
885
// void setg(G*); set g. for use by needm.
// On Windows the word at 0x14(FS) is updated as well: it is cleared
// when gg is nil (and nothing else is written), otherwise it is
// pointed at gg->m->tls before g is stored.
TEXT runtime·setg(SB), NOSPLIT, $0-4
	MOVL	gg+0(FP), BX
#ifdef GOOS_windows
	CMPL	BX, $0
	JNE	have_g
	MOVL	$0, 0x14(FS)
	RET
have_g:
	MOVL	g_m(BX), AX
	LEAL	m_tls(AX), AX
	MOVL	AX, 0x14(FS)
#endif
	get_tls(CX)
	MOVL	BX, g(CX)
	RET
902
// void setg_gcc(G*); set g. for use by gcc
// Stores the gg argument into the TLS g slot, using AX and DX as scratch.
TEXT setg_gcc<>(SB), NOSPLIT, $0
	MOVL	gg+0(FP), DX
	get_tls(AX)
	MOVL	DX, g(AX)
	RET
909
// stackcheck verifies that SP lies inside the current g's stack.
// It executes INT $3 unless g->stack.lo < SP < g->stack.hi
// (both comparisons are unsigned and strict).
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	below_hi	// stack.hi > SP: upper bound holds
	INT	$3
below_hi:
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	above_lo	// SP > stack.lo: lower bound holds
	INT	$3
above_lo:
	RET
921
// func cputicks() int64
// Returns the RDTSC counter (AX = low word, DX = high word).
// When runtime·support_sse2 is 1, a fence is issued before RDTSC:
// LFENCE if runtime·lfenceBeforeRdtsc is 1, MFENCE otherwise.
// Without SSE2 no fence is executed.
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
	CMPB	runtime·support_sse2(SB), $1
	JNE	read_tsc
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	use_mfence
	BYTE	$0x0f; BYTE $0xae; BYTE $0xe8	// LFENCE
	JMP	read_tsc
use_mfence:
	BYTE	$0x0f; BYTE $0xae; BYTE $0xf0	// MFENCE
read_tsc:
	RDTSC
	MOVL	AX, ret_lo+0(FP)
	MOVL	DX, ret_hi+4(FP)
	RET
937
// ldt0setup calls runtime·setldt(7, &m0.tls, 32).
TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
	// Point ldt 7 at m0.tls.
	// On Linux ldt 1 would do, but on OS X 7 is the lowest usable entry.
	// The entry number is only a hint; setldt sets up GS with whatever it used.
	LEAL	runtime·m0+m_tls(SB), AX
	MOVL	AX, 4(SP)
	MOVL	$7, 0(SP)
	MOVL	$32, 8(SP)	// sizeof(tls array)
	CALL	runtime·setldt(SB)
	RET
948
// emptyfunc has no body: it returns immediately.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET
951
// hash function using AES hardware instructions
// Loads the registers aeshashbody expects (AX = data pointer,
// BX = size, DX = address of the result slot) and tail-jumps to it.
TEXT runtime·aeshash(SB),NOSPLIT,$0-16
	LEAL	ret+12(FP), DX	// where the hash is written
	MOVL	s+8(FP), BX	// size
	MOVL	p+0(FP), AX	// ptr to data
	JMP	runtime·aeshashbody(SB)
958
// aeshashstr hashes the string that p points to: it unpacks the
// string header into AX (data) and BX (length), puts the address of
// the result slot in DX, and tail-jumps to aeshashbody.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
	LEAL	ret+8(FP), DX	// where the hash is written
	MOVL	p+0(FP), AX	// ptr to string object
	MOVL	4(AX), BX	// length of string
	MOVL	(AX), AX	// string data (overwrites the header pointer, so load it last)
	JMP	runtime·aeshashbody(SB)
965
// aeshashbody is the shared body behind aeshash and aeshashstr.
// It is entered by a tail jump, so h+4(FP) still names the seed
// argument in the original caller's frame.
// AX: data
// BX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
	MOVL	h+4(FP), X0	// 32 bits of per-table hash seed
	PINSRW	$4, BX, X0	// 16 bits of length
	PSHUFHW	$0, X0, X0	// replace size with its low 2 bytes repeated 4 times
	MOVO	X0, X1	// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0	// scramble seed

	// Dispatch on the length.
	CMPL	BX, $16
	JB	len0to15
	JE	len16
	CMPL	BX, $32
	JBE	len17to32
	CMPL	BX, $64
	JBE	len33to64
	JMP	len65plus

len0to15:
	TESTL	BX, BX
	JE	len0

	ADDL	$16, AX
	TESTW	$0xff0, AX
	JE	nearpageend

	// A 16-byte load at this address stays within the page,
	// so load directly and mask off the bytes past the length.
	MOVOU	-16(AX), X1
	ADDL	BX, BX	// with the *8 scale below: 16 bytes per table entry
	PAND	masks<>(SB)(BX*8), X1

finish1:
	AESENC	X0, X1	// scramble input, xor in seed
	AESENC	X1, X1	// scramble combo 2 times
	AESENC	X1, X1
	MOVL	X1, (DX)
	RET

nearpageend:
	// The original address ends in 11111111xxxx (bits 4-11 all set),
	// so it may sit right up against a page boundary. Load the 16
	// bytes that end at the last data byte instead, then shift the
	// wanted bytes down using pshufb.
	MOVOU	-32(AX)(BX*1), X1
	ADDL	BX, BX	// with the *8 scale below: 16 bytes per table entry
	PSHUFB	shifts<>(SB)(BX*8), X1
	JMP	finish1

len0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVL	X0, (DX)
	RET

len16:
	MOVOU	(AX), X1
	JMP	finish1

len17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed: first 16 bytes and last 16 bytes
	// (the two loads may overlap)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(BX*1), X3

	// scramble 3 times
	AESENC	X0, X2
	AESENC	X1, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVL	X2, (DX)
	RET

len33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// first 32 bytes and last 32 bytes (the loads may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble 3 times
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine results
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET

len65plus:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// start with last (possibly overlapping) block
	MOVOU	-64(AX)(BX*1), X4
	MOVOU	-48(AX)(BX*1), X5
	MOVOU	-32(AX)(BX*1), X6
	MOVOU	-16(AX)(BX*1), X7

	// scramble state once
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// compute number of remaining 64-byte blocks
	DECL	BX
	SHRL	$6, BX

blockloop:
	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X4
	AESENC	X1, X5
	AESENC	X2, X6
	AESENC	X3, X7

	// scramble state
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	ADDL	$64, AX
	DECL	BX
	JNE	blockloop

	// 2 more scrambles to finish
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// combine results
	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVL	X4, (DX)
	RET
1149
// aeshash32 hashes the 4 bytes at p: the seed h and the data word are
// packed into X0, run through three AESENC rounds keyed from
// aeskeysched, and the low 32 bits are stored in ret.
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
	MOVL	h+4(FP), X0	// seed
	MOVL	p+0(FP), AX	// ptr to data
	PINSRD	$1, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
1159
// aeshash64 hashes the 8 bytes at p: the data fills the low quadword
// of X0, the seed h goes in dword 2, three AESENC rounds keyed from
// aeskeysched follow, and the low 32 bits are stored in ret.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
	MOVL	p+0(FP), AX	// ptr to data
	MOVQ	(AX), X0	// data
	PINSRD	$2, h+4(FP), X0	// seed
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVL	X0, ret+8(FP)
	RET
1169
// simple mask to get rid of data in the high part of the register.
// 16 entries of 16 bytes each; entry N (at offset N*0x10) has its
// low N bytes set to 0xff and the rest zero.

// entry 0: keep nothing
DATA masks<>+0x00(SB)/4, $0x00000000
DATA masks<>+0x04(SB)/4, $0x00000000
DATA masks<>+0x08(SB)/4, $0x00000000
DATA masks<>+0x0c(SB)/4, $0x00000000
// entry 1: keep low 1 byte
DATA masks<>+0x10(SB)/4, $0x000000ff
DATA masks<>+0x14(SB)/4, $0x00000000
DATA masks<>+0x18(SB)/4, $0x00000000
DATA masks<>+0x1c(SB)/4, $0x00000000
// entry 2: keep low 2 bytes
DATA masks<>+0x20(SB)/4, $0x0000ffff
DATA masks<>+0x24(SB)/4, $0x00000000
DATA masks<>+0x28(SB)/4, $0x00000000
DATA masks<>+0x2c(SB)/4, $0x00000000
// entry 3: keep low 3 bytes
DATA masks<>+0x30(SB)/4, $0x00ffffff
DATA masks<>+0x34(SB)/4, $0x00000000
DATA masks<>+0x38(SB)/4, $0x00000000
DATA masks<>+0x3c(SB)/4, $0x00000000
// entry 4: keep low 4 bytes
DATA masks<>+0x40(SB)/4, $0xffffffff
DATA masks<>+0x44(SB)/4, $0x00000000
DATA masks<>+0x48(SB)/4, $0x00000000
DATA masks<>+0x4c(SB)/4, $0x00000000
// entry 5: keep low 5 bytes
DATA masks<>+0x50(SB)/4, $0xffffffff
DATA masks<>+0x54(SB)/4, $0x000000ff
DATA masks<>+0x58(SB)/4, $0x00000000
DATA masks<>+0x5c(SB)/4, $0x00000000
// entry 6: keep low 6 bytes
DATA masks<>+0x60(SB)/4, $0xffffffff
DATA masks<>+0x64(SB)/4, $0x0000ffff
DATA masks<>+0x68(SB)/4, $0x00000000
DATA masks<>+0x6c(SB)/4, $0x00000000
// entry 7: keep low 7 bytes
DATA masks<>+0x70(SB)/4, $0xffffffff
DATA masks<>+0x74(SB)/4, $0x00ffffff
DATA masks<>+0x78(SB)/4, $0x00000000
DATA masks<>+0x7c(SB)/4, $0x00000000
// entry 8: keep low 8 bytes
DATA masks<>+0x80(SB)/4, $0xffffffff
DATA masks<>+0x84(SB)/4, $0xffffffff
DATA masks<>+0x88(SB)/4, $0x00000000
DATA masks<>+0x8c(SB)/4, $0x00000000
// entry 9: keep low 9 bytes
DATA masks<>+0x90(SB)/4, $0xffffffff
DATA masks<>+0x94(SB)/4, $0xffffffff
DATA masks<>+0x98(SB)/4, $0x000000ff
DATA masks<>+0x9c(SB)/4, $0x00000000
// entry 10: keep low 10 bytes
DATA masks<>+0xa0(SB)/4, $0xffffffff
DATA masks<>+0xa4(SB)/4, $0xffffffff
DATA masks<>+0xa8(SB)/4, $0x0000ffff
DATA masks<>+0xac(SB)/4, $0x00000000
// entry 11: keep low 11 bytes
DATA masks<>+0xb0(SB)/4, $0xffffffff
DATA masks<>+0xb4(SB)/4, $0xffffffff
DATA masks<>+0xb8(SB)/4, $0x00ffffff
DATA masks<>+0xbc(SB)/4, $0x00000000
// entry 12: keep low 12 bytes
DATA masks<>+0xc0(SB)/4, $0xffffffff
DATA masks<>+0xc4(SB)/4, $0xffffffff
DATA masks<>+0xc8(SB)/4, $0xffffffff
DATA masks<>+0xcc(SB)/4, $0x00000000
// entry 13: keep low 13 bytes
DATA masks<>+0xd0(SB)/4, $0xffffffff
DATA masks<>+0xd4(SB)/4, $0xffffffff
DATA masks<>+0xd8(SB)/4, $0xffffffff
DATA masks<>+0xdc(SB)/4, $0x000000ff
// entry 14: keep low 14 bytes
DATA masks<>+0xe0(SB)/4, $0xffffffff
DATA masks<>+0xe4(SB)/4, $0xffffffff
DATA masks<>+0xe8(SB)/4, $0xffffffff
DATA masks<>+0xec(SB)/4, $0x0000ffff
// entry 15: keep low 15 bytes
DATA masks<>+0xf0(SB)/4, $0xffffffff
DATA masks<>+0xf4(SB)/4, $0xffffffff
DATA masks<>+0xf8(SB)/4, $0xffffffff
DATA masks<>+0xfc(SB)/4, $0x00ffffff

GLOBL masks<>(SB),RODATA,$256
Keith Randalla5d40242013-03-12 10:47:44 -07001252
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// 16 entries of 16 bytes each; entry N (at offset N*0x10) selects
// source bytes 16-N..15 into destination bytes 0..N-1. The remaining
// selector bytes are 0xff, which pshufb turns into zero bytes.

// entry 0: all selectors are 0x00
DATA shifts<>+0x00(SB)/4, $0x00000000
DATA shifts<>+0x04(SB)/4, $0x00000000
DATA shifts<>+0x08(SB)/4, $0x00000000
DATA shifts<>+0x0c(SB)/4, $0x00000000
// entry 1: move top 1 byte down
DATA shifts<>+0x10(SB)/4, $0xffffff0f
DATA shifts<>+0x14(SB)/4, $0xffffffff
DATA shifts<>+0x18(SB)/4, $0xffffffff
DATA shifts<>+0x1c(SB)/4, $0xffffffff
// entry 2: move top 2 bytes down
DATA shifts<>+0x20(SB)/4, $0xffff0f0e
DATA shifts<>+0x24(SB)/4, $0xffffffff
DATA shifts<>+0x28(SB)/4, $0xffffffff
DATA shifts<>+0x2c(SB)/4, $0xffffffff
// entry 3: move top 3 bytes down
DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
DATA shifts<>+0x34(SB)/4, $0xffffffff
DATA shifts<>+0x38(SB)/4, $0xffffffff
DATA shifts<>+0x3c(SB)/4, $0xffffffff
// entry 4: move top 4 bytes down
DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x44(SB)/4, $0xffffffff
DATA shifts<>+0x48(SB)/4, $0xffffffff
DATA shifts<>+0x4c(SB)/4, $0xffffffff
// entry 5: move top 5 bytes down
DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x54(SB)/4, $0xffffff0f
DATA shifts<>+0x58(SB)/4, $0xffffffff
DATA shifts<>+0x5c(SB)/4, $0xffffffff
// entry 6: move top 6 bytes down
DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
DATA shifts<>+0x64(SB)/4, $0xffff0f0e
DATA shifts<>+0x68(SB)/4, $0xffffffff
DATA shifts<>+0x6c(SB)/4, $0xffffffff
// entry 7: move top 7 bytes down
DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
DATA shifts<>+0x78(SB)/4, $0xffffffff
DATA shifts<>+0x7c(SB)/4, $0xffffffff
// entry 8: move top 8 bytes down
DATA shifts<>+0x80(SB)/4, $0x0b0a0908
DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
DATA shifts<>+0x88(SB)/4, $0xffffffff
DATA shifts<>+0x8c(SB)/4, $0xffffffff
// entry 9: move top 9 bytes down
DATA shifts<>+0x90(SB)/4, $0x0a090807
DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
DATA shifts<>+0x98(SB)/4, $0xffffff0f
DATA shifts<>+0x9c(SB)/4, $0xffffffff
// entry 10: move top 10 bytes down
DATA shifts<>+0xa0(SB)/4, $0x09080706
DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
DATA shifts<>+0xac(SB)/4, $0xffffffff
// entry 11: move top 11 bytes down
DATA shifts<>+0xb0(SB)/4, $0x08070605
DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
DATA shifts<>+0xbc(SB)/4, $0xffffffff
// entry 12: move top 12 bytes down
DATA shifts<>+0xc0(SB)/4, $0x07060504
DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
DATA shifts<>+0xcc(SB)/4, $0xffffffff
// entry 13: move top 13 bytes down
DATA shifts<>+0xd0(SB)/4, $0x06050403
DATA shifts<>+0xd4(SB)/4, $0x0a090807
DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
DATA shifts<>+0xdc(SB)/4, $0xffffff0f
// entry 14: move top 14 bytes down
DATA shifts<>+0xe0(SB)/4, $0x05040302
DATA shifts<>+0xe4(SB)/4, $0x09080706
DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
DATA shifts<>+0xec(SB)/4, $0xffff0f0e
// entry 15: move top 15 bytes down
DATA shifts<>+0xf0(SB)/4, $0x04030201
DATA shifts<>+0xf4(SB)/4, $0x08070605
DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d

GLOBL shifts<>(SB),RODATA,$256
Russ Cox9ddfb642013-07-16 16:24:09 -04001337
// checkASM stores true in ret when both masks<>(SB) and shifts<>(SB)
// are 16-byte aligned, false otherwise.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVL	$shifts<>(SB), BX
	MOVL	$masks<>(SB), AX
	ORL	BX, AX	// a low bit set in either address survives the OR
	TESTL	$15, AX
	SETEQ	ret+0(FP)
	RET
1346
// memequal(p, q unsafe.Pointer, size uintptr) bool
// Identical pointers compare equal without touching memory;
// anything else is handed to memeqbody.
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JNE	compare
	MOVB	$1, ret+12(FP)
	RET
compare:
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX
	JMP	runtime·memeqbody(SB)
Keith Randall0c6b55e2014-07-16 14:16:19 -07001359
// memequal_varlen(a, b unsafe.Pointer) bool
// Same as memequal, except that the size is read from the closure
// that DX points to.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JNE	compare
	MOVB	$1, ret+8(FP)
	RET
compare:
	MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX
	JMP	runtime·memeqbody(SB)
1372
// bytes·Equal stores false in ret when the two slice lengths differ;
// otherwise it hands the data pointers and the common length to
// memeqbody, which writes the result.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
	MOVL	a_len+4(FP), BX
	MOVL	b_len+16(FP), CX
	CMPL	BX, CX
	JEQ	samelen
	MOVB	$0, ret+24(FP)
	RET
samelen:
	MOVL	a+0(FP), SI
	MOVL	b+12(FP), DI
	LEAL	ret+24(FP), AX
	JMP	runtime·memeqbody(SB)
1385
// memeqbody compares two memory regions of equal length and writes
// the boolean result through AX.
// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	tiny

	// 64 bytes at a time using xmm registers
	// (only when at least 64 bytes remain and SSE2 is available)
loop64:
	CMPL	BX, $64
	JB	loop4
	CMPB	runtime·support_sse2(SB), $1
	JNE	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB	X0, DX
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff	// all 16 mask bits set: every byte matched
	JEQ	loop64
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
loop4:
	CMPL	BX, $4
	JBE	tail4
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	loop4
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes: compare the 4 bytes that end at the
	// last byte of each region
tail4:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

tiny:
	CMPL	BX, $0
	JEQ	done	// ZF is set here, so SETEQ below stores 1

	// CX = -8*count, used as the shift count below
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_pageend

	// load at SI won't cross a page boundary.
	MOVL	(SI), SI
	JMP	si_loaded
si_pageend:
	// address ends in 111111xx. Load up to bytes we want, move to correct position.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_loaded:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_pageend
	MOVL	(DI), DI
	JMP	di_loaded
di_pageend:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_loaded:

	// The difference is shifted left so that only the low
	// count bytes can leave a non-zero result.
	SUBL	SI, DI
	SHLL	CX, DI
done:
	SETEQ	(AX)
	RET
Keith Randallb3946dc2013-05-14 16:05:51 -07001482
// cmpstring loads both string headers from the frame into the registers
// runtime·cmpbody reads and then tail-jumps to it:
// SI/BX = base and length of s1, DI/DX = base and length of s2,
// AX = address of the result slot.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	LEAL	ret+16(FP), AX		// where cmpbody stores the result
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	JMP	runtime·cmpbody(SB)
Keith Randallb3946dc2013-05-14 16:05:51 -07001490
// bytes·Compare loads the first two words (offsets 0 and 4) of each
// argument into the registers runtime·cmpbody reads, then tail-jumps to it:
// SI/BX come from s1, DI/DX from s2, AX = address of the result slot.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	LEAL	ret+24(FP), AX		// where cmpbody stores the result
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	JMP	runtime·cmpbody(SB)
Keith Randallb3946dc2013-05-14 16:05:51 -07001498
// bytes·IndexByte scans s_len bytes starting at s for the byte c and
// stores the offset of the first match in ret, or -1 if there is none.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI			// DI is the scan cursor; SI keeps the start
	CLD				// scan forward
	REPN
	SCASB
	// With a zero count the scan runs no iterations and leaves ZF
	// unchanged; DI == SI then, so both paths below store -1.
	JNE	absent
	// DI points one past the matching byte.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET
absent:
	MOVL	$-1, ret+16(FP)
	RET
1512
// strings·IndexByte scans s_len bytes starting at s for the byte c and
// stores the offset of the first match in ret, or -1 if there is none.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI			// DI is the scan cursor; SI keeps the start
	CLD				// scan forward
	REPN
	SCASB
	// With a zero count the scan runs no iterations and leaves ZF
	// unchanged; DI == SI then, so both paths below store -1.
	JNE	absent
	// DI points one past the matching byte.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET
absent:
	MOVL	$-1, ret+12(FP)
	RET
1526
// cmpbody performs a three-way lexicographic comparison of two byte ranges.
// Operands arrive in registers:
//	SI = pointer to a
//	DI = pointer to b
//	BX = length of a
//	DX = length of b
//	AX = address of the result word; receives +1, 0 or -1
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX			// DX = blen-alen, kept for the length tie-break
	JLE	have_min
	MOVL	BX, BP			// alen is the smaller length
have_min:
	// BP = min(alen, blen): the number of bytes both ranges share.
	CMPL	SI, DI
	JEQ	lengths			// same start address: only lengths can differ
	CMPL	BP, $4
	JB	under4
	CMPB	runtime·support_sse2(SB), $1
	JNE	loop4

	// Compare 16 bytes per iteration with SSE2.
loop16:
	CMPL	BP, $16
	JB	loop4
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB X1, BX
	XORL	$0xffff, BX		// flip so that set bits mark unequal bytes
	JNE	mismatch16		// some byte in this chunk differs
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	loop16

mismatch16:
	BSFL	BX, BX			// offset of the first differing byte
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX			// 1 if a's byte is the larger (unsigned)
	LEAL	-1(DX*2), DX		// map 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

	// Compare one 32-bit word per iteration while more than 4 bytes remain.
loop4:
	CMPL	BP, $4
	JBE	tail4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	mismatch4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	loop4

	// At most 4 shared bytes remain: compare the final 4 shared bytes,
	// possibly re-reading bytes already found equal.
tail4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	lengths

	// BX holds a's word and CX holds b's word, and they differ.
mismatch4:
	BSWAPL	BX			// put the first byte in memory at the top
	BSWAPL	CX
	XORL	BX, CX			// set bits mark where the words differ
	BSRL	CX, CX			// position of the most significant difference
	SHRL	CX, BX			// bring a's bit at that position to bit 0
	ANDL	$1, BX			// isolate it
	LEAL	-1(BX*2), BX		// map 1/0 to +1/-1
	MOVL	BX, (AX)
	RET

	// Fewer than 4 shared bytes.
under4:
	// CX = -8*BP; as a shift count this acts as 32 - 8*BP.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	lengths			// nothing shared: decide on lengths alone

	// Load a's bytes. If the low address byte is above 0xfc, read the
	// 4 bytes ending at the end of the shared data and shift them down
	// instead of reading forward from SI.
	CMPB	SI, $0xfc
	JA	a_near_end
	MOVL	(SI), SI
	JMP	a_loaded
a_near_end:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
a_loaded:
	SHLL	CX, SI			// discard bytes beyond the shared length

	// Load b's bytes the same way.
	CMPB	DI, $0xfc
	JA	b_near_end
	MOVL	(DI), DI
	JMP	b_loaded
b_near_end:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
b_loaded:
	SHLL	CX, DI			// discard bytes beyond the shared length

	BSWAPL	SI			// put the first byte in memory at the top
	BSWAPL	DI
	XORL	SI, DI			// set bits mark where the words differ
	JEQ	lengths
	BSRL	DI, CX			// position of the most significant difference
	SHRL	CX, SI			// bring a's bit at that position to bit 0
	ANDL	$1, SI			// isolate it
	LEAL	-1(SI*2), BX		// map 1/0 to +1/-1
	MOVL	BX, (AX)
	RET

	// Every shared byte matched, so the ordering is decided by the
	// lengths alone. DX still holds blen-alen.
lengths:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX
	SETLT	BX			// 1 when alen > blen
	SETEQ	CX			// 1 when alen == blen
	LEAL	-1(CX)(BX*2), BX	// yields +1, 0 or -1
	MOVL	BX, (AX)
	RET
Keith Randall6c7cbf02014-04-01 12:51:02 -07001647
// return0 places 0 in AX and returns.
TEXT runtime·return0(SB),NOSPLIT,$0
	MOVL	$0, AX
	RET
Keith Randall1b6807b2014-09-25 07:59:01 -07001651
// _cgo_topofstack returns g->m->curg.stack.hi in AX.
// It is called from cgo wrappers and therefore must obey the gcc
// calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX			// AX = g
	MOVL	g_m(AX), AX			// AX = g->m
	MOVL	m_curg(AX), AX			// AX = g->m->curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
Russ Coxa5a07332014-10-29 20:37:44 -04001661
// goexit is where the top-most function running on a goroutine
// returns to; the return address used is goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// Keep an instruction after the CALL so that a traceback from
	// goexit1 hits the code range of goexit.
	BYTE	$0x90	// NOP
Russ Cox15ced2d2014-11-11 17:06:22 -05001669
// addmoduledata appends a module's moduledata to the linked list of
// moduledata objects. It is called from .init_array by a function that
// the linker generates, so it follows the platform ABI with respect to
// register preservation: it only touches AX, CX (implicitly) and DX.
// It does not follow the ABI for arguments; the pointer to the
// moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, moduledata_next(DX)		// tail.next = new moduledata
	MOVL	AX, runtime·lastmoduledatap(SB)	// new moduledata becomes the tail
	RET
Keith Randalldf2f8132016-07-21 10:37:59 -07001680
// uint32tofloat64 converts the 32-bit argument a to a float64 result.
// The value is widened to 64 bits on the stack with a zero upper half
// and loaded from there as a 64-bit integer.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	$0, 4(SP)		// upper 32 bits are zero
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)		// lower 32 bits hold a
	FMOVV	0(SP), F0		// load the 64-bit integer onto the x87 stack
	FMOVDP	F0, ret+4(FP)		// store as float64 and pop
	RET
1688
// float64touint32 converts the float64 argument a to a 32-bit result.
// The value is stored as a 64-bit integer while the x87 control word is
// temporarily replaced by runtime·controlWord64trunc, and the low 32 bits
// of that integer are returned.
// Frame layout: 0(SP) = saved control word, 4(SP) = 64-bit integer.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)					// save the caller's control word
	FLDCW	runtime·controlWord64trunc(SB)		// presumably selects truncation - confirm at its definition
	FMOVVP	F0, 4(SP)				// store as 64-bit integer and pop
	FLDCW	0(SP)					// restore the saved control word
	MOVL	4(SP), AX				// low 32 bits of the integer
	MOVL	AX, ret+8(FP)
	RET