blob: dc5db91ea856eb7e803f5111636df2c274f0c2fe [file] [log] [blame]
Russ Cox0d3a0432009-03-30 00:01:07 -07001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
Russ Cox15ced2d2014-11-11 17:06:22 -05005#include "go_asm.h"
6#include "go_tls.h"
Russ Cox9ddfb642013-07-16 16:24:09 -04007#include "funcdata.h"
Russ Coxcb040d52014-09-04 23:05:18 -04008#include "textflag.h"
Russ Cox8522a472009-06-17 15:15:55 -07009
// rt0_go is the common 386 bootstrap: it carves out an aligned scratch
// area, gives g0 provisional stack bounds, probes the CPU with CPUID,
// sets up TLS (directly or through _cgo_init), wires g0 and m0 together,
// runs the runtime initializers and finally starts the first M.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// Move argc/argv onto a 16-byte aligned scratch area.
	MOVL	argc+0(FP), AX
	MOVL	argv+4(FP), BX
	SUBL	$128, SP		// generous scratch space
	ANDL	$~15, SP
	MOVL	AX, 120(SP)		// stash argc
	MOVL	BX, 124(SP)		// stash argv

	// Provisional stack bounds for g0.
	// _cgo_init may replace stackguard later.
	MOVL	$runtime·g0(SB), BP
	LEAL	(-64*1024+104)(SP), BX
	MOVL	BX, g_stackguard0(BP)
	MOVL	BX, g_stackguard1(BP)
	MOVL	BX, (g_stack+stack_lo)(BP)
	MOVL	SP, (g_stack+stack_hi)(BP)

	// Gather information about the processor we are running on.
#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
	JMP	cpuid_present
#else
	// CPUID exists iff the ID bit (bit 21) of EFLAGS can be toggled.
	PUSHFL
	PUSHFL
	XORL	$(1<<21), 0(SP)		// toggle the ID bit in the saved copy
	POPFL
	PUSHFL
	POPL	AX
	XORL	0(SP), AX		// AX = bits that differ from the original EFLAGS
	POPFL				// put the original EFLAGS back
	TESTL	$(1<<21), AX
	JNE	cpuid_present
#endif

unsupported_cpu: // report that MMX is required, then exit(1).
	MOVL	$2, 0(SP)
	MOVL	$bad_proc_msg<>(SB), 4(SP)
	MOVL	$0x3d, 8(SP)
	CALL	runtime·write(SB)
	MOVL	$1, 0(SP)
	CALL	runtime·exit(SB)
	INT	$3

cpuid_present:
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI			// SI = value CPUID leaf 0 left in AX
	CMPL	AX, $0
	JE	cpu_probe_done

	// Decide how RDTSC must be serialized.
	// LFENCE suffices on Intel; AMD needs MFENCE.
	// Anything else is unknown, so it gets MFENCE as well.
	CMPL	BX, $0x756E6547		// "Genu"
	JNE	vendor_done
	CMPL	DX, $0x49656E69		// "ineI"
	JNE	vendor_done
	CMPL	CX, $0x6C65746E		// "ntel"
	JNE	vendor_done
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
vendor_done:

	// CPUID leaf 1: version info and feature flags.
	MOVL	$1, AX
	CPUID
	MOVL	CX, DI			// a store to a global clobbers CX when generating PIC
	MOVL	AX, runtime·processorVersionInfo(SB)
	MOVL	DI, runtime·cpuid_ecx(SB)
	MOVL	DX, runtime·cpuid_edx(SB)

	// MMX is mandatory.
	TESTL	$(1<<23), DX		// MMX
	JZ	unsupported_cpu

	TESTL	$(1<<26), DX		// SSE2
	SETNE	runtime·support_sse2(SB)

	TESTL	$(1<<9), DI		// SSSE3
	SETNE	runtime·support_ssse3(SB)

	TESTL	$(1<<19), DI		// SSE4.1
	SETNE	runtime·support_sse41(SB)

	TESTL	$(1<<20), DI		// SSE4.2
	SETNE	runtime·support_sse42(SB)

	TESTL	$(1<<23), DI		// POPCNT
	SETNE	runtime·support_popcnt(SB)

	TESTL	$(1<<25), DI		// AES
	SETNE	runtime·support_aes(SB)

	TESTL	$(1<<27), DI		// OSXSAVE
	SETNE	runtime·support_osxsave(SB)

	// support_avx is reset to false below when the OS
	// does not support the XMM and YMM registers.
	TESTL	$(1<<28), DI		// AVX
	SETNE	runtime·support_avx(SB)

leaf7:
	// CPUID leaf 7, sub-leaf 0: only when the leaf-0 value saved in SI is at least 7.
	CMPL	SI, $7
	JLT	check_os_avx
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	MOVL	BX, runtime·cpuid_ebx7(SB)

	TESTL	$(1<<3), BX		// BMI1
	SETNE	runtime·support_bmi1(SB)

	// support_avx2 is reset to false below when the OS
	// does not support the XMM and YMM registers.
	TESTL	$(1<<5), BX
	SETNE	runtime·support_avx2(SB)

	TESTL	$(1<<8), BX		// BMI2
	SETNE	runtime·support_bmi2(SB)

	TESTL	$(1<<9), BX		// ERMS
	SETNE	runtime·support_erms(SB)

check_os_avx:
	// nacl cannot execute XGETBV, so OS support for
	// XMM and YMM cannot be probed there.
#ifndef GOOS_nacl
	CMPB	runtime·support_osxsave(SB), $1
	JNE	disable_avx
	MOVL	$0, CX
	// OSXSAVE is both required and sufficient for XGETBV.
	XGETBV
	ANDL	$6, AX
	CMPL	AX, $6			// both XMM and YMM state enabled by the OS?
	JE	cpu_probe_done
#endif
disable_avx:
	MOVB	$0, runtime·support_avx(SB)
	MOVB	$0, runtime·support_avx2(SB)

cpu_probe_done:
	// When _cgo_init is present, call it so that it can
	// initialize and set up GS; otherwise GS is set up here.
	MOVL	_cgo_init(SB), AX
	TESTL	AX, AX
	JZ	setup_tls
	MOVL	BP, 0(SP)		// first argument: the g0 pointer loaded into BP above
	MOVL	$setg_gcc<>(SB), BX
	MOVL	BX, 4(SP)		// second argument: setg_gcc
	CALL	AX

	// Recompute stackguard from the stack_lo left by _cgo_init.
	MOVL	$runtime·g0(SB), CX
	MOVL	(g_stack+stack_lo)(CX), AX
	ADDL	$const__StackGuard, AX
	MOVL	AX, g_stackguard0(CX)
	MOVL	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	// non-windows: skip runtime·ldt0setup(SB) and the tls test after _cgo_init
	JMP	tls_ready
#endif
setup_tls:
#ifdef GOOS_plan9
	// Plan 9: always skip runtime·ldt0setup(SB) and the tls test
	JMP	tls_ready
#endif

	// set up %gs
	CALL	runtime·ldt0setup(SB)

	// Write through TLS and read the value back via m0.tls to prove it works.
	get_tls(BX)
	MOVL	$0x123, g(BX)
	MOVL	runtime·m0+m_tls(SB), AX
	CMPL	AX, $0x123
	JEQ	tls_ready
	MOVL	AX, 0			// abort
tls_ready:
	// Install the m and g "registers".
	get_tls(BX)
	LEAL	runtime·g0(SB), DX
	MOVL	DX, g(BX)
	LEAL	runtime·m0(SB), AX

	// m0->g0 = g0
	MOVL	DX, m_g0(AX)
	// g0->m = m0
	MOVL	AX, g_m(DX)

	CALL	runtime·emptyfunc(SB)	// faults if the stack check is wrong

	// By convention the direction flag is always clear.
	CLD

	CALL	runtime·check(SB)

	// Pass the saved argc, argv to args.
	MOVL	120(SP), AX
	MOVL	AX, 0(SP)
	MOVL	124(SP), AX
	MOVL	AX, 4(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// Queue a new goroutine that will start the program.
	PUSHL	$runtime·mainPC(SB)	// entry
	PUSHL	$0			// arg size
	CALL	runtime·newproc(SB)
	POPL	AX
	POPL	AX

	// Start this M.
	CALL	runtime·mstart(SB)

	INT	$3
	RET
231
// bad_proc_msg is the 61-byte (0x3d) diagnostic that rt0_go writes to
// file descriptor 2 when the CPU lacks CPUID or MMX. It is laid down in
// chunks of at most 8 bytes, followed by a trailing newline byte.
DATA	bad_proc_msg<>+0(SB)/8, $"This pro"
DATA	bad_proc_msg<>+8(SB)/8, $"gram can"
DATA	bad_proc_msg<>+16(SB)/8, $" only be"
DATA	bad_proc_msg<>+24(SB)/8, $" run on "
DATA	bad_proc_msg<>+32(SB)/8, $"processo"
DATA	bad_proc_msg<>+40(SB)/8, $"rs with "
DATA	bad_proc_msg<>+48(SB)/8, $"MMX supp"
DATA	bad_proc_msg<>+56(SB)/4, $"ort."
DATA	bad_proc_msg<>+60(SB)/1, $0x0a
GLOBL	bad_proc_msg<>(SB), RODATA, $61
242
// mainPC is a read-only 4-byte word holding the address of runtime·main;
// rt0_go pushes its address as the entry argument to newproc.
DATA	runtime·mainPC+0(SB)/4, $runtime·main(SB)
GLOBL	runtime·mainPC(SB), RODATA, $4
Russ Cox1903ad72013-02-21 17:01:13 -0500245
// breakpoint raises a breakpoint trap (INT 3) and then returns.
TEXT runtime·breakpoint(SB), NOSPLIT, $0-0
	INT	$3
	RET
249
// asminit loads the x87 control word from runtime·controlWord64.
TEXT runtime·asminit(SB), NOSPLIT, $0-0
	// The FPU starts in extended double precision on Linux and MinGW,
	// while other operating systems use double precision.
	// Switch to double precision so that all of them agree,
	// and to agree with hardware that only offers double.
	FLDCW	runtime·controlWord64(SB)
	RET
257
Russ Cox8522a472009-06-17 15:15:55 -0700258/*
259 * go-routine
260 */
Russ Cox0d3a0432009-03-30 00:01:07 -0700261
// void gosave(Gobuf*)
// Record the caller's SP, PC and the current g in the Gobuf (like setjmp).
// gobuf.ret is cleared; gobuf.ctxt must already be zero.
TEXT runtime·gosave(SB), NOSPLIT, $0-4
	MOVL	buf+0(FP), AX		// AX = gobuf
	LEAL	buf+0(FP), BX		// BX = caller's SP
	MOVL	BX, gobuf_sp(AX)
	MOVL	0(SP), BX		// BX = caller's PC
	MOVL	BX, gobuf_pc(AX)
	MOVL	$0, gobuf_ret(AX)
	// ctxt is required to be zero here. See func save.
	MOVL	gobuf_ctxt(AX), BX
	TESTL	BX, BX
	JZ	ctxt_ok
	CALL	runtime·badctxt(SB)
ctxt_ok:
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	BX, gobuf_g(AX)
	RET
280
// void gogo(Gobuf*)
// Resume execution from the state held in a Gobuf (like longjmp):
// install gobuf.g as the current g, load SP, the return value (AX) and
// the context (DX), clear those Gobuf fields, and jump to gobuf.pc.
TEXT runtime·gogo(SB), NOSPLIT, $8-4
	MOVL	buf+0(FP), BX		// BX = gobuf

	// A non-nil ctxt is about to be overwritten:
	// run the deletion barrier on it first.
	MOVL	gobuf_ctxt(BX), DX
	TESTL	DX, DX
	JZ	ctxt_clear
	LEAL	gobuf_ctxt(BX), AX
	MOVL	AX, 0(SP)
	MOVL	$0, 4(SP)
	CALL	runtime·writebarrierptr_prewrite(SB)
	MOVL	buf+0(FP), BX		// reload gobuf after the call

ctxt_clear:
	MOVL	gobuf_g(BX), DX
	MOVL	0(DX), CX		// faults here if g == nil
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	gobuf_sp(BX), SP	// switch to the saved SP
	MOVL	gobuf_ret(BX), AX
	MOVL	gobuf_ctxt(BX), DX
	MOVL	$0, gobuf_sp(BX)	// zero the fields to help the garbage collector
	MOVL	$0, gobuf_ret(BX)
	MOVL	$0, gobuf_ctxt(BX)
	MOVL	gobuf_pc(BX), BX
	JMP	BX
Russ Cox8522a472009-06-17 15:15:55 -0700309
// func mcall(fn func(*g))
// Save the caller's state in g->sched, switch to m->g0's stack and
// call fn(g) there. Fn must never return; to keep running g it
// should gogo(&g->sched).
TEXT runtime·mcall(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI

	get_tls(DX)
	MOVL	g(DX), AX			// AX = g; its state goes into g->sched
	MOVL	0(SP), BX			// caller's PC
	MOVL	BX, (g_sched+gobuf_pc)(AX)
	LEAL	fn+0(FP), BX			// caller's SP
	MOVL	BX, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// Move over to m->g0 and its stack, then call fn.
	MOVL	g(DX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	SI, AX				// already on m->g0? then badmcall
	JNE	switch_to_g0
	MOVL	$runtime·badmcall(SB), AX
	JMP	AX
switch_to_g0:
	MOVL	SI, g(DX)			// g = m->g0
	MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHL	AX				// argument: the g we came from
	MOVL	DI, DX				// DX = closure context
	MOVL	0(DI), DI			// DI = code pointer of fn
	CALL	DI
	POPL	AX
	MOVL	$runtime·badmcall2(SB), AX	// fn returned
	JMP	AX
	RET
343
// systemstack_switch is a placeholder routine that systemstack leaves at
// the bottom of the G stack. It must be distinguishable from the routine
// at the top of the system stack, because the latter is what terminates
// the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET
351
// func systemstack(fn func())
// Run fn on the system (g0) stack.
//   - If the current g is m->gsignal or m->g0, fn is called directly.
//   - If the current g is m->curg, its state is saved in g->sched, the
//     thread switches to g0's stack, fn is called, and the thread then
//     switches back to m->curg.
//   - Any other g is an error and is reported through badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-4
	MOVL	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVL	g(CX), AX	// AX = g
	MOVL	g_m(AX), BX	// BX = m

	MOVL	m_gsignal(BX), DX	// DX = gsignal
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_g0(BX), DX	// DX = g0
	CMPL	AX, DX
	JEQ	noswitch

	MOVL	m_curg(BX), BP
	CMPL	AX, BP
	JEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVL	$runtime·badsystemstack(SB), AX
	CALL	AX
	// Trap if badsystemstack ever returns: without this, execution
	// would fall into the switch path below with AX and DX no longer
	// guaranteed to hold g and g0.
	INT	$3

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
	MOVL	SP, (g_sched+gobuf_sp)(AX)
	MOVL	AX, (g_sched+gobuf_g)(AX)

	// switch to g0
	get_tls(CX)
	MOVL	DX, g(CX)
	MOVL	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBL	$4, BX
	MOVL	$runtime·mstart(SB), DX
	MOVL	DX, 0(BX)
	MOVL	BX, SP

	// call target function
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer of fn
	CALL	DI

	// switch back to g: g = m->curg, SP = g->sched.sp
	get_tls(CX)
	MOVL	g(CX), AX
	MOVL	g_m(AX), BX
	MOVL	m_curg(BX), AX
	MOVL	AX, g(CX)
	MOVL	(g_sched+gobuf_sp)(AX), SP
	MOVL	$0, (g_sched+gobuf_sp)(AX)
	RET

noswitch:
	// already on system stack, just call directly
	MOVL	DI, DX		// DX = closure context
	MOVL	0(DI), DI	// DI = code pointer of fn
	CALL	DI
	RET
414
Russ Cox8522a472009-06-17 15:15:55 -0700415/*
416 * support for morestack
417 */
418
// Entered from a function prologue when the stack must grow.
//
// Traceback treats morestack on a g0 as the top of a stack (for
// example, morestack calling newstack calling the scheduler calling
// newm calling gc), so an argument size has to be recorded.
// For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// The scheduler stack (m->g0) cannot grow.
	get_tls(CX)
	MOVL	g(CX), BX
	MOVL	g_m(BX), BX
	MOVL	m_g0(BX), SI
	CMPL	g(CX), SI
	JNE	not_g0
	CALL	runtime·badmorestackg0(SB)
	INT	$3

not_g0:
	// The signal stack cannot grow either.
	MOVL	m_gsignal(BX), SI
	CMPL	g(CX), SI
	JNE	not_gsignal
	CALL	runtime·badmorestackgsignal(SB)
	INT	$3

not_gsignal:
	// We were called from f.
	// m->morebuf describes f's caller.
	MOVL	4(SP), DI			// f's caller's PC
	MOVL	DI, (m_morebuf+gobuf_pc)(BX)
	LEAL	8(SP), CX			// f's caller's SP
	MOVL	CX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVL	g(CX), SI
	MOVL	SI, (m_morebuf+gobuf_g)(BX)

	// g->sched describes the context inside f.
	MOVL	0(SP), AX			// f's PC
	MOVL	AX, (g_sched+gobuf_pc)(SI)
	MOVL	SI, (g_sched+gobuf_g)(SI)
	LEAL	4(SP), AX			// f's SP
	MOVL	AX, (g_sched+gobuf_sp)(SI)
	// gobuf.ctxt is filled in by newstack.

	// Run newstack on m->g0's stack.
	MOVL	m_g0(BX), BP
	MOVL	BP, g(CX)
	MOVL	(g_sched+gobuf_sp)(BP), AX
	MOVL	-4(AX), BX			// fault now if CALL would, before SP is smashed
	MOVL	AX, SP
	PUSHL	DX				// ctxt argument
	CALL	runtime·newstack(SB)
	MOVL	$0, 0x1003			// crash if newstack returns
	POPL	DX				// keeps the balance check happy
	RET
472
// morestack_noctxt zeroes DX, which morestack pushes as the ctxt
// argument of newstack, and then tail-jumps to morestack.
TEXT runtime·morestack_noctxt(SB), NOSPLIT, $0-0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)
476
// reflectcall: invoke a function with a caller-supplied argument block.
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// Frames cannot be variable-sized, so a small family of functions with
// fixed frame sizes is used instead, which encodes a few bits of the
// size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the argument size in CX is at most MAXSIZE
// (unsigned compare); otherwise control continues after the macro.
// AX is clobbered.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPL	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVL	$NAME(SB), AX;		\
	JMP	AX
// Note: a plain "JMP NAME(SB)" cannot be used - bad inlining results.
Keith Randall9cd57062013-08-02 13:03:14 -0700489
// reflect·call simply tail-jumps to reflectcall.
TEXT reflect·call(SB), NOSPLIT, $0-0
	JMP	·reflectcall(SB)
492
// reflectcall loads argsize into CX and jumps to the first call<N>
// routine whose frame size N is at least argsize. When argsize exceeds
// the largest frame (1073741824), it jumps to badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-20
	MOVL	argsize+12(FP), CX
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// No frame is large enough.
	MOVL	$runtime·badreflectcall(SB), AX
	JMP	AX
524
// CALLFN defines one fixed-frame call routine NAME with a MAXSIZE-byte
// frame. It copies argsize bytes from argptr into the frame, calls the
// function held in f with DX pointing at f, and then hands the result
// region that starts at retoffset to callRet for copying back.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
	NO_LOCAL_POINTERS;			\
	/* copy the arguments into the frame */	\
	MOVL	argptr+8(FP), SI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	SP, DI;				\
	REP;MOVSB;				\
	/* invoke the function */		\
	MOVL	f+4(FP), DX;			\
	MOVL	(DX), AX;			\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	CALL	AX;				\
	/* copy the results back */		\
	MOVL	argtype+0(FP), DX;		\
	MOVL	argptr+8(FP), DI;		\
	MOVL	argsize+12(FP), CX;		\
	MOVL	retoffset+16(FP), BX;		\
	MOVL	SP, SI;				\
	ADDL	BX, DI;				\
	ADDL	BX, SI;				\
	SUBL	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET
549
// callRet copies the return values back at the end of call*. It lives
// in its own function so that it can allocate stack space for the
// arguments of reflectcallmove. It does not follow the Go ABI: its
// inputs arrive in registers DX, DI, SI and CX, which are passed on,
// in that order, as the four arguments of reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $16-0
	MOVL	DX, 0(SP)
	MOVL	DI, 4(SP)
	MOVL	SI, 8(SP)
	MOVL	CX, 12(SP)
	CALL	runtime·reflectcallmove(SB)
	RET
561
// One fixed-frame call routine per power of two from 16 bytes to 1 GiB;
// these are the targets that reflectcall dispatches to.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
Russ Coxbba278a2009-07-08 18:16:09 -0700589
// procyield executes the PAUSE instruction `cycles` times.
// The single 4-byte argument is read from cycles+0(FP), so the frame
// is declared $0-4 to match.
// cycles must be non-zero: the counter is decremented before it is
// tested, so a value of 0 would wrap around instead of returning at once.
TEXT runtime·procyield(SB),NOSPLIT,$0-4
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET
597
// publicationBarrier is an empty function on x86.
TEXT ·publicationBarrier(SB), NOSPLIT, $0-0
	// x86 already keeps stores ordered, so all that is
	// needed here is a compile barrier.
	RET
602
// void jmpdefer(fn, sp);
// Called from deferreturn.
// 1. pop the caller
// 2. move the caller's return address back by 5 bytes (the length of a
//    CALL with a 32 bit displacement). When building for shared
//    libraries it is moved back by 16 bytes instead: 5 for the CALL &
//    displacement to __x86.get_pc_thunk.cx, 6 for the LEAL that loads
//    the offset into BX, and 5 more for the call & displacement.
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
	MOVL	fv+0(FP), DX		// DX = fn
	MOVL	argp+4(FP), BX		// BX = caller sp
	LEAL	-4(BX), SP		// SP = caller sp just after its CALL
#ifdef GOBUILDMODE_shared
	SUBL	$16, (SP)		// make the return land on the CALL again
#else
	SUBL	$5, (SP)		// make the return land on the CALL again
#endif
	MOVL	0(DX), BX
	JMP	BX			// but run the deferred function first
Russ Cox0d3a0432009-03-30 00:01:07 -0700622
// Record the caller's SP and PC in g->sched and clear g->sched.ret.
// g->sched.ctxt must already be zero. AX and BX are preserved.
TEXT gosave<>(SB),NOSPLIT,$0
	PUSHL	AX
	PUSHL	BX
	get_tls(BX)
	MOVL	g(BX), BX			// BX = g
	LEAL	arg+0(FP), AX			// AX = caller's SP
	MOVL	AX, (g_sched+gobuf_sp)(BX)
	MOVL	-4(AX), AX			// AX = return PC, the word just below it
	MOVL	AX, (g_sched+gobuf_pc)(BX)
	MOVL	$0, (g_sched+gobuf_ret)(BX)
	// ctxt is required to be zero here. See func save.
	MOVL	(g_sched+gobuf_ctxt)(BX), AX
	TESTL	AX, AX
	JZ	ctxt_ok
	CALL	runtime·badctxt(SB)
ctxt_ok:
	POPL	BX
	POPL	AX
	RET
642
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Run fn(arg) on the scheduler stack, aligned as the gcc ABI expects,
// and return the value fn leaves in AX.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-12
	MOVL	fn+0(FP), AX
	MOVL	arg+4(FP), BX

	MOVL	SP, DX				// DX = SP on entry

	// Decide whether a switch to the m->g0 stack is needed.
	// This routine is also used to create new OS threads, and
	// those callers already arrive on the m->g0 stack.
	get_tls(CX)
	MOVL	g(CX), BP
	MOVL	g_m(BP), BP
	MOVL	m_g0(BP), SI
	MOVL	g(CX), DI
	CMPL	SI, DI
	JEQ	on_g0
	CALL	gosave<>(SB)
	get_tls(CX)
	MOVL	SI, g(CX)
	MOVL	(g_sched+gobuf_sp)(SI), SP

on_g0:
	// From here on we run on a scheduling stack (a pthread-created stack).
	SUBL	$32, SP
	ANDL	$~15, SP			// alignment, perhaps unnecessary
	MOVL	DI, 8(SP)			// remember g
	MOVL	(g_stack+stack_hi)(DI), DI
	SUBL	DX, DI
	MOVL	DI, 4(SP)			// remember the depth in the stack (SP itself can't be saved, as the stack might be copied during a callback)
	MOVL	BX, 0(SP)			// first argument in x86-32 ABI
	CALL	AX

	// Bring back g and the stack pointer.
	get_tls(CX)
	MOVL	8(SP), DI
	MOVL	(g_stack+stack_hi)(DI), SI
	SUBL	4(SP), SI
	MOVL	DI, g(CX)
	MOVL	SI, SP

	MOVL	AX, ret+8(FP)
	RET
689
// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
// Convert fn into a Go func value (by taking the address of the argument
// slot that holds it) and forward all four arguments to cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
	LEAL	fn+0(FP), AX			// &fn serves as the FuncVal*
	MOVL	AX, 0(SP)
	MOVL	frame+4(FP), AX
	MOVL	AX, 4(SP)
	MOVL	framesize+8(FP), AX
	MOVL	AX, 8(SP)
	MOVL	ctxt+12(FP), AX
	MOVL	AX, 12(SP)
	MOVL	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET
705
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700706// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
Alex Brainman9d968cb2015-04-27 17:32:23 +1000707// See cgocall.go for more details.
//
// Outline of the code below:
//   1. Find the current m; if there is no g, borrow one via needm.
//   2. Save m->g0->sched.sp at 0(SP) and point it at the current SP.
//   3. Switch g and SP to m->curg, build a small frame there and call
//      runtime.cgocallbackg.
//   4. Restore m->curg->sched.{pc,sp}, switch back to m->g0 and restore
//      its sched.sp from 0(SP).
//   5. If the m was borrowed (DX == 0), hand it back via dropm.
// DX carries "oldm" across the whole function: non-zero when a g already
// existed on entry, and the value read back from 0(SP) on the needm path.
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700708TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
Russ Coxe844f532014-09-12 07:46:11 -0400709 NO_LOCAL_POINTERS
710
Russ Cox89f185f2014-06-26 11:54:39 -0400711 // If g is nil, Go did not create the current thread.
Russ Cox6c976392013-02-20 17:48:23 -0500712 // Call needm to obtain one for temporary use.
713 // In this case, we're running on the thread stack, so there's
714 // lots of space, but the linker doesn't know. Hide the call from
715 // the linker analysis by using an indirect call through AX.
716 get_tls(CX)
717#ifdef GOOS_windows
	// On Windows the TLS base itself may be zero. In that case BP is
	// preset to 0 and the JEQ 2(PC) skips the g(CX) load that follows the
	// #endif, so the CMPL BP, $0 below sends us to needm.
Russ Coxdba623b2013-07-23 18:40:02 -0400718 MOVL $0, BP
Russ Cox6c976392013-02-20 17:48:23 -0500719 CMPL CX, $0
Russ Cox89f185f2014-06-26 11:54:39 -0400720 JEQ 2(PC) // TODO
Russ Cox6c976392013-02-20 17:48:23 -0500721#endif
Russ Cox89f185f2014-06-26 11:54:39 -0400722 MOVL g(CX), BP
Russ Cox6c976392013-02-20 17:48:23 -0500723 CMPL BP, $0
Russ Cox89f185f2014-06-26 11:54:39 -0400724 JEQ needm
725 MOVL g_m(BP), BP
726 MOVL BP, DX // saved copy of oldm
727 JMP havem
Russ Cox6c976392013-02-20 17:48:23 -0500728needm:
	// 0(SP) is zeroed before the call and read back into DX afterwards;
	// the dropm test at the end of the function depends on DX being 0 on
	// this path.
	// NOTE(review): this assumes needm leaves 0(SP) unchanged - confirm
	// against needm's definition.
Russ Cox89f185f2014-06-26 11:54:39 -0400729 MOVL $0, 0(SP)
Russ Cox6c976392013-02-20 17:48:23 -0500730 MOVL $runtime·needm(SB), AX
731 CALL AX
Russ Coxf0112822013-07-24 09:01:57 -0400732 MOVL 0(SP), DX
	// A g now exists; reload the TLS base and fetch the new m into BP.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500733 get_tls(CX)
Russ Cox89f185f2014-06-26 11:54:39 -0400734 MOVL g(CX), BP
735 MOVL g_m(BP), BP
Russ Cox9b732382012-03-08 12:12:40 -0500736
Russ Coxc4efaac2014-10-28 21:53:09 -0400737 // Set m->sched.sp = SP, so that if a panic happens
738 // during the function we are about to execute, it will
739 // have a valid SP to run on the g0 stack.
740 // The next few lines (after the havem label)
741 // will save this SP onto the stack and then write
742 // the same SP back to m->sched.sp. That seems redundant,
743 // but if an unrecovered panic happens, unwindm will
744 // restore the g->sched.sp from the stack location
Russ Cox656be312014-11-12 14:54:31 -0500745 // and then systemstack will try to use it. If we don't set it here,
Russ Coxc4efaac2014-10-28 21:53:09 -0400746 // that restored SP will be uninitialized (typically 0) and
747 // will not be usable.
748 MOVL m_g0(BP), SI
749 MOVL SP, (g_sched+gobuf_sp)(SI)
750
Russ Cox6c976392013-02-20 17:48:23 -0500751havem:
752 // Now there's a valid m, and we're running on its m->g0.
753 // Save current m->g0->sched.sp on stack and then set it to SP.
754 // Save current sp in m->g0->sched.sp in preparation for
755 // switch back to m->curg stack.
Russ Coxdba623b2013-07-23 18:40:02 -0400756 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
Russ Coxf9ca3b52011-03-07 10:37:42 -0500757 MOVL m_g0(BP), SI
Russ Coxdba623b2013-07-23 18:40:02 -0400758 MOVL (g_sched+gobuf_sp)(SI), AX
759 MOVL AX, 0(SP)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500760 MOVL SP, (g_sched+gobuf_sp)(SI)
761
Russ Coxdba623b2013-07-23 18:40:02 -0400762 // Switch to m->curg stack and call runtime.cgocallbackg.
763 // Because we are taking over the execution of m->curg
764 // but *not* resuming what had been running, we need to
765 // save that information (m->curg->sched) so we can restore it.
Russ Cox528534c2013-06-05 07:16:53 -0400766 // We can restore m->curg->sched.sp easily, because calling
Alex Brainman72e83482011-08-18 12:17:09 -0400767 // runtime.cgocallbackg leaves SP unchanged upon return.
Russ Cox528534c2013-06-05 07:16:53 -0400768 // To save m->curg->sched.pc, we push it onto the stack.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500769 // This has the added benefit that it looks to the traceback
Alex Brainman72e83482011-08-18 12:17:09 -0400770 // routine like cgocallbackg is going to return to that
Russ Coxdba623b2013-07-23 18:40:02 -0400771 // PC (because the frame we allocate below has the same
772 // size as cgocallback_gofunc's frame declared above)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500773 // so that the traceback will seamlessly trace back into
774 // the earlier calls.
Russ Coxdba623b2013-07-23 18:40:02 -0400775 //
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700776 // In the new goroutine, 4(SP) holds the saved oldm (DX) register.
777 // 8(SP) is unused.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500778 MOVL m_curg(BP), SI
779 MOVL SI, g(CX)
Russ Coxdba623b2013-07-23 18:40:02 -0400780 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
Russ Coxf9ca3b52011-03-07 10:37:42 -0500781 MOVL (g_sched+gobuf_pc)(SI), BP
Russ Coxdba623b2013-07-23 18:40:02 -0400782 MOVL BP, -4(DI)
	// ctxt is fetched before SP is repointed. CX is reused for it, which
	// clobbers the TLS base; get_tls(CX) reloads it after the call.
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700783 MOVL ctxt+12(FP), CX
	// New SP = DI-16, giving this layout on the curg stack:
	//   12(SP) saved sched.pc (the word written at -4(DI) above)
	//    8(SP) unused
	//    4(SP) oldm (DX)
	//    0(SP) ctxt
Russ Coxf0112822013-07-24 09:01:57 -0400784 LEAL -(4+12)(DI), SP
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700785 MOVL DX, 4(SP)
786 MOVL CX, 0(SP)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500787 CALL runtime·cgocallbackg(SB)
	// Reload oldm from the slot it was stored in before the call.
Ian Lance Taylor5f9a8702016-04-27 14:18:29 -0700788 MOVL 4(SP), DX
Russ Coxf9ca3b52011-03-07 10:37:42 -0500789
Russ Cox528534c2013-06-05 07:16:53 -0400790 // Restore g->sched (== m->curg->sched) from saved values.
	// 12(SP) is the pc saved above; (12+4)(SP) recomputes the original
	// sched.sp (DI), since SP was set to DI-16.
Russ Coxf9ca3b52011-03-07 10:37:42 -0500791 get_tls(CX)
792 MOVL g(CX), SI
Russ Coxf0112822013-07-24 09:01:57 -0400793 MOVL 12(SP), BP
Russ Coxf9ca3b52011-03-07 10:37:42 -0500794 MOVL BP, (g_sched+gobuf_pc)(SI)
Russ Coxf0112822013-07-24 09:01:57 -0400795 LEAL (12+4)(SP), DI
Russ Coxf9ca3b52011-03-07 10:37:42 -0500796 MOVL DI, (g_sched+gobuf_sp)(SI)
797
798 // Switch back to m->g0's stack and restore m->g0->sched.sp.
799 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
800 // so we do not have to restore it.)
Russ Cox89f185f2014-06-26 11:54:39 -0400801 MOVL g(CX), BP
802 MOVL g_m(BP), BP
Russ Coxf9ca3b52011-03-07 10:37:42 -0500803 MOVL m_g0(BP), SI
804 MOVL SI, g(CX)
805 MOVL (g_sched+gobuf_sp)(SI), SP
Russ Coxdba623b2013-07-23 18:40:02 -0400806 MOVL 0(SP), AX
807 MOVL AX, (g_sched+gobuf_sp)(SI)
Russ Cox6c976392013-02-20 17:48:23 -0500808
809 // If the m on entry was nil, we called needm above to borrow an m
810 // for the duration of the call. Since the call is over, return it with dropm.
	// JNE 3(PC) skips the two-instruction indirect call to dropm when
	// DX (oldm) is non-zero.
Russ Coxf0112822013-07-24 09:01:57 -0400811 CMPL DX, $0
Russ Cox6c976392013-02-20 17:48:23 -0500812 JNE 3(PC)
813 MOVL $runtime·dropm(SB), AX
814 CALL AX
Russ Coxf9ca3b52011-03-07 10:37:42 -0500815
816 // Done!
817 RET
818
Russ Cox89f185f2014-06-26 11:54:39 -0400819// void setg(G*); set g. for use by needm.
//
// Stores the gg argument into the g slot of thread-local storage.
// On Windows, 0x14(FS) is updated first: it is cleared when gg is nil
// (and the function returns without writing the g slot), otherwise it
// is pointed at gg->m->tls before g is stored.
Russ Cox25f6b022014-08-27 11:32:17 -0400820TEXT runtime·setg(SB), NOSPLIT, $0-4
Russ Cox89f185f2014-06-26 11:54:39 -0400821 MOVL gg+0(FP), BX
Russ Cox6c976392013-02-20 17:48:23 -0500822#ifdef GOOS_windows
Russ Cox89f185f2014-06-26 11:54:39 -0400823 CMPL BX, $0
Russ Cox6c976392013-02-20 17:48:23 -0500824 JNE settls
	// gg == nil: clear the pointer at 0x14(FS) and return early.
825 MOVL $0, 0x14(FS)
826 RET
827settls:
	// 0x14(FS) = &gg->m->tls
Russ Cox89f185f2014-06-26 11:54:39 -0400828 MOVL g_m(BX), AX
Russ Cox6c976392013-02-20 17:48:23 -0500829 LEAL m_tls(AX), AX
830 MOVL AX, 0x14(FS)
831#endif
Russ Cox6c976392013-02-20 17:48:23 -0500832 get_tls(CX)
Russ Cox6c976392013-02-20 17:48:23 -0500833 MOVL BX, g(CX)
834 RET
835
Russ Cox89f185f2014-06-26 11:54:39 -0400836// void setg_gcc(G*); set g. for use by gcc
//
// File-local (<>) variant: stores the gg argument into the TLS g slot,
// using AX for the TLS base and DX for the value. Unlike runtime·setg it
// has no Windows-specific 0x14(FS) handling.
837TEXT setg_gcc<>(SB), NOSPLIT, $0
Russ Cox6a70f9d2013-03-25 18:14:02 -0400838 get_tls(AX)
Russ Cox89f185f2014-06-26 11:54:39 -0400839 MOVL gg+0(FP), DX
840 MOVL DX, g(AX)
Russ Cox6a70f9d2013-03-25 18:14:02 -0400841 RET
842
Russ Cox8ac35be2014-09-09 14:02:37 -0400843// check that SP is in range [g->stack.lo, g->stack.hi)
//
// Executes INT $3 (breakpoint trap) if SP is outside the current g's stack.
// NOTE: both tests use JHI (strictly above, unsigned), so SP == stack.lo
// also traps; the range actually accepted is lo < SP < hi.
Keith Randall5a546962013-08-07 10:23:24 -0700844TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
Russ Coxf9ca3b52011-03-07 10:37:42 -0500845 get_tls(CX)
846 MOVL g(CX), AX
	// stack.hi > SP: skip the trap.
Russ Cox15b76ad2014-09-09 13:39:57 -0400847 CMPL (g_stack+stack_hi)(AX), SP
Russ Coxf9ca3b52011-03-07 10:37:42 -0500848 JHI 2(PC)
849 INT $3
	// SP > stack.lo: skip the trap.
Russ Cox15b76ad2014-09-09 13:39:57 -0400850 CMPL SP, (g_stack+stack_lo)(AX)
Russ Coxf9ca3b52011-03-07 10:37:42 -0500851 JHI 2(PC)
852 INT $3
853 RET
854
// getcallerpc takes the address of its caller's first argument (argp) and
// returns the word stored 4 bytes below that address, which the comments
// below identify as the calling pc.
Austin Clementsfaa7a7e2015-05-20 16:30:49 -0400855TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
Russ Cox25f6b022014-08-27 11:32:17 -0400856 MOVL argp+0(FP),AX // addr of first arg
Russ Cox0d3a0432009-03-30 00:01:07 -0700857 MOVL -4(AX),AX // get calling pc
Russ Cox25f6b022014-08-27 11:32:17 -0400858 MOVL AX, ret+4(FP)
Russ Cox0d3a0432009-03-30 00:01:07 -0700859 RET
860
Dmitry Vyukov6e70fdd2015-02-17 14:25:49 +0300861// func cputicks() int64
//
// Returns the RDTSC counter as a 64-bit value (AX = low word, DX = high
// word). When runtime·support_sse2 is 1, a fence is issued before RDTSC:
// LFENCE if runtime·lfenceBeforeRdtsc is 1, MFENCE otherwise. Without
// SSE2 no fence is executed. Both fences are emitted as raw BYTE
// sequences rather than mnemonics.
Russ Cox25f6b022014-08-27 11:32:17 -0400862TEXT runtime·cputicks(SB),NOSPLIT,$0-8
Martin Möhrmann5a6c5802017-04-27 08:30:27 +0200863 CMPB runtime·support_sse2(SB), $1
864 JNE done
Dmitry Vyukov6e70fdd2015-02-17 14:25:49 +0300865 CMPB runtime·lfenceBeforeRdtsc(SB), $1
866 JNE mfence
867 BYTE $0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
868 JMP done
869mfence:
870 BYTE $0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
871done:
Shenghou Ma6392b432012-02-06 12:49:28 -0500872 RDTSC
Russ Cox25f6b022014-08-27 11:32:17 -0400873 MOVL AX, ret_lo+0(FP)
874 MOVL DX, ret_hi+4(FP)
Damian Gryski8e765da2012-02-02 14:09:27 -0500875 RET
876
// ldt0setup calls runtime·setldt with three stack arguments:
// entry number 7, the address of m0.tls, and the size 32.
Keith Randall5a546962013-08-07 10:23:24 -0700877TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
Matthew Dempsky7bb38f62015-11-12 15:35:50 -0800878 // set up ldt 7 to point at m0.tls
Russ Cox0d3a0432009-03-30 00:01:07 -0700879 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
Russ Cox1b14bdb2009-09-22 16:28:32 -0700880 // the entry number is just a hint. setldt will set up GS with what it used.
Russ Cox0d3a0432009-03-30 00:01:07 -0700881 MOVL $7, 0(SP)
Matthew Dempsky7bb38f62015-11-12 15:35:50 -0800882 LEAL runtime·m0+m_tls(SB), AX
Russ Cox0d3a0432009-03-30 00:01:07 -0700883 MOVL AX, 4(SP)
884 MOVL $32, 8(SP) // sizeof(tls array)
Russ Cox68b42552010-11-04 14:00:19 -0400885 CALL runtime·setldt(SB)
Russ Cox0d3a0432009-03-30 00:01:07 -0700886 RET
887
// emptyfunc does nothing and returns immediately. It is declared with
// flags 0 (i.e. without NOSPLIT) and a $0-0 frame.
Russ Cox9ddfb642013-07-16 16:24:09 -0400888TEXT runtime·emptyfunc(SB),0,$0-0
Russ Cox0d3a0432009-03-30 00:01:07 -0700889 RET
890
Keith Randalld5e4c402015-01-06 16:42:48 -0800891// memhash_varlen(p unsafe.Pointer, h seed) uintptr
892// redirects to memhash(p, h, size) using the size
893// stored in the closure.
//
// The size is read from offset 4 of the block DX points to (DX holding
// the closure pointer on entry, per the comment above). The three
// arguments are placed at 0/4/8(SP) and memhash's result is read back
// from 12(SP).
894TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
895 GO_ARGS
896 NO_LOCAL_POINTERS
897 MOVL p+0(FP), AX
898 MOVL h+4(FP), BX
	// CX = size, taken from the closure.
899 MOVL 4(DX), CX
900 MOVL AX, 0(SP)
901 MOVL BX, 4(SP)
902 MOVL CX, 8(SP)
903 CALL runtime·memhash(SB)
904 MOVL 12(SP), AX
905 MOVL AX, ret+8(FP)
906 RET
907
Keith Randalla5d40242013-03-12 10:47:44 -0700908// hash function using AES hardware instructions
//
// Argument layout: p at +0, seed h at +4, size s at +8, result at +12.
// Sets up AX (data), BX (size) and DX (address of the result slot) and
// tail-jumps to aeshashbody. The seed is not loaded here; aeshashbody
// reads it itself from h+4(FP), which still refers to this function's
// arguments because control is transferred with JMP, not CALL.
Keith Randalla2a97682014-07-31 15:07:05 -0700909TEXT runtime·aeshash(SB),NOSPLIT,$0-16
910 MOVL p+0(FP), AX // ptr to data
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300911 MOVL s+8(FP), BX // size
Keith Randalld5e4c402015-01-06 16:42:48 -0800912 LEAL ret+12(FP), DX
Keith Randalla5d40242013-03-12 10:47:44 -0700913 JMP runtime·aeshashbody(SB)
914
// aeshashstr hashes a string given a pointer to its header.
// Argument layout: p at +0, seed h at +4, result at +8. The header p
// points to is read as {data pointer at 0, length at 4}. AX/BX/DX are set
// up as data/length/result address and control tail-jumps to aeshashbody,
// which reads the seed from h+4(FP) on its own.
Keith Randalld5e4c402015-01-06 16:42:48 -0800915TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
Keith Randalla2a97682014-07-31 15:07:05 -0700916 MOVL p+0(FP), AX // ptr to string object
	// The length must be loaded before AX is overwritten with the data pointer.
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300917 MOVL 4(AX), BX // length of string
Keith Randalla5d40242013-03-12 10:47:44 -0700918 MOVL (AX), AX // string data
Keith Randalld5e4c402015-01-06 16:42:48 -0800919 LEAL ret+8(FP), DX
Keith Randalla5d40242013-03-12 10:47:44 -0700920 JMP runtime·aeshashbody(SB)
921
922// AX: data
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300923// BX: length
Keith Randalld5e4c402015-01-06 16:42:48 -0800924// DX: address to put return value
//
// Shared body of aeshash and aeshashstr, which reach it with JMP. Because
// no CALL intervenes, h+4(FP) below still addresses the seed argument of
// whichever of those two functions jumped here (both keep it at +4).
// The result is the low 32 bits of the final XMM state, stored through DX.
// Inputs are dispatched by length: 0, 1-15, 16, 17-32, 33-64 and 65+ bytes.
925TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
Keith Randall91059de2015-08-31 16:26:12 -0700926 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300927 PINSRW $4, BX, X0 // 16 bits of length
Keith Randall91059de2015-08-31 16:26:12 -0700928 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
929 MOVO X0, X1 // save unscrambled seed
930 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
931 AESENC X0, X0 // scramble seed
932
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300933 CMPL BX, $16
Keith Randall7a4a64e2014-12-10 14:20:17 -0800934 JB aes0to15
935 JE aes16
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300936 CMPL BX, $32
Keith Randall7a4a64e2014-12-10 14:20:17 -0800937 JBE aes17to32
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300938 CMPL BX, $64
Keith Randall7a4a64e2014-12-10 14:20:17 -0800939 JBE aes33to64
940 JMP aes65plus
941
942aes0to15:
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300943 TESTL BX, BX
Keith Randall7a4a64e2014-12-10 14:20:17 -0800944 JE aes0
Keith Randalla5d40242013-03-12 10:47:44 -0700945
	// AX is advanced by 16 so that one test answers "could a 16-byte load
	// at the original pointer run past a 4096-byte boundary?": bits 4..11
	// of AX+16 are all zero exactly when the original pointer lies in the
	// last 16 bytes before such a boundary.
Keith Randall7a4a64e2014-12-10 14:20:17 -0800946 ADDL $16, AX
947 TESTW $0xff0, AX
948 JE endofpage
Keith Randalla5d40242013-03-12 10:47:44 -0700949
Keith Randallee669722013-05-15 09:40:14 -0700950 // 16 bytes loaded at this address won't cross
951 // a page boundary, so we can load it directly.
	// -16(AX) is the original data pointer. BX is doubled so that BX*8
	// equals length*16, the byte offset of the 16-byte masks<> entry that
	// keeps only the low `length` bytes.
Keith Randall91059de2015-08-31 16:26:12 -0700952 MOVOU -16(AX), X1
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300953 ADDL BX, BX
954 PAND masks<>(SB)(BX*8), X1
Keith Randall7a4a64e2014-12-10 14:20:17 -0800955
Keith Randall91059de2015-08-31 16:26:12 -0700956final1:
957 AESENC X0, X1 // scramble input, xor in seed
958 AESENC X1, X1 // scramble combo 2 times
959 AESENC X1, X1
960 MOVL X1, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -0800961 RET
962
963endofpage:
Brad Fitzpatrick5fea2cc2016-03-01 23:21:55 +0000964 // address ends in 1111xxxx. Might be up against
Keith Randalla5d40242013-03-12 10:47:44 -0700965 // a page boundary, so load ending at last byte.
966 // Then shift bytes down using pshufb.
	// AX is still original+16 here, so -32(AX)(BX*1) is the 16-byte window
	// that ends at the last input byte. As above, BX is doubled so that
	// BX*8 is the offset of the shifts<> entry for this length.
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300967 MOVOU -32(AX)(BX*1), X1
968 ADDL BX, BX
969 PSHUFB shifts<>(SB)(BX*8), X1
Keith Randall91059de2015-08-31 16:26:12 -0700970 JMP final1
Keith Randall7a4a64e2014-12-10 14:20:17 -0800971
972aes0:
Keith Randall731bdc52015-09-01 12:53:15 -0700973 // Return scrambled input seed
Keith Randall91059de2015-08-31 16:26:12 -0700974 AESENC X0, X0
975 MOVL X0, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -0800976 RET
977
978aes16:
Keith Randall91059de2015-08-31 16:26:12 -0700979 MOVOU (AX), X1
980 JMP final1
Keith Randall7a4a64e2014-12-10 14:20:17 -0800981
982aes17to32:
Keith Randall91059de2015-08-31 16:26:12 -0700983 // make second starting seed
984 PXOR runtime·aeskeysched+16(SB), X1
985 AESENC X1, X1
986
Keith Randall7a4a64e2014-12-10 14:20:17 -0800987 // load data to be hashed
	// First 16 bytes and last 16 bytes; the two windows overlap when the
	// length is below 32.
Keith Randall91059de2015-08-31 16:26:12 -0700988 MOVOU (AX), X2
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +1300989 MOVOU -16(AX)(BX*1), X3
Keith Randall7a4a64e2014-12-10 14:20:17 -0800990
991 // scramble 3 times
Keith Randall91059de2015-08-31 16:26:12 -0700992 AESENC X0, X2
993 AESENC X1, X3
994 AESENC X2, X2
995 AESENC X3, X3
996 AESENC X2, X2
997 AESENC X3, X3
Keith Randall7a4a64e2014-12-10 14:20:17 -0800998
999 // combine results
Keith Randall91059de2015-08-31 16:26:12 -07001000 PXOR X3, X2
1001 MOVL X2, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -08001002 RET
1003
1004aes33to64:
Keith Randall91059de2015-08-31 16:26:12 -07001005 // make 3 more starting seeds
1006 MOVO X1, X2
1007 MOVO X1, X3
1008 PXOR runtime·aeskeysched+16(SB), X1
1009 PXOR runtime·aeskeysched+32(SB), X2
1010 PXOR runtime·aeskeysched+48(SB), X3
1011 AESENC X1, X1
1012 AESENC X2, X2
1013 AESENC X3, X3
Keith Randall7a4a64e2014-12-10 14:20:17 -08001014
	// First 32 bytes and last 32 bytes of the input (overlapping when the
	// length is below 64).
Keith Randall91059de2015-08-31 16:26:12 -07001015 MOVOU (AX), X4
1016 MOVOU 16(AX), X5
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001017 MOVOU -32(AX)(BX*1), X6
1018 MOVOU -16(AX)(BX*1), X7
Keith Randall91059de2015-08-31 16:26:12 -07001019
1020 AESENC X0, X4
1021 AESENC X1, X5
1022 AESENC X2, X6
1023 AESENC X3, X7
1024
1025 AESENC X4, X4
1026 AESENC X5, X5
1027 AESENC X6, X6
1028 AESENC X7, X7
1029
1030 AESENC X4, X4
1031 AESENC X5, X5
1032 AESENC X6, X6
1033 AESENC X7, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001034
Keith Randall91059de2015-08-31 16:26:12 -07001035 PXOR X6, X4
1036 PXOR X7, X5
1037 PXOR X5, X4
1038 MOVL X4, (DX)
Keith Randall7a4a64e2014-12-10 14:20:17 -08001039 RET
1040
1041aes65plus:
Keith Randall91059de2015-08-31 16:26:12 -07001042 // make 3 more starting seeds
1043 MOVO X1, X2
1044 MOVO X1, X3
1045 PXOR runtime·aeskeysched+16(SB), X1
1046 PXOR runtime·aeskeysched+32(SB), X2
1047 PXOR runtime·aeskeysched+48(SB), X3
1048 AESENC X1, X1
1049 AESENC X2, X2
1050 AESENC X3, X3
1051
Keith Randall7a4a64e2014-12-10 14:20:17 -08001052 // start with last (possibly overlapping) block
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001053 MOVOU -64(AX)(BX*1), X4
1054 MOVOU -48(AX)(BX*1), X5
1055 MOVOU -32(AX)(BX*1), X6
1056 MOVOU -16(AX)(BX*1), X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001057
1058 // scramble state once
Keith Randall91059de2015-08-31 16:26:12 -07001059 AESENC X0, X4
1060 AESENC X1, X5
1061 AESENC X2, X6
1062 AESENC X3, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001063
1064 // compute number of remaining 64-byte blocks
	// BX = (length-1)/64. The final 64 bytes were already consumed above,
	// so an exact multiple of 64 does not get an extra iteration; BX >= 1
	// here because length > 64.
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001065 DECL BX
1066 SHRL $6, BX
Keith Randall7a4a64e2014-12-10 14:20:17 -08001067
1068aesloop:
1069 // scramble state, xor in a block
	// X0-X3 are reused as data registers from here on; the seeds they
	// held have already been mixed into X4-X7.
Keith Randall91059de2015-08-31 16:26:12 -07001070 MOVOU (AX), X0
1071 MOVOU 16(AX), X1
1072 MOVOU 32(AX), X2
1073 MOVOU 48(AX), X3
1074 AESENC X0, X4
1075 AESENC X1, X5
1076 AESENC X2, X6
1077 AESENC X3, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001078
1079 // scramble state
Keith Randall91059de2015-08-31 16:26:12 -07001080 AESENC X4, X4
1081 AESENC X5, X5
1082 AESENC X6, X6
1083 AESENC X7, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001084
1085 ADDL $64, AX
Michael Hudson-Doyle6056cc52015-10-28 12:10:28 +13001086 DECL BX
Keith Randall7a4a64e2014-12-10 14:20:17 -08001087 JNE aesloop
1088
1089 // 2 more scrambles to finish
Keith Randall91059de2015-08-31 16:26:12 -07001090 AESENC X4, X4
1091 AESENC X5, X5
1092 AESENC X6, X6
1093 AESENC X7, X7
1094
1095 AESENC X4, X4
1096 AESENC X5, X5
1097 AESENC X6, X6
1098 AESENC X7, X7
Keith Randall7a4a64e2014-12-10 14:20:17 -08001099
Keith Randall91059de2015-08-31 16:26:12 -07001100 PXOR X6, X4
1101 PXOR X7, X5
1102 PXOR X5, X4
1103 MOVL X4, (DX)
Keith Randalla5d40242013-03-12 10:47:44 -07001104 RET
1105
// aeshash32 hashes the 4 bytes at p with seed h.
// X0 is built with the seed in dword lane 0 and the data in dword lane 1,
// then run through three AESENC rounds keyed by aeskeysched at offsets
// 0, 16 and 32. The low 32 bits of X0 are the result.
Keith Randalld5e4c402015-01-06 16:42:48 -08001106TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
Keith Randalla2a97682014-07-31 15:07:05 -07001107 MOVL p+0(FP), AX // ptr to data
Keith Randalld5e4c402015-01-06 16:42:48 -08001108 MOVL h+4(FP), X0 // seed
Keith Randalla5d40242013-03-12 10:47:44 -07001109 PINSRD $1, (AX), X0 // data
Keith Randalldb53d972013-03-20 14:34:26 -07001110 AESENC runtime·aeskeysched+0(SB), X0
1111 AESENC runtime·aeskeysched+16(SB), X0
Keith Randall7a4a64e2014-12-10 14:20:17 -08001112 AESENC runtime·aeskeysched+32(SB), X0
Keith Randalld5e4c402015-01-06 16:42:48 -08001113 MOVL X0, ret+8(FP)
Keith Randalla5d40242013-03-12 10:47:44 -07001114 RET
1115
// aeshash64 hashes the 8 bytes at p with seed h.
// X0 is built with the data in its low 8 bytes (MOVQ) and the seed in
// dword lane 2, then run through three AESENC rounds keyed by aeskeysched
// at offsets 0, 16 and 32. The low 32 bits of X0 are the result.
Keith Randalld5e4c402015-01-06 16:42:48 -08001116TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
Keith Randalla2a97682014-07-31 15:07:05 -07001117 MOVL p+0(FP), AX // ptr to data
Keith Randalla5d40242013-03-12 10:47:44 -07001118 MOVQ (AX), X0 // data
Keith Randalld5e4c402015-01-06 16:42:48 -08001119 PINSRD $2, h+4(FP), X0 // seed
Keith Randalldb53d972013-03-20 14:34:26 -07001120 AESENC runtime·aeskeysched+0(SB), X0
1121 AESENC runtime·aeskeysched+16(SB), X0
Keith Randall7a4a64e2014-12-10 14:20:17 -08001122 AESENC runtime·aeskeysched+32(SB), X0
Keith Randalld5e4c402015-01-06 16:42:48 -08001123 MOVL X0, ret+8(FP)
Keith Randalla5d40242013-03-12 10:47:44 -07001124 RET
1125
Keith Randalla5d40242013-03-12 10:47:44 -07001126// simple mask to get rid of data in the high part of the register.
//
// Layout: 16 entries of 16 bytes each (256 bytes, read-only). Entry i,
// at byte offset i*16, has its low i bytes set to 0xff and the remaining
// bytes zero, for i = 0..15. aeshashbody indexes it by input length and
// ANDs the selected entry with a 16-byte load to keep only the low
// `length` bytes.
Russ Cox9ddfb642013-07-16 16:24:09 -04001127DATA masks<>+0x00(SB)/4, $0x00000000
1128DATA masks<>+0x04(SB)/4, $0x00000000
1129DATA masks<>+0x08(SB)/4, $0x00000000
1130DATA masks<>+0x0c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001131
Russ Cox9ddfb642013-07-16 16:24:09 -04001132DATA masks<>+0x10(SB)/4, $0x000000ff
1133DATA masks<>+0x14(SB)/4, $0x00000000
1134DATA masks<>+0x18(SB)/4, $0x00000000
1135DATA masks<>+0x1c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001136
Russ Cox9ddfb642013-07-16 16:24:09 -04001137DATA masks<>+0x20(SB)/4, $0x0000ffff
1138DATA masks<>+0x24(SB)/4, $0x00000000
1139DATA masks<>+0x28(SB)/4, $0x00000000
1140DATA masks<>+0x2c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001141
Russ Cox9ddfb642013-07-16 16:24:09 -04001142DATA masks<>+0x30(SB)/4, $0x00ffffff
1143DATA masks<>+0x34(SB)/4, $0x00000000
1144DATA masks<>+0x38(SB)/4, $0x00000000
1145DATA masks<>+0x3c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001146
Russ Cox9ddfb642013-07-16 16:24:09 -04001147DATA masks<>+0x40(SB)/4, $0xffffffff
1148DATA masks<>+0x44(SB)/4, $0x00000000
1149DATA masks<>+0x48(SB)/4, $0x00000000
1150DATA masks<>+0x4c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001151
Russ Cox9ddfb642013-07-16 16:24:09 -04001152DATA masks<>+0x50(SB)/4, $0xffffffff
1153DATA masks<>+0x54(SB)/4, $0x000000ff
1154DATA masks<>+0x58(SB)/4, $0x00000000
1155DATA masks<>+0x5c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001156
Russ Cox9ddfb642013-07-16 16:24:09 -04001157DATA masks<>+0x60(SB)/4, $0xffffffff
1158DATA masks<>+0x64(SB)/4, $0x0000ffff
1159DATA masks<>+0x68(SB)/4, $0x00000000
1160DATA masks<>+0x6c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001161
Russ Cox9ddfb642013-07-16 16:24:09 -04001162DATA masks<>+0x70(SB)/4, $0xffffffff
1163DATA masks<>+0x74(SB)/4, $0x00ffffff
1164DATA masks<>+0x78(SB)/4, $0x00000000
1165DATA masks<>+0x7c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001166
Russ Cox9ddfb642013-07-16 16:24:09 -04001167DATA masks<>+0x80(SB)/4, $0xffffffff
1168DATA masks<>+0x84(SB)/4, $0xffffffff
1169DATA masks<>+0x88(SB)/4, $0x00000000
1170DATA masks<>+0x8c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001171
Russ Cox9ddfb642013-07-16 16:24:09 -04001172DATA masks<>+0x90(SB)/4, $0xffffffff
1173DATA masks<>+0x94(SB)/4, $0xffffffff
1174DATA masks<>+0x98(SB)/4, $0x000000ff
1175DATA masks<>+0x9c(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001176
Russ Cox9ddfb642013-07-16 16:24:09 -04001177DATA masks<>+0xa0(SB)/4, $0xffffffff
1178DATA masks<>+0xa4(SB)/4, $0xffffffff
1179DATA masks<>+0xa8(SB)/4, $0x0000ffff
1180DATA masks<>+0xac(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001181
Russ Cox9ddfb642013-07-16 16:24:09 -04001182DATA masks<>+0xb0(SB)/4, $0xffffffff
1183DATA masks<>+0xb4(SB)/4, $0xffffffff
1184DATA masks<>+0xb8(SB)/4, $0x00ffffff
1185DATA masks<>+0xbc(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001186
Russ Cox9ddfb642013-07-16 16:24:09 -04001187DATA masks<>+0xc0(SB)/4, $0xffffffff
1188DATA masks<>+0xc4(SB)/4, $0xffffffff
1189DATA masks<>+0xc8(SB)/4, $0xffffffff
1190DATA masks<>+0xcc(SB)/4, $0x00000000
Keith Randalla5d40242013-03-12 10:47:44 -07001191
Russ Cox9ddfb642013-07-16 16:24:09 -04001192DATA masks<>+0xd0(SB)/4, $0xffffffff
1193DATA masks<>+0xd4(SB)/4, $0xffffffff
1194DATA masks<>+0xd8(SB)/4, $0xffffffff
1195DATA masks<>+0xdc(SB)/4, $0x000000ff
Keith Randalla5d40242013-03-12 10:47:44 -07001196
Russ Cox9ddfb642013-07-16 16:24:09 -04001197DATA masks<>+0xe0(SB)/4, $0xffffffff
1198DATA masks<>+0xe4(SB)/4, $0xffffffff
1199DATA masks<>+0xe8(SB)/4, $0xffffffff
1200DATA masks<>+0xec(SB)/4, $0x0000ffff
Keith Randalla5d40242013-03-12 10:47:44 -07001201
Russ Cox9ddfb642013-07-16 16:24:09 -04001202DATA masks<>+0xf0(SB)/4, $0xffffffff
1203DATA masks<>+0xf4(SB)/4, $0xffffffff
1204DATA masks<>+0xf8(SB)/4, $0xffffffff
1205DATA masks<>+0xfc(SB)/4, $0x00ffffff
Keith Randalla5d40242013-03-12 10:47:44 -07001206
Keith Randall5a546962013-08-07 10:23:24 -07001207GLOBL masks<>(SB),RODATA,$256
Keith Randalla5d40242013-03-12 10:47:44 -07001208
Brad Fitzpatrick5fea2cc2016-03-01 23:21:55 +00001209// these are arguments to pshufb. They move data down from
Russ Cox9ddfb642013-07-16 16:24:09 -04001210// the high bytes of the register to the low bytes of the register.
1211// index is how many bytes to move.
//
// Layout: 16 entries of 16 bytes each (256 bytes, read-only). In entry i,
// at byte offset i*16, the low i control bytes are 16-i, ..., 0x0f (they
// select the top i source bytes, in order) and the rest are 0xff, which
// PSHUFB turns into zero bytes. Entry 0 is all zeros; aeshashbody handles
// length 0 on a separate path and does not index the table with 0.
1212DATA shifts<>+0x00(SB)/4, $0x00000000
1213DATA shifts<>+0x04(SB)/4, $0x00000000
1214DATA shifts<>+0x08(SB)/4, $0x00000000
1215DATA shifts<>+0x0c(SB)/4, $0x00000000
1216
1217DATA shifts<>+0x10(SB)/4, $0xffffff0f
1218DATA shifts<>+0x14(SB)/4, $0xffffffff
1219DATA shifts<>+0x18(SB)/4, $0xffffffff
1220DATA shifts<>+0x1c(SB)/4, $0xffffffff
1221
1222DATA shifts<>+0x20(SB)/4, $0xffff0f0e
1223DATA shifts<>+0x24(SB)/4, $0xffffffff
1224DATA shifts<>+0x28(SB)/4, $0xffffffff
1225DATA shifts<>+0x2c(SB)/4, $0xffffffff
1226
1227DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
1228DATA shifts<>+0x34(SB)/4, $0xffffffff
1229DATA shifts<>+0x38(SB)/4, $0xffffffff
1230DATA shifts<>+0x3c(SB)/4, $0xffffffff
1231
1232DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
1233DATA shifts<>+0x44(SB)/4, $0xffffffff
1234DATA shifts<>+0x48(SB)/4, $0xffffffff
1235DATA shifts<>+0x4c(SB)/4, $0xffffffff
1236
1237DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
1238DATA shifts<>+0x54(SB)/4, $0xffffff0f
1239DATA shifts<>+0x58(SB)/4, $0xffffffff
1240DATA shifts<>+0x5c(SB)/4, $0xffffffff
1241
1242DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
1243DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1244DATA shifts<>+0x68(SB)/4, $0xffffffff
1245DATA shifts<>+0x6c(SB)/4, $0xffffffff
1246
1247DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
1248DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1249DATA shifts<>+0x78(SB)/4, $0xffffffff
1250DATA shifts<>+0x7c(SB)/4, $0xffffffff
1251
1252DATA shifts<>+0x80(SB)/4, $0x0b0a0908
1253DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1254DATA shifts<>+0x88(SB)/4, $0xffffffff
1255DATA shifts<>+0x8c(SB)/4, $0xffffffff
1256
1257DATA shifts<>+0x90(SB)/4, $0x0a090807
1258DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1259DATA shifts<>+0x98(SB)/4, $0xffffff0f
1260DATA shifts<>+0x9c(SB)/4, $0xffffffff
1261
1262DATA shifts<>+0xa0(SB)/4, $0x09080706
1263DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1264DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1265DATA shifts<>+0xac(SB)/4, $0xffffffff
1266
1267DATA shifts<>+0xb0(SB)/4, $0x08070605
1268DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1269DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1270DATA shifts<>+0xbc(SB)/4, $0xffffffff
1271
1272DATA shifts<>+0xc0(SB)/4, $0x07060504
1273DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1274DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1275DATA shifts<>+0xcc(SB)/4, $0xffffffff
1276
1277DATA shifts<>+0xd0(SB)/4, $0x06050403
1278DATA shifts<>+0xd4(SB)/4, $0x0a090807
1279DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1280DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1281
1282DATA shifts<>+0xe0(SB)/4, $0x05040302
1283DATA shifts<>+0xe4(SB)/4, $0x09080706
1284DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1285DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1286
1287DATA shifts<>+0xf0(SB)/4, $0x04030201
1288DATA shifts<>+0xf4(SB)/4, $0x08070605
1289DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1290DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1291
Keith Randall5a546962013-08-07 10:23:24 -07001292GLOBL shifts<>(SB),RODATA,$256
Russ Cox9ddfb642013-07-16 16:24:09 -04001293
// checkASM reports, as a one-byte bool, whether both lookup tables are
// 16-byte aligned. The two addresses are ORed together so a single test
// of the low four bits covers both.
// NOTE(review): the alignment presumably matters because aeshashbody uses
// the tables as direct memory operands of PAND/PSHUFB - confirm.
Shenghou Ma3583a442015-09-03 02:44:26 -04001294TEXT ·checkASM(SB),NOSPLIT,$0-1
1295 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
1296 MOVL $masks<>(SB), AX
1297 MOVL $shifts<>(SB), BX
1298 ORL BX, AX
1299 TESTL $15, AX
1300 SETEQ ret+0(FP)
1301 RET
1302
Keith Randallbd70bd92016-02-22 13:20:38 -08001303// memequal(p, q unsafe.Pointer, size uintptr) bool
//
// If the two pointers are identical the result is true without reading
// size or touching memory. Otherwise SI/DI/BX/AX are loaded with
// a / b / size / address of the result byte and control tail-jumps to
// memeqbody, which writes the result through AX.
1304TEXT runtime·memequal(SB),NOSPLIT,$0-13
Keith Randall0c6b55e2014-07-16 14:16:19 -07001305 MOVL a+0(FP), SI
1306 MOVL b+4(FP), DI
Keith Randallbd70bd92016-02-22 13:20:38 -08001307 CMPL SI, DI
1308 JEQ eq
Keith Randall0c6b55e2014-07-16 14:16:19 -07001309 MOVL size+8(FP), BX
Keith Randallc526f3a2015-04-21 14:22:41 -07001310 LEAL ret+12(FP), AX
1311 JMP runtime·memeqbody(SB)
Keith Randallbd70bd92016-02-22 13:20:38 -08001312eq:
1313 MOVB $1, ret+12(FP)
1314 RET
Keith Randall0c6b55e2014-07-16 14:16:19 -07001315
Keith Randalld5e4c402015-01-06 16:42:48 -08001316// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same as memequal, except that the size is not an argument: it is read
// from offset 4 of the closure that DX points to. Identical pointers
// short-circuit to true before the closure is read.
1317TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
1318 MOVL a+0(FP), SI
1319 MOVL b+4(FP), DI
1320 CMPL SI, DI
1321 JEQ eq
1322 MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
Keith Randallc526f3a2015-04-21 14:22:41 -07001323 LEAL ret+8(FP), AX
1324 JMP runtime·memeqbody(SB)
Keith Randalld5e4c402015-01-06 16:42:48 -08001325eq:
1326 MOVB $1, ret+8(FP)
1327 RET
1328
Keith Randallb36ed902014-06-16 21:00:37 -07001329// eqstring tests whether two strings are equal.
Josh Bleecher Snyder135ef492015-02-04 17:31:37 -08001330// The compiler guarantees that strings passed
1331// to eqstring have equal length.
Keith Randallb36ed902014-06-16 21:00:37 -07001332// See runtime_test.go:eqstring_generic for
Josh Bleecher Snyder339a24d2014-08-19 08:50:35 -07001333// equivalent Go code.
//
// Only s1_len is loaded; s2's length is never read, relying on the
// equal-length guarantee stated above. Identical base pointers
// short-circuit to true; otherwise memeqbody does the comparison and
// writes the result byte through AX.
Keith Randallb36ed902014-06-16 21:00:37 -07001334TEXT runtime·eqstring(SB),NOSPLIT,$0-17
Josh Bleecher Snyder71ab9fa2016-07-11 16:05:57 -07001335 MOVL s1_base+0(FP), SI
1336 MOVL s2_base+8(FP), DI
Keith Randallb36ed902014-06-16 21:00:37 -07001337 CMPL SI, DI
1338 JEQ same
Josh Bleecher Snyder71ab9fa2016-07-11 16:05:57 -07001339 MOVL s1_len+4(FP), BX
1340 LEAL ret+16(FP), AX
Keith Randallc526f3a2015-04-21 14:22:41 -07001341 JMP runtime·memeqbody(SB)
Keith Randallb36ed902014-06-16 21:00:37 -07001342same:
Josh Bleecher Snyder71ab9fa2016-07-11 16:05:57 -07001343 MOVB $1, ret+16(FP)
Keith Randallb36ed902014-06-16 21:00:37 -07001344 RET
Keith Randallb36ed902014-06-16 21:00:37 -07001345
// bytes·Equal compares two byte slices (a at +0, b at +12; result byte at
// +24). Differing lengths yield false immediately. Otherwise SI/DI/BX/AX
// are set to a's data, b's data, the common length and the address of the
// result byte, and memeqbody performs the comparison. Unlike memequal
// there is no pointer-equality shortcut here.
Keith Randall5a546962013-08-07 10:23:24 -07001346TEXT bytes·Equal(SB),NOSPLIT,$0-25
Keith Randall3d5daa22013-04-02 16:26:15 -07001347 MOVL a_len+4(FP), BX
1348 MOVL b_len+16(FP), CX
Keith Randall3d5daa22013-04-02 16:26:15 -07001349 CMPL BX, CX
1350 JNE eqret
1351 MOVL a+0(FP), SI
1352 MOVL b+12(FP), DI
Keith Randallc526f3a2015-04-21 14:22:41 -07001353 LEAL ret+24(FP), AX
1354 JMP runtime·memeqbody(SB)
Keith Randall3d5daa22013-04-02 16:26:15 -07001355eqret:
Keith Randallc526f3a2015-04-21 14:22:41 -07001356 MOVB $0, ret+24(FP)
Keith Randall3d5daa22013-04-02 16:26:15 -07001357 RET
1358
1359// a in SI
1360// b in DI
1361// count in BX
Keith Randallc526f3a2015-04-21 14:22:41 -07001362// address of result byte in AX
//
// Shared comparison body, reached by JMP from memequal, memequal_varlen,
// eqstring and bytes·Equal. Writes 1 (equal) or 0 (different) to the byte
// at (AX) and returns to the original caller. Clobbers SI, DI, BX, CX, DX
// and, on the 64-byte path, X0-X7.
Keith Randall5a546962013-08-07 10:23:24 -07001363TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
Keith Randall3d5daa22013-04-02 16:26:15 -07001364 CMPL BX, $4
1365 JB small
1366
1367 // 64 bytes at a time using xmm registers
	// Taken only while at least 64 bytes remain and runtime·support_sse2
	// is 1; otherwise fall through to the 4-byte loop.
1368hugeloop:
1369 CMPL BX, $64
1370 JB bigloop
Martin Möhrmann5a6c5802017-04-27 08:30:27 +02001371 CMPB runtime·support_sse2(SB), $1
1372 JNE bigloop
Keith Randall3d5daa22013-04-02 16:26:15 -07001373 MOVOU (SI), X0
1374 MOVOU (DI), X1
1375 MOVOU 16(SI), X2
1376 MOVOU 16(DI), X3
1377 MOVOU 32(SI), X4
1378 MOVOU 32(DI), X5
1379 MOVOU 48(SI), X6
1380 MOVOU 48(DI), X7
	// Byte-wise equality of the four 16-byte pairs, ANDed into X0;
	// the 16-bit byte mask in DX is 0xffff only if all 64 bytes matched.
1381 PCMPEQB X1, X0
1382 PCMPEQB X3, X2
1383 PCMPEQB X5, X4
1384 PCMPEQB X7, X6
1385 PAND X2, X0
1386 PAND X6, X4
1387 PAND X4, X0
1388 PMOVMSKB X0, DX
1389 ADDL $64, SI
1390 ADDL $64, DI
1391 SUBL $64, BX
1392 CMPL DX, $0xffff
1393 JEQ hugeloop
Keith Randallc526f3a2015-04-21 14:22:41 -07001394 MOVB $0, (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001395 RET
1396
1397 // 4 bytes at a time using 32-bit register
1398bigloop:
1399 CMPL BX, $4
1400 JBE leftover
1401 MOVL (SI), CX
1402 MOVL (DI), DX
1403 ADDL $4, SI
1404 ADDL $4, DI
1405 SUBL $4, BX
1406 CMPL CX, DX
1407 JEQ bigloop
Keith Randallc526f3a2015-04-21 14:22:41 -07001408 MOVB $0, (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001409 RET
1410
1411 // remaining 0-4 bytes
	// Compare the 4 bytes that end at the end of each buffer. When fewer
	// than 4 bytes remain this window overlaps bytes that were already
	// compared equal, so re-checking them is harmless.
1412leftover:
1413 MOVL -4(SI)(BX*1), CX
1414 MOVL -4(DI)(BX*1), DX
1415 CMPL CX, DX
Keith Randallc526f3a2015-04-21 14:22:41 -07001416 SETEQ (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001417 RET
1418
1419small:
	// count is 0..3 here. For count == 0 the CMPL leaves the zero flag
	// set, so the SETEQ at "equal" stores 1.
1420 CMPL BX, $0
1421 JEQ equal
1422
	// CX = -8*count. 32-bit shifts use only the low 5 bits of the count,
	// so shifting by CX shifts by 32 - 8*count bits (24, 16 or 8).
1423 LEAL 0(BX*8), CX
1424 NEGL CX
1425
	// If the low address byte is above 0xfc, a 4-byte load starting at SI
	// would run past a 256-byte boundary (and so possibly past a page
	// boundary); take the si_high path in that case.
1426 MOVL SI, DX
1427 CMPB DX, $0xfc
1428 JA si_high
1429
1430 // load at SI won't cross a page boundary.
1431 MOVL (SI), SI
1432 JMP si_finish
1433si_high:
Brad Fitzpatrick5fea2cc2016-03-01 23:21:55 +00001434 // address ends in 111111xx. Load up to bytes we want, move to correct position.
Keith Randall3d5daa22013-04-02 16:26:15 -07001435 MOVL -4(SI)(BX*1), SI
1436 SHRL CX, SI
1437si_finish:
1438
1439 // same for DI.
1440 MOVL DI, DX
1441 CMPB DX, $0xfc
1442 JA di_high
1443 MOVL (DI), DI
1444 JMP di_finish
1445di_high:
1446 MOVL -4(DI)(BX*1), DI
1447 SHRL CX, DI
1448di_finish:
1449
	// The low `count` bytes of SI and DI now hold the data; any higher
	// bytes may be garbage. Subtract, then shift the difference left so
	// that only the low 8*count bits survive: the result is zero, and the
	// zero flag set, exactly when the wanted bytes are equal.
1450 SUBL SI, DI
1451 SHLL CX, DI
1452equal:
Keith Randallc526f3a2015-04-21 14:22:41 -07001453 SETEQ (AX)
Keith Randall3d5daa22013-04-02 16:26:15 -07001454 RET
Keith Randallb3946dc2013-05-14 16:05:51 -07001455
// cmpstring loads the base pointers and lengths of strings s1 and s2 into the
// registers cmpbody expects, passes the address of ret in AX, and jumps to
// cmpbody, which stores the result there and returns to our caller.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	MOVL	s1_base+0(FP), SI
	MOVL	s1_len+4(FP), BX
	MOVL	s2_base+8(FP), DI
	MOVL	s2_len+12(FP), DX
	LEAL	ret+16(FP), AX
	JMP	runtime·cmpbody(SB)
Keith Randallb3946dc2013-05-14 16:05:51 -07001463
// Compare loads the data pointer and length of each of its two slice
// arguments (offsets 0/4 and 12/16; the capacity words are not read) into the
// registers cmpbody expects, passes the address of ret in AX, and jumps to
// cmpbody, which stores the result there and returns to our caller.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
	MOVL	s1+0(FP), SI
	MOVL	s1+4(FP), BX
	MOVL	s2+12(FP), DI
	MOVL	s2+16(FP), DX
	LEAL	ret+24(FP), AX
	JMP	runtime·cmpbody(SB)
Keith Randallb3946dc2013-05-14 16:05:51 -07001471
// IndexByte scans the s_len bytes starting at s for the byte c and stores the
// index of the first match in ret, or -1 if there is none.
TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+12(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB		// advance DI until a byte equals AL or CX reaches 0
	// If s_len is 0 the scan executes no iterations and leaves ZF as it was
	// on entry. Either branch still yields -1, because DI == SI on the
	// found path gives DI-SI-1 = -1.
	JZ	found
	MOVL	$-1, ret+16(FP)
	RET
found:
	// DI points one past the matching byte.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+16(FP)
	RET
1485
// IndexByte scans the s_len bytes starting at s for the byte c and stores the
// index of the first match in ret, or -1 if there is none.
TEXT strings·IndexByte(SB),NOSPLIT,$0-16
	MOVL	s+0(FP), SI
	MOVL	s_len+4(FP), CX
	MOVB	c+8(FP), AL
	MOVL	SI, DI
	CLD; REPN; SCASB		// advance DI until a byte equals AL or CX reaches 0
	// If s_len is 0 the scan executes no iterations and leaves ZF as it was
	// on entry. Either branch still yields -1, because DI == SI on the
	// found path gives DI-SI-1 = -1.
	JZ	found
	MOVL	$-1, ret+12(FP)
	RET
found:
	// DI points one past the matching byte.
	SUBL	SI, DI
	SUBL	$1, DI
	MOVL	DI, ret+12(FP)
	RET
1499
// cmpbody performs a three-way lexicographic comparison of two byte ranges.
// It is reached by JMP from entry stubs that have loaded the registers below;
// it writes its answer through AX and returns to the stub's caller.
//
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
//   AX = address of return word (set to 1/0/-1)
// Registers written: BX, CX, DX, BP, SI, DI, X0-X1 (SSE2 path only), flags.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	MOVL	DX, BP
	SUBL	BX, DX			// DX = blen-alen
	JLE	2(PC)			// blen <= alen: BP already holds the minimum
	MOVL	BX, BP			// BP = min(alen, blen)
	CMPL	SI, DI
	JEQ	allsame			// same start address: only the lengths matter
	CMPL	BP, $4
	JB	small
	CMPB	runtime·support_sse2(SB), $1
	JNE	mediumloop		// no SSE2: use the 4-byte loop
largeloop:
	CMPL	BP, $16
	JB	mediumloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1
	PMOVMSKB	X1, BX
	XORL	$0xffff, BX		// convert EQ to NE
	JNE	diff16			// branch if at least one byte is not equal
	ADDL	$16, SI
	ADDL	$16, DI
	SUBL	$16, BP
	JMP	largeloop

diff16:
	BSFL	BX, BX			// index of first byte that differs
	XORL	DX, DX
	MOVB	(SI)(BX*1), CX
	CMPB	CX, (DI)(BX*1)
	SETHI	DX			// 1 if a's byte is above b's (unsigned)
	LEAL	-1(DX*2), DX		// convert 1/0 to +1/-1
	MOVL	DX, (AX)
	RET

mediumloop:
	CMPL	BP, $4
	JBE	_0through4
	MOVL	(SI), BX
	MOVL	(DI), CX
	CMPL	BX, CX
	JNE	diff4
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BP
	JMP	mediumloop

	// 0-4 bytes remain. Load the last 4 bytes of the common range; this
	// may overlap bytes that were already found equal.
_0through4:
	MOVL	-4(SI)(BP*1), BX
	MOVL	-4(DI)(BP*1), CX
	CMPL	BX, CX
	JEQ	allsame

diff4:
	BSWAPL	BX			// reverse order of bytes
	BSWAPL	CX
	XORL	BX, CX			// find bit differences
	BSRL	CX, CX			// index of highest bit difference
	SHRL	CX, BX			// move a's bit to bottom
	ANDL	$1, BX			// mask bit
	LEAL	-1(BX*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// 0-3 bytes in common
small:
	// CX = -8*BP. The shift instructions use only the low 5 bits,
	// so shifting by CX shifts by 32-8*BP bits.
	LEAL	(BP*8), CX
	NEGL	CX
	JEQ	allsame			// BP == 0: nothing in common to compare

	// load si
	// If the low address byte is above 0xfc, load the 4 bytes ending at
	// a+BP instead and shift the wanted bytes down to the low end.
	CMPB	SI, $0xfc
	JA	si_high
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	MOVL	-4(SI)(BP*1), SI
	SHRL	CX, SI
si_finish:
	SHLL	CX, SI			// move wanted bytes to the top, dropping the rest

	// same for di
	CMPB	DI, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BP*1), DI
	SHRL	CX, DI
di_finish:
	SHLL	CX, DI

	BSWAPL	SI			// reverse order of bytes
	BSWAPL	DI
	XORL	SI, DI			// find bit differences
	JEQ	allsame
	BSRL	DI, CX			// index of highest bit difference
	SHRL	CX, SI			// move a's bit to bottom
	ANDL	$1, SI			// mask bit
	LEAL	-1(SI*2), BX		// 1/0 => +1/-1
	MOVL	BX, (AX)
	RET

	// all the bytes in common are the same, so we just need
	// to compare the lengths.
allsame:
	XORL	BX, BX
	XORL	CX, CX
	TESTL	DX, DX			// DX = blen-alen
	SETLT	BX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
	MOVL	BX, (AX)
	RET
Keith Randall6c7cbf02014-04-01 12:51:02 -07001620
// return0 sets AX to 0 and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
Keith Randall1b6807b2014-09-25 07:59:01 -07001624
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
// The result is returned in AX; CX is used to reach thread-local storage.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVL	g(CX), AX			// AX = g
	MOVL	g_m(AX), AX			// AX = g->m
	MOVL	m_curg(AX), AX			// AX = g->m->curg
	MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET
Russ Coxa5a07332014-10-29 20:37:44 -04001634
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
// The leading NOP is what sits between goexit and that return address.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
Russ Cox15ced2d2014-11-11 17:06:22 -05001642
// Prefetching doesn't seem to help.
// Each of the four prefetch entry points below therefore ignores its
// 4-byte argument and returns immediately.
TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
	RET

TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
	RET
Michael Hudson-Doyle09d7de82015-10-28 12:15:43 +13001655
// Add a module's moduledata to the linked list of moduledata objects. This
// is called from .init_array by a function generated in the linker and so
// follows the platform ABI wrt register preservation -- it only touches AX,
// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
// instead the pointer to the moduledata is passed in AX.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	MOVL	runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
	MOVL	AX, moduledata_next(DX)		// tail.next = new moduledata
	MOVL	AX, runtime·lastmoduledatap(SB)	// new moduledata becomes the tail
	RET
Keith Randalldf2f8132016-07-21 10:37:59 -07001666
// uint32tofloat64 converts the 32-bit argument a to a float64 stored in ret.
// The value is widened to 64 bits on the stack with a zero high word so that
// the x87 64-bit integer load sees it as non-negative.
TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
	MOVL	a+0(FP), AX
	MOVL	AX, 0(SP)		// low word = a
	MOVL	$0, 4(SP)		// high word = 0
	FMOVV	0(SP), F0		// load the 64-bit integer onto the x87 stack
	FMOVDP	F0, ret+4(FP)		// store as float64 and pop
	RET
1674
// float64touint32 converts the float64 argument a to a 32-bit value in ret.
// The value is stored as a 64-bit integer via the x87 unit and the low
// 32 bits of that integer are returned.
//
// Frame layout: 0(SP) holds the saved x87 control word, 4(SP)-11(SP) the
// 64-bit integer result.
TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
	FMOVD	a+0(FP), F0
	FSTCW	0(SP)				// save the caller's control word
	// NOTE(review): presumably selects truncating rounding, per its name -- confirm at its definition.
	FLDCW	runtime·controlWord64trunc(SB)
	FMOVVP	F0, 4(SP)			// store as 64-bit integer and pop
	FLDCW	0(SP)				// restore the caller's control word
	MOVL	4(SP), AX			// low 32 bits of the integer
	MOVL	AX, ret+8(FP)
	RET