// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.

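// As a rough sketch (not the compiler's literal output), under -race the
// compiler instruments a statement like "x = y" roughly as:
//
//	runtime.raceread(unsafe.Pointer(&y))
//	runtime.racewrite(unsafe.Pointer(&x))
//	x = y
//
// so each thunk below receives only the address and recovers the pc of the
// instrumented code itself from the stack.
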
// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
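// In C-like pseudocode, the range check below is roughly (a sketch, where
// tsan_fn stands for whatever function pointer AX holds):
//
//	if ((racearenastart <= addr && addr < racearenaend) ||
//	    (racedatastart <= addr && addr < racedataend))
//		tsan_fn(racectx, addr, ...);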
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX	// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
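// As a sketch (not the compiler's literal output), -race brackets each
// instrumented function body roughly as:
//
//	CALL	runtime·racefuncenter(SB)	// with the caller pc as its argument
//	...function body...
//	CALL	runtime·racefuncexit(SB)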
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	DX, R15		// save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	callpc+0(FP), RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX		// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.

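// Note that the package separator in these symbol names is U+2215 "∕", the
// division slash: the assembler accepts it in identifiers (where a plain "/"
// would not parse) and translates it to "/" in the object file, so these
// TEXT definitions implement sync/atomic's functions in the race build.
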
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
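// __tsan_go_atomic{32,64}_fetch_add returns the old value, but sync/atomic's
// Add returns the new one. As a worked example with hypothetical values: if
// x == 2, AddInt32(&x, 5) gets the old value 2 back in ret+16(FP) from
// fetch_add, and adding the delta 5 to it below yields the required 7.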
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains target function.
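// Entered by CALL from the sync/atomic thunks above, so the stack looks
// like this (a sketch of the layout the offsets below rely on):
//
//	(SP)	- pc inside the sync/atomic function (our return address)
//	8(SP)	- pc of the sync/atomic function's caller
//	16(SP)	- the sync/atomic function's arguments, starting with addr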
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls the C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
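// (A sketch: the matching Go-side declaration in the runtime's race support
// code is roughly "func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)".)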
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// The direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
TEXT	runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
	// Save callee-saved registers (Go code won't respect that).
	// This is a superset of the darwin/linux/windows registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12)	// g = m->g0
	MOVQ	RARG0, 0(SP)	// func arg
	CALL	runtime·racesymbolize(SB)
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET