// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

4
Anthony Martin8303a132014-03-12 18:12:25 -07005// +build !plan9
6
Russ Coxcb040d52014-09-04 23:05:18 -04007#include "textflag.h"
Keith Randallda7cf0b2014-02-06 17:43:22 -08008
// NOTE: The Windows externalthreadhandler expects memclr to preserve DX,
// so the code below must never write DX.
10
// void runtime·memclr(void *ptr, uintptr n)
//
// Stores zero into the n bytes starting at ptr.
//
// Register roles:
//	DI = address of the next byte to clear
//	BX = number of bytes still to clear
//	AX = 0, source operand of every scalar store
//	X0 = 0, source operand of every 16-byte store (SSE2 path only)
//	CX = dword count for REP STOSL (non-SSE2 path only)
//
// DX is never written; the Windows externalthreadhandler expects memclr
// to preserve it.
//
// Every size from 1 through 256 bytes is handled without a loop by two
// groups of stores, one anchored at the start of the buffer and one
// anchored at its end. The groups may overlap, which is harmless because
// every store writes zero.
TEXT runtime·memclr(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), BX
	XORL	AX, AX

	// MOVOU seems always faster than REP STOSL.
clr_dispatch:
	// Select a handler from the remaining count in BX (unsigned compares).
	TESTL	BX, BX
	JEQ	clr_done
	CMPL	BX, $2
	JBE	clr_b1_2
	CMPL	BX, $4
	JBE	clr_b3_4
	CMPL	BX, $8
	JBE	clr_b5_8
	CMPL	BX, $16
	JBE	clr_b9_16
	// More than 16 bytes remain: the 16-byte stores below need SSE2,
	// which is bit 26 (0x4000000) of cpuid_edx.
	TESTL	$0x4000000, runtime·cpuid_edx(SB)
	JEQ	clr_nosse2
	PXOR	X0, X0
	CMPL	BX, $32
	JBE	clr_b17_32
	CMPL	BX, $64
	JBE	clr_b33_64
	CMPL	BX, $128
	JBE	clr_b65_128
	CMPL	BX, $256
	JBE	clr_b129_256
	// TODO: use branch table and BSR to make this just a single dispatch

	// Reached by fall-through only, so BX > 256 on first entry.
	// Each iteration clears 256 bytes with sixteen unaligned 16-byte stores.
clr_block256:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, 128(DI)
	MOVOU	X0, 144(DI)
	MOVOU	X0, 160(DI)
	MOVOU	X0, 176(DI)
	MOVOU	X0, 192(DI)
	MOVOU	X0, 208(DI)
	MOVOU	X0, 224(DI)
	MOVOU	X0, 240(DI)
	ADDL	$256, DI
	SUBL	$256, BX
	CMPL	BX, $256
	JAE	clr_block256
	// Fewer than 256 bytes are left (possibly none): dispatch again.
	JMP	clr_dispatch

	// 1 or 2 bytes: first byte and last byte (the same byte when BX == 1).
clr_b1_2:
	MOVB	AX, (DI)
	MOVB	AX, -1(DI)(BX*1)
	RET

	// Nothing left to clear.
clr_done:
	RET

	// 3 or 4 bytes: first two bytes and last two bytes.
clr_b3_4:
	MOVW	AX, (DI)
	MOVW	AX, -2(DI)(BX*1)
	RET

	// 5 through 8 bytes: first four bytes and last four bytes.
clr_b5_8:
	MOVL	AX, (DI)
	MOVL	AX, -4(DI)(BX*1)
	RET

	// 9 through 16 bytes: first eight bytes and last eight bytes.
clr_b9_16:
	MOVL	AX, (DI)
	MOVL	AX, 4(DI)
	MOVL	AX, -8(DI)(BX*1)
	MOVL	AX, -4(DI)(BX*1)
	RET

	// 17 through 32 bytes: first 16 bytes and last 16 bytes.
clr_b17_32:
	MOVOU	X0, (DI)
	MOVOU	X0, -16(DI)(BX*1)
	RET

	// 33 through 64 bytes: first 32 bytes and last 32 bytes.
clr_b33_64:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET

	// 65 through 128 bytes: first 64 bytes and last 64 bytes.
clr_b65_128:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET

	// 129 through 256 bytes: first 128 bytes and last 128 bytes.
clr_b129_256:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, -128(DI)(BX*1)
	MOVOU	X0, -112(DI)(BX*1)
	MOVOU	X0, -96(DI)(BX*1)
	MOVOU	X0, -80(DI)(BX*1)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET

	// No SSE2 and more than 16 bytes remain: clear BX/4 whole dwords with
	// REP STOSL (AX is the stored value, CX the repeat count), then hand
	// the 1-3 leftover bytes, if any, back to the dispatcher.
	// NOTE(review): relies on the direction flag being clear so that DI
	// advances upward - confirm against the runtime's conventions.
clr_nosse2:
	MOVL	BX, CX
	SHRL	$2, CX
	REP
	STOSL
	ANDL	$3, BX
	JNE	clr_dispatch
	RET