|  | // Copyright 2018 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | #include "textflag.h" | 
|  |  | 
// xorBytesSSE2 stores a[i] ^ b[i] into dst[i] for every i in [0, n).
//
// Register use: BX = dst, SI = a, CX = b, DX = n. While the tail is being
// trimmed DX is the number of leading bytes still to be processed; AX is the
// forward byte offset inside the 16-byte loop.
//
// Strategy: the tail is handled backwards, one byte at a time until DX is a
// multiple of 8, then one 8-byte word if DX is not yet a multiple of 16. The
// remaining prefix [0, DX) is then processed forwards, 16 bytes per iteration.
// The labels aligned / not_aligned describe whether the length is a multiple
// of 16; the pointers themselves are never tested, and the 16-byte loads and
// stores use MOVOU (unaligned move).
//
// NOTE(review): with n == 0 the entry test falls through to loop16b, which
// performs a 16-byte XOR before its first compare; AX is then 16 while DX is
// 0, so the loop does not stop there. Presumably callers guarantee n > 0;
// confirm against the Go wrapper.
|  | // func xorBytesSSE2(dst, a, b *byte, n int) | 
|  | TEXT ·xorBytesSSE2(SB), NOSPLIT, $0 | 
// Load the four arguments from the caller's frame.
|  | MOVQ  dst+0(FP), BX | 
|  | MOVQ  a+8(FP), SI | 
|  | MOVQ  b+16(FP), CX | 
|  | MOVQ  n+24(FP), DX | 
|  | TESTQ $15, DX            // n & 15 != 0: length is not a multiple of 16, trim the tail first. | 
|  | JNZ   not_aligned | 
|  |  | 
// DX is a multiple of 16 here: process [0, DX) in 16-byte steps.
|  | aligned: | 
|  | MOVQ $0, AX // AX = byte offset into dst/a/b, counting up from 0. | 
|  |  | 
|  | loop16b: | 
|  | MOVOU (SI)(AX*1), X0   // X0 = 16 bytes of a at offset AX. | 
|  | MOVOU (CX)(AX*1), X1 | 
|  | PXOR  X1, X0 | 
|  | MOVOU X0, (BX)(AX*1) | 
|  | ADDQ  $16, AX | 
// Repeat until the offset reaches DX. The compare runs after the body, so the
// loop always executes at least once.
|  | CMPQ  DX, AX | 
|  | JNE   loop16b | 
|  | RET | 
|  |  | 
// Byte-wise tail: XOR the last unprocessed byte and shrink DX by one, until DX
// is a multiple of 8. AX is only scratch here (its low byte is overwritten);
// it is re-zeroed at aligned before being used as an offset.
|  | loop_1b: | 
|  | SUBQ  $1, DX           // DX now indexes the last unprocessed byte. | 
|  | MOVB  (SI)(DX*1), DI | 
|  | MOVB  (CX)(DX*1), AX | 
|  | XORB  AX, DI | 
|  | MOVB  DI, (BX)(DX*1) | 
|  | TESTQ $7, DX           // DX & 7 != 0: still not a multiple of 8, do another byte. | 
|  | JNZ   loop_1b | 
|  | CMPQ  DX, $0           // Nothing left to process: return. | 
|  | JE    ret | 
|  | TESTQ $15, DX          // DX & 15 == 0: remaining length is a multiple of 16. | 
|  | JZ    aligned | 
// Otherwise DX is an odd multiple of 8: fall through to take one 8-byte step.
|  |  | 
// Reached with DX not a multiple of 16 (from the entry test or by fallthrough
// from loop_1b).
|  | not_aligned: | 
|  | TESTQ $7, DX           // DX & 7 != 0: not a multiple of 8 yet, trim single bytes. | 
|  | JNE   loop_1b | 
|  | SUBQ  $8, DX           // XOR the last 8 unprocessed bytes as one 64-bit word. | 
|  | MOVQ  (SI)(DX*1), DI | 
|  | MOVQ  (CX)(DX*1), AX | 
|  | XORQ  AX, DI | 
|  | MOVQ  DI, (BX)(DX*1) | 
|  | CMPQ  DX, $16          // DX is now a multiple of 16; if DX >= 16, run the 16-byte loop. | 
|  | JGE   aligned | 
// Otherwise fall through: for a non-negative n, DX is 0 here and all bytes
// have been processed.
|  |  | 
|  | ret: | 
|  | RET |