|  | // Copyright 2018 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | #include "go_asm.h" | 
|  | #include "textflag.h" | 
|  |  | 
// Compare three-way compares a and b and stores -1, 0 or +1 in ret.
// (Presumably declared in Go as func Compare(a, b []byte) int; confirm
// against the Go-side declaration.)
//
// Frame: no locals, 28 bytes of arguments and result.
//   a: base at +0,  len at +4
//   b: base at +12, len at +16
//   ret at +24
// The words at +8 and +20 are not read.
TEXT ·Compare(SB),NOSPLIT,$0-28
	// The four loads are independent of one another. Each MOVL
	// zero-extends into the full 64-bit register, which is what the
	// 64-bit compares in cmpbody operate on.
	MOVL	a_len+4(FP), BX		// BX = alen
	MOVL	b_len+16(FP), DX	// DX = blen
	MOVL	a_base+0(FP), SI	// SI = a
	MOVL	b_base+12(FP), DI	// DI = b
	CALL	cmpbody<>(SB)		// AX = 1/0/-1
	MOVL	AX, ret+24(FP)
	RET
|  |  | 
// cmpstring three-way compares a and b and stores -1, 0 or +1 in ret.
// (Presumably the runtime's string comparison helper taking two strings;
// confirm against the Go-side declaration.)
//
// Frame: no locals, 20 bytes of arguments and result.
//   a: base at +0, len at +4
//   b: base at +8, len at +12
//   ret at +16
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
	// The four loads are independent of one another. Each MOVL
	// zero-extends into the full 64-bit register, which is what the
	// 64-bit compares in cmpbody operate on.
	MOVL	a_len+4(FP), BX		// BX = alen
	MOVL	b_len+12(FP), DX	// DX = blen
	MOVL	a_base+0(FP), SI	// SI = a
	MOVL	b_base+8(FP), DI	// DI = b
	CALL	cmpbody<>(SB)		// AX = 1/0/-1
	MOVL	AX, ret+16(FP)
	RET
|  |  | 
// cmpbody is the register-based three-way comparison kernel.
//
// input:
//   SI = a (address of first byte)
//   DI = b (address of first byte)
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
// clobbers:
//   BX, CX, SI, DI, R8, X0, X1, flags
//
// Only the first min(alen, blen) bytes are examined. If they are all
// equal, or a and b start at the same address, the lengths decide.
TEXT cmpbody<>(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	samebytes		// identical start address: nothing to scan
	CMPQ	BX, DX
	MOVQ	DX, R8			// MOVQ leaves the flags from CMPQ intact
	CMOVQLT	BX, R8			// R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	under8

	// Compare 16 bytes per iteration while more than 16 remain.
loop16:
	CMPQ	R8, $16
	JBE	tail16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB	X0, X1			// each byte of X1 = 0xff where equal, 0 otherwise
	PMOVMSKB	X1, AX		// AX bit i set <=> byte i equal
	XORQ	$0xffff, AX		// flip: AX bit i set <=> byte i differs
	JNE	diff16			// at least one of the 16 bytes differs
	SUBQ	$16, R8
	ADDQ	$16, SI
	ADDQ	$16, DI
	JMP	loop16

	// AX = bit mask of differing bytes within the current 16-byte chunk.
diff16:
	BSFQ	AX, BX			// BX = index of first differing byte (alen no longer needed)
	XORQ	AX, AX			// zero AX before the compare so the flags survive until SETHI
	ADDQ	BX, SI
	ADDQ	BX, DI
	MOVB	(SI), CX		// CX = a's differing byte
	CMPB	CX, (DI)
	SETHI	AX			// AX = 1 if a's byte > b's byte (unsigned), else 0
	LEAQ	-1(AX*2), AX		// 1/0 => +1/-1
	RET

	// 1 through 16 bytes left. Both inputs are at least 8 bytes long,
	// so an 8-byte load ending at the last byte stays inside the data.
tail16:
	CMPQ	R8, $8
	JBE	tail8
	MOVQ	(SI), AX		// more than 8 left: check the leading 8 first
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
tail8:
	ADDQ	R8, SI			// SI, DI = one past the last byte to compare
	ADDQ	R8, DI
	MOVQ	-8(SI), AX		// final 8 bytes; may overlap bytes already found equal
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	samebytes

	// AX and CX hold 8-byte windows of a and b that differ.
diff8:
	BSWAPQ	AX			// byte at the lowest address becomes most significant
	BSWAPQ	CX
	XORQ	AX, CX			// CX = differing bits
	BSRQ	CX, CX			// CX = index of highest differing bit
	SHRQ	CX, AX			// move a's bit at that position to bit 0
	ANDQ	$1, AX			// isolate it: 1 means a is larger here
	LEAQ	-1(AX*2), AX		// 1/0 => +1/-1
	RET

	// 0-7 bytes to compare.
under8:
	LEAQ	(R8*8), CX		// bytes left -> bits left
	NEGQ	CX			// -(bits left) == 64 - bits left (mod 64), used as a shift count
	JEQ	samebytes		// zero bytes to compare

	// Load a's bytes into the high bytes of SI.
	// If the low byte of the address is above 0xf8, a forward 8-byte
	// read would run past the current 256-byte-aligned block
	// (presumably a guard against touching an unmapped following
	// page), so read the 8 bytes that end at a+R8 instead.
	CMPB	SI, $0xf8
	JA	a_end
	MOVQ	(SI), SI
	JMP	a_ready
a_end:
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI			// drop the bytes that precede a
a_ready:
	SHLQ	CX, SI			// wanted bytes now occupy the high bytes

	// Load b's bytes into the high bytes of DI, same scheme as for a.
	CMPB	DI, $0xf8
	JA	b_end
	MOVQ	(DI), DI
	JMP	b_ready
b_end:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI			// drop the bytes that precede b
b_ready:
	SHLQ	CX, DI			// wanted bytes now occupy the high bytes

	BSWAPQ	SI			// byte at the lowest address becomes most significant
	BSWAPQ	DI
	XORQ	SI, DI			// DI = differing bits
	JEQ	samebytes
	BSRQ	DI, CX			// CX = index of highest differing bit
	SHRQ	CX, SI			// move a's bit at that position to bit 0
	ANDQ	$1, SI			// isolate it: 1 means a is larger here
	LEAQ	-1(SI*2), AX		// 1/0 => +1/-1
	RET

	// All compared bytes are equal: order by length.
	// BX and DX still hold alen and blen on every path that reaches here.
samebytes:
	XORQ	CX, CX
	XORQ	AX, AX
	CMPQ	BX, DX
	SETGT	AX			// 1 if alen > blen
	SETEQ	CX			// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET