| // Copyright 2014 The Go Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style | 
 | // license that can be found in the LICENSE file. | 
 | // | 
 | // ARM version of sha1block.go | 
 |  | 
 | #include "textflag.h" | 
 |  | 
 | // SHA-1 block routine. See sha1block.go for Go equivalent. | 
 | // | 
 | // There are 80 rounds of 4 types: | 
 | //   - rounds 0-15 are type 1 and load data (ROUND1 macro). | 
 | //   - rounds 16-19 are type 1 and do not load data (ROUND1x macro). | 
 | //   - rounds 20-39 are type 2 and do not load data (ROUND2 macro). | 
 | //   - rounds 40-59 are type 3 and do not load data (ROUND3 macro). | 
 | //   - rounds 60-79 are type 4 and do not load data (ROUND4 macro). | 
 | // | 
 | // Each round loads or shuffles the data, then computes a per-round | 
 | // function of b, c, d, and then mixes the result into and rotates the | 
 | // five registers a, b, c, d, e holding the intermediate results. | 
 | // | 
 | // The register rotation is implemented by rotating the arguments to | 
 | // the round macros instead of by explicit move instructions. | 
 |  | 
 | // Register definitions | 
 | // | 
 | // Symbolic names for the ARM registers used by the macros and by block. | 
 | // Rt0, Rt1 and Rt2 are scratch: LOAD and SHUFFLE overwrite all three, | 
 | // FUNC1 and FUNC3 overwrite Rt0 and Rt1, and FUNC2 overwrites Rt1 only. | 
 | // The per-round function result is always left in Rt1 for MIX to consume. | 
 | // At the end of each block, Rt0, Rt1, Rt2, Rctr and Rw are additionally | 
 | // reused to reload the saved accumulator values before they are added in. | 
 | #define Rdata	R0	// Pointer to incoming data | 
 | #define Rconst	R1	// Current constant for SHA round | 
 | #define Ra	R2		// SHA-1 accumulator | 
 | #define Rb	R3		// SHA-1 accumulator | 
 | #define Rc	R4		// SHA-1 accumulator | 
 | #define Rd	R5		// SHA-1 accumulator | 
 | #define Re	R6		// SHA-1 accumulator | 
 | #define Rt0	R7		// Temporary | 
 | #define Rt1	R8		// Temporary | 
 | // r9, r10 are forbidden | 
 | // r11 is OK provided you check the assembler that no synthetic instructions use it | 
 | #define Rt2	R11		// Temporary | 
 | #define Rctr	R12	// loop counter (iterations of the current 5-round group) | 
 | #define Rw	R14		// pointer to the next slot of the w buffer; advanced by 4 in every round | 
 |  | 
 | // func block(dig *digest, p []byte) | 
 | // 0(FP) is *digest | 
 | // 4(FP) is p.array (struct Slice) | 
 | // 8(FP) is p.len | 
 | //12(FP) is p.cap | 
 | // | 
 | // Stack frame | 
 | // | 
 | // Named slots in block's local frame. Only p_end and w_buf are referenced | 
 | // by name in this file as seen here; the accumulator save area is written | 
 | // and read through R13 with MOVM.IB rather than through the saved symbol. | 
 | #define p_end	end-4(SP)		// pointer to the end of data | 
 | #define p_data	data-8(SP)	// current data pointer (never referenced; Rdata keeps it in a register) | 
 | #define w_buf	buf-(8+4*80)(SP)	// 80 words temporary buffer w uint32[80] | 
 | #define saved	abcde-(8+4*80+4*5)(SP)	// saved sha1 registers a,b,c,d,e - these must be last (never referenced by name) | 
 | // Total size +4 for saved LR is 352 | 
 |  | 
 | 	// LOAD(Re): fetch the next big-endian 32-bit word of input. | 
 | 	// | 
 | 	// w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3] | 
 | 	// e += w[i] | 
 | 	// | 
 | 	// The word is assembled from four single-byte loads. The last byte | 
 | 	// load post-increments Rdata by 4 and the store post-increments Rw | 
 | 	// by 4, so both pointers are ready for the next round. | 
 | 	// The Re argument is the register playing the role of e in this round. | 
 | 	// Clobbers Rt0, Rt1, Rt2; on exit Rt0 holds w[i]. | 
 | #define LOAD(Re) \ | 
 | 	MOVBU	2(Rdata), Rt0 ; \ | 
 | 	MOVBU	3(Rdata), Rt1 ; \ | 
 | 	MOVBU	1(Rdata), Rt2 ; \ | 
 | 	ORR	Rt0<<8, Rt1, Rt0	    ; \ | 
 | 	MOVBU.P	4(Rdata), Rt1 ; \ | 
 | 	ORR	Rt2<<16, Rt0, Rt0	    ; \ | 
 | 	ORR	Rt1<<24, Rt0, Rt0	    ; \ | 
 | 	MOVW.P	Rt0, 4(Rw)		    ; \ | 
 | 	ADD	Rt0, Re, Re | 
 | 	 | 
 | 	// SHUFFLE(Re): extend the message schedule by one word. | 
 | 	// | 
 | 	// tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16] | 
 | 	// w[i] = tmp<<1 | tmp>>(32-1) | 
 | 	// e += w[i] | 
 | 	// | 
 | 	// Rw points at w[i] on entry, so earlier words are read at negative | 
 | 	// offsets from it; the indices are not masked because w_buf holds all | 
 | 	// 80 words rather than a 16-word circular window. The store | 
 | 	// post-increments Rw by 4. | 
 | 	// The Re argument is the register playing the role of e in this round. | 
 | 	// Clobbers Rt0, Rt1, Rt2; on exit Rt0 holds w[i]. | 
 | #define SHUFFLE(Re) \ | 
 | 	MOVW	(-16*4)(Rw), Rt0 ; \ | 
 | 	MOVW	(-14*4)(Rw), Rt1 ; \ | 
 | 	MOVW	(-8*4)(Rw), Rt2  ; \ | 
 | 	EOR	Rt0, Rt1, Rt0  ; \ | 
 | 	MOVW	(-3*4)(Rw), Rt1  ; \ | 
 | 	EOR	Rt2, Rt0, Rt0  ; \ | 
 | 	EOR	Rt0, Rt1, Rt0  ; \ | 
 | 	MOVW	Rt0@>(32-1), Rt0  ; \ | 
 | 	MOVW.P	Rt0, 4(Rw)	  ; \ | 
 | 	ADD	Rt0, Re, Re | 
 |  | 
 | 	// FUNC1: round function for rounds 0-19. | 
 | 	// t1 = (b & c) | ((~b) & d) | 
 | 	// Result is left in Rt1; Rt0 is clobbered. The Ra and Re arguments | 
 | 	// are accepted for a uniform signature but are not used. | 
 | #define FUNC1(Ra, Rb, Rc, Rd, Re) \ | 
 | 	MVN	Rb, Rt1	   ; \ | 
 | 	AND	Rb, Rc, Rt0  ; \ | 
 | 	AND	Rd, Rt1, Rt1 ; \ | 
 | 	ORR	Rt0, Rt1, Rt1 | 
 |  | 
 | 	// FUNC2: round function for rounds 20-39 (and, via FUNC4, 60-79). | 
 | 	// t1 = b ^ c ^ d | 
 | 	// Result is left in Rt1; no other register is written. The Ra and Re | 
 | 	// arguments are accepted for a uniform signature but are not used. | 
 | #define FUNC2(Ra, Rb, Rc, Rd, Re) \ | 
 | 	EOR	Rb, Rc, Rt1 ; \ | 
 | 	EOR	Rd, Rt1, Rt1 | 
 |  | 
 | 	// FUNC3: round function for rounds 40-59 (bitwise majority of b, c, d). | 
 | 	// t1 = (b & c) | (b & d) | (c & d) = | 
 | 	// t1 = (b & c) | ((b | c) & d) | 
 | 	// The second form is the one computed. Result is left in Rt1; Rt0 is | 
 | 	// clobbered. The Ra and Re arguments are not used. | 
 | #define FUNC3(Ra, Rb, Rc, Rd, Re) \ | 
 | 	ORR	Rb, Rc, Rt0  ; \ | 
 | 	AND	Rb, Rc, Rt1  ; \ | 
 | 	AND	Rd, Rt0, Rt0 ; \ | 
 | 	ORR	Rt0, Rt1, Rt1 | 
 |  | 
 | // FUNC4: rounds 60-79 use the same b ^ c ^ d function as FUNC2. | 
 | #define FUNC4 FUNC2 | 
 |  | 
 | 	// MIX: fold the round function result and round constant into e. | 
 | 	// | 
 | 	// a5 := a<<5 | a>>(32-5) | 
 | 	// b = b<<30 | b>>(32-30) | 
 | 	// e = a5 + t1 + e + const | 
 | 	// | 
 | 	// Expects t1 in Rt1 (as left by FUNC1..FUNC4) and the round constant | 
 | 	// in Rconst. Left rotations are expressed as right rotations by | 
 | 	// (32-n). Rb is rotated in place; Ra is only read. The Rc and Rd | 
 | 	// arguments are not used. | 
 | #define MIX(Ra, Rb, Rc, Rd, Re) \ | 
 | 	ADD	Rt1, Re, Re	 ; \ | 
 | 	MOVW	Rb@>(32-30), Rb	 ; \ | 
 | 	ADD	Ra@>(32-5), Re, Re ; \ | 
 | 	ADD	Rconst, Re, Re | 
 |  | 
 | // Round macros. | 
 | // | 
 | // Each round is three steps: obtain w[i] and add it to e (LOAD or SHUFFLE), | 
 | // compute the round function into Rt1 (FUNCn), then MIX. The arguments name | 
 | // the registers currently playing the roles a, b, c, d, e; callers rotate | 
 | // the argument list from one round to the next instead of moving registers. | 
 | // All of them clobber Rt0, Rt1, Rt2 and advance Rw by 4. | 
 |  | 
 | // ROUND1: type 1 round that reads its word from the input (also advances Rdata by 4). | 
 | #define ROUND1(Ra, Rb, Rc, Rd, Re) \ | 
 | 	LOAD(Re)		; \ | 
 | 	FUNC1(Ra, Rb, Rc, Rd, Re)	; \ | 
 | 	MIX(Ra, Rb, Rc, Rd, Re) | 
 |  | 
 | // ROUND1x: type 1 round that derives its word from earlier w entries. | 
 | #define ROUND1x(Ra, Rb, Rc, Rd, Re) \ | 
 | 	SHUFFLE(Re)	; \ | 
 | 	FUNC1(Ra, Rb, Rc, Rd, Re)	; \ | 
 | 	MIX(Ra, Rb, Rc, Rd, Re) | 
 |  | 
 | // ROUND2: type 2 round (b ^ c ^ d). | 
 | #define ROUND2(Ra, Rb, Rc, Rd, Re) \ | 
 | 	SHUFFLE(Re)	; \ | 
 | 	FUNC2(Ra, Rb, Rc, Rd, Re)	; \ | 
 | 	MIX(Ra, Rb, Rc, Rd, Re) | 
 |  | 
 | // ROUND3: type 3 round (majority of b, c, d). | 
 | #define ROUND3(Ra, Rb, Rc, Rd, Re) \ | 
 | 	SHUFFLE(Re)	; \ | 
 | 	FUNC3(Ra, Rb, Rc, Rd, Re)	; \ | 
 | 	MIX(Ra, Rb, Rc, Rd, Re) | 
 |  | 
 | // ROUND4: type 4 round (same function as type 2; the caller loads a different Rconst). | 
 | #define ROUND4(Ra, Rb, Rc, Rd, Re) \ | 
 | 	SHUFFLE(Re)	; \ | 
 | 	FUNC4(Ra, Rb, Rc, Rd, Re)	; \ | 
 | 	MIX(Ra, Rb, Rc, Rd, Re) | 
 |  | 
 |  | 
 | // func block(dig *digest, p []byte) | 
 | // | 
 | // Hashes the data in p into the five accumulator words stored at the start | 
 | // of *dig, one 64-byte block (16 input words, 80 rounds) at a time. | 
 | // | 
 | // Only whole 64-byte blocks are consumed: the length is rounded down to a | 
 | // multiple of 64, and if nothing remains the function returns without | 
 | // reading p or modifying *dig. Without that check the do-while shaped main | 
 | // loop would always hash at least one block, reading past the end of an | 
 | // empty or short slice. | 
 | // | 
 | // Rounds are issued in groups of five so that the rotating argument list | 
 | // returns to (Ra, Rb, Rc, Rd, Re) at the end of every group, which lets each | 
 | // group be wrapped in a counted loop. | 
 | TEXT	·block(SB), 0, $352-16 | 
 | 	MOVW	p+4(FP), Rdata	// pointer to the data | 
 | 	MOVW	p_len+8(FP), Rt0	// number of bytes | 
 | 	BIC.S	$63, Rt0		// round down to whole 64-byte blocks; sets Z if none | 
 | 	BEQ	done			// no complete block: leave the digest untouched | 
 | 	ADD	Rdata, Rt0 | 
 | 	MOVW	Rt0, p_end	// pointer to end of data | 
 |  | 
 | 	// Load up initial SHA-1 accumulator | 
 | 	MOVW	dig+0(FP), Rt0 | 
 | 	MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re] | 
 |  | 
 | loop: | 
 | 	// Save registers at SP+4 onwards | 
 | 	MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13) | 
 |  | 
 | 	// Rounds 0-14: three passes of five data-loading rounds. | 
 | 	MOVW	$w_buf, Rw | 
 | 	MOVW	$0x5A827999, Rconst | 
 | 	MOVW	$3, Rctr | 
 | loop1:	ROUND1(Ra, Rb, Rc, Rd, Re) | 
 | 	ROUND1(Re, Ra, Rb, Rc, Rd) | 
 | 	ROUND1(Rd, Re, Ra, Rb, Rc) | 
 | 	ROUND1(Rc, Rd, Re, Ra, Rb) | 
 | 	ROUND1(Rb, Rc, Rd, Re, Ra) | 
 | 	SUB.S	$1, Rctr | 
 | 	BNE	loop1 | 
 |  | 
 | 	// Round 15 loads the last input word; rounds 16-19 keep the type 1 | 
 | 	// function and constant but take their words from the schedule. | 
 | 	ROUND1(Ra, Rb, Rc, Rd, Re) | 
 | 	ROUND1x(Re, Ra, Rb, Rc, Rd) | 
 | 	ROUND1x(Rd, Re, Ra, Rb, Rc) | 
 | 	ROUND1x(Rc, Rd, Re, Ra, Rb) | 
 | 	ROUND1x(Rb, Rc, Rd, Re, Ra) | 
 | 	 | 
 | 	// Rounds 20-39: four passes of five type 2 rounds. | 
 | 	MOVW	$0x6ED9EBA1, Rconst | 
 | 	MOVW	$4, Rctr | 
 | loop2:	ROUND2(Ra, Rb, Rc, Rd, Re) | 
 | 	ROUND2(Re, Ra, Rb, Rc, Rd) | 
 | 	ROUND2(Rd, Re, Ra, Rb, Rc) | 
 | 	ROUND2(Rc, Rd, Re, Ra, Rb) | 
 | 	ROUND2(Rb, Rc, Rd, Re, Ra) | 
 | 	SUB.S	$1, Rctr | 
 | 	BNE	loop2 | 
 | 	 | 
 | 	// Rounds 40-59: four passes of five type 3 rounds. | 
 | 	MOVW	$0x8F1BBCDC, Rconst | 
 | 	MOVW	$4, Rctr | 
 | loop3:	ROUND3(Ra, Rb, Rc, Rd, Re) | 
 | 	ROUND3(Re, Ra, Rb, Rc, Rd) | 
 | 	ROUND3(Rd, Re, Ra, Rb, Rc) | 
 | 	ROUND3(Rc, Rd, Re, Ra, Rb) | 
 | 	ROUND3(Rb, Rc, Rd, Re, Ra) | 
 | 	SUB.S	$1, Rctr | 
 | 	BNE	loop3 | 
 | 	 | 
 | 	// Rounds 60-79: four passes of five type 4 rounds. | 
 | 	MOVW	$0xCA62C1D6, Rconst | 
 | 	MOVW	$4, Rctr | 
 | loop4:	ROUND4(Ra, Rb, Rc, Rd, Re) | 
 | 	ROUND4(Re, Ra, Rb, Rc, Rd) | 
 | 	ROUND4(Rd, Re, Ra, Rb, Rc) | 
 | 	ROUND4(Rc, Rd, Re, Ra, Rb) | 
 | 	ROUND4(Rb, Rc, Rd, Re, Ra) | 
 | 	SUB.S	$1, Rctr | 
 | 	BNE	loop4 | 
 |  | 
 | 	// Accumulate - restoring registers from SP+4 | 
 | 	// The values saved at the top of the loop are reloaded into the | 
 | 	// scratch, counter and w-pointer registers, all of which are dead | 
 | 	// here and are reinitialised before their next use. | 
 | 	MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw] | 
 | 	ADD	Rt0, Ra | 
 | 	ADD	Rt1, Rb | 
 | 	ADD	Rt2, Rc | 
 | 	ADD	Rctr, Rd | 
 | 	ADD	Rw, Re | 
 |  | 
 | 	// Continue while the data pointer is below the end pointer (unsigned). | 
 | 	MOVW	p_end, Rt0 | 
 | 	CMP	Rt0, Rdata | 
 | 	BLO	loop | 
 |  | 
 | 	// Save final SHA-1 accumulator | 
 | 	MOVW	dig+0(FP), Rt0 | 
 | 	MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0) | 
 |  | 
 | done: | 
 | 	RET | 