poly1305: cleanup ppc64le R0/$0 usage

Some instructions here implicitly rely on R0 == $0, which may not be
desired.

"CMP $0, Rx" translates to "cmpd r0, rX", which is less preferred
than "cmpdi rX, 0".  Likewise, "ADDE $0, Rx" turns into
"adde R0, Rx, Rx", which can be simplified to "ADDZE Rx, Rx", the
equivalent instruction that adds to zero with carry.

Change-Id: I5de17ff5b02c7c9d57daf014c7fe9420bfbeeeab
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/311372
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.org>
Trust: Carlos Eduardo Seo <carlos.seo@linaro.org>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
diff --git a/poly1305/sum_ppc64le.s b/poly1305/sum_ppc64le.s
index 5cd7494..3cede53 100644
--- a/poly1305/sum_ppc64le.s
+++ b/poly1305/sum_ppc64le.s
@@ -82,7 +82,7 @@
 	BGE loop
 
 bytes_between_0_and_15:
-	CMP  $0, R5
+	CMP  R5, $0
 	BEQ  done
 	MOVD $0, R16 // h0
 	MOVD $0, R17 // h1
@@ -122,7 +122,7 @@
 	// Exactly 8
 	MOVD (R4), R16
 
-	CMP $0, R17
+	CMP R17, $0
 
 	// Check if we've already set R17; if not
 	// set 1 to indicate end of msg.
@@ -151,7 +151,7 @@
 	ADD   $2, R4
 
 less2:
-	CMP   $0, R5
+	CMP   R5, $0
 	BEQ   insert1
 	MOVBZ (R4), R21
 	SLD   R22, R21, R21
@@ -166,12 +166,12 @@
 
 carry:
 	// Add new values to h0, h1, h2
-	ADDC R16, R8
-	ADDE R17, R9
-	ADDE $0, R10
-	MOVD $16, R5
-	ADD  R5, R4
-	BR   multiply
+	ADDC  R16, R8
+	ADDE  R17, R9
+	ADDZE R10, R10
+	MOVD  $16, R5
+	ADD   R5, R4
+	BR    multiply
 
 done:
 	// Save h0, h1, h2 in state