Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 1 | // Copyright 2014 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Russ Cox | 09d92b6 | 2014-12-05 19:13:20 -0500 | [diff] [blame] | 5 | // +build ppc64 ppc64le |
Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 6 | |
Austin Clements | 062e354 | 2014-10-27 17:27:03 -0400 | [diff] [blame] | 7 | #include "textflag.h" |
Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 8 | |
| 9 | // void runtime·memmove(void*, void*, uintptr) |
// memmove copies n bytes from `from` to `to`. The two regions may overlap:
// when the destination starts inside the source region the copy runs
// backward (from the ends of the buffers), otherwise it runs forward.
//
// Register use: R3 = to, R4 = from, R5 = n, R6 = n >> 3 (double words),
// R7 = n & 7 (tail bytes), R8/R9/R14/R15 = scratch.
//
// Raw branch forms used below:
//   BC 16, 0, label   decrement CTR and branch to label while CTR != 0
//   BC 12, bit, label branch to label if the given CR bit is set
//   BC 4, bit, LR     return if the given CR bit is clear
// CR bits are numbered 4*field + {0: LT, 1: GT, 2: EQ}.
Michael Hudson-Doyle | 45c06b2 | 2015-10-09 12:44:27 +1300 | [diff] [blame] | 10 | TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24 |
Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 11 | MOVD to+0(FP), R3 |
| 12 | MOVD from+8(FP), R4 |
| 13 | MOVD n+16(FP), R5 |
Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 14 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 15 | // Determine if there are doublewords to |
| 16 | // copy so a more efficient move can be done |
Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 17 | check: |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 18 | ANDCC $7, R5, R7 // R7 = n & 7: tail bytes to copy; CC form, CR0 is tested by the BEQ in backward |
| 19 | SRAD $3, R5, R6 // R6 = n >> 3: double words to copy |
| 20 | CMP R6, $0, CR1 // CR1[EQ] set if no double words to copy |
Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 21 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 22 | // Determine overlap by subtracting dest - src and comparing against the |
| 23 | // length. This catches the cases where src and dest are in different types |
| 24 | // of storage such as stack and static to avoid doing backward move when not |
| 25 | // necessary. |
// CMPU is an unsigned compare, so (dest - src) < len holds only when dest
// lies inside [src, src+len); a forward copy would then overwrite source
// bytes before they are read.
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 26 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 27 | SUB R4, R3, R8 // dest - src |
| 28 | CMPU R8, R5, CR2 // < len? |
| 29 | BC 12, 8, backward // BLT CR2 backward |
| 30 | |
| 31 | // Copying forward if no overlap. |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 32 | |
| 33 | BC 12, 6, noforwardlarge // "BEQ CR1, noforwardlarge" |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 34 | MOVD R6,CTR // R6 = number of double words |
| 35 | SRADCC $2,R6,R8 // R8 = R6 >> 2: number of 32 byte chunks; CC form sets CR0 |
| 36 | BNE forward32setup // at least one 32 byte chunk |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 37 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 38 | // Move double words |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 39 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 40 | forward8: |
| 41 | MOVD 0(R4), R8 // load one double word |
| 42 | ADD $8,R4 |
| 43 | MOVD R8, 0(R3) // store it |
| 44 | ADD $8,R3 |
| 45 | BC 16, 0, forward8 |
| 46 | BR noforwardlarge // handle remainder |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 47 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 48 | // Prepare for moves of 32 bytes at a time. |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 49 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 50 | forward32setup: |
| 51 | DCBTST (R3) // prepare data cache |
| 52 | DCBT (R4) |
| 53 | MOVD R8, CTR // CTR = number of 32 byte chunks |
| 54 | |
| 55 | forward32: |
| 56 | MOVD 0(R4), R8 // load 4 double words |
| 57 | MOVD 8(R4), R9 |
| 58 | MOVD 16(R4), R14 |
| 59 | MOVD 24(R4), R15 |
| 60 | ADD $32,R4 |
| 61 | MOVD R8, 0(R3) // store those 4 |
| 62 | MOVD R9, 8(R3) |
| 63 | MOVD R14,16(R3) |
| 64 | MOVD R15,24(R3) |
| 65 | ADD $32,R3 // bump up for next set |
| 66 | BC 16, 0, forward32 // continue |
| 67 | RLDCLCC $61,R5,$3,R6 // remaining doublewords; CC form, CR0 feeds the BEQ below |
| 68 | BEQ noforwardlarge |
| 69 | MOVD R6,CTR // set up the CTR |
| 70 | BR forward8 |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 71 | |
| 72 | noforwardlarge: |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 73 | CMP R7,$0 // any remaining bytes |
| 74 | BC 4, 1, LR |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 75 | |
| 76 | forwardtail: |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 77 | MOVD R7, CTR // move tail bytes |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 78 | |
| 79 | forwardtailloop: |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 80 | MOVBZ 0(R4), R8 // move single bytes |
| 81 | ADD $1,R4 |
| 82 | MOVBZ R8, 0(R3) |
| 83 | ADD $1,R3 |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 84 | BC 16, 0, forwardtailloop |
Austin Clements | 2774b37 | 2015-06-03 14:59:27 -0400 | [diff] [blame] | 85 | RET |
Shenghou Ma | 5f691cf | 2014-08-12 19:48:49 -0400 | [diff] [blame] | 86 | |
| 87 | backward: |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 88 | // Copying backwards proceeds by copying R7 bytes then copying R6 double words. |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 89 | // R3 and R4 are advanced to the end of the destination/source buffers |
| 90 | // respectively and moved back as we copy. |
| 91 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 92 | ADD R5, R4, R4 // end of source |
| 93 | ADD R3, R5, R3 // end of dest |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 94 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 95 | BEQ nobackwardtail // CR0[EQ] from the ANDCC at check: no tail bytes |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 96 | |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 97 | MOVD R7, CTR // bytes to move |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 98 | |
| 99 | backwardtailloop: |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 100 | MOVBZ -1(R4), R8 // load the last byte not yet copied |
| 101 | SUB $1,R4 |
| 102 | MOVBZ R8, -1(R3) |
| 103 | SUB $1,R3 |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 104 | BC 16, 0, backwardtailloop |
| 105 | |
| 106 | nobackwardtail: |
// NOTE(review): the BC below tests bit 5, i.e. CR1[GT], which was already
// set by the CMP into CR1 at check. This CMP names no CR field, so it looks
// redundant for the branch that follows — confirm before removing.
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 107 | CMP R6,$0 |
| 108 | BC 4, 5, LR |
Michael Hudson-Doyle | 2c91114 | 2015-09-22 22:35:52 +1200 | [diff] [blame] | 109 | |
| 110 | backwardlarge: |
| 111 | MOVD R6, CTR |
| 112 | |
| 113 | backwardlargeloop: |
Lynn Boger | c4807d4 | 2016-04-13 08:58:10 -0500 | [diff] [blame] | 114 | MOVD -8(R4), R8 |
| 115 | SUB $8,R4 |
| 116 | MOVD R8, -8(R3) |
| 117 | SUB $8,R3 |
| 118 | BC 16, 0, backwardlargeloop // loop until CTR reaches zero |
Austin Clements | 2774b37 | 2015-06-03 14:59:27 -0400 | [diff] [blame] | 119 | RET |