// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build mips mipsle

#include "textflag.h"
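
// MOVWHI/MOVWLO expand to the MIPS load/store word left/right instructions
// that match the target byte order, so a MOVWHI at offset 0 paired with a
// MOVWLO at offset 3 reads or writes one unaligned 32-bit word on both
// big-endian and little-endian targets.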
#ifdef GOARCH_mips
#define MOVWHI MOVWL
#define MOVWLO MOVWR
#else
#define MOVWHI MOVWR
#define MOVWLO MOVWL
#endif
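
// The forward and backward paths below follow the same plan: align the
// destination to a 4-byte boundary, copy 32-byte chunks, then single words,
// then the remaining tail bytes. The *_ua variants use MOVWHI/MOVWLO pairs
// for a source that is still unaligned after the destination has been aligned.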
// void runtime·memmove(void*, void*, uintptr)
TEXT runtime·memmove(SB),NOSPLIT,$-0-12
MOVW n+8(FP), R3
MOVW from+4(FP), R2
MOVW to+0(FP), R1
ADDU R3, R2, R4 // end pointer for source
ADDU R3, R1, R5 // end pointer for destination
// if destination is ahead of source, start at the end of the buffer and go backward.
SGTU R1, R2, R6 // R6 = 1 if the destination is ahead of the source
BNE R6, backward
// if less than 4 bytes, use byte by byte copying
SGTU $4, R3, R6
BNE R6, f_small_copy
// align destination to 4 bytes
AND $3, R1, R6
BEQ R6, f_dest_aligned
SUBU R1, R0, R6 // R6 = -to
AND $3, R6 // R6 = number of bytes needed to align the destination
MOVWHI 0(R2), R7 // load the unaligned word at the source
SUBU R6, R3 // shrink the remaining length
MOVWLO 3(R2), R7
ADDU R6, R2 // advance the source past the copied bytes
MOVWHI R7, 0(R1) // store only the bytes up to the next word boundary
ADDU R6, R1 // the destination is now 4-byte aligned
f_dest_aligned:
AND $31, R3, R7
AND $3, R3, R6
SUBU R7, R5, R7 // end pointer for 32-byte chunks
SUBU R6, R5, R6 // end pointer for 4-byte chunks
// if source is not aligned, use unaligned reads
AND $3, R2, R8
BNE R8, f_large_ua
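
// f_large copies 32 bytes per iteration with aligned word loads and stores,
// until fewer than 32 bytes remain.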
f_large:
BEQ R1, R7, f_words
ADDU $32, R1
MOVW 0(R2), R8
MOVW 4(R2), R9
MOVW 8(R2), R10
MOVW 12(R2), R11
MOVW 16(R2), R12
MOVW 20(R2), R13
MOVW 24(R2), R14
MOVW 28(R2), R15
ADDU $32, R2
MOVW R8, -32(R1)
MOVW R9, -28(R1)
MOVW R10, -24(R1)
MOVW R11, -20(R1)
MOVW R12, -16(R1)
MOVW R13, -12(R1)
MOVW R14, -8(R1)
MOVW R15, -4(R1)
JMP f_large
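
// f_words copies the remaining full words, 4 bytes at a time.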
f_words:
BEQ R1, R6, f_tail
ADDU $4, R1
MOVW 0(R2), R8
ADDU $4, R2
MOVW R8, -4(R1)
JMP f_words
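
// f_tail copies the trailing 1-3 bytes; source and destination have the same
// alignment here, so a single MOVWLO load and store are enough.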
f_tail:
BEQ R1, R5, ret
MOVWLO -1(R4), R8
MOVWLO R8, -1(R5)
ret:
RET
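
// f_large_ua is the 32-byte loop for an unaligned source: each word is
// assembled with a MOVWHI/MOVWLO pair and stored with an aligned MOVW.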
f_large_ua:
BEQ R1, R7, f_words_ua
ADDU $32, R1
MOVWHI 0(R2), R8
MOVWHI 4(R2), R9
MOVWHI 8(R2), R10
MOVWHI 12(R2), R11
MOVWHI 16(R2), R12
MOVWHI 20(R2), R13
MOVWHI 24(R2), R14
MOVWHI 28(R2), R15
MOVWLO 3(R2), R8
MOVWLO 7(R2), R9
MOVWLO 11(R2), R10
MOVWLO 15(R2), R11
MOVWLO 19(R2), R12
MOVWLO 23(R2), R13
MOVWLO 27(R2), R14
MOVWLO 31(R2), R15
ADDU $32, R2
MOVW R8, -32(R1)
MOVW R9, -28(R1)
MOVW R10, -24(R1)
MOVW R11, -20(R1)
MOVW R12, -16(R1)
MOVW R13, -12(R1)
MOVW R14, -8(R1)
MOVW R15, -4(R1)
JMP f_large_ua
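
// f_words_ua copies the remaining full words, assembling each one with an
// unaligned load pair.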
f_words_ua:
BEQ R1, R6, f_tail_ua
MOVWHI 0(R2), R8
ADDU $4, R1
MOVWLO 3(R2), R8
ADDU $4, R2
MOVW R8, -4(R1)
JMP f_words_ua
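
// f_tail_ua assembles the last source word with an unaligned load pair and
// stores only its trailing 1-3 bytes.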
f_tail_ua:
BEQ R1, R5, ret
MOVWHI -4(R4), R8
MOVWLO -1(R4), R8
MOVWLO R8, -1(R5)
JMP ret
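
// f_small_copy copies one byte at a time; used when n is less than 4.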
f_small_copy:
BEQ R1, R5, ret
ADDU $1, R1
MOVB 0(R2), R6
ADDU $1, R2
MOVB R6, -1(R1)
JMP f_small_copy
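
// The backward path mirrors the forward one but walks from the end of the
// buffers toward the start, so an overlapping destination that starts after
// the source is copied correctly.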
backward:
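// if less than 4 bytes, use byte by byte copying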
SGTU $4, R3, R6
BNE R6, b_small_copy
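// align the end of the destination to 4 bytes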
AND $3, R5, R6
BEQ R6, b_dest_aligned
MOVWHI -4(R4), R7 // load the unaligned word that ends at the source end
SUBU R6, R3 // shrink the remaining length
MOVWLO -1(R4), R7
SUBU R6, R4 // move the source end back past the copied bytes
MOVWLO R7, -1(R5) // store only the bytes after the last word boundary
SUBU R6, R5 // the end of the destination is now 4-byte aligned
b_dest_aligned:
AND $31, R3, R7
AND $3, R3, R6
ADDU R7, R1, R7 // stop pointer for the 32-byte chunks
ADDU R6, R1, R6 // stop pointer for the 4-byte chunks
// if the end of the source is not aligned, use unaligned reads
AND $3, R4, R8
BNE R8, b_large_ua
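
// b_large copies 32 bytes per iteration, walking backward with aligned loads
// and stores, until fewer than 32 bytes remain.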
b_large:
BEQ R5, R7, b_words
ADDU $-32, R5
MOVW -4(R4), R8
MOVW -8(R4), R9
MOVW -12(R4), R10
MOVW -16(R4), R11
MOVW -20(R4), R12
MOVW -24(R4), R13
MOVW -28(R4), R14
MOVW -32(R4), R15
ADDU $-32, R4
MOVW R8, 28(R5)
MOVW R9, 24(R5)
MOVW R10, 20(R5)
MOVW R11, 16(R5)
MOVW R12, 12(R5)
MOVW R13, 8(R5)
MOVW R14, 4(R5)
MOVW R15, 0(R5)
JMP b_large
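
// b_words copies the remaining full words backward, 4 bytes at a time.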
b_words:
BEQ R5, R6, b_tail
ADDU $-4, R5
MOVW -4(R4), R8
ADDU $-4, R4
MOVW R8, 0(R5)
JMP b_words
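
// b_tail copies the remaining 1-3 bytes at the start of the buffers.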
b_tail:
BEQ R5, R1, ret
MOVWHI 0(R2), R8 // R2 and R1 have the same alignment so we don't need to load a whole word
MOVWHI R8, 0(R1)
JMP ret
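
// b_large_ua is the backward 32-byte loop for an unaligned source; each word
// is assembled with a MOVWHI/MOVWLO pair.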
b_large_ua:
BEQ R5, R7, b_words_ua
ADDU $-32, R5
MOVWHI -4(R4), R8
MOVWHI -8(R4), R9
MOVWHI -12(R4), R10
MOVWHI -16(R4), R11
MOVWHI -20(R4), R12
MOVWHI -24(R4), R13
MOVWHI -28(R4), R14
MOVWHI -32(R4), R15
MOVWLO -1(R4), R8
MOVWLO -5(R4), R9
MOVWLO -9(R4), R10
MOVWLO -13(R4), R11
MOVWLO -17(R4), R12
MOVWLO -21(R4), R13
MOVWLO -25(R4), R14
MOVWLO -29(R4), R15
ADDU $-32, R4
MOVW R8, 28(R5)
MOVW R9, 24(R5)
MOVW R10, 20(R5)
MOVW R11, 16(R5)
MOVW R12, 12(R5)
MOVW R13, 8(R5)
MOVW R14, 4(R5)
MOVW R15, 0(R5)
JMP b_large_ua
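
// b_words_ua copies the remaining full words backward, assembling each one
// with an unaligned load pair.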
b_words_ua:
BEQ R5, R6, b_tail_ua
MOVWHI -4(R4), R8
ADDU $-4, R5
MOVWLO -1(R4), R8
ADDU $-4, R4
MOVW R8, 0(R5)
JMP b_words_ua
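
// b_tail_ua assembles the first source word with an unaligned load pair and
// stores only its leading 1-3 bytes.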
b_tail_ua:
BEQ R5, R1, ret
MOVWHI (R2), R8
MOVWLO 3(R2), R8
MOVWHI R8, 0(R1)
JMP ret
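
// b_small_copy copies one byte at a time, backward; used when n is less than 4.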
b_small_copy:
BEQ R5, R1, ret
ADDU $-1, R5
MOVB -1(R4), R6
ADDU $-1, R4
MOVB R6, 0(R5)
JMP b_small_copy