diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/runtime/memmove_arm.s | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4 (tag: upstream/1.4)
Diffstat (limited to 'src/runtime/memmove_arm.s')
-rw-r--r-- | src/runtime/memmove_arm.s | 261 |
1 file changed, 261 insertions, 0 deletions
diff --git a/src/runtime/memmove_arm.s b/src/runtime/memmove_arm.s new file mode 100644 index 000000000..f187d4267 --- /dev/null +++ b/src/runtime/memmove_arm.s @@ -0,0 +1,261 @@ +// Inferno's libkern/memmove-arm.s +// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-arm.s +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. +// Portions Copyright 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "textflag.h" + +// TE or TS are spilled to the stack during bulk register moves. +TS = 0 +TE = 8 + +// Warning: the linker will use R11 to synthesize certain instructions. Please +// take care and double check with objdump. 
+FROM = 11 +N = 12 +TMP = 12 /* N and TMP don't overlap */ +TMP1 = 5 + +RSHIFT = 5 +LSHIFT = 6 +OFFSET = 7 + +BR0 = 0 /* shared with TS */ +BW0 = 1 +BR1 = 1 +BW1 = 2 +BR2 = 2 +BW2 = 3 +BR3 = 3 +BW3 = 4 + +FW0 = 1 +FR0 = 2 +FW1 = 2 +FR1 = 3 +FW2 = 3 +FR2 = 4 +FW3 = 4 +FR3 = 8 /* shared with TE */ + +TEXT runtime·memmove(SB), NOSPLIT, $4-12 +_memmove: + MOVW to+0(FP), R(TS) + MOVW from+4(FP), R(FROM) + MOVW n+8(FP), R(N) + + ADD R(N), R(TS), R(TE) /* to end pointer */ + + CMP R(FROM), R(TS) + BLS _forward + +_back: + ADD R(N), R(FROM) /* from end pointer */ + CMP $4, R(N) /* need at least 4 bytes to copy */ + BLT _b1tail + +_b4align: /* align destination on 4 */ + AND.S $3, R(TE), R(TMP) + BEQ _b4aligned + + MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ + MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ + B _b4align + +_b4aligned: /* is source now aligned? */ + AND.S $3, R(FROM), R(TMP) + BNE _bunaligned + + ADD $31, R(TS), R(TMP) /* do 32-byte chunks if possible */ + MOVW R(TS), savedts-4(SP) +_b32loop: + CMP R(TMP), R(TE) + BLS _b4tail + + MOVM.DB.W (R(FROM)), [R0-R7] + MOVM.DB.W [R0-R7], (R(TE)) + B _b32loop + +_b4tail: /* do remaining words if possible */ + MOVW savedts-4(SP), R(TS) + ADD $3, R(TS), R(TMP) +_b4loop: + CMP R(TMP), R(TE) + BLS _b1tail + + MOVW.W -4(R(FROM)), R(TMP1) /* pre-indexed */ + MOVW.W R(TMP1), -4(R(TE)) /* pre-indexed */ + B _b4loop + +_b1tail: /* remaining bytes */ + CMP R(TE), R(TS) + BEQ _return + + MOVBU.W -1(R(FROM)), R(TMP) /* pre-indexed */ + MOVBU.W R(TMP), -1(R(TE)) /* pre-indexed */ + B _b1tail + +_forward: + CMP $4, R(N) /* need at least 4 bytes to copy */ + BLT _f1tail + +_f4align: /* align destination on 4 */ + AND.S $3, R(TS), R(TMP) + BEQ _f4aligned + + MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ + MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ + B _f4align + +_f4aligned: /* is source now aligned? 
*/ + AND.S $3, R(FROM), R(TMP) + BNE _funaligned + + SUB $31, R(TE), R(TMP) /* do 32-byte chunks if possible */ + MOVW R(TE), savedte-4(SP) +_f32loop: + CMP R(TMP), R(TS) + BHS _f4tail + + MOVM.IA.W (R(FROM)), [R1-R8] + MOVM.IA.W [R1-R8], (R(TS)) + B _f32loop + +_f4tail: + MOVW savedte-4(SP), R(TE) + SUB $3, R(TE), R(TMP) /* do remaining words if possible */ +_f4loop: + CMP R(TMP), R(TS) + BHS _f1tail + + MOVW.P 4(R(FROM)), R(TMP1) /* implicit write back */ + MOVW.P R(TMP1), 4(R(TS)) /* implicit write back */ + B _f4loop + +_f1tail: + CMP R(TS), R(TE) + BEQ _return + + MOVBU.P 1(R(FROM)), R(TMP) /* implicit write back */ + MOVBU.P R(TMP), 1(R(TS)) /* implicit write back */ + B _f1tail + +_return: + MOVW to+0(FP), R0 + RET + +_bunaligned: + CMP $2, R(TMP) /* is R(TMP) < 2 ? */ + + MOVW.LT $8, R(RSHIFT) /* (R(n)<<24)|(R(n-1)>>8) */ + MOVW.LT $24, R(LSHIFT) + MOVW.LT $1, R(OFFSET) + + MOVW.EQ $16, R(RSHIFT) /* (R(n)<<16)|(R(n-1)>>16) */ + MOVW.EQ $16, R(LSHIFT) + MOVW.EQ $2, R(OFFSET) + + MOVW.GT $24, R(RSHIFT) /* (R(n)<<8)|(R(n-1)>>24) */ + MOVW.GT $8, R(LSHIFT) + MOVW.GT $3, R(OFFSET) + + ADD $16, R(TS), R(TMP) /* do 16-byte chunks if possible */ + CMP R(TMP), R(TE) + BLS _b1tail + + BIC $3, R(FROM) /* align source */ + MOVW R(TS), savedts-4(SP) + MOVW (R(FROM)), R(BR0) /* prime first block register */ + +_bu16loop: + CMP R(TMP), R(TE) + BLS _bu1tail + + MOVW R(BR0)<<R(LSHIFT), R(BW3) + MOVM.DB.W (R(FROM)), [R(BR0)-R(BR3)] + ORR R(BR3)>>R(RSHIFT), R(BW3) + + MOVW R(BR3)<<R(LSHIFT), R(BW2) + ORR R(BR2)>>R(RSHIFT), R(BW2) + + MOVW R(BR2)<<R(LSHIFT), R(BW1) + ORR R(BR1)>>R(RSHIFT), R(BW1) + + MOVW R(BR1)<<R(LSHIFT), R(BW0) + ORR R(BR0)>>R(RSHIFT), R(BW0) + + MOVM.DB.W [R(BW0)-R(BW3)], (R(TE)) + B _bu16loop + +_bu1tail: + MOVW savedts-4(SP), R(TS) + ADD R(OFFSET), R(FROM) + B _b1tail + +_funaligned: + CMP $2, R(TMP) + + MOVW.LT $8, R(RSHIFT) /* (R(n+1)<<24)|(R(n)>>8) */ + MOVW.LT $24, R(LSHIFT) + MOVW.LT $3, R(OFFSET) + + MOVW.EQ $16, R(RSHIFT) /* 
(R(n+1)<<16)|(R(n)>>16) */ + MOVW.EQ $16, R(LSHIFT) + MOVW.EQ $2, R(OFFSET) + + MOVW.GT $24, R(RSHIFT) /* (R(n+1)<<8)|(R(n)>>24) */ + MOVW.GT $8, R(LSHIFT) + MOVW.GT $1, R(OFFSET) + + SUB $16, R(TE), R(TMP) /* do 16-byte chunks if possible */ + CMP R(TMP), R(TS) + BHS _f1tail + + BIC $3, R(FROM) /* align source */ + MOVW R(TE), savedte-4(SP) + MOVW.P 4(R(FROM)), R(FR3) /* prime last block register, implicit write back */ + +_fu16loop: + CMP R(TMP), R(TS) + BHS _fu1tail + + MOVW R(FR3)>>R(RSHIFT), R(FW0) + MOVM.IA.W (R(FROM)), [R(FR0),R(FR1),R(FR2),R(FR3)] + ORR R(FR0)<<R(LSHIFT), R(FW0) + + MOVW R(FR0)>>R(RSHIFT), R(FW1) + ORR R(FR1)<<R(LSHIFT), R(FW1) + + MOVW R(FR1)>>R(RSHIFT), R(FW2) + ORR R(FR2)<<R(LSHIFT), R(FW2) + + MOVW R(FR2)>>R(RSHIFT), R(FW3) + ORR R(FR3)<<R(LSHIFT), R(FW3) + + MOVM.IA.W [R(FW0),R(FW1),R(FW2),R(FW3)], (R(TS)) + B _fu16loop + +_fu1tail: + MOVW savedte-4(SP), R(TE) + SUB R(OFFSET), R(FROM) + B _f1tail |