summaryrefslogtreecommitdiff
path: root/src/pkg/big/arith_arm.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/big/arith_arm.s')
-rw-r--r--src/pkg/big/arith_arm.s312
1 files changed, 312 insertions, 0 deletions
diff --git a/src/pkg/big/arith_arm.s b/src/pkg/big/arith_arm.s
new file mode 100644
index 000000000..60abe6eaa
--- /dev/null
+++ b/src/pkg/big/arith_arm.s
@@ -0,0 +1,312 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+#define CFLAG 29 // bit position of carry flag
+
+// func addVV(z, x, y []Word) (c Word)
+TEXT ·addVV(SB),7,$0
+ MOVW $0, R0
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW y+24(FP), R3
+ MOVW n+4(FP), R4
+ MOVW R4<<2, R4
+ ADD R1, R4
+ B E1
+L1:
+ MOVW.P 4(R2), R5
+ MOVW.P 4(R3), R6
+ MOVW R0, CPSR
+ ADC.S R6, R5
+ MOVW.P R5, 4(R1)
+ MOVW CPSR, R0
+E1:
+ CMP R1, R4
+ BNE L1
+
+ MOVW R0>>CFLAG, R0
+ AND $1, R0
+ MOVW R0, c+36(FP)
+ RET
+
+
+// func subVV(z, x, y []Word) (c Word)
+// (same as addVV except for SBC instead of ADC and label names)
+TEXT ·subVV(SB),7,$0
+ MOVW $(1<<CFLAG), R0
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW y+24(FP), R3
+ MOVW n+4(FP), R4
+ MOVW R4<<2, R4
+ ADD R1, R4
+ B E2
+L2:
+ MOVW.P 4(R2), R5
+ MOVW.P 4(R3), R6
+ MOVW R0, CPSR
+ SBC.S R6, R5
+ MOVW.P R5, 4(R1)
+ MOVW CPSR, R0
+E2:
+ CMP R1, R4
+ BNE L2
+
+ MOVW R0>>CFLAG, R0
+ AND $1, R0
+ EOR $1, R0
+ MOVW R0, c+36(FP)
+ RET
+
+
+// func addVW(z, x []Word, y Word) (c Word)
+TEXT ·addVW(SB),7,$0
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW y+24(FP), R3
+ MOVW n+4(FP), R4
+ MOVW R4<<2, R4
+ ADD R1, R4
+ CMP R1, R4
+ BNE L3a
+ MOVW R3, c+28(FP)
+ RET
+L3a:
+ MOVW.P 4(R2), R5
+ ADD.S R3, R5
+ MOVW.P R5, 4(R1)
+ MOVW CPSR, R0
+ B E3
+L3:
+ MOVW.P 4(R2), R5
+ MOVW R0, CPSR
+ ADC.S $0, R5
+ MOVW.P R5, 4(R1)
+ MOVW CPSR, R0
+E3:
+ CMP R1, R4
+ BNE L3
+
+ MOVW R0>>CFLAG, R0
+ AND $1, R0
+ MOVW R0, c+28(FP)
+ RET
+
+
+// func subVW(z, x []Word, y Word) (c Word)
+TEXT ·subVW(SB),7,$0
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW y+24(FP), R3
+ MOVW n+4(FP), R4
+ MOVW R4<<2, R4
+ ADD R1, R4
+ CMP R1, R4
+ BNE L4a
+ MOVW R3, c+28(FP)
+ RET
+L4a:
+ MOVW.P 4(R2), R5
+ SUB.S R3, R5
+ MOVW.P R5, 4(R1)
+ MOVW CPSR, R0
+ B E4
+L4:
+ MOVW.P 4(R2), R5
+ MOVW R0, CPSR
+ SBC.S $0, R5
+ MOVW.P R5, 4(R1)
+ MOVW CPSR, R0
+E4:
+ CMP R1, R4
+ BNE L4
+
+ MOVW R0>>CFLAG, R0
+ AND $1, R0
+ EOR $1, R0
+ MOVW R0, c+28(FP)
+ RET
+
+
+// func shlVU(z, x []Word, s uint) (c Word)
+TEXT ·shlVU(SB),7,$0
+ MOVW n+4(FP), R5
+ CMP $0, R5
+ BEQ X7
+
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW R5<<2, R5
+ ADD R5, R2
+ ADD R1, R5
+ MOVW s+24(FP), R3
+ CMP $0, R3 // shift 0 is special
+ BEQ Y7
+ ADD $4, R1 // stop one word early
+ MOVW $32, R4
+ SUB R3, R4
+ MOVW $0, R7
+
+ MOVW.W -4(R2), R6
+ MOVW R6<<R3, R7
+ MOVW R6>>R4, R6
+ MOVW R6, c+28(FP)
+ B E7
+
+L7:
+ MOVW.W -4(R2), R6
+ ORR R6>>R4, R7
+ MOVW.W R7, -4(R5)
+ MOVW R6<<R3, R7
+E7:
+ CMP R1, R5
+ BNE L7
+
+ MOVW R7, -4(R5)
+ RET
+
+Y7: // copy loop, because shift 0 == shift 32
+ MOVW.W -4(R2), R6
+ MOVW.W R6, -4(R5)
+ CMP R1, R5
+ BNE Y7
+
+X7:
+ MOVW $0, R1
+ MOVW R1, c+28(FP)
+ RET
+
+
+// func shrVU(z, x []Word, s uint) (c Word)
+TEXT ·shrVU(SB),7,$0
+ MOVW n+4(FP), R5
+ CMP $0, R5
+ BEQ X6
+
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW R5<<2, R5
+ ADD R1, R5
+ MOVW s+24(FP), R3
+ CMP $0, R3 // shift 0 is special
+ BEQ Y6
+ SUB $4, R5 // stop one word early
+ MOVW $32, R4
+ SUB R3, R4
+ MOVW $0, R7
+
+ // first word
+ MOVW.P 4(R2), R6
+ MOVW R6>>R3, R7
+ MOVW R6<<R4, R6
+ MOVW R6, c+28(FP)
+ B E6
+
+ // word loop
+L6:
+ MOVW.P 4(R2), R6
+ ORR R6<<R4, R7
+ MOVW.P R7, 4(R1)
+ MOVW R6>>R3, R7
+E6:
+ CMP R1, R5
+ BNE L6
+
+ MOVW R7, 0(R1)
+ RET
+
+Y6: // copy loop, because shift 0 == shift 32
+ MOVW.P 4(R2), R6
+ MOVW.P R6, 4(R1)
+ CMP R1, R5
+ BNE Y6
+
+X6:
+ MOVW $0, R1
+ MOVW R1, c+28(FP)
+ RET
+
+
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+TEXT ·mulAddVWW(SB),7,$0
+ MOVW $0, R0
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW y+24(FP), R3
+ MOVW r+28(FP), R4
+ MOVW n+4(FP), R5
+ MOVW R5<<2, R5
+ ADD R1, R5
+ B E8
+
+ // word loop
+L8:
+ MOVW.P 4(R2), R6
+ MULLU R6, R3, (R7, R6)
+ ADD.S R4, R6
+ ADC R0, R7
+ MOVW.P R6, 4(R1)
+ MOVW R7, R4
+E8:
+ CMP R1, R5
+ BNE L8
+
+ MOVW R4, c+32(FP)
+ RET
+
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+TEXT ·addMulVVW(SB),7,$0
+ MOVW $0, R0
+ MOVW z+0(FP), R1
+ MOVW x+12(FP), R2
+ MOVW y+24(FP), R3
+ MOVW n+4(FP), R5
+ MOVW R5<<2, R5
+ ADD R1, R5
+ MOVW $0, R4
+ B E9
+
+ // word loop
+L9:
+ MOVW.P 4(R2), R6
+ MULLU R6, R3, (R7, R6)
+ ADD.S R4, R6
+ ADC R0, R7
+ MOVW 0(R1), R4
+ ADD.S R4, R6
+ ADC R0, R7
+ MOVW.P R6, 4(R1)
+ MOVW R7, R4
+E9:
+ CMP R1, R5
+ BNE L9
+
+ MOVW R4, c+28(FP)
+ RET
+
+
+// divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
+TEXT ·divWVW(SB),7,$0
+ // ARM has no multiword division, so use portable code.
+ B ·divWVW_g(SB)
+
+
+// func divWW(x1, x0, y Word) (q, r Word)
+TEXT ·divWW(SB),7,$0
+ // ARM has no multiword division, so use portable code.
+ B ·divWW_g(SB)
+
+
+// func mulWW(x, y Word) (z1, z0 Word)
+TEXT ·mulWW(SB),7,$0
+ MOVW x+0(FP), R1
+ MOVW y+4(FP), R2
+ MULLU R1, R2, (R4, R3)
+ MOVW R4, z1+8(FP)
+ MOVW R3, z0+12(FP)
+ RET