diff --git a/usr/src/libm/src/mvec/vis/__vsin_ultra3.S b/usr/src/libm/src/mvec/vis/__vsin_ultra3.S
new file mode 100644
index 0000000..172b2ad
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsin_ultra3.S
@@ -0,0 +1,3431 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsin_ultra3.S 1.8 06/01/23 SMI"
+
+ .file "__vsin_ultra3.S"
+
+#include "libm.h"
+#if defined(LIBMVEC_SO_BUILD)
+ .weak __vsin
+ .type __vsin,#function
+ __vsin = __vsin_ultra3
+#endif
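+! When the library is built as libmvec.so, the public __vsin entry
+! point above resolves to this UltraSPARC III tuned version through
+! the weak alias; nothing below depends on which name was called.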
+
+ RO_DATA
+ .align 64
+constants:
+ .word 0x42c80000,0x00000000 ! 3 * 2^44
+ .word 0x43380000,0x00000000 ! 3 * 2^51
+ .word 0x3fe45f30,0x6dc9c883 ! invpio2
+ .word 0x3ff921fb,0x54442c00 ! pio2_1
+ .word 0x3d318469,0x898cc400 ! pio2_2
+ .word 0x3a71701b,0x839a2520 ! pio2_3
+ .word 0xbfc55555,0x55555533 ! pp1
+ .word 0x3f811111,0x10e7d53b ! pp2
+ .word 0xbf2a0167,0xe6b3cf9b ! pp3
+ .word 0xbfdfffff,0xffffff65 ! qq1
+ .word 0x3fa55555,0x54f88ed0 ! qq2
+ .word 0xbf56c12c,0xdd185f60 ! qq3
+
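+! Notes on the constants: 3*2^44 and 3*2^51 are round-to-nearest
+! anchors.  Adding 3*2^44 to |x| (with |x| < pi/4) leaves
+! round(|x|*128) in the low word of the sum, which becomes the table
+! index below; adding 3*2^51 to x*invpio2 likewise leaves the nearest
+! integer in the low word.  invpio2 is 2/pi, and pio2_1 + pio2_2 +
+! pio2_3 is a three-piece split of pi/2, each piece short enough that
+! an integer multiple of it is exact.  pp1..pp3 and qq1..qq3 are
+! polynomial coefficients for sin(w) and cos(w) on the small residual
+! interval left after table lookup or reduction.
+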
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define biguns STACK_BIAS-0x20
+#define nk3 STACK_BIAS-0x24
+#define nk2 STACK_BIAS-0x28
+#define nk1 STACK_BIAS-0x2c
+#define nk0 STACK_BIAS-0x30
+#define junk STACK_BIAS-0x38
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! l0 hx0
+! l1 hx1
+! l2 hx2
+! l3 hx3
+! l4 k0
+! l5 k1
+! l6 k2
+! l7 k3
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 __vlibm_TBL_sincos2
+! g5 scratch
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 py3
+! o4 0x3e400000
+! o5 0x3fe921fb,0x4099251e
+! o7 scratch
+
+! f0 hx0
+! f2
+! f4
+! f6
+! f8 hx1
+! f10
+! f12
+! f14
+! f16 hx2
+! f18
+! f20
+! f22
+! f24 hx3
+! f26
+! f28
+! f30
+! f32
+! f34
+! f36
+! f38
+
+#define c3two44 %f40
+#define c3two51 %f42
+#define invpio2 %f44
+#define pio2_1 %f46
+#define pio2_2 %f48
+#define pio2_3 %f50
+#define pp1 %f52
+#define pp2 %f54
+#define pp3 %f56
+#define qq1 %f58
+#define qq2 %f60
+#define qq3 %f62
+
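+! Overview, as C-like pseudocode (a sketch only: the loop below is
+! unrolled four ways and software-pipelined, and the names here are
+! descriptive, not part of the source):
+!
+!	for (i = 0; i < n; i++, x += stridex, y += stridey) {
+!		hx = HI(*x) & ~0x80000000;
+!		if (hx < 0x3e400000 || hx > 0x4099251e)
+!			goto range;		/* tiny, huge, inf, nan */
+!		if (|*x| < ~pi/4) {		/* table path */
+!			k = round(|*x| * 128);
+!			w = |*x| - TBL[k].x;	/* |w| <= 2^-8 */
+!			s = TBL[k].sin*cos(w) + TBL[k].cos*sin(w);
+!			*y = (*x < 0) ? -s : s;
+!		} else {			/* .medium path */
+!			m = round(*x * 2/pi);
+!			w = *x - m*pio2_1 - m*pio2_2 - m*pio2_3;
+!			*y = sin or cos of +-w, selected by m & 3;
+!		}
+!	}
+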
+ ENTRY(__vsin_ultra3)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o0)
+ PIC_SET(l7,__vlibm_TBL_sincos2,o1)
+ mov %o1,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
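+! (ASI 0x82 is ASI_PRIMARY_NOFAULT: the lda preloads of the next
+! vector element may run past the end of the caller's array without
+! trapping; a stray speculative load just returns a value that is
+! discarded once the loop count expires.)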
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ysave]
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ysave]
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sysave]
+ st %g0,[%fp+biguns] ! biguns = 0
+ ldd [%o0+0x00],c3two44 ! load/set up constants
+ ldd [%o0+0x08],c3two51
+ ldd [%o0+0x10],invpio2
+ ldd [%o0+0x18],pio2_1
+ ldd [%o0+0x20],pio2_2
+ ldd [%o0+0x28],pio2_3
+ ldd [%o0+0x30],pp1
+ ldd [%o0+0x38],pp2
+ ldd [%o0+0x40],pp3
+ ldd [%o0+0x48],qq1
+ ldd [%o0+0x50],qq2
+ ldd [%o0+0x58],qq3
+ sethi %hi(0x80000000),%i5
+ sethi %hi(0x3e400000),%o4
+ sethi %hi(0x3fe921fb),%o5
+ or %o5,%lo(0x3fe921fb),%o5
+ sllx %o5,32,%o5
+ sethi %hi(0x4099251e),%o7
+ or %o7,%lo(0x4099251e),%o7
+ or %o5,%o7,%o5
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,junk,%o1 ! loop prologue
+ add %fp,junk,%o2
+ add %fp,junk,%o3
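+! The stores of the previous iteration's results in .loop1-.loop3
+! need somewhere to aim on the first pass, before any results exist;
+! pointing py1..py3 at the scratch slot "junk" keeps them harmless.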
+ ld [%i1],%l0 ! *x
+ ld [%i1],%f0
+ ld [%i1+4],%f3
+ andn %l0,%i5,%l0 ! mask off sign
+ ba .loop0
+ add %i1,%i2,%i1 ! x += stridex
+
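+! Each .loopN below begins with a branch-free range test on the high
+! word hx of |x|: (hx - 0x3e400000) and (0x4099251e - hx) are both
+! non-negative exactly when hx is in range, so the sign bit of their
+! "or" flags tiny and huge arguments (and inf/nan) at once.  In C
+! terms, roughly:
+!
+!	if (((hx - 0x3e400000) | (0x4099251e - hx)) < 0)
+!		goto range;	/* |x| < 2^-27 or |x| > ~1609 */
+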
+! 16-byte aligned
+ .align 16
+.loop0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,%o4,%g5
+ sub %o5,%l0,%o7
+ fabss %f0,%f2
+
+ lda [%i1]%asi,%f8
+ orcc %o7,%g5,%g0
+ mov %i3,%o0 ! py0 = y
+ bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f11
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last1
+
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f2,c3two44,%f4
+ st %f15,[%o1+4]
+
+.loop1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,%o4,%g5
+ sub %o5,%l1,%o7
+ fabss %f8,%f10
+
+ lda [%i1]%asi,%f16
+ orcc %o7,%g5,%g0
+ mov %i3,%o1 ! py1 = y
+ bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f19
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last2
+
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f10,c3two44,%f12
+ st %f23,[%o2+4]
+
+.loop2:
+ lda [%i1]%asi,%l3 ! preload next argument
+ sub %l2,%o4,%g5
+ sub %o5,%l2,%o7
+ fabss %f16,%f18
+
+ lda [%i1]%asi,%f24
+ orcc %o7,%g5,%g0
+ mov %i3,%o2 ! py2 = y
+ bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f27
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last3
+
+! delay slot
+ andn %l3,%i5,%l3
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f18,c3two44,%f20
+ st %f31,[%o3+4]
+
+.loop3:
+ sub %l3,%o4,%g5
+ sub %o5,%l3,%o7
+ fabss %f24,%f26
+ st %f5,[%fp+nk0]
+
+ orcc %o7,%g5,%g0
+ mov %i3,%o3 ! py3 = y
+	bl,pn	%icc,.range3		! hx < 0x3e400000 or hx > 0x4099251e
+! delay slot
+ st %f13,[%fp+nk1]
+
+.cont:
+ srlx %o5,32,%o7
+ add %i3,%i4,%i3 ! y += stridey
+ fmovs %f3,%f1
+ st %f21,[%fp+nk2]
+
+ sub %o7,%l0,%l0
+ sub %o7,%l1,%l1
+ faddd %f26,c3two44,%f28
+ st %f29,[%fp+nk3]
+
+ sub %o7,%l2,%l2
+ sub %o7,%l3,%l3
+ fmovs %f11,%f9
+
+ or %l0,%l1,%l0
+ or %l2,%l3,%l2
+ fmovs %f19,%f17
+
+ fmovs %f27,%f25
+ fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range
+
+ fmuld %f8,invpio2,%f14
+ ld [%fp+nk0],%l4
+
+ fmuld %f16,invpio2,%f22
+ ld [%fp+nk1],%l5
+
+ orcc %l0,%l2,%g0
+ bl,pn %icc,.medium
+! delay slot
+ fmuld %f24,invpio2,%f30
+ ld [%fp+nk2],%l6
+
+ ld [%fp+nk3],%l7
+ sll %l4,5,%l4 ! k
+ fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0
+
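+! %l4..%l7 hold the low words of the |x| + 3*2^44 sums stored in the
+! loops, i.e. round(|x| * 128).  The shift by 5 turns that count into
+! a byte offset, so __vlibm_TBL_sincos2 evidently has 32-byte entries
+! spaced 2^-7 apart: the abscissa x_k at offset 0 and, as used below,
+! sin(x_k) at offset 8 and cos(x_k) at offset 16.
+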
+ sll %l5,5,%l5
+ ldd [%l4+%g1],%f4
+ fcmpd %fcc1,%f8,pio2_3
+
+ sll %l6,5,%l6
+ ldd [%l5+%g1],%f12
+ fcmpd %fcc2,%f16,pio2_3
+
+ sll %l7,5,%l7
+ ldd [%l6+%g1],%f20
+ fcmpd %fcc3,%f24,pio2_3
+
+ ldd [%l7+%g1],%f28
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f10,%f12,%f10
+
+ fsubd %f18,%f20,%f18
+
+ fsubd %f26,%f28,%f26
+
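+! From here on w = |x| - x_k with |w| <= 2^-8, and the four-way
+! unrolled code below evaluates the angle-addition identity
+!
+!	sin(|x|) = sin(x_k)*cos(w) + cos(x_k)*sin(w)
+!
+! with sin(w) ~ w + w*z*(pp1 + z*(pp2 + z*pp3)) and
+! cos(w) ~ 1 + z*(qq1 + z*qq2), where z = w*w; the sign of x is
+! applied at the end.
+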
+ fmuld %f2,%f2,%f0 ! z = x * x
+
+ fmuld %f10,%f10,%f8
+
+ fmuld %f18,%f18,%f16
+
+ fmuld %f26,%f26,%f24
+
+ fmuld %f0,pp3,%f6
+
+ fmuld %f8,pp3,%f14
+
+ fmuld %f16,pp3,%f22
+
+ fmuld %f24,pp3,%f30
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f8,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f16,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f24,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f2,%f6,%f6
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f18,%f22,%f22
+
+ fmuld %f26,%f30,%f30
+
+ faddd %f6,%f2,%f6
+ fmuld %f0,%f4,%f4
+ ldd [%l4+16],%f2
+
+ faddd %f14,%f10,%f14
+ fmuld %f8,%f12,%f12
+ ldd [%l5+16],%f10
+
+ faddd %f22,%f18,%f22
+ fmuld %f16,%f20,%f20
+ ldd [%l6+16],%f18
+
+ faddd %f30,%f26,%f30
+ fmuld %f24,%f28,%f28
+ ldd [%l7+16],%f26
+
+ fmuld %f2,%f6,%f6
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f18,%f22,%f22
+
+ fmuld %f26,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+	fmovdl	%fcc0,%f4,%f6		! (x < 0)? -s : s
+ st %f6,[%o0]
+
+ fmovdl %fcc1,%f12,%f14
+ st %f14,[%o1]
+
+ fmovdl %fcc2,%f20,%f22
+ st %f22,[%o2]
+
+ fmovdl %fcc3,%f28,%f30
+ st %f30,[%o3]
+ addcc %i0,-1,%i0
+
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.medium:
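+! Medium range (pi/4 < |x| <= ~1609): the x*invpio2 products formed
+! in the loops are rounded to the nearest integer n by the 3*2^51
+! add/subtract pair below (n is reloaded from the stored low words),
+! and the argument is reduced as
+!
+!	w = x - n*pio2_1 - n*pio2_2 - n*pio2_3
+!
+! where the three-term split keeps each product exact.  n mod 4 then
+! selects between sin and cos and fixes the sign of the result.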
+ faddd %f6,c3two51,%f4
+ st %f5,[%fp+nk0]
+
+ faddd %f14,c3two51,%f12
+ st %f13,[%fp+nk1]
+
+ faddd %f22,c3two51,%f20
+ st %f21,[%fp+nk2]
+
+ faddd %f30,c3two51,%f28
+ st %f29,[%fp+nk3]
+
+ fsubd %f4,c3two51,%f6
+
+ fsubd %f12,c3two51,%f14
+
+ fsubd %f20,c3two51,%f22
+
+ fsubd %f28,c3two51,%f30
+
+ fmuld %f6,pio2_1,%f2
+ ld [%fp+nk0],%l0 ! n
+
+ fmuld %f14,pio2_1,%f10
+ ld [%fp+nk1],%l1
+
+ fmuld %f22,pio2_1,%f18
+ ld [%fp+nk2],%l2
+
+ fmuld %f30,pio2_1,%f26
+ ld [%fp+nk3],%l3
+
+ fsubd %f0,%f2,%f0
+ fmuld %f6,pio2_2,%f4
+
+ fsubd %f8,%f10,%f8
+ fmuld %f14,pio2_2,%f12
+
+ fsubd %f16,%f18,%f16
+ fmuld %f22,pio2_2,%f20
+
+ fsubd %f24,%f26,%f24
+ fmuld %f30,pio2_2,%f28
+
+ fsubd %f0,%f4,%f32
+
+ fsubd %f8,%f12,%f34
+
+ fsubd %f16,%f20,%f36
+
+ fsubd %f24,%f28,%f38
+
+ fsubd %f0,%f32,%f0
+ fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0
+
+ fsubd %f8,%f34,%f8
+ fcmple32 %f34,pio2_3,%l5
+
+ fsubd %f16,%f36,%f16
+ fcmple32 %f36,pio2_3,%l6
+
+ fsubd %f24,%f38,%f24
+ fcmple32 %f38,pio2_3,%l7
+
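+! fcmple32 is a VIS partitioned compare: two independent signed
+! 32-bit compares whose 2-bit result lands in an integer register.
+! The sll 30/sra 31 pairs below turn the high-half bit into a
+! full-width mask (-1 when x is negative, else 0); the xor/sub then
+! negates n without branching, and the later and/xor folds in the
+! octant flip noted in the comment below: if (x < 0) n = -n ^ 2.
+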
+ fsubd %f0,%f4,%f0
+ fmuld %f6,pio2_3,%f6
+ sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2
+
+ fsubd %f8,%f12,%f8
+ fmuld %f14,pio2_3,%f14
+ sll %l5,30,%l5
+
+ fsubd %f16,%f20,%f16
+ fmuld %f22,pio2_3,%f22
+ sll %l6,30,%l6
+
+ fsubd %f24,%f28,%f24
+ fmuld %f30,pio2_3,%f30
+ sll %l7,30,%l7
+
+ fsubd %f6,%f0,%f6
+ sra %l4,31,%l4
+
+ fsubd %f14,%f8,%f14
+ sra %l5,31,%l5
+
+ fsubd %f22,%f16,%f22
+ sra %l6,31,%l6
+
+ fsubd %f30,%f24,%f30
+ sra %l7,31,%l7
+
+ fsubd %f32,%f6,%f0 ! reduced x
+ xor %l0,%l4,%l0
+
+ fsubd %f34,%f14,%f8
+ xor %l1,%l5,%l1
+
+ fsubd %f36,%f22,%f16
+ xor %l2,%l6,%l2
+
+ fsubd %f38,%f30,%f24
+ xor %l3,%l7,%l3
+
+ fabsd %f0,%f2
+ sub %l0,%l4,%l0
+
+ fabsd %f8,%f10
+ sub %l1,%l5,%l1
+
+ fabsd %f16,%f18
+ sub %l2,%l6,%l2
+
+ fabsd %f24,%f26
+ sub %l3,%l7,%l3
+
+ faddd %f2,c3two44,%f4
+ st %f5,[%fp+nk0]
+ and %l4,2,%l4
+
+ faddd %f10,c3two44,%f12
+ st %f13,[%fp+nk1]
+ and %l5,2,%l5
+
+ faddd %f18,c3two44,%f20
+ st %f21,[%fp+nk2]
+ and %l6,2,%l6
+
+ faddd %f26,c3two44,%f28
+ st %f29,[%fp+nk3]
+ and %l7,2,%l7
+
+ fsubd %f32,%f0,%f4
+ xor %l0,%l4,%l0
+
+ fsubd %f34,%f8,%f12
+ xor %l1,%l5,%l1
+
+ fsubd %f36,%f16,%f20
+ xor %l2,%l6,%l2
+
+ fsubd %f38,%f24,%f28
+ xor %l3,%l7,%l3
+
+ fzero %f38
+ ld [%fp+nk0],%l4
+
+ fsubd %f4,%f6,%f6 ! w
+ ld [%fp+nk1],%l5
+
+ fsubd %f12,%f14,%f14
+ ld [%fp+nk2],%l6
+
+ fnegd %f38,%f38
+ ld [%fp+nk3],%l7
+ sll %l4,5,%l4 ! k
+
+ fsubd %f20,%f22,%f22
+ sll %l5,5,%l5
+
+ fsubd %f28,%f30,%f30
+ sll %l6,5,%l6
+
+ fand %f0,%f38,%f32 ! sign bit of x
+ ldd [%l4+%g1],%f4
+ sll %l7,5,%l7
+
+ fand %f8,%f38,%f34
+ ldd [%l5+%g1],%f12
+
+ fand %f16,%f38,%f36
+ ldd [%l6+%g1],%f20
+
+ fand %f24,%f38,%f38
+ ldd [%l7+%g1],%f28
+
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f10,%f12,%f10
+
+ fsubd %f18,%f20,%f18
+ nop
+
+ fsubd %f26,%f28,%f26
+ nop
+
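+! Dispatch on bit 0 of each lane's octant n: a lane with n even needs
+! the sine polynomial for its reduced argument, a lane with n odd the
+! cosine (sin(x) = +-cos(w) there).  The four tests fan out to 16
+! unrolled variants, .case0 (all cosines) through .case15 (all
+! sines); bit 1 of n picks the sign at the bottom of each variant.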
+! 16-byte aligned
+ fmuld %f2,%f2,%f0 ! z = x * x
+ andcc %l0,1,%g0
+ bz,pn %icc,.case8
+! delay slot
+ fxor %f6,%f32,%f32
+
+ fmuld %f10,%f10,%f8
+ andcc %l1,1,%g0
+ bz,pn %icc,.case4
+! delay slot
+ fxor %f14,%f34,%f34
+
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case2
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case1
+! delay slot
+ fxor %f30,%f38,%f38
+
+!.case0:
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
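+
+! Bit 1 of each lane's octant selects the negated value with a
+! conditional move, so the sign fix costs no branch; every .caseN
+! variant ends with this same pattern.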
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case1:
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case2:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case3
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case3:
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case4:
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case6
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case5
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case5:
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case6:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case7
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case7:
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case8:
+ fmuld %f10,%f10,%f8
+ andcc %l1,1,%g0
+ bz,pn %icc,.case12
+! delay slot
+ fxor %f14,%f34,%f34
+
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case10
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case9
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case9:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case10:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case11
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case11:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case12:
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case14
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case13
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case13:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case14:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case15
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case15:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.end:
+ st %f15,[%o1+4]
+ st %f23,[%o2+4]
+ st %f31,[%o3+4]
+ ld [%fp+biguns],%i5
+ tst %i5 ! check for huge arguments remaining
+ be,pt %icc,.exit
+! delay slot
+ nop
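+! At least one finite argument with |x| > ~1609 was seen; .range0-
+! .range3 skipped those elements and set biguns.  Restore the
+! original x, y, n and strides saved at entry and hand the vector to
+! the slow path, which presumably redoes the skipped elements with a
+! full-precision reduction.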
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ysave],%o3
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ysave],%o3
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sysave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ sra %o4,0,%o4
+ call __vlibm_vsin_big_ultra3
+ sra %o5,0,%o5 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
+ .align 16
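+! Loop wind-down: fewer than four arguments remained when one of the
+! preloads ran out.  The missing lanes are padded with zeros and
+! their result pointers aimed at the junk slot so the common code can
+! run to completion unchanged.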
+.last1:
+ faddd %f2,c3two44,%f4
+ st %f15,[%o1+4]
+.last1_from_range1:
+ mov 0,%l1
+ fzeros %f8
+ fzero %f10
+ add %fp,junk,%o1
+.last2:
+ faddd %f10,c3two44,%f12
+ st %f23,[%o2+4]
+.last2_from_range2:
+ mov 0,%l2
+ fzeros %f16
+ fzero %f18
+ add %fp,junk,%o2
+.last3:
+ faddd %f18,c3two44,%f20
+ st %f31,[%o3+4]
+ st %f5,[%fp+nk0]
+ st %f13,[%fp+nk1]
+.last3_from_range3:
+ mov 0,%l3
+ fzeros %f24
+ fzero %f26
+ ba,pt %icc,.cont
+! delay slot
+ add %fp,junk,%o3
+
+
+ .align 16
+.range0:
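+! Special cases, one handler per lane (.range1-.range3 are copies of
+! this one): for tiny |x| < 2^-27, sin(x) rounds to x, so x is stored
+! back after an fdtoi that raises inexact whenever x != 0; finite
+! huge arguments just set biguns and are left for the big-argument
+! pass at .end; for inf/nan, x*0 produces and stores the proper NaN.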
+ cmp %l0,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l0,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f0
+ fmuld %f2,%f0,%f2
+ st %f2,[%o0]
+ ba,pt %icc,2f
+! delay slot
+ st %f3,[%o0+4]
+1:
+ fdtoi %f2,%f4 ! raise inexact if not zero
+ st %f0,[%o0]
+ st %f3,[%o0+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.end
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovs %f8,%f0
+ fmovs %f11,%f3
+ ba,pt %icc,.loop0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range1:
+ cmp %l1,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l1,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f8
+ fmuld %f10,%f8,%f10
+ st %f10,[%o1]
+ ba,pt %icc,2f
+! delay slot
+ st %f11,[%o1+4]
+1:
+ fdtoi %f10,%f12 ! raise inexact if not zero
+ st %f8,[%o1]
+ st %f11,[%o1+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1_from_range1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovs %f16,%f8
+ fmovs %f19,%f11
+ ba,pt %icc,.loop1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range2:
+ cmp %l2,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l2,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f16
+ fmuld %f18,%f16,%f18
+ st %f18,[%o2]
+ ba,pt %icc,2f
+! delay slot
+ st %f19,[%o2+4]
+1:
+ fdtoi %f18,%f20 ! raise inexact if not zero
+ st %f16,[%o2]
+ st %f19,[%o2+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last2_from_range2
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l3,%i5,%l2 ! hx &= ~0x80000000
+ fmovs %f24,%f16
+ fmovs %f27,%f19
+ ba,pt %icc,.loop2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range3:
+ cmp %l3,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l3,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f24
+ fmuld %f26,%f24,%f26
+ st %f26,[%o3]
+ ba,pt %icc,2f
+! delay slot
+ st %f27,[%o3+4]
+1:
+ fdtoi %f26,%f28 ! raise inexact if not zero
+ st %f24,[%o3]
+ st %f27,[%o3+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last3_from_range3
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ ld [%i1],%l3
+ ld [%i1],%f24
+ ld [%i1+4],%f27
+ andn %l3,%i5,%l3 ! hx &= ~0x80000000
+ ba,pt %icc,.loop3
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+ SET_SIZE(__vsin_ultra3)
+