Diffstat (limited to 'usr/src/lib/libmvec/common/vis/__vcosf.S')
-rw-r--r--	usr/src/lib/libmvec/common/vis/__vcosf.S	2102
1 file changed, 2102 insertions(+), 0 deletions(-)
diff --git a/usr/src/lib/libmvec/common/vis/__vcosf.S b/usr/src/lib/libmvec/common/vis/__vcosf.S
new file mode 100644
index 0000000000..a20550e23b
--- /dev/null
+++ b/usr/src/lib/libmvec/common/vis/__vcosf.S
@@ -0,0 +1,2102 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+	.file	"__vcosf.S"
+
+#include "libm.h"
+
+	RO_DATA
+	.align	64
+constants:
+	.word	0xbfc55554,0x60000000
+	.word	0x3f811077,0xe0000000
+	.word	0xbf29956b,0x60000000
+	.word	0x3ff00000,0x00000000
+	.word	0xbfe00000,0x00000000
+	.word	0x3fa55554,0xa0000000
+	.word	0xbf56c0c1,0xe0000000
+	.word	0x3ef99e24,0xe0000000
+	.word	0x3fe45f30,0x6dc9c883
+	.word	0x43380000,0x00000000
+	.word	0x3ff921fb,0x54400000
+	.word	0x3dd0b461,0x1a626331
+	.word	0x3f490fdb,0
+	.word	0x49c90fdb,0
+	.word	0x7f800000,0
+	.word	0x80000000,0
+
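+! Layout of the table above, read with the offsets defined below (the
+! decoded values are annotations): S0..S2 are sin() polynomial
+! coefficients (about -1/6, 1/120, -1/5040) and C0..C2 cos()
+! coefficients (about 1/24, -1/720, 1/40320); one and mhalf supply the
+! leading terms; invpio2 is 2/pi; round is 1.5*2^52, added so that
+! n = x*2/pi rounds to an integer in the low word; pio2_1 and pio2_t
+! are the head and tail of a two-part pi/2; thresh1 and thresh2 are the
+! single-precision values pi/4 and 2^19*pi; inf and signbit are IEEE
+! bit masks.
+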
+#define S0	0x0
+#define S1	0x08
+#define S2	0x10
+#define one	0x18
+#define mhalf	0x20
+#define C0	0x28
+#define C1	0x30
+#define C2	0x38
+#define invpio2	0x40
+#define round	0x48
+#define pio2_1	0x50
+#define pio2_t	0x58
+#define thresh1	0x60
+#define thresh2	0x68
+#define inf	0x70
+#define signbit	0x78
+
+! local storage indices
+
+#define xsave	STACK_BIAS-0x8
+#define ysave	STACK_BIAS-0x10
+#define nsave	STACK_BIAS-0x14
+#define sxsave	STACK_BIAS-0x18
+#define sysave	STACK_BIAS-0x1c
+#define junk	STACK_BIAS-0x20
+#define n3	STACK_BIAS-0x24
+#define n2	STACK_BIAS-0x28
+#define n1	STACK_BIAS-0x2c
+#define n0	STACK_BIAS-0x30
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps	0x30
+
+! register use
+
+! i0  n
+! i1  x
+! i2  stridex
+! i3  y
+! i4  stridey
+! i5  biguns
+
+! l0  n0
+! l1  n1
+! l2  n2
+! l3  n3
+! l4
+! l5
+! l6
+! l7
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1
+! g5
+
+! o0  py0
+! o1  py1
+! o2  py2
+! o3  py3
+! o4
+! o5
+! o7
+
+! f0  x0
+! f2  x1
+! f4  x2
+! f6  x3
+! f8  thresh1 (pi/4)
+! f10 y0
+! f12 y1
+! f14 y2
+! f16 y3
+! f18 thresh2 (2^19 pi)
+! f20
+! f22
+! f24
+! f26
+! f28 signbit
+! f30
+! f32
+! f34
+! f36
+! f38 inf
+! f40 S0
+! f42 S1
+! f44 S2
+! f46 one
+! f48 mhalf
+! f50 C0
+! f52 C1
+! f54 C2
+! f56 invpio2
+! f58 round
+! f60 pio2_1
+! f62 pio2_t
+
+	ENTRY(__vcosf)
+	save	%sp,-SA(MINFRAME)-tmps,%sp
+	PIC_SETUP(l7)
+	PIC_SET(l7,constants,l0)
+	mov	%l0,%g1
+	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
+#ifdef __sparcv9
+	stx	%i1,[%fp+xsave]		! save arguments
+	stx	%i3,[%fp+ysave]
+#else
+	st	%i1,[%fp+xsave]		! save arguments
+	st	%i3,[%fp+ysave]
+#endif
+	st	%i0,[%fp+nsave]
+	st	%i2,[%fp+sxsave]
+	st	%i4,[%fp+sysave]
+	mov	0,%i5			! biguns = 0
+	ldd	[%g1+S0],%f40		! load constants
+	ldd	[%g1+S1],%f42
+	ldd	[%g1+S2],%f44
+	ldd	[%g1+one],%f46
+	ldd	[%g1+mhalf],%f48
+	ldd	[%g1+C0],%f50
+	ldd	[%g1+C1],%f52
+	ldd	[%g1+C2],%f54
+	ldd	[%g1+invpio2],%f56
+	ldd	[%g1+round],%f58
+	ldd	[%g1+pio2_1],%f60
+	ldd	[%g1+pio2_t],%f62
+	ldd	[%g1+thresh1],%f8
+	ldd	[%g1+thresh2],%f18
+	ldd	[%g1+inf],%f38
+	ldd	[%g1+signbit],%f28
+	sll	%i2,2,%i2		! scale strides
+	sll	%i4,2,%i4
+	fzero	%f10			! loop prologue
+	add	%fp,junk,%o0
+	fzero	%f12
+	add	%fp,junk,%o1
+	fzero	%f14
+	add	%fp,junk,%o2
+	fzero	%f16
+	ba	.start
+	add	%fp,junk,%o3
+
+	.align	16
+! 16-byte aligned
+.start:
+	ld	[%i1],%f0		! *x
+	add	%i1,%i2,%i1		! x += stridex
+	addcc	%i0,-1,%i0
+	fdtos	%f10,%f10
+
+	st	%f10,[%o0]
+	mov	%i3,%o0			! py0 = y
+	ble,pn	%icc,.last1
+! delay slot
+	add	%i3,%i4,%i3		! y += stridey
+
+	ld	[%i1],%f2		! *x
+	add	%i1,%i2,%i1		! x += stridex
+	addcc	%i0,-1,%i0
+	fdtos	%f12,%f12
+
+	st	%f12,[%o1]
+	mov	%i3,%o1			! py1 = y
+	ble,pn	%icc,.last2
+! delay slot
+	add	%i3,%i4,%i3		! y += stridey
+
+	ld	[%i1],%f4		! *x
+	add	%i1,%i2,%i1		! x += stridex
+	addcc	%i0,-1,%i0
+	fdtos	%f14,%f14
+
+	st	%f14,[%o2]
+	mov	%i3,%o2			! py2 = y
+	ble,pn	%icc,.last3
+! delay slot
+	add	%i3,%i4,%i3		! y += stridey
+
+	ld	[%i1],%f6		! *x
+	add	%i1,%i2,%i1		! x += stridex
+	nop
+	fdtos	%f16,%f16
+
+	st	%f16,[%o3]
+	mov	%i3,%o3			! py3 = y
+	add	%i3,%i4,%i3		! y += stridey
+.cont:
+	fabsd	%f0,%f30
+
+	fabsd	%f2,%f32
+
+	fabsd	%f4,%f34
+
+	fabsd	%f6,%f36
+	fcmple32 %f30,%f18,%l0
+
+	fcmple32 %f32,%f18,%l1
+
+	fcmple32 %f34,%f18,%l2
+
+	fcmple32 %f36,%f18,%l3
+	nop
+
+! 16-byte aligned
+	andcc	%l0,2,%g0
+	bz,pn	%icc,.range0		! branch if > 2^19 pi
+! delay slot
+	fcmple32 %f30,%f8,%l0
+
+.check1:
+	andcc	%l1,2,%g0
+	bz,pn	%icc,.range1		! branch if > 2^19 pi
+! delay slot
+	fcmple32 %f32,%f8,%l1
+
+.check2:
+	andcc	%l2,2,%g0
+	bz,pn	%icc,.range2		! branch if > 2^19 pi
+! delay slot
+	fcmple32 %f34,%f8,%l2
+
+.check3:
+	andcc	%l3,2,%g0
+	bz,pn	%icc,.range3		! branch if > 2^19 pi
+! delay slot
+	fcmple32 %f36,%f8,%l3
+
+.checkprimary:
+	fsmuld	%f0,%f0,%f30
+	fstod	%f0,%f0
+
+	fsmuld	%f2,%f2,%f32
+	fstod	%f2,%f2
+	and	%l0,%l1,%o4
+
+	fsmuld	%f4,%f4,%f34
+	fstod	%f4,%f4
+
+	fsmuld	%f6,%f6,%f36
+	fstod	%f6,%f6
+	and	%l2,%l3,%o5
+
+	fmuld	%f30,%f54,%f10
+	and	%o4,%o5,%o5
+
+	fmuld	%f32,%f54,%f12
+	andcc	%o5,2,%g0
+	bz,pn	%icc,.medium		! branch if any argument is > pi/4
+! delay slot
+	nop
+
+	fmuld	%f34,%f54,%f14
+
+	fmuld	%f36,%f54,%f16
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+
+	fmuld	%f30,%f10,%f10
+
+	fmuld	%f32,%f12,%f12
+
+	fmuld	%f34,%f14,%f14
+
+	fmuld	%f36,%f16,%f16
+
+	faddd	%f10,%f20,%f10
+
+	faddd	%f12,%f22,%f12
+
+	faddd	%f14,%f24,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	faddd	%f16,%f26,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
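+! For the medium range (pi/4 < |x| <= 2^19 pi) the code falls through to
+! .medium below (annotation): n = round(x*2/pi) is formed by adding
+! 1.5*2^52 so the integer lands in the low word, the reduced argument
+! x - n*pi/2 is computed with the two-part pi/2, and n+1 selects each
+! lane's polynomial (odd -> cosine, even -> sine) while bit 1 of n+1
+! supplies the sign via an XOR with signbit; .case0-.case15 enumerate
+! the sixteen sine/cosine combinations of the four lanes.
+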
+	.align	16
+.medium:
+	fmuld	%f0,%f56,%f10
+
+	fmuld	%f2,%f56,%f12
+
+	fmuld	%f4,%f56,%f14
+
+	fmuld	%f6,%f56,%f16
+
+	faddd	%f10,%f58,%f10
+	st	%f11,[%fp+n0]
+
+	faddd	%f12,%f58,%f12
+	st	%f13,[%fp+n1]
+
+	faddd	%f14,%f58,%f14
+	st	%f15,[%fp+n2]
+
+	faddd	%f16,%f58,%f16
+	st	%f17,[%fp+n3]
+
+	fsubd	%f10,%f58,%f10
+
+	fsubd	%f12,%f58,%f12
+
+	fsubd	%f14,%f58,%f14
+
+	fsubd	%f16,%f58,%f16
+
+	fmuld	%f10,%f60,%f20
+	ld	[%fp+n0],%l0
+
+	fmuld	%f12,%f60,%f22
+	ld	[%fp+n1],%l1
+
+	fmuld	%f14,%f60,%f24
+	ld	[%fp+n2],%l2
+
+	fmuld	%f16,%f60,%f26
+	ld	[%fp+n3],%l3
+
+	fsubd	%f0,%f20,%f0
+	fmuld	%f10,%f62,%f30
+	add	%l0,1,%l0
+
+	fsubd	%f2,%f22,%f2
+	fmuld	%f12,%f62,%f32
+	add	%l1,1,%l1
+
+	fsubd	%f4,%f24,%f4
+	fmuld	%f14,%f62,%f34
+	add	%l2,1,%l2
+
+	fsubd	%f6,%f26,%f6
+	fmuld	%f16,%f62,%f36
+	add	%l3,1,%l3
+
+	fsubd	%f0,%f30,%f0
+
+	fsubd	%f2,%f32,%f2
+
+	fsubd	%f4,%f34,%f4
+
+	fsubd	%f6,%f36,%f6
+	andcc	%l0,1,%g0
+
+	fmuld	%f0,%f0,%f30
+	bz,pn	%icc,.case8
+! delay slot
+	andcc	%l1,1,%g0
+
+	fmuld	%f2,%f2,%f32
+	bz,pn	%icc,.case4
+! delay slot
+	andcc	%l2,1,%g0
+
+	fmuld	%f4,%f4,%f34
+	bz,pn	%icc,.case2
+! delay slot
+	andcc	%l3,1,%g0
+
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case1
+! delay slot
+	nop
+
+!.case0:
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	faddd	%f10,%f20,%f10
+
+	faddd	%f12,%f22,%f12
+
+	faddd	%f14,%f24,%f14
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case1:
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+
+	faddd	%f12,%f22,%f12
+
+	faddd	%f14,%f24,%f14
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case2:
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case3
+! delay slot
+	nop
+
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	faddd	%f10,%f20,%f10
+
+	faddd	%f12,%f22,%f12
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case3:
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+
+	faddd	%f12,%f22,%f12
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case4:
+	fmuld	%f4,%f4,%f34
+	bz,pn	%icc,.case6
+! delay slot
+	andcc	%l3,1,%g0
+
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case5
+! delay slot
+	nop
+
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	faddd	%f10,%f20,%f10
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	faddd	%f14,%f24,%f14
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case5:
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	faddd	%f14,%f24,%f14
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case6:
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case7
+! delay slot
+	nop
+
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	faddd	%f10,%f20,%f10
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case7:
+	fmuld	%f30,%f54,%f10		! cos(x0)
+	fzero	%f0
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f48,%f20
+	faddd	%f10,%f52,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f10,%f10
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f10,%f50,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	fmuld	%f30,%f10,%f10
+	fmovrdnz %g1,%f28,%f0
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f0,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+
+	.align	16
+.case8:
+	fmuld	%f2,%f2,%f32
+	bz,pn	%icc,.case12
+! delay slot
+	andcc	%l2,1,%g0
+
+	fmuld	%f4,%f4,%f34
+	bz,pn	%icc,.case10
+! delay slot
+	andcc	%l3,1,%g0
+
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case9
+! delay slot
+	nop
+
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	faddd	%f12,%f22,%f12
+
+	faddd	%f14,%f24,%f14
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case9:
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	faddd	%f12,%f22,%f12
+
+	faddd	%f14,%f24,%f14
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case10:
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case11
+! delay slot
+	nop
+
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	faddd	%f12,%f22,%f12
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case11:
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f54,%f12		! cos(x1)
+	fzero	%f2
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f48,%f22
+	faddd	%f12,%f52,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f12,%f12
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f12,%f50,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	fmuld	%f32,%f12,%f12
+	fmovrdnz %g5,%f28,%f2
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	faddd	%f12,%f22,%f12
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f2,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case12:
+	fmuld	%f4,%f4,%f34
+	bz,pn	%icc,.case14
+! delay slot
+	andcc	%l3,1,%g0
+
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case13
+! delay slot
+	nop
+
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	faddd	%f14,%f24,%f14
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case13:
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f54,%f14		! cos(x2)
+	fzero	%f4
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f48,%f24
+	faddd	%f14,%f52,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f14,%f14
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f14,%f50,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	fmuld	%f34,%f14,%f14
+	fmovrdnz %o4,%f28,%f4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	faddd	%f14,%f24,%f14
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f4,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case14:
+	fmuld	%f6,%f6,%f36
+	bz,pn	%icc,.case15
+! delay slot
+	nop
+
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f54,%f16		! cos(x3)
+	fzero	%f6
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f48,%f26
+	faddd	%f16,%f52,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f16,%f16
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f16,%f50,%f16
+	and	%l3,2,%o5
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	fmuld	%f36,%f16,%f16
+	fmovrdnz %o5,%f28,%f6
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	faddd	%f16,%f26,%f16
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f6,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+	.align	16
+.case15:
+	fmuld	%f30,%f44,%f10		! sin(x0)
+
+	fmuld	%f32,%f44,%f12		! sin(x1)
+
+	fmuld	%f34,%f44,%f14		! sin(x2)
+
+	fmuld	%f36,%f44,%f16		! sin(x3)
+
+	fmuld	%f30,%f40,%f20
+	faddd	%f10,%f42,%f10
+
+	fmuld	%f32,%f40,%f22
+	faddd	%f12,%f42,%f12
+
+	fmuld	%f34,%f40,%f24
+	faddd	%f14,%f42,%f14
+
+	fmuld	%f36,%f40,%f26
+	faddd	%f16,%f42,%f16
+
+	fmuld	%f30,%f30,%f30
+	faddd	%f20,%f46,%f20
+
+	fmuld	%f32,%f32,%f32
+	faddd	%f22,%f46,%f22
+
+	fmuld	%f34,%f34,%f34
+	faddd	%f24,%f46,%f24
+
+	fmuld	%f36,%f36,%f36
+	faddd	%f26,%f46,%f26
+
+	fmuld	%f30,%f10,%f10
+	fzero	%f30
+
+	fmuld	%f32,%f12,%f12
+	fzero	%f32
+
+	fmuld	%f34,%f14,%f14
+	fzero	%f34
+
+	fmuld	%f36,%f16,%f16
+	fzero	%f36
+
+	faddd	%f10,%f20,%f10
+	and	%l0,2,%g1
+
+	faddd	%f12,%f22,%f12
+	and	%l1,2,%g5
+
+	faddd	%f14,%f24,%f14
+	and	%l2,2,%o4
+
+	faddd	%f16,%f26,%f16
+	and	%l3,2,%o5
+
+	fmuld	%f0,%f10,%f10
+	fmovrdnz %g1,%f28,%f30
+
+	fmuld	%f2,%f12,%f12
+	fmovrdnz %g5,%f28,%f32
+
+	fmuld	%f4,%f14,%f14
+	fmovrdnz %o4,%f28,%f34
+
+	fmuld	%f6,%f16,%f16
+	fmovrdnz %o5,%f28,%f36
+
+	fxor	%f10,%f30,%f10
+
+	fxor	%f12,%f32,%f12
+
+	fxor	%f14,%f34,%f14
+
+	addcc	%i0,-1,%i0
+	bg,pt	%icc,.start
+! delay slot
+	fxor	%f16,%f36,%f16
+
+	ba,pt	%icc,.end
+! delay slot
+	nop
+
+
+	.align	32
+.end:
+	fdtos	%f10,%f10
+	st	%f10,[%o0]
+	fdtos	%f12,%f12
+	st	%f12,[%o1]
+	fdtos	%f14,%f14
+	st	%f14,[%o2]
+	fdtos	%f16,%f16
+	tst	%i5			! check for huge arguments remaining
+	be,pt	%icc,.exit
+! delay slot
+	st	%f16,[%o3]
+#ifdef __sparcv9
+	ldx	[%fp+xsave],%o1
+	ldx	[%fp+ysave],%o3
+#else
+	ld	[%fp+xsave],%o1
+	ld	[%fp+ysave],%o3
+#endif
+	ld	[%fp+nsave],%o0
+	ld	[%fp+sxsave],%o2
+	ld	[%fp+sysave],%o4
+	sra	%o2,0,%o2		! sign-extend for V9
+	call	__vlibm_vcos_bigf
+	sra	%o4,0,%o4		! delay slot
+
+.exit:
+	ret
+	restore
+
+
+	.align	32
+.last1:
+	fdtos	%f12,%f12
+	st	%f12,[%o1]
+	fzeros	%f2
+	add	%fp,junk,%o1
+.last2:
+	fdtos	%f14,%f14
+	st	%f14,[%o2]
+	fzeros	%f4
+	add	%fp,junk,%o2
+.last3:
+	fdtos	%f16,%f16
+	st	%f16,[%o3]
+	fzeros	%f6
+	ba,pt	%icc,.cont
+! delay slot
+	add	%fp,junk,%o3
+
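+! Lanes whose argument exceeds 2^19 pi reach .range0-.range3 below
+! (annotation): a finite huge argument only sets biguns, so that .end
+! re-runs the leftovers through __vlibm_vcos_bigf, while an infinity or
+! NaN is disposed of immediately by multiplying the argument by zero
+! and storing the result; either way the lane is refilled from the next
+! element, or parked on a dummy argument when the vector is exhausted.
+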
+	.align	16
+.range0:
+	fcmpgt32 %f38,%f30,%l0
+	andcc	%l0,2,%g0
+	bnz,a,pt %icc,1f		! branch if finite
+! delay slot, squashed if branch not taken
+	mov	1,%i5			! set biguns
+	fzeros	%f1
+	fmuls	%f0,%f1,%f0
+	st	%f0,[%o0]
+1:
+	addcc	%i0,-1,%i0
+	ble,pn	%icc,1f
+! delay slot
+	nop
+	ld	[%i1],%f0
+	add	%i1,%i2,%i1
+	mov	%i3,%o0
+	add	%i3,%i4,%i3
+	fabsd	%f0,%f30
+	fcmple32 %f30,%f18,%l0
+	andcc	%l0,2,%g0
+	bz,pn	%icc,.range0
+! delay slot
+	nop
+	ba,pt	%icc,.check1
+! delay slot
+	fcmple32 %f30,%f8,%l0
+1:
+	fzero	%f0			! set up dummy argument
+	add	%fp,junk,%o0
+	mov	2,%l0
+	ba,pt	%icc,.check1
+! delay slot
+	fzero	%f30
+
+
+	.align	16
+.range1:
+	fcmpgt32 %f38,%f32,%l1
+	andcc	%l1,2,%g0
+	bnz,a,pt %icc,1f		! branch if finite
+! delay slot, squashed if branch not taken
+	mov	1,%i5			! set biguns
+	fzeros	%f3
+	fmuls	%f2,%f3,%f2
+	st	%f2,[%o1]
+1:
+	addcc	%i0,-1,%i0
+	ble,pn	%icc,1f
+! delay slot
+	nop
+	ld	[%i1],%f2
+	add	%i1,%i2,%i1
+	mov	%i3,%o1
+	add	%i3,%i4,%i3
+	fabsd	%f2,%f32
+	fcmple32 %f32,%f18,%l1
+	andcc	%l1,2,%g0
+	bz,pn	%icc,.range1
+! delay slot
+	nop
+	ba,pt	%icc,.check2
+! delay slot
+	fcmple32 %f32,%f8,%l1
+1:
+	fzero	%f2			! set up dummy argument
+	add	%fp,junk,%o1
+	mov	2,%l1
+	ba,pt	%icc,.check2
+! delay slot
+	fzero	%f32
+
+
+	.align	16
+.range2:
+	fcmpgt32 %f38,%f34,%l2
+	andcc	%l2,2,%g0
+	bnz,a,pt %icc,1f		! branch if finite
+! delay slot, squashed if branch not taken
+	mov	1,%i5			! set biguns
+	fzeros	%f5
+	fmuls	%f4,%f5,%f4
+	st	%f4,[%o2]
+1:
+	addcc	%i0,-1,%i0
+	ble,pn	%icc,1f
+! delay slot
+	nop
+	ld	[%i1],%f4
+	add	%i1,%i2,%i1
+	mov	%i3,%o2
+	add	%i3,%i4,%i3
+	fabsd	%f4,%f34
+	fcmple32 %f34,%f18,%l2
+	andcc	%l2,2,%g0
+	bz,pn	%icc,.range2
+! delay slot
+	nop
+	ba,pt	%icc,.check3
+! delay slot
+	fcmple32 %f34,%f8,%l2
+1:
+	fzero	%f4			! set up dummy argument
+	add	%fp,junk,%o2
+	mov	2,%l2
+	ba,pt	%icc,.check3
+! delay slot
+	fzero	%f34
+
+
+	.align	16
+.range3:
+	fcmpgt32 %f38,%f36,%l3
+	andcc	%l3,2,%g0
+	bnz,a,pt %icc,1f		! branch if finite
+! delay slot, squashed if branch not taken
+	mov	1,%i5			! set biguns
+	fzeros	%f7
+	fmuls	%f6,%f7,%f6
+	st	%f6,[%o3]
+1:
+	addcc	%i0,-1,%i0
+	ble,pn	%icc,1f
+! delay slot
+	nop
+	ld	[%i1],%f6
+	add	%i1,%i2,%i1
+	mov	%i3,%o3
+	add	%i3,%i4,%i3
+	fabsd	%f6,%f36
+	fcmple32 %f36,%f18,%l3
+	andcc	%l3,2,%g0
+	bz,pn	%icc,.range3
+! delay slot
+	nop
+	ba,pt	%icc,.checkprimary
+! delay slot
+	fcmple32 %f36,%f8,%l3
+1:
+	fzero	%f6			! set up dummy argument
+	add	%fp,junk,%o3
+	mov	2,%l3
+	ba,pt	%icc,.checkprimary
+! delay slot
+	fzero	%f36
+
+	SET_SIZE(__vcosf)
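
For readers tracing the assembly, the following scalar C sketch restates the per-lane algorithm. It is illustrative only: vcosf_elem is a hypothetical helper name, the coefficients and thresholds are decoded from the constants table above, the trick of adding 1.5*2^52 assumes round-to-nearest double arithmetic, and the real code processes four lanes at once and handles huge, infinite and NaN lanes through __vlibm_vcos_bigf and an x*0 multiply rather than by calling cos().

#include <math.h>

/* sin()/cos() polynomial coefficients, decoded from the table above */
static const double S0 = -0x1.555546p-3, S1 = 0x1.11077ep-7,
    S2 = -0x1.9956b6p-13;
static const double C0 = 0x1.55554ap-5, C1 = -0x1.6c0c1ep-10,
    C2 = 0x1.99e24ep-16;
static const double invpio2 = 0x1.45f306dc9c883p-1;	/* 2/pi */
static const double pio2_1 = 0x1.921fb544p0;		/* pi/2, head */
static const double pio2_t = 0x1.0b4611a626331p-34;	/* pi/2, tail */
static const double rnd = 0x1.8p52;			/* 1.5*2^52 */

static double
sin_poly(double x)	/* |x| <= pi/4 */
{
	double z = x * x;
	return (x * (1.0 + z * (S0 + z * (S1 + z * S2))));
}

static double
cos_poly(double x)	/* |x| <= pi/4 */
{
	double z = x * x;
	return (1.0 - 0.5 * z + z * z * (C0 + z * (C1 + z * C2)));
}

float
vcosf_elem(float xf)	/* hypothetical scalar equivalent of one lane */
{
	double x = xf;

	if (fabsf(xf) <= 0x1.921fb6p-1f)	/* thresh1: pi/4 */
		return ((float)cos_poly(x));
	if (fabsf(xf) <= 0x1.921fb6p+20f) {	/* thresh2: 2^19*pi */
		double nd = (x * invpio2 + rnd) - rnd; /* round(x*2/pi) */
		int n = (int)nd + 1;	/* cos(x) = sin(x + pi/2) */
		double r = (x - nd * pio2_1) - nd * pio2_t;
		double s = (n & 1) ? cos_poly(r) : sin_poly(r);

		return ((float)((n & 2) ? -s : s));
	}
	/* huge, inf, NaN: stand-in for the __vlibm_vcos_bigf path */
	return ((float)cos(x));
}

The same structure is visible in the assembly: the thresh1/thresh2 compares correspond to the two if tests, the .medium block computes nd, n and r for all four lanes, and the sixteen .case blocks are the unrolled (n & 1) selections with the (n & 2) sign applied by an fxor against signbit.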