path: root/usr/src/libm/src/mvec/vis
Diffstat (limited to 'usr/src/libm/src/mvec/vis')
-rw-r--r--  usr/src/libm/src/mvec/vis/__vatan.S | 571
-rw-r--r--  usr/src/libm/src/mvec/vis/__vatan2.S | 1077
-rw-r--r--  usr/src/libm/src/mvec/vis/__vatan2f.S | 3378
-rw-r--r--  usr/src/libm/src/mvec/vis/__vatanf.S | 1891
-rw-r--r--  usr/src/libm/src/mvec/vis/__vcos.S | 3078
-rw-r--r--  usr/src/libm/src/mvec/vis/__vcos_ultra3.S | 3424
-rw-r--r--  usr/src/libm/src/mvec/vis/__vcosf.S | 2101
-rw-r--r--  usr/src/libm/src/mvec/vis/__vexp.S | 1281
-rw-r--r--  usr/src/libm/src/mvec/vis/__vexpf.S | 2113
-rw-r--r--  usr/src/libm/src/mvec/vis/__vhypot.S | 1242
-rw-r--r--  usr/src/libm/src/mvec/vis/__vhypotf.S | 1226
-rw-r--r--  usr/src/libm/src/mvec/vis/__vlog.S | 670
-rw-r--r--  usr/src/libm/src/mvec/vis/__vlog_ultra3.S | 2904
-rw-r--r--  usr/src/libm/src/mvec/vis/__vlogf.S | 1276
-rw-r--r--  usr/src/libm/src/mvec/vis/__vpow.S | 4352
-rw-r--r--  usr/src/libm/src/mvec/vis/__vpowf.S | 3138
-rw-r--r--  usr/src/libm/src/mvec/vis/__vrhypot.S | 3878
-rw-r--r--  usr/src/libm/src/mvec/vis/__vrhypotf.S | 1518
-rw-r--r--  usr/src/libm/src/mvec/vis/__vrsqrt.S | 2156
-rw-r--r--  usr/src/libm/src/mvec/vis/__vrsqrtf.S | 1718
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsin.S | 3002
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsin_ultra3.S | 3431
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsincos.S | 958
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsincosf.S | 905
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsinf.S | 2093
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsqrt.S | 1843
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsqrtf.S | 58
-rw-r--r--  usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S | 993
28 files changed, 56275 insertions, 0 deletions
diff --git a/usr/src/libm/src/mvec/vis/__vatan.S b/usr/src/libm/src/mvec/vis/__vatan.S
new file mode 100644
index 0000000..f531a1a
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vatan.S
@@ -0,0 +1,571 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vatan.S 1.8 06/01/23 SMI"
+
+ .file "__vatan.S"
+
+#include "libm.h"
+
+ RO_DATA
+
+! following is the C version of the ATAN algorithm
+! #include <math.h>
+! #include <stdio.h>
+! double jkatan(double *x)
+! {
+! double f, z, ans, ansu, ansl, tmp, poly, conup, conlo, dummy;
+! int index, sign, intf, intz;
+! extern const double __vlibm_TBL_atan1[];
+! long *pf = (long *) &f, *pz = (long *) &z;
+!
+! /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
+! * Error = -3.08254E-18 On the interval |x| < 1/64 */
+!
+! /* define dummy names for readability. Use parray to help compiler optimize loads */
+! #define p3 parray[0]
+! #define p2 parray[1]
+! #define p1 parray[2]
+! #define soffset 3
+!
+! static const double parray[] = {
+! -1.428029046844299722E-01, /* p[3] */
+! 1.999999917247000615E-01, /* p[2] */
+! -3.333333333329292858E-01, /* p[1] */
+! 1.0, /* not used for p[0], though */
+! -1.0, /* used to flip sign of answer */
+! };
+!
+! f = *x; /* fetch argument */
+! intf = pf[0]; /* grab upper half */
+! sign = intf & 0x80000000; /* sign of argument */
+! intf ^= sign; /* abs(upper argument) */
+! sign = (unsigned) sign >> 31; /* sign bit = 0 or 1 */
+! pf[0] = intf;
+!
+! if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
+! {
+! if( (intf > 0x7ff00000) ||
+! ((intf == 0x7ff00000) && (pf[1] !=0)) ) return (*x-*x);/* return NaN if x=NaN*/
+! if( intf < 0x3e300000 ) /* avoid underflow for small arg */
+! {
+! dummy = 1.0e37 + f;
+! dummy = dummy;
+! return (*x);
+! }
+! if( intf > 0x43600000 ) /* avoid underflow for big arg */
+! {
+! index = 2;
+! f = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */
+! f = parray[soffset + sign] * f; /* put sign bit on ans */
+! return (f);
+! }
+! }
+!
+! index = 0; /* points to 0,0 in table */
+!  if (intf > 0x40500000)     /* if (|x| > 64) */
+! { f = -1.0/f;
+! index = 2; /* point to pi/2 upper, lower */
+! }
+! else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */
+! {
+! intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
+! pz[0] = intz; /* store as a double (z) */
+! pz[1] = 0; /* ...lower */
+! f = (f - z)/(1.0 + f*z); /* get reduced argument */
+!      index = (intz - 0x3f900000) >> 15;     /* ((intz - 0x3f900000) >> 16) << 1 */
+! index += 4; /* skip over 0,0,pi/2,pi/2 */
+! }
+! conup = __vlibm_TBL_atan1[index]; /* upper table */
+! conlo = __vlibm_TBL_atan1[index+1]; /* lower table */
+! tmp = f*f;
+! poly = (f*tmp)*((p3*tmp + p2)*tmp + p1);
+! ansu = conup + f; /* compute atan(f) upper */
+! ansl = (((conup - ansu) + f) + poly) + conlo;
+! ans = ansu + ansl;
+! ans = parray[soffset + sign] * ans;
+! return ans;
+! }
+
+/* 8 bytes = 1 double f.p. word */
+#define WSIZE 8
+
+ .align 32 !align with full D-cache line
+.COEFFS:
+ .double 0r-1.428029046844299722E-01 !p[3]
+ .double 0r1.999999917247000615E-01 !p[2]
+ .double 0r-3.333333333329292858E-01 !p[1]
+ .double 0r-1.0, !constant -1.0
+ .word 0x00008000,0x0 !for fp rounding of reduced arg
+ .word 0x7fff0000,0x0 !for fp truncation
+ .word 0x47900000,0 !a number close to 1.0E37
+ .word 0x80000000,0x0 !mask for fp sign bit
+ .word 0x3f800000,0x0 !1.0/128.0 dummy "safe" argument
+ .type .COEFFS,#object
+
+ ENTRY(__vatan)
+ save %sp,-SA(MINFRAME)-16,%sp
+ PIC_SETUP(g5)
+ PIC_SET(g5,__vlibm_TBL_atan1,o4)
+ PIC_SET(g5,.COEFFS,o0)
+/*
+	__vatan(int n, double *x, int stridex, double *y, int stridey)
+	computes y(i) = atan( x(i) ), for i = 1,n.  stridex and stridey
+	are the distances between consecutive elements of x and of y
+
+ %i0 n
+ %i1 address of x
+ %i2 stride x
+ %i3 address of y
+ %i4 stride y
+*/
+ cmp %i0,0 !if n <=0,
+ ble,pn %icc,.RETURN !....then do nothing
+ sll %i2,3,%i2 !convert stride to byte count
+ sll %i4,3,%i4 !convert stride to byte count
+
+/* pre-load constants before beginning main loop */
+
+ ldd [%o0],%f58 !load p[3]
+ mov 2,%i5 !argcount = 3
+
+ ldd [%o0+WSIZE],%f60 !load p[2]
+ add %fp,STACK_BIAS-8,%l1 !yaddr1 = &dummy
+ fzero %f18 !ansu1 = 0
+
+ ldd [%o0+2*WSIZE],%f62 !load p[1]
+ add %fp,STACK_BIAS-8,%l2 !yaddr2 = &dummy
+ fzero %f12 !(poly1) = 0
+
+ ldd [%o0+3*WSIZE],%f56 !-1.0
+ fzero %f14 !tmp1 = 0
+
+ ldd [%o0+4*WSIZE],%f52 !load rounding mask
+ fzero %f16 !conup1 = 0
+
+ ldd [%o0+5*WSIZE],%f54 !load truncation mask
+ fzero %f36 !f1 = 0
+
+ ldd [%o0+6*WSIZE],%f50 !1.0e37
+ fzero %f38 !f2 = 0
+
+ ldd [%o0+7*WSIZE],%f32 !mask for sign bit
+
+ ldd [%o4+2*WSIZE],%f46 !pi/2 upper
+ ldd [%o4+(2*WSIZE+8)],%f48 !pi/2 lower
+ sethi %hi(0x40500000),%l6 !64.0
+ sethi %hi(0x3f900000),%l7 !1/64.0
+ mov 0,%l4 !index1 = 0
+ mov 0,%l5 !index2 = 0
+
+.MAINLOOP:
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+.LOOP0:
+ deccc %i0 !--n
+ bneg 1f
+ mov %i1,%o5 !xuse = x (delay slot)
+
+ ba 2f
+ nop !delay slot
+1:
+ PIC_SET(g5,.COEFFS+8*WSIZE,o5)
+ dec %i5 !argcount--
+2:
+ sethi %hi(0x80000000),%o7 !mask for sign bit
+/*2 */ sethi %hi(0x43600000),%o1 !big = 0x43600000,0
+ ld [%o5],%o0 !intf = pf[0] = f upper
+ ldd [%o4+%l5],%f26 !conup2 = __vlibm_TBL_atan1[index2]
+
+ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
+/*4 */ andn %o0,%o7,%o0 !intf = fabs(intf)
+ ldd [%o5],%f34 !f = *x into f34
+
+ sub %o1,%o0,%o1 !(-) if intf > big
+/*6 */ sub %o0,%o2,%o2 !(-) if intf < small
+ fand %f34,%f32,%f40 !sign0 = sign bit
+ fmuld %f38,%f38,%f24 !tmp2= f2*f2
+
+/*7 */ orcc %o1,%o2,%g0 !(-) if either true
+ bneg,pn %icc,.SPECIAL0 !if (-) goto special cases below
+ fabsd %f34,%f34 !abs(f) (delay slot)
+ !----------------------
+
+
+ sethi %hi(0x8000),%o7 !rounding bit
+/*8 */ fpadd32 %f34,%f52,%f0 !intf + 0x00008000 (again)
+ faddd %f26,%f38,%f28 !ansu2 = conup2 + f2
+
+ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
+/*9*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
+ fmuld %f58,%f24,%f22 !p[3]*tmp2
+
+/*10 */ sethi %hi(0x7fff0000),%o7 !mask for rounding argument
+ fmuld %f34,%f0,%f10 !f*z
+ fsubd %f34,%f0,%f20 !f - z
+ add %o4,%l4,%l4 !base addr + index1
+ fmuld %f14,%f12,%f12 !poly1 = (f1*tmp1)*((p3*tmp1 + p2)*tmp1 + p1)
+ faddd %f16,%f36,%f16 !(conup1 - ansu1) + f1
+
+/*12 */ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
+ faddd %f22,%f60,%f22 !p[3]*tmp2 + p[2]
+ ldd [%l4+WSIZE],%f14 !conlo1 = __vlibm_TBL_atan1[index+1]
+
+/*13 */ sub %o0,%l7,%o2 !intz - 0x3f900000
+ fsubd %f10,%f56,%f10 !(f*z - (-1.0))
+ faddd %f16,%f12,%f12 !((conup1 - ansu1) + f1) + poly1
+
+ cmp %o0,%l6 !(|f| > 64)
+ ble .ELSE0 !if(|f| > 64) then
+/*15 */ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
+ mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
+ ba .ENDIF0 !continue
+/*16 */ fdivd %f56,%f34,%f34 !f = -1.0/f (delay slot)
+ .ELSE0: !else f( |x| >= (1/64))
+ cmp %o0,%l7 !if intf >= 1/64
+ bl .ENDIF0 !if( |x| >= (1/64) ) then...
+ mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
+ add %o3,4,%o1 !index = index + 4
+/*16 */ fdivd %f20,%f10,%f34 !f = (f - z)/(1.0 + f*z), reduced argument
+ .ENDIF0:
+
+/*17*/ sll %o1,3,%l3 !index0 = index
+ mov %i3,%l0 !yaddr0 = address of y
+	faddd	%f12,%f14,%f12		!ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1
+ fmuld %f22,%f24,%f22 !(p3*tmp2 + p2)*tmp2
+ fsubd %f26,%f28,%f26 !conup2 - ansu2
+
+/*20*/ add %i1,%i2,%i1 !x += stridex
+ add %i3,%i4,%i3 !y += stridey
+ faddd %f18,%f12,%f36 !ans1 = ansu1 + ansl1
+ fmuld %f38,%f24,%f24 !f*tmp2
+ faddd %f22,%f62,%f22 !(p3*tmp2 + p2)*tmp2 + p1
+
+/*23*/ for %f36,%f42,%f36 !sign(ans1) = sign of argument
+ std %f36,[%l1] !*yaddr1 = ans1
+ add %o4,%l5,%l5 !base addr + index2
+ fmuld %f24,%f22,%f22 !poly2 = (f2*tmp2)*((p3*tmp2 + p2)*tmp2 + p1)
+ faddd %f26,%f38,%f26 !(conup2 - ansu2) + f2
+ cmp %i5,0 !if argcount =0, we are done
+ be .RETURN
+ nop
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+.LOOP1:
+/*25*/ deccc %i0 !--n
+ bneg 1f
+ mov %i1,%o5 !xuse = x (delay slot)
+ ba 2f
+ nop !delay slot
+1:
+ PIC_SET(g5,.COEFFS+8*WSIZE,o5)
+ dec %i5 !argcount--
+2:
+
+/*26*/ sethi %hi(0x80000000),%o7 !mask for sign bit
+ sethi %hi(0x43600000),%o1 !big = 0x43600000,0
+ ld [%o5],%o0 !intf = pf[0] = f upper
+
+/*28*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
+ andn %o0,%o7,%o0 !intf = fabs(intf)
+ ldd [%o5],%f36 !f = *x into f36
+
+/*30*/ sub %o1,%o0,%o1 !(-) if intf > big
+ sub %o0,%o2,%o2 !(-) if intf < small
+ fand %f36,%f32,%f42 !sign1 = sign bit
+
+/*31*/ orcc %o1,%o2,%g0 !(-) if either true
+ bneg,pn %icc,.SPECIAL1 !if (-) goto special cases below
+ fabsd %f36,%f36 !abs(f) (delay slot)
+ !----------------------
+
+/*32*/ fpadd32 %f36,%f52,%f0 !intf + 0x00008000 (again)
+ ldd [%l5+WSIZE],%f24 !conlo2 = __vlibm_TBL_atan1[index2+1]
+
+/*33*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
+ sethi %hi(0x8000),%o7 !rounding bit
+ faddd %f26,%f22,%f22 !((conup2 - ansu2) + f2) + poly2
+
+/*34*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
+ sethi %hi(0x7fff0000),%o7 !mask for rounding argument
+ fmuld %f36,%f0,%f10 !f*z
+ fsubd %f36,%f0,%f20 !f - z
+
+/*35*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
+ faddd %f22,%f24,%f22 !ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2
+
+/*37*/ sub %o0,%l7,%o2 !intz - 0x3f900000
+ fsubd %f10,%f56,%f10 !(f*z - (-1.0))
+ ldd [%o4+%l3],%f6 !conup0 = __vlibm_TBL_atan1[index0]
+
+ cmp %o0,%l6 !(|f| > 64)
+ ble .ELSE1 !if(|f| > 64) then
+/*38*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
+ mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
+ ba .ENDIF1 !continue
+/*40*/ fdivd %f56,%f36,%f36 !f = -1.0/f (delay slot)
+ .ELSE1: !else f( |x| >= (1/64))
+ cmp %o0,%l7 !if intf >= 1/64
+ bl .ENDIF1 !if( |x| >= (1/64) ) then...
+ mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
+ add %o3,4,%o1 !index = index + 4
+/*40*/ fdivd %f20,%f10,%f36 !f = (f - z)/(1.0 + f*z), reduced argument
+ .ENDIF1:
+
+/*41*/sll %o1,3,%l4 !index1 = index
+ mov %i3,%l1 !yaddr1 = address of y
+ fmuld %f34,%f34,%f4 !tmp0= f0*f0
+ faddd %f28,%f22,%f38 !ans2 = ansu2 + ansl2
+
+/*44*/add %i1,%i2,%i1 !x += stridex
+ add %i3,%i4,%i3 !y += stridey
+ fmuld %f58,%f4,%f2 !p[3]*tmp0
+ faddd %f6,%f34,%f8 !ansu0 = conup0 + f0
+ for %f38,%f44,%f38 !sign(ans2) = sign of argument
+ std %f38,[%l2] !*yaddr2 = ans2
+ cmp %i5,0 !if argcount =0, we are done
+ be .RETURN
+ nop
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+.LOOP2:
+/*46*/ deccc %i0 !--n
+ bneg 1f
+ mov %i1,%o5 !xuse = x (delay slot)
+ ba 2f
+ nop !delay slot
+1:
+ PIC_SET(g5,.COEFFS+8*WSIZE,o5)
+ dec %i5 !argcount--
+2:
+
+/*47*/ sethi %hi(0x80000000),%o7 !mask for sign bit
+ sethi %hi(0x43600000),%o1 !big = 0x43600000,0
+ ld [%o5],%o0 !intf = pf[0] = f upper
+
+/*49*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
+ andn %o0,%o7,%o0 !intf = fabs(intf)
+ ldd [%o5],%f38 !f = *x into f38
+
+/*51*/ sub %o1,%o0,%o1 !(-) if intf > big
+ sub %o0,%o2,%o2 !(-) if intf < small
+ fand %f38,%f32,%f44 !sign2 = sign bit
+
+/*52*/ orcc %o1,%o2,%g0 !(-) if either true
+ bneg,pn %icc,.SPECIAL2 !if (-) goto special cases below
+ fabsd %f38,%f38 !abs(f) (delay slot)
+ !----------------------
+
+/*53*/ fpadd32 %f38,%f52,%f0 !intf + 0x00008000 (again)
+ faddd %f2,%f60,%f2 !p[3]*tmp0 + p[2]
+
+/*54*/ sethi %hi(0x8000),%o7 !rounding bit
+ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
+
+/*55*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
+ sethi %hi(0x7fff0000),%o7 !mask for rounding argument
+ fmuld %f38,%f0,%f10 !f*z
+ fsubd %f38,%f0,%f20 !f - z
+
+/*56*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
+ fmuld %f2,%f4,%f2 !(p3*tmp0 + p2)*tmp0
+ fsubd %f6,%f8,%f6 !conup0 - ansu0
+
+/*58*/ sub %o0,%l7,%o2 !intz - 0x3f900000
+ fsubd %f10,%f56,%f10 !(f*z - (-1.0))
+ ldd [%o4+%l4],%f16 !conup1 = __vlibm_TBL_atan1[index1]
+
+ cmp %o0,%l6 !(|f| > 64)
+ ble .ELSE2 !if(|f| > 64) then
+/*60*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
+ mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
+ ba .ENDIF2 !continue
+/*61*/ fdivd %f56,%f38,%f38 !f = -1.0/f (delay slot)
+ .ELSE2: !else f( |x| >= (1/64))
+ cmp %o0,%l7 !if intf >= 1/64
+ bl .ENDIF2 !if( |x| >= (1/64) ) then...
+ mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
+ add %o3,4,%o1 !index = index + 4
+/*61*/ fdivd %f20,%f10,%f38 !f = (f - z)/(1.0 + f*z), reduced argument
+ .ENDIF2:
+
+
+/*62*/ sll %o1,3,%l5 !index2 = index
+ mov %i3,%l2 !yaddr2 = address of y
+ fmuld %f34,%f4,%f4 !f0*tmp0
+ faddd %f2,%f62,%f2 !(p3*tmp0 + p2)*tmp0 + p1
+ fmuld %f36,%f36,%f14 !tmp1= f1*f1
+
+/*65*/add %o4,%l3,%l3 !base addr + index0
+ fmuld %f4,%f2,%f2 !poly0 = (f0*tmp0)*((p3*tmp0 + p2)*tmp0 + p1)
+ faddd %f6,%f34,%f6 !(conup0 - ansu0) + f0
+ fmuld %f58,%f14,%f12 !p[3]*tmp1
+ faddd %f16,%f36,%f18 !ansu1 = conup1 + f1
+ ldd [%l3+WSIZE],%f4 !conlo0 = __vlibm_TBL_atan1[index0+1]
+
+/*68*/ add %i1,%i2,%i1 !x += stridex
+ add %i3,%i4,%i3 !y += stridey
+ faddd %f6,%f2,%f2 !((conup0 - ansu0) + f0) + poly0
+ faddd %f12,%f60,%f12 !p[3]*tmp1 + p[2]
+
+/*71*/	faddd	%f2,%f4,%f2		!ansl0 = (((conup0 - ansu0) + f0) + poly0) + conlo0
+ fmuld %f12,%f14,%f12 !(p3*tmp1 + p2)*tmp1
+ fsubd %f16,%f18,%f16 !conup1 - ansu1
+
+/*74*/faddd %f8,%f2,%f34 !ans0 = ansu0 + ansl0
+ fmuld %f36,%f14,%f14 !f1*tmp1
+ faddd %f12,%f62,%f12 !(p3*tmp1 + p2)*tmp1 + p1
+
+/*77*/ for %f34,%f40,%f34 !sign(ans0) = sign of argument
+ std %f34,[%l0] !*yaddr0 = ans, always gets stored (delay slot)
+ cmp %i5,0 !if argcount =0, we are done
+ bg .MAINLOOP
+ nop
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+.RETURN:
+ ret
+ restore %g0,%g0,%g0
+
+ /*--------------------------------------------------------------------------*/
+ /*------------SPECIAL CASE HANDLING FOR LOOP0 ------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+/* at this point
+ %i1 x address
+ %o0 intf
+ %o2 intf - 0x3e300000
+ %f34,36,38 f0,f1,f2
+ %f40,42,44 sign0,sign1,sign2
+*/
+
+ .align 32 !align on I-cache boundary
+.SPECIAL0:
+ orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
+ bpos 1f !if >=...continue
+ sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
+ ba 3f
+ faddd %f34,%f50,%f30 !dummy op just to generate exception (delay slot)
+1:
+ ld [%o5+4],%o5 !load x lower word
+ sllx %o0,32,%o0 !left justify intf
+ sllx %g1,32,%g1 !left justify Inf
+ or %o0,%o5,%o0 !merge in lower intf
+ cmp %o0,%g1 !if intf > 0x7ff00000 00000000
+ ble,pt %xcc,2f !pass thru if NaN
+ nop
+ fmuld %f34,%f34,%f34 !...... (x*x) trigger invalid exception
+ ba 3f
+ nop
+2:
+ faddd %f46,%f48,%f34 !ans = pi/2 upper + pi/2 lower
+3:
+ add %i1,%i2,%i1 !x += stridex
+ for %f34,%f40,%f34 !sign(ans) = sign of argument
+ std %f34,[%i3] !*y = ans
+ ba .LOOP0 !keep looping
+ add %i3,%i4,%i3 !y += stridey (delay slot)
+
+ /*--------------------------------------------------------------------------*/
+ /*-----------SPECIAL CASE HANDLING FOR LOOP1 -------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+ .align 32 !align on I-cache boundary
+.SPECIAL1:
+ orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
+ bpos 1f !if >=...continue
+ sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
+ ba 3f
+ faddd %f36,%f50,%f30 !dummy op just to generate exception (delay slot)
+1:
+ ld [%o5+4],%o5 !load x lower word
+ sllx %o0,32,%o0 !left justify intf
+ sllx %g1,32,%g1 !left justify Inf
+ or %o0,%o5,%o0 !merge in lower intf
+ cmp %o0,%g1 !if intf > 0x7ff00000 00000000
+ ble,pt %xcc,2f !pass thru if NaN
+ nop
+ fmuld %f36,%f36,%f36 !...... (x*x) trigger invalid exception
+ ba 3f
+ nop
+2:
+ faddd %f46,%f48,%f36 !ans = pi/2 upper + pi/2 lower
+3:
+ add %i1,%i2,%i1 !x += stridex
+ for %f36,%f42,%f36 !sign(ans) = sign of argument
+ std %f36,[%i3] !*y = ans
+ ba .LOOP1 !keep looping
+ add %i3,%i4,%i3 !y += stridey (delay slot)
+
+ /*--------------------------------------------------------------------------*/
+ /*------------SPECIAL CASE HANDLING FOR LOOP2 ------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+ .align 32 !align on I-cache boundary
+.SPECIAL2:
+ orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
+ bpos 1f !if >=...continue
+ sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
+ ba 3f
+ faddd %f38,%f50,%f30 !dummy op just to generate exception (delay slot)
+1:
+ ld [%o5+4],%o5 !load x lower word
+ sllx %o0,32,%o0 !left justify intf
+ sllx %g1,32,%g1 !left justify Inf
+ or %o0,%o5,%o0 !merge in lower intf
+ cmp %o0,%g1 !if intf > 0x7ff00000 00000000
+ ble,pt %xcc,2f !pass thru if NaN
+ nop
+ fmuld %f38,%f38,%f38 !...... (x*x) trigger invalid exception
+ ba 3f
+ nop
+2:
+ faddd %f46,%f48,%f38 !ans = pi/2 upper + pi/2 lower
+3:
+ add %i1,%i2,%i1 !x += stridex
+ for %f38,%f44,%f38 !sign(ans) = sign of argument
+ std %f38,[%i3] !*y = ans
+ ba .LOOP2 !keep looping
+ add %i3,%i4,%i3 !y += stridey
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+ SET_SIZE(__vatan)
+
+! .ident "03-20-96 Sparc V9 3-way-unrolled version"
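The C reference in the __vatan.S header above hinges on two identities: for 1/64 <= |x| <= 64 the argument is folded onto a nearby table point z, atan(x) = atan(z) + atan((x - z)/(1 + x*z)), with atan(z) read from __vlibm_TBL_atan1 as a high/low pair; for |x| > 64 it uses atan(x) = pi/2 + atan(-1/x). Only the small residual goes through the p1..p3 polynomial. The scalar sketch below is illustrative only and not part of these sources; the test values x = 3.7 and z = 3.6875 are arbitrary, and atan() from <math.h> plus a few series terms stand in for the table and the minimax polynomial.

/* Illustrative check of the reductions used by __vatan; not part of the sources. */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	const double pio2 = 2.0 * atan(1.0);
	double x = 3.7;

	/*
	 * 1/64 <= |x| <= 64: fold x onto a nearby grid point z (in __vatan,
	 * z is x with its upper bits rounded to the table grid) and evaluate
	 * only the small residual f; a truncated series stands in for the
	 * kernel's minimax polynomial p1..p3.
	 */
	double z = 3.6875;
	double f = (x - z) / (1.0 + x * z);
	double poly = f * (1.0 + f * f * (-1.0 / 3.0 + f * f * 0.2));
	printf("table path: %.17g  atan: %.17g\n", atan(z) + poly, atan(x));

	/* |x| > 64: atan(x) = pi/2 + atan(-1/x). */
	double big = 1.0e3;
	printf("large path: %.17g  atan: %.17g\n",
	    pio2 + atan(-1.0 / big), atan(big));
	return (0);
}

The vector entry point itself is called as __vatan(n, x, stridex, y, stridey), with the strides given in elements; the prologue converts them to byte offsets with the sll-by-3 instructions at the top of the routine.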
diff --git a/usr/src/libm/src/mvec/vis/__vatan2.S b/usr/src/libm/src/mvec/vis/__vatan2.S
new file mode 100644
index 0000000..a696b07
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vatan2.S
@@ -0,0 +1,1077 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vatan2.S 1.5 06/01/23 SMI"
+
+ .file "__vatan2.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+constants:
+ .word 0x3ff921fb,0x54442d18 ! pio2
+ .word 0x3c91a626,0x33145c07 ! pio2_lo
+ .word 0xbfd55555,0x555554ee ! p1
+ .word 0x3fc99999,0x997a1559 ! p2
+ .word 0xbfc24923,0x158dfe02 ! p3
+ .word 0x3fbc639d,0x0ed1347b ! p4
+ .word 0xffffffff,0x00000000 ! mask
+ .word 0x3fc00000,0x00000000 ! twom3
+ .word 0x46d00000,0x00000000 ! two110
+ .word 0x3fe921fb,0x54442d18 ! pio4
+
+! local storage indices
+
+#define xscl STACK_BIAS-0x8
+#define yscl STACK_BIAS-0x10
+#define twom3 STACK_BIAS-0x18
+#define two110 STACK_BIAS-0x20
+#define pio4 STACK_BIAS-0x28
+#define junk STACK_BIAS-0x30
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x30
+
+! register use
+
+! i0 n
+! i1 y
+! i2 stridey
+! i3 x
+! i4 stridex
+! i5 z
+
+! l0 k0
+! l1 k1
+! l2 k2
+! l3 hx
+! l4 pz0
+! l5 pz1
+! l6 pz2
+! l7 stridez
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 __vlibm_TBL_atan2
+! g5
+
+! o0 hy
+! o1 0x00004000
+! o2 0x1420
+! o3 0x7fe00000
+! o4 0x03600000
+! o5 0x00100000
+! o7
+
+! f0 y0
+! f2 x0
+! f4 t0
+! f6 ah0
+! f8 al0
+! f10 y1
+! f12 x1
+! f14 t1
+! f16 ah1
+! f18 al1
+! f20 y2
+! f22 x2
+! f24 t2
+! f26 ah2
+! f28 al2
+! f30
+! f32
+! f34
+! f36 sx0
+! f38 sx1
+! f40 sx2
+! f42 sy0
+! f44 sy1
+! f46 sy2
+
+#define mask %f48
+#define signbit %f50
+#define pio2 %f52
+#define pio2_lo %f54
+#define p1 %f56
+#define p2 %f58
+#define p3 %f60
+#define p4 %f62
+
+ ENTRY(__vatan2)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o0)
+ PIC_SET(l7,__vlibm_TBL_atan2,o1)
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+ mov %o1, %g1
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+0xb0],%l7
+#else
+ ld [%fp+0x5c],%l7
+#endif
+ ldd [%o0+0x00],pio2 ! load/set up constants
+ ldd [%o0+0x08],pio2_lo
+ ldd [%o0+0x10],p1
+ ldd [%o0+0x18],p2
+ ldd [%o0+0x20],p3
+ ldd [%o0+0x28],p4
+ ldd [%o0+0x30],mask
+ fzero signbit
+ fnegd signbit,signbit
+ sethi %hi(0x00004000),%o1
+ sethi %hi(0x1420),%o2
+ or %o2,%lo(0x1420),%o2
+ sethi %hi(0x7fe00000),%o3
+ sethi %hi(0x03600000),%o4
+ sethi %hi(0x00100000),%o5
+ ldd [%o0+0x38],%f0 ! copy rarely used constants to stack
+ ldd [%o0+0x40],%f2
+ ldd [%o0+0x48],%f4
+ std %f0,[%fp+twom3]
+ std %f2,[%fp+two110]
+ std %f4,[%fp+pio4]
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ sll %l7,3,%l7
+ fzero %f20 ! loop prologue
+ fzero %f22
+ fzero %f24
+ fzero %f26
+ fzero %f46
+ add %fp,junk,%l6
+ ld [%i1],%f0 ! *y
+ ld [%i1+4],%f1
+ ld [%i3],%f8 ! *x
+ ld [%i3+4],%f9
+ ld [%i1],%o0 ! hy
+ ba .loop
+ ld [%i3],%l3 ! hx
+
+! 16-byte aligned
+ .align 16
+.loop:
+ fabsd %f0,%f4
+ mov %i5,%l4
+ add %i1,%i2,%i1 ! y += stridey
+
+ fabsd %f8,%f2
+ add %i3,%i4,%i3 ! x += stridex
+ add %i5,%l7,%i5 ! z += stridez
+
+ fand %f0,signbit,%f42
+ sethi %hi(0x80000000),%g5
+
+ fand %f8,signbit,%f36
+ andn %o0,%g5,%o0
+ andn %l3,%g5,%l3
+
+ fcmpd %fcc0,%f4,%f2
+
+ fmovd %f4,%f0
+
+ fmovdg %fcc0,%f2,%f0 ! swap if |y| > |x|
+
+ fmovdg %fcc0,%f4,%f2
+ mov %o0,%o7
+ lda [%i1]%asi,%f10 ! preload next argument
+
+ faddd %f26,%f20,%f26
+ lda [%i1+4]%asi,%f11
+
+ faddd %f22,%f24,%f22
+ movg %fcc0,%l3,%o0
+
+ movg %fcc0,%o7,%l3
+
+ fbu,pn %fcc0,.nan0 ! if x or y is nan
+! delay slot
+ lda [%i3]%asi,%f18
+
+ sub %l3,%o0,%l0 ! hx - hy
+ sub %l3,%o3,%g5
+ fabsd %f10,%f14
+ lda [%i3+4]%asi,%f19
+
+ sub %l0,%o4,%o7
+ faddd %f22,%f26,%f26
+
+ andcc %g5,%o7,%g0
+ bge,pn %icc,.big0 ! if |x| or |x/y| is big
+! delay slot
+ nop
+
+ fabsd %f18,%f12
+ cmp %o0,%o5
+ bl,pn %icc,.small0 ! if |y| is small
+! delay slot
+ lda [%i1]%asi,%o0
+
+ add %l0,%o1,%l0 ! k
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1
+! delay slot
+ lda [%i3]%asi,%l3
+
+.cont1:
+ srl %l0,10,%l0
+ mov %i5,%l5
+ fxor %f26,%f46,%f26
+ st %f26,[%l6]
+
+ fand %f10,signbit,%f44
+ andn %l0,0x1f,%l0
+ add %i1,%i2,%i1
+ st %f27,[%l6+4]
+
+ fand %f18,signbit,%f38
+ cmp %l0,%o2
+ movg %icc,%o2,%l0
+
+ fcmpd %fcc1,%f14,%f12
+ add %i3,%i4,%i3
+ add %i5,%l7,%i5
+
+ fmovd %f14,%f10
+ add %l0,%g1,%l0
+ sethi %hi(0x80000000),%g5
+
+ ldd [%l0+0x10],%f4
+ fand %f2,mask,%f6
+ andn %o0,%g5,%o0
+ andn %l3,%g5,%l3
+
+ fmovdg %fcc1,%f12,%f10
+
+ fmovdg %fcc1,%f14,%f12
+ mov %o0,%o7
+ lda [%i1]%asi,%f20
+
+ fsubd %f2,%f6,%f30
+ fmuld %f6,%f4,%f6
+ movg %fcc1,%l3,%o0
+
+ fmuld %f0,%f4,%f8
+ movg %fcc1,%o7,%l3
+
+ lda [%i1+4]%asi,%f21
+ fbu,pn %fcc1,.nan1
+! delay slot
+ nop
+
+ lda [%i3]%asi,%f28
+ sub %l3,%o0,%l1
+ sub %l3,%o3,%g5
+
+ lda [%i3+4]%asi,%f29
+ fmuld %f30,%f4,%f30
+ fsubd %f0,%f6,%f4
+ sub %l1,%o4,%o7
+
+ fabsd %f20,%f24
+ andcc %g5,%o7,%g0
+ bge,pn %icc,.big1
+! delay slot
+ nop
+
+ faddd %f2,%f8,%f8
+ cmp %o0,%o5
+ bl,pn %icc,.small1
+! delay slot
+ lda [%i1]%asi,%o0
+
+ fabsd %f28,%f22
+ add %l1,%o1,%l1
+ addcc %i0,-1,%i0
+ lda [%i3]%asi,%l3
+
+ fsubd %f4,%f30,%f4
+ srl %l1,10,%l1
+ ble,pn %icc,.last2
+! delay slot
+ mov %i5,%l6
+
+.cont2:
+ fand %f20,signbit,%f46
+ andn %l1,0x1f,%l1
+ add %i1,%i2,%i1
+
+ fand %f28,signbit,%f40
+ cmp %l1,%o2
+ movg %icc,%o2,%l1
+
+ fcmpd %fcc2,%f24,%f22
+ add %i3,%i4,%i3
+ add %i5,%l7,%i5
+
+ fdivd %f4,%f8,%f4
+ fmovd %f24,%f20
+ add %l1,%g1,%l1
+ sethi %hi(0x80000000),%g5
+
+ ldd [%l1+0x10],%f14
+ fand %f12,mask,%f16
+ andn %o0,%g5,%o0
+ andn %l3,%g5,%l3
+
+ fmovdg %fcc2,%f22,%f20
+
+ fmovdg %fcc2,%f24,%f22
+ mov %o0,%o7
+
+ fsubd %f12,%f16,%f32
+ fmuld %f16,%f14,%f16
+ movg %fcc2,%l3,%o0
+
+ fnegd pio2_lo,%f8 ! al
+ fmuld %f10,%f14,%f18
+ movg %fcc2,%o7,%l3
+
+ fzero %f0
+ fbu,pn %fcc2,.nan2
+! delay slot
+ nop
+
+ fmovdg %fcc0,signbit,%f0
+ sub %l3,%o0,%l2
+ sub %l3,%o3,%g5
+
+ fmuld %f32,%f14,%f32
+ fsubd %f10,%f16,%f14
+ sub %l2,%o4,%o7
+
+ faddd %f12,%f18,%f18
+ andcc %g5,%o7,%g0
+ bge,pn %icc,.big2
+! delay slot
+ nop
+
+ fxor %f36,%f0,%f36
+ cmp %o0,%o5
+ bl,pn %icc,.small2
+! delay slot
+ nop
+
+.cont3:
+ fmovdg %fcc0,signbit,%f8
+ add %l2,%o1,%l2
+
+ fsubd %f14,%f32,%f14
+ srl %l2,10,%l2
+
+ fxor %f36,pio2_lo,%f30 ! al
+ andn %l2,0x1f,%l2
+
+ fxor %f36,pio2,%f0 ! ah
+ cmp %l2,%o2
+ movg %icc,%o2,%l2
+
+ fxor %f42,%f36,%f42 ! sy
+
+ faddd %f8,%f30,%f8
+ ldd [%l0+0x8],%f30
+ add %l2,%g1,%l2
+
+ fdivd %f14,%f18,%f14
+ fzero %f10
+
+ ldd [%l2+0x10],%f24
+ fand %f22,mask,%f26
+
+ fmovdg %fcc1,signbit,%f10
+
+ fmuld %f4,%f4,%f36
+ faddd %f8,%f30,%f8
+
+ fsubd %f22,%f26,%f34
+ fmuld %f26,%f24,%f26
+
+ fmuld %f20,%f24,%f28
+ fxor %f38,%f10,%f38
+
+ fmuld %f4,p3,%f6
+ fnegd pio2_lo,%f18
+
+ fmuld %f36,p2,%f2
+ fmovdg %fcc1,signbit,%f18
+
+ fmuld %f36,%f4,%f36
+ fxor %f38,pio2,%f10
+
+ fmuld %f34,%f24,%f34
+ fsubd %f20,%f26,%f24
+
+ faddd %f22,%f28,%f28
+
+ faddd %f2,p1,%f2
+
+ fmuld %f36,p4,%f30
+ fxor %f38,pio2_lo,%f32
+
+ fsubd %f24,%f34,%f24
+
+ fxor %f44,%f38,%f44
+
+ fmuld %f36,%f2,%f2
+ faddd %f18,%f32,%f18
+ ldd [%l1+0x8],%f32
+
+ fmuld %f36,%f36,%f36
+ faddd %f6,%f30,%f30
+
+ fdivd %f24,%f28,%f24
+ fzero %f20
+
+ fmovdg %fcc2,signbit,%f20
+
+ faddd %f2,%f8,%f2
+
+ fmuld %f14,%f14,%f38
+ faddd %f18,%f32,%f18
+
+ fmuld %f36,%f30,%f36
+ fxor %f40,%f20,%f40
+
+ fnegd pio2,%f6 ! ah
+ fmuld %f14,p3,%f16
+
+ fmovdg %fcc0,signbit,%f6
+
+ fmuld %f38,p2,%f12
+ fnegd pio2_lo,%f28
+
+ faddd %f2,%f36,%f2
+ fmuld %f38,%f14,%f38
+
+ faddd %f6,%f0,%f6
+ ldd [%l0],%f0
+
+ fmovdg %fcc2,signbit,%f28
+
+ faddd %f12,p1,%f12
+
+ fmuld %f38,p4,%f32
+ fxor %f40,pio2_lo,%f34
+
+ fxor %f40,pio2,%f20
+
+ faddd %f2,%f4,%f2
+
+ fmuld %f38,%f12,%f12
+ fxor %f46,%f40,%f46
+
+ fmuld %f38,%f38,%f38
+ faddd %f16,%f32,%f32
+
+ faddd %f28,%f34,%f28
+ ldd [%l2+0x8],%f34
+
+ faddd %f6,%f0,%f6
+ lda [%i1]%asi,%f0 ! preload next argument
+
+ faddd %f12,%f18,%f12
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f24,%f24,%f40
+ lda [%i3]%asi,%f8
+
+ fmuld %f38,%f32,%f38
+ faddd %f28,%f34,%f28
+ lda [%i3+4]%asi,%f9
+
+ fnegd pio2,%f16
+ fmuld %f24,p3,%f26
+ lda [%i1]%asi,%o0
+
+ fmovdg %fcc1,signbit,%f16
+ lda [%i3]%asi,%l3
+
+ fmuld %f40,p2,%f22
+
+ faddd %f12,%f38,%f12
+ fmuld %f40,%f24,%f40
+
+ faddd %f2,%f6,%f6
+
+ faddd %f16,%f10,%f16
+ ldd [%l1],%f10
+
+ faddd %f22,p1,%f22
+
+ faddd %f12,%f14,%f12
+ fmuld %f40,p4,%f34
+
+ fxor %f6,%f42,%f6
+ st %f6,[%l4]
+
+ faddd %f16,%f10,%f16
+ st %f7,[%l4+4]
+
+ fmuld %f40,%f22,%f22
+
+ fmuld %f40,%f40,%f40
+ faddd %f26,%f34,%f34
+
+ fnegd pio2,%f26
+
+ faddd %f12,%f16,%f16
+
+ faddd %f22,%f28,%f22
+
+ fmuld %f40,%f34,%f40
+ fmovdg %fcc2,signbit,%f26
+
+! -
+
+ fxor %f16,%f44,%f16
+ st %f16,[%l5]
+
+ faddd %f26,%f20,%f26
+ st %f17,[%l5+4]
+ addcc %i0,-1,%i0
+
+ faddd %f22,%f40,%f22
+ bg,pt %icc,.loop
+! delay slot
+ ldd [%l2],%f20
+
+
+ faddd %f26,%f20,%f26
+ faddd %f22,%f24,%f22
+ faddd %f22,%f26,%f26
+.done_from_special0:
+ fxor %f26,%f46,%f26
+ st %f26,[%l6]
+ st %f27,[%l6+4]
+ ret
+ restore
+
+
+
+ .align 16
+.last1:
+ fmovd pio2,%f10 ! set up dummy arguments
+ fmovd pio2,%f18
+ fabsd %f10,%f14
+ fabsd %f18,%f12
+ sethi %hi(0x3ff921fb),%o0
+ or %o0,%lo(0x3ff921fb),%o0
+ mov %o0,%l3
+ ba,pt %icc,.cont1
+! delay slot
+ add %fp,junk,%i5
+
+
+
+ .align 16
+.last2:
+ fmovd pio2,%f20
+ fmovd pio2,%f28
+ fabsd %f20,%f24
+ fabsd %f28,%f22
+ sethi %hi(0x3ff921fb),%o0
+ or %o0,%lo(0x3ff921fb),%o0
+ mov %o0,%l3
+ ba,pt %icc,.cont2
+! delay slot
+ add %fp,junk,%l6
+
+
+
+ .align 16
+.nan0:
+ faddd %f22,%f26,%f26
+.nan0_from_special0:
+ fabsd %f10,%f14
+ lda [%i3+4]%asi,%f19
+ fabsd %f18,%f12
+ lda [%i1]%asi,%o0
+ lda [%i3]%asi,%l3
+ ba,pt %icc,.special0
+! delay slot
+ fmuld %f0,%f2,%f6
+
+
+ .align 16
+.big0:
+ fabsd %f18,%f12
+ lda [%i1]%asi,%o0
+ lda [%i3]%asi,%l3
+ cmp %g5,%o5
+ bge,pn %icc,.return_ah0 ! if hx >= 0x7ff00000
+! delay slot
+ nop
+ cmp %l0,%o4
+ bge,pn %icc,1f ! if hx - hy >= 0x03600000
+! delay slot
+ nop
+ ldd [%fp+twom3],%f6
+ fmuld %f0,%f6,%f0
+ fmuld %f2,%f6,%f2
+ add %l0,%o1,%l0
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1
+! delay slot
+ nop
+ ba,pt %icc,.cont1
+! delay slot
+ nop
+1:
+ fbg,pn %fcc0,.return_ah0
+! delay slot
+ nop
+ fcmpd %fcc3,%f8,signbit
+ fbl,pn %fcc3,.return_ah0
+! delay slot
+ nop
+ ba,pt %icc,.special0
+! delay slot
+ fdivd %f0,%f2,%f6
+
+
+ .align 16
+.small0:
+ lda [%i3]%asi,%l3
+ fcmpd %fcc3,%f0,signbit
+ fbe,pt %fcc3,.return_ah0
+! delay slot
+ nop
+ ldd [%fp+two110],%f6
+ fmuld %f0,%f6,%f0
+ fmuld %f2,%f6,%f2
+ st %f0,[%fp+yscl]
+ ld [%fp+yscl],%o7
+ st %f2,[%fp+xscl]
+ ld [%fp+xscl],%l0
+ sub %l0,%o7,%l0
+ add %l0,%o1,%l0
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1
+! delay slot
+ nop
+ ba,pt %icc,.cont1
+! delay slot
+ nop
+
+
+ .align 16
+.return_ah0:
+ fzero %f0
+ fmovdg %fcc0,signbit,%f0
+ fxor %f36,%f0,%f36
+ fxor %f36,pio2,%f0
+ fxor %f42,%f36,%f42
+ fnegd pio2,%f6
+ fmovdg %fcc0,signbit,%f6
+ faddd %f6,%f0,%f6
+ sub %g5,%l0,%o7
+ cmp %o7,%o5
+ bl,pt %icc,1f ! if hy < 0x7ff00000
+! delay slot
+ nop
+ ldd [%fp+pio4],%f0
+ faddd %f6,%f0,%f6
+1:
+ fdtoi %f6,%f4
+.special0:
+ fxor %f6,%f42,%f6
+ st %f6,[%l4]
+ st %f7,[%l4+4]
+ addcc %i0,-1,%i0
+ ble,pn %icc,.done_from_special0
+! delay slot
+ nop
+ fmovd %f10,%f0
+ fmovd %f18,%f8
+ fmovd %f14,%f4
+ fmovd %f12,%f2
+ mov %i5,%l4
+ add %i1,%i2,%i1
+ add %i3,%i4,%i3
+ add %i5,%l7,%i5
+ fand %f0,signbit,%f42
+ sethi %hi(0x80000000),%g5
+ fand %f8,signbit,%f36
+ andn %o0,%g5,%o0
+ andn %l3,%g5,%l3
+ fcmpd %fcc0,%f4,%f2
+ fmovd %f4,%f0
+ fmovdg %fcc0,%f2,%f0
+ fmovdg %fcc0,%f4,%f2
+ mov %o0,%o7
+ movg %fcc0,%l3,%o0
+ movg %fcc0,%o7,%l3
+ lda [%i1]%asi,%f10
+ lda [%i1+4]%asi,%f11
+ fbu,pn %fcc0,.nan0_from_special0
+! delay slot
+ lda [%i3]%asi,%f18
+ fabsd %f10,%f14
+ lda [%i3+4]%asi,%f19
+ sub %l3,%o0,%l0
+ sub %l3,%o3,%g5
+ sub %l0,%o4,%o7
+ andcc %g5,%o7,%g0
+ bge,pn %icc,.big0
+! delay slot
+ nop
+ fabsd %f18,%f12
+ cmp %o0,%o5
+ bl,pn %icc,.small0
+! delay slot
+ lda [%i1]%asi,%o0
+ add %l0,%o1,%l0
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1
+! delay slot
+ lda [%i3]%asi,%l3
+ ba,pt %icc,.cont1
+! delay slot
+ nop
+
+
+
+ .align 16
+.nan1:
+ fmuld %f30,%f4,%f30
+ fsubd %f0,%f6,%f4
+ faddd %f2,%f8,%f8
+ fsubd %f4,%f30,%f4
+.nan1_from_special1:
+ lda [%i3]%asi,%f28
+ lda [%i3+4]%asi,%f29
+ fabsd %f20,%f24
+ lda [%i1]%asi,%o0
+ fabsd %f28,%f22
+ lda [%i3]%asi,%l3
+ mov %i5,%l6
+ ba,pt %icc,.special1
+! delay slot
+ fmuld %f10,%f12,%f16
+
+
+ .align 16
+.big1:
+ faddd %f2,%f8,%f8
+ fsubd %f4,%f30,%f4
+.big1_from_special1:
+ lda [%i1]%asi,%o0
+ fabsd %f28,%f22
+ lda [%i3]%asi,%l3
+ mov %i5,%l6
+ cmp %g5,%o5
+ bge,pn %icc,.return_ah1
+! delay slot
+ nop
+ cmp %l1,%o4
+ bge,pn %icc,1f
+! delay slot
+ nop
+ ldd [%fp+twom3],%f16
+ fmuld %f10,%f16,%f10
+ fmuld %f12,%f16,%f12
+ add %l1,%o1,%l1
+ srl %l1,10,%l1
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last2
+! delay slot
+ nop
+ ba,pt %icc,.cont2
+! delay slot
+ nop
+1:
+ fbg,pn %fcc1,.return_ah1
+! delay slot
+ nop
+ fcmpd %fcc3,%f18,signbit
+ fbl,pn %fcc3,.return_ah1
+! delay slot
+ nop
+ ba,pt %icc,.special1
+! delay slot
+ fdivd %f10,%f12,%f16
+
+
+ .align 16
+.small1:
+ fsubd %f4,%f30,%f4
+.small1_from_special1:
+ fabsd %f28,%f22
+ lda [%i3]%asi,%l3
+ mov %i5,%l6
+ fcmpd %fcc3,%f10,signbit
+ fbe,pt %fcc3,.return_ah1
+! delay slot
+ nop
+ ldd [%fp+two110],%f16
+ fmuld %f10,%f16,%f10
+ fmuld %f12,%f16,%f12
+ st %f10,[%fp+yscl]
+ ld [%fp+yscl],%o7
+ st %f12,[%fp+xscl]
+ ld [%fp+xscl],%l1
+ sub %l1,%o7,%l1
+ add %l1,%o1,%l1
+ srl %l1,10,%l1
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last2
+! delay slot
+ nop
+ ba,pt %icc,.cont2
+! delay slot
+ nop
+
+
+ .align 16
+.return_ah1:
+ fzero %f10
+ fmovdg %fcc1,signbit,%f10
+ fxor %f38,%f10,%f38
+ fxor %f38,pio2,%f10
+ fxor %f44,%f38,%f44
+ fnegd pio2,%f16
+ fmovdg %fcc1,signbit,%f16
+ faddd %f16,%f10,%f16
+ sub %g5,%l1,%o7
+ cmp %o7,%o5
+ bl,pt %icc,1f
+! delay slot
+ nop
+ ldd [%fp+pio4],%f10
+ faddd %f16,%f10,%f16
+1:
+ fdtoi %f16,%f14
+.special1:
+ fxor %f16,%f44,%f16
+ st %f16,[%l5]
+ st %f17,[%l5+4]
+ addcc %i0,-1,%i0
+ bg,pn %icc,1f
+! delay slot
+ nop
+ fmovd pio2,%f20 ! set up dummy argument
+ fmovd pio2,%f28
+ fabsd %f20,%f24
+ fabsd %f28,%f22
+ sethi %hi(0x3ff921fb),%o0
+ or %o0,%lo(0x3ff921fb),%o0
+ mov %o0,%l3
+ add %fp,junk,%i5
+1:
+ fmovd %f20,%f10
+ fmovd %f28,%f18
+ fmovd %f24,%f14
+ fmovd %f22,%f12
+ mov %i5,%l5
+ add %i1,%i2,%i1
+ add %i3,%i4,%i3
+ add %i5,%l7,%i5
+ fand %f10,signbit,%f44
+ sethi %hi(0x80000000),%g5
+ fand %f18,signbit,%f38
+ andn %o0,%g5,%o0
+ andn %l3,%g5,%l3
+ fcmpd %fcc1,%f14,%f12
+ fmovd %f14,%f10
+ fmovdg %fcc1,%f12,%f10
+ fmovdg %fcc1,%f14,%f12
+ mov %o0,%o7
+ movg %fcc1,%l3,%o0
+ movg %fcc1,%o7,%l3
+ lda [%i1]%asi,%f20
+ lda [%i1+4]%asi,%f21
+ fbu,pn %fcc1,.nan1_from_special1
+! delay slot
+ nop
+ lda [%i3]%asi,%f28
+ lda [%i3+4]%asi,%f29
+ fabsd %f20,%f24
+ sub %l3,%o0,%l1
+ sub %l3,%o3,%g5
+ sub %l1,%o4,%o7
+ andcc %g5,%o7,%g0
+ bge,pn %icc,.big1_from_special1
+! delay slot
+ nop
+ cmp %o0,%o5
+ bl,pn %icc,.small1_from_special1
+! delay slot
+ lda [%i1]%asi,%o0
+ fabsd %f28,%f22
+ lda [%i3]%asi,%l3
+ add %l1,%o1,%l1
+ srl %l1,10,%l1
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last2
+! delay slot
+ mov %i5,%l6
+ ba,pt %icc,.cont2
+! delay slot
+ nop
+
+
+
+ .align 16
+.nan2:
+ fmovdg %fcc0,signbit,%f0
+ fmuld %f32,%f14,%f32
+ fsubd %f10,%f16,%f14
+ faddd %f12,%f18,%f18
+ fxor %f36,%f0,%f36
+.nan2_from_special2:
+ ba,pt %icc,.special2
+! delay slot
+ fmuld %f20,%f22,%f26
+
+
+ .align 16
+.big2:
+ fxor %f36,%f0,%f36
+.big2_from_special2:
+ cmp %g5,%o5
+ bge,pn %icc,.return_ah2
+! delay slot
+ nop
+ cmp %l2,%o4
+ bge,pn %icc,1f
+! delay slot
+ nop
+ ldd [%fp+twom3],%f26
+ fmuld %f20,%f26,%f20
+ fmuld %f22,%f26,%f22
+ ba,pt %icc,.cont3
+! delay slot
+ nop
+1:
+ fbg,pn %fcc2,.return_ah2
+! delay slot
+ nop
+ fcmpd %fcc3,%f28,signbit
+ fbl,pn %fcc3,.return_ah2
+! delay slot
+ nop
+ ba,pt %icc,.special2
+! delay slot
+ fdivd %f20,%f22,%f26
+
+
+ .align 16
+.small2:
+ fcmpd %fcc3,%f20,signbit
+ fbe,pt %fcc3,.return_ah2
+! delay slot
+ nop
+ ldd [%fp+two110],%f26
+ fmuld %f20,%f26,%f20
+ fmuld %f22,%f26,%f22
+ st %f20,[%fp+yscl]
+ ld [%fp+yscl],%o7
+ st %f22,[%fp+xscl]
+ ld [%fp+xscl],%l2
+ sub %l2,%o7,%l2
+ ba,pt %icc,.cont3
+! delay slot
+ nop
+
+
+ .align 16
+.return_ah2:
+ fzero %f20
+ fmovdg %fcc2,signbit,%f20
+ fxor %f40,%f20,%f40
+ fxor %f40,pio2,%f20
+ fxor %f46,%f40,%f46
+ fnegd pio2,%f26
+ fmovdg %fcc2,signbit,%f26
+ faddd %f26,%f20,%f26
+ sub %g5,%l2,%o7
+ cmp %o7,%o5
+ bl,pt %icc,1f
+! delay slot
+ nop
+ ldd [%fp+pio4],%f20
+ faddd %f26,%f20,%f26
+1:
+ fdtoi %f26,%f24
+.special2:
+ fxor %f26,%f46,%f26
+ st %f26,[%l6]
+ st %f27,[%l6+4]
+ addcc %i0,-1,%i0
+ bg,pn %icc,1f
+! delay slot
+ nop
+ fmovd pio2,%f20 ! set up dummy argument
+ fmovd pio2,%f22
+ fzero %f40
+ fzero %f46
+ mov 0,%l2
+ ba,pt %icc,.cont3
+! delay slot
+ add %fp,junk,%l6
+1:
+ lda [%i1]%asi,%f20
+ lda [%i1+4]%asi,%f21
+ lda [%i3]%asi,%f28
+ lda [%i3+4]%asi,%f29
+ fabsd %f20,%f24
+ lda [%i1]%asi,%o0
+ fabsd %f28,%f22
+ lda [%i3]%asi,%l3
+ mov %i5,%l6
+ fand %f20,signbit,%f46
+ add %i1,%i2,%i1
+ fand %f28,signbit,%f40
+ fcmpd %fcc2,%f24,%f22
+ add %i3,%i4,%i3
+ add %i5,%l7,%i5
+ fmovd %f24,%f20
+ sethi %hi(0x80000000),%g5
+ andn %o0,%g5,%o0
+ andn %l3,%g5,%l3
+ fmovdg %fcc2,%f22,%f20
+ fmovdg %fcc2,%f24,%f22
+ mov %o0,%o7
+ movg %fcc2,%l3,%o0
+ movg %fcc2,%o7,%l3
+ fbu,pn %fcc2,.nan2_from_special2
+! delay slot
+ nop
+ sub %l3,%o0,%l2
+ sub %l3,%o3,%g5
+ sub %l2,%o4,%o7
+ andcc %g5,%o7,%g0
+ bge,pn %icc,.big2_from_special2
+! delay slot
+ nop
+ cmp %o0,%o5
+ bl,pn %icc,.small2
+! delay slot
+ nop
+ ba,pt %icc,.cont3
+! delay slot
+ nop
+
+ SET_SIZE(__vatan2)
+
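Both __vatan2.S above and the single-precision __vatan2f.S below reduce atan2 to a single atan of a ratio no larger than 1: the magnitudes are compared and swapped so the divide is |smaller|/|larger| ("swap if |y| > |x|" in the code), the table/polynomial kernel (coefficients K0..K9 in __vatan2f) is applied to that ratio, and the result is folded back with per-octant add and multiply constants (the pio2/pi entries and, in __vatan2f, the cadd/cmul tables). The scalar model below is illustrative only and not part of these sources; atan() from <math.h> stands in for the kernel, and the zero, infinity, and NaN cases that the assembly handles separately are ignored.

/* Scalar model of the octant reduction used by __vatan2/__vatan2f. */
#include <math.h>
#include <stdio.h>

static double
atan2_model(double y, double x)
{
	const double pio2 = 2.0 * atan(1.0);
	const double pi = 2.0 * pio2;
	double ay = fabs(y), ax = fabs(x);
	double t, r;

	t = (ay > ax) ? ax / ay : ay / ax;	/* ratio <= 1 */
	r = atan(t);				/* kernel: table + polynomial */
	if (ay > ax)
		r = pio2 - r;			/* fold back across pi/4 */
	if (x < 0.0)
		r = pi - r;			/* left half-plane */
	return (y < 0.0 ? -r : r);		/* sign of y picks the half */
}

int
main(void)
{
	printf("%.17g  %.17g\n", atan2_model(-3.0, -4.0), atan2(-3.0, -4.0));
	return (0);
}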
diff --git a/usr/src/libm/src/mvec/vis/__vatan2f.S b/usr/src/libm/src/mvec/vis/__vatan2f.S
new file mode 100644
index 0000000..2451611
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vatan2f.S
@@ -0,0 +1,3378 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vatan2f.S 1.6 06/01/23 SMI"
+
+ .file "__vatan2f.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+.CONST_TBL:
+ .word 0xbff921fb, 0x54442d18 ! -M_PI_2
+ .word 0x3ff921fb, 0x54442d18 ! M_PI_2
+ .word 0xbff921fb, 0x54442d18 ! -M_PI_2
+ .word 0x3ff921fb, 0x54442d18 ! M_PI_2
+ .word 0xc00921fb, 0x54442d18 ! -M_PI
+ .word 0x400921fb, 0x54442d18 ! M_PI
+ .word 0x80000000, 0x00000000 ! -0.0
+ .word 0x00000000, 0x00000000 ! 0.0
+
+ .word 0xbff00000, 0x00000000 ! -1.0
+ .word 0x3ff00000, 0x00000000 ! 1.0
+
+ .word 0x3fefffff, 0xfe79bf93 ! K0 = 9.99999997160545464888e-01
+ .word 0xbfd55552, 0xf0db4320 ! K1 = -3.33332762919825514315e-01
+ .word 0x3fc998f8, 0x2493d066 ! K2 = 1.99980752811487135558e-01
+ .word 0xbfc240b8, 0xd994abf9 ! K3 = -1.42600160828209047720e-01
+ .word 0x3fbbfc9e, 0x8c2b0243 ! K4 = 1.09323415013030928421e-01
+ .word 0xbfb56013, 0x64b1cac3 ! K5 = -8.34972496830160174704e-02
+ .word 0x3fad3ad7, 0x9f53e142 ! K6 = 5.70895559303061900411e-02
+ .word 0xbf9f148f, 0x2a829af1 ! K7 = -3.03518647857811706139e-02
+ .word 0x3f857a8c, 0x747ed314 ! K8 = 1.04876492549493055747e-02
+ .word 0xbf5bdf39, 0x729124b6 ! K9 = -1.70117006406859722727e-03
+
+ .word 0x3fe921fb, 0x54442d18 ! M_PI_4
+ .word 0x36a00000, 0x00000000 ! 2^(-149)
+
+#define counter %o3
+#define stridex %i4
+#define stridey %i5
+#define stridez %l1
+#define cmul_arr %i0
+#define cadd_arr %i2
+#define _0x7fffffff %l0
+#define _0x7f800000 %l2
+
+#define K0 %f42
+#define K1 %f44
+#define K2 %f46
+#define K3 %f48
+#define K4 %f50
+#define K5 %f52
+#define K6 %f54
+#define K7 %f56
+#define K8 %f58
+#define K9 %f60
+
+#define tmp_counter STACK_BIAS-32
+#define tmp_py STACK_BIAS-24
+#define tmp_px STACK_BIAS-16
+#define tmp_pz STACK_BIAS-8
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+!--------------------------------------------------------------------
+! !!!!! vatan2f algorithm !!!!!
+! uy0 = *(int*)py;
+! ux0 = *(int*)px;
+! ay0 = uy0 & 0x7fffffff;
+! ax0 = ux0 & 0x7fffffff;
+! if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 )
+! {
+! /* |X| or |Y| = Nan */
+! if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 )
+! {
+! ftmp0 = *(float*)&ax0 * *(float*)&ay0;
+! *pz = ftmp0;
+! }
+! signx0 = (unsigned)ux0 >> 30;
+! signx0 &= 2;
+! signy0 = uy0 >> 31;
+! if (ay0 == 0x7f800000)
+! signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2;
+! else
+! signx0 += signx0;
+! res = signx0 * M_PI_4;
+! signy0 <<= 3;
+! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0);
+! res *= dtmp0;
+! ftmp0 = (float) res;
+! *pz = ftmp0;
+! goto next;
+! }
+! if ( ax0 == 0 && ay0 == 0 )
+! {
+! signy0 = uy0 >> 28;
+! signx0 = ux0 >> 27;
+! ldiff0 = ax0 - ay0;
+! ldiff0 >>= 31;
+! signx0 &= -16;
+! signy0 &= -8;
+! ldiff0 <<= 5;
+! signx0 += signy0;
+! res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0);
+! ftmp0 = (float) res;
+! *pz = ftmp0;
+! goto next;
+! }
+! ldiff0 = ax0 - ay0;
+! ldiff0 >>= 31;
+! addrc0 = (char*)px - (char*)py;
+! addrc0 &= ldiff0;
+! fy0 = *(float*)((char*)py + addrc0);
+! fx0 = *(float*)((char*)px - addrc0);
+! itmp0 = *(int*)&fy0;
+! if((itmp0 & 0x7fffffff) < 0x00800000)
+! {
+! itmp0 >>= 28;
+! itmp0 &= -8;
+! fy0 = fabsf(fy0);
+! dtmp0 = (double) *(int*)&fy0;
+! dtmp0 *= C2ONM149;
+! dsign = *(double*)((char*)cmul_arr + itmp0);
+! dtmp0 *= dsign;
+!	y0 = dtmp0;
+! }
+! else
+! y0 = (double)fy0;
+! itmp0 = *(int*)&fx0;
+! if((itmp0 & 0x7fffffff) < 0x00800000)
+! {
+! itmp0 >>= 28;
+! itmp0 &= -8;
+! fx0 = fabsf(fx0);
+! dtmp0 = (double) *(int*)&fx0;
+! dtmp0 *= C2ONM149;
+! dsign = *(double*)((char*)cmul_arr + itmp0);
+! dtmp0 *= dsign;
+! x0 = dtmp0;
+! }
+! else
+! x0 = (double)fx0;
+! px += stridex;
+! py += stridey;
+! x0 = y0 / x0;
+! x20 = x0 * x0;
+! dtmp0 = K9 * x20;
+! dtmp0 += K8;
+! dtmp0 *= x20;
+! dtmp0 += K7;
+! dtmp0 *= x20;
+! dtmp0 += K6;
+! dtmp0 *= x20;
+! dtmp0 += K5;
+! dtmp0 *= x20;
+! dtmp0 += K4;
+! dtmp0 *= x20;
+! dtmp0 += K3;
+! dtmp0 *= x20;
+! dtmp0 += K2;
+! dtmp0 *= x20;
+! dtmp0 += K1;
+! dtmp0 *= x20;
+! dtmp0 += K0;
+! x0 = dtmp0 * x0;
+! signy0 = uy0 >> 28;
+! signy0 &= -8;
+! signx0 = ux0 >> 27;
+! signx0 &= -16;
+! ltmp0 = ldiff0 << 5;
+! ltmp0 += (char*)cadd_arr;
+! ltmp0 += signx0;
+! cadd0 = *(double*)(ltmp0 + signy0);
+! cmul0_ind = ldiff0 << 3;
+! cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+! dtmp0 = cmul0 * x0;
+! dtmp0 = cadd0 + dtmp0;
+! ftmp0 = (float)dtmp0;
+! *pz = ftmp0;
+! pz += stridez;
+!
+!--------------------------------------------------------------------
+
+ ENTRY(__vatan2f)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,g5)
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],%l7
+#else
+ ld [%fp+STACK_BIAS+92],%l7
+#endif
+
+ st %i0,[%fp+tmp_counter]
+ sethi %hi(0x7ffffc00),_0x7fffffff
+ add _0x7fffffff,1023,_0x7fffffff
+ or %g0,%i2,%o2
+ sll %l7,2,stridez
+
+ sethi %hi(0x7f800000),_0x7f800000
+ mov %g5,%g1
+
+ or %g0,stridey,%o4
+ add %g1,56,cadd_arr
+
+ sll %o2,2,stridey
+ add %g1,72,cmul_arr
+
+ ldd [%g1+80],K0
+ ldd [%g1+80+8],K1
+ ldd [%g1+80+16],K2
+ ldd [%g1+80+24],K3
+ ldd [%g1+80+32],K4
+ ldd [%g1+80+40],K5
+ ldd [%g1+80+48],K6
+ ldd [%g1+80+56],K7
+ ldd [%g1+80+64],K8
+ ldd [%g1+80+72],K9
+
+ sll stridex,2,stridex
+
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_py],%i1
+ ldx [%fp+tmp_px],%i3
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ subcc counter,1,counter
+ bneg,pn %icc,.exit
+ nop
+
+ lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py;
+
+ lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px;
+
+ and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff;
+
+ cmp %l7,_0x7f800000
+ bge,pn %icc,.spec0
+ and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff;
+
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.spec0
+ sethi %hi(0x00800000),%o5
+
+ cmp %l6,%o5
+ bl,pn %icc,.spec1
+ sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0;
+
+ cmp %l7,%o5
+ bl,pn %icc,.spec1
+ nop
+
+ stx %o4,[%fp+tmp_pz]
+ sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py;
+
+ and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0;
+
+ lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0
+
+ lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5;
+
+ sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27;
+ add %i1,stridey,%i1 ! py += stridey
+
+ add %i3,stridex,%i3 ! px += stridex
+
+ lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py;
+ sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28;
+
+ add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr;
+
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ fstod %f2,%f2 ! (0_0) x0 = (double)fx0;
+
+.spec1_cont:
+ lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px;
+ and %o5,-16,%o5 ! (0_0) signx0 &= -16;
+
+ and %o4,-8,%o4 ! (0_0) signy0 &= -8;
+
+ fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0;
+
+ add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0;
+
+ and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+
+ cmp %l6,%o5
+ bl,pn %icc,.u0
+ and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff;
+.c0:
+ cmp %g1,%o5
+ bl,pn %icc,.u1
+ ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
+.c1:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u2
+ sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0;
+.c2:
+ cmp %g1,_0x7f800000
+ bge,pn %icc,.u3
+ nop
+.c3:
+ sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py;
+
+ and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0;
+
+ lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0;
+
+ lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5;
+
+ cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000
+ bge,pn %icc,.update0 ! (1_0) if ( b0 > 0x7f800000 )
+ nop
+.cont0:
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (1_0) y0 = (double)fy0;
+
+ sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+
+ sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr;
+ fstod %f2,%f2 ! (1_0) x0 = (double)fx0;
+.d0:
+ and %o5,-16,%o5 ! (1_0) signx0 &= -16;
+ and %o4,-8,%o4 ! (1_0) signy0 &= -8;
+
+ lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py;
+
+ lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px;
+ fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0;
+
+ fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0;
+
+ add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0;
+
+ and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+
+ cmp %l6,%o5
+ bl,pn %icc,.u4
+ and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff;
+.c4:
+ cmp %g5,%o5
+ bl,pn %icc,.u5
+ fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20;
+.c5:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u6
+ ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
+.c6:
+ cmp %g5,_0x7f800000
+ bge,pn %icc,.u7
+ sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0;
+.c7:
+ sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py;
+
+ faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8;
+ and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0;
+
+ lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0;
+
+ lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0);
+
+ cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000
+ bge,pn %icc,.update1 ! (2_0) if ( b0 > 0x7f800000 )
+ nop
+.cont1:
+ fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20;
+ sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (2_0) y0 = (double)fy0;
+
+ sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+
+ fstod %f2,%f2 ! (2_0) x0 = (double)fx0;
+ sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr;
+.d1:
+ lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py;
+ and %o5,-16,%o5 ! (2_0) signx0 &= -16;
+ faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7;
+
+ lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px;
+
+ fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0;
+
+ fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0;
+
+ add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0;
+ and %o4,-8,%o4 ! (2_0) signy0 &= -8;
+ fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20;
+
+ and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+
+ cmp %l6,%o5
+ bl,pn %icc,.u8
+ and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff;
+.c8:
+ cmp %o0,%o5
+ bl,pn %icc,.u9
+ fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20;
+.c9:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u10
+ faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6;
+.c10:
+ cmp %o0,_0x7f800000
+ bge,pn %icc,.u11
+ ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
+.c11:
+ sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0;
+
+ sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py;
+
+ faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8;
+ and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0;
+ fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20;
+
+ lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0;
+
+ lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0);
+
+ cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000
+ bge,pn %icc,.update2 ! (3_0) if ( b0 > 0x7f800000 )
+ nop
+.cont2:
+ fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20;
+ sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (3_0) y0 = (double)fy0;
+
+ faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5;
+ sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+
+ sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28;
+ fstod %f1,%f16 ! (3_0) x0 = (double)fx0;
+.d2:
+ faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7;
+ add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr;
+ and %o5,-16,%o5 ! (3_0) signx0 &= -16;
+
+ lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py;
+ fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20;
+
+ lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px;
+ fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0;
+
+ and %o4,-8,%o4 ! (3_0) signy0 &= -8;
+ fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0;
+
+ add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0;
+ fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20;
+
+ and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4;
+
+ cmp %l6,%o5
+ bl,pn %icc,.u12
+ and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff;
+.c12:
+ cmp %l5,%o5
+ bl,pn %icc,.u13
+ fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20;
+.c13:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u14
+ faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6;
+.c14:
+ ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l5,_0x7f800000
+ bge,pn %icc,.u15
+ fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20;
+.c15:
+ sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0;
+
+ sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py;
+
+ faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8;
+ and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0;
+ fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20;
+
+ lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0;
+ faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0);
+
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bge,pn %icc,.update3 ! (4_0) if ( b0 > 0x7f800000 )
+ nop
+.cont3:
+ fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20;
+ sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (4_0) y0 = (double)fy0;
+
+ faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5;
+ add %i3,stridex,%i3 ! px += stridex
+ fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20;
+
+ fstod %f2,%f2 ! (4_0) x0 = (double)fx0;
+ sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27;
+ sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28;
+.d3:
+ lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py;
+ add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr;
+ faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7;
+
+ fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20;
+ and %o5,-16,%o5 ! (4_0) signx0 &= -16;
+
+ lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px;
+ fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0;
+ faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2;
+
+ and %o4,-8,%o4 ! (4_1) signy0 &= -8;
+ fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0;
+
+ add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0;
+ fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20;
+
+ and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4;
+
+ cmp %l6,%o5
+ bl,pn %icc,.u16
+ and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff;
+.c16:
+ cmp %o7,%o5
+ bl,pn %icc,.u17
+ fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20;
+.c17:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u18
+ fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20;
+.c18:
+ cmp %o7,_0x7f800000
+ bge,pn %icc,.u19
+ faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6;
+.c19:
+ ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+
+ sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0;
+
+ sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8;
+ and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0;
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0);
+ sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5;
+ sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0;
+ faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f1 ! (5_1) fx0 = *(float*)((char*)px - addrc0);
+
+ fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000
+ bge,pn %icc,.update4 ! (5_1) if ( b0 > 0x7f800000 )
+ nop
+.cont4:
+ fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20;
+ fstod %f0,%f40 ! (5_1) y0 = (double)fy0;
+
+ faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5;
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+
+ add %i3,stridex,%i3 ! px += stridex
+ sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3;
+ fstod %f1,%f2 ! (5_1) x0 = (double)fx0;
+.d4:
+ sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28;
+ add %i1,stridey,%i1 ! py += stridey
+
+ faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7;
+ sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27;
+
+ lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py;
+ add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr;
+ fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0;
+
+ lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px;
+ and %o5,-16,%o5 ! (5_1) signx0 &= -16;
+ fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0;
+ faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2;
+
+ fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0;
+
+ ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0;
+ and %o4,-8,%o4 ! (5_1) signy0 &= -8;
+ fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20;
+
+ fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0;
+ and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4;
+
+ and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff;
+ cmp %l7,%o5
+ bl,pn %icc,.u20
+ fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20;
+.c20:
+ cmp %l6,%o5
+ bl,pn %icc,.u21
+ fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20;
+.c21:
+ cmp %l7,_0x7f800000
+ bge,pn %icc,.u22
+ faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6;
+.c22:
+ ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u23
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+.c23:
+ sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0;
+
+ fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0;
+ sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8;
+ and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0;
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+
+ lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0);
+ sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3;
+ sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0
+ faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5;
+
+ fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000
+ bge,pn %icc,.update5 ! (0_0) if ( b0 > 0x7f800000 )
+ faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0;
+.cont5:
+ fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20;
+ sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5;
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py;
+ sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr;
+ fstod %f2,%f2 ! (0_0) x0 = (double)fx0;
+.d5:
+ lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px;
+ and %o5,-16,%o5 ! (0_0) signx0 &= -16;
+ faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7;
+
+ ldx [%fp+tmp_pz],%o1
+ fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20;
+ and %o4,-8,%o4 ! (0_0) signy0 &= -8;
+ faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0;
+
+ fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0;
+ faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2;
+
+ fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0;
+ st %f2,[%o1] ! (0_1) *pz = ftmp0
+ add %o1,stridez,%o2
+ fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0;
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o2,%o4
+
+ ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0;
+ fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20;
+
+ fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0;
+ and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4;
+
+ and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff;
+ cmp %l6,%o5
+ bl,pn %icc,.u24
+ fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20;
+.c24:
+ cmp %g1,%o5
+ bl,pn %icc,.u25
+ fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20;
+.c25:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u26
+ faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6;
+.c26:
+ ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %g1,_0x7f800000
+ bge,pn %icc,.u27
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+.c27:
+ sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0;
+
+ fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0;
+ sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8;
+ and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0;
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+
+ lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0);
+ sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3;
+ sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0;
+ faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5;
+ add %o2,stridez,%o1 ! pz += stridez
+
+ fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000
+ bge,pn %icc,.update6 ! (1_0) if ( b0 > 0x7f800000 )
+ faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0;
+.cont6:
+ fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (1_0) y0 = (double)fy0;
+
+ faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5;
+ sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+
+ sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr;
+ fstod %f2,%f2 ! (1_0) x0 = (double)fx0;
+.d6:
+ faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7;
+ and %o5,-16,%o5 ! (1_0) signx0 &= -16;
+ and %o4,-8,%o4 ! (1_0) signy0 &= -8;
+
+ lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py;
+ fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0;
+
+ lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px;
+ fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0;
+ faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2;
+
+ fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0;
+ fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0;
+ st %f2,[%o2] ! (1_1) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o1,%o4
+
+ ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0;
+ fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20;
+
+ fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0;
+ and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4;
+
+ and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff;
+ cmp %l6,%o5
+ bl,pn %icc,.u28
+ fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20;
+.c28:
+ cmp %g5,%o5
+ bl,pn %icc,.u29
+ fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20;
+.c29:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u30
+ faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6;
+.c30:
+ ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %g5,_0x7f800000
+ bge,pn %icc,.u31
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+.c31:
+ sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0;
+
+ fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0;
+ sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8;
+ and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0;
+ fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20;
+
+ lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0;
+ add %o1,stridez,%o2 ! pz += stridez
+ faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3;
+
+ fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000
+ bge,pn %icc,.update7 ! (2_0) if ( b0 > 0x7f800000 )
+ faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0;
+.cont7:
+ fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20;
+ sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (2_0) y0 = (double)fy0;
+
+ faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5;
+ sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+
+ fstod %f2,%f2 ! (2_0) x0 = (double)fx0;
+ sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr;
+.d7:
+ lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py;
+ and %o5,-16,%o5 ! (2_0) signx0 &= -16;
+ faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7;
+
+ lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px;
+ fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0;
+
+ fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0;
+ faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2;
+
+ fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0;
+ fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0;
+ st %f1,[%o1] ! (2_1) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o2,%o4
+
+ ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0;
+ and %o4,-8,%o4 ! (2_0) signy0 &= -8;
+ fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20;
+
+ fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0;
+ and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4;
+
+ and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff;
+ cmp %l6,%o5
+ bl,pn %icc,.u32
+ fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20;
+.c32:
+ cmp %o0,%o5
+ bl,pn %icc,.u33
+ fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20;
+.c33:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u34
+ faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6;
+.c34:
+ ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %o0,_0x7f800000
+ bge,pn %icc,.u35
+ fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20;
+.c35:
+ sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0;
+
+ fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0;
+ sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8;
+ and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0;
+ fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20;
+
+ lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0;
+ add %o2,stridez,%o1 ! pz += stridez
+ faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3;
+
+ fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000
+ bge,pn %icc,.update8 ! (3_0) if ( b0 > 0x7f800000 )
+ faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0;
+.cont8:
+ fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20;
+ sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (3_0) y0 = (double)fy0;
+
+ faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5;
+ sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+ fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20;
+
+ sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28;
+ fstod %f1,%f16 ! (3_0) x0 = (double)fx0;
+.d8:
+ faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7;
+ add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr;
+ and %o5,-16,%o5 ! (3_0) signx0 &= -16;
+
+ lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py;
+ fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0;
+
+ lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px;
+ fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0;
+ faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2;
+
+ fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0;
+ and %o4,-8,%o4 ! (3_0) signy0 &= -8;
+ st %f1,[%o2] ! (3_1) *pz = ftmp0;
+ fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0;
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o1,%o4
+
+ ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0;
+ fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20;
+
+ fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0;
+ and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4;
+
+ and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff;
+ cmp %l6,%o5
+ bl,pn %icc,.u36
+ fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20;
+.c36:
+ cmp %l5,%o5
+ bl,pn %icc,.u37
+ fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20;
+.c37:
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.u38
+ faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6;
+.c38:
+ ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l5,_0x7f800000
+ bge,pn %icc,.u39
+ fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20;
+.c39:
+ sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0;
+
+ fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0;
+ sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8;
+ and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0;
+ fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20;
+
+ lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0;
+ add %o1,stridez,%o2 ! pz += stridez
+ faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3;
+
+ fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bge,pn %icc,.update9 ! (4_0) if ( b0 > 0x7f800000 )
+ faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0;
+.cont9:
+ fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20;
+ sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (4_0) y0 = (double)fy0;
+
+ faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5;
+ sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+ fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20;
+
+ fstod %f2,%f2 ! (4_0) x0 = (double)fx0;
+ sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28;
+.d9:
+ lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py;
+ add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr;
+ faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7;
+
+ fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20;
+ and %o5,-16,%o5 ! (4_0) signx0 &= -16;
+ faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0;
+
+ subcc counter,5,counter
+ bneg,pn %icc,.tail
+ nop
+
+ ba .main_loop
+ nop
+
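+! The .main_loop below is unrolled six ways and software pipelined: the
+! (i_j) tags in the comments appear to track element i of the group started
+! j passes ago, so each pass issues six new fdivd's while it finishes the
+! polynomial and stores the six results begun earlier (counter is reduced
+! by 6 per pass).  A rough per-element sketch, pieced together from the
+! annotations in this hunk (K0..K9, cadd_arr and cmul_arr are defined
+! elsewhere in this file):
+!
+!	/* divide the smaller-magnitude operand by the larger one */
+!	x  = (double)fy0 / (double)fx0;
+!	x2 = x * x;
+!	p  = ((((((((K9 * x2 + K8) * x2 + K7) * x2 + K6) * x2 + K5) * x2
+!		+ K4) * x2 + K3) * x2 + K2) * x2 + K1) * x2 + K0;
+!	/* cadd0/cmul0 fold the octant back in */
+!	*pz = (float)(cadd0 + cmul0 * (p * x));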
+ .align 16
+.main_loop:
+ lda [%i3]0x82,%l4 ! (5_1) ux0 = *(int*)px;
+ nop
+ fdivd %f40,%f2,%f62 ! (4_1) x0 = y0 / x0;
+ faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2;
+
+ fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0;
+ and %o4,-8,%o4 ! (4_1) signy0 &= -8;
+ st %f22,[%o1] ! (4_2) *pz = ftmp0;
+ fmuld %f6,%f6,%f24 ! (3_1) x20 = x0 * x0;
+
+ ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o1 ! (4_1) ltmp0 += signx0;
+ fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20;
+
+ fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0;
+ and %l4,_0x7fffffff,%l6 ! (5_1) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4;
+
+ and %l3,_0x7fffffff,%o7 ! (5_1) ay0 = uy0 & 0x7fffffff;
+ fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20;
+
+ cmp %l6,%o5
+ bl,pn %icc,.up0
+ fmuld K9,%f24,%f40 ! (3_1) dtmp0 = K9 * x20;
+.co0:
+ nop
+ cmp %o7,%o5
+ bl,pn %icc,.up1
+ faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6;
+.co1:
+ ldd [%o1+%o4],%f26 ! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.up2
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+.co2:
+ sub %l6,%o7,%o1 ! (5_1) ldiff0 = ax0 - ay0;
+ cmp %o7,_0x7f800000
+ bge,pn %icc,.up3
+
+ fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0;
+.co3:
+ sra %o1,31,%o7 ! (5_1) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (5_1) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (3_1) dtmp0 += K8;
+ and %l6,%o7,%o1 ! (5_1) addrc0 &= ldiff0;
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ lda [%i1+%o1]0x82,%f0 ! (5_1) fy0 = *(float*)((char*)py + addrc0);
+ sll %o7,5,%l6 ! (5_1) ltmp0 = ldiff0 << 5;
+ sub %i3,%o1,%o4 ! (5_1) (char*)px - addrc0;
+ faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (5_1) fx0 = *(float*)((char*)px - addrc0);
+
+ fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (5_1) b0 ? 0x7f800000
+ bge,pn %icc,.update10 ! (5_1) if ( b0 > 0x7f800000 )
+ faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0;
+.cont10:
+ fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20;
+ nop
+ fstod %f0,%f40 ! (5_1) y0 = (double)fy0;
+
+ faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5;
+ add %o2,stridez,%o1 ! pz += stridez
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+
+ sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3;
+ add %i3,stridex,%i3 ! px += stridex
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+.den0:
+ sra %l3,28,%o4 ! (5_1) signy0 = uy0 >> 28;
+ add %i1,stridey,%i1 ! py += stridey
+
+ faddd %f36,K7,%f36 ! (3_1) dtmp0 += K7;
+ sra %l4,27,%o5 ! (5_1) signx0 = ux0 >> 27;
+
+ lda [%i1]0x82,%l4 ! (0_0) uy0 = *(int*)py;
+ add %l6,cadd_arr,%l6 ! (5_1) ltmp0 += (char*)cadd_arr;
+ fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0;
+
+ lda [%i3]0x82,%l3 ! (0_0) ux0 = *(int*)px;
+ and %o5,-16,%o5 ! (5_1) signx0 &= -16;
+ fdivd %f40,%f2,%f14 ! (5_1) x0 = y0 / x0;
+ faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2;
+
+ fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0;
+ st %f2,[%o2] ! (5_2) *pz = ftmp0;
+ fmuld %f62,%f62,%f4 ! (4_1) x20 = x0 * x0;
+
+ ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o2 ! (5_1) ltmp0 += signx0;
+ and %o4,-8,%o4 ! (5_1) signy0 &= -8;
+ fmuld %f36,%f24,%f36 ! (3_1) dtmp0 *= x20;
+
+ fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0;
+ and %l4,_0x7fffffff,%l7 ! (0_0) ay0 = uy0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4;
+
+ and %l3,_0x7fffffff,%l6 ! (0_0) ax0 = ux0 & 0x7fffffff;
+ fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20;
+
+ cmp %l7,%o5
+ bl,pn %icc,.up4
+ fmuld K9,%f4,%f40 ! (4_1) dtmp0 = K9 * x20;
+.co4:
+ nop
+ cmp %l6,%o5
+ bl,pn %icc,.up5
+ faddd %f36,K6,%f20 ! (3_1) dtmp0 += K6;
+.co5:
+ ldd [%o2+%o4],%f36 ! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l7,_0x7f800000
+ bge,pn %icc,.up6
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+.co6:
+ sub %l6,%l7,%o2 ! (0_0) ldiff0 = ax0 - ay0;
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.up7
+
+ fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0;
+.co7:
+ sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (4_1) dtmp0 += K8;
+ and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0;
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+
+ lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0);
+ sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3;
+ sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0
+ faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5;
+ add %o1,stridez,%o2 ! pz += stridez
+
+ fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (0_0) b0 ? 0x7f800000
+ bge,pn %icc,.update11 ! (0_0) if ( b0 > 0x7f800000 )
+ faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0;
+.cont11:
+ fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20;
+ sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5;
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py;
+ sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr;
+ fstod %f2,%f2 ! (0_0) x0 = (double)fx0;
+.den1:
+ lda [%i3]0x82,%l4 ! (1_0) ux0 = *(int*)px;
+ and %o5,-16,%o5 ! (0_0) signx0 &= -16;
+ faddd %f34,K7,%f34 ! (4_1) dtmp0 += K7;
+
+ fmuld %f12,%f24,%f20 ! (3_1) dtmp0 *= x20;
+ and %o4,-8,%o4 ! (0_0) signy0 &= -8;
+ faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0;
+
+ fdivd %f40,%f2,%f12 ! (0_0) x0 = y0 / x0;
+ faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2;
+
+ fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0;
+ nop
+ st %f2,[%o1] ! (0_1) *pz = ftmp0
+ fmuld %f14,%f14,%f22 ! (5_1) x20 = x0 * x0;
+
+ ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o1 ! (0_0) ltmp0 += signx0;
+ fmuld %f34,%f4,%f34 ! (4_1) dtmp0 *= x20;
+
+ fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0;
+ and %l4,_0x7fffffff,%l6 ! (1_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f20,K4,%f20 ! (3_1) dtmp0 += K4;
+
+ and %l3,_0x7fffffff,%g1 ! (1_0) ay0 = uy0 & 0x7fffffff;
+ fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20;
+
+ cmp %l6,%o5
+ bl,pn %icc,.up8
+ fmuld K9,%f22,%f40 ! (5_1) dtmp0 = K9 * x20;
+.co8:
+ nop
+ cmp %g1,%o5
+ bl,pn %icc,.up9
+ faddd %f34,K6,%f18 ! (4_1) dtmp0 += K6;
+.co9:
+ ldd [%o1+%o4],%f34 ! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.up10
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+.co10:
+ sub %l6,%g1,%o1 ! (1_0) ldiff0 = ax0 - ay0;
+ cmp %g1,_0x7f800000
+ bge,pn %icc,.up11
+
+ fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0;
+.co11:
+ sra %o1,31,%g1 ! (1_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (1_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (5_1) dtmp0 += K8;
+ and %l6,%g1,%o1 ! (1_0) addrc0 &= ldiff0;
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+
+ lda [%i1+%o1]0x82,%f0 ! (1_0) fy0 = *(float*)((char*)py + addrc0);
+ sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3;
+ sub %i3,%o1,%o4 ! (1_0) (char*)px - addrc0;
+ faddd %f20,K3,%f20 ! (3_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (1_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %g1,5,%l6 ! (1_0) ltmp0 = ldiff0 << 5;
+ add %o2,stridez,%o1 ! pz += stridez
+
+ fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (1_0) b0 ? 0x7f800000
+ bge,pn %icc,.update12 ! (1_0) if ( b0 > 0x7f800000 )
+ faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0;
+.cont12:
+ fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20;
+ add %i1,stridey,%i1 ! py += stridey
+ nop
+ fstod %f0,%f40 ! (1_0) y0 = (double)fy0;
+
+ faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5;
+ sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+
+ sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr;
+ fstod %f2,%f2 ! (1_0) x0 = (double)fx0;
+.den2:
+ faddd %f32,K7,%f32 ! (5_1) dtmp0 += K7;
+ and %o5,-16,%o5 ! (1_0) signx0 &= -16;
+ and %o4,-8,%o4 ! (1_0) signy0 &= -8;
+
+ lda [%i1]0x82,%l4 ! (2_0) uy0 = *(int*)py;
+ fmuld %f10,%f4,%f18 ! (4_1) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0;
+
+ lda [%i3]0x82,%l3 ! (2_0) ux0 = *(int*)px;
+ fdivd %f40,%f2,%f10 ! (1_0) x0 = y0 / x0;
+ faddd %f20,K2,%f40 ! (3_1) dtmp0 += K2;
+
+ fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0;
+ nop
+ st %f2,[%o2] ! (1_1) *pz = ftmp0;
+ fmuld %f12,%f12,%f20 ! (0_0) x20 = x0 * x0;
+
+ ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o2 ! (1_0) ltmp0 += signx0;
+ fmuld %f32,%f22,%f32 ! (5_1) dtmp0 *= x20;
+
+ fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0;
+ and %l3,_0x7fffffff,%l6 ! (2_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f18,K4,%f18 ! (4_1) dtmp0 += K4;
+
+ and %l4,_0x7fffffff,%g5 ! (2_0) ay0 = uy0 & 0x7fffffff;
+ fmuld %f40,%f24,%f38 ! (3_1) dtmp0 *= x20;
+
+ cmp %l6,%o5
+ bl,pn %icc,.up12
+ fmuld K9,%f20,%f40 ! (0_0) dtmp0 = K9 * x20;
+.co12:
+ nop
+ cmp %g5,%o5
+ bl,pn %icc,.up13
+ faddd %f32,K6,%f16 ! (5_1) dtmp0 += K6;
+.co13:
+ ldd [%o2+%o4],%f32 ! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.up14
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+.co14:
+ sub %l6,%g5,%o2 ! (2_0) ldiff0 = ax0 - ay0;
+ cmp %g5,_0x7f800000
+ bge,pn %icc,.up15
+
+ fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0;
+.co15:
+ sra %o2,31,%g5 ! (2_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (2_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (3_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (0_0) dtmp0 += K8;
+ and %l6,%g5,%o2 ! (2_0) addrc0 &= ldiff0;
+ fmuld %f16,%f22,%f16 ! (5_1) dtmp0 *= x20;
+
+ lda [%i1+%o2]0x82,%f0 ! (2_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (2_0) (char*)px - addrc0;
+ add %o1,stridez,%o2 ! pz += stridez
+ faddd %f18,K3,%f18 ! (4_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (2_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %o0,3,%o0 ! (3_1) cmul0_ind = ldiff0 << 3;
+ add %i3,stridex,%i3 ! px += stridex
+
+ fmuld %f38,%f24,%f38 ! (3_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (2_0) b0 ? 0x7f800000
+ bge,pn %icc,.update13 ! (2_0) if ( b0 > 0x7f800000 )
+ faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0;
+.cont13:
+ fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20;
+ sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (2_0) y0 = (double)fy0;
+
+ faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5;
+ sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27;
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+
+ fstod %f2,%f2 ! (2_0) x0 = (double)fx0;
+ sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28;
+ add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr;
+.den3:
+ lda [%i1]0x82,%l3 ! (3_0) uy0 = *(int*)py;
+ and %o5,-16,%o5 ! (2_0) signx0 &= -16;
+ faddd %f30,K7,%f30 ! (0_0) dtmp0 += K7;
+
+ lda [%i3]0x82,%l4 ! (3_0) ux0 = *(int*)px;
+ fmuld %f8,%f22,%f16 ! (5_1) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (3_1) dtmp0 += K0;
+
+ fdivd %f40,%f2,%f8 ! (2_0) x0 = y0 / x0;
+ faddd %f18,K2,%f40 ! (4_1) dtmp0 += K2;
+
+ fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0;
+ st %f1,[%o1] ! (2_1) *pz = ftmp0;
+ fmuld %f10,%f10,%f18 ! (1_0) x20 = x0 * x0;
+
+ ldd [cmul_arr+%o0],%f2 ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o1 ! (2_0) ltmp0 += signx0;
+ and %o4,-8,%o4 ! (2_0) signy0 &= -8;
+ fmuld %f30,%f20,%f30 ! (0_0) dtmp0 *= x20;
+
+ fmuld %f38,%f6,%f6 ! (3_1) x0 = dtmp0 * x0;
+ and %l4,_0x7fffffff,%l6 ! (3_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f16,K4,%f24 ! (5_1) dtmp0 += K4;
+
+ and %l3,_0x7fffffff,%o0 ! (3_0) ay0 = uy0 & 0x7fffffff;
+ fmuld %f40,%f4,%f38 ! (4_1) dtmp0 *= x20;
+
+ cmp %l6,%o5
+ bl,pn %icc,.up16
+ fmuld K9,%f18,%f40 ! (1_0) dtmp0 = K9 * x20;
+.co16:
+ nop
+ cmp %o0,%o5
+ bl,pn %icc,.up17
+ faddd %f30,K6,%f16 ! (0_0) dtmp0 += K6;
+.co17:
+ ldd [%o1+%o4],%f30 ! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.up18
+ fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20;
+.co18:
+ sub %l6,%o0,%o1 ! (3_0) ldiff0 = ax0 - ay0;
+ cmp %o0,_0x7f800000
+ bge,pn %icc,.up19
+
+ fmuld %f2,%f6,%f6 ! (3_1) dtmp0 = cmul0 * x0;
+.co19:
+ sra %o1,31,%o0 ! (3_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (3_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (4_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (1_0) dtmp0 += K8;
+ and %l6,%o0,%o1 ! (3_0) addrc0 &= ldiff0;
+ fmuld %f16,%f20,%f16 ! (0_0) dtmp0 *= x20;
+
+ lda [%i1+%o1]0x82,%f0 ! (3_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o1,%o4 ! (3_0) (char*)px - addrc0;
+ add %o2,stridez,%o1 ! pz += stridez
+ faddd %f24,K3,%f24 ! (5_1) dtmp0 += K3;
+
+ lda [%o4]0x82,%f1 ! (3_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %l5,3,%l5 ! (4_1) cmul0_ind = ldiff0 << 3;
+ add %i3,stridex,%i3 ! px += stridex
+
+ fmuld %f38,%f4,%f38 ! (4_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (3_0) b0 ? 0x7f800000
+ bge,pn %icc,.update14 ! (3_0) if ( b0 > 0x7f800000 )
+ faddd %f28,%f6,%f4 ! (3_1) dtmp0 = cadd0 + dtmp0;
+.cont14:
+ fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20;
+ sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (3_0) y0 = (double)fy0;
+
+ faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5;
+ sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27;
+ fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20;
+
+ sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28;
+ fstod %f1,%f16 ! (3_0) x0 = (double)fx0;
+.den4:
+ faddd %f28,K7,%f28 ! (1_0) dtmp0 += K7;
+ add %l6,cadd_arr,%l6 ! (3_0) ltmp0 += (char*)cadd_arr;
+ and %o5,-16,%o5 ! (3_0) signx0 &= -16;
+
+ lda [%i1]0x82,%l4 ! (4_0) uy0 = *(int*)py;
+ fmuld %f2,%f20,%f2 ! (0_0) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (4_1) dtmp0 += K0;
+
+ lda [%i3]0x82,%l3 ! (4_0) ux0 = *(int*)px;
+ fdivd %f40,%f16,%f6 ! (3_0) x0 = y0 / x0;
+ faddd %f24,K2,%f24 ! (5_1) dtmp0 += K2;
+
+ fdtos %f4,%f1 ! (3_1) ftmp0 = (float)dtmp0;
+ and %o4,-8,%o4 ! (3_0) signy0 &= -8;
+ st %f1,[%o2] ! (3_1) *pz = ftmp0;
+ fmuld %f8,%f8,%f16 ! (2_0) x20 = x0 * x0;
+
+ ldd [cmul_arr+%l5],%f0 ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ add %l6,%o5,%o2 ! (3_0) ltmp0 += signx0;
+ fmuld %f28,%f18,%f28 ! (1_0) dtmp0 *= x20;
+
+ fmuld %f38,%f62,%f62 ! (4_1) x0 = dtmp0 * x0;
+ and %l3,_0x7fffffff,%l6 ! (4_0) ax0 = ux0 & 0x7fffffff;
+ sethi %hi(0x00800000),%o5
+ faddd %f2,K4,%f2 ! (0_0) dtmp0 += K4;
+
+ and %l4,_0x7fffffff,%l5 ! (4_0) ay0 = uy0 & 0x7fffffff;
+ fmuld %f24,%f22,%f38 ! (5_1) dtmp0 *= x20;
+
+ cmp %l6,%o5
+ bl,pn %icc,.up20
+ fmuld K9,%f16,%f40 ! (2_0) dtmp0 = K9 * x20;
+.co20:
+ nop
+ cmp %l5,%o5
+ bl,pn %icc,.up21
+ faddd %f28,K6,%f4 ! (1_0) dtmp0 += K6;
+.co21:
+ ldd [%o2+%o4],%f28 ! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
+ cmp %l6,_0x7f800000
+ bge,pn %icc,.up22
+ fmuld %f2,%f20,%f24 ! (0_0) dtmp0 *= x20;
+.co22:
+ sub %l6,%l5,%o2 ! (4_0) ldiff0 = ax0 - ay0;
+ cmp %l5,_0x7f800000
+ bge,pn %icc,.up23
+
+ fmuld %f0,%f62,%f62 ! (4_1) dtmp0 = cmul0 * x0;
+.co23:
+ sra %o2,31,%l5 ! (4_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (4_0) addrc0 = (char*)px - (char*)py;
+ faddd %f38,K1,%f38 ! (5_1) dtmp0 += K1;
+
+ faddd %f40,K8,%f40 ! (2_0) dtmp0 += K8;
+ and %l6,%l5,%o2 ! (4_0) addrc0 &= ldiff0;
+ fmuld %f4,%f18,%f4 ! (1_0) dtmp0 *= x20;
+
+ lda [%i1+%o2]0x82,%f0 ! (4_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (4_0) (char*)px - addrc0;
+ add %o1,stridez,%o2 ! pz += stridez
+ faddd %f24,K3,%f24 ! (0_0) dtmp0 += K3;
+
+ lda [%o4]0x82,%f2 ! (4_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %o7,3,%o7 ! (5_1) cmul0_ind = ldiff0 << 3;
+ add %i3,stridex,%i3 ! px += stridex
+
+ fmuld %f38,%f22,%f38 ! (5_1) dtmp0 *= x20;
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bge,pn %icc,.update15 ! (4_0) if ( b0 > 0x7f800000 )
+ faddd %f26,%f62,%f22 ! (4_1) dtmp0 = cadd0 + dtmp0;
+.cont15:
+ fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20;
+ sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5;
+ add %i1,stridey,%i1 ! py += stridey
+ fstod %f0,%f40 ! (4_0) y0 = (double)fy0;
+
+ faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5;
+ sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27;
+ fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20;
+
+ fstod %f2,%f2 ! (4_0) x0 = (double)fx0;
+ sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28;
+.den5:
+ lda [%i1]0x82,%l3 ! (5_0) uy0 = *(int*)py;
+	subcc	counter,6,counter	! counter -= 6;
+ add %l6,cadd_arr,%l6 ! (4_0) ltmp0 += (char*)cadd_arr;
+ faddd %f26,K7,%f26 ! (2_0) dtmp0 += K7;
+
+ fmuld %f62,%f18,%f4 ! (1_0) dtmp0 *= x20;
+ and %o5,-16,%o5 ! (4_0) signx0 &= -16;
+ bpos,pt %icc,.main_loop
+ faddd %f38,K0,%f38 ! (5_1) dtmp0 += K0;
+
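+! .tail drains the software pipeline once fewer than six elements remain:
+! it finishes the polynomials already in flight and stores each pending
+! result, re-checking counter before every store, then branches back to
+! .begin to handle whatever is left.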
+.tail:
+ addcc counter,5,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o1,%o4
+
+ faddd %f24,K2,%f40 ! (0_1) dtmp0 += K2;
+
+ fdtos %f22,%f22 ! (4_2) ftmp0 = (float)dtmp0;
+ st %f22,[%o1] ! (4_2) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o2,%o4
+
+ ldd [cmul_arr+%o7],%f0 ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+ fmuld %f26,%f16,%f26 ! (2_1) dtmp0 *= x20;
+
+ fmuld %f38,%f14,%f14 ! (5_2) x0 = dtmp0 * x0;
+ faddd %f4,K4,%f4 ! (1_1) dtmp0 += K4;
+
+ fmuld %f40,%f20,%f38 ! (0_1) dtmp0 *= x20;
+
+
+ faddd %f26,K6,%f22 ! (2_1) dtmp0 += K6;
+
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+
+ fmuld %f0,%f14,%f14 ! (5_2) dtmp0 = cmul0 * x0;
+ faddd %f38,K1,%f38 ! (0_1) dtmp0 += K1;
+
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ faddd %f4,K3,%f4 ! (1_1) dtmp0 += K3;
+
+ fmuld %f38,%f20,%f38 ! (0_1) dtmp0 *= x20;
+ faddd %f36,%f14,%f20 ! (5_2) dtmp0 = cadd0 + dtmp0;
+
+ faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5;
+ add %o2,stridez,%o1 ! pz += stridez
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+
+ sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3;
+
+ fmuld %f14,%f16,%f22 ! (2_1) dtmp0 *= x20;
+ faddd %f38,K0,%f38 ! (0_1) dtmp0 += K0;
+
+ faddd %f4,K2,%f40 ! (1_1) dtmp0 += K2;
+
+ fdtos %f20,%f2 ! (5_2) ftmp0 = (float)dtmp0;
+ st %f2,[%o2] ! (5_2) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o1,%o4
+
+ ldd [cmul_arr+%l7],%f0 ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+
+ fmuld %f38,%f12,%f12 ! (0_1) x0 = dtmp0 * x0;
+ faddd %f22,K4,%f22 ! (2_1) dtmp0 += K4;
+
+ fmuld %f40,%f18,%f38 ! (1_1) dtmp0 *= x20;
+
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ fmuld %f0,%f12,%f12 ! (0_1) dtmp0 = cmul0 * x0;
+ faddd %f38,K1,%f38 ! (1_1) dtmp0 += K1;
+
+ sll %g1,3,%g1 ! (1_1) cmul0_ind = ldiff0 << 3;
+ faddd %f22,K3,%f22 ! (2_1) dtmp0 += K3;
+
+ add %o1,stridez,%o2 ! pz += stridez
+
+ fmuld %f38,%f18,%f38 ! (1_1) dtmp0 *= x20;
+ faddd %f34,%f12,%f18 ! (0_1) dtmp0 = cadd0 + dtmp0;
+
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ faddd %f38,K0,%f38 ! (1_1) dtmp0 += K0;
+
+ faddd %f22,K2,%f40 ! (2_1) dtmp0 += K2;
+
+ fdtos %f18,%f2 ! (0_1) ftmp0 = (float)dtmp0;
+ st %f2,[%o1] ! (0_1) *pz = ftmp0
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o2,%o4
+
+ ldd [cmul_arr+%g1],%f0 ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+
+ fmuld %f38,%f10,%f10 ! (1_1) x0 = dtmp0 * x0;
+
+ fmuld %f40,%f16,%f38 ! (2_1) dtmp0 *= x20;
+
+ fmuld %f0,%f10,%f10 ! (1_1) dtmp0 = cmul0 * x0;
+ faddd %f38,K1,%f38 ! (2_1) dtmp0 += K1;
+
+ sll %g5,3,%g5 ! (2_1) cmul0_ind = ldiff0 << 3;
+
+ add %o2,stridez,%o1 ! pz += stridez
+
+ fmuld %f38,%f16,%f38 ! (2_1) dtmp0 *= x20;
+ faddd %f32,%f10,%f16 ! (1_1) dtmp0 = cadd0 + dtmp0;
+
+ faddd %f38,K0,%f38 ! (2_1) dtmp0 += K0;
+
+ fdtos %f16,%f2 ! (1_1) ftmp0 = (float)dtmp0;
+ st %f2,[%o2] ! (1_1) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg,a,pn %icc,.begin
+ or %g0,%o1,%o4
+
+ ldd [cmul_arr+%g5],%f0 ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
+
+ fmuld %f38,%f8,%f8 ! (2_1) x0 = dtmp0 * x0;
+
+ fmuld %f0,%f8,%f8 ! (2_1) dtmp0 = cmul0 * x0;
+
+ add %o1,stridez,%o2 ! pz += stridez
+
+ faddd %f30,%f8,%f24 ! (2_1) dtmp0 = cadd0 + dtmp0;
+
+ fdtos %f24,%f1 ! (2_1) ftmp0 = (float)dtmp0;
+ st %f1,[%o1] ! (2_1) *pz = ftmp0;
+
+ ba .begin
+ or %g0,%o2,%o4
+
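+! .spec0: slow path for an element whose x or y has |bits| >= 0x7f800000
+! (Inf or NaN).  If either operand is a NaN (|bits| > 0x7f800000), label 2
+! simply multiplies the two floats so a NaN is stored; otherwise the Inf
+! cases produce the appropriate multiple of M_PI_4, sign-adjusted through
+! cmul_arr.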
+ .align 16
+.spec0:
+ cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000
+	bg	2f			! if ( ax0 > 0x7f800000 )
+ srl %l3,30,%l3 ! signx0 = (unsigned)ux0 >> 30;
+
+ cmp %l7,_0x7f800000 ! ay0 ? 0x7f800000
+	bg	2f			! if ( ay0 > 0x7f800000 )
+ and %l3,2,%l3 ! signx0 &= 2;
+
+ sra %l4,31,%l4 ! signy0 = uy0 >> 31;
+ bne,a 1f ! if (ay0 != 0x7f800000)
+ add %l3,%l3,%l3 ! signx0 += signx0;
+
+ cmp %l6,_0x7f800000 ! ax0 ? 0x7f800000
+ bne,a 1f ! if ( ax0 != 0x7f800000 )
+ add %g0,2,%l3 ! signx0 = 2
+
+ add %l3,1,%l3 ! signx0 ++;
+1:
+ sll %l4,3,%l4 ! signy0 <<= 3;
+ st %l3,[%fp+tmp_pz] ! STORE signx0
+
+ ldd [cmul_arr+88],%f0 ! LOAD M_PI_4
+
+ ld [%fp+tmp_pz],%f2 ! LOAD signx0
+
+ ldd [cmul_arr+%l4],%f4 ! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0);
+
+ add %i1,stridey,%i1 ! py += stridey;
+ fitod %f2,%f2 ! dtmp1 = (double)signx0;
+
+ add %i3,stridex,%i3 ! px += stridex;
+
+ fmuld %f2,%f0,%f0 ! res = signx0 * M_PI_4;
+
+ fmuld %f0,%f4,%f0 ! res *= dtmp0;
+ fdtos %f0,%f0 ! ftmp0 = (float) res;
+ st %f0,[%o4] ! *pz = ftmp0;
+
+ ba .begin1
+ add %o4,stridez,%o4 ! pz += stridez;
+2:
+ std %l6,[%fp+tmp_pz] ! *(float*)&ax0, *(float*)&ay0
+ ldd [%fp+tmp_pz],%f0 ! *(float*)&ax0, *(float*)&ay0
+
+ add %i1,stridey,%i1 ! py += stridey;
+
+ fmuls %f0,%f1,%f0 ! ftmp0 = *(float*)&ax0 * *(float*)&ay0;
+	add	%i3,stridex,%i3		! px += stridex;
+ st %f0,[%o4] ! *pz = ftmp0;
+
+ ba .begin1
+ add %o4,stridez,%o4 ! pz += stridez;
+
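+! .spec1: slow path for an element with a zero or subnormal operand.  When
+! both magnitude words test zero, the result is loaded straight from
+! cadd_arr based on the operand signs; otherwise any subnormal operand is
+! rebuilt by hand: its magnitude bits are converted with fitod and scaled
+! by C2ONM149 (presumably 2^-149) and by the sign word, before rejoining
+! the common path at .spec1_cont.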
+ .align 16
+.spec1:
+ cmp %l6,0
+ bne,pn %icc,1f
+ nop
+
+ cmp %l7,0
+ bne,pn %icc,1f
+ nop
+
+ sra %l4,28,%l4 ! signy0 = uy0 >> 28;
+
+ sra %l3,27,%l3 ! signx0 = ux0 >> 27;
+ and %l4,-8,%l4 ! signy0 &= -8;
+
+ sra %o2,31,%o2 ! ldiff0 >>= 31;
+ and %l3,-16,%l3 ! signx0 &= -16;
+
+ sll %o2,5,%o2 ! ldiff0 <<= 5;
+ add %l4,%l3,%l3 ! signx0 += signy0;
+
+ add %o2,%l3,%l3 ! signx0 += ldiff0;
+ add %i1,stridey,%i1 ! py += stridey;
+
+ ldd [cadd_arr+%l3],%f0 ! res = *(double*)((char*)(cadd_arr + 7) + signx0);
+ add %i3,stridex,%i3 ! px += stridex;
+
+ fdtos %f0,%f0 ! ftmp0 = (float) res;
+ st %f0,[%o4] ! *pz = ftmp0;
+
+ ba .begin1
+ add %o4,stridez,%o4 ! pz += stridez;
+1:
+ stx %o4,[%fp+tmp_pz]
+ sra %o2,31,%l7 ! (0_0) ldiff0 >>= 31;
+ sub %i3,%i1,%l6 ! (0_0) addrc0 = (char*)px - (char*)py;
+
+ and %l6,%l7,%o2 ! (0_0) addrc0 &= ldiff0;
+
+ lda [%i1+%o2]0x82,%f0 ! (0_0) fy0 = *(float*)((char*)py + addrc0);
+ sub %i3,%o2,%o4 ! (0_0) (char*)px - addrc0
+
+ lda [%i1+%o2]0x82,%l5 ! (0_0) fy0 = *(float*)((char*)py + addrc0);
+
+ lda [%o4]0x82,%f2 ! (0_0) fx0 = *(float*)((char*)px - addrc0);
+ sll %l7,5,%l6 ! (0_0) ltmp0 = ldiff0 << 5;
+
+ lda [%o4]0x82,%g5 ! (0_0) fx0 = *(float*)((char*)px - addrc0);
+
+ sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27;
+ add %i1,stridey,%i1 ! py += stridey
+
+ add %i3,stridex,%i3 ! px += stridex
+
+ lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py;
+ sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28;
+
+ add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr;
+
+ and %l5,_0x7fffffff,%l4
+ sethi %hi(0x00800000),%g1
+
+ cmp %l4,%g1
+ bge,a %icc,1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+ ldd [cmul_arr+96],%f40
+ sra %l5,28,%l4 ! itmp0 >>= 28;
+
+ and %l4,-8,%l4
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f40,%f0,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f40,%f0,%f40 ! dtmp0 *= dsign;
+1:
+ and %g5,_0x7fffffff,%l4
+ cmp %l4,%g1
+ bge,a %icc,.spec1_cont
+ fstod %f2,%f2 ! (0_0) x0 = (double)fx0;
+
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %g5,28,%l4 ! itmp0 >>= 28;
+
+	and	%l4,-8,%l4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%l4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ ba .spec1_cont
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+
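+! .updateN stubs, one per pipeline stage: control arrives here when the
+! range checks flagged the operands of the element currently at stage N.
+! Broadly, the element is either given harmless dummy operands (fzero and
+! the first cmul_arr word) when it lies past the requested count, deferred
+! by saving the remaining count and the current py/px into tmp_counter,
+! tmp_py and tmp_px so those elements are reprocessed later from .begin,
+! or, for zero and subnormal inputs, has its doubles rebuilt via C2ONM149
+! before control rejoins the pipeline at the matching .contN/.dN/.denN label.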
+ .align 16
+.update0:
+ cmp counter,0
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont0
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,0,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,0,counter
+ ba .cont0
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_px]
+ st %f2,[%fp+tmp_px+4]
+ ld [%fp+tmp_px],%o4
+
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ add %i3,stridex,%i3 ! px += stridex
+ add %i1,stridey,%i1 ! py += stridey
+
+ ld [%fp+tmp_px+4],%o4
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27;
+
+ sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28;
+ ba .d0
+ add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update1:
+ cmp counter,1
+ bg,pn %icc,1f
+ nop
+
+ fzero %f0
+ ba .cont1
+ ld [cmul_arr],%f2
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,1,counter
+ ba .cont1
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_px]
+ st %f2,[%fp+tmp_px+4]
+ ld [%fp+tmp_px],%o4
+ fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+
+ add %i1,stridey,%i1 ! py += stridey
+
+ ld [%fp+tmp_px+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5;
+ sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+
+ sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28;
+ ba .d1
+ add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update2:
+ cmp counter,2
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f1
+ ba .cont2
+ fzeros %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f1
+ or %g0,2,counter
+ ba .cont2
+ fzeros %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ std %f0,[%fp+tmp_px]
+ ld [%fp+tmp_px],%o4
+ fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20;
+
+ faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f16,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ add %i1,stridey,%i1 ! py += stridey
+
+ ld [%fp+tmp_px+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f1,%f16 ! (5_1) x0 = (double)fx0;
+
+ fabss %f1,%f16 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f16,%f0,%f16 ! dtmp0 *= dsign;
+1:
+ sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5;
+ sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27;
+
+ add %i3,stridex,%i3 ! px += stridex
+ ba .d2
+ sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28;
+
+ .align 16
+.update3:
+ cmp counter,3
+ bg,pn %icc,1f
+ nop
+
+ fzero %f0
+ ba .cont3
+ ld [cmul_arr],%f2
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,3,counter
+ ba .cont3
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_px]
+ st %f2,[%fp+tmp_px+4]
+ ld [%fp+tmp_px],%o4
+ fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ add %i1,stridey,%i1 ! py += stridey
+ faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5;
+ fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20;
+
+ ld [%fp+tmp_px+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5;
+ sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27;
+
+ add %i3,stridex,%i3 ! px += stridex
+ ba .d3
+ sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28;
+
+ .align 16
+.update4:
+ cmp counter,4
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f1
+ ba .cont4
+ fzeros %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f1
+ or %g0,4,counter
+ ba .cont4
+ fzeros %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ std %f0,[%fp+tmp_px]
+ ld [%fp+tmp_px],%o4
+ fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20;
+
+ and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff
+ cmp %o1,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f14 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f14,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f14 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f14,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5;
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_px+4],%o4
+ and %o4,_0x7fffffff,%o1 ! itmp0 & 0x7fffffff
+ cmp %o1,%o5
+ bge,a 1f
+ fstod %f1,%f2 ! (5_1) x0 = (double)fx0;
+
+ fabss %f1,%f22 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f22,%f22 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f22,%f0,%f22 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f22,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3;
+ ba .d4
+ add %i3,stridex,%i3 ! px += stridex
+
+ .align 16
+.update5:
+ cmp counter,5
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont5
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,5,counter
+ ba .cont5
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_px]
+ st %f2,[%fp+tmp_px+4]
+ ld [%fp+tmp_px],%o4
+ fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20;
+
+ stx %l5,[%fp+tmp_py]
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5;
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_px+4],%o4
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ ldx [%fp+tmp_py],%l5
+ sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+
+ lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py;
+ sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28;
+ ba .d5
+ add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update6:
+ cmp counter,5
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont6
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,5,counter
+ ba .cont6
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20;
+
+ stx %l5,[%fp+tmp_px]
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5;
+ add %i3,stridex,%i3 ! px += stridex
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ ldx [%fp+tmp_px],%l5
+
+ sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27;
+
+ sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28;
+ ba .d6
+ add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update7:
+ cmp counter,5
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont7
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,5,counter
+ ba .cont7
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5;
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5;
+ sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+
+ sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28;
+ ba .d7
+ add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update8:
+ cmp counter,5
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f1
+ ba .cont8
+ fzeros %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f1
+ or %g0,5,counter
+ ba .cont8
+ fzeros %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ std %f0,[%fp+tmp_pz]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20;
+
+ faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f16,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f1,%f16 ! (5_1) x0 = (double)fx0;
+
+ fabss %f1,%f16 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f16,%f0,%f16 ! dtmp0 *= dsign;
+1:
+ sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5;
+ sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27;
+
+ add %i3,stridex,%i3 ! px += stridex
+ ba .d8
+ sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28;
+
+ .align 16
+.update9:
+ cmp counter,5
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont9
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,5,counter
+ ba .cont9
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ add %i1,stridey,%i1 ! py += stridey
+ faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5;
+ fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5;
+ sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27;
+
+ add %i3,stridex,%i3 ! px += stridex
+ ba .d9
+ sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28;
+
+ .align 16
+.update10:
+ cmp counter,1
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont10
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,1,counter
+ ba .cont10
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o1
+ fmuld %f40,%f24,%f36 ! (3_1) dtmp0 *= x20;
+
+ and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff
+ cmp %o4,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (5_1) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o1,28,%o1 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o1,-8,%o1		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f22,K5,%f14 ! (2_1) dtmp0 += K5;
+ fmuld %f4,%f18,%f4 ! (1_1) dtmp0 *= x20;
+
+ sll %l7,3,%l7 ! (0_1) cmul0_ind = ldiff0 << 3;
+ add %i3,stridex,%i3 ! px += stridex
+
+ ld [%fp+tmp_pz+4],%o1
+ and %o1,_0x7fffffff,%o4 ! itmp0 & 0x7fffffff
+ cmp %o4,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o1,28,%o1 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o1,-8,%o1		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o1],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ ba .den0
+ add %o2,stridez,%o1 ! pz += stridez
+
+ .align 16
+.update11:
+ cmp counter,2
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont11
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,2,counter
+ ba .cont11
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f4,%f34 ! (4_1) dtmp0 *= x20;
+
+ stx %l5,[%fp+tmp_px]
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f20,K5,%f12 ! (3_1) dtmp0 += K5;
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f22,%f16,%f22 ! (2_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ ldx [%fp+tmp_px],%l5
+ sra %l3,27,%o5 ! (0_0) signx0 = ux0 >> 27;
+ add %i3,stridex,%i3 ! px += stridex
+
+ lda [%i1]0x82,%l3 ! (1_0) uy0 = *(int*)py;
+ sra %l4,28,%o4 ! (0_0) signy0 = uy0 >> 28;
+ ba .den1
+ add %l6,cadd_arr,%l6 ! (0_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update12:
+ cmp counter,3
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont12
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ stx %i3,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,3,counter
+ ba .cont12
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f22,%f32 ! (5_1) dtmp0 *= x20;
+
+ stx %l5,[%fp+tmp_px]
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f18,K5,%f10 ! (4_1) dtmp0 += K5;
+ add %i3,stridex,%i3 ! px += stridex
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f20,%f24,%f20 ! (3_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l5 ! itmp0 & 0x7fffffff
+ cmp %l5,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ ldx [%fp+tmp_px],%l5
+
+ sra %l4,27,%o5 ! (1_0) signx0 = ux0 >> 27;
+
+ sra %l3,28,%o4 ! (1_0) signy0 = uy0 >> 28;
+ ba .den2
+ add %l6,cadd_arr,%l6 ! (1_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update13:
+ cmp counter,4
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont13
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ sub %i3,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,4,counter
+ ba .cont13
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f20,%f30 ! (0_0) dtmp0 *= x20;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ faddd %f16,K5,%f8 ! (5_1) dtmp0 += K5;
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f18,%f4,%f18 ! (4_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sll %g5,5,%l6 ! (2_0) ltmp0 = ldiff0 << 5;
+ sra %l3,27,%o5 ! (2_0) signx0 = ux0 >> 27;
+
+ sra %l4,28,%o4 ! (2_0) signy0 = uy0 >> 28;
+ ba .den3
+ add %l6,cadd_arr,%l6 ! (2_0) ltmp0 += (char*)cadd_arr;
+
+ .align 16
+.update14:
+ cmp counter,5
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f1
+ ba .cont14
+ fzeros %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ sub %i3,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ ld [cmul_arr],%f1
+ or %g0,5,counter
+ ba .cont14
+ fzeros %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ std %f0,[%fp+tmp_pz]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f18,%f28 ! (1_0) dtmp0 *= x20;
+
+ faddd %f16,K5,%f2 ! (0_0) dtmp0 += K5;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f16 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f16,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f16 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f16,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ add %i1,stridey,%i1 ! py += stridey
+ fmuld %f24,%f22,%f24 ! (5_1) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f1,%f16 ! (5_1) x0 = (double)fx0;
+
+ fabss %f1,%f16 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f16,%f16 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f16,%f0,%f16 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f16,%f0,%f16 ! dtmp0 *= dsign;
+1:
+ sll %o0,5,%l6 ! (3_0) ltmp0 = ldiff0 << 5;
+ sra %l4,27,%o5 ! (3_0) signx0 = ux0 >> 27;
+
+ ba .den4
+ sra %l3,28,%o4 ! (3_0) signy0 = uy0 >> 28;
+
+ .align 16
+.update15:
+ cmp counter,6
+ bg,pn %icc,1f
+ nop
+
+ ld [cmul_arr],%f2
+ ba .cont15
+ fzero %f0
+1:
+ cmp %o5,_0x7f800000 ! (4_0) b0 ? 0x7f800000
+ bg,pt %icc,1f
+ nop
+2:
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_py]
+ sub %i3,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ ld [cmul_arr],%f2
+ or %g0,6,counter
+ ba .cont15
+ fzero %f0
+1:
+ andcc %l3,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ bne,pn %icc,1f
+ sethi %hi(0x00800000),%o5
+
+ andcc %l4,_0x7fffffff,%g0 ! itmp0 & 0x7fffffff
+ be,pn %icc,2b
+ nop
+1:
+ st %f0,[%fp+tmp_pz]
+ st %f2,[%fp+tmp_pz+4]
+ ld [%fp+tmp_pz],%o4
+ fmuld %f40,%f16,%f26 ! (2_0) dtmp0 *= x20;
+
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f0,%f40 ! (0_0) y0 = (double)fy0;
+
+ ldd [cmul_arr+96],%f40 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+ fabss %f0,%f0 ! fy0 = fabsf(fy0);
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f0,%f0 ! dtmp0 = (double) *(int*)&fy0;
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f0,%f40,%f40 ! dtmp0 *= dsign;
+1:
+ add %i1,stridey,%i1 ! py += stridey
+ faddd %f4,K5,%f62 ! (1_0) dtmp0 += K5;
+ fmuld %f24,%f20,%f24 ! (0_0) dtmp0 *= x20;
+
+ ld [%fp+tmp_pz+4],%o4
+ and %o4,_0x7fffffff,%l6 ! itmp0 & 0x7fffffff
+ cmp %l6,%o5
+ bge,a 1f
+ fstod %f2,%f2 ! (5_1) x0 = (double)fx0;
+
+ fabss %f2,%f2 ! fx0 = fabsf(fx0);
+ ldd [cmul_arr+96],%f0 ! LOAD C2ONM149
+ sra %o4,28,%o4 ! itmp0 >>= 28;
+
+	and	%o4,-8,%o4		! itmp0 &= -8;
+ fitod %f2,%f2 ! dtmp0 = (double) *(int*)&fx0;
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= C2ONM149;
+ ldd [cmul_arr+%o4],%f0 ! dsign = *(double*)((char*)cmul_arr + itmp0);
+
+ fmuld %f2,%f0,%f2 ! dtmp0 *= dsign;
+1:
+ sll %l5,5,%l6 ! (4_0) ltmp0 = ldiff0 << 5;
+ sra %l3,27,%o5 ! (4_0) signx0 = ux0 >> 27;
+
+ ba .den5
+ sra %l4,28,%o4 ! (4_0) signy0 = uy0 >> 28;
+
+ .align 16
+.u0:
+ ba .c0
+ or %g0,_0x7fffffff,%o5
+.u1:
+ ba .c1
+ or %g0,_0x7fffffff,%o5
+.u2:
+ ba .c2
+ or %g0,_0x7f800000,%o5
+.u3:
+ ba .c3
+ or %g0,_0x7f800000,%o5
+.u4:
+ ba .c4
+ or %g0,_0x7fffffff,%o5
+.u5:
+ ba .c5
+ or %g0,_0x7fffffff,%o5
+.u6:
+ ba .c6
+ or %g0,_0x7f800000,%o5
+.u7:
+ ba .c7
+ or %g0,_0x7f800000,%o5
+.u8:
+ ba .c8
+ or %g0,_0x7fffffff,%o5
+.u9:
+ ba .c9
+ or %g0,_0x7fffffff,%o5
+.u10:
+ ba .c10
+ or %g0,_0x7f800000,%o5
+.u11:
+ ba .c11
+ or %g0,_0x7f800000,%o5
+.u12:
+ ba .c12
+ or %g0,_0x7fffffff,%o5
+.u13:
+ ba .c13
+ or %g0,_0x7fffffff,%o5
+.u14:
+ ba .c14
+ or %g0,_0x7f800000,%o5
+.u15:
+ ba .c15
+ or %g0,_0x7f800000,%o5
+.u16:
+ ba .c16
+ or %g0,_0x7fffffff,%o5
+.u17:
+ ba .c17
+ or %g0,_0x7fffffff,%o5
+.u18:
+ ba .c18
+ or %g0,_0x7f800000,%o5
+.u19:
+ ba .c19
+ or %g0,_0x7f800000,%o5
+.u20:
+ ba .c20
+ or %g0,_0x7fffffff,%o5
+.u21:
+ ba .c21
+ or %g0,_0x7fffffff,%o5
+.u22:
+ ba .c22
+ or %g0,_0x7f800000,%o5
+.u23:
+ ba .c23
+ or %g0,_0x7f800000,%o5
+.u24:
+ ba .c24
+ or %g0,_0x7fffffff,%o5
+.u25:
+ ba .c25
+ or %g0,_0x7fffffff,%o5
+.u26:
+ ba .c26
+ or %g0,_0x7f800000,%o5
+.u27:
+ ba .c27
+ or %g0,_0x7f800000,%o5
+.u28:
+ ba .c28
+ or %g0,_0x7fffffff,%o5
+.u29:
+ ba .c29
+ or %g0,_0x7fffffff,%o5
+.u30:
+ ba .c30
+ or %g0,_0x7f800000,%o5
+.u31:
+ ba .c31
+ or %g0,_0x7f800000,%o5
+.u32:
+ ba .c32
+ or %g0,_0x7fffffff,%o5
+.u33:
+ ba .c33
+ or %g0,_0x7fffffff,%o5
+.u34:
+ ba .c34
+ or %g0,_0x7f800000,%o5
+.u35:
+ ba .c35
+ or %g0,_0x7f800000,%o5
+.u36:
+ ba .c36
+ or %g0,_0x7fffffff,%o5
+.u37:
+ ba .c37
+ or %g0,_0x7fffffff,%o5
+.u38:
+ ba .c38
+ or %g0,_0x7f800000,%o5
+.u39:
+ ba .c39
+ or %g0,_0x7f800000,%o5
+.up0:
+ ba .co0
+ or %g0,_0x7fffffff,%o5
+.up1:
+ ba .co1
+ or %g0,_0x7fffffff,%o5
+.up2:
+ ba .co2
+ or %g0,_0x7f800000,%o5
+.up3:
+ ba .co3
+ or %g0,_0x7f800000,%o5
+.up4:
+ ba .co4
+ or %g0,_0x7fffffff,%o5
+.up5:
+ ba .co5
+ or %g0,_0x7fffffff,%o5
+.up6:
+ ba .co6
+ or %g0,_0x7f800000,%o5
+.up7:
+ ba .co7
+ or %g0,_0x7f800000,%o5
+.up8:
+ ba .co8
+ or %g0,_0x7fffffff,%o5
+.up9:
+ ba .co9
+ or %g0,_0x7fffffff,%o5
+.up10:
+ ba .co10
+ or %g0,_0x7f800000,%o5
+.up11:
+ ba .co11
+ or %g0,_0x7f800000,%o5
+.up12:
+ ba .co12
+ or %g0,_0x7fffffff,%o5
+.up13:
+ ba .co13
+ or %g0,_0x7fffffff,%o5
+.up14:
+ ba .co14
+ or %g0,_0x7f800000,%o5
+.up15:
+ ba .co15
+ or %g0,_0x7f800000,%o5
+.up16:
+ ba .co16
+ or %g0,_0x7fffffff,%o5
+.up17:
+ ba .co17
+ or %g0,_0x7fffffff,%o5
+.up18:
+ ba .co18
+ or %g0,_0x7f800000,%o5
+.up19:
+ ba .co19
+ or %g0,_0x7f800000,%o5
+.up20:
+ ba .co20
+ or %g0,_0x7fffffff,%o5
+.up21:
+ ba .co21
+ or %g0,_0x7fffffff,%o5
+.up22:
+ ba .co22
+ or %g0,_0x7f800000,%o5
+.up23:
+ ba .co23
+ or %g0,_0x7f800000,%o5
+.exit:
+ ret
+ restore
+ SET_SIZE(__vatan2f)
+
diff --git a/usr/src/libm/src/mvec/vis/__vatanf.S b/usr/src/libm/src/mvec/vis/__vatanf.S
new file mode 100644
index 0000000..b7191de
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vatanf.S
@@ -0,0 +1,1891 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vatanf.S 1.7 06/01/23 SMI"
+
+ .file "__vatanf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x3fefffff, 0xfffccbbc ! K0 = 9.99999999976686608841e-01
+ .word 0xbfd55554, 0x51c6b90f ! K1 = -3.33333091601972730504e-01
+ .word 0x3fc98d6d, 0x926596cc ! K2 = 1.99628540499523379702e-01
+ .word 0x00020000, 0x00000000 ! DC1
+ .word 0xfffc0000, 0x00000000 ! DC2
+ .word 0x7ff00000, 0x00000000 ! DC3
+ .word 0x3ff00000, 0x00000000 ! DONE = 1.0
+ .word 0x40000000, 0x00000000 ! DTWO = 2.0
+
+! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127]
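+!
+! The 128 words below can be regenerated from that formula with a short C
+! program along the lines of this sketch (illustrative only, not part of the
+! build; it makes the SPARC big-endian "high word" view of a double explicit
+! by shifting the 64-bit image right by 32):
+!
+!	#include <stdio.h>
+!	#include <string.h>
+!
+!	int main(void) {
+!		for (int i = 0; i < 128; i++) {
+!			unsigned long long yb = ((unsigned long long)i << 45) | 0x3ff0100000000000ULL;
+!			unsigned long long rb;
+!			double y, r;
+!			memcpy(&y, &yb, sizeof y);	/* breakpoint y = 1 + 1/256 + i/128 */
+!			r = 1.0 / y;			/* reciprocal seed */
+!			memcpy(&rb, &r, sizeof r);
+!			printf("0x%08x\n", (unsigned)(rb >> 32) + 0x3ff00000u);
+!		}
+!		return 0;
+!	}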
+
+ .word 0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6
+ .word 0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91
+ .word 0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac
+ .word 0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26
+ .word 0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd
+ .word 0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b
+ .word 0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741
+ .word 0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24
+ .word 0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f
+ .word 0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427
+ .word 0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225
+ .word 0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca
+ .word 0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6
+ .word 0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f
+ .word 0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867
+ .word 0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397
+ .word 0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f
+ .word 0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805
+ .word 0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5
+ .word 0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60
+ .word 0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce
+ .word 0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8
+ .word 0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c
+ .word 0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d
+ .word 0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120
+ .word 0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c
+ .word 0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d
+ .word 0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30
+ .word 0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244
+ .word 0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab
+ .word 0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949
+ .word 0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804
+
+ .word 0x3ff00000, 0x00000000 ! 1.0
+ .word 0xbff00000, 0x00000000 ! -1.0
+
+! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155]
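+!
+! As with parr0, the doubles below follow directly from the formula above;
+! a C sketch that prints both words of each entry (illustrative only, not
+! part of the build; the last bit may depend on the host atan rounding):
+!
+!	#include <stdio.h>
+!	#include <string.h>
+!	#include <math.h>
+!
+!	int main(void) {
+!		for (int i = 0; i < 156; i++) {
+!			unsigned ub = (unsigned)(i + 460) << 21;
+!			unsigned long long ab;
+!			float f;
+!			double a;
+!			memcpy(&f, &ub, sizeof f);	/* table breakpoint as a float */
+!			a = atan((double)f);
+!			memcpy(&ab, &a, sizeof a);
+!			printf("0x%08x, 0x%08x\n", (unsigned)(ab >> 32), (unsigned)ab);
+!		}
+!		return 0;
+!	}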
+
+ .word 0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f
+ .word 0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf
+ .word 0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2
+ .word 0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3
+ .word 0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19
+ .word 0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30
+ .word 0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195
+ .word 0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302
+ .word 0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a
+ .word 0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1
+ .word 0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c
+ .word 0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c
+ .word 0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700
+ .word 0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712
+ .word 0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9
+ .word 0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444
+ .word 0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d
+ .word 0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4
+ .word 0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c
+ .word 0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2
+ .word 0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc
+ .word 0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd
+ .word 0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4
+ .word 0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634
+ .word 0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e
+ .word 0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f
+ .word 0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8
+ .word 0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5
+ .word 0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857
+ .word 0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd
+ .word 0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054
+ .word 0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0
+ .word 0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f
+ .word 0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc
+ .word 0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45
+ .word 0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f
+ .word 0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665
+ .word 0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0
+ .word 0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5
+ .word 0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27
+ .word 0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38
+ .word 0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2
+ .word 0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849
+ .word 0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff
+ .word 0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619
+ .word 0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa
+ .word 0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105
+ .word 0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7
+ .word 0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc
+ .word 0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb
+ .word 0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28
+ .word 0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1
+ .word 0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94
+ .word 0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6
+ .word 0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395
+ .word 0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7
+ .word 0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e
+ .word 0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5
+ .word 0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2
+ .word 0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886
+ .word 0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5
+ .word 0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf
+ .word 0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f
+ .word 0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4
+ .word 0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b
+ .word 0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886
+ .word 0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
+ .word 0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
+ .word 0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
+ .word 0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
+ .word 0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
+ .word 0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
+ .word 0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
+ .word 0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
+ .word 0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
+ .word 0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
+ .word 0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
+ .word 0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886
+
+#define DC2 %f2
+#define DTWO %f6
+#define DONE %f52
+#define K0 %f54
+#define K1 %f56
+#define K2 %f58
+#define DC1 %f60
+#define DC3 %f62
+
+#define stridex %o2
+#define stridey %o3
+#define MASK_0x7fffffff %i1
+#define MASK_0x100000 %i5
+
+#define tmp_px STACK_BIAS-32
+#define tmp_counter STACK_BIAS-24
+#define tmp0 STACK_BIAS-16
+#define tmp1 STACK_BIAS-8
+
+#define counter %l1
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+!--------------------------------------------------------------------
+! !!!!! vatanf algorithm !!!!!
+! ux = ((int*)px)[0];
+! ax = ux & 0x7fffffff;
+!
+! if ( ax < 0x39b89c55 )
+! {
+! *(int*)py = ux;
+! goto next;
+! }
+!
+! if ( ax > 0x4c700518 )
+! {
+! if ( ax > 0x7f800000 )
+! {
+! float fpx = fabsf(*px);
+! fpx *= fpx;
+! *py = fpx;
+! goto next;
+! }
+!
+! sign = ux & 0x80000000;
+! sign |= pi_2;
+! *(int*)py = sign;
+! goto next;
+! }
+!
+! ftmp0 = *px;
+! x = (double)ftmp0;
+! px += stridex;
+! y = vis_fpadd32(x,DC1);
+! y = vis_fand(y,DC2);
+! div = x * y;
+! xx = x - y;
+! div += DONE;
+! i = ((unsigned long long*)&div)[0];
+! y0 = vis_fand(div,DC3);
+! i >>= 43;
+! i &= 508;
+! *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+! y0 = vis_fpsub32(dtmp0, y0);
+! dtmp0 = div * y0;
+! dtmp0 = DTWO - dtmp0;
+! y0 *= dtmp0;
+! dtmp1 = div * y0;
+! dtmp1 = DTWO - dtmp1;
+! y0 *= dtmp1;
+! ax = ux & 0x7fffffff;
+! ax += 0x00100000;
+! ax >>= 18;
+! ax &= -8;
+! res = *(double*)((char*)parr1 + ax);
+! ux >>= 28;
+! ux &= -8;
+! dtmp0 = *(double*)((char*)sign_arr + ux);
+! res *= dtmp0;
+! xx *= y0;
+! x2 = xx * xx;
+! dtmp0 = K2 * x2;
+! dtmp0 += K1;
+! dtmp0 *= x2;
+! dtmp0 += K0;
+! dtmp0 *= xx;
+! res += dtmp0;
+! ftmp0 = (float)res;
+! py[0] = ftmp0;
+! py += stridey;
+!--------------------------------------------------------------------
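+!
+! Note on the divide-free reciprocal above: the parr0 lookup combined with
+! vis_fpsub32 yields a seed for 1/div that is good to roughly 8 bits, and
+! each step of the form y0 *= (2 - div*y0) is one Newton-Raphson iteration,
+! roughly doubling the number of correct bits.  A scalar C sketch of that
+! refinement (names are illustrative, not taken from this file):
+!
+!	static double refine_recip(double div, double y0)	/* y0: ~8-bit seed */
+!	{
+!		y0 *= 2.0 - div * y0;	/* ~16 correct bits */
+!		y0 *= 2.0 - div * y0;	/* ~32 correct bits; ample for a float result */
+!		return y0;
+!	}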
+
+ ENTRY(__vatanf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,l2)
+
+ st %i0,[%fp+tmp_counter]
+
+ sllx %i2,2,stridex
+ sllx %i4,2,stridey
+
+ or %g0,%i3,%o1
+ stx %i1,[%fp+tmp_px]
+
+ ldd [%l2],K0
+ ldd [%l2+8],K1
+ ldd [%l2+16],K2
+ ldd [%l2+24],DC1
+ ldd [%l2+32],DC2
+ ldd [%l2+40],DC3
+ ldd [%l2+48],DONE
+ ldd [%l2+56],DTWO
+
+ add %l2,64,%i4
+ add %l2,64+512,%l0
+ add %l2,64+512+16-0x1cc*8,%l7
+
+ sethi %hi(0x100000),MASK_0x100000
+ sethi %hi(0x7ffffc00),MASK_0x7fffffff
+ add MASK_0x7fffffff,1023,MASK_0x7fffffff
+
+ sethi %hi(0x39b89c00),%o4
+ add %o4,0x55,%o4
+ sethi %hi(0x4c700400),%o5
+ add %o5,0x118,%o5
+
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%i3
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ nop
+
+ lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
+
+ and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff;
+ lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px;
+
+ cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55
+ bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 )
+ nop
+
+ cmp %l5,%o5 ! (0_0) ax ? 0x4c700518
+ bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 )
+ nop
+
+ add %i3,stridex,%l5 ! px += stridex;
+	fstod	%f0,%f22		! (0_0) x = (double)ftmp0;
+ mov %l6,%i3
+
+ lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0];
+
+ and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff;
+ lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px;
+ add %l5,stridex,%l4 ! px += stridex;
+ fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1);
+
+ cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55
+ bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 )
+ nop
+.cont0:
+ cmp %o7,%o5 ! (1_0) ax ? 0x4c700518
+ bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 )
+ nop
+.cont1:
+ fstod %f0,%f20 ! (1_0) x = (double)ftmp0;
+ mov %l6,%l5
+
+ fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2);
+
+ fmuld %f22,%f26,%f32 ! (0_0) div = x * y;
+
+ lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0];
+ fsubd %f22,%f26,%f22 ! (0_0) xx = x - y;
+
+ and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff;
+ lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px;
+ add %l4,stridex,%l3 ! px += stridex;
+ fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1);
+
+ cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55
+ bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f32,%f32 ! (0_0) div += done;
+.cont2:
+ cmp %o7,%o5 ! (2_0) ax ? 0x4c700518
+ bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 )
+ nop
+.cont3:
+ std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0];
+ mov %l6,%l4
+ fstod %f0,%f18 ! (2_0) x = (double)ftmp0;
+
+ fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2);
+
+ fmuld %f20,%f26,%f30 ! (1_0) div = x * y;
+
+ lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0];
+ fsubd %f20,%f26,%f20 ! (1_0) xx = x - y;
+
+ and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff;
+ lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px;
+ add %l3,stridex,%i0 ! px += stridex;
+ fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1);
+
+ cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55
+ bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f30,%f30 ! (1_0) div += done;
+.cont4:
+ cmp %o7,%o5 ! (3_0) ax ? 0x4c700518
+ bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 )
+ nop
+.cont5:
+ std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0];
+ mov %l6,%l3
+ fstod %f0,%f16 ! (3_0) x = (double)ftmp0;
+
+ ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2);
+
+ fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3);
+
+ srlx %o0,43,%o0 ! (0_0) i >>= 43;
+
+ and %o0,508,%l6 ! (0_0) i &= 508;
+
+ ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+
+ fmuld %f18,%f26,%f28 ! (2_0) div = x * y;
+
+ lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0];
+ fsubd %f18,%f26,%f18 ! (2_0) xx = x - y;
+
+ fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0);
+
+ and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff;
+ lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px;
+ add %i0,stridex,%i2 ! px += stridex;
+ fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1);
+
+ cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55
+ bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f28,%f28 ! (2_0) div += done;
+.cont6:
+ fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (4_0) ax ? 0x4c700518
+ bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 )
+ nop
+.cont7:
+ std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0];
+ mov %l6,%i0
+ fstod %f0,%f14 ! (4_0) x = (double)ftmp0;
+
+ ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2);
+
+ fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3);
+
+ fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0;
+ srlx %g1,43,%g1 ! (1_0) i >>= 43;
+
+ and %g1,508,%l6 ! (1_0) i &= 508;
+
+ ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+
+ fmuld %f16,%f26,%f34 ! (3_0) div = x * y;
+
+ lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0];
+ fsubd %f16,%f26,%f16 ! (3_0) xx = x - y;
+
+ fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0);
+ add %i2,stridex,%l2 ! px += stridex;
+
+ fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff;
+ lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px;
+ fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1);
+
+ cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55
+ bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f34,%f34 ! (3_0) div += done;
+.cont8:
+ fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (5_0) ax ? 0x4c700518
+ bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 )
+ nop
+.cont9:
+ std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0];
+ mov %l6,%i2
+ fstod %f0,%f36 ! (5_0) x = (double)ftmp0;
+
+ fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0;
+ ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2);
+
+ fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3);
+
+ fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0;
+ srlx %o0,43,%o0 ! (2_0) i >>= 43;
+
+ and %o0,508,%l6 ! (2_0) i &= 508;
+ fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1;
+
+ ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+
+ fmuld %f14,%f26,%f32 ! (4_0) div = x * y;
+
+ lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0];
+ fsubd %f14,%f26,%f14 ! (4_0) xx = x - y;
+
+ fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1;
+ add %l2,stridex,%g5 ! px += stridex;
+ fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff;
+ lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px;
+ fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1);
+
+ cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55
+ bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f32,%f32 ! (4_0) div += done;
+.cont10:
+ fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (6_0) ax ? 0x4c700518
+ bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 )
+ nop
+.cont11:
+ fmuld %f22,%f26,%f22 ! (0_0) xx *= y0;
+ mov %l6,%l2
+ std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f10 ! (6_0) x = (double)ftmp0;
+
+ fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0;
+ ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2);
+
+ fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3);
+
+ fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx;
+ srlx %g1,43,%g1 ! (3_0) i >>= 43;
+ fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0;
+
+ and %g1,508,%l6 ! (3_0) i &= 508;
+ mov %i3,%o7
+ fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1;
+
+ ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+
+ fmuld %f36,%f26,%f30 ! (5_0) div = x * y;
+ srl %o7,28,%g1 ! (0_0) ux >>= 28;
+ add %g5,stridex,%i3 ! px += stridex;
+
+ fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff;
+ lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0];
+ fsubd %f36,%f26,%f36 ! (5_0) xx = x - y;
+
+ fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1;
+ add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000;
+ and %g1,-8,%g1 ! (0_0) ux &= -8;
+ fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff;
+ lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px;
+ fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1);
+
+ cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55
+ bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f30,%f30 ! (5_0) div += done;
+.cont12:
+ fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (7_0) ax ? 0x4c700518
+ bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1;
+.cont13:
+ fmuld %f20,%f26,%f20 ! (1_0) xx *= y0;
+ srl %o0,18,%o7 ! (0_0) ax >>= 18;
+ std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f8 ! (7_0) x = (double)ftmp0;
+
+ fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (0_0) ux &= -8;
+ ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2);
+
+ add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax;
+ mov %l6,%g5
+ ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
+
+ fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2;
+ srlx %o0,43,%o0 ! (4_0) i >>= 43;
+ ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax);
+ fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3);
+
+ fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx;
+ and %o0,508,%l6 ! (4_0) i &= 508;
+ mov %l5,%o7
+ fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0;
+
+ fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0;
+ srl %o7,28,%l5 ! (1_0) ux >>= 28;
+ ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+
+ fmuld %f10,%f26,%f28 ! (6_0) div = x * y;
+ faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0;
+
+ subcc counter,8,counter
+ bneg,pn %icc,.tail
+ or %g0,%o1,%o0
+
+ add %fp,tmp0,%g1
+ lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
+
+ ba .main_loop
+ add %i3,stridex,%l5 ! px += stridex;
+
+ .align 16
+.main_loop:
+ fsubd %f10,%f26,%f10 ! (6_1) xx = x - y;
+ and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff;
+ st %f12,[%g1] ! (7_1) py[0] = ftmp0;
+ fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2;
+
+ fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1;
+ srl %o7,28,%o7 ! (1_0) ux >>= 28;
+ add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000;
+ fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff;
+ lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px;
+ fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx;
+ cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55
+ bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f28,%f28 ! (6_1) div += done;
+.cont14:
+ fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0;
+ cmp %o1,%o5 ! (0_0) ax ? 0x4c700518
+ bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1;
+.cont15:
+ fmuld %f18,%f26,%f18 ! (2_1) xx *= y0;
+ srl %g1,18,%o1 ! (1_1) ax >>= 18;
+ std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0];
+	fstod	%f0,%f22		! (0_0) x = (double)ftmp0;
+
+ fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0;
+ and %o1,-8,%o1 ! (1_1) ax &= -8;
+ ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2);
+
+ ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax);
+ and %o7,-8,%o7 ! (1_1) ux &= -8;
+ mov %l6,%i3
+ faddd %f48,%f44,%f12 ! (0_1) res += dtmp0;
+
+ fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2;
+ nop
+ ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+ fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3);
+
+ fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx;
+ srlx %g1,43,%g1 ! (5_1) i >>= 43;
+ mov %l4,%o7
+ fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0;
+
+ and %g1,508,%l6 ! (5_1) i &= 508;
+ nop
+ bn,pn %icc,.exit
+ fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0;
+ add %o0,stridey,%g1 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res;
+
+ fmuld %f8,%f26,%f34 ! (7_1) div = x * y;
+ srl %o7,28,%o1 ! (2_1) ux >>= 28;
+ lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0];
+ faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0;
+
+ fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff;
+ st %f12,[%o0] ! (0_1) py[0] = ftmp0;
+ fsubd %f8,%f26,%f8 ! (7_1) xx = x - y;
+
+ fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1;
+ add %l5,stridex,%l4 ! px += stridex;
+ add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000;
+ fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff;
+ lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px;
+ fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx;
+ cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55
+ bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f34,%f34 ! (7_1) div += done;
+.cont16:
+ fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (1_0) ax ? 0x4c700518
+ bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1;
+.cont17:
+ fmuld %f16,%f26,%f16 ! (3_1) xx *= y0;
+ srl %o0,18,%o7 ! (2_1) ax >>= 18;
+ std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f20 ! (1_0) x = (double)ftmp0;
+
+ fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0;
+ ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0];
+ and %o1,-8,%o1 ! (2_1) ux &= -8;
+ fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2);
+
+ faddd %f48,%f44,%f12 ! (1_1) res += dtmp0;
+ and %o7,-8,%o7 ! (2_1) ax &= -8;
+ ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+ bn,pn %icc,.exit
+
+ ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax);
+ mov %l6,%l5
+ fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2;
+ fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3);
+
+ fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx;
+ srlx %o0,43,%o0 ! (6_1) i >>= 43;
+ mov %l3,%o7
+ fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0;
+
+ and %o0,508,%l6 ! (6_1) i &= 508;
+ add %l4,stridex,%l3 ! px += stridex;
+ bn,pn %icc,.exit
+ fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0;
+ add %g1,stridey,%o0 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res;
+
+ fmuld %f22,%f26,%f32 ! (0_0) div = x * y;
+ srl %o7,28,%o1 ! (3_1) ux >>= 28;
+ lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0];
+ faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0;
+
+ fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff;
+ st %f12,[%g1] ! (1_1) py[0] = ftmp0;
+ fsubd %f22,%f26,%f22 ! (0_0) xx = x - y;
+
+ fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1;
+ add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000;
+ and %o1,-8,%o1 ! (3_1) ux &= -8;
+ fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff;
+ lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px;
+ fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx;
+ cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55
+ bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f32,%f32 ! (0_0) div += done;
+.cont18:
+ fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (2_0) ax ? 0x4c700518
+ bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1;
+.cont19:
+ fmuld %f14,%f26,%f14 ! (4_1) xx *= y0;
+ srl %g1,18,%o7 ! (3_1) ax >>= 18;
+ std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f18 ! (2_0) x = (double)ftmp0;
+
+ fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (3_1) ax &= -8;
+ ldx [%fp+tmp1],%g1 ! (7_1) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2);
+
+ faddd %f48,%f44,%f12 ! (2_1) res += dtmp0;
+ mov %l6,%l4
+ ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+ bn,pn %icc,.exit
+
+ fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2;
+ ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax)
+ nop
+ fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3);
+
+ fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx;
+ srlx %g1,43,%g1 ! (7_1) i >>= 43;
+ mov %i0,%o7
+ fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0;
+
+ and %g1,508,%l6 ! (7_1) i &= 508;
+ add %l3,stridex,%i0 ! px += stridex;
+ bn,pn %icc,.exit
+ fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0;
+ add %o0,stridey,%g1 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res;
+
+ fmuld %f20,%f26,%f30 ! (1_0) div = x * y;
+ srl %o7,28,%o1 ! (4_1) ux >>= 28;
+ lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0];
+ faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0;
+
+ fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff;
+ st %f12,[%o0] ! (2_1) py[0] = ftmp0;
+ fsubd %f20,%f26,%f20 ! (1_0) xx = x - y;
+
+ fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1;
+ add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000;
+ and %o1,-8,%o1 ! (4_1) ux &= -8;
+ fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff;
+ lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px;
+ fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx;
+ cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55
+ bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f30,%f30 ! (1_0) div += done;
+.cont20:
+ fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (3_0) ax ? 0x4c700518
+ bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1;
+.cont21:
+ fmuld %f36,%f26,%f36 ! (5_1) xx *= y0;
+ srl %o0,18,%o7 ! (4_1) ax >>= 18;
+ std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f16 ! (3_0) x = (double)ftmp0;
+
+ fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (4_1) ax &= -8;
+ ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2);
+
+ faddd %f48,%f44,%f12 ! (3_1) res += dtmp0;
+ nop
+ ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+ bn,pn %icc,.exit
+
+ ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax);
+ mov %l6,%l3
+ fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2;
+ fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3);
+
+ fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx;
+ srlx %o0,43,%o0 ! (0_0) i >>= 43;
+ mov %i2,%o7
+ fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0;
+
+ and %o0,508,%l6 ! (0_0) i &= 508;
+ add %i0,stridex,%i2 ! px += stridex;
+ bn,pn %icc,.exit
+ fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0;
+ add %g1,stridey,%o0 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res;
+
+ fmuld %f18,%f26,%f28 ! (2_0) div = x * y;
+ srl %o7,28,%o1 ! (5_1) ux >>= 28;
+ lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0];
+ faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0;
+
+ fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff;
+ st %f12,[%g1] ! (3_1) py[0] = ftmp0;
+ fsubd %f18,%f26,%f18 ! (2_0) xx = x - y;
+
+ fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1;
+ add %o7,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000;
+ and %o1,-8,%o1 ! (5_1) ux &= -8;
+ fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff;
+ lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px;
+ fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx;
+ cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55
+ bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f28,%f28 ! (2_0) div += done;
+.cont22:
+ fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (4_0) ax ? 0x4c700518
+ bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1;
+.cont23:
+ fmuld %f10,%f26,%f10 ! (6_1) xx *= y0;
+ srl %g1,18,%o7 ! (5_1) ax >>= 18;
+ std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f14 ! (4_0) x = (double)ftmp0;
+
+ fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (5_1) ax &= -8;
+ ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2);
+
+ faddd %f48,%f44,%f12 ! (4_1) res += dtmp0;
+ mov %l6,%i0
+ ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+ bn,pn %icc,.exit
+
+ ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax);
+ nop
+ fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2;
+ fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3);
+
+ fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx;
+ srlx %g1,43,%g1 ! (1_0) i >>= 43;
+ mov %l2,%o7
+ fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0;
+
+ and %g1,508,%l6 ! (1_0) i &= 508;
+ add %i2,stridex,%l2 ! px += stridex;
+ bn,pn %icc,.exit
+ fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0;
+ add %o0,stridey,%g1 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res;
+
+ fmuld %f16,%f26,%f34 ! (3_0) div = x * y;
+ srl %o7,28,%o1 ! (6_1) ux >>= 28;
+ lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0];
+ faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0;
+
+ fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff;
+ st %f12,[%o0] ! (4_1) py[0] = ftmp0;
+ fsubd %f16,%f26,%f16 ! (3_0) xx = x - y;
+
+ fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1;
+ add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000;
+ and %o1,-8,%o1 ! (6_1) ux &= -8;
+ fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff;
+ lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px;
+ fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx;
+ cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55
+ bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f34,%f34 ! (3_0) div += done;
+.cont24:
+ fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (5_0) ax ? 0x4c700518
+ bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1;
+.cont25:
+ fmuld %f8,%f26,%f8 ! (7_1) xx *= y0;
+ srl %o0,18,%o7 ! (6_1) ax >>= 18;
+ std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f36 ! (5_0) x = (double)ftmp0;
+
+ fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (6_1) ax &= -8;
+ ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2);
+
+ faddd %f48,%f44,%f12 ! (5_1) res += dtmp0;
+ mov %l6,%i2
+ ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+ bn,pn %icc,.exit
+
+ ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax);
+ nop
+ fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2;
+ fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3);
+
+ fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx;
+ srlx %o0,43,%o0 ! (2_0) i >>= 43;
+ mov %g5,%o7
+ fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0;
+
+ and %o0,508,%l6 ! (2_0) i &= 508;
+ add %l2,stridex,%g5 ! px += stridex;
+ bn,pn %icc,.exit
+ fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0;
+ add %g1,stridey,%o0 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res;
+
+ fmuld %f14,%f26,%f32 ! (4_0) div = x * y;
+ srl %o7,28,%o1 ! (7_1) ux >>= 28;
+ lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0];
+ faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0;
+
+ fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff;
+ st %f12,[%g1] ! (5_1) py[0] = ftmp0;
+ fsubd %f14,%f26,%f14 ! (4_0) xx = x - y;
+
+ fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1;
+ add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000;
+ and %o1,-8,%o1 ! (7_1) ux &= -8;
+ fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff;
+ lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px;
+ fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx;
+ cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55
+ bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f32,%f32 ! (4_0) div += done;
+.cont26:
+ fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (6_0) ax ? 0x4c700518
+ bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1;
+.cont27:
+ fmuld %f22,%f26,%f22 ! (0_0) xx *= y0;
+ srl %g1,18,%o7 ! (7_1) ax >>= 18;
+ std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f10 ! (6_0) x = (double)ftmp0;
+
+ fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (7_1) ax &= -8;
+ ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2);
+
+ faddd %f48,%f44,%f12 ! (6_1) res += dtmp0;
+ mov %l6,%l2
+ ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+ bn,pn %icc,.exit
+
+ ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax);
+ nop
+ fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2;
+ fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3);
+
+ fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx;
+ srlx %g1,43,%g1 ! (3_0) i >>= 43;
+ mov %i3,%o7
+ fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0;
+
+ and %g1,508,%l6 ! (3_0) i &= 508;
+ add %g5,stridex,%i3 ! px += stridex;
+ bn,pn %icc,.exit
+ fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0;
+ add %o0,stridey,%g1 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res;
+
+ fmuld %f36,%f26,%f30 ! (5_0) div = x * y;
+ srl %o7,28,%o1 ! (0_0) ux >>= 28;
+ lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0];
+ faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0;
+
+ fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff;
+ st %f12,[%o0] ! (6_1) py[0] = ftmp0;
+ fsubd %f36,%f26,%f36 ! (5_0) xx = x - y;
+
+ fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1;
+ add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000;
+ and %o1,-8,%o1 ! (0_0) ux &= -8;
+ fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0;
+ and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff;
+ lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px;
+ fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1);
+
+ fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx;
+ cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55
+ bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 )
+ faddd DONE,%f30,%f30 ! (5_0) div += done;
+.cont28:
+ fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0;
+ cmp %o7,%o5 ! (7_0) ax ? 0x4c700518
+ bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 )
+ faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1;
+.cont29:
+ fmuld %f20,%f26,%f20 ! (1_0) xx *= y0;
+ srl %o0,18,%o7 ! (0_0) ax >>= 18;
+ std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0];
+ fstod %f0,%f8 ! (7_0) x = (double)ftmp0;
+
+ fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (0_0) ux &= -8;
+ ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0];
+ fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2);
+
+ faddd %f48,%f44,%f12 ! (7_1) res += dtmp0;
+ subcc counter,8,counter
+ ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
+ bn,pn %icc,.exit
+
+ fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2;
+ mov %l6,%g5
+ ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax);
+ fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3);
+
+ fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx;
+ srlx %o0,43,%l6 ! (4_0) i >>= 43;
+ mov %l5,%o7
+ fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0;
+
+ add %g1,stridey,%o0 ! py += stridey;
+ and %l6,508,%l6 ! (4_0) i &= 508;
+ bn,pn %icc,.exit
+ fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0;
+ ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ add %i3,stridex,%l5 ! px += stridex;
+ fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res;
+
+ lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
+ fmuld %f10,%f26,%f28 ! (6_0) div = x * y;
+ bpos,pt %icc,.main_loop
+ faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0;
+
+ srl %o7,28,%l5 ! (1_0) ux >>= 28;
+ st %f12,[%g1] ! (7_1) py[0] = ftmp0;
+
+.tail:
+ addcc counter,7,counter
+ bneg,pn %icc,.begin
+ or %g0,%o0,%o1
+
+ fsubd %f10,%f26,%f10 ! (6_1) xx = x - y;
+ and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff;
+ fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2;
+
+ fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1;
+ add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000;
+ and %l5,-8,%l5 ! (1_1) ux &= -8;
+ fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0;
+
+ fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx;
+ faddd DONE,%f28,%f28 ! (6_1) div += done;
+
+ fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0;
+ faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1;
+
+ fmuld %f18,%f26,%f18 ! (2_1) xx *= y0;
+ srl %g1,18,%o7 ! (1_1) ax >>= 18;
+ std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0];
+
+ fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (1_1) ax &= -8;
+ ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0];
+
+ faddd %f48,%f44,%f12 ! (0_1) res += dtmp0;
+ add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax;
+ ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+
+ fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2;
+ fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3);
+ ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax);
+
+ fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx;
+ fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0;
+ srlx %g1,43,%g1 ! (5_1) i >>= 43;
+
+ and %g1,508,%l6 ! (5_1) i &= 508;
+ mov %l4,%o7
+ fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0;
+ add %o0,stridey,%g1 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res;
+
+ srl %o7,28,%l4 ! (2_1) ux >>= 28;
+ st %f12,[%o0] ! (0_1) py[0] = ftmp0;
+ faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ or %g0,%g1,%o1
+
+ fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff;
+
+ fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1;
+ add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000;
+ and %l4,-8,%l4 ! (2_1) ux &= -8;
+ fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0;
+
+ fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx;
+
+ fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0;
+ faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1;
+
+ fmuld %f16,%f26,%f16 ! (3_1) xx *= y0;
+ srl %o0,18,%o7 ! (2_1) ax >>= 18;
+
+ fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (2_1) ax &= -8;
+ ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0];
+
+ faddd %f48,%f44,%f12 ! (1_1) res += dtmp0;
+ add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax;
+ ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+
+ fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2;
+ fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3);
+ ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax);
+
+ fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx;
+ fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0;
+ srlx %o0,43,%o0 ! (6_1) i >>= 43;
+
+ and %o0,508,%l6 ! (6_1) i &= 508;
+ mov %l3,%o7
+ fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0;
+ add %g1,stridey,%o0 ! py += stridey;
+ ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
+ fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res;
+
+ srl %o7,28,%l3 ! (3_1) ux >>= 28;
+ st %f12,[%g1] ! (1_1) py[0] = ftmp0;
+ faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ or %g0,%o0,%o1
+
+ fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%g1 ! (3_1) ax = ux & 0x7fffffff;
+
+ fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1;
+ add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000;
+ and %l3,-8,%l3 ! (3_1) ux &= -8;
+ fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0);
+
+ fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0;
+
+ fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx;
+
+ fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0;
+ faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1;
+
+ fmuld %f14,%f26,%f14 ! (4_1) xx *= y0;
+ srl %g1,18,%o7 ! (3_1) ax >>= 18;
+
+ fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (3_1) ax &= -8;
+
+ faddd %f48,%f44,%f12 ! (2_1) res += dtmp0;
+ add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax;
+ ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+
+ fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2;
+ ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax)
+
+ fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx;
+ fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0;
+
+ mov %i0,%o7
+ fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0;
+ add %o0,stridey,%g1 ! py += stridey;
+ fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res;
+
+ srl %o7,28,%i0 ! (4_1) ux >>= 28;
+ st %f12,[%o0] ! (2_1) py[0] = ftmp0;
+ faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ or %g0,%g1,%o1
+
+ fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff;
+
+ fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1;
+ add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000;
+ and %i0,-8,%i0 ! (4_1) ux &= -8;
+
+ fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0;
+
+ fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx;
+
+ faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1;
+
+ fmuld %f36,%f26,%f36 ! (5_1) xx *= y0;
+ srl %o0,18,%o7 ! (4_1) ax >>= 18;
+
+ fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0;
+ and %o7,-8,%o7 ! (4_1) ax &= -8;
+
+ faddd %f48,%f44,%f12 ! (3_1) res += dtmp0;
+ add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax;
+ ldd [%l0+%i0],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+
+ fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2;
+ ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax);
+
+ fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx;
+
+ mov %i2,%o7
+ fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1;
+
+ fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0;
+ add %g1,stridey,%o0 ! py += stridey;
+ fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res;
+
+ srl %o7,28,%i2 ! (5_1) ux >>= 28;
+ st %f12,[%g1] ! (3_1) py[0] = ftmp0;
+ faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ or %g0,%o0,%o1
+
+ fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff;
+
+ fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1;
+ add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000;
+ and %i2,-8,%i2 ! (5_1) ux &= -8;
+
+ fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx;
+
+ faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1;
+
+ fmuld %f10,%f26,%f10 ! (6_1) xx *= y0;
+ srl %g1,18,%o7 ! (5_1) ax >>= 18;
+
+ and %o7,-8,%o7 ! (5_1) ax &= -8;
+
+ faddd %f48,%f44,%f12 ! (4_1) res += dtmp0;
+ add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax;
+ ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+
+ fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2;
+ ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax);
+
+ fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx;
+
+ mov %l2,%o7
+
+ fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0;
+ add %o0,stridey,%g1 ! py += stridey;
+ fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res;
+
+ srl %o7,28,%l2 ! (6_1) ux >>= 28;
+ st %f12,[%o0] ! (4_1) py[0] = ftmp0;
+ faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ or %g0,%g1,%o1
+
+ fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2;
+ and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff;
+
+ add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000;
+ and %l2,-8,%l2 ! (6_1) ux &= -8;
+
+ fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx;
+
+ faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1;
+
+ srl %o0,18,%o7 ! (6_1) ax >>= 18;
+
+ and %o7,-8,%o7 ! (6_1) ax &= -8;
+
+ faddd %f48,%f44,%f12 ! (5_1) res += dtmp0;
+ add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax;
+ ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
+
+ fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2;
+ ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax);
+
+ fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0;
+ add %g1,stridey,%o0 ! py += stridey;
+ fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res;
+
+ st %f12,[%g1] ! (5_1) py[0] = ftmp0;
+ faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ or %g0,%o0,%o1
+
+ fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx;
+
+ faddd %f48,%f44,%f12 ! (6_1) res += dtmp0;
+
+ add %o0,stridey,%g1 ! py += stridey;
+ fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res;
+
+ st %f12,[%o0] ! (6_1) py[0] = ftmp0;
+
+ ba .begin
+ or %g0,%g1,%o1 ! py += stridey;
+
+.exit:
+ ret
+ restore %g0,%g0,%g0
+
+ .align 16
+.spec0:
+ add %i3,stridex,%i3 ! px += stridex;
+ sub counter,1,counter
+ st %l6,[%o1] ! *(int*)py = ux;
+
+ ba .begin1
+ add %o1,stridey,%o1 ! py += stridey;
+
+ .align 16
+.spec1:
+ sethi %hi(0x7f800000),%l3
+ sethi %hi(0x3fc90c00),%l4 ! pi_2
+
+ sethi %hi(0x80000000),%o0
+ add %l4,0x3db,%l4 ! pi_2
+
+ cmp %l5,%l3 ! if ( ax > 0x7f800000 )
+ bg,a,pn %icc,1f
+ fabss %f0,%f0 ! fpx = fabsf(*px);
+
+ and %l6,%o0,%l6 ! sign = ux & 0x80000000;
+
+ or %l6,%l4,%l6 ! sign |= pi_2;
+
+ add %i3,stridex,%i3 ! px += stridex;
+ sub counter,1,counter
+ st %l6,[%o1] ! *(int*)py = sign;
+
+ ba .begin1
+ add %o1,stridey,%o1 ! py += stridey;
+
+1:
+ fmuls %f0,%f0,%f0 ! fpx *= fpx;
+
+ add %i3,stridex,%i3 ! px += stridex
+ sub counter,1,counter
+ st %f0,[%o1] ! *py = fpx;
+
+ ba .begin1
+ add %o1,stridey,%o1 ! py += stridey;
+
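+! .updateN: fixups for special arguments detected in the software-pipelined
+! loop.  The offending element is replaced with zero (fzeros) and 0x3fffffff
+! is substituted in the integer register holding its high word, the remaining
+! iteration count and the address of the special argument are spilled to
+! tmp_counter/tmp_px for a later pass, and counter is truncated so that only
+! the elements already in the pipeline are finished on this pass.
+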
+ .align 16
+.update0:
+ cmp counter,1
+ fzeros %f0
+ ble,a .cont0
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont0
+ or %g0,1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ fzeros %f0
+ ble,a .cont1
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont1
+ or %g0,1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ fzeros %f0
+ ble,a .cont2
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l4,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont2
+ or %g0,2,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ fzeros %f0
+ ble,a .cont3
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l4,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont3
+ or %g0,2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ fzeros %f0
+ ble,a .cont4
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l3,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont4
+ or %g0,3,counter
+
+ .align 16
+.update5:
+ cmp counter,3
+ fzeros %f0
+ ble,a .cont5
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l3,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont5
+ or %g0,3,counter
+
+ .align 16
+.update6:
+ cmp counter,4
+ fzeros %f0
+ ble,a .cont6
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i0,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont6
+ or %g0,4,counter
+
+ .align 16
+.update7:
+ cmp counter,4
+ fzeros %f0
+ ble,a .cont7
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i0,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont7
+ or %g0,4,counter
+
+ .align 16
+.update8:
+ cmp counter,5
+ fzeros %f0
+ ble,a .cont8
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont8
+ or %g0,5,counter
+
+ .align 16
+.update9:
+ cmp counter,5
+ fzeros %f0
+ ble,a .cont9
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont9
+ or %g0,5,counter
+
+ .align 16
+.update10:
+ cmp counter,6
+ fzeros %f0
+ ble,a .cont10
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont10
+ or %g0,6,counter
+
+ .align 16
+.update11:
+ cmp counter,6
+ fzeros %f0
+ ble,a .cont11
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont11
+ or %g0,6,counter
+
+ .align 16
+.update12:
+ cmp counter,7
+ fzeros %f0
+ ble,a .cont12
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont12
+ or %g0,7,counter
+
+ .align 16
+.update13:
+ cmp counter,7
+ fzeros %f0
+ ble,a .cont13
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont13
+ or %g0,7,counter
+
+ .align 16
+.update14:
+ cmp counter,0
+ fzeros %f0
+ ble,a .cont14
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,0,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i3,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont14
+ or %g0,0,counter
+
+ .align 16
+.update15:
+ cmp counter,0
+ fzeros %f0
+ ble,a .cont15
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,0,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i3,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont15
+ or %g0,0,counter
+
+ .align 16
+.update16:
+ cmp counter,1
+ fzeros %f0
+ ble,a .cont16
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont16
+ or %g0,1,counter
+
+ .align 16
+.update17:
+ cmp counter,1
+ fzeros %f0
+ ble,a .cont17
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont17
+ or %g0,1,counter
+
+ .align 16
+.update18:
+ cmp counter,2
+ fzeros %f0
+ ble,a .cont18
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l4,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont18
+ or %g0,2,counter
+
+ .align 16
+.update19:
+ cmp counter,2
+ fzeros %f0
+ ble,a .cont19
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l4,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont19
+ or %g0,2,counter
+
+ .align 16
+.update20:
+ cmp counter,3
+ fzeros %f0
+ ble,a .cont20
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l3,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont20
+ or %g0,3,counter
+
+ .align 16
+.update21:
+ cmp counter,3
+ fzeros %f0
+ ble,a .cont21
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l3,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont21
+ or %g0,3,counter
+
+ .align 16
+.update22:
+ cmp counter,4
+ fzeros %f0
+ ble,a .cont22
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i0,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont22
+ or %g0,4,counter
+
+ .align 16
+.update23:
+ cmp counter,4
+ fzeros %f0
+ ble,a .cont23
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i0,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont23
+ or %g0,4,counter
+
+ .align 16
+.update24:
+ cmp counter,5
+ fzeros %f0
+ ble,a .cont24
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont24
+ or %g0,5,counter
+
+ .align 16
+.update25:
+ cmp counter,5
+ fzeros %f0
+ ble,a .cont25
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont25
+ or %g0,5,counter
+
+ .align 16
+.update26:
+ cmp counter,6
+ fzeros %f0
+ ble,a .cont26
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont26
+ or %g0,6,counter
+
+ .align 16
+.update27:
+ cmp counter,6
+ fzeros %f0
+ ble,a .cont27
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l2,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont27
+ or %g0,6,counter
+
+ .align 16
+.update28:
+ cmp counter,7
+ fzeros %f0
+ ble,a .cont28
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont28
+ or %g0,7,counter
+
+ .align 16
+.update29:
+ cmp counter,7
+ fzeros %f0
+ ble,a .cont29
+ sethi %hi(0x3fffffff),%l6
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+ sethi %hi(0x3fffffff),%l6
+ ba .cont29
+ or %g0,7,counter
+
+ SET_SIZE(__vatanf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vcos.S b/usr/src/libm/src/mvec/vis/__vcos.S
new file mode 100644
index 0000000..4cfee05
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vcos.S
@@ -0,0 +1,3078 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vcos.S 1.8 06/01/23 SMI"
+
+ .file "__vcos.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+constants:
+ .word 0x3ec718e3,0xa6972785
+ .word 0x3ef9fd39,0x94293940
+ .word 0xbf2a019f,0x75ee4be1
+ .word 0xbf56c16b,0xba552569
+ .word 0x3f811111,0x1108c703
+ .word 0x3fa55555,0x554f5b35
+ .word 0xbfc55555,0x555554d0
+ .word 0xbfdfffff,0xffffff85
+ .word 0x3ff00000,0x00000000
+ .word 0xbfc55555,0x5551fc28
+ .word 0x3f811107,0x62eacc9d
+ .word 0xbfdfffff,0xffff6328
+ .word 0x3fa55551,0x5f7acf0c
+ .word 0x3fe45f30,0x6dc9c883
+ .word 0x43380000,0x00000000
+ .word 0x3ff921fb,0x54400000
+ .word 0x3dd0b461,0x1a600000
+ .word 0x3ba3198a,0x2e000000
+ .word 0x397b839a,0x252049c1
+ .word 0x80000000,0x00004000
+ .word 0xffff8000,0x00000000 ! N.B.: low-order words used
+ .word 0x3fc90000,0x80000000 ! for sign bit hacking; see
+ .word 0x3fc40000,0x00000000 ! references to "thresh" below
+
+#define p4 0x0
+#define q4 0x08
+#define p3 0x10
+#define q3 0x18
+#define p2 0x20
+#define q2 0x28
+#define p1 0x30
+#define q1 0x38
+#define one 0x40
+#define pp1 0x48
+#define pp2 0x50
+#define qq1 0x58
+#define qq2 0x60
+#define invpio2 0x68
+#define round 0x70
+#define pio2_1 0x78
+#define pio2_2 0x80
+#define pio2_3 0x88
+#define pio2_3t 0x90
+#define f30val 0x98
+#define mask 0xa0
+#define thresh 0xa8
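+
+! The constants table above holds, in the order of these offsets: the p/q
+! polynomial coefficients for the primary range (only the q terms, the
+! cosine series, are used in this file), the constant one, the pp/qq
+! correction coefficients for the table-driven path, invpio2 = 2/pi, the
+! rounding constant 1.5*2^52, the three-piece split of pi/2 (pio2_1..pio2_3)
+! plus its tail pio2_3t for medium-range argument reduction, and a few bit
+! masks and thresholds.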
+
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define biguns STACK_BIAS-0x20
+#define n2 STACK_BIAS-0x24
+#define n1 STACK_BIAS-0x28
+#define n0 STACK_BIAS-0x2c
+#define x2_1 STACK_BIAS-0x40
+#define x1_1 STACK_BIAS-0x50
+#define x0_1 STACK_BIAS-0x60
+#define y2_0 STACK_BIAS-0x70
+#define y1_0 STACK_BIAS-0x80
+#define y0_0 STACK_BIAS-0x90
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x90
+
+!--------------------------------------------------------------------
+! define pipes for easier reading
+
+#define P0_f0 %f0
+#define P0_f1 %f1
+#define P0_f2 %f2
+#define P0_f3 %f3
+#define P0_f4 %f4
+#define P0_f5 %f5
+#define P0_f6 %f6
+#define P0_f7 %f7
+#define P0_f8 %f8
+#define P0_f9 %f9
+
+#define P1_f10 %f10
+#define P1_f11 %f11
+#define P1_f12 %f12
+#define P1_f13 %f13
+#define P1_f14 %f14
+#define P1_f15 %f15
+#define P1_f16 %f16
+#define P1_f17 %f17
+#define P1_f18 %f18
+#define P1_f19 %f19
+
+#define P2_f20 %f20
+#define P2_f21 %f21
+#define P2_f22 %f22
+#define P2_f23 %f23
+#define P2_f24 %f24
+#define P2_f25 %f25
+#define P2_f26 %f26
+#define P2_f27 %f27
+#define P2_f28 %f28
+#define P2_f29 %f29
+
+! define __vlibm_TBL_sincos_hi & lo for easy reading
+
+#define SC_HI %l3
+#define SC_LO %l4
+
+! define constants for easy reading
+
+#define C_q1 %f46
+#define C_q2 %f48
+#define C_q3 %f50
+#define C_q4 %f52
+
+! one ( 1 ) uno eins echi un
+#define C_ONE %f54
+#define C_ONE_LO %f55
+
+! masks
+#define MSK_SIGN %i5
+#define MSK_BIT31 %f30
+#define MSK_BIT13 %f31
+#define MSK_BITSHI17 %f44
+
+
+! constants for pp and qq
+#define C_pp1 %f56
+#define C_pp2 %f58
+#define C_qq1 %f60
+#define C_qq2 %f62
+
+! sign mask
+#define C_signM %i5
+
+#define LIM_l5 %l5
+#define LIM_l6 %l6
+! When in the primary range, this value is used as the transition from poly to table.
+! For the medium range, the use of %l6 changes: it is used to keep track of biguns.
+#define LIM_l7 %l7
+
+!--------------------------------------------------------------------
+
+
+ ENTRY(__vcos)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(g5)
+ PIC_SET(g5,__vlibm_TBL_sincos_hi,l3)
+ PIC_SET(g5,__vlibm_TBL_sincos_lo,l4)
+ PIC_SET(g5,constants,o0)
+ mov %o0,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+
+! ========== primary range ==========
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! l0 hx0
+! l1 hx1
+! l2 hx2
+! l3 __vlibm_TBL_sincos_hi
+! l4 __vlibm_TBL_sincos_lo
+! l5 0x3fc40000
+! l6 0x3e400000
+! l7 0x3fe921fb
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 scratch
+! g5
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 oy0
+! o4 oy1
+! o5 oy2
+! o7 scratch
+
+! f0 x0
+! f2
+! f4
+! f6
+! f8 scratch for table base
+! f9 signbit0
+! f10 x1
+! f12
+! f14
+! f16
+! f18 scratch for table base
+! f19 signbit1
+! f20 x2
+! f22
+! f24
+! f26
+! f28 scratch for table base
+! f29 signbit2
+! f30 0x80000000
+! f31 0x4000
+! f32
+! f34
+! f36
+! f38
+! f40
+! f42
+! f44 0xffff800000000000
+! f46 p1
+! f48 p2
+! f50 p3
+! f52 p4
+! f54 one
+! f56 pp1
+! f58 pp2
+! f60 qq1
+! f62 qq2
+
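+! Elements whose high word hx lies in [0x3e400000, 0x3fe921fb] (roughly
+! 2^-27 <= |x| <= pi/4) are handled in this primary-range loop: below the
+! 0x3fc40000 threshold (|x| < ~0.15625) cos is evaluated with the plain
+! q-polynomial, otherwise the argument is split against an entry of
+! __vlibm_TBL_sincos_hi/lo and corrected with the short pp/qq polynomials.
+! Arguments outside the window divert to .range0/.range1/.range2.
+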
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ysave]
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ysave]
+#endif
+
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sysave]
+ sethi %hi(0x80000000),MSK_SIGN ! load/set up constants
+ sethi %hi(0x3fc40000),LIM_l5
+ sethi %hi(0x3e400000),LIM_l6
+ sethi %hi(0x3fe921fb),LIM_l7
+ or LIM_l7,%lo(0x3fe921fb),LIM_l7
+ ldd [%g1+f30val],MSK_BIT31
+ ldd [%g1+mask],MSK_BITSHI17
+ ldd [%g1+q1],C_q1
+ ldd [%g1+q2],C_q2
+ ldd [%g1+q3],C_q3
+ ldd [%g1+q4],C_q4
+ ldd [%g1+one],C_ONE
+ ldd [%g1+pp1],C_pp1
+ ldd [%g1+pp2],C_pp2
+ ldd [%g1+qq1],C_qq1
+ ldd [%g1+qq2],C_qq2
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,x0_1,%o3 ! precondition loop
+ add %fp,x0_1,%o4
+ add %fp,x0_1,%o5
+ ld [%i1],%l0 ! hx = *x
+ ld [%i1],P0_f0
+ ld [%i1+4],P0_f1
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+ add %i1,%i2,%i1 ! x += stridex
+
+ ba,pt %icc,.loop0
+!delay slot
+ nop
+
+ .align 32
+.loop0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,LIM_l6,%g1
+ sub LIM_l7,%l0,%o7
+ fands P0_f0,MSK_BIT31,P0_f9 ! save signbit
+
+ lda [%i1]%asi,P1_f10
+ orcc %o7,%g1,%g0
+ mov %i3,%o0 ! py0 = y
+ bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb
+
+! delay slot
+ lda [%i1+4]%asi,P1_f11
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop1
+
+! delay slot
+ andn %l1,MSK_SIGN,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ fabsd P0_f0,P0_f0
+ fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only
+
+.loop1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,LIM_l6,%g1
+ sub LIM_l7,%l1,%o7
+ fands P1_f10,MSK_BIT31,P1_f19 ! save signbit
+
+ lda [%i1]%asi,P2_f20
+ orcc %o7,%g1,%g0
+ mov %i3,%o1 ! py1 = y
+ bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb
+
+! delay slot
+ lda [%i1+4]%asi,P2_f21
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop2
+
+! delay slot
+ andn %l2,MSK_SIGN,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ fabsd P1_f10,P1_f10
+ fmuld C_ONE,C_ONE,C_ONE ! one*one; a nop for alignment only
+
+.loop2:
+ st P0_f6,[%o3]
+ sub %l2,LIM_l6,%g1
+ sub LIM_l7,%l2,%o7
+ fands P2_f20,MSK_BIT31,P2_f29 ! save signbit
+
+ st P0_f7,[%o3+4]
+ orcc %g1,%o7,%g0
+ mov %i3,%o2 ! py2 = y
+ bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb
+
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ cmp %l0,LIM_l5
+ fabsd P2_f20,P2_f20
+ bl,pn %icc,.case4
+
+! delay slot
+ st P1_f16,[%o4]
+ cmp %l1,LIM_l5
+ fpadd32s P0_f0,MSK_BIT13,P0_f8
+ bl,pn %icc,.case2
+
+! delay slot
+ st P1_f17,[%o4+4]
+ cmp %l2,LIM_l5
+ fpadd32s P1_f10,MSK_BIT13,P1_f18
+ bl,pn %icc,.case1
+
+! delay slot
+ st P2_f26,[%o5]
+ mov %o0,%o3
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s P2_f20,MSK_BIT13,P2_f28
+
+ st P2_f27,[%o5+4]
+ fand P0_f8,MSK_BITSHI17,P0_f2
+ mov %o1,%o4
+
+ fand P1_f18,MSK_BITSHI17,P1_f12
+ mov %o2,%o5
+ sub %l0,%o7,%l0
+
+ fand P2_f28,MSK_BITSHI17,P2_f22
+ sub %l1,%o7,%l1
+ sub %l2,%o7,%l2
+
+ fsubd P0_f0,P0_f2,P0_f0
+ srl %l0,10,%l0
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+
+ fsubd P1_f10,P1_f12,P1_f10
+ srl %l1,10,%l1
+
+ fsubd P2_f20,P2_f22,P2_f20
+ srl %l2,10,%l2
+
+ fmuld P0_f0,P0_f0,P0_f2
+ andn %l0,0x1f,%l0
+
+ fmuld P1_f10,P1_f10,P1_f12
+ andn %l1,0x1f,%l1
+
+ fmuld P2_f20,P2_f20,P2_f22
+ andn %l2,0x1f,%l2
+
+ fmuld P0_f2,C_pp2,P0_f6
+ ldd [%g1+%l0],%f32
+
+ fmuld P1_f12,C_pp2,P1_f16
+ ldd [%g1+%l1],%f36
+
+ fmuld P2_f22,C_pp2,P2_f26
+ ldd [%g1+%l2],%f40
+
+ faddd P0_f6,C_pp1,P0_f6
+ fmuld P0_f2,C_qq2,P0_f4
+ ldd [SC_HI+%l0],%f34
+
+ faddd P1_f16,C_pp1,P1_f16
+ fmuld P1_f12,C_qq2,P1_f14
+ ldd [SC_HI+%l1],%f38
+
+ faddd P2_f26,C_pp1,P2_f26
+ fmuld P2_f22,C_qq2,P2_f24
+ ldd [SC_HI+%l2],%f42
+
+ fmuld P0_f2,P0_f6,P0_f6
+ faddd P0_f4,C_qq1,P0_f4
+
+ fmuld P1_f12,P1_f16,P1_f16
+ faddd P1_f14,C_qq1,P1_f14
+
+ fmuld P2_f22,P2_f26,P2_f26
+ faddd P2_f24,C_qq1,P2_f24
+
+ faddd P0_f6,C_ONE,P0_f6
+ fmuld P0_f2,P0_f4,P0_f4
+
+ faddd P1_f16,C_ONE,P1_f16
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P2_f26,C_ONE,P2_f26
+ fmuld P2_f22,P2_f24,P2_f24
+
+ fmuld P0_f0,P0_f6,P0_f6
+ ldd [%o7+%l0],P0_f2
+
+ fmuld P1_f10,P1_f16,P1_f16
+ ldd [%o7+%l1],P1_f12
+
+ fmuld P2_f20,P2_f26,P2_f26
+ ldd [%o7+%l2],P2_f22
+
+ fmuld P0_f4,%f32,P0_f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P1_f14,%f36,P1_f14
+ lda [%i1]%asi,P0_f0
+
+ fmuld P2_f24,%f40,P2_f24
+ lda [%i1+4]%asi,P0_f1
+
+ fmuld P0_f6,%f34,P0_f6
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld P1_f16,%f38,P1_f16
+
+ fmuld P2_f26,%f42,P2_f26
+
+ fsubd P0_f6,P0_f4,P0_f6
+
+ fsubd P1_f16,P1_f14,P1_f16
+
+ fsubd P2_f26,P2_f24,P2_f26
+
+ fsubd P0_f2,P0_f6,P0_f6
+
+ fsubd P1_f12,P1_f16,P1_f16
+
+ fsubd P2_f22,P2_f26,P2_f26
+
+ faddd P0_f6,%f32,P0_f6
+
+ faddd P1_f16,%f36,P1_f16
+
+ faddd P2_f26,%f40,P2_f26
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case1:
+ st P2_f27,[%o5+4]
+ sethi %hi(0x3fc3c000),%o7
+ fand P0_f8,MSK_BITSHI17,P0_f2
+
+ sub %l0,%o7,%l0
+ sub %l1,%o7,%l1
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fand P1_f18,MSK_BITSHI17,P1_f12
+ fmuld P2_f20,P2_f20,P2_f22
+
+ fsubd P0_f0,P0_f2,P0_f0
+ srl %l0,10,%l0
+ mov %o0,%o3
+
+ fsubd P1_f10,P1_f12,P1_f10
+ srl %l1,10,%l1
+ mov %o1,%o4
+
+ fmuld P2_f22,C_q4,P2_f24
+ mov %o2,%o5
+
+ fmuld P0_f0,P0_f0,P0_f2
+ andn %l0,0x1f,%l0
+
+ fmuld P1_f10,P1_f10,P1_f12
+ andn %l1,0x1f,%l1
+
+ faddd P2_f24,C_q3,P2_f24
+
+ fmuld P0_f2,C_pp2,P0_f6
+ ldd [%g1+%l0],%f32
+
+ fmuld P1_f12,C_pp2,P1_f16
+ ldd [%g1+%l1],%f36
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f6,C_pp1,P0_f6
+ fmuld P0_f2,C_qq2,P0_f4
+ ldd [SC_HI+%l0],%f34
+
+ faddd P1_f16,C_pp1,P1_f16
+ fmuld P1_f12,C_qq2,P1_f14
+ ldd [SC_HI+%l1],%f38
+
+ faddd P2_f24,C_q2,P2_f24
+
+ fmuld P0_f2,P0_f6,P0_f6
+ faddd P0_f4,C_qq1,P0_f4
+
+ fmuld P1_f12,P1_f16,P1_f16
+ faddd P1_f14,C_qq1,P1_f14
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f6,C_ONE,P0_f6
+ fmuld P0_f2,P0_f4,P0_f4
+
+ faddd P1_f16,C_ONE,P1_f16
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P2_f24,C_q1,P2_f24
+
+ fmuld P0_f0,P0_f6,P0_f6
+ ldd [%o7+%l0],P0_f2
+
+ fmuld P1_f10,P1_f16,P1_f16
+ ldd [%o7+%l1],P1_f12
+
+ fmuld P0_f4,%f32,P0_f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P1_f14,%f36,P1_f14
+ lda [%i1]%asi,P0_f0
+
+ fmuld P0_f6,%f34,P0_f6
+ lda [%i1+4]%asi,P0_f1
+
+ fmuld P1_f16,%f38,P1_f16
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ fsubd P0_f6,P0_f4,P0_f6
+
+ fsubd P1_f16,P1_f14,P1_f16
+
+ !!(vsin)fmuld P2_f20,P2_f24,P2_f24
+
+ fsubd P0_f2,P0_f6,P0_f6
+
+ fsubd P1_f12,P1_f16,P1_f16
+
+ faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26
+
+ faddd P0_f6,%f32,P0_f6
+
+ faddd P1_f16,%f36,P1_f16
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case2:
+ st P2_f26,[%o5]
+ cmp %l2,LIM_l5
+ fpadd32s P2_f20,MSK_BIT13,P2_f28
+ bl,pn %icc,.case3
+
+! delay slot
+ st P2_f27,[%o5+4]
+ sethi %hi(0x3fc3c000),%o7
+ fand P0_f8,MSK_BITSHI17,P0_f2
+
+ sub %l0,%o7,%l0
+ sub %l2,%o7,%l2
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fand P2_f28,MSK_BITSHI17,P2_f22
+ fmuld P1_f10,P1_f10,P1_f12
+
+ fsubd P0_f0,P0_f2,P0_f0
+ srl %l0,10,%l0
+ mov %o0,%o3
+
+ fsubd P2_f20,P2_f22,P2_f20
+ srl %l2,10,%l2
+ mov %o2,%o5
+
+ fmuld P1_f12,C_q4,P1_f14
+ mov %o1,%o4
+
+ fmuld P0_f0,P0_f0,P0_f2
+ andn %l0,0x1f,%l0
+
+ fmuld P2_f20,P2_f20,P2_f22
+ andn %l2,0x1f,%l2
+
+ faddd P1_f14,C_q3,P1_f14
+
+ fmuld P0_f2,C_pp2,P0_f6
+ ldd [%g1+%l0],%f32
+
+ fmuld P2_f22,C_pp2,P2_f26
+ ldd [%g1+%l2],%f40
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P0_f6,C_pp1,P0_f6
+ fmuld P0_f2,C_qq2,P0_f4
+ ldd [SC_HI+%l0],%f34
+
+ faddd P2_f26,C_pp1,P2_f26
+ fmuld P2_f22,C_qq2,P2_f24
+ ldd [SC_HI+%l2],%f42
+
+ faddd P1_f14,C_q2,P1_f14
+
+ fmuld P0_f2,P0_f6,P0_f6
+ faddd P0_f4,C_qq1,P0_f4
+
+ fmuld P2_f22,P2_f26,P2_f26
+ faddd P2_f24,C_qq1,P2_f24
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P0_f6,C_ONE,P0_f6
+ fmuld P0_f2,P0_f4,P0_f4
+
+ faddd P2_f26,C_ONE,P2_f26
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P1_f14,C_q1,P1_f14
+
+ fmuld P0_f0,P0_f6,P0_f6
+ ldd [%o7+%l0],P0_f2
+
+ fmuld P2_f20,P2_f26,P2_f26
+ ldd [%o7+%l2],P2_f22
+
+ fmuld P0_f4,%f32,P0_f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P2_f24,%f40,P2_f24
+ lda [%i1]%asi,P0_f0
+
+ fmuld P0_f6,%f34,P0_f6
+ lda [%i1+4]%asi,P0_f1
+
+ fmuld P2_f26,%f42,P2_f26
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ fsubd P0_f6,P0_f4,P0_f6
+
+ fsubd P2_f26,P2_f24,P2_f26
+
+ !!(vsin)fmuld P1_f10,P1_f14,P1_f14
+
+ fsubd P0_f2,P0_f6,P0_f6
+
+ fsubd P2_f22,P2_f26,P2_f26
+
+ faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16
+
+ faddd P0_f6,%f32,P0_f6
+
+ faddd P2_f26,%f40,P2_f26
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case3:
+ sethi %hi(0x3fc3c000),%o7
+ fand P0_f8,MSK_BITSHI17,P0_f2
+ fmuld P1_f10,P1_f10,P1_f12
+
+ sub %l0,%o7,%l0
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fmuld P2_f20,P2_f20,P2_f22
+
+ fsubd P0_f0,P0_f2,P0_f0
+ srl %l0,10,%l0
+ mov %o0,%o3
+
+ fmuld P1_f12,C_q4,P1_f14
+ mov %o1,%o4
+
+ fmuld P2_f22,C_q4,P2_f24
+ mov %o2,%o5
+
+ fmuld P0_f0,P0_f0,P0_f2
+ andn %l0,0x1f,%l0
+
+ faddd P1_f14,C_q3,P1_f14
+
+ faddd P2_f24,C_q3,P2_f24
+
+ fmuld P0_f2,C_pp2,P0_f6
+ ldd [%g1+%l0],%f32
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f6,C_pp1,P0_f6
+ fmuld P0_f2,C_qq2,P0_f4
+ ldd [SC_HI+%l0],%f34
+
+ faddd P1_f14,C_q2,P1_f14
+
+ faddd P2_f24,C_q2,P2_f24
+
+ fmuld P0_f2,P0_f6,P0_f6
+ faddd P0_f4,C_qq1,P0_f4
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f6,C_ONE,P0_f6
+ fmuld P0_f2,P0_f4,P0_f4
+
+ faddd P1_f14,C_q1,P1_f14
+
+ faddd P2_f24,C_q1,P2_f24
+
+ fmuld P0_f0,P0_f6,P0_f6
+ ldd [%o7+%l0],P0_f2
+
+ fmuld P0_f4,%f32,P0_f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P1_f12,P1_f14,P1_f14
+ lda [%i1]%asi,P0_f0
+
+ fmuld P0_f6,%f34,P0_f6
+ lda [%i1+4]%asi,P0_f1
+
+ fmuld P2_f22,P2_f24,P2_f24
+ add %i1,%i2,%i1 ! x += stridex
+
+ !!(vsin)fmuld P1_f10,P1_f14,P1_f14
+
+ fsubd P0_f6,P0_f4,P0_f6
+
+ !!(vsin)fmuld P2_f20,P2_f24,P2_f24
+
+ faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16
+
+ fsubd P0_f2,P0_f6,P0_f6
+
+ faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26
+
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ faddd P0_f6,%f32,P0_f6
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case4:
+ st P1_f17,[%o4+4]
+ cmp %l1,LIM_l5
+ fpadd32s P1_f10,MSK_BIT13,P1_f18
+ bl,pn %icc,.case6
+
+! delay slot
+ st P2_f26,[%o5]
+ cmp %l2,LIM_l5
+ fpadd32s P2_f20,MSK_BIT13,P2_f28
+ bl,pn %icc,.case5
+
+! delay slot
+ st P2_f27,[%o5+4]
+ sethi %hi(0x3fc3c000),%o7
+ fand P1_f18,MSK_BITSHI17,P1_f12
+
+ sub %l1,%o7,%l1
+ sub %l2,%o7,%l2
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fand P2_f28,MSK_BITSHI17,P2_f22
+ fmuld P0_f0,P0_f0,P0_f2
+
+ fsubd P1_f10,P1_f12,P1_f10
+ srl %l1,10,%l1
+ mov %o1,%o4
+
+ fsubd P2_f20,P2_f22,P2_f20
+ srl %l2,10,%l2
+ mov %o2,%o5
+
+ fmovd P0_f0,P0_f6 !ID for processing
+ fmuld P0_f2,C_q4,P0_f4
+ mov %o0,%o3
+
+ fmuld P1_f10,P1_f10,P1_f12
+ andn %l1,0x1f,%l1
+
+ fmuld P2_f20,P2_f20,P2_f22
+ andn %l2,0x1f,%l2
+
+ faddd P0_f4,C_q3,P0_f4
+
+ fmuld P1_f12,C_pp2,P1_f16
+ ldd [%g1+%l1],%f36
+
+ fmuld P2_f22,C_pp2,P2_f26
+ ldd [%g1+%l2],%f40
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ faddd P1_f16,C_pp1,P1_f16
+ fmuld P1_f12,C_qq2,P1_f14
+ ldd [SC_HI+%l1],%f38
+
+ faddd P2_f26,C_pp1,P2_f26
+ fmuld P2_f22,C_qq2,P2_f24
+ ldd [SC_HI+%l2],%f42
+
+ faddd P0_f4,C_q2,P0_f4
+
+ fmuld P1_f12,P1_f16,P1_f16
+ faddd P1_f14,C_qq1,P1_f14
+
+ fmuld P2_f22,P2_f26,P2_f26
+ faddd P2_f24,C_qq1,P2_f24
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ faddd P1_f16,C_ONE,P1_f16
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P2_f26,C_ONE,P2_f26
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f4,C_q1,P0_f4
+
+ fmuld P1_f10,P1_f16,P1_f16
+ ldd [%o7+%l1],P1_f12
+
+ fmuld P2_f20,P2_f26,P2_f26
+ ldd [%o7+%l2],P2_f22
+
+ fmuld P1_f14,%f36,P1_f14
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P2_f24,%f40,P2_f24
+ lda [%i1]%asi,P0_f0
+
+ fmuld P1_f16,%f38,P1_f16
+ lda [%i1+4]%asi,P0_f1
+
+ fmuld P2_f26,%f42,P2_f26
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fsubd P1_f16,P1_f14,P1_f16
+
+ fsubd P2_f26,P2_f24,P2_f26
+
+ !!(vsin)fmuld P0_f6,P0_f4,P0_f4
+
+ fsubd P1_f12,P1_f16,P1_f16
+
+ fsubd P2_f22,P2_f26,P2_f26
+
+ faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing
+
+ faddd P1_f16,%f36,P1_f16
+
+ faddd P2_f26,%f40,P2_f26
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case5:
+ sethi %hi(0x3fc3c000),%o7
+ fand P1_f18,MSK_BITSHI17,P1_f12
+ fmuld P0_f0,P0_f0,P0_f2
+
+ sub %l1,%o7,%l1
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fmuld P2_f20,P2_f20,P2_f22
+
+ fsubd P1_f10,P1_f12,P1_f10
+ srl %l1,10,%l1
+ mov %o1,%o4
+
+ fmovd P0_f0,P0_f6 !ID for processing
+ fmuld P0_f2,C_q4,P0_f4
+ mov %o0,%o3
+
+ fmuld P2_f22,C_q4,P2_f24
+ mov %o2,%o5
+
+ fmuld P1_f10,P1_f10,P1_f12
+ andn %l1,0x1f,%l1
+
+ faddd P0_f4,C_q3,P0_f4
+
+ faddd P2_f24,C_q3,P2_f24
+
+ fmuld P1_f12,C_pp2,P1_f16
+ ldd [%g1+%l1],%f36
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P1_f16,C_pp1,P1_f16
+ fmuld P1_f12,C_qq2,P1_f14
+ ldd [SC_HI+%l1],%f38
+
+ faddd P0_f4,C_q2,P0_f4
+
+ faddd P2_f24,C_q2,P2_f24
+
+ fmuld P1_f12,P1_f16,P1_f16
+ faddd P1_f14,C_qq1,P1_f14
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P1_f16,C_ONE,P1_f16
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P0_f4,C_q1,P0_f4
+
+ faddd P2_f24,C_q1,P2_f24
+
+ fmuld P1_f10,P1_f16,P1_f16
+ ldd [%o7+%l1],P1_f12
+
+ fmuld P1_f14,%f36,P1_f14
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P0_f2,P0_f4,P0_f4
+ lda [%i1]%asi,P0_f0
+
+ fmuld P1_f16,%f38,P1_f16
+ lda [%i1+4]%asi,P0_f1
+
+ fmuld P2_f22,P2_f24,P2_f24
+ add %i1,%i2,%i1 ! x += stridex
+
+ !!(vsin)fmuld P0_f6,P0_f4,P0_f4
+
+ fsubd P1_f16,P1_f14,P1_f16
+
+ !!(vsin)fmuld P2_f20,P2_f24,P2_f24
+
+ faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing
+
+ fsubd P1_f12,P1_f16,P1_f16
+
+ faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26
+
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ faddd P1_f16,%f36,P1_f16
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case6:
+ st P2_f27,[%o5+4]
+ cmp %l2,LIM_l5
+ fpadd32s P2_f20,MSK_BIT13,P2_f28
+ bl,pn %icc,.case7
+
+! delay slot
+ sethi %hi(0x3fc3c000),%o7
+ fand P2_f28,MSK_BITSHI17,P2_f22
+ fmuld P0_f0,P0_f0,P0_f2
+
+ sub %l2,%o7,%l2
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fmuld P1_f10,P1_f10,P1_f12
+
+ fsubd P2_f20,P2_f22,P2_f20
+ srl %l2,10,%l2
+ mov %o2,%o5
+
+ fmovd P0_f0,P0_f6 !ID for processing
+ fmuld P0_f2,C_q4,P0_f4
+ mov %o0,%o3
+
+ fmuld P1_f12,C_q4,P1_f14
+ mov %o1,%o4
+
+ fmuld P2_f20,P2_f20,P2_f22
+ andn %l2,0x1f,%l2
+
+ faddd P0_f4,C_q3,P0_f4
+
+ faddd P1_f14,C_q3,P1_f14
+
+ fmuld P2_f22,C_pp2,P2_f26
+ ldd [%g1+%l2],%f40
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P2_f26,C_pp1,P2_f26
+ fmuld P2_f22,C_qq2,P2_f24
+ ldd [SC_HI+%l2],%f42
+
+ faddd P0_f4,C_q2,P0_f4
+
+ faddd P1_f14,C_q2,P1_f14
+
+ fmuld P2_f22,P2_f26,P2_f26
+ faddd P2_f24,C_qq1,P2_f24
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ faddd P2_f26,C_ONE,P2_f26
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f4,C_q1,P0_f4
+
+ faddd P1_f14,C_q1,P1_f14
+
+ fmuld P2_f20,P2_f26,P2_f26
+ ldd [%o7+%l2],P2_f22
+
+ fmuld P2_f24,%f40,P2_f24
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P0_f2,P0_f4,P0_f4
+ lda [%i1]%asi,P0_f0
+
+ fmuld P2_f26,%f42,P2_f26
+ lda [%i1+4]%asi,P0_f1
+
+ fmuld P1_f12,P1_f14,P1_f14
+ add %i1,%i2,%i1 ! x += stridex
+
+ !!(vsin)fmuld P0_f6,P0_f4,P0_f4
+
+ fsubd P2_f26,P2_f24,P2_f26
+
+ !!(vsin)fmuld P1_f10,P1_f14,P1_f14
+
+ faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing
+
+ fsubd P2_f22,P2_f26,P2_f26
+
+ faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16
+
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ faddd P2_f26,%f40,P2_f26
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case7:
+ fmuld P0_f0,P0_f0,P0_f2
+ fmovd P0_f0,P0_f6 !ID for processing
+ mov %o0,%o3
+
+ fmuld P1_f10,P1_f10,P1_f12
+ mov %o1,%o4
+
+ fmuld P2_f20,P2_f20,P2_f22
+ mov %o2,%o5
+
+ fmuld P0_f2,C_q4,P0_f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld P1_f12,C_q4,P1_f14
+ lda [%i1]%asi,P0_f0
+
+ fmuld P2_f22,C_q4,P2_f24
+ lda [%i1+4]%asi,P0_f1
+
+ faddd P0_f4,C_q3,P0_f4
+ add %i1,%i2,%i1 ! x += stridex
+
+ faddd P1_f14,C_q3,P1_f14
+
+ faddd P2_f24,C_q3,P2_f24
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f4,C_q2,P0_f4
+
+ faddd P1_f14,C_q2,P1_f14
+
+ faddd P2_f24,C_q2,P2_f24
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ faddd P0_f4,C_q1,P0_f4
+
+ faddd P1_f14,C_q1,P1_f14
+
+ faddd P2_f24,C_q1,P2_f24
+
+ fmuld P0_f2,P0_f4,P0_f4
+
+ fmuld P1_f12,P1_f14,P1_f14
+
+ fmuld P2_f22,P2_f24,P2_f24
+
+ !!(vsin)fmuld P0_f6,P0_f4,P0_f4
+
+ !!(vsin)fmuld P1_f10,P1_f14,P1_f14
+
+ !!(vsin)fmuld P2_f20,P2_f24,P2_f24
+
+ faddd C_ONE,P0_f4,P0_f6 !!(vsin)faddd P0_f6,P0_f4,P0_f6 ! faddd then spaces for processing
+
+ faddd C_ONE,P1_f14,P1_f16 !!(vsin)faddd P1_f10,P1_f14,P1_f16
+
+ faddd C_ONE,P2_f24,P2_f26 !!(vsin)faddd P2_f20,P2_f24,P2_f26
+ andn %l0,MSK_SIGN,%l0 ! hx &= ~0x80000000
+
+ nop !!(vsin) fors P0_f6,P0_f9,P0_f6
+ addcc %i0,-1,%i0
+
+ nop !!(vsin) fors P1_f16,P1_f19,P1_f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ nop !!(vsin) fors P2_f26,P2_f29,P2_f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+
+ .align 32
+.endloop2:
+ cmp %l1,LIM_l5
+ bl,pn %icc,1f
+! delay slot
+ fabsd P1_f10,P1_f10
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s P1_f10,MSK_BIT13,P1_f18
+ fand P1_f18,MSK_BITSHI17,P1_f12
+ sub %l1,%o7,%l1
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fsubd P1_f10,P1_f12,P1_f10
+ srl %l1,10,%l1
+ fmuld P1_f10,P1_f10,P1_f12
+ andn %l1,0x1f,%l1
+ fmuld P1_f12,C_pp2,P2_f20
+ ldd [%g1+%l1],%f36
+ faddd P2_f20,C_pp1,P2_f20
+ fmuld P1_f12,C_qq2,P1_f14
+ ldd [SC_HI+%l1],%f38
+ fmuld P1_f12,P2_f20,P2_f20
+ faddd P1_f14,C_qq1,P1_f14
+ faddd P2_f20,C_ONE,P2_f20
+ fmuld P1_f12,P1_f14,P1_f14
+ fmuld P1_f10,P2_f20,P2_f20
+ ldd [%o7+%l1],P1_f12
+ fmuld P1_f14,%f36,P1_f14
+ fmuld P2_f20,%f38,P2_f20
+ fsubd P2_f20,P1_f14,P2_f20
+ fsubd P1_f12,P2_f20,P2_f20
+ ba,pt %icc,2f
+! delay slot
+ faddd P2_f20,%f36,P2_f20
+1:
+ fmuld P1_f10,P1_f10,P1_f12
+ fmuld P1_f12,C_q4,P1_f14
+ faddd P1_f14,C_q3,P1_f14
+ fmuld P1_f12,P1_f14,P1_f14
+ faddd P1_f14,C_q2,P1_f14
+ fmuld P1_f12,P1_f14,P1_f14
+ faddd P1_f14,C_q1,P1_f14
+ fmuld P1_f12,P1_f14,P1_f14
+ !!(vsin)fmuld P1_f10,P1_f14,P1_f14
+ faddd C_ONE,P1_f14,P2_f20 !!(vsin)faddd P1_f10,P1_f14,P2_f20
+2:
+ nop !!(vsin) fors P2_f20,P1_f19,P2_f20
+ st P2_f20,[%o1]
+ st P2_f21,[%o1+4]
+
+.endloop1:
+ cmp %l0,LIM_l5
+ bl,pn %icc,1f
+! delay slot
+ fabsd P0_f0,P0_f0
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s P0_f0,MSK_BIT13,P0_f8
+ fand P0_f8,MSK_BITSHI17,P0_f2
+ sub %l0,%o7,%l0
+ add SC_HI,8,%g1;add SC_LO,8,%o7
+ fsubd P0_f0,P0_f2,P0_f0
+ srl %l0,10,%l0
+ fmuld P0_f0,P0_f0,P0_f2
+ andn %l0,0x1f,%l0
+ fmuld P0_f2,C_pp2,P2_f20
+ ldd [%g1+%l0],%f32
+ faddd P2_f20,C_pp1,P2_f20
+ fmuld P0_f2,C_qq2,P0_f4
+ ldd [SC_HI+%l0],%f34
+ fmuld P0_f2,P2_f20,P2_f20
+ faddd P0_f4,C_qq1,P0_f4
+ faddd P2_f20,C_ONE,P2_f20
+ fmuld P0_f2,P0_f4,P0_f4
+ fmuld P0_f0,P2_f20,P2_f20
+ ldd [%o7+%l0],P0_f2
+ fmuld P0_f4,%f32,P0_f4
+ fmuld P2_f20,%f34,P2_f20
+ fsubd P2_f20,P0_f4,P2_f20
+ fsubd P0_f2,P2_f20,P2_f20
+ ba,pt %icc,2f
+! delay slot
+ faddd P2_f20,%f32,P2_f20
+1:
+ fmuld P0_f0,P0_f0,P0_f2
+ fmuld P0_f2,C_q4,P0_f4
+ faddd P0_f4,C_q3,P0_f4
+ fmuld P0_f2,P0_f4,P0_f4
+ faddd P0_f4,C_q2,P0_f4
+ fmuld P0_f2,P0_f4,P0_f4
+ faddd P0_f4,C_q1,P0_f4
+ fmuld P0_f2,P0_f4,P0_f4
+ !!(vsin)fmuld P0_f0,P0_f4,P0_f4
+ faddd C_ONE,P0_f4,P2_f20 !!(vsin)faddd P0_f0,P0_f4,P2_f20
+2:
+ nop !!(vsin) fors P2_f20,P0_f9,P2_f20
+ st P2_f20,[%o0]
+ st P2_f21,[%o0+4]
+
+.endloop0:
+ st P0_f6,[%o3]
+ st P0_f7,[%o3+4]
+ st P1_f16,[%o4]
+ st P1_f17,[%o4+4]
+ st P2_f26,[%o5]
+ st P2_f27,[%o5+4]
+
+! return. finished off with only primary range arguments
+
+ ret
+ restore
+
+
+ .align 32
+.range0:
+ cmp %l0,LIM_l6
+ bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg.
+! delay slot, annulled if branch not taken
+ mov 0x1,LIM_l6 ! set biguns flag or
+	fdtoi P0_f0,P0_f2; fmovd C_ONE,P0_f0 ; st P0_f0,[%o0] ! *y = 1.0 with inexact if x nonzero
+ st P0_f1,[%o0+4]
+ !nop ! (vsin) fdtoi P0_f0,P0_f2
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop0
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l1,MSK_SIGN,%l0 ! hx &= ~0x80000000
+ fmovd P1_f10,P0_f0
+ ba,pt %icc,.loop0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.range1:
+ cmp %l1,LIM_l6
+ bg,a,pt %icc,.MEDIUM ! branch to Medium range on big arg.
+! delay slot, annulled if branch not taken
+ mov 0x2,LIM_l6 ! set biguns flag or
+	fdtoi P1_f10,P1_f12; fmovd C_ONE,P1_f10 ; st P1_f10,[%o1] ! *y = 1.0 with inexact if x nonzero
+ st P1_f11,[%o1+4]
+ !nop ! (vsin) fdtoi P1_f10,P1_f12
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l2,MSK_SIGN,%l1 ! hx &= ~0x80000000
+ fmovd P2_f20,P1_f10
+ ba,pt %icc,.loop1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.range2:
+ cmp %l2,LIM_l6
+	bg,a,pt	%icc,.MEDIUM		! branch to Medium range on big arg.
+! delay slot, annulled if branch not taken
+ mov 0x3,LIM_l6 ! set biguns flag or
+	fdtoi P2_f20,P2_f22; fmovd C_ONE,P2_f20 ; st P2_f20,[%o2] ! *y = 1.0 with inexact if x nonzero
+ st P2_f21,[%o2+4]
+ nop ! (vsin) fdtoi P2_f20,P2_f22
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop2
+! delay slot
+ nop
+ ld [%i1],%l2
+ ld [%i1],P2_f20
+ ld [%i1+4],P2_f21
+ andn %l2,MSK_SIGN,%l2 ! hx &= ~0x80000000
+ ba,pt %icc,.loop2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.MEDIUM:
+
+! ========== medium range ==========
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! l0 hx0
+! l1 hx1
+! l2 hx2
+! l3 __vlibm_TBL_sincos_hi
+! l4 __vlibm_TBL_sincos_lo
+! l5 constants
+! l6 biguns stored here : still called LIM_l6
+! l7 0x413921fb
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 scratch
+! g5
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 n0
+! o4 n1
+! o5 n2
+! o7 scratch
+
+! f0 x0
+! f2 n0,y0
+! f4
+! f6
+! f8 scratch for table base
+! f9 signbit0
+! f10 x1
+! f12 n1,y1
+! f14
+! f16
+! f18 scratch for table base
+! f19 signbit1
+! f20 x2
+! f22 n2,y2
+! f24
+! f26
+! f28 scratch for table base
+! f29 signbit2
+! f30 0x80000000
+! f31 0x4000
+! f32
+! f34
+! f36
+! f38
+! f40 invpio2
+! f42 round
+! f44 0xffff800000000000
+! f46 pio2_1
+! f48 pio2_2
+! f50 pio2_3
+! f52 pio2_3t
+! f54 one
+! f56 pp1
+! f58 pp2
+! f60 qq1
+! f62 qq2
+
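+! Medium-range argument reduction: n = nearest integer to x*invpio2,
+! obtained by adding and then subtracting the 1.5*2^52 rounding constant
+! (the integer lands in the low word, stored to n0/n1/n2); the reduced
+! argument is x - n*pi/2, formed with the three-piece pio2_1/2/3 split plus
+! the tail pio2_3t so a head/tail pair with extra precision survives.  The
+! low bits of n later steer the sign and case selection (see .FIXSIGN).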
+
+ PIC_SET(g5,constants,l5)
+
+ ! %o3,%o4,%o5 need to be stored
+ st P0_f6,[%o3]
+ sethi %hi(0x413921fb),%l7
+ st P0_f7,[%o3+4]
+ or %l7,%lo(0x413921fb),%l7
+ st P1_f16,[%o4]
+ st P1_f17,[%o4+4]
+ st P2_f26,[%o5]
+ st P2_f27,[%o5+4]
+ ldd [%l5+invpio2],%f40
+ ldd [%l5+round],%f42
+ ldd [%l5+pio2_1],%f46
+ ldd [%l5+pio2_2],%f48
+ ldd [%l5+pio2_3],%f50
+ ldd [%l5+pio2_3t],%f52
+ std %f54,[%fp+x0_1+8] ! set up stack data
+ std %f54,[%fp+x1_1+8]
+ std %f54,[%fp+x2_1+8]
+ stx %g0,[%fp+y0_0+8]
+ stx %g0,[%fp+y1_0+8]
+ stx %g0,[%fp+y2_0+8]
+
+! branched here in the middle of the array. Need to adjust
+! for the members of the triple that were selected in the primary
+! loop.
+
+! no adjustment since all three selected here
+ subcc LIM_l6,0x1,%g0 ! continue in LOOP0?
+ bz,a %icc,.LOOP0
+ mov 0x0,LIM_l6 ! delay slot set biguns=0
+
+! adjust 1st triple since 2nd and 3rd done here
+ subcc LIM_l6,0x2,%g0 ! continue in LOOP1?
+ fmuld %f0,%f40,%f2 ! adj LOOP0
+ bz,a %icc,.LOOP1
+ mov 0x0,LIM_l6 ! delay slot set biguns=0
+
+! adjust 1st and 2nd triple since 3rd done here
+ subcc LIM_l6,0x3,%g0 ! continue in LOOP2?
+ !done fmuld %f0,%f40,%f2 ! adj LOOP0
+ sub %i3,%i4,%i3 ! adjust to not double increment
+ fmuld %f10,%f40,%f12 ! adj LOOP1
+ faddd %f2,%f42,%f2 ! adj LOOP1
+ bz,a %icc,.LOOP2
+ mov 0x0,LIM_l6 ! delay slot set biguns=0
+
+ ba .LOOP0
+ nop
+
+! -- 16 byte aligned
+
+ .align 32
+.LOOP0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ mov %i3,%o0 ! py0 = y
+
+ lda [%i1]%asi,%f10
+ cmp %l0,%l7
+ add %i3,%i4,%i3 ! y += stridey
+ bg,pn %icc,.BIG0 ! if hx > 0x413921fb
+
+! delay slot
+ lda [%i1+4]%asi,%f11
+ addcc %i0,-1,%i0
+ add %i1,%i2,%i1 ! x += stridex
+ ble,pn %icc,.ENDLOOP1
+
+! delay slot
+ andn %l1,%i5,%l1
+ nop
+ fmuld %f0,%f40,%f2
+ fabsd %f54,%f54 ! a nop for alignment only
+
+.LOOP1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ mov %i3,%o1 ! py1 = y
+
+ lda [%i1]%asi,%f20
+ cmp %l1,%l7
+ add %i3,%i4,%i3 ! y += stridey
+ bg,pn %icc,.BIG1 ! if hx > 0x413921fb
+
+! delay slot
+ lda [%i1+4]%asi,%f21
+ addcc %i0,-1,%i0
+ add %i1,%i2,%i1 ! x += stridex
+ ble,pn %icc,.ENDLOOP2
+
+! delay slot
+ andn %l2,%i5,%l2
+ nop
+ fmuld %f10,%f40,%f12
+ faddd %f2,%f42,%f2
+
+.LOOP2:
+ st %f3,[%fp+n0]
+ mov %i3,%o2 ! py2 = y
+
+ cmp %l2,%l7
+ add %i3,%i4,%i3 ! y += stridey
+ fmuld %f20,%f40,%f22
+ bg,pn %icc,.BIG2 ! if hx > 0x413921fb
+
+! delay slot
+ add %l5,thresh+4,%o7
+ faddd %f12,%f42,%f12
+ st %f13,[%fp+n1]
+
+! -
+
+ add %l5,thresh,%g1
+ faddd %f22,%f42,%f22
+ st %f23,[%fp+n2]
+
+ fsubd %f2,%f42,%f2 ! n
+
+ fsubd %f12,%f42,%f12 ! n
+
+ fsubd %f22,%f42,%f22 ! n
+
+ fmuld %f2,%f46,%f4
+
+ fmuld %f12,%f46,%f14
+
+ fmuld %f22,%f46,%f24
+
+ fsubd %f0,%f4,%f4
+ fmuld %f2,%f48,%f6
+
+ fsubd %f10,%f14,%f14
+ fmuld %f12,%f48,%f16
+
+ fsubd %f20,%f24,%f24
+ fmuld %f22,%f48,%f26
+
+ fsubd %f4,%f6,%f0
+ ld [%fp+n0],%o3 ; add %o3,1,%o3
+
+ fsubd %f14,%f16,%f10
+ ld [%fp+n1],%o4 ; add %o4,1,%o4
+
+ fsubd %f24,%f26,%f20
+ ld [%fp+n2],%o5 ; add %o5,1,%o5
+
+ fsubd %f4,%f0,%f32
+ and %o3,1,%o3
+
+ fsubd %f14,%f10,%f34
+ and %o4,1,%o4
+
+ fsubd %f24,%f20,%f36
+ and %o5,1,%o5
+
+ fsubd %f32,%f6,%f32
+ fmuld %f2,%f50,%f8
+ sll %o3,3,%o3
+
+ fsubd %f34,%f16,%f34
+ fmuld %f12,%f50,%f18
+ sll %o4,3,%o4
+
+ fsubd %f36,%f26,%f36
+ fmuld %f22,%f50,%f28
+ sll %o5,3,%o5
+
+ fsubd %f8,%f32,%f8
+ ld [%g1+%o3],%f6
+
+ fsubd %f18,%f34,%f18
+ ld [%g1+%o4],%f16
+
+ fsubd %f28,%f36,%f28
+ ld [%g1+%o5],%f26
+
+ fsubd %f0,%f8,%f4
+
+ fsubd %f10,%f18,%f14
+
+ fsubd %f20,%f28,%f24
+
+ fsubd %f0,%f4,%f32
+
+ fsubd %f10,%f14,%f34
+
+ fsubd %f20,%f24,%f36
+
+ fsubd %f32,%f8,%f32
+ fmuld %f2,%f52,%f2
+
+ fsubd %f34,%f18,%f34
+ fmuld %f12,%f52,%f12
+
+ fsubd %f36,%f28,%f36
+ fmuld %f22,%f52,%f22
+
+ fsubd %f2,%f32,%f2
+ ld [%o7+%o3],%f8
+
+ fsubd %f12,%f34,%f12
+ ld [%o7+%o4],%f18
+
+ fsubd %f22,%f36,%f22
+ ld [%o7+%o5],%f28
+
+ fsubd %f4,%f2,%f0 ! x
+
+ fsubd %f14,%f12,%f10 ! x
+
+ fsubd %f24,%f22,%f20 ! x
+
+ fsubd %f4,%f0,%f4
+
+ fsubd %f14,%f10,%f14
+
+ fsubd %f24,%f20,%f24
+
+ fands %f0,%f30,%f9 ! save signbit
+
+ fands %f10,%f30,%f19 ! save signbit
+
+ fands %f20,%f30,%f29 ! save signbit
+
+ fabsd %f0,%f0
+ std %f0,[%fp+x0_1]
+
+ fabsd %f10,%f10
+ std %f10,[%fp+x1_1]
+
+ fabsd %f20,%f20
+ std %f20,[%fp+x2_1]
+
+ fsubd %f4,%f2,%f2 ! y
+
+ fsubd %f14,%f12,%f12 ! y
+
+ fsubd %f24,%f22,%f22 ! y
+
+ fcmpgt32 %f6,%f0,%l0
+
+ fcmpgt32 %f16,%f10,%l1
+
+ fcmpgt32 %f26,%f20,%l2
+
+! -- 16 byte aligned
+ fxors %f2,%f9,%f2
+
+ fxors %f12,%f19,%f12
+
+ fxors %f22,%f29,%f22
+
+ fands %f9,%f8,%f9 ! if (n & 1) clear sign bit
+ andcc %l0,2,%g0
+ bne,pn %icc,.CASE4
+
+! delay slot
+ fands %f19,%f18,%f19 ! if (n & 1) clear sign bit
+ andcc %l1,2,%g0
+ bne,pn %icc,.CASE2
+
+! delay slot
+ fands %f29,%f28,%f29 ! if (n & 1) clear sign bit
+ andcc %l2,2,%g0
+ bne,pn %icc,.CASE1
+
+! delay slot
+ fpadd32s %f0,%f31,%f8
+ sethi %hi(0x3fc3c000),%o7
+ ld [%fp+x0_1],%l0
+
+ fpadd32s %f10,%f31,%f18
+ add %l3,8,%g1
+ ld [%fp+x1_1],%l1
+
+ fpadd32s %f20,%f31,%f28
+ ld [%fp+x2_1],%l2
+
+ fand %f8,%f44,%f4
+ sub %l0,%o7,%l0
+
+ fand %f18,%f44,%f14
+ sub %l1,%o7,%l1
+
+ fand %f28,%f44,%f24
+ sub %l2,%o7,%l2
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+
+ fmuld %f0,%f6,%f6
+ ldd [%g1+%l0],%f2
+
+ fmuld %f10,%f16,%f16
+ ldd [%g1+%l1],%f12
+
+ fmuld %f20,%f26,%f26
+ ldd [%g1+%l2],%f22
+
+ fmuld %f4,%f32,%f4
+ ldd [%l4+%l0],%f0
+
+ fmuld %f14,%f34,%f14
+ ldd [%l4+%l1],%f10
+
+ fmuld %f24,%f36,%f24
+ ldd [%l4+%l2],%f20
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f26,%f22,%f26
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f14,%f16
+
+ faddd %f26,%f24,%f26
+
+ faddd %f6,%f0,%f6
+
+ faddd %f16,%f10,%f16
+
+ faddd %f26,%f20,%f26
+
+ faddd %f6,%f32,%f6
+
+ faddd %f16,%f34,%f16
+
+ faddd %f26,%f36,%f26
+
+.FIXSIGN:
+ ld [%fp+n0],%o3 ; add %o3,1,%o3
+ add %l5,thresh-4,%g1
+
+ ld [%fp+n1],%o4 ; add %o4,1,%o4
+
+ ld [%fp+n2],%o5 ; add %o5,1,%o5
+ and %o3,2,%o3
+
+ sll %o3,2,%o3
+ and %o4,2,%o4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ sll %o4,2,%o4
+ and %o5,2,%o5
+ ld [%g1+%o3],%f8
+
+ sll %o5,2,%o5
+ ld [%g1+%o4],%f18
+
+ ld [%g1+%o5],%f28
+ fxors %f9,%f8,%f9
+
+ lda [%i1]%asi,%f0
+ fxors %f29,%f28,%f29
+
+ lda [%i1+4]%asi,%f1
+ fxors %f19,%f18,%f19
+
+ fors %f6,%f9,%f6 ! tack on sign
+ add %i1,%i2,%i1 ! x += stridex
+ st %f6,[%o0]
+
+ fors %f26,%f29,%f26 ! tack on sign
+ st %f7,[%o0+4]
+
+ fors %f16,%f19,%f16 ! tack on sign
+ st %f26,[%o2]
+
+ st %f27,[%o2+4]
+ addcc %i0,-1,%i0
+
+ st %f16,[%o1]
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+ bg,pt %icc,.LOOP0
+
+! delay slot
+ st %f17,[%o1+4]
+
+ ba,pt %icc,.ENDLOOP0
+! delay slot
+ nop
+
+ .align 32
+.CASE1:
+ fpadd32s %f10,%f31,%f18
+ sethi %hi(0x3fc3c000),%o7
+ ld [%fp+x0_1],%l0
+
+ fand %f8,%f44,%f4
+ add %l3,8,%g1
+ ld [%fp+x1_1],%l1
+
+ fand %f18,%f44,%f14
+ sub %l0,%o7,%l0
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+ sub %l1,%o7,%l1
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+
+ fmuld %f4,%f32,%f4
+ std %f22,[%fp+y2_0]
+
+ fmuld %f14,%f34,%f14
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f14,%f16
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f6,%f0,%f6
+
+ faddd %f16,%f10,%f16
+
+ faddd %f24,%f22,%f24
+
+ faddd %f6,%f32,%f6
+
+ faddd %f16,%f34,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f36,%f24,%f26
+
+ .align 32
+.CASE2:
+ fpadd32s %f0,%f31,%f8
+ ld [%fp+x0_1],%l0
+ andcc %l2,2,%g0
+ bne,pn %icc,.CASE3
+
+! delay slot
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s %f20,%f31,%f28
+ ld [%fp+x2_1],%l2
+
+ fand %f8,%f44,%f4
+ sub %l0,%o7,%l0
+ add %l3,8,%g1
+
+ fand %f28,%f44,%f24
+ sub %l2,%o7,%l2
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f10,%f14,%f14
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+ ldd [%g1+%l2],%f22
+
+ faddd %f14,%f16,%f14
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f20
+
+ fmuld %f4,%f32,%f4
+ std %f12,[%fp+y1_0]
+
+ fmuld %f24,%f36,%f24
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f26,%f22,%f26
+
+ fmuld %f10,%f14,%f14
+
+ faddd %f6,%f4,%f6
+
+ faddd %f26,%f24,%f26
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ faddd %f6,%f0,%f6
+
+ faddd %f26,%f20,%f26
+
+ faddd %f14,%f12,%f14
+
+ faddd %f6,%f32,%f6
+
+ faddd %f26,%f36,%f26
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f34,%f14,%f16
+
+ .align 32
+.CASE3:
+ fand %f8,%f44,%f4
+ add %l3,8,%g1
+ sub %l0,%o7,%l0
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f20,%f24,%f24
+ std %f22,[%fp+y2_0]
+
+ faddd %f14,%f16,%f14
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+
+ fmuld %f4,%f32,%f4
+
+ fmuld %f20,%f24,%f24
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f24,%f22,%f24
+
+ faddd %f6,%f0,%f6
+
+ faddd %f34,%f14,%f16
+
+ faddd %f36,%f24,%f26
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f6,%f32,%f6
+
+ .align 32
+.CASE4:
+ fands %f29,%f28,%f29 ! if (n & 1) clear sign bit
+ sethi %hi(0x3fc3c000),%o7
+ andcc %l1,2,%g0
+ bne,pn %icc,.CASE6
+
+! delay slot
+ andcc %l2,2,%g0
+ fpadd32s %f10,%f31,%f18
+ ld [%fp+x1_1],%l1
+ bne,pn %icc,.CASE5
+
+! delay slot
+ add %l3,8,%g1
+ ld [%fp+x2_1],%l2
+ fpadd32s %f20,%f31,%f28
+
+ fand %f18,%f44,%f14
+ sub %l1,%o7,%l1
+
+ fand %f28,%f44,%f24
+ sub %l2,%o7,%l2
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f0,%f4,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+ ldd [%g1+%l2],%f22
+
+ faddd %f4,%f6,%f4
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f20
+
+ fmuld %f14,%f34,%f14
+ std %f2,[%fp+y0_0]
+
+ fmuld %f24,%f36,%f24
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f26,%f22,%f26
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ faddd %f16,%f14,%f16
+
+ faddd %f26,%f24,%f26
+
+ faddd %f4,%f2,%f4
+
+ faddd %f16,%f10,%f16
+
+ faddd %f26,%f20,%f26
+
+ faddd %f32,%f4,%f6
+
+ faddd %f16,%f34,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f26,%f36,%f26
+
+ .align 32
+.CASE5:
+ fand %f18,%f44,%f14
+ sub %l1,%o7,%l1
+
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f20,%f24,%f24
+ std %f22,[%fp+y2_0]
+
+ faddd %f4,%f6,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+
+ fmuld %f14,%f34,%f14
+
+ fmuld %f20,%f24,%f24
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f16,%f14,%f16
+
+ faddd %f4,%f2,%f4
+
+ faddd %f24,%f22,%f24
+
+ faddd %f16,%f10,%f16
+
+ faddd %f32,%f4,%f6
+
+ faddd %f36,%f24,%f26
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f16,%f34,%f16
+
+ .align 32
+.CASE6:
+ ld [%fp+x2_1],%l2
+ add %l3,8,%g1
+ bne,pn %icc,.CASE7
+! delay slot
+ fpadd32s %f20,%f31,%f28
+
+ fand %f28,%f44,%f24
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ fmuld %f0,%f0,%f0
+ sub %l2,%o7,%l2
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+
+ faddd %f4,%f6,%f4
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+ ldd [%g1+%l2],%f22
+
+ faddd %f14,%f16,%f14
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f20
+
+ fmuld %f24,%f36,%f24
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f26,%f22,%f26
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ faddd %f26,%f24,%f26
+
+ faddd %f4,%f2,%f4
+
+ faddd %f14,%f12,%f14
+
+ faddd %f26,%f20,%f26
+
+ faddd %f32,%f4,%f6
+
+ faddd %f34,%f14,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f26,%f36,%f26
+
+ .align 32
+.CASE7:
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+
+ fmuld %f20,%f24,%f24
+ std %f22,[%fp+y2_0]
+
+ faddd %f4,%f6,%f4
+
+ faddd %f14,%f16,%f14
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f20,%f24,%f24
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f4,%f2,%f4
+
+ faddd %f14,%f12,%f14
+
+ faddd %f24,%f22,%f24
+
+ faddd %f32,%f4,%f6
+
+ faddd %f34,%f14,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f36,%f24,%f26
+
+
+ .align 32
+.ENDLOOP2:
+ fmuld %f10,%f40,%f12
+ add %l5,thresh,%g1
+ faddd %f12,%f42,%f12
+ st %f13,[%fp+n1]
+ fsubd %f12,%f42,%f12 ! n
+ fmuld %f12,%f46,%f14
+ fsubd %f10,%f14,%f14
+ fmuld %f12,%f48,%f16
+ fsubd %f14,%f16,%f10
+ ld [%fp+n1],%o4 ; add %o4,1,%o4
+ fsubd %f14,%f10,%f34
+ and %o4,1,%o4
+ fsubd %f34,%f16,%f34
+ fmuld %f12,%f50,%f18
+ sll %o4,3,%o4
+ fsubd %f18,%f34,%f18
+ ld [%g1+%o4],%f16
+ fsubd %f10,%f18,%f14
+ fsubd %f10,%f14,%f34
+ add %l5,thresh+4,%o7
+ fsubd %f34,%f18,%f34
+ fmuld %f12,%f52,%f12
+ fsubd %f12,%f34,%f12
+ ld [%o7+%o4],%f18
+ fsubd %f14,%f12,%f10 ! x
+ fsubd %f14,%f10,%f14
+ fands %f10,%f30,%f19 ! save signbit
+ fabsd %f10,%f10
+ std %f10,[%fp+x1_1]
+ fsubd %f14,%f12,%f12 ! y
+ fcmpgt32 %f16,%f10,%l1
+ fxors %f12,%f19,%f12
+ fands %f19,%f18,%f19 ! if (n & 1) clear sign bit
+ andcc %l1,2,%g0
+ bne,pn %icc,1f
+! delay slot
+ nop
+ fpadd32s %f10,%f31,%f18
+ ld [%fp+x1_1],%l1
+ fand %f18,%f44,%f14
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fsubd %f10,%f14,%f10
+ sub %l1,%o7,%l1
+ srl %l1,10,%l1
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+ fmuld %f14,%f34,%f14
+ fmuld %f16,%f12,%f16
+ faddd %f16,%f14,%f16
+ faddd %f16,%f10,%f16
+ ba,pt %icc,2f
+ faddd %f16,%f34,%f16
+1:
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+ faddd %f14,%f16,%f14
+ fmuld %f10,%f14,%f14
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+ faddd %f14,%f12,%f14
+ faddd %f34,%f14,%f16
+2:
+ add %l5,thresh-4,%g1
+ ld [%fp+n1],%o4 ; add %o4,1,%o4
+ and %o4,2,%o4
+ sll %o4,2,%o4
+ ld [%g1+%o4],%f18
+ fxors %f19,%f18,%f19
+ fors %f16,%f19,%f16 ! tack on sign
+ st %f16,[%o1]
+ st %f17,[%o1+4]
+
+.ENDLOOP1:
+ fmuld %f0,%f40,%f2
+ add %l5,thresh,%g1
+ faddd %f2,%f42,%f2
+ st %f3,[%fp+n0]
+ fsubd %f2,%f42,%f2 ! n
+ fmuld %f2,%f46,%f4
+ fsubd %f0,%f4,%f4
+ fmuld %f2,%f48,%f6
+ fsubd %f4,%f6,%f0
+ ld [%fp+n0],%o3 ; add %o3,1,%o3
+ fsubd %f4,%f0,%f32
+ and %o3,1,%o3
+ fsubd %f32,%f6,%f32
+ fmuld %f2,%f50,%f8
+ sll %o3,3,%o3
+ fsubd %f8,%f32,%f8
+ ld [%g1+%o3],%f6
+ fsubd %f0,%f8,%f4
+ fsubd %f0,%f4,%f32
+ add %l5,thresh+4,%o7
+ fsubd %f32,%f8,%f32
+ fmuld %f2,%f52,%f2
+ fsubd %f2,%f32,%f2
+ ld [%o7+%o3],%f8
+ fsubd %f4,%f2,%f0 ! x
+ fsubd %f4,%f0,%f4
+ fands %f0,%f30,%f9 ! save signbit
+ fabsd %f0,%f0
+ std %f0,[%fp+x0_1]
+ fsubd %f4,%f2,%f2 ! y
+ fcmpgt32 %f6,%f0,%l0
+ fxors %f2,%f9,%f2
+ fands %f9,%f8,%f9 ! if (n & 1) clear sign bit
+ andcc %l0,2,%g0
+ bne,pn %icc,1f
+! delay slot
+ nop
+ fpadd32s %f0,%f31,%f8
+ ld [%fp+x0_1],%l0
+ fand %f8,%f44,%f4
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fsubd %f0,%f4,%f0
+ sub %l0,%o7,%l0
+ srl %l0,10,%l0
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+ fmuld %f4,%f32,%f4
+ fmuld %f6,%f2,%f6
+ faddd %f6,%f4,%f6
+ faddd %f6,%f0,%f6
+ ba,pt %icc,2f
+ faddd %f6,%f32,%f6
+1:
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+ faddd %f4,%f6,%f4
+ fmuld %f0,%f4,%f4
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+ faddd %f4,%f2,%f4
+ faddd %f32,%f4,%f6
+2:
+ add %l5,thresh-4,%g1
+ ld [%fp+n0],%o3 ; add %o3,1,%o3
+ and %o3,2,%o3
+ sll %o3,2,%o3
+ ld [%g1+%o3],%f8
+ fxors %f9,%f8,%f9
+ fors %f6,%f9,%f6 ! tack on sign
+ st %f6,[%o0]
+ st %f7,[%o0+4]
+
+.ENDLOOP0:
+
+! check for huge arguments remaining
+
+ tst LIM_l6
+ be,pt %icc,.exit
+! delay slot
+ nop
+
+! ========== huge range (use C code) ==========
+
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ysave],%o3
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ysave],%o3
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sysave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ sra %o4,0,%o4
+ call __vlibm_vcos_big
+ mov %l7,%o5 ! delay slot
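+
+! The fallback above reloads the count, pointers, and strides saved at entry
+! (nsave, xsave, sxsave, ysave, sysave) and calls the C helper
+! __vlibm_vcos_big so it can redo the elements whose arguments were too
+! large for the in-line reduction.  A rough scalar C sketch of that deferral
+! pattern follows; it is conceptual only: the name vcos_big_sketch, the
+! cutoff HX_BIG, and the plain cos() call are stand-ins made up for this
+! sketch, not the helper's actual interface or reduction.
+!
+! #include <math.h>
+! #include <stdint.h>
+! #include <string.h>
+!
+! #define HX_BIG 0x41700000u            /* hypothetical |x| cutoff for this sketch */
+!
+! static void vcos_big_sketch(int n, const double *x, int stridex,
+!                             double *y, int stridey)
+! {
+!     for (int i = 0; i < n; i++, x += stridex, y += stridey) {
+!         uint64_t bits;
+!         memcpy(&bits, x, sizeof bits);             /* high word of |x|      */
+!         uint32_t hx = (uint32_t)(bits >> 32) & 0x7fffffffu;
+!         if (hx >= HX_BIG && hx < 0x7ff00000u)      /* deferred and finite   */
+!             *y = cos(*x);                          /* recompute it slowly   */
+!     }
+! }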
+
+.exit:
+ ret
+ restore
+
+
+ .align 32
+.SKIP0:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP0
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovs %f10,%f0
+ ld [%i1+4],%f1
+ ba,pt %icc,.LOOP0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.SKIP1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovs %f20,%f10
+ ld [%i1+4],%f11
+ ba,pt %icc,.LOOP1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.SKIP2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP2
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ ld [%i1],%l2
+ ld [%i1],%f20
+ ld [%i1+4],%f21
+ andn %l2,%i5,%l2 ! hx &= ~0x80000000
+ ba,pt %icc,.LOOP2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.BIG0:
+ sethi %hi(0x7ff00000),%o7
+ cmp %l0,%o7
+ bl,a,pt %icc,1f ! if hx < 0x7ff00000
+! delay slot, annulled if branch not taken
+ mov %l7,LIM_l6 ! set biguns flag or
+ fsubd %f0,%f0,%f0 ! y = x - x
+ st %f0,[%o0]
+ st %f1,[%o0+4]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP0
+! delay slot, harmless if branch taken
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovd %f10,%f0
+ ba,pt %icc,.LOOP0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.BIG1:
+ sethi %hi(0x7ff00000),%o7
+ cmp %l1,%o7
+ bl,a,pt %icc,1f ! if hx < 0x7ff00000
+! delay slot, annulled if branch not taken
+ mov %l7,LIM_l6 ! set biguns flag or
+ fsubd %f10,%f10,%f10 ! y = x - x
+ st %f10,[%o1]
+ st %f11,[%o1+4]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP1
+! delay slot, harmless if branch taken
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovd %f20,%f10
+ ba,pt %icc,.LOOP1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.BIG2:
+ sethi %hi(0x7ff00000),%o7
+ cmp %l2,%o7
+ bl,a,pt %icc,1f ! if hx < 0x7ff00000
+! delay slot, annulled if branch not taken
+ mov %l7,LIM_l6 ! set biguns flag or
+ fsubd %f20,%f20,%f20 ! y = x - x
+ st %f20,[%o2]
+ st %f21,[%o2+4]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP2
+! delay slot
+ nop
+ ld [%i1],%l2
+ ld [%i1],%f20
+ ld [%i1+4],%f21
+ andn %l2,%i5,%l2 ! hx &= ~0x80000000
+ ba,pt %icc,.LOOP2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+ SET_SIZE(__vcos)
+
diff --git a/usr/src/libm/src/mvec/vis/__vcos_ultra3.S b/usr/src/libm/src/mvec/vis/__vcos_ultra3.S
new file mode 100644
index 0000000..2809bd9
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vcos_ultra3.S
@@ -0,0 +1,3424 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vcos_ultra3.S 1.8 06/01/23 SMI"
+
+ .file "__vcos_ultra3.S"
+
+#include "libm.h"
+#if defined(LIBMVEC_SO_BUILD)
+ .weak __vcos
+ .type __vcos,#function
+ __vcos = __vcos_ultra3
+#endif
+
+ RO_DATA
+ .align 64
+constants:
+ .word 0x42c80000,0x00000000 ! 3 * 2^44
+ .word 0x43380000,0x00000000 ! 3 * 2^51
+ .word 0x3fe45f30,0x6dc9c883 ! invpio2
+ .word 0x3ff921fb,0x54442c00 ! pio2_1
+ .word 0x3d318469,0x898cc400 ! pio2_2
+ .word 0x3a71701b,0x839a2520 ! pio2_3
+ .word 0xbfc55555,0x55555533 ! pp1
+ .word 0x3f811111,0x10e7d53b ! pp2
+ .word 0xbf2a0167,0xe6b3cf9b ! pp3
+ .word 0xbfdfffff,0xffffff65 ! qq1
+ .word 0x3fa55555,0x54f88ed0 ! qq2
+ .word 0xbf56c12c,0xdd185f60 ! qq3
+
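+! The table above holds the usual building blocks: c3two44 and c3two51 are
+! round-and-extract addends, invpio2 and pio2_1..pio2_3 give a three-term
+! Cody-Waite reduction of x by pi/2 for medium arguments, and pp1..pp3 /
+! qq1..qq3 are the sin and cos polynomial cores in z = x*x.  A minimal
+! scalar C sketch of that structure (illustration only: helper names are
+! made up for the sketch, the __vlibm_TBL_sincos2 table split is ignored,
+! nearbyint() replaces the magic-constant rounding trick, and plain Taylor
+! terms stand in for the minimax pp/qq coefficients):
+!
+! #include <math.h>
+! #include <stdint.h>
+! #include <string.h>
+!
+! static double db(uint64_t bits)      /* sketch helper: double from bit pattern */
+! {
+!     double d; memcpy(&d, &bits, sizeof d); return d;
+! }
+!
+! static double cos_medium(double x)
+! {
+!     double invpio2 = db(0x3fe45f306dc9c883ULL);
+!     double pio2_1  = db(0x3ff921fb54442c00ULL);
+!     double pio2_2  = db(0x3d318469898cc400ULL);
+!     double pio2_3  = db(0x3a71701b839a2520ULL);
+!     double n = nearbyint(x * invpio2);                  /* nearest multiple of pi/2 */
+!     double t = ((x - n * pio2_1) - n * pio2_2) - n * pio2_3;
+!     double z = t * t;
+!     double s = t + t * z * (-1.0/6 + z * (1.0/120 - z / 5040));  /* ~ sin(t) */
+!     double c = 1.0 + z * (-1.0/2 + z * (1.0/24 - z / 720));      /* ~ cos(t) */
+!     switch ((long)n & 3) {            /* quadrant picks sin/cos and the sign */
+!     case 0:  return  c;
+!     case 1:  return -s;
+!     case 2:  return -c;
+!     default: return  s;
+!     }
+! }
+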
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define biguns STACK_BIAS-0x20
+#define nk3 STACK_BIAS-0x24
+#define nk2 STACK_BIAS-0x28
+#define nk1 STACK_BIAS-0x2c
+#define nk0 STACK_BIAS-0x30
+#define junk STACK_BIAS-0x38
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! l0 hx0
+! l1 hx1
+! l2 hx2
+! l3 hx3
+! l4 k0
+! l5 k1
+! l6 k2
+! l7 k3
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 __vlibm_TBL_sincos2
+! g5 scratch
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 py3
+! o4 0x3e400000
+! o5 0x3fe921fb,0x4099251e
+! o7 scratch
+
+! f0 hx0
+! f2
+! f4
+! f6
+! f8 hx1
+! f10
+! f12
+! f14
+! f16 hx2
+! f18
+! f20
+! f22
+! f24 hx3
+! f26
+! f28
+! f30
+! f32
+! f34
+! f36
+! f38
+
+#define c3two44 %f40
+#define c3two51 %f42
+#define invpio2 %f44
+#define pio2_1 %f46
+#define pio2_2 %f48
+#define pio2_3 %f50
+#define pp1 %f52
+#define pp2 %f54
+#define pp3 %f56
+#define qq1 %f58
+#define qq2 %f60
+#define qq3 %f62
+
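+! As the register map above indicates, the main loop below works on four
+! elements per pass (hx0..hx3, k0..k3, py0..py3), preloading the next
+! argument with non-faulting loads so the lanes' reductions and polynomial
+! evaluations can be interleaved.  A rough C outline of that strided,
+! four-way traversal (names are made up for the sketch; the per-lane work
+! is shown as a plain cos() call, and the software pipelining, non-faulting
+! preloads, and junk-store tail handling are omitted):
+!
+! #include <math.h>
+!
+! static void vcos_outline(int n, const double *x, int stridex,
+!                          double *y, int stridey)
+! {
+!     while (n >= 4) {                      /* four independent lanes */
+!         for (int lane = 0; lane < 4; lane++) {
+!             *y = cos(*x);                 /* stand-in for a lane's work */
+!             x += stridex;
+!             y += stridey;
+!         }
+!         n -= 4;
+!     }
+!     while (n-- > 0) {                     /* leftover 1..3 elements */
+!         *y = cos(*x);
+!         x += stridex;
+!         y += stridey;
+!     }
+! }
+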
+ ENTRY(__vcos_ultra3)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o0)
+ PIC_SET(l7,__vlibm_TBL_sincos2,o1)
+ mov %o1,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ysave]
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ysave]
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sysave]
+ st %g0,[%fp+biguns] ! biguns = 0
+ ldd [%o0+0x00],c3two44 ! load/set up constants
+ ldd [%o0+0x08],c3two51
+ ldd [%o0+0x10],invpio2
+ ldd [%o0+0x18],pio2_1
+ ldd [%o0+0x20],pio2_2
+ ldd [%o0+0x28],pio2_3
+ ldd [%o0+0x30],pp1
+ ldd [%o0+0x38],pp2
+ ldd [%o0+0x40],pp3
+ ldd [%o0+0x48],qq1
+ ldd [%o0+0x50],qq2
+ ldd [%o0+0x58],qq3
+ sethi %hi(0x80000000),%i5
+ sethi %hi(0x3e400000),%o4
+ sethi %hi(0x3fe921fb),%o5
+ or %o5,%lo(0x3fe921fb),%o5
+ sllx %o5,32,%o5
+ sethi %hi(0x4099251e),%o7
+ or %o7,%lo(0x4099251e),%o7
+ or %o5,%o7,%o5
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,junk,%o1 ! loop prologue
+ add %fp,junk,%o2
+ add %fp,junk,%o3
+ ld [%i1],%l0 ! *x
+ ld [%i1],%f0
+ ld [%i1+4],%f3
+ andn %l0,%i5,%l0 ! mask off sign
+ add %i1,%i2,%i1 ! x += stridex
+ ba .loop0
+ nop
+
+! 16-byte aligned
+ .align 16
+.loop0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,%o4,%g5
+ sub %o5,%l0,%o7
+ fabss %f0,%f2
+
+ lda [%i1]%asi,%f8
+ orcc %o7,%g5,%g0
+ mov %i3,%o0 ! py0 = y
+ bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f11
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last1
+
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f2,c3two44,%f4
+ st %f15,[%o1+4]
+
+.loop1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,%o4,%g5
+ sub %o5,%l1,%o7
+ fabss %f8,%f10
+
+ lda [%i1]%asi,%f16
+ orcc %o7,%g5,%g0
+ mov %i3,%o1 ! py1 = y
+ bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f19
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last2
+
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f10,c3two44,%f12
+ st %f23,[%o2+4]
+
+.loop2:
+ lda [%i1]%asi,%l3 ! preload next argument
+ sub %l2,%o4,%g5
+ sub %o5,%l2,%o7
+ fabss %f16,%f18
+
+ lda [%i1]%asi,%f24
+ orcc %o7,%g5,%g0
+ mov %i3,%o2 ! py2 = y
+ bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f27
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last3
+
+! delay slot
+ andn %l3,%i5,%l3
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f18,c3two44,%f20
+ st %f31,[%o3+4]
+
+.loop3:
+ sub %l3,%o4,%g5
+ sub %o5,%l3,%o7
+ fabss %f24,%f26
+ st %f5,[%fp+nk0]
+
+ orcc %o7,%g5,%g0
+ mov %i3,%o3 ! py3 = y
+ bl,pn %icc,.range3 ! hx < 0x3e400000 or hx > 0x4099251e
+! delay slot
+ st %f13,[%fp+nk1]
+
+!!! DONE?
+.cont:
+ srlx %o5,32,%o7
+ add %i3,%i4,%i3 ! y += stridey
+ fmovs %f3,%f1
+ st %f21,[%fp+nk2]
+
+ sub %o7,%l0,%l0
+ sub %o7,%l1,%l1
+ faddd %f26,c3two44,%f28
+ st %f29,[%fp+nk3]
+
+ sub %o7,%l2,%l2
+ sub %o7,%l3,%l3
+ fmovs %f11,%f9
+
+ or %l0,%l1,%l0
+ or %l2,%l3,%l2
+ fmovs %f19,%f17
+
+ fmovs %f27,%f25
+ fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range
+
+ fmuld %f8,invpio2,%f14
+ ld [%fp+nk0],%l4
+
+ fmuld %f16,invpio2,%f22
+ ld [%fp+nk1],%l5
+
+ orcc %l0,%l2,%g0
+ bl,pn %icc,.medium
+! delay slot
+ fmuld %f24,invpio2,%f30
+ ld [%fp+nk2],%l6
+
+ ld [%fp+nk3],%l7
+ sll %l4,5,%l4 ! k
+ fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0
+
+ sll %l5,5,%l5
+ ldd [%l4+%g1],%f4
+ fcmpd %fcc1,%f8,pio2_3
+
+ sll %l6,5,%l6
+ ldd [%l5+%g1],%f12
+ fcmpd %fcc2,%f16,pio2_3
+
+ sll %l7,5,%l7
+ ldd [%l6+%g1],%f20
+ fcmpd %fcc3,%f24,pio2_3
+
+ ldd [%l7+%g1],%f28
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f10,%f12,%f10
+
+ fsubd %f18,%f20,%f18
+
+ fsubd %f26,%f28,%f26
+
+ fmuld %f2,%f2,%f0 ! z = x * x
+
+ fmuld %f10,%f10,%f8
+
+ fmuld %f18,%f18,%f16
+
+ fmuld %f26,%f26,%f24
+
+ fmuld %f0,qq3,%f6
+
+ fmuld %f8,qq3,%f14
+
+ fmuld %f16,qq3,%f22
+
+ fmuld %f24,qq3,%f30
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+ ldd [%l4+8],%f2
+
+ fmuld %f34,%f14,%f14
+ ldd [%l5+8],%f10
+
+ fmuld %f36,%f22,%f22
+ ldd [%l6+8],%f18
+
+ fmuld %f38,%f30,%f30
+ ldd [%l7+8],%f26
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fsubd %f14,%f12,%f14
+ lda [%i1]%asi,%f0
+
+ fsubd %f22,%f20,%f22
+ lda [%i1+4]%asi,%f3
+
+ fsubd %f30,%f28,%f30
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ faddd %f6,%f32,%f6
+ st %f6,[%o0]
+
+ faddd %f14,%f34,%f14
+ st %f14,[%o1]
+
+ faddd %f22,%f36,%f22
+ st %f22,[%o2]
+
+ faddd %f30,%f38,%f30
+ st %f30,[%o3]
+ addcc %i0,-1,%i0
+
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.medium:
+ faddd %f6,c3two51,%f4
+ st %f5,[%fp+nk0]
+
+ faddd %f14,c3two51,%f12
+ st %f13,[%fp+nk1]
+
+ faddd %f22,c3two51,%f20
+ st %f21,[%fp+nk2]
+
+ faddd %f30,c3two51,%f28
+ st %f29,[%fp+nk3]
+
+ fsubd %f4,c3two51,%f6
+
+ fsubd %f12,c3two51,%f14
+
+ fsubd %f20,c3two51,%f22
+
+ fsubd %f28,c3two51,%f30
+
+ fmuld %f6,pio2_1,%f2
+ ld [%fp+nk0],%l0 ! n
+
+ fmuld %f14,pio2_1,%f10
+ ld [%fp+nk1],%l1
+
+ fmuld %f22,pio2_1,%f18
+ ld [%fp+nk2],%l2
+
+ fmuld %f30,pio2_1,%f26
+ ld [%fp+nk3],%l3
+
+ fsubd %f0,%f2,%f0
+ fmuld %f6,pio2_2,%f4
+ add %l0,1,%l0
+
+ fsubd %f8,%f10,%f8
+ fmuld %f14,pio2_2,%f12
+ add %l1,1,%l1
+
+ fsubd %f16,%f18,%f16
+ fmuld %f22,pio2_2,%f20
+ add %l2,1,%l2
+
+ fsubd %f24,%f26,%f24
+ fmuld %f30,pio2_2,%f28
+ add %l3,1,%l3
+
+ fsubd %f0,%f4,%f32
+
+ fsubd %f8,%f12,%f34
+
+ fsubd %f16,%f20,%f36
+
+ fsubd %f24,%f28,%f38
+
+ fsubd %f0,%f32,%f0
+ fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0
+
+ fsubd %f8,%f34,%f8
+ fcmple32 %f34,pio2_3,%l5
+
+ fsubd %f16,%f36,%f16
+ fcmple32 %f36,pio2_3,%l6
+
+ fsubd %f24,%f38,%f24
+ fcmple32 %f38,pio2_3,%l7
+
+ fsubd %f0,%f4,%f0
+ fmuld %f6,pio2_3,%f6
+ sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2
+
+ fsubd %f8,%f12,%f8
+ fmuld %f14,pio2_3,%f14
+ sll %l5,30,%l5
+
+ fsubd %f16,%f20,%f16
+ fmuld %f22,pio2_3,%f22
+ sll %l6,30,%l6
+
+ fsubd %f24,%f28,%f24
+ fmuld %f30,pio2_3,%f30
+ sll %l7,30,%l7
+
+ fsubd %f6,%f0,%f6
+ sra %l4,31,%l4
+
+ fsubd %f14,%f8,%f14
+ sra %l5,31,%l5
+
+ fsubd %f22,%f16,%f22
+ sra %l6,31,%l6
+
+ fsubd %f30,%f24,%f30
+ sra %l7,31,%l7
+
+ fsubd %f32,%f6,%f0 ! reduced x
+ xor %l0,%l4,%l0
+
+ fsubd %f34,%f14,%f8
+ xor %l1,%l5,%l1
+
+ fsubd %f36,%f22,%f16
+ xor %l2,%l6,%l2
+
+ fsubd %f38,%f30,%f24
+ xor %l3,%l7,%l3
+
+ fabsd %f0,%f2
+ sub %l0,%l4,%l0
+
+ fabsd %f8,%f10
+ sub %l1,%l5,%l1
+
+ fabsd %f16,%f18
+ sub %l2,%l6,%l2
+
+ fabsd %f24,%f26
+ sub %l3,%l7,%l3
+
+ faddd %f2,c3two44,%f4
+ st %f5,[%fp+nk0]
+ and %l4,2,%l4
+
+ faddd %f10,c3two44,%f12
+ st %f13,[%fp+nk1]
+ and %l5,2,%l5
+
+ faddd %f18,c3two44,%f20
+ st %f21,[%fp+nk2]
+ and %l6,2,%l6
+
+ faddd %f26,c3two44,%f28
+ st %f29,[%fp+nk3]
+ and %l7,2,%l7
+
+ fsubd %f32,%f0,%f4
+ xor %l0,%l4,%l0
+
+ fsubd %f34,%f8,%f12
+ xor %l1,%l5,%l1
+
+ fsubd %f36,%f16,%f20
+ xor %l2,%l6,%l2
+
+ fsubd %f38,%f24,%f28
+ xor %l3,%l7,%l3
+
+ fzero %f38
+ ld [%fp+nk0],%l4
+
+ fsubd %f4,%f6,%f6 ! w
+ ld [%fp+nk1],%l5
+
+ fsubd %f12,%f14,%f14
+ ld [%fp+nk2],%l6
+
+ fnegd %f38,%f38
+ ld [%fp+nk3],%l7
+ sll %l4,5,%l4 ! k
+
+ fsubd %f20,%f22,%f22
+ sll %l5,5,%l5
+
+ fsubd %f28,%f30,%f30
+ sll %l6,5,%l6
+
+ fand %f0,%f38,%f32 ! sign bit of x
+ ldd [%l4+%g1],%f4
+ sll %l7,5,%l7
+
+ fand %f8,%f38,%f34
+ ldd [%l5+%g1],%f12
+
+ fand %f16,%f38,%f36
+ ldd [%l6+%g1],%f20
+
+ fand %f24,%f38,%f38
+ ldd [%l7+%g1],%f28
+
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f10,%f12,%f10
+
+ fsubd %f18,%f20,%f18
+ nop
+
+ fsubd %f26,%f28,%f26
+ nop
+
+! 16-byte aligned
+ fmuld %f2,%f2,%f0 ! z = x * x
+ andcc %l0,1,%g0
+ bz,pn %icc,.case8
+! delay slot
+ fxor %f6,%f32,%f32
+
+ fmuld %f10,%f10,%f8
+ andcc %l1,1,%g0
+ bz,pn %icc,.case4
+! delay slot
+ fxor %f14,%f34,%f34
+
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case2
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case1
+! delay slot
+ fxor %f30,%f38,%f38
+
+!.case0:
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case1:
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case2:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case3
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case3:
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case4:
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case6
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case5
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case5:
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case6:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case7
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case7:
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case8:
+ fmuld %f10,%f10,%f8
+ andcc %l1,1,%g0
+ bz,pn %icc,.case12
+! delay slot
+ fxor %f14,%f34,%f34
+
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case10
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case9
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case9:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case10:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case11
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case11:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case12:
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case14
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case13
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case13:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case14:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case15
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case15:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.end:
+ st %f15,[%o1+4]
+ st %f23,[%o2+4]
+ st %f31,[%o3+4]
+ ld [%fp+biguns],%i5
+ tst %i5 ! check for huge arguments remaining
+ be,pt %icc,.exit
+! delay slot
+ nop
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ysave],%o3
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ysave],%o3
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sysave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ sra %o4,0,%o4
+ call __vlibm_vcos_big_ultra3
+ sra %o5,0,%o5 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
+ .align 16
+.last1:
+ faddd %f2,c3two44,%f4
+ st %f15,[%o1+4]
+.last1_from_range1:
+ mov 0,%l1
+ fzeros %f8
+ fzero %f10
+ add %fp,junk,%o1
+.last2:
+ faddd %f10,c3two44,%f12
+ st %f23,[%o2+4]
+.last2_from_range2:
+ mov 0,%l2
+ fzeros %f16
+ fzero %f18
+ add %fp,junk,%o2
+.last3:
+ faddd %f18,c3two44,%f20
+ st %f31,[%o3+4]
+ st %f5,[%fp+nk0]
+ st %f13,[%fp+nk1]
+.last3_from_range3:
+ mov 0,%l3
+ fzeros %f24
+ fzero %f26
+ ba,pt %icc,.cont
+! delay slot
+ add %fp,junk,%o3
+
+
+ .align 16
+.range0:
+ cmp %l0,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l0,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f0
+ fmuld %f2,%f0,%f2
+ st %f2,[%o0]
+ ba,pt %icc,2f
+! delay slot
+ st %f3,[%o0+4]
+1:
+ fdtoi %f2,%f4 ! raise inexact if not zero
+ sethi %hi(0x3ff00000),%o7
+ st %o7,[%o0]
+ st %g0,[%o0+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.end
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovs %f8,%f0
+ fmovs %f11,%f3
+ ba,pt %icc,.loop0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range1:
+ cmp %l1,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l1,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f8
+ fmuld %f10,%f8,%f10
+ st %f10,[%o1]
+ ba,pt %icc,2f
+! delay slot
+ st %f11,[%o1+4]
+1:
+ fdtoi %f10,%f12 ! raise inexact if not zero
+ sethi %hi(0x3ff00000),%o7
+ st %o7,[%o1]
+ st %g0,[%o1+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1_from_range1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovs %f16,%f8
+ fmovs %f19,%f11
+ ba,pt %icc,.loop1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range2:
+ cmp %l2,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l2,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f16
+ fmuld %f18,%f16,%f18
+ st %f18,[%o2]
+ ba,pt %icc,2f
+! delay slot
+ st %f19,[%o2+4]
+1:
+ fdtoi %f18,%f20 ! raise inexact if not zero
+ sethi %hi(0x3ff00000),%o7
+ st %o7,[%o2]
+ st %g0,[%o2+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last2_from_range2
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l3,%i5,%l2 ! hx &= ~0x80000000
+ fmovs %f24,%f16
+ fmovs %f27,%f19
+ ba,pt %icc,.loop2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range3:
+ cmp %l3,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l3,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f24
+ fmuld %f26,%f24,%f26
+ st %f26,[%o3]
+ ba,pt %icc,2f
+! delay slot
+ st %f27,[%o3+4]
+1:
+ fdtoi %f26,%f28 ! raise inexact if not zero
+ sethi %hi(0x3ff00000),%o7
+ st %o7,[%o3]
+ st %g0,[%o3+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last3_from_range3
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ ld [%i1],%l3
+ ld [%i1],%f24
+ ld [%i1+4],%f27
+ andn %l3,%i5,%l3 ! hx &= ~0x80000000
+ ba,pt %icc,.loop3
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+ SET_SIZE(__vcos_ultra3)
+
diff --git a/usr/src/libm/src/mvec/vis/__vcosf.S b/usr/src/libm/src/mvec/vis/__vcosf.S
new file mode 100644
index 0000000..31429c7
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vcosf.S
@@ -0,0 +1,2101 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vcosf.S 1.9 06/01/23 SMI"
+
+ .file "__vcosf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+constants:
+	.word	0xbfc55554,0x60000000	! S0 ~ -1/6 (sin poly, x^3 coeff)
+	.word	0x3f811077,0xe0000000	! S1 ~ 1/120
+	.word	0xbf29956b,0x60000000	! S2 ~ -1/5040
+	.word	0x3ff00000,0x00000000	! one = 1.0
+	.word	0xbfe00000,0x00000000	! mhalf = -0.5
+	.word	0x3fa55554,0xa0000000	! C0 ~ 1/24 (cos poly, x^4 coeff)
+	.word	0xbf56c0c1,0xe0000000	! C1 ~ -1/720
+	.word	0x3ef99e24,0xe0000000	! C2 ~ 1/40320
+	.word	0x3fe45f30,0x6dc9c883	! invpio2 = 2/pi
+	.word	0x43380000,0x00000000	! round = 1.5*2^52 (round to nearest int)
+	.word	0x3ff921fb,0x54400000	! pio2_1 = pi/2, high part
+	.word	0x3dd0b461,0x1a626331	! pio2_t = pi/2 - pio2_1
+	.word	0x3f490fdb,0		! thresh1 = pi/4 (single)
+	.word	0x49c90fdb,0		! thresh2 = 2^19 pi (single)
+	.word	0x7f800000,0		! inf (single)
+	.word	0x80000000,0		! signbit (single)
+
+#define S0 0x0
+#define S1 0x08
+#define S2 0x10
+#define one 0x18
+#define mhalf 0x20
+#define C0 0x28
+#define C1 0x30
+#define C2 0x38
+#define invpio2 0x40
+#define round 0x48
+#define pio2_1 0x50
+#define pio2_t 0x58
+#define thresh1 0x60
+#define thresh2 0x68
+#define inf 0x70
+#define signbit 0x78
+
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define junk STACK_BIAS-0x20
+#define n3 STACK_BIAS-0x24
+#define n2 STACK_BIAS-0x28
+#define n1 STACK_BIAS-0x2c
+#define n0 STACK_BIAS-0x30
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x30
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 biguns
+
+! l0 n0
+! l1 n1
+! l2 n2
+! l3 n3
+! l4
+! l5
+! l6
+! l7
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1
+! g5
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 py3
+! o4
+! o5
+! o7
+
+! f0 x0
+! f2 x1
+! f4 x2
+! f6 x3
+! f8 thresh1 (pi/4)
+! f10 y0
+! f12 y1
+! f14 y2
+! f16 y3
+! f18 thresh2 (2^19 pi)
+! f20
+! f22
+! f24
+! f26
+! f28 signbit
+! f30
+! f32
+! f34
+! f36
+! f38 inf
+! f40 S0
+! f42 S1
+! f44 S2
+! f46 one
+! f48 mhalf
+! f50 C0
+! f52 C1
+! f54 C2
+! f56 invpio2
+! f58 round
+! f60 pio2_1
+! f62 pio2_t
+
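+! A scalar C sketch (an editorial illustration, not part of the original
+! Sun source) of how one element is handled on the primary (|x| <= pi/4)
+! and medium (|x| <= 2^19 pi) paths.  nearbyint() stands in for the
+! add/subtract of the "round" constant (1.5*2^52) used below, the
+! polynomial coefficients are plain Taylor values rather than the fitted
+! S0..S2/C0..C2 from the table above, and huge or non-finite arguments
+! (handed off to __vlibm_vcos_bigf) are omitted.
+!
+! #include <math.h>
+!
+! static const double
+!     invpio2 = 0x1.45f306dc9c883p-1,   /* 2/pi        */
+!     pio2_1  = 0x1.921fb544p0,         /* pi/2, high  */
+!     pio2_t  = 0x1.0b4611a626331p-34;  /* pi/2, tail  */
+!
+! static double sinp(double x, double z)   /* sin(x), z = x*x, |x| <= pi/4 */
+! { return x * (1.0 + z * (-1.0/6 + z * (1.0/120 + z * (-1.0/5040)))); }
+!
+! static double cosp(double z)             /* cos(x), z = x*x, |x| <= pi/4 */
+! { return 1.0 - 0.5*z + z*z * (1.0/24 + z * (-1.0/720 + z * (1.0/40320))); }
+!
+! float
+! vcosf_sketch(float xf)
+! {
+!     double x = xf, z, r;
+!     int n;
+!
+!     if (fabs(x) <= 0x1.921fb6p-1) {   /* thresh1 ~ pi/4 */
+!         z = x * x;
+!         return (float) cosp(z);
+!     }
+!     /* medium path: reduce by the nearest multiple of pi/2 */
+!     n = (int) nearbyint(x * invpio2);
+!     r = (x - n * pio2_1) - n * pio2_t;
+!     z = r * r;
+!     switch (n & 3) {                  /* cos(x) = cos(r + n*pi/2) */
+!     case 0:  return (float)  cosp(z);
+!     case 1:  return (float) -sinp(r, z);
+!     case 2:  return (float) -cosp(z);
+!     default: return (float)  sinp(r, z);
+!     }
+! }
+!
+! The assembly below carries four such elements (x0..x3) per pass to
+! hide the latency of the double-precision multiplies and adds.
+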
+ ENTRY(__vcosf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,l0)
+ mov %l0,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ysave]
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ysave]
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sysave]
+ mov 0,%i5 ! biguns = 0
+ ldd [%g1+S0],%f40 ! load constants
+ ldd [%g1+S1],%f42
+ ldd [%g1+S2],%f44
+ ldd [%g1+one],%f46
+ ldd [%g1+mhalf],%f48
+ ldd [%g1+C0],%f50
+ ldd [%g1+C1],%f52
+ ldd [%g1+C2],%f54
+ ldd [%g1+invpio2],%f56
+ ldd [%g1+round],%f58
+ ldd [%g1+pio2_1],%f60
+ ldd [%g1+pio2_t],%f62
+ ldd [%g1+thresh1],%f8
+ ldd [%g1+thresh2],%f18
+ ldd [%g1+inf],%f38
+ ldd [%g1+signbit],%f28
+ sll %i2,2,%i2 ! scale strides
+ sll %i4,2,%i4
+ fzero %f10 ! loop prologue
+ add %fp,junk,%o0
+ fzero %f12
+ add %fp,junk,%o1
+ fzero %f14
+ add %fp,junk,%o2
+ fzero %f16
+ ba .start
+ add %fp,junk,%o3
+
+ .align 16
+! 16-byte aligned
+.start:
+ ld [%i1],%f0 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f10,%f10
+
+ st %f10,[%o0]
+ mov %i3,%o0 ! py0 = y
+ ble,pn %icc,.last1
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+
+ ld [%i1],%f2 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f12,%f12
+
+ st %f12,[%o1]
+ mov %i3,%o1 ! py1 = y
+ ble,pn %icc,.last2
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+
+ ld [%i1],%f4 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f14,%f14
+
+ st %f14,[%o2]
+ mov %i3,%o2 ! py2 = y
+ ble,pn %icc,.last3
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+
+ ld [%i1],%f6 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ nop
+ fdtos %f16,%f16
+
+ st %f16,[%o3]
+ mov %i3,%o3 ! py3 = y
+ add %i3,%i4,%i3 ! y += stridey
+.cont:
+ fabsd %f0,%f30
+
+ fabsd %f2,%f32
+
+ fabsd %f4,%f34
+
+ fabsd %f6,%f36
+ fcmple32 %f30,%f18,%l0
+
+ fcmple32 %f32,%f18,%l1
+
+ fcmple32 %f34,%f18,%l2
+
+ fcmple32 %f36,%f18,%l3
+ nop
+
+! 16-byte aligned
+ andcc %l0,2,%g0
+ bz,pn %icc,.range0 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f30,%f8,%l0
+
+.check1:
+ andcc %l1,2,%g0
+ bz,pn %icc,.range1 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f32,%f8,%l1
+
+.check2:
+ andcc %l2,2,%g0
+ bz,pn %icc,.range2 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f34,%f8,%l2
+
+.check3:
+ andcc %l3,2,%g0
+ bz,pn %icc,.range3 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f36,%f8,%l3
+
+.checkprimary:
+ fsmuld %f0,%f0,%f30
+ fstod %f0,%f0
+
+ fsmuld %f2,%f2,%f32
+ fstod %f2,%f2
+ and %l0,%l1,%o4
+
+ fsmuld %f4,%f4,%f34
+ fstod %f4,%f4
+
+ fsmuld %f6,%f6,%f36
+ fstod %f6,%f6
+ and %l2,%l3,%o5
+
+ fmuld %f30,%f54,%f10
+ and %o4,%o5,%o5
+
+ fmuld %f32,%f54,%f12
+ andcc %o5,2,%g0
+ bz,pn %icc,.medium ! branch if any argument is > pi/4
+! delay slot
+ nop
+
+ fmuld %f34,%f54,%f14
+
+ fmuld %f36,%f54,%f16
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+
+ fmuld %f30,%f10,%f10
+
+ fmuld %f32,%f12,%f12
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f16,%f16
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ faddd %f16,%f26,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.medium:
+ fmuld %f0,%f56,%f10
+
+ fmuld %f2,%f56,%f12
+
+ fmuld %f4,%f56,%f14
+
+ fmuld %f6,%f56,%f16
+
+ faddd %f10,%f58,%f10
+ st %f11,[%fp+n0]
+
+ faddd %f12,%f58,%f12
+ st %f13,[%fp+n1]
+
+ faddd %f14,%f58,%f14
+ st %f15,[%fp+n2]
+
+ faddd %f16,%f58,%f16
+ st %f17,[%fp+n3]
+
+ fsubd %f10,%f58,%f10
+
+ fsubd %f12,%f58,%f12
+
+ fsubd %f14,%f58,%f14
+
+ fsubd %f16,%f58,%f16
+
+ fmuld %f10,%f60,%f20
+ ld [%fp+n0],%l0
+
+ fmuld %f12,%f60,%f22
+ ld [%fp+n1],%l1
+
+ fmuld %f14,%f60,%f24
+ ld [%fp+n2],%l2
+
+ fmuld %f16,%f60,%f26
+ ld [%fp+n3],%l3
+
+ fsubd %f0,%f20,%f0
+ fmuld %f10,%f62,%f30
+ add %l0,1,%l0
+
+ fsubd %f2,%f22,%f2
+ fmuld %f12,%f62,%f32
+ add %l1,1,%l1
+
+ fsubd %f4,%f24,%f4
+ fmuld %f14,%f62,%f34
+ add %l2,1,%l2
+
+ fsubd %f6,%f26,%f6
+ fmuld %f16,%f62,%f36
+ add %l3,1,%l3
+
+ fsubd %f0,%f30,%f0
+
+ fsubd %f2,%f32,%f2
+
+ fsubd %f4,%f34,%f4
+
+ fsubd %f6,%f36,%f6
+ andcc %l0,1,%g0
+
+ fmuld %f0,%f0,%f30
+ bz,pn %icc,.case8
+! delay slot
+ andcc %l1,1,%g0
+
+ fmuld %f2,%f2,%f32
+ bz,pn %icc,.case4
+! delay slot
+ andcc %l2,1,%g0
+
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case2
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case1
+! delay slot
+ nop
+
+!.case0:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case1:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case2:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case3
+! delay slot
+ nop
+
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case3:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case4:
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case6
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case5
+! delay slot
+ nop
+
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case5:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case6:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case7
+! delay slot
+ nop
+
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case7:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.case8:
+ fmuld %f2,%f2,%f32
+ bz,pn %icc,.case12
+! delay slot
+ andcc %l2,1,%g0
+
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case10
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case9
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case9:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case10:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case11
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case11:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case12:
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case14
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case13
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case13:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case14:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case15
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case15:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 32
+.end:
+ fdtos %f10,%f10
+ st %f10,[%o0]
+ fdtos %f12,%f12
+ st %f12,[%o1]
+ fdtos %f14,%f14
+ st %f14,[%o2]
+ fdtos %f16,%f16
+ tst %i5 ! check for huge arguments remaining
+ be,pt %icc,.exit
+! delay slot
+ st %f16,[%o3]
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ysave],%o3
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ysave],%o3
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sysave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ call __vlibm_vcos_bigf
+ sra %o4,0,%o4 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
+ .align 32
+.last1:
+ fdtos %f12,%f12
+ st %f12,[%o1]
+ fzeros %f2
+ add %fp,junk,%o1
+.last2:
+ fdtos %f14,%f14
+ st %f14,[%o2]
+ fzeros %f4
+ add %fp,junk,%o2
+.last3:
+ fdtos %f16,%f16
+ st %f16,[%o3]
+ fzeros %f6
+ ba,pt %icc,.cont
+! delay slot
+ add %fp,junk,%o3
+
+
+ .align 16
+.range0:
+ fcmpgt32 %f38,%f30,%l0
+ andcc %l0,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f1
+ fmuls %f0,%f1,%f0
+ st %f0,[%o0]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f0
+ add %i1,%i2,%i1
+ mov %i3,%o0
+ add %i3,%i4,%i3
+ fabsd %f0,%f30
+ fcmple32 %f30,%f18,%l0
+ andcc %l0,2,%g0
+ bz,pn %icc,.range0
+! delay slot
+ nop
+ ba,pt %icc,.check1
+! delay slot
+ fcmple32 %f30,%f8,%l0
+1:
+ fzero %f0 ! set up dummy argument
+ add %fp,junk,%o0
+ mov 2,%l0
+ ba,pt %icc,.check1
+! delay slot
+ fzero %f30
+
+
+ .align 16
+.range1:
+ fcmpgt32 %f38,%f32,%l1
+ andcc %l1,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f3
+ fmuls %f2,%f3,%f2
+ st %f2,[%o1]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f2
+ add %i1,%i2,%i1
+ mov %i3,%o1
+ add %i3,%i4,%i3
+ fabsd %f2,%f32
+ fcmple32 %f32,%f18,%l1
+ andcc %l1,2,%g0
+ bz,pn %icc,.range1
+! delay slot
+ nop
+ ba,pt %icc,.check2
+! delay slot
+ fcmple32 %f32,%f8,%l1
+1:
+ fzero %f2 ! set up dummy argument
+ add %fp,junk,%o1
+ mov 2,%l1
+ ba,pt %icc,.check2
+! delay slot
+ fzero %f32
+
+
+ .align 16
+.range2:
+ fcmpgt32 %f38,%f34,%l2
+ andcc %l2,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f5
+ fmuls %f4,%f5,%f4
+ st %f4,[%o2]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f4
+ add %i1,%i2,%i1
+ mov %i3,%o2
+ add %i3,%i4,%i3
+ fabsd %f4,%f34
+ fcmple32 %f34,%f18,%l2
+ andcc %l2,2,%g0
+ bz,pn %icc,.range2
+! delay slot
+ nop
+ ba,pt %icc,.check3
+! delay slot
+ fcmple32 %f34,%f8,%l2
+1:
+ fzero %f4 ! set up dummy argument
+ add %fp,junk,%o2
+ mov 2,%l2
+ ba,pt %icc,.check3
+! delay slot
+ fzero %f34
+
+
+ .align 16
+.range3:
+ fcmpgt32 %f38,%f36,%l3
+ andcc %l3,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f7
+ fmuls %f6,%f7,%f6
+ st %f6,[%o3]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f6
+ add %i1,%i2,%i1
+ mov %i3,%o3
+ add %i3,%i4,%i3
+ fabsd %f6,%f36
+ fcmple32 %f36,%f18,%l3
+ andcc %l3,2,%g0
+ bz,pn %icc,.range3
+! delay slot
+ nop
+ ba,pt %icc,.checkprimary
+! delay slot
+ fcmple32 %f36,%f8,%l3
+1:
+ fzero %f6 ! set up dummy argument
+ add %fp,junk,%o3
+ mov 2,%l3
+ ba,pt %icc,.checkprimary
+! delay slot
+ fzero %f36
+
+ SET_SIZE(__vcosf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vexp.S b/usr/src/libm/src/mvec/vis/__vexp.S
new file mode 100644
index 0000000..b5f6200
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vexp.S
@@ -0,0 +1,1281 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vexp.S 1.9 06/01/23 SMI"
+
+ .file "__vexp.S"
+
+#include "libm.h"
+
+ RO_DATA
+
+/********************************************************************
+ * vexp() algorithm is from mopt:f_exp.c. Basics are included here
+ * to supplement comments within this file. vexp() has been unrolled
+ * to a depth of 3. Only element 0 is documented.
+ *
+ * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
+ * 2^44 to allow the *2^k step without shifting within the FP
+ * registers.  That scaling had to be removed for CHEETAH to avoid
+ * doing fdtox on a very large number (2^52), which would trap to
+ * the kernel.
+ *
+ * Let x = (k + j/256)ln2 + r
+ * then exp(x) = exp((k + j/256)*ln2) * exp(r)
+ * = 2^k * 2^(j/256) * exp(r)
+ * where r is small and exp(r) is computed by the polynomial approximation
+ * exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
+ * = 1 + r*(1+r*(B1+r*(B2+r*B3)))
+ * let
+ * p = r*(1+r*(B1+r*(B2+r*B3))) ! notice, not quite exp(r)
+ * q = 2^(j/256) (high 64 bits)
+ * t = 2^(j/256) (extra precision) ! both from _TBL_exp_z[]
+ * then
+ * 2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
+ * then actual computation is 2^k * ( q + ( t + q*p ) )
+ *
+ ********************************************************************/
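+
+! A scalar C sketch (an editorial illustration, not part of the original
+! Sun source) of the reduction and reconstruction described above.
+! exp2(j/256.0) stands in for the 256-entry table TBL (the q + t pairs),
+! a single double ln2/256 stands in for the LN2_256H/LN2_256L split,
+! B1..B3 are plain Taylor values rather than the fitted coefficients,
+! and the tiny/huge argument screening done below is omitted.
+!
+! #include <math.h>
+!
+! double
+! vexp_sketch(double x)
+! {
+!     /* x = (k + j/256)*ln2 + r,  |r| <= ln2/512 */
+!     double n = nearbyint(x * (256.0 / M_LN2));
+!     int    m = (int) n;
+!     int    j = m & 0xff;          /* table index, 0 <= j < 256 */
+!     int    k = (m - j) / 256;     /* exponent of the result    */
+!     double r = x - n * (M_LN2 / 256.0);
+!
+!     /* p = r*(1 + r*(B1 + r*(B2 + r*B3))), B1 ~ 1/2, B2 ~ 1/6, B3 ~ 1/24 */
+!     double p = r * (1.0 + r * (0.5 + r * (1.0/6.0 + r * (1.0/24.0))));
+!
+!     /* q (+ t) = 2^(j/256); the real code forms 2^k * (q + (t + q*p)) */
+!     double q = exp2(j / 256.0);
+!     return ldexp(q + q * p, k);
+! }
+!
+! As noted above, the assembly below is unrolled to a depth of 3,
+! carrying three such elements per pass.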
+
+ .align 16
+TBL:
+ .word 0x3ff00000,0x00000000
+ .word 0x00000000,0x00000000
+ .word 0x3ff00b1a,0xfa5abcbf
+ .word 0xbc84f6b2,0xa7609f71
+ .word 0x3ff0163d,0xa9fb3335
+ .word 0x3c9b6129,0x9ab8cdb7
+ .word 0x3ff02168,0x143b0281
+ .word 0xbc82bf31,0x0fc54eb6
+ .word 0x3ff02c9a,0x3e778061
+ .word 0xbc719083,0x535b085d
+ .word 0x3ff037d4,0x2e11bbcc
+ .word 0x3c656811,0xeeade11a
+ .word 0x3ff04315,0xe86e7f85
+ .word 0xbc90a31c,0x1977c96e
+ .word 0x3ff04e5f,0x72f654b1
+ .word 0x3c84c379,0x3aa0d08c
+ .word 0x3ff059b0,0xd3158574
+ .word 0x3c8d73e2,0xa475b465
+ .word 0x3ff0650a,0x0e3c1f89
+ .word 0xbc95cb7b,0x5799c396
+ .word 0x3ff0706b,0x29ddf6de
+ .word 0xbc8c91df,0xe2b13c26
+ .word 0x3ff07bd4,0x2b72a836
+ .word 0x3c832334,0x54458700
+ .word 0x3ff08745,0x18759bc8
+ .word 0x3c6186be,0x4bb284ff
+ .word 0x3ff092bd,0xf66607e0
+ .word 0xbc968063,0x800a3fd1
+ .word 0x3ff09e3e,0xcac6f383
+ .word 0x3c914878,0x18316136
+ .word 0x3ff0a9c7,0x9b1f3919
+ .word 0x3c85d16c,0x873d1d38
+ .word 0x3ff0b558,0x6cf9890f
+ .word 0x3c98a62e,0x4adc610a
+ .word 0x3ff0c0f1,0x45e46c85
+ .word 0x3c94f989,0x06d21cef
+ .word 0x3ff0cc92,0x2b7247f7
+ .word 0x3c901edc,0x16e24f71
+ .word 0x3ff0d83b,0x23395dec
+ .word 0xbc9bc14d,0xe43f316a
+ .word 0x3ff0e3ec,0x32d3d1a2
+ .word 0x3c403a17,0x27c57b53
+ .word 0x3ff0efa5,0x5fdfa9c5
+ .word 0xbc949db9,0xbc54021b
+ .word 0x3ff0fb66,0xaffed31b
+ .word 0xbc6b9bed,0xc44ebd7b
+ .word 0x3ff10730,0x28d7233e
+ .word 0x3c8d46eb,0x1692fdd5
+ .word 0x3ff11301,0xd0125b51
+ .word 0xbc96c510,0x39449b3a
+ .word 0x3ff11edb,0xab5e2ab6
+ .word 0xbc9ca454,0xf703fb72
+ .word 0x3ff12abd,0xc06c31cc
+ .word 0xbc51b514,0xb36ca5c7
+ .word 0x3ff136a8,0x14f204ab
+ .word 0xbc67108f,0xba48dcf0
+ .word 0x3ff1429a,0xaea92de0
+ .word 0xbc932fbf,0x9af1369e
+ .word 0x3ff14e95,0x934f312e
+ .word 0xbc8b91e8,0x39bf44ab
+ .word 0x3ff15a98,0xc8a58e51
+ .word 0x3c82406a,0xb9eeab0a
+ .word 0x3ff166a4,0x5471c3c2
+ .word 0x3c58f23b,0x82ea1a32
+ .word 0x3ff172b8,0x3c7d517b
+ .word 0xbc819041,0xb9d78a76
+ .word 0x3ff17ed4,0x8695bbc0
+ .word 0x3c709e3f,0xe2ac5a64
+ .word 0x3ff18af9,0x388c8dea
+ .word 0xbc911023,0xd1970f6c
+ .word 0x3ff19726,0x58375d2f
+ .word 0x3c94aadd,0x85f17e08
+ .word 0x3ff1a35b,0xeb6fcb75
+ .word 0x3c8e5b4c,0x7b4968e4
+ .word 0x3ff1af99,0xf8138a1c
+ .word 0x3c97bf85,0xa4b69280
+ .word 0x3ff1bbe0,0x84045cd4
+ .word 0xbc995386,0x352ef607
+ .word 0x3ff1c82f,0x95281c6b
+ .word 0x3c900977,0x8010f8c9
+ .word 0x3ff1d487,0x3168b9aa
+ .word 0x3c9e016e,0x00a2643c
+ .word 0x3ff1e0e7,0x5eb44027
+ .word 0xbc96fdd8,0x088cb6de
+ .word 0x3ff1ed50,0x22fcd91d
+ .word 0xbc91df98,0x027bb78c
+ .word 0x3ff1f9c1,0x8438ce4d
+ .word 0xbc9bf524,0xa097af5c
+ .word 0x3ff2063b,0x88628cd6
+ .word 0x3c8dc775,0x814a8494
+ .word 0x3ff212be,0x3578a819
+ .word 0x3c93592d,0x2cfcaac9
+ .word 0x3ff21f49,0x917ddc96
+ .word 0x3c82a97e,0x9494a5ee
+ .word 0x3ff22bdd,0xa27912d1
+ .word 0x3c8d34fb,0x5577d69e
+ .word 0x3ff2387a,0x6e756238
+ .word 0x3c99b07e,0xb6c70573
+ .word 0x3ff2451f,0xfb82140a
+ .word 0x3c8acfcc,0x911ca996
+ .word 0x3ff251ce,0x4fb2a63f
+ .word 0x3c8ac155,0xbef4f4a4
+ .word 0x3ff25e85,0x711ece75
+ .word 0x3c93e1a2,0x4ac31b2c
+ .word 0x3ff26b45,0x65e27cdd
+ .word 0x3c82bd33,0x9940e9d9
+ .word 0x3ff2780e,0x341ddf29
+ .word 0x3c9e067c,0x05f9e76c
+ .word 0x3ff284df,0xe1f56381
+ .word 0xbc9a4c3a,0x8c3f0d7e
+ .word 0x3ff291ba,0x7591bb70
+ .word 0xbc82cc72,0x28401cbc
+ .word 0x3ff29e9d,0xf51fdee1
+ .word 0x3c8612e8,0xafad1255
+ .word 0x3ff2ab8a,0x66d10f13
+ .word 0xbc995743,0x191690a7
+ .word 0x3ff2b87f,0xd0dad990
+ .word 0xbc410adc,0xd6381aa4
+ .word 0x3ff2c57e,0x39771b2f
+ .word 0xbc950145,0xa6eb5124
+ .word 0x3ff2d285,0xa6e4030b
+ .word 0x3c900247,0x54db41d5
+ .word 0x3ff2df96,0x1f641589
+ .word 0x3c9d16cf,0xfbbce198
+ .word 0x3ff2ecaf,0xa93e2f56
+ .word 0x3c71ca0f,0x45d52383
+ .word 0x3ff2f9d2,0x4abd886b
+ .word 0xbc653c55,0x532bda93
+ .word 0x3ff306fe,0x0a31b715
+ .word 0x3c86f46a,0xd23182e4
+ .word 0x3ff31432,0xedeeb2fd
+ .word 0x3c8959a3,0xf3f3fcd0
+ .word 0x3ff32170,0xfc4cd831
+ .word 0x3c8a9ce7,0x8e18047c
+ .word 0x3ff32eb8,0x3ba8ea32
+ .word 0xbc9c45e8,0x3cb4f318
+ .word 0x3ff33c08,0xb26416ff
+ .word 0x3c932721,0x843659a6
+ .word 0x3ff34962,0x66e3fa2d
+ .word 0xbc835a75,0x930881a4
+ .word 0x3ff356c5,0x5f929ff1
+ .word 0xbc8b5cee,0x5c4e4628
+ .word 0x3ff36431,0xa2de883b
+ .word 0xbc8c3144,0xa06cb85e
+ .word 0x3ff371a7,0x373aa9cb
+ .word 0xbc963aea,0xbf42eae2
+ .word 0x3ff37f26,0x231e754a
+ .word 0xbc99f5ca,0x9eceb23c
+ .word 0x3ff38cae,0x6d05d866
+ .word 0xbc9e958d,0x3c9904bd
+ .word 0x3ff39a40,0x1b7140ef
+ .word 0xbc99a9a5,0xfc8e2934
+ .word 0x3ff3a7db,0x34e59ff7
+ .word 0xbc75e436,0xd661f5e3
+ .word 0x3ff3b57f,0xbfec6cf4
+ .word 0x3c954c66,0xe26fff18
+ .word 0x3ff3c32d,0xc313a8e5
+ .word 0xbc9efff8,0x375d29c3
+ .word 0x3ff3d0e5,0x44ede173
+ .word 0x3c7fe8d0,0x8c284c71
+ .word 0x3ff3dea6,0x4c123422
+ .word 0x3c8ada09,0x11f09ebc
+ .word 0x3ff3ec70,0xdf1c5175
+ .word 0xbc8af663,0x7b8c9bca
+ .word 0x3ff3fa45,0x04ac801c
+ .word 0xbc97d023,0xf956f9f3
+ .word 0x3ff40822,0xc367a024
+ .word 0x3c8bddf8,0xb6f4d048
+ .word 0x3ff4160a,0x21f72e2a
+ .word 0xbc5ef369,0x1c309278
+ .word 0x3ff423fb,0x2709468a
+ .word 0xbc98462d,0xc0b314dd
+ .word 0x3ff431f5,0xd950a897
+ .word 0xbc81c7dd,0xe35f7998
+ .word 0x3ff43ffa,0x3f84b9d4
+ .word 0x3c8880be,0x9704c002
+ .word 0x3ff44e08,0x6061892d
+ .word 0x3c489b7a,0x04ef80d0
+ .word 0x3ff45c20,0x42a7d232
+ .word 0xbc686419,0x82fb1f8e
+ .word 0x3ff46a41,0xed1d0057
+ .word 0x3c9c944b,0xd1648a76
+ .word 0x3ff4786d,0x668b3237
+ .word 0xbc9c20f0,0xed445733
+ .word 0x3ff486a2,0xb5c13cd0
+ .word 0x3c73c1a3,0xb69062f0
+ .word 0x3ff494e1,0xe192aed2
+ .word 0xbc83b289,0x5e499ea0
+ .word 0x3ff4a32a,0xf0d7d3de
+ .word 0x3c99cb62,0xf3d1be56
+ .word 0x3ff4b17d,0xea6db7d7
+ .word 0xbc8125b8,0x7f2897f0
+ .word 0x3ff4bfda,0xd5362a27
+ .word 0x3c7d4397,0xafec42e2
+ .word 0x3ff4ce41,0xb817c114
+ .word 0x3c905e29,0x690abd5d
+ .word 0x3ff4dcb2,0x99fddd0d
+ .word 0x3c98ecdb,0xbc6a7833
+ .word 0x3ff4eb2d,0x81d8abff
+ .word 0xbc95257d,0x2e5d7a52
+ .word 0x3ff4f9b2,0x769d2ca7
+ .word 0xbc94b309,0xd25957e3
+ .word 0x3ff50841,0x7f4531ee
+ .word 0x3c7a249b,0x49b7465f
+ .word 0x3ff516da,0xa2cf6642
+ .word 0xbc8f7685,0x69bd93ee
+ .word 0x3ff5257d,0xe83f4eef
+ .word 0xbc7c998d,0x43efef71
+ .word 0x3ff5342b,0x569d4f82
+ .word 0xbc807abe,0x1db13cac
+ .word 0x3ff542e2,0xf4f6ad27
+ .word 0x3c87926d,0x192d5f7e
+ .word 0x3ff551a4,0xca5d920f
+ .word 0xbc8d689c,0xefede59a
+ .word 0x3ff56070,0xdde910d2
+ .word 0xbc90fb6e,0x168eebf0
+ .word 0x3ff56f47,0x36b527da
+ .word 0x3c99bb2c,0x011d93ad
+ .word 0x3ff57e27,0xdbe2c4cf
+ .word 0xbc90b98c,0x8a57b9c4
+ .word 0x3ff58d12,0xd497c7fd
+ .word 0x3c8295e1,0x5b9a1de8
+ .word 0x3ff59c08,0x27ff07cc
+ .word 0xbc97e2ce,0xe467e60f
+ .word 0x3ff5ab07,0xdd485429
+ .word 0x3c96324c,0x054647ad
+ .word 0x3ff5ba11,0xfba87a03
+ .word 0xbc9b77a1,0x4c233e1a
+ .word 0x3ff5c926,0x8a5946b7
+ .word 0x3c3c4b1b,0x816986a2
+ .word 0x3ff5d845,0x90998b93
+ .word 0xbc9cd6a7,0xa8b45642
+ .word 0x3ff5e76f,0x15ad2148
+ .word 0x3c9ba6f9,0x3080e65e
+ .word 0x3ff5f6a3,0x20dceb71
+ .word 0xbc89eadd,0xe3cdcf92
+ .word 0x3ff605e1,0xb976dc09
+ .word 0xbc93e242,0x9b56de47
+ .word 0x3ff6152a,0xe6cdf6f4
+ .word 0x3c9e4b3e,0x4ab84c27
+ .word 0x3ff6247e,0xb03a5585
+ .word 0xbc9383c1,0x7e40b497
+ .word 0x3ff633dd,0x1d1929fd
+ .word 0x3c984710,0xbeb964e5
+ .word 0x3ff64346,0x34ccc320
+ .word 0xbc8c483c,0x759d8932
+ .word 0x3ff652b9,0xfebc8fb7
+ .word 0xbc9ae3d5,0xc9a73e08
+ .word 0x3ff66238,0x82552225
+ .word 0xbc9bb609,0x87591c34
+ .word 0x3ff671c1,0xc70833f6
+ .word 0xbc8e8732,0x586c6134
+ .word 0x3ff68155,0xd44ca973
+ .word 0x3c6038ae,0x44f73e65
+ .word 0x3ff690f4,0xb19e9538
+ .word 0x3c8804bd,0x9aeb445c
+ .word 0x3ff6a09e,0x667f3bcd
+ .word 0xbc9bdd34,0x13b26456
+ .word 0x3ff6b052,0xfa75173e
+ .word 0x3c7a38f5,0x2c9a9d0e
+ .word 0x3ff6c012,0x750bdabf
+ .word 0xbc728956,0x67ff0b0d
+ .word 0x3ff6cfdc,0xddd47645
+ .word 0x3c9c7aa9,0xb6f17309
+ .word 0x3ff6dfb2,0x3c651a2f
+ .word 0xbc6bbe3a,0x683c88ab
+ .word 0x3ff6ef92,0x98593ae5
+ .word 0xbc90b974,0x9e1ac8b2
+ .word 0x3ff6ff7d,0xf9519484
+ .word 0xbc883c0f,0x25860ef6
+ .word 0x3ff70f74,0x66f42e87
+ .word 0x3c59d644,0xd45aa65f
+ .word 0x3ff71f75,0xe8ec5f74
+ .word 0xbc816e47,0x86887a99
+ .word 0x3ff72f82,0x86ead08a
+ .word 0xbc920aa0,0x2cd62c72
+ .word 0x3ff73f9a,0x48a58174
+ .word 0xbc90a8d9,0x6c65d53c
+ .word 0x3ff74fbd,0x35d7cbfd
+ .word 0x3c9047fd,0x618a6e1c
+ .word 0x3ff75feb,0x564267c9
+ .word 0xbc902459,0x57316dd3
+ .word 0x3ff77024,0xb1ab6e09
+ .word 0x3c9b7877,0x169147f8
+ .word 0x3ff78069,0x4fde5d3f
+ .word 0x3c9866b8,0x0a02162c
+ .word 0x3ff790b9,0x38ac1cf6
+ .word 0x3c9349a8,0x62aadd3e
+ .word 0x3ff7a114,0x73eb0187
+ .word 0xbc841577,0xee04992f
+ .word 0x3ff7b17b,0x0976cfdb
+ .word 0xbc9bebb5,0x8468dc88
+ .word 0x3ff7c1ed,0x0130c132
+ .word 0x3c9f124c,0xd1164dd6
+ .word 0x3ff7d26a,0x62ff86f0
+ .word 0x3c91bddb,0xfb72b8b4
+ .word 0x3ff7e2f3,0x36cf4e62
+ .word 0x3c705d02,0xba15797e
+ .word 0x3ff7f387,0x8491c491
+ .word 0xbc807f11,0xcf9311ae
+ .word 0x3ff80427,0x543e1a12
+ .word 0xbc927c86,0x626d972b
+ .word 0x3ff814d2,0xadd106d9
+ .word 0x3c946437,0x0d151d4d
+ .word 0x3ff82589,0x994cce13
+ .word 0xbc9d4c1d,0xd41532d8
+ .word 0x3ff8364c,0x1eb941f7
+ .word 0x3c999b9a,0x31df2bd5
+ .word 0x3ff8471a,0x4623c7ad
+ .word 0xbc88d684,0xa341cdfb
+ .word 0x3ff857f4,0x179f5b21
+ .word 0xbc5ba748,0xf8b216d0
+ .word 0x3ff868d9,0x9b4492ec
+ .word 0x3ca01c83,0xb21584a3
+ .word 0x3ff879ca,0xd931a436
+ .word 0x3c85d2d7,0xd2db47bc
+ .word 0x3ff88ac7,0xd98a6699
+ .word 0x3c9994c2,0xf37cb53a
+ .word 0x3ff89bd0,0xa478580f
+ .word 0x3c9d5395,0x4475202a
+ .word 0x3ff8ace5,0x422aa0db
+ .word 0x3c96e9f1,0x56864b27
+ .word 0x3ff8be05,0xbad61778
+ .word 0x3c9ecb5e,0xfc43446e
+ .word 0x3ff8cf32,0x16b5448c
+ .word 0xbc70d55e,0x32e9e3aa
+ .word 0x3ff8e06a,0x5e0866d9
+ .word 0xbc97114a,0x6fc9b2e6
+ .word 0x3ff8f1ae,0x99157736
+ .word 0x3c85cc13,0xa2e3976c
+ .word 0x3ff902fe,0xd0282c8a
+ .word 0x3c9592ca,0x85fe3fd2
+ .word 0x3ff9145b,0x0b91ffc6
+ .word 0xbc9dd679,0x2e582524
+ .word 0x3ff925c3,0x53aa2fe2
+ .word 0xbc83455f,0xa639db7f
+ .word 0x3ff93737,0xb0cdc5e5
+ .word 0xbc675fc7,0x81b57ebc
+ .word 0x3ff948b8,0x2b5f98e5
+ .word 0xbc8dc3d6,0x797d2d99
+ .word 0x3ff95a44,0xcbc8520f
+ .word 0xbc764b7c,0x96a5f039
+ .word 0x3ff96bdd,0x9a7670b3
+ .word 0xbc5ba596,0x7f19c896
+ .word 0x3ff97d82,0x9fde4e50
+ .word 0xbc9d185b,0x7c1b85d0
+ .word 0x3ff98f33,0xe47a22a2
+ .word 0x3c7cabda,0xa24c78ed
+ .word 0x3ff9a0f1,0x70ca07ba
+ .word 0xbc9173bd,0x91cee632
+ .word 0x3ff9b2bb,0x4d53fe0d
+ .word 0xbc9dd84e,0x4df6d518
+ .word 0x3ff9c491,0x82a3f090
+ .word 0x3c7c7c46,0xb071f2be
+ .word 0x3ff9d674,0x194bb8d5
+ .word 0xbc9516be,0xa3dd8233
+ .word 0x3ff9e863,0x19e32323
+ .word 0x3c7824ca,0x78e64c6e
+ .word 0x3ff9fa5e,0x8d07f29e
+ .word 0xbc84a9ce,0xaaf1face
+ .word 0x3ffa0c66,0x7b5de565
+ .word 0xbc935949,0x5d1cd533
+ .word 0x3ffa1e7a,0xed8eb8bb
+ .word 0x3c9c6618,0xee8be70e
+ .word 0x3ffa309b,0xec4a2d33
+ .word 0x3c96305c,0x7ddc36ab
+ .word 0x3ffa42c9,0x80460ad8
+ .word 0xbc9aa780,0x589fb120
+ .word 0x3ffa5503,0xb23e255d
+ .word 0xbc9d2f6e,0xdb8d41e1
+ .word 0x3ffa674a,0x8af46052
+ .word 0x3c650f56,0x30670366
+ .word 0x3ffa799e,0x1330b358
+ .word 0x3c9bcb7e,0xcac563c6
+ .word 0x3ffa8bfe,0x53c12e59
+ .word 0xbc94f867,0xb2ba15a8
+ .word 0x3ffa9e6b,0x5579fdbf
+ .word 0x3c90fac9,0x0ef7fd31
+ .word 0x3ffab0e5,0x21356eba
+ .word 0x3c889c31,0xdae94544
+ .word 0x3ffac36b,0xbfd3f37a
+ .word 0xbc8f9234,0xcae76cd0
+ .word 0x3ffad5ff,0x3a3c2774
+ .word 0x3c97ef3b,0xb6b1b8e4
+ .word 0x3ffae89f,0x995ad3ad
+ .word 0x3c97a1cd,0x345dcc81
+ .word 0x3ffafb4c,0xe622f2ff
+ .word 0xbc94b2fc,0x0f315ecc
+ .word 0x3ffb0e07,0x298db666
+ .word 0xbc9bdef5,0x4c80e425
+ .word 0x3ffb20ce,0x6c9a8952
+ .word 0x3c94dd02,0x4a0756cc
+ .word 0x3ffb33a2,0xb84f15fb
+ .word 0xbc62805e,0x3084d708
+ .word 0x3ffb4684,0x15b749b1
+ .word 0xbc7f763d,0xe9df7c90
+ .word 0x3ffb5972,0x8de5593a
+ .word 0xbc9c71df,0xbbba6de3
+ .word 0x3ffb6c6e,0x29f1c52a
+ .word 0x3c92a8f3,0x52883f6e
+ .word 0x3ffb7f76,0xf2fb5e47
+ .word 0xbc75584f,0x7e54ac3b
+ .word 0x3ffb928c,0xf22749e4
+ .word 0xbc9b7216,0x54cb65c6
+ .word 0x3ffba5b0,0x30a1064a
+ .word 0xbc9efcd3,0x0e54292e
+ .word 0x3ffbb8e0,0xb79a6f1f
+ .word 0xbc3f52d1,0xc9696205
+ .word 0x3ffbcc1e,0x904bc1d2
+ .word 0x3c823dd0,0x7a2d9e84
+ .word 0x3ffbdf69,0xc3f3a207
+ .word 0xbc3c2623,0x60ea5b52
+ .word 0x3ffbf2c2,0x5bd71e09
+ .word 0xbc9efdca,0x3f6b9c73
+ .word 0x3ffc0628,0x6141b33d
+ .word 0xbc8d8a5a,0xa1fbca34
+ .word 0x3ffc199b,0xdd85529c
+ .word 0x3c811065,0x895048dd
+ .word 0x3ffc2d1c,0xd9fa652c
+ .word 0xbc96e516,0x17c8a5d7
+ .word 0x3ffc40ab,0x5fffd07a
+ .word 0x3c9b4537,0xe083c60a
+ .word 0x3ffc5447,0x78fafb22
+ .word 0x3c912f07,0x2493b5af
+ .word 0x3ffc67f1,0x2e57d14b
+ .word 0x3c92884d,0xff483cad
+ .word 0x3ffc7ba8,0x8988c933
+ .word 0xbc8e76bb,0xbe255559
+ .word 0x3ffc8f6d,0x9406e7b5
+ .word 0x3c71acbc,0x48805c44
+ .word 0x3ffca340,0x5751c4db
+ .word 0xbc87f2be,0xd10d08f4
+ .word 0x3ffcb720,0xdcef9069
+ .word 0x3c7503cb,0xd1e949db
+ .word 0x3ffccb0f,0x2e6d1675
+ .word 0xbc7d220f,0x86009093
+ .word 0x3ffcdf0b,0x555dc3fa
+ .word 0xbc8dd83b,0x53829d72
+ .word 0x3ffcf315,0x5b5bab74
+ .word 0xbc9a08e9,0xb86dff57
+ .word 0x3ffd072d,0x4a07897c
+ .word 0xbc9cbc37,0x43797a9c
+ .word 0x3ffd1b53,0x2b08c968
+ .word 0x3c955636,0x219a36ee
+ .word 0x3ffd2f87,0x080d89f2
+ .word 0xbc9d487b,0x719d8578
+ .word 0x3ffd43c8,0xeacaa1d6
+ .word 0x3c93db53,0xbf5a1614
+ .word 0x3ffd5818,0xdcfba487
+ .word 0x3c82ed02,0xd75b3706
+ .word 0x3ffd6c76,0xe862e6d3
+ .word 0x3c5fe87a,0x4a8165a0
+ .word 0x3ffd80e3,0x16c98398
+ .word 0xbc911ec1,0x8beddfe8
+ .word 0x3ffd955d,0x71ff6075
+ .word 0x3c9a052d,0xbb9af6be
+ .word 0x3ffda9e6,0x03db3285
+ .word 0x3c9c2300,0x696db532
+ .word 0x3ffdbe7c,0xd63a8315
+ .word 0xbc9b76f1,0x926b8be4
+ .word 0x3ffdd321,0xf301b460
+ .word 0x3c92da57,0x78f018c2
+ .word 0x3ffde7d5,0x641c0658
+ .word 0xbc9ca552,0x8e79ba8f
+ .word 0x3ffdfc97,0x337b9b5f
+ .word 0xbc91a5cd,0x4f184b5c
+ .word 0x3ffe1167,0x6b197d17
+ .word 0xbc72b529,0xbd5c7f44
+ .word 0x3ffe2646,0x14f5a129
+ .word 0xbc97b627,0x817a1496
+ .word 0x3ffe3b33,0x3b16ee12
+ .word 0xbc99f4a4,0x31fdc68a
+ .word 0x3ffe502e,0xe78b3ff6
+ .word 0x3c839e89,0x80a9cc8f
+ .word 0x3ffe6539,0x24676d76
+ .word 0xbc863ff8,0x7522b734
+ .word 0x3ffe7a51,0xfbc74c83
+ .word 0x3c92d522,0xca0c8de2
+ .word 0x3ffe8f79,0x77cdb740
+ .word 0xbc910894,0x80b054b1
+ .word 0x3ffea4af,0xa2a490da
+ .word 0xbc9e9c23,0x179c2893
+ .word 0x3ffeb9f4,0x867cca6e
+ .word 0x3c94832f,0x2293e4f2
+ .word 0x3ffecf48,0x2d8e67f1
+ .word 0xbc9c93f3,0xb411ad8c
+ .word 0x3ffee4aa,0xa2188510
+ .word 0x3c91c68d,0xa487568d
+ .word 0x3ffefa1b,0xee615a27
+ .word 0x3c9dc7f4,0x86a4b6b0
+ .word 0x3fff0f9c,0x1cb6412a
+ .word 0xbc932200,0x65181d45
+ .word 0x3fff252b,0x376bba97
+ .word 0x3c93a1a5,0xbf0d8e43
+ .word 0x3fff3ac9,0x48dd7274
+ .word 0xbc795a5a,0x3ed837de
+ .word 0x3fff5076,0x5b6e4540
+ .word 0x3c99d3e1,0x2dd8a18b
+ .word 0x3fff6632,0x798844f8
+ .word 0x3c9fa37b,0x3539343e
+ .word 0x3fff7bfd,0xad9cbe14
+ .word 0xbc9dbb12,0xd006350a
+ .word 0x3fff91d8,0x02243c89
+ .word 0xbc612ea8,0xa779f689
+ .word 0x3fffa7c1,0x819e90d8
+ .word 0x3c874853,0xf3a5931e
+ .word 0x3fffbdba,0x3692d514
+ .word 0xbc796773,0x15098eb6
+ .word 0x3fffd3c2,0x2b8f71f1
+ .word 0x3c62eb74,0x966579e7
+ .word 0x3fffe9d9,0x6b2a23d9
+ .word 0x3c74a603,0x7442fde3
+
+ .align 16
+constants:
+ .word 0x3ef00000,0x00000000
+ .word 0x40862e42,0xfefa39ef
+ .word 0x01000000,0x00000000
+ .word 0x7f000000,0x00000000
+ .word 0x80000000,0x00000000
+ .word 0x43f00000,0x00000000 ! scaling 2^12 two96
+ .word 0xfff00000,0x00000000
+ .word 0x3ff00000,0x00000000
+ .word 0x3fdfffff,0xfffffff6
+ .word 0x3fc55555,0x721a1d14
+ .word 0x3fa55555,0x6e0896af
+ .word 0x41371547,0x652b82fe ! scaling 2^12 invln2_256
+ .word 0x3ea62e42,0xfee00000 ! scaling 2^(-12) ln2_256h
+ .word 0x3caa39ef,0x35793c76 ! scaling 2^(-12) ln2_256l
+
+ ! base set w/o scaling
+ ! .word 0x43300000,0x00000000 ! scaling two96
+ ! .word 0x40771547,0x652b82fe ! scaling invln2_256
+ ! .word 0x3f662e42,0xfee00000 ! scaling ln2_256h
+ ! .word 0x3d6a39ef,0x35793c76 ! scaling ln2_256l
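+	!
+	! sketch of why the scaled set gives the same reduction: the factors cancel,
+	!	(x * invln2_256*2^12) * (ln2_256h*2^-12) == (x * invln2_256) * ln2_256h
+	! so r = x - p*LN2_256H - p*LN2_256L is unchanged, while the extra 2^12
+	! on p lines its integer bits up so the loop below can pick off the table
+	! offset with (sra 8, and 0xff0) and the 2^m exponent bits with
+	! (fand NEGINF, sra 20) straight from the fdtoi result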
+
+#define ox3ef 0x0
+#define thresh 0x8
+#define tiny 0x10
+#define huge 0x18
+#define signbit 0x20
+#define two96 0x28
+#define neginf 0x30
+#define one 0x38
+#define B1OFF 0x40
+#define B2OFF 0x48
+#define B3OFF 0x50
+#define invln2_256 0x58
+#define ln2_256h 0x60
+#define ln2_256l 0x68
+
+! local storage indices
+
+#define m2 STACK_BIAS-0x4
+#define m1 STACK_BIAS-0x8
+#define m0 STACK_BIAS-0xc
+#define jnk STACK_BIAS-0x20
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! g1 TBL
+
+! l0 m0
+! l1 m1
+! l2 m2
+! l3 j0,oy0
+! l4 j1,oy1
+! l5 j2,oy2
+! l6 0x3e300000
+! l7 0x40862e41
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 scratch
+! o4 scratch
+! o5 0x40874910
+! o7 0x7ff00000
+
+! f0 x0
+! f2
+! f4
+! f6
+! f8
+! f10 x1
+! f12
+! f14
+! f16
+! f18
+! f20 x2
+! f22
+! f24
+! f26
+! f28
+! f30
+! f32
+! f34
+! f36 0x3ef0...
+! f38 thresh
+! f40 tiny
+! f42 huge
+! f44 signbit
+! f46 two96
+! f48 neginf
+! f50 one
+! f52 B1
+! f54 B2
+! f56 B3
+! f58 invln2_256
+! f60 ln2_256h
+! f62 ln2_256l
+#define BOUNDRY %f36
+#define THRESH %f38
+#define TINY %f40
+#define HUGE %f42
+#define SIGNBIT %f44
+#define TWO96 %f46
+#define NEGINF %f48
+#define ONE %f50
+#define B1 %f52
+#define B2 %f54
+#define B3 %f56
+#define INVLN2_256 %f58
+#define LN2_256H %f60
+#define LN2_256L %f62
+
+ ENTRY(__vexp)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o3)
+ PIC_SET(l7,TBL,o0)
+ mov %o0,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
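+! (%asi 0x82 is the primary no-fault ASI: the loop below preloads the next
+! argument before it knows whether another element remains, so a preload
+! that runs past the end of the input vector must not trap)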
+
+ sethi %hi(0x80000000),%i5
+ sethi %hi(0x3e300000),%l6
+ sethi %hi(0x40862e41),%l7
+ or %l7,%lo(0x40862e41),%l7
+ sethi %hi(0x40874910),%o5
+ or %o5,%lo(0x40874910),%o5
+ sethi %hi(0x7ff00000),%o7
+ ldd [%o3+ox3ef],BOUNDRY
+ ldd [%o3+thresh],THRESH
+ ldd [%o3+tiny],TINY
+ ldd [%o3+huge],HUGE
+ ldd [%o3+signbit],SIGNBIT
+ ldd [%o3+two96],TWO96
+ ldd [%o3+neginf],NEGINF
+ ldd [%o3+one],ONE
+ ldd [%o3+B1OFF],B1
+ ldd [%o3+B2OFF],B2
+ ldd [%o3+B3OFF],B3
+ ldd [%o3+invln2_256],INVLN2_256
+ ldd [%o3+ln2_256h],LN2_256H
+ ldd [%o3+ln2_256l],LN2_256L
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,jnk,%l3 ! precondition loop
+ add %fp,jnk,%l4
+ add %fp,jnk,%l5
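+! (software pipelining: the top of .loop0 stores the results computed on
+! the previous pass; pointing %l3-%l5 at the jnk scratch slot makes those
+! stores harmless on the first pass, and .endloop0 flushes the final
+! three results)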
+ ld [%i1],%l0 ! hx = *x
+ ld [%i1],%f0
+ ld [%i1+4],%f1
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+ ba .loop0
+ add %i1,%i2,%i1 ! x += stridex
+
+ .align 16
+! -- 16 byte aligned
+.loop0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,%l6,%o3
+ sub %l7,%l0,%o4
+ fand %f0,SIGNBIT,%f2 ! get sign bit
+
+ lda [%i1]%asi,%f10
+ orcc %o3,%o4,%g0
+ mov %i3,%o0 ! py0 = y
+ bl,pn %icc,.range0 ! if hx < 0x3e300000 or > 0x40862e41
+
+! delay slot
+ lda [%i1+4]%asi,%f11
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop1
+
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ for %f2,TWO96,%f2 ! used to strip least sig bits
+ fmuld %f0,INVLN2_256,%f4 ! x/ (ln2/256) , creating k
+
+.loop1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,%l6,%o3
+ sub %l7,%l1,%o4
+ fand %f10,SIGNBIT,%f12
+
+ lda [%i1]%asi,%f20
+ orcc %o3,%o4,%g0
+ mov %i3,%o1 ! py1 = y
+ bl,pn %icc,.range1 ! if hx < 0x3e300000 or > 0x40862e41
+
+! delay slot
+ lda [%i1+4]%asi,%f21
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop2
+
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ for %f12,TWO96,%f12
+ fmuld %f10,INVLN2_256,%f14
+
+.loop2:
+ sub %l2,%l6,%o3
+ sub %l7,%l2,%o4
+ fand %f20,SIGNBIT,%f22
+ fmuld %f20,INVLN2_256,%f24 ! okay to put this here; for alignment
+
+ orcc %o3,%o4,%g0
+ bl,pn %icc,.range2 ! if hx < 0x3e300000 or > 0x40862e41
+! delay slot
+ for %f22,TWO96,%f22
+ faddd %f4,%f2,%f4 ! creating k+j/256, sra to zero bits
+
+.cont:
+ faddd %f14,%f12,%f14
+ mov %i3,%o2 ! py2 = y
+
+ faddd %f24,%f22,%f24
+ add %i3,%i4,%i3 ! y += stridey
+
+ ! BUBBLE USIII
+
+ fsubd %f4,%f2,%f8 ! creating k+j/256: sll
+	st	%f6,[%l3]		! store previous iteration's result (element 0)
+
+	fsubd	%f14,%f12,%f18
+	st	%f7,[%l3+4]		! store previous iteration's result (element 0)
+
+ fsubd %f24,%f22,%f28
+ st %f16,[%l4]
+
+ ! BUBBLE USIII
+
+ fmuld %f8,LN2_256H,%f2 ! closest LN2_256 to x
+ st %f17,[%l4+4]
+
+ fmuld %f18,LN2_256H,%f12
+ st %f26,[%l5]
+
+ fmuld %f28,LN2_256H,%f22
+ st %f27,[%l5+4]
+
+ ! BUBBLE USIII
+
+ fsubd %f0,%f2,%f0 ! r = x - p*LN2_256H
+ fmuld %f8,LN2_256L,%f4 ! closest LN2_256 to x , added prec
+
+ fsubd %f10,%f12,%f10
+ fmuld %f18,LN2_256L,%f14
+
+ fsubd %f20,%f22,%f20
+ fmuld %f28,LN2_256L,%f24
+
+ ! BUBBLE USIII
+
+ fsubd %f0,%f4,%f0 ! r -= p*LN2_256L
+
+ fsubd %f10,%f14,%f10
+
+ fsubd %f20,%f24,%f20
+
+!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here
+
+ ! Alternate polynomial grouping allowing non-sequential calc of p
+ ! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
+	! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3 ) ]
+ !
+ ! let SLi Ri SRi be accumulators
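+	!
+	! C sketch of the two groupings (poly_old/poly_new are illustrative
+	! names, not part of this source); both evaluate the same cubic up to
+	! rounding, but the NEW form breaks the single dependence chain so the
+	! SR, R and SL accumulators can be computed independently before the
+	! final r*(SL+R):
+	!
+	!	double poly_old(double r, double B1, double B2, double B3) {
+	!		return r * (1.0 + r * (B1 + r * (B2 + r * B3)));
+	!	}
+	!	double poly_new(double r, double B1, double B2, double B3) {
+	!		double SR = B2 + r * B3;	/* SRi = r*B3; SRi += B2 */
+	!		double R  = (r * r) * SR;	/* Ri = r*r;   Ri *= SRi */
+	!		double SL = 1.0 + r * B1;	/* SLi = r*B1; SLi += 1  */
+	!		return r * (SL + R);		/* Ri = SLi+Ri; pi = r*Ri */
+	!	}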
+
+ fmuld %f0,B3,%f2 ! SR1 = r1 * B3
+ fdtoi %f8,%f8 ! convert k+j/256 to int
+ st %f8,[%fp+m0] ! store k, to shift return/use
+
+ fmuld %f10,B3,%f12 ! SR2 = r2 * B3
+ fdtoi %f18,%f18 ! convert k+j/256 to int
+ st %f18,[%fp+m1] ! store k, to shift return/use
+
+ fmuld %f20,B3,%f22 ! SR3 = r3 * B3
+ fdtoi %f28,%f28 ! convert k+j/256 to int
+ st %f28,[%fp+m2] ! store k, to shift return/use
+
+ fmuld %f0,%f0,%f4 ! R1 = r1 * r1
+
+ fmuld %f10,%f10,%f14 ! R2 = r2 * r2
+ faddd %f2,B2,%f2 ! SR1 += B2
+
+ fmuld %f20,%f20,%f24 ! R3 = r3 * r3
+ faddd %f12,B2,%f12 ! SR2 += B2
+
+ faddd %f22,B2,%f22 ! SR3 += B2
+ fmuld %f0,B1,%f6 ! SL1 = r1 * B1
+
+ fmuld %f10,B1,%f32 ! SL2 = r2 * B1
+ fand %f8,NEGINF,%f8
+ ! best here for RAW BYPASS
+ ld [%fp+m0],%l0 ! get nonshifted k into intreg
+
+ fmuld %f20,B1,%f34 ! SL3 = r3 * B1
+ fand %f18,NEGINF,%f18
+ ld [%fp+m1],%l1 ! get nonshifted k into intreg
+
+ fmuld %f4,%f2,%f4 ! R1 = R1 * SR1
+ fand %f28,NEGINF,%f28
+ ld [%fp+m2],%l2 ! get nonshifted k into intreg
+
+ fmuld %f14,%f12,%f14 ! R2 = R2 * SR2
+ faddd %f6,ONE,%f6 ! SL1 += 1
+
+ fmuld %f24,%f22,%f24 ! R3 = R3 * SR3
+ faddd %f32,ONE,%f32 ! SL2 += 1
+	sra	%l0,8,%l3		! shift k to be a byte offset into the 256-entry table
+
+ faddd %f34,ONE,%f34 ! SL3 += 1
+	sra	%l1,8,%l4		! shift k to be a byte offset into the 256-entry table
+	sra	%l2,8,%l5		! shift k to be a byte offset into the 256-entry table
+
+ ! BUBBLE in USIII
+ and %l3,0xff0,%l3
+ and %l4,0xff0,%l4
+
+
+
+ faddd %f6,%f4,%f6 ! R1 = SL1 + R1
+ ldd [%g1+%l3],%f4 ! tbl[j]
+ add %l3,8,%l3 ! inc j
+ and %l5,0xff0,%l5
+
+
+ faddd %f32,%f14,%f32 ! R2 = SL2 + R2
+ ldd [%g1+%l4],%f14 ! tbl[j]
+ add %l4,8,%l4 ! inc j
+ sra %l0,20,%o3
+
+ faddd %f34,%f24,%f34 ! R3 = SL3 + R3
+ ldd [%g1+%l5],%f24 ! tbl[j]
+ add %l5,8,%l5 ! inc j
+ sra %l1,20,%l1
+
+ ! BUBBLE in USIII
+ ldd [%g1+%l4],%f16 ! tbl[j+1]
+ add %o3,1021,%o3 ! inc j
+
+ fmuld %f0,%f6,%f0 ! p1 = r1 * R1
+ ldd [%g1+%l3],%f6 ! tbl[j+1]
+ add %l1,1021,%l1 ! inc j
+ sra %l2,20,%l2
+
+ fmuld %f10,%f32,%f10 ! p2 = r2 * R2
+ ldd [%g1+%l5],%f26 ! tbl[j+1]
+ add %l2,1021,%l2 ! inc j
+
+ fmuld %f20,%f34,%f20 ! p3 = r3 * R3
+
+
+
+
+
+!!!!!!!!!!!!!!!!!!! poly-reorder - ends here
+
+ fmuld %f0,%f4,%f0 ! start exp(x) = exp(r) * tbl[j]
+ mov %o0,%l3
+
+ fmuld %f10,%f14,%f10
+ mov %o1,%l4
+
+ fmuld %f20,%f24,%f20
+ mov %o2,%l5
+
+ faddd %f0,%f6,%f6 ! cont exp(x) : apply tbl[j] high bits
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ faddd %f10,%f16,%f16
+ lda [%i1]%asi,%f0
+
+ faddd %f20,%f26,%f26
+ lda [%i1+4]%asi,%f1
+
+ faddd %f6,%f4,%f6 ! cont exp(x) : apply tbl[j+1] low bits
+ add %i1,%i2,%i1 ! x += stridex
+
+ faddd %f16,%f14,%f16
+ andn %l0,%i5,%l0
+ or %o3,%l1,%o4
+
+! -- 16 byte aligned
+ orcc %o4,%l2,%o4
+ bl,pn %icc,.small
+! delay slot
+ faddd %f26,%f24,%f26
+
+ fpadd32 %f6,%f8,%f6 ! done exp(x) : apply 2^k
+ fpadd32 %f16,%f18,%f16
+
+
+ addcc %i0,-1,%i0
+ bg,pn %icc,.loop0
+! delay slot
+ fpadd32 %f26,%f28,%f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+
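+! .small: one or more results would underflow if scaled by 2^m directly.
+! The fix, as encoded in the constants: apply the exponent adjustment plus
+! an extra bias of +1007 (BOUNDRY has 0x3ef00000 in its high word), which
+! keeps the intermediate normal, then multiply by TINY = 2^-1007 so the
+! final, possibly subnormal, result comes out of the multiply with correct
+! rounding and the underflow status raised.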
+ .align 16
+.small:
+ tst %o3
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f6,%f8,%f6
+ fpadd32 %f6,BOUNDRY,%f6
+ fmuld %f6,TINY,%f6
+1:
+ tst %l1
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f16,%f18,%f16
+ fpadd32 %f16,BOUNDRY,%f16
+ fmuld %f16,TINY,%f16
+1:
+ tst %l2
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f26,%f28,%f26
+ fpadd32 %f26,BOUNDRY,%f26
+ fmuld %f26,TINY,%f26
+1:
+ addcc %i0,-1,%i0
+ bg,pn %icc,.loop0
+! delay slot
+ nop
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+
+.endloop2:
+ for %f12,TWO96,%f12
+ fmuld %f10,INVLN2_256,%f14
+ faddd %f14,%f12,%f14
+ fsubd %f14,%f12,%f18
+ fmuld %f18,LN2_256H,%f12
+ fsubd %f10,%f12,%f10
+ fmuld %f18,LN2_256L,%f14
+ fsubd %f10,%f14,%f10
+ fmuld %f10,B3,%f12
+ fdtoi %f18,%f18
+ st %f18,[%fp+m1]
+ fmuld %f10,%f10,%f14
+ faddd %f12,B2,%f12
+ fmuld %f10,B1,%f32
+ fand %f18,NEGINF,%f18
+ ld [%fp+m1],%l1
+ fmuld %f14,%f12,%f14
+ faddd %f32,ONE,%f32
+ sra %l1,8,%o4
+ and %o4,0xff0,%o4
+ faddd %f32,%f14,%f32
+ ldd [%g1+%o4],%f14
+ add %o4,8,%o4
+ sra %l1,20,%l1
+ ldd [%g1+%o4],%f30
+ addcc %l1,1021,%l1
+ fmuld %f10,%f32,%f10
+ fmuld %f10,%f14,%f10
+ faddd %f10,%f30,%f30
+ faddd %f30,%f14,%f30
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f30,%f18,%f30
+ fpadd32 %f30,BOUNDRY,%f30
+ fmuld %f30,TINY,%f30
+1:
+ st %f30,[%o1]
+ st %f31,[%o1+4]
+
+.endloop1:
+ for %f2,TWO96,%f2
+ fmuld %f0,INVLN2_256,%f4
+ faddd %f4,%f2,%f4
+ fsubd %f4,%f2,%f8
+ fmuld %f8,LN2_256H,%f2
+ fsubd %f0,%f2,%f0
+ fmuld %f8,LN2_256L,%f4
+ fsubd %f0,%f4,%f0
+ fmuld %f0,B3,%f2
+ fdtoi %f8,%f8
+ st %f8,[%fp+m0]
+ fmuld %f0,%f0,%f4
+ faddd %f2,B2,%f2
+ fmuld %f0,B1,%f32
+ fand %f8,NEGINF,%f8
+ ld [%fp+m0],%l0
+ fmuld %f4,%f2,%f4
+ faddd %f32,ONE,%f32
+ sra %l0,8,%o4
+ and %o4,0xff0,%o4
+ faddd %f32,%f4,%f32
+ ldd [%g1+%o4],%f4
+ add %o4,8,%o4
+ sra %l0,20,%o3
+ ldd [%g1+%o4],%f30
+ addcc %o3,1021,%o3
+ fmuld %f0,%f32,%f0
+ fmuld %f0,%f4,%f0
+ faddd %f0,%f30,%f30
+ faddd %f30,%f4,%f30
+ bge,pt %icc,1f
+! delay slot
+ fpadd32 %f30,%f8,%f30
+ fpadd32 %f30,BOUNDRY,%f30
+ fmuld %f30,TINY,%f30
+1:
+ st %f30,[%o0]
+ st %f31,[%o0+4]
+
+.endloop0:
+ st %f6,[%l3]
+ st %f7,[%l3+4]
+ st %f16,[%l4]
+ st %f17,[%l4+4]
+ st %f26,[%l5]
+ st %f27,[%l5+4]
+ ret
+ restore
+
+
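+! .range0/.range1/.range2: argument outside the fast-path interval
+! [0x3e300000, 0x40862e41].  In outline:
+!	tiny |x|			-> 1 + x
+!	x above THRESH			-> huge*huge	(+Inf, overflow)
+!	x below the negative cutoff	-> tiny*tiny	(+0, underflow)
+!	+Inf / NaN			-> x*x
+!	-Inf				-> zero*zero	(+0)
+! arguments only slightly outside the interval are fed back into the loop.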
+.range0:
+ cmp %l0,%l6
+ bl,a,pt %icc,3f ! if x is tiny
+! delay slot, annulled if branch not taken
+ faddd %f0,ONE,%f4
+
+ cmp %l0,%o5
+ bg,pt %icc,1f ! if x is huge, inf, nan
+! delay slot
+ nop
+
+ fcmpd %fcc0,%f0,THRESH
+ fbg,a,pt %fcc0,3f ! if x is huge and positive
+! delay slot, annulled if branch not taken
+ fmuld HUGE,HUGE,%f4
+
+! x is near the extremes but within range; return to the loop
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop1
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ for %f2,TWO96,%f2
+ ba,pt %icc,.loop1
+! delay slot
+ fmuld %f0,INVLN2_256,%f4
+
+1:
+ cmp %l0,%o7
+ bl,pn %icc,2f ! if x is finite
+! delay slot
+ nop
+ fzero %f4
+ fcmpd %fcc0,%f0,NEGINF
+ fmovdne %fcc0,%f0,%f4
+ ba,pt %icc,3f
+ fmuld %f4,%f4,%f4 ! x*x or zero*zero
+2:
+ fmovd HUGE,%f4
+ fcmpd %fcc0,%f0,ONE
+ fmovdl %fcc0,TINY,%f4
+ fmuld %f4,%f4,%f4 ! huge*huge or tiny*tiny
+3:
+ st %f4,[%o0]
+ andn %l1,%i5,%l0
+ add %i1,%i2,%i1 ! x += stridex
+ fmovd %f10,%f0
+ st %f5,[%o0+4]
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+
+.range1:
+ cmp %l1,%l6
+ bl,a,pt %icc,3f ! if x is tiny
+! delay slot, annulled if branch not taken
+ faddd %f10,ONE,%f14
+
+ cmp %l1,%o5
+ bg,pt %icc,1f ! if x is huge, inf, nan
+! delay slot
+ nop
+
+ fcmpd %fcc0,%f10,THRESH
+ fbg,a,pt %fcc0,3f ! if x is huge and positive
+! delay slot, annulled if branch not taken
+ fmuld HUGE,HUGE,%f14
+
+! x is near the extremes but within range; return to the loop
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop2
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ for %f12,TWO96,%f12
+ ba,pt %icc,.loop2
+! delay slot
+ fmuld %f10,INVLN2_256,%f14
+
+1:
+ cmp %l1,%o7
+ bl,pn %icc,2f ! if x is finite
+! delay slot
+ nop
+ fzero %f14
+ fcmpd %fcc0,%f10,NEGINF
+ fmovdne %fcc0,%f10,%f14
+ ba,pt %icc,3f
+ fmuld %f14,%f14,%f14 ! x*x or zero*zero
+2:
+ fmovd HUGE,%f14
+ fcmpd %fcc0,%f10,ONE
+ fmovdl %fcc0,TINY,%f14
+ fmuld %f14,%f14,%f14 ! huge*huge or tiny*tiny
+3:
+ st %f14,[%o1]
+ andn %l2,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ fmovd %f20,%f10
+ st %f15,[%o1+4]
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop1
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ ba,pt %icc,.endloop1
+! delay slot
+ nop
+
+
+.range2:
+ cmp %l2,%l6
+ bl,a,pt %icc,3f ! if x is tiny
+! delay slot, annulled if branch not taken
+ faddd %f20,ONE,%f24
+
+ cmp %l2,%o5
+ bg,pt %icc,1f ! if x is huge, inf, nan
+! delay slot
+ nop
+
+ fcmpd %fcc0,%f20,THRESH
+ fbg,a,pt %fcc0,3f ! if x is huge and positive
+! delay slot, annulled if branch not taken
+ fmuld HUGE,HUGE,%f24
+
+! x is near the extremes but within range; return to the loop
+ ba,pt %icc,.cont
+! delay slot
+ faddd %f4,%f2,%f4
+
+1:
+ cmp %l2,%o7
+ bl,pn %icc,2f ! if x is finite
+! delay slot
+ nop
+ fzero %f24
+ fcmpd %fcc0,%f20,NEGINF
+ fmovdne %fcc0,%f20,%f24
+ ba,pt %icc,3f
+ fmuld %f24,%f24,%f24 ! x*x or zero*zero
+2:
+ fmovd HUGE,%f24
+ fcmpd %fcc0,%f20,ONE
+ fmovdl %fcc0,TINY,%f24
+ fmuld %f24,%f24,%f24 ! huge*huge or tiny*tiny
+3:
+ st %f24,[%i3]
+ st %f25,[%i3+4]
+ lda [%i1]%asi,%l2 ! preload next argument
+ lda [%i1]%asi,%f20
+ lda [%i1+4]%asi,%f21
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop2
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ ba,pt %icc,.endloop2
+! delay slot
+ nop
+
+ SET_SIZE(__vexp)
+
diff --git a/usr/src/libm/src/mvec/vis/__vexpf.S b/usr/src/libm/src/mvec/vis/__vexpf.S
new file mode 100644
index 0000000..b533e3b
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vexpf.S
@@ -0,0 +1,2113 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vexpf.S 1.7 06/01/23 SMI"
+
+ .file "__vexpf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+!! bit pattern of 2^(i/256) minus ((i & 0xf0) << 44), i = [0, 255]
+.CONST_TBL:
+ .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
+ .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
+ .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
+ .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
+ .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
+ .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
+ .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
+ .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
+ .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85
+ .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec
+ .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5
+ .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e
+ .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6
+ .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab
+ .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e
+ .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2
+ .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0
+ .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f
+ .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c
+ .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b
+ .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027
+ .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d
+ .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819
+ .word 0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1
+ .word 0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a
+ .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75
+ .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29
+ .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70
+ .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13
+ .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f
+ .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589
+ .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b
+ .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd
+ .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32
+ .word 0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d
+ .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b
+ .word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a
+ .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef
+ .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4
+ .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173
+ .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175
+ .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024
+ .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a
+ .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4
+ .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232
+ .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237
+ .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2
+ .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7
+ .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114
+ .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff
+ .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee
+ .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef
+ .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27
+ .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2
+ .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf
+ .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc
+ .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03
+ .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93
+ .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71
+ .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4
+ .word 0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd
+ .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7
+ .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6
+ .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538
+ .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e
+ .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645
+ .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5
+ .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87
+ .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a
+ .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd
+ .word 0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09
+ .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6
+ .word 0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb
+ .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0
+ .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491
+ .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9
+ .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7
+ .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21
+ .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436
+ .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f
+ .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778
+ .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9
+ .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a
+ .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2
+ .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5
+ .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3
+ .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2
+ .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d
+ .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5
+ .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e
+ .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb
+ .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8
+ .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052
+ .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59
+ .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba
+ .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774
+ .word 0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff
+ .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952
+ .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1
+ .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a
+ .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4
+ .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f
+ .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207
+ .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d
+ .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c
+ .word 0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22
+ .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933
+ .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db
+ .word 0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675
+ .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74
+ .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968
+ .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6
+ .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3
+ .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075
+ .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315
+ .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658
+ .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17
+ .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12
+ .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76
+ .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740
+ .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e
+ .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510
+ .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a
+ .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274
+ .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8
+ .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89
+ .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514
+ .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9
+
+ .word 0x7149f2ca, 0x0da24260 ! 1.0e30f, 1.0e-30f
+ .word 0x3ecebfbe, 0x9d182250 ! KA2 = 3.66556671660783833261e-06
+ .word 0x3f662e43, 0xe2528362 ! KA1 = 2.70760782821392980564e-03
+ .word 0x40771547, 0x652b82fe ! K256ONLN2 = 369.3299304675746271
+ .word 0x42aeac4f, 0x42b17218 ! THRESHOLD = 87.3365402f
+ ! THRESHOLDL = 88.7228394f
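+!
+! The table above stores, for each i, the bit pattern of 2^(i/256) with
+! ((i & 0xf0) << 44) subtracted, so that the index bits dropped by
+! fpackfix (GSR scale 12) are compensated and the later fpadd32 of the
+! packed k with the entry rebuilds 2^(k/256).  A C sketch of how such a
+! table could be generated (a standalone helper, not part of this source;
+! entries may differ in the final bit of rounding):
+!
+!	#include <stdio.h>
+!	#include <stdint.h>
+!	#include <string.h>
+!	#include <math.h>
+!
+!	int main(void) {
+!		for (int i = 0; i < 256; i++) {
+!			double d = pow(2.0, i / 256.0);
+!			uint64_t bits;
+!			memcpy(&bits, &d, sizeof bits);	/* bits of 2^(i/256) */
+!			bits -= (uint64_t)(i & 0xf0) << 44;
+!			printf("\t.word\t0x%08x, 0x%08x\n",
+!			    (unsigned)(bits >> 32), (unsigned)bits);
+!		}
+!		return 0;
+!	}
+!
+! THRESHOLD (87.3365402 ~ 126*ln2) marks where expf(-|x|) leaves the
+! normal range; THRESHOLDL (88.7228394 ~ ln(FLT_MAX)) is where expf(x)
+! overflows, so positive arguments between the two are still finite and
+! the .spec paths route them back into the main computation.
+!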
+! local storage indices
+
+#define tmp0 STACK_BIAS-32
+#define tmp1 STACK_BIAS-28
+#define tmp2 STACK_BIAS-24
+#define tmp3 STACK_BIAS-20
+#define tmp4 STACK_BIAS-16
+#define tmp5 STACK_BIAS-12
+#define tmp6 STACK_BIAS-8
+#define tmp7 STACK_BIAS-4
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+#define I5_THRESHOLD %i5
+#define G1_CONST_TBL %g5
+#define G5_CONST %g1
+
+#define F62_K256ONLN2 %f62
+#define F60_KA2 %f60
+#define F58_KA1 %f58
+
+#define THRESHOLDL %f0
+
+! register use
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+
+! i5 0x42aeac4f (87.3365402f)
+
+! g5	CONST_TBL	(named G1_CONST_TBL above)
+! g1	0x7fffffff	(named G5_CONST above)
+
+! f62 K256ONLN2 = 369.3299304675746271
+! f60 KA2 = 3.66556671660783833261e-06
+! f58 KA1 = 2.70760782821392980564e-03
+
+
+! !!!!! Algorithm !!!!!
+!
+! double y, dtmp, drez;
+! int k, sign, Xi;
+! float X, Y;
+! int THRESHOLD = 0x42aeac4f; /* 87.3365402f */
+! float THRESHOLDL = 88.7228394f;
+! double KA2 = 3.66556671660783833261e-06;
+! double KA1 = 2.70760782821392980564e-03;
+! double K256ONLN2 = 369.3299304675746271;
+! char *CONST_TBL;
+!
+! X = px[0];
+! Xi = ((int*)px)[0];
+! ax = Xi & 0x7fffffff;
+!
+! if (ax > THRESHOLD) {
+! sign = ((unsigned)Xi >> 29) & 4;
+! if (ax >= 0x7f800000) { /* Inf or NaN */
+! if (ax > 0x7f800000) { /* NaN */
+! Y = X * X; /* NaN -> NaN */
+! return Y;
+! }
+! Y = (sign) ? zero : X; /* +Inf -> +Inf , -Inf -> zero */
+! return Y;
+! }
+!
+! if ( X < 0.0f || X >= THRESHOLDL ) {
+! Y = ((float*)(CONST_TBL + 2048 + sign))[0];
+! /* Xi >= THRESHOLDL : Y = 1.0e+30f */
+! /* Xi < -THRESHOLD : Y = 1.0e-30f */
+! Y = Y * Y;
+! /* Xi >= THRESHOLDL : +Inf + overflow */
+! /* Xi < -THRESHOLD : +0 + underflow */
+! return Y;
+! }
+! }
+! vis_write_gsr(12 << 3);
+! y = (double) X;
+! y = K256ONLN2 * y;
+! k = (int) y;
+! dtmp = (double) k;
+! y -= dtmp;
+! dtmp = y * KA2;
+! dtmp += KA1;
+! y *= dtmp;		/* i.e. y = (y * KA2 + KA1) * y */
+! ((int*)&drez)[0] = k;
+! ((int*)&drez)[1] = 0;
+! ((float*)&drez)[0] = vis_fpackfix(drez);
+! k &= 255;
+! k <<= 3;
+! dtmp = ((double*)(CONST_TBL + k))[0];
+! drez = vis_fpadd32(drez,dtmp);
+! y *= drez;
+! y += drez;
+! Y = (float) y;
+!
+!
+! fstod %f16,%f40 ! y = (double) X
+! fmuld F62_K256ONLN2,%f40,%f40 ! y *= K256ONLN2
+! fdtoi %f40,%f16 ! k = (int) y
+! st %f16,[%fp+tmp0] ! store k
+! fitod %f16,%f34 ! dtmp = (double) k
+! fpackfix %f16,%f16 ! ((float*)&drez)[0] = vis_fpackfix(drez)
+! fsubd %f40,%f34,%f40 ! y -= dtmp
+! fmuld F60_KA2,%f40,%f34 ! dtmp = y * KA2
+! faddd F58_KA1,%f34,%f34 ! dtmp += KA1
+! ld [%fp+tmp0],%o0 ! load k
+! fmuld %f34,%f40,%f40 ! y *= dtmp
+! and %o0,255,%o0 ! k &= 255
+! sll %o0,3,%o0 ! k <<= 3
+! ldd [G1_CONST_TBL+%o0],%f34 ! dtmp = ((double*)(CONST_TBL + k))[0]
+! fpadd32 %f16,%f34,%f34 ! drez = vis_fpadd32(drez,dtmp)
+! fmuld %f34,%f40,%f40 ! y *= drez
+! faddd %f34,%f40,%f40 ! y += drez
+! fdtos %f40,%f26 ! (float) y
+!--------------------------------------------------------------------
+
+ ENTRY(__vexpf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,g5)
+
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+ wr %g0,0x60,%gsr
+
+ sll %i2,2,%i2
+ sll %i4,2,%i4
+
+ ldd [G1_CONST_TBL+2056],F60_KA2
+ sethi %hi(0x7ffffc00),G5_CONST
+ ldd [G1_CONST_TBL+2064],F58_KA1
+ add G5_CONST,1023,G5_CONST
+ ldd [G1_CONST_TBL+2072],F62_K256ONLN2
+ ld [G1_CONST_TBL+2080],I5_THRESHOLD
+ ld [G1_CONST_TBL+2084],THRESHOLDL
+
+ subcc %i0,8,%i0
+ bneg,pn %icc,.tail
+ fzeros %f3
+
+.main_loop_preload:
+
+! preload 8 elements and get absolute values
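+! (the odd singles %f3,%f5,...,%f17 are cleared so the low 32-bit word of
+! each drez pair is zero: fpackfix then packs a zero low half and fpadd32
+! adds zero to the low word of the table entry)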
+ ld [%i1],%l0 ! (0) Xi = ((int*)px)[0]
+ fzeros %f5
+ ld [%i1],%f16 ! (0) X = px[0]
+ fzeros %f7
+ add %i1,%i2,%o5 ! px += stridex
+ ld [%o5],%l1 ! (1) Xi = ((int*)px)[0]
+ and %l0,G5_CONST,%l0 ! (0) ax = Xi & 0x7fffffff
+ fzeros %f9
+ ld [%o5],%f2 ! (1) X = px[0]
+ fzeros %f11
+ add %o5,%i2,%i1 ! px += stridex
+ ld [%i1],%l2 ! (2) Xi = ((int*)px)[0]
+ and %l1,G5_CONST,%l1 ! (1) ax = Xi & 0x7fffffff
+ fzeros %f13
+ ld [%i1],%f4 ! (2) X = px[0]
+ fzeros %f15
+ add %i1,%i2,%o5 ! px += stridex
+ ld [%o5],%l3 ! (3) Xi = ((int*)px)[0]
+ and %l2,G5_CONST,%l2 ! (2) ax = Xi & 0x7fffffff
+ fzeros %f17
+ ld [%o5],%f6 ! (3) X = px[0]
+ add %o5,%i2,%o0 ! px += stridex
+ ld [%o0],%l4 ! (4) Xi = ((int*)px)[0]
+ and %l3,G5_CONST,%l3 ! (3) ax = Xi & 0x7fffffff
+ add %o0,%i2,%o1 ! px += stridex
+ ld [%o1],%l5 ! (5) Xi = ((int*)px)[0]
+ add %o1,%i2,%o2 ! px += stridex
+ ld [%o2],%l6 ! (6) Xi = ((int*)px)[0]
+ and %l4,G5_CONST,%l4 ! (4) ax = Xi & 0x7fffffff
+ add %o2,%i2,%o3 ! px += stridex
+ ld [%o3],%l7 ! (7) Xi = ((int*)px)[0]
+ add %o3,%i2,%i1 ! px += stridex
+ and %l5,G5_CONST,%l5 ! (5) ax = Xi & 0x7fffffff
+ and %l6,G5_CONST,%l6 ! (6) ax = Xi & 0x7fffffff
+ ba .main_loop
+ and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff
+
+ .align 16
+.main_loop:
+ cmp %l0,I5_THRESHOLD
+ bg,pn %icc,.spec0 ! (0) if (ax > THRESHOLD)
+ lda [%o0]%asi,%f8 ! (4) X = px[0]
+ fstod %f16,%f40 ! (0) y = (double) X
+.spec0_cont:
+ cmp %l1,I5_THRESHOLD
+ bg,pn %icc,.spec1 ! (1) if (ax > THRESHOLD)
+ lda [%o1]%asi,%f10 ! (5) X = px[0]
+ fstod %f2,%f42 ! (1) y = (double) X
+.spec1_cont:
+ cmp %l2,I5_THRESHOLD
+ bg,pn %icc,.spec2 ! (2) if (ax > THRESHOLD)
+ lda [%o2]%asi,%f12 ! (6) X = px[0]
+ fstod %f4,%f44 ! (2) y = (double) X
+.spec2_cont:
+ cmp %l3,I5_THRESHOLD
+ bg,pn %icc,.spec3 ! (3) if (ax > THRESHOLD)
+ lda [%o3]%asi,%f14 ! (7) X = px[0]
+ fstod %f6,%f46 ! (3) y = (double) X
+.spec3_cont:
+ cmp %l4,I5_THRESHOLD
+ bg,pn %icc,.spec4 ! (4) if (ax > THRESHOLD)
+ fmuld F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2
+ fstod %f8,%f48 ! (4) y = (double) X
+.spec4_cont:
+ cmp %l5,I5_THRESHOLD
+ bg,pn %icc,.spec5 ! (5) if (ax > THRESHOLD)
+ fmuld F62_K256ONLN2,%f42,%f42 ! (1) y *= K256ONLN2
+ fstod %f10,%f50 ! (5) y = (double) X
+.spec5_cont:
+ cmp %l6,I5_THRESHOLD
+ bg,pn %icc,.spec6 ! (6) if (ax > THRESHOLD)
+ fmuld F62_K256ONLN2,%f44,%f44 ! (2) y *= K256ONLN2
+ fstod %f12,%f52 ! (6) y = (double) X
+.spec6_cont:
+ cmp %l7,I5_THRESHOLD
+ bg,pn %icc,.spec7 ! (7) if (ax > THRESHOLD)
+ fmuld F62_K256ONLN2,%f46,%f46 ! (3) y *= K256ONLN2
+ fstod %f14,%f54 ! (7) y = (double) X
+.spec7_cont:
+ fdtoi %f40,%f16 ! (0) k = (int) y
+ st %f16,[%fp+tmp0]
+ fmuld F62_K256ONLN2,%f48,%f48 ! (4) y *= K256ONLN2
+
+ fdtoi %f42,%f2 ! (1) k = (int) y
+ st %f2,[%fp+tmp1]
+ fmuld F62_K256ONLN2,%f50,%f50 ! (5) y *= K256ONLN2
+
+ fdtoi %f44,%f4 ! (2) k = (int) y
+ st %f4,[%fp+tmp2]
+ fmuld F62_K256ONLN2,%f52,%f52 ! (6) y *= K256ONLN2
+
+ fdtoi %f46,%f6 ! (3) k = (int) y
+ st %f6,[%fp+tmp3]
+ fmuld F62_K256ONLN2,%f54,%f54 ! (7) y *= K256ONLN2
+
+ fdtoi %f48,%f8 ! (4) k = (int) y
+ st %f8,[%fp+tmp4]
+
+ fdtoi %f50,%f10 ! (5) k = (int) y
+ st %f10,[%fp+tmp5]
+
+ fitod %f16,%f34 ! (0) dtmp = (double) k
+ fpackfix %f16,%f16 ! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
+ nop
+ nop
+
+ fdtoi %f52,%f12 ! (6) k = (int) y
+ st %f12,[%fp+tmp6]
+
+ fdtoi %f54,%f14 ! (7) k = (int) y
+ st %f14,[%fp+tmp7]
+
+ lda [%i1]%asi,%l0 ! (8) Xi = ((int*)px)[0]
+ add %i1,%i2,%o5 ! px += stridex
+ fitod %f2,%f18 ! (1) dtmp = (double) k
+ fpackfix %f2,%f2 ! (1) ((float*)&drez)[0] = vis_fpackfix(drez)
+
+ lda [%o5]%asi,%l1 ! (9) Xi = ((int*)px)[0]
+ add %o5,%i2,%i1 ! px += stridex
+ fitod %f4,%f20 ! (2) dtmp = (double) k
+ fpackfix %f4,%f4 ! (2) ((float*)&drez)[0] = vis_fpackfix(drez)
+
+ lda [%i1]%asi,%l2 ! (10) Xi = ((int*)px)[0]
+ add %i1,%i2,%o5 ! px += stridex
+ fitod %f6,%f22 ! (3) dtmp = (double) k
+ fpackfix %f6,%f6 ! (3) ((float*)&drez)[0] = vis_fpackfix(drez)
+
+ lda [%o5]%asi,%l3 ! (11) Xi = ((int*)px)[0]
+ add %o5,%i2,%i1 ! px += stridex
+ fitod %f8,%f24 ! (4) dtmp = (double) k
+ fpackfix %f8,%f8 ! (4) ((float*)&drez)[0] = vis_fpackfix(drez)
+
+ fitod %f10,%f26 ! (5) dtmp = (double) k
+ fpackfix %f10,%f10 ! (5) ((float*)&drez)[0] = vis_fpackfix(drez)
+
+ fitod %f12,%f28 ! (6) dtmp = (double) k
+ fpackfix %f12,%f12 ! (6) ((float*)&drez)[0] = vis_fpackfix(drez)
+
+ fitod %f14,%f30 ! (7) dtmp = (double) k
+ fpackfix %f14,%f14 ! (7) ((float*)&drez)[0] = vis_fpackfix(drez)
+
+ ld [%fp+tmp0],%o0 ! (0) load k
+ and %l0,G5_CONST,%l0 ! (8) ax = Xi & 0x7fffffff
+ fsubd %f40,%f34,%f40 ! (0) y -= dtmp
+
+ ld [%fp+tmp1],%o1 ! (1) load k
+ and %l1,G5_CONST,%l1 ! (9) ax = Xi & 0x7fffffff
+ fsubd %f42,%f18,%f42 ! (1) y -= dtmp
+
+ ld [%fp+tmp2],%o2 ! (2) load k
+ and %l2,G5_CONST,%l2 ! (10) ax = Xi & 0x7fffffff
+ and %o0,255,%o0 ! (0) k &= 255
+ fsubd %f44,%f20,%f44 ! (2) y -= dtmp
+
+ ld [%fp+tmp3],%o3 ! (3) load k
+ and %o1,255,%o1 ! (1) k &= 255
+ fsubd %f46,%f22,%f46 ! (3) y -= dtmp
+
+ sll %o0,3,%o0 ! (0) k <<= 3
+ sll %o1,3,%o1 ! (1) k <<= 3
+ fmuld F60_KA2,%f40,%f34 ! (0) dtmp = y * KA2
+ fsubd %f48,%f24,%f48 ! (4) y -= dtmp
+
+ and %l3,G5_CONST,%l3 ! (11) ax = Xi & 0x7fffffff
+ and %o2,255,%o2 ! (2) k &= 255
+ fmuld F60_KA2,%f42,%f18 ! (1) dtmp = y * KA2
+ fsubd %f50,%f26,%f50 ! (5) y -= dtmp
+
+ sll %o2,3,%o2 ! (2) k <<= 3
+ fmuld F60_KA2,%f44,%f20 ! (2) dtmp = y * KA2
+ fsubd %f52,%f28,%f52 ! (6) y -= dtmp
+
+ ld [%fp+tmp4],%o4 ! (4) load k
+ and %o3,255,%o3 ! (3) k &= 255
+ fmuld F60_KA2,%f46,%f22 ! (3) dtmp = y * KA2
+ fsubd %f54,%f30,%f54 ! (7) y -= dtmp
+
+ ld [%fp+tmp5],%o5 ! (5) load k
+ sll %o3,3,%o3 ! (3) k <<= 3
+ fmuld F60_KA2,%f48,%f24 ! (4) dtmp = y * KA2
+ faddd F58_KA1,%f34,%f34 ! (0) dtmp += KA1
+
+ ld [%fp+tmp6],%o7 ! (6) load k
+ and %o4,255,%o4 ! (4) k &= 255
+ fmuld F60_KA2,%f50,%f26 ! (5) dtmp = y * KA2
+ faddd F58_KA1,%f18,%f18 ! (1) dtmp += KA1
+
+ ld [%fp+tmp7],%l4 ! (7) load k
+ and %o5,255,%o5 ! (5) k &= 255
+ fmuld F60_KA2,%f52,%f28 ! (6) dtmp = y * KA2
+ faddd F58_KA1,%f20,%f20 ! (2) dtmp += KA1
+
+ sll %o5,3,%o5 ! (5) k <<= 3
+ fmuld F60_KA2,%f54,%f30 ! (7) dtmp = y * KA2
+ faddd F58_KA1,%f22,%f22 ! (3) dtmp += KA1
+
+ fmuld %f34,%f40,%f40 ! (0) y *= dtmp
+ ldd [G1_CONST_TBL+%o0],%f34 ! (0) dtmp = ((double*)(CONST_TBL + k))[0]
+ and %l4,255,%l4 ! (7) k &= 255
+ faddd F58_KA1,%f24,%f24 ! (4) dtmp += KA1
+
+ fmuld %f18,%f42,%f42 ! (1) y *= dtmp
+ ldd [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0]
+ sll %l4,3,%l4 ! (7) k <<= 3
+ faddd F58_KA1,%f26,%f26 ! (5) dtmp += KA1
+
+ fmuld %f20,%f44,%f44 ! (2) y *= dtmp
+ ldd [G1_CONST_TBL+%o2],%f20 ! (2) dtmp = ((double*)(CONST_TBL + k))[0]
+ faddd F58_KA1,%f28,%f28 ! (6) dtmp += KA1
+
+ fmuld %f22,%f46,%f46 ! (3) y *= dtmp
+ ldd [G1_CONST_TBL+%o3],%f22 ! (3) dtmp = ((double*)(CONST_TBL + k))[0]
+ sll %o4,3,%o4 ! (4) k <<= 3
+ faddd F58_KA1,%f30,%f30 ! (7) dtmp += KA1
+
+ fmuld %f24,%f48,%f48 ! (4) y *= dtmp
+ ldd [G1_CONST_TBL+%o4],%f24 ! (4) dtmp = ((double*)(CONST_TBL + k))[0]
+ and %o7,255,%o7 ! (6) k &= 255
+ fpadd32 %f16,%f34,%f34 ! (0) drez = vis_fpadd32(drez,dtmp)
+
+ fmuld %f26,%f50,%f50 ! (5) y *= dtmp
+ ldd [G1_CONST_TBL+%o5],%f26 ! (5) dtmp = ((double*)(CONST_TBL + k))[0]
+ sll %o7,3,%o7 ! (6) k <<= 3
+ fpadd32 %f2,%f18,%f18 ! (1) drez = vis_fpadd32(drez,dtmp)
+
+ fmuld %f28,%f52,%f52 ! (6) y *= dtmp
+ ldd [G1_CONST_TBL+%o7],%f28 ! (6) dtmp = ((double*)(CONST_TBL + k))[0]
+ sll %i2,2,%o0
+ fpadd32 %f4,%f20,%f20 ! (2) drez = vis_fpadd32(drez,dtmp)
+
+ fmuld %f30,%f54,%f54 ! (7) y *= dtmp
+ ldd [G1_CONST_TBL+%l4],%f30 ! (7) dtmp = ((double*)(CONST_TBL + k))[0]
+ sub %i1,%o0,%o0
+ fpadd32 %f6,%f22,%f22 ! (3) drez = vis_fpadd32(drez,dtmp)
+
+ lda [%i1]%asi,%l4 ! (12) Xi = ((int*)px)[0]
+ add %i1,%i2,%o1 ! px += stridex
+ fpadd32 %f8,%f24,%f24 ! (4) drez = vis_fpadd32(drez,dtmp)
+ fmuld %f34,%f40,%f40 ! (0) y *= drez
+
+ lda [%o1]%asi,%l5 ! (13) Xi = ((int*)px)[0]
+ add %o1,%i2,%o2 ! px += stridex
+ fpadd32 %f10,%f26,%f26 ! (5) drez = vis_fpadd32(drez,dtmp)
+ fmuld %f18,%f42,%f42 ! (1) y *= drez
+
+ lda [%o2]%asi,%l6 ! (14) Xi = ((int*)px)[0]
+ add %o2,%i2,%o3 ! px += stridex
+ fpadd32 %f12,%f28,%f28 ! (6) drez = vis_fpadd32(drez,dtmp)
+ fmuld %f20,%f44,%f44 ! (2) y *= drez
+
+ lda [%o3]%asi,%l7 ! (15) Xi = ((int*)px)[0]
+ add %o3,%i2,%i1 ! px += stridex
+ fpadd32 %f14,%f30,%f30 ! (7) drez = vis_fpadd32(drez,dtmp)
+ fmuld %f22,%f46,%f46 ! (3) y *= drez
+
+ lda [%o0]%asi,%f16 ! (8) X = px[0]
+ add %o0,%i2,%o5
+ fmuld %f24,%f48,%f48 ! (4) y *= drez
+ faddd %f34,%f40,%f40 ! (0) y += drez
+
+ lda [%o5]%asi,%f2 ! (9) X = px[0]
+ add %o5,%i2,%o0
+ fmuld %f26,%f50,%f50 ! (5) y *= drez
+ faddd %f18,%f42,%f42 ! (1) y += drez
+
+ lda [%o0]%asi,%f4 ! (10) X = px[0]
+ add %o0,%i2,%o5
+ fmuld %f28,%f52,%f52 ! (6) y *= drez
+ faddd %f20,%f44,%f44 ! (2) y += drez
+
+ lda [%o5]%asi,%f6 ! (11) X = px[0]
+ add %o5,%i2,%o0
+ fmuld %f30,%f54,%f54 ! (7) y *= drez
+ faddd %f22,%f46,%f46 ! (3) y += drez
+
+ and %l4,G5_CONST,%l4 ! (12) ax = Xi & 0x7fffffff
+ faddd %f24,%f48,%f48 ! (4) y += drez
+
+ and %l5,G5_CONST,%l5 ! (13) ax = Xi & 0x7fffffff
+ faddd %f26,%f50,%f50 ! (5) y += drez
+
+ and %l6,G5_CONST,%l6 ! (14) ax = Xi & 0x7fffffff
+ faddd %f28,%f52,%f52 ! (6) y += drez
+
+ and %l7,G5_CONST,%l7 ! (15) ax = Xi & 0x7fffffff
+ faddd %f30,%f54,%f54 ! (7) y += drez
+
+ fdtos %f40,%f26 ! (0) (float) y
+ st %f26,[%i3]
+ add %i3,%i4,%o4 ! py += stridey
+
+ fdtos %f42,%f18 ! (1) (float) y
+ st %f18,[%o4]
+ add %o4,%i4,%i3 ! py += stridey
+
+ fdtos %f44,%f20 ! (2) (float) y
+ st %f20,[%i3]
+ add %i3,%i4,%o4 ! py += stridey
+
+ fdtos %f46,%f22 ! (3) (float) y
+ st %f22,[%o4]
+ add %o4,%i4,%i3 ! py += stridey
+
+ fdtos %f48,%f24 ! (4) (float) y
+ st %f24,[%i3]
+ subcc %i0,8,%i0
+ add %i3,%i4,%o4 ! py += stridey
+
+ fdtos %f50,%f26 ! (5) (float) y
+ st %f26,[%o4]
+ add %o4,%i4,%o5 ! py += stridey
+ add %i4,%i4,%o7
+
+ fdtos %f52,%f28 ! (6) (float) y
+ st %f28,[%o5]
+ add %o5,%i4,%o4 ! py += stridey
+ add %o5,%o7,%i3 ! py += stridey
+
+ fdtos %f54,%f30 ! (7) (float) y
+ st %f30,[%o4]
+ bpos,pt %icc,.main_loop
+ nop
+.after_main_loop:
+ sll %i2,3,%o2
+ sub %i1,%o2,%i1
+
+.tail:
+ add %i0,8,%i0
+ subcc %i0,1,%i0
+ bneg,pn %icc,.exit
+
+ ld [%i1],%l0
+ ld [%i1],%f2
+ add %i1,%i2,%i1
+
+.tail_loop:
+ and %l0,G5_CONST,%l1
+ cmp %l1,I5_THRESHOLD
+ bg,pn %icc,.tail_spec
+ nop
+.tail_spec_cont:
+ fstod %f2,%f40
+ fmuld F62_K256ONLN2,%f40,%f40
+ fdtoi %f40,%f2
+ st %f2,[%fp+tmp0]
+ fitod %f2,%f16
+ fpackfix %f2,%f2
+ fsubd %f40,%f16,%f40
+ fmuld F60_KA2,%f40,%f16
+ faddd F58_KA1,%f16,%f16
+ ld [%fp+tmp0],%o0
+ fmuld %f16,%f40,%f40
+ and %o0,255,%o0
+ sll %o0,3,%o0
+ ldd [G1_CONST_TBL+%o0],%f16
+ fpadd32 %f2,%f16,%f16
+ lda [%i1]%asi,%l0
+ fmuld %f16,%f40,%f40
+ lda [%i1]%asi,%f2
+ faddd %f16,%f40,%f40
+ add %i1,%i2,%i1
+ fdtos %f40,%f16
+ st %f16,[%i3]
+ add %i3,%i4,%i3
+ subcc %i0,1,%i0
+ bpos,pt %icc,.tail_loop
+ nop
+
+.exit:
+ ret
+ restore
+
+.tail_spec:
+ sethi %hi(0x7f800000),%o4
+ cmp %l1,%o4
+ bl,pt %icc,.tail_spec_out_of_range
+ nop
+
+ srl %l0,29,%l0
+ ble,pn %icc,.tail_spec_inf
+ andcc %l0,4,%g0
+
+! NaN -> NaN
+
+ fmuls %f2,%f2,%f2
+ ba .tail_spec_exit
+ st %f2,[%i3]
+
+.tail_spec_inf:
+ be,a,pn %icc,.tail_spec_exit
+ st %f2,[%i3]
+
+ ba .tail_spec_exit
+ st %f3,[%i3]
+
+.tail_spec_out_of_range:
+ fcmpes %fcc0,%f2,%f3
+ fcmpes %fcc1,%f2,THRESHOLDL
+ fbl,pn %fcc0,1f ! if ( X < 0.0f )
+ nop
+ fbl,pt %fcc1,.tail_spec_cont ! if ( X < THRESHOLDL )
+ nop
+1:
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.tail_spec_exit:
+ lda [%i1]%asi,%l0
+ lda [%i1]%asi,%f2
+ add %i1,%i2,%i1
+
+ subcc %i0,1,%i0
+ bpos,pt %icc,.tail_loop
+ add %i3,%i4,%i3
+ ba .exit
+ nop
+
+ .align 16
+.spec0:
+ sethi %hi(0x7f800000),%o5
+ cmp %l0,%o5
+ bl,pt %icc,.spec0_out_of_range
+ sll %i2,3,%o4
+
+ ble,pn %icc,.spec0_inf
+ sub %i1,%o4,%o4
+
+! NaN -> NaN
+
+ fmuls %f16,%f16,%f16
+ ba .spec0_exit
+ st %f16,[%i3]
+
+.spec0_inf:
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec0_exit
+ st %f16,[%i3]
+
+ ba .spec0_exit
+ st %f3,[%i3]
+
+.spec0_out_of_range:
+ fcmpes %fcc0,%f16,%f3
+ fcmpes %fcc1,%f16,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f16,%f40 ! (0) y = (double) X
+ fbl,a,pt %fcc1,.spec0_cont ! if ( X < THRESHOLDL )
+ fstod %f16,%f40 ! (0) y = (double) X
+1:
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f16
+ fmuls %f16,%f16,%f16
+ st %f16,[%i3]
+
+.spec0_exit:
+ fmovs %f2,%f16
+ mov %l1,%l0
+ fmovs %f4,%f2
+ mov %l2,%l1
+ fmovs %f6,%f4
+ mov %l3,%l2
+ fmovs %f8,%f6
+ mov %l4,%l3
+ mov %l5,%l4
+ mov %l6,%l5
+ mov %l7,%l6
+ lda [%i1]%asi,%l7
+ add %i1,%i2,%i1
+ mov %o1,%o0
+ mov %o2,%o1
+ mov %o3,%o2
+ and %l7,G5_CONST,%l7
+ add %o2,%i2,%o3
+
+ subcc %i0,1,%i0
+ bpos,pt %icc,.main_loop
+ add %i3,%i4,%i3
+ ba .after_main_loop
+ nop
+
+ .align 16
+.spec1:
+ sethi %hi(0x7f800000),%o5
+ cmp %l1,%o5
+ bge,pn %icc,1f
+ nop
+ fcmpes %fcc0,%f2,%f3
+ fcmpes %fcc1,%f2,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f2,%f42 ! (1) y = (double) X
+ fbl,a,pt %fcc1,.spec1_cont ! if ( X < THRESHOLDL )
+ fstod %f2,%f42 ! (1) y = (double) X
+1:
+ fmuld F62_K256ONLN2,%f40,%f40
+ fdtoi %f40,%f16
+ st %f16,[%fp+tmp0]
+ fitod %f16,%f34
+ fpackfix %f16,%f16
+ fsubd %f40,%f34,%f40
+ fmuld F60_KA2,%f40,%f34
+ faddd F58_KA1,%f34,%f34
+ ld [%fp+tmp0],%o0
+ fmuld %f34,%f40,%f40
+ and %o0,255,%o0
+ sll %o0,3,%o0
+ ldd [G1_CONST_TBL+%o0],%f34
+ fpadd32 %f16,%f34,%f34
+ fmuld %f34,%f40,%f40
+ faddd %f34,%f40,%f40
+ fdtos %f40,%f26
+ st %f26,[%i3]
+ add %i3,%i4,%i3
+
+ cmp %l1,%o5
+ bl,pt %icc,.spec1_out_of_range
+ sll %i2,3,%o4
+
+ ble,pn %icc,.spec1_inf
+ sub %i1,%o4,%o4
+
+! NaN -> NaN
+
+ fmuls %f2,%f2,%f2
+ ba .spec1_exit
+ st %f2,[%i3]
+
+.spec1_inf:
+ add %o4,%i2,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec1_exit
+ st %f2,[%i3]
+
+ ba .spec1_exit
+ st %f3,[%i3]
+
+.spec1_out_of_range:
+ sub %i1,%o4,%o4
+ add %o4,%i2,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.spec1_exit:
+ fmovs %f4,%f16
+ mov %l2,%l0
+ fmovs %f6,%f2
+ mov %l3,%l1
+ fmovs %f8,%f4
+ mov %l4,%l2
+ fmovs %f10,%f6
+ mov %l5,%l3
+ mov %l6,%l4
+ mov %l7,%l5
+ lda [%i1]%asi,%l6
+ add %i1,%i2,%i1
+ lda [%i1]%asi,%l7
+ add %i1,%i2,%i1
+ and %l6,G5_CONST,%l6
+ and %l7,G5_CONST,%l7
+ mov %o2,%o0
+ mov %o3,%o1
+ add %o1,%i2,%o2
+ add %o2,%i2,%o3
+
+ subcc %i0,2,%i0
+ bpos,pt %icc,.main_loop
+ add %i3,%i4,%i3
+ ba .after_main_loop
+ nop
+
+ .align 16
+.spec2:
+ sethi %hi(0x7f800000),%o5
+ cmp %l2,%o5
+ bge,pn %icc,1f
+ nop
+ fcmpes %fcc0,%f4,%f3
+ fcmpes %fcc1,%f4,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f4,%f44 ! (2) y = (double) X
+ fbl,a,pt %fcc1,.spec2_cont ! if ( X < THRESHOLDL )
+ fstod %f4,%f44 ! (2) y = (double) X
+1:
+ fmuld F62_K256ONLN2,%f40,%f40
+
+ fmuld F62_K256ONLN2,%f42,%f42
+
+ fdtoi %f40,%f16
+ st %f16,[%fp+tmp0]
+
+ fdtoi %f42,%f2
+ st %f2,[%fp+tmp1]
+
+ fitod %f16,%f34
+ fpackfix %f16,%f16
+
+ fitod %f2,%f18
+ fpackfix %f2,%f2
+
+ fsubd %f40,%f34,%f40
+
+ fsubd %f42,%f18,%f42
+
+ fmuld F60_KA2,%f40,%f34
+
+ fmuld F60_KA2,%f42,%f18
+
+ faddd F58_KA1,%f34,%f34
+
+ faddd F58_KA1,%f18,%f18
+
+ ld [%fp+tmp0],%o0
+ fmuld %f34,%f40,%f40
+
+ ld [%fp+tmp1],%o1
+ fmuld %f18,%f42,%f42
+
+ and %o0,255,%o0
+
+ and %o1,255,%o1
+
+ sll %o0,3,%o0
+
+ sll %o1,3,%o1
+
+ ldd [G1_CONST_TBL+%o0],%f34
+
+ ldd [G1_CONST_TBL+%o1],%f18
+
+ fpadd32 %f16,%f34,%f34
+
+ fpadd32 %f2,%f18,%f18
+
+ fmuld %f34,%f40,%f40
+
+ fmuld %f18,%f42,%f42
+
+ faddd %f34,%f40,%f40
+
+ faddd %f18,%f42,%f42
+
+ fdtos %f40,%f26
+ st %f26,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f42,%f18
+ st %f18,[%o4]
+ add %o4,%i4,%i3
+
+ cmp %l2,%o5
+ sll %i2,1,%o5
+ bl,pt %icc,.spec2_out_of_range
+ sll %i2,2,%o4
+
+ ble,pn %icc,.spec2_inf
+ add %o4,%o5,%o4
+
+! NaN -> NaN
+
+ fmuls %f4,%f4,%f4
+ ba .spec2_exit
+ st %f4,[%i3]
+
+.spec2_inf:
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec2_exit
+ st %f4,[%i3]
+
+ ba .spec2_exit
+ st %f3,[%i3]
+
+.spec2_out_of_range:
+ add %o4,%o5,%o4
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.spec2_exit:
+ fmovs %f6,%f16
+ mov %l3,%l0
+ mov %o3,%o0
+ fmovs %f8,%f2
+ mov %l4,%l1
+ add %o0,%i2,%o1
+ fmovs %f10,%f4
+ mov %l5,%l2
+ add %o1,%i2,%o2
+ fmovs %f12,%f6
+ mov %l6,%l3
+ mov %l7,%l4
+ lda [%i1]%asi,%l5
+ add %i1,%i2,%i1
+ add %o2,%i2,%o3
+ lda [%i1]%asi,%l6
+ add %i1,%i2,%i1
+ lda [%i1]%asi,%l7
+ add %i1,%i2,%i1
+ and %l5,G5_CONST,%l5
+ and %l6,G5_CONST,%l6
+ and %l7,G5_CONST,%l7
+
+ subcc %i0,3,%i0
+ bpos,pt %icc,.main_loop
+ add %i3,%i4,%i3
+ ba .after_main_loop
+ nop
+.spec3:
+ sethi %hi(0x7f800000),%o5
+ cmp %l3,%o5
+ bge,pn %icc,1f
+ nop
+ fcmpes %fcc0,%f6,%f3
+ fcmpes %fcc1,%f6,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f6,%f46 ! (3) y = (double) X
+ fbl,a,pt %fcc1,.spec3_cont ! if ( X < THRESHOLDL )
+ fstod %f6,%f46 ! (3) y = (double) X
+1:
+ fmuld F62_K256ONLN2,%f40,%f40
+
+ fmuld F62_K256ONLN2,%f42,%f42
+
+ fmuld F62_K256ONLN2,%f44,%f44
+
+ fdtoi %f40,%f16
+ st %f16,[%fp+tmp0]
+
+ fdtoi %f42,%f2
+ st %f2,[%fp+tmp1]
+
+ fdtoi %f44,%f4
+ st %f4,[%fp+tmp2]
+
+ fitod %f16,%f34
+ fpackfix %f16,%f16
+
+ fitod %f2,%f18
+ fpackfix %f2,%f2
+
+ fitod %f4,%f20
+ fpackfix %f4,%f4
+
+ fsubd %f40,%f34,%f40
+
+ fsubd %f42,%f18,%f42
+
+ fsubd %f44,%f20,%f44
+
+ fmuld F60_KA2,%f40,%f34
+
+ fmuld F60_KA2,%f42,%f18
+
+ fmuld F60_KA2,%f44,%f20
+
+ faddd F58_KA1,%f34,%f34
+
+ faddd F58_KA1,%f18,%f18
+
+ faddd F58_KA1,%f20,%f20
+
+ ld [%fp+tmp0],%o0
+ fmuld %f34,%f40,%f40
+
+ ld [%fp+tmp1],%o1
+ fmuld %f18,%f42,%f42
+
+ ld [%fp+tmp2],%o2
+ fmuld %f20,%f44,%f44
+
+ and %o0,255,%o0
+ and %o1,255,%o1
+
+ and %o2,255,%o2
+ sll %o0,3,%o0
+
+ sll %o1,3,%o1
+ sll %o2,3,%o2
+
+ ldd [G1_CONST_TBL+%o0],%f34
+
+ ldd [G1_CONST_TBL+%o1],%f18
+
+ ldd [G1_CONST_TBL+%o2],%f20
+
+ fpadd32 %f16,%f34,%f34
+
+ fpadd32 %f2,%f18,%f18
+
+ fpadd32 %f4,%f20,%f20
+
+ fmuld %f34,%f40,%f40
+
+ fmuld %f18,%f42,%f42
+
+ fmuld %f20,%f44,%f44
+
+ faddd %f34,%f40,%f40
+
+ faddd %f18,%f42,%f42
+
+ faddd %f20,%f44,%f44
+
+ fdtos %f40,%f26
+ st %f26,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f42,%f18
+ st %f18,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f44,%f20
+ st %f20,[%i3]
+ add %i3,%i4,%i3
+
+ cmp %l3,%o5
+ bl,pt %icc,.spec3_out_of_range
+ sll %i2,2,%o4
+
+ ble,pn %icc,.spec3_inf
+ add %o4,%i2,%o4
+
+! NaN -> NaN
+
+ fmuls %f6,%f6,%f6
+ ba .spec3_exit
+ st %f6,[%i3]
+
+.spec3_inf:
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec3_exit
+ st %f6,[%i3]
+
+ ba .spec3_exit
+ st %f3,[%i3]
+
+.spec3_out_of_range:
+ add %o4,%i2,%o4
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.spec3_exit:
+ fmovs %f8,%f16
+ mov %l4,%l0
+ fmovs %f10,%f2
+ mov %l5,%l1
+ fmovs %f12,%f4
+ mov %l6,%l2
+ fmovs %f14,%f6
+ mov %l7,%l3
+ mov %i1,%o0
+ lda [%o0]%asi,%l4
+ add %o0,%i2,%o1
+ lda [%o1]%asi,%l5
+ add %o1,%i2,%o2
+ lda [%o2]%asi,%l6
+ add %o2,%i2,%o3
+ lda [%o3]%asi,%l7
+ add %o3,%i2,%i1
+ and %l4,G5_CONST,%l4
+ and %l5,G5_CONST,%l5
+ and %l6,G5_CONST,%l6
+ and %l7,G5_CONST,%l7
+
+ subcc %i0,4,%i0
+ bpos,pt %icc,.main_loop
+ add %i3,%i4,%i3
+ ba .after_main_loop
+ nop
+
+ .align 16
+.spec4:
+ sethi %hi(0x7f800000),%o5
+ cmp %l4,%o5
+ bge,pn %icc,1f
+ nop
+ fcmpes %fcc0,%f8,%f3
+ fcmpes %fcc1,%f8,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f8,%f48 ! (4) y = (double) X
+ fbl,a,pt %fcc1,.spec4_cont ! if ( X < THRESHOLDL )
+ fstod %f8,%f48 ! (4) y = (double) X
+1:
+ fmuld F62_K256ONLN2,%f42,%f42
+
+ fmuld F62_K256ONLN2,%f44,%f44
+
+ fmuld F62_K256ONLN2,%f46,%f46
+
+ fdtoi %f40,%f16
+ st %f16,[%fp+tmp0]
+
+ fdtoi %f42,%f2
+ st %f2,[%fp+tmp1]
+
+ fdtoi %f44,%f4
+ st %f4,[%fp+tmp2]
+
+ fdtoi %f46,%f6
+ st %f6,[%fp+tmp3]
+
+ fitod %f16,%f34
+ fpackfix %f16,%f16
+
+ fitod %f2,%f18
+ fpackfix %f2,%f2
+
+ fitod %f4,%f20
+ fpackfix %f4,%f4
+
+ fitod %f6,%f22
+ fpackfix %f6,%f6
+
+ fsubd %f40,%f34,%f40
+
+ fsubd %f42,%f18,%f42
+
+ fsubd %f44,%f20,%f44
+
+ fsubd %f46,%f22,%f46
+
+ fmuld F60_KA2,%f40,%f34
+
+ fmuld F60_KA2,%f42,%f18
+
+ fmuld F60_KA2,%f44,%f20
+
+ fmuld F60_KA2,%f46,%f22
+
+ faddd F58_KA1,%f34,%f34
+
+ faddd F58_KA1,%f18,%f18
+
+ faddd F58_KA1,%f20,%f20
+
+ faddd F58_KA1,%f22,%f22
+
+ ld [%fp+tmp0],%o0
+ fmuld %f34,%f40,%f40
+
+ ld [%fp+tmp1],%o1
+ fmuld %f18,%f42,%f42
+
+ ld [%fp+tmp2],%o2
+ fmuld %f20,%f44,%f44
+
+ ld [%fp+tmp3],%o3
+ fmuld %f22,%f46,%f46
+
+ and %o0,255,%o0
+ and %o1,255,%o1
+
+ and %o2,255,%o2
+ and %o3,255,%o3
+
+ sll %o0,3,%o0
+ sll %o1,3,%o1
+
+ sll %o2,3,%o2
+ sll %o3,3,%o3
+
+ ldd [G1_CONST_TBL+%o0],%f34
+
+ ldd [G1_CONST_TBL+%o1],%f18
+
+ ldd [G1_CONST_TBL+%o2],%f20
+
+ ldd [G1_CONST_TBL+%o3],%f22
+
+ fpadd32 %f16,%f34,%f34
+
+ fpadd32 %f2,%f18,%f18
+
+ fpadd32 %f4,%f20,%f20
+
+ fpadd32 %f6,%f22,%f22
+
+ fmuld %f34,%f40,%f40
+
+ fmuld %f18,%f42,%f42
+
+ fmuld %f20,%f44,%f44
+
+ fmuld %f22,%f46,%f46
+
+ faddd %f34,%f40,%f40
+
+ faddd %f18,%f42,%f42
+
+ faddd %f20,%f44,%f44
+
+ faddd %f22,%f46,%f46
+
+ fdtos %f40,%f26
+ st %f26,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f42,%f18
+ st %f18,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f44,%f20
+ st %f20,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f46,%f22
+ st %f22,[%o4]
+ add %o4,%i4,%i3
+
+ cmp %l4,%o5
+ bl,pt %icc,.spec4_out_of_range
+ sll %i2,2,%o4
+
+ ble,pn %icc,.spec4_inf
+ sub %i1,%o4,%o4
+
+! NaN -> NaN
+
+ fmuls %f8,%f8,%f8
+ ba .spec4_exit
+ st %f8,[%i3]
+
+.spec4_inf:
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec4_exit
+ st %f8,[%i3]
+
+ ba .spec4_exit
+ st %f3,[%i3]
+
+.spec4_out_of_range:
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.spec4_exit:
+ fmovs %f10,%f16
+ mov %l5,%l0
+ fmovs %f12,%f2
+ mov %l6,%l1
+ fmovs %f14,%f4
+ mov %l7,%l2
+ lda [%i1]%asi,%l3
+ lda [%i1]%asi,%f6
+ add %i1,%i2,%o0
+ lda [%o0]%asi,%l4
+ add %o0,%i2,%o1
+ lda [%o1]%asi,%l5
+ add %o1,%i2,%o2
+ lda [%o2]%asi,%l6
+ add %o2,%i2,%o3
+ lda [%o3]%asi,%l7
+ add %o3,%i2,%i1
+ and %l3,G5_CONST,%l3
+ and %l4,G5_CONST,%l4
+ and %l5,G5_CONST,%l5
+ and %l6,G5_CONST,%l6
+ and %l7,G5_CONST,%l7
+
+ subcc %i0,5,%i0
+ bpos,pt %icc,.main_loop
+ add %i3,%i4,%i3
+ ba .after_main_loop
+ nop
+
+ .align 16
+.spec5:
+ sethi %hi(0x7f800000),%o5
+ cmp %l5,%o5
+ bge,pn %icc,1f
+ nop
+ fcmpes %fcc0,%f10,%f3
+ fcmpes %fcc1,%f10,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f10,%f50 ! (5) y = (double) X
+ fbl,a,pt %fcc1,.spec5_cont ! if ( X < THRESHOLDL )
+ fstod %f10,%f50 ! (5) y = (double) X
+1:
+ fmuld F62_K256ONLN2,%f44,%f44
+
+ fmuld F62_K256ONLN2,%f46,%f46
+
+ fdtoi %f40,%f16
+ st %f16,[%fp+tmp0]
+ fmuld F62_K256ONLN2,%f48,%f48
+
+ fdtoi %f42,%f2
+ st %f2,[%fp+tmp1]
+
+ fdtoi %f44,%f4
+ st %f4,[%fp+tmp2]
+
+ fdtoi %f46,%f6
+ st %f6,[%fp+tmp3]
+
+ fdtoi %f48,%f8
+ st %f8,[%fp+tmp4]
+
+ fitod %f16,%f34
+ fpackfix %f16,%f16
+
+ fitod %f2,%f18
+ fpackfix %f2,%f2
+
+ fitod %f4,%f20
+ fpackfix %f4,%f4
+
+ fitod %f6,%f22
+ fpackfix %f6,%f6
+
+ fitod %f8,%f24
+ fpackfix %f8,%f8
+
+ ld [%fp+tmp0],%o0
+ fsubd %f40,%f34,%f40
+
+ ld [%fp+tmp1],%o1
+ fsubd %f42,%f18,%f42
+
+ ld [%fp+tmp2],%o2
+ and %o0,255,%o0
+ fsubd %f44,%f20,%f44
+
+ ld [%fp+tmp3],%o3
+ and %o1,255,%o1
+ fsubd %f46,%f22,%f46
+
+ sll %o0,3,%o0
+ sll %o1,3,%o1
+ fmuld F60_KA2,%f40,%f34
+ fsubd %f48,%f24,%f48
+
+ and %o2,255,%o2
+ fmuld F60_KA2,%f42,%f18
+
+ sll %o2,3,%o2
+ fmuld F60_KA2,%f44,%f20
+
+ ld [%fp+tmp4],%o4
+ and %o3,255,%o3
+ fmuld F60_KA2,%f46,%f22
+
+ sll %o3,3,%o3
+ fmuld F60_KA2,%f48,%f24
+ faddd F58_KA1,%f34,%f34
+
+ and %o4,255,%o4
+ faddd F58_KA1,%f18,%f18
+
+ faddd F58_KA1,%f20,%f20
+
+ faddd F58_KA1,%f22,%f22
+
+ fmuld %f34,%f40,%f40
+ ldd [G1_CONST_TBL+%o0],%f34
+ faddd F58_KA1,%f24,%f24
+
+ fmuld %f18,%f42,%f42
+ ldd [G1_CONST_TBL+%o1],%f18
+
+ fmuld %f20,%f44,%f44
+ ldd [G1_CONST_TBL+%o2],%f20
+
+ fmuld %f22,%f46,%f46
+ ldd [G1_CONST_TBL+%o3],%f22
+ sll %o4,3,%o4
+
+ fmuld %f24,%f48,%f48
+ ldd [G1_CONST_TBL+%o4],%f24
+ fpadd32 %f16,%f34,%f34
+
+ fpadd32 %f2,%f18,%f18
+
+ fpadd32 %f4,%f20,%f20
+
+ fpadd32 %f6,%f22,%f22
+
+ fpadd32 %f8,%f24,%f24
+ fmuld %f34,%f40,%f40
+
+ fmuld %f18,%f42,%f42
+
+ fmuld %f20,%f44,%f44
+
+ fmuld %f22,%f46,%f46
+
+ fmuld %f24,%f48,%f48
+ faddd %f34,%f40,%f40
+
+ faddd %f18,%f42,%f42
+
+ faddd %f20,%f44,%f44
+
+ faddd %f22,%f46,%f46
+
+ faddd %f24,%f48,%f48
+
+ fdtos %f40,%f26
+ st %f26,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f42,%f18
+ st %f18,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f44,%f20
+ st %f20,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f46,%f22
+ st %f22,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f48,%f24
+ st %f24,[%i3]
+ add %i3,%i4,%i3
+
+ cmp %l5,%o5
+ bl,pt %icc,.spec5_out_of_range
+ sll %i2,2,%o4
+
+ ble,pn %icc,.spec5_inf
+ sub %o4,%i2,%o4
+
+! NaN -> NaN
+
+ fmuls %f10,%f10,%f10
+ ba .spec5_exit
+ st %f10,[%i3]
+
+.spec5_inf:
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec5_exit
+ st %f10,[%i3]
+
+ ba .spec5_exit
+ st %f3,[%i3]
+
+.spec5_out_of_range:
+ sub %o4,%i2,%o4
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.spec5_exit:
+ fmovs %f12,%f16
+ mov %l6,%l0
+ fmovs %f14,%f2
+ mov %l7,%l1
+ lda [%i1]%asi,%l2
+ lda [%i1]%asi,%f4
+ add %i1,%i2,%i1
+ lda [%i1]%asi,%l3
+ lda [%i1]%asi,%f6
+ add %i1,%i2,%o0
+ lda [%o0]%asi,%l4
+ add %o0,%i2,%o1
+ lda [%o1]%asi,%l5
+ add %o1,%i2,%o2
+ lda [%o2]%asi,%l6
+ add %o2,%i2,%o3
+ lda [%o3]%asi,%l7
+ add %o3,%i2,%i1
+ and %l2,G5_CONST,%l2
+ and %l3,G5_CONST,%l3
+ and %l4,G5_CONST,%l4
+ and %l5,G5_CONST,%l5
+ and %l6,G5_CONST,%l6
+ and %l7,G5_CONST,%l7
+
+ subcc %i0,6,%i0
+ bpos,pt %icc,.main_loop
+ add %i3,%i4,%i3
+ ba .after_main_loop
+ nop
+.spec6:
+ sethi %hi(0x7f800000),%o5
+ cmp %l6,%o5
+ bge,pn %icc,1f
+ nop
+ fcmpes %fcc0,%f12,%f3
+ fcmpes %fcc1,%f12,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f12,%f52 ! (6) y = (double) X
+ fbl,a,pt %fcc1,.spec6_cont ! if ( X < THRESHOLDL )
+ fstod %f12,%f52 ! (6) y = (double) X
+1:
+ fmuld F62_K256ONLN2,%f46,%f46
+
+ fdtoi %f40,%f16
+ st %f16,[%fp+tmp0]
+ fmuld F62_K256ONLN2,%f48,%f48
+
+ fdtoi %f42,%f2
+ st %f2,[%fp+tmp1]
+ fmuld F62_K256ONLN2,%f50,%f50
+
+ fdtoi %f44,%f4
+ st %f4,[%fp+tmp2]
+
+ fdtoi %f46,%f6
+ st %f6,[%fp+tmp3]
+
+ fdtoi %f48,%f8
+ st %f8,[%fp+tmp4]
+
+ fdtoi %f50,%f10
+ st %f10,[%fp+tmp5]
+
+ fitod %f16,%f34
+ fpackfix %f16,%f16
+
+ fitod %f2,%f18
+ fpackfix %f2,%f2
+
+ fitod %f4,%f20
+ fpackfix %f4,%f4
+
+ fitod %f6,%f22
+ fpackfix %f6,%f6
+
+ fitod %f8,%f24
+ fpackfix %f8,%f8
+
+ fitod %f10,%f26
+ fpackfix %f10,%f10
+
+ ld [%fp+tmp0],%o0
+ fsubd %f40,%f34,%f40
+
+ ld [%fp+tmp1],%o1
+ fsubd %f42,%f18,%f42
+
+ ld [%fp+tmp2],%o2
+ and %o0,255,%o0
+ fsubd %f44,%f20,%f44
+
+ ld [%fp+tmp3],%o3
+ and %o1,255,%o1
+ fsubd %f46,%f22,%f46
+
+ sll %o0,3,%o0
+ sll %o1,3,%o1
+ fmuld F60_KA2,%f40,%f34
+ fsubd %f48,%f24,%f48
+
+ and %o2,255,%o2
+ fmuld F60_KA2,%f42,%f18
+ fsubd %f50,%f26,%f50
+
+ sll %o2,3,%o2
+ fmuld F60_KA2,%f44,%f20
+
+ ld [%fp+tmp4],%o4
+ and %o3,255,%o3
+ fmuld F60_KA2,%f46,%f22
+
+ ld [%fp+tmp5],%o5
+ sll %o3,3,%o3
+ fmuld F60_KA2,%f48,%f24
+ faddd F58_KA1,%f34,%f34
+
+ and %o4,255,%o4
+ fmuld F60_KA2,%f50,%f26
+ faddd F58_KA1,%f18,%f18
+
+ and %o5,255,%o5
+ faddd F58_KA1,%f20,%f20
+
+ sll %o5,3,%o5
+ faddd F58_KA1,%f22,%f22
+
+ fmuld %f34,%f40,%f40
+ ldd [G1_CONST_TBL+%o0],%f34
+ faddd F58_KA1,%f24,%f24
+
+ fmuld %f18,%f42,%f42
+ ldd [G1_CONST_TBL+%o1],%f18
+ faddd F58_KA1,%f26,%f26
+
+ fmuld %f20,%f44,%f44
+ ldd [G1_CONST_TBL+%o2],%f20
+
+ fmuld %f22,%f46,%f46
+ ldd [G1_CONST_TBL+%o3],%f22
+ sll %o4,3,%o4
+
+ fmuld %f24,%f48,%f48
+ ldd [G1_CONST_TBL+%o4],%f24
+ fpadd32 %f16,%f34,%f34
+
+ fmuld %f26,%f50,%f50
+ ldd [G1_CONST_TBL+%o5],%f26
+ fpadd32 %f2,%f18,%f18
+
+ fpadd32 %f4,%f20,%f20
+
+ fpadd32 %f6,%f22,%f22
+
+ fpadd32 %f8,%f24,%f24
+ fmuld %f34,%f40,%f40
+
+ fpadd32 %f10,%f26,%f26
+ fmuld %f18,%f42,%f42
+
+ fmuld %f20,%f44,%f44
+
+ fmuld %f22,%f46,%f46
+
+ fmuld %f24,%f48,%f48
+ faddd %f34,%f40,%f40
+
+ fmuld %f26,%f50,%f50
+ faddd %f18,%f42,%f42
+
+ faddd %f20,%f44,%f44
+
+ faddd %f22,%f46,%f46
+
+ faddd %f24,%f48,%f48
+
+ faddd %f26,%f50,%f50
+
+ fdtos %f40,%f26
+ st %f26,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f42,%f18
+ st %f18,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f44,%f20
+ st %f20,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f46,%f22
+ st %f22,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f48,%f24
+ st %f24,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f50,%f26
+ st %f26,[%o4]
+ add %o4,%i4,%i3
+
+ sethi %hi(0x7f800000),%o5
+ cmp %l6,%o5
+ bl,pt %icc,.spec6_out_of_range
+ sll %i2,1,%o4
+
+ ble,pn %icc,.spec6_inf
+ sub %i1,%o4,%o4
+
+! NaN -> NaN
+
+ fmuls %f12,%f12,%f12
+ ba .spec6_exit
+ st %f12,[%i3]
+
+.spec6_inf:
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec6_exit
+ st %f12,[%i3]
+
+ ba .spec6_exit
+ st %f3,[%i3]
+
+.spec6_out_of_range:
+ sub %i1,%o4,%o4
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.spec6_exit:
+ fmovs %f14,%f16
+ mov %l7,%l0
+ lda [%i1]%asi,%l1
+ lda [%i1]%asi,%f2
+ add %i1,%i2,%i1
+ lda [%i1]%asi,%l2
+ lda [%i1]%asi,%f4
+ add %i1,%i2,%i1
+ lda [%i1]%asi,%l3
+ lda [%i1]%asi,%f6
+ add %i1,%i2,%o0
+ lda [%o0]%asi,%l4
+ add %o0,%i2,%o1
+ lda [%o1]%asi,%l5
+ add %o1,%i2,%o2
+ lda [%o2]%asi,%l6
+ add %o2,%i2,%o3
+ lda [%o3]%asi,%l7
+ add %o3,%i2,%i1
+ and %l1,G5_CONST,%l1
+ and %l2,G5_CONST,%l2
+ and %l3,G5_CONST,%l3
+ and %l4,G5_CONST,%l4
+ and %l5,G5_CONST,%l5
+ and %l6,G5_CONST,%l6
+ and %l7,G5_CONST,%l7
+
+ subcc %i0,7,%i0
+ bpos,pt %icc,.main_loop
+ add %i3,%i4,%i3
+ ba .after_main_loop
+ nop
+
+ .align 16
+.spec7:
+ sethi %hi(0x7f800000),%o5
+ cmp %l7,%o5
+ bge,pn %icc,1f
+ nop
+ fcmpes %fcc0,%f14,%f3
+ fcmpes %fcc1,%f14,THRESHOLDL
+ fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
+ fstod %f14,%f54 ! (7) y = (double) X
+ fbl,a,pt %fcc1,.spec7_cont ! if ( X < THRESHOLDL )
+ fstod %f14,%f54 ! (7) y = (double) X
+1:
+ fdtoi %f40,%f16
+ st %f16,[%fp+tmp0]
+ fmuld F62_K256ONLN2,%f48,%f48
+
+ fdtoi %f42,%f2
+ st %f2,[%fp+tmp1]
+ fmuld F62_K256ONLN2,%f50,%f50
+
+ fdtoi %f44,%f4
+ st %f4,[%fp+tmp2]
+ fmuld F62_K256ONLN2,%f52,%f52
+
+ fdtoi %f46,%f6
+ st %f6,[%fp+tmp3]
+
+ fdtoi %f48,%f8
+ st %f8,[%fp+tmp4]
+
+ fdtoi %f50,%f10
+ st %f10,[%fp+tmp5]
+
+ fdtoi %f52,%f12
+ st %f12,[%fp+tmp6]
+
+ fitod %f16,%f34
+ fpackfix %f16,%f16
+
+ fitod %f2,%f18
+ fpackfix %f2,%f2
+
+ fitod %f4,%f20
+ fpackfix %f4,%f4
+
+ fitod %f6,%f22
+ fpackfix %f6,%f6
+
+ fitod %f8,%f24
+ fpackfix %f8,%f8
+
+ fitod %f10,%f26
+ fpackfix %f10,%f10
+
+ fitod %f12,%f28
+ fpackfix %f12,%f12
+
+ ld [%fp+tmp0],%o0
+ fsubd %f40,%f34,%f40
+
+ ld [%fp+tmp1],%o1
+ fsubd %f42,%f18,%f42
+
+ ld [%fp+tmp2],%o2
+ and %o0,255,%o0
+ fsubd %f44,%f20,%f44
+
+ ld [%fp+tmp3],%o3
+ and %o1,255,%o1
+ fsubd %f46,%f22,%f46
+
+ sll %o0,3,%o0
+ sll %o1,3,%o1
+ fmuld F60_KA2,%f40,%f34
+ fsubd %f48,%f24,%f48
+
+ and %o2,255,%o2
+ fmuld F60_KA2,%f42,%f18
+ fsubd %f50,%f26,%f50
+
+ sll %o2,3,%o2
+ fmuld F60_KA2,%f44,%f20
+ fsubd %f52,%f28,%f52
+
+ ld [%fp+tmp4],%o4
+ and %o3,255,%o3
+ fmuld F60_KA2,%f46,%f22
+
+ ld [%fp+tmp5],%o5
+ sll %o3,3,%o3
+ fmuld F60_KA2,%f48,%f24
+ faddd F58_KA1,%f34,%f34
+
+ ld [%fp+tmp6],%o7
+ and %o4,255,%o4
+ fmuld F60_KA2,%f50,%f26
+ faddd F58_KA1,%f18,%f18
+
+ and %o5,255,%o5
+ fmuld F60_KA2,%f52,%f28
+ faddd F58_KA1,%f20,%f20
+
+ sll %o5,3,%o5
+ faddd F58_KA1,%f22,%f22
+
+ fmuld %f34,%f40,%f40
+ ldd [G1_CONST_TBL+%o0],%f34
+ faddd F58_KA1,%f24,%f24
+
+ fmuld %f18,%f42,%f42
+ ldd [G1_CONST_TBL+%o1],%f18
+ faddd F58_KA1,%f26,%f26
+
+ fmuld %f20,%f44,%f44
+ ldd [G1_CONST_TBL+%o2],%f20
+ faddd F58_KA1,%f28,%f28
+
+ fmuld %f22,%f46,%f46
+ ldd [G1_CONST_TBL+%o3],%f22
+ sll %o4,3,%o4
+
+ fmuld %f24,%f48,%f48
+ ldd [G1_CONST_TBL+%o4],%f24
+ and %o7,255,%o7
+ fpadd32 %f16,%f34,%f34
+
+ fmuld %f26,%f50,%f50
+ ldd [G1_CONST_TBL+%o5],%f26
+ sll %o7,3,%o7
+ fpadd32 %f2,%f18,%f18
+
+ fmuld %f28,%f52,%f52
+ ldd [G1_CONST_TBL+%o7],%f28
+ fpadd32 %f4,%f20,%f20
+
+ fpadd32 %f6,%f22,%f22
+
+ fpadd32 %f8,%f24,%f24
+ fmuld %f34,%f40,%f40
+
+ fpadd32 %f10,%f26,%f26
+ fmuld %f18,%f42,%f42
+
+ fpadd32 %f12,%f28,%f28
+ fmuld %f20,%f44,%f44
+
+ fmuld %f22,%f46,%f46
+
+ fmuld %f24,%f48,%f48
+ faddd %f34,%f40,%f40
+
+ fmuld %f26,%f50,%f50
+ faddd %f18,%f42,%f42
+
+ fmuld %f28,%f52,%f52
+ faddd %f20,%f44,%f44
+
+ faddd %f22,%f46,%f46
+
+ faddd %f24,%f48,%f48
+
+ faddd %f26,%f50,%f50
+
+ faddd %f28,%f52,%f52
+
+ fdtos %f40,%f26
+ st %f26,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f42,%f18
+ st %f18,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f44,%f20
+ st %f20,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f46,%f22
+ st %f22,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f48,%f24
+ st %f24,[%i3]
+ add %i3,%i4,%o4
+
+ fdtos %f50,%f26
+ st %f26,[%o4]
+ add %o4,%i4,%i3
+
+ fdtos %f52,%f28
+ st %f28,[%i3]
+ add %i3,%i4,%i3
+
+ sethi %hi(0x7f800000),%o5
+ cmp %l7,%o5
+ bl,pt %icc,.spec7_out_of_range
+ sub %i1,%i2,%o4
+
+ ble,pn %icc,.spec7_inf
+ ld [%o4],%l0
+
+! NaN -> NaN
+
+ fmuls %f14,%f14,%f14
+ ba .spec7_exit
+ st %f14,[%i3]
+
+.spec7_inf:
+ srl %l0,29,%l0
+ andcc %l0,4,%l0
+ be,a,pn %icc,.spec7_exit
+ st %f14,[%i3]
+
+ ba .spec7_exit
+ st %f3,[%i3]
+
+.spec7_out_of_range:
+ ld [%o4],%l0
+ srl %l0,29,%l0
+ and %l0,4,%l0
+ add %l0,2048,%l0
+ ld [G1_CONST_TBL+%l0],%f2
+ fmuls %f2,%f2,%f2
+ st %f2,[%i3]
+
+.spec7_exit:
+ subcc %i0,8,%i0
+ bpos,pt %icc,.main_loop_preload
+ add %i3,%i4,%i3
+
+ ba .tail
+ nop
+ SET_SIZE(__vexpf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vhypot.S b/usr/src/libm/src/mvec/vis/__vhypot.S
new file mode 100644
index 0000000..7d1962b
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vhypot.S
@@ -0,0 +1,1242 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vhypot.S 1.7 06/01/23 SMI"
+
+ .file "__vhypot.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x7ff00000, 0 ! DC0 = 0x7ff00000 00000000: exponent mask / +Inf
+ .word 0x7fe00000, 0 ! DC1 = 2^1023
+ .word 0x00100000, 0 ! DC2 = 2^-1022 (smallest normal)
+ .word 0x41b00000, 0 ! D2ON28 = 268435456.0 = 2^28
+ .word 0x7fd00000, 0 ! DC3 = 2^1022
+
+#define counter %i0
+#define tmp_counter %l3
+#define tmp_px %l5
+#define tmp_py %o7
+#define stridex %i2
+#define stridey %i4
+#define stridez %l0
+
+#define DC0 %f8
+#define DC0_HI %f8
+#define DC0_LO %f9
+#define DC1 %f46
+#define DC2 %f48
+#define DC3 %f0
+#define D2ON28 %f62
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! ((float*)&x)[0] = ((float*)px)[0];
+! ((float*)&x)[1] = ((float*)px)[1];
+!
+! ((float*)&y)[0] = ((float*)py)[0];
+! ((float*)&y)[1] = ((float*)py)[1];
+!
+! x = fabs(x);
+! y = fabs(y);
+!
+! c0 = vis_fcmple32(DC1,x);
+! c2 = vis_fcmple32(DC1,y);
+! c1 = vis_fcmpgt32(DC2,x);
+! c3 = vis_fcmpgt32(DC2,y);
+!
+! c0 |= c2;
+! c1 &= c3;
+! if ( (c0 & 2) != 0 )
+! {
+! lx = ((int*)px)[1];
+! ly = ((int*)py)[1];
+! hx = *(int*)px;
+! hy = *(int*)py;
+!
+! hx &= 0x7fffffff;
+! hy &= 0x7fffffff;
+!
+! j0 = hx;
+! if ( j0 < hy ) j0 = hy;
+! j0 &= 0x7ff00000;
+! if ( j0 >= 0x7ff00000 )
+! {
+! if ( hx == 0x7ff00000 && lx == 0 ) res = x == y ? y : x;
+! else if ( hy == 0x7ff00000 && ly == 0 ) res = x == y ? x : y;
+! else res = x * y;
+!
+! ((float*)pz)[0] = ((float*)&res)[0];
+! ((float*)pz)[1] = ((float*)&res)[1];
+! }
+! else
+! {
+! diff = hy - hx;
+! j0 = diff >> 31;
+! if ( ((diff ^ j0) - j0) < 0x03600000 )
+! {
+! x *= D2ONM1022;
+! y *= D2ONM1022;
+!
+! x_hi = ( x + two28 ) - two28;
+! x_lo = x - x_hi;
+! y_hi = ( y + two28 ) - two28;
+! y_lo = y - y_hi;
+! res = (x_hi * x_hi + y_hi * y_hi);
+! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+!
+! res = sqrt(res);
+!
+! res = D2ONP1022 * res;
+! ((float*)pz)[0] = ((float*)&res)[0];
+! ((float*)pz)[1] = ((float*)&res)[1];
+! }
+! else
+! {
+! res = x + y;
+! ((float*)pz)[0] = ((float*)&res)[0];
+! ((float*)pz)[1] = ((float*)&res)[1];
+! }
+! }
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+! continue;
+! }
+! if ( (c1 & 2) != 0 )
+! {
+! x *= D2ONP1022;
+! y *= D2ONP1022;
+!
+! x_hi = ( x + two28 ) - two28;
+! x_lo = x - x_hi;
+! y_hi = ( y + two28 ) - two28;
+! y_lo = y - y_hi;
+! res = (x_hi * x_hi + y_hi * y_hi);
+! res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
+!
+! res = sqrt(res);
+!
+! res = D2ONM1022 * res;
+! ((float*)pz)[0] = ((float*)&res)[0];
+! ((float*)pz)[1] = ((float*)&res)[1];
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+! continue;
+! }
+!
+! dmax = x;
+! if ( dmax < y ) dmax = y;
+!
+! dmax = vis_fand(dmax,DC0);
+! dnorm = vis_fpsub32(DC1,dmax);
+!
+! x *= dnorm;
+! y *= dnorm;
+!
+! x_hi = x + D2ON28;
+! x_hi -= D2ON28;
+! x_lo = x - x_hi;
+!
+! y_hi = y + D2ON28;
+! y_hi -= D2ON28;
+! y_lo = y - y_hi;
+!
+! res = x_hi * x_hi;
+! dtmp1 = x + x_hi;
+! dtmp0 = y_hi * y_hi;
+! dtmp2 = y + y_hi;
+!
+! res += dtmp0;
+! dtmp1 *= x_lo;
+! dtmp2 *= y_lo;
+! dtmp1 += dtmp2;
+! res += dtmp1;
+!
+! res = sqrt(res);
+!
+! res = dmax * res;
+! ((float*)pz)[0] = ((float*)&res)[0];
+! ((float*)pz)[1] = ((float*)&res)[1];
+!
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
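+!
+! A rough scalar sketch of the main path above (illustrative only, not part
+! of the build; assumes IEEE doubles and <math.h> fabs/sqrt, and a hypothetical
+! helper name). It ignores the dmax/dnorm scaling, which the code builds with
+! vis_fand/vis_fpsub32 so that dnorm is effectively 1/dmax for the power of two
+! taken from the larger operand:
+!
+!   double __vhypot_core_sketch(double x, double y)
+!   {
+!       const double two28 = 268435456.0;       /* 2^28, same as D2ON28 */
+!       double x_hi, x_lo, y_hi, y_lo, res;
+!
+!       x = fabs(x);
+!       y = fabs(y);
+!       x_hi = (x + two28) - two28;             /* upper bits of x */
+!       x_lo = x - x_hi;                        /* remaining lower bits */
+!       y_hi = (y + two28) - two28;
+!       y_lo = y - y_hi;
+!       /* x*x - x_hi*x_hi = (x - x_hi)*(x + x_hi), so the two sums below
+!        * rebuild x*x + y*y with the split-off low parts folded back in */
+!       res  = x_hi * x_hi + y_hi * y_hi;
+!       res += (x + x_hi) * x_lo + (y + y_hi) * y_lo;
+!       return sqrt(res);
+!   }
+!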
+
+ ENTRY(__vhypot)
+ save %sp,-SA(MINFRAME),%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,o3)
+ wr %g0,0x82,%asi
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],%l0
+#else
+ ld [%fp+STACK_BIAS+92],%l0
+#endif
+ ldd [%o3],DC0
+ sll %i2,3,stridex
+ mov %i0,tmp_counter
+
+ ldd [%o3+8],DC1
+ sll %i4,3,stridey
+ mov %i1,tmp_px
+
+ ldd [%o3+16],DC2
+ sll %l0,3,stridez
+ mov %i3,tmp_py
+
+ ldd [%o3+24],D2ON28
+
+ ldd [%o3+32],DC3
+
+.begin:
+ mov tmp_counter,counter
+ mov tmp_px,%i1
+ mov tmp_py,%i3
+ clr tmp_counter
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ nop
+
+ lda [%i1]%asi,%o0
+ sethi %hi(0x7ffffc00),%o5
+
+ lda [%i3]%asi,%o2
+ add %o5,1023,%o5
+
+ lda [%i1]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0];
+
+ lda [%i1+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1];
+ add %i1,stridex,%o1 ! px += stridex
+
+ lda [%i3]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0];
+ sethi %hi(0x00100000),%l7
+ and %o0,%o5,%o0
+
+ lda [%i3+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1];
+ and %o2,%o5,%o2
+ sethi %hi(0x7fe00000),%l6
+
+ fabsd %f26,%f36 ! (1_0) x = fabs(x);
+ cmp %o0,%o2
+ mov %o2,%l4
+
+ fabsd %f24,%f54 ! (1_0) y = fabs(y);
+ add %i3,stridey,%o5 ! py += stridey
+ movg %icc,%o0,%o2
+ lda [%o5]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0];
+
+ cmp %o2,%l6
+ sethi %hi(0x7ff00000),%o4
+ bge,pn %icc,.spec0
+ lda [%o5+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1];
+
+ cmp %o2,%l7
+ bl,pn %icc,.spec1
+ nop
+ lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0];
+
+ lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1];
+ add %i3,stridey,%i3 ! py += stridey
+
+ fabsd %f28,%f34 ! (2_0) y = fabs(y);
+
+ fabsd %f26,%f50 ! (2_0) x = fabs(x);
+
+ fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x);
+
+ fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y);
+
+ fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x);
+
+ fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y);
+
+ or %o3,%o0,%o3 ! (2_0) c0 |= c2;
+
+ andcc %o3,2,%g0 ! (2_0) c0 & 2
+ bnz,pn %icc,.update0 ! (2_0) if ( (c0 & 2) != 0 )
+ and %o4,%o5,%o4 ! (2_0) c1 &= c3;
+.cont0:
+ add %i3,stridey,%l4 ! py += stridey
+ andcc %o4,2,%g0 ! (2_0) c1 & 2
+ bnz,pn %icc,.update1 ! (2_0) if ( (c1 & 2) != 0 )
+ fmovd %f36,%f56 ! (1_0) dmax = x;
+.cont1:
+ lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0];
+ add %o1,stridex,%l2 ! px += stridex
+
+ lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1];
+
+ lda [%l2]%asi,%f18 ! (3_1) ((float*)&x)[0] = ((float*)px)[0];
+
+ lda [%l2+4]%asi,%f19 ! (3_1) ((float*)&x)[1] = ((float*)px)[1];
+
+ fabsd %f30,%f30 ! (3_1) y = fabs(y);
+
+ fabsd %f18,%f18 ! (3_1) x = fabs(x);
+
+ fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y
+
+ fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y;
+
+ fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x);
+
+ fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y);
+
+ fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x);
+
+ fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f56,DC0,%f38 ! (1_1) dmax = vis_fand(dmax,DC0);
+
+ or %o3,%o0,%o3 ! (3_1) c0 |= c2;
+
+ andcc %o3,2,%g0 ! (3_1) c0 & 2
+ bnz,pn %icc,.update2 ! (3_1) if ( (c0 & 2) != 0 )
+ and %o4,%o1,%o4 ! (3_1) c1 &= c3;
+.cont2:
+ add %l4,stridey,%i3 ! py += stridey
+ andcc %o4,2,%g0 ! (3_1) c1 & 2
+ bnz,pn %icc,.update3 ! (3_1) if ( (c1 & 2) != 0 )
+ fmovd %f50,%f32 ! (2_1) dmax = x;
+.cont3:
+ fpsub32 DC1,%f38,%f10 ! (1_1) dnorm = vis_fpsub32(DC1,dmax);
+ lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0];
+
+ lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1];
+
+ add %l2,stridex,%l1 ! px += stridex
+
+ fmuld %f36,%f10,%f36 ! (1_1) x *= dnorm;
+ lda [%l1]%asi,%f22 ! (0_0) ((float*)&x)[0] = ((float*)px)[0]
+
+ lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1];
+
+ fmuld %f54,%f10,%f56 ! (1_1) y *= dnorm;
+ fabsd %f20,%f40 ! (0_0) y = fabs(y);
+
+ fabsd %f22,%f20 ! (0_0) x = fabs(x);
+
+ fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y
+
+
+ fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y;
+
+ faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28;
+ fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x);
+
+ faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28;
+ fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y);
+
+ fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x);
+
+ fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f32,DC0,%f52 ! (2_1) dmax = vis_fand(dmax,DC0);
+
+ or %g5,%o2,%g5 ! (0_0) c0 |= c2;
+ fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28;
+
+ andcc %g5,2,%g0 ! (0_0) c0 & 2
+ bnz,pn %icc,.update4 ! (0_0) if ( (c0 & 2) != 0 )
+ fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28;
+.cont4:
+ and %g1,%o4,%g1 ! (0_0) c1 &= c3;
+
+ add %i3,stridey,%l2 ! py += stridey
+ andcc %g1,2,%g0 ! (0_0) c1 & 2
+ bnz,pn %icc,.update5 ! (0_0) if ( (c1 & 2) != 0 )
+ fmovd %f18,%f44 ! (3_1) dmax = x;
+.cont5:
+ fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax);
+ lda [%l2]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0];
+
+ fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi;
+ lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1];
+ add %l1,stridex,%l7 ! px += stridex
+ faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi;
+
+ faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi;
+ lda [%l7]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0];
+
+ fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm;
+ fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi;
+ lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1];
+
+ fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi;
+ fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi;
+
+ fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm;
+ fabsd %f24,%f54 ! (1_0) y = fabs(y);
+
+ fabsd %f26,%f36 ! (1_0) x = fabs(x);
+
+ fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo;
+ fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y
+
+ fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo;
+
+ fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y;
+
+ faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28;
+ fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x);
+
+ faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28;
+ fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y);
+
+ faddd %f60,%f2,%f24 ! (1_1) res += dtmp0;
+ fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x);
+
+ faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2;
+ fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0);
+
+ or %g1,%g5,%g1 ! (1_0) c0 |= c2;
+ fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28;
+
+ andcc %g1,2,%g0 ! (1_0) c0 & 2
+ bnz,pn %icc,.update6 ! (1_0) if ( (c0 & 2) != 0 )
+ fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28;
+.cont6:
+ and %o5,%o1,%o5 ! (1_0) c1 &= c3;
+ faddd %f24,%f28,%f26 ! (1_1) res += dtmp1;
+
+ add %l2,stridey,%i3 ! py += stridey
+ andcc %o5,2,%g0 ! (1_0) c1 & 2
+ bnz,pn %icc,.update7 ! (1_0) if ( (c1 & 2) != 0 )
+ fmovd %f20,%f4 ! (0_0) dmax = x;
+.cont7:
+ fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax);
+ lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0];
+
+ fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi;
+ lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1];
+ add %l7,stridex,%o1 ! px += stridex
+ faddd %f34,%f58,%f60 ! (2_1) dtmp2 = y + y_hi;
+
+ fsqrtd %f26,%f24 ! (1_1) res = sqrt(res);
+ lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0];
+ faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi;
+
+ fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm;
+ fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi;
+ lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1];
+
+ fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi;
+ fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi;
+
+ fmuld %f30,%f10,%f58 ! (3_1) y *= dnorm;
+ fabsd %f28,%f34 ! (2_0) y = fabs(y);
+
+ fabsd %f26,%f50 ! (2_0) x = fabs(x);
+
+ fmuld %f56,%f18,%f10 ! (2_1) dtmp1 *= x_lo;
+ fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y
+
+ fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo;
+
+ fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y;
+
+ faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28;
+ fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x);
+
+ faddd %f58,D2ON28,%f28 ! (3_1) y_hi = y + D2ON28;
+ fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y);
+
+ faddd %f2,%f44,%f30 ! (2_1) res += dtmp0;
+ fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x);
+
+ faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2;
+ fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0);
+
+ or %o3,%o0,%o3 ! (2_0) c0 |= c2;
+ fsubd %f56,D2ON28,%f18 ! (3_1) x_hi -= D2ON28;
+
+ andcc %o3,2,%g0 ! (2_0) c0 & 2
+ bnz,pn %icc,.update8 ! (2_0) if ( (c0 & 2) != 0 )
+ fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28;
+.cont8:
+ and %o4,%o5,%o4 ! (2_0) c1 &= c3;
+ faddd %f30,%f26,%f12 ! (2_1) res += dtmp1;
+
+ add %i3,stridey,%l4 ! py += stridey
+ andcc %o4,2,%g0 ! (2_0) c1 & 2
+ bnz,pn %icc,.update9 ! (2_0) if ( (c1 & 2) != 0 )
+ fmovd %f36,%f56 ! (1_0) dmax = x;
+.cont9:
+ lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0];
+ add %o1,stridex,%l2 ! px += stridex
+ fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax);
+
+ fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi;
+ lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1];
+ faddd %f58,%f4,%f32 ! (3_1) dtmp2 = y + y_hi;
+
+ fsqrtd %f12,%f12 ! (2_1) res = sqrt(res);
+ faddd %f6,%f18,%f28 ! (3_1) dtmp1 = x + x_hi;
+
+ cmp counter,4
+ bl,pn %icc,.tail
+ nop
+
+ ba .main_loop
+ sub counter,4,counter
+
+ .align 16
+.main_loop:
+ fmuld %f20,%f44,%f2 ! (0_1) x *= dnorm;
+ fsubd %f6,%f18,%f20 ! (3_2) x_lo = x - x_hi;
+ lda [%l2]%asi,%f18 ! (3_1) ((float*)&x)[0] = ((float*)px)[0];
+
+ fmuld %f4,%f4,%f22 ! (3_2) dtmp0 = y_hi * y_hi;
+ lda [%l2+4]%asi,%f19 ! (3_1) ((float*)&x)[1] = ((float*)px)[1];
+ fsubd %f58,%f4,%f58 ! (3_2) y_lo = y - y_hi;
+
+ fmuld %f40,%f44,%f44 ! (0_1) y *= dnorm;
+ fabsd %f30,%f30 ! (3_1) y = fabs(y);
+
+ fmuld %f38,%f24,%f10 ! (1_2) res = dmax * res;
+ fabsd %f18,%f18 ! (3_1) x = fabs(x);
+ st %f10,[%i5] ! (1_2) ((float*)pz)[0] = ((float*)&res)[0];
+
+ fmuld %f28,%f20,%f28 ! (3_2) dtmp1 *= x_lo;
+ st %f11,[%i5+4] ! (1_2) ((float*)pz)[1] = ((float*)&res)[1];
+ fcmped %fcc2,%f54,%f56 ! (1_1) dmax ? y
+
+ fmuld %f32,%f58,%f24 ! (3_2) dtmp2 *= y_lo;
+
+ fmovdg %fcc2,%f54,%f56 ! (1_1) if ( dmax < y ) dmax = y;
+
+ faddd %f2,D2ON28,%f10 ! (0_1) x_hi = x + D2ON28;
+ fcmple32 DC1,%f18,%o3 ! (3_1) c0 = vis_fcmple32(DC1,x);
+
+ faddd %f44,D2ON28,%f20 ! (0_1) y_hi = y + D2ON28;
+ fcmple32 DC1,%f30,%o0 ! (3_1) c2 = vis_fcmple32(DC1,y);
+
+ faddd %f60,%f22,%f22 ! (3_2) res += dtmp0;
+ fcmpgt32 DC2,%f18,%o4 ! (3_1) c1 = vis_fcmpgt32(DC2,x);
+
+ faddd %f28,%f24,%f26 ! (3_2) dtmp1 += dtmp2;
+ fcmpgt32 DC2,%f30,%o1 ! (3_1) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f56,DC0,%f38 ! (1_1) dmax = vis_fand(dmax,DC0);
+
+ or %o3,%o0,%o3 ! (3_1) c0 |= c2;
+ fsubd %f10,D2ON28,%f58 ! (0_1) x_hi -= D2ON28;
+
+ andcc %o3,2,%g0 ! (3_1) c0 & 2
+ bnz,pn %icc,.update10 ! (3_1) if ( (c0 & 2) != 0 )
+ fsubd %f20,D2ON28,%f56 ! (0_1) y_hi -= D2ON28;
+.cont10:
+ faddd %f22,%f26,%f28 ! (3_2) res += dtmp1;
+ and %o4,%o1,%o4 ! (3_1) c1 &= c3;
+
+ add %l4,stridey,%i3 ! py += stridey
+ andcc %o4,2,%g0 ! (3_1) c1 & 2
+ bnz,pn %icc,.update11 ! (3_1) if ( (c1 & 2) != 0 )
+ fmovd %f50,%f32 ! (2_1) dmax = x;
+.cont11:
+ fpsub32 DC1,%f38,%f10 ! (1_1) dnorm = vis_fpsub32(DC1,dmax);
+ add %l2,stridex,%l1 ! px += stridex
+ lda [%i3]%asi,%f20 ! (0_0) ((float*)&y)[0] = ((float*)py)[0];
+
+ fmuld %f58,%f58,%f6 ! (0_1) res = x_hi * x_hi;
+ lda [%i3+4]%asi,%f21 ! (0_0) ((float*)&y)[1] = ((float*)py)[1];
+ add %i5,stridez,%l6 ! pz += stridez
+ faddd %f44,%f56,%f60 ! (0_1) dtmp2 = y + y_hi;
+
+ fsqrtd %f28,%f4 ! (3_2) res = sqrt(res);
+ lda [%l1]%asi,%f22 ! (0_0) ((float*)&x)[0] = ((float*)px)[0];
+ faddd %f2,%f58,%f24 ! (0_1) dtmp1 = x + x_hi;
+
+ fmuld %f36,%f10,%f36 ! (1_1) x *= dnorm;
+ fsubd %f2,%f58,%f26 ! (0_1) x_lo = x - x_hi;
+ lda [%l1+4]%asi,%f23 ! (0_0) ((float*)&x)[1] = ((float*)px)[1];
+
+ fmuld %f56,%f56,%f28 ! (0_1) dtmp0 = y_hi * y_hi;
+ fsubd %f44,%f56,%f44 ! (0_1) y_lo = y - y_hi;
+
+ fmuld %f54,%f10,%f56 ! (1_1) y *= dnorm;
+ fabsd %f20,%f40 ! (0_0) y = fabs(y);
+
+ fmuld %f52,%f12,%f12 ! (2_2) res = dmax * res;
+ fabsd %f22,%f20 ! (0_0) x = fabs(x);
+ st %f12,[%l6] ! (2_2) ((float*)pz)[0] = ((float*)&res)[0];
+
+ fmuld %f24,%f26,%f10 ! (0_1) dtmp1 *= x_lo;
+ st %f13,[%l6+4] ! (2_2) ((float*)pz)[1] = ((float*)&res)[1];
+ fcmped %fcc3,%f34,%f32 ! (2_1) dmax ? y
+
+ fmuld %f60,%f44,%f12 ! (0_1) dtmp2 *= y_lo;
+
+ fmovdg %fcc3,%f34,%f32 ! (2_1) if ( dmax < y ) dmax = y;
+
+ faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28;
+ fcmple32 DC1,%f20,%g5 ! (0_0) c0 = vis_fcmple32(DC1,x);
+
+ faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28;
+ fcmple32 DC1,%f40,%o2 ! (0_0) c2 = vis_fcmple32(DC1,y);
+
+ faddd %f6,%f28,%f24 ! (0_1) res += dtmp0;
+ fcmpgt32 DC2,%f20,%g1 ! (0_0) c1 = vis_fcmpgt32(DC2,x);
+
+ faddd %f10,%f12,%f26 ! (0_1) dtmp1 += dtmp2;
+ fcmpgt32 DC2,%f40,%o4 ! (0_0) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f32,DC0,%f52 ! (2_1) dmax = vis_fand(dmax,DC0);
+
+ or %g5,%o2,%g5 ! (0_0) c0 |= c2;
+ fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28;
+
+ andcc %g5,2,%g0 ! (0_0) c0 & 2
+ bnz,pn %icc,.update12 ! (0_0) if ( (c0 & 2) != 0 )
+ fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28;
+.cont12:
+ and %g1,%o4,%g1 ! (0_0) c1 &= c3;
+ faddd %f24,%f26,%f12 ! (0_1) res += dtmp1;
+
+ add %i3,stridey,%l2 ! py += stridey
+ andcc %g1,2,%g0 ! (0_0) c1 & 2
+ bnz,pn %icc,.update13 ! (0_0) if ( (c1 & 2) != 0 )
+ fmovd %f18,%f44 ! (3_1) dmax = x;
+.cont13:
+ fpsub32 DC1,%f52,%f10 ! (2_1) dnorm = vis_fpsub32(DC1,dmax);
+ add %l1,stridex,%l7 ! px += stridex
+ lda [%l2]%asi,%f24 ! (1_0) ((float*)&y)[0] = ((float*)py)[0];
+
+ fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi;
+ add %l6,stridez,%i5 ! pz += stridez
+ lda [%l2+4]%asi,%f25 ! (1_0) ((float*)&y)[1] = ((float*)py)[1];
+ faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi;
+
+ fsqrtd %f12,%f12 ! (0_1) res = sqrt(res);
+ lda [%l7]%asi,%f26 ! (1_0) ((float*)&x)[0] = ((float*)px)[0];
+ faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi;
+
+ fmuld %f50,%f10,%f50 ! (2_1) x *= dnorm;
+ fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi;
+ lda [%l7+4]%asi,%f27 ! (1_0) ((float*)&x)[1] = ((float*)px)[1];
+
+ fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi;
+ fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi;
+
+ fmuld %f34,%f10,%f34 ! (2_1) y *= dnorm;
+ fabsd %f24,%f54 ! (1_0) y = fabs(y);
+
+ fmuld %f14,%f4,%f14 ! (3_2) res = dmax * res;
+ fabsd %f26,%f36 ! (1_0) x = fabs(x);
+ st %f14,[%i5] ! (3_2) ((float*)pz)[0] = ((float*)&res)[0];
+
+ fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo;
+ st %f15,[%i5+4] ! (3_2) ((float*)pz)[1] = ((float*)&res)[1];
+ fcmped %fcc0,%f30,%f44 ! (3_1) dmax ? y
+
+ fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo;
+
+ fmovdg %fcc0,%f30,%f44 ! (3_1) if ( dmax < y ) dmax = y;
+
+ faddd %f50,D2ON28,%f58 ! (2_1) x_hi = x + D2ON28;
+ fcmple32 DC1,%f36,%g1 ! (1_0) c0 = vis_fcmple32(DC1,x);
+
+ faddd %f34,D2ON28,%f22 ! (2_1) y_hi = y + D2ON28;
+ fcmple32 DC1,%f54,%g5 ! (1_0) c2 = vis_fcmple32(DC1,y);
+
+ faddd %f60,%f2,%f24 ! (1_1) res += dtmp0;
+ fcmpgt32 DC2,%f36,%o5 ! (1_0) c1 = vis_fcmpgt32(DC2,x);
+
+ faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2;
+ fcmpgt32 DC2,%f54,%o1 ! (1_0) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f44,DC0,%f14 ! (3_1) dmax = vis_fand(dmax,DC0);
+
+ or %g1,%g5,%g1 ! (1_0) c0 |= c2;
+ fsubd %f58,D2ON28,%f44 ! (2_1) x_hi -= D2ON28;
+
+ andcc %g1,2,%g0 ! (1_0) c0 & 2
+ bnz,pn %icc,.update14 ! (1_0) if ( (c0 & 2) != 0 )
+ fsubd %f22,D2ON28,%f58 ! (2_1) y_hi -= D2ON28;
+.cont14:
+ and %o5,%o1,%o5 ! (1_0) c1 &= c3;
+ faddd %f24,%f28,%f26 ! (1_1) res += dtmp1;
+
+ add %l2,stridey,%i3 ! py += stridey
+ andcc %o5,2,%g0 ! (1_0) c1 & 2
+ bnz,pn %icc,.update15 ! (1_0) if ( (c1 & 2) != 0 )
+ fmovd %f20,%f4 ! (0_0) dmax = x;
+.cont15:
+ fpsub32 DC1,%f14,%f10 ! (3_1) dnorm = vis_fpsub32(DC1,dmax);
+ add %l7,stridex,%o1 ! px += stridex
+ lda [%i3]%asi,%f28 ! (2_0) ((float*)&y)[0] = ((float*)py)[0];
+
+ fmuld %f44,%f44,%f2 ! (2_1) res = x_hi * x_hi;
+ add %i5,stridez,%g5 ! pz += stridez
+ lda [%i3+4]%asi,%f29 ! (2_0) ((float*)&y)[1] = ((float*)py)[1];
+ faddd %f34,%f58,%f60 ! (2_1) dtmp2 = y + y_hi;
+
+ fsqrtd %f26,%f24 ! (1_1) res = sqrt(res);
+ lda [%o1]%asi,%f26 ! (2_0) ((float*)&x)[0] = ((float*)px)[0];
+ faddd %f50,%f44,%f56 ! (2_1) dtmp1 = x + x_hi;
+
+ fmuld %f18,%f10,%f6 ! (3_1) x *= dnorm;
+ fsubd %f50,%f44,%f18 ! (2_1) x_lo = x - x_hi;
+ lda [%o1+4]%asi,%f27 ! (2_0) ((float*)&x)[1] = ((float*)px)[1];
+
+ fmuld %f58,%f58,%f44 ! (2_1) dtmp0 = y_hi * y_hi;
+ fsubd %f34,%f58,%f22 ! (2_1) y_lo = y - y_hi;
+
+ fmuld %f30,%f10,%f58 ! (3_1) y *= dnorm;
+ fabsd %f28,%f34 ! (2_0) y = fabs(y);
+
+ fmuld %f16,%f12,%f16 ! (0_1) res = dmax * res;
+ fabsd %f26,%f50 ! (2_0) x = fabs(x);
+ st %f16,[%g5] ! (0_1) ((float*)pz)[0] = ((float*)&res)[0];
+
+ fmuld %f56,%f18,%f10 ! (2_1) dtmp1 *= x_lo;
+ st %f17,[%g5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res)[1];
+ fcmped %fcc1,%f40,%f4 ! (0_0) dmax ? y
+
+ fmuld %f60,%f22,%f12 ! (2_1) dtmp2 *= y_lo;
+
+ fmovdg %fcc1,%f40,%f4 ! (0_0) if ( dmax < y ) dmax = y;
+
+ faddd %f6,D2ON28,%f56 ! (3_1) x_hi = x + D2ON28;
+ fcmple32 DC1,%f50,%o3 ! (2_0) c0 = vis_fcmple32(DC1,x);
+
+ faddd %f58,D2ON28,%f28 ! (3_1) y_hi = y + D2ON28;
+ fcmple32 DC1,%f34,%o0 ! (2_0) c2 = vis_fcmple32(DC1,y);
+
+ faddd %f2,%f44,%f30 ! (2_1) res += dtmp0;
+ fcmpgt32 DC2,%f50,%o4 ! (2_0) c1 = vis_fcmpgt32(DC2,x);
+
+ faddd %f10,%f12,%f26 ! (2_1) dtmp1 += dtmp2;
+ fcmpgt32 DC2,%f34,%o5 ! (2_0) c3 = vis_fcmpgt32(DC2,y);
+
+ fand %f4,DC0,%f16 ! (0_0) dmax = vis_fand(dmax,DC0);
+
+ or %o3,%o0,%o3 ! (2_0) c0 |= c2;
+ fsubd %f56,D2ON28,%f18 ! (3_1) x_hi -= D2ON28;
+
+ andcc %o3,2,%g0 ! (2_0) c0 & 2
+ bnz,pn %icc,.update16 ! (2_0) if ( (c0 & 2) != 0 )
+ fsubd %f28,D2ON28,%f4 ! (3_1) y_hi -= D2ON28;
+.cont16:
+ and %o4,%o5,%o4 ! (2_0) c1 &= c3;
+ faddd %f30,%f26,%f12 ! (2_1) res += dtmp1;
+
+ add %i3,stridey,%l4 ! py += stridey
+ andcc %o4,2,%g0 ! (2_0) c1 & 2
+ bnz,pn %icc,.update17 ! (2_0) if ( (c1 & 2) != 0 )
+ fmovd %f36,%f56 ! (1_0) dmax = x;
+.cont17:
+ lda [%l4]%asi,%f30 ! (3_0) ((float*)&y)[0] = ((float*)py)[0];
+ add %o1,stridex,%l2 ! px += stridex
+ fpsub32 DC1,%f16,%f44 ! (0_0) dnorm = vis_fpsub32(DC1,dmax);
+
+ fmuld %f18,%f18,%f60 ! (3_1) res = x_hi * x_hi;
+ add %g5,stridez,%i5 ! pz += stridez
+ lda [%l4+4]%asi,%f31 ! (3_0) ((float*)&y)[1] = ((float*)py)[1];
+ faddd %f58,%f4,%f32 ! (3_1) dtmp2 = y + y_hi;
+
+ fsqrtd %f12,%f12 ! (2_1) res = sqrt(res);
+ subcc counter,4,counter ! counter -= 4;
+ bpos,pt %icc,.main_loop
+ faddd %f6,%f18,%f28 ! (3_1) dtmp1 = x + x_hi;
+
+ add counter,4,counter
+
+.tail:
+ subcc counter,1,counter
+ bneg,a .begin
+ nop
+
+ fsubd %f6,%f18,%f20 ! (3_2) x_lo = x - x_hi;
+
+ fmuld %f4,%f4,%f22 ! (3_2) dtmp0 = y_hi * y_hi;
+ fsubd %f58,%f4,%f58 ! (3_2) y_lo = y - y_hi;
+
+ fmuld %f38,%f24,%f10 ! (1_2) res = dmax * res;
+ st %f10,[%i5] ! (1_2) ((float*)pz)[0] = ((float*)&res)[0];
+
+ st %f11,[%i5+4] ! (1_2) ((float*)pz)[1] = ((float*)&res)[1];
+
+ subcc counter,1,counter
+ bneg,a .begin
+ add %i5,stridez,%i5
+
+ fmuld %f28,%f20,%f28 ! (3_2) dtmp1 *= x_lo;
+
+ fmuld %f32,%f58,%f24 ! (3_2) dtmp2 *= y_lo;
+
+ faddd %f60,%f22,%f22 ! (3_2) res += dtmp0;
+
+ faddd %f28,%f24,%f26 ! (3_2) dtmp1 += dtmp2;
+
+ faddd %f22,%f26,%f28 ! (3_2) res += dtmp1;
+
+ add %i5,stridez,%l6 ! pz += stridez
+
+ fsqrtd %f28,%f4 ! (3_2) res = sqrt(res);
+ add %l2,stridex,%l1 ! px += stridex
+
+ fmuld %f52,%f12,%f12 ! (2_2) res = dmax * res;
+ st %f12,[%l6] ! (2_2) ((float*)pz)[0] = ((float*)&res)[0];
+
+ st %f13,[%l6+4] ! (2_2) ((float*)pz)[1] = ((float*)&res)[1];
+
+ subcc counter,1,counter
+ bneg .begin
+ add %l6,stridez,%i5
+
+ fmuld %f14,%f4,%f14 ! (3_2) res = dmax * res;
+ st %f14,[%i5] ! (3_2) ((float*)pz)[0] = ((float*)&res)[0];
+
+ st %f15,[%i5+4] ! (3_2) ((float*)pz)[1] = ((float*)&res)[1];
+
+ ba .begin
+ add %i5,stridez,%i5
+
+ .align 16
+.spec0:
+ ld [%i1+4],%l1 ! lx = ((int*)px)[1];
+ cmp %o2,%o4 ! j0 ? 0x7ff00000
+ bge,pn %icc,1f ! if ( j0 >= 0x7ff00000 )
+ fabsd %f26,%f26 ! x = fabs(x);
+
+ sub %o0,%l4,%o0 ! diff = hy - hx;
+ fabsd %f24,%f24 ! y = fabs(y);
+
+ sra %o0,31,%l4 ! j0 = diff >> 31;
+
+ xor %o0,%l4,%o0 ! diff ^ j0
+
+ sethi %hi(0x03600000),%l1
+ sub %o0,%l4,%o0 ! (diff ^ j0) - j0
+
+ cmp %o0,%l1 ! ((diff ^ j0) - j0) ? 0x03600000
+ bge,a,pn %icc,2f ! if ( ((diff ^ j0) - j0) >= 0x03600000 )
+ faddd %f26,%f24,%f24 ! *pz = x + y
+
+ fmuld %f26,DC2,%f36 ! (1_1) x *= dnorm;
+
+ fmuld %f24,DC2,%f56 ! (1_1) y *= dnorm;
+
+ faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28;
+
+ faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28;
+
+ fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28;
+
+ fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28;
+
+ fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi;
+ faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi;
+
+ faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi;
+
+ fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi;
+
+ fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi;
+ fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi;
+
+ fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo;
+
+ fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo;
+
+ faddd %f60,%f2,%f24 ! (1_1) res += dtmp0;
+
+ faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2;
+
+ faddd %f24,%f28,%f26 ! (1_1) res += dtmp1;
+
+ fsqrtd %f26,%f24 ! (1_1) res = sqrt(res);
+
+ fmuld DC3,%f24,%f24 ! (1_2) res = dmax * res;
+2:
+ add %i3,stridey,%i3
+ add %i1,stridex,%i1
+ st %f24,[%i5] ! ((float*)pz)[0] = ((float*)&res)[0];
+ st %f25,[%i5+4] ! ((float*)pz)[1] = ((float*)&res)[1];
+
+ add %i5,stridez,%i5
+ ba .begin1
+ sub counter,1,counter
+
+1:
+ ld [%i3+4],%l2 ! ly = ((int*)py)[1];
+ cmp %o0,%o4 ! hx ? 0x7ff00000
+ bne,pn %icc,1f ! if ( hx != 0x7ff00000 )
+ fabsd %f24,%f24 ! y = fabs(y);
+
+ cmp %l1,0 ! lx ? 0
+ be,pn %icc,2f ! if ( lx == 0 )
+ nop
+1:
+ cmp %l4,%o4 ! hy ? 0x7ff00000
+ bne,pn %icc,1f ! if ( hy != 0x7ff00000 )
+ nop
+
+ cmp %l2,0 ! ly ? 0
+ be,pn %icc,2f ! if ( ly == 0 )
+ nop
+1:
+ add %i3,stridey,%i3
+ add %i1,stridex,%i1
+ fmuld %f26,%f24,%f24 ! res = x * y;
+ st %f24,[%i5] ! ((float*)pz)[0] = ((float*)&res)[0];
+
+ st %f25,[%i5+4] ! ((float*)pz)[1] = ((float*)&res)[1];
+
+ add %i5,stridez,%i5
+ ba .begin1
+ sub counter,1,counter
+
+2:
+ add %i1,stridex,%i1
+ add %i3,stridey,%i3
+ st DC0_HI,[%i5] ! ((int*)pz)[0] = 0x7ff00000;
+ st DC0_LO,[%i5+4] ! ((int*)pz)[1] = 0;
+ fcmpd %f26,%f24 ! x ? y
+
+ add %i5,stridez,%i5
+ ba .begin1
+ sub counter,1,counter
+
+ .align 16
+.spec1:
+ fmuld %f26,DC3,%f36 ! (1_1) x *= dnorm;
+
+ fmuld %f24,DC3,%f56 ! (1_1) y *= dnorm;
+
+ faddd %f36,D2ON28,%f58 ! (1_1) x_hi = x + D2ON28;
+
+ faddd %f56,D2ON28,%f22 ! (1_1) y_hi = y + D2ON28;
+
+ fsubd %f58,D2ON28,%f58 ! (1_1) x_hi -= D2ON28;
+
+ fsubd %f22,D2ON28,%f22 ! (1_1) y_hi -= D2ON28;
+
+ fmuld %f58,%f58,%f60 ! (1_1) res = x_hi * x_hi;
+ faddd %f56,%f22,%f28 ! (1_1) dtmp2 = y + y_hi;
+
+ faddd %f36,%f58,%f6 ! (1_1) dtmp1 = x + x_hi;
+
+ fsubd %f36,%f58,%f58 ! (1_1) x_lo = x - x_hi;
+
+ fmuld %f22,%f22,%f2 ! (1_1) dtmp0 = y_hi * y_hi;
+ fsubd %f56,%f22,%f56 ! (1_1) y_lo = y - y_hi;
+
+ fmuld %f6,%f58,%f10 ! (1_1) dtmp1 *= x_lo;
+
+ fmuld %f28,%f56,%f26 ! (1_1) dtmp2 *= y_lo;
+
+ faddd %f60,%f2,%f24 ! (1_1) res += dtmp0;
+
+ faddd %f10,%f26,%f28 ! (1_1) dtmp1 += dtmp2;
+
+ faddd %f24,%f28,%f26 ! (1_1) res += dtmp1;
+
+ fsqrtd %f26,%f24 ! (1_1) res = sqrt(res);
+
+ fmuld DC2,%f24,%f24 ! (1_2) res = dmax * res;
+
+ add %i3,stridey,%i3
+ add %i1,stridex,%i1
+ st %f24,[%i5] ! ((float*)pz)[0] = ((float*)&res)[0];
+
+ st %f25,[%i5+4] ! ((float*)pz)[1] = ((float*)&res)[1];
+ add %i5,stridez,%i5
+ ba .begin1
+ sub counter,1,counter
+
+ .align 16
+.update0:
+ fzero %f50
+ cmp counter,1
+ ble .cont0
+ fzero %f34
+
+ mov %o1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,1,tmp_counter
+ ba .cont0
+ mov 1,counter
+
+ .align 16
+.update1:
+ fzero %f50
+ cmp counter,1
+ ble .cont1
+ fzero %f34
+
+ mov %o1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,1,tmp_counter
+ ba .cont1
+ mov 1,counter
+
+ .align 16
+.update2:
+ fzero %f18
+ cmp counter,2
+ ble .cont2
+ fzero %f30
+
+ mov %l2,tmp_px
+ mov %l4,tmp_py
+
+ sub counter,2,tmp_counter
+ ba .cont1
+ mov 2,counter
+
+ .align 16
+.update3:
+ fzero %f18
+ cmp counter,2
+ ble .cont3
+ fzero %f30
+
+ mov %l2,tmp_px
+ mov %l4,tmp_py
+
+ sub counter,2,tmp_counter
+ ba .cont3
+ mov 2,counter
+
+ .align 16
+.update4:
+ fzero %f20
+ cmp counter,3
+ ble .cont4
+ fzero %f40
+
+ mov %l1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,3,tmp_counter
+ ba .cont4
+ mov 3,counter
+
+ .align 16
+.update5:
+ fzero %f20
+ cmp counter,3
+ ble .cont5
+ fzero %f40
+
+ mov %l1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,3,tmp_counter
+ ba .cont5
+ mov 3,counter
+
+ .align 16
+.update6:
+ fzero %f36
+ cmp counter,4
+ ble .cont6
+ fzero %f54
+
+ mov %l7,tmp_px
+ mov %l2,tmp_py
+
+ sub counter,4,tmp_counter
+ ba .cont6
+ mov 4,counter
+
+ .align 16
+.update7:
+ fzero %f36
+ cmp counter,4
+ ble .cont7
+ fzero %f54
+
+ mov %l7,tmp_px
+ mov %l2,tmp_py
+
+ sub counter,4,tmp_counter
+ ba .cont7
+ mov 4,counter
+
+ .align 16
+.update8:
+ fzero %f50
+ cmp counter,5
+ ble .cont8
+ fzero %f34
+
+ mov %o1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,5,tmp_counter
+ ba .cont8
+ mov 5,counter
+
+ .align 16
+.update9:
+ fzero %f50
+ cmp counter,5
+ ble .cont9
+ fzero %f34
+
+ mov %o1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,5,tmp_counter
+ ba .cont9
+ mov 5,counter
+
+
+ .align 16
+.update10:
+ fzero %f18
+ cmp counter,2
+ ble .cont10
+ fzero %f30
+
+ mov %l2,tmp_px
+ mov %l4,tmp_py
+
+ sub counter,2,tmp_counter
+ ba .cont10
+ mov 2,counter
+
+ .align 16
+.update11:
+ fzero %f18
+ cmp counter,2
+ ble .cont11
+ fzero %f30
+
+ mov %l2,tmp_px
+ mov %l4,tmp_py
+
+ sub counter,2,tmp_counter
+ ba .cont11
+ mov 2,counter
+
+ .align 16
+.update12:
+ fzero %f20
+ cmp counter,3
+ ble .cont12
+ fzero %f40
+
+ mov %l1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,3,tmp_counter
+ ba .cont12
+ mov 3,counter
+
+ .align 16
+.update13:
+ fzero %f20
+ cmp counter,3
+ ble .cont13
+ fzero %f40
+
+ mov %l1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,3,tmp_counter
+ ba .cont13
+ mov 3,counter
+
+ .align 16
+.update14:
+ fzero %f54
+ cmp counter,4
+ ble .cont14
+ fzero %f36
+
+ mov %l7,tmp_px
+ mov %l2,tmp_py
+
+ sub counter,4,tmp_counter
+ ba .cont14
+ mov 4,counter
+
+ .align 16
+.update15:
+ fzero %f54
+ cmp counter,4
+ ble .cont15
+ fzero %f36
+
+ mov %l7,tmp_px
+ mov %l2,tmp_py
+
+ sub counter,4,tmp_counter
+ ba .cont15
+ mov 4,counter
+
+ .align 16
+.update16:
+ fzero %f50
+ cmp counter,5
+ ble .cont16
+ fzero %f34
+
+ mov %o1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,5,tmp_counter
+ ba .cont16
+ mov 5,counter
+
+ .align 16
+.update17:
+ fzero %f50
+ cmp counter,5
+ ble .cont17
+ fzero %f34
+
+ mov %o1,tmp_px
+ mov %i3,tmp_py
+
+ sub counter,5,tmp_counter
+ ba .cont17
+ mov 5,counter
+
+ .align 16
+.exit:
+ ret
+ restore
+ SET_SIZE(__vhypot)
+
diff --git a/usr/src/libm/src/mvec/vis/__vhypotf.S b/usr/src/libm/src/mvec/vis/__vhypotf.S
new file mode 100644
index 0000000..7bfddc3
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vhypotf.S
@@ -0,0 +1,1226 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vhypotf.S 1.6 06/01/23 SMI"
+
+ .file "__vhypotf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01
+ .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01
+ .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff
+ .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000
+ .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000
+ .word 0x7fe00000, 0x00000000 ! DA0 = 0x7fe0000000000000
+ .word 0x47efffff, 0xe0000000 ! DFMAX = 3.402823e+38
+ .word 0x7f7fffff, 0x80808080 ! FMAX = 3.402823e+38 , SCALE = 0x80808080
+ .word 0x20000000, 0x00000000 ! DA1 = 0x2000000000000000
+
+#define DC0 %f12
+#define DC1 %f10
+#define DC2 %f42
+#define DA0 %f6
+#define DA1 %f4
+#define K2 %f26
+#define K1 %f28
+#define SCALE %f3
+#define FMAX %f2
+#define DFMAX %f50
+
+#define stridex %l6
+#define stridey %i4
+#define stridez %l5
+#define _0x7fffffff %o1
+#define _0x7f3504f3 %o2
+#define _0x1ff0 %l2
+#define TBL %l1
+
+#define counter %l0
+
+#define tmp_px STACK_BIAS-0x30
+#define tmp_py STACK_BIAS-0x28
+#define tmp_counter STACK_BIAS-0x20
+#define tmp0 STACK_BIAS-0x18
+#define tmp1 STACK_BIAS-0x10
+#define tmp2 STACK_BIAS-0x0c
+#define tmp3 STACK_BIAS-0x08
+#define tmp4 STACK_BIAS-0x04
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x30
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! hx0 = *(int*)px;
+! x0 = *px;
+! px += stridex;
+!
+! hy0 = *(int*)py;
+! y0 = *py;
+! py += stridey;
+!
+! hx0 &= 0x7fffffff;
+! hy0 &= 0x7fffffff;
+!
+! if ( hx >= 0x7f3504f3 || hy >= 0x7f3504f3 )
+! {
+! if ( hx >= 0x7f800000 || hy >= 0x7f800000 )
+! {
+! if ( hx == 0x7f800000 || hy == 0x7f800000 )
+! *(int*)pz = 0x7f800000;
+! else *pz = x * y;
+! }
+! else
+! {
+! hyp = sqrt(x * (double)x + y * (double)y);
+! if ( hyp <= DMAX ) ftmp0 = (float)hyp;
+! else ftmp0 = FMAX * FMAX;
+! *pz = ftmp0;
+! }
+! pz += stridez;
+! continue;
+! }
+! if ( (hx | hy) == 0 )
+! {
+! *pz = 0;
+! pz += stridez;
+! continue;
+! }
+! dx0 = x0 * (double)x0;
+! dy0 = y0 * (double)y0;
+! db0 = dx0 + dy0;
+!
+! iexp0 = ((int*)&db0)[0];
+!
+! h0 = vis_fand(db0,DC0);
+! h0 = vis_for(h0,DC1);
+! h_hi0 = vis_fand(h0,DC2);
+!
+! db0 = vis_fand(db0,DA0);
+! db0 = vis_fmul8x16(SCALE, db0);
+! db0 = vis_fpadd32(db0,DA1);
+!
+! iexp0 >>= 8;
+! di0 = iexp0 & 0x1ff0;
+! si0 = (char*)sqrt_arr + di0;
+!
+! dtmp0 = ((double*)((char*)div_arr + di0))[0];
+! xx0 = h0 - h_hi0;
+! xx0 *= dtmp0;
+!
+! dtmp0 = ((double*)si0)[1];
+! res0 = K2 * xx0;
+! res0 += K1;
+! res0 *= xx0;
+! res0 += DC1;
+! res0 = dtmp0 * res0;
+! res0 *= db0;
+! ftmp0 = (float)res0;
+! *pz = ftmp0;
+! pz += stridez;
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
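+!
+! Conceptually each element produces the value of the short reference routine
+! below (a minimal sketch, illustrative only; assumes <math.h> sqrt and a
+! hypothetical name). In the pipelined main loop the library sqrt is replaced
+! by a lookup in __vlibm_TBL_sqrtf plus the K1/K2 polynomial correction and
+! the VIS exponent rescaling described above:
+!
+!   float __vhypotf_elem_sketch(float x, float y)
+!   {
+!       double dx = (double)x * (double)x;  /* like fsmuld: exact in double */
+!       double dy = (double)y * (double)y;
+!       return (float)sqrt(dx + dy);        /* round once back to float */
+!   }
+!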
+
+ ENTRY(__vhypotf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,o3)
+ PIC_SET(l7,__vlibm_TBL_sqrtf,l1)
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],stridez
+#else
+ ld [%fp+STACK_BIAS+92],stridez
+#endif
+ st %i0,[%fp+tmp_counter]
+
+ stx %i1,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ ldd [%o3],K1
+ sethi %hi(0x7ffffc00),%o1
+
+ ldd [%o3+8],K2
+ sethi %hi(0x7f350400),%o2
+
+ ldd [%o3+16],DC0
+ add %o1,1023,_0x7fffffff
+ add %o2,0xf3,_0x7f3504f3
+
+ ldd [%o3+24],DC1
+ sll %i2,2,stridex
+
+ ld [%o3+56],FMAX
+
+ ldd [%o3+32],DC2
+ sll %i4,2,stridey
+
+ ldd [%o3+40],DA0
+ sll stridez,2,stridez
+
+ ldd [%o3+48],DFMAX
+
+ ld [%o3+60],SCALE
+ or %g0,0xff8,%l2
+
+ ldd [%o3+64],DA1
+ sll %l2,1,_0x1ff0
+ or %g0,%i5,%l7
+
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%i1
+ ldx [%fp+tmp_py],%i2
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px;
+
+ lda [%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
+
+ lda [%i1]0x82,%f17 ! (3_0) x0 = *px;
+ and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
+
+ cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
+ bge,pn %icc,.spec ! (3_0) if ( hx >= 0x7f3504f3 )
+ and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
+
+ cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
+ bge,pn %icc,.spec ! (3_0) if ( hy >= 0x7f3504f3 )
+ or %g0,%i2,%o7
+
+ orcc %l3,%l4,%g0
+ bz,pn %icc,.spec1
+
+ add %i1,stridex,%i1 ! px += stridex
+ fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
+ lda [%i2]0x82,%f17 ! (3_0) y0 = *py;
+
+ lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
+
+ lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
+
+ and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
+
+ fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
+ cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
+ bge,pn %icc,.update0 ! (4_0) if ( hx >= 0x7f3504f3 )
+ and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
+
+ orcc %l3,%l4,%g0
+ bz,pn %icc,.update0
+ lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
+.cont0:
+ faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
+
+ fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0;
+ cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3
+ lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py;
+
+ add %o7,stridey,%i5 ! py += stridey
+ lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px;
+
+ bge,pn %icc,.update1 ! (4_1) if ( hy >= 0x7f3504f3 )
+ st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0];
+.cont1:
+ and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff;
+
+ fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0;
+ lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px;
+
+ add %i1,stridex,%i1 ! px += stridex
+
+ lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py;
+ cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3
+ bge,pn %icc,.update2 ! (0_0) if ( hx >= 0x7f3504f3 )
+ add %i5,stridey,%o4 ! py += stridey
+.cont2:
+ faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0;
+
+ fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0;
+ and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff;
+ lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py;
+
+ cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3
+ bge,pn %icc,.update3 ! (0_0) if ( hy >= 0x7f3504f3 )
+ st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0];
+
+ orcc %l3,%l4,%g0
+ bz,pn %icc,.update3
+.cont3:
+ lda [%i1+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px;
+
+ fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0);
+
+ and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff;
+
+ fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0;
+ cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3
+ lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py;
+
+ add %i1,stridex,%i1 ! px += stridex
+
+ lda [%i1]0x82,%f17 ! (1_0) x0 = *px;
+ bge,pn %icc,.update4 ! (1_0) if ( hx >= 0x7f3504f3 )
+ add %o4,stridey,%i5 ! py += stridey
+.cont4:
+ and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff;
+ for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1);
+
+ cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3
+ ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0];
+ faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0;
+
+ fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0;
+ add %i1,stridex,%i1 ! px += stridex
+ lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py;
+
+ srax %o0,8,%o0 ! (3_1) iexp0 >>= 8;
+ bge,pn %icc,.update5 ! (1_0) if ( hy >= 0x7f3504f3 )
+ fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2);
+
+ orcc %l3,%l4,%g0
+ bz,pn %icc,.update5
+.cont5:
+ lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px;
+
+ and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0;
+ st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0];
+ fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0);
+
+ ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0;
+ add %i5,stridey,%i2 ! py += stridey
+ lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py;
+
+ and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff;
+
+ lda [%i1]0x82,%f17 ! (2_0) x0 = *px;
+ cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3
+
+ fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0;
+ and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff;
+ for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1);
+
+ bge,pn %icc,.update6 ! (2_0) if ( hx >= 0x7f3504f3 )
+ ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0];
+.cont6:
+ faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0;
+
+ fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0;
+ cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3
+ lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py;
+
+ add %i1,stridex,%i1 ! px += stridex
+ bge,pn %icc,.update7 ! (2_0) if ( hy >= 0x7f3504f3 )
+ fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2);
+
+ orcc %l3,%l4,%g0
+ bz,pn %icc,.update7
+ nop
+.cont7:
+ fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0;
+ srax %o3,8,%o3 ! (4_1) iexp0 >>= 8;
+ lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px;
+
+ and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0;
+ st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0];
+ fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0);
+
+ ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %i2,stridey,%o7 ! py += stridey
+ fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0;
+ lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
+ and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
+
+ faddd %f56,K1,%f54 ! (3_1) res0 += K1;
+ cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
+
+ lda [%i1]0x82,%f17 ! (3_0) x0 = *px;
+ add %i1,stridex,%i1 ! px += stridex
+ bge,pn %icc,.update8 ! (3_0) if ( hx >= 0x7f3504f3 )
+
+ fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0;
+.cont8:
+ and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
+ for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1);
+
+ cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
+ ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0];
+ faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0;
+
+ fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
+ bge,pn %icc,.update9 ! (3_0) if ( hy >= 0x7f3504f3 )
+ lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py;
+
+ orcc %l3,%l4,%g0
+ bz,pn %icc,.update9
+ nop
+.cont9:
+ fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0;
+ lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
+ fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2);
+
+ fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0;
+ srax %g1,8,%o5 ! (0_0) iexp0 >>= 8;
+ lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
+ fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0);
+
+ and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0;
+ st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0];
+ fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0);
+
+ ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0;
+ and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
+ fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
+ cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
+ bge,pn %icc,.update10 ! (4_0) if ( hx >= 0x7f3504f3 )
+ faddd %f40,DC1,%f40 ! (3_1) res0 += DC1;
+
+ fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
+ and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
+ ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
+ faddd %f54,K1,%f54 ! (4_1) res0 += K1;
+
+ lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
+.cont10:
+ fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0;
+ cmp counter,5
+ for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1);
+
+ ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0];
+ fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0;
+ faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
+
+ bl,pn %icc,.tail
+ nop
+
+ ba .main_loop
+ sub counter,5,counter
+
+ .align 16
+.main_loop:
+ fsmuld %f17,%f17,%f40 ! (4_1) dy0 = x0 * (double)x0;
+ cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3
+ lda [stridey+%o7]0x82,%f17 ! (4_1) hy0 = *py;
+ fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0;
+ add %o7,stridey,%i5 ! py += stridey
+ st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0];
+ fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2);
+
+ fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0;
+ srax %g1,8,%g5 ! (1_1) iexp0 >>= 8;
+ bge,pn %icc,.update11 ! (4_1) if ( hy >= 0x7f3504f3 )
+ fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0);
+
+ orcc %l3,%l4,%g0
+ nop
+ bz,pn %icc,.update11
+ fzero %f52
+.cont11:
+ fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0;
+ and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0;
+ lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px;
+ fand %f30,DC0,%f60 ! (2_1) h0 = vis_fand(db0,DC0);
+
+ ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0;
+ add %i1,stridex,%i0 ! px += stridex
+ fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0;
+ nop
+ lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px;
+ faddd %f58,DC1,%f36 ! (4_2) res0 += DC1;
+
+ faddd %f56,K1,%f58 ! (0_1) res0 += K1;
+ and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff;
+ ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1];
+ fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0);
+
+ lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py;
+ cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3
+ bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7f3504f3 )
+ fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0;
+.cont12:
+ fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dmp0;
+ add %l7,stridez,%o7 ! pz += stridez
+ st %f14,[%l7] ! (3_2) *pz = ftmp0;
+ for %f60,DC1,%f46 ! (2_1) h0 = vis_for(h0,DC1);
+
+ fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0;
+ add %i5,stridey,%o4 ! py += stridey
+ ld [%fp+tmp4],%g1 ! (2_1) iexp0 = ((int*)&db0)[0];
+ faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0;
+
+ fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0;
+ and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff;
+ lda [%i5+stridey]0x82,%f17 ! (0_0) hy0 = *py;
+ fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0;
+ cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3
+ st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0];
+ fand %f46,DC2,%f58 ! (2_1) h_hi0 = vis_fand(h0,DC2);
+
+ fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0;
+ srax %g1,8,%g1 ! (2_1) iexp0 >>= 8;
+ bge,pn %icc,.update13 ! (0_0) if ( hy >= 0x7f3504f3 )
+ fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0);
+
+ orcc %l3,%l4,%g0
+ nop
+ bz,pn %icc,.update13
+ fzero %f52
+.cont13:
+ fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0;
+ and %g1,_0x1ff0,%g1 ! (2_1) di0 = iexp0 & 0x1ff0;
+ lda [%i0+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px;
+ fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0);
+
+ ldd [TBL+%g1],%f22 ! (2_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0;
+ add %i0,stridex,%i1 ! px += stridex
+ fsubd %f46,%f58,%f58 ! (2_1) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0;
+ add %o7,stridez,%i0 ! pz += stridez
+ lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py;
+ faddd %f38,DC1,%f36 ! (0_1) res0 += DC1;
+
+ faddd %f56,K1,%f38 ! (1_1) res0 += K1;
+ and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff;
+ ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1];
+ fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0);
+
+ lda [%i1]0x82,%f17 ! (1_0) x0 = *px;
+ cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3
+ bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7f3504f3 )
+ fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0;
+.cont14:
+ fmuld %f58,%f22,%f58 ! (2_1) xx0 *= dmp0;
+ and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff;
+ add %o4,stridey,%i5 ! py += stridey
+ for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1);
+
+ fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0;
+ cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3
+ ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0];
+ faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0;
+
+ fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0;
+ add %i1,stridex,%i1 ! px += stridex
+ lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py;
+ fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0;
+ st %f14,[%o7] ! (4_2) *pz = ftmp0;
+ bge,pn %icc,.update15 ! (1_0) if ( hy >= 0x7f3504f3 )
+ fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2);
+
+ orcc %l3,%l4,%g0
+ bz,pn %icc,.update15
+ nop
+.cont15:
+ fmuld K2,%f58,%f54 ! (2_1) res0 = K2 * xx0;
+ srax %o0,8,%o0 ! (3_1) iexp0 >>= 8;
+ st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0];
+ fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0);
+
+ fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0;
+ and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0;
+ lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px;
+ fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0);
+
+ ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0;
+ add %i0,stridez,%i3 ! pz += stridez
+ fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0;
+ add %i5,stridey,%i2 ! py += stridey
+ lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py;
+ faddd %f44,DC1,%f44 ! (1_1) res0 += DC1;
+
+ fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0);
+ and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff;
+ ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1];
+ faddd %f54,K1,%f54 ! (2_1) res0 += K1;
+
+ lda [%i1]0x82,%f17 ! (2_0) x0 = *px;
+ cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3
+ add %i3,stridez,%o4 ! pz += stridez
+ fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0;
+
+ fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dmp0;
+ and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff;
+ st %f14,[%i0] ! (0_1) *pz = ftmp0;
+ for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1);
+
+ fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0;
+ bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7f3504f3 )
+ ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0];
+ faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0;
+.cont16:
+ fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0;
+ cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3
+ lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py;
+ fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f54,%f58,%f54 ! (2_1) res0 *= xx0;
+ add %i1,stridex,%l7 ! px += stridex
+ bge,pn %icc,.update17 ! (2_0) if ( hy >= 0x7f3504f3 )
+ fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2);
+
+ orcc %l3,%l4,%g0
+ nop
+ bz,pn %icc,.update17
+ fzero %f52
+.cont17:
+ fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0;
+ srax %o3,8,%o3 ! (4_1) iexp0 >>= 8;
+ st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0];
+ fand %f30,DA0,%f40 ! (2_1) db0 = vis_fand(db0,DA0);
+
+ fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0;
+ and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0;
+ lda [%l7]0x82,%l3 ! (3_0) hx0 = *(int*)px;
+ fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0);
+
+ ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %g1,TBL,%g1 ! (2_1) si0 = (char*)sqrt_arr + di0;
+ add %i2,stridey,%o7 ! py += stridey
+ fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0;
+ lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
+ add %l7,stridex,%i1 ! px += stridex
+ faddd %f54,DC1,%f36 ! (2_1) res0 += DC1;
+
+ faddd %f56,K1,%f54 ! (3_1) res0 += K1;
+ and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
+ ldd [%g1+8],%f56 ! (2_1) dtmp0 = ((double*)si0)[1];
+ fmul8x16 SCALE,%f40,%f40 ! (2_1) db0 = vis_fmul8x16(SCALE, db0);
+
+ lda [%l7]0x82,%f17 ! (3_0) x0 = *px;
+ cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
+ bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7f3504f3 )
+ fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0;
+.cont18:
+ fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dmp0;
+ and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
+ st %f14,[%i3] ! (1_1) *pz = ftmp0;
+ for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1);
+
+ fmuld %f56,%f36,%f36 ! (2_1) res0 = dtmp0 * res0;
+ cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
+ ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0];
+ faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0;
+
+ fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
+ bge,pn %icc,.update19 ! (3_0) if ( hy >= 0x7f3504f3 )
+ lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py;
+ fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1);
+
+.cont19:
+ fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0;
+ orcc %l3,%l4,%g0
+ st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0];
+ fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2);
+
+ fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0;
+ srax %g1,8,%o5 ! (0_0) iexp0 >>= 8;
+ lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
+ fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0);
+
+ fmuld %f36,%f62,%f62 ! (2_1) res0 *= db0;
+ and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0;
+ bz,pn %icc,.update19a
+ fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0);
+.cont19a:
+ ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0;
+ and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
+ fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0;
+
+ fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
+ cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
+ lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
+ faddd %f40,DC1,%f40 ! (3_1) res0 += DC1;
+
+ fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
+ bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7f3504f3 )
+ ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
+ faddd %f54,K1,%f54 ! (4_1) res0 += K1;
+
+ lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
+.cont20:
+ subcc counter,5,counter ! counter -= 5
+ add %o4,stridez,%l7 ! pz += stridez
+ fdtos %f62,%f14 ! (2_1) ftmp0 = (float)res0;
+
+ fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dmp0;
+ and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
+ st %f14,[%o4] ! (2_1) *pz = ftmp0;
+ for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1);
+
+ ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0];
+ fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0;
+ bpos,pt %icc,.main_loop
+ faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
+
+ add counter,5,counter
+
+.tail:
+ subcc counter,1,counter
+ bneg .begin
+ nop
+
+ fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0;
+ fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2);
+
+ fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0;
+ srax %g1,8,%g5 ! (1_1) iexp0 >>= 8;
+ fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0);
+
+ fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0;
+ and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0;
+
+ ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
+ add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0;
+ fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0;
+
+ faddd %f58,DC1,%f36 ! (4_2) res0 += DC1;
+
+ faddd %f56,K1,%f58 ! (0_1) res0 += K1;
+ ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1];
+ fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0);
+
+ fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0;
+
+ fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dmp0;
+ add %l7,stridez,%o7 ! pz += stridez
+ st %f14,[%l7] ! (3_2) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%o7,%l7
+
+ fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0;
+
+ fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0;
+
+ fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0;
+ fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0);
+
+ fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0;
+
+ add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0;
+
+ faddd %f38,DC1,%f36 ! (0_1) res0 += DC1;
+
+ faddd %f56,K1,%f38 ! (1_1) res0 += K1;
+ ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1];
+ fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0);
+
+ add %o7,stridez,%i0 ! pz += stridez
+ fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0;
+
+ fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0;
+
+ fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0;
+ add %i0,stridez,%i3 ! pz += stridez
+ st %f14,[%o7] ! (4_2) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%i0,%l7
+
+ fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0);
+
+ fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0;
+
+ add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0;
+
+ faddd %f44,DC1,%f44 ! (1_1) res0 += DC1;
+
+ fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0);
+ ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1];
+
+ add %i3,stridez,%o4 ! pz += stridez
+ fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0;
+
+ st %f14,[%i0] ! (0_1) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%i3,%l7
+
+ fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0;
+
+ fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1);
+
+ fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0;
+
+ fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0;
+
+ st %f14,[%i3] ! (1_1) *pz = ftmp0;
+
+ ba .begin
+ or %g0,%o4,%l7
+
+ .align 16
+.spec1:
+ st %g0,[%l7] ! *pz = 0;
+ add %l7,stridez,%l7 ! pz += stridez
+
+ add %i2,stridey,%i2 ! py += stridey
+ ba .begin1
+ sub counter,1,counter ! counter--
+
+ .align 16
+.spec:
+ sethi %hi(0x7f800000),%i0
+ cmp %l3,%i0 ! hx ? 0x7f800000
+ bge,pt %icc,2f ! if ( hx >= 0x7f800000 )
+ ld [%i2],%f8
+
+ cmp %l4,%i0 ! hy ? 0x7f800000
+ bge,pt %icc,2f ! if ( hy >= 0x7f800000 )
+ nop
+
+ fsmuld %f17,%f17,%f44 ! x * (double)x
+ fsmuld %f8,%f8,%f24 ! y * (double)y
+ faddd %f44,%f24,%f24 ! x * (double)x + y * (double)y
+ fsqrtd %f24,%f24 ! hyp = sqrt(x * (double)x + y * (double)y);
+ fcmped %f24,DFMAX ! hyp ? DMAX
+ fbug,a 1f ! if ( hyp > DMAX )
+ fmuls FMAX,FMAX,%f20 ! ftmp0 = FMAX * FMAX;
+
+ fdtos %f24,%f20 ! ftmp0 = (float)hyp;
+1:
+ st %f20,[%l7] ! *pz = ftmp0;
+ add %l7,stridez,%l7 ! pz += stridez
+ add %i1,stridex,%i1 ! px += stridex
+
+ add %i2,stridey,%i2 ! py += stridey
+ ba .begin1
+ sub counter,1,counter ! counter--
+2:
+ fcmps %f17,%f8 ! exceptions
+ cmp %l3,%i0 ! hx ? 0x7f800000
+ be,a %icc,1f ! if ( hx == 0x7f800000 )
+ st %i0,[%l7] ! *(int*)pz = 0x7f800000;
+
+ cmp %l4,%i0 ! hy ? 0x7f800000
+ be,a %icc,1f ! if ( hy == 0x7f800000 )
+ st %i0,[%l7] ! *(int*)pz = 0x7f800000;
+
+ fmuls %f17,%f8,%f8 ! x * y
+ st %f8,[%l7] ! *pz = x * y;
+
+1:
+ add %l7,stridez,%l7 ! pz += stridez
+ add %i1,stridex,%i1 ! px += stridex
+
+ add %i2,stridey,%i2 ! py += stridey
+ ba .begin1
+ sub counter,1,counter ! counter--
+
+ .align 16
+.update0:
+ cmp counter,1
+ ble .cont0
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+
+ add %o7,stridey,%i5
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont0
+ or %g0,1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ ble .cont1
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont1
+ or %g0,1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble .cont2
+ fzeros %f8
+
+ stx %i1,[%fp+tmp_px]
+ stx %o4,[%fp+tmp_py]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont2
+ or %g0,2,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble .cont3
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ stx %o4,[%fp+tmp_py]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont3
+ or %g0,2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ ble .cont4
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont4
+ or %g0,3,counter
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble .cont5
+ fzeros %f17
+
+ sub %i1,stridex,%i2
+ stx %i2,[%fp+tmp_px]
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont5
+ or %g0,3,counter
+
+ .align 16
+.update6:
+ cmp counter,4
+ ble .cont6
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ stx %i2,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont6
+ or %g0,4,counter
+
+ .align 16
+.update7:
+ cmp counter,4
+ ble .cont7
+ fzeros %f17
+
+ sub %i1,stridex,%o7
+ stx %o7,[%fp+tmp_px]
+ stx %i2,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont7
+ or %g0,4,counter
+
+ .align 16
+.update8:
+ cmp counter,5
+ ble .cont8
+ fzeros %f17
+
+ sub %i1,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+ stx %o7,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont8
+ or %g0,5,counter
+
+ .align 16
+.update9:
+ cmp counter,5
+ ble .cont9
+ fzeros %f17
+
+ sub %i1,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+ stx %o7,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont9
+ or %g0,5,counter
+
+ .align 16
+.update10:
+ fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
+ and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
+ ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
+ faddd %f54,K1,%f54 ! (4_1) res0 += K1;
+
+ cmp counter,6
+ ble .cont10
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ add %o7,stridey,%i5
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont10
+ or %g0,6,counter
+
+ .align 16
+.update11:
+ cmp counter,1
+ ble .cont11
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont11
+ or %g0,1,counter
+
+ .align 16
+.update12:
+ cmp counter,2
+ ble .cont12
+ fzeros %f8
+
+ stx %i0,[%fp+tmp_px]
+ add %i5,stridey,%o4
+ stx %o4,[%fp+tmp_py]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont12
+ or %g0,2,counter
+
+ .align 16
+.update13:
+ cmp counter,2
+ ble .cont13
+ fzeros %f17
+
+ stx %i0,[%fp+tmp_px]
+ stx %o4,[%fp+tmp_py]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont13
+ or %g0,2,counter
+
+ .align 16
+.update14:
+ cmp counter,3
+ ble .cont14
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ add %o4,stridey,%i5
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont14
+ or %g0,3,counter
+
+ .align 16
+.update15:
+ cmp counter,3
+ ble .cont15
+ fzeros %f17
+
+ sub %i1,stridex,%i2
+ stx %i2,[%fp+tmp_px]
+ stx %i5,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont15
+ or %g0,3,counter
+
+ .align 16
+.update16:
+ faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0;
+ cmp counter,4
+ ble .cont16
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ stx %i2,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont16
+ or %g0,4,counter
+
+ .align 16
+.update17:
+ cmp counter,4
+ ble .cont17
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ stx %i2,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont17
+ or %g0,4,counter
+
+ .align 16
+.update18:
+ cmp counter,5
+ ble .cont18
+ fzeros %f17
+
+ stx %l7,[%fp+tmp_px]
+ stx %o7,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont18
+ or %g0,5,counter
+
+ .align 16
+.update19:
+ fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1);
+ cmp counter,5
+ ble .cont19
+ fzeros %f17
+
+ stx %l7,[%fp+tmp_px]
+ stx %o7,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont19
+ or %g0,5,counter
+
+ .align 16
+.update19a:
+ cmp counter,5
+ ble .cont19a
+ fzeros %f17
+
+ stx %l7,[%fp+tmp_px]
+ stx %o7,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont19a
+ or %g0,5,counter
+
+ .align 16
+.update20:
+ faddd %f54,K1,%f54 ! (4_1) res0 += K1;
+ cmp counter,6
+ ble .cont20
+ fzeros %f17
+
+ stx %i1,[%fp+tmp_px]
+ add %o7,stridey,%g1
+ stx %g1,[%fp+tmp_py]
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont20
+ or %g0,6,counter
+
+.exit:
+ ret
+ restore
+ SET_SIZE(__vhypotf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vlog.S b/usr/src/libm/src/mvec/vis/__vlog.S
new file mode 100644
index 0000000..bf5e478
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vlog.S
@@ -0,0 +1,670 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vlog.S 1.8 06/01/23 SMI"
+
+ .file "__vlog.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 32
+TBL:
+ .word 0xbfd522ae, 0x0738a000
+ .word 0xbd2ebe70, 0x8164c759
+ .word 0xbfd3c252, 0x77333000
+ .word 0xbd183b54, 0xb606bd5c
+ .word 0xbfd26962, 0x1134e000
+ .word 0x3d31b61f, 0x10522625
+ .word 0xbfd1178e, 0x8227e000
+ .word 0xbd31ef78, 0xce2d07f2
+ .word 0xbfcf991c, 0x6cb3c000
+ .word 0x3d390d04, 0xcd7cc834
+ .word 0xbfcd1037, 0xf2656000
+ .word 0x3d084a7e, 0x75b6f6e4
+ .word 0xbfca93ed, 0x3c8ae000
+ .word 0x3d287243, 0x50562169
+ .word 0xbfc823c1, 0x6551a000
+ .word 0xbd1e0ddb, 0x9a631e83
+ .word 0xbfc5bf40, 0x6b544000
+ .word 0x3d127023, 0xeb68981c
+ .word 0xbfc365fc, 0xb015a000
+ .word 0x3d3fd3a0, 0xafb9691b
+ .word 0xbfc1178e, 0x8227e000
+ .word 0xbd21ef78, 0xce2d07f2
+ .word 0xbfbda727, 0x63844000
+ .word 0xbd1a8940, 0x1fa71733
+ .word 0xbfb9335e, 0x5d594000
+ .word 0xbd23115c, 0x3abd47da
+ .word 0xbfb4d311, 0x5d208000
+ .word 0x3cf53a25, 0x82f4e1ef
+ .word 0xbfb08598, 0xb59e4000
+ .word 0x3d17e5dd, 0x7009902c
+ .word 0xbfa894aa, 0x149f8000
+ .word 0xbd39a19a, 0x8be97661
+ .word 0xbfa0415d, 0x89e78000
+ .word 0x3d3dddc7, 0xf461c516
+ .word 0xbf902056, 0x58930000
+ .word 0xbd3611d2, 0x7c8e8417
+ .word 0x00000000, 0x00000000
+ .word 0x00000000, 0x00000000
+ .word 0x3f9f829b, 0x0e780000
+ .word 0x3d298026, 0x7c7e09e4
+ .word 0x3faf0a30, 0xc0110000
+ .word 0x3d48a998, 0x5f325c5c
+ .word 0x3fb6f0d2, 0x8ae58000
+ .word 0xbd34b464, 0x1b664613
+ .word 0x3fbe2707, 0x6e2b0000
+ .word 0xbd2a342c, 0x2af0003c
+ .word 0x3fc29552, 0xf8200000
+ .word 0xbd35b967, 0xf4471dfc
+ .word 0x3fc5ff30, 0x70a78000
+ .word 0x3d43d3c8, 0x73e20a07
+ .word 0x3fc9525a, 0x9cf44000
+ .word 0x3d46b476, 0x41307539
+ .word 0x3fcc8ff7, 0xc79a8000
+ .word 0x3d4a21ac, 0x25d81ef3
+ .word 0x3fcfb918, 0x6d5e4000
+ .word 0xbd0d572a, 0xab993c87
+ .word 0x3fd1675c, 0xababa000
+ .word 0x3d38380e, 0x731f55c4
+ .word 0x3fd2e8e2, 0xbae12000
+ .word 0xbd267b1e, 0x99b72bd8
+ .word 0x3fd4618b, 0xc21c6000
+ .word 0xbd13d82f, 0x484c84cc
+ .word 0x3fd5d1bd, 0xbf580000
+ .word 0x3d4394a1, 0x1b1c1ee4
+! constants:
+ .word 0x40000000,0x00000000
+ .word 0x3fe55555,0x555571da
+ .word 0x3fd99999,0x8702be3a
+ .word 0x3fd24af7,0x3f4569b1
+ .word 0x3ea62e42,0xfee00000 ! scaled by 2**-20
+ .word 0x3caa39ef,0x35793c76 ! scaled by 2**-20
+ .word 0xffff8000,0x00000000
+ .word 0x43200000
+ .word 0xfff00000
+ .word 0xc0194000
+ .word 0x4000
+
+#define two 0x200
+#define A1 0x208
+#define A2 0x210
+#define A3 0x218
+#define ln2hi 0x220
+#define ln2lo 0x228
+#define mask 0x230
+#define ox43200000 0x238
+#define oxfff00000 0x23c
+#define oxc0194000 0x240
+#define ox4000 0x244
+
+! local storage indices
+
+#define jnk STACK_BIAS-0x8
+#define tmp2 STACK_BIAS-0x10
+#define tmp1 STACK_BIAS-0x18
+#define tmp0 STACK_BIAS-0x20
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5
+
+! g1 TBL
+
+! l0 j0
+! l1 j1
+! l2 j2
+! l3
+! l4 0x94000
+! l5
+! l6 0x000fffff
+! l7 0x7ff00000
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3
+! o4
+! o5
+! o7
+
+! f0 u0,q0
+! f2 v0,(two-v0)-u0,z0
+! f4 n0,f0,q0
+! f6 s0
+! f8 q
+! f10 u1,q1
+! f12 v1,(two-v1)-u1,z1
+! f14 n1,f1,q1
+! f16 s1
+! f18 t
+! f20 u2,q2
+! f22 v2,(two-v2)-u2,q2
+! f24 n2,f2,q2
+! f26 s2
+! f28 0xfff00000
+! f29 0x43200000
+! f30 0x4000
+! f31 0xc0194000
+! f32 t0
+! f34 h0,f0-(c0-h0)
+! f36 c0
+! f38 A1
+! f40 two
+! f42 t1
+! f44 h1,f1-(c1-h1)
+! f46 c1
+! f48 A2
+! f50 0xffff8000...
+! f52 t2
+! f54 h2,f2-(c2-h2)
+! f56 c2
+! f58 A3
+! f60 ln2hi
+! f62 ln2lo
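+
+! A rough C sketch of the per-element computation implied by the comments
+! and register notes above; an illustration, not the shipped code.  It
+! reduces x to u*2**n with u near 1, picks a table point v close to u, and
+! evaluates log(u/v) = 2*atanh(s) with a short odd polynomial (A1, A2, A3
+! above are close to 2/3, 2/5, 2/7).  The table indexing, the hi/lo
+! (h,t,c) splitting, the 2**-20 scaling of ln2hi/ln2lo, and the three-way
+! software pipelining are all omitted; log(v) stands in for the
+! precomputed TBL entries.
+!
+!	#include <math.h>
+!	double log_sketch(double x)
+!	{
+!		int n;
+!		double u = 2.0 * frexp(x, &n);		/* u in [1,2), x = u * 2**(n-1) */
+!		n -= 1;
+!		if (u > 1.4375) { u *= 0.5; n += 1; }	/* keep u in about [0.72,1.44)  */
+!		double v = floor(32.0 * u + 0.5) / 32.0;/* nearest table grid point     */
+!		double s = (u - v) / (u + v);		/* log(u/v) = 2*atanh(s)        */
+!		double z = s * s;
+!		double p = s * (z * (2.0/3.0 + z * (2.0/5.0 + z * (2.0/7.0))));
+!		return n * M_LN2 + log(v) + 2.0 * s + p;
+!	}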
+
+ ENTRY(__vlog)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,TBL,o0)
+ mov %o0,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+ sethi %hi(0x94000),%l4
+ sethi %hi(0x000fffff),%l6
+ or %l6,%lo(0x000fffff),%l6
+ sethi %hi(0x7ff00000),%l7
+ ldd [%g1+two],%f40
+ ldd [%g1+A1],%f38
+ ldd [%g1+A2],%f48
+ ldd [%g1+A3],%f58
+ ldd [%g1+ln2hi],%f60
+ ldd [%g1+ln2lo],%f62
+ ldd [%g1+mask],%f50
+ ld [%g1+ox43200000],%f29
+ ld [%g1+oxfff00000],%f28
+ ld [%g1+oxc0194000],%f31
+ ld [%g1+ox4000],%f30
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,jnk,%o0 ! precondition loop
+ add %fp,jnk,%o1
+ add %fp,jnk,%o2
+ fzero %f2
+ fzero %f6
+ fzero %f18
+ fzero %f36
+ fzero %f12
+ fzero %f14
+ fzero %f16
+ fzero %f42
+ fzero %f44
+ fzero %f46
+ std %f46,[%fp+tmp1]
+ fzero %f24
+ fzero %f26
+ fzero %f52
+ fzero %f54
+ std %f54,[%fp+tmp2]
+ sub %i3,%i4,%i3
+ ld [%i1],%l0 ! ix
+ ld [%i1],%f0 ! u.l[0] = *x
+ ba .loop0
+ ld [%i1+4],%f1 ! u.l[1] = *(1+x)
+
+ .align 16
+! -- 16 byte aligned
+.loop0:
+ sub %l0,%l7,%o3
+ sub %l6,%l0,%o4
+ fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000
+ fmuld %f6,%f2,%f8 ! (previous iteration)
+
+ andcc %o3,%o4,%o4
+ bge,pn %icc,.range0 ! ix <= 0x000fffff or >= 0x7ff00000
+! delay slot
+ fands %f4,%f28,%f4
+
+ add %i1,%i2,%i1 ! x += stridex
+ add %i3,%i4,%i3 ! y += stridey
+ fpsub32s %f0,%f4,%f0 ! u.l[0] -= n
+
+.cont0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ add %l0,%l4,%l0 ! j = ix + 0x94000
+ fpadd32s %f0,%f30,%f2 ! v.l[0] = u.l[0] + 0x4000
+
+ lda [%i1]%asi,%f10
+ srl %l0,11,%l0 ! j = (j >> 11) & 0x1f0
+ fand %f2,%f50,%f2 ! v.l &= 0xffff8000...
+
+ lda [%i1+4]%asi,%f11
+ and %l0,0x1f0,%l0
+ fitod %f4,%f32 ! (double) n
+
+ add %l0,8,%l3
+ fsubd %f0,%f2,%f4 ! f = u.d - v.d
+
+ faddd %f0,%f2,%f6 ! s = f / (u.d + v.d)
+
+ fsubd %f40,%f2,%f2 ! two - v.d
+ fmuld %f32,%f60,%f34 ! h = n * ln2hi + TBL[j]
+
+ faddd %f8,%f18,%f8 ! y = c + (t + q)
+ fmuld %f32,%f62,%f32 ! t = n * ln2lo + TBL[j+1]
+
+ fdivd %f4,%f6,%f6
+
+ faddd %f54,%f24,%f56 ! c = h + f
+ fmuld %f26,%f26,%f22 ! z = s * s
+
+ faddd %f8,%f36,%f8
+ st %f8,[%o0]
+
+ st %f9,[%o0+4]
+ mov %i3,%o0
+ faddd %f14,%f38,%f14
+
+ fsubd %f56,%f54,%f54 ! t += f - (c - h)
+ fmuld %f22,%f58,%f20 ! q = ...
+
+ fsubd %f2,%f0,%f2 ! (two - v.d) - u.d
+ ldd [%g1+%l0],%f36
+
+ faddd %f42,%f44,%f18
+ fmuld %f12,%f14,%f14
+ ldd [%fp+tmp1],%f12
+
+ faddd %f20,%f48,%f20
+ nop
+
+ faddd %f34,%f36,%f34
+ ldd [%g1+%l3],%f0
+
+ faddd %f14,%f12,%f12
+
+ fsubd %f24,%f54,%f54
+ fmuld %f22,%f20,%f24
+
+ std %f2,[%fp+tmp0]
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop0
+! delay slot
+ faddd %f32,%f0,%f32
+
+! -- 16 byte aligned
+.loop1:
+ sub %l1,%l7,%o3
+ sub %l6,%l1,%o4
+ fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000
+ fmuld %f16,%f12,%f8 ! (previous iteration)
+
+ andcc %o3,%o4,%o4
+ bge,pn %icc,.range1 ! ix <= 0x000fffff or >= 0x7ff00000
+! delay slot
+ fands %f14,%f28,%f14
+
+ add %i1,%i2,%i1 ! x += stridex
+ add %i3,%i4,%i3 ! y += stridey
+ fpsub32s %f10,%f14,%f10 ! u.l[0] -= n
+
+.cont1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ add %l1,%l4,%l1 ! j = ix + 0x94000
+ fpadd32s %f10,%f30,%f12 ! v.l[0] = u.l[0] + 0x4000
+
+ lda [%i1]%asi,%f20
+ srl %l1,11,%l1 ! j = (j >> 11) & 0x1f0
+ fand %f12,%f50,%f12 ! v.l &= 0xffff8000...
+
+ lda [%i1+4]%asi,%f21
+ and %l1,0x1f0,%l1
+ fitod %f14,%f42 ! (double) n
+
+ add %l1,8,%l3
+ fsubd %f10,%f12,%f14 ! f = u.d - v.d
+
+ faddd %f10,%f12,%f16 ! s = f / (u.d + v.d)
+
+ fsubd %f40,%f12,%f12 ! two - v.d
+ fmuld %f42,%f60,%f44 ! h = n * ln2hi + TBL[j]
+
+ faddd %f8,%f18,%f8 ! y = c + (t + q)
+ fmuld %f42,%f62,%f42 ! t = n * ln2lo + TBL[j+1]
+
+ fdivd %f14,%f16,%f16
+
+ faddd %f34,%f4,%f36 ! c = h + f
+ fmuld %f6,%f6,%f2 ! z = s * s
+
+ faddd %f8,%f46,%f8
+ st %f8,[%o1]
+
+ st %f9,[%o1+4]
+ mov %i3,%o1
+ faddd %f24,%f38,%f24
+
+ fsubd %f36,%f34,%f34 ! t += f - (c - h)
+ fmuld %f2,%f58,%f0 ! q = ...
+
+ fsubd %f12,%f10,%f12 ! (two - v.d) - u.d
+ ldd [%g1+%l1],%f46
+
+ faddd %f52,%f54,%f18
+ fmuld %f22,%f24,%f24
+ ldd [%fp+tmp2],%f22
+
+ faddd %f0,%f48,%f0
+ nop
+
+ faddd %f44,%f46,%f44
+ ldd [%g1+%l3],%f10
+
+ faddd %f24,%f22,%f22
+
+ fsubd %f4,%f34,%f34
+ fmuld %f2,%f0,%f4
+
+ std %f12,[%fp+tmp1]
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop1
+! delay slot
+ faddd %f42,%f10,%f42
+
+! -- 16 byte aligned
+.loop2:
+ sub %l2,%l7,%o3
+ sub %l6,%l2,%o4
+ fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000
+ fmuld %f26,%f22,%f8 ! (previous iteration)
+
+ andcc %o3,%o4,%o4
+ bge,pn %icc,.range2 ! ix <= 0x000fffff or >= 0x7ff00000
+! delay slot
+ fands %f24,%f28,%f24
+
+ add %i1,%i2,%i1 ! x += stridex
+ add %i3,%i4,%i3 ! y += stridey
+ fpsub32s %f20,%f24,%f20 ! u.l[0] -= n
+
+.cont2:
+ lda [%i1]%asi,%l0 ! preload next argument
+ add %l2,%l4,%l2 ! j = ix + 0x94000
+ fpadd32s %f20,%f30,%f22 ! v.l[0] = u.l[0] + 0x4000
+
+ lda [%i1]%asi,%f0
+ srl %l2,11,%l2 ! j = (j >> 11) & 0x1f0
+ fand %f22,%f50,%f22 ! v.l &= 0xffff8000...
+
+ lda [%i1+4]%asi,%f1
+ and %l2,0x1f0,%l2
+ fitod %f24,%f52 ! (double) n
+
+ add %l2,8,%l3
+ fsubd %f20,%f22,%f24 ! f = u.d - v.d
+
+ faddd %f20,%f22,%f26 ! s = f / (u.d + v.d)
+
+ fsubd %f40,%f22,%f22 ! two - v.d
+ fmuld %f52,%f60,%f54 ! h = n * ln2hi + TBL[j]
+
+ faddd %f8,%f18,%f8 ! y = c + (t + q)
+ fmuld %f52,%f62,%f52 ! t = n * ln2lo + TBL[j+1]
+
+ fdivd %f24,%f26,%f26
+
+ faddd %f44,%f14,%f46 ! c = h + f
+ fmuld %f16,%f16,%f12 ! z = s * s
+
+ faddd %f8,%f56,%f8
+ st %f8,[%o2]
+
+ st %f9,[%o2+4]
+ mov %i3,%o2
+ faddd %f4,%f38,%f4
+
+ fsubd %f46,%f44,%f44 ! t += f - (c - h)
+ fmuld %f12,%f58,%f10 ! q = ...
+
+ fsubd %f22,%f20,%f22 ! (two - v.d) - u.d
+ ldd [%g1+%l2],%f56
+
+ faddd %f32,%f34,%f18
+ fmuld %f2,%f4,%f4
+ ldd [%fp+tmp0],%f2
+
+ faddd %f10,%f48,%f10
+ nop
+
+ faddd %f54,%f56,%f54
+ ldd [%g1+%l3],%f20
+
+ faddd %f4,%f2,%f2
+
+ fsubd %f14,%f44,%f44
+ fmuld %f12,%f10,%f14
+
+ std %f22,[%fp+tmp2]
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ faddd %f52,%f20,%f52
+
+
+! Once we get to the last element, we loop three more times to finish
+! the computations in progress. This means we will load past the end
+! of the argument vector, but since we use non-faulting loads and never
+! use the data, the only potential problem is cache miss. (Note that
+! when the argument is 2, the only exception that occurs in the compu-
+! tation is an inexact result in the final addition, and we break out
+! of the "extra" iterations before then.)
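+!
+! The same drain idea in scalar C, assuming a two-stage pipeline for the
+! sake of illustration (the loop above keeps three results in flight, so
+! it runs up to three extra iterations).  Past the end of the input a
+! harmless dummy value is fed in instead of reading x[n]; the assembly
+! does the analogous thing with non-faulting loads and the constant 2.
+!
+!	void pipelined(const double *x, double *y, int n)
+!	{
+!		double t = 0.0;				/* result still in flight       */
+!		for (int i = 0; i <= n; i++) {
+!			if (i > 0)
+!				y[i - 1] = t * t;	/* finish the previous element  */
+!			t = (i < n) ? x[i] + 1.0 : 2.0;	/* dummy argument past the end  */
+!		}
+!	}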
+.endloop2:
+ sethi %hi(0x40000000),%l0 ! "next argument" = two
+ cmp %i0,-3
+ bg,a,pt %icc,.loop0
+! delay slot
+ fmovd %f40,%f0
+ ret
+ restore
+
+ .align 16
+.endloop0:
+ sethi %hi(0x40000000),%l1 ! "next argument" = two
+ cmp %i0,-3
+ bg,a,pt %icc,.loop1
+! delay slot
+ fmovd %f40,%f10
+ ret
+ restore
+
+ .align 16
+.endloop1:
+ sethi %hi(0x40000000),%l2 ! "next argument" = two
+ cmp %i0,-3
+ bg,a,pt %icc,.loop2
+! delay slot
+ fmovd %f40,%f20
+ ret
+ restore
+
+
+ .align 16
+.range0:
+ cmp %l0,%l7
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+! delay slot
+ ld [%i1+4],%o5
+ fxtod %f0,%f0 ! scale by 2**1074 w/o trapping
+ st %f0,[%fp+tmp0]
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l0,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fpadd32s %f0,%f31,%f4 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands %f4,%f28,%f4
+ fpsub32s %f0,%f4,%f0 ! u.l[0] -= n
+ ld [%fp+tmp0],%l0
+ ba,pt %icc,.cont0
+! delay slot
+ fpsub32s %f4,%f29,%f4 ! n -= 0x43200000
+1:
+ fdivs %f29,%f1,%f4 ! raise div-by-zero
+ ba,pt %icc,3f
+! delay slot
+ st %f28,[%i3] ! store -inf
+2:
+ sll %l0,1,%l0 ! lop off sign bit
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l0,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fabsd %f0,%f4 ! *y = (x + |x|) * inf
+ faddd %f0,%f4,%f0
+ fand %f28,%f50,%f4
+ fnegd %f4,%f4
+ fmuld %f0,%f4,%f0
+ st %f0,[%i3]
+3:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop2
+! delay slot
+ st %f1,[%i3+4]
+ ld [%i1],%l0 ! get next argument
+ ld [%i1],%f0
+ ba,pt %icc,.loop0
+! delay slot
+ ld [%i1+4],%f1
+
+
+ .align 16
+.range1:
+ cmp %l1,%l7
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+! delay slot
+ ld [%i1+4],%o5
+ fxtod %f10,%f10 ! scale by 2**1074 w/o trapping
+ st %f10,[%fp+tmp1]
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l1,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fpadd32s %f10,%f31,%f14 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands %f14,%f28,%f14
+ fpsub32s %f10,%f14,%f10 ! u.l[0] -= n
+ ld [%fp+tmp1],%l1
+ ba,pt %icc,.cont1
+! delay slot
+ fpsub32s %f14,%f29,%f14 ! n -= 0x43200000
+1:
+ fdivs %f29,%f11,%f14 ! raise div-by-zero
+ ba,pt %icc,3f
+! delay slot
+ st %f28,[%i3] ! store -inf
+2:
+ sll %l1,1,%l1 ! lop off sign bit
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l1,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fabsd %f10,%f14 ! *y = (x + |x|) * inf
+ faddd %f10,%f14,%f10
+ fand %f28,%f50,%f14
+ fnegd %f14,%f14
+ fmuld %f10,%f14,%f10
+ st %f10,[%i3]
+3:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop0
+! delay slot
+ st %f11,[%i3+4]
+ ld [%i1],%l1 ! get next argument
+ ld [%i1],%f10
+ ba,pt %icc,.loop1
+! delay slot
+ ld [%i1+4],%f11
+
+
+ .align 16
+.range2:
+ cmp %l2,%l7
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+! delay slot
+ ld [%i1+4],%o5
+ fxtod %f20,%f20 ! scale by 2**1074 w/o trapping
+ st %f20,[%fp+tmp2]
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l2,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fpadd32s %f20,%f31,%f24 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands %f24,%f28,%f24
+ fpsub32s %f20,%f24,%f20 ! u.l[0] -= n
+ ld [%fp+tmp2],%l2
+ ba,pt %icc,.cont2
+! delay slot
+ fpsub32s %f24,%f29,%f24 ! n -= 0x43200000
+1:
+ fdivs %f29,%f21,%f24 ! raise div-by-zero
+ ba,pt %icc,3f
+! delay slot
+ st %f28,[%i3] ! store -inf
+2:
+ sll %l2,1,%l2 ! lop off sign bit
+ add %i1,%i2,%i1 ! x += stridex
+ orcc %l2,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ fabsd %f20,%f24 ! *y = (x + |x|) * inf
+ faddd %f20,%f24,%f20
+ fand %f28,%f50,%f24
+ fnegd %f24,%f24
+ fmuld %f20,%f24,%f20
+ st %f20,[%i3]
+3:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop1
+! delay slot
+ st %f21,[%i3+4]
+ ld [%i1],%l2 ! get next argument
+ ld [%i1],%f20
+ ba,pt %icc,.loop2
+! delay slot
+ ld [%i1+4],%f21
+
+ SET_SIZE(__vlog)
+
diff --git a/usr/src/libm/src/mvec/vis/__vlog_ultra3.S b/usr/src/libm/src/mvec/vis/__vlog_ultra3.S
new file mode 100644
index 0000000..aed1b59
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vlog_ultra3.S
@@ -0,0 +1,2904 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vlog_ultra3.S 1.9 06/01/23 SMI"
+
+ .file "__vlog_ultra3.S"
+
+#include "libm.h"
+#if defined(LIBMVEC_SO_BUILD)
+ .weak __vlog
+ .type __vlog,#function
+ __vlog = __vlog_ultra3
+#endif
+
+/*
+ * ELEVENBIT table and order 5 POLYNOMIAL no explicit correction term
+ */
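+
+! A note on the assumed layout of the table below (inferred from the data,
+! not from the original commentary): each pair of .word directives gives
+! the high and low 32-bit halves of one double, and consecutive doubles
+! appear to form head/tail pairs, the head chopped to the upper bits of a
+! log value and the tail carrying the residue, as in the TBL[j]/TBL[j+1]
+! usage in __vlog.S.  A hypothetical C reading of entry i from an array
+! w[] holding the raw words:
+!
+!	#include <stdint.h>
+!	#include <string.h>
+!	double tbl_entry(const uint32_t *w, int i)
+!	{
+!		uint64_t hb = ((uint64_t)w[4*i + 0] << 32) | w[4*i + 1];
+!		uint64_t tb = ((uint64_t)w[4*i + 2] << 32) | w[4*i + 3];
+!		double head, tail;
+!		memcpy(&head, &hb, sizeof head);
+!		memcpy(&tail, &tb, sizeof tail);
+!		return head + tail;	/* combined value; the asm keeps the two
+!					   pieces separate to preserve the extra
+!					   precision the pair represents */
+!	}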
+
+ RO_DATA
+ .align 64
+!! this is a new 11 bit table.
+TBL:
+ .word 0xbfd522ae, 0x0738a000
+ .word 0xbd2ebe70, 0x8164c759
+ .word 0xbfd5178d, 0x9ab55000
+ .word 0xbd35c153, 0x0fe963b3
+ .word 0xbfd50c6f, 0x1d11b000
+ .word 0xbd42f8ca, 0x40bec1ea
+ .word 0xbfd50152, 0x8da1f000
+ .word 0xbd42cfac, 0x6d29f4d7
+ .word 0xbfd4f637, 0xebba9000
+ .word 0xbd401f53, 0x9a676da3
+ .word 0xbfd4eb1f, 0x36b07000
+ .word 0xbd184047, 0x46e5797b
+ .word 0xbfd4e008, 0x6dd8b000
+ .word 0xbd4594b6, 0xaf0ddc3c
+ .word 0xbfd4d4f3, 0x90890000
+ .word 0xbd19fd79, 0x3a9f1441
+ .word 0xbfd4c9e0, 0x9e172000
+ .word 0xbd4877dd, 0xb93d49d7
+ .word 0xbfd4becf, 0x95d97000
+ .word 0xbd422662, 0x6ffee2c8
+ .word 0xbfd4b3c0, 0x77267000
+ .word 0xbd4d3497, 0x2fdf5a8c
+ .word 0xbfd4a8b3, 0x41552000
+ .word 0xbd46127e, 0x3d0dc8d1
+ .word 0xbfd49da7, 0xf3bcc000
+ .word 0xbd307b33, 0x4daf4b9a
+ .word 0xbfd4929e, 0x8db4e000
+ .word 0xbd3b9056, 0x556c70de
+ .word 0xbfd48797, 0x0e958000
+ .word 0xbd3dc1b8, 0x465cf25f
+ .word 0xbfd47c91, 0x75b6f000
+ .word 0xbd05acd1, 0x7009e35b
+ .word 0xbfd4718d, 0xc271c000
+ .word 0xbd306c18, 0xfb4c14c5
+ .word 0xbfd4668b, 0xf41ef000
+ .word 0xbd432874, 0x4e9d2b85
+ .word 0xbfd45b8c, 0x0a17d000
+ .word 0xbd4e26ed, 0xf182f57b
+ .word 0xbfd4508e, 0x03b61000
+ .word 0xbd40ef1c, 0x2579199c
+ .word 0xbfd44591, 0xe0539000
+ .word 0xbd4e916a, 0x76d6dc28
+ .word 0xbfd43a97, 0x9f4ac000
+ .word 0xbd23ee07, 0x6a81f88e
+ .word 0xbfd42f9f, 0x3ff62000
+ .word 0xbd390644, 0x0f7d3354
+ .word 0xbfd424a8, 0xc1b0c000
+ .word 0xbd2dc57c, 0x99ae2a25
+ .word 0xbfd419b4, 0x23d5e000
+ .word 0xbd418e43, 0x6ec90e0a
+ .word 0xbfd40ec1, 0x65c13000
+ .word 0xbd3f59a8, 0xa01757f6
+ .word 0xbfd403d0, 0x86cea000
+ .word 0xbd3e6ef5, 0x74487308
+ .word 0xbfd3f8e1, 0x865a8000
+ .word 0xbd26f338, 0x912773e3
+ .word 0xbfd3edf4, 0x63c16000
+ .word 0xbd407cc1, 0xeb4069e1
+ .word 0xbfd3e309, 0x1e604000
+ .word 0xbd43f634, 0xa2afb68d
+ .word 0xbfd3d81f, 0xb5946000
+ .word 0xbd4b74e0, 0xf558b217
+ .word 0xbfd3cd38, 0x28bb6000
+ .word 0xbd489faf, 0xb06c8342
+ .word 0xbfd3c252, 0x77333000
+ .word 0xbd183b54, 0xb606bd5c
+ .word 0xbfd3b76e, 0xa059f000
+ .word 0xbd47b5cf, 0x9912c7cb
+ .word 0xbfd3ac8c, 0xa38e5000
+ .word 0xbd48bd04, 0x10ff506d
+ .word 0xbfd3a1ac, 0x802f3000
+ .word 0xbd398ecf, 0x399abd8d
+ .word 0xbfd396ce, 0x359bb000
+ .word 0xbd4ea7c6, 0x3a99c99c
+ .word 0xbfd38bf1, 0xc3337000
+ .word 0xbd4ce9e9, 0x41e9516d
+ .word 0xbfd38117, 0x28564000
+ .word 0xbd496386, 0xdb17e3f5
+ .word 0xbfd3763e, 0x64645000
+ .word 0xbd318b1f, 0x291dcb56
+ .word 0xbfd36b67, 0x76be1000
+ .word 0xbd116ecd, 0xb0f177c8
+ .word 0xbfd36092, 0x5ec44000
+ .word 0xbd4eb929, 0xf344bbd1
+ .word 0xbfd355bf, 0x1bd82000
+ .word 0xbd491599, 0x1da6c3c6
+ .word 0xbfd34aed, 0xad5b1000
+ .word 0xbd3a2aac, 0xf2be1fdd
+ .word 0xbfd3401e, 0x12aec000
+ .word 0xbd4741c6, 0x5548eb71
+ .word 0xbfd33550, 0x4b355000
+ .word 0xbd446efc, 0x89cefc92
+ .word 0xbfd32a84, 0x56512000
+ .word 0xbd04f928, 0x139af5d6
+ .word 0xbfd31fba, 0x3364c000
+ .word 0xbd4a08d8, 0x6ce5a16e
+ .word 0xbfd314f1, 0xe1d35000
+ .word 0xbd49c761, 0x4b37b0d2
+ .word 0xbfd30a2b, 0x61001000
+ .word 0xbd4a53e9, 0x6290ef5b
+ .word 0xbfd2ff66, 0xb04ea000
+ .word 0xbd43a896, 0xd5f0c8e9
+ .word 0xbfd2f4a3, 0xcf22e000
+ .word 0xbd4b8693, 0xf85f2705
+ .word 0xbfd2e9e2, 0xbce12000
+ .word 0xbd24300c, 0x128d1dc2
+ .word 0xbfd2df23, 0x78edd000
+ .word 0xbce292b7, 0xcd95c595
+ .word 0xbfd2d466, 0x02adc000
+ .word 0xbd49dcbc, 0x88caaf9b
+ .word 0xbfd2c9aa, 0x59863000
+ .word 0xbd4a7f90, 0xe829d4d2
+ .word 0xbfd2bef0, 0x7cdc9000
+ .word 0xbd2a9cfa, 0x4a5004f4
+ .word 0xbfd2b438, 0x6c168000
+ .word 0xbd4e1827, 0x3a343630
+ .word 0xbfd2a982, 0x269a3000
+ .word 0xbd4b7e9c, 0x6aa35e8c
+ .word 0xbfd29ecd, 0xabcdf000
+ .word 0xbd44073b, 0x3bdc2243
+ .word 0xbfd2941a, 0xfb186000
+ .word 0xbd46f79e, 0xa4678ebb
+ .word 0xbfd2896a, 0x13e08000
+ .word 0xbd3a8ed0, 0x27e16952
+ .word 0xbfd27eba, 0xf58d8000
+ .word 0xbd49399d, 0xffd2d096
+ .word 0xbfd2740d, 0x9f870000
+ .word 0xbd45f660, 0x0b9a802a
+ .word 0xbfd26962, 0x1134d000
+ .word 0xbd4724f0, 0x77d6ecee
+ .word 0xbfd25eb8, 0x49ff2000
+ .word 0xbd310c25, 0x03f76b8e
+ .word 0xbfd25410, 0x494e5000
+ .word 0xbd3b1d7a, 0xc0ef77f2
+ .word 0xbfd2496a, 0x0e8b3000
+ .word 0xbd003238, 0x687cfe2e
+ .word 0xbfd23ec5, 0x991eb000
+ .word 0xbd44920d, 0xdbae8d6f
+ .word 0xbfd23422, 0xe8724000
+ .word 0xbd40708a, 0x931c895b
+ .word 0xbfd22981, 0xfbef7000
+ .word 0xbd42f5ef, 0x4fb53f93
+ .word 0xbfd21ee2, 0xd3003000
+ .word 0xbd40382e, 0x41be00e3
+ .word 0xbfd21445, 0x6d0eb000
+ .word 0xbd41a87d, 0xeba46baf
+ .word 0xbfd209a9, 0xc9857000
+ .word 0xbd45b053, 0x3ba9c94d
+ .word 0xbfd1ff0f, 0xe7cf4000
+ .word 0xbd3e9d5b, 0x513ff0c1
+ .word 0xbfd1f477, 0xc7573000
+ .word 0xbd26d6d4, 0x010d751a
+ .word 0xbfd1e9e1, 0x67889000
+ .word 0xbd43e8a8, 0x961ba4d1
+ .word 0xbfd1df4c, 0xc7cf2000
+ .word 0xbd30b43f, 0x0455f7e4
+ .word 0xbfd1d4b9, 0xe796c000
+ .word 0xbd222a66, 0x7c42e56d
+ .word 0xbfd1ca28, 0xc64ba000
+ .word 0xbd4ca760, 0xf7a15533
+ .word 0xbfd1bf99, 0x635a6000
+ .word 0xbd4729bb, 0x5451ef6e
+ .word 0xbfd1b50b, 0xbe2fc000
+ .word 0xbd38ecd7, 0x3263201f
+ .word 0xbfd1aa7f, 0xd638d000
+ .word 0xbd29f60a, 0x9616f7a0
+ .word 0xbfd19ff5, 0xaae2f000
+ .word 0xbce69fd9, 0x9ec05ba8
+ .word 0xbfd1956d, 0x3b9bc000
+ .word 0xbd27d2f7, 0x3ad1aa14
+ .word 0xbfd18ae6, 0x87d13000
+ .word 0xbd43a034, 0x64df39ff
+ .word 0xbfd18061, 0x8ef18000
+ .word 0xbd45be80, 0x1bc9638d
+ .word 0xbfd175de, 0x506b3000
+ .word 0xbd30c07c, 0x4da5752f
+ .word 0xbfd16b5c, 0xcbacf000
+ .word 0xbd46e6b3, 0x7de945a0
+ .word 0xbfd160dd, 0x0025e000
+ .word 0xbd4ba5c1, 0xc499684a
+ .word 0xbfd1565e, 0xed455000
+ .word 0xbd4f8629, 0x48125517
+ .word 0xbfd14be2, 0x927ae000
+ .word 0xbd49a817, 0xc85685e2
+ .word 0xbfd14167, 0xef367000
+ .word 0xbd3e0c07, 0x824daaf5
+ .word 0xbfd136ef, 0x02e82000
+ .word 0xbd4217d3, 0xe78d3ed8
+ .word 0xbfd12c77, 0xcd007000
+ .word 0xbd13b294, 0x8a11f797
+ .word 0xbfd12202, 0x4cf00000
+ .word 0xbd38fdd9, 0x76fabda5
+ .word 0xbfd1178e, 0x8227e000
+ .word 0xbd31ef78, 0xce2d07f2
+ .word 0xbfd10d1c, 0x6c194000
+ .word 0xbd4cb3de, 0x00324ee4
+ .word 0xbfd102ac, 0x0a35c000
+ .word 0xbd483810, 0x88080a5e
+ .word 0xbfd0f83d, 0x5bef2000
+ .word 0xbd475fa0, 0x37a37ba8
+ .word 0xbfd0edd0, 0x60b78000
+ .word 0xbd0019b5, 0x2d8435f5
+ .word 0xbfd0e365, 0x18012000
+ .word 0xbd2a5943, 0x8bbdca93
+ .word 0xbfd0d8fb, 0x813eb000
+ .word 0xbd1ee8c8, 0x8753fa35
+ .word 0xbfd0ce93, 0x9be30000
+ .word 0xbd4e8266, 0xd788ddf1
+ .word 0xbfd0c42d, 0x67616000
+ .word 0xbd27188b, 0x163ceae9
+ .word 0xbfd0b9c8, 0xe32d1000
+ .word 0xbd42224e, 0x89208f94
+ .word 0xbfd0af66, 0x0eb9e000
+ .word 0xbd23c7c3, 0xf528d80a
+ .word 0xbfd0a504, 0xe97bb000
+ .word 0xbd303094, 0xe6690c44
+ .word 0xbfd09aa5, 0x72e6c000
+ .word 0xbd3b50a1, 0xe1734342
+ .word 0xbfd09047, 0xaa6f9000
+ .word 0xbd3f18e8, 0x3ce75c0e
+ .word 0xbfd085eb, 0x8f8ae000
+ .word 0xbd3e5d51, 0x3f45fe7b
+ .word 0xbfd07b91, 0x21adb000
+ .word 0xbd4520ba, 0x8e9b8a72
+ .word 0xbfd07138, 0x604d5000
+ .word 0xbd40c4e6, 0xd8b76a75
+ .word 0xbfd066e1, 0x4adf4000
+ .word 0xbd47f6bb, 0x351a4a71
+ .word 0xbfd05c8b, 0xe0d96000
+ .word 0xbd2ad0f1, 0xc77ccb58
+ .word 0xbfd05238, 0x21b1a000
+ .word 0xbd4ec752, 0xd39776ce
+ .word 0xbfd047e6, 0x0cde8000
+ .word 0xbd2dbdf1, 0x0d397f3c
+ .word 0xbfd03d95, 0xa1d67000
+ .word 0xbd3a1788, 0x0f236109
+ .word 0xbfd03346, 0xe0106000
+ .word 0xbcf89ff8, 0xa966395c
+ .word 0xbfd028f9, 0xc7035000
+ .word 0xbd483851, 0x858333c0
+ .word 0xbfd01eae, 0x5626c000
+ .word 0xbd3a43dc, 0xfade85ae
+ .word 0xbfd01464, 0x8cf23000
+ .word 0xbd4d082a, 0x567b45ed
+ .word 0xbfd00a1c, 0x6adda000
+ .word 0xbd31cd8d, 0x688b9e18
+ .word 0xbfcfffab, 0xdec23000
+ .word 0xbd236a1a, 0xdb4a75a4
+ .word 0xbfcfeb22, 0x33ea0000
+ .word 0xbd2f3418, 0xde00938b
+ .word 0xbfcfd69b, 0xd4240000
+ .word 0xbd3641a8, 0xff2ccc45
+ .word 0xbfcfc218, 0xbe620000
+ .word 0xbd34bba4, 0x6f1cf6a0
+ .word 0xbfcfad98, 0xf1965000
+ .word 0xbd16ee92, 0x73d7c2de
+ .word 0xbfcf991c, 0x6cb3b000
+ .word 0xbd1bcbec, 0xca0cdf30
+ .word 0xbfcf84a3, 0x2ead7000
+ .word 0xbd386af1, 0xd33d9e37
+ .word 0xbfcf702d, 0x36777000
+ .word 0xbd3bdf9a, 0xba663077
+ .word 0xbfcf5bba, 0x83060000
+ .word 0xbd341b25, 0x4a43da63
+ .word 0xbfcf474b, 0x134df000
+ .word 0xbd1146d8, 0x38821289
+ .word 0xbfcf32de, 0xe6448000
+ .word 0xbd2efb83, 0x625f1609
+ .word 0xbfcf1e75, 0xfadf9000
+ .word 0xbd37bcea, 0x6d13e04a
+ .word 0xbfcf0a10, 0x50157000
+ .word 0xbd3dad5f, 0x7347f55b
+ .word 0xbfcef5ad, 0xe4dcf000
+ .word 0xbd3fcbbd, 0xd53488e4
+ .word 0xbfcee14e, 0xb82d6000
+ .word 0xbd39d172, 0x6f4de261
+ .word 0xbfceccf2, 0xc8fe9000
+ .word 0xbd104e71, 0x7062a6fe
+ .word 0xbfceb89a, 0x1648b000
+ .word 0xbd32e26f, 0x74808b80
+ .word 0xbfcea444, 0x9f04a000
+ .word 0xbd35e916, 0x63732a36
+ .word 0xbfce8ff2, 0x622ba000
+ .word 0xbd378e13, 0xd33981e5
+ .word 0xbfce7ba3, 0x5eb77000
+ .word 0xbd3c5422, 0x3b90d937
+ .word 0xbfce6757, 0x93a26000
+ .word 0xbd01dc8e, 0xc0554762
+ .word 0xbfce530e, 0xffe71000
+ .word 0xbcc21227, 0x6041f430
+ .word 0xbfce3ec9, 0xa280c000
+ .word 0xbd14bd96, 0x3fb80bff
+ .word 0xbfce2a87, 0x7a6b2000
+ .word 0xbd382381, 0x7787081a
+ .word 0xbfce1648, 0x86a27000
+ .word 0xbd36ce95, 0xba645527
+ .word 0xbfce020c, 0xc6235000
+ .word 0xbd356a7f, 0xa92375ee
+ .word 0xbfcdedd4, 0x37eae000
+ .word 0xbd3e0125, 0x53595898
+ .word 0xbfcdd99e, 0xdaf6d000
+ .word 0xbd2fa273, 0x2c71522a
+ .word 0xbfcdc56c, 0xae452000
+ .word 0xbd3eb37a, 0xa24e1817
+ .word 0xbfcdb13d, 0xb0d48000
+ .word 0xbd32806a, 0x847527e6
+ .word 0xbfcd9d11, 0xe1a3f000
+ .word 0xbd19da04, 0xfa9fa4c6
+ .word 0xbfcd88e9, 0x3fb2f000
+ .word 0xbd2141af, 0xfb96815e
+ .word 0xbfcd74c3, 0xca018000
+ .word 0xbd393e4c, 0xfa17dce1
+ .word 0xbfcd60a1, 0x7f903000
+ .word 0xbd24523f, 0x207be58e
+ .word 0xbfcd4c82, 0x5f5fd000
+ .word 0xbd3e3f04, 0x21df291e
+ .word 0xbfcd3866, 0x6871f000
+ .word 0xbd21935e, 0x98ed9a88
+ .word 0xbfcd244d, 0x99c85000
+ .word 0xbd29cfb0, 0x0c890770
+ .word 0xbfcd1037, 0xf2655000
+ .word 0xbd3cf6b0, 0x31492124
+ .word 0xbfccfc25, 0x714bd000
+ .word 0xbd39fbd3, 0x34e03910
+ .word 0xbfcce816, 0x157f1000
+ .word 0xbd330faa, 0x2efb3576
+ .word 0xbfccd409, 0xde02d000
+ .word 0xbd132115, 0x39f1dcc5
+ .word 0xbfccc000, 0xc9db3000
+ .word 0xbd38a4a9, 0xe8aa1402
+ .word 0xbfccabfa, 0xd80d0000
+ .word 0xbd11e253, 0x70a10e3e
+ .word 0xbfcc97f8, 0x079d4000
+ .word 0xbd23b161, 0xa8c6e6c5
+ .word 0xbfcc83f8, 0x57919000
+ .word 0xbd358740, 0x00c94a0f
+ .word 0xbfcc6ffb, 0xc6f00000
+ .word 0xbd3ee138, 0xd3a69d43
+ .word 0xbfcc5c02, 0x54bf2000
+ .word 0xbd1d2f55, 0x73da163b
+ .word 0xbfcc480c, 0x0005c000
+ .word 0xbd39a294, 0xd5e44e76
+ .word 0xbfcc3418, 0xc7cb7000
+ .word 0xbd234b5d, 0xe46e0516
+ .word 0xbfcc2028, 0xab17f000
+ .word 0xbd3368f8, 0x8d51c29d
+ .word 0xbfcc0c3b, 0xa8f3a000
+ .word 0xbd3ac339, 0x48e7f56a
+ .word 0xbfcbf851, 0xc0675000
+ .word 0xbd257be3, 0x67ef56a7
+ .word 0xbfcbe46a, 0xf07c2000
+ .word 0xbd350591, 0x910f505a
+ .word 0xbfcbd087, 0x383bd000
+ .word 0xbd315a1d, 0xd355f6a5
+ .word 0xbfcbbca6, 0x96b07000
+ .word 0xbd3d0045, 0xea3f2624
+ .word 0xbfcba8c9, 0x0ae4a000
+ .word 0xbd3a32e7, 0xf44432da
+ .word 0xbfcb94ee, 0x93e36000
+ .word 0xbd2f2a06, 0xe2db48a3
+ .word 0xbfcb8117, 0x30b82000
+ .word 0xbd1e9068, 0x3b9cd768
+ .word 0xbfcb6d42, 0xe06ec000
+ .word 0xbd302afe, 0x254869ba
+ .word 0xbfcb5971, 0xa213a000
+ .word 0xbd39b50e, 0x83aa91df
+ .word 0xbfcb45a3, 0x74b39000
+ .word 0xbd3701df, 0x22138fc3
+ .word 0xbfcb31d8, 0x575bc000
+ .word 0xbd3c794e, 0x562a63cb
+ .word 0xbfcb1e10, 0x4919e000
+ .word 0xbd3fa006, 0x2597f33a
+ .word 0xbfcb0a4b, 0x48fc1000
+ .word 0xbd368c69, 0x51e3338a
+ .word 0xbfcaf689, 0x5610d000
+ .word 0xbd375beb, 0xba042b64
+ .word 0xbfcae2ca, 0x6f672000
+ .word 0xbd37a8d5, 0xae54f550
+ .word 0xbfcacf0e, 0x940e7000
+ .word 0xbd2800e3, 0xa7e64e07
+ .word 0xbfcabb55, 0xc3169000
+ .word 0xbd1d6694, 0xd43acc9f
+ .word 0xbfcaa79f, 0xfb8fc000
+ .word 0xbd3a8bf1, 0x1c0d8aaa
+ .word 0xbfca93ed, 0x3c8ad000
+ .word 0xbd33c6de, 0x57d4ef4c
+ .word 0xbfca803d, 0x8518d000
+ .word 0xbd3e09d1, 0x87f293cc
+ .word 0xbfca6c90, 0xd44b7000
+ .word 0xbce38901, 0xf909e74b
+ .word 0xbfca58e7, 0x29348000
+ .word 0xbd3e867d, 0x504551b1
+ .word 0xbfca4540, 0x82e6a000
+ .word 0xbd360a77, 0xc81f7171
+ .word 0xbfca319c, 0xe074a000
+ .word 0xbcbd7dba, 0xe650d5b3
+ .word 0xbfca1dfc, 0x40f1b000
+ .word 0xbd2fc3e1, 0xff6190fe
+ .word 0xbfca0a5e, 0xa371a000
+ .word 0xbd322191, 0x988b2e31
+ .word 0xbfc9f6c4, 0x07089000
+ .word 0xbd29904d, 0x6865817a
+ .word 0xbfc9e32c, 0x6acb0000
+ .word 0xbd3e5e8d, 0xbc0fb4ac
+ .word 0xbfc9cf97, 0xcdce0000
+ .word 0xbd3d862f, 0x10c414e3
+ .word 0xbfc9bc06, 0x2f26f000
+ .word 0xbd3874d8, 0x1809e6d5
+ .word 0xbfc9a877, 0x8deba000
+ .word 0xbd3470fa, 0x3efec390
+ .word 0xbfc994eb, 0xe9325000
+ .word 0xbd2a9c9d, 0x28bcbe25
+ .word 0xbfc98163, 0x4011a000
+ .word 0xbd34eadd, 0x9e9045e2
+ .word 0xbfc96ddd, 0x91a0b000
+ .word 0xbd32ac6b, 0x11cf6f2b
+ .word 0xbfc95a5a, 0xdcf70000
+ .word 0xbd07f228, 0x58a0ff6f
+ .word 0xbfc946db, 0x212c6000
+ .word 0xbd36cf76, 0x74ca02ba
+ .word 0xbfc9335e, 0x5d594000
+ .word 0xbd33115c, 0x3abd47da
+ .word 0xbfc91fe4, 0x90965000
+ .word 0xbd30369c, 0xf30a1c32
+ .word 0xbfc90c6d, 0xb9fcb000
+ .word 0xbd39b282, 0xa239ca0d
+ .word 0xbfc8f8f9, 0xd8a60000
+ .word 0xbd2af16c, 0x8230ceca
+ .word 0xbfc8e588, 0xebac2000
+ .word 0xbd3b7d5c, 0xab2d1140
+ .word 0xbfc8d21a, 0xf2299000
+ .word 0xbd14d652, 0x74757226
+ .word 0xbfc8beaf, 0xeb38f000
+ .word 0xbd3d1855, 0x6aa2da66
+ .word 0xbfc8ab47, 0xd5f5a000
+ .word 0xbd187eb8, 0x505d468f
+ .word 0xbfc897e2, 0xb17b1000
+ .word 0xbd334a64, 0x63f9a0b1
+ .word 0xbfc88480, 0x7ce56000
+ .word 0xbd1c77ce, 0xf4a8712c
+ .word 0xbfc87121, 0x3750e000
+ .word 0xbd3328eb, 0x42f9af75
+ .word 0xbfc85dc4, 0xdfda7000
+ .word 0xbd3785ab, 0x048301ba
+ .word 0xbfc84a6b, 0x759f5000
+ .word 0xbd02ebfe, 0xa903cfb8
+ .word 0xbfc83714, 0xf7bd0000
+ .word 0xbd2ed83a, 0xf85a2ced
+ .word 0xbfc823c1, 0x6551a000
+ .word 0xbd1e0ddb, 0x9a631e83
+ .word 0xbfc81070, 0xbd7b9000
+ .word 0xbcafe80a, 0x6682e646
+ .word 0xbfc7fd22, 0xff599000
+ .word 0xbd3a9d05, 0x02ea120c
+ .word 0xbfc7e9d8, 0x2a0b0000
+ .word 0xbd116849, 0xfa40e4f0
+ .word 0xbfc7d690, 0x3caf5000
+ .word 0xbd359fca, 0x741e7f15
+ .word 0xbfc7c34b, 0x3666a000
+ .word 0xbd3175c9, 0x81b45e10
+ .word 0xbfc7b009, 0x16515000
+ .word 0xbd146280, 0xd3e606a3
+ .word 0xbfc79cc9, 0xdb902000
+ .word 0xbd1e00d0, 0x375e70bd
+ .word 0xbfc7898d, 0x85444000
+ .word 0xbd38e67b, 0xe3dbaf3f
+ .word 0xbfc77654, 0x128f6000
+ .word 0xbd0274ba, 0xdf268e7c
+ .word 0xbfc7631d, 0x82935000
+ .word 0xbd350c41, 0x1c1d060f
+ .word 0xbfc74fe9, 0xd4729000
+ .word 0xbd249736, 0xd91da11e
+ .word 0xbfc73cb9, 0x074fd000
+ .word 0xbd04cab7, 0x97ffd2cc
+ .word 0xbfc7298b, 0x1a4e3000
+ .word 0xbd15accc, 0xe43ce383
+ .word 0xbfc71660, 0x0c914000
+ .word 0xbce51b15, 0x7cec3838
+ .word 0xbfc70337, 0xdd3ce000
+ .word 0xbd206a17, 0x8a5eab9c
+ .word 0xbfc6f012, 0x8b756000
+ .word 0xbd357739, 0x0d31ef0f
+ .word 0xbfc6dcf0, 0x165f8000
+ .word 0xbd1b9566, 0x9a33e4c6
+ .word 0xbfc6c9d0, 0x7d203000
+ .word 0xbd3f8e30, 0x14099349
+ .word 0xbfc6b6b3, 0xbedd1000
+ .word 0xbd1a8f73, 0xa64d3813
+ .word 0xbfc6a399, 0xdabbd000
+ .word 0xbd1c1b2c, 0x6657a967
+ .word 0xbfc69082, 0xcfe2b000
+ .word 0xbd2da1e7, 0x20b79662
+ .word 0xbfc67d6e, 0x9d785000
+ .word 0xbd2dc2ef, 0x9eb1f25a
+ .word 0xbfc66a5d, 0x42a3a000
+ .word 0xbd3a6893, 0x3aa00298
+ .word 0xbfc6574e, 0xbe8c1000
+ .word 0xbd19cf8b, 0x2c3c2e78
+ .word 0xbfc64443, 0x10594000
+ .word 0xbd22f605, 0xb0281916
+ .word 0xbfc6313a, 0x37335000
+ .word 0xbd3aec82, 0xac378565
+ .word 0xbfc61e34, 0x3242d000
+ .word 0xbd32bb2d, 0x97ecd861
+ .word 0xbfc60b31, 0x00b09000
+ .word 0xbd21d752, 0x6cee0fd8
+ .word 0xbfc5f830, 0xa1a5c000
+ .word 0xbd352268, 0x98ffc1bc
+ .word 0xbfc5e533, 0x144c1000
+ .word 0xbd2c63e8, 0x189ade2b
+ .word 0xbfc5d238, 0x57cd7000
+ .word 0xbd23530a, 0x5ba6e7ac
+ .word 0xbfc5bf40, 0x6b543000
+ .word 0xbd3b63f7, 0x0525d9f9
+ .word 0xbfc5ac4b, 0x4e0b2000
+ .word 0xbd351709, 0xd7275f36
+ .word 0xbfc59958, 0xff1d5000
+ .word 0xbd178be9, 0xa258d7eb
+ .word 0xbfc58669, 0x7db62000
+ .word 0xbd39e26c, 0x65e8cb44
+ .word 0xbfc5737c, 0xc9018000
+ .word 0xbd39baa7, 0xa6b887f6
+ .word 0xbfc56092, 0xe02ba000
+ .word 0xbd245850, 0x06899d98
+ .word 0xbfc54dab, 0xc2610000
+ .word 0xbd2746fe, 0xe5c8d0d8
+ .word 0xbfc53ac7, 0x6ece9000
+ .word 0xbd39ca8a, 0x2a8725d5
+ .word 0xbfc527e5, 0xe4a1b000
+ .word 0xbd2633e8, 0xe5697dc7
+ .word 0xbfc51507, 0x2307f000
+ .word 0xbd306b11, 0xecc0d77b
+ .word 0xbfc5022b, 0x292f6000
+ .word 0xbd348a05, 0xff36a25b
+ .word 0xbfc4ef51, 0xf6466000
+ .word 0xbd3bc83d, 0x21c8cd53
+ .word 0xbfc4dc7b, 0x897bc000
+ .word 0xbd0c79b6, 0x0ae1ff0f
+ .word 0xbfc4c9a7, 0xe1fe8000
+ .word 0xbcff39f7, 0x50dbbb30
+ .word 0xbfc4b6d6, 0xfefe2000
+ .word 0xbd1522ec, 0xf56e7952
+ .word 0xbfc4a408, 0xdfaa7000
+ .word 0xbd33b41f, 0x86e5dd72
+ .word 0xbfc4913d, 0x8333b000
+ .word 0xbd258379, 0x54fdb678
+ .word 0xbfc47e74, 0xe8ca5000
+ .word 0xbd3ef836, 0xa48fdfcf
+ .word 0xbfc46baf, 0x0f9f5000
+ .word 0xbd3b6d8c, 0xbe1bdef9
+ .word 0xbfc458eb, 0xf6e3f000
+ .word 0xbcf5c0fe, 0x1f2b8094
+ .word 0xbfc4462b, 0x9dc9b000
+ .word 0xbd1ede9d, 0x63b93e7a
+ .word 0xbfc4336e, 0x03829000
+ .word 0xbd3ac363, 0xa859c2af
+ .word 0xbfc420b3, 0x2740f000
+ .word 0xbd3ba75f, 0x4de97ddf
+ .word 0xbfc40dfb, 0x08378000
+ .word 0xbc9bb453, 0xc4f7b685
+ .word 0xbfc3fb45, 0xa5992000
+ .word 0xbd319713, 0xc0cae559
+ .word 0xbfc3e892, 0xfe995000
+ .word 0xbd2b6aad, 0x914d5249
+ .word 0xbfc3d5e3, 0x126bc000
+ .word 0xbd13fb2f, 0x85096c4b
+ .word 0xbfc3c335, 0xe0447000
+ .word 0xbd3ae77d, 0x114a8b5f
+ .word 0xbfc3b08b, 0x6757f000
+ .word 0xbd15485c, 0x35b37c15
+ .word 0xbfc39de3, 0xa6dae000
+ .word 0xbd284fc7, 0x32ce95f1
+ .word 0xbfc38b3e, 0x9e027000
+ .word 0xbd21e21f, 0x5747d00e
+ .word 0xbfc3789c, 0x4c041000
+ .word 0xbd19b4f4, 0x44d31e60
+ .word 0xbfc365fc, 0xb0159000
+ .word 0xbcc62fa8, 0x234b7289
+ .word 0xbfc3535f, 0xc96d1000
+ .word 0xbd013f1c, 0x3b1fab68
+ .word 0xbfc340c5, 0x97411000
+ .word 0xbd20b846, 0x104c58f3
+ .word 0xbfc32e2e, 0x18c86000
+ .word 0xbd3e6220, 0x6c327115
+ .word 0xbfc31b99, 0x4d3a4000
+ .word 0xbd3f098e, 0xe3a50810
+ .word 0xbfc30907, 0x33ce3000
+ .word 0xbd33f323, 0x7c4d853e
+ .word 0xbfc2f677, 0xcbbc0000
+ .word 0xbd352b30, 0x2160f40d
+ .word 0xbfc2e3eb, 0x143bf000
+ .word 0xbd218910, 0x2710016e
+ .word 0xbfc2d161, 0x0c868000
+ .word 0xbd039d6c, 0xcb81b4a1
+ .word 0xbfc2bed9, 0xb3d49000
+ .word 0xbd095245, 0x4a40d26b
+ .word 0xbfc2ac55, 0x095f5000
+ .word 0xbd38b2e6, 0x4bce4dd6
+ .word 0xbfc299d3, 0x0c606000
+ .word 0xbd3d4d00, 0x79dc08d9
+ .word 0xbfc28753, 0xbc11a000
+ .word 0xbd37494e, 0x359302e6
+ .word 0xbfc274d7, 0x17ad4000
+ .word 0xbd38a65b, 0xa0967592
+ .word 0xbfc2625d, 0x1e6dd000
+ .word 0xbd3ead69, 0xd0f61c28
+ .word 0xbfc24fe5, 0xcf8e4000
+ .word 0xbd318f96, 0x26b10d30
+ .word 0xbfc23d71, 0x2a49c000
+ .word 0xbd100d23, 0x8fd3df5c
+ .word 0xbfc22aff, 0x2ddbd000
+ .word 0xbd32e1ea, 0xca7cb4f0
+ .word 0xbfc2188f, 0xd9807000
+ .word 0xbd131786, 0x02bce3fb
+ .word 0xbfc20623, 0x2c73c000
+ .word 0xbd2351a5, 0x02bb95f5
+ .word 0xbfc1f3b9, 0x25f25000
+ .word 0xbd3a822c, 0x593df273
+ .word 0xbfc1e151, 0xc5391000
+ .word 0xbd38e5f5, 0xf578d80e
+ .word 0xbfc1ceed, 0x09853000
+ .word 0xbd2d47c7, 0x8dcdaa0e
+ .word 0xbfc1bc8a, 0xf2143000
+ .word 0xbd2acd64, 0xfb955458
+ .word 0xbfc1aa2b, 0x7e23f000
+ .word 0xbd2ca78e, 0x44389934
+ .word 0xbfc197ce, 0xacf2a000
+ .word 0xbd31ab14, 0x4caf6736
+ .word 0xbfc18574, 0x7dbec000
+ .word 0xbd3e6744, 0x45bd9b49
+ .word 0xbfc1731c, 0xefc74000
+ .word 0xbcfde27c, 0xd98317fd
+ .word 0xbfc160c8, 0x024b2000
+ .word 0xbd2ec2d2, 0xa9009e3d
+ .word 0xbfc14e75, 0xb489f000
+ .word 0xbd3fdf84, 0x66dfe192
+ .word 0xbfc13c26, 0x05c39000
+ .word 0xbd318501, 0x13584d7c
+ .word 0xbfc129d8, 0xf5381000
+ .word 0xbd1d77cc, 0x415a172e
+ .word 0xbfc1178e, 0x8227e000
+ .word 0xbd21ef78, 0xce2d07f2
+ .word 0xbfc10546, 0xabd3d000
+ .word 0xbd00189b, 0x51d162e8
+ .word 0xbfc0f301, 0x717cf000
+ .word 0xbcff64bb, 0xe51793b4
+ .word 0xbfc0e0be, 0xd264a000
+ .word 0xbd3bafe2, 0x3aeb549c
+ .word 0xbfc0ce7e, 0xcdccc000
+ .word 0xbd14652d, 0xabff5447
+ .word 0xbfc0bc41, 0x62f73000
+ .word 0xbd36ca04, 0x73bd9c29
+ .word 0xbfc0aa06, 0x91267000
+ .word 0xbd2755cc, 0x51f9bdae
+ .word 0xbfc097ce, 0x579d2000
+ .word 0xbce33742, 0xda652881
+ .word 0xbfc08598, 0xb59e3000
+ .word 0xbd340d11, 0x47fb37ea
+ .word 0xbfc07365, 0xaa6d1000
+ .word 0xbd16e172, 0x43f1226a
+ .word 0xbfc06135, 0x354d4000
+ .word 0xbd363046, 0x28340ee9
+ .word 0xbfc04f07, 0x5582d000
+ .word 0xbd1a3d31, 0x4c780403
+ .word 0xbfc03cdc, 0x0a51e000
+ .word 0xbd381a9c, 0xf169fc5c
+ .word 0xbfc02ab3, 0x52ff2000
+ .word 0xbd27ce63, 0x5d569b2b
+ .word 0xbfc0188d, 0x2ecf6000
+ .word 0xbd03f965, 0x1cff9dfe
+ .word 0xbfc00669, 0x9d07c000
+ .word 0xbd3b8775, 0x304686e1
+ .word 0xbfbfe891, 0x39dbd000
+ .word 0xbd159653, 0x60bdea07
+ .word 0xbfbfc454, 0x5b8f0000
+ .word 0xbd29cba7, 0xd5591204
+ .word 0xbfbfa01c, 0x9db57000
+ .word 0xbd29c32b, 0x816dd634
+ .word 0xbfbf7be9, 0xfedbf000
+ .word 0xbd2bcbe8, 0xb535310e
+ .word 0xbfbf57bc, 0x7d900000
+ .word 0xbd176a6c, 0x9ea8b04e
+ .word 0xbfbf3394, 0x185fa000
+ .word 0xbd1ea383, 0x09d097b7
+ .word 0xbfbf0f70, 0xcdd99000
+ .word 0xbd0718fb, 0x613960ee
+ .word 0xbfbeeb52, 0x9c8d1000
+ .word 0xbd0b6260, 0x903c8f99
+ .word 0xbfbec739, 0x830a1000
+ .word 0xbcf1fcba, 0x80cdd0fe
+ .word 0xbfbea325, 0x7fe10000
+ .word 0xbd2ef30d, 0x47e4627a
+ .word 0xbfbe7f16, 0x91a32000
+ .word 0xbd2a7c74, 0xc871080d
+ .word 0xbfbe5b0c, 0xb6e22000
+ .word 0xbd109021, 0x3b34d95f
+ .word 0xbfbe3707, 0xee304000
+ .word 0xbd20f684, 0xe6766abd
+ .word 0xbfbe1308, 0x36208000
+ .word 0xbd21aeea, 0xf90019f9
+ .word 0xbfbdef0d, 0x8d466000
+ .word 0xbd2b715f, 0x7da2cb17
+ .word 0xbfbdcb17, 0xf2361000
+ .word 0xbd226a0a, 0x5ba47956
+ .word 0xbfbda727, 0x63844000
+ .word 0xbd1a8940, 0x1fa71733
+ .word 0xbfbd833b, 0xdfc64000
+ .word 0xbd24805c, 0x07408695
+ .word 0xbfbd5f55, 0x65921000
+ .word 0xbcec4739, 0x830a8d2a
+ .word 0xbfbd3b73, 0xf37e1000
+ .word 0xbd2f3501, 0x33da5007
+ .word 0xbfbd1797, 0x88219000
+ .word 0xbd0b219d, 0xaf7df76b
+ .word 0xbfbcf3c0, 0x22142000
+ .word 0xbce9d2b6, 0x6ddd996f
+ .word 0xbfbccfed, 0xbfee1000
+ .word 0xbd0d4119, 0x7f3892ad
+ .word 0xbfbcac20, 0x60484000
+ .word 0xbd2d53ed, 0xcc4f420b
+ .word 0xbfbc8858, 0x01bc4000
+ .word 0xbd2646d1, 0xc65aacd3
+ .word 0xbfbc6494, 0xa2e41000
+ .word 0xbd214bd1, 0x564189cb
+ .word 0xbfbc40d6, 0x425a5000
+ .word 0xbd296224, 0x3a3261b9
+ .word 0xbfbc1d1c, 0xdeba5000
+ .word 0xbd02f7e7, 0x23a02373
+ .word 0xbfbbf968, 0x769fc000
+ .word 0xbd24218c, 0x8d824283
+ .word 0xbfbbd5b9, 0x08a72000
+ .word 0xbd2236aa, 0x3ae84f31
+ .word 0xbfbbb20e, 0x936d6000
+ .word 0xbd22e8af, 0x9574c8e4
+ .word 0xbfbb8e69, 0x15901000
+ .word 0xbd22bef7, 0xf208fbd9
+ .word 0xbfbb6ac8, 0x8dad5000
+ .word 0xbd2637bf, 0xea044b8d
+ .word 0xbfbb472c, 0xfa63e000
+ .word 0xbd1246f5, 0xc7f4588b
+ .word 0xbfbb2396, 0x5a52f000
+ .word 0xbd2e009b, 0x115ec8f8
+ .word 0xbfbb0004, 0xac1a8000
+ .word 0xbd1aaf97, 0x037f2b35
+ .word 0xbfbadc77, 0xee5ae000
+ .word 0xbd25189b, 0xec79cdf7
+ .word 0xbfbab8f0, 0x1fb52000
+ .word 0xbd27f69d, 0xd23d3ac2
+ .word 0xbfba956d, 0x3ecad000
+ .word 0xbd2cc6f2, 0x9805895f
+ .word 0xbfba71ef, 0x4a3e2000
+ .word 0xbd1bbc94, 0x7b201fbf
+ .word 0xbfba4e76, 0x40b1b000
+ .word 0xbd286f52, 0x51aefe0e
+ .word 0xbfba2b02, 0x20c8e000
+ .word 0xbd17d329, 0x8e6b7dbf
+ .word 0xbfba0792, 0xe9277000
+ .word 0xbd2958c6, 0x4d94ab90
+ .word 0xbfb9e428, 0x9871e000
+ .word 0xbd22c483, 0xd0942b9c
+ .word 0xbfb9c0c3, 0x2d4d2000
+ .word 0xbd1520fd, 0x85f1e661
+ .word 0xbfb99d62, 0xa65eb000
+ .word 0xbd22dd17, 0xd834450a
+ .word 0xbfb97a07, 0x024cb000
+ .word 0xbd2ce867, 0xd19bed86
+ .word 0xbfb956b0, 0x3fbdd000
+ .word 0xbd286fb6, 0x03fe1b67
+ .word 0xbfb9335e, 0x5d594000
+ .word 0xbd23115c, 0x3abd47da
+ .word 0xbfb91011, 0x59c6c000
+ .word 0xbd27af17, 0x9df80b59
+ .word 0xbfb8ecc9, 0x33aeb000
+ .word 0xbd1ba18c, 0x833010ab
+ .word 0xbfb8c985, 0xe9b9e000
+ .word 0xbd290791, 0x0379ff94
+ .word 0xbfb8a647, 0x7a91d000
+ .word 0xbd285181, 0x5f37adbf
+ .word 0xbfb8830d, 0xe4e08000
+ .word 0xbd05f60b, 0x79c8f66a
+ .word 0xbfb85fd9, 0x27506000
+ .word 0xbd248fcf, 0xccd1e7c7
+ .word 0xbfb83ca9, 0x408ca000
+ .word 0xbd2326c8, 0xd744c7d1
+ .word 0xbfb8197e, 0x2f40e000
+ .word 0xbd0f80dc, 0xf96ffdf7
+ .word 0xbfb7f657, 0xf2194000
+ .word 0xbd21bef9, 0x43faf4d2
+ .word 0xbfb7d336, 0x87c29000
+ .word 0xbd0e4461, 0xf3833832
+ .word 0xbfb7b019, 0xeeea0000
+ .word 0xbd275649, 0xaee848d4
+ .word 0xbfb78d02, 0x263d8000
+ .word 0xbd069b57, 0x94b69fb7
+ .word 0xbfb769ef, 0x2c6b5000
+ .word 0xbd1a35d8, 0xc73b6a55
+ .word 0xbfb746e1, 0x00226000
+ .word 0xbd2db25d, 0x23c3bc5b
+ .word 0xbfb723d7, 0xa0123000
+ .word 0xbd2c3cbb, 0x84fef08e
+ .word 0xbfb700d3, 0x0aeac000
+ .word 0xbcec1e8d, 0xa99ded32
+ .word 0xbfb6ddd3, 0x3f5c7000
+ .word 0xbd2aeb06, 0x82906a06
+ .word 0xbfb6bad8, 0x3c188000
+ .word 0xbd0daf3c, 0xc08926ae
+ .word 0xbfb697e1, 0xffd06000
+ .word 0xbd296c57, 0x15a12bb6
+ .word 0xbfb674f0, 0x89365000
+ .word 0xbd24f332, 0x993a6604
+ .word 0xbfb65203, 0xd6fcf000
+ .word 0xbd1ea006, 0x8199326b
+ .word 0xbfb62f1b, 0xe7d77000
+ .word 0xbd1d0cd5, 0x02538764
+ .word 0xbfb60c38, 0xba799000
+ .word 0xbd1172c4, 0x3aec1296
+ .word 0xbfb5e95a, 0x4d979000
+ .word 0xbcfcb7ce, 0x1d171711
+ .word 0xbfb5c680, 0x9fe63000
+ .word 0xbd23c479, 0x935581b6
+ .word 0xbfb5a3ab, 0xb01ad000
+ .word 0xbd2c4ae9, 0x3cd5f430
+ .word 0xbfb580db, 0x7ceb5000
+ .word 0xbd1c07f6, 0xcbe60d53
+ .word 0xbfb55e10, 0x050e0000
+ .word 0xbd0c1d74, 0x0c53c72e
+ .word 0xbfb53b49, 0x4739c000
+ .word 0xbd221868, 0x5306aaa5
+ .word 0xbfb51887, 0x42261000
+ .word 0xbd0850ec, 0xb12c59ec
+ .word 0xbfb4f5c9, 0xf48ad000
+ .word 0xbd0580c1, 0x2c81f8fd
+ .word 0xbfb4d311, 0x5d207000
+ .word 0xbd2d58bb, 0x4fa163c2
+ .word 0xbfb4b05d, 0x7aa01000
+ .word 0xbd07029c, 0x6ef93715
+ .word 0xbfb48dae, 0x4bc31000
+ .word 0xbcb85b20, 0x8c200bea
+ .word 0xbfb46b03, 0xcf437000
+ .word 0xbd2787a5, 0x2f0f6296
+ .word 0xbfb4485e, 0x03dbd000
+ .word 0xbd2f5a8d, 0xd1a4d56e
+ .word 0xbfb425bc, 0xe8474000
+ .word 0xbd2365ac, 0x5219daef
+ .word 0xbfb40320, 0x7b414000
+ .word 0xbd26fd84, 0xaa8157c0
+ .word 0xbfb3e088, 0xbb85f000
+ .word 0xbd248068, 0xbdc331fa
+ .word 0xbfb3bdf5, 0xa7d1e000
+ .word 0xbd2cc85e, 0xa5db4ed7
+ .word 0xbfb39b67, 0x3ee24000
+ .word 0xbd0a759b, 0xa99f5667
+ .word 0xbfb378dd, 0x7f749000
+ .word 0xbd1c5044, 0xa3c7eb28
+ .word 0xbfb35658, 0x68470000
+ .word 0xbd2464d7, 0x0035b508
+ .word 0xbfb333d7, 0xf8183000
+ .word 0xbd2e96d4, 0x957e477c
+ .word 0xbfb3115c, 0x2da75000
+ .word 0xbd25bc37, 0x00651448
+ .word 0xbfb2eee5, 0x07b40000
+ .word 0xbd08081e, 0xdd77c860
+ .word 0xbfb2cc72, 0x84fe5000
+ .word 0xbd2e38bd, 0x0cb32a28
+ .word 0xbfb2aa04, 0xa4471000
+ .word 0xbd1e922e, 0xa2c72d06
+ .word 0xbfb2879b, 0x644f5000
+ .word 0xbd1752b6, 0xf65943ec
+ .word 0xbfb26536, 0xc3d8c000
+ .word 0xbd0b4bac, 0x097c5ba3
+ .word 0xbfb242d6, 0xc1a58000
+ .word 0xbd24b838, 0xac648481
+ .word 0xbfb2207b, 0x5c785000
+ .word 0xbd127633, 0xf0431efb
+ .word 0xbfb1fe24, 0x93144000
+ .word 0xbd27a374, 0xe1a7c696
+ .word 0xbfb1dbd2, 0x643d1000
+ .word 0xbd221649, 0xb2ef8928
+ .word 0xbfb1b984, 0xceb6e000
+ .word 0xbd121a31, 0x2f307601
+ .word 0xbfb1973b, 0xd1465000
+ .word 0xbd159b45, 0x53e4c2cb
+ .word 0xbfb174f7, 0x6ab09000
+ .word 0xbcf71031, 0x7ee2e483
+ .word 0xbfb152b7, 0x99bb3000
+ .word 0xbd299135, 0xbe3f3df6
+ .word 0xbfb1307c, 0x5d2c7000
+ .word 0xbd2357c9, 0xfa3dbf1f
+ .word 0xbfb10e45, 0xb3cae000
+ .word 0xbd20612d, 0xaf6b9737
+ .word 0xbfb0ec13, 0x9c5da000
+ .word 0xbd180247, 0xe54ebd73
+ .word 0xbfb0c9e6, 0x15ac4000
+ .word 0xbd2c2da8, 0x0974d976
+ .word 0xbfb0a7bd, 0x1e7ef000
+ .word 0xbd20f926, 0xcdf8dfb4
+ .word 0xbfb08598, 0xb59e3000
+ .word 0xbd240d11, 0x47fb37ea
+ .word 0xbfb06378, 0xd9d32000
+ .word 0xbd104990, 0x672b0729
+ .word 0xbfb0415d, 0x89e74000
+ .word 0xbd1111c0, 0x5cf1d753
+ .word 0xbfb01f46, 0xc4a4a000
+ .word 0xbd11157c, 0x89ecf845
+ .word 0xbfaffa69, 0x11ab9000
+ .word 0xbcf80464, 0xc1c0d47a
+ .word 0xbfafb64d, 0xaa8b6000
+ .word 0xbd13830d, 0xaeb373e0
+ .word 0xbfaf723b, 0x517fc000
+ .word 0xbd048a79, 0x154f796a
+ .word 0xbfaf2e32, 0x04209000
+ .word 0xbcfb9ba8, 0x2f4d6e7f
+ .word 0xbfaeea31, 0xc006b000
+ .word 0xbd10f760, 0xd81b6242
+ .word 0xbfaea63a, 0x82cc0000
+ .word 0xbd19f144, 0x08e210e7
+ .word 0xbfae624c, 0x4a0b5000
+ .word 0xbd1c368e, 0x2e6265dd
+ .word 0xbfae1e67, 0x13606000
+ .word 0xbd1a0d3c, 0xb7b141db
+ .word 0xbfadda8a, 0xdc67e000
+ .word 0xbd1c9ca7, 0x364c37a2
+ .word 0xbfad96b7, 0xa2bf8000
+ .word 0xbd12eb81, 0xf49d3d78
+ .word 0xbfad52ed, 0x6405d000
+ .word 0xbd10de8b, 0x575910a6
+ .word 0xbfad0f2c, 0x1dda6000
+ .word 0xbd0c6fc7, 0x04385ddf
+ .word 0xbfaccb73, 0xcdddb000
+ .word 0xbcf65c36, 0xe09f5fe2
+ .word 0xbfac87c4, 0x71b12000
+ .word 0xbd13799a, 0xf29d923d
+ .word 0xbfac441e, 0x06f72000
+ .word 0xbd153c7d, 0x26143455
+ .word 0xbfac0080, 0x8b530000
+ .word 0xbd003c05, 0x63baea2e
+ .word 0xbfabbceb, 0xfc68f000
+ .word 0xbd0080f2, 0xe79d07ab
+ .word 0xbfab7960, 0x57de2000
+ .word 0xbd0f5af1, 0xf7b24d0f
+ .word 0xbfab35dd, 0x9b58b000
+ .word 0xbd1559d3, 0x5b3d5639
+ .word 0xbfaaf263, 0xc47fb000
+ .word 0xbd085458, 0x172a97ad
+ .word 0xbfaaaef2, 0xd0fb1000
+ .word 0xbcdf8346, 0xa77685c1
+ .word 0xbfaa6b8a, 0xbe73a000
+ .word 0xbd1e988d, 0x46e25c90
+ .word 0xbfaa282b, 0x8a936000
+ .word 0xbce70a67, 0xf10371d7
+ .word 0xbfa9e4d5, 0x3304e000
+ .word 0xbcfec4a6, 0x991acef2
+ .word 0xbfa9a187, 0xb573d000
+ .word 0xbd1cf746, 0xc4ec9bca
+ .word 0xbfa95e43, 0x0f8ce000
+ .word 0xbd01774c, 0x225e2c8d
+ .word 0xbfa91b07, 0x3efd7000
+ .word 0xbcf8a0eb, 0x0224d5a9
+ .word 0xbfa8d7d4, 0x4173f000
+ .word 0xbcf24a7b, 0x7a089116
+ .word 0xbfa894aa, 0x149fb000
+ .word 0xbcfa19a8, 0xbe97660a
+ .word 0xbfa85188, 0xb630f000
+ .word 0xbcca0544, 0x165f80aa
+ .word 0xbfa80e70, 0x23d8c000
+ .word 0xbd1988fa, 0x435d02ec
+ .word 0xbfa7cb60, 0x5b495000
+ .word 0xbcfc8af3, 0x69d6d0f4
+ .word 0xbfa78859, 0x5a357000
+ .word 0xbd0ee9e5, 0xef898b68
+ .word 0xbfa7455b, 0x1e511000
+ .word 0xbcfb28ce, 0xb91e296d
+ .word 0xbfa70265, 0xa550e000
+ .word 0xbd0ddc83, 0xb80a8c63
+ .word 0xbfa6bf78, 0xecea9000
+ .word 0xbd163cc0, 0x0f16f7e9
+ .word 0xbfa67c94, 0xf2d4b000
+ .word 0xbd16b082, 0x09f3282f
+ .word 0xbfa639b9, 0xb4c6b000
+ .word 0xbd14f37b, 0x6b7f9673
+ .word 0xbfa5f6e7, 0x3078e000
+ .word 0xbd1f6f4a, 0xffdb6d69
+ .word 0xbfa5b41d, 0x63a49000
+ .word 0xbd0abcc4, 0x7e8a0c20
+ .word 0xbfa5715c, 0x4c03c000
+ .word 0xbd1dddc8, 0x80ee2760
+ .word 0xbfa52ea3, 0xe7519000
+ .word 0xbd16ff79, 0x68012363
+ .word 0xbfa4ebf4, 0x3349e000
+ .word 0xbcf37578, 0x4620c465
+ .word 0xbfa4a94d, 0x2da96000
+ .word 0xbd18ace0, 0x8a56ed78
+ .word 0xbfa466ae, 0xd42de000
+ .word 0xbcff4c64, 0x521016be
+ .word 0xbfa42419, 0x2495d000
+ .word 0xbd05f329, 0x88dd64a6
+ .word 0xbfa3e18c, 0x1ca0a000
+ .word 0xbd1d23b4, 0xfdb8de39
+ .word 0xbfa39f07, 0xba0eb000
+ .word 0xbd1ac4a7, 0x590b95de
+ .word 0xbfa35c8b, 0xfaa13000
+ .word 0xbccabeaf, 0x7cf59aac
+ .word 0xbfa31a18, 0xdc1a1000
+ .word 0xbd07dd58, 0xd860ceab
+ .word 0xbfa2d7ae, 0x5c3c5000
+ .word 0xbd175b1a, 0xe989664c
+ .word 0xbfa2954c, 0x78cbc000
+ .word 0xbd1c3526, 0x570c1572
+ .word 0xbfa252f3, 0x2f8d1000
+ .word 0xbd107d35, 0xc0436cf5
+ .word 0xbfa210a2, 0x7e45c000
+ .word 0xbcf8ceca, 0x131bef9c
+ .word 0xbfa1ce5a, 0x62bc3000
+ .word 0xbd04e63c, 0x6c6fccc5
+ .word 0xbfa18c1a, 0xdab7b000
+ .word 0xbcf22af4, 0xd32f2ac0
+ .word 0xbfa149e3, 0xe4005000
+ .word 0xbd1519d5, 0x96fa5c0c
+ .word 0xbfa107b5, 0x7c5f2000
+ .word 0xbd152b81, 0xe94af0a6
+ .word 0xbfa0c58f, 0xa19df000
+ .word 0xbd155317, 0x53a74377
+ .word 0xbfa08372, 0x51877000
+ .word 0xbd1cc91e, 0xb2004222
+ .word 0xbfa0415d, 0x89e74000
+ .word 0xbd0111c0, 0x5cf1d753
+ .word 0xbf9ffea2, 0x91136000
+ .word 0xbd04dd01, 0xd7640dc2
+ .word 0xbf9f7a9b, 0x16782000
+ .word 0xbd00ab64, 0x9c6f9f5c
+ .word 0xbf9ef6a4, 0x9f98f000
+ .word 0xbd0671e4, 0xe8f151a3
+ .word 0xbf9e72bf, 0x2813c000
+ .word 0xbd0ca2ba, 0xda22cae5
+ .word 0xbf9deeea, 0xab883000
+ .word 0xbd0c6e1d, 0x7741b591
+ .word 0xbf9d6b27, 0x25979000
+ .word 0xbd000425, 0x79723e3d
+ .word 0xbf9ce774, 0x91e4d000
+ .word 0xbd00d7ce, 0xf3d25198
+ .word 0xbf9c63d2, 0xec14a000
+ .word 0xbd05e318, 0xfe7acbca
+ .word 0xbf9be042, 0x2fcd6000
+ .word 0xbd01ec42, 0x87f2c9ca
+ .word 0xbf9b5cc2, 0x58b71000
+ .word 0xbd01cc23, 0x715f7fd0
+ .word 0xbf9ad953, 0x627b6000
+ .word 0xbd0ab5a1, 0x1a805efd
+ .word 0xbf9a55f5, 0x48c5c000
+ .word 0xbcf0fc7b, 0x0697e1b5
+ .word 0xbf99d2a8, 0x07432000
+ .word 0xbcf7cf80, 0x538b441e
+ .word 0xbf994f6b, 0x99a24000
+ .word 0xbcf1d5ef, 0x96cf7f51
+ .word 0xbf98cc3f, 0xfb937000
+ .word 0xbd050394, 0x323f2c7a
+ .word 0xbf984925, 0x28c8c000
+ .word 0xbd057d17, 0x3697cf30
+ .word 0xbf97c61b, 0x1cf5d000
+ .word 0xbd0dc0dc, 0x1ed96ee4
+ .word 0xbf974321, 0xd3d00000
+ .word 0xbcfb4a69, 0x0fe94778
+ .word 0xbf96c039, 0x490e3000
+ .word 0xbcff7b34, 0x02fd59ca
+ .word 0xbf963d61, 0x78690000
+ .word 0xbd07abf3, 0x89596542
+ .word 0xbf95ba9a, 0x5d9ac000
+ .word 0xbcacbb84, 0xe08d78ac
+ .word 0xbf9537e3, 0xf45f3000
+ .word 0xbcf592ce, 0x96bf9299
+ .word 0xbf94b53e, 0x3873e000
+ .word 0xbd0b6ee9, 0xbca265c1
+ .word 0xbf9432a9, 0x25980000
+ .word 0xbd098139, 0x928637fe
+ .word 0xbf93b024, 0xb78c5000
+ .word 0xbcf9a5e2, 0x3a02f82a
+ .word 0xbf932db0, 0xea132000
+ .word 0xbd0c432c, 0x4c2257ef
+ .word 0xbf92ab4d, 0xb8f09000
+ .word 0xbcf82c84, 0xa532c74c
+ .word 0xbf9228fb, 0x1fea2000
+ .word 0xbd0c4f8c, 0xa12647f9
+ .word 0xbf91a6b9, 0x1ac73000
+ .word 0xbcec30e9, 0xb54e2dd6
+ .word 0xbf912487, 0xa5507000
+ .word 0xbd0edf2f, 0xf6a59c94
+ .word 0xbf90a266, 0xbb508000
+ .word 0xbcfa5be1, 0x7c2ec500
+ .word 0xbf902056, 0x58935000
+ .word 0xbd008e93, 0xe47420b7
+ .word 0xbf8f3cac, 0xf1cd3000
+ .word 0xbcf64d83, 0xc9a6875d
+ .word 0xbf8e38ce, 0x30333000
+ .word 0xbcc0bbae, 0x12ebf308
+ .word 0xbf8d3510, 0x63fa4000
+ .word 0xbcea8d92, 0xdf000beb
+ .word 0xbf8c3173, 0x84c75000
+ .word 0xbcfe0cc0, 0x31046026
+ .word 0xbf8b2df7, 0x8a428000
+ .word 0xbcf4c647, 0xa5d4542f
+ .word 0xbf8a2a9c, 0x6c170000
+ .word 0xbce18876, 0x525971be
+ .word 0xbf892762, 0x21f33000
+ .word 0xbcd456ba, 0x9344a27f
+ .word 0xbf882448, 0xa388a000
+ .word 0xbcd55104, 0xb16137f1
+ .word 0xbf87214f, 0xe88c0000
+ .word 0xbcf27275, 0xd7338080
+ .word 0xbf861e77, 0xe8b53000
+ .word 0xbcff8c11, 0x507150cb
+ .word 0xbf851bc0, 0x9bbf4000
+ .word 0xbcdae1ea, 0x5258a3c6
+ .word 0xbf841929, 0xf9683000
+ .word 0xbcd77c75, 0x5d013688
+ .word 0xbf8316b3, 0xf9714000
+ .word 0xbcfb8dcc, 0x8ba5563d
+ .word 0xbf82145e, 0x939ef000
+ .word 0xbcce891c, 0x6274ffda
+ .word 0xbf811229, 0xbfb89000
+ .word 0xbcf50ee4, 0x5fd053b1
+ .word 0xbf801015, 0x7588d000
+ .word 0xbcfce251, 0x998b505f
+ .word 0xbf7e1c43, 0x59bad000
+ .word 0xbce9f504, 0xadbb6021
+ .word 0xbf7c189c, 0xbb0e2000
+ .word 0xbcdfeabb, 0x69dea7ed
+ .word 0xbf7a1536, 0xfeb35000
+ .word 0xbcecb8e8, 0x91b69c25
+ .word 0xbf781212, 0x14586000
+ .word 0xbce6a81c, 0x14b9f937
+ .word 0xbf760f2d, 0xebb16000
+ .word 0xbcbb6835, 0x84891753
+ .word 0xbf740c8a, 0x74787000
+ .word 0xbce1c38e, 0xf838000c
+ .word 0xbf720a27, 0x9e6e0000
+ .word 0xbce34d96, 0x922727aa
+ .word 0xbf700805, 0x59588000
+ .word 0xbce66afc, 0xb31c67b2
+ .word 0xbf6c0c47, 0x2a092000
+ .word 0xbc657d36, 0x31cacba0
+ .word 0xbf680904, 0x82898000
+ .word 0xbcc701a5, 0xa9c30314
+ .word 0xbf640642, 0x9be3c000
+ .word 0xbcccf0de, 0xc26e96f3
+ .word 0xbf600401, 0x55d58000
+ .word 0xbcd13bce, 0x0ce3ddd8
+ .word 0xbf580481, 0x20511000
+ .word 0xbcc0a8ce, 0x7ceb0de6
+ .word 0xbf500200, 0x55655000
+ .word 0xbcc11266, 0xaf9afc3f
+ .word 0xbf400100, 0x15575000
+ .word 0xbca62237, 0x79c0dc11
+ .word 0x00000000, 0x00000000
+ .word 0x00000000, 0x00000000
+ .word 0x3f4ffc00, 0xaa8ab000
+ .word 0x3c80fbc0, 0x4d051925
+ .word 0x3f5ff802, 0xa9ab1000
+ .word 0x3c8ccf14, 0xf1d0a9f2
+ .word 0x3f67f704, 0x7d798000
+ .word 0x3cbed344, 0xeb43240a
+ .word 0x3f6ff00a, 0xa2b10000
+ .word 0x3cd78094, 0x10d6ad37
+ .word 0x3f73f38a, 0x60f06000
+ .word 0x3cd22569, 0x3c937494
+ .word 0x3f77ee11, 0xebd82000
+ .word 0x3ced274f, 0x0b48e81d
+ .word 0x3f7be79c, 0x70058000
+ .word 0x3ced91f3, 0x4d808088
+ .word 0x3f7fe02a, 0x6b106000
+ .word 0x3cde23f0, 0xdda40e47
+ .word 0x3f81ebde, 0x2d199000
+ .word 0x3cef97c0, 0x0b723c9a
+ .word 0x3f83e729, 0x5d25a000
+ .word 0x3cef63e0, 0x0d65eebc
+ .word 0x3f85e1f7, 0x03ecb000
+ .word 0x3cfca09f, 0x585da1b5
+ .word 0x3f87dc47, 0x5f810000
+ .word 0x3cf4edba, 0x4a25e0b1
+ .word 0x3f89d61a, 0xadc6b000
+ .word 0x3cfb1963, 0x27b4256d
+ .word 0x3f8bcf71, 0x2c743000
+ .word 0x3cf09782, 0x5ef65dc3
+ .word 0x3f8dc84b, 0x19123000
+ .word 0x3cf02950, 0x78e96cc1
+ .word 0x3f8fc0a8, 0xb0fc0000
+ .word 0x3cdf1e7c, 0xf6d3a69c
+ .word 0x3f90dc45, 0x18afc000
+ .word 0x3d090f43, 0x1ff3b010
+ .word 0x3f91d7f7, 0xeb9ee000
+ .word 0x3d07cd8a, 0xf80670b5
+ .word 0x3f92d36c, 0xefb55000
+ .word 0x3cff0bb3, 0x41706c38
+ .word 0x3f93cea4, 0x4346a000
+ .word 0x3cf5d3bc, 0xd295bf53
+ .word 0x3f94c99e, 0x04901000
+ .word 0x3d0bd98c, 0xbbebe949
+ .word 0x3f95c45a, 0x51b8d000
+ .word 0x3cec449d, 0xe927827c
+ .word 0x3f96bed9, 0x48d1b000
+ .word 0x3cff43be, 0x9f5bc086
+ .word 0x3f97b91b, 0x07d5b000
+ .word 0x3cd1aa92, 0x7f54c717
+ .word 0x3f98b31f, 0xaca9b000
+ .word 0x3c8c3ab4, 0x8db4decf
+ .word 0x3f99ace7, 0x551cc000
+ .word 0x3cf45134, 0x09c1df81
+ .word 0x3f9aa672, 0x1ee83000
+ .word 0x3cf6a75a, 0xe2d7a49d
+ .word 0x3f9b9fc0, 0x27af9000
+ .word 0x3cd97fbd, 0x465b7589
+ .word 0x3f9c98d1, 0x8d00c000
+ .word 0x3d0027ab, 0xe9d883c3
+ .word 0x3f9d91a6, 0x6c543000
+ .word 0x3d0987c5, 0x9633ee68
+ .word 0x3f9e8a3e, 0xe30cd000
+ .word 0x3d095817, 0x086b1c01
+ .word 0x3f9f829b, 0x0e783000
+ .word 0x3ce80267, 0xc7e09e3e
+ .word 0x3fa03d5d, 0x85e73000
+ .word 0x3d1dde25, 0x83b4a73b
+ .word 0x3fa0b94f, 0x7c196000
+ .word 0x3ce76769, 0x0fdd87d3
+ .word 0x3fa13523, 0x78597000
+ .word 0x3cef29e2, 0x4702d328
+ .word 0x3fa1b0d9, 0x8923d000
+ .word 0x3d12ff85, 0x945dd915
+ .word 0x3fa22c71, 0xbcea8000
+ .word 0x3cfd2818, 0xf87f888f
+ .word 0x3fa2a7ec, 0x2214e000
+ .word 0x3d10e631, 0x0add3804
+ .word 0x3fa32348, 0xc7001000
+ .word 0x3d0a5b6e, 0x42c7927d
+ .word 0x3fa39e87, 0xb9feb000
+ .word 0x3d1abf52, 0x02b64055
+ .word 0x3fa419a9, 0x09593000
+ .word 0x3d0ae6e3, 0x3ea4753a
+ .word 0x3fa494ac, 0xc34d9000
+ .word 0x3ce1c78a, 0x56fd2473
+ .word 0x3fa50f92, 0xf60f9000
+ .word 0x3d12d9f6, 0x1523ffc6
+ .word 0x3fa58a5b, 0xafc8e000
+ .word 0x3d035231, 0xaa3d4b1d
+ .word 0x3fa60506, 0xfe98d000
+ .word 0x3d1516fd, 0xf9ac7f28
+ .word 0x3fa67f94, 0xf094b000
+ .word 0x3d1b307c, 0xf9f93b5b
+ .word 0x3fa6fa05, 0x93c7b000
+ .word 0x3d0a0af2, 0x0eb1a504
+ .word 0x3fa77458, 0xf632d000
+ .word 0x3d19f88c, 0x69e543dd
+ .word 0x3fa7ee8f, 0x25cd4000
+ .word 0x3ce7bd3d, 0xcb47c2e4
+ .word 0x3fa868a8, 0x3083f000
+ .word 0x3d0b3b8b, 0xd96a72db
+ .word 0x3fa8e2a4, 0x243a1000
+ .word 0x3d173dd6, 0x0284c920
+ .word 0x3fa95c83, 0x0ec8e000
+ .word 0x3cff5beb, 0x41d00a41
+ .word 0x3fa9d644, 0xfdffa000
+ .word 0x3cf3c905, 0x39a473b6
+ .word 0x3faa4fe9, 0xffa3d000
+ .word 0x3cf1a7b5, 0xfbfd6db2
+ .word 0x3faac972, 0x21711000
+ .word 0x3d1f1a7d, 0xe0264459
+ .word 0x3fab42dd, 0x71197000
+ .word 0x3cebec28, 0xd14c7d9f
+ .word 0x3fabbc2b, 0xfc44f000
+ .word 0x3d005cf2, 0xdd7d04a2
+ .word 0x3fac355d, 0xd0921000
+ .word 0x3d1e5999, 0x357f0710
+ .word 0x3facae72, 0xfb95c000
+ .word 0x3cf0540d, 0xfda4e418
+ .word 0x3fad276b, 0x8adb0000
+ .word 0x3d16a423, 0xc78a64b0
+ .word 0x3fada047, 0x8be39000
+ .word 0x3cf2963d, 0x8fb7f02b
+ .word 0x3fae1907, 0x0c276000
+ .word 0x3ca5b99b, 0x9d617a09
+ .word 0x3fae91aa, 0x1914f000
+ .word 0x3d10beaf, 0xf119cac5
+ .word 0x3faf0a30, 0xc0116000
+ .word 0x3cf5330b, 0xe64b8b77
+ .word 0x3faf829b, 0x0e783000
+ .word 0x3cf80267, 0xc7e09e3e
+ .word 0x3faffae9, 0x119b9000
+ .word 0x3cf819ba, 0x13162a9c
+ .word 0x3fb0398d, 0x6b622000
+ .word 0x3d153ac8, 0x0d00cc01
+ .word 0x3fb07598, 0x3598e000
+ .word 0x3d11c4c0, 0x6d2999e2
+ .word 0x3fb0b194, 0xee0d1000
+ .word 0x3d199ba9, 0x3da7b72e
+ .word 0x3fb0ed83, 0x9b552000
+ .word 0x3d1bf82e, 0x4add5131
+ .word 0x3fb12964, 0x4402e000
+ .word 0x3d056224, 0x572ac464
+ .word 0x3fb16536, 0xeea37000
+ .word 0x3d25c1d0, 0xc4b82e7c
+ .word 0x3fb1a0fb, 0xa1bf8000
+ .word 0x3d24a3fc, 0xc319d6dc
+ .word 0x3fb1dcb2, 0x63db1000
+ .word 0x3d22889e, 0xbd3d1303
+ .word 0x3fb2185b, 0x3b75a000
+ .word 0x3cfce760, 0x70cdcfc5
+ .word 0x3fb253f6, 0x2f0a1000
+ .word 0x3d105be3, 0xeda69c04
+ .word 0x3fb28f83, 0x450ed000
+ .word 0x3d251aeb, 0x54232ed1
+ .word 0x3fb2cb02, 0x83f5d000
+ .word 0x3d2c3dc5, 0x94cae043
+ .word 0x3fb30673, 0xf22c8000
+ .word 0x3d24c9e2, 0x9dcf0ba5
+ .word 0x3fb341d7, 0x961bd000
+ .word 0x3cfd0929, 0x98376105
+ .word 0x3fb37d2d, 0x76283000
+ .word 0x3cfcfaab, 0x2400751e
+ .word 0x3fb3b875, 0x98b1b000
+ .word 0x3d1bb7d4, 0xd6a6b9db
+ .word 0x3fb3f3b0, 0x04140000
+ .word 0x3cee2474, 0xacdfcec5
+ .word 0x3fb42edc, 0xbea64000
+ .word 0x3d1bc0ee, 0xea7c9acd
+ .word 0x3fb469fb, 0xcebb5000
+ .word 0x3d26cc78, 0x9e4ae327
+ .word 0x3fb4a50d, 0x3aa1b000
+ .word 0x3cd003d9, 0xeed183bb
+ .word 0x3fb4e011, 0x08a35000
+ .word 0x3d25cb9f, 0xbe58b5c9
+ .word 0x3fb51b07, 0x3f061000
+ .word 0x3d207ed2, 0x4f1cd0d4
+ .word 0x3fb555ef, 0xe40b5000
+ .word 0x3ce692f1, 0x90d1c46b
+ .word 0x3fb590ca, 0xfdf01000
+ .word 0x3d28509e, 0xae455754
+ .word 0x3fb5cb98, 0x92ed4000
+ .word 0x3d17be44, 0xa64fc52f
+ .word 0x3fb60658, 0xa9375000
+ .word 0x3ce8763b, 0xdd389ef2
+ .word 0x3fb6410b, 0x46fe7000
+ .word 0x3d256038, 0x61a13976
+ .word 0x3fb67bb0, 0x726ec000
+ .word 0x3cef724b, 0x69ef5912
+ .word 0x3fb6b648, 0x31afe000
+ .word 0x3d1033d7, 0xb22085b8
+ .word 0x3fb6f0d2, 0x8ae56000
+ .word 0x3d269737, 0xc93373da
+ .word 0x3fb72b4f, 0x842ea000
+ .word 0x3d21f666, 0x7fe6c45a
+ .word 0x3fb765bf, 0x23a6b000
+ .word 0x3d2c2687, 0xf9477b53
+ .word 0x3fb7a021, 0x6f649000
+ .word 0x3d2c2499, 0x430831ff
+ .word 0x3fb7da76, 0x6d7b1000
+ .word 0x3d066422, 0x240644d8
+ .word 0x3fb814be, 0x23f8c000
+ .word 0x3ccb2381, 0xda82fdfd
+ .word 0x3fb84ef8, 0x98e82000
+ .word 0x3d205465, 0xb72d106e
+ .word 0x3fb88925, 0xd24fa000
+ .word 0x3d2c55f5, 0x76088ff3
+ .word 0x3fb8c345, 0xd6319000
+ .word 0x3d2641eb, 0x596854cc
+ .word 0x3fb8fd58, 0xaa8c2000
+ .word 0x3cf136fe, 0x4348da4e
+ .word 0x3fb9375e, 0x55595000
+ .word 0x3d2dbb86, 0xe70186c9
+ .word 0x3fb97156, 0xdc8f6000
+ .word 0x3d0f01f3, 0x28123425
+ .word 0x3fb9ab42, 0x46203000
+ .word 0x3d0d66df, 0x661e3e7b
+ .word 0x3fb9e520, 0x97f9c000
+ .word 0x3d235fac, 0xb52dd050
+ .word 0x3fba1ef1, 0xd8061000
+ .word 0x3d29a82e, 0xdbf2f796
+ .word 0x3fba58b6, 0x0c2b2000
+ .word 0x3d091c65, 0x1d1b06b1
+ .word 0x3fba926d, 0x3a4ad000
+ .word 0x3d158d94, 0x2f48aa71
+ .word 0x3fbacc17, 0x68433000
+ .word 0x3d0561f1, 0x7d2016d1
+ .word 0x3fbb05b4, 0x9bee4000
+ .word 0x3d0ff22c, 0x18f84a5e
+ .word 0x3fbb3f44, 0xdb221000
+ .word 0x3d2fa2a7, 0xb1bc135d
+ .word 0x3fbb78c8, 0x2bb0e000
+ .word 0x3d2b4210, 0x878cf032
+ .word 0x3fbbb23e, 0x9368e000
+ .word 0x3d22e9cf, 0x954c48ea
+ .word 0x3fbbeba8, 0x18146000
+ .word 0x3d1d921d, 0x248382a6
+ .word 0x3fbc2504, 0xbf79d000
+ .word 0x3d1c5f13, 0x43bd2b70
+ .word 0x3fbc5e54, 0x8f5bc000
+ .word 0x3d1d0c57, 0x585fbe06
+ .word 0x3fbc9797, 0x8d78e000
+ .word 0x3d223fde, 0xd105cef9
+ .word 0x3fbcd0cd, 0xbf8c1000
+ .word 0x3d0f0a6d, 0xa86eba18
+ .word 0x3fbd09f7, 0x2b4c4000
+ .word 0x3d2048c0, 0x00354e33
+ .word 0x3fbd4313, 0xd66cb000
+ .word 0x3d0aeaf2, 0x1bb2a3b2
+ .word 0x3fbd7c23, 0xc69cb000
+ .word 0x3d0a046c, 0x8b35e23e
+ .word 0x3fbdb527, 0x0187d000
+ .word 0x3d224ef0, 0xad5c303f
+ .word 0x3fbdee1d, 0x8cd5e000
+ .word 0x3d2ae4bf, 0x1ac200ee
+ .word 0x3fbe2707, 0x6e2af000
+ .word 0x3d072f4f, 0x543fff10
+ .word 0x3fbe5fe4, 0xab272000
+ .word 0x3d240a2c, 0x11600366
+ .word 0x3fbe98b5, 0x49671000
+ .word 0x3d119dd2, 0x27143a5b
+ .word 0x3fbed179, 0x4e837000
+ .word 0x3d20175e, 0x45b17dbe
+ .word 0x3fbf0a30, 0xc0116000
+ .word 0x3d05330b, 0xe64b8b77
+ .word 0x3fbf42db, 0xa3a22000
+ .word 0x3d29da91, 0x9a4127e6
+ .word 0x3fbf7b79, 0xfec37000
+ .word 0x3d2bbd9e, 0x05da04c0
+ .word 0x3fbfb40b, 0xd6ff4000
+ .word 0x3d2c0bec, 0xb7b53b5b
+ .word 0x3fbfec91, 0x31dbe000
+ .word 0x3d257554, 0x5ca333f2
+ .word 0x3fc01285, 0x0a6df000
+ .word 0x3d395e79, 0xadfe901b
+ .word 0x3fc02ebb, 0x42bf3000
+ .word 0x3d3a95c1, 0x68c7fc69
+ .word 0x3fc04aeb, 0x449f6000
+ .word 0x3d2afa90, 0x65ccd35c
+ .word 0x3fc06715, 0x12ca5000
+ .word 0x3d32dc54, 0x3191fae2
+ .word 0x3fc08338, 0xaffa2000
+ .word 0x3d30533c, 0xac823e27
+ .word 0x3fc09f56, 0x1ee71000
+ .word 0x3d33867d, 0x4754172c
+ .word 0x3fc0bb6d, 0x6247a000
+ .word 0x3d35464f, 0x3ccd04b3
+ .word 0x3fc0d77e, 0x7cd08000
+ .word 0x3d3cb2cd, 0x2ee2f482
+ .word 0x3fc0f389, 0x7134b000
+ .word 0x3d02e530, 0xbb6149cf
+ .word 0x3fc10f8e, 0x42253000
+ .word 0x3d336263, 0xde634e7c
+ .word 0x3fc12b8c, 0xf2518000
+ .word 0x3d348a4a, 0x13c0a0fc
+ .word 0x3fc14785, 0x84674000
+ .word 0x3d156345, 0x1027c750
+ .word 0x3fc16377, 0xfb124000
+ .word 0x3d091e1a, 0xbf41763e
+ .word 0x3fc17f64, 0x58fca000
+ .word 0x3d2843fa, 0xd093c8dc
+ .word 0x3fc19b4a, 0xa0ced000
+ .word 0x3d03bedb, 0x4ef663a7
+ .word 0x3fc1b72a, 0xd52f6000
+ .word 0x3d2e80a4, 0x1811a396
+ .word 0x3fc1d304, 0xf8c35000
+ .word 0x3d164aec, 0x82ebbef7
+ .word 0x3fc1eed9, 0x0e2dc000
+ .word 0x3d161563, 0x7097648f
+ .word 0x3fc20aa7, 0x18102000
+ .word 0x3d3f2c94, 0x348552fe
+ .word 0x3fc2266f, 0x190a5000
+ .word 0x3d3596fa, 0xa3df8c05
+ .word 0x3fc24231, 0x13ba5000
+ .word 0x3cfc5ff8, 0x71162641
+ .word 0x3fc25ded, 0x0abc6000
+ .word 0x3d35a385, 0x4f176449
+ .word 0x3fc279a3, 0x00ab4000
+ .word 0x3d3ef432, 0xb3235108
+ .word 0x3fc29552, 0xf81ff000
+ .word 0x3d248d30, 0x1771c408
+ .word 0x3fc2b0fc, 0xf3b1a000
+ .word 0x3d177ca3, 0xe30a59ea
+ .word 0x3fc2cca0, 0xf5f5f000
+ .word 0x3d128439, 0xb9403b82
+ .word 0x3fc2e83f, 0x0180d000
+ .word 0x3cee7aa7, 0xaf63c632
+ .word 0x3fc303d7, 0x18e47000
+ .word 0x3d3fa5fd, 0x28c704d4
+ .word 0x3fc31f69, 0x3eb19000
+ .word 0x3d32cc6c, 0x8d2e3482
+ .word 0x3fc33af5, 0x75770000
+ .word 0x3d3c9ecc, 0xa2fe72a5
+ .word 0x3fc3567b, 0xbfc22000
+ .word 0x3d3250d2, 0x53991a1f
+ .word 0x3fc371fc, 0x201e8000
+ .word 0x3d3ee877, 0x9b2d8abc
+ .word 0x3fc38d76, 0x99164000
+ .word 0x3d1844a5, 0x9e39bb70
+ .word 0x3fc3a8eb, 0x2d31a000
+ .word 0x3d1bafb7, 0x7d5d503e
+ .word 0x3fc3c459, 0xdef76000
+ .word 0x3d3edc86, 0xf6b70d33
+ .word 0x3fc3dfc2, 0xb0ecc000
+ .word 0x3d28a72a, 0x62b8c13f
+ .word 0x3fc3fb25, 0xa5952000
+ .word 0x3d3195be, 0x6b358ff7
+ .word 0x3fc41682, 0xbf727000
+ .word 0x3d377fdc, 0x7bf03db2
+ .word 0x3fc431da, 0x01050000
+ .word 0x3d304837, 0x836e0391
+ .word 0x3fc44d2b, 0x6ccb7000
+ .word 0x3d3a3ccf, 0xa7b2a1f1
+ .word 0x3fc46877, 0x0542f000
+ .word 0x3d03f5d0, 0x3957bc10
+ .word 0x3fc483bc, 0xcce6e000
+ .word 0x3d1eea52, 0x723f6369
+ .word 0x3fc49efc, 0xc6313000
+ .word 0x3d3cde14, 0xcc15551b
+ .word 0x3fc4ba36, 0xf39a5000
+ .word 0x3d279568, 0x981bcc36
+ .word 0x3fc4d56b, 0x5798e000
+ .word 0x3d380580, 0x15a96555
+ .word 0x3fc4f099, 0xf4a23000
+ .word 0x3cf640d0, 0x50150d92
+ .word 0x3fc50bc2, 0xcd29c000
+ .word 0x3d1ada57, 0x28db8d4f
+ .word 0x3fc526e5, 0xe3a1b000
+ .word 0x3d20de8b, 0x90075b8f
+ .word 0x3fc54203, 0x3a7a8000
+ .word 0x3d268d68, 0xed855f0e
+ .word 0x3fc55d1a, 0xd4232000
+ .word 0x3d3add94, 0xdda647e8
+ .word 0x3fc5782c, 0xb3091000
+ .word 0x3d28b739, 0x5d0d777d
+ .word 0x3fc59338, 0xd9982000
+ .word 0x3cf0ba68, 0xb7555d4a
+ .word 0x3fc5ae3f, 0x4a3aa000
+ .word 0x3d21ea25, 0xf012a8b9
+ .word 0x3fc5c940, 0x07597000
+ .word 0x3d15c9ad, 0xccb7337a
+ .word 0x3fc5e43b, 0x135bd000
+ .word 0x3d278a96, 0x6224c79e
+ .word 0x3fc5ff30, 0x70a79000
+ .word 0x3d1e9e43, 0x9f105039
+ .word 0x3fc61a20, 0x21a0e000
+ .word 0x3d3dd9dd, 0x1bdf3cdd
+ .word 0x3fc6350a, 0x28aaa000
+ .word 0x3d2d5ec0, 0xab8163af
+ .word 0x3fc64fee, 0x8825f000
+ .word 0x3d3896fc, 0xa298884b
+ .word 0x3fc66acd, 0x4272a000
+ .word 0x3d3aa1bd, 0xbfc6c785
+ .word 0x3fc685a6, 0x59eef000
+ .word 0x3d3706ab, 0x49f7e6f6
+ .word 0x3fc6a079, 0xd0f7a000
+ .word 0x3d35a3f8, 0x448d14f5
+ .word 0x3fc6bb47, 0xa9e80000
+ .word 0x3d19f64d, 0x23ea3296
+ .word 0x3fc6d60f, 0xe719d000
+ .word 0x3d10e46a, 0xa3b2e266
+ .word 0x3fc6f0d2, 0x8ae56000
+ .word 0x3d369737, 0xc93373da
+ .word 0x3fc70b8f, 0x97a1a000
+ .word 0x3d34ea64, 0xf6a95bef
+ .word 0x3fc72647, 0x0fa3f000
+ .word 0x3d211641, 0xe3178b76
+ .word 0x3fc740f8, 0xf5403000
+ .word 0x3d2e9326, 0xcdfceabe
+ .word 0x3fc75ba5, 0x4ac8e000
+ .word 0x3d3ddca5, 0x8bc4a7c0
+ .word 0x3fc7764c, 0x128f2000
+ .word 0x3d027490, 0x3479e3d1
+ .word 0x3fc790ed, 0x4ee26000
+ .word 0x3d199bbd, 0x4e7746f6
+ .word 0x3fc7ab89, 0x0210d000
+ .word 0x3d321237, 0xc6d65ad4
+ .word 0x3fc7c61f, 0x2e673000
+ .word 0x3d2b8da4, 0x99c82e40
+ .word 0x3fc7e0af, 0xd630c000
+ .word 0x3d139e7c, 0x1d8f1034
+ .word 0x3fc7fb3a, 0xfbb75000
+ .word 0x3d204815, 0xb73ec551
+ .word 0x3fc815c0, 0xa1435000
+ .word 0x3d2fab5a, 0x0dbfc630
+ .word 0x3fc83040, 0xc91bc000
+ .word 0x3d3e5b71, 0xc6e66f32
+ .word 0x3fc84abb, 0x75865000
+ .word 0x3d0392a9, 0x058ea173
+ .word 0x3fc86530, 0xa8c70000
+ .word 0x3d398bb0, 0xcb4ea3e3
+ .word 0x3fc87fa0, 0x6520c000
+ .word 0x3d322120, 0x401202fc
+ .word 0x3fc89a0a, 0xacd4e000
+ .word 0x3d2c0bfb, 0xda8f5a72
+ .word 0x3fc8b46f, 0x82236000
+ .word 0x3d12d9f2, 0x102dd7c9
+ .word 0x3fc8cece, 0xe74ad000
+ .word 0x3d16917d, 0x56f5912d
+ .word 0x3fc8e928, 0xde886000
+ .word 0x3d3a8154, 0xb13d72d5
+ .word 0x3fc9037d, 0x6a180000
+ .word 0x3d230dea, 0x57c1c8d9
+ .word 0x3fc91dcc, 0x8c340000
+ .word 0x3d37bc6a, 0xbddeff46
+ .word 0x3fc93816, 0x47159000
+ .word 0x3d267385, 0x2b8b8c4f
+ .word 0x3fc9525a, 0x9cf45000
+ .word 0x3d2ad1d9, 0x04c1d4e3
+ .word 0x3fc96c99, 0x9006a000
+ .word 0x3d2a88d5, 0x9cbb452c
+ .word 0x3fc986d3, 0x22818000
+ .word 0x3cf93b56, 0x4dd44000
+ .word 0x3fc9a107, 0x56988000
+ .word 0x3d264aa6, 0x242cd098
+ .word 0x3fc9bb36, 0x2e7df000
+ .word 0x3d3706ab, 0xaf18f802
+ .word 0x3fc9d55f, 0xac62d000
+ .word 0x3ce732c0, 0x789487af
+ .word 0x3fc9ef83, 0xd2769000
+ .word 0x3d3467a4, 0x26031900
+ .word 0x3fca09a2, 0xa2e79000
+ .word 0x3d311331, 0x195f76e6
+ .word 0x3fca23bc, 0x1fe2b000
+ .word 0x3d258c64, 0xdc46c1ea
+ .word 0x3fca3dd0, 0x4b938000
+ .word 0x3d297da1, 0x366e2c5a
+ .word 0x3fca57df, 0x28244000
+ .word 0x3d3b99c8, 0xca1d9abb
+ .word 0x3fca71e8, 0xb7bdf000
+ .word 0x3d377a9a, 0xc887d66f
+ .word 0x3fca8bec, 0xfc882000
+ .word 0x3d3e3185, 0xcf21b9cf
+ .word 0x3fcaa5eb, 0xf8a93000
+ .word 0x3d2abead, 0x92d5cae2
+ .word 0x3fcabfe5, 0xae461000
+ .word 0x3d125c2b, 0x1a83b18e
+ .word 0x3fcad9da, 0x1f827000
+ .word 0x3d1df520, 0xdff03ebe
+ .word 0x3fcaf3c9, 0x4e80b000
+ .word 0x3d3fe5b1, 0x9cc03270
+ .word 0x3fcb0db3, 0x3d620000
+ .word 0x3d3fee14, 0x38eab906
+ .word 0x3fcb2797, 0xee463000
+ .word 0x3d105dd5, 0xbe4bfd5c
+ .word 0x3fcb4177, 0x634ba000
+ .word 0x3d355d01, 0x5666069f
+ .word 0x3fcb5b51, 0x9e8fb000
+ .word 0x3d2691ba, 0x27fdc19e
+ .word 0x3fcb7526, 0xa22e4000
+ .word 0x3d2c0dbf, 0x2e785490
+ .word 0x3fcb8ef6, 0x70420000
+ .word 0x3d387533, 0x321788e0
+ .word 0x3fcba8c1, 0x0ae46000
+ .word 0x3d3a32e2, 0x9eee9d85
+ .word 0x3fcbc286, 0x742d8000
+ .word 0x3d39ac53, 0xf39d121c
+ .word 0x3fcbdc46, 0xae344000
+ .word 0x3d3625b4, 0x023d6505
+ .word 0x3fcbf601, 0xbb0e4000
+ .word 0x3d2386a9, 0x47c378b5
+ .word 0x3fcc0fb7, 0x9ccfd000
+ .word 0x3d272000, 0xcc2eb551
+ .word 0x3fcc2968, 0x558c1000
+ .word 0x3d318146, 0x108e3ae0
+ .word 0x3fcc4313, 0xe754e000
+ .word 0x3d3279be, 0x74cad7d6
+ .word 0x3fcc5cba, 0x543ae000
+ .word 0x3d20929d, 0xecb454fc
+ .word 0x3fcc765b, 0x9e4d6000
+ .word 0x3d31ab6b, 0x36976f6c
+ .word 0x3fcc8ff7, 0xc79a9000
+ .word 0x3d344358, 0x4bb03de6
+ .word 0x3fcca98e, 0xd22f5000
+ .word 0x3d3e9673, 0xe735df63
+ .word 0x3fccc320, 0xc0176000
+ .word 0x3d240903, 0x9a653794
+ .word 0x3fccdcad, 0x935d1000
+ .word 0x3d3cbe01, 0xf966cb77
+ .word 0x3fccf635, 0x4e09c000
+ .word 0x3d277123, 0x9a07d55b
+ .word 0x3fcd0fb7, 0xf2255000
+ .word 0x3d3ca15a, 0x9bf3989b
+ .word 0x3fcd2935, 0x81b6b000
+ .word 0x3d1f363f, 0xb5d55685
+ .word 0x3fcd42ad, 0xfec35000
+ .word 0x3d3a28ff, 0xc09fef63
+ .word 0x3fcd5c21, 0x6b4fb000
+ .word 0x3d3722b7, 0x221acbf2
+ .word 0x3fcd758f, 0xc95ef000
+ .word 0x3d3a97bd, 0x5d2fa755
+ .word 0x3fcd8ef9, 0x1af31000
+ .word 0x3d3abbe8, 0x0f26ce1f
+ .word 0x3fcda85d, 0x620ce000
+ .word 0x3d240194, 0xc16cc7ec
+ .word 0x3fcdc1bc, 0xa0abe000
+ .word 0x3d38fac1, 0xa628ccc6
+ .word 0x3fcddb16, 0xd8ce9000
+ .word 0x3d384421, 0xa3bed1d1
+ .word 0x3fcdf46c, 0x0c722000
+ .word 0x3d3a5e82, 0xb0b79039
+ .word 0x3fce0dbc, 0x3d92a000
+ .word 0x3d359233, 0xf0529bf1
+ .word 0x3fce2707, 0x6e2af000
+ .word 0x3d172f4f, 0x543fff10
+ .word 0x3fce404d, 0xa034b000
+ .word 0x3d2cf022, 0x3ecbb0ce
+ .word 0x3fce598e, 0xd5a87000
+ .word 0x3d3c5d96, 0x861c2cec
+ .word 0x3fce72cb, 0x107da000
+ .word 0x3d1dd48c, 0xcdf5471c
+ .word 0x3fce8c02, 0x52aa5000
+ .word 0x3d34bfd2, 0x3f8b8c80
+ .word 0x3fcea534, 0x9e23a000
+ .word 0x3d381b93, 0x4c73ccb5
+ .word 0x3fcebe61, 0xf4dd7000
+ .word 0x3d3615d6, 0x67811ada
+ .word 0x3fced78a, 0x58ca8000
+ .word 0x3d16f1b5, 0x3793387e
+ .word 0x3fcef0ad, 0xcbdc5000
+ .word 0x3d326ca4, 0x31bca86e
+ .word 0x3fcf09cc, 0x50036000
+ .word 0x3d3da094, 0x18d999db
+ .word 0x3fcf22e5, 0xe72f1000
+ .word 0x3ce7561d, 0x7d037c19
+ .word 0x3fcf3bfa, 0x934d6000
+ .word 0x3d2d9f2a, 0x937b903b
+ .word 0x3fcf550a, 0x564b7000
+ .word 0x3d366e0e, 0x2fb6fe81
+ .word 0x3fcf6e15, 0x32153000
+ .word 0x3d0b2b44, 0x29d89c5c
+ .word 0x3fcf871b, 0x28955000
+ .word 0x3ce14052, 0xb5b2204b
+ .word 0x3fcfa01c, 0x3bb57000
+ .word 0x3d397823, 0x81478a1f
+ .word 0x3fcfb918, 0x6d5e3000
+ .word 0x3d3c551a, 0xaa8cd86f
+ .word 0x3fcfd20f, 0xbf76f000
+ .word 0x3d3b8ea9, 0x234e4064
+ .word 0x3fcfeb02, 0x33e60000
+ .word 0x3d2f316e, 0x32d5e8c7
+ .word 0x3fd001f7, 0xe6484000
+ .word 0x3d38a957, 0x40c9abbc
+ .word 0x3fd00e6c, 0x45ad5000
+ .word 0x3cdcc68d, 0x52e01203
+ .word 0x3fd01ade, 0x39139000
+ .word 0x3d4deed9, 0xe6647d5c
+ .word 0x3fd0274d, 0xc16c2000
+ .word 0x3d2979e8, 0x9cf835c2
+ .word 0x3fd033ba, 0xdfa74000
+ .word 0x3d0c30bc, 0x1485bdff
+ .word 0x3fd04025, 0x94b4d000
+ .word 0x3cf036b8, 0x9ef42d7f
+ .word 0x3fd04c8d, 0xe1841000
+ .word 0x3d4c0328, 0xb5da628f
+ .word 0x3fd058f3, 0xc703e000
+ .word 0x3d478bcc, 0xa196e4a9
+ .word 0x3fd06557, 0x46227000
+ .word 0x3d0131df, 0xb4868d6a
+ .word 0x3fd071b8, 0x5fcd5000
+ .word 0x3d421a3a, 0x2e0ff2f8
+ .word 0x3fd07e17, 0x14f1c000
+ .word 0x3d40819c, 0xd863da16
+ .word 0x3fd08a73, 0x667c5000
+ .word 0x3d3ebc1d, 0x40c5a329
+ .word 0x3fd096cd, 0x55591000
+ .word 0x3d3f998d, 0x20550a31
+ .word 0x3fd0a324, 0xe2739000
+ .word 0x3d0c6bee, 0x7ef4030e
+ .word 0x3fd0af7a, 0x0eb6c000
+ .word 0x3d23ccf9, 0x4945adad
+ .word 0x3fd0bbcc, 0xdb0d2000
+ .word 0x3d32f32c, 0xcc5dcdfb
+ .word 0x3fd0c81d, 0x4860a000
+ .word 0x3d40d218, 0x5ff17467
+ .word 0x3fd0d46b, 0x579ab000
+ .word 0x3d3d2c81, 0xf640e1e6
+ .word 0x3fd0e0b7, 0x09a43000
+ .word 0x3d32a038, 0xa7862f2a
+ .word 0x3fd0ed00, 0x5f657000
+ .word 0x3d4b48e2, 0xb5e955ff
+ .word 0x3fd0f947, 0x59c66000
+ .word 0x3d4356cf, 0x407bf3a5
+ .word 0x3fd1058b, 0xf9ae4000
+ .word 0x3d45aa31, 0x3f415699
+ .word 0x3fd111ce, 0x4003e000
+ .word 0x3d4c99b9, 0x1ed29693
+ .word 0x3fd11e0e, 0x2dad9000
+ .word 0x3d496e01, 0xdc0cc691
+ .word 0x3fd12a4b, 0xc3911000
+ .word 0x3d452c57, 0xcf5c66d4
+ .word 0x3fd13687, 0x0293a000
+ .word 0x3d4160bd, 0xb314c76f
+ .word 0x3fd142bf, 0xeb9a0000
+ .word 0x3d31ce61, 0x85b58a9e
+ .word 0x3fd14ef6, 0x7f886000
+ .word 0x3d40b42c, 0xd101b436
+ .word 0x3fd15b2a, 0xbf428000
+ .word 0x3d489c71, 0x2d927594
+ .word 0x3fd1675c, 0xababa000
+ .word 0x3d38380e, 0x731f55c4
+ .word 0x3fd1738c, 0x45a66000
+ .word 0x3d431c8b, 0x7fe69f45
+ .word 0x3fd17fb9, 0x8e150000
+ .word 0x3d42baba, 0x2c5aecbe
+ .word 0x3fd18be4, 0x85d93000
+ .word 0x3d3c167f, 0x6f3604ab
+ .word 0x3fd1980d, 0x2dd42000
+ .word 0x3d2b7b3a, 0x7a361c9a
+ .word 0x3fd1a433, 0x86e67000
+ .word 0x3d4e857a, 0xf9cb1f55
+ .word 0x3fd1b057, 0x91f07000
+ .word 0x3d46915c, 0xc91d50e9
+ .word 0x3fd1bc79, 0x4fd1c000
+ .word 0x3d419879, 0xc5c22c21
+ .word 0x3fd1c898, 0xc1699000
+ .word 0x3d43f5f7, 0x8d1cea80
+ .word 0x3fd1d4b5, 0xe796a000
+ .word 0x3d222a5b, 0xd197bac2
+ .word 0x3fd1e0d0, 0xc3371000
+ .word 0x3d3af8f2, 0xa9b0d4a0
+ .word 0x3fd1ece9, 0x5528a000
+ .word 0x3d4cf630, 0x9ec96b89
+ .word 0x3fd1f8ff, 0x9e48a000
+ .word 0x3d27946c, 0x040cbe77
+ .word 0x3fd20513, 0x9f73b000
+ .word 0x3cf6e15e, 0x1609e0a4
+ .word 0x3fd21125, 0x59861000
+ .word 0x3d382e78, 0xba2950c4
+ .word 0x3fd21d34, 0xcd5b9000
+ .word 0x3d3b552f, 0xb28badaa
+ .word 0x3fd22941, 0xfbcf7000
+ .word 0x3d42cb44, 0x850a7b4f
+ .word 0x3fd2354c, 0xe5bc8000
+ .word 0x3d414389, 0x7cfeacce
+ .word 0x3fd24155, 0x8bfd1000
+ .word 0x3d300fff, 0x3228fcad
+ .word 0x3fd24d5b, 0xef6ae000
+ .word 0x3d4ff114, 0x3f81b02a
+ .word 0x3fd25960, 0x10df7000
+ .word 0x3d38e7bc, 0x224ea3e3
+ .word 0x3fd26561, 0xf1338000
+ .word 0x3d38b488, 0x66faa45f
+ .word 0x3fd27161, 0x913f8000
+ .word 0x3d34f4f1, 0xf61564b4
+ .word 0x3fd27d5e, 0xf1db5000
+ .word 0x3d4e6dc8, 0xb8735361
+ .word 0x3fd2895a, 0x13de8000
+ .word 0x3d3a8d7a, 0xd24c13f0
+ .word 0x3fd29552, 0xf81ff000
+ .word 0x3d348d30, 0x1771c408
+ .word 0x3fd2a149, 0x9f762000
+ .word 0x3d479220, 0x57062a92
+ .word 0x3fd2ad3e, 0x0ab73000
+ .word 0x3d2b972e, 0x488c359f
+ .word 0x3fd2b930, 0x3ab89000
+ .word 0x3d4a493b, 0x4a5013d7
+ .word 0x3fd2c520, 0x304f8000
+ .word 0x3d230852, 0x8c342f39
+ .word 0x3fd2d10d, 0xec508000
+ .word 0x3d360c61, 0xf7088353
+ .word 0x3fd2dcf9, 0x6f8fd000
+ .word 0x3d20b4a2, 0x8e33c9ce
+ .word 0x3fd2e8e2, 0xbae11000
+ .word 0x3d4a6138, 0x5992350a
+ .word 0x3fd2f4c9, 0xcf17a000
+ .word 0x3d371f04, 0x9374b87b
+ .word 0x3fd300ae, 0xad063000
+ .word 0x3d342f56, 0x8b75fcac
+ .word 0x3fd30c91, 0x557f1000
+ .word 0x3d4d7ad4, 0xebd75d15
+ .word 0x3fd31871, 0xc9544000
+ .word 0x3d184fab, 0x94cecfd9
+ .word 0x3fd32450, 0x09570000
+ .word 0x3d3d271b, 0x9bdae59d
+ .word 0x3fd3302c, 0x16586000
+ .word 0x3d36217d, 0xc2a3e08b
+ .word 0x3fd33c05, 0xf128d000
+ .word 0x3d4b51be, 0x71fc7961
+ .word 0x3fd347dd, 0x9a987000
+ .word 0x3d4aa9ac, 0x8ace9fdc
+ .word 0x3fd353b3, 0x1376d000
+ .word 0x3d4d99ca, 0x0327b24d
+ .word 0x3fd35f86, 0x5c932000
+ .word 0x3d427c10, 0xd8af2d5b
+ .word 0x3fd36b57, 0x76bc1000
+ .word 0x3d116978, 0x5a9c223f
+ .word 0x3fd37726, 0x62bfd000
+ .word 0x3d40b5e4, 0xa9d627ef
+ .word 0x3fd382f3, 0x216c4000
+ .word 0x3d4df3c5, 0xbc5cb012
+ .word 0x3fd38ebd, 0xb38ed000
+ .word 0x3d290582, 0xe67d4ca0
+ .word 0x3fd39a86, 0x19f45000
+ .word 0x3d18ee51, 0x937354f5
+ .word 0x3fd3a64c, 0x55694000
+ .word 0x3d37a71c, 0xbcd735d0
+ .word 0x3fd3b210, 0x66b9b000
+ .word 0x3d461f09, 0x33f754f9
+ .word 0x3fd3bdd2, 0x4eb14000
+ .word 0x3d46d425, 0xb478c893
+ .word 0x3fd3c992, 0x0e1b2000
+ .word 0x3d141c28, 0xaa680b76
+ .word 0x3fd3d54f, 0xa5c1f000
+ .word 0x3d3c3e1c, 0xd9a395e3
+ .word 0x3fd3e10b, 0x16701000
+ .word 0x3d3f3bcf, 0x145429c7
+ .word 0x3fd3ecc4, 0x60ef5000
+ .word 0x3d4e9fd7, 0x9d83ecff
+ .word 0x3fd3f87b, 0x86093000
+ .word 0x3d451014, 0x55d3b3bc
+ .word 0x3fd40430, 0x8686a000
+ .word 0x3d3f8ef4, 0x3049f7d3
+ .word 0x3fd40fe3, 0x63303000
+ .word 0x3d3e5c5f, 0xe79f05c6
+ .word 0x3fd41b94, 0x1cce0000
+ .word 0x3d47dcb7, 0xf60de01c
+ .word 0x3fd42742, 0xb427d000
+ .word 0x3d433c6c, 0x7ea3ecc5
+ .word 0x3fd432ef, 0x2a04e000
+ .word 0x3d40276b, 0x3674752a
+ .word 0x3fd43e99, 0x7f2c1000
+ .word 0x3d1c3f72, 0x40c41a04
+ .word 0x3fd44a41, 0xb463c000
+ .word 0x3d31ee28, 0xf37cf612
+ .word 0x3fd455e7, 0xca720000
+ .word 0x3d1ad8c6, 0x36629aed
+ .word 0x3fd4618b, 0xc21c5000
+ .word 0x3d4d84fa, 0x16f66f66
+ .word 0x3fd46d2d, 0x9c280000
+ .word 0x3d359b27, 0x5f67f75a
+ .word 0x3fd478cd, 0x5959b000
+ .word 0x3d2ec89b, 0xf0c8d098
+ .word 0x3fd4846a, 0xfa75b000
+ .word 0x3d4a7057, 0x47219c8d
+ .word 0x3fd49006, 0x80400000
+ .word 0x3d43a198, 0x00f2f83a
+ .word 0x3fd49b9f, 0xeb7c1000
+ .word 0x3d3dac1c, 0x58ab60d7
+ .word 0x3fd4a737, 0x3cecf000
+ .word 0x3d432ee5, 0x8a0655db
+ .word 0x3fd4b2cc, 0x75555000
+ .word 0x3d43f81a, 0x1c3a02db
+ .word 0x3fd4be5f, 0x95777000
+ .word 0x3d4141b6, 0x993293ee
+ .word 0x3fd4c9f0, 0x9e152000
+ .word 0x3d487888, 0x63c7f488
+ .word 0x3fd4d57f, 0x8fefe000
+ .word 0x3d23f926, 0x7fd06868
+ .word 0x3fd4e10c, 0x6bc8a000
+ .word 0x3cf8283f, 0x1636f061
+ .word 0x3fd4ec97, 0x32600000
+ .word 0x3d234d7a, 0xaf04d104
+ .word 0x3fd4f81f, 0xe4763000
+ .word 0x3d4a00c2, 0x6f2c03dd
+ .word 0x3fd503a6, 0x82cb1000
+ .word 0x3d4965cd, 0xc3a41929
+ .word 0x3fd50f2b, 0x0e1e0000
+ .word 0x3d3a0940, 0x8c47b8d8
+ .word 0x3fd51aad, 0x872df000
+ .word 0x3d405a13, 0x927ac19f
+ .word 0x3fd5262d, 0xeeb98000
+ .word 0x3d40f230, 0x47bb5b00
+ .word 0x3fd531ac, 0x457ee000
+ .word 0x3d3df83b, 0x7d931501
+ .word 0x3fd53d28, 0x8c3bd000
+ .word 0x3d4ddd8d, 0x029240a7
+ .word 0x3fd548a2, 0xc3add000
+ .word 0x3d23167e, 0x63081cf7
+ .word 0x3fd5541a, 0xec91b000
+ .word 0x3d4f3f4a, 0xa91c688a
+ .word 0x3fd55f91, 0x07a43000
+ .word 0x3d4dc337, 0x10e416b4
+ .word 0x3fd56b05, 0x15a18000
+ .word 0x3d29247b, 0xbc4a23fc
+ .word 0x3fd57677, 0x17455000
+ .word 0x3d44d8a9, 0x356d941b
+ .word 0x3fd581e7, 0x0d4b2000
+ .word 0x3d4c19c3, 0xc9da4e1c
+ .word 0x3fd58d54, 0xf86e0000
+ .word 0x3d2791f3, 0x0a795215
+ .word 0x3fd598c0, 0xd9687000
+ .word 0x3d43d05b, 0x4793492e
+ .word 0x3fd5a42a, 0xb0f4c000
+ .word 0x3d4fc338, 0xa1a4108b
+ .word 0x3fd5af92, 0x7fccd000
+ .word 0x3d4c7f9a, 0x01400711
+ .word 0x3fd5baf8, 0x46aa1000
+ .word 0x3d46328b, 0x83c602e0
+ .word 0x3fd5c65c, 0x06459000
+ .word 0x3d4300fc, 0xff3f88cd
+ .word 0x3fd5d1bd, 0xbf580000
+ .word 0x3d4394a1, 0x1b1c1ee4
+ .word 0x3fd5dd1d, 0x7299b000
+ .word 0x3d43a84f, 0x3bf518f5
+ .word 0x3fd5e87b, 0x20c29000
+ .word 0x3d3527d1, 0x8f7738fa
+ .word 0x3fd5f3d6, 0xca8a2000
+ .word 0x3d37af84, 0x8e19cc75
+ .word 0x3fd5ff30, 0x70a79000
+ .word 0x3d2e9e43, 0x9f105039
+ .word 0x3fd60a88, 0x13d1a000
+ .word 0x3d36e9b9, 0xc879af55
+ .word 0x3fd615dd, 0xb4bec000
+ .word 0x3d13c7ca, 0x90bc04b2
+ .word 0x3fd62131, 0x5424e000
+ .word 0x3d463e81, 0xdaacbccc
+ .word 0x3fd62c82, 0xf2b9c000
+ .word 0x3d3e54bd, 0xbd7c8a98
+ .word 0x3fd637d2, 0x91329000
+ .word 0x3d450450, 0x865165ea
+ .word 0x3fd64320, 0x30444000
+ .word 0x3d3efe02, 0x7a01d7df
+ .word 0x3fd64e6b, 0xd0a35000
+ .word 0x3d2afe80, 0x69d61295
+ .word 0x3fd659b5, 0x7303e000
+ .word 0x3d1f281d, 0xb0af8efc
+ .word 0x3fd664fd, 0x1819b000
+ .word 0x3d418e55, 0xe463b5fe
+ .word 0x3fd67042, 0xc0983000
+ .word 0x3d4c6148, 0xdbdcf10d
+ .word 0x3fd67b86, 0x6d327000
+ .word 0x3d438fd6, 0x3ea11c64
+ .word 0x3fd686c8, 0x1e9b1000
+ .word 0x3d32bb11, 0x0af84054
+ .word 0x3fd69207, 0xd5845000
+ .word 0x3d43a44f, 0x4861e4ab
+ .word 0x3fd69d45, 0x92a03000
+ .word 0x3d38b1bd, 0xbf97ffa6
+ .word 0x3fd6a881, 0x56a03000
+ .word 0x3d420e9b, 0xd9d37351
+ .word 0x3fd6b3bb, 0x22359000
+ .word 0x3d30f625, 0x7a933268
+ .word 0x3fd6bef2, 0xf6111000
+ .word 0x3d48f8fc, 0x947d5965
+ .word 0x3fd6ca28, 0xd2e34000
+ .word 0x3d430ad0, 0xb8c49166
+ .word 0x3fd6d55c, 0xb95c3000
+ .word 0x3d39b9c8, 0xae9a6ee2
+ .word 0x3fd6e08e, 0xaa2ba000
+ .word 0x3d1e38c1, 0x39318d71
+ .word 0x3fd6ebbe, 0xa600e000
+ .word 0x3d4cce14, 0xc7dd17dd
+ .word 0x3fd6f6ec, 0xad8b2000
+ .word 0x3d249058, 0xfdf08376
+ .word 0x3fd70218, 0xc178e000
+ .word 0x3d42a947, 0x0e225428
+ .word 0x3fd70d42, 0xe2789000
+ .word 0x3d21aead, 0x337ee287
+ .word 0x3fd7186b, 0x11381000
+ .word 0x3d1934e2, 0x677d272b
+ .word 0x3fd72391, 0x4e650000
+ .word 0x3d0c1d52, 0xbdc87d8a
+ .word 0x3fd72eb5, 0x9aac9000
+ .word 0x3d4dd010, 0xd08a7a15
+!! TBL - end
+
+! constants:
+ .align 64
+CONSTANTS:
+ .word 0x40000000,0x00000000
+ .word 0x3fe55555,0x555571da
+ .word 0x3fd99999,0x8702be3a
+ .word 0x3fd24af7,0x3f4569b1
+ .word 0x3ea62e42,0xfee00000 ! scaled by 2**-20
+ .word 0x3caa39ef,0x35793c76 ! scaled by 2**-20
+ .word 0xfffffc00,0x00000000 ! ELEVENBIT
+ .word 0x43200000
+ .word 0xfff00000
+ .word 0xc0190200 ! ELEVENBIT
+ .word 0x0200 ! ELEVENBIT
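+! (decoded: two = 2.0; A1 ~ 2/3, A2 ~ 2/5, A3 ~ 2/7 are the series
+!  coefficients for log((1+s)/(1-s)) = 2s + (2/3)s^3 + (2/5)s^5 + ...;
+!  ln2hi/ln2lo are the usual hi/lo split of log(2), each pre-scaled by
+!  2**-20 because n is carried as an integer scaled by 2**20; the
+!  remaining words are bit patterns used by the reduction and table index)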
+
+#define two 0x00
+#define A1 0x08
+#define A2 0x10
+#define A3 0x18
+#define ln2hi 0x20
+#define ln2lo 0x28
+#define mask 0x30
+#define ox43200000 0x38
+#define oxfff00000 0x3c
+#define oxc0194000 0x40
+#define ox4000 0x44
+
+
+! local storage indices
+
+#define jnk STACK_BIAS-0x8
+#define tmp2 STACK_BIAS-0x10
+#define tmp1 STACK_BIAS-0x18
+#define tmp0 STACK_BIAS-0x20
+#define tmp3 STACK_BIAS-0x28
+#define tmp4 STACK_BIAS-0x30
+#define tmp5 STACK_BIAS-0x38
+#define tmp6 STACK_BIAS-0x40
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5
+
+! g1 TBL
+
+! l0 j0
+! l1 j1
+! l2 j2
+! l3
+! l4	0x90200
+! l5 CONSTANTS
+! l6 0x000fffff
+! l7 0x7ff00000
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 used in primary range bounds check
+! o4 used in primary range bounds check
+! o5 used in .rangeI check section as temporary
+! o7 NOT USED
+
+! f0 u0,q0
+! f2 v0,(two-v0)-u0,z0
+! f4 n0,f0,q0
+! f6 s0
+! f8 q
+! f10 u1,q1
+! f12 v1,(two-v1)-u1,z1
+! f14 n1,f1,q1
+! f16 s1
+! f18 t ! now tmp0 storage
+! f20 u2,q2
+! f22	v2,(two-v2)-u2,z2
+! f24 n2,f2,q2
+! f26 s2
+! f28 0xfff00000
+! f29 0x43200000
+! f30 0x4000
+! f31 0xc0194000
+! f32 t0
+! f34 h0,f0-(c0-h0)
+! f36 c0
+! f38 A1
+! f40 two
+! f42 t1
+! f44 h1,f1-(c1-h1)
+! f46 c1
+! f48 A2
+! f50 0xffff8000... or 0xfffffc00 for 6 or 11 bit tbl resp
+! f52 t2
+! f54 h2,f2-(c2-h2)
+! f56 c2
+! f58 A3 now tmp1 storage
+! f60 ln2hi
+! f62 ln2lo
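+!
+! The following is a rough C sketch (an illustration only, not the shipped
+! code) of the computation each in-flight element goes through.  The name
+! vlog_sketch and the use of frexp()/log() in place of the prebuilt hi/lo
+! table TBL are assumptions made for readability; the real loop keeps n
+! scaled by 2**20, uses ln2hi/ln2lo pre-scaled by 2**-20, and A1, A2 are
+! minimax-tweaked values of 2/3 and 2/5 (see CONSTANTS above).
+!
+! #include <math.h>
+! #include <stdint.h>
+! #include <string.h>
+!
+! double vlog_sketch(double x)          /* assumes x finite and > 0 */
+! {
+!     /* 1. reduction: x = m * 2**k with m in roughly [0.711, 1.422) */
+!     int k;
+!     double m = frexp(x, &k);          /* m in [0.5, 1) */
+!     if (m < 0.7109375) { m *= 2.0; k -= 1; }
+!
+!     /* 2. mT = m rounded to 11 significant bits (round bit 0x200 and
+!      *    mask 0xfffffc00 on the high word, as in the loop), then look
+!      *    up log(mT); log() stands in for the TBL lyhi/lylo pair */
+!     uint64_t b, tb;
+!     memcpy(&b, &m, sizeof b);
+!     tb = (b + 0x0000020000000000ULL) & 0xfffffc0000000000ULL;
+!     double mT;
+!     memcpy(&mT, &tb, sizeof mT);
+!     double lymT = log(mT);
+!
+!     /* 3. log(m/mT) = 2s + (2/3)s^3 + (2/5)s^5 + ..., s = (m-mT)/(m+mT);
+!      *    2s is formed as s*((2-mT)-m) + (m-mT) so it stays accurate */
+!     double s = (m - mT) / (m + mT);
+!     double z = s * s;
+!     double poly = s * ((z * (2.0/5.0) + 2.0/3.0) * z + ((2.0 - mT) - m))
+!                 + (m - mT);
+!
+!     /* 4. log(x) = log(m/mT) + log(mT) + k*log(2) */
+!     return poly + lymT + (double)k * 0.6931471805599453;
+! }
+!
+! Because mT agrees with m to about 11 bits, |s| stays below roughly 2**-11,
+! so the two series terms A1 and A2 are enough for double precision here.
+!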
+!--------------------------------------------------------------------
+!--------------------------------------------------------------------
+! PREFETCH info
+#define PREFETCH_MULT_READS 0
+!--------------------------------------------------------------------
+!--------------------------------------------------------------------
+! define pipes for easier reading
+
+#define ICNT %i0
+
+#define XPTR %i1
+#define XSTR %i2
+#define YPTR %i3
+#define YSTR %i4
+
+#define RANGE_LO %l6
+#define RANGE_HI %l7
+
+#define P0_X1 %f0
+#define P0_f1 %f1
+#define P0_f2 %f2
+#define P0_f3 %f3
+#define P0_f4 %f4
+#define P0_f5 %f5
+#define P0_f6 %f6
+#define P0_f7 %f7
+!#define P0_f8 %f8
+#define T0_f8 %f8
+#define P0_f9 %f9
+
+#define P1_X2 %f10
+#define P1_f11 %f11
+#define P1_f12 %f12
+#define P1_f13 %f13
+#define P1_f14 %f14
+#define P1_f15 %f15
+#define P1_f16 %f16
+#define P1_f17 %f17
+
+!#define P1_f18 %f18
+#define T1_f18 %f18
+
+#define P1_f19 %f19
+
+#define P2_X3 %f20
+#define P2_f21 %f21
+#define P2_f22 %f22
+#define P2_f23 %f23
+#define P2_f24 %f24
+#define P2_f25 %f25
+#define P2_f26 %f26
+#define P2_f27 %f27
+#define INF_f28 %f28
+#define CONSTE432_f29 %f29
+
+#define CONST_f30 %f30
+
+#define TTOPMSK %f31
+
+#define P0_f32 %f32
+#define P0_f34 %f34
+#define P0_f36 %f36
+
+#define P1_f42 %f42
+#define P1_f44 %f44
+#define P1_f46 %f46
+
+#define P2_f52 %f52
+#define P2_f54 %f54
+#define P2_f56 %f56
+
+#define G1_TBL %g1
+#define L5_CONSTANTS %l5
+#define FP40_TWO %f40
+#define FP38_A1 %f38
+#define FP48_A2 %f48
+#define FP50_MASK %f50
+!!!#define FP58_A3 %f58
+#define T2_f58 %f58
+#define FP60_LN2HI %f60
+#define FP62_LN2LO %f62
+
+
+!--------------------------------------------------------------------
+
+ ENTRY(__vlog_ultra3)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,CONSTANTS,l5)
+ PIC_SET(l7,TBL,o0)
+ mov %o0,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+
+ ld [XPTR],%l0 ! quickly !X1
+
+ sethi %hi(0x90200),%l4 ! ELEVENBIT
+ or %l4,%lo(0x90200),%l4 ! ELEVENBIT
+ ldd [XPTR],P0_X1 ! u.l[0] = *x !X1
+ sethi %hi(0x000fffff),RANGE_LO
+ or RANGE_LO,%lo(0x000fffff),RANGE_LO
+ sethi %hi(0x7ff00000),RANGE_HI
+ ldd [L5_CONSTANTS+two],FP40_TWO
+ fzero P1_X2
+ fzero P2_X3
+ ldd [L5_CONSTANTS+A1],FP38_A1
+ ldd [L5_CONSTANTS+A2],FP48_A2
+ ldd [L5_CONSTANTS+ln2hi],FP60_LN2HI
+ ldd [L5_CONSTANTS+ln2lo],FP62_LN2LO
+ ldd [L5_CONSTANTS+mask],FP50_MASK
+ ld [L5_CONSTANTS+ox43200000],CONSTE432_f29
+ ld [L5_CONSTANTS+oxfff00000],INF_f28
+ ld [L5_CONSTANTS+oxc0194000],TTOPMSK
+ fpadd32s P0_X1,TTOPMSK,P0_f2 ! X+TTOP !X1 START
+ ld [L5_CONSTANTS+ox4000],CONST_f30
+ sll XSTR,3,XSTR ! scale strides
+ sll YSTR,3,YSTR
+ add %fp,jnk,%o0 ! precondition loop
+ fands P0_f2,INF_f28,P0_f2 ! (X+TTOP)&INF->n X1
+! st P0_X1,[%fp+tmp0] !BYPASS in
+ fzero P0_f4
+ fzero P0_f6
+! ld [%fp+tmp0],%l0 !BYPASS out ix X1
+ add %fp,jnk,%o1
+ add %fp,jnk,%o2
+ fzero P0_f32
+ fzero P0_f34
+ fzero P0_f36
+ fzero P1_f12
+ sub %l0,RANGE_HI,%o3 ! bounds for X1
+ sub RANGE_LO,%l0,%o4 ! bounds for X1
+ fzero P1_f14
+ fzero P1_f16
+ sub YPTR,YSTR,YPTR
+ fzero P1_f42
+ mov %g0,%l1 ! zero out for first pass
+ mov %g0,%l2 ! zero out for first pass
+ fzero P1_f44
+ fzero P1_f46
+ fzero T0_f8
+ fzero T1_f18
+ fzero T2_f58
+ fzero P2_f24
+ fzero P2_f26
+ fzero P2_f52
+ fzero P2_f54
+ fzero P2_f56
+ ba .loop0
+ std P2_f26,[%fp+tmp2]
+
+ .align 16
+! -- 16 byte aligned
+.loop0:
+!############################# AREA 1 (0-19) ###################################!
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 0
+
+ fmuld P1_f44,FP48_A2,P1_f46 ! s^2,A2 ! X2-2
+ andcc %o3,%o4,%o4 ! X1
+ bge,pn %icc,.range0 ! ix<=0x000fffff or >=0x7ff00000 ! X1
+! delay slot
+ nop
+ ! x , n , reduction
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 1
+ fpsub32s P0_X1,P0_f2,P0_X1 ! X - n -> x ! X1
+ add XPTR,XSTR,XPTR ! x += stridex
+ add YPTR,YSTR,YPTR ! y += stridey !
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 2
+.cont0:
+ ! n*l2lo , lylo
+ faddd P0_f4,P0_f34,P0_f34 !n*l2lo,lylo ! X1-2
+ ! TBL calc
+	add	%l0,%l4,%l0		! j = ix + 0x90200	X1
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 3
+ fsubd FP40_TWO,P2_f24,P2_f24 ! two - xT ! X3-2
+
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 4
+	! round up reduction
+ fpadd32s P0_X1,CONST_f30,P0_f4 ! x round up X1
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 5
+ ! s ( poly + ( 2-xT-x)), n*l2lo+lylo
+ faddd P0_f36,P0_f34,P0_f36 ! + n*l2lo+lylo X1-2
+ ! n*l2hi
+ fmuld T0_f8,FP60_LN2HI,T0_f8 ! n*l2hi ! X1-2
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 6
+ fmuld T1_f18,FP62_LN2LO,P1_f12 ! n*l2lo ! X2
+ faddd P1_f46,FP38_A1,P1_f46 ! (s^2*A2), A1 X2-2
+ ! TBL calc
+	srl	%l0,10,%l0		! j=(j>>10)&0x3ff !ELEVENBIT	! X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 7
+ fsubd P2_f24,P2_X3,P2_f24 ! (two - xT) - x ! !X3-2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 8
+ ldda [XPTR]%asi,P1_X2 ! X2-nextX START
+	! x rounded up & 0xfffffc00 -> xT, i.e. x truncated to 11 significant bits
+ fand P0_f4,FP50_MASK,P0_f4 ! xT ! X1
+
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 9
+ faddd P0_f36,P0_f32,P0_f36 ! + (x-xT) X1-2
+ and %l0,0x3ff,%l0 ! ELEVENBIT ! X1
+ st P1_X2,[%fp+tmp0] !BYPASS in ! X2
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 10
+ fmuld P1_f46,P1_f44,P1_f46 ! s^2*A2+A1 , s^2 X2-2
+ ldd [G1_TBL+%l1],P1_f44 !lylo ! X2-2
+ sub %l1,8,%l1 ! get back ptr to lyhi X2-2
+ faddd P1_f12,P1_f44,P1_f44 !n*l2lo,lylo ! X2-2
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 11
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 12
+ faddd P0_f36,P0_f6,P0_f36 ! + lyhi X1-2
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 13
+ ! x+xT
+ faddd P0_X1,P0_f4,P0_f6 ! x + xT ! X1
+ ! TBL calc
+ sll %l0,4,%l0 ! ELEVENBIT ! X1
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 14
+ faddd P1_f46,P1_f14,P1_f46 ! (s^2*A2+A1)s^2 + (2-xT-x) X2-2
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 15
+ fpadd32s P1_X2,TTOPMSK,P1_f12 ! X + TTOP ! X2
+ ld [%fp+tmp0],%l3 !BYPASS out ! X2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 1.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 16
+ ! x-xT
+ fsubd P0_X1,P0_f4,P0_f32 ! x-xT ! X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 17
+ faddd P0_f36,T0_f8,P0_f36 ! + n*l2hi X1-2
+ ! TBL+1
+ add %l0,8,%l0 ! X1
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 18
+ fmuld P1_f16,P1_f46,P1_f46 ! s*(POLY) ! X2-2
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 19
+ fands P1_f12,INF_f28,P1_f12 ! X2
+ fmuld P2_f26,P2_f26,P2_f54 ! z = s * s ! !X3-2
+
+!############################# AREA 2 (20-39) ###################################!
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 20
+ ! (x-xT) / (x+xT) => s
+ fdivd P0_f32,P0_f6,P0_f6 ! -> s ! X1
+ faddd P1_f46,P1_f44,P1_f46 ! + n*l2lo+lylo X2-2
+ ldd [G1_TBL+%l1],P1_f44 ! ld lyhi ! X2-2
+ mov %l3,%l1 ! BYPASS temp ! X2
+ ! wrap !!! done for X0
+ std P0_f36,[%o0] ! X1-2 FINI
+ mov YPTR,%o0 ! X1-2 INC
+
+ addcc ICNT,-1,ICNT !
+ ble,pn %icc,.endloop0 !
+! delay slot
+ nop
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 21
+! -- 16 byte aligned
+.loop1:
+ sub %l1,RANGE_HI,%o3 ! bounds for X2
+ sub RANGE_LO,%l1,%o4 ! bounds for X2
+ andcc %o3,%o4,%o4 ! X2
+ bge,pn %icc,.range1 ! ix<=0x000fffff or >=0x7ff00000 ! X2
+! delay slot
+ nop
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 22
+ fpsub32s P1_X2,P1_f12,P1_X2 ! X - n -> x ! X2
+ add XPTR,XSTR,XPTR ! x += stridex
+ add YPTR,YSTR,YPTR ! y += stridey !
+.cont1:
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 23
+ fmuld P2_f54,FP48_A2,P2_f56 ! s^2,A2! X3-2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 24
+ ! n to double
+ fitod P0_f2,T0_f8 ! (double) n ! X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 25
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 26
+ faddd P1_f46,P1_f42,P1_f46 ! + (x-xT) X2-2
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 27
+ fpadd32s P1_X2,CONST_f30,P1_f14 ! x round up X2
+ faddd P2_f56,FP38_A1,P2_f56 ! (s^2*A2), A1 X3-2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 28
+ ! 2 , xT
+ fsubd FP40_TWO,P0_f4,P0_f4 ! two - xT ! X1
+ fmuld T1_f18,FP60_LN2HI,T1_f18 ! n*l2hi ! X2-2
+ ldda [XPTR]%asi,P2_X3 ! X3-nextX START
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 29
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 30
+ faddd P1_f46,P1_f44,P1_f46 ! + lyhi X2-2
+ st P2_X3,[%fp+tmp0] !BYPASS in ! X3
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 31
+ fand P1_f14,FP50_MASK,P1_f14 ! xT ! X2
+ fmuld P2_f56,P2_f54,P2_f56 ! s^2*A2+A1 , s^2 X3-2
+ ldd [G1_TBL+%l2],P2_f54 !lylo ! X3
+ sub %l2,8,%l2 ! back to TBL hi ! X3
+	add	%l1,%l4,%l1		! j = ix + 0x90200	X2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 32
+ ! 2-xT , x
+ fsubd P0_f4,P0_X1,P0_f4 ! (two - xT) - x ! !X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 33
+ fpadd32s P2_X3,TTOPMSK,P2_f22 ! X + TTOP ! X3
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 34
+ faddd P1_f46,T1_f18,P1_f46 ! + n*l2hi X2-2
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 35
+ faddd P1_X2,P1_f14,P1_f16 ! x + xT ! X2
+	srl	%l1,10,%l1		! j=(j>>10)&0x3ff !ELEVENBIT	! X2
+ faddd P2_f56,P2_f24,P2_f56 ! + 2-xT-x X3-2
+
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 2.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 36
+ fitod P1_f12,T1_f18 ! (double) n ! X2
+ fmuld T2_f58,FP62_LN2LO,P2_f24 ! n*l2lo ! X3-2
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 37
+ fands P2_f22,INF_f28,P2_f22 ! X3
+ ld [%fp+tmp0],%l3 !BYPASS out ! X3
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 38
+ std P1_f46,[%o1] ! X2-2 FINI
+ mov YPTR,%o1 ! X2-2 INC
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 39
+ fsubd P1_X2,P1_f14,P1_f42 ! x-xT ! X2
+ fmuld P2_f26,P2_f56,P2_f56 ! s*(POLY) ! X3-2
+ ldd [G1_TBL+%l2],P2_f26 ! ld lyhi ! X3
+ mov %l3,%l2 ! BYPASS for X3 ! X3
+ and %l1,0x3ff,%l1 ! ELEVENBIT ! X2
+
+!############################# AREA 3 (40-59) ###################################!
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.1 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 40
+ faddd P2_f24,P2_f54,P2_f54 !n*l2lo,lylo ! X3-2
+ ! s , s
+ fmuld P0_f6,P0_f6,P0_f34 ! z = s * s ! !X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 41
+ fdivd P1_f42,P1_f16,P1_f16 ! -> s ! X2
+! -- 16 byte aligned
+ addcc ICNT,-1,ICNT !
+ ble,pn %icc,.endloop1 !
+ nop
+.loop2:
+
+ sub %l2,RANGE_HI,%o3 ! bounds for X3
+ sub RANGE_LO,%l2,%o4 ! bounds for X3
+ andcc %o3,%o4,%o4 ! X3
+ bge,pn %icc,.range2 ! ix<=0x000fffff or >=0x7ff00000 ! X3
+! delay slot
+ nop
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 42
+ fpsub32s P2_X3,P2_f22,P2_X3 ! X - n -> x ! X3
+ add XPTR,XSTR,XPTR ! x += stridex
+ add YPTR,YSTR,YPTR ! y += stridey !
+.cont2:
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 43
+ sll %l1,4,%l1 ! ELEVENBIT ! X2
+ fmuld T2_f58,FP60_LN2HI,T2_f58 ! n*l2hi ! X3-2
+ faddd P2_f56,P2_f54,P2_f56 ! + n*l2lo+lylo X3-2
+
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.2 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 44
+	! s^2 , A2
+ fmuld P0_f34,FP48_A2,P0_f36 ! s^2,A2 ! X1
+ fsubd FP40_TWO,P1_f14,P1_f14 ! two - xT ! X2
+	add	%l2,%l4,%l2		! j = ix + 0x90200	X3
+	srl	%l2,10,%l2		! j=(j>>10)&0x3ff !ELEVENBIT	! X3
+ ldda [XPTR]%asi,P0_X1 ! X1-nextX START
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 45
+ st P0_X1,[%fp+tmp0] !BYPASS in ! X1-nextX
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 46
+ fpadd32s P2_X3,CONST_f30,P2_f24 ! x round up X3
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 47
+ add %l1,8,%l1 ! X2
+ faddd P2_f56,P2_f52,P2_f56 ! + (x-xT) X3-2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.3 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 48
+ ! s^2*A2 , A1
+ faddd P0_f36,FP38_A1,P0_f36 ! (s^2*A2), A1 X1
+
+ and %l2,0x3ff,%l2 ! ELEVENBIT ! X3
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 49
+ fsubd P1_f14,P1_X2,P1_f14 ! (two - xT) - x ! !X2
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 50
+ fand P2_f24,FP50_MASK,P2_f24 ! xT ! X3
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 51
+ faddd P2_f56,P2_f26,P2_f56 ! + lyhi X3-2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.4 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 52
+ ! s^2*A2+A1 , s^2
+ fmuld P0_f36,P0_f34,P0_f36 ! s^2*A2+A1 , s^2 X1
+ fpadd32s P0_X1,TTOPMSK,P0_f2 ! X + TTOP ! X1-nextX
+ sll %l2,4,%l2 ! ELEVENBIT ! X3
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 53
+ ! lylo
+ ldd [G1_TBL+%l0],P0_f34 !lylo ! X1
+ add %l0,-8,%l0 !lyhi pointer ! X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 54
+ faddd P2_X3,P2_f24,P2_f26 ! x + xT ! X3
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 55
+ faddd P2_f56,T2_f58,P2_f56 ! + n*l2hi X3-2
+
+!>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 3.5 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 56
+	! s^2*(s^2*A2+A1) + (2-xT-x)
+ faddd P0_f36,P0_f4,P0_f36 ! X1
+ add %l2,8,%l2 ! TBL+8 is TBL lo ! X3
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 57
+ ! X+TTOP & INF -> n
+ fands P0_f2,INF_f28,P0_f2 ! X1-nextX
+ ! n * l2lo
+ fmuld T0_f8,FP62_LN2LO,P0_f4 ! n*l2lo ! X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 58
+ fsubd P2_X3,P2_f24,P2_f52 ! x-xT ! X3
+!BEST ld [%fp+tmp0],%l3 !BYPASS out ! X1-nextX
+ ld [%fp+tmp0],%l3 !BYPASS out ! X1-nextX
+
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 59
+ fitod P2_f22,T2_f58 ! (double) n ! X3
+ std P2_f56,[%o2] ! X3 FINI
+ mov YPTR,%o2 ! X3 INC
+
+!############################# AREA 4 (OVERFLOW) ###################################!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 60
+	! s * (s^2*(s^2*A2+A1) + (2-xT-x))
+ fmuld P0_f6,P0_f36,P0_f36 ! s*(POLY) ! X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 61
+ fmuld P1_f16,P1_f16,P1_f44 ! z = s * s ! !X2
+ ! lyhi
+ ldd [G1_TBL+%l0],P0_f6 ! ld lyhi ! X1
+ mov %l3,%l0 ! BYPASS tmp for X1 ! X1
+ sub %l0,RANGE_HI,%o3 ! bounds for X1
+ sub RANGE_LO,%l0,%o4 ! bounds for X1
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 62
+ addcc ICNT,-1,ICNT !
+! FALL THROUGH if running out of X array here
+ bg,pt %icc,.loop0 !62
+! delay slot
+ fdivd P2_f52,P2_f26,P2_f26 ! -> s ! X3
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!CYCLE 63
+!LOSTC
+
+
+
+
+
+ ! Once we get to the last element, we loop three more times to finish
+ ! the computations in progress. This means we will load past the end
+ ! of the argument vector, but since we use non-faulting loads and never
+ ! use the data, the only potential problem is cache miss. (Strictly
+ ! speaking, since we pad the argument vector with twos, we incorrectly
+ ! raise inexact if the actual argument vector is all ones.)
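+	!
+	! A toy C model (illustration only) of the drain pattern just
+	! described: once the real arguments run out, keep issuing the
+	! harmless dummy argument 2.0 so the last in-flight elements can
+	! retire.  DEPTH and vlog_drain_model are names invented for this
+	! sketch; the real loop overlaps the work across three elements.
+	!
+	! #include <math.h>
+	! #include <stddef.h>
+	!
+	! void vlog_drain_model(size_t n, const double *x, double *y)
+	! {
+	!     enum { DEPTH = 3 };                     /* three staggered elements */
+	!     double inflight[DEPTH];
+	!     for (size_t i = 0; i + 1 < n + DEPTH; i++) {
+	!         double arg = (i < n) ? x[i] : 2.0;  /* pad past the end with 2.0 */
+	!         inflight[i % DEPTH] = arg;          /* start a new element       */
+	!         if (i + 1 >= DEPTH)                 /* retire the element begun  */
+	!             y[i + 1 - DEPTH] =              /* DEPTH-1 iterations ago    */
+	!                 log(inflight[(i + 1) % DEPTH]);
+	!     }
+	! }
+	!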
+ .endloop2:
+ sethi %hi(0x40000000),%l0 ! "next argument" = two
+ sub %l0,RANGE_HI,%o3 ! bnds chk x1 !54
+ sub RANGE_LO,%l0,%o4 ! bounds chk x1 !54
+ fmovd FP40_TWO,P0_X1
+ cmp ICNT,-3
+ bg,a,pt %icc,.loop0
+ ! delay slot
+ fpadd32s P0_X1,TTOPMSK,P0_f2 ! n=(ix+0xc0194000)&0xfff00000
+ ret
+ restore
+
+ .align 16
+ .endloop0:
+ sethi %hi(0x40000000),%l1 ! "next argument" = two
+ fmovd FP40_TWO,P1_X2
+ cmp ICNT,-3
+ bg,a,pt %icc,.loop1
+ ! delay slot
+ fpadd32s P1_X2,TTOPMSK,P1_f12 ! n=(ix+0xc0194000)&0xfff00000
+ ret
+ restore
+
+ .align 16
+ .endloop1:
+ sethi %hi(0x40000000),%l2 ! "next argument" = two
+ fmovd FP40_TWO,P2_X3
+ cmp ICNT,-3
+ bg,a,pt %icc,.loop2
+ ! delay slot
+ fpadd32s P2_X3,TTOPMSK,P2_f22 ! n=(ix+0xc0194000)&0xfff00000
+ ret
+ restore
+
+
+ .align 16
+ .range0:
+ cmp %l0,RANGE_HI
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+ ! delay slot
+ ld [XPTR+4],%o5
+ !THERE
+ fxtod P0_X1,P0_X1 ! scale by 2**1074 w/o trapping
+ st P0_X1,[%fp+tmp0] !BYPASS in
+ add XPTR,XSTR,XPTR ! x += stridex
+ orcc %l0,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+ ! delay slot
+ add YPTR,YSTR,YPTR ! y += stridey
+ ! HERE
+ fpadd32s P0_X1,TTOPMSK,P0_f2 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands P0_f2,INF_f28,P0_f2
+ fpsub32s P0_X1,P0_f2,P0_X1 ! u.l[0] -= n
+ ld [%fp+tmp0],%l0 !BYPASS out
+ ba,pt %icc,.cont0
+ ! delay slot
+ fpsub32s P0_f2,CONSTE432_f29,P0_f2 ! n -= 0x43200000
+ 1:
+ fdivs CONSTE432_f29,P0_f1,P0_f2 ! raise div-by-zero
+ ba,pt %icc,3f
+ ! delay slot
+ st INF_f28,[YPTR] ! store -inf
+ 2:
+ sll %l0,1,%l0 ! lop off sign bit
+ add XPTR,XSTR,XPTR ! x += stridex
+ orcc %l0,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+ ! delay slot
+ add YPTR,YSTR,YPTR ! y += stridey
+ fzero P0_f2 ! *y = (x < 0.0? 0.0 : x) * inf
+ fcmpd %fcc0,P0_X1,P0_f2
+ fmovdl %fcc0,P0_f2,P0_X1
+ fand INF_f28,FP50_MASK,P0_f2
+ fnegd P0_f2,P0_f2
+ fmuld P0_X1,P0_f2,P0_X1
+ st P0_X1,[YPTR]
+ 3:
+ addcc ICNT,-1,ICNT
+ ble,pn %icc,.endloop2
+ ! delay slot
+ st P0_f1,[YPTR+4]
+ ld [XPTR],%l0 ! get next argument
+ sub %l0,RANGE_HI,%o3 ! bnds chk x1 !54
+ sub RANGE_LO,%l0,%o4 ! bounds chk x1 !54
+ ldd [XPTR],P0_X1
+ fpadd32s P0_X1,TTOPMSK,P0_f2 ! n=(ix+0xc0194000)&0xfff00000
+ ba,pt %icc,.loop0
+ ! delay slot
+ fands P0_f2,INF_f28,P0_f2 !58
+
+
+ .align 16
+ .range1:
+ cmp %l1,RANGE_HI
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+ ! delay slot
+ ld [XPTR+4],%o5
+ fxtod P1_X2,P1_X2 ! scale by 2**1074 w/o trapping
+ st P1_X2,[%fp+tmp1]
+ add XPTR,XSTR,XPTR ! x += stridex
+ orcc %l1,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+ ! delay slot
+ add YPTR,YSTR,YPTR ! y += stridey
+ fpadd32s P1_X2,TTOPMSK,P1_f12 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands P1_f12,INF_f28,P1_f12
+ fpsub32s P1_X2,P1_f12,P1_X2 ! u.l[0] -= n
+ ld [%fp+tmp1],%l1
+ ba,pt %icc,.cont1
+ ! delay slot
+ fpsub32s P1_f12,CONSTE432_f29,P1_f12 ! n -= 0x43200000
+ 1:
+ fdivs CONSTE432_f29,P1_f11,P1_f12 ! raise div-by-zero
+ ba,pt %icc,3f
+ ! delay slot
+ st INF_f28,[YPTR] ! store -inf
+ 2:
+ sll %l1,1,%l1 ! lop off sign bit
+ add XPTR,XSTR,XPTR ! x += stridex
+ orcc %l1,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+ ! delay slot
+ add YPTR,YSTR,YPTR ! y += stridey
+ fzero P1_f12 ! *y = (x < 0.0? 0.0 : x) * inf
+ fcmpd %fcc0,P1_X2,P1_f12
+ fmovdl %fcc0,P1_f12,P1_X2
+ fand INF_f28,FP50_MASK,P1_f12
+ fnegd P1_f12,P1_f12
+ fmuld P1_X2,P1_f12,P1_X2
+ st P1_X2,[YPTR]
+ 3:
+ addcc ICNT,-1,ICNT
+ ble,pn %icc,.endloop0
+ ! delay slot
+ st P1_f11,[YPTR+4]
+ ld [XPTR],%l1 ! get next argument
+ ldd [XPTR],P1_X2
+ fpadd32s P1_X2,TTOPMSK,P1_f12 ! X + TTOP
+ ba,pt %icc,.loop1
+ ! delay slot
+ fands P1_f12,INF_f28,P1_f12 ! & INF
+
+
+ .align 16
+.range2:
+ cmp %l2,RANGE_HI
+ bgeu,pn %icc,2f ! if (unsigned) ix >= 0x7ff00000
+! delay slot
+ ld [XPTR+4],%o5
+ fxtod P2_X3,P2_X3 ! scale by 2**1074 w/o trapping
+ st P2_X3,[%fp+tmp2]
+ add XPTR,XSTR,XPTR ! x += stridex
+ orcc %l2,%o5,%g0
+ be,pn %icc,1f ! if x == 0
+! delay slot
+ add YPTR,YSTR,YPTR ! y += stridey
+ fpadd32s P2_X3,TTOPMSK,P2_f22 ! n = (ix + 0xc0194000) & 0xfff00000
+ fands P2_f22,INF_f28,P2_f22
+ fpsub32s P2_X3,P2_f22,P2_X3 ! u.l[0] -= n
+ ld [%fp+tmp2],%l2
+ ba,pt %icc,.cont2
+! delay slot
+ fpsub32s P2_f22,CONSTE432_f29,P2_f22 ! n -= 0x43200000
+1:
+ fdivs CONSTE432_f29,P2_f21,P2_f22 ! raise div-by-zero
+ ba,pt %icc,3f
+! delay slot
+ st INF_f28,[YPTR] ! store -inf
+2:
+ sll %l2,1,%l2 ! lop off sign bit
+ add XPTR,XSTR,XPTR ! x += stridex
+ orcc %l2,%o5,%g0
+ be,pn %icc,1b ! if x == -0
+! delay slot
+ add YPTR,YSTR,YPTR ! y += stridey
+ fzero P2_f22 ! *y = (x < 0.0? 0.0 : x) * inf
+ fcmpd %fcc0,P2_X3,P2_f22
+ fmovdl %fcc0,P2_f22,P2_X3
+ fand INF_f28,FP50_MASK,P2_f22
+ fnegd P2_f22,P2_f22
+ fmuld P2_X3,P2_f22,P2_X3
+ st P2_X3,[YPTR]
+3:
+ addcc ICNT,-1,ICNT
+ ble,pn %icc,.endloop1
+! delay slot
+ st P2_f21,[YPTR+4]
+ ld [XPTR],%l2 ! get next argument
+ ldd [XPTR],P2_X3
+ fpadd32s P2_X3,TTOPMSK,P2_f22 ! X + TTOP
+ ba,pt %icc,.loop2
+! delay slot
+ fands P2_f22,INF_f28,P2_f22 ! X3
+ nop !ld [XPTR+4],P2_f21
+
+ SET_SIZE(__vlog_ultra3)
+
diff --git a/usr/src/libm/src/mvec/vis/__vlogf.S b/usr/src/libm/src/mvec/vis/__vlogf.S
new file mode 100644
index 0000000..a6fcd21
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vlogf.S
@@ -0,0 +1,1276 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vlogf.S 1.11 06/01/23 SMI"
+
+ .file "__vlogf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+!! CONST_TBL[2*i] = 127*log(2) - log(1+i/32), i = [0, 32]
+!! CONST_TBL[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32]
+
+.CONST_TBL:
+ .word 0x405601e6, 0x78fc457b, 0x3e800000, 0x00000000,
+ .word 0x4055ffee, 0x4f4b5df8, 0x3e7f07c1, 0xf07c1f08,
+ .word 0x4055fe05, 0x32e4434f, 0x3e7e1e1e, 0x1e1e1e1e,
+ .word 0x4055fc2a, 0x44598c21, 0x3e7d41d4, 0x1d41d41d,
+ .word 0x4055fa5c, 0xb720babf, 0x3e7c71c7, 0x1c71c71c,
+ .word 0x4055f89b, 0xcf803581, 0x3e7bacf9, 0x14c1bad0,
+ .word 0x4055f6e6, 0xe0c3f1b1, 0x3e7af286, 0xbca1af28,
+ .word 0x4055f53d, 0x4badcb50, 0x3e7a41a4, 0x1a41a41a,
+ .word 0x4055f39e, 0x7d18782e, 0x3e799999, 0x9999999a,
+ .word 0x4055f209, 0xecc5965c, 0x3e78f9c1, 0x8f9c18fa,
+ .word 0x4055f07f, 0x1c5099d5, 0x3e786186, 0x18618618,
+ .word 0x4055eefd, 0x9641645e, 0x3e77d05f, 0x417d05f4,
+ .word 0x4055ed84, 0xed3a291d, 0x3e7745d1, 0x745d1746,
+ .word 0x4055ec14, 0xbb3ced72, 0x3e76c16c, 0x16c16c17,
+ .word 0x4055eaac, 0xa10589ab, 0x3e7642c8, 0x590b2164,
+ .word 0x4055e94c, 0x45758439, 0x3e75c988, 0x2b931057,
+ .word 0x4055e7f3, 0x550f85e3, 0x3e755555, 0x55555555,
+ .word 0x4055e6a1, 0x818078ec, 0x3e74e5e0, 0xa72f0539,
+ .word 0x4055e556, 0x8134aae1, 0x3e747ae1, 0x47ae147b,
+ .word 0x4055e412, 0x0ef783b7, 0x3e741414, 0x14141414,
+ .word 0x4055e2d3, 0xe99c9674, 0x3e73b13b, 0x13b13b14,
+ .word 0x4055e19b, 0xd3b0f9d9, 0x3e73521c, 0xfb2b78c1,
+ .word 0x4055e069, 0x9333fb26, 0x3e72f684, 0xbda12f68,
+ .word 0x4055df3c, 0xf1565bd0, 0x3e729e41, 0x29e4129e,
+ .word 0x4055de15, 0xba3f64fa, 0x3e724924, 0x92492492,
+ .word 0x4055dcf3, 0xbcd73219, 0x3e71f704, 0x7dc11f70,
+ .word 0x4055dbd6, 0xca95a75a, 0x3e71a7b9, 0x611a7b96,
+ .word 0x4055dabe, 0xb7559927, 0x3e715b1e, 0x5f75270d,
+ .word 0x4055d9ab, 0x592bb896, 0x3e711111, 0x11111111,
+ .word 0x4055d89c, 0x8840e4fe, 0x3e70c971, 0x4fbcda3b,
+ .word 0x4055d792, 0x1eaf8df0, 0x3e708421, 0x08421084,
+ .word 0x4055d68b, 0xf863da3d, 0x3e704104, 0x10410410,
+ .word 0x4055d589, 0xf2fe5107, 0x3e700000, 0x00000000,
+ .word 0xbfcffb16, 0xbfa3db6e, ! K3 = -2.49850123953105416108e-01
+ .word 0x3fd5561b, 0xa4b3110b, ! K2 = 3.33380614127478394992e-01
+ .word 0xbfe00000, 0x0b666d0b, ! K1 = -5.00000021234343492201e-01
+ .word 0x3fefffff, 0xff3fd118, ! K0 = 9.99999998601683029714e-01
+ .word 0x3fe62e42, 0xfefa39ef, ! LN2 = 6.931471805599452862e-01
+ .word 0xbf800000, 0x7f800000, ! MONE = -1.0f ; INF
+
+! local storage indices
+#define tmp0 STACK_BIAS-0x8
+#define tmp1 STACK_BIAS-0x10
+#define tmp2 STACK_BIAS-0x18
+#define tmp3 STACK_BIAS-0x20
+#define tmp4 STACK_BIAS-0x28
+#define tmp5 STACK_BIAS-0x30
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x30
+
+#define ZERO %f28
+#define K3 %f30
+#define K2 %f32
+#define K1 %f34
+#define K0 %f36
+#define LN2 %f38
+
+#define stridex %o0
+#define stridex2 %o1
+#define stridey %o2
+#define x0 %o3
+#define x1 %o4
+#define y %o5
+
+#define ind0 %i0
+#define ind1 %i1
+#define ind2 %i2
+#define ind3 %i3
+#define MASK_0x007fffff %i4
+#define MASK_0xfffc0000 %i5
+#define CONST_0x20000 %o7
+#define MASK_0x7f800000 %l3
+
+#define ival0 %l0
+#define iy0 %l1
+#define ival1 %l2
+#define iy1 %l1
+#define ival2 %l4
+#define iy2 %l5
+#define ival3 %l6
+#define iy3 %l2
+#define counter %l7
+
+#define LOGFTBL %g5
+#define LOGFTBL_P8 %g1
+
+! register use
+
+! i0 ind0
+! i1 ind1
+! i2 ind2
+! i3 ind3
+! i4 0x007fffff
+! i5 0xfffc0000
+
+! l0 ival0
+! l1 iy0, iy1
+! l2 ival1, iy3
+! l3 0x7f800000
+! l4 ival2
+! l5 iy2
+! l6 ival3
+! l7 cycle counter
+
+! o0 stridex
+! o1 stridex * 2
+! o2 stridey
+! o3 x
+! o4 x
+! o5 y
+! o7 0x20000
+
+! g1 CONST_TBL
+! g5 CONST_TBL + 8
+
+! f2
+! f4
+! f6
+! f8
+! f9
+! f10
+! f12
+! f14
+! f16
+! f18
+! f19
+! f20
+! f22
+! f24
+! f26
+! f28 ZERO = 0
+! f30 K3 = -2.49850123953105416108e-01
+! f32 K2 = 3.33380614127478394992e-01
+! f34 K1 = -5.00000021234343492201e-01
+! f36 K0 = 9.99999998601683029714e-01
+! f38 LN2 = 6.931471805599452862e-01
+! f40
+! f42
+! f44
+! f46
+! f48
+! f50
+! f52
+! f54
+! f56
+! f58
+! f60
+! f62
+
+
+! !!!!! Algorithm !!!!!
+!
+! double exp, ty, yy, ldtmp0, ldtmp1;
+! double dtmp0, dtmp1, dtmp2, dtmp3, dtmp4, dtmp5;
+! float value;
+! int ival, iy, i, ind, iexp;
+! double K3 = -2.49850123953105416108e-01;
+! double K2 = 3.33380614127478394992e-01;
+! double K1 = -5.00000021234343492201e-01;
+! double K0 = 9.99999998601683029714e-01;
+! double LN2 = 6.931471805599452862e-01;
+! double ZERO = 0;
+! float INF;
+!
+! ival = *(int*)(x);
+! if (ival >= 0x7f800000) goto spec;
+! if (ival <= 0x7fffff) goto spec;
+!	*(float*)&exp = *(float*)(x);
+! exp = vis_fpack32(ZERO, exp);
+! iy = ival & 0x007fffff;
+! ival = iy + 0x20000;
+! ival = ival & 0xfffc0000;
+! i = ival >> 14;
+! ind = i & (-8);
+! iy = iy - ival;
+! ty = LN2 * (double)(*(int*)&exp);
+! ldtmp0 = *(double*)((char*)CONST_TBL+ind);
+! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
+! ty = ty - ldtmp0;
+! yy = (double) iy;
+! yy = yy * ldtmp1;
+! dtmp0 = K3 * yy;
+! dtmp1 = dtmp0 + K2;
+! dtmp2 = dtmp1 * yy;
+! dtmp3 = dtmp2 + K1;
+! dtmp4 = dtmp3 * yy;
+! dtmp5 = dtmp4 + K0;
+! yy = dtmp5 * yy;
+! yy = yy + ty;
+! y[0] = (float)(yy);
+! return;
+!
+!spec:
+! if ((ival & 0x7fffffff) >= 0x7f800000) { /* X = NaN or Inf */
+! value = *(float*) &ival;
+! y[0] = (value < 0.0f? 0.0f : value) * value;
+! return;
+! } else if (ival <= 0) {
+! y[0] = ((ival & 0x7fffffff) == 0) ?
+!		-1.0f / 0.0f : 0.0f / 0.0f;	/* X = +-0 : X < 0 */
+! return;
+!	} else { /* Denormal number */
+! value = (float) ival;
+! ival = *(int*) &value;
+! iexp = (ival >> 23) - 149;
+! iy = ival & 0x007fffff;
+! ival = iy + 0x20000;
+! ival = ival & 0xfffc0000;
+! i = ival >> 14;
+! ind = i & (-8);
+! iy = iy - ival;
+! ty = LN2 * (double)iexp;
+! ldtmp0 = *(double*)((char*)CONST_TBL+ind);
+! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
+! ty = ty - ldtmp0;
+! yy = (double) iy;
+! yy = yy * ldtmp1;
+! dtmp0 = K3 * yy;
+! dtmp1 = dtmp0 + K2;
+! dtmp2 = dtmp1 * yy;
+! dtmp3 = dtmp2 + K1;
+! dtmp4 = dtmp3 * yy;
+! dtmp5 = dtmp4 + K0;
+! yy = dtmp5 * yy;
+! yy = yy + ty;
+! y[0] = (float)(yy);
+! return;
+! }
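+!
+! Table index, worked through (illustrative sketch only; the local names
+! m, n, j below are hypothetical): for the 23-bit mantissa field, the slot
+! chosen is the nearest j in [0,32] to 32*m/2**23, at byte offset 16*j:
+!
+!	int m   = ival & 0x007fffff;          /* mantissa bits            */
+!	int n   = (m + 0x20000) & 0xfffc0000; /* round to the top 5 bits  */
+!	int j   = n >> 18;                    /* 0 <= j <= 32             */
+!	int ind = (n >> 14) & -8;             /* == 16*j, byte offset     */
+!	/* CONST_TBL + ind     : 127*log(2) - log(1 + j/32)               */
+!	/* CONST_TBL + ind + 8 : 2**(-23) / (1 + j/32)                    */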
+!--------------------------------------------------------------------
+
+ ENTRY(__vlogf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,g5)
+ wr %g0,0,%gsr
+
+ st %i0,[%fp+tmp0]
+ stx %i1,[%fp+tmp5]
+
+ sra %i2,0,%l4
+ ldd [LOGFTBL+528],K3
+ add %i3,0,y
+ sllx %l4,2,stridex
+ sllx %l4,3,stridex2
+ ldd [LOGFTBL+536],K2
+ sra %i4,0,%l3
+ ldd [LOGFTBL+544],K1
+ sllx %l3,2,stridey
+ sethi %hi(0x7ffc00),MASK_0x007fffff
+ add MASK_0x007fffff,1023,MASK_0x007fffff
+ ldd [LOGFTBL+552],K0
+ sethi %hi(0xfffc0000),MASK_0xfffc0000
+ ldd [LOGFTBL+560],LN2
+ sethi %hi(0x20000),CONST_0x20000
+ fzero ZERO
+ sethi %hi(0x7f800000),MASK_0x7f800000
+ sub y,stridey,y
+
+.begin:
+ ld [%fp+tmp0],counter
+ ldx [%fp+tmp5],x0
+ st %g0,[%fp+tmp0]
+.begin1:
+	add	x0,stridex2,x1	! x += 2*stridex
+ subcc counter,1,counter
+ bneg,pn %icc,.end
+ lda [x0]0x82,ival0 ! (Y0_0) ival = *(int*)(x)
+
+ add LOGFTBL,8,LOGFTBL_P8
+ lda [stridex+x0]0x82,ival1 ! (Y1_0) ival = *(int*)(x)
+
+ cmp ival0,MASK_0x7f800000 ! (Y0_0) if (ival >= 0x7f800000)
+ lda [x1]0x82,ival2 ! (Y2_0) ival = *(int*)(x);
+
+ bge,pn %icc,.spec ! (Y0_0) if (ival >= 0x7f800000)
+ nop
+
+ cmp ival0,MASK_0x007fffff ! (Y0_0) if (ival <= 0x7fffff)
+ ble,pn %icc,.spec ! (Y0_0) if (ival <= 0x7fffff)
+ nop
+
+ cmp ival1,MASK_0x7f800000 ! (Y1_0) if (ival >= 0x7f800000)
+ and ival0,MASK_0x007fffff,iy0 ! (Y0_0) iy = ival & 0x007fffff
+
+
+ add iy0,CONST_0x20000,ival0 ! (Y0_0) ival = iy + 0x20000
+
+ and ival0,MASK_0xfffc0000,ival0 ! (Y0_0) ival = ival & 0xfffc0000
+ bge,pn %icc,.update2 ! (Y1_0) if (ival >= 0x7f800000)
+ nop
+.cont2:
+ sub iy0,ival0,iy0 ! (Y0_0) iy = iy - ival
+ cmp ival1,MASK_0x007fffff ! (Y1_0) if (ival <= 0x7fffff)
+ lda [stridex+x1]0x82,ival3 ! (Y3_0) ival = *(int*)(x)
+
+ st iy0,[%fp+tmp1] ! (Y0_0) (double) iy
+ ble,pn %icc,.update3 ! (Y1_0) if (ival <= 0x7fffff)
+ nop
+.cont3:
+ cmp ival2,MASK_0x7f800000 ! (Y2_0) if (ival >= 0x7f800000)
+ and ival1,MASK_0x007fffff,iy1 ! (Y1_0) iy = ival & 0x007fffff
+ bge,pn %icc,.update4 ! (Y2_0) if (ival >= 0x7f800000)
+ nop
+.cont4:
+ cmp ival2,MASK_0x007fffff ! (Y2_0) if (ival <= 0x7fffff)
+ ble,pn %icc,.update5 ! (Y2_0) if (ival <= 0x7fffff)
+ nop
+.cont5:
+ add iy1,CONST_0x20000,ival1 ! (Y1_0) ival = iy + 0x20000
+ and ival2,MASK_0x007fffff,iy2 ! (Y2_0) iy = ival & 0x007fffff
+
+ and ival1,MASK_0xfffc0000,ival1 ! (Y1_0) ival = ival & 0xfffc0000
+ add iy2,CONST_0x20000,ival2 ! (Y2_0) ival = iy + 0x20000
+
+ sub iy1,ival1,iy1 ! (Y1_0) iy = iy - ival
+ and ival2,MASK_0xfffc0000,ival2 ! (Y2_0) ival = ival & 0xfffc0000
+
+ cmp ival3,MASK_0x7f800000 ! (Y3_0) (ival >= 0x7f800000)
+ sub iy2,ival2,iy2 ! (Y2_0) iy = iy - ival
+ st iy1,[%fp+tmp3] ! (Y1_0) (double) iy
+
+ st iy2,[%fp+tmp2] ! (Y2_0) (double) iy
+ bge,pn %icc,.update6 ! (Y3_0) (ival >= 0x7f800000)
+ nop
+.cont6:
+ cmp ival3,MASK_0x007fffff ! (Y3_0) if (ival <= 0x7fffff)
+ ld [%fp+tmp1],%f2 ! (Y0_0) (double) iy
+ ble,pn %icc,.update7 ! (Y3_0) if (ival <= 0x7fffff)
+ sra ival0,14,ival0 ! (Y0_0) i = ival >> 14;
+.cont7:
+ sra ival1,14,ind1 ! (Y1_0) i = ival >> 14;
+ ld [%fp+tmp3],%f4 ! (Y1_0) (double) iy
+
+ sra ival2,14,ival2 ! (Y2_0) i = ival >> 14;
+ and ival0,-8,ind0 ! (Y0_0) ind = i & (-8)
+ lda [x0]0x82,%f6 ! (Y0_0) *(float*)&exp = *(float*)(x)
+
+ and ind1,-8,ind1 ! (Y1_0) ind = i & (-8)
+ ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fitod %f2,%f48 ! (Y0_0) yy = (double) iy
+
+ and ival3,MASK_0x007fffff,iy3 ! (Y3_0) iy = ival & 0x007fffff
+ lda [stridex+x0]0x82,%f8 ! (Y1_0) *(float*)&exp = *(float*)(x)
+
+ add iy3,CONST_0x20000,ival3 ! (Y3_0) iy + 0x20000
+ ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fitod %f4,%f26 ! (Y1_0) yy = (double) iy
+
+ sub y,stridey,y ! y += stridey
+ and ival3,MASK_0xfffc0000,ival3 ! (Y3_0) ival = ival & 0xfffc0000
+ lda [x1]0x82,%f10 ! (Y2_0) *(float*)&exp = *(float*)(x)
+
+ add x1,stridex2,x0 ! x += 2*stridex
+ sub iy3,ival3,iy3 ! (Y3_0) iy = iy - ival
+ ld [%fp+tmp2],%f2 ! (Y2_0) (double) iy
+ fmuld %f48,%f14,%f46 ! (Y0_0) yy = yy * ldtmp1
+
+ lda [stridex+x1]0x82,%f12 ! (Y3_0) *(float*)&exp = *(float*)(x)
+ fmuld %f26,%f16,%f62 ! (Y1_0) yy = yy * ldtmp1
+
+ sra ival3,14,ival3 ! (Y3_0) i = ival >> 14;
+ lda [x0]0x82,ival0 ! (Y0_1) ival = *(int*)(x)
+
+ add x0,stridex2,x1 ! x += 2*stridex
+ st iy3,[%fp+tmp3] ! (Y3_0) (double) iy
+ fmuld K3,%f46,%f22 ! (Y0_0) dtmp0 = K3 * yy
+
+ and ival2,-8,ind2 ! (Y2_0) ind = i & (-8)
+ lda [stridex+x0]0x82,ival1 ! (Y1_1) ival = *(int*)(x)
+
+ cmp ival0,MASK_0x7f800000 ! (Y0_1) if (ival >= 0x7f800000)
+ lda [x1]0x82,ival2 ! (Y2_1) ival = *(int*)(x);
+ fmuld K3,%f62,%f50 ! (Y1_0) dtmp0 = K3 * yy
+
+ bge,pn %icc,.update8 ! (Y0_1) if (ival >= 0x7f800000)
+ nop
+.cont8:
+ cmp ival0,MASK_0x007fffff ! (Y0_1) if (ival <= 0x7fffff)
+ ble,pn %icc,.update9 ! (Y0_1) if (ival <= 0x7fffff)
+ faddd %f22,K2,%f48 ! (Y0_0) dtmp1 = dtmp0 + K2
+
+.cont9:
+ cmp ival1,MASK_0x7f800000 ! (Y1_1) if (ival >= 0x7f800000)
+ and ival0,MASK_0x007fffff,iy0 ! (Y0_1) iy = ival & 0x007fffff
+
+ add iy0,CONST_0x20000,ival0 ! (Y0_1) ival = iy + 0x20000
+ ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
+ fpack32 ZERO,%f6,%f6 ! (Y0_0) exp = vis_fpack32(ZERO, exp)
+
+ and ival0,MASK_0xfffc0000,ival0 ! (Y0_1) ival = ival & 0xfffc0000
+ faddd %f50,K2,%f26 ! (Y1_0) dtmp1 = dtmp0 + K2
+ bge,pn %icc,.update10 ! (Y1_1) if (ival >= 0x7f800000)
+ nop
+.cont10:
+ sub iy0,ival0,iy0 ! (Y0_1) iy = iy - ival
+ and ival3,-8,ind3 ! (Y3_0) ind = i & (-8)
+ ld [%fp+tmp3],%f4 ! (Y3_0) (double) iy
+
+ cmp ival1,MASK_0x007fffff ! (Y1_1) if (ival <= 0x7fffff)
+ lda [stridex+x1]0x82,ival3 ! (Y3_1) ival = *(int*)(x)
+ fmuld %f48,%f46,%f50 ! (Y0_0) dtmp2 = dtmp1 * yy
+ fitod %f2,%f48 ! (Y2_0) yy = (double) iy
+
+ st iy0,[%fp+tmp1] ! (Y0_1) (double) iy
+ ble,pn %icc,.update11 ! (Y1_1) if (ival <= 0x7fffff)
+ nop
+.cont11:
+ cmp ival2,MASK_0x7f800000 ! (Y2_1) if (ival >= 0x7f800000)
+ and ival1,MASK_0x007fffff,iy1 ! (Y1_1) iy = ival & 0x007fffff
+ bge,pn %icc,.update12 ! (Y2_1) if (ival >= 0x7f800000)
+ fmuld %f26,%f62,%f42 ! (Y1_0) dtmp2 = dtmp1 * yy
+.cont12:
+ cmp ival2,MASK_0x007fffff ! (Y2_1) if (ival <= 0x7fffff)
+ ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_0) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ ble,pn %icc,.update13 ! (Y2_1) if (ival <= 0x7fffff)
+ fitod %f4,%f26 ! (Y3_0) yy = (double) iy
+.cont13:
+ add iy1,CONST_0x20000,ival1 ! (Y1_1) ival = iy + 0x20000
+ and ival2,MASK_0x007fffff,iy2 ! (Y2_1) iy = ival & 0x007fffff
+
+ and ival1,MASK_0xfffc0000,ival1 ! (Y1_1) ival = ival & 0xfffc0000
+ add iy2,CONST_0x20000,ival2 ! (Y2_1) ival = iy + 0x20000
+ fmuld %f48,%f14,%f44 ! (Y2_0) yy = yy * ldtmp1
+ faddd %f50,K1,%f50 ! (Y0_0) dtmp3 = dtmp2 + K1
+
+ cmp ival3,MASK_0x7f800000 ! (Y3_1) if (ival >= 0x7f800000)
+ sub iy1,ival1,iy1 ! (Y1_1) iy = iy - ival
+ and ival2,MASK_0xfffc0000,ival2 ! (Y2_1) ival = ival & 0xfffc0000
+ fpack32 ZERO,%f8,%f8 ! (Y1_0) exp = vis_fpack32(ZERO, exp)
+
+ sub iy2,ival2,iy2 ! (Y2_1) iy = iy - ival
+ st iy1,[%fp+tmp3] ! (Y1_1) (double) iy
+ fmuld %f26,%f16,%f60 ! (Y3_0) yy = yy * ldtmp1
+ faddd %f42,K1,%f54 ! (Y1_0) dtmp3 = dtmp2 + K1
+
+ st iy2,[%fp+tmp2] ! (Y2_1) (double) iy
+ fmuld K3,%f44,%f22 ! (Y2_0) dtmp0 = K3 * yy
+ bge,pn %icc,.update14 ! (Y3_1) if (ival >= 0x7f800000)
+ fitod %f6,%f40 ! (Y0_0) (double)(*(int*)&exp)
+.cont14:
+ cmp ival3,MASK_0x007fffff ! (Y3_1) if (ival <= 0x7fffff)
+ ldd [LOGFTBL+ind1],%f58 ! (Y1_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld %f50,%f46,%f52 ! (Y0_0) dtmp4 = dtmp3 * yy
+ fitod %f8,%f56 ! (Y1_0) (double)(*(int*)&exp)
+
+ ld [%fp+tmp1],%f2 ! (Y0_1) (double) iy
+ fmuld K3,%f60,%f50 ! (Y3_0) dtmp0 = K3 * yy
+ ble,pn %icc,.update15 ! (Y3_1) if (ival <= 0x7fffff)
+ nop
+.cont15:
+ subcc counter,7,counter
+ fmuld %f54,%f62,%f54 ! (Y1_0) dtmp4 = dtmp3 * yy
+
+ sra ival0,14,ival0 ! (Y0_1) i = ival >> 14;
+ bneg,pn %icc,.tail
+ faddd %f22,K2,%f48 ! (Y2_0) dtmp1 = dtmp0 + K2
+
+ ba .main_loop
+ nop
+
+ .align 16
+.main_loop:
+ sra ival2,14,ival2 ! (Y2_1) i = ival >> 14;
+ ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp)
+ faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0
+
+ sra ival1,14,ind1 ! (Y1_1) i = ival >> 14;
+ ld [%fp+tmp3],%f4 ! (Y1_1) (double) iy
+ fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp)
+ faddd %f50,K2,%f26 ! (Y3_0) dtmp1 = dtmp0 + K2
+
+ and ival0,-8,ind0 ! (Y0_1) ind = i & (-8)
+ lda [x0]0x82,%f6 ! (Y0_1) *(float*)&exp = *(float*)(x)
+ fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp)
+ faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0
+
+ and ind1,-8,ind1 ! (Y1_1) ind = i & (-8)
+ ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy
+ fitod %f2,%f48 ! (Y0_1) yy = (double) iy
+
+ and ival3,MASK_0x007fffff,iy3 ! (Y3_1) iy = ival & 0x007fffff
+ lda [stridex+x0]0x82,%f8 ! (Y1_1) *(float*)&exp = *(float*)(x)
+ fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy
+ fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0
+
+ add iy3,CONST_0x20000,ival3 ! (Y3_1) iy + 0x20000
+ ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fmuld %f26,%f60,%f42 ! (Y3_0) dtmp2 = dtmp1 * yy
+ fitod %f4,%f26 ! (Y1_1) yy = (double) iy
+
+ and ival3,MASK_0xfffc0000,ival3 ! (Y3_1) ival = ival & 0xfffc0000
+ lda [x1]0x82,%f10 ! (Y2_1) *(float*)&exp = *(float*)(x)
+ fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy
+ fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0
+
+ sub iy3,ival3,iy3 ! (Y3_1) iy = iy - ival
+ ld [%fp+tmp2],%f2 ! (Y2_1) (double) iy
+ fmuld %f48,%f14,%f46 ! (Y0_1) yy = yy * ldtmp1
+ faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1
+
+ add x1,stridex2,x0 ! x += 2*stridex
+ st iy3,[%fp+tmp3] ! (Y3_1) (double) iy
+ fpack32 ZERO,%f12,%f20 ! (Y3_0) exp = vis_fpack32(ZERO, exp)
+ faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty
+
+ add y,stridey,y ! y += stridey
+ lda [stridex+x1]0x82,%f12 ! (Y3_1) *(float*)&exp = *(float*)(x)
+ fmuld %f26,%f16,%f62 ! (Y1_1) yy = yy * ldtmp1
+ faddd %f42,K1,%f54 ! (Y3_0) dtmp3 = dtmp2 + K1
+
+ sra ival3,14,ival3 ! (Y3_1) i = ival >> 14;
+ add y,stridey,y ! y += stridey
+ lda [x0]0x82,ival0 ! (Y0_2) ival = *(int*)(x)
+ faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty
+
+ add x0,stridex2,x1 ! x += 2*stridex
+ ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld K3,%f46,%f22 ! (Y0_1) dtmp0 = K3 * yy
+ fitod %f18,%f40 ! (Y2_0) (double)(*(int*)&exp)
+
+ and ival2,-8,ind2 ! (Y2_1) ind = i & (-8)
+ lda [stridex+x0]0x82,ival1 ! (Y1_2) ival = *(int*)(x)
+ fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy
+ fitod %f20,%f56 ! (Y3_0) (double)(*(int*)&exp)
+
+ cmp ival0,MASK_0x7f800000 ! (Y0_2) if (ival >= 0x7f800000)
+ lda [x1]0x82,ival2 ! (Y2_2) ival = *(int*)(x);
+ fmuld K3,%f62,%f50 ! (Y1_1) dtmp0 = K3 * yy
+ fdtos %f48,%f4 ! (Y0_0) (float)(yy)
+
+ st %f4,[y] ! (Y0_0) write into memory
+ fmuld %f54,%f60,%f54 ! (Y3_0) dtmp4 = dtmp3 * yy
+ bge,pn %icc,.update16 ! (Y0_2) if (ival >= 0x7f800000)
+ fdtos %f24,%f4 ! (Y1_0) (float)(yy)
+.cont16:
+	cmp	ival0,MASK_0x007fffff	! (Y0_2) if (ival <= 0x7fffff)
+ ldd [LOGFTBL+ind3],%f58 ! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+	ble,pn	%icc,.update17		! (Y0_2) if (ival <= 0x7fffff)
+ faddd %f22,K2,%f48 ! (Y0_1) dtmp1 = dtmp0 + K2
+.cont17:
+ cmp ival1,MASK_0x7f800000 ! (Y1_2) if (ival >= 0x7f800000)
+ and ival0,MASK_0x007fffff,iy0 ! (Y0_2) iy = ival & 0x007fffff
+ st %f4,[stridey+y] ! (Y1_0) write into memory
+ fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp)
+
+ add iy0,CONST_0x20000,ival0 ! (Y0_2) ival = iy + 0x20000
+ ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
+ faddd %f52,K0,%f22 ! (Y2_0) dtmp5 = dtmp4 + K0
+ fpack32 ZERO,%f6,%f6 ! (Y0_1) exp = vis_fpack32(ZERO, exp)
+
+ and ival0,MASK_0xfffc0000,ival0 ! (Y0_2) ival = ival & 0xfffc0000
+ faddd %f50,K2,%f26 ! (Y1_1) dtmp1 = dtmp0 + K2
+ bge,pn %icc,.update18 ! (Y1_2) if (ival >= 0x7f800000)
+ fmuld LN2,%f56,%f56 ! (Y3_0) ty = LN2 * (double)(*(int*)&exp)
+.cont18:
+ sub iy0,ival0,iy0 ! (Y0_2) iy = iy - ival
+ and ival3,-8,ind3 ! (Y3_1) ind = i & (-8)
+ ld [%fp+tmp3],%f4 ! (Y3_1) (double) iy
+ faddd %f54,K0,%f24 ! (Y3_0) dtmp5 = dtmp4 + K0
+
+ cmp ival1,MASK_0x007fffff ! (Y1_2) if (ival <= 0x7fffff)
+ lda [stridex+x1]0x82,ival3 ! (Y3_2) ival = *(int*)(x)
+ fmuld %f48,%f46,%f50 ! (Y0_1) dtmp2 = dtmp1 * yy
+ fitod %f2,%f48 ! (Y2_1) yy = (double) iy
+
+ st iy0,[%fp+tmp1] ! (Y0_2) (double) iy
+ fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy
+ ble,pn %icc,.update19 ! (Y1_2) if (ival <= 0x7fffff)
+ fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0
+.cont19:
+ cmp ival2,MASK_0x7f800000 ! (Y2_2) if (ival >= 0x7f800000)
+ and ival1,MASK_0x007fffff,iy1 ! (Y1_2) iy = ival & 0x007fffff
+ bge,pn %icc,.update20 ! (Y2_2) if (ival >= 0x7f800000)
+ fmuld %f26,%f62,%f42 ! (Y1_1) dtmp2 = dtmp1 * yy
+.cont20:
+ cmp ival2,MASK_0x007fffff ! (Y2_2) if (ival <= 0x7fffff)
+ ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ ble,pn %icc,.update21 ! (Y2_2) if (ival <= 0x7fffff)
+ fitod %f4,%f26 ! (Y3_1) yy = (double) iy
+.cont21:
+ add iy1,CONST_0x20000,ival1 ! (Y1_2) ival = iy + 0x20000
+ and ival2,MASK_0x007fffff,iy2 ! (Y2_2) iy = ival & 0x007fffff
+ fmuld %f24,%f60,%f24 ! (Y3_0) yy = dtmp5 * yy
+ fsubd %f56,%f58,%f58 ! (Y3_0) ty = ty - ldtmp0
+
+ and ival1,MASK_0xfffc0000,ival1 ! (Y1_2) ival = ival & 0xfffc0000
+ add iy2,CONST_0x20000,ival2 ! (Y2_2) ival = iy + 0x20000
+ fmuld %f48,%f14,%f44 ! (Y2_1) yy = yy * ldtmp1
+ faddd %f50,K1,%f50 ! (Y0_1) dtmp3 = dtmp2 + K1
+
+ sub iy1,ival1,iy1 ! (Y1_2) iy = iy - ival
+ and ival2,MASK_0xfffc0000,ival2 ! (Y2_2) ival = ival & 0xfffc0000
+ fpack32 ZERO,%f8,%f8 ! (Y1_1) exp = vis_fpack32(ZERO, exp)
+ faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty
+
+ sub iy2,ival2,iy2 ! (Y2_2) iy = iy - ival
+ st iy1,[%fp+tmp3] ! (Y1_2) (double) iy
+ fmuld %f26,%f16,%f60 ! (Y3_1) yy = yy * ldtmp1
+ faddd %f42,K1,%f54 ! (Y1_1) dtmp3 = dtmp2 + K1
+
+ cmp ival3,MASK_0x7f800000 ! (Y3_2) if (ival >= 0x7f800000)
+ add y,stridey,y ! y += stridey
+ st iy2,[%fp+tmp2] ! (Y2_2) (double) iy
+ faddd %f24,%f58,%f24 ! (Y3_0) yy = yy + ty
+
+ add y,stridey,y ! y += stridey
+ fmuld K3,%f44,%f22 ! (Y2_1) dtmp0 = K3 * yy
+ bge,pn %icc,.update22 ! (Y3_2) if (ival >= 0x7f800000)
+ fitod %f6,%f40 ! (Y0_1)(double)(*(int*)&exp)
+.cont22:
+ cmp ival3,MASK_0x007fffff ! (Y3_2) if (ival <= 0x7fffff)
+ ldd [LOGFTBL+ind1],%f58 ! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld %f50,%f46,%f52 ! (Y0_1) dtmp4 = dtmp3 * yy
+ fitod %f8,%f56 ! (Y1_1) (double)(*(int*)&exp)
+
+ ld [%fp+tmp1],%f2 ! (Y0_2) (double) iy
+ fmuld K3,%f60,%f50 ! (Y3_1) dtmp0 = K3 * yy
+ ble,pn %icc,.update23 ! (Y3_2) if (ival <= 0x7fffff)
+ fdtos %f48,%f4 ! (Y2_0) (float)(yy)
+.cont23:
+ subcc counter,4,counter ! update cycle counter
+ st %f4,[y] ! (Y2_0) write into memory
+ fmuld %f54,%f62,%f54 ! (Y1_1) dtmp4 = dtmp3 * yy
+ fdtos %f24,%f4 ! (Y3_0)(float)(yy)
+
+ sra ival0,14,ival0 ! (Y0_2) i = ival >> 14;
+ st %f4,[stridey+y] ! (Y3_0) write into memory
+ bpos,pt %icc,.main_loop
+ faddd %f22,K2,%f48 ! (Y2_1) dtmp1 = dtmp0 + K2
+
+.tail:
+ addcc counter,7,counter
+ add y,stridey,y ! y += stridey
+ bneg,pn %icc,.end_loop
+
+ sra ival2,14,ival2 ! (Y2_1) i = ival >> 14;
+ ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp)
+ faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0
+
+ sra ival1,14,ind1 ! (Y1_1) i = ival >> 14;
+ ld [%fp+tmp3],%f4 ! (Y1_1) (double) iy
+ fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp)
+ faddd %f50,K2,%f26 ! (Y3_0) dtmp1 = dtmp0 + K2
+
+ and ival0,-8,ind0 ! (Y0_1) ind = i & (-8)
+ lda [x0]0x82,%f6 ! (Y0_1) *(float*)&exp = *(float*)(x)
+ fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp)
+ faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0
+
+ and ind1,-8,ind1 ! (Y1_1) ind = i & (-8)
+ ldd [LOGFTBL_P8+ind0],%f14 ! (Y0_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy
+ fitod %f2,%f48 ! (Y0_1) yy = (double) iy
+
+ and ival3,MASK_0x007fffff,ival1 ! (Y3_1) iy = ival & 0x007fffff
+ lda [stridex+x0]0x82,%f8 ! (Y1_1) *(float*)&exp = *(float*)(x)
+ fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy
+ fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0
+
+ add iy3,CONST_0x20000,ival3 ! (Y3_1) iy + 0x20000
+ ldd [LOGFTBL_P8+ind1],%f16 ! (Y1_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fmuld %f26,%f60,%f42 ! (Y3_0) dtmp2 = dtmp1 * yy
+ fitod %f4,%f26 ! (Y1_1) yy = (double) iy
+
+ and ival3,MASK_0xfffc0000,ival3 ! (Y3_1) ival = ival & 0xfffc0000
+ lda [x1]0x82,%f10 ! (Y2_1) *(float*)&exp = *(float*)(x)
+ fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy
+ fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0
+
+ sub iy3,ival3,iy3 ! (Y3_1) iy = iy - ival
+ ld [%fp+tmp2],%f2 ! (Y2_1) (double) iy
+ fmuld %f48,%f14,%f46 ! (Y0_1) yy = yy * ldtmp1
+ faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1
+
+ add x1,stridex2,x0 ! x += 2*stridex
+ st iy3,[%fp+tmp3] ! (Y3_1) (double) iy
+ fpack32 ZERO,%f12,%f20 ! (Y3_0) exp = vis_fpack32(ZERO, exp)
+ faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty
+
+ lda [stridex+x1]0x82,%f12 ! (Y3_1) *(float*)&exp = *(float*)(x)
+ fmuld %f26,%f16,%f62 ! (Y1_1) yy = yy * ldtmp1
+ faddd %f42,K1,%f54 ! (Y3_0) dtmp3 = dtmp2 + K1
+
+ sra ival3,14,ival3 ! (Y3_1) i = ival >> 14;
+ add y,stridey,y ! y += stridey
+ faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty
+
+ subcc counter,1,counter
+ ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld K3,%f46,%f22 ! (Y0_1) dtmp0 = K3 * yy
+ fitod %f18,%f40 ! (Y2_0) (double)(*(int*)&exp)
+
+ and ival2,-8,ind2 ! (Y2_1) ind = i & (-8)
+ fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy
+ fitod %f20,%f56 ! (Y3_0) (double)(*(int*)&exp)
+
+ fmuld K3,%f62,%f50 ! (Y1_1) dtmp0 = K3 * yy
+ fdtos %f48,%f4 ! (Y0_0) (float)(yy)
+
+ st %f4,[y] ! (Y0_0) write into memory
+ fmuld %f54,%f60,%f54 ! (Y3_0) dtmp4 = dtmp3 * yy
+ bneg,pn %icc,.end_loop
+ fdtos %f24,%f4 ! (Y1_0) (float)(yy)
+
+ add y,stridey,y ! y += stridey
+ subcc counter,1,counter
+ ldd [LOGFTBL+ind3],%f58 ! (Y3_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ faddd %f22,K2,%f48 ! (Y0_1) dtmp1 = dtmp0 + K2
+
+ st %f4,[y] ! (Y1_0) write into memory
+ bneg,pn %icc,.end_loop
+ fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp)
+
+ ldd [LOGFTBL_P8+ind2],%f14 ! (Y2_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8);
+ faddd %f52,K0,%f22 ! (Y2_0) dtmp5 = dtmp4 + K0
+ fpack32 ZERO,%f6,%f6 ! (Y0_1) exp = vis_fpack32(ZERO, exp)
+
+ faddd %f50,K2,%f26 ! (Y1_1) dtmp1 = dtmp0 + K2
+ fmuld LN2,%f56,%f56 ! (Y3_0) ty = LN2 * (double)(*(int*)&exp)
+
+ and ival3,-8,ind3 ! (Y3_1) ind = i & (-8)
+ ld [%fp+tmp3],%f4 ! (Y3_1) (double) iy
+ faddd %f54,K0,%f24 ! (Y3_0) dtmp5 = dtmp4 + K0
+
+ fmuld %f48,%f46,%f50 ! (Y0_1) dtmp2 = dtmp1 * yy
+ fitod %f2,%f48 ! (Y2_1) yy = (double) iy
+
+ fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy
+ fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0
+
+ fmuld %f26,%f62,%f42 ! (Y1_1) dtmp2 = dtmp1 * yy
+
+ ldd [LOGFTBL_P8+ind3],%f16 ! (Y3_1) ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fitod %f4,%f26 ! (Y3_1) yy = (double) iy
+
+ fmuld %f24,%f60,%f24 ! (Y3_0) yy = dtmp5 * yy
+ fsubd %f56,%f58,%f58 ! (Y3_0) ty = ty - ldtmp0
+
+ fmuld %f48,%f14,%f44 ! (Y2_1) yy = yy * ldtmp1
+ faddd %f50,K1,%f50 ! (Y0_1) dtmp3 = dtmp2 + K1
+
+ fpack32 ZERO,%f8,%f8 ! (Y1_1) exp = vis_fpack32(ZERO, exp)
+ faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty
+
+ fmuld %f26,%f16,%f60 ! (Y3_1) yy = yy * ldtmp1
+ faddd %f42,K1,%f54 ! (Y1_1) dtmp3 = dtmp2 + K1
+
+ add y,stridey,y ! y += stridey
+ faddd %f24,%f58,%f24 ! (Y3_0) yy = yy + ty
+
+ subcc counter,1,counter
+ fmuld K3,%f44,%f22 ! (Y2_1) dtmp0 = K3 * yy
+ fitod %f6,%f40 ! (Y0_1)(double)(*(int*)&exp)
+
+ ldd [LOGFTBL+ind1],%f58 ! (Y1_1) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld %f50,%f46,%f52 ! (Y0_1) dtmp4 = dtmp3 * yy
+ fitod %f8,%f56 ! (Y1_1) (double)(*(int*)&exp)
+
+ fmuld K3,%f60,%f50 ! (Y3_1) dtmp0 = K3 * yy
+ fdtos %f48,%f4 ! (Y2_0) (float)(yy)
+
+ st %f4,[y] ! (Y2_0) write into memory
+ fmuld %f54,%f62,%f54 ! (Y1_1) dtmp4 = dtmp3 * yy
+ bneg,pn %icc,.end_loop
+ fdtos %f24,%f4 ! (Y3_0)(float)(yy)
+
+ subcc counter,1,counter ! update cycle counter
+ add y,stridey,y
+
+ st %f4,[y] ! (Y3_0) write into memory
+ bneg,pn %icc,.end_loop
+ faddd %f22,K2,%f48 ! (Y2_1) dtmp1 = dtmp0 + K2
+
+ ldd [LOGFTBL+ind0],%f42 ! (Y0_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fmuld LN2,%f40,%f40 ! (Y0_0) ty = LN2 * (double)(*(int*)&exp)
+ faddd %f52,K0,%f22 ! (Y0_0) dtmp5 = dtmp4 + K0
+
+ fpack32 ZERO,%f10,%f18 ! (Y2_0) exp = vis_fpack32(ZERO, exp)
+
+ fmuld LN2,%f56,%f56 ! (Y1_0) LN2 * (double)(*(int*)&exp)
+ faddd %f54,K0,%f24 ! (Y1_0) dtmp5 = dtmp4 + K0
+
+ fmuld %f48,%f44,%f50 ! (Y2_0) dtmp2 = dtmp1 * yy
+
+ fmuld %f22,%f46,%f22 ! (Y0_0) yy = dtmp5 * yy
+ fsubd %f40,%f42,%f40 ! (Y0_0) ty = ty - ldtmp0
+
+ fmuld %f24,%f62,%f24 ! (Y1_0) yy = dtmp5 * yy
+ fsubd %f56,%f58,%f58 ! (Y1_0) ty = ty - ldtmp0
+
+ subcc counter,1,counter
+ faddd %f50,K1,%f50 ! (Y2_0) dtmp3 = dtmp2 + K1
+
+ faddd %f22,%f40,%f48 ! (Y0_0) yy = yy + ty
+
+ add y,stridey,y ! y += stridey
+ faddd %f24,%f58,%f24 ! (Y1_0) yy = yy + ty
+
+ ldd [LOGFTBL+ind2],%f42 ! (Y2_0) ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ fitod %f18,%f40 ! (Y2_0) (double)(*(int*)&exp)
+
+ fmuld %f50,%f44,%f52 ! (Y2_0) dtmp4 = dtmp3 * yy
+
+ fdtos %f48,%f4 ! (Y0_0) (float)(yy)
+
+ st %f4,[y] ! (Y0_0) write into memory
+ bneg,pn %icc,.end_loop
+ fdtos %f24,%f4 ! (Y1_0) (float)(yy)
+
+ add y,stridey,y ! y += stridey
+ subcc counter,1,counter
+ st %f4,[y] ! (Y1_0) write into memory
+ bneg,pn %icc,.end_loop
+ fmuld LN2,%f40,%f40 ! (Y2_0) ty = LN2 * (double)(*(int*)&exp)
+
+ faddd %f52,K0,%f22 ! (Y2_0) dtmp5 = dtmp4 + K0
+
+ fmuld %f22,%f44,%f22 ! (Y2_0) yy = dtmp5 * yy
+ fsubd %f40,%f42,%f40 ! (Y2_0) ty = ty - ldtmp0
+
+ add y,stridey,y ! y += stridey
+ faddd %f22,%f40,%f48 ! (Y2_0) yy = yy + ty
+
+ fdtos %f48,%f4 ! (Y2_0) (float)(yy)
+
+ st %f4,[y] ! (Y2_0) write into memory
+.end_loop:
+ ba .begin
+ nop
+
+.end:
+ ret
+ restore %g0,0,%o0
+
+ .align 16
+.update2:
+ cmp counter,0
+ ble .cont2
+ nop
+
+ add x0,stridex,x0
+ stx x0,[%fp+tmp5]
+ sub x0,stridex,x0
+ st counter,[%fp+tmp0]
+ or %g0,0,counter
+ ba .cont2
+ nop
+
+ .align 16
+.update3:
+ cmp counter,0
+ ble .cont3
+ nop
+
+ add x0,stridex,x0
+ stx x0,[%fp+tmp5]
+ sub x0,stridex,x0
+ st counter,[%fp+tmp0]
+ or %g0,0,counter
+ ba .cont3
+ nop
+
+ .align 16
+.update4:
+ cmp counter,1
+ ble .cont4
+ nop
+
+ stx x1,[%fp+tmp5]
+ sub counter,1,counter
+ st counter,[%fp+tmp0]
+ or %g0,1,counter
+ ba .cont4
+ nop
+
+ .align 16
+.update5:
+ cmp counter,1
+ ble .cont5
+ nop
+
+ stx x1,[%fp+tmp5]
+ sub counter,1,counter
+ st counter,[%fp+tmp0]
+ or %g0,1,counter
+ ba .cont5
+ nop
+
+ .align 16
+.update6:
+ cmp counter,2
+ ble .cont6
+ nop
+
+ add x1,stridex,x1
+ stx x1,[%fp+tmp5]
+ sub x1,stridex,x1
+ sub counter,2,counter
+ st counter,[%fp+tmp0]
+ or %g0,2,counter
+ ba .cont6
+ nop
+
+ .align 16
+.update7:
+ cmp counter,2
+ ble .cont7
+ nop
+
+ add x1,stridex,x1
+ stx x1,[%fp+tmp5]
+ sub x1,stridex,x1
+ sub counter,2,counter
+ st counter,[%fp+tmp0]
+ or %g0,2,counter
+ ba .cont7
+ nop
+
+ .align 16
+.update8:
+ cmp counter,3
+ ble .cont8
+ nop
+
+ stx x0,[%fp+tmp5]
+ sub counter,3,counter
+ st counter,[%fp+tmp0]
+ or %g0,3,counter
+ ba .cont8
+ nop
+
+ .align 16
+.update9:
+ cmp counter,3
+ ble .cont9
+ nop
+
+ stx x0,[%fp+tmp5]
+ sub counter,3,counter
+ st counter,[%fp+tmp0]
+ or %g0,3,counter
+ ba .cont9
+ nop
+
+ .align 16
+.update10:
+ cmp counter,4
+ ble .cont10
+ nop
+
+ add x0,stridex,x0
+ stx x0,[%fp+tmp5]
+ sub x0, stridex, x0
+ sub counter,4,counter
+ st counter,[%fp+tmp0]
+ or %g0,4,counter
+ ba .cont10
+ nop
+
+ .align 16
+.update11:
+ cmp counter,4
+ ble .cont11
+ nop
+
+ add x0,stridex,x0
+ stx x0,[%fp+tmp5]
+ sub x0,stridex,x0
+ sub counter,4,counter
+ st counter,[%fp+tmp0]
+ or %g0,4,counter
+ ba .cont11
+ nop
+
+ .align 16
+.update12:
+ cmp counter,5
+ ble .cont12
+ nop
+
+ stx x1,[%fp+tmp5]
+ sub counter,5,counter
+ st counter,[%fp+tmp0]
+ or %g0,5,counter
+ ba .cont12
+ nop
+
+ .align 16
+.update13:
+ cmp counter,5
+ ble .cont13
+ nop
+
+ stx x1,[%fp+tmp5]
+ sub counter,5,counter
+ st counter,[%fp+tmp0]
+ or %g0,5,counter
+ ba .cont13
+ nop
+
+ .align 16
+.update14:
+ cmp counter,6
+ ble .cont14
+ nop
+
+ add x1,stridex,x1
+ stx x1,[%fp+tmp5]
+ sub x1, stridex, x1
+ sub counter,6,counter
+ st counter,[%fp+tmp0]
+ or %g0,6,counter
+ ba .cont14
+ nop
+
+ .align 16
+.update15:
+ cmp counter,6
+ ble .cont15
+ nop
+
+ add x1,stridex,x1
+ stx x1,[%fp+tmp5]
+ sub x1, stridex, x1
+ sub counter,6,counter
+ st counter,[%fp+tmp0]
+ or %g0,6,counter
+ ba .cont15
+ nop
+
+ .align 16
+.update16:
+ cmp counter,0
+ ble,pt %icc, .cont16
+ nop
+
+ stx x0,[%fp+tmp5]
+ st counter,[%fp+tmp0]
+ or %g0,0,counter
+ ba .cont16
+ nop
+
+ .align 16
+.update17:
+ cmp counter,0
+ ble,pt %icc, .cont17
+ nop
+
+ stx x0,[%fp+tmp5]
+ st counter,[%fp+tmp0]
+ or %g0,0,counter
+ ba .cont17
+ nop
+
+ .align 16
+.update18:
+ cmp counter,1
+ ble,pt %icc, .cont18
+ nop
+
+ add x0,stridex,x0
+ stx x0,[%fp+tmp5]
+ sub x0,stridex,x0
+ sub counter,1,counter
+ st counter,[%fp+tmp0]
+ or %g0,1,counter
+ ba .cont18
+ nop
+
+ .align 16
+.update19:
+ cmp counter,1
+ ble,pt %icc, .cont19
+ nop
+
+ add x0,stridex,x0
+ sub counter,1,counter
+ stx x0,[%fp+tmp5]
+ sub x0, stridex, x0
+ st counter,[%fp+tmp0]
+ or %g0,1,counter
+ ba .cont19
+ nop
+
+ .align 16
+.update20:
+ cmp counter,2
+ ble,pt %icc, .cont20
+ nop
+
+ stx x1,[%fp+tmp5]
+ sub counter,2,counter
+ st counter,[%fp+tmp0]
+ or %g0,2,counter
+ ba .cont20
+ nop
+
+ .align 16
+.update21:
+ cmp counter,2
+ ble,pt %icc, .cont21
+ nop
+
+ stx x1,[%fp+tmp5]
+ sub counter, 2, counter
+ st counter,[%fp+tmp0]
+ or %g0,2,counter
+ ba .cont21
+ nop
+
+ .align 16
+.update22:
+ cmp counter,3
+ ble,pt %icc, .cont22
+ nop
+
+ add x1,stridex,x1
+ stx x1,[%fp+tmp5]
+ sub x1,stridex,x1
+ sub counter,3,counter
+ st counter,[%fp+tmp0]
+ or %g0,3,counter
+ ba .cont22
+ nop
+
+ .align 16
+.update23:
+ cmp counter,3
+ ble,pt %icc, .cont23
+ nop
+
+ add x1,stridex,x1
+ stx x1,[%fp+tmp5]
+ sub x1,stridex,x1
+ sub counter,3,counter
+ st counter,[%fp+tmp0]
+ or %g0,3,counter
+ ba .cont23
+ nop
+
+ .align 16
+.spec:
+ or %g0,1,ind3 ! ind3 = 1
+	sll	ind3,31,ind3		! ind3 = 0x80000000
+ add x0,stridex,x0 ! x += stridex
+	sub	ind3,1,ind3		! ind3 = 0x7fffffff
+ add y,stridey,y ! y += stridey
+ and ival0,ind3,iy0 ! ival & 0x7fffffff
+ cmp iy0,MASK_0x7f800000 ! if ((ival & 0x7fffffff) >= 0x7f800000)
+ bge,pn %icc, .spec0 ! if ((ival & 0x7fffffff) >= 0x7f800000)
+ st ival0,[%fp+tmp1]
+ cmp ival0,0 ! if (ival <= 0)
+ ble,pn %icc,.spec1 ! if (ival <= 0)
+ nop
+
+ ld [%fp+tmp1],%f12
+ fitos %f12,%f14 ! value = (float) ival
+ st %f14,[%fp+tmp2] ! ival = *(int*) &value
+ ld [%fp+tmp2],ival0 ! ival = *(int*) &value
+
+ and ival0,MASK_0x007fffff,iy0 ! iy = ival & 0x007fffff
+ sra ival0,23,ival2 ! iexp = ival >> 23
+
+ add iy0,CONST_0x20000,ival0 ! ival = iy + 0x20000
+ sub ival2,149,ival2 ! iexp = iexp - 149
+
+ and ival0,MASK_0xfffc0000,ival0 ! ival = ival & 0xfffc0000
+ st ival2,[%fp+tmp2] ! (double) iexp
+
+ sub iy0,ival0,iy0 ! iy = iy - ival
+
+ sra ival0,14,ival0 ! i = ival >> 14;
+ st iy0,[%fp+tmp1] ! (double) iy
+
+ and ival0,-8,ind0 ! ind = i & (-8)
+ ld [%fp+tmp1],%f2 ! (double) iy
+
+ ldd [LOGFTBL_P8+ind0],%f14 ! ldtmp1 = *(double*)((char*)CONST_TBL+ind+8)
+ fitod %f2,%f48 ! yy = (double) iy
+
+ fmuld %f48,%f14,%f46 ! yy = yy * ldtmp1
+
+ ld [%fp+tmp2],%f6 ! (double) iexp
+ fmuld K3,%f46,%f22 ! dtmp0 = K3 * yy
+
+ ldd [LOGFTBL+ind0],%f42 ! ldtmp0 = *(double*)((char*)CONST_TBL+ind)
+ faddd %f22,K2,%f48 ! dtmp1 = dtmp0 + K2
+
+ fmuld %f48,%f46,%f50 ! dtmp2 = dtmp1 * yy
+
+ faddd %f50,K1,%f50 ! dtmp3 = dtmp2 + K1
+
+ fitod %f6,%f40 ! (double) iexp
+ fmuld %f50,%f46,%f52 ! dtmp4 = dtmp3 * yy
+
+ fmuld LN2,%f40,%f40 ! ty = LN2 * (double) iexp
+ faddd %f52,K0,%f22 ! dtmp5 = dtmp4 + K0
+
+ fmuld %f22,%f46,%f22 ! yy = dtmp5 * yy
+ fsubd %f40,%f42,%f40 ! ty = ty - ldtmp0
+
+ faddd %f22,%f40,%f48 ! yy = yy + ty
+
+ fdtos %f48,%f4 ! (float)(yy)
+
+ ba .begin1
+ st %f4,[y] ! write into memory
+
+ .align 16
+.spec0:
+ ld [%fp+tmp1],%f12 ! value = *(float*) &ival
+ fzeros %f2 ! y[0] = (value < 0.0f?
+ fcmps %fcc0,%f12,%f2 ! 0.0f : value) * value
+ fmovsug %fcc0,%f12,%f2
+ fmuls %f12,%f2,%f2
+ ba .begin1
+ st %f2,[y] ! write into memory
+
+ .align 16
+.spec1:
+ cmp iy0,0 ! if ((ival & 0x7fffffff) == 0)
+ bne,pn %icc,.spec2 ! if ((ival & 0x7fffffff) == 0)
+ nop
+ ld [LOGFTBL+568],%f4
+	fdivs	%f4,ZERO,%f6		! y[0] = -1.0f / 0.0f
+ ba .begin1
+ st %f6,[y] ! write into memory
+
+ .align 16
+.spec2:
+	fdivs	ZERO,ZERO,%f6		! y[0] = 0.0f / 0.0f
+ ba .begin1
+ st %f6,[y] ! write into memory
+
+ SET_SIZE(__vlogf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vpow.S b/usr/src/libm/src/mvec/vis/__vpow.S
new file mode 100644
index 0000000..a86d776
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vpow.S
@@ -0,0 +1,4352 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vpow.S 1.8 06/01/23 SMI"
+
+ .file "__vpow.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+
+! __mt_constlog2[2*i] = log2(1+i/256)*256 rounded to its high-order 32 bits, i = [0, 255]
+! __mt_constlog2[2*i+1] = low-order remainder of log2(1+i/256)*256, i = [0, 255]
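+!
+! A minimal C sketch (hypothetical, not the original generator) of the
+! hi/lo split: hi keeps only the upper 32 bits of the double (low word
+! zero in the data below); lo is the signed remainder.  The table itself
+! rounds the high part, so the stored remainder may be negative:
+!
+!   #include <math.h>
+!
+!   static void split_log2_entry(int i, double *hi, double *lo)
+!   {
+!       double v = 256.0 * log(1.0 + i / 256.0) / log(2.0);
+!       union { double d; unsigned int w[2]; } u;
+!       u.d = v;
+!       u.w[1] = 0;          /* clear the low word (big-endian layout) */
+!       *hi = u.d;
+!       *lo = v - *hi;
+!   }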
+
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ .word 0x3ff709c4, 0x00000000, 0x3e9b5eab, 0x1dd2b66f,
+ .word 0x4006fe51, 0x00000000, 0xbea2443d, 0xeba01c72,
+ .word 0x40113631, 0x00000000, 0x3e97a97b, 0x0c4bb41a,
+ .word 0x4016e797, 0x00000000, 0xbebe8f4b, 0x759d6476,
+ .word 0x401c9364, 0x00000000, 0xbeb15ebc, 0x1e666460,
+ .word 0x40211cd2, 0x00000000, 0xbeb57665, 0xf6893f5d,
+ .word 0x4023ed31, 0x00000000, 0xbecae5e9, 0x7677f62d,
+ .word 0x4026bad3, 0x00000000, 0x3ecd63bf, 0x61cc4d82,
+ .word 0x402985c0, 0x00000000, 0xbebe5b57, 0x35cfaf8e,
+ .word 0x402c4dfb, 0x00000000, 0xbec1bd55, 0x2842c1c2,
+ .word 0x402f138a, 0x00000000, 0xbecf336b, 0x18178cbe,
+ .word 0x4030eb39, 0x00000000, 0xbed81758, 0x19530c23,
+ .word 0x40324b5b, 0x00000000, 0x3edf84d6, 0x8f2268b4,
+ .word 0x4033aa30, 0x00000000, 0xbec16c07, 0x1e93fd97,
+ .word 0x403507b8, 0x00000000, 0x3ecb019d, 0xdb6a796a,
+ .word 0x403663f7, 0x00000000, 0xbe94dbb3, 0xa60cceb2,
+ .word 0x4037beef, 0x00000000, 0xbeda51d7, 0x5fb0ef94,
+ .word 0x403918a1, 0x00000000, 0x3edb918c, 0xd6ab9c8d,
+ .word 0x403a7112, 0x00000000, 0xbec065bd, 0xb60a5dd4,
+ .word 0x403bc842, 0x00000000, 0x3ed02b6a, 0xee98ecb1,
+ .word 0x403d1e35, 0x00000000, 0xbebca47d, 0x25b2f4c7,
+ .word 0x403e72ec, 0x00000000, 0x3eb17fa5, 0xb21cbdb6,
+ .word 0x403fc66a, 0x00000000, 0x3eae1601, 0x49209a69,
+ .word 0x40408c59, 0x00000000, 0xbeecc961, 0x871a7611,
+ .word 0x404134e2, 0x00000000, 0xbee2ddbe, 0x74803297,
+ .word 0x4041dcd2, 0x00000000, 0xbeea2ab5, 0x212856eb,
+ .word 0x40428429, 0x00000000, 0x3ee2c1e9, 0x8fe35da3,
+ .word 0x40432aea, 0x00000000, 0xbecd8751, 0xe5e0ae0d,
+ .word 0x4043d114, 0x00000000, 0x3eeb66a2, 0x98fc02ce,
+ .word 0x404476aa, 0x00000000, 0xbea9f022, 0xcb3b1c5b,
+ .word 0x40451bac, 0x00000000, 0xbeebe168, 0xdd6dd3fe,
+ .word 0x4045c01a, 0x00000000, 0x3edcfdeb, 0x43cfd006,
+ .word 0x404663f7, 0x00000000, 0xbea4dbb3, 0xa60cceb2,
+ .word 0x40470743, 0x00000000, 0xbed5887e, 0xc06b1ff2,
+ .word 0x4047a9ff, 0x00000000, 0xbedc17d1, 0x108740d9,
+ .word 0x40484c2c, 0x00000000, 0xbed7e87e, 0x268116ee,
+ .word 0x4048edcb, 0x00000000, 0xbec7cad4, 0x944a32be,
+ .word 0x40498edd, 0x00000000, 0x3eadf9c3, 0x7c0beb3a,
+ .word 0x404a2f63, 0x00000000, 0x3ed1905c, 0x35651c43,
+ .word 0x404acf5e, 0x00000000, 0x3ed6da76, 0x49f7f08f,
+ .word 0x404b6ecf, 0x00000000, 0x3ec75f95, 0xe96bed8d,
+ .word 0x404c0db7, 0x00000000, 0xbed91359, 0x08df8ec9,
+ .word 0x404cac16, 0x00000000, 0x3ede3b86, 0xe44b6265,
+ .word 0x404d49ee, 0x00000000, 0x3ee30c96, 0x5bf23d2d,
+ .word 0x404de740, 0x00000000, 0xbecc4eb7, 0xf11e41be,
+ .word 0x404e840c, 0x00000000, 0xbec8b195, 0xb338360c,
+ .word 0x404f2053, 0x00000000, 0x3edc9047, 0x93a3ba95,
+ .word 0x404fbc17, 0x00000000, 0xbee1bf65, 0xfd7715ca,
+ .word 0x40502bac, 0x00000000, 0xbef76cbe, 0x67113a18,
+ .word 0x4050790b, 0x00000000, 0xbee227e7, 0xfb487e73,
+ .word 0x4050c629, 0x00000000, 0x3efd550a, 0xa3a93ec8,
+ .word 0x40511308, 0x00000000, 0xbee2967a, 0x451a7b48,
+ .word 0x40515fa6, 0x00000000, 0x3efdaec2, 0x3fd65f8e,
+ .word 0x4051ac06, 0x00000000, 0xbef35b83, 0xe3eb5ce3,
+ .word 0x4051f826, 0x00000000, 0xbec24ee3, 0xd9a82f2e,
+ .word 0x40524408, 0x00000000, 0xbef53c7e, 0x319f6e92,
+ .word 0x40528fab, 0x00000000, 0x3eead993, 0x41b181d1,
+ .word 0x4052db11, 0x00000000, 0xbead932a, 0x8487642e,
+ .word 0x40532639, 0x00000000, 0x3ef8daca, 0x0d66b8f9,
+ .word 0x40537125, 0x00000000, 0xbee8ad99, 0x09933766,
+ .word 0x4053bbd4, 0x00000000, 0xbef7d788, 0xc15a9f3d,
+ .word 0x40540646, 0x00000000, 0x3eed8d82, 0x24bad97a,
+ .word 0x4054507d, 0x00000000, 0xbe922b03, 0xc6b2a5f6,
+ .word 0x40549a78, 0x00000000, 0x3ef2f346, 0xe2bf924b,
+ .word 0x4054e439, 0x00000000, 0xbeffc5c1, 0x258110a4,
+ .word 0x40552dbe, 0x00000000, 0xbead9b4a, 0x641184f9,
+ .word 0x40557709, 0x00000000, 0x3edb3378, 0xcab10782,
+ .word 0x4055c01a, 0x00000000, 0x3eecfdeb, 0x43cfd006,
+ .word 0x405608f2, 0x00000000, 0xbef2f5ad, 0xd49a43fc,
+ .word 0x40565190, 0x00000000, 0xbedb9884, 0x591add87,
+ .word 0x405699f5, 0x00000000, 0x3ee2466a, 0x5c3462a4,
+ .word 0x4056e222, 0x00000000, 0xbee93179, 0x90d43957,
+ .word 0x40572a16, 0x00000000, 0x3eebe5e0, 0xc14a1a6d,
+ .word 0x405771d3, 0x00000000, 0xbef16041, 0x3106e405,
+ .word 0x4057b958, 0x00000000, 0xbef4eb95, 0x4eea2724,
+ .word 0x405800a5, 0x00000000, 0x3ef8c587, 0x150cabae,
+ .word 0x405847bc, 0x00000000, 0x3ee9ec30, 0xc6e3e04a,
+ .word 0x40588e9c, 0x00000000, 0x3efcb82c, 0x89692d99,
+ .word 0x4058d546, 0x00000000, 0x3efced70, 0xdc6acf42,
+ .word 0x40591bbb, 0x00000000, 0xbefdb83a, 0x3dd2d353,
+ .word 0x405961f9, 0x00000000, 0x3eb49d02, 0x6e33d676,
+ .word 0x4059a802, 0x00000000, 0x3eec8f11, 0x979a5db7,
+ .word 0x4059edd6, 0x00000000, 0x3efd66c9, 0x77e236c7,
+ .word 0x405a3376, 0x00000000, 0x3ec4fec0, 0xa13af882,
+ .word 0x405a78e1, 0x00000000, 0x3ef1bdef, 0xbd14a081,
+ .word 0x405abe18, 0x00000000, 0x3efe5fc7, 0xd238691d,
+ .word 0x405b031c, 0x00000000, 0xbed01f9b, 0xcb999fe9,
+ .word 0x405b47ec, 0x00000000, 0xbec18efa, 0xbeb7d722,
+ .word 0x405b8c89, 0x00000000, 0xbee203bc, 0xc3346511,
+ .word 0x405bd0f3, 0x00000000, 0xbed6186f, 0xcf54bbd3,
+ .word 0x405c152a, 0x00000000, 0x3efb0932, 0xb9700973,
+ .word 0x405c5930, 0x00000000, 0xbef4b5a9, 0x2a606047,
+ .word 0x405c9d03, 0x00000000, 0xbec26b70, 0x98590071,
+ .word 0x405ce0a5, 0x00000000, 0xbefb7169, 0xe0cda8bd,
+ .word 0x405d2415, 0x00000000, 0xbeebfa06, 0xc156f521,
+ .word 0x405d6754, 0x00000000, 0xbedfcd15, 0xf101c142,
+ .word 0x405daa62, 0x00000000, 0x3ee10327, 0xdc8093a5,
+ .word 0x405ded40, 0x00000000, 0xbee5dee4, 0xd9d8a273,
+ .word 0x405e2fed, 0x00000000, 0x3eee84b9, 0x4c06f913,
+ .word 0x405e726b, 0x00000000, 0xbef7862a, 0xcb7ceb98,
+ .word 0x405eb4b8, 0x00000000, 0x3ef1f456, 0xf394f972,
+ .word 0x405ef6d6, 0x00000000, 0x3efcca38, 0x881f4780,
+ .word 0x405f38c5, 0x00000000, 0x3ef9ef31, 0x50343f8e,
+ .word 0x405f7a85, 0x00000000, 0x3efa32c1, 0xb3b3864c,
+ .word 0x405fbc17, 0x00000000, 0xbef1bf65, 0xfd7715ca,
+ .word 0x405ffd7a, 0x00000000, 0xbef95f00, 0x19518ce0,
+ .word 0x40601f57, 0x00000000, 0x3ef3b932, 0x6ff91960,
+ .word 0x40603fdb, 0x00000000, 0xbf0d1a19, 0xa0331af3,
+ .word 0x40606047, 0x00000000, 0x3ee9f24e, 0xb23e991f,
+ .word 0x4060809d, 0x00000000, 0xbedb011f, 0x855b4988,
+ .word 0x4060a0dc, 0x00000000, 0x3efa7c70, 0xfde006c7,
+ .word 0x4060c105, 0x00000000, 0x3e9ac754, 0xcb104aea,
+ .word 0x4060e117, 0x00000000, 0x3f0d535f, 0x0444ebab,
+ .word 0x40610114, 0x00000000, 0xbf03ab0d, 0xc56138c9,
+ .word 0x406120fa, 0x00000000, 0xbef630f3, 0xfc695a97,
+ .word 0x406140ca, 0x00000000, 0xbec5786a, 0xf187a96b,
+ .word 0x40616084, 0x00000000, 0x3f012578, 0x0181e2b3,
+ .word 0x40618029, 0x00000000, 0xbef846b4, 0x4ad8a38b,
+ .word 0x40619fb8, 0x00000000, 0xbf01c336, 0xf7a3a78f,
+ .word 0x4061bf31, 0x00000000, 0x3eee95d0, 0x0de3b514,
+ .word 0x4061de95, 0x00000000, 0x3eed9cbb, 0xa6187a4d,
+ .word 0x4061fde4, 0x00000000, 0xbef678bf, 0x6cdedf51,
+ .word 0x40621d1d, 0x00000000, 0x3f06edb5, 0x668c543d,
+ .word 0x40623c42, 0x00000000, 0xbef5ec6c, 0x1bfbf89a,
+ .word 0x40625b51, 0x00000000, 0x3f062dcf, 0x4115a1a3,
+ .word 0x40627a4c, 0x00000000, 0x3ec6172f, 0xe015e13c,
+ .word 0x40629932, 0x00000000, 0xbed30dd5, 0x3f5c184c,
+ .word 0x4062b803, 0x00000000, 0x3f01cfde, 0xb43cfd00,
+ .word 0x4062d6c0, 0x00000000, 0x3ee35013, 0x8064a94e,
+ .word 0x4062f568, 0x00000000, 0x3f0d7acf, 0xc98509e3,
+ .word 0x406313fd, 0x00000000, 0xbf0d7932, 0x43718371,
+ .word 0x4063327c, 0x00000000, 0x3f0aad27, 0x29b21ae5,
+ .word 0x406350e8, 0x00000000, 0x3ef92b83, 0xec743665,
+ .word 0x40636f40, 0x00000000, 0xbec249ba, 0x76fee235,
+ .word 0x40638d84, 0x00000000, 0xbeefd0a2, 0xf6d7e41e,
+ .word 0x4063abb4, 0x00000000, 0xbec57f7a, 0x64ccd537,
+ .word 0x4063c9d0, 0x00000000, 0x3f09242b, 0x8488b305,
+ .word 0x4063e7d9, 0x00000000, 0x3efbcfb8, 0x0b357154,
+ .word 0x406405cf, 0x00000000, 0xbf0cb1c2, 0xd10504b4,
+ .word 0x406423b0, 0x00000000, 0x3f0fa61a, 0xaa59c1d8,
+ .word 0x4064417f, 0x00000000, 0x3ef26410, 0xb256d8d7,
+ .word 0x40645f3b, 0x00000000, 0xbf09d77e, 0x31d6ca00,
+ .word 0x40647ce3, 0x00000000, 0xbeda5fb4, 0xf23978de,
+ .word 0x40649a78, 0x00000000, 0x3f02f346, 0xe2bf924b,
+ .word 0x4064b7fb, 0x00000000, 0xbf0106da, 0x1aa0e9e7,
+ .word 0x4064d56a, 0x00000000, 0x3f06ccf3, 0xb1129b7c,
+ .word 0x4064f2c7, 0x00000000, 0x3f006a7c, 0xcf9dd420,
+ .word 0x40651012, 0x00000000, 0xbf0e3dd5, 0xc1c885ae,
+ .word 0x40652d49, 0x00000000, 0x3f00b91e, 0x4253bd27,
+ .word 0x40654a6f, 0x00000000, 0xbf0cd6af, 0x1c9393cd,
+ .word 0x40656781, 0x00000000, 0x3f0ee1ac, 0x0b1ec5ea,
+ .word 0x40658482, 0x00000000, 0x3ef34c4e, 0x99e1c6c6,
+ .word 0x4065a171, 0x00000000, 0xbf06d01c, 0xa8f50e5f,
+ .word 0x4065be4d, 0x00000000, 0x3ed96a28, 0x6955d67e,
+ .word 0x4065db17, 0x00000000, 0x3f0d4210, 0x4f127092,
+ .word 0x4065f7d0, 0x00000000, 0xbed7c3ec, 0xa28e69ca,
+ .word 0x40661477, 0x00000000, 0xbf07f393, 0xbdd98c47,
+ .word 0x4066310c, 0x00000000, 0xbf0c2ab3, 0xedefe569,
+ .word 0x40664d8f, 0x00000000, 0xbef44732, 0x0833c207,
+ .word 0x40666a01, 0x00000000, 0xbf0c6e1d, 0xcd0cb449,
+ .word 0x40668661, 0x00000000, 0xbefb4848, 0x3c643a24,
+ .word 0x4066a2b0, 0x00000000, 0xbf08697c, 0x3d7dfd9b,
+ .word 0x4066beed, 0x00000000, 0x3ef12866, 0xd705c554,
+ .word 0x4066db19, 0x00000000, 0x3f0a9d86, 0x52765f7c,
+ .word 0x4066f735, 0x00000000, 0xbf0d0e8e, 0x7a165e04,
+ .word 0x4067133f, 0x00000000, 0xbf093aa4, 0xe106ba60,
+ .word 0x40672f38, 0x00000000, 0xbf04bace, 0x940d18ba,
+ .word 0x40674b20, 0x00000000, 0xbef4d8fc, 0x561c8d44,
+ .word 0x406766f7, 0x00000000, 0x3ef5931e, 0xf6e6f15b,
+ .word 0x406782be, 0x00000000, 0xbf000896, 0x6a210de0,
+ .word 0x40679e74, 0x00000000, 0xbf05dbfe, 0x780eccdb,
+ .word 0x4067ba19, 0x00000000, 0xbecb2bf4, 0x6fd85522,
+ .word 0x4067d5ae, 0x00000000, 0xbefd2fc3, 0xaddfdee2,
+ .word 0x4067f132, 0x00000000, 0x3ef0c167, 0x8ae89767,
+ .word 0x40680ca6, 0x00000000, 0x3ef034a6, 0xfc6488d1,
+ .word 0x4068280a, 0x00000000, 0xbef520c7, 0xc69211fe,
+ .word 0x4068435d, 0x00000000, 0x3f05328d, 0xdcedf39e,
+ .word 0x40685ea1, 0x00000000, 0xbf03d361, 0x367bde41,
+ .word 0x406879d4, 0x00000000, 0xbebc2624, 0x7a0cdfbb,
+ .word 0x406894f7, 0x00000000, 0x3f02c1bb, 0xe2d01ba9,
+ .word 0x4068b00b, 0x00000000, 0xbf043a4a, 0xd5c7a4dd,
+ .word 0x4068cb0e, 0x00000000, 0x3efda59d, 0xded9b445,
+ .word 0x4068e602, 0x00000000, 0x3eb11eb3, 0x043f5602,
+ .word 0x406900e6, 0x00000000, 0x3ee60002, 0xccfe43f5,
+ .word 0x40691bbb, 0x00000000, 0xbf0db83a, 0x3dd2d353,
+ .word 0x4069367f, 0x00000000, 0x3f0b682a, 0xcba73219,
+ .word 0x40695135, 0x00000000, 0xbef53d8e, 0x8e4c59c3,
+ .word 0x40696bdb, 0x00000000, 0xbef6a9a5, 0x050809db,
+ .word 0x40698671, 0x00000000, 0x3f0db68e, 0x0ba15359,
+ .word 0x4069a0f9, 0x00000000, 0xbef6278f, 0xd810b546,
+ .word 0x4069bb71, 0x00000000, 0xbec528c6, 0xcdef4d8d,
+ .word 0x4069d5da, 0x00000000, 0xbeb57f7a, 0x64ccd537,
+ .word 0x4069f034, 0x00000000, 0xbee33716, 0xa9ae332f,
+ .word 0x406a0a7f, 0x00000000, 0xbef2d9f7, 0x698ce769,
+ .word 0x406a24bb, 0x00000000, 0xbef48c02, 0x44aa8cfc,
+ .word 0x406a3ee8, 0x00000000, 0xbed8e3cf, 0xc25f0ce6,
+ .word 0x406a5906, 0x00000000, 0x3f0044c5, 0x590979a0,
+ .word 0x406a7316, 0x00000000, 0xbef7e86f, 0x9c2154fb,
+ .word 0x406a8d17, 0x00000000, 0xbf03a076, 0x2ed351cd,
+ .word 0x406aa709, 0x00000000, 0xbed4ffd6, 0x59064390,
+ .word 0x406ac0ed, 0x00000000, 0xbf04d9bb, 0x3135f0b1,
+ .word 0x406adac2, 0x00000000, 0xbee8ee37, 0xcd2ea9d3,
+ .word 0x406af489, 0x00000000, 0xbf02ba1b, 0x4a95229c,
+ .word 0x406b0e41, 0x00000000, 0x3ef35e64, 0x35ebd377,
+ .word 0x406b27eb, 0x00000000, 0x3f02fe3c, 0x2291b5ad,
+ .word 0x406b4187, 0x00000000, 0x3efa5480, 0x45ecbc5d,
+ .word 0x406b5b15, 0x00000000, 0xbedee0d3, 0x3432f2c3,
+ .word 0x406b7495, 0x00000000, 0xbf0c2ab3, 0x496d2d24,
+ .word 0x406b8e06, 0x00000000, 0x3ef04439, 0x848e9d1e,
+ .word 0x406ba76a, 0x00000000, 0xbf03186d, 0xa6fc41e0,
+ .word 0x406bc0bf, 0x00000000, 0x3f05fc8d, 0x8164754e,
+ .word 0x406bda07, 0x00000000, 0x3eecc67e, 0x6db516de,
+ .word 0x406bf341, 0x00000000, 0x3ee14464, 0xa6bcdf48,
+ .word 0x406c0c6d, 0x00000000, 0x3f011f17, 0x74d8b66a,
+ .word 0x406c258c, 0x00000000, 0xbefd4cdb, 0xebaa4121,
+ .word 0x406c3e9d, 0x00000000, 0xbf074797, 0xeab3259d,
+ .word 0x406c57a0, 0x00000000, 0xbee44a49, 0xa82ed669,
+ .word 0x406c7096, 0x00000000, 0xbf045b87, 0x8e27d0d9,
+ .word 0x406c897e, 0x00000000, 0xbec7c929, 0xc9e33277,
+ .word 0x406ca259, 0x00000000, 0xbef1ab66, 0x74e5008e,
+ .word 0x406cbb26, 0x00000000, 0x3f09333f, 0x3d6bb35f,
+ .word 0x406cd3e7, 0x00000000, 0xbf07cd5d, 0xbe4f6f23,
+ .word 0x406cec9a, 0x00000000, 0xbf0848eb, 0x7f40a752,
+ .word 0x406d053f, 0x00000000, 0x3f0b4982, 0x259cc626,
+ .word 0x406d1dd8, 0x00000000, 0x3ee9b4c3, 0xf0c92723,
+ .word 0x406d3664, 0x00000000, 0xbf036033, 0x8ab5a1f2,
+ .word 0x406d4ee2, 0x00000000, 0x3f015971, 0x8aacb6ec,
+ .word 0x406d6754, 0x00000000, 0xbeefcd15, 0xf101c142,
+ .word 0x406d7fb9, 0x00000000, 0xbf0bd935, 0x64ee1bf6,
+ .word 0x406d9810, 0x00000000, 0x3f090f59, 0x8530f102,
+ .word 0x406db05b, 0x00000000, 0x3f0a28be, 0xd929effb,
+ .word 0x406dc89a, 0x00000000, 0xbf053002, 0xa4e86631,
+ .word 0x406de0cb, 0x00000000, 0x3efcb99c, 0x5233429f,
+ .word 0x406df8f0, 0x00000000, 0x3ef04357, 0x9625f7a4,
+ .word 0x406e1108, 0x00000000, 0x3f0b6bdd, 0x258a7b23,
+ .word 0x406e2914, 0x00000000, 0x3ef70700, 0xa00fdd55,
+ .word 0x406e4113, 0x00000000, 0x3f0bab95, 0x4f46b93f,
+ .word 0x406e5906, 0x00000000, 0x3efe4411, 0x672b0c89,
+ .word 0x406e70ed, 0x00000000, 0xbf06e041, 0xe4467502,
+ .word 0x406e88c7, 0x00000000, 0xbf032765, 0x63557797,
+ .word 0x406ea094, 0x00000000, 0x3f0d7b8f, 0x0e7b8e75,
+ .word 0x406eb856, 0x00000000, 0xbeccd5dc, 0x13cad28e,
+ .word 0x406ed00b, 0x00000000, 0x3f0222fb, 0x08d5c3f2,
+ .word 0x406ee7b4, 0x00000000, 0x3f0c6cea, 0x541f5b70,
+ .word 0x406eff52, 0x00000000, 0xbf0fd40b, 0x070e6c33,
+ .word 0x406f16e3, 0x00000000, 0xbf0f8922, 0x73f1379b,
+ .word 0x406f2e68, 0x00000000, 0xbf0fa051, 0xeebd4f74,
+ .word 0x406f45e1, 0x00000000, 0xbf0d0c3e, 0x6aac6ca9,
+ .word 0x406f5d4e, 0x00000000, 0xbf04c432, 0x5068bc88,
+ .word 0x406f74af, 0x00000000, 0xbede20a0, 0xa450bc93,
+ .word 0x406f8c04, 0x00000000, 0x3f08f3a3, 0x1a23946e,
+ .word 0x406fa34e, 0x00000000, 0x3ee177c2, 0x3362928c,
+ .word 0x406fba8c, 0x00000000, 0x3ec71513, 0x7cfebaa0,
+ .word 0x406fd1be, 0x00000000, 0x3f031fca, 0xbe50ac88,
+ .word 0x406fe8e5, 0x00000000, 0xbedd485c, 0xbfb44c3b,
+!
+ .word 0x01a56e1f, 0xc2f8f359, ! _TINY = 1.0e-300
+ .word 0x7e37e43c, 0x8800759c, ! _HUGE = 1.0e+300
+ .word 0x3f6d94ae, 0x0bf85de6, ! KA1_LO = (1.41052154268147309568e-05*256)
+ .word 0x40871540, 0x00000000, ! KA1_HI = (2.8853759765625e+00*256)
+ .word 0x3cd5d528, 0x93bc7fec, ! KB5 = 1.21195555854068860923e-15
+ .word 0x3e2c6b08, 0xd71f5d1e, ! KB3 = 3.30830268126604677436e-09
+ .word 0x3ecebfbd, 0xff82c4ed, ! KB2 = 3.66556559691003767877e-06
+ .word 0x3f662e42, 0xfefa39ef, ! KB1 = 2.70760617406228636578e-03
+!
+! __mt_constexp2[2*i] = high-order bits of 2^(i/256), i = [0, 255]
+! __mt_constexp2[2*i+1] = low-order correction bits of 2^(i/256), i = [0, 255]
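+!
+! A minimal C sketch (hypothetical, not the original generator; assumes
+! long double is wider than double) of what each pair represents:
+! hi is 2^(i/256) rounded to double, lo a small correction term:
+!
+!   #include <math.h>
+!
+!   static void exp2_entry(int i, double *hi, double *lo)
+!   {
+!       long double v = expl((long double)i * logl(2.0L) / 256.0L);
+!       *hi = (double)v;                      /* 2^(i/256) to double   */
+!       *lo = (double)(v - (long double)*hi); /* residual extra bits   */
+!   }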
+
+ .word 0x3ff00000, 0x00000000, 0x00000000, 0x00000000,
+ .word 0x3ff00b1a, 0xfa5abcbf, 0xbc84f6b2, 0xa7609f71,
+ .word 0x3ff0163d, 0xa9fb3335, 0x3c9b6129, 0x9ab8cdb7,
+ .word 0x3ff02168, 0x143b0281, 0xbc82bf31, 0x0fc54eb6,
+ .word 0x3ff02c9a, 0x3e778061, 0xbc719083, 0x535b085d,
+ .word 0x3ff037d4, 0x2e11bbcc, 0x3c656811, 0xeeade11a,
+ .word 0x3ff04315, 0xe86e7f85, 0xbc90a31c, 0x1977c96e,
+ .word 0x3ff04e5f, 0x72f654b1, 0x3c84c379, 0x3aa0d08c,
+ .word 0x3ff059b0, 0xd3158574, 0x3c8d73e2, 0xa475b465,
+ .word 0x3ff0650a, 0x0e3c1f89, 0xbc95cb7b, 0x5799c397,
+ .word 0x3ff0706b, 0x29ddf6de, 0xbc8c91df, 0xe2b13c27,
+ .word 0x3ff07bd4, 0x2b72a836, 0x3c832334, 0x54458700,
+ .word 0x3ff08745, 0x18759bc8, 0x3c6186be, 0x4bb284ff,
+ .word 0x3ff092bd, 0xf66607e0, 0xbc968063, 0x800a3fd1,
+ .word 0x3ff09e3e, 0xcac6f383, 0x3c914878, 0x18316136,
+ .word 0x3ff0a9c7, 0x9b1f3919, 0x3c85d16c, 0x873d1d38,
+ .word 0x3ff0b558, 0x6cf9890f, 0x3c98a62e, 0x4adc610b,
+ .word 0x3ff0c0f1, 0x45e46c85, 0x3c94f989, 0x06d21cef,
+ .word 0x3ff0cc92, 0x2b7247f7, 0x3c901edc, 0x16e24f71,
+ .word 0x3ff0d83b, 0x23395dec, 0xbc9bc14d, 0xe43f316a,
+ .word 0x3ff0e3ec, 0x32d3d1a2, 0x3c403a17, 0x27c57b52,
+ .word 0x3ff0efa5, 0x5fdfa9c5, 0xbc949db9, 0xbc54021b,
+ .word 0x3ff0fb66, 0xaffed31b, 0xbc6b9bed, 0xc44ebd7b,
+ .word 0x3ff10730, 0x28d7233e, 0x3c8d46eb, 0x1692fdd5,
+ .word 0x3ff11301, 0xd0125b51, 0xbc96c510, 0x39449b3a,
+ .word 0x3ff11edb, 0xab5e2ab6, 0xbc9ca454, 0xf703fb72,
+ .word 0x3ff12abd, 0xc06c31cc, 0xbc51b514, 0xb36ca5c7,
+ .word 0x3ff136a8, 0x14f204ab, 0xbc67108f, 0xba48dcf0,
+ .word 0x3ff1429a, 0xaea92de0, 0xbc932fbf, 0x9af1369e,
+ .word 0x3ff14e95, 0x934f312e, 0xbc8b91e8, 0x39bf44ab,
+ .word 0x3ff15a98, 0xc8a58e51, 0x3c82406a, 0xb9eeab0a,
+ .word 0x3ff166a4, 0x5471c3c2, 0x3c58f23b, 0x82ea1a32,
+ .word 0x3ff172b8, 0x3c7d517b, 0xbc819041, 0xb9d78a76,
+ .word 0x3ff17ed4, 0x8695bbc0, 0x3c709e3f, 0xe2ac5a64,
+ .word 0x3ff18af9, 0x388c8dea, 0xbc911023, 0xd1970f6c,
+ .word 0x3ff19726, 0x58375d2f, 0x3c94aadd, 0x85f17e08,
+ .word 0x3ff1a35b, 0xeb6fcb75, 0x3c8e5b4c, 0x7b4968e4,
+ .word 0x3ff1af99, 0xf8138a1c, 0x3c97bf85, 0xa4b69280,
+ .word 0x3ff1bbe0, 0x84045cd4, 0xbc995386, 0x352ef607,
+ .word 0x3ff1c82f, 0x95281c6b, 0x3c900977, 0x8010f8c9,
+ .word 0x3ff1d487, 0x3168b9aa, 0x3c9e016e, 0x00a2643c,
+ .word 0x3ff1e0e7, 0x5eb44027, 0xbc96fdd8, 0x088cb6de,
+ .word 0x3ff1ed50, 0x22fcd91d, 0xbc91df98, 0x027bb78c,
+ .word 0x3ff1f9c1, 0x8438ce4d, 0xbc9bf524, 0xa097af5c,
+ .word 0x3ff2063b, 0x88628cd6, 0x3c8dc775, 0x814a8495,
+ .word 0x3ff212be, 0x3578a819, 0x3c93592d, 0x2cfcaac9,
+ .word 0x3ff21f49, 0x917ddc96, 0x3c82a97e, 0x9494a5ee,
+ .word 0x3ff22bdd, 0xa27912d1, 0x3c8d34fb, 0x5577d69f,
+ .word 0x3ff2387a, 0x6e756238, 0x3c99b07e, 0xb6c70573,
+ .word 0x3ff2451f, 0xfb82140a, 0x3c8acfcc, 0x911ca996,
+ .word 0x3ff251ce, 0x4fb2a63f, 0x3c8ac155, 0xbef4f4a4,
+ .word 0x3ff25e85, 0x711ece75, 0x3c93e1a2, 0x4ac31b2c,
+ .word 0x3ff26b45, 0x65e27cdd, 0x3c82bd33, 0x9940e9d9,
+ .word 0x3ff2780e, 0x341ddf29, 0x3c9e067c, 0x05f9e76c,
+ .word 0x3ff284df, 0xe1f56381, 0xbc9a4c3a, 0x8c3f0d7e,
+ .word 0x3ff291ba, 0x7591bb70, 0xbc82cc72, 0x28401cbd,
+ .word 0x3ff29e9d, 0xf51fdee1, 0x3c8612e8, 0xafad1255,
+ .word 0x3ff2ab8a, 0x66d10f13, 0xbc995743, 0x191690a7,
+ .word 0x3ff2b87f, 0xd0dad990, 0xbc410adc, 0xd6381aa4,
+ .word 0x3ff2c57e, 0x39771b2f, 0xbc950145, 0xa6eb5124,
+ .word 0x3ff2d285, 0xa6e4030b, 0x3c900247, 0x54db41d5,
+ .word 0x3ff2df96, 0x1f641589, 0x3c9d16cf, 0xfbbce198,
+ .word 0x3ff2ecaf, 0xa93e2f56, 0x3c71ca0f, 0x45d52383,
+ .word 0x3ff2f9d2, 0x4abd886b, 0xbc653c55, 0x532bda93,
+ .word 0x3ff306fe, 0x0a31b715, 0x3c86f46a, 0xd23182e4,
+ .word 0x3ff31432, 0xedeeb2fd, 0x3c8959a3, 0xf3f3fcd1,
+ .word 0x3ff32170, 0xfc4cd831, 0x3c8a9ce7, 0x8e18047c,
+ .word 0x3ff32eb8, 0x3ba8ea32, 0xbc9c45e8, 0x3cb4f318,
+ .word 0x3ff33c08, 0xb26416ff, 0x3c932721, 0x843659a6,
+ .word 0x3ff34962, 0x66e3fa2d, 0xbc835a75, 0x930881a4,
+ .word 0x3ff356c5, 0x5f929ff1, 0xbc8b5cee, 0x5c4e4628,
+ .word 0x3ff36431, 0xa2de883b, 0xbc8c3144, 0xa06cb85e,
+ .word 0x3ff371a7, 0x373aa9cb, 0xbc963aea, 0xbf42eae2,
+ .word 0x3ff37f26, 0x231e754a, 0xbc99f5ca, 0x9eceb23c,
+ .word 0x3ff38cae, 0x6d05d866, 0xbc9e958d, 0x3c9904bd,
+ .word 0x3ff39a40, 0x1b7140ef, 0xbc99a9a5, 0xfc8e2934,
+ .word 0x3ff3a7db, 0x34e59ff7, 0xbc75e436, 0xd661f5e3,
+ .word 0x3ff3b57f, 0xbfec6cf4, 0x3c954c66, 0xe26fff18,
+ .word 0x3ff3c32d, 0xc313a8e5, 0xbc9efff8, 0x375d29c3,
+ .word 0x3ff3d0e5, 0x44ede173, 0x3c7fe8d0, 0x8c284c71,
+ .word 0x3ff3dea6, 0x4c123422, 0x3c8ada09, 0x11f09ebc,
+ .word 0x3ff3ec70, 0xdf1c5175, 0xbc8af663, 0x7b8c9bca,
+ .word 0x3ff3fa45, 0x04ac801c, 0xbc97d023, 0xf956f9f3,
+ .word 0x3ff40822, 0xc367a024, 0x3c8bddf8, 0xb6f4d048,
+ .word 0x3ff4160a, 0x21f72e2a, 0xbc5ef369, 0x1c309278,
+ .word 0x3ff423fb, 0x2709468a, 0xbc98462d, 0xc0b314dd,
+ .word 0x3ff431f5, 0xd950a897, 0xbc81c7dd, 0xe35f7999,
+ .word 0x3ff43ffa, 0x3f84b9d4, 0x3c8880be, 0x9704c003,
+ .word 0x3ff44e08, 0x6061892d, 0x3c489b7a, 0x04ef80d0,
+ .word 0x3ff45c20, 0x42a7d232, 0xbc686419, 0x82fb1f8e,
+ .word 0x3ff46a41, 0xed1d0057, 0x3c9c944b, 0xd1648a76,
+ .word 0x3ff4786d, 0x668b3237, 0xbc9c20f0, 0xed445733,
+ .word 0x3ff486a2, 0xb5c13cd0, 0x3c73c1a3, 0xb69062f0,
+ .word 0x3ff494e1, 0xe192aed2, 0xbc83b289, 0x5e499ea0,
+ .word 0x3ff4a32a, 0xf0d7d3de, 0x3c99cb62, 0xf3d1be56,
+ .word 0x3ff4b17d, 0xea6db7d7, 0xbc8125b8, 0x7f2897f0,
+ .word 0x3ff4bfda, 0xd5362a27, 0x3c7d4397, 0xafec42e2,
+ .word 0x3ff4ce41, 0xb817c114, 0x3c905e29, 0x690abd5d,
+ .word 0x3ff4dcb2, 0x99fddd0d, 0x3c98ecdb, 0xbc6a7833,
+ .word 0x3ff4eb2d, 0x81d8abff, 0xbc95257d, 0x2e5d7a52,
+ .word 0x3ff4f9b2, 0x769d2ca7, 0xbc94b309, 0xd25957e3,
+ .word 0x3ff50841, 0x7f4531ee, 0x3c7a249b, 0x49b7465f,
+ .word 0x3ff516da, 0xa2cf6642, 0xbc8f7685, 0x69bd93ef,
+ .word 0x3ff5257d, 0xe83f4eef, 0xbc7c998d, 0x43efef71,
+ .word 0x3ff5342b, 0x569d4f82, 0xbc807abe, 0x1db13cad,
+ .word 0x3ff542e2, 0xf4f6ad27, 0x3c87926d, 0x192d5f7e,
+ .word 0x3ff551a4, 0xca5d920f, 0xbc8d689c, 0xefede59b,
+ .word 0x3ff56070, 0xdde910d2, 0xbc90fb6e, 0x168eebf0,
+ .word 0x3ff56f47, 0x36b527da, 0x3c99bb2c, 0x011d93ad,
+ .word 0x3ff57e27, 0xdbe2c4cf, 0xbc90b98c, 0x8a57b9c4,
+ .word 0x3ff58d12, 0xd497c7fd, 0x3c8295e1, 0x5b9a1de8,
+ .word 0x3ff59c08, 0x27ff07cc, 0xbc97e2ce, 0xe467e60f,
+ .word 0x3ff5ab07, 0xdd485429, 0x3c96324c, 0x054647ad,
+ .word 0x3ff5ba11, 0xfba87a03, 0xbc9b77a1, 0x4c233e1a,
+ .word 0x3ff5c926, 0x8a5946b7, 0x3c3c4b1b, 0x816986a2,
+ .word 0x3ff5d845, 0x90998b93, 0xbc9cd6a7, 0xa8b45643,
+ .word 0x3ff5e76f, 0x15ad2148, 0x3c9ba6f9, 0x3080e65e,
+ .word 0x3ff5f6a3, 0x20dceb71, 0xbc89eadd, 0xe3cdcf92,
+ .word 0x3ff605e1, 0xb976dc09, 0xbc93e242, 0x9b56de47,
+ .word 0x3ff6152a, 0xe6cdf6f4, 0x3c9e4b3e, 0x4ab84c27,
+ .word 0x3ff6247e, 0xb03a5585, 0xbc9383c1, 0x7e40b497,
+ .word 0x3ff633dd, 0x1d1929fd, 0x3c984710, 0xbeb964e5,
+ .word 0x3ff64346, 0x34ccc320, 0xbc8c483c, 0x759d8933,
+ .word 0x3ff652b9, 0xfebc8fb7, 0xbc9ae3d5, 0xc9a73e09,
+ .word 0x3ff66238, 0x82552225, 0xbc9bb609, 0x87591c34,
+ .word 0x3ff671c1, 0xc70833f6, 0xbc8e8732, 0x586c6134,
+ .word 0x3ff68155, 0xd44ca973, 0x3c6038ae, 0x44f73e65,
+ .word 0x3ff690f4, 0xb19e9538, 0x3c8804bd, 0x9aeb445d,
+ .word 0x3ff6a09e, 0x667f3bcd, 0xbc9bdd34, 0x13b26456,
+ .word 0x3ff6b052, 0xfa75173e, 0x3c7a38f5, 0x2c9a9d0e,
+ .word 0x3ff6c012, 0x750bdabf, 0xbc728956, 0x67ff0b0d,
+ .word 0x3ff6cfdc, 0xddd47645, 0x3c9c7aa9, 0xb6f17309,
+ .word 0x3ff6dfb2, 0x3c651a2f, 0xbc6bbe3a, 0x683c88ab,
+ .word 0x3ff6ef92, 0x98593ae5, 0xbc90b974, 0x9e1ac8b2,
+ .word 0x3ff6ff7d, 0xf9519484, 0xbc883c0f, 0x25860ef6,
+ .word 0x3ff70f74, 0x66f42e87, 0x3c59d644, 0xd45aa65f,
+ .word 0x3ff71f75, 0xe8ec5f74, 0xbc816e47, 0x86887a99,
+ .word 0x3ff72f82, 0x86ead08a, 0xbc920aa0, 0x2cd62c72,
+ .word 0x3ff73f9a, 0x48a58174, 0xbc90a8d9, 0x6c65d53c,
+ .word 0x3ff74fbd, 0x35d7cbfd, 0x3c9047fd, 0x618a6e1c,
+ .word 0x3ff75feb, 0x564267c9, 0xbc902459, 0x57316dd3,
+ .word 0x3ff77024, 0xb1ab6e09, 0x3c9b7877, 0x169147f8,
+ .word 0x3ff78069, 0x4fde5d3f, 0x3c9866b8, 0x0a02162d,
+ .word 0x3ff790b9, 0x38ac1cf6, 0x3c9349a8, 0x62aadd3e,
+ .word 0x3ff7a114, 0x73eb0187, 0xbc841577, 0xee04992f,
+ .word 0x3ff7b17b, 0x0976cfdb, 0xbc9bebb5, 0x8468dc88,
+ .word 0x3ff7c1ed, 0x0130c132, 0x3c9f124c, 0xd1164dd6,
+ .word 0x3ff7d26a, 0x62ff86f0, 0x3c91bddb, 0xfb72b8b4,
+ .word 0x3ff7e2f3, 0x36cf4e62, 0x3c705d02, 0xba15797e,
+ .word 0x3ff7f387, 0x8491c491, 0xbc807f11, 0xcf9311ae,
+ .word 0x3ff80427, 0x543e1a12, 0xbc927c86, 0x626d972b,
+ .word 0x3ff814d2, 0xadd106d9, 0x3c946437, 0x0d151d4d,
+ .word 0x3ff82589, 0x994cce13, 0xbc9d4c1d, 0xd41532d8,
+ .word 0x3ff8364c, 0x1eb941f7, 0x3c999b9a, 0x31df2bd5,
+ .word 0x3ff8471a, 0x4623c7ad, 0xbc88d684, 0xa341cdfb,
+ .word 0x3ff857f4, 0x179f5b21, 0xbc5ba748, 0xf8b216d0,
+ .word 0x3ff868d9, 0x9b4492ed, 0xbc9fc6f8, 0x9bd4f6ba,
+ .word 0x3ff879ca, 0xd931a436, 0x3c85d2d7, 0xd2db47bd,
+ .word 0x3ff88ac7, 0xd98a6699, 0x3c9994c2, 0xf37cb53a,
+ .word 0x3ff89bd0, 0xa478580f, 0x3c9d5395, 0x4475202a,
+ .word 0x3ff8ace5, 0x422aa0db, 0x3c96e9f1, 0x56864b27,
+ .word 0x3ff8be05, 0xbad61778, 0x3c9ecb5e, 0xfc43446e,
+ .word 0x3ff8cf32, 0x16b5448c, 0xbc70d55e, 0x32e9e3aa,
+ .word 0x3ff8e06a, 0x5e0866d9, 0xbc97114a, 0x6fc9b2e6,
+ .word 0x3ff8f1ae, 0x99157736, 0x3c85cc13, 0xa2e3976c,
+ .word 0x3ff902fe, 0xd0282c8a, 0x3c9592ca, 0x85fe3fd2,
+ .word 0x3ff9145b, 0x0b91ffc6, 0xbc9dd679, 0x2e582524,
+ .word 0x3ff925c3, 0x53aa2fe2, 0xbc83455f, 0xa639db7f,
+ .word 0x3ff93737, 0xb0cdc5e5, 0xbc675fc7, 0x81b57ebc,
+ .word 0x3ff948b8, 0x2b5f98e5, 0xbc8dc3d6, 0x797d2d99,
+ .word 0x3ff95a44, 0xcbc8520f, 0xbc764b7c, 0x96a5f039,
+ .word 0x3ff96bdd, 0x9a7670b3, 0xbc5ba596, 0x7f19c896,
+ .word 0x3ff97d82, 0x9fde4e50, 0xbc9d185b, 0x7c1b85d1,
+ .word 0x3ff98f33, 0xe47a22a2, 0x3c7cabda, 0xa24c78ec,
+ .word 0x3ff9a0f1, 0x70ca07ba, 0xbc9173bd, 0x91cee632,
+ .word 0x3ff9b2bb, 0x4d53fe0d, 0xbc9dd84e, 0x4df6d518,
+ .word 0x3ff9c491, 0x82a3f090, 0x3c7c7c46, 0xb071f2be,
+ .word 0x3ff9d674, 0x194bb8d5, 0xbc9516be, 0xa3dd8233,
+ .word 0x3ff9e863, 0x19e32323, 0x3c7824ca, 0x78e64c6e,
+ .word 0x3ff9fa5e, 0x8d07f29e, 0xbc84a9ce, 0xaaf1face,
+ .word 0x3ffa0c66, 0x7b5de565, 0xbc935949, 0x5d1cd533,
+ .word 0x3ffa1e7a, 0xed8eb8bb, 0x3c9c6618, 0xee8be70e,
+ .word 0x3ffa309b, 0xec4a2d33, 0x3c96305c, 0x7ddc36ab,
+ .word 0x3ffa42c9, 0x80460ad8, 0xbc9aa780, 0x589fb120,
+ .word 0x3ffa5503, 0xb23e255d, 0xbc9d2f6e, 0xdb8d41e1,
+ .word 0x3ffa674a, 0x8af46052, 0x3c650f56, 0x30670366,
+ .word 0x3ffa799e, 0x1330b358, 0x3c9bcb7e, 0xcac563c7,
+ .word 0x3ffa8bfe, 0x53c12e59, 0xbc94f867, 0xb2ba15a9,
+ .word 0x3ffa9e6b, 0x5579fdbf, 0x3c90fac9, 0x0ef7fd31,
+ .word 0x3ffab0e5, 0x21356eba, 0x3c889c31, 0xdae94545,
+ .word 0x3ffac36b, 0xbfd3f37a, 0xbc8f9234, 0xcae76cd0,
+ .word 0x3ffad5ff, 0x3a3c2774, 0x3c97ef3b, 0xb6b1b8e5,
+ .word 0x3ffae89f, 0x995ad3ad, 0x3c97a1cd, 0x345dcc81,
+ .word 0x3ffafb4c, 0xe622f2ff, 0xbc94b2fc, 0x0f315ecd,
+ .word 0x3ffb0e07, 0x298db666, 0xbc9bdef5, 0x4c80e425,
+ .word 0x3ffb20ce, 0x6c9a8952, 0x3c94dd02, 0x4a0756cc,
+ .word 0x3ffb33a2, 0xb84f15fb, 0xbc62805e, 0x3084d708,
+ .word 0x3ffb4684, 0x15b749b1, 0xbc7f763d, 0xe9df7c90,
+ .word 0x3ffb5972, 0x8de5593a, 0xbc9c71df, 0xbbba6de3,
+ .word 0x3ffb6c6e, 0x29f1c52a, 0x3c92a8f3, 0x52883f6e,
+ .word 0x3ffb7f76, 0xf2fb5e47, 0xbc75584f, 0x7e54ac3b,
+ .word 0x3ffb928c, 0xf22749e4, 0xbc9b7216, 0x54cb65c6,
+ .word 0x3ffba5b0, 0x30a1064a, 0xbc9efcd3, 0x0e54292e,
+ .word 0x3ffbb8e0, 0xb79a6f1f, 0xbc3f52d1, 0xc9696205,
+ .word 0x3ffbcc1e, 0x904bc1d2, 0x3c823dd0, 0x7a2d9e84,
+ .word 0x3ffbdf69, 0xc3f3a207, 0xbc3c2623, 0x60ea5b52,
+ .word 0x3ffbf2c2, 0x5bd71e09, 0xbc9efdca, 0x3f6b9c73,
+ .word 0x3ffc0628, 0x6141b33d, 0xbc8d8a5a, 0xa1fbca34,
+ .word 0x3ffc199b, 0xdd85529c, 0x3c811065, 0x895048dd,
+ .word 0x3ffc2d1c, 0xd9fa652c, 0xbc96e516, 0x17c8a5d7,
+ .word 0x3ffc40ab, 0x5fffd07a, 0x3c9b4537, 0xe083c60a,
+ .word 0x3ffc5447, 0x78fafb22, 0x3c912f07, 0x2493b5af,
+ .word 0x3ffc67f1, 0x2e57d14b, 0x3c92884d, 0xff483cad,
+ .word 0x3ffc7ba8, 0x8988c933, 0xbc8e76bb, 0xbe255559,
+ .word 0x3ffc8f6d, 0x9406e7b5, 0x3c71acbc, 0x48805c44,
+ .word 0x3ffca340, 0x5751c4db, 0xbc87f2be, 0xd10d08f5,
+ .word 0x3ffcb720, 0xdcef9069, 0x3c7503cb, 0xd1e949db,
+ .word 0x3ffccb0f, 0x2e6d1675, 0xbc7d220f, 0x86009092,
+ .word 0x3ffcdf0b, 0x555dc3fa, 0xbc8dd83b, 0x53829d72,
+ .word 0x3ffcf315, 0x5b5bab74, 0xbc9a08e9, 0xb86dff57,
+ .word 0x3ffd072d, 0x4a07897c, 0xbc9cbc37, 0x43797a9c,
+ .word 0x3ffd1b53, 0x2b08c968, 0x3c955636, 0x219a36ee,
+ .word 0x3ffd2f87, 0x080d89f2, 0xbc9d487b, 0x719d8578,
+ .word 0x3ffd43c8, 0xeacaa1d6, 0x3c93db53, 0xbf5a1614,
+ .word 0x3ffd5818, 0xdcfba487, 0x3c82ed02, 0xd75b3707,
+ .word 0x3ffd6c76, 0xe862e6d3, 0x3c5fe87a, 0x4a8165a0,
+ .word 0x3ffd80e3, 0x16c98398, 0xbc911ec1, 0x8beddfe8,
+ .word 0x3ffd955d, 0x71ff6075, 0x3c9a052d, 0xbb9af6be,
+ .word 0x3ffda9e6, 0x03db3285, 0x3c9c2300, 0x696db532,
+ .word 0x3ffdbe7c, 0xd63a8315, 0xbc9b76f1, 0x926b8be4,
+ .word 0x3ffdd321, 0xf301b460, 0x3c92da57, 0x78f018c3,
+ .word 0x3ffde7d5, 0x641c0658, 0xbc9ca552, 0x8e79ba8f,
+ .word 0x3ffdfc97, 0x337b9b5f, 0xbc91a5cd, 0x4f184b5c,
+ .word 0x3ffe1167, 0x6b197d17, 0xbc72b529, 0xbd5c7f44,
+ .word 0x3ffe2646, 0x14f5a129, 0xbc97b627, 0x817a1496,
+ .word 0x3ffe3b33, 0x3b16ee12, 0xbc99f4a4, 0x31fdc68b,
+ .word 0x3ffe502e, 0xe78b3ff6, 0x3c839e89, 0x80a9cc8f,
+ .word 0x3ffe6539, 0x24676d76, 0xbc863ff8, 0x7522b735,
+ .word 0x3ffe7a51, 0xfbc74c83, 0x3c92d522, 0xca0c8de2,
+ .word 0x3ffe8f79, 0x77cdb740, 0xbc910894, 0x80b054b1,
+ .word 0x3ffea4af, 0xa2a490da, 0xbc9e9c23, 0x179c2893,
+ .word 0x3ffeb9f4, 0x867cca6e, 0x3c94832f, 0x2293e4f2,
+ .word 0x3ffecf48, 0x2d8e67f1, 0xbc9c93f3, 0xb411ad8c,
+ .word 0x3ffee4aa, 0xa2188510, 0x3c91c68d, 0xa487568d,
+ .word 0x3ffefa1b, 0xee615a27, 0x3c9dc7f4, 0x86a4b6b0,
+ .word 0x3fff0f9c, 0x1cb6412a, 0xbc932200, 0x65181d45,
+ .word 0x3fff252b, 0x376bba97, 0x3c93a1a5, 0xbf0d8e43,
+ .word 0x3fff3ac9, 0x48dd7274, 0xbc795a5a, 0x3ed837de,
+ .word 0x3fff5076, 0x5b6e4540, 0x3c99d3e1, 0x2dd8a18b,
+ .word 0x3fff6632, 0x798844f8, 0x3c9fa37b, 0x3539343e,
+ .word 0x3fff7bfd, 0xad9cbe14, 0xbc9dbb12, 0xd006350a,
+ .word 0x3fff91d8, 0x02243c89, 0xbc612ea8, 0xa779f689,
+ .word 0x3fffa7c1, 0x819e90d8, 0x3c874853, 0xf3a5931e,
+ .word 0x3fffbdba, 0x3692d514, 0xbc796773, 0x15098eb6,
+ .word 0x3fffd3c2, 0x2b8f71f1, 0x3c62eb74, 0x966579e7,
+ .word 0x3fffe9d9, 0x6b2a23d9, 0x3c74a603, 0x7442fde3,
+!
+ .word 0x3c900000, 0x00000000, ! 2**(-54) = 5.551115123125782702e-17
+ .word 0x3ff00000, 0x00000000, ! DONE = 1.0
+ .word 0x43300000, 0x00000000, ! DVAIN52 = 2**52 = 4.503599627370496e15
+ .word 0xffffffff, 0x00000000, ! MHI32 = 0xffffffff00000000
+ .word 0x4062776d, 0x8ce329bd, ! KA5 = (5.77078604860893737986e-01*256)
+ .word 0x406ec709, 0xdc39fc99, ! KA3 = (9.61796693925765549423e-01*256)
+ .word 0x40871547, 0x652b82fe, ! KA1 = (2.885390081777926774e+00*256)
+ .word 0x41100000, 0x00000000, ! HTHRESH = 262144.0
+ .word 0xc110cc00, 0x00000000, ! LTHRESH = -275200.0
+ .word 0x3d83b2ab, 0xc07c93d0, ! KB4 = 2.23939573811855104311e-12
+ .word 0x000fffff, 0xffffffff, ! MMANT
+ .word 0x00000800, 0x00000000, ! MROUND
+ .word 0xfffff000, 0x00000000, ! MHI20
+
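+! Note on the constants above (a hedged reading, not from the original
+! comments): KA1, KA3, KA5 are essentially (2/ln2)/1, (2/ln2)/3 and
+! (2/ln2)/5, each pre-scaled by 256, i.e. the odd series for
+! 256*log2(x/ax) in s = (x - ax)/(x + ax); KB4 matches (ln2/256)^4/4!,
+! so the KB polynomial rebuilds 2^(r/256) - 1.  Ignoring the hi/lo
+! splitting and the overflow/underflow rescaling, the computation below
+! amounts to roughly this sketch (exp2tbl stands for __mt_constexp2,
+! KB1..KB5 are loaded from the constant area):
+!
+!     double t = 256.0 * y * log2(x);       /* "s + yd" in the code      */
+!     int    n = (int) t;                   /* "ind"                     */
+!     double r = t - (double) n;            /* remainder for the KB poly */
+!     double z = exp2tbl[n & 0xff];         /* 2^((n & 0xff)/256)        */
+!     z += z * ((((KB5*r + KB4)*r + KB3)*r + KB2)*r + KB1)*r;
+!     *pz = scalbn(z, n >> 8);              /* done with vis_fpadd32     */
+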
+! local storage indices
+#define tmp0_lo STACK_BIAS-4
+#define tmp0_hi STACK_BIAS-8
+#define tmp1_lo STACK_BIAS-12
+#define tmp1_hi STACK_BIAS-16
+#define tmp2_lo STACK_BIAS-20
+#define tmp2_hi STACK_BIAS-24
+#define tmp3 STACK_BIAS-28
+#define tmp4 STACK_BIAS-32
+#define ind_buf STACK_BIAS-48
+#define tmp_counter STACK_BIAS-56
+#define tmp_px STACK_BIAS-64
+#define tmp_py STACK_BIAS-72
+#define tmp_mant STACK_BIAS-80
+#define tmp5 STACK_BIAS-88
+#define tmp6 STACK_BIAS-96
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 96
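+
+! ind_buf is used as a small ring buffer: the pointer kept in %o7 is
+! advanced by 4 and masked with 15 every iteration, so up to four
+! per-iteration yisint values stay live across the software-pipelined
+! stages of the main loop.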
+
+#define LOGTBL %g5
+#define EXPTBL %g1
+#define EXPTBL_P8 %l4
+
+#define MASK_0x7fffffff %o4
+#define MASK_0x000fffff %o3
+#define MASK_0x3ff00000 %o1
+
+#define counter %i0
+#define px %i1
+#define stridex %l5
+#define py %i3
+#define stridey %l6
+#define pz %i5
+#define stridez %l7
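+
+! (Presumably the usual mvec argument layout, i.e. something like
+! __vpow(n, x, stridex, y, stridey, z, stridez): n arrives in %i0, the
+! pointer/stride pairs in %i1..%i5, and the seventh argument is read
+! from the stack at the entry point below.)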
+
+#define HTHRESH %f0
+#define LTHRESH %f2
+
+#define MHI32 %f38
+#define KA1_LO %f40
+#define KA1_HI %f40
+
+#define KB1 %f42
+#define KB2 %f42
+#define KB3 %f42
+#define KB4 %f44
+#define KB5 %f42
+
+#define KA1 %f46
+#define KA3 %f28
+#define KA5 %f50
+
+#define DZERO %f24
+#define DZERO_HI %f24
+#define DZERO_LO %f25
+#define DONE %f18
+#define DONE_HI %f18
+#define DONE_LO %f19
+
+#define XKB1 %f42
+#define XKB2 %f40
+#define XKB3 %f32
+#define XKB4 %f36
+#define XKB5 %f34
+
+#define s_h %f46
+#define yr %f30
+
+#define ind_TINY 64
+#define ind_HUGE 56
+#define ind_LO 48
+#define ind_HI 40
+#define ind_KB5 32
+#define ind_KB3 24
+#define ind_KB2 16
+#define ind_KB1 8
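+
+! The ind_* values appear to be byte offsets subtracted from EXPTBL to
+! reach a small constant area placed between the log2 and exp2 tables,
+! e.g. "ldd [EXPTBL-ind_HI],KA1_HI" in the code below.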
+
+!--------------------------------------------------------------------
+! !!!!! vpow algorithm !!!!!
+!
+! hx = ((unsigned*)px)[0];
+! lx = ((unsigned*)px)[1];
+! hy = ((unsigned*)py)[0];
+! ly = ((unsigned*)py)[1];
+! sx = hx >> 31;
+! sy = hy >> 31;
+! hx &= 0x7fffffff;
+! hy &= 0x7fffffff;
+! y0 = *px;
+!
+! if (hy < 0x3bf00000) { /* |Y| < 2^(-64) */
+! if ((hy | ly) == 0) { /* pow(X,0) */
+! *pz = DONE;
+! goto next;
+! }
+! if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0)) { /* |X| = Nan */
+! *pz = y0 * y0;
+! goto next;
+! }
+! else if ((hx | lx) == 0 || (hx == 0x7ff00000 && lx == 0)) { /* X = 0 or Inf */
+! ((int*)pz)[0] = hx;
+! ((int*)pz)[1] = lx;
+! if (sy) *pz = DONE / *pz;
+! goto next;
+! }
+! else *pz = (sx) ? DZERO / DZERO : DONE;
+! goto next;
+! }
+! yisint = 0; /* Y - non-integer */
+! expy = hy >> 20; /* Y exponent */
+!
+! if (hx >= 0x7ff00000 || expy >= 0x43e) { /* X=Inf,Nan or |Y|>2^63,Inf,Nan */
+! if (hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) ||
+! hy > 0x7ff00000 || (hy == 0x7ff00000 && ly != 0)) {
+! *pz = y0 * *py; /* |X| or |Y| = Nan */
+! goto next;
+! }
+! if (hy == 0x7ff00000 && (ly == 0)) { /* |Y| = Inf */
+! if (hx == 0x3ff00000 && (lx == 0))
+! *pz = *py - *py; /* +-1 ** +-Inf */
+! else if ((hx < 0x3ff00000) != sy)
+! *pz = DZERO;
+! else {
+! ((int*)pz)[0] = hy;
+! ((int*)pz)[1] = ly;
+! }
+! goto next;
+! }
+! if (expy < 0x43e) { /* |Y| < 2^63 */
+! if (sx) { /* X = -Inf */
+! if (expy >= 0x434) /* |Y| >= 2^53 */
+! yisint = 2; /* Y - even */
+! else {
+! if (expy >= 0x3ff) { /* |Y| >= 1 */
+! if (expy > (20 + 0x3ff)) {
+! i0 = ly >> (52 - (expy - 0x3ff));
+! if ((i0 << (52 - (expy - 0x3ff))) == ly) yisint = 2 - (i0 & 1);
+! }
+! else if (ly == 0) {
+! i0 = hy >> (20 - (expy - 0x3ff));
+! if ((i0 << (20 - (expy - 0x3ff))) == hy) yisint = 2 - (i0 & 1);
+! }
+! }
+! }
+! }
+! if (sy) hx = lx = 0;
+! hx += yisint << 31;
+! ((int*)pz)[0] = hx;
+! ((int*)pz)[1] = lx;
+! goto next;
+! }
+! else { /* |Y| >= 2^63 */
+! if (lx == 0 && /* |X| = 0, 1, Inf */
+! (hx == 0 || hx == 0x3ff00000 || hx == 0x7ff00000)) {
+! ((int*)pz)[0] = hx;
+! ((int*)pz)[1] = lx;
+! if (sy) *pz = DONE / *pz;
+! }
+! else {
+! y0 = ((hx < 0x3ff00000) != sy) ? _TINY : _HUGE;
+! *pz = y0 * y0;
+! }
+! goto next;
+! }
+! }
+! if (sx || (hx | lx) == 0) { /* X <= 0 */
+! if (expy >= 0x434) /* |Y| >= 2^53 */
+! yisint = 2; /* Y - even */
+! else {
+! if (expy >= 0x3ff) { /* |Y| >= 1 */
+! if (expy > (20 + 0x3ff)) {
+! i0 = ly >> (52 - (expy - 0x3ff));
+! if ((i0 << (52 - (expy - 0x3ff))) == ly) yisint = 2 - (i0 & 1);
+! }
+! else if (ly == 0) {
+! i0 = hy >> (20 - (expy - 0x3ff));
+! if ((i0 << (20 - (expy - 0x3ff))) == hy) yisint = 2 - (i0 & 1);
+! }
+! }
+! }
+! if ((hx | lx) == 0) { /* X == 0 */
+! y0 = DZERO;
+! if (sy) y0 = DONE / y0;
+! if (sx & yisint) y0 = -y0;
+! *pz = y0;
+! goto next;
+! }
+! if (yisint == 0) { /* pow(neg,non-integer) */
+! *pz = DZERO / DZERO; /* NaN */
+! goto next;
+! }
+! }
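+!
+! A worked instance of the yisint test above (illustration only): for
+! y = 3.0, hy = 0x40080000 and ly = 0, so expy = 0x400 and
+! expy - 0x3ff = 1.  Since ly == 0, i0 = hy >> (20 - 1) = 0x801, and
+! (0x801 << 19) == hy, so yisint = 2 - (0x801 & 1) = 1 (odd integer).
+! For y = 0.5, expy = 0x3fe is below 0x3ff, yisint stays 0, and a
+! negative x then falls into the DZERO / DZERO (NaN) branch.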
+!
+! *((int*)&x + 1) = ((unsigned*)px)[1];
+! *((int*)&ax + 1) = 0;
+! exp = hx;
+! hx &= 0xfffff;
+! hx |= 0x3ff00000;
+! *(int*)&x = hx;
+! hx += 0x800;
+! hx &= 0xfffff000;
+! *(int*)&ax = hx;
+! if (exp <= 0xfffff) {
+! y0 = vis_fand(x, MMANT);
+! ax = (double) ((long long *) & y0)[0];
+! x = vis_fand(ax, MMANT);
+! x = vis_for(x, DONE);
+! exp = ((unsigned int*) & ax)[0];
+! exp -= (1023 + 51) << 20;
+! hx = exp & 0xfffff;
+! hx |= 0x3ff00000;
+! hx += 0x800;
+! *(int*)&ax = hx;
+! }
+! exp = (exp >> 20);
+! exp = exp - 2046;
+! ux = x + ax;
+! yd = DONE / ux;
+! u = x - ax;
+! s = u * yd;
+! ux = vis_fand(ux, MHI32);
+! y = s * s;
+! s_h = vis_fand(s, MHI32);
+! dtmp8 = KA5 * y;
+! dtmp8 = dtmp8 + KA3;
+! dtmp8 = dtmp8 * y;
+! s = dtmp8 * s;
+! dtmp0 = (ux - ax);
+! s_l = (x - dtmp0);
+! dtmp0 = s_h * ux;
+! dtmp1 = s_h * s_l;
+! s_l = u - dtmp0;
+! s_l -= dtmp1;
+! dtmp0 = KA1 * yd;
+! s_l = dtmp0 * s_l;
+! i = (hx >> 8);
+! i = i & 0xff0;
+! itmp0 = (hx >> 20);
+! exp += itmp0;
+! yd = KA1_HI * s_h;
+! y = *(double *)((char*)__mt_constlog2 + i);
+! itmp0 = exp << 8;
+! y += (double)itmp0;
+! m_h = y + yd;
+! dtmp2 = m_h - y;
+! dtmp2 -= yd;
+! dtmp2 -= s_l;
+! y = s - dtmp2;
+! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8);
+! dtmp1 = KA1_LO * s_h;
+! dtmp0 += dtmp1;
+! y += dtmp0;
+! dtmp0 = y + m_h;
+! s_h = vis_fand(dtmp0, MHI32);
+! dtmp0 = (s_h - m_h);
+! y = y - dtmp0;
+! yd = *py;
+! s = vis_fand(yd, MHI32);
+! dtmp0 = (yd - s);
+! dtmp1 = yd * y;
+! dtmp0 *= s_h;
+! yd = dtmp0 + dtmp1;
+! s = s_h * s;
+! if (s > HTHRESH) {s = HTHRESH; yd = DZERO;}
+! if (s < LTHRESH) {s = LTHRESH; yd = DZERO;}
+! dtmp0 = (s + yd);
+! ind = (int)dtmp0;
+! i = ind & 0xff;
+! i = i << 4;
+! u = (double)(int)dtmp0;
+! ind >>= 8;
+! y = s - u;
+! y = y + yd;
+! u = *(double*)((char*)__mt_constexp2 + i);
+! dtmp0 = KB5 * y;
+! dtmp1 = dtmp0 + KB4;
+! dtmp2 = dtmp1 * y;
+! dtmp3 = dtmp2 + KB3;
+! dtmp4 = dtmp3 * y;
+! dtmp5 = dtmp4 + KB2;
+! dtmp6 = dtmp5 * y;
+! dtmp7 = dtmp6 + KB1;
+! y = dtmp7 * y;
+! eflag = (ind + 1021);
+! eflag = eflag >> 31;
+! gflag = (1022 - ind);
+! gflag = gflag >> 31;
+! dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+! dtmp1 = u * y;
+! dtmp2 = dtmp0 + dtmp1;
+! u = dtmp2 + u;
+! ind = yisint + ind;
+! itmp0 = 54 & eflag;
+! itmp1 = 52 & gflag;
+! ind = ind + itmp0;
+! ind = ind - itmp1;
+! ind <<= 20;
+! *(int*)&dtmp0 = ind;
+! *((int*)&dtmp0 + 1) = 0;
+! u = vis_fpadd32(u, dtmp0);
+! ind = eflag - gflag;
+! ind += 1;
+! ind *= 8;
+! dtmp1 = *(double*)((char*)lconst + ind);
+! dtmp1 = u * dtmp1;
+! *pz = dtmp1;
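+!
+! The eflag/gflag steps just above read as the usual scaled-reconstruction
+! trick: with lconst[] = { 2^-54, 1.0, 2^52 } (the first three entries of
+! the constant block), they amount to roughly
+!
+!     if (ind < -1021)     { ind += 54; scale = 0x1p-54; }  /* underflow */
+!     else if (ind > 1022) { ind -= 52; scale = 0x1p+52; }  /* overflow  */
+!     else                 {            scale = 1.0;      }
+!     *(int*)&dtmp0 = ind << 20;  *((int*)&dtmp0 + 1) = 0;
+!     u = vis_fpadd32(u, dtmp0);  /* add ind to the exponent field */
+!     *pz = u * scale;            /* the multiply restores the true scale */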
+!--------------------------------------------------------------------
+! !!!!! vpowx algorithm !!!!! (x > 0 and x != Inf, NaN)
+!
+! /* perform s_h + yr = 256*log2(x) */
+!
+! exp = ((unsigned*)px)[0];
+! y0 = px[0];
+! if (exp <= 0xfffff) {
+! y0 = (double) ((long long *) & y0)[0];
+! exp = ((unsigned int*) & y0)[0];
+! exp -= (1023 + 51) << 20;
+! }
+! x = vis_fand(y0, MMANT);
+! x = vis_for(x, DONE);
+! ax = vis_fpadd32(x, MROUND);
+! ax = vis_fand(ax, MHI20);
+! hx = *(int*)&ax;
+! exp = (exp >> 20);
+! exp = exp - 2046;
+! ux = x + ax;
+! yd = DONE / ux;
+! u = x - ax;
+! s = u * yd;
+! ux = vis_fand(ux, MHI32);
+! y = s * s;
+! s_h = vis_fand(s, MHI32);
+! dtmp8 = KA5 * y;
+! dtmp8 = dtmp8 + KA3;
+! dtmp8 = dtmp8 * y;
+! s = dtmp8 * s;
+! dtmp0 = (ux - ax);
+! s_l = (x - dtmp0);
+! dtmp0 = s_h * ux;
+! dtmp1 = s_h * s_l;
+! s_l = u - dtmp0;
+! s_l -= dtmp1;
+! dtmp0 = KA1 * yd;
+! s_l = dtmp0 * s_l;
+! i = (hx >> 8);
+! i = i & 0xff0;
+! itmp0 = (hx >> 20);
+! exp += itmp0;
+! yd = KA1_HI * s_h;
+! y = *(double *)((char*)__mt_constlog2 + i);
+! itmp0 = exp << 8;
+! y += (double)itmp0;
+! m_h = y + yd;
+! dtmp2 = m_h - y;
+! dtmp2 -= yd;
+! dtmp2 -= s_l;
+! y = s - dtmp2;
+! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8);
+! dtmp1 = KA1_LO * s_h;
+! dtmp0 += dtmp1;
+! y += dtmp0;
+! dtmp0 = y + m_h;
+! s_h = vis_fand(dtmp0, MHI32);
+! dtmp0 = (s_h - m_h);
+! yr = y - dtmp0;
+!
+! hy = ((unsigned*)py)[0];
+! ly = ((unsigned*)py)[1];
+! hx = ((unsigned*)px)[0];
+! lx = ((unsigned*)px)[1];
+! sy = hy >> 31;
+! hy &= 0x7fffffff;
+!
+! if (hy < 0x3bf00000) {/* |Y| < 2^(-64) */
+! *pz = DONE;
+! goto next;
+! }
+!
+! if (hy >= 0x43e00000) { /* |Y|>2^63,Inf,Nan */
+! if (hy == 0x7ff00000 && (ly == 0)) { /* |Y| = Inf */
+! if (hx == 0x3ff00000 && (lx == 0))
+! *pz = *py - *py; /* 1 ** +-Inf */
+! else if ((hx < 0x3ff00000) != sy)
+! *pz = DZERO;
+! else {
+! ((int*)pz)[0] = hy;
+! ((int*)pz)[1] = ly;
+! }
+! goto next;
+! }
+! if (hy >= 0x7ff00000) {
+! *pz = *px + *py; /* |Y| = Nan */
+! goto next;
+! }
+! /* |Y| >= 2^63 */
+! if (lx == 0 && (hx == 0x3ff00000)) { /* X = 1 */
+! *pz = DONE;
+! }
+! else {
+! y0 = ((hx < 0x3ff00000) != sy) ? _TINY : _HUGE;
+! *pz = y0 * y0;
+! }
+! goto next;
+! }
+!
+! yd = *py;
+! s = vis_fand(yd, MHI32);
+! dtmp0 = (yd - s);
+! dtmp1 = yd * yr;
+! dtmp0 *= s_h;
+! yd = dtmp0 + dtmp1;
+! s = s_h * s;
+! if (s > HTHRESH) {s = HTHRESH; yd = DZERO;}
+! if (s < LTHRESH) {s = LTHRESH; yd = DZERO;}
+! dtmp0 = (s + yd);
+! ind = (int)dtmp0;
+! i = ind & 0xff;
+! i = i << 4;
+! u = (double)(int)dtmp0;
+! ind >>= 8;
+! y = s - u;
+! y = y + yd;
+! u = *(double*)((char*)__mt_constexp2 + i);
+! dtmp0 = XKB5 * y;
+! dtmp1 = dtmp0 + XKB4;
+! dtmp2 = dtmp1 * y;
+! dtmp3 = dtmp2 + XKB3;
+! dtmp4 = dtmp3 * y;
+! dtmp5 = dtmp4 + XKB2;
+! dtmp6 = dtmp5 * y;
+! dtmp7 = dtmp6 + XKB1;
+! y = dtmp7 * y;
+! eflag = (ind + 1021);
+! eflag = eflag >> 31;
+! gflag = (1022 - ind);
+! gflag = gflag >> 31;
+! dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+! dtmp1 = u * y;
+! dtmp2 = dtmp0 + dtmp1;
+! u = dtmp2 + u;
+! itmp0 = 54 & eflag;
+! itmp1 = 52 & gflag;
+! ind = ind + itmp0;
+! ind = ind - itmp1;
+! ind <<= 20;
+! *(int*)&dtmp0 = ind;
+! *((int*)&dtmp0 + 1) = 0;
+! u = vis_fpadd32(u, dtmp0);
+! ind = eflag - gflag;
+! ind += 1;
+! ind *= 8;
+! dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+! dtmp1 = u * dtmp1;
+! *pz = dtmp1;
+!--------------------------------------------------------------------
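+!
+! The vpowx variant above appears to be reached through the .stridex_zero
+! path selected at the entry point below: when stridex == 0 and the single
+! shared x is strictly positive with its high word below 0x7f800000,
+! 256*log2(x) can be evaluated once and only the exp2 part is redone per
+! element.  Roughly ("vpowx_path" is descriptive, not an actual label):
+!
+!     if (stridex == 0 && hx > 0 && hx < 0x7f800000)
+!         goto vpowx_path;     /* .stridex_zero */
+!     else
+!         goto common_case;    /* .common_case: full per-element pow() */
+!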
+
+ ENTRY(__vpow)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,g5)
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+
+ cmp counter,0
+ ble,pn %icc,.end
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],stridez
+#else
+ ld [%fp+STACK_BIAS+92],stridez
+#endif
+
+ ld [px],%o0
+ add LOGTBL,4095,EXPTBL
+ st counter,[%fp+tmp_counter]
+ add EXPTBL,65,EXPTBL
+ sra %i2,0,stridex
+ stx px,[%fp+tmp_px]
+ add EXPTBL,4095,%l0
+ fzero DZERO
+ stx py,[%fp+tmp_py]
+
+ cmp stridex,0
+ bne,pt %icc,.common_case
+ add %l0,1,%l0
+
+ cmp %o0,0
+ ble,pt %icc,.common_case
+ sethi %hi(0x7f800000),%o1
+
+ cmp %o0,%o1
+ bl,pn %icc,.stridex_zero
+ nop
+
+.common_case:
+ sra stridez,0,stridez
+ ldd [%l0+8],DONE
+ ldd [%l0+24],MHI32
+ sra %i4,0,stridey
+ ldd [%l0+32],KA5
+ sethi %hi(0x7ffffc00),MASK_0x7fffffff
+ ldd [%l0+40],KA3
+ sethi %hi(0xffc00),MASK_0x000fffff
+ ldd [%l0+48],KA1
+ sethi %hi(0x3ff00000),MASK_0x3ff00000
+ ldd [%l0+56],HTHRESH
+ sllx stridex,3,stridex
+ add MASK_0x7fffffff,0x3ff,MASK_0x7fffffff
+ ldd [%l0+64],LTHRESH
+ sllx stridey,3,stridey
+ add MASK_0x000fffff,0x3ff,MASK_0x000fffff
+ ldd [%l0+72],KB4
+ sllx stridez,3,stridez
+ st %g0,[%fp+tmp1_lo] ! *((int*)&ax + 1) = 0;
+ sub %g0,1,%o2
+ st %g0,[%fp+tmp2_lo] ! (Y0_0) *((int*)&dtmp0 + 1) = 0;
+ st MASK_0x000fffff,[%fp+tmp_mant]
+ sub pz,stridez,pz
+ st %o2,[%fp+tmp_mant+4]
+
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],px
+ ldx [%fp+tmp_py],py
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ subcc counter,1,counter
+ bneg,pn %icc,.end
+ or %g0,ind_buf,%o7
+
+ lda [py]%asi,%o2 ! (Y0_1) hy = *py;
+
+ and %o2,MASK_0x7fffffff,%l1 ! (Y0_3) hy &= 0x7fffffff;
+ lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0];
+
+ sra %l1,20,%o0 ! (Y0_3) expy = hy >> 20;
+ lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+
+ and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff;
+
+ or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000;
+
+ st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx;
+
+ add %o5,2048,%o5 ! (Y0_3) hx += 0x800;
+
+ st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+ and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000;
+
+ add pz,stridez,pz
+ st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx;
+
+ and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff;
+
+ sra %l3,20,%l2 ! (Y0_3) exp = (exp >> 20);
+
+ cmp %o0,959 ! (Y0_3) if (expy < 0x3bf);
+ bl,pn %icc,.spec0 ! (Y0_3) if (expy < 0x3bf);
+ st %g0,[%fp+%o7] ! (Y0_3) yisint = 0;
+
+ cmp %o0,1086 ! (Y0_3) if (expy >= 0x43e);
+ bge,pn %icc,.spec1 ! (Y0_3) if (expy >= 0x43e);
+ nop
+
+ cmp %l2,2047 ! (Y0_2) if (exp >= 0x7ff)
+ bge,pn %icc,.spec1 ! (Y0_2) if (exp >= 0x7ff)
+ nop
+
+ cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff)
+
+ ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx;
+ ble,pn %icc,.update0 ! (Y0_2) if (hx <= 0xfffff)
+ nop
+.cont0:
+ sub %o7,ind_buf,%o7 ! stack buffer pointer update
+ sub pz,stridez,pz
+ ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx;
+
+ add %o7,4,%o7 ! stack buffer pointer update
+ faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax;
+
+ and %o7,15,%o7 ! stack buffer pointer update
+
+ add %o7,ind_buf,%o7 ! stack buffer pointer update
+ add px,stridex,px ! px += stridex;
+
+ lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0];
+
+ lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1];
+ and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff;
+
+ st %g0,[%fp+%o7] ! (Y1_2) yisint = 0;
+ or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000;
+
+ st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx;
+ add %i4,2048,%i4 ! (Y1_2) hx += 0x800;
+
+ st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1];
+ and %i4,-4096,%i4 ! (Y1_2) hx &= 0xfffff000;
+
+ st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx;
+ and %l0,MASK_0x7fffffff,%l2 ! (Y1_2) hx &= 0x7fffffff;
+ cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff)
+
+ ble,pn %icc,.update1 ! (Y1_2) if (hx <= 0xfffff)
+ nop
+.cont1:
+ sub %o7,ind_buf,%o7 ! stack buffer pointer update
+
+ add %o7,4,%o7 ! stack buffer pointer update
+ fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux;
+
+ and %o7,15,%o7 ! stack buffer pointer update
+
+ sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20);
+ add %o7,ind_buf,%o7 ! stack buffer pointer update
+ ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx;
+
+ ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx;
+ sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20);
+ sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046;
+
+ add %o5,%l0,%o5 ! (Y0_2) exp += itmp0;
+
+ sll %o5,8,%l0 ! (Y0_2) itmp0 = exp << 8;
+ st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0;
+ faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax;
+
+ fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32);
+ add px,stridex,px ! px += stridex;
+
+ ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI;
+ fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax);
+
+ ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0;
+ fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax;
+
+ sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8);
+
+ and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0;
+
+ ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i);
+ fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd;
+ fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0);
+
+ fitod %f16,%f54 ! (Y0_2) (double)itmp0;
+ add %l4,8,%o0 ! (Y0_2) i += 8;
+
+ lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0];
+ fand %f52,MHI32,%f4 ! (Y0_2) s_h = vis_fand(s, MHI32);
+
+ faddd %f62,%f54,%f54 ! (Y0_2) y += (double)itmp0;
+ lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+ fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux;
+
+ and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff;
+ fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s;
+
+ or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000;
+
+ st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx;
+ fsubd %f58,%f32,%f32 ! (Y0_2) s_l = u - dtmp0;
+
+ add %o5,2048,%o5 ! (Y0_3) hx += 0x800;
+
+ st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+ and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000;
+ fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y;
+
+ st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx;
+ fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h;
+
+ fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l;
+ ldd [EXPTBL-ind_LO],KA1_LO ! (Y0_2) load KA1_LO;
+ and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff;
+ faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3;
+
+ st %g0,[%fp+%o7] ! (Y0_3) yisint = 0;
+ faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd;
+
+ fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux;
+ fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1;
+
+ cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff)
+
+ sra %l2,20,%l2 ! (Y1_1) exp = (exp >> 20);
+ ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx;
+ ble,pn %icc,.update2 ! (Y0_2) if (hx <= 0xfffff)
+ fsubd %f36,%f54,%f30 ! (Y0_1) dtmp2 = m_h - y;
+.cont2:
+ cmp %l2,2047 ! (Y1_1) if (exp >= 0x7ff)
+ sub %o7,ind_buf,%o7 ! stack buffer pointer update
+ ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx;
+
+ sra %i4,20,%l0 ! (Y1_1) itmp0 = (hx >> 20);
+ sub %l2,2046,%o5 ! (Y1_1) exp = exp - 2046;
+ fmuld KA1,%f20,%f20 ! (Y0_1) dtmp0 = KA1 * yd;
+
+ add %o5,%l0,%o5 ! (Y1_1) exp += itmp0;
+ fmuld %f62,%f12,%f62 ! (Y0_1) dtmp8 = dtmp8 * y;
+
+ sll %o5,8,%l0 ! (Y1_1) itmp0 = exp << 8;
+ add %o7,4,%o7 ! stack buffer pointer update
+ st %l0,[%fp+tmp3] ! (Y1_1) (double)itmp0;
+ faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax;
+
+ bge,pn %icc,.update3 ! (Y1_1) if (exp >= 0x7ff)
+ fsubd %f30,%f48,%f48 ! (Y0_1) dtmp2 -= yd;
+.cont3:
+ and %o7,15,%o7 ! stack buffer pointer update
+ fmuld %f20,%f10,%f10 ! (Y0_1) s_l = dtmp0 * s_l;
+
+ add %o7,ind_buf,%o7 ! stack buffer pointer update
+ fmuld KA1_LO,%f4,%f4 ! (Y0_1) dtmp1 = KA1_LO * s_h;
+ fand %f26,MHI32,%f26 ! (Y1_1) ux = vis_fand(ux, MHI32);
+
+ fmuld %f62,%f52,%f62 ! (Y0_1) s = dtmp8 * s;
+ ldd [LOGTBL+%o0],%f52 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8);
+ fsubd %f48,%f10,%f20 ! (Y0_1) dtmp2 -= s_l;
+
+ add px,stridex,px ! px += stridex;
+ fsubd %f26,%f14,%f10 ! (Y1_1) dtmp0 = (ux - ax);
+
+ faddd %f52,%f4,%f52 ! (Y0_1) dtmp0 += dtmp1;
+
+ ldd [EXPTBL-ind_HI],KA1_HI ! (Y1_1) load KA1_HI;
+ fsubd %f62,%f20,%f4 ! (Y0_1) y = s - dtmp2;
+
+ ld [%fp+tmp3],%f16 ! (Y1_1) (double)itmp0;
+ fsubd %f8,%f14,%f58 ! (Y1_1) u = x - ax;
+
+ sra %i4,8,%o0 ! (Y1_1) i = (hx >> 8);
+
+ faddd %f4,%f52,%f48 ! (Y0_1) y += dtmp0;
+ and %o0,4080,%o0 ! (Y1_1) i = i & 0xff0;
+
+ ldd [LOGTBL+%o0],%f62 ! (Y1_1) y = *(double *)((char*)__mt_constlog2 + i);
+ fmuld %f58,%f22,%f52 ! (Y1_1) s = u * yd;
+ fsubd %f8,%f10,%f10 ! (Y1_1) s_l = (x - dtmp0);
+
+ lda [py]%asi,%f30 ! (Y0_1) yd = *py;
+ fitod %f16,%f14 ! (Y1_1) (double)itmp0;
+
+ lda [py+4]%asi,%f31 ! (Y0_1) yd = *py;
+ faddd %f48,%f36,%f8 ! (Y0_1) dtmp0 = y + m_h;
+
+ add %o0,8,%o0 ! (Y1_1) i += 8;
+ lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0];
+ fand %f52,MHI32,%f4 ! (Y1_1) s_h = vis_fand(s, MHI32);
+
+ faddd %f62,%f14,%f14 ! (Y1_1) y += (double)itmp0;
+
+ lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1];
+ fand %f8,MHI32,%f20 ! (Y0_1) s_h = vis_fand(dtmp0, MHI32);
+ fmuld %f4,%f26,%f8 ! (Y1_1) dtmp0 = s_h * ux;
+
+ fand %f30,MHI32,%f6 ! (Y0_1) s = vis_fand(yd, MHI32);
+ and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff;
+ fmuld %f52,%f52,%f26 ! (Y1_1) y = s * s;
+
+ st %g0,[%fp+%o7] ! (Y1_2) yisint = 0;
+ or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000;
+ fsubd %f20,%f36,%f62 ! (Y0_1) dtmp0 = (s_h - m_h);
+
+ st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx;
+ fsubd %f58,%f8,%f8 ! (Y1_1) s_l = u - dtmp0;
+
+ add %i4,2048,%i4 ! (Y1_2) hx += 0x800;
+ fmuld %f20,%f6,%f34 ! (Y0_1) s = s_h * s;
+ fsubd %f30,%f6,%f6 ! (Y0_1) dtmp0 = (yd - s);
+
+ st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1];
+ and %i4,-4096,%i4 ! (Y1_2) hx &= 0xfffff000;
+ fmuld KA5,%f26,%f36 ! (Y1_1) dtmp8 = KA5 * y;
+
+ st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx;
+ fsubd %f48,%f62,%f62 ! (Y0_1) y = y - dtmp0;
+ fmuld KA1_HI,%f4,%f48 ! (Y1_1) yd = KA1_HI * s_h;
+
+ fmuld %f4,%f10,%f10 ! (Y1_1) dtmp1 = s_h * s_l;
+
+ ldd [EXPTBL-ind_LO],KA1_LO ! (Y1_1) load KA1_LO;
+ and %l0,MASK_0x7fffffff,%l2 ! (Y1_2) hx &= 0x7fffffff;
+ fmuld %f6,%f20,%f6 ! (Y0_1) dtmp0 *= s_h;
+ fcmped %fcc0,%f34,HTHRESH ! (Y0_1) s > HTHRESH
+
+ cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff)
+ fmuld %f30,%f62,%f30 ! (Y0_1) dtmp1 = yd * y;
+ faddd %f36,KA3,%f62 ! (Y1_1) dtmp8 = dtmp8 + KA3;
+
+ ble,pn %icc,.update4 ! (Y1_2) if (hx <= 0xfffff)
+ faddd %f14,%f48,%f36 ! (Y1_1) m_h = y + yd;
+.cont4:
+ sub %o7,ind_buf,%o7 ! stack buffer pointer update
+ fmovdg %fcc0,HTHRESH,%f34 ! (Y0_1) s = HTHRESH
+
+ add %o7,4,%o7 ! stack buffer pointer update
+ fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux;
+ fsubd %f8,%f10,%f10 ! (Y1_1) s_l -= dtmp1;
+
+ and %o7,15,%o7 ! stack buffer pointer update
+ faddd %f6,%f30,%f6 ! (Y0_1) yd = dtmp0 + dtmp1;
+
+ sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20);
+ add %o7,ind_buf,%o7 ! stack buffer pointer update
+ ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx;
+ fsubd %f36,%f14,%f30 ! (Y1_1) dtmp2 = m_h - y;
+
+ cmp %l3,2047 ! (Y0_2) if (exp >= 0x7ff)
+ ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx;
+ fmuld KA1,%f22,%f22 ! (Y1_1) dtmp0 = KA1 * yd;
+
+ sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20);
+ sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046;
+ fcmped %fcc1,%f34,LTHRESH ! (Y0_1) s < LTHRESH
+
+ add %o5,%l0,%o5 ! (Y0_2) exp += itmp0;
+ add py,stridey,py ! py += stridey;
+ fmuld %f62,%f26,%f62 ! (Y1_1) dtmp8 = dtmp8 * y;
+ fmovdg %fcc0,DZERO,%f6 ! (Y0_1) yd = DZERO
+
+ sll %o5,8,%l0 ! (Y0_2) itmp0 = exp << 8;
+ st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0;
+ faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax;
+
+ bge,pn %icc,.update5 ! (Y0_2) if (exp >= 0x7ff)
+ fsubd %f30,%f48,%f48 ! (Y1_1) dtmp2 -= yd;
+.cont5:
+ lda [py]%asi,%l1 ! (Y1_1) hy = *py;
+ fmuld %f22,%f10,%f10 ! (Y1_1) s_l = dtmp0 * s_l;
+ fmovdl %fcc1,LTHRESH,%f34 ! (Y0_1) s = LTHRESH
+
+ fmovdl %fcc1,DZERO,%f6 ! (Y0_1) yd = DZERO
+
+ fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32);
+ fmuld KA1_LO,%f4,%f4 ! (Y1_1) dtmp1 = KA1_LO * s_h;
+
+ fmuld %f62,%f52,%f62 ! (Y1_1) s = dtmp8 * s;
+ ldd [LOGTBL+%o0],%f52 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8);
+ fsubd %f48,%f10,%f22 ! (Y1_1) dtmp2 -= s_l;
+
+ add px,stridex,px ! px += stridex;
+ faddd %f34,%f6,%f58 ! (Y0_1) dtmp0 = (s + yd);
+
+ and %l1,MASK_0x7fffffff,%l1 ! (Y1_1) hy &= 0x7fffffff;
+ ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI;
+ fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax);
+
+ faddd %f52,%f4,%f52 ! (Y1_1) dtmp0 += dtmp1;
+
+ fsubd %f62,%f22,%f4 ! (Y1_1) y = s - dtmp2;
+
+ fdtoi %f58,%f17 ! (Y0_1) (int)dtmp0;
+
+ ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0;
+ fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax;
+ sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8);
+
+ sra %l1,20,%l1 ! (Y1_1) expy = hy >> 20;
+ ldd [EXPTBL-ind_KB5],KB5 ! (Y0_1) load KB5;
+ faddd %f4,%f52,%f48 ! (Y1_1) y += dtmp0;
+
+ and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0;
+ st %f17,[%fp+tmp4] ! (Y0_1) ind = (int)dtmp0;
+ fitod %f17,%f4 ! (Y0_1) u = (double)(int)dtmp0;
+
+ ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i);
+ fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd;
+ fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0);
+
+ lda [py]%asi,%f30 ! (Y1_1) yd = *py;
+ fitod %f16,%f54 ! (Y0_2) (double)itmp0;
+
+ lda [py+4]%asi,%f31 ! (Y1_1) yd = *py;
+ faddd %f48,%f36,%f32 ! (Y1_1) dtmp0 = y + m_h;
+
+ add %l4,8,%o0 ! (Y0_2) i += 8;
+ fsubd %f34,%f4,%f60 ! (Y0_1) y = s - u;
+
+ cmp %l1,959 ! (Y1_1) if (expy < 0x3bf);
+ lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0];
+ fand %f52,MHI32,%f4 ! (Y0_2) s_h = vis_fand(s, MHI32);
+
+ bl,pn %icc,.update6 ! (Y1_1) if (expy < 0x3bf);
+ faddd %f62,%f54,%f54 ! (Y0_2) y += (double)itmp0;
+.cont6:
+ cmp %l1,1086 ! (Y1_1) if (expy >= 0x43e);
+ lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+ fand %f32,MHI32,%f22 ! (Y1_1) s_h = vis_fand(dtmp0, MHI32);
+
+ fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux;
+ bge,pn %icc,.update7 ! (Y1_1) if (expy >= 0x43e);
+ faddd %f60,%f6,%f60 ! (Y0_1) y = y + yd;
+.cont7:
+ ld [%fp+%o7],%o2 ! (Y0_1) load yisint
+ fand %f30,MHI32,%f6 ! (Y1_1) s = vis_fand(yd, MHI32);
+
+ and MASK_0x000fffff,%l0,%o5 ! (Y0_3) hx &= 0xfffff;
+ fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s;
+
+ or MASK_0x3ff00000,%o5,%o5 ! (Y0_3) hx |= 0x3ff00000;
+ fsubd %f22,%f36,%f62 ! (Y1_1) dtmp0 = (s_h - m_h);
+
+ st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx;
+ fsubd %f58,%f32,%f32 ! (Y0_2) s_l = u - dtmp0;
+ fmuld KB5,%f60,%f58 ! (Y0_1) dtmp0 = KB5 * y;
+
+ ldd [EXPTBL-ind_KB3],KB3 ! (Y0_1) load KB3;
+ add %o5,2048,%o5 ! (Y0_3) hx += 0x800;
+ fmuld %f22,%f6,%f34 ! (Y1_1) s = s_h * s;
+ fsubd %f30,%f6,%f6 ! (Y1_1) dtmp0 = (yd - s);
+
+ st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+ and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000;
+ fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y;
+
+ st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx;
+ fsubd %f48,%f62,%f62 ! (Y1_1) y = y - dtmp0;
+ fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h;
+
+ subcc counter,1,counter
+ fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l;
+ faddd %f58,KB4,%f58 ! (Y0_1) dtmp1 = dtmp0 + KB4;
+
+ ldd [EXPTBL-ind_LO],KA1_LO ! (Y0_2) load KA1_LO;
+ and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff;
+ fmuld %f6,%f22,%f6 ! (Y1_1) dtmp0 *= s_h;
+ fcmped %fcc0,%f34,HTHRESH ! (Y1_1) s > HTHRESH;
+
+ fmuld %f30,%f62,%f30 ! (Y1_1) dtmp1 = yd * y;
+ ba 1f
+ faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3;
+
+ .align 16
+1:
+ st %g0,[%fp+%o7] ! (Y0_3) yisint = 0;
+ fmuld %f58,%f60,%f58 ! (Y0_1) dtmp2 = dtmp1 * y;
+ bneg,pn %icc,.tail
+ faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd;
+
+ nop
+ fmovdg %fcc0,HTHRESH,%f34 ! (Y1_1) s = HTHRESH;
+
+ fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux;
+ fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1;
+
+.main_loop:
+ cmp %l0,MASK_0x000fffff ! (Y0_2) if (hx <= 0xfffff)
+ add py,stridey,py ! py += stridey;
+ faddd %f6,%f30,%f6 ! (Y1_0) yd = dtmp0 + dtmp1;
+
+ sra %l2,20,%l2 ! (Y1_1) exp = (exp >> 20);
+ ldd [%fp+tmp0_hi],%f32 ! (Y0_2) *(int*)&x = hx;
+ ble,pn %icc,.update8 ! (Y0_2) if (hx <= 0xfffff)
+ fsubd %f36,%f54,%f30 ! (Y0_1) dtmp2 = m_h - y;
+.cont8:
+ cmp %l2,2047 ! (Y1_1) if (exp >= 0x7ff)
+ sub %o7,ind_buf,%o7 ! stack buffer pointer update
+ ldd [%fp+tmp1_hi],%f54 ! (Y0_2) *(int*)&ax = hx;
+ faddd %f58,KB3,%f58 ! (Y0_0) dtmp3 = dtmp2 + KB3;
+
+ sra %i4,20,%l0 ! (Y1_1) itmp0 = (hx >> 20);
+ sub %l2,2046,%o5 ! (Y1_1) exp = exp - 2046;
+ fmuld KA1,%f20,%f20 ! (Y0_1) dtmp0 = KA1 * yd;
+ fcmped %fcc1,%f34,LTHRESH ! (Y1_0) s < LTHRESH;
+
+ ldd [EXPTBL-ind_KB2],KB2 ! (Y0_0) load KB2;
+ add %o5,%l0,%o5 ! (Y1_1) exp += itmp0;
+ fmuld %f62,%f12,%f62 ! (Y0_1) dtmp8 = dtmp8 * y;
+ fmovdg %fcc0,DZERO,%f6 ! (Y1_0) yd = DZERO
+
+ sll %o5,8,%l0 ! (Y1_1) itmp0 = exp << 8;
+ add %o7,4,%o7 ! stack buffer pointer update
+ st %l0,[%fp+tmp3] ! (Y1_1) (double)itmp0;
+ faddd %f32,%f54,%f12 ! (Y0_2) ux = x + ax;
+
+ ld [%fp+tmp4],%i2 ! (Y0_0) ind = (int)dtmp0;
+ fsubd %f30,%f48,%f48 ! (Y0_1) dtmp2 -= yd;
+ bge,pn %icc,.update9 ! (Y1_1) if (exp >= 0x7ff)
+ fmuld %f58,%f60,%f58 ! (Y0_0) dtmp4 = dtmp3 * y;
+.cont9:
+ lda [py]%asi,%l1 ! (Y0_1) hy = *py;
+ and %o7,15,%o7 ! stack buffer pointer update
+ fmuld %f20,%f10,%f10 ! (Y0_1) s_l = dtmp0 * s_l;
+ fmovdl %fcc1,LTHRESH,%f34 ! (Y1_0) s = LTHRESH;
+
+ add %o7,ind_buf,%o7 ! stack buffer pointer update
+ fmovdl %fcc1,DZERO,%f6 ! (Y1_0) yd = DZERO
+
+ fmuld KA1_LO,%f4,%f4 ! (Y0_1) dtmp1 = KA1_LO * s_h;
+ fand %f26,MHI32,%f26 ! (Y1_1) ux = vis_fand(ux, MHI32);
+
+ fmuld %f62,%f52,%f62 ! (Y0_1) s = dtmp8 * s;
+ nop
+ faddd %f58,KB2,%f30 ! (Y0_0) dtmp5 = dtmp4 + KB2;
+
+ nop
+ add pz,stridez,pz ! pz += stridez;
+ ldd [LOGTBL+%o0],%f52 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8);
+ fsubd %f48,%f10,%f20 ! (Y0_1) dtmp2 -= s_l;
+
+ sra %i2,8,%l0 ! (Y0_0) ind >>= 8;
+ ldd [EXPTBL-ind_KB1],KB1 ! (Y0_0) load KB1;
+ add px,stridex,px ! px += stridex;
+ faddd %f34,%f6,%f58 ! (Y1_0) dtmp0 = (s + yd);
+
+ add %l0,1021,%l2 ! (Y0_0) eflag = (ind + 1021);
+ sub %g0,%l0,%o5 ! (Y0_0) gflag = (1022 - ind);
+ fsubd %f26,%f14,%f10 ! (Y1_1) dtmp0 = (ux - ax);
+
+ sra %l2,31,%l2 ! (Y0_0) eflag = eflag >> 31;
+ add %o5,1022,%o5 ! (Y0_0) gflag = (1022 - ind);
+ fmuld %f30,%f60,%f48 ! (Y0_0) dtmp6 = dtmp5 * y;
+ faddd %f52,%f4,%f52 ! (Y0_1) dtmp0 += dtmp1;
+
+ sra %o5,31,%o5 ! (Y0_0) gflag = gflag >> 31;
+ and %l2,54,%o0 ! (Y0_0) itmp0 = 54 & eflag;
+ ldd [EXPTBL-ind_HI],KA1_HI ! (Y1_1) load KA1_HI;
+ fsubd %f62,%f20,%f4 ! (Y0_1) y = s - dtmp2;
+
+ lda [py]%asi,%f30 ! (Y0_1) yd = *py;
+ sub %l2,%o5,%l2 ! (Y0_0) ind = eflag - gflag;
+ add %l0,%o0,%l0 ! (Y0_0) ind = ind + itmp0;
+ fdtoi %f58,%f20 ! (Y1_0) u = (double)(int)dtmp0;
+
+ sra %i4,8,%o0 ! (Y1_1) i = (hx >> 8);
+ and %o5,52,%o5 ! (Y0_0) itmp1 = 52 & gflag;
+ ld [%fp+tmp3],%f16 ! (Y1_1) (double)itmp0;
+ fsubd %f8,%f14,%f58 ! (Y1_1) u = x - ax;
+
+ and %o0,4080,%o0 ! (Y1_1) i = i & 0xff0;
+ sub %l0,%o5,%i4 ! (Y0_0) ind = ind - itmp1;
+ st %f20,[%fp+tmp4] ! (Y1_0) ind = (int)dtmp0;
+ faddd %f48,KB1,%f14 ! (Y0_0) dtmp7 = dtmp6 + KB1;
+
+ add %o2,%i4,%i4 ! (Y0_0) ind = yisint + ind;
+ and %i2,255,%o5 ! (Y0_0) i = ind & 0xff;
+ lda [px]%asi,%l0 ! (Y1_2) hx = ((unsigned*)px)[0];
+ faddd %f4,%f52,%f48 ! (Y0_1) y += dtmp0;
+
+ sll %i4,20,%i4 ! (Y0_0) ind <<= 20;
+ ldd [LOGTBL+%o0],%f62 ! (Y1_1) y = *(double *)((char*)__mt_constlog2 + i);
+ and %l1,MASK_0x7fffffff,%l1 ! (Y0_1) hy &= 0x7fffffff;
+ fitod %f20,%f4 ! (Y1_0) u = (double)(int)dtmp0;
+
+ lda [px+4]%asi,%i2 ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1];
+ nop
+ fmuld %f58,%f22,%f52 ! (Y1_1) s = u * yd;
+ fsubd %f8,%f10,%f10 ! (Y1_1) s_l = (x - dtmp0);
+
+ sll %o5,4,%o5 ! (Y0_0) i = i << 4;
+ st %i4,[%fp+tmp2_hi] ! (Y0_0) *(int*)&dtmp0 = ind;
+ fmuld %f14,%f60,%f20 ! (Y0_0) y = dtmp7 * y;
+ fitod %f16,%f14 ! (Y1_1) (double)itmp0;
+
+ sra %l1,20,%l1 ! (Y0_1) expy = hy >> 20;
+ nop
+ ldd [EXPTBL+%o5],%f56 ! (Y0_0) u = *(double*)((char*)__mt_constexp2 + i);
+ faddd %f48,%f36,%f8 ! (Y0_1) dtmp0 = y + m_h;
+
+ add %o5,8,%o5 ! (Y0_0) i += 8;
+ add %o0,8,%o0 ! (Y1_1) i += 8;
+ lda [py+4]%asi,%f31 ! (Y0_1) yd = *py;
+ fsubd %f34,%f4,%f60 ! (Y1_0) y = s - u;
+
+ cmp %l1,959 ! (Y0_1) if (expy < 0x3bf);
+ and MASK_0x000fffff,%l0,%i4 ! (Y1_2) hx &= 0xfffff;
+ ldd [EXPTBL-ind_KB5],KB5 ! (Y1_0) load KB5;
+ fand %f52,MHI32,%f4 ! (Y1_1) s_h = vis_fand(s, MHI32);
+
+ ldd [EXPTBL+%o5],%f16 ! (Y0_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+ fmuld %f56,%f20,%f34 ! (Y0_0) dtmp1 = u * y;
+ bl,pn %icc,.update10 ! (Y0_1) if (expy < 0x3bf);
+ faddd %f62,%f14,%f14 ! (Y1_1) y += (double)itmp0;
+.cont10:
+ or MASK_0x3ff00000,%i4,%i4 ! (Y1_2) hx |= 0x3ff00000;
+ cmp %l1,1086 ! (Y0_1) if (expy >= 0x43e);
+ fand %f8,MHI32,%f20 ! (Y0_1) s_h = vis_fand(dtmp0, MHI32);
+
+ fmuld %f4,%f26,%f8 ! (Y1_1) dtmp0 = s_h * ux;
+ st %i4,[%fp+tmp0_hi] ! (Y1_2) *(int*)&x = hx;
+ bge,pn %icc,.update11 ! (Y0_1) if (expy >= 0x43e);
+ faddd %f60,%f6,%f60 ! (Y1_0) y = y + yd;
+.cont11:
+ add %i4,2048,%i4 ! (Y1_2) hx += 0x800;
+ ld [%fp+%o7],%o2 ! (Y1_0) load yisint
+ fand %f30,MHI32,%f6 ! (Y0_1) s = vis_fand(yd, MHI32);
+
+ st %i2,[%fp+tmp0_lo] ! (Y1_2) *((int*)&x + 1) = ((unsigned*)px)[1];
+ and %i4,-4096,%i4 ! (Y1_2) hx &= 0xfffff000;
+ fmuld %f52,%f52,%f26 ! (Y1_1) y = s * s;
+ faddd %f16,%f34,%f16 ! (Y0_0) dtmp2 = dtmp0 + dtmp1;
+
+ st %i4,[%fp+tmp1_hi] ! (Y1_2) *(int*)&ax = hx;
+ fsubd %f20,%f36,%f62 ! (Y0_1) dtmp0 = (s_h - m_h);
+
+ fsubd %f58,%f8,%f8 ! (Y1_1) s_l = u - dtmp0;
+ fmuld KB5,%f60,%f58 ! (Y1_0) dtmp0 = KB5 * y;
+
+ ldd [EXPTBL-ind_KB3],KB3 ! (Y1_0) load KB3;
+ fmuld %f20,%f6,%f34 ! (Y0_1) s = s_h * s;
+ fsubd %f30,%f6,%f6 ! (Y0_1) dtmp0 = (yd - s);
+
+ faddd %f16,%f56,%f56 ! (Y0_0) u = dtmp2 + u;
+ nop
+ fmuld KA5,%f26,%f36 ! (Y1_1) dtmp8 = KA5 * y;
+
+ nop
+ add %l2,513,%l2 ! (Y0_0) ind += 513;
+ fsubd %f48,%f62,%f62 ! (Y0_1) y = y - dtmp0;
+ fmuld KA1_HI,%f4,%f48 ! (Y1_1) yd = KA1_HI * s_h;
+
+ sll %l2,3,%o5 ! (Y0_0) ind *= 8;
+ ldd [%fp+tmp2_hi],%f16 ! (Y0_0) ld dtmp0;
+ fmuld %f4,%f10,%f10 ! (Y1_1) dtmp1 = s_h * s_l;
+ faddd %f58,KB4,%f58 ! (Y1_0) dtmp1 = dtmp0 + KB4;
+
+ ldd [EXPTBL-ind_LO],KA1_LO ! (Y1_1) load KA1_LO;
+ and %l0,MASK_0x7fffffff,%l2 ! (Y1_2) hx &= 0x7fffffff;
+ fmuld %f6,%f20,%f6 ! (Y0_1) dtmp0 *= s_h;
+ fcmped %fcc0,%f34,HTHRESH ! (Y0_1) s > HTHRESH
+
+ ldd [EXPTBL+%o5],%f20 ! (Y0_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ nop
+ nop
+ fpadd32 %f56,%f16,%f56 ! (Y0_0) u = vis_fpadd32(u, dtmp0);
+
+ nop
+ cmp %l0,MASK_0x000fffff ! (Y1_2) if (hx <= 0xfffff)
+ fmuld %f30,%f62,%f30 ! (Y0_1) dtmp1 = yd * y;
+ faddd %f36,KA3,%f62 ! (Y1_1) dtmp8 = dtmp8 + KA3;
+
+ fmuld %f58,%f60,%f58 ! (Y1_0) dtmp2 = dtmp1 * y;
+ st %g0,[%fp+%o7] ! (Y1_2) yisint = 0;
+ ble,pn %icc,.update12 ! (Y1_2) if (hx <= 0xfffff)
+ faddd %f14,%f48,%f36 ! (Y1_1) m_h = y + yd;
+.cont12:
+ sra %l3,20,%l3 ! (Y0_2) exp = (exp >> 20);
+ sub %o7,ind_buf,%o7 ! stack buffer pointer update
+ fmuld %f56,%f20,%f16 ! (Y0_0) dtmp1 = u * dtmp1;
+ fmovdg %fcc0,HTHRESH,%f34 ! (Y0_1) s = HTHRESH
+
+ cmp %l3,2047 ! (Y0_2) if (exp >= 0x7ff)
+ st %f16,[pz] ! (Y0_0) write into memory
+ fdivd DONE,%f12,%f20 ! (Y0_2) yd = DONE / ux;
+ fsubd %f8,%f10,%f10 ! (Y1_1) s_l -= dtmp1;
+
+ sra %l4,20,%l0 ! (Y0_2) itmp0 = (hx >> 20);
+ sub %l3,2046,%o5 ! (Y0_2) exp = exp - 2046;
+ st %f17,[pz+4] ! (Y0_0) write into memory
+ faddd %f6,%f30,%f6 ! (Y0_1) yd = dtmp0 + dtmp1;
+
+ add %o5,%l0,%o5 ! (Y0_2) exp += itmp0;
+ add py,stridey,py ! py += stridey;
+ ldd [%fp+tmp0_hi],%f8 ! (Y1_2) *(int*)&x = hx;
+ fsubd %f36,%f14,%f30 ! (Y1_1) dtmp2 = m_h - y;
+
+ sll %o5,8,%l0 ! (Y0_2) itmp0 = exp << 8;
+ ldd [%fp+tmp1_hi],%f14 ! (Y1_2) *(int*)&ax = hx;
+ fmuld KA1,%f22,%f22 ! (Y1_1) dtmp0 = KA1 * yd;
+ faddd %f58,KB3,%f58 ! (Y1_0) dtmp3 = dtmp2 + KB3;
+
+ add %o7,4,%o7 ! stack buffer pointer update
+ st %l0,[%fp+tmp3] ! (Y0_2) (double)itmp0;
+ fcmped %fcc1,%f34,LTHRESH ! (Y0_1) s < LTHRESH
+
+ and %o7,15,%o7 ! stack buffer pointer update
+ ld [%fp+tmp4],%l0 ! (Y1_0) ind = (int)dtmp0;
+ fmuld %f62,%f26,%f62 ! (Y1_1) dtmp8 = dtmp8 * y;
+ fmovdg %fcc0,DZERO,%f6 ! (Y0_1) yd = DZERO
+
+ nop
+ add %o7,ind_buf,%o7 ! stack buffer pointer update
+ ldd [EXPTBL-ind_KB2],KB2 ! (Y1_0) load KB2;
+ faddd %f8,%f14,%f26 ! (Y1_2) ux = x + ax;
+
+ fmuld %f58,%f60,%f58 ! (Y1_0) dtmp4 = dtmp3 * y;
+ nop
+ bge,pn %icc,.update13 ! (Y0_2) if (exp >= 0x7ff)
+ fsubd %f30,%f48,%f48 ! (Y1_1) dtmp2 -= yd;
+.cont13:
+ lda [py]%asi,%l1 ! (Y1_1) hy = *py;
+ nop
+ fmuld %f22,%f10,%f10 ! (Y1_1) s_l = dtmp0 * s_l;
+ fmovdl %fcc1,LTHRESH,%f34 ! (Y0_1) s = LTHRESH
+
+ nop
+ nop
+ fmovdl %fcc1,DZERO,%f6 ! (Y0_1) yd = DZERO
+
+ fand %f12,MHI32,%f12 ! (Y0_2) ux = vis_fand(ux, MHI32);
+ nop
+ nop
+ fmuld KA1_LO,%f4,%f4 ! (Y1_1) dtmp1 = KA1_LO * s_h;
+
+ nop
+ add px,stridex,px ! px += stridex;
+ faddd %f58,KB2,%f30 ! (Y1_0) dtmp5 = dtmp4 + KB2;
+ fmuld %f62,%f52,%f62 ! (Y1_1) s = dtmp8 * s;
+
+ sra %l0,8,%i2 ! (Y1_0) ind >>= 8;
+ add pz,stridez,pz ! pz += stridez;
+ ldd [LOGTBL+%o0],%f52 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8);
+ fsubd %f48,%f10,%f22 ! (Y1_1) dtmp2 -= s_l;
+
+ add %i2,1021,%l3 ! (Y1_0) eflag = (ind + 1021);
+ sub %g0,%i2,%o5 ! (Y1_0) gflag = (1022 - ind);
+ ldd [EXPTBL-ind_KB1],KB1 ! (Y1_0) load KB1;
+ faddd %f34,%f6,%f58 ! (Y0_1) dtmp0 = (s + yd);
+
+ sra %l3,31,%l3 ! (Y1_0) eflag = eflag >> 31;
+ add %o5,1022,%o5 ! (Y1_0) gflag = (1022 - ind);
+ ldd [EXPTBL-ind_HI],KA1_HI ! (Y0_2) load KA1_HI;
+ fsubd %f12,%f54,%f10 ! (Y0_2) dtmp0 = (ux - ax);
+
+ sra %o5,31,%o5 ! (Y1_0) gflag = gflag >> 31;
+ and %l3,54,%o0 ! (Y1_0) itmp0 = 54 & eflag;
+ fmuld %f30,%f60,%f48 ! (Y1_0) dtmp6 = dtmp5 * y;
+ faddd %f52,%f4,%f52 ! (Y1_1) dtmp0 += dtmp1;
+
+ sra %l4,8,%l4 ! (Y0_2) i = (hx >> 8);
+ add %i2,%o0,%i2 ! (Y1_0) ind = ind + itmp0;
+ fsubd %f62,%f22,%f4 ! (Y1_1) y = s - dtmp2;
+
+ lda [py]%asi,%f30 ! (Y1_1) yd = *py;
+ and %l4,4080,%l4 ! (Y0_2) i = i & 0xff0;
+ and %o5,52,%o0 ! (Y1_0) itmp1 = 52 & gflag;
+ fdtoi %f58,%f22 ! (Y0_1) (int)dtmp0;
+
+ sub %l3,%o5,%l3 ! (Y1_0) ind = eflag - gflag;
+ sub %i2,%o0,%i2 ! (Y1_0) ind = ind - itmp1;
+ ld [%fp+tmp3],%f16 ! (Y0_2) (double)itmp0;
+ fsubd %f32,%f54,%f58 ! (Y0_2) u = x - ax;
+
+ add %o2,%i2,%i2 ! (Y1_0) ind = yisint + ind;
+ and %l0,255,%o5 ! (Y1_0) i = ind & 0xff;
+ st %f22,[%fp+tmp4] ! (Y0_1) ind = (int)dtmp0;
+ faddd %f48,KB1,%f54 ! (Y1_0) dtmp7 = dtmp6 + KB1;
+
+ sll %i2,20,%o0 ! (Y1_0) ind <<= 20;
+ nop
+ lda [px]%asi,%l0 ! (Y0_3) hx = ((unsigned*)px)[0];
+ faddd %f4,%f52,%f48 ! (Y1_1) y += dtmp0;
+
+ and %l1,MASK_0x7fffffff,%l1 ! (Y1_1) hy &= 0x7fffffff;
+ nop
+ st %o0,[%fp+tmp2_hi] ! (Y1_0) *(int*)&dtmp0 = ind;
+ fitod %f22,%f4 ! (Y0_1) u = (double)(int)dtmp0;
+
+ lda [px+4]%asi,%i2 ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+ nop
+ fmuld %f58,%f20,%f52 ! (Y0_2) s = u * yd;
+ fsubd %f32,%f10,%f10 ! (Y0_2) s_l = (x - dtmp0);
+
+ sll %o5,4,%o5 ! (Y1_0) i = i << 4;
+ ldd [LOGTBL+%l4],%f62 ! (Y0_2) y = *(double *)((char*)__mt_constlog2 + i);
+ fmuld %f54,%f60,%f22 ! (Y1_0) y = dtmp7 * y;
+ fitod %f16,%f54 ! (Y0_2) (double)itmp0;
+
+ sra %l1,20,%l1 ! (Y1_1) expy = hy >> 20;
+ nop
+ ldd [EXPTBL+%o5],%f56 ! (Y1_0) u = *(double*)((char*)__mt_constexp2 + i);
+ faddd %f48,%f36,%f32 ! (Y1_1) dtmp0 = y + m_h;
+
+ add %o5,8,%o5 ! (Y1_0) i += 8;
+ add %l4,8,%o0 ! (Y0_2) i += 8;
+ lda [py+4]%asi,%f31 ! (Y1_1) yd = *py;
+ fsubd %f34,%f4,%f60 ! (Y0_1) y = s - u;
+
+ cmp %l1,959 ! (Y1_1) if (expy < 0x3bf);
+ and MASK_0x000fffff,%l0,%l4 ! (Y0_3) hx &= 0xfffff;
+ fand %f52,MHI32,%f4 ! (Y0_2) s_h = vis_fand(s, MHI32);
+
+ ldd [EXPTBL+%o5],%f16 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+ fmuld %f56,%f22,%f34 ! (Y1_0) dtmp1 = u * y;
+ bl,pn %icc,.update14 ! (Y1_1) if (expy < 0x3bf);
+ faddd %f62,%f54,%f54 ! (Y0_2) y += (double)itmp0;
+.cont14:
+ ldd [EXPTBL-ind_KB5],KB5 ! (Y0_1) load KB5;
+ or MASK_0x3ff00000,%l4,%o5 ! (Y0_3) hx |= 0x3ff00000;
+ cmp %l1,1086 ! (Y1_1) if (expy >= 0x43e);
+ fand %f32,MHI32,%f22 ! (Y1_1) s_h = vis_fand(dtmp0, MHI32);
+
+ fmuld %f4,%f12,%f32 ! (Y0_2) dtmp0 = s_h * ux;
+ st %o5,[%fp+tmp0_hi] ! (Y0_3) *(int*)&x = hx;
+ bge,pn %icc,.update15 ! (Y1_1) if (expy >= 0x43e);
+ faddd %f60,%f6,%f60 ! (Y0_1) y = y + yd;
+.cont15:
+ add %o5,2048,%o5 ! (Y0_3) hx += 0x800;
+ nop
+ ld [%fp+%o7],%o2 ! (Y0_1) load yisint
+ fand %f30,MHI32,%f6 ! (Y1_1) s = vis_fand(yd, MHI32);
+
+ and %o5,-4096,%l4 ! (Y0_3) hx &= 0xfffff000;
+ st %i2,[%fp+tmp0_lo] ! (Y0_3) *((int*)&x + 1) = ((unsigned*)px)[1];
+ fmuld %f52,%f52,%f12 ! (Y0_2) y = s * s;
+ faddd %f16,%f34,%f16 ! (Y1_0) dtmp2 = dtmp0 + dtmp1;
+
+ nop
+ nop
+ st %l4,[%fp+tmp1_hi] ! (Y0_3) *(int*)&ax = hx;
+ fsubd %f22,%f36,%f62 ! (Y1_1) dtmp0 = (s_h - m_h);
+
+ fsubd %f58,%f32,%f32 ! (Y0_2) s_l = u - dtmp0;
+ nop
+ nop
+ fmuld KB5,%f60,%f58 ! (Y0_1) dtmp0 = KB5 * y;
+
+ ldd [EXPTBL-ind_KB3],KB3 ! (Y0_1) load KB3;
+ nop
+ fmuld %f22,%f6,%f34 ! (Y1_1) s = s_h * s;
+ fsubd %f30,%f6,%f6 ! (Y1_1) dtmp0 = (yd - s);
+
+ fmuld KA5,%f12,%f36 ! (Y0_2) dtmp8 = KA5 * y;
+ nop
+ faddd %f16,%f56,%f56 ! (Y1_0) u = dtmp2 + u;
+
+ add %l3,513,%l3 ! (Y1_0) ind += 513;
+ fsubd %f48,%f62,%f62 ! (Y1_1) y = y - dtmp0;
+ fmuld KA1_HI,%f4,%f48 ! (Y0_2) yd = KA1_HI * s_h;
+
+ sll %l3,3,%o5 ! (Y1_0) ind *= 8;
+ ldd [%fp+tmp2_hi],%f16 ! (Y1_0) *(int*)&dtmp0 = ind;
+ fmuld %f4,%f10,%f10 ! (Y0_2) dtmp1 = s_h * s_l;
+ faddd %f58,KB4,%f58 ! (Y0_1) dtmp1 = dtmp0 + KB4;
+
+ ldd [EXPTBL-ind_LO],KA1_LO ! (Y0_2) load KA1_LO;
+ and %l0,MASK_0x7fffffff,%l3 ! (Y0_3) hx &= 0x7fffffff;
+ fmuld %f6,%f22,%f6 ! (Y1_1) dtmp0 *= s_h;
+ fcmped %fcc0,%f34,HTHRESH ! (Y1_1) s > HTHRESH;
+
+ nop
+ subcc counter,2,counter ! update cycle counter
+ ldd [EXPTBL+%o5],%f22 ! (Y1_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ fpadd32 %f56,%f16,%f56 ! (Y1_0) u = vis_fpadd32(u, dtmp0);
+
+ fmuld %f30,%f62,%f30 ! (Y1_1) dtmp1 = yd * y;
+ nop
+ nop
+ faddd %f36,KA3,%f62 ! (Y0_2) dtmp8 = dtmp8 + KA3;
+
+ nop
+ st %g0,[%fp+%o7] ! (Y0_3) yisint = 0;
+ fmuld %f58,%f60,%f58 ! (Y0_1) dtmp2 = dtmp1 * y;
+ faddd %f54,%f48,%f36 ! (Y0_2) m_h = y + yd;
+
+ fmuld %f56,%f22,%f16 ! (Y1_0) dtmp1 = u * dtmp1;
+ nop
+ st %f16,[pz] ! (Y1_0) write into memory
+ fmovdg %fcc0,HTHRESH,%f34 ! (Y1_1) s = HTHRESH;
+
+ fdivd DONE,%f26,%f22 ! (Y1_2) yd = DONE / ux;
+ st %f17,[pz+4] ! (Y1_0) write into memory
+ bpos,pt %icc,.main_loop
+ fsubd %f32,%f10,%f10 ! (Y0_2) s_l -= dtmp1;
+
+.tail:
+ addcc counter,1,counter
+ bneg,pn %icc,.end_loop
+
+ faddd %f58,KB3,%f58 ! (Y0_0) dtmp3 = dtmp2 + KB3;
+ ldd [EXPTBL-ind_KB2],KB2 ! (Y0_0) load KB2;
+
+ ld [%fp+tmp4],%i2 ! (Y0_0) ind = (int)dtmp0;
+ fmuld %f58,%f60,%f58 ! (Y0_0) dtmp4 = dtmp3 * y;
+ faddd %f58,KB2,%f30 ! (Y0_0) dtmp5 = dtmp4 + KB2;
+
+ add pz,stridez,pz ! pz += stridez;
+ ldd [EXPTBL-ind_KB1],KB1 ! (Y0_0) load KB1;
+ sra %i2,8,%l0 ! (Y0_0) ind >>= 8;
+
+ add %l0,1021,%l2 ! (Y0_0) eflag = (ind + 1021);
+ sub %g0,%l0,%o5 ! (Y0_0) gflag = (1022 - ind);
+ fmuld %f30,%f60,%f48 ! (Y0_0) dtmp6 = dtmp5 * y;
+
+ sra %l2,31,%l2 ! (Y0_0) eflag = eflag >> 31;
+ add %o5,1022,%o5 ! (Y0_0) gflag = (1022 - ind);
+
+ sra %o5,31,%o5 ! (Y0_0) gflag = gflag >> 31;
+ and %l2,54,%o0 ! (Y0_0) itmp0 = 54 & eflag;
+
+ sub %l2,%o5,%l2 ! (Y0_0) ind = eflag - gflag;
+ add %l0,%o0,%l0 ! (Y0_0) ind = ind + itmp0;
+
+ and %o5,52,%o5 ! (Y0_0) itmp1 = 52 & gflag;
+ faddd %f48,KB1,%f14 ! (Y0_0) dtmp7 = dtmp6 + KB1;
+
+ sub %l0,%o5,%l0 ! (Y0_0) ind = ind - itmp1;
+ and %i2,255,%i4 ! (Y0_0) i = ind & 0xff;
+
+ sll %i4,4,%o5 ! (Y0_0) i = i << 4;
+
+ ldd [EXPTBL+%o5],%f56 ! (Y0_0) u = *(double*)((char*)__mt_constexp2 + i);
+ add %o2,%l0,%l0 ! (Y0_0) ind = yisint + ind;
+ fmuld %f14,%f60,%f20 ! (Y0_0) y = dtmp7 * y;
+
+ sll %l0,20,%i2 ! (Y0_0) ind <<= 20;
+
+ add %o5,8,%o5 ! (Y0_0) i += 8;
+ st %i2,[%fp+tmp2_hi] ! (Y0_0) *(int*)&dtmp0 = ind;
+
+ ldd [EXPTBL+%o5],%f16 ! (Y0_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+ fmuld %f56,%f20,%f34 ! (Y0_0) dtmp1 = u * y;
+
+ faddd %f16,%f34,%f16 ! (Y0_0) dtmp2 = dtmp0 + dtmp1;
+
+ faddd %f16,%f56,%f56 ! (Y0_0) u = dtmp2 + u;
+ add %l2,513,%l2 ! (Y0_0) ind += 513;
+
+ sll %l2,3,%o5 ! (Y0_0) ind *= 8;
+ ldd [%fp+tmp2_hi],%f16 ! (Y0_0) ld dtmp0;
+
+ ldd [EXPTBL+%o5],%f20 ! (Y0_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ fpadd32 %f56,%f16,%f56 ! (Y0_0) u = vis_fpadd32(u, dtmp0);
+
+ fmuld %f56,%f20,%f16 ! (Y0_0) dtmp1 = u * dtmp1;
+ st %f16,[pz] ! (Y0_0) write into memory
+ st %f17,[pz+4] ! (Y0_0) write into memory
+
+.end_loop:
+ ba .begin
+ nop
+.end:
+ ret
+ restore %g0,0,%o0
+
+ .align 16
+.update0:
+ cmp %l0,%g0 ! if (x >= 0);
+ fzero %f30
+
+ lda [py+4]%asi,%l0 ! ld ly
+ bge,pt %icc,.pos0 ! if (x >= 0);
+ or %g0,%g0,%o5 ! yisint = 0;
+
+ cmp %o0,1076 ! if (expy >= 0x434);
+ bge .neg0 ! if (expy >= 0x434);
+ or %g0,2,%o5 ! yisint = 2;
+
+ cmp %o0,1023 ! if (expy < 0x3ff);
+ bl .neg0 ! if (expy < 0x3ff);
+ or %g0,0,%o5 ! yisint = 0;
+
+ cmp %o0,1043 ! if (expy <= (20 + 0x3ff));
+ ble .small0 ! if (expy <= (20 + 0x3ff));
+ sub %o0,1023,%o0 ! expy - 0x3ff;
+
+ sub %g0,%o0,%o0
+ add %o0,52,%o0 ! sh = 52 - (expy - 0x3ff);
+ srl %l0,%o0,%i4 ! i0 = (ly >> sh);
+
+ sll %i4,%o0,%i4 ! (i0 << sh);
+
+ srl %l0,%o0,%o0 ! i0 = (ly >> sh);
+ cmp %i4,%l0 ! if ((i0 << sh) == ly);
+
+ and %o0,1,%o0 ! i0 &= 1;
+
+ sub %g0,%o0,%o0
+ add %o0,2,%o0 ! i0 = 2 - i0;
+
+ move %icc,%o0,%o5 ! yisint = i0;
+
+ ba .neg0
+ nop
+.small0:
+ sub %g0,%o0,%o0
+ cmp %l0,%g0 ! if (ly != 0);
+
+ add %o0,20,%o0 ! sh = 20 - (expy - 0x3ff);
+ bne .neg0 ! if (ly != 0);
+ or %g0,0,%o5 ! yisint = 0;
+
+ srl %l1,%o0,%i4 ! i0 = (hy >> sh);
+
+ sll %i4,%o0,%i4 ! (i0 << sh);
+
+ srl %l1,%o0,%o0 ! i0 = (hy >> sh);
+ cmp %i4,%l1 ! if ((i0 << sh) == hy);
+
+ and %o0,1,%o0 ! i0 &= 1;
+
+ sub %g0,%o0,%o0
+ add %o0,2,%o0 ! i0 = 2 - i0;
+
+ move %icc,%o0,%o5 ! yisint = i0;
+.neg0:
+ orcc %l3,%i2,%g0 ! if (x != 0);
+
+ sra %o2,31,%i4 ! sy = ((unsigned*)py)[0] >> 31;
+ bne,pt %icc,3f ! if (x != 0);
+ nop
+
+ cmp %i4,%g0 ! if (sy == 0);
+ be 1f ! if (sy == 0);
+ and %o5,1,%i4 ! yisint &= 1;
+
+ fdivd DONE,%f30,%f30 ! y0 = DONE / y0;
+1:
+ cmp %i4,%g0 ! if ((yisint & 1) == 0);
+ be 2f ! if ((yisint & 1) == 0);
+ nop
+
+ fnegd %f30,%f30 ! y0 = -y0;
+2:
+ st %f30,[pz]
+ ba .update_point
+ st %f31,[pz+4]
+3:
+ cmp %o5,%g0 ! if (yisint != 0);
+ bne .pos0 ! if (yisint != 0);
+ nop
+
+ fdivd DZERO,DZERO,%f30 ! y0 = DZERO / DZERO;
+ st %f30,[pz]
+ ba .update_point
+ st %f31,[pz+4]
+.pos0:
+ orcc %l3,%i2,%g0 ! if (x != 0);
+
+ sra %o2,31,%i4 ! sy = ((unsigned*)py)[0] >> 31;
+ bne,pt %icc,.nzero0 ! if (x != 0);
+ nop
+
+ cmp %i4,%g0 ! if (sy == 0);
+ be 1f ! if (sy == 0);
+ nop
+
+ fdivd DONE,%f30,%f30 ! y0 = DONE / y0;
+1:
+ st %f30,[pz]
+ ba .update_point
+ st %f31,[pz+4]
+.nzero0:
+ sll %o5,11,%o5
+ cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff);
+
+ bg,pt %icc,.cont0 ! if (exp > 0xfffff);
+ st %o5,[%fp+%o7]
+
+ ldd [%fp+tmp_mant],%f54
+
+ or %g0,1074,%o5
+ fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT);
+
+ sll %o5,20,%o5
+ fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0];
+
+ std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0];
+ fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT);
+
+ ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0];
+ for %f32,DONE,%f32 ! x = vis_for(x, DONE);
+
+ sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20;
+ and MASK_0x000fffff,%i2,%l4 ! hx = exp & 0xfffff;
+ or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000;
+ add %l4,2048,%l4 ! hx += 0x800;
+ and %l4,-4096,%l4 ! hx &= 0xfffff000;
+
+ ba .cont0
+ st %l4,[%fp+tmp1_hi] ! *(int*)&ax = hx;
+
+ .align 16
+.update1:
+ cmp counter,0
+ ble,pt %icc,.cont1
+ add py,stridey,%o5
+
+ stx px,[%fp+tmp_px]
+
+ orcc %l2,%i2,%g0 ! if (x == 0);
+ bne,pt %icc,.nzero1 ! if (x == 0);
+ stx %o5,[%fp+tmp_py]
+.u1:
+ st counter,[%fp+tmp_counter]
+ ba .cont1
+ or %g0,0,counter
+.nzero1:
+ lda [%o5]%asi,%l1 ! ld hy;
+ cmp %l0,%g0 ! if (x >= 0);
+
+ lda [%o5+4]%asi,%l0 ! ld ly
+ bge,pt %icc,.pos1 ! if (x >= 0);
+ or %g0,%g0,%o5 ! yisint = 0;
+
+ and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff;
+
+ sra %i2,20,%i2 ! expy = hy >> 20;
+
+ cmp %i2,1076 ! if (expy >= 0x434);
+ bge .neg1 ! if (expy >= 0x434);
+ or %g0,2,%o5 ! yisint = 2;
+
+ cmp %i2,1023 ! if (expy < 0x3ff);
+ bl .neg1 ! if (expy < 0x3ff);
+ or %g0,0,%o5 ! yisint = 0;
+
+ cmp %i2,1043 ! if (expy <= (20 + 0x3ff));
+ ble .small1 ! if (expy <= (20 + 0x3ff));
+ sub %i2,1023,%i2 ! expy - 0x3ff;
+
+ sub %g0,%i2,%i2
+ add %i2,52,%i2 ! sh = 52 - (expy - 0x3ff);
+ srl %l0,%i2,%l1 ! i0 = (ly >> sh);
+
+ sll %l1,%i2,%l1 ! (i0 << sh);
+
+ srl %l0,%i2,%i2 ! i0 = (ly >> sh);
+ cmp %l1,%l0 ! if ((i0 << sh) == ly);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+
+ ba .neg1
+ nop
+.small1:
+ sub %g0,%i2,%i2
+ cmp %l0,%g0 ! if (ly != 0);
+
+ add %i2,20,%i2 ! sh = 20 - (expy - 0x3ff);
+ bne .neg1 ! if (ly != 0);
+ or %g0,0,%o5 ! yisint = 0;
+
+ srl %l1,%i2,%l0 ! i0 = (hy >> sh);
+
+ sll %l0,%i2,%l0 ! (i0 << sh);
+
+ srl %l1,%i2,%i2 ! i0 = (hy >> sh);
+ cmp %l0,%l1 ! if ((i0 << sh) == hy);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+.neg1:
+ cmp %o5,%g0
+ be .u1
+ nop
+.pos1:
+ sll %o5,11,%o5
+ cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff);
+
+ bg,pt %icc,.cont1 ! if (exp > 0xfffff);
+ st %o5,[%fp+%o7]
+
+ std %f32,[%fp+tmp5];
+ std %f54,[%fp+tmp6];
+ ldd [%fp+tmp0_hi],%f32
+ ldd [%fp+tmp_mant],%f54
+
+ or %g0,1074,%o5
+ fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT);
+
+ sll %o5,20,%o5
+ fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0];
+
+ std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0];
+ fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT);
+
+ ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0];
+ for %f32,DONE,%f32 ! x = vis_for(x, DONE);
+
+ std %f32,[%fp+tmp0_hi];
+ sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20;
+ and MASK_0x000fffff,%i2,%i4 ! hx = exp & 0xfffff;
+ ldd [%fp+tmp5],%f32
+ or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000;
+ add %i4,2048,%i4 ! hx += 0x800;
+ ldd [%fp+tmp6],%f54
+ and %i4,-4096,%i4 ! hx &= 0xfffff000;
+
+ ba .cont1
+ st %i4,[%fp+tmp1_hi] ! *(int*)&ax = hx;
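+
+! The .nzero* blocks in these slow paths all perform the same classification
+! of y for a negative base: yisint is 0 when y is not an integer (the result
+! will be NaN), 1 when y is an odd integer and 2 when it is even.  A rough C
+! sketch pieced together from the inline comments (illustrative only):
+!
+!	yisint = 0;
+!	expy = (hy & 0x7fffffff) >> 20;
+!	if (expy >= 0x434)			/* |y| >= 2^53: even integer  */
+!		yisint = 2;
+!	else if (expy >= 0x3ff) {
+!		if (expy > 20 + 0x3ff) {	/* integer bits reach into ly */
+!			sh = 52 - (expy - 0x3ff);
+!			i0 = ly >> sh;
+!			if ((i0 << sh) == ly) yisint = 2 - (i0 & 1);
+!		} else if (ly == 0) {		/* integer bits only in hy    */
+!			sh = 20 - (expy - 0x3ff);
+!			i0 = hy >> sh;
+!			if ((i0 << sh) == hy) yisint = 2 - (i0 & 1);
+!		}
+!	}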
+
+ .align 16
+.update2:
+ cmp counter,1
+ ble,pt %icc,.cont2
+ add py,stridey,%o5
+
+ add %o5,stridey,%o5
+ stx px,[%fp+tmp_px]
+
+ orcc %l3,%i2,%g0 ! if (x == 0);
+ bne,pt %icc,.nzero2 ! if (x == 0);
+ stx %o5,[%fp+tmp_py]
+.u2:
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont2
+ or %g0,1,counter
+.nzero2:
+ lda [%o5]%asi,%l1 ! ld hy;
+ cmp %l0,%g0 ! if (x >= 0);
+
+ lda [%o5+4]%asi,%l0 ! ld ly
+ bge,pt %icc,.pos2 ! if (x >= 0);
+ or %g0,%g0,%o5 ! yisint = 0;
+
+ and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff;
+
+ sra %i2,20,%i2 ! expy = hy >> 20;
+
+ cmp %i2,1076 ! if (expy >= 0x434);
+ bge .neg2 ! if (expy >= 0x434);
+ or %g0,2,%o5 ! yisint = 2;
+
+ cmp %i2,1023 ! if (expy < 0x3ff);
+ bl .neg2 ! if (expy < 0x3ff);
+ or %g0,0,%o5 ! yisint = 0;
+
+ cmp %i2,1043 ! if (expy <= (20 + 0x3ff));
+ ble .small2 ! if (expy <= (20 + 0x3ff));
+ sub %i2,1023,%i2 ! expy - 0x3ff;
+
+ sub %g0,%i2,%i2
+	add	%i2,52,%i2		! sh = 52 - (expy - 0x3ff);
+ srl %l0,%i2,%l1 ! i0 = (ly >> sh);
+
+ sll %l1,%i2,%l1 ! (i0 << sh);
+
+ srl %l0,%i2,%i2 ! i0 = (ly >> sh);
+ cmp %l1,%l0 ! if ((i0 << sh) == ly);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+
+ ba .neg2
+ nop
+.small2:
+ sub %g0,%i2,%i2
+ cmp %l0,%g0 ! if (ly != 0);
+
+	add	%i2,20,%i2		! sh = 20 - (expy - 0x3ff);
+ bne .neg2 ! if (ly != 0);
+ or %g0,0,%o5 ! yisint = 0;
+
+ srl %l1,%i2,%l0 ! i0 = (hy >> sh);
+
+ sll %l0,%i2,%l0 ! (i0 << sh);
+
+ srl %l1,%i2,%i2 ! i0 = (hy >> sh);
+ cmp %l0,%l1 ! if ((i0 << sh) == hy);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+.neg2:
+ cmp %o5,%g0
+ be .u2
+ nop
+.pos2:
+ sll %o5,11,%o5
+ cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff);
+
+ bg,pt %icc,.cont2 ! if (exp > 0xfffff);
+ st %o5,[%fp+%o7]
+
+ ldd [%fp+tmp_mant],%f54
+
+ or %g0,1074,%o5
+ fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT);
+
+ sll %o5,20,%o5
+ fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]
+
+ std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0];
+ fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT);
+
+ ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0];
+ for %f32,DONE,%f32 ! x = vis_for(x, DONE);
+
+ sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20;
+ and MASK_0x000fffff,%i2,%l4 ! hx = exp & 0xfffff;
+ or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000;
+ add %l4,2048,%l4 ! hx += 0x800;
+ and %l4,-4096,%l4 ! hx &= 0xfffff000;
+
+ ba .cont2
+ st %l4,[%fp+tmp1_hi] ! *(int*)&ax = hx;
+
+ .align 16
+.update3:
+ cmp counter,0
+ ble,pt %icc,.cont3
+ sub px,stridex,%o5
+
+ ld [%fp+tmp_counter],%l1
+
+ stx %o5,[%fp+tmp_px]
+ add py,stridey,%o5
+
+ add %l1,counter,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .cont3
+ or %g0,0,counter
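+
+! The .update* entries are the slow-path hooks of the vector loop.  Their
+! common pattern, as far as it can be read from the code (hedged sketch):
+! if the offending element lies beyond the N elements this pass still owns,
+! nothing changes; otherwise the resume pointers are saved in tmp_px/tmp_py,
+! the elements remaining after the special one are recorded in tmp_counter,
+! and the live counter is cut down to N so the vector loop drains just before
+! the special element, which is re-examined when the loop restarts from the
+! saved state.
+!
+!	if (counter > N) {
+!		tmp_px = px; tmp_py = py;	/* where to resume             */
+!		tmp_counter = counter - N;	/* elements left after resume  */
+!		counter = N;			/* finish the good ones first  */
+!	}
+!
+! (some entries, like .update3 above, also add in a previously saved
+! remainder before storing tmp_counter)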
+
+ .align 16
+.update4:
+ cmp counter,2
+ ble,pt %icc,.cont4
+ add py,stridey,%o5
+
+ add %o5,stridey,%o5
+ add %o5,stridey,%o5
+ stx px,[%fp+tmp_px]
+
+ orcc %l2,%i2,%g0 ! if (x == 0);
+ bne,pt %icc,.nzero4 ! if (x == 0);
+ stx %o5,[%fp+tmp_py]
+.u4:
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont4
+ or %g0,2,counter
+.nzero4:
+ lda [%o5]%asi,%l1 ! ld hy;
+ cmp %l0,%g0 ! if (x >= 0);
+
+ lda [%o5+4]%asi,%l0 ! ld ly
+ bge,pt %icc,.pos4 ! if (x >= 0);
+ or %g0,%g0,%o5 ! yisint = 0;
+
+ and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff;
+
+ sra %i2,20,%i2 ! expy = hy >> 20;
+
+ cmp %i2,1076 ! if (expy >= 0x434);
+ bge .neg4 ! if (expy >= 0x434);
+ or %g0,2,%o5 ! yisint = 2;
+
+ cmp %i2,1023 ! if (expy < 0x3ff);
+ bl .neg4 ! if (expy < 0x3ff);
+	or	%g0,0,%o5		! yisint = 0;
+
+ cmp %i2,1043 ! if (expy <= (20 + 0x3ff));
+ ble .small4 ! if (expy <= (20 + 0x3ff));
+ sub %i2,1023,%i2 ! expy - 0x3ff;
+
+ sub %g0,%i2,%i2
+	add	%i2,52,%i2		! sh = 52 - (expy - 0x3ff);
+ srl %l0,%i2,%l1 ! i0 = (ly >> sh);
+
+ sll %l1,%i2,%l1 ! (i0 << sh);
+
+ srl %l0,%i2,%i2 ! i0 = (ly >> sh);
+ cmp %l1,%l0 ! if ((i0 << sh) == ly);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+
+ ba .neg4
+ nop
+.small4:
+ sub %g0,%i2,%i2
+ cmp %l0,%g0 ! if (ly != 0);
+
+	add	%i2,20,%i2		! sh = 20 - (expy - 0x3ff);
+ bne .neg4 ! if (ly != 0);
+ or %g0,0,%o5 ! yisint = 0;
+
+ srl %l1,%i2,%l0 ! i0 = (hy >> sh);
+
+ sll %l0,%i2,%l0 ! (i0 << sh);
+
+ srl %l1,%i2,%i2 ! i0 = (hy >> sh);
+ cmp %l0,%l1 ! if ((i0 << sh) == hy);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+.neg4:
+ cmp %o5,%g0
+ be .u4
+ nop
+.pos4:
+ sll %o5,11,%o5
+ cmp %l2,MASK_0x000fffff ! if (exp > 0xfffff);
+
+ bg,pt %icc,.cont4 ! if (exp > 0xfffff);
+ st %o5,[%fp+%o7]
+
+ std %f32,[%fp+tmp5];
+ std %f54,[%fp+tmp6];
+ ldd [%fp+tmp0_hi],%f32
+ ldd [%fp+tmp_mant],%f54
+
+ or %g0,1074,%o5
+ fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT);
+
+ sll %o5,20,%o5
+ fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]
+
+ std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0];
+ fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT);
+
+ ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0];
+ for %f32,DONE,%f32 ! x = vis_for(x, DONE);
+
+ std %f32,[%fp+tmp0_hi];
+ sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20;
+ and MASK_0x000fffff,%i2,%i4 ! hx = exp & 0xfffff;
+ ldd [%fp+tmp5],%f32
+ or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000;
+ add %i4,2048,%i4 ! hx += 0x800;
+ ldd [%fp+tmp6],%f54
+ and %i4,-4096,%i4 ! hx &= 0xfffff000;
+
+ ba .cont4
+ st %i4,[%fp+tmp1_hi] ! *(int*)&ax = hx;
+
+ .align 16
+.update5:
+ cmp counter,1
+ ble,pt %icc,.cont5
+ sub px,stridex,%o5
+
+ ld [%fp+tmp_counter],%l1
+
+ stx %o5,[%fp+tmp_px]
+ add py,stridey,%o5
+
+ add %l1,counter,counter
+ stx %o5,[%fp+tmp_py]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont5
+ or %g0,1,counter
+
+ .align 16
+.update6:
+ cmp counter,0
+ ble,pt %icc,.cont6
+ fmovd DONE,%f30
+
+ ld [%fp+tmp_counter],%o2
+ sub px,stridex,%o5
+
+ sub %o5,stridex,%o5
+ stx py,[%fp+tmp_py]
+
+ add %o2,counter,counter
+ sub %o5,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ st counter,[%fp+tmp_counter]
+ ba .cont6
+ or %g0,0,counter
+
+ .align 16
+.update7:
+ cmp counter,0
+ ble,pt %icc,.cont7
+ fmovd DONE,%f30
+ sub px,stridex,%o5
+
+ ld [%fp+tmp_counter],%o2
+
+ sub %o5,stridex,%o5
+ stx py,[%fp+tmp_py]
+
+ add %o2,counter,counter
+ sub %o5,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ st counter,[%fp+tmp_counter]
+ ba .cont7
+ or %g0,0,counter
+
+ .align 16
+.update8:
+ cmp counter,2
+ ble,pt %icc,.cont8
+ add py,stridey,%o5
+
+ add %o5,stridey,%o5
+ stx px,[%fp+tmp_px]
+
+ orcc %l3,%i2,%g0 ! if (x == 0);
+ bne,pt %icc,.nzero8 ! if (x == 0);
+ stx %o5,[%fp+tmp_py]
+.u8:
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont8
+ or %g0,2,counter
+.nzero8:
+ lda [%o5]%asi,%l1 ! ld hy;
+ cmp %l0,%g0 ! if (x >= 0);
+
+ lda [%o5+4]%asi,%l0 ! ld ly
+ bge,pt %icc,.pos8 ! if (x >= 0);
+ or %g0,%g0,%o5 ! yisint = 0;
+
+ and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff;
+
+ sra %i2,20,%i2 ! expy = hy >> 20;
+
+ cmp %i2,1076 ! if (expy >= 0x434);
+ bge .pos8 ! if (expy >= 0x434);
+ or %g0,2,%o5 ! yisint = 2;
+
+ cmp %i2,1023 ! if (expy < 0x3ff);
+ bl .neg8 ! if (expy < 0x3ff);
+ or %g0,0,%o5 ! yisint = 0;
+
+ cmp %i2,1043 ! if (expy <= (20 + 0x3ff));
+ ble .small8 ! if (expy <= (20 + 0x3ff));
+ sub %i2,1023,%i2 ! expy - 0x3ff;
+
+ sub %g0,%i2,%i2
+	add	%i2,52,%i2		! sh = 52 - (expy - 0x3ff);
+ srl %l0,%i2,%l1 ! i0 = (ly >> sh);
+
+ sll %l1,%i2,%l1 ! (i0 << sh);
+
+ srl %l0,%i2,%i2 ! i0 = (ly >> sh);
+ cmp %l1,%l0 ! if ((i0 << sh) == ly);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+
+ ba .neg8
+ nop
+.small8:
+ sub %g0,%i2,%i2
+ cmp %l0,%g0 ! if (ly != 0);
+
+	add	%i2,20,%i2		! sh = 20 - (expy - 0x3ff);
+ bne .neg8 ! if (ly != 0);
+ or %g0,0,%o5 ! yisint = 0;
+
+ srl %l1,%i2,%l0 ! i0 = (hy >> sh);
+
+ sll %l0,%i2,%l0 ! (i0 << sh);
+
+ srl %l1,%i2,%i2 ! i0 = (hy >> sh);
+ cmp %l0,%l1 ! if ((i0 << sh) == hy);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+.neg8:
+ cmp %o5,%g0
+ be .u8
+ nop
+.pos8:
+ sll %o5,11,%o5
+ cmp %l3,MASK_0x000fffff ! if (exp > 0xfffff);
+
+ bg,pt %icc,.cont8 ! if (exp > 0xfffff);
+ st %o5,[%fp+%o7]
+
+ ldd [%fp+tmp_mant],%f54
+
+ or %g0,1074,%o5
+ fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT);
+
+ sll %o5,20,%o5
+ fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]
+
+ std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0];
+ fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT);
+
+ ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0];
+ for %f32,DONE,%f32 ! x = vis_for(x, DONE);
+
+ sub %i2,%o5,%l3 ! exp -= (1023 + 51) << 20;
+	and	MASK_0x000fffff,%i2,%l4	! hx = exp & 0xfffff;
+ or MASK_0x3ff00000,%l4,%l4 ! hx |= 0x3ff00000;
+ add %l4,2048,%l4 ! hx += 0x800;
+ and %l4,-4096,%l4 ! hx &= 0xfffff000;
+
+ ba .cont8
+ st %l4,[%fp+tmp1_hi] ! *(int*)&ax = hx;
+
+ .align 16
+.update9:
+ cmp counter,1
+ ble,pt %icc,.cont9
+ sub px,stridex,%o5
+
+ ld [%fp+tmp_counter],%l1
+
+ stx %o5,[%fp+tmp_px]
+ add py,stridey,%o5
+
+ add %l1,counter,counter
+ stx %o5,[%fp+tmp_py]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont9
+ or %g0,1,counter
+
+ .align 16
+.update10:
+ cmp counter,0
+ ble,pt %icc,.cont10
+ fmovd DONE,%f30
+
+ ld [%fp+tmp_counter],%o2
+ sub px,stridex,%o5
+
+ sub %o5,stridex,%o5
+ stx py,[%fp+tmp_py]
+
+ add %o2,counter,counter
+ sub %o5,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ st counter,[%fp+tmp_counter]
+ ba .cont10
+ or %g0,0,counter
+
+ .align 16
+.update11:
+ cmp counter,0
+ ble,pt %icc,.cont11
+ fmovd DONE,%f30
+
+ ld [%fp+tmp_counter],%o2
+ sub px,stridex,%o5
+
+ sub %o5,stridex,%o5
+ stx py,[%fp+tmp_py]
+
+ add %o2,counter,counter
+ sub %o5,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ st counter,[%fp+tmp_counter]
+ ba .cont11
+ or %g0,0,counter
+
+ .align 16
+.update12:
+ cmp counter,3
+ ble,pt %icc,.cont12
+ add py,stridey,%o5
+
+ add %o5,stridey,%o5
+ stx px,[%fp+tmp_px]
+
+ add %o5,stridey,%o5
+ orcc %l2,%i2,%g0 ! if (x == 0);
+
+ bne,pt %icc,.nzero12 ! if (x == 0);
+ stx %o5,[%fp+tmp_py]
+.u12:
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont12
+ or %g0,3,counter
+.nzero12:
+ lda [%o5]%asi,%l1 ! ld hy;
+ cmp %l0,%g0 ! if (x >= 0);
+
+ lda [%o5+4]%asi,%l0 ! ld ly
+ bge,pt %icc,.pos12 ! if (x >= 0);
+ or %g0,%g0,%o5 ! yisint = 0;
+
+ and %l1,MASK_0x7fffffff,%i2 ! hy &= 0x7fffffff;
+
+ sra %i2,20,%i2 ! expy = hy >> 20;
+
+ cmp %i2,1076 ! if (expy >= 0x434);
+ bge .neg12 ! if (expy >= 0x434);
+ or %g0,2,%o5 ! yisint = 2;
+
+ cmp %i2,1023 ! if (expy < 0x3ff);
+ bl .neg12 ! if (expy < 0x3ff);
+ or %g0,0,%o5 ! yisint = 0;
+
+ cmp %i2,1043 ! if (expy <= (20 + 0x3ff));
+ ble .small12 ! if (expy <= (20 + 0x3ff));
+ sub %i2,1023,%i2 ! expy - 0x3ff;
+
+ sub %g0,%i2,%i2
+	add	%i2,52,%i2		! sh = 52 - (expy - 0x3ff);
+ srl %l0,%i2,%l1 ! i0 = (ly >> sh);
+
+ sll %l1,%i2,%l1 ! (i0 << sh);
+
+ srl %l0,%i2,%i2 ! i0 = (ly >> sh);
+ cmp %l1,%l0 ! if ((i0 << sh) == ly);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+
+ ba .neg12
+ nop
+.small12:
+ sub %g0,%i2,%i2
+ cmp %l0,%g0 ! if (ly != 0);
+
+	add	%i2,20,%i2		! sh = 20 - (expy - 0x3ff);
+ bne .neg12 ! if (ly != 0);
+ or %g0,0,%o5 ! yisint = 0;
+
+ srl %l1,%i2,%l0 ! i0 = (hy >> sh);
+
+ sll %l0,%i2,%l0 ! (i0 << sh);
+
+ srl %l1,%i2,%i2 ! i0 = (hy >> sh);
+ cmp %l0,%l1 ! if ((i0 << sh) == hy);
+
+ and %i2,1,%i2 ! i0 &= 1;
+
+ sub %g0,%i2,%i2
+ add %i2,2,%i2 ! i0 = 2 - i0;
+
+ move %icc,%i2,%o5 ! yisint = i0;
+.neg12:
+ cmp %o5,%g0
+ be .u12
+ nop
+.pos12:
+ sll %o5,11,%o5
+	cmp	%l2,MASK_0x000fffff	! if (exp > 0xfffff);
+
+	bg,pt	%icc,.cont12		! if (exp > 0xfffff);
+ st %o5,[%fp+%o7]
+
+ std %f32,[%fp+tmp5];
+ std %f54,[%fp+tmp6];
+ ldd [%fp+tmp0_hi],%f32
+ ldd [%fp+tmp_mant],%f54
+
+ or %g0,1074,%o5
+ fand %f32,%f54,%f32 ! y0 = vis_fand(x, MMANT);
+
+ sll %o5,20,%o5
+ fxtod %f32,%f32 ! ax = (double) ((long long *) & y0)[0]
+
+ std %f32,[%fp+tmp0_hi] ! exp = ((unsigned int*) & ax)[0];
+ fand %f32,%f54,%f32 ! x = vis_fand(ax, MMANT);
+
+ ld [%fp+tmp0_hi],%i2 ! exp = ((unsigned int*) & ax)[0];
+ for %f32,DONE,%f32 ! x = vis_for(x, DONE);
+
+ std %f32,[%fp+tmp0_hi];
+ sub %i2,%o5,%l2 ! exp -= (1023 + 51) << 20;
+	and	MASK_0x000fffff,%i2,%i4	! hx = exp & 0xfffff;
+ ldd [%fp+tmp5],%f32
+ or MASK_0x3ff00000,%i4,%i4 ! hx |= 0x3ff00000;
+ add %i4,2048,%i4 ! hx += 0x800;
+ ldd [%fp+tmp6],%f54
+ and %i4,-4096,%i4 ! hx &= 0xfffff000;
+
+ ba .cont12
+ st %i4,[%fp+tmp1_hi] ! *(int*)&ax = hx;
+
+ .align 16
+.update13:
+ cmp counter,2
+ ble,pt %icc,.cont13
+ sub px,stridex,%o5
+
+ ld [%fp+tmp_counter],%l1
+
+ stx %o5,[%fp+tmp_px]
+ add py,stridey,%o5
+
+ add %l1,counter,counter
+ stx %o5,[%fp+tmp_py]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont13
+ or %g0,2,counter
+
+ .align 16
+.update14:
+ cmp counter,1
+ ble,pt %icc,.cont14
+ fmovd DONE,%f30
+
+ ld [%fp+tmp_counter],%o2
+ sub px,stridex,%o5
+
+ sub %o5,stridex,%o5
+ stx py,[%fp+tmp_py]
+
+ add %o2,counter,counter
+ sub %o5,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont14
+ or %g0,1,counter
+
+ .align 16
+.update15:
+ cmp counter,1
+ ble,pt %icc,.cont15
+ fmovd DONE,%f30
+
+ sub px,stridex,%o5
+
+ ld [%fp+tmp_counter],%o2
+ sub %o5,stridex,%o5
+ stx py,[%fp+tmp_py]
+
+ add %o2,counter,counter
+ sub %o5,stridex,%o5
+ stx %o5,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont15
+ or %g0,1,counter
+
+ .align 16
+.spec0:
+ lda [py+4]%asi,%o5 ! ld ly;
+ lda [px]%asi,%f16 ! y0 = *px;
+ lda [px+4]%asi,%f17 ! y0 = *px;
+ orcc %l1,%o5,%g0 ! if (hy | ly) != 0;
+
+ bne,pn %icc,1f
+ sethi %hi(0x7ff00000),%o5
+
+ st DONE_HI,[pz]
+ ba .update_point
+ st DONE_LO,[pz+4]
+1:
+ cmp %l3,%o5 ! if (hx > 0x7ff00000);
+ bgu,a,pn %icc,6f ! if (hx > 0x7ff00000);
+ fmuld %f16,%f16,%f16 ! *pz = y0 * y0;
+
+ bne,pt %icc,2f ! if (hx != 0x7ff00000);
+ orcc %l3,%i2,%g0 ! if (hx | lx) != 0;
+
+	cmp	%i2,0			! if (lx != 0);
+	bne,pn	%icc,5f			! if (lx != 0);
+ srl %o2,31,%o5 ! sy;
+
+ st %l3,[pz] ! ((int*)pz)[0] = hx;
+ ba 3f
+ cmp %o5,0 ! if (sy == 0);
+2:
+ bne,pt %icc,4f ! if (hx | lx) != 0;
+ srl %l0,31,%o5 ! sx;
+
+ st %l3,[pz] ! ((int*)pz)[0] = hx;
+ srl %o2,31,%o5 ! sy;
+ cmp %o5,0 ! if (sy == 0);
+3:
+ be,pt %icc,.update_point ! if (sy == 0);
+ st %i2,[pz+4] ! ((int*)pz)[1] = lx;
+
+ ld [pz],%f16 ! *pz;
+ ld [pz+4],%f17 ! *pz;
+ fdivd DONE,%f16,%f16 ! *pz = DONE / *pz;
+
+ st %f16,[pz]
+ ba .update_point
+ st %f17,[pz+4]
+4:
+ cmp %o5,0 ! if (sx == 0);
+ bne,a,pt %icc,1f
+ nop
+
+ st DONE_HI,[pz] ! *pz = DONE;
+ ba .update_point
+ st DONE_LO,[pz+4] ! *pz = DONE;
+1:
+ fdivd DZERO,DZERO,%f16 ! *pz = DZERO / DZERO;
+ st %f16,[pz]
+ ba .update_point
+ st %f17,[pz+4]
+5:
+ fmuld %f16,%f16,%f16 ! *pz = y0 * y0;
+6:
+ st %f16,[pz]
+ ba .update_point
+ st %f17,[pz+4]
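+
+! .spec0 above appears to be the path for |y| small enough that x**y rounds
+! to 1 for ordinary arguments; only the awkward bases need work.  A hedged
+! summary of the visible cases:
+!	y == +-0            -> 1.0 for every x, including NaN;
+!	x is NaN            -> x * x, propagating the NaN;
+!	x == +-Inf or +-0   -> the exact result is stored, then inverted
+!	                       through DONE / *pz when y is negative;
+!	x < 0 (finite)      -> DZERO / DZERO to raise the invalid exception,
+!	                       since a tiny non-zero y is never an integer.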
+
+ .align 16
+.spec1:
+ lda [px]%asi,%f14 ! y0 = *px;
+ lda [px+4]%asi,%f15 ! y0 = *px;
+ sethi %hi(0x7ff00000),%o5
+ lda [py+4]%asi,%i4 ! ld ly;
+ srl %o2,31,%o2 ! sy
+ cmp %l3,%o5 ! if (hx >= 0x7ff00000);
+ bcc,pn %icc,3f
+ nop
+
+ cmp %l1,%o5 ! if (hy > 0x7ff00000);
+ bgu,a,pt %icc,.spec1_nan_inf ! if (hy > 0x7ff00000);
+ lda [py]%asi,%f16 ! ld y
+
+ bne,a,pt %icc,1f ! if (hy != 0x7ff00000);
+ cmp %i2,0 ! if (lx != 0);
+
+ ba 2f ! if (hy == 0x7ff00000);
+ cmp %i4,0 ! if (ly != 0);
+1:
+ bne,pt %icc,7f ! if (lx != 0);
+ nop
+
+ cmp %l3,0 ! if (hx == 0);
+ be,a,pt %icc,6f ! if (hx == 0);
+ st %l3,[pz] ! ((int*)pz)[0] = hx;
+
+ cmp %l3,MASK_0x3ff00000 ! if (hx == 0x3ff00000);
+ be,a,pn %icc,6f ! if (hx == 0x3ff00000);
+ st %l3,[pz] ! ((int*)pz)[0] = hx;
+
+ ba 5f
+ cmp %l3,%o5 ! if (hx != 0x7ff00000);
+3:
+ bgu,a,pt %icc,.spec1_nan_inf ! if (hx > 0x7ff00000);
+ lda [py]%asi,%f16 ! ld y
+
+ bne,a,pn %icc,1f ! if (hx != 0x7ff00000);
+ cmp %l1,%o5 ! if (hy > 0x7ff00000);
+
+ cmp %i2,0 ! if (lx != 0);
+ bne,a,pt %icc,.spec1_nan_inf ! if (lx != 0);
+ lda [py]%asi,%f16 ! ld y
+
+ cmp %l1,%o5 ! if (hy > 0x7ff00000);
+1:
+ bgu,a,pt %icc,.spec1_nan_inf ! if (hy > 0x7ff00000);
+ lda [py]%asi,%f16 ! ld y
+
+ bne,pn %icc,3f ! if (hy != 0x7ff00000);
+ nop
+
+ cmp %i4,0 ! if (ly != 0);
+2:
+ bne,a,pn %icc,.spec1_nan_inf ! if (ly != 0);
+ lda [py]%asi,%f16 ! ld y
+
+ cmp %l3,MASK_0x3ff00000 ! if (hx != 0x3ff00000);
+ bne,pn %icc,1f ! if (hx != 0x3ff00000);
+ cmp %i2,0 ! if (lx != 0);
+
+ bne,pn %icc,1f ! if (lx != 0);
+ nop
+
+ ld [py],%f16 ! ld y
+ ld [py+4],%f17 ! ld y
+ fzero %f14
+ fmuld %f16,%f14,%f14 ! *pz = *py * 0.0;
+ st %f14,[pz]
+ ba .update_point
+ st %f15,[pz+4]
+1:
+ sub %l3,MASK_0x3ff00000,%o7 ! (hx - 0x3ff00000);
+ srlx %o7,63,%l2 ! (hx - 0x3ff00000) >> 63;
+
+ cmp %l2,%o2 ! if ((hx < 0x3ff00000) == sy)
+ be,a,pn %icc,1f ! if ((hx < 0x3ff00000) == sy)
+ st %l1,[pz] ! ((int*)pz)[0] = hy;
+
+ st DZERO_HI,[pz] ! *pz = DZERO;
+ ba .update_point
+ st DZERO_LO,[pz+4] ! *pz = DZERO;
+1:
+ ba .update_point
+ st %i4,[pz+4] ! ((int*)pz)[0] = ly;
+3:
+ cmp %o0,1086 ! if (expy >= 0x43e);
+ bge,pn %icc,4f ! if (expy >= 0x43e)
+ nop
+
+ srl %l0,31,%l0 ! sx;
+ cmp %l0,0 ! if (sx == 0);
+ be,pn %icc,2f
+ or %g0,0,%l4
+
+ cmp %o0,1076 ! if (expy >= 0x434);
+
+ bge,pn %icc,2f ! if (expy >= 0x434);
+ or %g0,2,%l4 ! yisint = 2;
+
+ cmp %o0,1023 ! if (expy < 0x3ff);
+ bl,a,pn %icc,2f ! if (expy < 0x3ff);
+ or %g0,0,%l4 ! yisint = 0;
+
+ cmp %o0,1043 ! if (expy <= (20 + 0x3ff));
+ ble,pn %icc,1f
+ sub %o0,1023,%l2 ! (expy - 0x3ff);
+
+ sub %g0,%l2,%l2 ! 0 - (expy - 0x3ff);
+ add %l2,52,%l2 ! sh = 52 - (expy - 0x3ff);
+ srl %i4,%l2,%o0 ! i0 = ly >> sh;
+ sll %o0,%l2,%l2 ! i0 << sh;
+ cmp %l2,%i4 ! if ((i0 << sh) != ly);
+ bne,a,pn %icc,2f ! if ((i0 << sh) != ly);
+ or %g0,0,%l4 ! yisint = 0;
+
+ and %o0,1,%o0 ! i0 &= 1;
+ sub %g0,%o0,%o0
+
+ ba 2f
+ add %o0,2,%l4 ! yisint = 2 - (i0 & 1);
+1:
+ cmp %i4,0 ! if (ly != 0)
+ bne,a,pn %icc,2f ! if (ly != 0)
+ or %g0,0,%l4 ! yisint = 0;
+
+ sub %o0,1023,%l2 ! (expy - 0x3ff);
+ sub %g0,%l2,%l2 ! 0 - (expy - 0x3ff);
+ add %l2,20,%l2 ! sh = 20 - (expy - 0x3ff);
+ srl %l1,%l2,%o0 ! i0 = hy >> sh;
+ sll %o0,%l2,%l2 ! i0 << sh;
+ cmp %l2,%l1 ! if ((i0 << sh) != hy);
+ bne,a,pn %icc,2f ! if ((i0 << sh) != hy);
+ or %g0,0,%l4 ! yisint = 0;
+
+ and %o0,1,%o0 ! i0 &= 1;
+ sub %g0,%o0,%o0
+ add %o0,2,%l4 ! yisint = 2 - (i0 & 1);
+2:
+ cmp %o2,0 ! if (sy == 0);
+ sll %l4,31,%l4 ! yisint << 31;
+ be,pt %icc,1f ! if (sy == 0);
+ add %l3,%l4,%l3 ! hx += yisint << 31;
+
+ or %g0,%l4,%l3 ! hx = yisint << 31;
+ or %g0,0,%i2 ! lx = 0;
+1:
+ st %l3,[pz] ! ((int*)pz)[0] = hx;
+ ba .update_point
+ st %i2,[pz+4] ! ((int*)pz)[1] = lx;
+4:
+ cmp %i2,0 ! if (lx != 0);
+ bne,pn %icc,7f ! if (lx != 0);
+ nop
+
+ cmp %l3,%o5 ! if (hx != 0x7ff00000);
+5:
+ bne,pn %icc,7f ! if (hx != 0x7ff00000);
+ nop
+
+ st %l3,[pz] ! ((int*)pz)[0] = hx;
+6:
+ cmp %o2,0 ! if (sy == 0);
+ be,pt %icc,.update_point
+ st %i2,[pz+4] ! ((int*)pz)[1] = lx;
+
+ ld [pz],%f14 ! ld *pz;
+ ld [pz+4],%f15 ! ld *pz;
+ fdivd DONE,%f14,%f14 ! *pz = DONE / *pz;
+ st %f14,[pz]
+ ba .update_point
+ st %f15,[pz+4]
+7:
+ sub %l3,MASK_0x3ff00000,%o7 ! hx - 0x3ff00000;
+ srlx %o7,63,%l2 ! (hx - 0x3ff00000) >> 63;
+	cmp	%l2,%o2			! if ((hx < 0x3ff00000) == sy);
+	be,a,pn	%icc,1f			! if ((hx < 0x3ff00000) == sy);
+ ldd [EXPTBL-ind_HUGE],%f14 ! y0 = _HUGE;
+
+ ldd [EXPTBL-ind_TINY],%f14 ! y0 = _TINY;
+1:
+ fmuld %f14,%f14,%f14 ! *pz = y0 * y0
+
+ st %f14,[pz]
+ ba .update_point
+ st %f15,[pz+4]
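+
+! .spec1 above appears to cover |y| >= 2^63 together with infinite or NaN
+! operands (hedged reading of the code).  NaN in either argument is
+! propagated through .spec1_nan_inf as *px * *py; the exact +-Inf, 0 and
+! |x| == 1 combinations are composed and stored directly; every remaining
+! case is saturated by squaring _HUGE or _TINY, the choice following from
+! whether |x| lies above or below 1 and from the sign of y, so the proper
+! overflow or underflow result and exceptions are produced.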
+
+ .align 16
+.spec1_nan_inf:
+ lda [py+4]%asi,%f17 ! ld y
+ fmuld %f14,%f16,%f16 ! *pz = *px * *py
+ st %f16,[pz]
+ ba .update_point
+ st %f17,[pz+4]
+
+
+ .align 16
+.update_point:
+ add px,stridex,px
+ ba .begin1
+ add py,stridey,py
+
+ .align 64
+.stridex_zero:
+
+ sra stridez,0,stridez
+ ld [%i1],%f18 ! y0 = px[0];
+ ld [%i1+4],%f19 ! y0 = px[0];
+
+ sra %i4,0,stridey
+ sethi %hi(0xffc00),MASK_0x000fffff
+ ldd [%l0+80],%f12 ! ld MMANT
+
+ sllx stridez,3,stridez
+ add MASK_0x000fffff,0x3ff,MASK_0x000fffff
+ ldd [%l0+8],%f56 ! ld DONE
+
+ sllx stridey,3,stridey
+ ldd [%l0+88],%f14 ! ld MROUND
+
+ ldd [%l0+96],%f16 ! ld MHI20
+ cmp %o0,MASK_0x000fffff ! if (exp <= 0xfffff)
+
+ bg,pt %icc,1f
+ srl %o0,20,%o0 ! exp = (exp >> 20);
+
+ fxtod %f18,%f18 ! y0 = (double) ((long long *) & y0)[0];
+ std %f18,[%fp+tmp0_hi] ! exp = ((unsigned int*) & y0)[0];
+ or %g0,1074,%i2
+ ld [%fp+tmp0_hi],%o0 ! exp = ((unsigned int*) & y0)[0];
+ srl %o0,20,%o0 ! exp = (exp >> 20);
+ sub %o0,%i2,%o0 ! exp -= (1023 + 51) << 20;
+1:
+ ldd [%l0+24],MHI32
+ sub %o0,2046,%l5 ! exp = exp - 2046;
+ fand %f18,%f12,%f18 ! x = vis_fand(y0, MMANT);
+
+ ldd [%l0+48],%f10 ! ld KA1
+ for %f18,%f56,%f18 ! x = vis_for(x, DONE);
+
+ ldd [EXPTBL-ind_HI],%f28 ! ld KA1_HI
+ fpadd32 %f18,%f14,%f44 ! ax = vis_fpadd32(x, MROUND);
+
+ ldd [%l0+32],%f46 ! ld KA5
+ fand %f44,%f16,%f60 ! ax = vis_fand(ax, MHI20);
+
+ std %f60,[%fp+tmp0_hi] ! itmp0 = (hx >> 20);
+ faddd %f18,%f60,%f50 ! ux = x + ax;
+
+ ldd [EXPTBL-ind_LO],%f52 ! ld KA1_LO
+ fsubd %f18,%f60,%f30 ! u = x - ax;
+
+ ld [%fp+tmp0_hi],%i2 ! itmp0 = (hx >> 20);
+ fdivd %f56,%f50,%f56 ! yd = DONE / ux;
+ fand %f50,MHI32,%f50 ! ux = vis_fand(ux, MHI32);
+
+ srl %i2,20,%l3 ! itmp0 = (hx >> 20);
+ ldd [%l0+40],%f26 ! ld KA3
+
+ srl %i2,8,%i2 ! i = (hx >> 8);
+ add %l5,%l3,%l5 ! exp += itmp0;
+
+ and %i2,4080,%o3 ! i = i & 0xff0;
+ sll %l5,8,%l3 ! itmp0 = exp << 8;
+ st %l3,[%fp+tmp1_hi] ! (double)itmp0;
+ fsubd %f50,%f60,%f60 ! dtmp0 = (ux - ax);
+
+ add %o3,8,%i2
+ ldd [%o3+LOGTBL],%f58 ! y = *(double *)((char*)__mt_constlog2 + i);
+
+ ldd [%i2+LOGTBL],%f20 ! dtmp0 = *(double *)((char*)__mt_constlog2 + i + 8);
+
+ ld [%fp+tmp1_hi],%f8 ! (double)itmp0;
+
+ fitod %f8,%f62 ! (double)itmp0;
+
+ faddd %f58,%f62,%f22 ! y += (double)itmp0;
+
+ fsubd %f18,%f60,%f62 ! s_l = (x - dtmp0);
+ fmuld %f30,%f56,%f16 ! s = u * yd;
+
+ fmuld %f10,%f56,%f8 ! dtmp0 = KA1 * yd;
+ fand %f16,MHI32,%f58 ! s_h = vis_fand(s, MHI32);
+
+ ldd [%l0+56],HTHRESH
+ fmuld %f16,%f16,%f18 ! y = s * s;
+
+ ldd [%l0+64],LTHRESH
+ fmuld %f58,%f50,%f60 ! dtmp0 = s_h * ux;
+
+ ldd [%l0+72],XKB4
+ fmuld %f28,%f58,%f50 ! yd = KA1_HI * s_h;
+
+ ldd [EXPTBL-ind_KB1],XKB1
+ fmuld %f46,%f18,%f56 ! dtmp8 = KA5 * y;
+
+ ldd [EXPTBL-ind_KB2],XKB2
+ fmuld %f58,%f62,%f46 ! dtmp1 = s_h * s_l;
+ fsubd %f30,%f60,%f62 ! s_l = u - dtmp0;
+
+ ldd [EXPTBL-ind_KB3],XKB3
+ fmuld %f52,%f58,%f10 ! dtmp1 = KA1_LO * s_h;
+ faddd %f22,%f50,%f28 ! m_h = y + yd;
+
+ ldd [EXPTBL-ind_KB5],XKB5
+ faddd %f56,%f26,%f58 ! dtmp8 = dtmp8 + KA3;
+
+ add EXPTBL,8,EXPTBL_P8
+ fsubd %f62,%f46,%f46 ! s_l -= dtmp1;
+
+ fsubd %f28,%f22,%f60 ! dtmp2 = m_h - y;
+
+ st %g0,[%fp+tmp0_lo] ! *((int*)&dtmp0 + 1) = 0;
+ faddd %f20,%f10,%f56 ! dtmp0 += dtmp1;
+
+ st %g0,[%fp+tmp1_lo] ! *((int*)&dtmp0 + 1) = 0;
+ fmuld %f58,%f18,%f18 ! dtmp8 = dtmp8 * y;
+
+ st %g0,[%fp+tmp2_lo] ! *((int*)&dtmp0 + 1) = 0;
+ fmuld %f8,%f46,%f62 ! s_l = dtmp0 * s_l;
+
+ fsubd %f60,%f50,%f10 ! dtmp2 -= yd;
+
+ fmuld %f18,%f16,%f58 ! s = dtmp8 * s;
+
+ fsubd %f10,%f62,%f46 ! dtmp2 -= s_l;
+
+ fsubd %f58,%f46,%f50 ! y = s - dtmp2;
+
+ faddd %f50,%f56,%f60 ! y += dtmp0;
+
+ faddd %f60,%f28,%f18 ! dtmp0 = y + m_h;
+
+ fand %f18,MHI32,s_h ! s_h = vis_fand(dtmp0, MHI32);
+
+ fsubd s_h,%f28,%f62 ! dtmp0 = (s_h - m_h);
+
+ fsubd %f60,%f62,yr ! yr = y - dtmp0;
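+
+! Everything above this point in .stridex_zero runs once: with a constant
+! base, x**y reduces to exp2(y * log2(x)), and log2(x), apparently scaled by
+! 2^8, has been split into a high part s_h and a correction yr using the
+! __mt_constlog2 table and the KA* polynomial.  The loop below then does,
+! per element, roughly the following (names mirror the inline comments;
+! illustrative sketch only; the (Yi_j) tags mark element i of the three-way
+! software pipeline at stage j):
+!
+!	s   = s_h * y + yr * y;		/* ~ 256 * y * log2(x), split mul    */
+!	s   = min(max(s, LTHRESH), HTHRESH);
+!	ind = (int) s;
+!	y   = (s - ind) + yd;		/* fractional part plus correction   */
+!	u   = __mt_constexp2[ind & 0xff];	/* 2^(k/256), hi and lo words */
+!	z   = u + u * poly(y);		/* XKB1..XKB5 polynomial             */
+!	z   = scale(z, ind >> 8);	/* exponent insertion; see the note  */
+!					/* after .xend_loop below            */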
+
+.xbegin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_py],py
+ st %g0,[%fp+tmp_counter]
+.xbegin1:
+ subcc counter,1,counter
+ bneg,pn %icc,.end
+ nop
+
+ lda [py]0x82,%l2 ! (Y0_3) hy = *py;
+
+ lda [py]0x82,%f18 ! (Y0_3) yd = *py;
+ lda [py+4]%asi,%f19 ! (Y0_3) yd = *py;
+
+ sra %l2,20,%l5 ! (Y0_3) expy = hy >> 20;
+
+ and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff;
+
+ cmp %l5,959 ! (Y0_3) if (expy < 0x3fb);
+
+ bl,pn %icc,.xspec0 ! (Y0_3) if (expy < 0x3fb);
+ nop
+
+ cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e);
+
+ bge,pn %icc,.xspec1 ! (Y0_2) if (expy >= 0x43e);
+ nop
+
+ add py,stridey,py ! y += stridey;
+ fand %f18,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32);
+
+ lda [py]0x82,%l5 ! (Y1_2) hy = *py;
+
+ lda [py]0x82,%f10 ! (Y1_2) yd = *py;
+ lda [py+4]%asi,%f11 ! (Y1_2) yd = *py;
+
+ sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20;
+
+ and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff;
+
+ cmp %l5,959 ! (Y1_2) if (expy < 0x3fb);
+ add py,stridey,py ! y += stridey;
+ fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s;
+ fsubd %f18,%f12,%f56 ! (Y0_2) dtmp0 = (yd - s);
+
+ fmuld %f18,yr,%f26 ! (Y0_2) dtmp1 = yd * yr;
+ bl,pn %icc,.xupdate0 ! (Y1_2) if (expy < 0x3fb);
+ nop
+.xcont0:
+ cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e);
+ bge,pn %icc,.xupdate1 ! (Y0_2) if (expy >= 0x43e);
+ nop
+.xcont1:
+ fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h;
+ fand %f10,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32);
+
+ fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH);
+
+ faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1;
+
+ lda [py]0x82,%l5 ! (Y2_2) hy = *py;
+ fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH;
+
+ fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO;
+
+ fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH);
+
+ lda [py]0x82,%f14 ! (Y2_2) yd = *py;
+ lda [py+4]%asi,%f15 ! (Y2_2) yd = *py;
+
+ sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20;
+
+ fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO;
+
+ add py,stridey,py ! y += stridey;
+ and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff;
+ fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH;
+
+ cmp %l5,959 ! (Y2_2) if (expy < 0x3fb);
+
+ fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s;
+ bl,pn %icc,.xupdate2 ! (Y2_2) if (expy < 0x3fb);
+ fsubd %f10,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s);
+.xcont2:
+ cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e);
+ fmuld %f10,yr,%f8 ! (Y1_2) dtmp1 = yd * yr;
+ faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd);
+
+ lda [py]0x82,%l5 ! (Y0_3) hy = *py;
+ bge,pn %icc,.xupdate3 ! (Y2_2) if (expy >= 0x43e);
+ nop
+.xcont3:
+ fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h;
+ fand %f14,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32);
+
+ fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH);
+
+ fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0;
+
+ st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0;
+
+ faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1;
+
+ lda [py]0x82,%f18 ! (Y0_3) yd = *py;
+ lda [py+4]%asi,%f19 ! (Y0_3) yd = *py;
+ fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH;
+
+ fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0;
+
+ fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO;
+
+ sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20;
+ fcmped %fcc1,%f16,LTHRESH ! (Y1_2) if (s < LTHRESH);
+
+ and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff;
+ fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u;
+
+ cmp %l5,959 ! (Y0_3) if (expy < 0x3fb);
+
+ bl,pn %icc,.xupdate4 ! (Y0_3) if (expy < 0x3fb);
+ nop
+.xcont4:
+ fmovdl %fcc1,DZERO,%f10 ! (Y1_2) yd = DZERO;
+
+ fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH;
+
+ faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd;
+
+ ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0;
+
+
+ fsubd %f14,%f44,%f50 ! (Y2_1) dtmp0 = (yd - s);
+
+ cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e);
+
+ fmuld s_h,%f44,%f44 ! (Y2_1) s = s_h * s;
+ bge,pn %icc,.xupdate5 ! (Y0_2) if (expy >= 0x43e);
+ faddd %f16,%f10,%f22 ! (Y1_1) dtmp0 = (s + yd);
+.xcont5:
+ sra %o2,8,%o0 ! (Y0_1) ind >>= 8;
+ add py,stridey,py ! y += stridey;
+ fmuld %f14,yr,%f20 ! (Y2_1) dtmp1 = yd * yr;
+
+ add %o0,1021,%i1 ! (Y0_1) eflag = (ind + 1021);
+ fmuld XKB5,%f54,%f48 ! (Y0_1) dtmp0 = XKB5 * y;
+
+ sub %g0,%o0,%o3 ! (Y0_1) gflag = (1022 - ind);
+ fmuld %f50,s_h,%f52 ! (Y2_1) dtmp0 *= s_h;
+ fand %f18,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32);
+
+ sra %i1,31,%o1 ! (Y0_1) eflag = eflag >> 31;
+ add %o3,1022,%l0 ! (Y0_1) gflag = (1022 - ind);
+ fcmped %fcc0,%f44,HTHRESH ! (Y2_1) if (s > HTHRESH);
+
+ sra %l0,31,%o4 ! (Y0_1) gflag = gflag >> 31;
+ and %o1,54,%i4 ! (Y0_1) itmp0 = 54 & eflag;
+ fdtoi %f22,%f4 ! (Y1_1) u = (double)(int)dtmp0;
+
+ add %o0,%i4,%i2 ! (Y0_1) ind = ind + itmp0;
+ and %o4,52,%l3 ! (Y0_1) itmp1 = 52 & gflag;
+ st %f4,[%fp+tmp4] ! (Y1_1) ind = (int)dtmp0;
+ faddd %f48,XKB4,%f60 ! (Y0_1) dtmp1 = dtmp0 + XKB4;
+
+ sub %i2,%l3,%l2 ! (Y0_1) ind = ind - itmp1;
+ sub %o1,%o4,%o4 ! (Y0_1) ind = eflag - gflag;
+ faddd %f52,%f20,%f62 ! (Y2_1) yd = dtmp0 + dtmp1;
+
+ sll %l2,20,%o3 ! (Y0_1) ind <<= 20;
+ lda [py]0x82,%l5 ! (Y1_2) hy = *py;
+ fmovdg %fcc0,HTHRESH,%f44 ! (Y2_1) s = HTHRESH;
+
+ st %o3,[%fp+tmp0_hi] ! (Y0_1) *(int*)&dtmp0 = ind;
+ fitod %f4,%f48 ! (Y1_1) u = (double)(int)dtmp0;
+
+ fmuld %f60,%f54,%f60 ! (Y0_1) dtmp2 = dtmp1 * y;
+
+ lda [py]0x82,%f20 ! (Y1_2) yd = *py;
+ lda [py+4]%asi,%f21 ! (Y1_2) yd = *py;
+ fmovdg %fcc0,DZERO,%f62 ! (Y2_1) yd = DZERO;
+
+ fcmped %fcc1,%f44,LTHRESH ! (Y2_1) if (s < LTHRESH);
+
+ fsubd %f16,%f48,%f50 ! (Y1_1) y = s - u;
+
+ faddd %f60,XKB3,%f60 ! (Y0_1) dtmp3 = dtmp2 + XKB3;
+
+ sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20;
+
+ fmovdl %fcc1,DZERO,%f62 ! (Y2_1) yd = DZERO;
+
+ and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff;
+ fmovdl %fcc1,LTHRESH,%f44 ! (Y2_1) s = LTHRESH;
+
+ cmp %l5,959 ! (Y1_2) if (expy < 0x3fb);
+ fmuld %f60,%f54,%f48 ! (Y0_1) dtmp4 = dtmp3 * y;
+ faddd %f50,%f10,%f52 ! (Y1_1) y = y + yd;
+
+ ld [%fp+tmp4],%o1 ! (Y1_1) ind = (int)dtmp0;
+
+ add py,stridey,py ! y += stridey;
+ fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s;
+ fsubd %f18,%f12,%f56 ! (Y0_2) dtmp0 = (yd - s);
+
+ fmuld %f18,yr,%f26 ! (Y0_2) dtmp1 = yd * yr;
+ bl,pn %icc,.xupdate6 ! (Y1_2) if (expy < 0x3fb);
+ faddd %f44,%f62,%f28 ! (Y2_1) dtmp0 = (s + yd);
+.xcont6:
+ sra %o1,8,%o3 ! (Y1_1) ind >>= 8;
+ cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e);
+ fmuld XKB5,%f52,%f22 ! (Y1_1) dtmp0 = XKB5 * y;
+ faddd %f48,XKB2,%f14 ! (Y0_1) dtmp5 = dtmp4 + XKB2;
+
+ add %o3,1021,%o0 ! (Y1_1) eflag = (ind + 1021);
+ bge,pn %icc,.xupdate7 ! (Y0_2) if (expy >= 0x43e);
+ nop
+.xcont7:
+ sub %g0,%o3,%i2 ! (Y1_1) gflag = (1022 - ind);
+ fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h;
+ fand %f20,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32);
+
+ sra %o0,31,%l3 ! (Y1_1) eflag = eflag >> 31;
+ add %i2,1022,%l2 ! (Y1_1) gflag = (1022 - ind);
+ fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH);
+
+ sra %l2,31,%o7 ! (Y1_1) gflag = gflag >> 31;
+ and %l3,54,%i1 ! (Y1_1) itmp0 = 54 & eflag;
+ fdtoi %f28,%f3 ! (Y2_1) u = (double)(int)dtmp0;
+
+ add %o3,%i1,%l0 ! (Y1_1) ind = ind + itmp0;
+ and %o7,52,%l1 ! (Y1_1) itmp1 = 52 & gflag;
+ st %f3,[%fp+ind_buf] ! (Y2_1) ind = (int)dtmp0;
+ faddd %f22,XKB4,%f60 ! (Y1_1) dtmp1 = dtmp0 + XKB4;
+
+ sub %l0,%l1,%i4 ! (Y1_1) ind = ind - itmp1;
+ sub %l3,%o7,%o7 ! (Y1_1) ind = eflag - gflag;
+ faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1;
+
+ sll %i4,20,%i2 ! (Y1_1) ind <<= 20;
+ lda [py]0x82,%l5 ! (Y2_2) hy = *py;
+ fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH;
+
+ st %i2,[%fp+tmp1_hi] ! (Y1_1) *(int*)&dtmp0 = ind;
+ fitod %f3,%f18 ! (Y2_1) u = (double)(int)dtmp0;
+
+ fmuld %f60,%f52,%f60 ! (Y1_1) dtmp2 = dtmp1 * y;
+
+ fmuld %f14,%f54,%f56 ! (Y0_1) dtmp6 = dtmp5 * y;
+ fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO;
+
+ fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH);
+
+ lda [py]0x82,%f26 ! (Y2_2) yd = *py;
+ lda [py+4]%asi,%f27 ! (Y2_2) yd = *py;
+ fsubd %f44,%f18,%f18 ! (Y2_1) y = s - u;
+
+ faddd %f60,XKB3,%f44 ! (Y1_1) dtmp3 = dtmp2 + XKB3;
+
+ sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20;
+ and %o2,255,%o2 ! (Y0_1) i = ind & 0xff;
+ faddd %f56,XKB1,%f58 ! (Y0_1) dtmp7 = dtmp6 + XKB1;
+
+ sll %o2,4,%l2 ! (Y0_1) i = i << 4;
+ fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO;
+
+ add py,stridey,py ! y += stridey;
+ and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff;
+ fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH;
+
+ cmp %l5,959 ! (Y2_2) if (expy < 0x3fb);
+ ldd [EXPTBL+%l2],%f22 ! (Y0_1) u = *(double*)((char*)__mt_constexp2 + i);
+ faddd %f18,%f62,%f18 ! (Y2_1) y = y + yd;
+ fmuld %f44,%f52,%f62 ! (Y1_1) dtmp4 = dtmp3 * y;
+
+ ld [%fp+ind_buf],%l1 ! (Y2_1) ind = (int)dtmp0;
+ fmuld %f58,%f54,%f54 ! (Y0_1) y = dtmp7 * y;
+
+ fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s;
+ bl,pn %icc,.xupdate8 ! (Y2_2) if (expy < 0x3fb);
+ fsubd %f20,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s);
+.xcont8:
+ cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e);
+ fmuld %f20,yr,%f8 ! (Y1_2) dtmp1 = yd * yr;
+ faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd);
+
+ sra %l1,8,%o2 ! (Y2_1) ind >>= 8;
+ lda [py]0x82,%l5 ! (Y0_3) hy = *py;
+ fmuld XKB5,%f18,%f20 ! (Y2_1) dtmp0 = XKB5 * y;
+ faddd %f62,XKB2,%f12 ! (Y1_1) dtmp5 = dtmp4 + XKB2;
+
+ add %o2,1021,%l0 ! (Y2_1) eflag = (ind + 1021);
+ bge,pn %icc,.xupdate9 ! (Y2_2) if (expy >= 0x43e);
+ nop
+.xcont9:
+ sub %g0,%o2,%l3 ! (Y2_1) gflag = (1022 - ind);
+ ldd [EXPTBL_P8+%l2],%f14 ! (Y0_1) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+ fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h;
+ fand %f26,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32);
+
+ sra %l0,31,%o0 ! (Y2_1) eflag = eflag >> 31;
+ add %l3,1022,%i4 ! (Y2_1) gflag = (1022 - ind);
+ fmuld %f22,%f54,%f56 ! (Y0_1) dtmp1 = u * y;
+ fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH);
+
+ sra %i4,31,%o5 ! (Y2_1) gflag = gflag >> 31;
+ and %o0,54,%i2 ! (Y2_1) itmp0 = 54 & eflag;
+ fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0;
+
+ add %o2,%i2,%i1 ! (Y2_1) ind = ind + itmp0;
+ and %o5,52,%l2 ! (Y2_1) itmp1 = 52 & gflag;
+ st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0;
+ faddd %f20,XKB4,%f60 ! (Y2_1) dtmp1 = dtmp0 + XKB4;
+
+ sub %i1,%l2,%o3 ! (Y2_1) ind = ind - itmp1;
+ sub %o0,%o5,%o5 ! (Y2_1) ind = eflag - gflag;
+ faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1;
+
+ sll %o3,20,%l3 ! (Y2_1) ind <<= 20;
+ lda [py]0x82,%f28 ! (Y0_3) yd = *py;
+ lda [py+4]%asi,%f29 ! (Y0_3) yd = *py;
+ fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH;
+
+ st %l3,[%fp+tmp2_hi] ! (Y2_1) *(int*)&dtmp0 = ind;
+ fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0;
+
+ fmuld %f60,%f18,%f60 ! (Y2_1) dtmp2 = dtmp1 * y;
+ faddd %f14,%f56,%f20 ! (Y0_1) dtmp2 = dtmp0 + dtmp1;
+
+ fmuld %f12,%f52,%f56 ! (Y1_1) dtmp6 = dtmp5 * y;
+ fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO;
+
+ sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20;
+ fcmped %fcc1,%f16,LTHRESH ! (Y1_2) if (s < LTHRESH);
+
+ and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff;
+ fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u;
+
+ cmp %l5,959 ! (Y0_3) if (expy < 0x3fb);
+ faddd %f60,XKB3,%f60 ! (Y2_1) dtmp3 = dtmp2 + XKB3;
+
+ and %o1,255,%o1 ! (Y1_1) i = ind & 0xff;
+ bl,pn %icc,.xupdate10 ! (Y0_3) if (expy < 0x3fb);
+ faddd %f56,XKB1,%f8 ! (Y1_1) dtmp7 = dtmp6 + XKB1;
+.xcont10:
+ sll %o1,4,%l0 ! (Y1_1) i = i << 4;
+ fmovdl %fcc1,DZERO,%f10 ! (Y1_2) yd = DZERO;
+
+ nop
+ ba 1f
+ fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH;
+
+ .align 16
+1:
+ subcc counter,2,counter
+ ldd [EXPTBL+%l0],%f56 ! (Y1_1) u = *(double*)((char*)__mt_constexp2 + i);
+ fmuld %f60,%f18,%f58 ! (Y2_1) dtmp4 = dtmp3 * y;
+ faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd;
+
+ fmuld %f8,%f52,%f60 ! (Y1_1) y = dtmp7 * y;
+ ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0;
+ bneg,pn %icc,.xtail
+ faddd %f20,%f22,%f12 ! (Y0_1) u = dtmp2 + u;
+
+.xmain_loop:
+ cmp %l5,1086 ! (Y0_2) if (expy >= 0x43e);
+ add %o4,513,%o4 ! (Y0_0) ind += 513;
+ ldd [%fp+tmp0_hi],%f52 ! (Y0_0) *(int*)&dtmp0 = ind;
+ fsubd %f26,%f44,%f50 ! (Y2_1) dtmp0 = (yd - s);
+
+ fmuld s_h,%f44,%f44 ! (Y2_1) s = s_h * s;
+ sra %o2,8,%o0 ! (Y0_1) ind >>= 8;
+ bge,pn %icc,.xupdate11 ! (Y0_2) if (expy >= 0x43e);
+ faddd %f16,%f10,%f22 ! (Y1_1) dtmp0 = (s + yd);
+.xcont11:
+ sll %o4,3,%l2 ! (Y0_0) ind *= 8;
+ add py,stridey,py ! y += stridey;
+ fmuld %f26,yr,%f20 ! (Y2_1) dtmp1 = yd * yr;
+ faddd %f58,XKB2,%f14 ! (Y2_0) dtmp5 = dtmp4 + XKB2;
+
+ add %o0,1021,%i1 ! (Y0_1) eflag = (ind + 1021);
+	ldd	[%l2+EXPTBL],%f62	! (Y0_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ fmuld XKB5,%f54,%f48 ! (Y0_1) dtmp0 = XKB5 * y;
+ fpadd32 %f12,%f52,%f58 ! (Y0_0) u = vis_fpadd32(u, dtmp0);
+
+ sub %g0,%o0,%o3 ! (Y0_1) gflag = (1022 - ind);
+ ldd [EXPTBL_P8+%l0],%f8 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+ fand %f28,MHI32,%f12 ! (Y0_2) s = vis_fand(yd, MHI32);
+ fmuld %f50,s_h,%f52 ! (Y2_1) dtmp0 *= s_h;
+
+ sra %i1,31,%o1 ! (Y0_1) eflag = eflag >> 31;
+ add %o3,1022,%l0 ! (Y0_1) gflag = (1022 - ind);
+ fmuld %f56,%f60,%f26 ! (Y1_0) dtmp1 = u * y;
+ fcmped %fcc0,%f44,HTHRESH ! (Y2_1) if (s > HTHRESH);
+
+ sra %l0,31,%o4 ! (Y0_1) gflag = gflag >> 31;
+ and %o1,54,%i4 ! (Y0_1) itmp0 = 54 & eflag;
+ fmuld %f58,%f62,%f6 ! (Y0_0) dtmp1 = u * dtmp1;
+ fdtoi %f22,%f4 ! (Y1_1) u = (double)(int)dtmp0;
+
+ add %o0,%i4,%i2 ! (Y0_1) ind = ind + itmp0;
+ and %o4,52,%l3 ! (Y0_1) itmp1 = 52 & gflag;
+ st %f4,[%fp+tmp4] ! (Y1_1) ind = (int)dtmp0;
+ faddd %f48,XKB4,%f60 ! (Y0_1) dtmp1 = dtmp0 + XKB4;
+
+ sub %i2,%l3,%l2 ! (Y0_1) ind = ind - itmp1;
+ sub %o1,%o4,%o4 ! (Y0_1) ind = eflag - gflag;
+ st %f6,[pz] ! (Y0_0) write into memory
+ faddd %f52,%f20,%f62 ! (Y2_1) yd = dtmp0 + dtmp1;
+
+ sll %l2,20,%o3 ! (Y0_1) ind <<= 20;
+ nop
+ st %o3,[%fp+tmp0_hi] ! (Y0_1) *(int*)&dtmp0 = ind;
+ fmovdg %fcc0,HTHRESH,%f44 ! (Y2_1) s = HTHRESH;
+
+ lda [py]0x82,%l5 ! (Y1_2) hy = *py;
+ nop
+ fitod %f4,%f48 ! (Y1_1) u = (double)(int)dtmp0;
+
+ fmuld %f60,%f54,%f60 ! (Y0_1) dtmp2 = dtmp1 * y;
+ nop
+ st %f7,[pz+4] ! (Y0_0) write into memory
+ faddd %f8,%f26,%f26 ! (Y1_0) dtmp2 = dtmp0 + dtmp1;
+
+ lda [py]0x82,%f8 ! (Y1_2) yd = *py;
+ nop
+ fmuld %f14,%f18,%f52 ! (Y2_0) dtmp6 = dtmp5 * y;
+ fmovdg %fcc0,DZERO,%f62 ! (Y2_1) yd = DZERO;
+
+ lda [py+4]%asi,%f9 ! (Y1_2) yd = *py;
+ add pz,stridez,pz ! z += stridez;
+ fcmped %fcc1,%f44,LTHRESH ! (Y2_1) if (s < LTHRESH);
+
+ fsubd %f16,%f48,%f50 ! (Y1_1) y = s - u;
+
+ faddd %f60,XKB3,%f60 ! (Y0_1) dtmp3 = dtmp2 + XKB3;
+
+ sra %l5,20,%l5 ! (Y1_2) expy = hy >> 20;
+ and %l1,255,%l1 ! (Y2_0) i = ind & 0xff;
+ faddd %f52,XKB1,%f58 ! (Y2_0) dtmp7 = dtmp6 + XKB1;
+
+ sll %l1,4,%l0 ! (Y2_0) i = i << 4;
+ fmovdl %fcc1,DZERO,%f62 ! (Y2_1) yd = DZERO;
+
+ and %l5,0x7ff,%l5 ! (Y1_2) expy &= 0x7ff;
+ nop
+ fmovdl %fcc1,LTHRESH,%f44 ! (Y2_1) s = LTHRESH;
+
+ cmp %l5,959 ! (Y1_2) if (expy < 0x3fb);
+ ldd [EXPTBL+%l0],%f20 ! (Y2_0) u = *(double*)((char*)__mt_constexp2 + i);
+ fmuld %f60,%f54,%f48 ! (Y0_1) dtmp4 = dtmp3 * y;
+ faddd %f50,%f10,%f52 ! (Y1_1) y = y + yd;
+
+ add %o7,513,%o7 ! (Y1_0) ind += 513;
+ ld [%fp+tmp4],%o1 ! (Y1_1) ind = (int)dtmp0;
+ fmuld %f58,%f18,%f18 ! (Y2_0) y = dtmp7 * y;
+ faddd %f26,%f56,%f58 ! (Y1_0) u = dtmp2 + u;
+
+ add py,stridey,py ! y += stridey;
+ ldd [%fp+tmp1_hi],%f60 ! (Y1_0) *(int*)&dtmp0 = ind;
+ fmuld s_h,%f12,%f50 ! (Y0_2) s = s_h * s;
+ fsubd %f28,%f12,%f56 ! (Y0_2) dtmp0 = (yd - s);
+
+ sll %o7,3,%l3 ! (Y1_0) ind *= 8;
+ fmuld %f28,yr,%f26 ! (Y0_2) dtmp1 = yd * yr;
+ bl,pn %icc,.xupdate12 ! (Y1_2) if (expy < 0x3fb);
+ faddd %f44,%f62,%f28 ! (Y2_1) dtmp0 = (s + yd);
+.xcont12:
+ sra %o1,8,%o3 ! (Y1_1) ind >>= 8;
+ cmp %l5,1086 ! (Y1_2) if (expy >= 0x43e);
+ fmuld XKB5,%f52,%f22 ! (Y1_1) dtmp0 = XKB5 * y;
+ faddd %f48,XKB2,%f14 ! (Y0_1) dtmp5 = dtmp4 + XKB2;
+
+ add %o3,1021,%o0 ! (Y1_1) eflag = (ind + 1021);
+	ldd	[%l3+EXPTBL],%f48	! (Y1_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ bge,pn %icc,.xupdate13 ! (Y1_2) if (expy >= 0x43e);
+ fpadd32 %f58,%f60,%f60 ! (Y1_0) u = vis_fpadd32(u, dtmp0);
+.xcont13:
+ sub %g0,%o3,%i2 ! (Y1_1) gflag = (1022 - ind);
+ ldd [EXPTBL_P8+%l0],%f16 ! (Y2_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+ fmuld %f56,s_h,%f58 ! (Y0_2) dtmp0 *= s_h;
+ fand %f8,MHI32,%f12 ! (Y1_2) s = vis_fand(yd, MHI32);
+
+ sra %o0,31,%l3 ! (Y1_1) eflag = eflag >> 31;
+ add %i2,1022,%l2 ! (Y1_1) gflag = (1022 - ind);
+ fmuld %f20,%f18,%f56 ! (Y2_0) dtmp1 = u * y;
+ fcmped %fcc0,%f50,HTHRESH ! (Y0_2) if (s > HTHRESH);
+
+ sra %l2,31,%o7 ! (Y1_1) gflag = gflag >> 31;
+ and %l3,54,%i1 ! (Y1_1) itmp0 = 54 & eflag;
+ fmuld %f60,%f48,%f18 ! (Y1_0) dtmp1 = u * dtmp1;
+ fdtoi %f28,%f3 ! (Y2_1) u = (double)(int)dtmp0;
+
+ add %o3,%i1,%l0 ! (Y1_1) ind = ind + itmp0;
+ and %o7,52,%l1 ! (Y1_1) itmp1 = 52 & gflag;
+ st %f3,[%fp+ind_buf] ! (Y2_1) ind = (int)dtmp0;
+ faddd %f22,XKB4,%f60 ! (Y1_1) dtmp1 = dtmp0 + XKB4;
+
+ sub %l0,%l1,%i4 ! (Y1_1) ind = ind - itmp1;
+ sub %l3,%o7,%o7 ! (Y1_1) ind = eflag - gflag;
+ st %f18,[pz] ! (Y1_0) write into memory
+ faddd %f58,%f26,%f48 ! (Y0_2) yd = dtmp0 + dtmp1;
+
+ sll %i4,20,%i2 ! (Y1_1) ind <<= 20;
+ lda [py]0x82,%l5 ! (Y2_2) hy = *py;
+ fmovdg %fcc0,HTHRESH,%f50 ! (Y0_2) s = HTHRESH;
+
+ st %i2,[%fp+tmp1_hi] ! (Y1_1) *(int*)&dtmp0 = ind;
+ fitod %f3,%f10 ! (Y2_1) u = (double)(int)dtmp0;
+
+ fmuld %f60,%f52,%f60 ! (Y1_1) dtmp2 = dtmp1 * y;
+ st %f19,[pz+4] ! (Y1_0) write into memory
+ faddd %f16,%f56,%f28 ! (Y2_0) dtmp2 = dtmp0 + dtmp1;
+
+ fmuld %f14,%f54,%f56 ! (Y0_1) dtmp6 = dtmp5 * y;
+ fmovdg %fcc0,DZERO,%f48 ! (Y0_2) yd = DZERO;
+
+ add pz,stridez,pz ! z += stridez;
+ fcmped %fcc1,%f50,LTHRESH ! (Y0_2) if (s < LTHRESH);
+
+ lda [py]0x82,%f26 ! (Y2_2) yd = *py;
+ fsubd %f44,%f10,%f18 ! (Y2_1) y = s - u;
+
+ lda [py+4]%asi,%f27 ! (Y2_2) yd = *py;
+ faddd %f60,XKB3,%f44 ! (Y1_1) dtmp3 = dtmp2 + XKB3;
+
+ sra %l5,20,%l5 ! (Y2_2) expy = hy >> 20;
+ and %o2,255,%o2 ! (Y0_1) i = ind & 0xff;
+ faddd %f56,XKB1,%f58 ! (Y0_1) dtmp7 = dtmp6 + XKB1;
+
+ sll %o2,4,%l2 ! (Y0_1) i = i << 4;
+ fmovdl %fcc1,DZERO,%f48 ! (Y0_2) yd = DZERO;
+
+ add py,stridey,py ! y += stridey;
+ and %l5,0x7ff,%l5 ! (Y2_2) expy &= 0x7ff;
+ fmovdl %fcc1,LTHRESH,%f50 ! (Y0_2) s = LTHRESH;
+
+ cmp %l5,959 ! (Y2_2) if (expy < 0x3fb);
+ ldd [EXPTBL+%l2],%f22 ! (Y0_1) u = *(double*)((char*)__mt_constexp2 + i);
+ faddd %f18,%f62,%f18 ! (Y2_1) y = y + yd;
+ fmuld %f44,%f52,%f62 ! (Y1_1) dtmp4 = dtmp3 * y;
+
+ add %o5,513,%o5 ! (Y2_0) ind += 513;
+ ld [%fp+ind_buf],%l1 ! (Y2_1) ind = (int)dtmp0;
+ fmuld %f58,%f54,%f54 ! (Y0_1) y = dtmp7 * y;
+ faddd %f28,%f20,%f58 ! (Y2_0) u = dtmp2 + u;
+
+ ldd [%fp+tmp2_hi],%f60 ! (Y2_0) *(int*)&dtmp0 = ind;
+ fmuld s_h,%f12,%f16 ! (Y1_2) s = s_h * s;
+ bl,pn %icc,.xupdate14 ! (Y2_2) if (expy < 0x3fb);
+ fsubd %f8,%f12,%f56 ! (Y1_2) dtmp0 = (yd - s);
+.xcont14:
+ sll %o5,3,%i1 ! (Y2_0) ind *= 8;
+ cmp %l5,1086 ! (Y2_2) if (expy >= 0x43e);
+ fmuld %f8,yr,%f8 ! (Y1_2) dtmp1 = yd * yr;
+ faddd %f50,%f48,%f28 ! (Y0_2) dtmp0 = (s + yd);
+
+ sra %l1,8,%o2 ! (Y2_1) ind >>= 8;
+ lda [py]0x82,%l5 ! (Y0_3) hy = *py;
+ fmuld XKB5,%f18,%f20 ! (Y2_1) dtmp0 = XKB5 * y;
+ faddd %f62,XKB2,%f12 ! (Y1_1) dtmp5 = dtmp4 + XKB2;
+
+ add %o2,1021,%l0 ! (Y2_1) eflag = (ind + 1021);
+	ldd	[%i1+EXPTBL],%f62	! (Y2_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ bge,pn %icc,.xupdate15 ! (Y2_2) if (expy >= 0x43e);
+ fpadd32 %f58,%f60,%f60 ! (Y2_0) u = vis_fpadd32(u, dtmp0);
+.xcont15:
+ sub %g0,%o2,%l3 ! (Y2_1) gflag = (1022 - ind);
+ ldd [EXPTBL_P8+%l2],%f14 ! (Y0_1) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+ fmuld %f56,s_h,%f58 ! (Y1_2) dtmp0 *= s_h;
+ fand %f26,MHI32,%f44 ! (Y2_2) s = vis_fand(yd, MHI32);
+
+ sra %l0,31,%o0 ! (Y2_1) eflag = eflag >> 31;
+ add %l3,1022,%i4 ! (Y2_1) gflag = (1022 - ind);
+ fmuld %f22,%f54,%f56 ! (Y0_1) dtmp1 = u * y;
+ fcmped %fcc0,%f16,HTHRESH ! (Y1_2) if (s > HTHRESH);
+
+ sra %i4,31,%o5 ! (Y2_1) gflag = gflag >> 31;
+ and %o0,54,%i2 ! (Y2_1) itmp0 = 54 & eflag;
+ fmuld %f60,%f62,%f6 ! (Y2_0) dtmp1 = u * dtmp1;
+ fdtoi %f28,%f3 ! (Y0_2) u = (double)(int)dtmp0;
+
+ add %o2,%i2,%i1 ! (Y2_1) ind = ind + itmp0;
+ and %o5,52,%l2 ! (Y2_1) itmp1 = 52 & gflag;
+ st %f3,[%fp+tmp3] ! (Y0_2) ind = (int)dtmp0;
+ faddd %f20,XKB4,%f60 ! (Y2_1) dtmp1 = dtmp0 + XKB4;
+
+ sub %i1,%l2,%o3 ! (Y2_1) ind = ind - itmp1;
+ sub %o0,%o5,%o5 ! (Y2_1) ind = eflag - gflag;
+ st %f6,[pz] ! (Y2_0) write into memory
+ faddd %f58,%f8,%f10 ! (Y1_2) yd = dtmp0 + dtmp1;
+
+ sll %o3,20,%l3 ! (Y2_1) ind <<= 20;
+ lda [py]0x82,%f28 ! (Y0_3) yd = *py;
+ fmovdg %fcc0,HTHRESH,%f16 ! (Y1_2) s = HTHRESH;
+
+ lda [py+4]%asi,%f29 ! (Y0_3) yd = *py;
+ fitod %f3,%f58 ! (Y0_2) u = (double)(int)dtmp0;
+
+ fmuld %f60,%f18,%f60 ! (Y2_1) dtmp2 = dtmp1 * y;
+ st %l3,[%fp+tmp2_hi] ! (Y2_1) *(int*)&dtmp0 = ind;
+ faddd %f14,%f56,%f20 ! (Y0_1) dtmp2 = dtmp0 + dtmp1;
+
+ fmuld %f12,%f52,%f56 ! (Y1_1) dtmp6 = dtmp5 * y;
+ st %f7,[pz+4] ! (Y2_0) write into memory
+ fmovdg %fcc0,DZERO,%f10 ! (Y1_2) yd = DZERO;
+
+ sra %l5,20,%l5 ! (Y0_3) expy = hy >> 20;
+ add pz,stridez,pz ! z += stridez;
+ fcmped %fcc1,%f16,LTHRESH ! (Y1_2) if (s < LTHRESH);
+
+ and %l5,0x7ff,%l5 ! (Y0_3) expy &= 0x7ff;
+ fsubd %f50,%f58,%f54 ! (Y0_2) y = s - u;
+
+ cmp %l5,959 ! (Y0_3) if (expy < 0x3fb);
+ faddd %f60,XKB3,%f60 ! (Y2_1) dtmp3 = dtmp2 + XKB3;
+
+ and %o1,255,%o1 ! (Y1_1) i = ind & 0xff;
+ bl,pn %icc,.xupdate16 ! (Y0_3) if (expy < 0x3fb);
+ faddd %f56,XKB1,%f8 ! (Y1_1) dtmp7 = dtmp6 + XKB1;
+.xcont16:
+ sll %o1,4,%l0 ! (Y1_1) i = i << 4;
+ fmovdl %fcc1,DZERO,%f10 ! (Y1_2) yd = DZERO;
+
+ subcc counter,3,counter ! update cycle counter
+ fmovdl %fcc1,LTHRESH,%f16 ! (Y1_2) s = LTHRESH;
+
+ ldd [EXPTBL+%l0],%f56 ! (Y1_1) u = *(double*)((char*)__mt_constexp2 + i);
+ fmuld %f60,%f18,%f58 ! (Y2_1) dtmp4 = dtmp3 * y;
+ faddd %f54,%f48,%f54 ! (Y0_2) y = y + yd;
+
+ fmuld %f8,%f52,%f60 ! (Y1_1) y = dtmp7 * y;
+ ld [%fp+tmp3],%o2 ! (Y0_2) ind = (int)dtmp0;
+ bpos,pt %icc,.xmain_loop
+ faddd %f20,%f22,%f12 ! (Y0_1) u = dtmp2 + u;
+
+.xtail:
+ addcc counter,2,counter
+ ldd [%fp+tmp0_hi],%f52 ! (Y0_0) *(int*)&dtmp0 = ind;
+
+ add %o4,513,%o4 ! (Y0_0) ind += 513;
+ bneg,pn %icc,.xend_loop
+ nop
+
+ sll %o4,3,%l2 ! (Y0_0) ind *= 8;
+
+ subcc counter,1,counter
+	ldd	[%l2+EXPTBL],%f62	! (Y0_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ fpadd32 %f12,%f52,%f58 ! (Y0_0) u = vis_fpadd32(u, dtmp0);
+
+ ldd [EXPTBL_P8+%l0],%f8 ! (Y1_0) dtmp0 = *(double*)((char*)__mt_constexp2 + i + 8);
+
+ fmuld %f56,%f60,%f26 ! (Y1_0) dtmp1 = u * y;
+
+ fmuld %f58,%f62,%f6 ! (Y0_0) dtmp1 = u * dtmp1;
+
+ st %f6,[pz] ! (Y0_0) write into memory
+ st %f7,[pz+4] ! (Y0_0) write into memory
+ bneg,pn %icc,.xend_loop
+ add pz,stridez,pz ! z += stridez;
+
+ faddd %f8,%f26,%f26 ! (Y1_0) dtmp2 = dtmp0 + dtmp1;
+
+ add %o7,513,%o7 ! (Y1_0) ind += 513;
+ faddd %f26,%f56,%f58 ! (Y1_0) u = dtmp2 + u;
+
+ ldd [%fp+tmp1_hi],%f60 ! (Y1_0) *(int*)&dtmp0 = ind;
+
+ sll %o7,3,%l3 ! (Y1_0) ind *= 8;
+
+	ldd	[%l3+EXPTBL],%f48	! (Y1_0) dtmp1 = *(double*)((char*)__mt_constexp2 + ind);
+ fpadd32 %f58,%f60,%f60 ! (Y1_0) u = vis_fpadd32(u, dtmp0);
+
+ fmuld %f60,%f48,%f18 ! (Y1_0) dtmp1 = u * dtmp1;
+
+ st %f18,[pz] ! (Y1_0) write into memory
+ st %f19,[pz+4] ! (Y1_0) write into memory
+ add pz,stridez,pz ! z += stridez;
+
+.xend_loop:
+ ba .xbegin
+ nop
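+
+! A note on the exponent insertion used by the loops above (hedged sketch
+! read from the code, not a statement from the original sources): ind >> 8
+! is the integer part of y*log2(x).  In the common range it is added straight
+! into the high 32 bits of u with vis_fpadd32 and the result is multiplied by
+! the table entry at byte offset 513*8, which the "ld DONE" comment in
+! .xspec0 suggests is 1.0.  Near the underflow and overflow limits the
+! eflag/gflag terms first bias the exponent by +54 or -52 and select a
+! neighbouring table entry instead, so the final multiply supplies the
+! remaining power of two and raises the proper exceptions:
+!
+!	n     = ind >> 8;			/* integer part of y*log2(x)  */
+!	eflag = (n + 1021) >> 31;		/* -1 iff n < -1021           */
+!	gflag = (1022 - n) >> 31;		/* -1 iff n >  1022           */
+!	n    += (54 & eflag) - (52 & gflag);
+!	u     = vis_fpadd32(u, n << 20);	/* add n to the exponent      */
+!	z     = u * *(double*)((char*)__mt_constexp2 + 8*(eflag - gflag + 513));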
+
+ .align 16
+.xupdate0:
+ cmp counter,0
+ sub py,stridey,%i2
+ ble,pt %icc,.xcont0
+ fmovd DZERO,%f10
+
+ stx %i2,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont0
+ or %g0,0,counter
+
+ .align 16
+.xupdate1:
+ cmp counter,0
+ sub py,stridey,%i2
+ ble,pt %icc,.xcont1
+ fmovd DZERO,%f10
+
+ stx %i2,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont1
+ or %g0,0,counter
+
+ .align 16
+.xupdate2:
+ cmp counter,1
+ sub py,stridey,%l3
+ ble,pt %icc,.xcont2
+ fmovd DZERO,%f14
+
+ stx %l3,[%fp+tmp_py]
+ sub counter,1,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont2
+ or %g0,1,counter
+
+ .align 16
+.xupdate3:
+ cmp counter,1
+ sub py,stridey,%l3
+ ble,pt %icc,.xcont3
+ fmovd DZERO,%f14
+
+ stx %l3,[%fp+tmp_py]
+ sub counter,1,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont3
+ or %g0,1,counter
+
+ .align 16
+.xupdate4:
+ cmp counter,2
+ ble,pt %icc,.xcont4
+ fmovd DZERO,%f18
+
+ stx py,[%fp+tmp_py]
+ sub counter,2,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont4
+ or %g0,2,counter
+
+ .align 16
+.xupdate5:
+ cmp counter,2
+ ble,pt %icc,.xcont5
+ fmovd DZERO,%f18
+
+ stx py,[%fp+tmp_py]
+ sub counter,2,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont5
+ or %g0,2,counter
+
+ .align 16
+.xupdate6:
+ cmp counter,3
+ sub py,stridey,%i2
+ ble,pt %icc,.xcont6
+ fmovd DZERO,%f20
+
+ stx %i2,[%fp+tmp_py]
+ sub counter,3,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont6
+ or %g0,3,counter
+
+ .align 16
+.xupdate7:
+ cmp counter,3
+ sub py,stridey,%i2
+ ble,pt %icc,.xcont7
+ fmovd DZERO,%f20
+
+ stx %i2,[%fp+tmp_py]
+ sub counter,3,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont7
+ or %g0,3,counter
+
+ .align 16
+.xupdate8:
+ cmp counter,4
+ sub py,stridey,%l3
+ ble,pt %icc,.xcont8
+ fmovd DZERO,%f26
+
+ stx %l3,[%fp+tmp_py]
+ sub counter,4,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont8
+ or %g0,4,counter
+
+ .align 16
+.xupdate9:
+ cmp counter,4
+ sub py,stridey,%l3
+ ble,pt %icc,.xcont9
+ fmovd DZERO,%f26
+
+ stx %l3,[%fp+tmp_py]
+ sub counter,4,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont9
+ or %g0,4,counter
+
+ .align 16
+.xupdate10:
+ cmp counter,5
+ ble,pt %icc,.xcont10
+ fmovd DZERO,%f28
+
+ stx py,[%fp+tmp_py]
+ sub counter,5,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont10
+ or %g0,5,counter
+
+ .align 16
+.xupdate11:
+ cmp counter,3
+ ble,pt %icc,.xcont11
+ fmovd DZERO,%f28
+
+ stx py,[%fp+tmp_py]
+ sub counter,3,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont11
+ or %g0,3,counter
+
+ .align 16
+.xupdate12:
+ cmp counter,4
+ sub py,stridey,%i2
+ ble,pt %icc,.xcont12
+ fmovd DZERO,%f8
+
+ stx %i2,[%fp+tmp_py]
+ sub counter,4,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont12
+ or %g0,4,counter
+
+ .align 16
+.xupdate13:
+ cmp counter,4
+ sub py,stridey,%i2
+ ble,pt %icc,.xcont13
+ fmovd DZERO,%f8
+
+ stx %i2,[%fp+tmp_py]
+ sub counter,4,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont13
+ or %g0,4,counter
+
+ .align 16
+.xupdate14:
+ cmp counter,5
+ sub py,stridey,%l3
+ ble,pt %icc,.xcont14
+ fmovd DZERO,%f26
+
+ stx %l3,[%fp+tmp_py]
+ sub counter,5,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont14
+ or %g0,5,counter
+
+ .align 16
+.xupdate15:
+ cmp counter,5
+ sub py,stridey,%l3
+ ble,pt %icc,.xcont15
+ fmovd DZERO,%f26
+
+ stx %l3,[%fp+tmp_py]
+ sub counter,5,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont15
+ or %g0,5,counter
+
+ .align 16
+.xupdate16:
+ cmp counter,6
+ ble,pt %icc,.xcont16
+ fmovd DZERO,%f28
+
+ stx py,[%fp+tmp_py]
+ sub counter,6,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont16
+ or %g0,6,counter
+
+ .align 16
+.xspec0:
+ add EXPTBL,4095,%l0
+ add %l0,1,%l0
+ ldd [%l0+8],%f20 ! ld DONE
+ st %f20,[pz] ! *pz = DONE;
+ ba .xupdate_point
+ st %f21,[pz+4] ! *pz = DONE;
+
+ .align 16
+.xspec1:
+ ldx [%fp+tmp_px],%l1
+ sethi %hi(0x7ffffc00),MASK_0x7fffffff
+
+ sethi %hi(0x7ff00000),%o3
+ add MASK_0x7fffffff,0x3ff,MASK_0x7fffffff
+
+ and %l2,MASK_0x7fffffff,%o2 ! if (hy &= 0x7fffffff);
+ sethi %hi(0x3ff00000),MASK_0x3ff00000
+
+ cmp %o2,%o3 ! if (hy != 0x7ff00000);
+ bne,pn %icc,2f ! if (hy != 0x7ff00000);
+ nop
+
+ ld [py+4],%l3 ! ld ly;
+ cmp %l3,0 ! if (ly != 0);
+ bne,a,pt %icc,3f ! if (ly != 0);
+ nop
+
+ ld [%l1],%i1 ! ld hx;
+ cmp %i1,MASK_0x3ff00000 ! if (hx != 0x3ff00000);
+ bne,a,pn %icc,1f ! if (hx != 0x3ff00000);
+ srl %l2,31,%o7 ! sy = hy >> 31;
+
+ ld [%l1+4],%i2 ! ld lx;
+ cmp %i2,0 ! if (lx != 0);
+ bne,pn %icc,1f ! if (lx != 0);
+ srl %l2,31,%o7 ! sy = hy >> 31;
+
+ fzero %f28
+ fmuld %f18,%f28,%f28 ! *pz = *py * 0.0;
+ st %f28,[pz]
+ ba .xupdate_point
+ st %f29,[pz+4]
+1:
+ sub %i1,MASK_0x3ff00000,%o0 ! hx - 0x3ff00000;
+ srlx %o0,63,%o0 ! (hx - 0x3ff00000) >> 63;
+
+ cmp %o0,%o7 ! if ((hx < 0x3ff00000) == sy);
+ be,pn %icc,1f ! if ((hx < 0x3ff00000) == sy);
+
+ st DZERO_HI,[pz]
+ ba .xupdate_point
+ st DZERO_LO,[pz+4]
+1:
+ st %o2,[pz] ! ((int*)pz)[0] = hy;
+ ba .xupdate_point
+ st %l3,[pz+4] ! ((int*)pz)[1] = ly;
+2:
+ bl,a,pn %icc,1f ! if (hy < 0x7ff00000);
+ ld [%l1+4],%i2 ! ld lx;
+3:
+ ld [%l1],%f20 ! x = *px;
+ ld [%l1+4],%f21 ! x = *px;
+ fmuld %f20,%f18,%f28 ! *pz = *px * *py;
+ st %f28,[pz]
+ ba .xupdate_point
+ st %f29,[pz+4]
+1:
+ ld [%l1],%i1 ! ld hx;
+ cmp %i2,0 ! if (lx != 0);
+ bne,pn %icc,1f ! if (lx != 0);
+ nop
+
+ cmp %i1,MASK_0x3ff00000 ! if (hx != 0x3ff00000);
+ add EXPTBL,4095,%l0
+ bne,pn %icc,1f ! if (hx != 0x3ff00000);
+ add %l0,1,%l0
+
+ ldd [%l0+8],%f20 ! ld DONE
+ st %f20,[pz] ! *pz = DONE;
+ ba .xupdate_point
+ st %f21,[pz+4] ! *pz = DONE;
+1:
+ srl %l2,31,%o7 ! sy = hy >> 31;
+ sub %i1,MASK_0x3ff00000,%o0 ! hx - 0x3ff00000;
+
+ srlx %o0,63,%o0 ! (hx - 0x3ff00000) >> 63;
+
+	cmp	%o0,%o7			! if ((hx < 0x3ff00000) == sy);
+	be,a,pn	%icc,1f			! if ((hx < 0x3ff00000) == sy);
+ ldd [EXPTBL-ind_HUGE],%f20 ! y0 = _HUGE;
+
+ ldd [EXPTBL-ind_TINY],%f20 ! y0 = _TINY;
+1:
+ fmuld %f20,%f20,%f20 ! *pz = y0 * y0
+ st %f20,[pz]
+ ba .xupdate_point
+ st %f21,[pz+4]
+
+.xupdate_point:
+ add py,stridey,py
+ ba .xbegin1
+ add pz,stridez,pz
+
+ SET_SIZE(__vpow)
+
diff --git a/usr/src/libm/src/mvec/vis/__vpowf.S b/usr/src/libm/src/mvec/vis/__vpowf.S
new file mode 100644
index 0000000..f6e7722
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vpowf.S
@@ -0,0 +1,3138 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vpowf.S 1.7 06/01/23 SMI"
+
+ .file "__vpowf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+! __mt_constexp2fa:
+ .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
+ .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
+ .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
+ .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
+ .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
+ .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
+ .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
+ .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
+ .word 0x3ff0b558, 0x6cf9890f, 0x3ff0c0f1, 0x45e46c85
+ .word 0x3ff0cc92, 0x2b7247f7, 0x3ff0d83b, 0x23395dec
+ .word 0x3ff0e3ec, 0x32d3d1a2, 0x3ff0efa5, 0x5fdfa9c5
+ .word 0x3ff0fb66, 0xaffed31b, 0x3ff10730, 0x28d7233e
+ .word 0x3ff11301, 0xd0125b51, 0x3ff11edb, 0xab5e2ab6
+ .word 0x3ff12abd, 0xc06c31cc, 0x3ff136a8, 0x14f204ab
+ .word 0x3ff1429a, 0xaea92de0, 0x3ff14e95, 0x934f312e
+ .word 0x3ff15a98, 0xc8a58e51, 0x3ff166a4, 0x5471c3c2
+ .word 0x3ff172b8, 0x3c7d517b, 0x3ff17ed4, 0x8695bbc0
+ .word 0x3ff18af9, 0x388c8dea, 0x3ff19726, 0x58375d2f
+ .word 0x3ff1a35b, 0xeb6fcb75, 0x3ff1af99, 0xf8138a1c
+ .word 0x3ff1bbe0, 0x84045cd4, 0x3ff1c82f, 0x95281c6b
+ .word 0x3ff1d487, 0x3168b9aa, 0x3ff1e0e7, 0x5eb44027
+ .word 0x3ff1ed50, 0x22fcd91d, 0x3ff1f9c1, 0x8438ce4d
+ .word 0x3ff2063b, 0x88628cd6, 0x3ff212be, 0x3578a819
+ .word 0x3ff21f49, 0x917ddc96, 0x3ff22bdd, 0xa27912d1
+ .word 0x3ff2387a, 0x6e756238, 0x3ff2451f, 0xfb82140a
+ .word 0x3ff251ce, 0x4fb2a63f, 0x3ff25e85, 0x711ece75
+ .word 0x3ff26b45, 0x65e27cdd, 0x3ff2780e, 0x341ddf29
+ .word 0x3ff284df, 0xe1f56381, 0x3ff291ba, 0x7591bb70
+ .word 0x3ff29e9d, 0xf51fdee1, 0x3ff2ab8a, 0x66d10f13
+ .word 0x3ff2b87f, 0xd0dad990, 0x3ff2c57e, 0x39771b2f
+ .word 0x3ff2d285, 0xa6e4030b, 0x3ff2df96, 0x1f641589
+ .word 0x3ff2ecaf, 0xa93e2f56, 0x3ff2f9d2, 0x4abd886b
+ .word 0x3ff306fe, 0x0a31b715, 0x3ff31432, 0xedeeb2fd
+ .word 0x3ff32170, 0xfc4cd831, 0x3ff32eb8, 0x3ba8ea32
+ .word 0x3ff33c08, 0xb26416ff, 0x3ff34962, 0x66e3fa2d
+ .word 0x3ff356c5, 0x5f929ff1, 0x3ff36431, 0xa2de883b
+ .word 0x3ff371a7, 0x373aa9cb, 0x3ff37f26, 0x231e754a
+ .word 0x3ff38cae, 0x6d05d866, 0x3ff39a40, 0x1b7140ef
+ .word 0x3ff3a7db, 0x34e59ff7, 0x3ff3b57f, 0xbfec6cf4
+ .word 0x3ff3c32d, 0xc313a8e5, 0x3ff3d0e5, 0x44ede173
+ .word 0x3ff3dea6, 0x4c123422, 0x3ff3ec70, 0xdf1c5175
+ .word 0x3ff3fa45, 0x04ac801c, 0x3ff40822, 0xc367a024
+ .word 0x3ff4160a, 0x21f72e2a, 0x3ff423fb, 0x2709468a
+ .word 0x3ff431f5, 0xd950a897, 0x3ff43ffa, 0x3f84b9d4
+ .word 0x3ff44e08, 0x6061892d, 0x3ff45c20, 0x42a7d232
+ .word 0x3ff46a41, 0xed1d0057, 0x3ff4786d, 0x668b3237
+ .word 0x3ff486a2, 0xb5c13cd0, 0x3ff494e1, 0xe192aed2
+ .word 0x3ff4a32a, 0xf0d7d3de, 0x3ff4b17d, 0xea6db7d7
+ .word 0x3ff4bfda, 0xd5362a27, 0x3ff4ce41, 0xb817c114
+ .word 0x3ff4dcb2, 0x99fddd0d, 0x3ff4eb2d, 0x81d8abff
+ .word 0x3ff4f9b2, 0x769d2ca7, 0x3ff50841, 0x7f4531ee
+ .word 0x3ff516da, 0xa2cf6642, 0x3ff5257d, 0xe83f4eef
+ .word 0x3ff5342b, 0x569d4f82, 0x3ff542e2, 0xf4f6ad27
+ .word 0x3ff551a4, 0xca5d920f, 0x3ff56070, 0xdde910d2
+ .word 0x3ff56f47, 0x36b527da, 0x3ff57e27, 0xdbe2c4cf
+ .word 0x3ff58d12, 0xd497c7fd, 0x3ff59c08, 0x27ff07cc
+ .word 0x3ff5ab07, 0xdd485429, 0x3ff5ba11, 0xfba87a03
+ .word 0x3ff5c926, 0x8a5946b7, 0x3ff5d845, 0x90998b93
+ .word 0x3ff5e76f, 0x15ad2148, 0x3ff5f6a3, 0x20dceb71
+ .word 0x3ff605e1, 0xb976dc09, 0x3ff6152a, 0xe6cdf6f4
+ .word 0x3ff6247e, 0xb03a5585, 0x3ff633dd, 0x1d1929fd
+ .word 0x3ff64346, 0x34ccc320, 0x3ff652b9, 0xfebc8fb7
+ .word 0x3ff66238, 0x82552225, 0x3ff671c1, 0xc70833f6
+ .word 0x3ff68155, 0xd44ca973, 0x3ff690f4, 0xb19e9538
+ .word 0x3ff6a09e, 0x667f3bcd, 0x3ff6b052, 0xfa75173e
+ .word 0x3ff6c012, 0x750bdabf, 0x3ff6cfdc, 0xddd47645
+ .word 0x3ff6dfb2, 0x3c651a2f, 0x3ff6ef92, 0x98593ae5
+ .word 0x3ff6ff7d, 0xf9519484, 0x3ff70f74, 0x66f42e87
+ .word 0x3ff71f75, 0xe8ec5f74, 0x3ff72f82, 0x86ead08a
+ .word 0x3ff73f9a, 0x48a58174, 0x3ff74fbd, 0x35d7cbfd
+ .word 0x3ff75feb, 0x564267c9, 0x3ff77024, 0xb1ab6e09
+ .word 0x3ff78069, 0x4fde5d3f, 0x3ff790b9, 0x38ac1cf6
+ .word 0x3ff7a114, 0x73eb0187, 0x3ff7b17b, 0x0976cfdb
+ .word 0x3ff7c1ed, 0x0130c132, 0x3ff7d26a, 0x62ff86f0
+ .word 0x3ff7e2f3, 0x36cf4e62, 0x3ff7f387, 0x8491c491
+ .word 0x3ff80427, 0x543e1a12, 0x3ff814d2, 0xadd106d9
+ .word 0x3ff82589, 0x994cce13, 0x3ff8364c, 0x1eb941f7
+ .word 0x3ff8471a, 0x4623c7ad, 0x3ff857f4, 0x179f5b21
+ .word 0x3ff868d9, 0x9b4492ed, 0x3ff879ca, 0xd931a436
+ .word 0x3ff88ac7, 0xd98a6699, 0x3ff89bd0, 0xa478580f
+ .word 0x3ff8ace5, 0x422aa0db, 0x3ff8be05, 0xbad61778
+ .word 0x3ff8cf32, 0x16b5448c, 0x3ff8e06a, 0x5e0866d9
+ .word 0x3ff8f1ae, 0x99157736, 0x3ff902fe, 0xd0282c8a
+ .word 0x3ff9145b, 0x0b91ffc6, 0x3ff925c3, 0x53aa2fe2
+ .word 0x3ff93737, 0xb0cdc5e5, 0x3ff948b8, 0x2b5f98e5
+ .word 0x3ff95a44, 0xcbc8520f, 0x3ff96bdd, 0x9a7670b3
+ .word 0x3ff97d82, 0x9fde4e50, 0x3ff98f33, 0xe47a22a2
+ .word 0x3ff9a0f1, 0x70ca07ba, 0x3ff9b2bb, 0x4d53fe0d
+ .word 0x3ff9c491, 0x82a3f090, 0x3ff9d674, 0x194bb8d5
+ .word 0x3ff9e863, 0x19e32323, 0x3ff9fa5e, 0x8d07f29e
+ .word 0x3ffa0c66, 0x7b5de565, 0x3ffa1e7a, 0xed8eb8bb
+ .word 0x3ffa309b, 0xec4a2d33, 0x3ffa42c9, 0x80460ad8
+ .word 0x3ffa5503, 0xb23e255d, 0x3ffa674a, 0x8af46052
+ .word 0x3ffa799e, 0x1330b358, 0x3ffa8bfe, 0x53c12e59
+ .word 0x3ffa9e6b, 0x5579fdbf, 0x3ffab0e5, 0x21356eba
+ .word 0x3ffac36b, 0xbfd3f37a, 0x3ffad5ff, 0x3a3c2774
+ .word 0x3ffae89f, 0x995ad3ad, 0x3ffafb4c, 0xe622f2ff
+ .word 0x3ffb0e07, 0x298db666, 0x3ffb20ce, 0x6c9a8952
+ .word 0x3ffb33a2, 0xb84f15fb, 0x3ffb4684, 0x15b749b1
+ .word 0x3ffb5972, 0x8de5593a, 0x3ffb6c6e, 0x29f1c52a
+ .word 0x3ffb7f76, 0xf2fb5e47, 0x3ffb928c, 0xf22749e4
+ .word 0x3ffba5b0, 0x30a1064a, 0x3ffbb8e0, 0xb79a6f1f
+ .word 0x3ffbcc1e, 0x904bc1d2, 0x3ffbdf69, 0xc3f3a207
+ .word 0x3ffbf2c2, 0x5bd71e09, 0x3ffc0628, 0x6141b33d
+ .word 0x3ffc199b, 0xdd85529c, 0x3ffc2d1c, 0xd9fa652c
+ .word 0x3ffc40ab, 0x5fffd07a, 0x3ffc5447, 0x78fafb22
+ .word 0x3ffc67f1, 0x2e57d14b, 0x3ffc7ba8, 0x8988c933
+ .word 0x3ffc8f6d, 0x9406e7b5, 0x3ffca340, 0x5751c4db
+ .word 0x3ffcb720, 0xdcef9069, 0x3ffccb0f, 0x2e6d1675
+ .word 0x3ffcdf0b, 0x555dc3fa, 0x3ffcf315, 0x5b5bab74
+ .word 0x3ffd072d, 0x4a07897c, 0x3ffd1b53, 0x2b08c968
+ .word 0x3ffd2f87, 0x080d89f2, 0x3ffd43c8, 0xeacaa1d6
+ .word 0x3ffd5818, 0xdcfba487, 0x3ffd6c76, 0xe862e6d3
+ .word 0x3ffd80e3, 0x16c98398, 0x3ffd955d, 0x71ff6075
+ .word 0x3ffda9e6, 0x03db3285, 0x3ffdbe7c, 0xd63a8315
+ .word 0x3ffdd321, 0xf301b460, 0x3ffde7d5, 0x641c0658
+ .word 0x3ffdfc97, 0x337b9b5f, 0x3ffe1167, 0x6b197d17
+ .word 0x3ffe2646, 0x14f5a129, 0x3ffe3b33, 0x3b16ee12
+ .word 0x3ffe502e, 0xe78b3ff6, 0x3ffe6539, 0x24676d76
+ .word 0x3ffe7a51, 0xfbc74c83, 0x3ffe8f79, 0x77cdb740
+ .word 0x3ffea4af, 0xa2a490da, 0x3ffeb9f4, 0x867cca6e
+ .word 0x3ffecf48, 0x2d8e67f1, 0x3ffee4aa, 0xa2188510
+ .word 0x3ffefa1b, 0xee615a27, 0x3fff0f9c, 0x1cb6412a
+ .word 0x3fff252b, 0x376bba97, 0x3fff3ac9, 0x48dd7274
+ .word 0x3fff5076, 0x5b6e4540, 0x3fff6632, 0x798844f8
+ .word 0x3fff7bfd, 0xad9cbe14, 0x3fff91d8, 0x02243c89
+ .word 0x3fffa7c1, 0x819e90d8, 0x3fffbdba, 0x3692d514
+ .word 0x3fffd3c2, 0x2b8f71f1, 0x3fffe9d9, 0x6b2a23d9
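+! (A hedged reading of the table above: its 256 doubles are 2**(j/256) for
+! j = 0..255, i.e. one binade of exp2 sampled in steps of 1/256.)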
+
+! __mt_constexp2fb:
+ .word 0x36900000, 0x36a00000, 0x36b00000, 0x36c00000
+ .word 0x36d00000, 0x36e00000, 0x36f00000, 0x37000000
+ .word 0x37100000, 0x37200000, 0x37300000, 0x37400000
+ .word 0x37500000, 0x37600000, 0x37700000, 0x37800000
+ .word 0x37900000, 0x37a00000, 0x37b00000, 0x37c00000
+ .word 0x37d00000, 0x37e00000, 0x37f00000, 0x38000000
+ .word 0x38100000, 0x38200000, 0x38300000, 0x38400000
+ .word 0x38500000, 0x38600000, 0x38700000, 0x38800000
+ .word 0x38900000, 0x38a00000, 0x38b00000, 0x38c00000
+ .word 0x38d00000, 0x38e00000, 0x38f00000, 0x39000000
+ .word 0x39100000, 0x39200000, 0x39300000, 0x39400000
+ .word 0x39500000, 0x39600000, 0x39700000, 0x39800000
+ .word 0x39900000, 0x39a00000, 0x39b00000, 0x39c00000
+ .word 0x39d00000, 0x39e00000, 0x39f00000, 0x3a000000
+ .word 0x3a100000, 0x3a200000, 0x3a300000, 0x3a400000
+ .word 0x3a500000, 0x3a600000, 0x3a700000, 0x3a800000
+ .word 0x3a900000, 0x3aa00000, 0x3ab00000, 0x3ac00000
+ .word 0x3ad00000, 0x3ae00000, 0x3af00000, 0x3b000000
+ .word 0x3b100000, 0x3b200000, 0x3b300000, 0x3b400000
+ .word 0x3b500000, 0x3b600000, 0x3b700000, 0x3b800000
+ .word 0x3b900000, 0x3ba00000, 0x3bb00000, 0x3bc00000
+ .word 0x3bd00000, 0x3be00000, 0x3bf00000, 0x3c000000
+ .word 0x3c100000, 0x3c200000, 0x3c300000, 0x3c400000
+ .word 0x3c500000, 0x3c600000, 0x3c700000, 0x3c800000
+ .word 0x3c900000, 0x3ca00000, 0x3cb00000, 0x3cc00000
+ .word 0x3cd00000, 0x3ce00000, 0x3cf00000, 0x3d000000
+ .word 0x3d100000, 0x3d200000, 0x3d300000, 0x3d400000
+ .word 0x3d500000, 0x3d600000, 0x3d700000, 0x3d800000
+ .word 0x3d900000, 0x3da00000, 0x3db00000, 0x3dc00000
+ .word 0x3dd00000, 0x3de00000, 0x3df00000, 0x3e000000
+ .word 0x3e100000, 0x3e200000, 0x3e300000, 0x3e400000
+ .word 0x3e500000, 0x3e600000, 0x3e700000, 0x3e800000
+ .word 0x3e900000, 0x3ea00000, 0x3eb00000, 0x3ec00000
+ .word 0x3ed00000, 0x3ee00000, 0x3ef00000, 0x3f000000
+ .word 0x3f100000, 0x3f200000, 0x3f300000, 0x3f400000
+ .word 0x3f500000, 0x3f600000, 0x3f700000, 0x3f800000
+ .word 0x3f900000, 0x3fa00000, 0x3fb00000, 0x3fc00000
+ .word 0x3fd00000, 0x3fe00000, 0x3ff00000, 0x40000000
+ .word 0x40100000, 0x40200000, 0x40300000, 0x40400000
+ .word 0x40500000, 0x40600000, 0x40700000, 0x40800000
+ .word 0x40900000, 0x40a00000, 0x40b00000, 0x40c00000
+ .word 0x40d00000, 0x40e00000, 0x40f00000, 0x41000000
+ .word 0x41100000, 0x41200000, 0x41300000, 0x41400000
+ .word 0x41500000, 0x41600000, 0x41700000, 0x41800000
+ .word 0x41900000, 0x41a00000, 0x41b00000, 0x41c00000
+ .word 0x41d00000, 0x41e00000, 0x41f00000, 0x42000000
+ .word 0x42100000, 0x42200000, 0x42300000, 0x42400000
+ .word 0x42500000, 0x42600000, 0x42700000, 0x42800000
+ .word 0x42900000, 0x42a00000, 0x42b00000, 0x42c00000
+ .word 0x42d00000, 0x42e00000, 0x42f00000, 0x43000000
+ .word 0x43100000, 0x43200000, 0x43300000, 0x43400000
+ .word 0x43500000, 0x43600000, 0x43700000, 0x43800000
+ .word 0x43900000, 0x43a00000, 0x43b00000, 0x43c00000
+ .word 0x43d00000, 0x43e00000, 0x43f00000, 0x44000000
+ .word 0x44100000, 0x44200000, 0x44300000, 0x44400000
+ .word 0x44500000, 0x44600000, 0x44700000, 0x44800000
+ .word 0x44900000, 0x44a00000, 0x44b00000, 0x44c00000
+ .word 0x44d00000, 0x44e00000, 0x44f00000, 0x45000000
+ .word 0x45100000, 0x45200000, 0x45300000, 0x45400000
+ .word 0x45500000, 0x45600000, 0x45700000, 0x45800000
+ .word 0x45900000, 0x45a00000, 0x45b00000, 0x45c00000
+ .word 0x45d00000, 0x45e00000, 0x45f00000, 0x46000000
+ .word 0x46100000, 0x46200000, 0x46300000, 0x46400000
+ .word 0x46500000, 0x46600000, 0x46700000, 0x46800000
+ .word 0x46900000, 0x46a00000, 0x46b00000, 0x46c00000
+ .word 0x46d00000, 0x46e00000, 0x46f00000, 0x47000000
+ .word 0x47100000, 0x47200000, 0x47300000, 0x47400000
+ .word 0x47500000, 0x47600000, 0x47700000, 0x47800000
+ .word 0x47900000, 0x47a00000, 0x47b00000, 0x47c00000
+ .word 0x47d00000, 0x47e00000, 0x47f00000, 0x00000000
+
+ .word 0,0,0,0
+ .word 0,0,0,0
+
+.CONST_TBL:
+! __mt_constlog4f:
+ .word 0x00000000, 0x00000000, 0x3e800000, 0x00000000
+ .word 0x4006fe50, 0xb6ef0851, 0x3e7fc07f, 0x01fc07f0
+ .word 0x4016e796, 0x85c2d22a, 0x3e7f81f8, 0x1f81f820
+ .word 0x40211cd1, 0xd5133413, 0x3e7f4465, 0x9e4a4271
+ .word 0x4026bad3, 0x758efd87, 0x3e7f07c1, 0xf07c1f08
+ .word 0x402c4dfa, 0xb90aab5f, 0x3e7ecc07, 0xb301ecc0
+ .word 0x4030eb38, 0x9fa29f9b, 0x3e7e9131, 0xabf0b767
+ .word 0x4033aa2f, 0xdd27f1c3, 0x3e7e573a, 0xc901e574
+ .word 0x403663f6, 0xfac91316, 0x3e7e1e1e, 0x1e1e1e1e
+ .word 0x403918a1, 0x6e46335b, 0x3e7de5d6, 0xe3f8868a
+ .word 0x403bc842, 0x40adabba, 0x3e7dae60, 0x76b981db
+ .word 0x403e72ec, 0x117fa5b2, 0x3e7d77b6, 0x54b82c34
+ .word 0x40408c58, 0x8cda79e4, 0x3e7d41d4, 0x1d41d41d
+ .word 0x4041dcd1, 0x97552b7b, 0x3e7d0cb5, 0x8f6ec074
+ .word 0x40432ae9, 0xe278ae1a, 0x3e7cd856, 0x89039b0b
+ .word 0x404476a9, 0xf983f74d, 0x3e7ca4b3, 0x055ee191
+ .word 0x4045c01a, 0x39fbd688, 0x3e7c71c7, 0x1c71c71c
+ .word 0x40470742, 0xd4ef027f, 0x3e7c3f8f, 0x01c3f8f0
+ .word 0x40484c2b, 0xd02f03b3, 0x3e7c0e07, 0x0381c0e0
+ .word 0x40498edd, 0x077e70df, 0x3e7bdd2b, 0x899406f7
+ .word 0x404acf5e, 0x2db4ec94, 0x3e7bacf9, 0x14c1bad0
+ .word 0x404c0db6, 0xcdd94dee, 0x3e7b7d6c, 0x3dda338b
+ .word 0x404d49ee, 0x4c325970, 0x3e7b4e81, 0xb4e81b4f
+ .word 0x404e840b, 0xe74e6a4d, 0x3e7b2036, 0x406c80d9
+ .word 0x404fbc16, 0xb902680a, 0x3e7af286, 0xbca1af28
+ .word 0x4050790a, 0xdbb03009, 0x3e7ac570, 0x1ac5701b
+ .word 0x40511307, 0xdad30b76, 0x3e7a98ef, 0x606a63be
+ .word 0x4051ac05, 0xb291f070, 0x3e7a6d01, 0xa6d01a6d
+ .word 0x40524407, 0xab0e073a, 0x3e7a41a4, 0x1a41a41a
+ .word 0x4052db10, 0xfc4d9aaf, 0x3e7a16d3, 0xf97a4b02
+ .word 0x40537124, 0xcea4cded, 0x3e79ec8e, 0x951033d9
+ .word 0x40540646, 0x3b1b0449, 0x3e79c2d1, 0x4ee4a102
+ .word 0x40549a78, 0x4bcd1b8b, 0x3e799999, 0x9999999a
+ .word 0x40552dbd, 0xfc4c96b3, 0x3e7970e4, 0xf80cb872
+ .word 0x4055c01a, 0x39fbd688, 0x3e7948b0, 0xfcd6e9e0
+ .word 0x4056518f, 0xe4677ba7, 0x3e7920fb, 0x49d0e229
+ .word 0x4056e221, 0xcd9d0cde, 0x3e78f9c1, 0x8f9c18fa
+ .word 0x405771d2, 0xba7efb3c, 0x3e78d301, 0x8d3018d3
+ .word 0x405800a5, 0x63161c54, 0x3e78acb9, 0x0f6bf3aa
+ .word 0x40588e9c, 0x72e0b226, 0x3e7886e5, 0xf0abb04a
+ .word 0x40591bba, 0x891f1709, 0x3e786186, 0x18618618
+ .word 0x4059a802, 0x391e232f, 0x3e783c97, 0x7ab2bedd
+ .word 0x405a3376, 0x0a7f6051, 0x3e781818, 0x18181818
+ .word 0x405abe18, 0x797f1f49, 0x3e77f405, 0xfd017f40
+ .word 0x405b47eb, 0xf73882a1, 0x3e77d05f, 0x417d05f4
+ .word 0x405bd0f2, 0xe9e79031, 0x3e77ad22, 0x08e0ecc3
+ .word 0x405c592f, 0xad295b56, 0x3e778a4c, 0x8178a4c8
+ .word 0x405ce0a4, 0x923a587d, 0x3e7767dc, 0xe434a9b1
+ .word 0x405d6753, 0xe032ea0f, 0x3e7745d1, 0x745d1746
+ .word 0x405ded3f, 0xd442364c, 0x3e772428, 0x7f46debc
+ .word 0x405e726a, 0xa1e754d2, 0x3e7702e0, 0x5c0b8170
+ .word 0x405ef6d6, 0x7328e220, 0x3e76e1f7, 0x6b4337c7
+ .word 0x405f7a85, 0x68cb06cf, 0x3e76c16c, 0x16c16c17
+ .word 0x405ffd79, 0x9a83ff9b, 0x3e76a13c, 0xd1537290
+ .word 0x40603fda, 0x8b97997f, 0x3e768168, 0x16816817
+ .word 0x4060809c, 0xf27f703d, 0x3e7661ec, 0x6a5122f9
+ .word 0x4060c105, 0x00d63aa6, 0x3e7642c8, 0x590b2164
+ .word 0x40610113, 0xb153c8ea, 0x3e7623fa, 0x77016240
+ .word 0x406140c9, 0xfaa1e544, 0x3e760581, 0x60581606
+ .word 0x40618028, 0xcf72976a, 0x3e75e75b, 0xb8d015e7
+ .word 0x4061bf31, 0x1e95d00e, 0x3e75c988, 0x2b931057
+ .word 0x4061fde3, 0xd30e8126, 0x3e75ac05, 0x6b015ac0
+ .word 0x40623c41, 0xd42727c8, 0x3e758ed2, 0x308158ed
+ .word 0x40627a4c, 0x0585cbf8, 0x3e7571ed, 0x3c506b3a
+ .word 0x4062b803, 0x473f7ad1, 0x3e755555, 0x55555555
+ .word 0x4062f568, 0x75eb3f26, 0x3e753909, 0x48f40feb
+ .word 0x4063327c, 0x6ab49ca7, 0x3e751d07, 0xeae2f815
+ .word 0x40636f3f, 0xfb6d9162, 0x3e750150, 0x15015015
+ .word 0x4063abb3, 0xfaa02167, 0x3e74e5e0, 0xa72f0539
+ .word 0x4063e7d9, 0x379f7016, 0x3e74cab8, 0x8725af6e
+ .word 0x406423b0, 0x7e986aa9, 0x3e74afd6, 0xa052bf5b
+ .word 0x40645f3a, 0x98a20739, 0x3e749539, 0xe3b2d067
+ .word 0x40649a78, 0x4bcd1b8b, 0x3e747ae1, 0x47ae147b
+ .word 0x4064d56a, 0x5b33cec4, 0x3e7460cb, 0xc7f5cf9a
+ .word 0x40651011, 0x8708a8f9, 0x3e7446f8, 0x6562d9fb
+ .word 0x40654a6e, 0x8ca5438e, 0x3e742d66, 0x25d51f87
+ .word 0x40658482, 0x26989d34, 0x3e741414, 0x14141414
+ .word 0x4065be4d, 0x0cb51435, 0x3e73fb01, 0x3fb013fb
+ .word 0x4065f7cf, 0xf41e09af, 0x3e73e22c, 0xbce4a902
+ .word 0x4066310b, 0x8f553048, 0x3e73c995, 0xa47babe7
+ .word 0x40666a00, 0x8e4788cc, 0x3e73b13b, 0x13b13b14
+ .word 0x4066a2af, 0x9e5a0f0a, 0x3e73991c, 0x2c187f63
+ .word 0x4066db19, 0x6a76194a, 0x3e738138, 0x13813814
+ .word 0x4067133e, 0x9b156c7c, 0x3e73698d, 0xf3de0748
+ .word 0x40674b1f, 0xd64e0754, 0x3e73521c, 0xfb2b78c1
+ .word 0x406782bd, 0xbfdda657, 0x3e733ae4, 0x5b57bcb2
+ .word 0x4067ba18, 0xf93502e4, 0x3e7323e3, 0x4a2b10bf
+ .word 0x4067f132, 0x2182cf16, 0x3e730d19, 0x0130d190
+ .word 0x40682809, 0xd5be7073, 0x3e72f684, 0xbda12f68
+ .word 0x40685ea0, 0xb0b27b26, 0x3e72e025, 0xc04b8097
+ .word 0x406894f7, 0x4b06ef8b, 0x3e72c9fb, 0x4d812ca0
+ .word 0x4068cb0e, 0x3b4b3bbe, 0x3e72b404, 0xad012b40
+ .word 0x406900e6, 0x160002cd, 0x3e729e41, 0x29e4129e
+ .word 0x4069367f, 0x6da0ab2f, 0x3e7288b0, 0x1288b013
+ .word 0x40696bda, 0xd2acb5f6, 0x3e727350, 0xb8812735
+ .word 0x4069a0f8, 0xd3b0e050, 0x3e725e22, 0x708092f1
+ .word 0x4069d5d9, 0xfd5010b3, 0x3e724924, 0x92492492
+ .word 0x406a0a7e, 0xda4c112d, 0x3e723456, 0x789abcdf
+ .word 0x406a3ee7, 0xf38e181f, 0x3e721fb7, 0x8121fb78
+ .word 0x406a7315, 0xd02f20c8, 0x3e720b47, 0x0c67c0d9
+ .word 0x406aa708, 0xf58014d3, 0x3e71f704, 0x7dc11f70
+ .word 0x406adac1, 0xe711c833, 0x3e71e2ef, 0x3b3fb874
+ .word 0x406b0e41, 0x26bcc86c, 0x3e71cf06, 0xada2811d
+ .word 0x406b4187, 0x34a9008c, 0x3e71bb4a, 0x4046ed29
+ .word 0x406b7494, 0x8f5532da, 0x3e71a7b9, 0x611a7b96
+ .word 0x406ba769, 0xb39e4964, 0x3e719453, 0x808ca29c
+ .word 0x406bda07, 0x1cc67e6e, 0x3e718118, 0x11811812
+ .word 0x406c0c6d, 0x447c5dd3, 0x3e716e06, 0x89427379
+ .word 0x406c3e9c, 0xa2e1a055, 0x3e715b1e, 0x5f75270d
+ .word 0x406c7095, 0xae91e1c7, 0x3e71485f, 0x0e0acd3b
+ .word 0x406ca258, 0xdca93316, 0x3e7135c8, 0x1135c811
+ .word 0x406cd3e6, 0xa0ca8907, 0x3e712358, 0xe75d3033
+ .word 0x406d053f, 0x6d260896, 0x3e711111, 0x11111111
+ .word 0x406d3663, 0xb27f31d5, 0x3e70fef0, 0x10fef011
+ .word 0x406d6753, 0xe032ea0f, 0x3e70ecf5, 0x6be69c90
+ .word 0x406d9810, 0x643d6615, 0x3e70db20, 0xa88f4696
+ .word 0x406dc899, 0xab3ff56c, 0x3e70c971, 0x4fbcda3b
+ .word 0x406df8f0, 0x2086af2c, 0x3e70b7e6, 0xec259dc8
+ .word 0x406e2914, 0x2e0e0140, 0x3e70a681, 0x0a6810a7
+ .word 0x406e5906, 0x3c8822ce, 0x3e70953f, 0x39010954
+ .word 0x406e88c6, 0xb3626a73, 0x3e708421, 0x08421084
+ .word 0x406eb855, 0xf8ca88fb, 0x3e707326, 0x0a47f7c6
+ .word 0x406ee7b4, 0x71b3a950, 0x3e70624d, 0xd2f1a9fc
+ .word 0x406f16e2, 0x81db7630, 0x3e705197, 0xf7d73404
+ .word 0x406f45e0, 0x8bcf0655, 0x3e704104, 0x10410410
+ .word 0x406f74ae, 0xf0efafae, 0x3e703091, 0xb51f5e1a
+ .word 0x406fa34e, 0x1177c233, 0x3e702040, 0x81020408
+ .word 0x406fd1be, 0x4c7f2af9, 0x3e701010, 0x10101010
+ .word 0x40700000, 0x00000000, 0x3e700000, 0x00000000
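+! (A hedged reading of the indexing code below: each 16-byte entry j of the
+! table above, j = 0..128, holds the pair { 256*log2(1 + j/128),
+! 2**-23 / (1 + j/128) }.  The first double seeds the scaled log2; the
+! second converts the remaining mantissa difference, an integer count of
+! float-mantissa ulps, into the small argument y0 of the KA polynomial.)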
+
+! __mt_constexp2f:
+ .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
+ .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
+ .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
+ .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
+ .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
+ .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
+ .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
+ .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
+ .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85
+ .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec
+ .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5
+ .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e
+ .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6
+ .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab
+ .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e
+ .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2
+ .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0
+ .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f
+ .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c
+ .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b
+ .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027
+ .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d
+ .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819
+ .word 0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1
+ .word 0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a
+ .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75
+ .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29
+ .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70
+ .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13
+ .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f
+ .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589
+ .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b
+ .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd
+ .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32
+ .word 0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d
+ .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b
+ .word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a
+ .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef
+ .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4
+ .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173
+ .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175
+ .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024
+ .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a
+ .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4
+ .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232
+ .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237
+ .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2
+ .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7
+ .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114
+ .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff
+ .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee
+ .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef
+ .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27
+ .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2
+ .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf
+ .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc
+ .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03
+ .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93
+ .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71
+ .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4
+ .word 0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd
+ .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7
+ .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6
+ .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538
+ .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e
+ .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645
+ .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5
+ .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87
+ .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a
+ .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd
+ .word 0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09
+ .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6
+ .word 0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb
+ .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0
+ .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491
+ .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9
+ .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7
+ .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21
+ .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436
+ .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f
+ .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778
+ .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9
+ .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a
+ .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2
+ .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5
+ .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3
+ .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2
+ .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d
+ .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5
+ .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e
+ .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb
+ .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8
+ .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052
+ .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59
+ .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba
+ .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774
+ .word 0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff
+ .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952
+ .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1
+ .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a
+ .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4
+ .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f
+ .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207
+ .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d
+ .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c
+ .word 0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22
+ .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933
+ .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db
+ .word 0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675
+ .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74
+ .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968
+ .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6
+ .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3
+ .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075
+ .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315
+ .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658
+ .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17
+ .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12
+ .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76
+ .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740
+ .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e
+ .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510
+ .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a
+ .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274
+ .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8
+ .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89
+ .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514
+ .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9
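+! (A hedged reading: the 256 doubles above are again 2**(j/256), j = 0..255,
+! but with each high word offset by -((j >> 4) << 16) relative to the plain
+! __mt_constexp2fa table, so that the vis_fpadd32() of the vis_fpackfix()
+! result in the code below lands on the correct exponent.)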
+
+ .word 0xc057150d, 0x5f6e1c54 ! KA3 = -3.60659926599003171364e-01*256.0
+ .word 0x405ec71c, 0x2e92efda ! KA2 = 4.80902715189356683026e-01*256.0
+ .word 0xc0671547, 0x653cbec4 ! KA1 = -7.21347520569871841065e-01*256.0
+ .word 0x40771547, 0x652af190 ! KA0 = 1.44269504088069658645e+00*256.0
+ .word 0x3ecebfbe, 0x9d182250 ! KB2 = 3.66556671660783833261e-06
+ .word 0x3f662e43, 0xe2528362 ! KB1 = 2.70760782821392980564e-03
+ .word 0x40e00000, 0x00000000 ! HTHRESH = 32768.0
+ .word 0xc0e2c000, 0x00000000 ! LTHRESH = -38400.0 ; 0.0f
+ .word 0x3f800000, 0x00000000 ! 1.0f ; free
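+
+! A hedged reading of how the constants above combine in the code below
+! (a sketch, not an authoritative derivation):
+!
+!     t = 256 * y * log2(x)      /* KA0..KA3: log2(1+r) coefficients * 256 */
+!     t = clamp(t, LTHRESH, HTHRESH)
+!     n = (int) t
+!     f = t - n
+!     x**y ~= 2**(n >> 8) * __mt_constexp2f[n & 255] * (1 + KB1*f + KB2*f*f)
+!
+! KB1 ~= ln(2)/256 and KB2 ~= (ln(2)/256)**2 / 2, i.e. the leading terms of
+! exp2(f/256).  HTHRESH = 256*128 and LTHRESH = -256*150, so the clamp
+! saturates the final float result to +Inf or 0 before the index math
+! could wrap.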
+
+#define tmp_px STACK_BIAS-48
+#define tmp_py STACK_BIAS-40
+#define tmp_counter STACK_BIAS-32
+#define tmp0 STACK_BIAS-28
+#define tmp1 STACK_BIAS-24
+#define tmp2 STACK_BIAS-20
+#define tmp3 STACK_BIAS-16
+#define tmp4 STACK_BIAS-12
+#define tmp5 STACK_BIAS-8
+#define tmp6 STACK_BIAS-4
+
+
+#define KA3 %f34
+#define KA2 %f36
+#define KA1 %f38
+#define KA0 %f40
+#define KB2 %f42
+#define KB1 %f44
+#define HTHRESHOLD %f30
+#define LTHRESHOLD %f32
+
+#define counter %o7
+#define stridex %i0
+#define stridey %i4
+#define stridez %l3
+
+#define CONST_0x8000 %l1
+#define MASK_0x007fffff %l4
+#define MASK_0x7fffffff %l5
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x30
+
+!--------------------------------------------------------------------
+! !!!!! vpowf algorithm !!!!!
+! uy = *(unsigned int*)py;
+! ux = *(unsigned int*)px;
+! ay = uy & 0x7fffffff;
+! ax0 = ux & 0x7fffffff;
+! sx = ux >> 31;
+! yisint0 = 0; /* Y - non-integer */
+! if (ax0 >= 0x7f800000 || ay >= 0x7f800000) { /* |X| or |Y| = Inf,Nan */
+! if (ax0 > 0x7f800000 || ay > 0x7f800000) /* |X| or |Y| = Nan */
+! pz[0] = *px * *py;
+! goto next;
+! if (ay == 0x7f800000) { /* |Y| = Inf */
+! float fy;
+! if (ax0 == 0x3f800000) fy = *py - *py; /* +-1 ** +-Inf = NaN */
+! else fy = ((ax0 < 0x3f800000) != (uy >> 31)) ? ZERO : *(float*) &ay;
+! pz[0] = fy;
+! goto next;
+! }
+! if (sx) { /* X = -Inf */
+! exp = ay >> 23;
+! if (exp >= 0x97) /* |Y| >= 2^24 */
+! yisint0 = 2; /* Y - even */
+! else {
+! if (exp >= 0x7f) { /* |Y| >= 1 */
+! i0 = ay >> ((0x7f + 23) - exp);
+! if ((i0 << ((0x7f + 23) - exp)) == ay) yisint0 = 2 - (i0 & 1);
+! }
+! }
+! }
+! if (uy >> 31) ax0 = 0;
+! ax0 += yisint0 << 31;
+! pz[0] = *(float*)&ax0;
+! goto next;
+! }
+! exp0 = (ax0 >> 23) - 127;
+! if ((int)ux < 0x00800000) { /* X = denormal or negative */
+! if ((int)ax0 < 0x00800000) { /* X = denormal */
+! *((float*) &ax0) = (float) (int)ax0;
+! exp0 = (ax0 >> 23) - (127 + 149);
+! }
+! if ((int)ux <= 0) { /* X <= 0 */
+! exp = ay >> 23;
+! if (exp >= 0x97) /* |Y| >= 2^24 */
+! yisint0 = 2; /* Y - even */
+! else {
+! if (exp >= 0x7f) { /* |Y| >= 1 */
+! i0 = ay >> ((0x7f + 23) - exp);
+! if ((i0 << ((0x7f + 23) - exp)) == ay) yisint0 = 2 - (i0 & 1);
+! }
+! }
+! if (ax0 == 0) { /* pow(0,Y) */
+! float fy;
+! fy = (uy >> 31) ? ONE / ZERO : ZERO;
+! if (sx & yisint0) fy = -fy;
+! pz[0] = fy;
+! goto next;
+! }
+! if (yisint0 == 0) { /* pow(neg,non-integer) */
+! pz[0] = ZERO / ZERO; /* NaN */
+! goto next;
+! }
+! }
+! }
+!
+! ax0 = *px;
+! exp0 = ax0 & 0x7fffffff;
+! exp0 >>= 23;
+! exp0 -= 127;
+! exp0 <<= 8;
+! ax0 &= 0x007fffff;
+! i0 = ax0 + 0x8000;
+! i0 &= 0xffff0000;
+! ind0 = i0 >> 12;
+! ind0 &= -8;
+! i0 = ax0 - i0;
+! dtmp0 = (double) i0;
+! dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+! y0 = dtmp0 * dtmp1;
+! dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+! dtmp1 = (double) exp0;
+! yy0 = dtmp0 + dtmp1;
+! dtmp0 = KA3 * y0;
+! dtmp0 += KA2;
+! dtmp0 *= y0;
+! dtmp0 += KA1;
+! dtmp0 *= y0;
+! dtmp0 += KA0;
+! dtmp0 *= y0;
+! yy0 += dtmp0;
+! ftmp0 = *py0;
+! dtmp0 = (double)ftmp0;
+! yy0 *= dtmp0;
+! if (yy0 >= HTHRESH)
+! yy0 = HTHRESH;
+! if (yy0 <= LTHRESH)
+! yy0 = LTHRESH;
+! ind0 = (int) yy0;
+! ((int*)&dtmp1)[0] = ind0;
+! ((int*)&dtmp1)[1] = 0;
+! dtmp1 = vis_fpackfix(dtmp1);
+! dtmp0 = (double)ind0;
+! y0 = yy0 - dtmp0;
+! dtmp0 = KB2 * y0;
+! dtmp0 += KB1;
+! yy0 = dtmp0 * y0;
+! ind0 &= 255;
+! ind0 <<= 3;
+! di0 = *(double*)((char*)__mt_constexp2f + ind0);
+! di0 = vis_fpadd32(di0,dtmp1);
+! yy0 *= di0;
+! yy0 += di0;
+! ftmp0 = (float)yy0;
+! *pz0 = ftmp0;
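+!
+! The vis_fpackfix()/vis_fpadd32() pair above applies the 2**(ind0 >> 8)
+! scaling by adding directly into the exponent field of the table entry
+! (the __mt_constexp2f entries appear to be pre-biased to absorb the packed
+! offset).  A minimal scalar C sketch of that exponent-field trick, assuming
+! a positive normal double and no exponent overflow -- an illustration only,
+! not the VIS code path:
+!
+!     #include <stdint.h>
+!     #include <string.h>
+!
+!     static double scale_pow2(double d, int k)
+!     {
+!         uint64_t bits;
+!         memcpy(&bits, &d, sizeof bits);      /* view the double as raw bits  */
+!         bits += (uint64_t)(int64_t)k << 52;  /* bump the biased exponent by k */
+!         memcpy(&d, &bits, sizeof d);         /* i.e. multiply by 2**k        */
+!         return d;
+!     }
+!
+!     /* e.g. scale_pow2(1.25, 10) == 1280.0 */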
+!--------------------------------------------------------------------
+! !!!!! vpowf algorithm,stridex=0 !!!!!
+!
+! ax = ax0 = *px;
+! exp0 = ax0 & 0x7fffffff;
+! exp0 >>= 23;
+! exp0 -= 127;
+! exp0 <<= 8;
+! ax0 &= 0x007fffff;
+! i0 = ax0 + 0x8000;
+! i0 &= 0xffff0000;
+! ind0 = i0 >> 12;
+! ind0 &= -8;
+! i0 = ax0 - i0;
+! dtmp0 = (double) i0;
+! dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+! y0 = dtmp0 * dtmp1;
+! dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+! dtmp1 = (double) exp0;
+! yy0 = dtmp0 + dtmp1;
+! dtmp0 = KA3 * y0;
+! dtmp0 += KA2;
+! dtmp0 *= y0;
+! dtmp0 += KA1;
+! dtmp0 *= y0;
+! dtmp0 += KA0;
+! dtmp0 *= y0;
+! yy = yy0 + dtmp0;
+!
+! uy = ((int*)py)[0];
+! ay = uy & 0x7fffffff;
+! if (ay >= 0x7f800000) { /* |Y| = Inf or Nan */
+! float fy;
+! if (ay > 0x7f800000) fy = *py + *py; /* |Y| = Nan */
+! else fy = ((ax < 0x3f800000) != (uy >> 31)) ? ZERO : *(float*)&ay;
+! pz[0] = fy;
+! goto next;
+! }
+!
+!
+! ftmp0 = py[0];
+! dtmp0 = (double)ftmp0;
+! yy0 = dtmp0 * yy;
+! if (yy0 >= HTHRESH)
+! yy0 = HTHRESH;
+! if (yy0 <= LTHRESH)
+! yy0 = LTHRESH;
+! ii0 = (int) yy0;
+! dtmp0 = (double)ii0;
+! i0 = ii0 >> 5;
+! i0 &= -8;
+! di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0];
+! y0 = yy0 - dtmp0;
+! dtmp0 = KB2 * y0;
+! dtmp0 += KB1;
+! yy0 = dtmp0 * y0;
+! ii0 &= 255;
+! ii0 <<= 3;
+! dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+! di0 *= dtmp0;
+! dtmp0 = yy0 * di0;
+! dtmp0 += di0;
+! ftmp0 = (float)dtmp0;
+! pz[0] = ftmp0;
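+!
+! For reference, a hedged sketch of the calling convention this routine
+! appears to implement, inferred from the argument handling in the ENTRY
+! code below (%i0 = element count, %i1/%i2 = x and its stride, %i3/%i4 = y
+! and its stride, %i5 = z, with the z stride taken from the stack); the
+! real prototype is declared elsewhere in libm.  A scalar reference loop
+! (vpowf_ref is a name used here only for illustration):
+!
+!     #include <math.h>
+!
+!     void vpowf_ref(int n, const float *x, int sx,
+!                    const float *y, int sy, float *z, int sz)
+!     {
+!         for (int i = 0; i < n; i++)          /* one element per step */
+!             z[i * sz] = powf(x[i * sx], y[i * sy]);
+!     }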
+!--------------------------------------------------------------------
+ ENTRY(__vpowf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,l2)
+ wr %g0,0x60,%gsr
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],stridez
+#else
+ ld [%fp+STACK_BIAS+92],stridez
+#endif
+
+ ld [%i1],%o3
+ add %l2,2064,%l0
+ st %i0,[%fp+tmp_counter]
+ add %l0,2048,%l6
+ ldd [%l6],KA3
+ ldd [%l6+8],KA2
+ sll stridey,2,stridey
+ ldd [%l6+16],KA1
+ sll stridez,2,stridez
+ ldd [%l6+24],KA0
+ sll %i2,2,stridex
+ ldd [%l6+32],KB2
+ sethi %hi(0x7ffffc00),MASK_0x7fffffff
+ fzero %f2
+ ldd [%l6+40],KB1
+ add MASK_0x7fffffff,1023,MASK_0x7fffffff
+ fzero %f10
+ ldd [%l6+48],HTHRESHOLD
+ sethi %hi(0x7ffc00),MASK_0x007fffff
+ fzero %f20
+ ldd [%l6+56],LTHRESHOLD
+ sethi %hi(0x8000),CONST_0x8000
+ add MASK_0x007fffff,1023,MASK_0x007fffff
+
+ cmp stridex,0
+ bne,pt %icc,.common_case
+ sethi %hi(0x00800000),%l6
+
+ cmp %o3,%l6
+ bl,pn %icc,.common_case
+ sethi %hi(0x7f800000),%o1
+
+ cmp %o3,%o1
+ bge,pn %icc,.common_case
+ sethi %hi(0x3f800000),%l6
+
+ cmp %o3,%l6
+ bne,pt %icc,.stridex_zero
+ nop
+
+.common_case:
+ stx %i1,[%fp+tmp_px]
+ stx %i3,[%fp+tmp_py]
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%o2
+ ldx [%fp+tmp_py],%i2
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px;
+
+ lda [%i2]0x82,%l7
+ sethi %hi(0xffff0000),%l6
+ sethi %hi(0x7f800000),%o5
+
+ and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff;
+ and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff;
+
+ cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000
+ bge,pn %icc,.spec1 ! (Y0_2) if( ax0 >= 0x7f800000 )
+ and %l7,MASK_0x7fffffff,%o4
+
+ cmp %o4,%o5 ! (Y0_2) ay0 ? 0x7f800000
+ bge,pn %icc,.spec1 ! (Y0_2) if( ay0 >= 0x7f800000 )
+ nop
+
+ cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000
+ ble,pn %icc,.spec2 ! (Y0_2) if(ux0 < 0x800000)
+ srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23;
+
+ sub %o3,127,%o3 ! (Y0_2) exp0 -= 127;
+
+ add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000;
+
+ sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8;
+ and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000;
+ st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0
+
+ sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0;
+ st %o4,[%fp+tmp2] ! (Y0_2) STORE i0
+ add %o2,stridex,%o2 ! px += stridex
+
+ sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12;
+ lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px;
+
+ and %o0,-8,%g5 ! (Y0_2) ind0 &= -8;
+ ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0;
+
+ and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff;
+ and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff;
+
+ cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000
+ add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0
+
+ srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23;
+ add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000;
+
+ ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ sub %i3,127,%i3 ! (Y1_2) exp0 -= 127;
+ fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0;
+
+ sll %i3,8,%i3 ! (Y1_2) exp0 <<= 8;
+ and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000;
+ st %i3,[%fp+tmp4] ! (Y1_2) STORE exp0
+
+ sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0;
+ st %o0,[%fp+tmp5] ! (Y1_2) STORE i0
+ bge,pn %icc,.update0 ! (Y1_2) if(ax0 >= 0x7f800000)
+ nop
+.cont0:
+ cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 0x800000
+
+ fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1;
+ ble,pn %icc,.update1 ! (Y1_2) if(ux0 < 0x800000)
+ nop
+.cont1:
+ fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0;
+
+ faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2;
+
+ sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12;
+ add %o2,stridex,%i3 ! px += stridex
+ lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px;
+
+ and %o1,-8,%o0 ! (Y1_2) ind0 &= -8;
+ ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0
+
+ and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff;
+ and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff;
+ lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0;
+
+ srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23;
+ cmp %i1,%o5 ! (Y2_2) ax0 ? 0x7f800000
+
+ fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0;
+ add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0
+ sub %o3,127,%l7 ! (Y2_2) exp0 -= 127;
+
+ add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000;
+ ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0;
+
+ sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8;
+ and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000;
+ st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0
+
+ sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0;
+ st %i1,[%fp+tmp2] ! (Y2_2) STORE i0
+ bge,pn %icc,.update2 ! (Y2_2) if(ax0 >= 0x7f800000)
+ nop
+.cont2:
+ cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000
+
+ fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1;
+ ble,pn %icc,.update3 ! (Y2_2) if(ux0 < 0x800000)
+ faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1;
+.cont3:
+ ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0;
+
+ fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0;
+
+ fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0;
+
+ fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0;
+
+ fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0;
+
+ faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2;
+
+ ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+ add %i3,stridex,%o2 ! px += stridex
+
+ lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px;
+ sra %o1,12,%g5 ! (Y2_1) ind0 = i0 >> 12;
+
+ faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0;
+ and %g5,-8,%o1 ! (Y2_1) ind0 &= -8;
+ ld [%fp+tmp2],%f6 ! (Y2_1) dtmp0 = (double) i0;
+
+ and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff;
+ and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff;
+
+ srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23;
+ add %l2,%o1,%g1 ! (Y2_1) (char*)__mt_constlog4f + ind0
+ faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1;
+
+ fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0;
+ sub %o3,127,%o3 ! (Y0_2) exp0 -= 127;
+ cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000
+
+ fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0;
+ add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000;
+ ldd [%g1+8],%f58 ! (Y2_1) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ fitod %f6,%f54 ! (Y2_1) dtmp0 = (double) i0;
+
+ sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8;
+ and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000;
+ st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0
+
+ sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0;
+ st %o4,[%fp+tmp2] ! (Y0_2) STORE i0
+ bge,pn %icc,.update4 ! (Y0_2) if( ax0 >= 0x7f800000 )
+ nop
+.cont4:
+ lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0
+ add %i2,stridey,%o4 ! py += stridey
+ cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000
+
+ fmuld %f54,%f58,%f28 ! (Y2_1) y0 = dtmp0 * dtmp1;
+ lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0;
+ ble,pn %icc,.update5 ! (Y0_2) if(ux0 < 0x800000)
+ faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1;
+.cont5:
+ and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff;
+ ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0
+ faddd %f26,%f48,%f58 ! (Y0_1) yy0 += dtmp0;
+
+ cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000
+ bge,pn %icc,.update6 ! (Y1_1) if(ay0 >= 0x7f800000)
+ nop
+.cont6:
+ fmuld KA3,%f28,%f62 ! (Y2_1) dtmp0 = KA3 * y0;
+ fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0;
+
+ fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0;
+
+ fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0;
+
+ fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0;
+
+ faddd %f62,KA2,%f26 ! (Y2_1) dtmp0 += KA2;
+
+ add %o2,stridex,%o2 ! px += stridex
+ ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+ fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH)
+
+ sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12;
+ lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px;
+
+ faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0;
+ and %o0,-8,%g5 ! (Y0_2) ind0 &= -8;
+ ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0;
+
+ and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff;
+ and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff;
+
+ cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000
+ add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0
+ faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1;
+
+ fmuld %f26,%f28,%f50 ! (Y2_1) dtmp0 *= y0;
+ srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23;
+ add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000;
+ fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH)
+
+ fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0;
+ ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ sub %i3,127,%i3 ! (Y1_2) exp0 -= 127;
+ fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0;
+
+ sll %i3,8,%i2 ! (Y1_2) exp0 <<= 8;
+ and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000;
+ st %i2,[%fp+tmp4] ! (Y1_2) STORE exp0
+
+ sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0;
+ st %o0,[%fp+tmp5] ! (Y1_2) STORE i0
+ bge,pn %icc,.update7 ! (Y1_2) if(ax0 >= 0x7f800000)
+ nop
+.cont7:
+ lda [stridey+%o4]0x82,%i3 ! (Y2_1) ay0 = *py0
+ cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 0x800000
+ add %o4,stridey,%i2 ! py += stridey;
+ fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH;
+
+ fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1;
+ lda [stridey+%o4]0x82,%f16 ! (Y2_1) ftmp0 = *py0;
+ ble,pn %icc,.update8 ! (Y1_2) if(ux0 < 0x800000)
+ faddd %f50,KA1,%f52 ! (Y2_1) dtmp0 += KA1;
+.cont8:
+ and %i3,MASK_0x7fffffff,%i3 ! (Y2_1) ay0 &= 0x7fffffff
+ ld [%fp+tmp6],%f17 ! (Y2_1) dtmp1 = (double) exp0;
+ faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0;
+
+ cmp %i3,%o5 ! (Y2_1) ay0 ? 0x7f800000
+ bge,pn %icc,.update9 ! (Y2_1) if(ay0 >= 0x7f800000)
+ nop
+
+.cont9:
+ fmovdg %fcc1,LTHRESHOLD,%f58 ! (Y0_1) yy0 = LTHRESH;
+
+ fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0;
+ fstod %f16,%f54 ! (Y2_1) dtmp0 = (double)ftmp0;
+
+ fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0;
+
+ fitod %f17,%f24 ! (Y2_1) dtmp1 = (double) exp0;
+
+ fmuld %f52,%f28,%f52 ! (Y2_1) dtmp0 *= y0;
+ fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0;
+
+ st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0
+ faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2;
+
+ fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH)
+ ldd [%l2+%o1],%f60 ! (Y2_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+
+ sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12;
+ add %o2,stridex,%i3 ! px += stridex
+ lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px;
+
+ and %o1,-8,%o0 ! (Y1_2) ind0 &= -8;
+ add %i2,stridey,%i2 ! py += stridey
+ ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0
+ faddd %f52,KA0,%f4 ! (Y2_1) dtmp0 += KA0;
+
+ and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff;
+ and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff;
+ lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0;
+ fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0;
+
+ srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23;
+ cmp %i1,%o5 ! (Y2_2) ax0 ? 0x7f800000
+ faddd %f60,%f24,%f18 ! (Y2_1) yy0 = dtmp0 + dtmp1;
+
+ fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0;
+ add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0
+ sub %o3,127,%l7 ! (Y2_2) exp0 -= 127;
+ fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fmuld %f4,%f28,%f24 ! (Y2_1) dtmp0 *= y0;
+ add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000;
+ ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0;
+
+ sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8;
+ and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000;
+ st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0
+ fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0;
+
+
+ sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0;
+ st %i1,[%fp+tmp2] ! (Y2_2) STORE i0
+ bge,pn %icc,.update10 ! (Y2_2) if(ax0 >= 0x7f800000)
+ nop
+.cont10:
+ lda [%i2]0x82,%o2 ! (Y0_2) ay0 = *(int*)py0;
+ cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000
+ fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH;
+
+ fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1;
+ ble,pn %icc,.update11 ! (Y2_2) if(ux0 < 0x800000)
+ faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1;
+.cont11:
+ fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0;
+ and %o2,MASK_0x7fffffff,%o2 ! (Y0_2) ay0 &= 0x7fffffff
+ ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0;
+ faddd %f18,%f24,%f52 ! (Y2_1) yy0 += dtmp0;
+
+ ld [%fp+tmp0],%g1 ! (Y0_1) LOAD ind0
+ cmp %o2,%o5 ! (Y0_2) ay0 ? 0x7f800000
+ bge,pn %icc,.update12 ! (Y0_2) if( ay0 >= 0x7f800000)
+ nop
+.cont12:
+ fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0;
+
+ cmp counter,6 ! counter
+ bl,pn %icc,.tail
+ sub %i5,stridez,%o4
+
+ ba .main_loop
+ nop
+
+ .align 16
+.main_loop:
+ fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0;
+ and %g1,255,%o2 ! (Y0_0) ind0 &= 255;
+ sub counter,3,counter ! counter
+ fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH;
+
+ fmuld %f54,%f52,%f18 ! (Y2_0) yy0 *= dtmp0;
+ sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3;
+ add %o4,stridez,%l7 ! pz += stridez
+ faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1;
+
+ fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1);
+ fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0;
+ ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0;
+ fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0;
+ st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0
+
+ faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2;
+
+ fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0;
+ ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+ add %i3,stridex,%o2 ! px += stridex
+ fcmped %fcc0,HTHRESHOLD,%f18 ! (Y2_0) if (yy0 >= HTHRESH)
+
+ lda [%o2]0x82,%i1 ! (Y0_2) ax0 = *px;
+ sra %o1,12,%g5 ! (Y2_1) ind0 = i0 >> 12;
+ fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1);
+
+ faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0;
+ and %g5,-8,%o1 ! (Y2_1) ind0 &= -8;
+ ld [%fp+tmp2],%f6 ! (Y2_1) dtmp0 = (double) i0;
+
+ fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0;
+ and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff;
+ and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff;
+
+ fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0;
+ srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23;
+ add %l2,%o1,%g1 ! (Y2_1) (char*)__mt_constlog4f + ind0
+ faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1;
+
+ fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0;
+ sub %o3,127,%o3 ! (Y0_2) exp0 -= 127;
+ cmp %i3,%o5 ! (Y0_2) ax0 ? 0x7f800000
+ fcmped %fcc1,LTHRESHOLD,%f18 ! (Y2_0) if (yy0 <= LTHRESH)
+
+ fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0;
+ add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000;
+ ldd [%g1+8],%f58 ! (Y2_1) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ fitod %f6,%f54 ! (Y2_1) dtmp0 = (double) i0;
+
+ sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8;
+ and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000;
+ st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0
+ fsubd %f56,%f52,%f52 ! (Y1_0) y0 = yy0 - dtmp0;
+
+ sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0;
+ st %o4,[%fp+tmp2] ! (Y0_2) STORE i0
+ bge,pn %icc,.update13 ! (Y0_2) if( ax0 >= 0x7f800000 )
+ faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0;
+.cont13:
+ lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0
+ add %i2,stridey,%o4 ! py += stridey
+ cmp %i1,MASK_0x007fffff ! (Y0_2) ux0 ? 0x800000
+ fmovdl %fcc0,HTHRESHOLD,%f18 ! (Y2_0) yy0 = HTHRESH;
+
+ fmuld %f54,%f58,%f28 ! (Y2_1) y0 = dtmp0 * dtmp1;
+ lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0;
+ ble,pn %icc,.update14 ! (Y0_2) if(ux0 < 0x800000)
+ faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1;
+.cont14:
+ fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0;
+ and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff;
+ ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0
+ faddd %f26,%f48,%f58 ! (Y0_1) yy0 += dtmp0;
+
+ ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0;
+ cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000
+ bge,pn %icc,.update15 ! (Y1_1) if(ay0 >= 0x7f800000)
+ fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0;
+.cont15:
+ st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0;
+ fmovdg %fcc1,LTHRESHOLD,%f18 ! (Y2_0) yy0 = LTHRESH;
+
+ add %l7,stridez,%l7 ! pz += stridez
+ fmuld KA3,%f28,%f62 ! (Y2_1) dtmp0 = KA3 * y0;
+ and %g5,255,%g5 ! (Y1_0) ind0 &= 255;
+ fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0;
+
+ fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0;
+ sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3;
+ faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1;
+
+ fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1);
+ fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0;
+ ldd [%l0+%i2],%f56 ! (Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0;
+ fdtoi %f18,%f2 ! (Y2_0) ind0 = (int) yy0;
+ st %f2,[%fp+tmp1] ! (Y2_0) STORE ind0
+
+ faddd %f62,KA2,%f26 ! (Y2_1) dtmp0 += KA2;
+
+ fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0;
+ add %o2,stridex,%o2 ! px += stridex
+ ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+ fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH)
+
+ fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1);
+ sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12;
+ lda [%o2]0x82,%o3 ! (Y1_2) ax0 = *px;
+
+ faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0;
+ and %o0,-8,%g5 ! (Y0_2) ind0 &= -8;
+ ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0;
+
+ fitod %f2,%f54 ! (Y2_0) dtmp0 = (double)ind0;
+ and %o3,MASK_0x7fffffff,%i3 ! (Y1_2) exp0 = ax0 & 0x7fffffff;
+ and %o3,MASK_0x007fffff,%o0 ! (Y1_2) ax0 &= 0x007fffff;
+
+ fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0;
+ cmp %i3,%o5 ! (Y1_2) ax0 ? 0x7f800000
+ add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0
+ faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1;
+
+ fmuld %f26,%f28,%f50 ! (Y2_1) dtmp0 *= y0;
+ srl %i3,23,%i3 ! (Y1_2) exp0 >>= 23;
+ add %o0,CONST_0x8000,%i1 ! (Y1_2) i0 = ax0 + 0x8000;
+ fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH)
+
+ fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0;
+ ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ sub %i3,127,%i3 ! (Y1_2) exp0 -= 127;
+ fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0;
+
+ sll %i3,8,%i2 ! (Y1_2) exp0 <<= 8;
+ and %i1,%l6,%i1 ! (Y1_2) i0 &= 0xffff0000;
+ st %i2,[%fp+tmp4] ! (Y1_2) STORE exp0
+ fsubd %f18,%f54,%f26 ! (Y2_0) y0 = yy0 - dtmp0;
+
+ sub %o0,%i1,%o0 ! (Y1_2) i0 = ax0 - i0;
+ st %o0,[%fp+tmp5] ! (Y1_2) STORE i0
+ bge,pn %icc,.update16 ! (Y1_2) if(ax0 >= 0x7f800000)
+ faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0;
+.cont16:
+ lda [stridey+%o4]0x82,%i3 ! (Y2_1) ay0 = *py0
+ cmp %o3,MASK_0x007fffff ! (Y1_2) ux0 ? 0x800000
+ add %o4,stridey,%i2 ! py += stridey;
+ fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH;
+
+ fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1;
+ lda [stridey+%o4]0x82,%f16 ! (Y2_1) ftmp0 = *py0;
+ ble,pn %icc,.update17 ! (Y1_2) if(ux0 < 0x800000)
+ faddd %f50,KA1,%f52 ! (Y2_1) dtmp0 += KA1;
+.cont17:
+ fmuld KB2,%f26,%f4 ! (Y2_0) dtmp0 = KB2 * y0;
+ and %i3,MASK_0x7fffffff,%i3 ! (Y2_1) ay0 &= 0x7fffffff
+ ld [%fp+tmp6],%f17 ! (Y2_1) dtmp1 = (double) exp0;
+ faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0;
+
+ ld [%fp+tmp1],%o0 ! (Y2_0) LOAD ind0
+ cmp %i3,%o5 ! (Y2_1) ay0 ? 0x7f800000
+ bge,pn %icc,.update18 ! (Y2_1) if(ay0 >= 0x7f800000)
+ fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0;
+.cont18:
+ st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0;
+ add %l7,stridez,%o4 ! pz += stridez
+ fmovdg %fcc1,LTHRESHOLD,%f58 ! (Y0_1) yy0 = LTHRESH;
+
+ fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0;
+ and %o0,255,%o0 ! (Y2_0) ind0 &= 255;
+ fstod %f16,%f54 ! (Y2_1) dtmp0 = (double)ftmp0;
+
+ fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0;
+ sll %o0,3,%l7 ! (Y2_0) ind0 <<= 3;
+ faddd %f4,KB1,%f60 ! (Y2_0) dtmp0 += KB1;
+
+ fpackfix %f2,%f2 ! (Y2_0) dtmp1 = vis_fpackfix(dtmp1);
+ fitod %f17,%f24 ! (Y2_1) dtmp1 = (double) exp0;
+ ldd [%l0+%l7],%f4 ! (Y2_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fmuld %f52,%f28,%f52 ! (Y2_1) dtmp0 *= y0;
+ fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0;
+
+ st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0
+ faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2;
+
+ fmuld %f60,%f26,%f62 ! (Y2_0) yy0 = dtmp0 * y0;
+ fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH)
+ ldd [%l2+%o1],%f60 ! (Y2_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+
+ sra %i1,12,%o1 ! (Y1_2) ind0 = i0 >> 12;
+ add %o2,stridex,%i3 ! px += stridex
+ lda [stridex+%o2]0x82,%g1 ! (Y2_2) ax0 = *px;
+ fpadd32 %f2,%f4,%f46 ! (Y2_0) di0 = vis_fpadd32(di0,dtmp1);
+
+ and %o1,-8,%o0 ! (Y1_2) ind0 &= -8;
+ add %i2,stridey,%i2 ! py += stridey
+ ld [%fp+tmp5],%f12 ! (Y1_2) LOAD i0
+ faddd %f52,KA0,%f4 ! (Y2_1) dtmp0 += KA0;
+
+ and %g1,MASK_0x7fffffff,%i1 ! (Y2_2) exp0 = ax0 & 0x7fffffff;
+ and %g1,MASK_0x007fffff,%o2 ! (Y2_2) ax0 &= 0x007fffff;
+ lda [%i2]0x82,%f0 ! (Y0_2) ftmp0 = *py0;
+ fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0;
+
+ fmuld %f62,%f46,%f62 ! (Y2_0) yy0 *= di0;
+ srl %i1,23,%o3 ! (Y2_2) exp0 >>= 23;
+ cmp %i1,%o5 ! (Y2_2) ax0 ? 0x7f800000
+ faddd %f60,%f24,%f18 ! (Y2_1) yy0 = dtmp0 + dtmp1;
+
+ fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0;
+ add %l2,%o0,%i1 ! (Y1_2) (char*)__mt_constlog4f + ind0
+ sub %o3,127,%l7 ! (Y2_2) exp0 -= 127;
+ fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fmuld %f4,%f28,%f24 ! (Y2_1) dtmp0 *= y0;
+ add %o2,CONST_0x8000,%o1 ! (Y2_2) i0 = ax0 + 0x8000;
+ ldd [%i1+8],%f50 ! (Y1_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ fitod %f12,%f28 ! (Y1_2) dtmp0 = (double) i0;
+
+ sll %l7,8,%l7 ! (Y2_2) exp0 <<= 8;
+ and %o1,%l6,%o1 ! (Y2_2) i0 &= 0xffff0000;
+ st %l7,[%fp+tmp6] ! (Y2_2) STORE exp0
+ fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0;
+
+ sub %o2,%o1,%i1 ! (Y2_2) i0 = ax0 - i0;
+ st %i1,[%fp+tmp2] ! (Y2_2) STORE i0
+ bge,pn %icc,.update19 ! (Y2_2) if(ax0 >= 0x7f800000)
+ faddd %f62,%f46,%f22 ! (Y2_0) yy0 += di0;
+.cont19:
+ lda [%i2]0x82,%o2 ! (Y0_2) ay0 = *(int*)py0;
+ cmp %g1,MASK_0x007fffff ! (Y2_2) ux0 ? 0x800000
+ fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH;
+
+ fmuld %f28,%f50,%f46 ! (Y1_2) y0 = dtmp0 * dtmp1;
+ ble,pn %icc,.update20 ! (Y2_2) if(ux0 < 0x800000)
+ faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1;
+.cont20:
+ fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0;
+ and %o2,MASK_0x7fffffff,%o2 ! (Y0_2) ay0 &= 0x7fffffff
+ ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0;
+ faddd %f18,%f24,%f52 ! (Y2_1) yy0 += dtmp0;
+
+ ld [%fp+tmp0],%g1 ! (Y0_1) LOAD ind0
+ cmp %o2,%o5 ! (Y0_2) ay0 ? 0x7f800000
+ bge,pn %icc,.update21 ! (Y0_2) if( ay0 >= 0x7f800000)
+ fdtos %f22,%f12 ! (Y2_0) ftmp0 = (float)yy0;
+.cont21:
+ st %f12,[%o4] ! (Y2_0) *pz0 = ftmp0;
+ cmp counter,6 ! counter
+ bge,pt %icc,.main_loop
+ fstod %f0,%f24 ! (Y0_2) dtmp0 = (double)ftmp0;
+
+.tail:
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ add %o4,stridez,%i5
+
+ fmuld KA3,%f46,%f28 ! (Y1_1) dtmp0 = KA3 * y0;
+ and %g1,255,%o2 ! (Y0_0) ind0 &= 255;
+ fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH;
+
+ fmuld %f54,%f52,%f18 ! (Y2_0) yy0 *= dtmp0;
+ sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3;
+ add %o4,stridez,%l7 ! pz += stridez
+ faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1;
+
+ fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1);
+ fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0;
+ ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0;
+ fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0;
+ st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0
+
+ faddd %f28,KA2,%f28 ! (Y1_1) dtmp0 += KA2;
+
+ fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0;
+ ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+ fcmped %fcc0,HTHRESHOLD,%f18 ! (Y2_0) if (yy0 >= HTHRESH)
+
+ fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1);
+
+ faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0;
+
+ fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0;
+
+ fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0;
+ faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1;
+
+ fmuld %f28,%f46,%f50 ! (Y1_1) dtmp0 *= y0;
+ fcmped %fcc1,LTHRESHOLD,%f18 ! (Y2_0) if (yy0 <= LTHRESH)
+
+ fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0;
+
+ fsubd %f56,%f52,%f52 ! (Y1_0) y0 = yy0 - dtmp0;
+
+ faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0;
+
+ lda [stridey+%i2]0x82,%g1 ! (Y1_1) ay0 = *(unsigned*)py0
+ add %i2,stridey,%o4 ! py += stridey
+ fmovdl %fcc0,HTHRESHOLD,%f18 ! (Y2_0) yy0 = HTHRESH;
+
+ lda [stridey+%i2]0x82,%f2 ! (Y1_1) ftmp0 = *py0;
+ faddd %f50,KA1,%f54 ! (Y1_1) dtmp0 += KA1;
+
+ fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0;
+ and %g1,MASK_0x7fffffff,%g1 ! (Y1_1) ay0 &= 0x7fffffff;
+ ld [%fp+tmp4],%f1 ! (Y1_1) LOAD exp0
+ faddd %f26,%f48,%f58 ! (Y0_1) yy0 += dtmp0;
+
+ ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0;
+ cmp %g1,%o5 ! (Y1_1) ay0 ? 0x7f800000
+ bge,pn %icc,.update22 ! (Y1_1) if(ay0 >= 0x7f800000)
+ fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0;
+.cont22:
+ st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0;
+ fmovdg %fcc1,LTHRESHOLD,%f18 ! (Y2_0) yy0 = LTHRESH;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ add %l7,stridez,%i5
+
+ add %l7,stridez,%l7 ! pz += stridez
+ and %g5,255,%g5 ! (Y1_0) ind0 &= 255;
+ fstod %f2,%f22 ! (Y1_1) dtmp0 = (double)ftmp0;
+
+ fmuld %f24,%f58,%f58 ! (Y0_1) yy0 *= dtmp0;
+ sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3;
+ faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1;
+
+ fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1);
+ fitod %f1,%f48 ! (Y1_1) dtmp1 = (double) exp0;
+ ldd [%l0+%i2],%f56 ! (Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fmuld %f54,%f46,%f54 ! (Y1_1) dtmp0 *= y0;
+ fdtoi %f18,%f2 ! (Y2_0) ind0 = (int) yy0;
+ st %f2,[%fp+tmp1] ! (Y2_0) STORE ind0
+
+
+ fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0;
+ ldd [%l2+%o0],%f60 ! (Y1_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+ fcmped %fcc0,HTHRESHOLD,%f58 ! (Y0_1) if (yy0 >= HTHRESH)
+
+ fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1);
+
+ faddd %f54,KA0,%f56 ! (Y1_1) dtmp0 += KA0;
+
+ fitod %f2,%f54 ! (Y2_0) dtmp0 = (double)ind0;
+
+ fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0;
+ faddd %f60,%f48,%f12 ! (Y1_1) yy0 = dtmp0 + dtmp1;
+
+ fcmped %fcc1,LTHRESHOLD,%f58 ! (Y0_1) if (yy0 <= LTHRESH)
+
+ fmuld %f56,%f46,%f46 ! (Y1_1) dtmp0 *= y0;
+
+ fsubd %f18,%f54,%f26 ! (Y2_0) y0 = yy0 - dtmp0;
+
+ faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0;
+
+ fmovdl %fcc0,HTHRESHOLD,%f58 ! (Y0_1) yy0 = HTHRESH;
+
+
+ fmuld KB2,%f26,%f4 ! (Y2_0) dtmp0 = KB2 * y0;
+ faddd %f12,%f46,%f60 ! (Y1_1) yy0 += dtmp0;
+
+ ld [%fp+tmp1],%o0 ! (Y2_0) LOAD ind0
+ fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0;
+
+ st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0;
+ add %l7,stridez,%o4 ! pz += stridez
+ fmovdg %fcc1,LTHRESHOLD,%f58 ! (Y0_1) yy0 = LTHRESH;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ or %g0,%o4,%i5
+
+ and %o0,255,%o0 ! (Y2_0) ind0 &= 255;
+
+ fmuld %f22,%f60,%f56 ! (Y1_1) yy0 *= dtmp0;
+ sll %o0,3,%l7 ! (Y2_0) ind0 <<= 3;
+ faddd %f4,KB1,%f60 ! (Y2_0) dtmp0 += KB1;
+
+ fpackfix %f2,%f2 ! (Y2_0) dtmp1 = vis_fpackfix(dtmp1);
+ ldd [%l0+%l7],%f4 ! (Y2_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fdtoi %f58,%f10 ! (Y0_1) ind0 = (int) yy0;
+
+ st %f10,[%fp+tmp0] ! (Y0_1) STORE ind0
+
+ fmuld %f60,%f26,%f62 ! (Y2_0) yy0 = dtmp0 * y0;
+ fcmped %fcc0,HTHRESHOLD,%f56 ! (Y1_1) if (yy0 >= HTHRESH)
+
+ fpadd32 %f2,%f4,%f46 ! (Y2_0) di0 = vis_fpadd32(di0,dtmp1);
+
+ add %i2,stridey,%i2 ! py += stridey
+
+ fitod %f10,%f52 ! (Y0_1) dtmp0 = (double)ind0;
+
+ fmuld %f62,%f46,%f62 ! (Y2_0) yy0 *= di0;
+
+ fcmped %fcc1,LTHRESHOLD,%f56 ! (Y1_1) if (yy0 <= LTHRESH)
+
+
+ fsubd %f58,%f52,%f60 ! (Y0_1) y0 = yy0 - dtmp0;
+
+ faddd %f62,%f46,%f22 ! (Y2_0) yy0 += di0;
+
+ fmovdl %fcc0,HTHRESHOLD,%f56 ! (Y1_1) yy0 = HTHRESH;
+
+ fmuld KB2,%f60,%f62 ! (Y0_1) dtmp0 = KB2 * y0;
+
+ ld [%fp+tmp0],%g1 ! (Y0_1) LOAD ind0
+ fdtos %f22,%f12 ! (Y2_0) ftmp0 = (float)yy0;
+
+ st %f12,[%o4] ! (Y2_0) *pz0 = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.begin
+ add %o4,stridez,%i5
+
+ and %g1,255,%o2 ! (Y0_0) ind0 &= 255;
+ fmovdg %fcc1,LTHRESHOLD,%f56 ! (Y1_0) yy0 = LTHRESH;
+
+ sll %o2,3,%i1 ! (Y0_0) ind0 <<= 3;
+ add %o4,stridez,%l7 ! pz += stridez
+ faddd %f62,KB1,%f62 ! (Y0_0) dtmp0 += KB1;
+
+ fpackfix %f10,%f10 ! (Y0_0) dtmp1 = vis_fpackfix(dtmp1);
+ ldd [%l0+%i1],%f58 ! (Y0_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fdtoi %f56,%f20 ! (Y1_0) ind0 = (int) yy0;
+ st %f20,[%fp+tmp1] ! (Y1_0) STORE ind0
+
+ fmuld %f62,%f60,%f62 ! (Y0_0) yy0 = dtmp0 * y0;
+
+ fpadd32 %f10,%f58,%f22 ! (Y0_0) di0 = vis_fpadd32(di0,dtmp1);
+
+ fitod %f20,%f52 ! (Y1_0) dtmp0 = (double)ind0;
+
+ fmuld %f62,%f22,%f62 ! (Y0_0) yy0 *= di0;
+
+ fsubd %f56,%f52,%f52 ! (Y1_0) y0 = yy0 - dtmp0;
+
+ faddd %f62,%f22,%f62 ! (Y0_0) yy0 += di0;
+
+ fmuld KB2,%f52,%f56 ! (Y1_0) dtmp0 = KB2 * y0;
+
+ ld [%fp+tmp1],%g5 ! (Y1_0) ind0 = (int) yy0;
+ fdtos %f62,%f8 ! (Y0_0) ftmp0 = (float)yy0;
+ st %f8,[%l7] ! (Y0_0) *pz0 = ftmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ add %l7,stridez,%i5
+
+ add %l7,stridez,%l7 ! pz += stridez
+ and %g5,255,%g5 ! (Y1_0) ind0 &= 255;
+
+ sll %g5,3,%i2 ! (Y1_0) ind0 <<= 3;
+ faddd %f56,KB1,%f60 ! (Y1_0) dtmp0 += KB1;
+
+ fpackfix %f20,%f20 ! (Y1_0) dtmp1 = vis_fpackfix(dtmp1);
+ ldd [%l0+%i2],%f56 ! (Y1_0) di0 = *(double*)((char*)__mt_constexp2f + ind0);
+
+ fmuld %f60,%f52,%f62 ! (Y1_0) yy0 = dtmp0 * y0;
+
+ fpadd32 %f20,%f56,%f52 ! (Y1_0) di0 = vis_fpadd32(di0,dtmp1);
+
+ fmuld %f62,%f52,%f62 ! (Y1_0) yy0 *= di0;
+
+ faddd %f62,%f52,%f54 ! (Y1_0) yy0 += di0;
+
+ fdtos %f54,%f15 ! (Y1_0) ftmp0 = (float)yy0;
+
+ st %f15,[%l7] ! (Y1_0) *pz0 = ftmp0;
+ ba .begin
+ add %l7,stridez,%i5 ! pz += stridez
+
+.exit:
+ ret
+ restore
+
+ .align 16
+.specs_exit:
+ add %i1,stridex,%o2
+ add %i3,stridey,%i2
+ st %f4,[%i5]
+
+ sub counter,1,counter
+ ba .begin1
+ add %i5,stridez,%i5
+
+.spec1:
+ ld [%l0+2048+64],%f0 ! LOAD 1.0f
+ or %g0,%i1,%o1
+ or %g0,%i3,%o3
+
+ ld [%o2],%f4 ! *px
+ or %g0,%o2,%i1
+ or %g0,%i2,%i3
+
+ ld [%i3],%f6 ! *py
+ or %g0,%l7,%o2
+ fsubs %f0,%f0,%f5 ! 0.0f
+
+ sethi %hi(0x7f800000),%l6
+ cmp %o4,0 ! ay ? 0
+ be,a,pn %icc,.specs_exit ! if(ay == 0)
+ fmovs %f0,%f4 ! return 1.0f
+
+ cmp %o3,%l6 ! ax0 ? 0x7f800000
+ bgu,a %icc,.specs_exit ! ax0 > 0x7f800000
+	fmuls	%f4,%f6,%f4		! return *px * *py; /* |X| or |Y| = NaN */
+
+ cmp %o4,%l6 ! ay ? 0x7f800000
+ bgu,a .specs_exit ! ay > 0x7f800000
+	fmuls	%f4,%f6,%f4		! return *px * *py; /* |X| or |Y| = NaN */
+
+ sethi %hi(0x3f800000),%o5
+ bne,a %icc,1f ! if (ay != 0x7f800000) { /* |Y| = Inf */
+ srl %o1,31,%o1 ! sx = ux >> 31
+
+ cmp %o3,%o5 ! ax0 ? 0x3f800000
+ be,a .specs_exit ! if (ax0 == 0x3f800000)
+ fmuls %f6,%f5,%f4 ! return *py * 0.0f; /* +-1 ** +-Inf = NaN */
+
+ sub %o3,%o5,%o3 ! ax0 - 0x3f800000
+ srl %o2,31,%o2 ! uy >> 31
+
+ srlx %o3,63,%o3 ! (ax0 - 0x3f800000) << 63
+
+ cmp %o3,%o2 ! ((ax0 - 0x3f800000) << 63) ? (uy >> 31)
+ bne,a .specs_exit
+ fzeros %f4 ! return 0.f;
+
+ ba .specs_exit
+ fabss %f6,%f4 ! return fabss(*py)
+1:
+ cmp %o1,0 ! sx ? 0
+ be,pn %icc,.spec1_exit ! if (sx == 0)
+ or %g0,%g0,%o5 ! yisint0 = 0;
+
+ srl %o4,23,%l7 ! exp = ay >> 23;
+ cmp %l7,0x97 ! exp ? 0x97
+ bge,a,pn %icc,.spec1_exit ! if (exp >= 0x97) /* |Y| >= 2^24 */
+ add %g0,2,%o5 ! yisint = 2;
+
+ cmp %l7,0x7f ! exp ? 0x7f
+ bl,pn %icc,.spec1_exit ! if (exp < 0x7f)
+ sub %g0,%l7,%l7 ! exp = -exp;
+
+ add %l7,(0x7f + 23),%l7 ! exp += (0x07f + 23);
+ srl %o4,%l7,%l6 ! i0 = ay >> exp
+ sll %l6,%l7,%l7 ! i0 << exp
+
+ cmp %l7,%o4 ! (i0 << exp) ? ay
+ bne,pn %icc,.spec1_exit ! if((i0 << exp) != ay)
+ and %l6,1,%l6 ! i0 &= 1
+
+ sub %g0,%l6,%l6 ! i0 = -i0;
+ add %l6,2,%o5 ! yisint0 = 2 + i0;
+
+.spec1_exit:
+ srl %o2,31,%o2 ! uy >> 31
+ cmp %o2,0 ! (uy >> 31) ? 0
+ movne %icc,%g0,%o3 ! if (uy >> 31) ax0 = 0;
+
+ sll %o5,31,%o5 ! yisint0 <<= 31;
+ add %o5,%o3,%o5 ! ax0 += yisint0;
+
+ add %i1,stridex,%o2 ! px += stridex;
+ add %i3,stridey,%i2 ! py += stridey;
+ st %o5,[%i5] ! return *(float*)&ax0;
+
+ sub counter,1,counter ! counter--;
+ ba .begin1
+ add %i5,stridez,%i5 ! pz += stridez;
+
+.spec2:
+ or %g0,%i1,%o1
+ or %g0,%i3,%o3
+ ld [%l0+2048+64],%f0 ! LOAD 1.0f
+ or %g0,%o2,%i1
+ or %g0,%i2,%i3
+
+ or %g0,%l7,%o2
+ cmp %o4,0 ! ay ? 0
+ be,a,pn %icc,.specs_exit ! if(ay == 0)
+ fmovs %f0,%f4 ! return 1.0f
+
+ srl %o3,23,%l7 ! exp0 = (ax0 >> 23);
+ sub %l7,127,%l7 ! exp = exp0 = exp0 - 127;
+
+ or %g0,%g0,%o5 ! yisint = 0;
+ cmp %o3,MASK_0x007fffff ! (int)ax0 ? 0x00800000
+ bg,pn %icc,1f ! if ((int)ax0 >= 0x00800000)
+ nop
+
+ ! X = denormal or negative
+ st %o3,[%fp+tmp0] ! *((float*) &ax0) = (float) (int)ax0;
+ ld [%fp+tmp0],%f4
+ fitos %f4,%f4
+ st %f4,[%fp+tmp0]
+ ld [%fp+tmp0],%o3
+
+ srl %o3,23,%l7 ! exp = (ax0 >> 23)
+ sub %l7,127+149,%l7 ! exp -= (127+149)
+1:
+ cmp %o1,0 ! ux ? 0
+ bg,a %icc,.spec_proc ! if((int)ux > 0)
+ sethi %hi(0xffff0000),%l6
+
+ srl %o4,23,%o0 ! exp = ay >> 23;
+ cmp %o0,0x97 ! exp ? 0x97
+ bge,a,pn %icc,2f ! if (exp >= 0x97) /* |Y| >= 2^24 */
+ add %g0,2,%o5 ! yisint0 = 2; /* Y - even */
+
+ cmp %o0,0x7f ! exp ? 0x7f
+ bl,pn %icc,2f ! if(exp < 0x7f)
+ nop
+
+ sub %g0,%o0,%o0 ! exp = -exp;
+ add %o0,(0x7f + 23),%o0 ! exp += (0x7f + 23)
+ srl %o4,%o0,%l6 ! i0 = ay >> ((0x7f + 23) - exp);
+	sll	%l6,%o0,%o0		! i0 << ((0x7f + 23) - exp)
+ cmp %o0,%o4 ! (i0 << ((0x7f + 23) - exp)) ? ay
+	bne,pn	%icc,2f			! if((i0 << ((0x7f + 23) - exp)) != ay)
+ nop
+
+ and %l6,1,%l6 ! i0 &= 1;
+ sub %g0,%l6,%l6 ! i0 = -i0;
+ add %l6,2,%o5 ! yisint = i0 + 2;
+2:
+ cmp %o3,0 ! ax0 ? 0
+ bne,pn %icc,4f ! if(ax0 != 0)
+ nop
+
+ srl %o1,31,%o1 ! sx = ux >> 31
+ srl %o2,31,%o2 ! uy >> 31
+
+ cmp %o2,0 ! (uy >> 31) ? 0
+ be,a,pn %icc,3f ! if((uy >> 31) == 0)
+ fzeros %f4 ! return ZERO
+
+ fdivs %f0,%f3,%f4 ! fy = ONE/ZERO
+3:
+ andcc %o1,%o5,%g0 ! sx & yisint0
+ be,pn %icc,.specs_exit ! if( (sx & yisint0) == 0 )
+ nop
+
+ ba .specs_exit
+ fnegs %f4,%f4 ! fy = -fy;
+4:
+	cmp	%o5,0			! yisint0 ? 0
+ be,a %icc,.specs_exit ! if(yisint0 == 0)
+ fdivs %f3,%f3,%f4 ! return ZERO/ZERO
+
+ sethi %hi(0xffff0000),%l6
+
+.spec_proc:
+ sll %l7,8,%l7 ! exp0 = exp0 << 8;
+ st %l7,[%fp+tmp1] ! STORE exp0
+ and %o3,MASK_0x007fffff,%g5 ! ax0 &= 0x007fffff;
+ ld [%i3],%f14 ! ftmp0 = py[0]
+	sllx	%o5,63,%o5		! yisint0 <<= 63;
+ add %g5,CONST_0x8000,%o3 ! i0 = ax0 + 0x8000;
+ stx %o5,[%fp+tmp5] ! STORE yisint0
+ and %o3,%l6,%l7 ! i0 &= 0xffff0000;
+ sub %g5,%l7,%o1 ! i0 = ax0 - i0;
+ sra %l7,12,%g5 ! ind0 = i0 >> 12;
+ st %o1,[%fp+tmp2] ! STORE i0
+ fstod %f14,%f54 ! dtmp1 = (double)ftmp0
+ and %g5,-8,%g5 ! ind0 &= -8;
+ add %l2,%g5,%l7 ! (char*)__mt_constlog4f + ind0
+ ld [%fp+tmp1],%f18 ! LOAD exp0
+ ld [%fp+tmp2],%f16 ! LOAD i0
+ ldd [%l7+8],%f62 ! dtmp2 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ ldd [%l2+%g5],%f56 ! dtmp3 = *(double *)((char*)__mt_constlog4f + ind0);
+ fitod %f18,%f58 ! dtmp4 = (double)exp0
+ fitod %f16,%f60 ! dtmp5 = (double)i0
+ fmuld %f60,%f62,%f60 ! y0 = dtmp5 * dtmp2;
+ faddd %f56,%f58,%f58 ! yy0 = dtmp3 + dtmp4;
+ fmuld KA3,%f60,%f52 ! dtmp0 = KA3 * y0;
+ faddd %f52,KA2,%f50 ! dtmp0 += KA2;
+ fmuld %f50,%f60,%f48 ! dtmp0 *= y0;
+ faddd %f48,KA1,%f46 ! dtmp0 += KA1;
+ fmuld %f46,%f60,%f62 ! dtmp0 *= y0;
+ ldd [%fp+tmp5],%f24 ! LOAD yisint0
+ faddd %f62,KA0,%f56 ! dtmp0 += KA0;
+ fmuld %f56,%f60,%f52 ! dtmp0 *= y0;
+ faddd %f58,%f52,%f50 ! yy0 += dtmp1;
+ fmuld %f54,%f50,%f52 ! yy0 *= dtmp1;
+ fcmped %fcc0,HTHRESHOLD,%f52 ! if (yy0 >= HTHRESH)
+	fcmped	%fcc1,LTHRESHOLD,%f52	! if (yy0 <= LTHRESH)
+	fmovdl	%fcc0,HTHRESHOLD,%f52	! yy0 = HTHRESH;
+ fmovdg %fcc1,LTHRESHOLD,%f52 ! yy0 = LTHRESH;
+ fdtoi %f52,%f20 ! ind0 = (int) yy0;
+ st %f20,[%fp+tmp3] ! STORE ind0
+ fitod %f20,%f58 ! dtmp0 = (double) ind0;
+ fpackfix %f20,%f20 ! dtmp1 = vis_fpackfix(dtmp1)
+ ld [%fp+tmp3],%g1 ! LOAD ind0
+ fsubd %f52,%f58,%f46 ! y0 = yy0 - dtmp0;
+ fpadd32 %f20,%f24,%f56 ! dtmp1 += yisint0
+ and %g1,255,%o4 ! ind0 &= 255;
+ sll %o4,3,%o3 ! ind0 <<= 3;
+ ldd [%l0+%o3],%f54 ! di0 = *(double*)((char*)__mt_constexp2f + ind0);
+ fmuld KB2,%f46,%f48 ! dtmp0 = KB2 * y0;
+ fpadd32 %f56,%f54,%f56 ! di0 = vis_fpadd32(di0,dtmp1);
+ faddd %f48,KB1,%f62 ! dtmp0 += KB1;
+ fmuld %f62,%f46,%f60 ! yy0 = dtmp0 * y0;
+ fmuld %f60,%f56,%f52 ! yy0 *= di0;
+ faddd %f52,%f56,%f58 ! yy0 += di0;
+ ba .specs_exit
+ fdtos %f58,%f4 ! ftmp0 = (float)yy0;
+
+ .align 16
+.update0:
+ cmp counter,1
+ ble .cont0
+ nop
+
+ add %i2,stridey,%o1
+ stx %o2,[%fp+tmp_px]
+
+ stx %o1,[%fp+tmp_py]
+ sub counter,1,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .cont0
+ or %g0,1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ ble .cont1
+ nop
+
+ add %i2,stridey,%o1
+ stx %o2,[%fp+tmp_px]
+
+ stx %o1,[%fp+tmp_py]
+ sub counter,1,counter
+
+ st counter,[%fp+tmp_counter]
+ ba .cont1
+ or %g0,1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble .cont2
+ nop
+
+ add %i2,stridey,%o2
+ stx %i3,[%fp+tmp_px]
+
+ add %o2,stridey,%o2
+ stx %o2,[%fp+tmp_py]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont2
+ or %g0,2,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble .cont3
+ nop
+
+ add %i2,stridey,%o2
+ stx %i3,[%fp+tmp_px]
+
+ add %o2,stridey,%o2
+ stx %o2,[%fp+tmp_py]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont3
+ or %g0,2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ ble .cont4
+ nop
+
+ sll stridey,1,%g5
+ add %i2,stridey,%o3
+ stx %o2,[%fp+tmp_px]
+
+ add %o3,%g5,%o3
+ stx %o3,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont4
+ or %g0,3,counter
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble .cont5
+ nop
+
+ sll stridey,1,%g5
+ add %i2,stridey,%o3
+ stx %o2,[%fp+tmp_px]
+
+ add %o3,%g5,%o3
+ stx %o3,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont5
+ or %g0,3,counter
+
+ .align 16
+.update6:
+ fzeros %f2
+ cmp counter,1
+ ble .cont6
+ nop
+
+ ld [%fp+tmp_counter],%g1
+
+ sub %o2,stridex,%o3
+ stx %o4,[%fp+tmp_py]
+
+ sub %o3,stridex,%o3
+ add %g1,counter,counter
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont6
+ or %g0,1,counter
+
+ .align 16
+.update7:
+ cmp counter,4
+ ble .cont7
+ nop
+
+ sll stridey,1,%g1
+ add %o4,stridey,%o0
+ stx %o2,[%fp+tmp_px]
+
+ add %o0,%g1,%o0
+ stx %o0,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont7
+ or %g0,4,counter
+
+ .align 16
+.update8:
+ cmp counter,4
+ ble .cont8
+ nop
+
+ sll stridey,1,%g1
+ add %o4,stridey,%o0
+ stx %o2,[%fp+tmp_px]
+
+ add %o0,%g1,%o0
+ stx %o0,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont8
+ or %g0,4,counter
+
+ .align 16
+.update9:
+ cmp counter,2
+ ble .cont9
+ fzeros %f16
+
+ ld [%fp+tmp_counter],%i3
+
+ sub %o2,stridex,%g1
+ stx %i2,[%fp+tmp_py]
+
+ sub %g1,stridex,%g1
+ add %i3,counter,counter
+ stx %g1,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont9
+ or %g0,2,counter
+
+ .align 16
+.update10:
+ cmp counter,5
+ ble .cont10
+ nop
+
+ add %i2,stridey,%i1
+ stx %i3,[%fp+tmp_px]
+
+ add %i1,stridey,%i1
+ stx %i1,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont10
+ or %g0,5,counter
+
+ .align 16
+.update11:
+ cmp counter,5
+ ble .cont11
+ nop
+
+ add %i2,stridey,%i1
+ stx %i3,[%fp+tmp_px]
+
+ add %i1,stridey,%i1
+ stx %i1,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont11
+ or %g0,5,counter
+
+ .align 16
+.update12:
+ fzeros %f0
+ cmp counter,3
+ ble .cont12
+ nop
+
+ ld [%fp+tmp_counter],%o2
+
+ sub %i3,stridex,%i1
+ stx %i2,[%fp+tmp_py]
+
+ sub %i1,stridex,%i1
+ add %o2,counter,counter
+ stx %i1,[%fp+tmp_px]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont12
+ or %g0,3,counter
+
+ .align 16
+.update13:
+ cmp counter,3
+ ble .cont13
+ nop
+
+ sll stridey,1,%g5
+ add %i2,stridey,%o3
+ stx %o2,[%fp+tmp_px]
+
+ add %o3,%g5,%o3
+ stx %o3,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont13
+ or %g0,3,counter
+
+ .align 16
+.update14:
+ cmp counter,3
+ ble .cont14
+ nop
+
+ sll stridey,1,%g5
+ add %i2,stridey,%o3
+ stx %o2,[%fp+tmp_px]
+
+ add %o3,%g5,%o3
+ stx %o3,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont14
+ or %g0,3,counter
+
+ .align 16
+.update15:
+ cmp counter,1
+ ble .cont15
+ fzeros %f2
+
+ ld [%fp+tmp_counter],%g1
+
+ sub %o2,stridex,%o3
+ stx %o4,[%fp+tmp_py]
+
+ sub %o3,stridex,%o3
+ add %g1,counter,counter
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont15
+ or %g0,1,counter
+
+ .align 16
+.update16:
+ cmp counter,4
+ ble .cont16
+ nop
+
+ sll stridey,1,%g1
+ add %o4,stridey,%o0
+ stx %o2,[%fp+tmp_px]
+
+ add %o0,%g1,%o0
+ stx %o0,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont16
+ or %g0,4,counter
+
+ .align 16
+.update17:
+ cmp counter,4
+ ble .cont17
+ nop
+
+ sll stridey,1,%g1
+ add %o4,stridey,%o0
+ stx %o2,[%fp+tmp_px]
+
+ add %o0,%g1,%o0
+ stx %o0,[%fp+tmp_py]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont17
+ or %g0,4,counter
+
+ .align 16
+.update18:
+ fzeros %f16
+ cmp counter,2
+ ble .cont18
+ nop
+
+ ld [%fp+tmp_counter],%i3
+
+ sub %o2,stridex,%g1
+ stx %i2,[%fp+tmp_py]
+
+ sub %g1,stridex,%g1
+ add %i3,counter,counter
+ stx %g1,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont18
+ or %g0,2,counter
+
+ .align 16
+.update19:
+ cmp counter,5
+ ble .cont19
+ nop
+
+ add %i2,stridey,%i1
+ stx %i3,[%fp+tmp_px]
+
+ add %i1,stridey,%i1
+ stx %i1,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont19
+ or %g0,5,counter
+
+ .align 16
+.update20:
+ cmp counter,5
+ ble .cont20
+ nop
+
+ add %i2,stridey,%i1
+ stx %i3,[%fp+tmp_px]
+
+ add %i1,stridey,%i1
+ stx %i1,[%fp+tmp_py]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont20
+ or %g0,5,counter
+
+ .align 16
+.update21:
+ cmp counter,3
+ ble .cont21
+ fzeros %f0
+
+ ld [%fp+tmp_counter],%o2
+
+ sub %i3,stridex,%i1
+ stx %i2,[%fp+tmp_py]
+
+ sub %i1,stridex,%i1
+ add %o2,counter,counter
+ stx %i1,[%fp+tmp_px]
+
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont21
+ or %g0,3,counter
+
+ .align 16
+.update22:
+ cmp counter,3
+ ble .cont22
+ fzeros %f2
+
+ ld [%fp+tmp_counter],%g1
+
+ sub %i3,stridex,%i2
+ stx %i2,[%fp+tmp_px]
+
+ add %g1,counter,counter
+ stx %o4,[%fp+tmp_py]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+ ba .cont22
+ or %g0,3,counter
+
+.stridex_zero:
+ ld [%fp+tmp_counter],counter
+
+ stx %i3,[%fp+tmp_py]
+
+ cmp counter,0
+ ble,pn %icc,.exit
+ lda [%i1]0x82,%i1 ! (Y0_2) ax0 = *px;
+
+ and %i1,MASK_0x7fffffff,%i3 ! (Y0_2) exp0 = ax0 & 0x7fffffff;
+ sub %i3,%l6,%l6
+ and %i1,MASK_0x007fffff,%g5 ! (Y0_2) ax0 &= 0x007fffff;
+ srl %i3,23,%o3 ! (Y0_2) exp0 >>= 23;
+ srl %l6,31,%l6
+ st %l6,[%fp+tmp5]
+ add %g5,CONST_0x8000,%i3 ! (Y0_2) i0 = ax0 + 0x8000;
+ sethi %hi(0xffff0000),%l6
+ sub %o3,127,%o3 ! (Y0_2) exp0 -= 127;
+ and %i3,%l6,%i3 ! (Y0_2) i0 &= 0xffff0000;
+ sll %o3,8,%o4 ! (Y0_2) exp0 <<= 8;
+ st %o4,[%fp+tmp3] ! (Y0_2) STORE exp0
+ sra %i3,12,%o0 ! (Y0_2) ind0 = i0 >> 12;
+ sub %g5,%i3,%o4 ! (Y0_2) i0 = ax0 - i0;
+ st %o4,[%fp+tmp2] ! (Y0_2) STORE i0
+ and %o0,-8,%g5 ! (Y0_2) ind0 &= -8;
+ ld [%fp+tmp2],%f14 ! (Y0_2) dtmp0 = (double) i0;
+ add %l2,%g5,%g1 ! (Y0_2) (char*)__mt_constlog4f + ind0
+ ldd [%g1+8],%f48 ! (Y0_2) dtmp1 = *(double *)((char*)__mt_constlog4f + ind0 + 8);
+ fitod %f14,%f60 ! (Y0_2) dtmp0 = (double) i0;
+ fmuld %f60,%f48,%f48 ! (Y0_2) y0 = dtmp0 * dtmp1;
+ fmuld KA3,%f48,%f62 ! (Y0_2) dtmp0 = KA3 * y0;
+ faddd %f62,KA2,%f22 ! (Y0_2) dtmp0 += KA2;
+ fmuld %f22,%f48,%f26 ! (Y0_2) dtmp0 *= y0;
+ faddd %f26,KA1,%f50 ! (Y0_2) dtmp0 += KA1;
+ ld [%fp+tmp3],%f4 ! (Y0_2) dtmp1 = (double) exp0;
+ fitod %f4,%f26 ! (Y0_1) dtmp1 = (double) exp0;
+ fmuld %f50,%f48,%f50 ! (Y0_1) dtmp0 *= y0;
+ ldd [%l2+%g5],%f60 ! (Y0_1) dtmp0 = *(double *)((char*)__mt_constlog4f + ind0);
+ faddd %f50,KA0,%f58 ! (Y0_1) dtmp0 += KA0;
+ faddd %f60,%f26,%f26 ! (Y0_1) yy0 = dtmp0 + dtmp1;
+ fmuld %f58,%f48,%f48 ! (Y0_1) dtmp0 *= y0;
+ sub %l2,3200,%o4
+ sub %l2,1152-600,%o3
+ faddd %f26,%f48,%f46 ! (Y0_1) yy0 += dtmp0;
+ or %g0,%i5,%g1
+ sethi %hi(0x7f800000),%o1
+
+.xbegin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_py],%o5
+ st %g0,[%fp+tmp_counter]
+.xbegin1:
+ subcc counter,1,counter
+ bneg,pn %icc,.exit
+ nop
+
+ lda [%o5]0x82,%i5 ! (Y0_0) ay = py[0];
+
+ lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0];
+
+ and %i5,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff
+
+ cmp %i3,%o1
+ bge,pn %icc,.xspec
+ nop
+
+ fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0;
+
+ fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy;
+ add %o5,stridey,%o5 ! py += stridey
+
+ lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0];
+
+ lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0];
+
+ and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff
+ fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH)
+
+ cmp %i5,%o1
+ bge,pn %icc,.xupdate0
+ nop
+
+.xcont0:
+ fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0;
+
+ fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH)
+
+ add %o5,stridey,%o5 ! py += stridey
+ fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy;
+
+ lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0];
+
+ lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0];
+
+ and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff
+ fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH;
+
+ cmp %i3,%o1
+ bge,pn %icc,.xupdate1
+ fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH)
+.xcont1:
+ fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH;
+
+ fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy;
+
+ fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0;
+
+ add %o5,stridey,%o5 ! py += stridey
+ st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0
+
+ lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0];
+
+ lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0];
+ fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH;
+
+ and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff
+ fcmped %fcc0,HTHRESHOLD,%f22 ! (Y0_0) if (yy0 >= HTHRESH)
+
+ cmp %l7,%o1
+ bge,pn %icc,.xupdate2
+ nop
+.xcont2:
+ fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH;
+
+ fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH)
+
+ fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0;
+
+ add %o5,stridey,%o5 ! py += stridey
+ fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy;
+
+ fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0;
+ lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0];
+
+ st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0
+
+ fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0];
+
+ and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff
+ fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH;
+
+ cmp %i3,%o1
+ bge,pn %icc,.xupdate3
+ fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH)
+.xcont3:
+ ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0
+ fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0;
+ fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH;
+
+ sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6;
+ and %i2,255,%l7 ! (Y0_2) ii0 &= 255;
+ fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0;
+ sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3;
+ and %l6,-4,%g5 ! (Y0_2) i0 &= -4;
+
+ faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1;
+ fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy;
+ ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]
+
+ fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0;
+ ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ add %o5,stridey,%o5 ! py += stridey
+ st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0
+
+ fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0];
+
+ fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0;
+ fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH;
+ lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0];
+
+ fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0;
+ ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0
+ and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff
+ fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH)
+
+ cmp %i5,%o1
+ bge,pn %icc,.xupdate4
+.xcont4:
+ fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0;
+ fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH;
+ sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6;
+ and %g5,255,%i1 ! (Y1_3) ii0 &= 255;
+ fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0;
+
+ fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH)
+ sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3;
+ and %i0,-4,%i0 ! (Y1_3) i0 &= -4;
+
+ fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0;
+ ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0];
+
+ faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1;
+ add %o5,stridey,%o5 ! py += stridey
+ ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+ fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy;
+
+ fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0;
+ lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0];
+
+ faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0;
+ st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0
+
+ fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0];
+
+ fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0;
+ and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff
+ fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0;
+ cmp %i3,%o1
+ bge,pn %icc,.xupdate5
+ fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH)
+.xcont5:
+ fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0;
+ add %g1,stridez,%i3 ! pz += stridez
+ st %f1,[%g1] ! (Y0_3) pz[0] = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i3,%g1
+
+ ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0
+ fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0;
+ fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH;
+
+ fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0;
+ sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6;
+ and %i2,255,%l7 ! (Y0_2) ii0 &= 255;
+ fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0;
+ sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3;
+ and %l6,-4,%g5 ! (Y0_2) i0 &= -4;
+
+ faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1;
+ fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy;
+ ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]
+
+ fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0;
+ ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0;
+ add %o5,stridey,%o5 ! py += stridey
+ st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0
+
+ fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0];
+
+ fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0;
+ add %i3,stridez,%i5 ! pz += stridez
+ lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0];
+ fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0;
+ and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff
+ ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0
+ fcmped %fcc0,HTHRESHOLD,%f22 ! (Y0_0) if (yy0 >= HTHRESH)
+
+ fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0;
+ st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0;
+ cmp %l7,%o1
+ bge,pn %icc,.xupdate6
+
+.xcont6:
+ fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0;
+ fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0;
+
+ cmp counter,8
+ bl,pn %icc,.xtail
+ nop
+
+ ba .xmain_loop
+ nop
+
+ .align 16
+.xmain_loop:
+ fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH;
+ sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6;
+ and %g5,255,%i1 ! (Y1_3) ii0 &= 255;
+ fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0;
+
+ fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH)
+ sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3;
+ and %i0,-4,%i0 ! (Y1_3) i0 &= -4;
+
+ fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0;
+ sub counter,4,counter
+ ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0];
+
+ faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1;
+ add %o5,stridey,%o5 ! py += stridey
+ ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+ fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy;
+
+ fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0;
+ lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0];
+
+ faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0;
+ st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0
+
+ fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0];
+
+ fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0;
+ and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff
+ fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0;
+ cmp %i3,%o1
+ bge,pn %icc,.xupdate7
+ fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH)
+.xcont7:
+ fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0;
+ add %i5,stridez,%i3 ! pz += stridez
+ st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0;
+
+ ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0
+ fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0;
+ fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH;
+
+ fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0;
+ sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6;
+ and %i2,255,%l7 ! (Y0_2) ii0 &= 255;
+ fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0;
+ sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3;
+ and %l6,-4,%g5 ! (Y0_2) i0 &= -4;
+
+ faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1;
+ fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy;
+ ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]
+
+ fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0;
+ ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0;
+ add %o5,stridey,%o5 ! py += stridey
+ st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0
+
+ fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%i5 ! (Y1_0) ay = ((int*)py)[0];
+
+ fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0;
+ fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH;
+ lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0];
+
+ fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0;
+ ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0
+ and %i5,MASK_0x7fffffff,%i5 ! (Y1_0) ay &= 0x7fffffff
+ fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH)
+
+ fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0;
+ cmp %i5,%o1
+ bge,pn %icc,.xupdate8
+
+.xcont8:
+ fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0;
+ add %i3,stridez,%i5 ! pz += stridez
+ st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0;
+ fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH;
+ sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6;
+ and %g5,255,%i1 ! (Y1_3) ii0 &= 255;
+ fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0;
+
+ fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH)
+ sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3;
+ and %i0,-4,%i0 ! (Y1_3) i0 &= -4;
+
+ fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0;
+ ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0];
+
+ faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1;
+ add %o5,stridey,%o5 ! py += stridey
+ ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+ fmuld %f48,%f46,%f28 ! (Y1_1) yy0 = dtmp0 * yy;
+
+ fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0;
+ lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0];
+
+ faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0;
+ st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0
+
+ fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0];
+
+ fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0;
+ and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff
+ fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0;
+ cmp %i3,%o1
+ bge,pn %icc,.xupdate9
+ fcmped %fcc2,HTHRESHOLD,%f28 ! (Y1_1) if (yy0 >= HTHRESH)
+.xcont9:
+ fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0;
+ add %i5,stridez,%i3 ! pz += stridez
+ st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0;
+
+ ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0
+ fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0;
+ fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH;
+
+ fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0;
+ sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6;
+ and %i2,255,%l7 ! (Y0_2) ii0 &= 255;
+ fcmped %fcc3,LTHRESHOLD,%f28 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0;
+ sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3;
+ and %l6,-4,%g5 ! (Y0_2) i0 &= -4;
+
+ faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1;
+ fmuld %f52,%f46,%f22 ! (Y0_0) yy0 = dtmp0 * yy;
+ ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]
+
+ fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0;
+ ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0;
+ add %o5,stridey,%o5 ! py += stridey
+ st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0
+
+ fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%l7 ! (Y1_0) ay = ((int*)py)[0];
+
+ fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0;
+ add %i3,stridez,%i5 ! pz += stridez
+ lda [%o5]0x82,%f7 ! (Y1_0) ftmp0 = py[0];
+ fmovdl %fcc2,HTHRESHOLD,%f28 ! (Y1_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0;
+ and %l7,MASK_0x7fffffff,%l7 ! (Y1_0) ay &= 0x7fffffff
+ ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0
+ fcmped %fcc0,HTHRESHOLD,%f22 ! (Y0_0) if (yy0 >= HTHRESH)
+
+ fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0;
+ st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0;
+ cmp %l7,%o1
+ bge,pn %icc,.xupdate10
+.xcont10:
+ fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0;
+ cmp counter,4
+ bge,pt %icc,.xmain_loop
+ fstod %f7,%f48 ! (Y1_0) dtmp0 = (double)ftmp0;
+
+.xtail:
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i5,%g1
+
+ fmovdg %fcc3,LTHRESHOLD,%f28 ! (Y1_2) yy0 = LTHRESH;
+ sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6;
+ and %g5,255,%i1 ! (Y1_3) ii0 &= 255;
+ fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0;
+
+ fcmped %fcc1,LTHRESHOLD,%f22 ! (Y0_1) if (yy0 <= LTHRESH)
+ sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3;
+ and %i0,-4,%i0 ! (Y1_3) i0 &= -4;
+
+ fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0;
+ ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0];
+
+ faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1;
+ add %o5,stridey,%o5 ! py += stridey
+ ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+ fmuld %f48,%f46,%f24 ! (Y1_1) yy0 = dtmp0 * yy;
+
+ fdtoi %f28,%f3 ! (Y1_2) ii0 = (int) yy0;
+ lda [%o5]0x82,%i3 ! (Y0_0) ay = py[0];
+
+ faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0;
+ st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0
+
+ fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0;
+ lda [%o5]0x82,%f5 ! (Y0_0) ftmp0 = py[0];
+
+ fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0;
+ and %i3,MASK_0x7fffffff,%i3 ! (Y0_0) ay &= 0x7fffffff
+ fmovdl %fcc0,HTHRESHOLD,%f22 ! (Y0_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0;
+ cmp %i3,%o1
+ bge,pn %icc,.xupdate11
+ fcmped %fcc2,HTHRESHOLD,%f24 ! (Y1_1) if (yy0 >= HTHRESH)
+.xcont11:
+ fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0;
+ add %i5,stridez,%i3 ! pz += stridez
+ st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i3,%g1
+
+ ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0
+ fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0;
+ fstod %f5,%f52 ! (Y0_0) dtmp0 = (double)ftmp0;
+
+ fmovdg %fcc1,LTHRESHOLD,%f22 ! (Y0_1) yy0 = LTHRESH;
+
+ fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0;
+ sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6;
+ and %i2,255,%l7 ! (Y0_2) ii0 &= 255;
+ fcmped %fcc3,LTHRESHOLD,%f24 ! (Y1_1) if (yy0 <= LTHRESH)
+
+ fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0;
+ sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3;
+ and %l6,-4,%g5 ! (Y0_2) i0 &= -4;
+
+ faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1;
+ fmuld %f52,%f46,%f26 ! (Y0_0) yy0 = dtmp0 * yy;
+ ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]
+
+ fdtoi %f22,%f0 ! (Y0_1) ii0 = (int) yy0;
+ ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0;
+ st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0
+
+ fsubd %f28,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0;
+
+ fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0;
+ fmovdl %fcc2,HTHRESHOLD,%f24 ! (Y1_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0;
+ ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0
+ fcmped %fcc0,HTHRESHOLD,%f26 ! (Y0_0) if (yy0 >= HTHRESH)
+
+ fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0;
+
+ fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0;
+ add %i3,stridez,%i5 ! pz += stridez
+ st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i5,%g1
+
+ fmovdg %fcc3,LTHRESHOLD,%f24 ! (Y1_2) yy0 = LTHRESH;
+ sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6;
+ and %g5,255,%i1 ! (Y1_3) ii0 &= 255;
+ fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0;
+
+ fcmped %fcc1,LTHRESHOLD,%f26 ! (Y0_1) if (yy0 <= LTHRESH)
+ sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3;
+ and %i0,-4,%i0 ! (Y1_3) i0 &= -4;
+
+ fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0;
+ ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0];
+
+ faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1;
+ ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ fdtoi %f24,%f3 ! (Y1_2) ii0 = (int) yy0;
+
+ faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0;
+ st %f3,[%fp+tmp0] ! (Y1_2) STORE ii0
+
+ fsubd %f22,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0;
+
+ fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0;
+ fmovdl %fcc0,HTHRESHOLD,%f26 ! (Y0_1) yy0 = HTHRESH;
+
+ fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0;
+
+ fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0;
+ add %i5,stridez,%i3 ! pz += stridez
+ st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i3,%g1
+
+ ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0
+ fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0;
+
+ fmovdg %fcc1,LTHRESHOLD,%f26 ! (Y0_1) yy0 = LTHRESH;
+
+ fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0;
+ sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6;
+ and %i2,255,%l7 ! (Y0_2) ii0 &= 255;
+
+ fitod %f3,%f56 ! (Y1_2) dtmp0 = (double)ii0;
+ sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3;
+ and %l6,-4,%g5 ! (Y0_2) i0 &= -4;
+
+ faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1;
+ ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150) + i0))[0];
+
+ fdtoi %f26,%f0 ! (Y0_1) ii0 = (int) yy0;
+ ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0;
+ st %f0,[%fp+tmp1] ! (Y0_1) STORE ii0
+
+ fsubd %f24,%f56,%f56 ! (Y1_2) y0 = yy0 - dtmp0;
+
+ fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0;
+ add %i3,stridez,%i5 ! pz += stridez
+
+ fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0;
+ ld [%fp+tmp0],%g5 ! (Y1_2) LOAD ii0
+
+ fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0;
+ st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i5,%g1
+
+ fmuld KB2,%f56,%f58 ! (Y1_2) dtmp0 = KB2 * y0;
+
+ sra %g5,6,%i0 ! (Y1_3) i0 = ii0 >> 6;
+ and %g5,255,%i1 ! (Y1_3) ii0 &= 255;
+ fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0;
+
+ sll %i1,3,%i3 ! (Y1_3) ii0 <<= 3;
+ and %i0,-4,%i0 ! (Y1_3) i0 &= -4;
+
+ fitod %f0,%f52 ! (Y0_2) dtmp0 = (double)ii0;
+ ld [%i0+%o3],%f10 ! (Y1_3) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0];
+
+ faddd %f58,KB1,%f58 ! (Y1_3) dtmp0 += KB1;
+ ldd [%o4+%i3],%f18 ! (Y1_3) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0;
+
+ fsubd %f26,%f52,%f40 ! (Y0_2) y0 = yy0 - dtmp0;
+
+ fmuld %f58,%f56,%f56 ! (Y1_3) yy0 = dtmp0 * y0;
+
+ fmuld %f10,%f18,%f50 ! (Y1_3) di0 *= dtmp0;
+
+ fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0;
+ add %i5,stridez,%i3 ! pz += stridez
+ st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i3,%g1
+
+ ld [%fp+tmp1],%i2 ! (Y0_2) LOAD ii0
+ fmuld KB2,%f40,%f36 ! (Y0_2) dtmp0 = KB2 * y0;
+
+ fmuld %f56,%f50,%f58 ! (Y1_3) dtmp0 = yy0 * di0;
+ sra %i2,6,%l6 ! (Y0_2) i0 = ii0 >> 6;
+ and %i2,255,%l7 ! (Y0_2) ii0 &= 255;
+
+ sll %l7,3,%o0 ! (Y0_2) ii0 <<= 3;
+ and %l6,-4,%g5 ! (Y0_2) i0 &= -4;
+
+ faddd %f36,KB1,%f60 ! (Y0_2) dtmp0 += KB1;
+ ld [%g5+%o3],%f10 ! (Y0_2) di0 = ((double*)((char*)(__mt_constexp2fb + 150 ) + i0))[0]
+
+ ldd [%o4+%o0],%f62 ! (Y0_2) dtmp0 = ((double*)((char*)__mt_constexp2fa + ii0))[0];
+
+ faddd %f58,%f50,%f58 ! (Y1_3) dtmp0 += di0;
+
+ fmuld %f60,%f40,%f60 ! (Y0_2) yy0 = dtmp0 * y0;
+
+ fmuld %f10,%f62,%f62 ! (Y0_2) di0 *= dtmp0;
+
+ fdtos %f58,%f9 ! (Y1_3) ftmp0 = (float)dtmp0;
+ add %i3,stridez,%i5 ! pz += stridez
+ st %f9,[%i3] ! (Y1_3) pz[0] = ftmp0;
+
+ subcc counter,1,counter
+ bneg,pn %icc,.xbegin
+ or %g0,%i5,%g1
+
+ fmuld %f60,%f62,%f40 ! (Y0_3) dtmp0 = yy0 * di0;
+
+ faddd %f40,%f62,%f60 ! (Y0_3) dtmp0 += di0;
+
+ fdtos %f60,%f1 ! (Y0_3) ftmp0 = (float)dtmp0;
+ add %i5,stridez,%i3 ! pz += stridez
+ st %f1,[%i5] ! (Y0_3) pz[0] = ftmp0;
+
+ ba .xbegin
+ or %g0,%i3,%g1
+
+.xspec:
+	bg,a,pn	%icc,.yisnan		! if (ay > 0x7f800000) /* |Y| = NaN */
+ ld [%o5],%f8 ! fy = *py;
+
+ ld [%fp+tmp5],%l6 ! LOAD (ax-0x3f800000)<<63
+ srl %i5,31,%i5 ! uy >> 31
+
+ cmp %l6,%i5 ! if((ax < 0x3f800000) != (uy >> 31))
+ be,a,pn %icc,.xspec_exit ! if((ax < 0x3f800000) != (uy >> 31))
+ st %i3,[%g1] ! fy = *(float*)&ay;
+
+ st %g0,[%g1] ! fy = ZERO
+ add %g1,stridez,%g1
+ ba .xbegin1
+ add %o5,stridey,%o5
+
+.yisnan:
+	fmuls	%f8,%f8,%f8		! fy = *py * *py; /* |Y| = NaN */
+ st %f8,[%g1]
+
+.xspec_exit:
+ add %g1,stridez,%g1
+ ba .xbegin1
+ add %o5,stridey,%o5
+
+ .align 16
+.xupdate0:
+ cmp counter,0
+ ble .xcont0
+ fzeros %f7
+
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont0
+ or %g0,0,counter
+
+ .align 16
+.xupdate1:
+ cmp counter,1
+ ble .xcont1
+ fzeros %f5
+
+ sub counter,1,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont1
+ or %g0,1,counter
+
+ .align 16
+.xupdate2:
+ cmp counter,2
+ ble .xcont2
+ fzeros %f7
+
+ sub counter,2,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont2
+ or %g0,2,counter
+
+ .align 16
+.xupdate3:
+ cmp counter,3
+ ble .xcont3
+ fzeros %f5
+
+ sub counter,3,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont3
+ or %g0,3,counter
+
+ .align 16
+.xupdate4:
+ cmp counter,4
+ ble .xcont4
+ fzeros %f7
+
+ sub counter,4,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont4
+ or %g0,4,counter
+
+ .align 16
+.xupdate5:
+ cmp counter,5
+ ble .xcont5
+ fzeros %f5
+
+ sub counter,5,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont5
+ or %g0,5,counter
+
+ .align 16
+.xupdate6:
+ cmp counter,5
+ ble .xcont6
+ fzeros %f7
+
+ sub counter,5,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont6
+ or %g0,5,counter
+
+ .align 16
+.xupdate7:
+ cmp counter,2
+ ble .xcont7
+ fzeros %f5
+
+ sub counter,2,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont7
+ or %g0,2,counter
+
+ .align 16
+.xupdate8:
+ cmp counter,3
+ ble .xcont8
+ fzeros %f7
+
+ sub counter,3,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont8
+ or %g0,3,counter
+
+ .align 16
+.xupdate9:
+ cmp counter,4
+ ble .xcont9
+ fzeros %f5
+
+ sub counter,4,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont9
+ or %g0,4,counter
+
+ .align 16
+.xupdate10:
+ cmp counter,5
+ ble .xcont10
+ fzeros %f7
+
+ sub counter,5,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont10
+ or %g0,5,counter
+
+ .align 16
+.xupdate11:
+ cmp counter,5
+ ble .xcont11
+ fzeros %f5
+
+ sub counter,5,counter
+ stx %o5,[%fp+tmp_py]
+
+ st counter,[%fp+tmp_counter]
+ ba .xcont11
+ or %g0,5,counter
+
+ SET_SIZE(__vpowf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vrhypot.S b/usr/src/libm/src/mvec/vis/__vrhypot.S
new file mode 100644
index 0000000..07954d6
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vrhypot.S
@@ -0,0 +1,3878 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vrhypot.S 1.7 06/01/23 SMI"
+
+ .file "__vrhypot.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
+ .word 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
+ .word 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
+ .word 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
+ .word 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
+ .word 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
+ .word 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
+ .word 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
+ .word 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
+ .word 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
+ .word 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
+ .word 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
+ .word 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
+ .word 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
+ .word 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
+ .word 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
+ .word 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
+ .word 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
+ .word 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
+ .word 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
+ .word 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
+ .word 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
+ .word 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
+ .word 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
+ .word 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
+ .word 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
+ .word 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
+ .word 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
+ .word 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
+ .word 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
+ .word 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
+ .word 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
+
+ .word 0x42300000, 0 ! D2ON36 = 2**36
+ .word 0xffffff00, 0 ! DA0
+ .word 0xfff00000, 0 ! DA1
+ .word 0x3ff00000, 0 ! DONE = 1.0
+ .word 0x40000000, 0 ! DTWO = 2.0
+ .word 0x7fd00000, 0 ! D2ON1022
+ .word 0x3cb00000, 0 ! D2ONM52
+ .word 0x43200000, 0 ! D2ON51
+ .word 0x0007ffff, 0xffffffff ! 0x0007ffffffffffff
+
+#define stridex %l2
+#define stridey %l3
+#define stridez %l5
+
+#define TBL_SHIFT 512
+
+#define TBL %l1
+#define counter %l4
+
+#define _0x7ff00000 %l0
+#define _0x00100000 %o5
+#define _0x7fffffff %l6
+
+#define D2ON36 %f4
+#define DTWO %f6
+#define DONE %f8
+#define DA0 %f58
+#define DA1 %f56
+
+#define dtmp0 STACK_BIAS-0x80
+#define dtmp1 STACK_BIAS-0x78
+#define dtmp2 STACK_BIAS-0x70
+#define dtmp3 STACK_BIAS-0x68
+#define dtmp4 STACK_BIAS-0x60
+#define dtmp5 STACK_BIAS-0x58
+#define dtmp6 STACK_BIAS-0x50
+#define dtmp7 STACK_BIAS-0x48
+#define dtmp8 STACK_BIAS-0x40
+#define dtmp9 STACK_BIAS-0x38
+#define dtmp10 STACK_BIAS-0x30
+#define dtmp11 STACK_BIAS-0x28
+#define dtmp12 STACK_BIAS-0x20
+#define dtmp13 STACK_BIAS-0x18
+#define dtmp14 STACK_BIAS-0x10
+#define dtmp15 STACK_BIAS-0x08
+
+#define ftmp0 STACK_BIAS-0x100
+#define tmp_px STACK_BIAS-0x98
+#define tmp_py STACK_BIAS-0x90
+#define tmp_counter STACK_BIAS-0x88
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x100
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! hx0 = *(int*)px;
+! hy0 = *(int*)py;
+!
+! ((float*)&x0)[0] = ((float*)px)[0];
+! ((float*)&x0)[1] = ((float*)px)[1];
+! ((float*)&y0)[0] = ((float*)py)[0];
+! ((float*)&y0)[1] = ((float*)py)[1];
+!
+! hx0 &= 0x7fffffff;
+! hy0 &= 0x7fffffff;
+!
+! diff0 = hy0 - hx0;
+! j0 = diff0 >> 31;
+! j0 &= diff0;
+! j0 = hy0 - j0;
+! j0 &= 0x7ff00000;
+!
+! j0 = 0x7ff00000 - j0;
+! ll = (long long)j0 << 32;
+! *(long long*)&scl0 = ll;
+!
+! if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
+! {
+! lx = ((int*)px)[1];
+! ly = ((int*)py)[1];
+!
+! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
+! else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
+! else res0 = fabs(x0) * fabs(y0);
+!
+! ((float*)pz)[0] = ((float*)&res0)[0];
+! ((float*)pz)[1] = ((float*)&res0)[1];
+!
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+! continue;
+! }
+! if ( hx0 < 0x00100000 && hy0 < 0x00100000 )
+! {
+! lx = ((int*)px)[1];
+! ly = ((int*)py)[1];
+! ii = hx0 | hy0;
+! ii |= lx;
+! ii |= ly;
+! if ( ii == 0 )
+! {
+! res0 = 1.0 / 0.0;
+! ((float*)pz)[0] = ((float*)&res0)[0];
+! ((float*)pz)[1] = ((float*)&res0)[1];
+!
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+! continue;
+! }
+! x0 = fabs(x0);
+! y0 = fabs(y0);
+! if ( hx0 < 0x00080000 )
+! {
+! x0 = *(long long*)&x0;
+! }
+! else
+! {
+! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
+! x0 = vis_fand(x0, dtmp0);
+! x0 = *(long long*)&x0;
+! x0 += D2ON51;
+! }
+! x0 *= D2ONM52;
+! if ( hy0 < 0x00080000 )
+! {
+! y0 = *(long long*)&y0;
+! }
+! else
+! {
+! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
+! y0 = vis_fand(y0, dtmp0);
+! y0 = *(long long*)&y0;
+! y0 += D2ON51;
+! }
+! y0 *= D2ONM52;
+! *(long long*)&scl0 = 0x7fd0000000000000ULL;
+! }
+! else
+! {
+! x0 *= scl0;
+! y0 *= scl0;
+! }
+!
+! x_hi0 = x0 + D2ON36;
+! y_hi0 = y0 + D2ON36;
+! x_hi0 -= D2ON36;
+! y_hi0 -= D2ON36;
+! x_lo0 = x0 - x_hi0;
+! y_lo0 = y0 - y_hi0;
+! res0_hi = x_hi0 * x_hi0;
+! dtmp0 = y_hi0 * y_hi0;
+! res0_hi += dtmp0;
+! res0_lo = x0 + x_hi0;
+! res0_lo *= x_lo0;
+! dtmp1 = y0 + y_hi0;
+! dtmp1 *= y_lo0;
+! res0_lo += dtmp1;
+!
+! dres = res0_hi + res0_lo;
+! dexp0 = vis_fand(dres,DA1);
+! iarr = ((int*)&dres)[0];
+!
+! iarr >>= 11;
+! iarr &= 0x1fc;
+! dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+! dd = vis_fpsub32(dtmp0, dexp0);
+!
+! dtmp0 = dd * dres;
+! dtmp0 = DTWO - dtmp0;
+! dd *= dtmp0;
+! dtmp1 = dd * dres;
+! dtmp1 = DTWO - dtmp1;
+! dd *= dtmp1;
+! dtmp2 = dd * dres;
+! dtmp2 = DTWO - dtmp2;
+! dres = dd * dtmp2;
+!
+! res0 = vis_fand(dres,DA0);
+!
+! dtmp0 = res0_hi * res0;
+! dtmp0 = DONE - dtmp0;
+! dtmp1 = res0_lo * res0;
+! dtmp0 -= dtmp1;
+! dtmp0 *= dres;
+! res0 += dtmp0;
+!
+! res0 = sqrt ( res0 );
+!
+! res0 = scl0 * res0;
+!
+! ((float*)pz)[0] = ((float*)&res0)[0];
+! ((float*)pz)[1] = ((float*)&res0)[1];
+!
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
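+!
+! The three "dtmp = DTWO - dd * dres; dd *= dtmp" steps above are
+! Newton-Raphson refinements of the table seed dd toward 1/dres; each
+! step roughly doubles the number of correct bits.  A minimal scalar
+! sketch of that refinement (hypothetical helper, illustration only):
+!
+!     static double recip_refine(double r, double d)
+!     {
+!         /* d: table seed, accurate to a few bits of 1/r */
+!         d *= 2.0 - d * r;    /* first Newton step */
+!         d *= 2.0 - d * r;    /* second step */
+!         d *= 2.0 - d * r;    /* third step: near full double precision */
+!         return d;            /* d ~= 1/r */
+!     }
+!
+! The assembly below interleaves these steps across loop iterations
+! to hide the floating-point latency.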
+
+ ENTRY(__vrhypot)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,l1)
+ wr %g0,0x82,%asi
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],stridez
+#else
+ ld [%fp+STACK_BIAS+92],stridez
+#endif
+
+ sll %i2,3,stridex
+ sethi %hi(0x7ff00000),_0x7ff00000
+ st %i0,[%fp+tmp_counter]
+
+ sll %i4,3,stridey
+ sethi %hi(0x00100000),_0x00100000
+ stx %i1,[%fp+tmp_px]
+
+ sll stridez,3,stridez
+ sethi %hi(0x7ffffc00),_0x7fffffff
+ stx %i3,[%fp+tmp_py]
+
+ ldd [TBL+TBL_SHIFT],D2ON36
+ add _0x7fffffff,1023,_0x7fffffff
+
+ ldd [TBL+TBL_SHIFT+8],DA0
+
+ ldd [TBL+TBL_SHIFT+16],DA1
+
+ ldd [TBL+TBL_SHIFT+24],DONE
+
+ ldd [TBL+TBL_SHIFT+32],DTWO
+
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%i4
+ ldx [%fp+tmp_py],%i3
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+
+ lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
+ add %i4,stridex,%i1
+
+ lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+
+ and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
+ bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 )
+ and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
+
+ cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 )
+ sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
+
+ sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
+ cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
+ bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 )
+
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+.cont_spec0:
+ sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
+
+ and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
+
+ sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+.cont_spec1:
+ lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px;
+ mov %i1,%i2
+
+ lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py;
+
+ and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff;
+ mov %i0,%o0
+
+ cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000
+ bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 )
+ and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff;
+
+ cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0;
+ bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 )
+ sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31;
+
+ cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000
+
+ and %o1,%o3,%o1 ! (0_0) j0 &= diff0;
+ bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 )
+ sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0;
+.cont0:
+ and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000;
+
+ sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0;
+.cont1:
+ sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll;
+
+ ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll;
+
+ lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
+
+ lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
+
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0;
+ add %i4,stridex,%i1 ! px += stridex
+
+ fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0;
+
+ lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px;
+
+ add %i0,stridey,%i3 ! py += stridey
+ faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+
+ and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff;
+ bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36;
+
+ cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0;
+ bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (1_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
+.cont4:
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0;
+ ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0;
+
+ lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
+
+ lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0;
+ faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1;
+
+ lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px;
+ mov %i1,%i2
+
+ faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py;
+ mov %i0,%o0
+ faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000
+ bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 )
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+
+	and	%g1,_0x7fffffff,%l7	! (2_0) hy0 &= 0x7fffffff;
+ st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (2_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+.cont7:
+ sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0;
+
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+.cont8:
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0;
+ lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
+
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0];
+ add %i4,stridex,%i1 ! px += stridex
+ fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (7_1) iarr >>= 11;
+ faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc;
+
+ add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px;
+
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 )
+ fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff;
+ st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (3_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update11 ! (3_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+.cont11:
+ sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+.cont12:
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0
+
+ fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0;
+
+ lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0;
+
+ lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
+
+ lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (0_0) iarr >>= 11;
+ faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc;
+
+ add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr
+ mov %i1,%i2
+ lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1;
+
+ ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py;
+ mov %i0,%o0
+ faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff;
+ faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff;
+ bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 )
+ st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (4_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+.cont15:
+ sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+.cont16:
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0;
+
+ lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0;
+ lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2;
+
+ lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
+ add %i1,stridex,%i4 ! px += stridex
+
+ fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0];
+ add %i4,stridex,%i1 ! px += stridex
+ fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (1_0) iarr >>= 11;
+ faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc;
+ fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2;
+
+ add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1;
+
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff;
+ faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff;
+ st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0];
+ bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (5_0) j0 &= diff0;
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+.cont19a:
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+.cont19b:
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+.cont20:
+ fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0;
+ lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0;
+
+ lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2;
+
+ lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0];
+ fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0;
+ fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (2_0) iarr >>= 11;
+ faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc;
+ fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2;
+
+ add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr
+ mov %i1,%i2
+ lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres;
+ ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py;
+ mov %i0,%o0
+ faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff;
+ faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff;
+ st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0];
+ bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31;
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (6_0) j0 &= diff0;
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+.cont23a:
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+.cont23b:
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+.cont24:
+ fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0;
+
+ lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
+ fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0;
+ lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2;
+
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0;
+ fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (3_0) iarr >>= 11;
+ faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc;
+ fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2;
+
+ fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
+ faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo;
+
+ fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
+ st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0];
+ bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 )
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+.cont27a:
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+.cont27b:
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+.cont28:
+ fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0;
+
+ fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0;
+ lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
+ fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2;
+
+ lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0];
+ fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0;
+ ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (4_0) iarr >>= 11;
+ faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc;
+ fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2;
+
+ cmp counter,8
+ bl,pn %icc,.tail
+ nop
+
+ ba .main_loop
+ sub counter,8,counter
+
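+! The 8-way software-pipelined loop below keeps eight elements in
+! flight; the (n_m) tags in the instruction comments track which
+! element and unroll pass each operation belongs to.  Reconstructed
+! from those comments only, one element appears to be processed as in
+! the following C sketch.  Helper names are illustrative, the table
+! seed (dll1/DA1/vis_fpsub32) is replaced by a plain division, and the
+! value of D2ON36 is inferred from its name, so this is a sketch of
+! the data flow rather than the exact source:
+!
+! #include <math.h>
+!
+! static double elem(double x, double y, double scl)
+! {
+!     const double D2ON36 = 68719476736.0;   /* 2**36, hi/lo split constant (assumed) */
+!     double x_hi, x_lo, y_hi, y_lo, res_hi, res_lo, dres, dd, res;
+!
+!     x *= scl; y *= scl;                    /* scl derived from the larger exponent */
+!     x_hi = (x + D2ON36) - D2ON36; x_lo = x - x_hi;
+!     y_hi = (y + D2ON36) - D2ON36; y_lo = y - y_hi;
+!     res_hi = x_hi * x_hi + y_hi * y_hi;    /* head of x*x + y*y */
+!     res_lo = (x + x_hi) * x_lo + (y + y_hi) * y_lo; /* tail of x*x + y*y */
+!     dres = res_hi + res_lo;
+!
+!     dd = 1.0 / dres;                       /* real code: table lookup seed */
+!     dd *= (2.0 - dd * dres);               /* Newton-Raphson steps refining */
+!     dd *= (2.0 - dd * dres);               /* the reciprocal of x*x + y*y */
+!     dres = dd * (2.0 - dd * dres);
+!
+!     res = dres;                            /* real code: truncated with vis_fand */
+!     res += ((1.0 - res_hi * res) - res_lo * res) * dres; /* correction term */
+!     return scl * sqrt(res);                /* net effect ~ 1/sqrt(x*x + y*y) */
+! }
+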
+ .align 16
+.main_loop:
+ fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr
+ lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36;
+
+ faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff;
+ st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0];
+ fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0;
+
+ fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff;
+ st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
+ bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36;
+
+ cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0;
+ bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36;
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31;
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (0_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000
+ bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 )
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+.cont31:
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0;
+ nop
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000;
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+.cont32:
+ fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0;
+ sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0;
+ nop
+ bn,pn %icc,.exit
+ fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f0,%f46,%f30 ! (6_1) res0_hi += dtmp0;
+
+ nop
+ nop
+ lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
+ fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0;
+
+ fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2;
+
+ add %i1,stridex,%i4 ! px += stridex
+ nop
+ lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0];
+ fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0;
+ sra %o2,11,%i3 ! (5_1) iarr >>= 11;
+ nop
+ faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 );
+ lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px;
+ add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr
+ fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36;
+
+ faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff;
+ st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0];
+ fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff;
+ nop
+ bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36;
+
+ cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0;
+ st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31;
+ bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 )
+ faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (1_0) j0 &= diff0;
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+.cont35a:
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ nop
+ sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+.cont35b:
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+.cont36:
+ fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0];
+ fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0;
+ sra %o2,11,%g1 ! (6_1) iarr >>= 11;
+ nop
+ faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1;
+
+ nop
+ and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc;
+ bn,pn %icc,.exit
+ fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 );
+ lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px;
+ add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr
+ fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff;
+ st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
+ fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+	and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff;
+ nop
+ bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000
+ st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31;
+ bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 )
+ faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (2_0) j0 &= diff0;
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+.cont39a:
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+.cont39b:
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+.cont40:
+ fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2;
+
+ add %i1,stridex,%i4 ! px += stridex
+ nop
+ lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0];
+ fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (7_1) iarr >>= 11;
+ nop
+ faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px;
+ fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff;
+ st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
+ fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff;
+ nop
+ bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000
+ st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31;
+ bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 )
+ faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (3_0) j0 &= diff0;
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+.cont43a:
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ nop
+ sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+.cont43b:
+ fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+	fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0;
+.cont44:
+ fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (0_0) iarr >>= 11;
+ nop
+ faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr
+ lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1;
+
+	fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0;
+ nop
+ and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff;
+ faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000
+ st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff;
+ st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31;
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (4_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000
+ bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 )
+ fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
+.cont47a:
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ nop
+ sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+.cont47b:
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+.cont48:
+ fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2;
+
+ lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
+ add %i1,stridex,%i4 ! px += stridex
+ nop
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0];
+ fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (1_0) iarr >>= 11;
+ nop
+ faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0;
+ and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff;
+ nop
+ faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000
+ st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff;
+ st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31;
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (5_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000
+ bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 )
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+.cont51a:
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+.cont51b:
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+.cont52:
+ fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0];
+ fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (2_0) iarr >>= 11;
+ nop
+ faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr
+ lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0;
+ and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff;
+ nop
+ faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000
+ st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff;
+ st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update53 ! (6_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31;
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (6_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000
+ bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 )
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+.cont55a:
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+.cont55b:
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+.cont56:
+ fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
+ nop
+ nop
+ fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0;
+
+ lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
+ nop
+ nop
+ fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2;
+
+ nop
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll;
+ fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (3_0) iarr >>= 11;
+ nop
+ faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2;
+
+ fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
+ faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0;
+ and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
+ nop
+ faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo;
+
+ fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
+ st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
+ st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update58 ! (7_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
+ st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
+ bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 )
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+.cont59a:
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+.cont59b:
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+.cont60:
+ fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0;
+
+ fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
+ bn,pn %icc,.exit
+
+ lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
+ nop
+ nop
+ fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0];
+ fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (4_0) iarr >>= 11;
+ nop
+ faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc;
+ subcc counter,8,counter ! counter -= 8;
+ bpos,pt %icc,.main_loop
+ fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2;
+
+ add counter,8,counter
+
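+! .tail drains the results still in flight when fewer than eight
+! elements remain, storing one result per step until counter goes
+! negative and control returns to .begin.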
+.tail:
+ subcc counter,1,counter
+ bneg .begin
+ nop
+
+ fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr
+ fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres;
+ ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+
+ fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0;
+ st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
+ faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo;
+
+ subcc counter,1,counter
+ st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1;
+ st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0];
+ fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0;
+
+ fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0;
+
+ fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0;
+
+ fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0;
+
+ fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2;
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0];
+ fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1);
+
+ ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1;
+
+ sra %o2,11,%i3 ! (5_1) iarr >>= 11;
+
+ and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc;
+ fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr
+ fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres;
+ ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+
+ fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1;
+ fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
+
+ fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+
+ fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0;
+ fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0;
+
+ fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2;
+
+ fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres;
+
+ ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1;
+
+ fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 );
+ fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres;
+
+ fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1;
+
+ st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
+
+ subcc counter,1,counter
+ st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+
+ fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0;
+
+ fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2;
+
+ ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1;
+
+ fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 );
+
+ fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres;
+
+ fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0;
+
+ st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
+
+ st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+
+ fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0;
+
+ ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1;
+
+ fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 );
+
+	fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres;
+
+ fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0;
+ st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll;
+
+ fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 );
+
+	fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0;
+ st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll;
+
+ fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0;
+ st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+
+ ba .begin
+ add %i5,stridez,%i5
+
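+! The two out-of-line blocks below filter arguments rejected before
+! the pipelined path.  A hedged C sketch of their effect, put together
+! from the instruction comments (0x7ff00000 is the Inf/NaN exponent
+! field, 0x00100000 the smallest normal exponent):
+!
+!     /* .spec0: x or y carries the maximum exponent */
+!     if (isinf(x) || isinf(y))
+!         res = 0.0;                 /* infinite argument -> zero result */
+!     else
+!         res = fabs(x) * fabs(y);   /* propagate the NaN */
+!
+!     /* .spec1: entered for a tiny x; if y is still normal control
+!        rejoins at .cont_spec0, otherwise both inputs are tiny */
+!     if (x == 0.0 && y == 0.0)
+!         res = 1.0 / 0.0;           /* +Inf, raises division by zero */
+!     else
+!         /* rebias the subnormal inputs (D2ON51 trick), store them in
+!            dtmp2/dtmp3, and rejoin the main path at .cont_spec1 */;
+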
+ .align 16
+.spec0:
+ cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000
+ bne 1f ! if ( hx0 != 0x7ff00000 )
+ ld [%i4+4],%i2 ! lx = ((int*)px)[1];
+
+ cmp %i2,0 ! lx ? 0
+ be 3f ! if ( lx == 0 )
+ nop
+1:
+ cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000
+ bne 2f ! if ( hy0 != 0x7ff00000 )
+ ld [%i3+4],%o2 ! ly = ((int*)py)[1];
+
+ cmp %o2,0 ! ly ? 0
+ be 3f ! if ( ly == 0 )
+2:
+ ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0];
+ ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1];
+
+ ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0];
+ add %i4,stridex,%i4 ! px += stridex
+ ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1];
+
+ fabsd %f0,%f0
+
+ fabsd %f2,%f2
+
+ fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0);
+ add %i3,stridey,%i3 ! py += stridey;
+ st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0];
+
+ st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1];
+ add %i5,stridez,%i5 ! pz += stridez
+ ba .begin1
+ sub counter,1,counter
+3:
+ add %i4,stridex,%i4 ! px += stridex
+ add %i3,stridey,%i3 ! py += stridey
+	st %g0,[%i5] ! ((int*)pz)[0] = 0;
+	st %g0,[%i5+4] ! ((int*)pz)[1] = 0;
+
+	add %i5,stridez,%i5 ! pz += stridez;
+ ba .begin1
+ sub counter,1,counter
+
+ .align 16
+.spec1:
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+
+ cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000
+	bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 >= 0x00100000 )
+
+ ld [%i4+4],%i2 ! lx = ((int*)px)[1];
+ or %o7,%l7,%g5 ! ii = hx0 | hy0;
+ fzero %f0
+
+ ld [%i3+4],%o2 ! ly = ((int*)py)[1];
+ or %i2,%g5,%g5 ! ii |= lx;
+
+ orcc %o2,%g5,%g5 ! ii |= ly;
+ bnz,a,pn %icc,1f ! if ( ii != 0 )
+ sethi %hi(0x00080000),%i2
+
+ fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0;
+
+ st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0];
+
+ add %i4,stridex,%i4 ! px += stridex;
+ add %i3,stridey,%i3 ! py += stridey;
+ st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1];
+
+ add %i5,stridez,%i5 ! pz += stridez;
+ ba .begin1
+ sub counter,1,counter
+1:
+ ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0];
+
+ ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1];
+
+ ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0];
+
+ fabsd %f0,%f0 ! x0 = fabs(x0);
+ ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1];
+
+ ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
+ add %fp,dtmp2,%i4
+ add %fp,dtmp3,%i3
+
+ fabsd %f2,%f2 ! y0 = fabs(y0);
+ ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51
+
+ ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52
+ cmp %o7,%i2 ! hx0 ? 0x00080000
+ bl,a 1f ! if ( hx0 < 0x00080000 )
+ fxtod %f0,%f0 ! x0 = *(long long*)&x0;
+
+ fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0);
+ fxtod %f0,%f0 ! x0 = *(long long*)&x0;
+ faddd %f0,%f10,%f0 ! x0 += D2ON51;
+1:
+ std %f0,[%i4]
+
+ ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022
+ cmp %l7,%i2 ! hy0 ? 0x00080000
+ bl,a 1f ! if ( hy0 < 0x00080000 )
+ fxtod %f2,%f2 ! y0 = *(long long*)&y0;
+
+ fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0);
+ fxtod %f2,%f2 ! y0 = *(long long*)&y0;
+ faddd %f2,%f10,%f2 ! y0 += D2ON51;
+1:
+ std %f2,[%i3]
+
+ stx %g5,[%fp+dtmp15] ! D2ONM52
+
+ ba .cont_spec1
+ stx %g1,[%fp+dtmp0] ! D2ON1022
+
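+! The .updateNN blocks below are the pipelined loop's fix-up stubs:
+! when a stage meets an operand it cannot handle, that element (and
+! everything after it) is deferred to the scalar pass and the pipeline
+! keeps moving on a dummy operand.  Roughly, as a sketch, with N the
+! faulting element's position in the current batch:
+!
+!     if (counter > N) {
+!         tmp_counter = counter - N; /* redone later via .begin */
+!         tmp_px = px; tmp_py = py;
+!         counter = N;
+!     }
+!     px = py = (char*)TBL + TBL_SHIFT + 24; /* harmless substitute operand */
+!     goto cont_NN;                  /* resume the interrupted stage */
+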
+ .align 16
+.update0:
+ cmp counter,1
+ ble 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 1,counter
+1:
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont1
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update1:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+	bge,pn %icc,.cont0 ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,1
+ ble,a 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 1,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont1
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont4
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont4
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update4:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+	bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 >= 0x00100000 )
+ sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
+
+ cmp counter,2
+ ble,a 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 2,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont4
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
+
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ sllx %g1,32,%g1
+ ba .cont8
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update6:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ sllx %g1,32,%g1
+ ba .cont8
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update7:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont7 ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,3
+ ble,a 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 3,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ sllx %g1,32,%g1
+ ba .cont8
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update9:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
+
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont12
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update10:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont12
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update11:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont11 ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,4
+ ble,a 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 4,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+ ba .cont12
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update13:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
+
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont16
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update14:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont16
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update15:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont15 ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,5
+ ble,a 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 5,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+ ba .cont16
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update17:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont20
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update18:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont20
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update19:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont19a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,6
+ ble,a 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 6,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont19b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update21:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont24
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update22:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont24
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update23:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont23a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,7
+ ble,a 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 7,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont23b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update25:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont28
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update26:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont28
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update27:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont27a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,8
+ ble,a 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 8,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont27b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update29:
+ cmp counter,1
+ ble 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 1,counter
+1:
+ fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36;
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont32
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update30:
+ cmp counter,1
+ ble 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 1,counter
+1:
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont32
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update31:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont31 ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,1
+ ble,a 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 1,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont32
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update33:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
+ faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
+
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ ba .cont36
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update34:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ ba .cont36
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update35:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont35a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,2
+ ble,a 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 2,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont35b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update37:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres;
+ faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
+
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ ba .cont40
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update38:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ ba .cont40
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update39:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont39a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,3
+ ble,a 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 3,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont39b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update41:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+ faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
+
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0
+
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ ba .cont44
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update42:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0
+
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ ba .cont44
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update43:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont43a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,4
+ ble,a 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 4,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont43b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update45:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ ba .cont48
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update46:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ ba .cont48
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update47:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont47a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,5
+ ble,a 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 5,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont47b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update49:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont52
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update50:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont52
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update51:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont51a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,6
+ ble,a 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 6,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont51b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update53:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont56
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update54:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont56
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update55:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont55a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,7
+ ble,a 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 7,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont55b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update57:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont60
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update58:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont60
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update59:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont59a ! (0_0) if ( hy0 < 0x00100000 )
+
+ cmp counter,8
+ ble,a 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 8,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont59b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.exit:
+ ret
+ restore
+ SET_SIZE(__vrhypot)
+
diff --git a/usr/src/libm/src/mvec/vis/__vrhypotf.S b/usr/src/libm/src/mvec/vis/__vrhypotf.S
new file mode 100644
index 0000000..8db59bc
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vrhypotf.S
@@ -0,0 +1,1518 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vrhypotf.S 1.5 06/01/23 SMI"
+
+ .file "__vrhypotf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+.CONST_TBL:
+! i = [0,63]
+! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
+! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
+! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
+! TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
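+!
+! For reference, the table below could be regenerated by a small C routine
+! along the following lines (a sketch only: the function name is made up,
+! and plain sqrt is used where the formulas above call for sqrtl):
+!
+! #include <math.h>
+! #include <string.h>
+! #include <stdint.h>
+!
+! static void gen_rhypotf_tbl(double tbl[256])
+! {
+!     int i;
+!     for (i = 0; i < 64; i++) {
+!         /* bit pattern of m = 1 + i/64, the 64 break points in [1,2) */
+!         uint64_t bits = 0x3ff0000000000000ULL + ((uint64_t)i << 46);
+!         double m;
+!         memcpy(&m, &bits, sizeof m);
+!         tbl[2*i+0]     = 1.0 / m;
+!         tbl[2*i+1]     = 0.5 / sqrt(2.0) / sqrt(m);
+!         tbl[128+2*i+0] = 1.0 / m;
+!         tbl[128+2*i+1] = 0.25 / sqrt(m);
+!     }
+! }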
+
+ .word 0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd,
+ .word 0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03,
+ .word 0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2,
+ .word 0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671,
+ .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911,
+ .word 0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342,
+ .word 0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a,
+ .word 0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9,
+ .word 0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555,
+ .word 0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54,
+ .word 0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70,
+ .word 0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032,
+ .word 0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74,
+ .word 0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92,
+ .word 0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f,
+ .word 0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3,
+ .word 0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f,
+ .word 0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199,
+ .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577,
+ .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58,
+ .word 0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03,
+ .word 0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37,
+ .word 0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e,
+ .word 0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92,
+ .word 0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826,
+ .word 0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0,
+ .word 0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91,
+ .word 0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50,
+ .word 0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e,
+ .word 0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428,
+ .word 0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4,
+ .word 0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5,
+ .word 0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c,
+ .word 0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55,
+ .word 0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492,
+ .word 0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a,
+ .word 0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a,
+ .word 0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d,
+ .word 0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9,
+ .word 0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3,
+ .word 0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896,
+ .word 0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f,
+ .word 0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9,
+ .word 0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee,
+ .word 0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4,
+ .word 0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62,
+ .word 0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db,
+ .word 0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253,
+ .word 0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a,
+ .word 0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26,
+ .word 0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad,
+ .word 0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c,
+ .word 0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc,
+ .word 0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412,
+ .word 0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488,
+ .word 0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499,
+ .word 0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db,
+ .word 0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438,
+ .word 0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a,
+ .word 0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa,
+ .word 0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d,
+ .word 0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72,
+ .word 0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a,
+ .word 0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9,
+ .word 0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000,
+ .word 0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9,
+ .word 0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b,
+ .word 0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc,
+ .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c,
+ .word 0x3fedae60, 0x76b981db, 0x3fced19b, 0x75e78957,
+ .word 0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2,
+ .word 0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc,
+ .word 0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66,
+ .word 0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350,
+ .word 0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549,
+ .word 0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d,
+ .word 0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937,
+ .word 0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86,
+ .word 0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213,
+ .word 0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358,
+ .word 0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9,
+ .word 0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c,
+ .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2,
+ .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b,
+ .word 0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39,
+ .word 0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118,
+ .word 0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347,
+ .word 0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11,
+ .word 0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550,
+ .word 0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e,
+ .word 0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169,
+ .word 0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394,
+ .word 0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a,
+ .word 0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c,
+ .word 0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7,
+ .word 0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899,
+ .word 0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e,
+ .word 0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee,
+ .word 0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458,
+ .word 0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588,
+ .word 0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a,
+ .word 0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54,
+ .word 0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44,
+ .word 0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31,
+ .word 0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c,
+ .word 0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96,
+ .word 0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009,
+ .word 0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3,
+ .word 0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426,
+ .word 0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6,
+ .word 0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d,
+ .word 0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2,
+ .word 0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7,
+ .word 0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d,
+ .word 0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1,
+ .word 0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5,
+ .word 0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88,
+ .word 0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72,
+ .word 0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729,
+ .word 0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea,
+ .word 0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098,
+ .word 0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746,
+ .word 0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5,
+ .word 0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f,
+ .word 0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467,
+ .word 0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1,
+ .word 0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d,
+ .word 0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6,
+
+ .word 0x000fffff, 0xffffffff ! DC0
+ .word 0x3ff00000, 0 ! DC1
+ .word 0x7fffc000, 0 ! DC2
+ .word 0x7fe00000, 0 ! DA0
+ .word 0x60000000, 0 ! DA1
+ .word 0x80808080, 0x3f800000 ! SCALE , FONE = 1.0f
+ .word 0x3fefffff, 0xfee7f18f ! KA0 = 9.99999997962321453275e-01
+ .word 0xbfdfffff, 0xfe07e52f ! KA1 = -4.99999998166077580600e-01
+ .word 0x3fd80118, 0x0ca296d9 ! KA2 = 3.75066768969515586277e-01
+ .word 0xbfd400fc, 0x0bbb8e78 ! KA3 = -3.12560092408808548438e-01
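+! (KA0..KA3 closely match the series 1/sqrt(1+x) ~ 1 - x/2 + 3x^2/8 - 5x^3/16,
+! i.e. a degree-3 correction applied to the reduced mantissa argument xx0 below.)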
+
+#define _0x7f800000 %o0
+#define _0x7fffffff %o7
+#define TBL %l2
+
+#define TBL_SHIFT 2048
+
+#define stridex %l3
+#define stridey %l4
+#define stridez %l5
+#define counter %i0
+
+#define DA0 %f52
+#define DA1 %f44
+#define SCALE %f6
+
+#define DC0 %f46
+#define DC1 %f8
+#define FZERO %f9
+#define DC2 %f50
+
+#define KA3 %f56
+#define KA2 %f58
+#define KA1 %f60
+#define KA0 %f54
+
+#define tmp_counter STACK_BIAS-0x04
+#define tmp_px STACK_BIAS-0x20
+#define tmp_py STACK_BIAS-0x18
+
+#define ftmp0 STACK_BIAS-0x10
+#define ftmp1 STACK_BIAS-0x0c
+#define ftmp2 STACK_BIAS-0x10
+#define ftmp3 STACK_BIAS-0x0c
+#define ftmp4 STACK_BIAS-0x08
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x20
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! x0 = *px;
+! ax = *(int*)px;
+!
+! y0 = *py;
+! ay = *(int*)py;
+!
+! ax &= 0x7fffffff;
+! ay &= 0x7fffffff;
+!
+! px += stridex;
+! py += stridey;
+!
+! if ( ax >= 0x7f800000 || ay >= 0x7f800000 )
+! {
+! *pz = fabsf(x0) * fabsf(y0);
+! if( ax == 0x7f800000 ) *pz = 0.0f;
+! else if( ay == 0x7f800000 ) *pz = 0.0f;
+! pz += stridez;
+! continue;
+! }
+!
+! if ( ay == 0 )
+! {
+! if ( ax == 0 )
+! {
+! *pz = 1.0f / 0.0f;
+! pz += stridez;
+! continue;
+! }
+! }
+!
+! hyp0 = x0 * (double)x0;
+! dtmp0 = y0 * (double)y0;
+! hyp0 += dtmp0;
+!
+! ibase0 = ((int*)&hyp0)[0];
+!
+! dbase0 = vis_fand(hyp0,DA0);
+! dbase0 = vis_fmul8x16(SCALE, dbase0);
+! dbase0 = vis_fpsub32(DA1,dbase0);
+!
+! hyp0 = vis_fand(hyp0,DC0);
+! hyp0 = vis_for(hyp0,DC1);
+! h_hi0 = vis_fand(hyp0,DC2);
+!
+! ibase0 >>= 10;
+! si0 = ibase0 & 0x7f0;
+! xx0 = ((double*)((char*)TBL + si0))[0];
+!
+! dtmp1 = hyp0 - h_hi0;
+! xx0 = dtmp1 * xx0;
+! res0 = ((double*)((char*)TBL + si0))[1];
+! dtmp2 = KA3 * xx0;
+! dtmp2 += KA2;
+! dtmp2 *= xx0;
+! dtmp2 += KA1;
+! dtmp2 *= xx0;
+! dtmp2 += KA0;
+! res0 *= dtmp2;
+! res0 *= dbase0;
+! ftmp0 = (float)res0;
+! *pz = ftmp0;
+! pz += stridez;
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
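+!
+! The same scheme, written out as a scalar C sketch (illustrative only:
+! frexp/ldexp stand in for the VIS exponent manipulation, the table values
+! are recomputed inline instead of being looked up, and the special cases
+! filtered above (zeros, infinities, NaNs) are assumed away):
+!
+! #include <math.h>
+!
+! static float rhypotf_sketch(float x, float y)
+! {
+!     static const double ka3 = -3.12560092408808548438e-01;
+!     static const double ka2 =  3.75066768969515586277e-01;
+!     static const double ka1 = -4.99999998166077580600e-01;
+!     static const double ka0 =  9.99999997962321453275e-01;
+!     double hyp = (double)x * x + (double)y * y; /* exact for float args */
+!     double m, m_hi, t, p, res;
+!     int e, i;
+!
+!     m = 2.0 * frexp(hyp, &e);          /* hyp = m * 2^(e-1), m in [1,2) */
+!     e = e - 1;                         /* now hyp = m * 2^e             */
+!
+!     i = (int)((m - 1.0) * 64.0);       /* top mantissa bits, cf. si0    */
+!     m_hi = 1.0 + i / 64.0;             /* tabulated break point         */
+!     t = (m - m_hi) * (1.0 / m_hi);     /* xx0 = dtmp1 * xx0             */
+!     p = ((ka3 * t + ka2) * t + ka1) * t + ka0;  /* ~ 1/sqrt(1 + t)      */
+!     res = p / sqrt(m_hi);              /* ~ 1/sqrt(m)                   */
+!
+!     if (e & 1) {                       /* odd exponent: fold in 1/sqrt(2) */
+!         res *= 1.0 / sqrt(2.0);
+!         e = e - 1;
+!     }
+!     return (float)ldexp(res, -e / 2);  /* undo 2^e, cf. dbase0 above    */
+! }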
+
+ ENTRY(__vrhypotf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,l2)
+ wr %g0,0x82,%asi
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],stridez
+#else
+ ld [%fp+STACK_BIAS+92],stridez
+#endif
+
+ stx %i1,[%fp+tmp_px]
+ sll %i2,2,stridex
+
+ stx %i3,[%fp+tmp_py]
+ sll %i4,2,stridey
+
+ st %i0,[%fp+tmp_counter]
+ sll stridez,2,stridez
+ mov %i5,%o1
+
+ ldd [TBL+TBL_SHIFT],DC0
+ ldd [TBL+TBL_SHIFT+8],DC1
+ ldd [TBL+TBL_SHIFT+16],DC2
+ ldd [TBL+TBL_SHIFT+24],DA0
+ ldd [TBL+TBL_SHIFT+32],DA1
+ ldd [TBL+TBL_SHIFT+40],SCALE
+ ldd [TBL+TBL_SHIFT+48],KA0
+
+ ldd [TBL+TBL_SHIFT+56],KA1
+ sethi %hi(0x7f800000),%o0
+
+ ldd [TBL+TBL_SHIFT+64],KA2
+ sethi %hi(0x7ffffc00),%o7
+
+ ldd [TBL+TBL_SHIFT+72],KA3
+ add %o7,1023,%o7
+
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%o4
+ ldx [%fp+tmp_py],%i2
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ nop
+
+ lda [%i2]0x82,%l6 ! (3_0) ay = *(int*)py;
+
+ lda [%o4]0x82,%i5 ! (3_0) ax = *(int*)px;
+
+ lda [%i2]0x82,%f2 ! (3_0) y0 = *py;
+ and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff;
+
+ and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff;
+ cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000
+ bge,pn %icc,.spec0 ! (3_0) if ( ay >= 0x7f800000 )
+ lda [%o4]0x82,%f4 ! (3_0) x0 = *px;
+
+ cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.spec0 ! (3_0) if ( ax >= 0x7f800000 )
+ nop
+
+ cmp %l6,0 ! (3_0)
+ be,pn %icc,.spec1 ! (3_0) if ( ay == 0 )
+ fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0;
+.cont_spec1:
+ lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py;
+
+ fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0;
+ lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px;
+
+ add %o4,stridex,%l0 ! px += stridex
+
+ add %i2,stridey,%i2 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff;
+
+ and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (4_0) y0 = *py;
+
+ faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000
+
+ bge,pn %icc,.update0 ! (4_0) if ( ay >= 0x7f800000 )
+ lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px;
+.cont0:
+ cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000
+ bge,pn %icc,.update1 ! (4_0) if ( ax >= 0x7f800000 )
+ st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0];
+.cont1:
+ cmp %l6,0 ! (4_1) ay ? 0
+ be,pn %icc,.update2 ! (4_1) if ( ay == 0 )
+ fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0;
+.cont2:
+ lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py;
+
+ fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0;
+ lda [%l0+stridex]0x82,%i5 ! (0_0) ax = *(int*)px;
+
+ add %l0,stridex,%i1 ! px += stridex
+
+ add %i2,stridey,%i2 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff;
+
+ and %i5,_0x7fffffff,%i5 ! (0_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (0_0) y0 = *py;
+
+ cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000
+ bge,pn %icc,.update3 ! (0_0) if ( ay >= 0x7f800000 )
+ faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0;
+.cont3:
+ lda [%i1]0x82,%f4 ! (0_0) x0 = *px;
+
+ cmp %i5,_0x7f800000 ! (0_0) ax ? 0x7f800000
+ bge,pn %icc,.update4 ! (0_0) if ( ax >= 0x7f800000 )
+ st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0];
+.cont4:
+ cmp %l6,0 ! (0_0) ay ? 0
+ be,pn %icc,.update5 ! (0_0) if ( ay == 0 )
+ fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0;
+.cont5:
+ lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py;
+
+ fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0;
+ lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px;
+
+ add %i1,stridex,%g5 ! px += stridex
+
+ add %i2,stridey,%o3 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff;
+ fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0);
+
+ and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff;
+ lda [%o3]0x82,%f2 ! (1_0) y0 = *py;
+
+ faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000
+
+ lda [%g5]0x82,%f4 ! (1_0) x0 = *px;
+ bge,pn %icc,.update6 ! (1_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1);
+.cont6:
+ cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000
+ bge,pn %icc,.update7 ! (1_0) if ( ax >= 0x7f800000 )
+ ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0];
+.cont7:
+ st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0];
+
+ cmp %l6,0 ! (1_0) ay ? 0
+ be,pn %icc,.update8 ! (1_0) if ( ay == 0 )
+ fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2);
+.cont8:
+ fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0;
+ sra %l1,10,%o5 ! (3_1) ibase0 >>= 10;
+
+ and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0;
+ lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py;
+
+ fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0;
+ add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0
+ lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0;
+
+ add %g5,stridex,%i4 ! px += stridex
+ ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
+
+ and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff;
+ add %o3,stridey,%i2 ! py += stridey
+ fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0);
+
+ and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (2_0) y0 = *py;
+
+ faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000
+ fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0;
+
+ lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px;
+ bge,pn %icc,.update9 ! (2_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1);
+.cont9:
+ cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ bge,pn %icc,.update10 ! (2_0) if ( ax >= 0x7f800000 )
+ ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0];
+.cont10:
+ st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0];
+
+ fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0;
+ cmp %l6,0 ! (2_0) ay ? 0
+ be,pn %icc,.update11 ! (2_0) if ( ay == 0 )
+ fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2);
+.cont11:
+ fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0;
+ sra %i3,10,%i3 ! (4_1) ibase0 >>= 10;
+
+ and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0;
+ lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py;
+
+ fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0;
+ add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0
+ lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0;
+
+ add %i4,stridex,%o4 ! px += stridex
+ ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
+ faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2;
+
+ add %i2,stridey,%i2 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff;
+ fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0);
+
+ and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (3_0) y0 = *py;
+
+ faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000
+ fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0;
+ lda [%o4]0x82,%f4 ! (3_0) x0 = *px;
+ bge,pn %icc,.update12 ! (3_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1);
+.cont12:
+ cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.update13 ! (3_0) if ( ax >= 0x7f800000 )
+ ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0];
+.cont13:
+ st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0];
+
+ fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0;
+ cmp %l6,0 ! (3_0)
+ be,pn %icc,.update14 ! (3_0) if ( ay == 0 )
+ fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2);
+.cont14:
+ fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0;
+ sra %i1,10,%l1 ! (0_0) ibase0 >>= 10;
+ faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1;
+
+ and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0;
+ lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py;
+
+ fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0;
+ add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0
+ lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0;
+
+ add %o4,stridex,%l0 ! px += stridex
+ ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
+ faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2;
+
+ fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0;
+ add %i2,stridey,%i2 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff;
+ fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0);
+
+ and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (4_0) y0 = *py;
+ fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0);
+
+ faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000
+ ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1];
+ fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0;
+ lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px;
+ bge,pn %icc,.update15 ! (4_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1);
+.cont15:
+ fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000
+ ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0];
+ faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0;
+
+ bge,pn %icc,.update16 ! (4_0) if ( ax >= 0x7f800000 )
+ st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0];
+.cont16:
+ fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0;
+ fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2);
+
+ mov %o1,%i4
+ cmp counter,5
+ bl,pn %icc,.tail
+ nop
+
+ ba .main_loop
+ sub counter,5,counter
+
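+! Note: the loop below is software pipelined.  Each pass of .main_loop
+! retires five elements (counter is decremented by 5 per pass), and the
+! (n_m) tags in the comments appear to identify, for every instruction,
+! the element slot n and the pipeline stage m it belongs to, so loads,
+! arithmetic and stores of several consecutive elements are interleaved
+! within a single pass.
+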
+ .align 16
+.main_loop:
+ fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0;
+ sra %i1,10,%o2 ! (1_1) ibase0 >>= 10;
+ cmp %l6,0 ! (4_1) ay ? 0
+ faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1;
+
+ fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2;
+ and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0;
+ lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py;
+ fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0;
+ add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0
+ lda [%l0+stridex]0x82,%o1 ! (0_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0;
+
+ add %l0,stridex,%i1 ! px += stridex
+ ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
+ be,pn %icc,.update17 ! (4_1) if ( ay == 0 )
+ faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2;
+.cont17:
+ fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0;
+ add %i2,stridey,%i2 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff;
+ fand %f18,DC0,%f30 ! (2_1) hyp0 = vis_fand(hyp0,DC0);
+
+ fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0;
+ and %o1,_0x7fffffff,%o1 ! (0_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (0_0) y0 = *py;
+ fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0);
+
+ faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000
+ ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1];
+ fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0;
+ lda [%i1]0x82,%f4 ! (0_0) x0 = *px;
+ bge,pn %icc,.update18 ! (0_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (2_1) hyp0 = vis_for(hyp0,DC1);
+.cont18:
+ fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ cmp %o1,_0x7f800000 ! (0_0) ax ? 0x7f800000
+ ld [%fp+ftmp3],%l0 ! (2_1) ibase0 = ((int*)&hyp0)[0];
+ faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0;
+
+ add %i4,stridez,%i3 ! pz += stridez
+ st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0];
+ bge,pn %icc,.update19 ! (0_0) if ( ax >= 0x7f800000 )
+ fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0;
+.cont19:
+ fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0;
+ cmp %l6,0 ! (0_0) ay ? 0
+ st %f1,[%i4] ! (3_2) *pz = ftmp0;
+ fand %f28,DC2,%f30 ! (2_1) h_hi0 = vis_fand(hyp0,DC2);
+
+ fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0;
+ sra %l0,10,%i4 ! (2_1) ibase0 >>= 10;
+ be,pn %icc,.update20 ! (0_0) if ( ay == 0 )
+ faddd %f10,KA1,%f40 ! (0_1) dtmp2 += KA1;
+.cont20:
+ fmuld %f62,%f42,%f32 ! (4_2) res0 *= dtmp2;
+ and %i4,2032,%g1 ! (2_1) si0 = ibase0 & 0x7f0;
+ lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py;
+ fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0;
+ add %g1,TBL,%l0 ! (2_1) (char*)TBL + si0
+ lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (2_1) dtmp1 = hyp0 - h_hi0;
+
+ nop
+ add %i1,stridex,%g5 ! px += stridex
+ ldd [TBL+%g1],%f42 ! (2_1) xx0 = ((double*)((char*)TBL + si0))[0];
+ faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2;
+
+ fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0;
+ add %i2,stridey,%o3 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff;
+ fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0);
+
+ fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0;
+ and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff;
+ lda [%o3]0x82,%f2 ! (1_0) y0 = *py;
+ fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0);
+
+ faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000
+ ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1];
+ fmuld %f28,%f42,%f32 ! (2_1) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0;
+ lda [%g5]0x82,%f4 ! (1_0) x0 = *px;
+ bge,pn %icc,.update21 ! (1_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1);
+.cont21:
+ fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000
+ ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0];
+ faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0
+
+ add %i3,stridez,%o1 ! pz += stridez
+ st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0];
+ bge,pn %icc,.update22 ! (1_0) if ( ax >= 0x7f800000 )
+ fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0;
+.cont22:
+ fmuld KA3,%f32,%f34 ! (2_1) dtmp2 = KA3 * xx0;
+ cmp %l6,0 ! (1_0) ay ? 0
+ st %f1,[%i3] ! (4_2) *pz = ftmp0;
+ fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2);
+
+ fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0;
+ sra %l1,10,%o5 ! (3_1) ibase0 >>= 10;
+ be,pn %icc,.update23 ! (1_0) if ( ay == 0 )
+ faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1;
+.cont23:
+ fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2;
+ and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0;
+ lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py;
+ fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0;
+ add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0
+ lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0;
+
+ nop
+ add %g5,stridex,%i4 ! px += stridex
+ ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
+ faddd %f34,KA2,%f10 ! (2_1) dtmp2 += KA2;
+
+ fmuld %f40,%f36,%f40 ! (1_1) dtmp2 *= xx0;
+ and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff;
+ add %o3,stridey,%i2 ! py += stridey
+ fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0);
+
+ fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0;
+ and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (2_0) y0 = *py;
+ fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0);
+
+ faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000
+ ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1];
+ fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f32,%f10 ! (2_1) dtmp2 *= xx0;
+ lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px;
+	bge,pn	%icc,.update24		! (2_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1);
+.cont24:
+ fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0];
+ faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0;
+
+ add %o1,stridez,%g1 ! pz += stridez
+ st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0];
+ bge,pn %icc,.update25 ! (2_0) if ( ax >= 0x7f800000 )
+ fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0;
+.cont25:
+ fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0;
+ cmp %l6,0 ! (2_0) ay ? 0
+ st %f1,[%o1] ! (0_1) *pz = ftmp0;
+ fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2);
+
+ fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0;
+ sra %i3,10,%i3 ! (4_1) ibase0 >>= 10;
+ be,pn %icc,.update26 ! (2_0) if ( ay == 0 )
+ faddd %f10,KA1,%f40 ! (2_1) dtmp2 += KA1;
+.cont26:
+ fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2;
+ and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0;
+ lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py;
+ fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0;
+ add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0
+ lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0;
+
+ nop
+ add %i4,stridex,%o4 ! px += stridex
+ ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
+ faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2;
+
+ fmuld %f40,%f32,%f40 ! (2_1) dtmp2 *= xx0;
+ add %i2,stridey,%i2 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff;
+ fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0);
+
+ fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0;
+ and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (3_0) y0 = *py;
+ fand %f18,DA0,%f24 ! (2_1) dbase0 = vis_fand(hyp0,DA0);
+
+ faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000
+ ldd [%l0+8],%f62 ! (2_1) res0 = ((double*)((char*)arr + si0))[1];
+ fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0;
+ lda [%o4]0x82,%f4 ! (3_0) x0 = *px;
+ bge,pn %icc,.update27 ! (3_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1);
+.cont27:
+ fmul8x16 SCALE,%f24,%f24 ! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0];
+ faddd %f40,KA0,%f42 ! (2_1) dtmp2 += KA0;
+
+ add %g1,stridez,%o3 ! pz += stridez
+ st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0];
+ bge,pn %icc,.update28 ! (3_0) if ( ax >= 0x7f800000 )
+ fdtos %f38,%f1 ! (1_1) ftmp0 = (float)res0;
+.cont28:
+ fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0;
+	cmp	%l6,0			! (3_0) ay ? 0
+ st %f1,[%g1] ! (1_1) *pz = ftmp0;
+ fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2);
+
+ fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0;
+ sra %i1,10,%l1 ! (0_0) ibase0 >>= 10;
+ be,pn %icc,.update29 ! (3_0) if ( ay == 0 )
+ faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1;
+.cont29:
+ fmuld %f62,%f42,%f38 ! (2_1) res0 *= dtmp2;
+ and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0;
+ lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py;
+ fpsub32 DA1,%f24,%f24 ! (2_1) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0;
+ add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0
+ lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px;
+ fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0;
+
+ add %o3,stridez,%i4 ! pz += stridez
+ add %o4,stridex,%l0 ! px += stridex
+ ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
+ faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2;
+
+ fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0;
+ add %i2,stridey,%i2 ! py += stridey
+ and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff;
+ fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0);
+
+ fmuld %f38,%f24,%f38 ! (2_1) res0 *= dbase0;
+ and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff;
+ lda [%i2]0x82,%f2 ! (4_0) y0 = *py;
+ fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0);
+
+ faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0;
+ cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000
+ ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1];
+ fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0;
+ lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px;
+ bge,pn %icc,.update30 ! (4_0) if ( ay >= 0x7f800000 )
+ for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1);
+.cont30:
+ fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000
+ ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0];
+ faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0;
+
+ bge,pn %icc,.update31 ! (4_0) if ( ax >= 0x7f800000 )
+ st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0];
+.cont31:
+ subcc counter,5,counter ! counter -= 5;
+ fdtos %f38,%f1 ! (2_1) ftmp0 = (float)res0;
+
+ fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0;
+ st %f1,[%o3] ! (2_1) *pz = ftmp0;
+ bpos,pt %icc,.main_loop
+ fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2);
+
+ add counter,5,counter
+
+.tail:
+ subcc counter,1,counter
+ bneg .begin
+ mov %i4,%o1
+
+ sra %i1,10,%o2 ! (1_1) ibase0 >>= 10;
+ faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1;
+
+ fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2;
+ and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0;
+ fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0
+ fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0;
+
+ ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
+ faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2;
+
+ fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0;
+
+ fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0;
+ fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0);
+
+ ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1];
+ fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0;
+
+ fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0;
+
+ fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0;
+
+ add %i4,stridez,%i3 ! pz += stridez
+ fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0;
+
+ fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0;
+ st %f1,[%i4] ! (3_2) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ mov %i3,%o1
+
+ faddd %f10,KA1,%f40 ! (0_1) dtmp2 += KA1;
+
+ fmuld %f62,%f42,%f32 ! (4_2) res0 *= dtmp2;
+ fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);
+
+
+ faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2;
+
+ fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0;
+
+ fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0;
+ fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0);
+
+ ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1];
+
+ fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0;
+
+ fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0
+
+ add %i3,stridez,%o1 ! pz += stridez
+ fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0;
+
+ st %f1,[%i3] ! (4_2) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ nop
+
+ faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1;
+
+ fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2;
+ fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ fmuld %f40,%f36,%f40 ! (1_1) dtmp2 *= xx0;
+
+ fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0;
+ fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0);
+
+ ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1];
+
+ fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
+ faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0;
+
+ add %o1,stridez,%g1 ! pz += stridez
+ fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0;
+
+ st %f1,[%o1] ! (0_1) *pz = ftmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ mov %g1,%o1
+
+ fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2;
+ fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);
+
+ fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0;
+
+ fdtos %f38,%f1 ! (1_1) ftmp0 = (float)res0;
+ st %f1,[%g1] ! (1_1) *pz = ftmp0;
+
+ ba .begin
+ add %g1,stridez,%o1 ! pz += stridez
+
+ .align 16
+.spec0:
+ fabss %f2,%f2 ! fabsf(y0);
+
+ fabss %f4,%f4 ! fabsf(x0);
+
+ fcmps %f2,%f4
+
+ cmp %l6,_0x7f800000 ! ay ? 0x7f800000
+ be,a 1f ! if( ay == 0x7f800000 )
+ st %g0,[%o1] ! *pz = 0.0f;
+
+ cmp %i5,_0x7f800000 ! ax ? 0x7f800000
+ be,a 1f ! if( ax == 0x7f800000 )
+ st %g0,[%o1] ! *pz = 0.0f;
+
+ fmuls %f2,%f4,%f2 ! fabsf(x0) * fabsf(y0);
+	st	%f2,[%o1]		! *pz = fabsf(x0) * fabsf(y0);
+1:
+ add %o4,stridex,%o4 ! px += stridex;
+ add %i2,stridey,%i2 ! py += stridey;
+
+ add %o1,stridez,%o1 ! pz += stridez;
+ ba .begin1
+ sub counter,1,counter ! counter--;
+
+ .align 16
+.spec1:
+ cmp %i5,0 ! ax ? 0
+ bne,pt %icc,.cont_spec1 ! if ( ax != 0 )
+ nop
+
+ add %o4,stridex,%o4 ! px += stridex;
+ add %i2,stridey,%i2 ! py += stridey;
+
+ fdivs %f7,%f9,%f2 ! 1.0f / 0.0f
+ st %f2,[%o1] ! *pz = 1.0f / 0.0f;
+
+ add %o1,stridez,%o1 ! pz += stridez;
+ ba .begin1
+ sub counter,1,counter ! counter--;
+
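+! The two special-case paths above appear to work as follows.  .spec0 is
+! reached when |x| or |y| is Inf or NaN: an infinite operand forces the
+! hypotenuse to +Inf, so 0.0f is stored as its reciprocal, while NaN
+! operands propagate through the single-precision product
+! fabsf(x0) * fabsf(y0).  .spec1 is reached when y == 0: a nonzero x
+! rejoins the regular path at .cont_spec1, otherwise the 1.0f / 0.0f
+! division stores +Inf (raising division-by-zero) for rhypotf(0, 0).
+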
+ .align 16
+.update0:
+ cmp counter,1
+ ble .cont0
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont0
+ mov 1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ ble .cont1
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont1
+ mov 1,counter
+
+ .align 16
+.update2:
+ cmp %i5,0
+ bne .cont2
+
+ cmp counter,1
+ ble .cont2
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont2
+ mov 1,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble .cont3
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i1,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont3
+ mov 2,counter
+
+ .align 16
+.update4:
+ cmp counter,2
+ ble .cont4
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i1,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont4
+ mov 2,counter
+
+ .align 16
+.update5:
+ cmp %i5,0
+ bne .cont5
+
+ cmp counter,2
+ ble .cont5
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i1,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont5
+ mov 2,counter
+
+ .align 16
+.update6:
+ cmp counter,3
+ ble .cont6
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+
+ stx %o3,[%fp+tmp_py]
+ ba .cont6
+ mov 3,counter
+
+ .align 16
+.update7:
+ cmp counter,3
+ ble .cont7
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+
+ stx %o3,[%fp+tmp_py]
+ ba .cont7
+ mov 3,counter
+
+ .align 16
+.update8:
+ cmp %i5,0
+ bne .cont8
+
+ cmp counter,3
+ ble .cont8
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+
+ stx %o3,[%fp+tmp_py]
+ ba .cont8
+ mov 3,counter
+
+ .align 16
+.update9:
+ cmp counter,4
+ ble .cont9
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont9
+ mov 4,counter
+
+ .align 16
+.update10:
+ cmp counter,4
+ ble .cont10
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont10
+ mov 4,counter
+
+ .align 16
+.update11:
+ cmp %i5,0
+ bne .cont11
+
+ cmp counter,4
+ ble .cont11
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont11
+ mov 4,counter
+
+ .align 16
+.update12:
+ cmp counter,5
+ ble .cont12
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %o4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont12
+ mov 5,counter
+
+ .align 16
+.update13:
+ cmp counter,5
+ ble .cont13
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %o4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont13
+ mov 5,counter
+
+ .align 16
+.update14:
+ cmp %i5,0
+ bne .cont14
+
+ cmp counter,5
+ ble .cont14
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %o4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont14
+ mov 5,counter
+
+ .align 16
+.update15:
+ cmp counter,6
+ ble .cont15
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont15
+ mov 6,counter
+
+ .align 16
+.update16:
+ cmp counter,6
+ ble .cont16
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont16
+ mov 6,counter
+
+ .align 16
+.update17:
+ cmp %i5,0
+ bne .cont17
+
+ cmp counter,1
+ ble .cont17
+ fmovd DC1,%f62
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont17
+ mov 1,counter
+
+ .align 16
+.update18:
+ cmp counter,2
+ ble .cont18
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i1,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont18
+ mov 2,counter
+
+ .align 16
+.update19:
+ cmp counter,2
+ ble .cont19
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i1,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont19
+ mov 2,counter
+
+ .align 16
+.update20:
+ cmp %o1,0
+ bne .cont20
+
+ cmp counter,2
+ ble .cont20
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i1,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont20
+ mov 2,counter
+
+ .align 16
+.update21:
+ cmp counter,3
+ ble .cont21
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+
+ stx %o3,[%fp+tmp_py]
+ ba .cont21
+ mov 3,counter
+
+ .align 16
+.update22:
+ cmp counter,3
+ ble .cont22
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+
+ stx %o3,[%fp+tmp_py]
+ ba .cont22
+ mov 3,counter
+
+ .align 16
+.update23:
+ cmp %i5,0
+ bne .cont23
+
+ cmp counter,3
+ ble .cont23
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %g5,[%fp+tmp_px]
+
+ stx %o3,[%fp+tmp_py]
+ ba .cont23
+ mov 3,counter
+
+ .align 16
+.update24:
+ cmp counter,4
+ ble .cont24
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont24
+ mov 4,counter
+
+ .align 16
+.update25:
+ cmp counter,4
+ ble .cont25
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont25
+ mov 4,counter
+
+ .align 16
+.update26:
+ cmp %i5,0
+ bne .cont26
+
+ cmp counter,4
+ ble .cont26
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont26
+ mov 4,counter
+
+ .align 16
+.update27:
+ cmp counter,5
+ ble .cont27
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %o4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont27
+ mov 5,counter
+
+ .align 16
+.update28:
+ cmp counter,5
+ ble .cont28
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %o4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont28
+ mov 5,counter
+
+ .align 16
+.update29:
+ cmp %i5,0
+ bne .cont29
+
+ cmp counter,5
+ ble .cont29
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %o4,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont29
+ mov 5,counter
+
+ .align 16
+.update30:
+ cmp counter,6
+ ble .cont30
+ ld [TBL+TBL_SHIFT+44],%f2
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont30
+ mov 6,counter
+
+ .align 16
+.update31:
+ cmp counter,6
+ ble .cont31
+ ld [TBL+TBL_SHIFT+44],%f4
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %l0,[%fp+tmp_px]
+
+ stx %i2,[%fp+tmp_py]
+ ba .cont31
+ mov 6,counter
+
+ .align 16
+.exit:
+ ret
+ restore
+ SET_SIZE(__vrhypotf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vrsqrt.S b/usr/src/libm/src/mvec/vis/__vrsqrt.S
new file mode 100644
index 0000000..08c9146
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vrsqrt.S
@@ -0,0 +1,2156 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vrsqrt.S 1.4 06/01/23 SMI"
+
+ .file "__vrsqrt.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0xbfe00000, 0x0000002f ! K1 =-5.00000000000005209867e-01;
+ .word 0x3fd80000, 0x00000058 ! K2 = 3.75000000000004884257e-01;
+ .word 0xbfd3ffff, 0xff444bc8 ! K3 =-3.12499999317136886551e-01;
+ .word 0x3fd17fff, 0xff5006fe ! K4 = 2.73437499359815081532e-01;
+ .word 0xbfcf80bb, 0xb33ef574 ! K5 =-2.46116125605037803130e-01;
+ .word 0x3fcce0af, 0xf8156949 ! K6 = 2.25606914648617522896e-01;
+
+ .word 0x001fffff, 0xffffffff ! DC0
+ .word 0x3fe00000, 0x00000000 ! DC1
+ .word 0x00002000, 0x00000000 ! DC2
+ .word 0x7fffc000, 0x00000000 ! DC3
+ .word 0x0007ffff, 0xffffffff ! DC4
+
+ .word 0x43200000, 0x00000000 ! D2ON51 = pow(2,51)
+ .word 0x3ff00000, 0x00000000 ! DONE = 1.0
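+
+! The constants above drive the reciprocal square root kernel: K1..K6 are,
+! up to small minimax refinements in their trailing bits, the coefficients
+! of the binomial series (1 + t)^(-1/2) = 1 - (1/2)t + (3/8)t^2 - (5/16)t^3
+! + (35/128)t^4 - ..., which the code evaluates by Horner's rule on the
+! reduced argument xx, while DC0..DC3 are bit masks used to strip the
+! exponent, place the fraction in a fixed binade, and round it to the
+! table point res_c that indexes __vlibm_TBL_rsqrt.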
+
+#define stridex %l5
+#define stridey %l7
+#define counter %l0
+#define TBL %l3
+#define _0x7ff00000 %o0
+#define _0x00100000 %o1
+
+#define DC0 %f56
+#define DC1 %f54
+#define DC2 %f48
+#define DC3 %f46
+#define K6 %f42
+#define K5 %f20
+#define K4 %f52
+#define K3 %f50
+#define K2 %f14
+#define K1 %f12
+#define DONE %f4
+
+#define tmp_counter %g5
+#define tmp_px %o5
+
+#define tmp0 STACK_BIAS-0x40
+#define tmp1 STACK_BIAS-0x38
+#define tmp2 STACK_BIAS-0x30
+#define tmp3 STACK_BIAS-0x28
+#define tmp4 STACK_BIAS-0x20
+#define tmp5 STACK_BIAS-0x18
+#define tmp6 STACK_BIAS-0x10
+#define tmp7 STACK_BIAS-0x08
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! ((float*)&res)[0] = ((float*)px)[0];
+! ((float*)&res)[1] = ((float*)px)[1];
+! hx = *(int*)px;
+! if ( hx >= 0x7ff00000 )
+! {
+! res = DONE / res;
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! px += stridex;
+! py += stridey;
+! continue;
+! }
+! if ( hx < 0x00100000 )
+! {
+! ax = hx & 0x7fffffff;
+! lx = ((int*)px)[1];
+!
+! if ( (ax | lx) == 0 )
+! {
+! res = DONE / res;
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! px += stridex;
+! py += stridey;
+! continue;
+! }
+! else if ( hx >= 0 )
+! {
+! if ( hx < 0x00080000 )
+! {
+! res = *(long long*)&res;
+! hx = *(int*)&res - (537 << 21);
+! }
+! else
+! {
+! res = vis_fand(res,DC4);
+! res = *(long long*)&res;
+! res += D2ON51;
+! hx = *(int*)&res - (537 << 21);
+! }
+! }
+! else
+! {
+! res = sqrt(res);
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! px += stridex;
+! py += stridey;
+! continue;
+! }
+! }
+!
+! iexp = hx >> 21;
+! iexp = -iexp;
+! iexp += 0x5fe;
+! lexp = iexp << 52;
+! dlexp = *(double*)&lexp;
+! hx >>= 10;
+! hx &= 0x7f8;
+! hx += 8;
+! hx &= -16;
+!
+! res = vis_fand(res,DC0);
+! res = vis_for(res,DC1);
+! res_c = vis_fpadd32(res,DC2);
+! res_c = vis_fand(res_c,DC3);
+!
+! addr = (char*)arr + hx;
+! dexp_hi = ((double*)addr)[0];
+! dexp_lo = ((double*)addr)[1];
+! dtmp0 = dexp_hi * dexp_hi;
+! xx = res - res_c;
+! xx *= dtmp0;
+! res = K6 * xx;
+! res += K5;
+! res *= xx;
+! res += K4;
+! res *= xx;
+! res += K3;
+! res *= xx;
+! res += K2;
+! res *= xx;
+! res += K1;
+! res *= xx;
+! res = dexp_hi * res;
+! res += dexp_lo;
+! res += dexp_hi;
+!
+! res *= dlexp;
+!
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
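+
+! As a reference, the polynomial step of the algorithm above can be written
+! as scalar C.  This sketch covers only the Horner evaluation and the final
+! recombination; the table lookup, exponent rebuild and special cases are as
+! described in the pseudocode, and the helper name rsqrt_core exists only in
+! this comment, not in the library:
+!
+!	static const double K1 = -5.00000000000005209867e-01;
+!	static const double K2 =  3.75000000000004884257e-01;
+!	static const double K3 = -3.12499999317136886551e-01;
+!	static const double K4 =  2.73437499359815081532e-01;
+!	static const double K5 = -2.46116125605037803130e-01;
+!	static const double K6 =  2.25606914648617522896e-01;
+!
+!	/* dexp_hi + dexp_lo ~= 1/sqrt(res_c), split into high and low parts;
+!	   xx = (res - res_c) * dexp_hi * dexp_hi as computed above */
+!	static double
+!	rsqrt_core(double xx, double dexp_hi, double dexp_lo)
+!	{
+!		double p;
+!
+!		p = K6 * xx + K5;	/* Horner's rule */
+!		p = p * xx + K4;
+!		p = p * xx + K3;
+!		p = p * xx + K2;
+!		p = p * xx + K1;
+!		p = p * xx;		/* ~ (1 + xx)^(-1/2) - 1 */
+!		return dexp_hi * p + dexp_lo + dexp_hi;
+!	}
+!
+! Scaling the returned value by dlexp, the power of two rebuilt from iexp,
+! then yields 1/sqrt of the original argument.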
+
+ ENTRY(__vrsqrt)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,o3)
+ PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
+ wr %g0,0x82,%asi
+
+ ldd [%o3],K1
+ sethi %hi(0x7ff00000),%o0
+ mov %i3,%o4
+
+ ldd [%o3+0x08],K2
+ sethi %hi(0x00100000),%o1
+ mov %i1,tmp_px
+
+ ldd [%o3+0x10],K3
+ sll %i2,3,stridex
+ mov %i0,tmp_counter
+
+ ldd [%o3+0x18],K4
+ sll %i4,3,stridey
+
+ ldd [%o3+0x20],K5
+ ldd [%o3+0x28],K6
+ ldd [%o3+0x30],DC0
+ ldd [%o3+0x38],DC1
+ ldd [%o3+0x40],DC2
+ ldd [%o3+0x48],DC3
+
+.begin:
+ mov tmp_counter,counter
+ mov tmp_px,%i1
+ clr tmp_counter
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ ldd [%o3+0x60],DONE
+
+ lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
+ sethi %hi(0x7ffffc00),%i0
+
+ lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
+ add %i0,1023,%i0
+
+ fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+
+ lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
+ sethi %hi(0x00080000),%i4
+
+ lda [%i1+4]%asi,%l4
+ add %i1,stridex,%l6 ! px += stridex
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ and %g1,%i0,%i2
+
+ cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
+ bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 )
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+
+ cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
+ bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+.cont_spec:
+ fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+
+ add %o2,8,%l4 ! (6_1) hx += 8;
+
+ add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
+
+ lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (6_1) iexp << 52;
+	and	%l4,-16,%l4		! (6_1) hx &= -16;
+
+ add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
+
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+ lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
+
+ cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
+ bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 )
+ fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3);
+.cont0:
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
+
+ cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
+ bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 )
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+.cont1:
+ fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+
+ add %o2,8,%l2 ! (0_0) hx += 8;
+ fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c;
+
+ lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (0_0) iexp << 52;
+	and	%l2,-16,%l2		! (0_0) hx &= -16;
+
+ add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
+ add %l6,stridex,%l6 ! px += stridex
+ stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
+
+ fmuld %f6,%f10,%f26 ! (6_1) xx *= dtmp0;
+ ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0];
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
+ bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 )
+ lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
+.cont2:
+ fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
+ bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+.cont3:
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+
+ add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
+ fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
+ add %o2,8,%i2 ! (1_0) hx += 8;
+ fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c;
+
+ lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (1_0) iexp << 52;
+	and	%i2,-16,%i2		! (1_0) hx &= -16;
+
+ add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
+
+ fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0;
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (6_1) res += K5;
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
+ bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 )
+ lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
+.cont4:
+ fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
+ fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
+ bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+.cont5:
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+
+ add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
+ fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
+ add %o2,8,%i4 ! (2_0) hx += 8;
+ fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c;
+
+ faddd %f40,K4,%f40 ! (6_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (2_0) iexp << 52;
+	and	%i4,-16,%i4		! (2_0) hx &= -16;
+
+ add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
+
+ fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0;
+ ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (0_0) res += K5;
+
+ fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
+ add %l6,stridex,%l6 ! px += stridex
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
+ bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 )
+ lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
+.cont6:
+ fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
+ cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
+ fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
+ bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ faddd %f34,K3,%f6 ! (6_1) res += K3;
+.cont7:
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
+
+ add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
+ fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
+ add %o2,8,%i5 ! (3_0) hx += 8;
+ fsubd %f28,%f22,%f28 ! (2_0) xx = res - res_c;
+
+ fmuld %f6,%f26,%f22 ! (6_1) res *= xx;
+ faddd %f60,K4,%f60 ! (0_0) res += K4;
+
+ lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (3_0) iexp << 52;
+	and	%i5,-16,%i5		! (3_0) hx &= -16;
+
+ add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
+
+ fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
+ add %l6,stridex,%i0 ! px += stridex
+ ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (1_0) res += K5;
+
+ faddd %f22,K2,%f10 ! (6_1) res += K2;
+ fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
+ bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 )
+ lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
+.cont8:
+ fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3);
+ fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
+
+ fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
+ cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ faddd %f34,K3,%f60 ! (0_0) res += K3;
+
+ fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
+ bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+.cont9:
+ add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
+ fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx;
+ add %o2,8,%l1 ! (4_0) hx += 8;
+ fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c;
+
+ fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
+ faddd %f62,K4,%f6 ! (1_0) res += K4;
+
+ lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (4_0) iexp << 52;
+	and	%l1,-16,%l1		! (4_0) hx &= -16;
+ faddd %f58,K1,%f58 ! (6_1) res += K1;
+
+ add %i0,stridex,%i1 ! px += stridex
+ add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
+
+ fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
+ ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
+ faddd %f10,K5,%f62 ! (2_0) res += K5;
+
+ fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ nop
+ faddd %f60,K2,%f60 ! (0_0) res += K2;
+
+ for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+ lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
+ fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
+
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
+ bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 )
+ lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
+.cont10:
+ fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
+ fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
+
+ fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
+ cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (1_0) res += K3;
+
+ fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
+ bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 )
+ nop
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+.cont11:
+ ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
+ fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
+ fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
+ add %o2,8,%i3 ! (5_0) hx += 8;
+ fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
+
+ fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
+ or %g0,%o4,%i0
+
+ cmp counter,7
+ bl,pn %icc,.tail
+ faddd %f62,K4,%f34 ! (2_0) res += K4;
+
+ ba .main_loop
+ sub counter,7,counter ! counter
+
+ .align 16
+.main_loop:
+ add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
+	and	%i3,-16,%i3		! (5_1) hx &= -16;
+ lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
+ faddd %f58,K1,%f58 ! (0_1) res += K1;
+
+ add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx;
+ sllx %o7,52,%o7 ! (5_1) iexp << 52;
+ stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp;
+ faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
+
+ faddd %f22,K5,%f62 ! (3_1) res += K5;
+ add %i1,stridex,%l6 ! px += stridex
+ ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0];
+ fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
+
+ faddd %f24,K2,%f26 ! (1_1) res += K2;
+ add %i0,stridey,%i1 ! px += stridey
+ ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
+ fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
+
+ fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi;
+ faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
+
+ fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
+ cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
+ ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+ bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 )
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (2_1) res += K3;
+.cont12:
+ fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
+ cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+
+ fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
+ bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 )
+ ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+.cont13:
+ fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
+ add %o2,8,%l4 ! (6_1) hx += 8;
+ st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c;
+
+ fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
+ add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
+ st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f32,K4,%f32 ! (3_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (6_1) iexp << 52;
+	and	%l4,-16,%l4		! (6_1) hx &= -16;
+ faddd %f26,K1,%f26 ! (1_1) res += K1;
+
+ add %i1,stridey,%i0 ! px += stridey
+ add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
+ faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
+
+ fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0;
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (4_1) res += K5;
+
+ fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f32 ! (2_1) res += K2;
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+ lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
+
+ fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
+ cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
+ ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+ bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 )
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (3_1) res += K3;
+.cont14:
+ fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res;
+ cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+ fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+
+ fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
+ bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 )
+ ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+.cont15:
+ fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx;
+ add %o2,8,%l2 ! (0_0) hx += 8;
+ st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c;
+
+ fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
+ nop
+ st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f38,K4,%f38 ! (4_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (0_0) iexp << 52;
+	and	%l2,-16,%l2		! (0_0) hx &= -16;
+ faddd %f32,K1,%f32 ! (2_1) res += K1;
+
+ add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
+ add %l6,stridex,%l6 ! px += stridex
+ stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
+ faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
+
+ fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0;
+ add %i0,stridey,%i1 ! px += stridey
+ ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (5_1) res += K5;
+
+ fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
+ faddd %f44,K2,%f38 ! (3_1) res += K2;
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
+ lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi;
+
+ fmuld %f62,%f58,%f36 ! (5_1) res *= xx;
+ bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
+.cont16:
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+ cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (4_1) res += K3;
+
+ fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
+ bl,pn %icc,.update17 ! (1_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+.cont17:
+ fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp;
+ add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
+ ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
+ add %o2,8,%i2 ! (1_0) hx += 8;
+ st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c;
+
+ fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
+ nop
+ st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f36,K4,%f36 ! (5_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (1_0) iexp << 52;
+	and	%i2,-16,%i2		! (1_0) hx &= -16;
+ faddd %f38,K1,%f38 ! (3_1) res += K1;
+
+ add %i1,stridey,%i0 ! px += stridey
+ add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
+ faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo;
+
+ fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0;
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (6_1) res += K5;
+
+ fmuld %f36,%f58,%f34 ! (5_1) res *= xx;
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f36 ! (4_1) res += K2;
+
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+
+ fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
+ lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
+
+ fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
+ bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
+.cont18:
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+ cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (5_1) res += K3;
+
+ fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res;
+ bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+.cont19:
+ fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
+ add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
+ ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
+ add %o2,8,%i4 ! (2_0) hx += 8;
+ st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c;
+
+ fmuld %f34,%f58,%f44 ! (5_1) res *= xx;
+ nop
+ st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f40,K4,%f40 ! (6_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (2_0) iexp << 52;
+	and	%i4,-16,%i4		! (2_0) hx &= -16;
+ faddd %f36,K1,%f36 ! (4_1) res += K1;
+
+ add %l6,stridex,%l6 ! px += stridex
+ add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
+ faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
+
+ fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0;
+ add %i0,stridey,%i1 ! px += stridey
+ ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (0_0) res += K5;
+
+ fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+ ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
+ faddd %f44,K2,%f40 ! (5_1) res += K2;
+
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+
+ fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
+ lda [%l6+4]%asi,%f1 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi;
+
+ fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
+ bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
+.cont20:
+ fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
+ cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ faddd %f34,K3,%f10 ! (6_1) res += K3;
+
+ fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
+ bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
+.cont21:
+ fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
+ add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
+ ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
+ add %o2,8,%i5 ! (3_0) hx += 8;
+ st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c;
+
+ fmuld %f10,%f26,%f4 ! (6_1) res *= xx;
+ nop
+ st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f60,K4,%f60 ! (0_0) res += K4;
+
+ lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (3_0) iexp << 52;
+	and	%i5,-16,%i5		! (3_0) hx &= -16;
+ faddd %f40,K1,%f40 ! (5_1) res += K1;
+
+ add %l6,stridex,%i0 ! px += stridex
+ add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
+ faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
+
+ fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
+ add %i1,stridey,%l6 ! px += stridey
+ ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (1_0) res += K5;
+
+ faddd %f4,K2,%f10 ! (6_1) res += K2;
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ nop
+ fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
+
+ fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ lda [%i0]%asi,%f6 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+
+ fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
+ lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
+
+ fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3);
+ bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
+ fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
+.cont22:
+ fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
+ cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ faddd %f34,K3,%f60 ! (0_0) res += K3;
+
+ fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res;
+ bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+.cont23:
+ fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
+ add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
+ ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx;
+ add %o2,8,%l1 ! (4_0) hx += 8;
+ st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c;
+
+ fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
+ sllx %o7,52,%o7 ! (4_0) iexp << 52;
+ st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f62,K4,%f6 ! (1_0) res += K4;
+
+ lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
+ add %i0,stridex,%i1 ! px += stridex
+	and	%l1,-16,%l1		! (4_0) hx &= -16;
+ faddd %f58,K1,%f58 ! (6_1) res += K1;
+
+ add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
+ add %l6,stridey,%i0 ! px += stridey
+ stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
+ faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo;
+
+ fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
+ nop
+ ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
+ faddd %f30,K5,%f62 ! (2_0) res += K5;
+
+ fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0];
+ faddd %f60,K2,%f60 ! (0_0) res += K2;
+
+ for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+ lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
+ fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
+
+ fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
+ lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi;
+
+ fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
+ bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp;
+ fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
+.cont24:
+ fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
+ faddd %f34,K3,%f34 ! (1_0) res += K3;
+
+ fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
+ bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+.cont25:
+ fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp;
+ subcc counter,7,counter ! counter -= 7;
+ ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
+ add %o2,8,%i3 ! (5_0) hx += 8;
+ st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
+
+ fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
+ st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1];
+ bpos,pt %icc,.main_loop
+ faddd %f62,K4,%f34 ! (2_0) res += K4;
+
+ add counter,7,counter
+.tail:
+ add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i0,%o4
+
+ faddd %f58,K1,%f58 ! (0_1) res += K1;
+
+ faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
+
+ faddd %f22,K5,%f62 ! (3_1) res += K5;
+ fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
+
+ faddd %f24,K2,%f26 ! (1_1) res += K2;
+ add %i1,stridex,%l6 ! px += stridex
+ ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
+ fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
+
+ fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
+
+ add %i0,stridey,%i1 ! px += stridey
+ faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
+
+ fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
+ ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+ faddd %f34,K3,%f34 ! (2_1) res += K3;
+
+ fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
+
+ fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
+ ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
+
+ fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
+ st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
+
+ fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
+ st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f32,K4,%f32 ! (3_1) res += K4;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i1,%o4
+
+ faddd %f26,K1,%f26 ! (1_1) res += K1;
+
+ faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
+
+ add %l6,stridex,%l6 ! px += stridex
+ faddd %f62,K5,%f62 ! (4_1) res += K5;
+
+ fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
+ add %i1,stridey,%i0 ! px += stridey
+ ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f32 ! (2_1) res += K2;
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+
+ faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
+
+ fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
+ ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+ faddd %f34,K3,%f34 ! (3_1) res += K3;
+
+ fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res;
+
+ fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
+ ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
+
+ st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
+
+ fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
+ st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f38,K4,%f38 ! (4_1) res += K4;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i0,%o4
+
+ faddd %f32,K1,%f32 ! (2_1) res += K1;
+
+ add %l6,stridex,%l6 ! px += stridex
+ faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
+
+ add %i0,stridey,%i1 ! px += stridey
+
+ fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
+ ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
+ faddd %f44,K2,%f38 ! (3_1) res += K2;
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+
+ faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi;
+
+ ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
+
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+ faddd %f34,K3,%f34 ! (4_1) res += K3;
+
+ fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
+
+ fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp;
+ ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1];
+
+ st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
+
+ fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
+ st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i1,%o4
+
+ faddd %f38,K1,%f38 ! (3_1) res += K1;
+
+ faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo;
+
+ add %l6,stridex,%l6 ! px += stridex
+
+	add	%i1,stridey,%i0	! py += stridey
+ ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f36 ! (4_1) res += K2;
+
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+
+ faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
+
+ ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
+
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+
+ fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res;
+
+ fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
+ ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
+
+ st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
+
+ st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i0,%o4
+
+ faddd %f36,K1,%f36 ! (4_1) res += K1;
+
+ faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
+
+	add	%i0,stridey,%i1	! py += stridey
+
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
+
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+
+ faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi;
+
+ ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
+
+ fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
+
+ fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
+ ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
+
+ st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
+
+ st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i1,%o4
+
+ faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
+
+ add %l6,stridex,%i0 ! px += stridex
+
+	add	%i1,stridey,%l6	! py += stridey
+
+ faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
+
+ ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
+
+ fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
+
+ st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
+
+ st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
+
+ ba .begin
+ add %i1,stridey,%o4
+
+ .align 16
+.spec0:
+ fdivd DONE,%f0,%f0 ! res = DONE / res;
+ add %i1,stridex,%i1 ! px += stridex
+ st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
+ st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
+ add %o4,stridey,%o4 ! py += stridey
+ ba .begin1
+ sub counter,1,counter
+
+ .align 16
+.spec1:
+ orcc %i2,%l4,%g0
+ bz,a 2f
+ fdivd DONE,%f0,%f0 ! res = DONE / res;
+
+ cmp %g1,0
+ bl,a 2f
+ fsqrtd %f0,%f0 ! res = sqrt(res);
+
+ cmp %g1,%i4
+ bge,a 1f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp0]
+
+ fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp0],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ ba .cont_spec
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+
+1:
+ fand %f0,%f18,%f0 ! res = vis_fand(res,DC4);
+
+ ldd [%o3+0x58],%f28
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+
+ faddd %f0,%f28,%f0 ! res += D2ON51;
+ st %f0,[%fp+tmp0]
+
+ fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp0],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ ba .cont_spec
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+
+2:
+ add %i1,stridex,%i1 ! px += stridex
+ st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
+ st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
+ add %o4,stridey,%o4 ! py += stridey
+ ba .begin1
+ sub counter,1,counter
+
+ .align 16
+.update0:
+ cmp counter,1
+ ble .cont0
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont0
+ mov 1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ ble .cont1
+ sub %l6,stridex,%i1
+
+ ld [%i1+4],%i2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ ba .cont1
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+2:
+ fand %f8,%f18,%f8
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f8,%f18,%f8
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ ba .cont1
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont1
+ mov 1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble .cont2
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont2
+ mov 2,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble .cont3
+ sub %l6,stridex,%i1
+
+ ld [%i1+4],%i2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ sub %o7,537,%o7
+ ba .cont3
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+2:
+ fand %f0,%f18,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f0,%f18,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ sub %o7,537,%o7
+ ba .cont3
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont3
+ mov 2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ ble .cont4
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont4
+ mov 3,counter
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble .cont5
+ sub %l6,stridex,%i1
+
+ ld [%i1+4],%i3
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i4
+
+ cmp %g1,%i4
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ ba .cont5
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+2:
+ fand %f6,%f18,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f6,%f18,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ ba .cont5
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont5
+ mov 3,counter
+
+ .align 16
+.update6:
+ cmp counter,4
+ ble .cont6
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont6
+ mov 4,counter
+
+ .align 16
+.update7:
+ sub %l6,stridex,%i1
+ cmp counter,4
+ ble .cont7
+ faddd %f34,K3,%f6 ! (6_1) res += K3;
+
+ ld [%i1+4],%i3
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i5
+
+ cmp %g1,%i5
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ ba .cont7
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f18,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f0,%f18,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ ba .cont7
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont7
+ mov 4,counter
+
+ .align 16
+.update8:
+ cmp counter,5
+ ble .cont8
+ nop
+
+ mov %l6,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont8
+ mov 5,counter
+
+ .align 16
+.update9:
+ ld [%l6+4],%i3
+ cmp counter,5
+ ble .cont9
+ fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i1
+
+ cmp %g1,%i1
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont9
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+2:
+ fand %f8,%f18,%f8
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f8,%f18,%f8
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont9
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+1:
+ mov %l6,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont9
+ mov 5,counter
+
+ .align 16
+.update10:
+ cmp counter,6
+ ble .cont10
+ nop
+
+ mov %i0,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont10
+ mov 6,counter
+
+ .align 16
+.update11:
+ ld [%i0+4],%i3
+ cmp counter,6
+ ble .cont11
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ ba .cont11
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f18,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f0,%f18,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ ba .cont11
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+1:
+ mov %i0,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont11
+ mov 6,counter
+
+ .align 16
+.update12:
+ cmp counter,0
+ ble .cont12
+ faddd %f34,K3,%f34 ! (2_1) res += K3;
+
+ sub %l6,stridex,tmp_px
+ sub counter,0,tmp_counter
+
+ ba .cont12
+ mov 0,counter
+
+ .align 16
+.update13:
+ sub %l6,stridex,%l4
+ cmp counter,0
+ ble .cont13
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+
+ ld [%l4+4],%l4
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%l4,%g0
+ bz 1f
+ sethi %hi(0x00080000),%l4
+
+ cmp %g1,%l4
+ bge,a 2f
+ ldd [%o3+0x50],%f62
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+ ba .cont13
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+2:
+ fand %f6,%f62,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f62
+ faddd %f6,%f62,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+ ba .cont13
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,0,tmp_counter
+
+ ba .cont13
+ mov 0,counter
+
+ .align 16
+.update14:
+ cmp counter,1
+ ble .cont14
+ faddd %f34,K3,%f34 ! (3_1) res += K3;
+
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont14
+ mov 1,counter
+
+ .align 16
+.update15:
+ sub %l6,stridex,%l2
+ cmp counter,1
+ ble .cont15
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+
+ ld [%l2+4],%l2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%l2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%l2
+
+ cmp %g1,%l2
+ bge,a 2f
+ ldd [%o3+0x50],%f62
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+ ba .cont15
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+2:
+ fand %f0,%f62,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f62
+ faddd %f0,%f62,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+ ba .cont15
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont15
+ mov 1,counter
+
+ .align 16
+.update16:
+ cmp counter,2
+ ble .cont16
+ fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
+
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont16
+ mov 2,counter
+
+ .align 16
+.update17:
+ sub %l6,stridex,%i2
+ cmp counter,2
+ ble .cont17
+ fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+
+ ld [%i2+4],%i2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i2
+
+ cmp %g1,%i2
+ bge,a 2f
+ ldd [%o3+0x50],%f2
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ ba .cont17
+ for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+2:
+ fand %f6,%f2,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f2
+ faddd %f6,%f2,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ ba .cont17
+ for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont17
+ mov 2,counter
+
+ .align 16
+.update18:
+ cmp counter,3
+ ble .cont18
+ fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
+
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont18
+ mov 3,counter
+
+ .align 16
+.update19:
+ sub %l6,stridex,%i4
+ cmp counter,3
+ ble .cont19
+ fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+
+ ld [%i4+4],%i4
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i4,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i4
+
+ cmp %g1,%i4
+ bge,a 2f
+ ldd [%o3+0x50],%f2
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ ba .cont19
+ for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f2,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f2
+ faddd %f0,%f2,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ ba .cont19
+ for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont19
+ mov 3,counter
+
+ .align 16
+.update20:
+ cmp counter,4
+ ble .cont20
+ fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
+
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont20
+ mov 4,counter
+
+ .align 16
+.update21:
+ sub %l6,stridex,%i5
+ cmp counter,4
+ ble .cont21
+ fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
+
+ ld [%i5+4],%i5
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i5,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i5
+
+ cmp %g1,%i5
+ bge,a 2f
+ ldd [%o3+0x50],%f34
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ ba .cont21
+ for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+2:
+ fand %f6,%f34,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f34
+ faddd %f6,%f34,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ ba .cont21
+ for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont21
+ mov 4,counter
+
+ .align 16
+.update22:
+ cmp counter,5
+ ble .cont22
+ fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
+
+ sub %i0,stridex,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont22
+ mov 5,counter
+
+ .align 16
+.update23:
+ sub %i0,stridex,%l1
+ cmp counter,5
+ ble .cont23
+ fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+
+ ld [%l1+4],%l1
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%l1,%g0
+ bz 1f
+ sethi %hi(0x00080000),%l1
+
+ cmp %g1,%l1
+ bge,a 2f
+ ldd [%o3+0x50],%f34
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont23
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f34,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f34
+ faddd %f0,%f34,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont23
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+1:
+ sub %i0,stridex,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont23
+ mov 5,counter
+
+ .align 16
+.update24:
+ cmp counter,6
+ ble .cont24
+ fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
+
+ sub %i1,stridex,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont24
+ mov 6,counter
+
+ .align 16
+.update25:
+ sub %i1,stridex,%i3
+ cmp counter,6
+ ble .cont25
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+
+ ld [%i3+4],%i3
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ nop
+
+ sub %i1,stridex,%i3
+ ld [%i3],%f10
+ ld [%i3+4],%f11
+
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f60
+
+ fxtod %f10,%f10 ! res = *(long long*)&res;
+ st %f10,[%fp+tmp7]
+
+ fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ ba .cont25
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+2:
+ fand %f10,%f60,%f10
+ fxtod %f10,%f10 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f60
+ faddd %f10,%f60,%f10
+ st %f10,[%fp+tmp7]
+
+ fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ ba .cont25
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+1:
+ sub %i1,stridex,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont25
+ mov 6,counter
+
+.exit:
+ ret
+ restore
+ SET_SIZE(__vrsqrt)
+
diff --git a/usr/src/libm/src/mvec/vis/__vrsqrtf.S b/usr/src/libm/src/mvec/vis/__vrsqrtf.S
new file mode 100644
index 0000000..beb56c1
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vrsqrtf.S
@@ -0,0 +1,1718 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vrsqrtf.S 1.4 06/01/23 SMI"
+
+ .file "__vrsqrtf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+! i = [0,63]
+! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
+! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
+! i = [64,127]
+! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
+! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
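+!
+! A table with these values could be regenerated offline by a small C
+! program along the following lines.  This is only an illustrative
+! sketch (gen_rsqrtf_tbl is a hypothetical helper, not part of libm);
+! it assumes an IEEE-754 host with a working sqrtl() and prints each
+! entry in the high-word/low-word order used below.
+!
+!	#include <stdio.h>
+!	#include <stdint.h>
+!	#include <string.h>
+!	#include <math.h>
+!
+!	static void gen_rsqrtf_tbl(void)
+!	{
+!		for (int i = 0; i < 128; i++) {
+!			uint64_t bits = 0x3fe0000000000000ULL + ((uint64_t)i << 46);
+!			double x, div, rsq;
+!			uint64_t d, r;
+!
+!			memcpy(&x, &bits, sizeof x);	/* x steps through [0.5, 2.0) */
+!			div = 1.0 / x * (i < 64 ? 0x1p-24 : 0x1p-23);
+!			rsq = (double)(1.0L / sqrtl((long double)x));
+!			memcpy(&d, &div, sizeof d);
+!			memcpy(&r, &rsq, sizeof r);
+!			printf("\t.word\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
+!			    (unsigned)(d >> 32), (unsigned)(d & 0xffffffff),
+!			    (unsigned)(r >> 32), (unsigned)(r & 0xffffffff));
+!		}
+!	}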
+
+.CONST_TBL:
+ .word 0x3e800000, 0x00000000, 0x3ff6a09e, 0x667f3bcd,
+ .word 0x3e7f81f8, 0x1f81f820, 0x3ff673e3, 0x2ef63a03,
+ .word 0x3e7f07c1, 0xf07c1f08, 0x3ff6482d, 0x37a5a3d2,
+ .word 0x3e7e9131, 0xabf0b767, 0x3ff61d72, 0xb7978671,
+ .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3ff5f3aa, 0x673fa911,
+ .word 0x3e7dae60, 0x76b981db, 0x3ff5cacb, 0x7802f342,
+ .word 0x3e7d41d4, 0x1d41d41d, 0x3ff5a2cd, 0x8c69d61a,
+ .word 0x3e7cd856, 0x89039b0b, 0x3ff57ba8, 0xb0ee01b9,
+ .word 0x3e7c71c7, 0x1c71c71c, 0x3ff55555, 0x55555555,
+ .word 0x3e7c0e07, 0x0381c0e0, 0x3ff52fcc, 0x468d6b54,
+ .word 0x3e7bacf9, 0x14c1bad0, 0x3ff50b06, 0xa8fc6b70,
+ .word 0x3e7b4e81, 0xb4e81b4f, 0x3ff4e6fd, 0xf33cf032,
+ .word 0x3e7af286, 0xbca1af28, 0x3ff4c3ab, 0xe93bcf74,
+ .word 0x3e7a98ef, 0x606a63be, 0x3ff4a10a, 0x97af7b92,
+ .word 0x3e7a41a4, 0x1a41a41a, 0x3ff47f14, 0x4fe17f9f,
+ .word 0x3e79ec8e, 0x951033d9, 0x3ff45dc3, 0xa3c34fa3,
+ .word 0x3e799999, 0x9999999a, 0x3ff43d13, 0x6248490f,
+ .word 0x3e7948b0, 0xfcd6e9e0, 0x3ff41cfe, 0x93ff5199,
+ .word 0x3e78f9c1, 0x8f9c18fa, 0x3ff3fd80, 0x77e70577,
+ .word 0x3e78acb9, 0x0f6bf3aa, 0x3ff3de94, 0x8077db58,
+ .word 0x3e786186, 0x18618618, 0x3ff3c036, 0x50e00e03,
+ .word 0x3e781818, 0x18181818, 0x3ff3a261, 0xba6d7a37,
+ .word 0x3e77d05f, 0x417d05f4, 0x3ff38512, 0xba21f51e,
+ .word 0x3e778a4c, 0x8178a4c8, 0x3ff36845, 0x766eec92,
+ .word 0x3e7745d1, 0x745d1746, 0x3ff34bf6, 0x3d156826,
+ .word 0x3e7702e0, 0x5c0b8170, 0x3ff33021, 0x8127c0e0,
+ .word 0x3e76c16c, 0x16c16c17, 0x3ff314c3, 0xd92a9e91,
+ .word 0x3e768168, 0x16816817, 0x3ff2f9d9, 0xfd52fd50,
+ .word 0x3e7642c8, 0x590b2164, 0x3ff2df60, 0xc5df2c9e,
+ .word 0x3e760581, 0x60581606, 0x3ff2c555, 0x2988e428,
+ .word 0x3e75c988, 0x2b931057, 0x3ff2abb4, 0x3c0eb0f4,
+ .word 0x3e758ed2, 0x308158ed, 0x3ff2927b, 0x2cd320f5,
+ .word 0x3e755555, 0x55555555, 0x3ff279a7, 0x4590331c,
+ .word 0x3e751d07, 0xeae2f815, 0x3ff26135, 0xe91daf55,
+ .word 0x3e74e5e0, 0xa72f0539, 0x3ff24924, 0x92492492,
+ .word 0x3e74afd6, 0xa052bf5b, 0x3ff23170, 0xd2be638a,
+ .word 0x3e747ae1, 0x47ae147b, 0x3ff21a18, 0x51ff630a,
+ .word 0x3e7446f8, 0x6562d9fb, 0x3ff20318, 0xcc6a8f5d,
+ .word 0x3e741414, 0x14141414, 0x3ff1ec70, 0x124e98f9,
+ .word 0x3e73e22c, 0xbce4a902, 0x3ff1d61c, 0x070ae7d3,
+ .word 0x3e73b13b, 0x13b13b14, 0x3ff1c01a, 0xa03be896,
+ .word 0x3e738138, 0x13813814, 0x3ff1aa69, 0xe4f2777f,
+ .word 0x3e73521c, 0xfb2b78c1, 0x3ff19507, 0xecf5b9e9,
+ .word 0x3e7323e3, 0x4a2b10bf, 0x3ff17ff2, 0xe00ec3ee,
+ .word 0x3e72f684, 0xbda12f68, 0x3ff16b28, 0xf55d72d4,
+ .word 0x3e72c9fb, 0x4d812ca0, 0x3ff156a8, 0x72b5ef62,
+ .word 0x3e729e41, 0x29e4129e, 0x3ff1426f, 0xac0654db,
+ .word 0x3e727350, 0xb8812735, 0x3ff12e7d, 0x02c40253,
+ .word 0x3e724924, 0x92492492, 0x3ff11ace, 0xe560242a,
+ .word 0x3e721fb7, 0x8121fb78, 0x3ff10763, 0xcec30b26,
+ .word 0x3e71f704, 0x7dc11f70, 0x3ff0f43a, 0x45cdedad,
+ .word 0x3e71cf06, 0xada2811d, 0x3ff0e150, 0xdce2b60c,
+ .word 0x3e71a7b9, 0x611a7b96, 0x3ff0cea6, 0x317186dc,
+ .word 0x3e718118, 0x11811812, 0x3ff0bc38, 0xeb8ba412,
+ .word 0x3e715b1e, 0x5f75270d, 0x3ff0aa07, 0xbd7b7488,
+ .word 0x3e7135c8, 0x1135c811, 0x3ff09811, 0x63615499,
+ .word 0x3e711111, 0x11111111, 0x3ff08654, 0xa2d4f6db,
+ .word 0x3e70ecf5, 0x6be69c90, 0x3ff074d0, 0x4a8b1438,
+ .word 0x3e70c971, 0x4fbcda3b, 0x3ff06383, 0x31ff307a,
+ .word 0x3e70a681, 0x0a6810a7, 0x3ff0526c, 0x39213bfa,
+ .word 0x3e708421, 0x08421084, 0x3ff0418a, 0x4806de7d,
+ .word 0x3e70624d, 0xd2f1a9fc, 0x3ff030dc, 0x4ea03a72,
+ .word 0x3e704104, 0x10410410, 0x3ff02061, 0x446ffa9a,
+ .word 0x3e702040, 0x81020408, 0x3ff01018, 0x28467ee9,
+ .word 0x3e800000, 0x00000000, 0x3ff00000, 0x00000000,
+ .word 0x3e7f81f8, 0x1f81f820, 0x3fefc0bd, 0x88a0f1d9,
+ .word 0x3e7f07c1, 0xf07c1f08, 0x3fef82ec, 0x882c0f9b,
+ .word 0x3e7e9131, 0xabf0b767, 0x3fef467f, 0x2814b0cc,
+ .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3fef0b68, 0x48d2af1c,
+ .word 0x3e7dae60, 0x76b981db, 0x3feed19b, 0x75e78957,
+ .word 0x3e7d41d4, 0x1d41d41d, 0x3fee990c, 0xdad55ed2,
+ .word 0x3e7cd856, 0x89039b0b, 0x3fee61b1, 0x38f18adc,
+ .word 0x3e7c71c7, 0x1c71c71c, 0x3fee2b7d, 0xddfefa66,
+ .word 0x3e7c0e07, 0x0381c0e0, 0x3fedf668, 0x9b7e6350,
+ .word 0x3e7bacf9, 0x14c1bad0, 0x3fedc267, 0xbea45549,
+ .word 0x3e7b4e81, 0xb4e81b4f, 0x3fed8f72, 0x08e6b82d,
+ .word 0x3e7af286, 0xbca1af28, 0x3fed5d7e, 0xa914b937,
+ .word 0x3e7a98ef, 0x606a63be, 0x3fed2c85, 0x34ed6d86,
+ .word 0x3e7a41a4, 0x1a41a41a, 0x3fecfc7d, 0xa32a9213,
+ .word 0x3e79ec8e, 0x951033d9, 0x3feccd60, 0x45f5d358,
+ .word 0x3e799999, 0x9999999a, 0x3fec9f25, 0xc5bfedd9,
+ .word 0x3e7948b0, 0xfcd6e9e0, 0x3fec71c7, 0x1c71c71c,
+ .word 0x3e78f9c1, 0x8f9c18fa, 0x3fec453d, 0x90f057a2,
+ .word 0x3e78acb9, 0x0f6bf3aa, 0x3fec1982, 0xb2ece47b,
+ .word 0x3e786186, 0x18618618, 0x3febee90, 0x56fb9c39,
+ .word 0x3e781818, 0x18181818, 0x3febc460, 0x92eb3118,
+ .word 0x3e77d05f, 0x417d05f4, 0x3feb9aed, 0xba588347,
+ .word 0x3e778a4c, 0x8178a4c8, 0x3feb7232, 0x5b79db11,
+ .word 0x3e7745d1, 0x745d1746, 0x3feb4a29, 0x3c1d9550,
+ .word 0x3e7702e0, 0x5c0b8170, 0x3feb22cd, 0x56d87d7e,
+ .word 0x3e76c16c, 0x16c16c17, 0x3feafc19, 0xd8606169,
+ .word 0x3e768168, 0x16816817, 0x3fead60a, 0x1d0fb394,
+ .word 0x3e7642c8, 0x590b2164, 0x3feab099, 0xae8f539a,
+ .word 0x3e760581, 0x60581606, 0x3fea8bc4, 0x41a3d02c,
+ .word 0x3e75c988, 0x2b931057, 0x3fea6785, 0xb41bacf7,
+ .word 0x3e758ed2, 0x308158ed, 0x3fea43da, 0x0adc6899,
+ .word 0x3e755555, 0x55555555, 0x3fea20bd, 0x700c2c3e,
+ .word 0x3e751d07, 0xeae2f815, 0x3fe9fe2c, 0x315637ee,
+ .word 0x3e74e5e0, 0xa72f0539, 0x3fe9dc22, 0xbe484458,
+ .word 0x3e74afd6, 0xa052bf5b, 0x3fe9ba9d, 0xa6c73588,
+ .word 0x3e747ae1, 0x47ae147b, 0x3fe99999, 0x9999999a,
+ .word 0x3e7446f8, 0x6562d9fb, 0x3fe97913, 0x63068b54,
+ .word 0x3e741414, 0x14141414, 0x3fe95907, 0xeb87ab44,
+ .word 0x3e73e22c, 0xbce4a902, 0x3fe93974, 0x368cfa31,
+ .word 0x3e73b13b, 0x13b13b14, 0x3fe91a55, 0x6151761c,
+ .word 0x3e738138, 0x13813814, 0x3fe8fba8, 0xa1bf6f96,
+ .word 0x3e73521c, 0xfb2b78c1, 0x3fe8dd6b, 0x4563a009,
+ .word 0x3e7323e3, 0x4a2b10bf, 0x3fe8bf9a, 0xb06e1af3,
+ .word 0x3e72f684, 0xbda12f68, 0x3fe8a234, 0x5cc04426,
+ .word 0x3e72c9fb, 0x4d812ca0, 0x3fe88535, 0xd90703c6,
+ .word 0x3e729e41, 0x29e4129e, 0x3fe8689c, 0xc7e07e7d,
+ .word 0x3e727350, 0xb8812735, 0x3fe84c66, 0xdf0ca4c2,
+ .word 0x3e724924, 0x92492492, 0x3fe83091, 0xe6a7f7e7,
+ .word 0x3e721fb7, 0x8121fb78, 0x3fe8151b, 0xb86fee1d,
+ .word 0x3e71f704, 0x7dc11f70, 0x3fe7fa02, 0x3f1068d1,
+ .word 0x3e71cf06, 0xada2811d, 0x3fe7df43, 0x7579b9b5,
+ .word 0x3e71a7b9, 0x611a7b96, 0x3fe7c4dd, 0x663ebb88,
+ .word 0x3e718118, 0x11811812, 0x3fe7aace, 0x2afa8b72,
+ .word 0x3e715b1e, 0x5f75270d, 0x3fe79113, 0xebbd7729,
+ .word 0x3e7135c8, 0x1135c811, 0x3fe777ac, 0xde80baea,
+ .word 0x3e711111, 0x11111111, 0x3fe75e97, 0x46a0b098,
+ .word 0x3e70ecf5, 0x6be69c90, 0x3fe745d1, 0x745d1746,
+ .word 0x3e70c971, 0x4fbcda3b, 0x3fe72d59, 0xc45f1fc5,
+ .word 0x3e70a681, 0x0a6810a7, 0x3fe7152e, 0x9f44f01f,
+ .word 0x3e708421, 0x08421084, 0x3fe6fd4e, 0x79325467,
+ .word 0x3e70624d, 0xd2f1a9fc, 0x3fe6e5b7, 0xd16657e1,
+ .word 0x3e704104, 0x10410410, 0x3fe6ce69, 0x31d5858d,
+ .word 0x3e702040, 0x81020408, 0x3fe6b761, 0x2ec892f6,
+
+ .word 0x3fefffff, 0xfee7f18f ! K0 = 9.99999997962321453275e-01
+ .word 0xbfdfffff, 0xfe07e52f ! K1 = -4.99999998166077580600e-01
+ .word 0x3fd80118, 0x0ca296d9 ! K2 = 3.75066768969515586277e-01
+ .word 0xbfd400fc, 0x0bbb8e78 ! K3 = -3.12560092408808548438e-01
+ .word 0x7ffe0000, 0x7ffe0000 ! DC0
+ .word 0x3f800000, 0x40000000 ! FTWO
+
+#define stridex %l4
+#define stridex2 %l1
+#define stridey %l3
+#define stridey2 %i2
+#define TBL %l2
+#define counter %i5
+
+#define K3 %f38
+#define K2 %f36
+#define K1 %f34
+#define K0 %f32
+#define DC0 %f4
+#define FONE %f2
+#define FTWO %f3
+
+#define _0x00800000 %o2
+#define _0x7f800000 %o4
+
+#define tmp0 STACK_BIAS-0x30
+#define tmp1 STACK_BIAS-0x28
+#define tmp2 STACK_BIAS-0x20
+#define tmp3 STACK_BIAS-0x18
+#define tmp_counter STACK_BIAS-0x10
+#define tmp_px STACK_BIAS-0x08
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x30
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! ((float*)&ddx0)[0] = *px;
+! ax0 = *(int*)px;
+!
+! ((float*)&ddx0)[1] = *(px + stridex);
+! ax1 = *(int*)(px + stridex);
+!
+! px += stridex2;
+!
+! if ( ax0 >= 0x7f800000 )
+! {
+! RETURN ( FONE / ((float*)&dres0)[0] );
+! }
+! if ( ax0 < 0x00800000 )
+! {
+! float res = ((float*)&dres0)[0];
+!
+! if ( (ax0 & 0x7fffffff) == 0 ) /* |X| = zero */
+! {
+! RETURN ( FONE / res )
+! }
+! else if ( ax0 >= 0 ) /* X = denormal */
+! {
+! double res0, xx0, tbl_div0, tbl_sqrt0;
+! float fres0;
+! int iax0, si0, iexp0;
+!
+! res = *(int*)&res;
+! res *= FTWO;
+! ax0 = *(int*)&res;
+! iexp0 = ax0 >> 24;
+! iexp0 = 0x3f + 0x4b - iexp0;
+! iexp0 = iexp0 << 23;
+!
+! si0 = (ax0 >> 13) & 0x7f0;
+!
+! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
+! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
+! iax0 = ax0 & 0x7ffe0000;
+! iax0 = ax0 - iax0;
+! xx0 = iax0 * tbl_div0;
+! res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
+!
+! fres0 = res0;
+! iexp0 += *(int*)&fres0;
+! RETURN(*(float*)&iexp0)
+! }
+! else /* X = negative */
+! {
+! RETURN ( sqrtf(res) )
+! }
+! }
+! if ( ax1 >= 0x7f800000 )
+! {
+! RETURN ( FONE / ((float*)&dres0)[1] )
+! }
+! if ( ax1 < 0x00800000 )
+! {
+! float res = ((float*)&dres0)[1];
+!	if ( (ax1 & 0x7fffffff) == 0 ) /* |X| = zero */
+! {
+! RETURN ( FONE / res )
+! }
+!	else if ( ax1 >= 0 ) /* X = denormal */
+! {
+! double res0, xx0, tbl_div0, tbl_sqrt0;
+! float fres0;
+! int iax1, si0, iexp0;
+!
+! res = *(int*)&res;
+! res *= FTWO;
+! ax1 = *(int*)&res;
+! iexp0 = ax1 >> 24;
+! iexp0 = 0x3f + 0x4b - iexp0;
+! iexp0 = iexp0 << 23;
+!
+! si0 = (ax1 >> 13) & 0x7f0;
+!
+! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
+! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
+! iax1 = ax1 & 0x7ffe0000;
+! iax1 = ax1 - iax1;
+! xx0 = iax1 * tbl_div0;
+! res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
+!
+! fres0 = res0;
+! iexp0 += *(int*)&fres0;
+! RETURN(*(float*)&iexp0)
+! }
+! else /* X = negative */
+! {
+! RETURN ( sqrtf(res) )
+! }
+! }
+!
+! iexp0 = ax0 >> 24;
+! iexp1 = ax1 >> 24;
+! iexp0 = 0x3f - iexp0;
+! iexp1 = 0x3f - iexp1;
+! iexp1 &= 0x1ff;
+! lexp0 = iexp0 << 55;
+! lexp1 = iexp1 << 23;
+!
+! lexp0 |= lexp1;
+!
+! fdx0 = *((double*)&lexp0);
+!
+! si0 = ax0 >> 13;
+! si1 = ax1 >> 13;
+! si0 &= 0x7f0;
+! si1 &= 0x7f0;
+!
+! addr0 = (char*)TBL + si0;
+! addr1 = (char*)TBL + si1;
+! tbl_div0 = ((double*)((char*)TBL + si0))[0];
+! tbl_div1 = ((double*)((char*)TBL + si1))[0];
+! tbl_sqrt0 = ((double*)addr0)[1];
+! tbl_sqrt1 = ((double*)addr1)[1];
+! dfx0 = vis_fand(ddx0,DC0);
+! dfx0 = vis_fpsub32(ddx0,dfx0);
+! dtmp0 = (double)(((int*)&dfx0)[0]);
+! dtmp1 = (double)(((int*)&dfx0)[1]);
+! xx0 = dtmp0 * tbl_div0;
+! xx1 = dtmp1 * tbl_div1;
+! res0 = K3 * xx0;
+! res1 = K3 * xx1;
+! res0 += K2;
+! res1 += K2;
+! res0 *= xx0;
+! res1 *= xx1;
+! res0 += K1;
+! res1 += K1;
+! res0 *= xx0;
+! res1 *= xx1;
+! res0 += K0;
+! res1 += K0;
+! res0 = tbl_sqrt0 * res0;
+! res1 = tbl_sqrt1 * res1;
+! ((float*)&dres0)[0] = (float)res0;
+! ((float*)&dres0)[1] = (float)res1;
+! dres0 = vis_fpadd32(dres0,fdx0);
+! *py = ((float*)&dres0)[0];
+! *(py + stridey) = ((float*)&dres0)[1];
+! py += stridey2;
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
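+!
+! For reference, the main (normal, finite, nonzero argument) path above
+! corresponds roughly to the scalar C sketch below.  This is only an
+! illustrative sketch, not part of libm: rsqrtf_ref is a hypothetical
+! name, TBL is assumed to hold the 128 {1/x * scale, 1/sqrt(x)} pairs
+! defined above, and K0..K3 are the polynomial coefficients above.
+! E.g. rsqrtf_ref(4.0f) evaluates to approximately 0.5f.
+!
+!	#include <string.h>
+!
+!	extern const double TBL[256];	/* table defined above (assumed) */
+!
+!	static const double K0 =  9.99999997962321453275e-01;
+!	static const double K1 = -4.99999998166077580600e-01;
+!	static const double K2 =  3.75066768969515586277e-01;
+!	static const double K3 = -3.12560092408808548438e-01;
+!
+!	static float rsqrtf_ref(float x)	/* 0x00800000 <= ax < 0x7f800000 */
+!	{
+!		int ax, si, iexp;
+!		unsigned bits;
+!		double tbl_div, tbl_sqrt, xx, res;
+!		float fres;
+!
+!		memcpy(&ax, &x, sizeof ax);
+!		iexp = 0x3f - (ax >> 24);	/* exponent adjustment for the result */
+!		si = (ax >> 13) & 0x7f0;	/* byte offset of the table entry     */
+!		tbl_div  = ((double*)((char*)TBL + si))[0];
+!		tbl_sqrt = ((double*)((char*)TBL + si))[1];
+!		xx  = (double)(ax & ~0x7ffe0000) * tbl_div;	/* low mantissa bits */
+!		res = tbl_sqrt * (((K3 * xx + K2) * xx + K1) * xx + K0);
+!		fres = (float)res;
+!		memcpy(&bits, &fres, sizeof bits);
+!		bits += (unsigned)iexp << 23;	/* fold exponent back in, as fpadd32 does */
+!		memcpy(&fres, &bits, sizeof fres);
+!		return (fres);
+!	}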
+
+ ENTRY(__vrsqrtf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,l2)
+
+ st %i0,[%fp+tmp_counter]
+ stx %i1,[%fp+tmp_px]
+
+ ldd [TBL+2048],K0
+ sll %i2,2,stridex
+
+ ldd [TBL+2048+8],K1
+ sll %i4,2,stridey
+ mov %i3,%i2
+
+ ldd [TBL+2048+16],K2
+ sethi %hi(0x7f800000),_0x7f800000
+ sll stridex,1,stridex2
+
+ ldd [TBL+2048+24],K3
+ sethi %hi(0x00800000),_0x00800000
+
+ ldd [TBL+2048+32],DC0
+ add %g0,0x3f,%l0
+
+ ldd [TBL+2048+40],FONE
+! ld [TBL+2048+44],FTWO
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%l7
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+
+ lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px;
+
+ lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
+ sethi %hi(0x7ffffc00),%o0
+
+ lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px;
+ add %l7,stridex2,%i1 ! px += stridex2
+ add %o0,0x3ff,%o0
+
+ lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex);
+ fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
+
+ sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
+ add %i1,stridex2,%o5 ! px += stridex2
+
+ cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000
+ bge,pn %icc,.spec0 ! (4_1) if ( ax0 >= 0x7f800000 )
+ nop
+
+ cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000
+ bl,pn %icc,.spec1 ! (4_1) if ( ax0 < 0x00800000 )
+ sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
+.cont_spec:
+ and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
+
+ ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
+ and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
+ fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
+ sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1;
+
+	and	%l7,511,%l1		! (5_0) iexp1 &= 0x1ff;
+ add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
+
+ sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
+ sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
+ fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55;
+ fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1;
+
+ stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
+
+ fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
+
+ lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px;
+ fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
+
+ lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex);
+
+ lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px;
+
+ lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex);
+ cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000
+ bge,pn %icc,.update0 ! (5_1) if ( ax1 >= 0x7f800000 )
+ fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0;
+.cont0:
+ fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
+ cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000
+ bl,pn %icc,.update1 ! (5_1) if ( ax1 < 0x00800000 )
+ fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
+.cont1:
+ sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
+ cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000
+
+ sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
+ and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
+
+ ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24;
+ and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
+ fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
+ sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1;
+ faddd %f52,K2,%f62 ! (4_1) res0 += K2;
+
+ sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
+ bge,pn %icc,.update2 ! (0_0) if ( ax0 >= 0x7f800000 )
+ faddd %f50,K2,%f60 ! (5_1) res1 += K2;
+.cont2:
+ cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000
+	and	%i1,511,%i0		! (1_0) iexp1 &= 0x1ff;
+ fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
+ bl,pn %icc,.update3 ! (0_0) if ( ax0 < 0x00800000 )
+ fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
+.cont3:
+ fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0;
+ sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55;
+
+ fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1;
+ or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1;
+ stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0);
+
+ fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0;
+ sll stridex,1,stridex2 ! stridex2 = stridex * 2;
+
+ lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px;
+ add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0;
+ fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
+
+ lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex);
+ add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0;
+ faddd %f30,K1,%f62 ! (4_1) res0 += K1;
+
+ lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px;
+ add %o5,stridex2,%l7 ! px += stridex2
+ faddd %f48,K1,%f42 ! (5_1) res1 += K1;
+
+ lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex);
+ cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000
+ bge,pn %icc,.update4 ! (1_0) if ( ax1 >= 0x7f800000 )
+ fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0;
+.cont4:
+ fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
+ cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000
+ bl,pn %icc,.update5 ! (1_0) if ( ax1 < 0x00800000 )
+ fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
+.cont5:
+ fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0;
+ sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
+ cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000
+
+ fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1;
+ sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
+ and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
+
+ ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24;
+ and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
+ fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
+ sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1;
+ faddd %f52,K2,%f40 ! (0_0) res0 += K2;
+
+ ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1];
+ sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
+ and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff;
+ faddd %f50,K2,%f60 ! (1_0) res0 += K2;
+
+ ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1];
+ sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55;
+ add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
+ fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
+ fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0;
+ or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1;
+ faddd %f48,K0,%f62 ! (4_1) res0 += K0;
+
+ fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1;
+ add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
+ stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
+ faddd %f58,K0,%f60 ! (5_1) res1 += K0;
+
+ fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
+ bge,pn %icc,.update6 ! (2_0) if ( ax0 >= 0x7f800000 )
+ lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px;
+.cont6:
+ cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000
+ bl,pn %icc,.update7 ! (2_0) if ( ax0 < 0x00800000 )
+ nop
+.cont7:
+ fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
+
+ lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
+ cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000
+ fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0;
+ faddd %f40,K1,%f46 ! (0_0) res0 += K1;
+
+ lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px;
+ add %l7,stridex2,%i1 ! px += stridex2
+ fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1;
+ faddd %f48,K1,%f62 ! (1_0) res1 += K1;
+
+ lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex);
+ add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0;
+ bge,pn %icc,.update8 ! (3_0) if ( ax1 >= 0x7f800000 )
+ fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0;
+.cont8:
+ fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
+ cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000
+ bl,pn %icc,.update9 ! (3_0) if ( ax1 < 0x00800000 )
+ fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
+.cont9:
+ fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0;
+ sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
+ add %i1,stridex2,%o5 ! px += stridex2
+ fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0;
+
+ fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1;
+ sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
+ and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
+ fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1;
+
+ ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
+ and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
+ fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
+ sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1;
+ faddd %f52,K2,%f58 ! (2_0) res0 += K2;
+
+ ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1];
+	and	%l7,511,%l1		! (5_0) iexp1 &= 0x1ff;
+ add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
+ faddd %f50,K2,%f60 ! (3_0) res1 += K2;
+
+ ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1];
+ sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
+ sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
+ fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0);
+ sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55;
+ fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0;
+ or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1;
+ faddd %f48,K0,%f22 ! (0_0) res0 += K0;
+
+ fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1;
+ stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
+ faddd %f40,K0,%f26 ! (1_0) res1 += K0;
+
+ fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
+ fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0);
+
+ or %g0,%i2,%l7
+ add stridey,stridey,stridey2
+
+ cmp counter,6
+ bl,pn %icc,.tail
+ nop
+
+ ba .main_loop
+ sub counter,6,counter ! counter
+
+ .align 16
+.main_loop:
+ lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px;
+ cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000
+ bge,pn %icc,.update10 ! (4_1) if ( ax0 >= 0x7f800000 )
+ fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
+.cont10:
+ lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex);
+ cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000
+ fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0;
+ faddd %f62,K1,%f42 ! (2_1) res0 += K1;
+
+ lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px;
+ fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1;
+ bl,pn %icc,.update11 ! (4_1) if ( ax0 < 0x00800000 )
+ faddd %f58,K1,%f62 ! (3_1) res1 += K1;
+.cont11:
+ lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex);
+ cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000
+ bge,pn %icc,.update12 ! (5_1) if ( ax1 >= 0x7f800000 )
+ fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0;
+.cont12:
+ fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
+ cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000
+ bl,pn %icc,.update13 ! (5_1) if ( ax1 < 0x00800000 )
+ fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
+.cont13:
+ fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0;
+ sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
+ cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000
+ fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0;
+
+ fmuld %f62,%f24,%f58 ! (3_1) res1 *= xx1;
+ sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
+ and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
+ fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1;
+
+ ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24;
+ and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
+ fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
+ sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1;
+ faddd %f52,K2,%f62 ! (4_1) res0 += K2;
+
+ ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1];
+ sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
+ bge,pn %icc,.update14 ! (0_0) if ( ax0 >= 0x7f800000 )
+ faddd %f50,K2,%f60 ! (5_1) res1 += K2;
+.cont14:
+	ldd	[%o1+8],%f28		! (3_1) tbl_sqrt1 = ((double*)addr1)[1];
+ cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000
+	and	%i1,511,%i0		! (1_0) iexp1 &= 0x1ff;
+ fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0);
+ sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
+ bl,pn %icc,.update15 ! (0_0) if ( ax0 < 0x00800000 )
+ fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
+.cont15:
+ fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0;
+ sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55;
+ st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0];
+ faddd %f48,K0,%f62 ! (2_1) res0 += K0;
+
+ fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1;
+ or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1;
+ stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0);
+ faddd %f58,K0,%f60 ! (3_1) res1 += K0;
+
+ fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0;
+ sll stridex,1,stridex2 ! stridex2 = stridex * 2;
+ st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1];
+ fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0);
+
+ lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px;
+ add %l7,stridey2,%i1 ! py += stridey2
+ add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0;
+ fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
+
+ lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex);
+ add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0;
+ fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0;
+ faddd %f30,K1,%f62 ! (4_1) res0 += K1;
+
+ lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px;
+ add %o5,stridex2,%l7 ! px += stridex2
+ fmuld %f28,%f60,%f56 ! (3_1) res1 = tbl_sqrt1 * res1;
+ faddd %f48,K1,%f42 ! (5_1) res1 += K1;
+
+ lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex);
+ cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000
+ bge,pn %icc,.update16 ! (1_0) if ( ax1 >= 0x7f800000 )
+ fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0;
+.cont16:
+ fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
+ cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000
+ bl,pn %icc,.update17 ! (1_0) if ( ax1 < 0x00800000 )
+ fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
+.cont17:
+ fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0;
+ sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
+ cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000
+ fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0;
+
+ fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1;
+ sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
+ and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
+	fdtos	%f56,%f21		! (3_1) ((float*)&dres0)[1] = (float)res1;
+
+ ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24;
+ and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
+ fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
+ sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1;
+ faddd %f52,K2,%f40 ! (0_0) res0 += K2;
+
+ ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1];
+ sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
+ and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff;
+ faddd %f50,K2,%f60 ! (1_0) res0 += K2;
+
+ ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1];
+ sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55;
+ add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
+ fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0);
+ sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
+ add %i1,stridey2,%o3 ! py += stridey2
+ fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0;
+ or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1;
+ st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0];
+ faddd %f48,K0,%f62 ! (4_1) res0 += K0;
+
+ fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1;
+ add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
+ stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
+ faddd %f58,K0,%f60 ! (5_1) res1 += K0;
+
+ fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
+ bge,pn %icc,.update18 ! (2_0) if ( ax0 >= 0x7f800000 )
+ st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1];
+ fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
+.cont18:
+ cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000
+ bl,pn %icc,.update19 ! (2_0) if ( ax0 < 0x00800000 )
+ lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px;
+ fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
+.cont19:
+ lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
+ cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000
+ fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0;
+ faddd %f40,K1,%f46 ! (0_0) res0 += K1;
+
+ lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px;
+ add %l7,stridex2,%i1 ! px += stridex2
+ fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1;
+ faddd %f48,K1,%f62 ! (1_0) res1 += K1;
+
+ lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex);
+ add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0;
+ bge,pn %icc,.update20 ! (3_0) if ( ax1 >= 0x7f800000 )
+ fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0;
+.cont20:
+ fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
+ cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000
+ bl,pn %icc,.update21 ! (3_0) if ( ax1 < 0x00800000 )
+ fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
+.cont21:
+ fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0;
+ sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
+ add %i1,stridex2,%o5 ! px += stridex2
+ fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0;
+
+ fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1;
+ sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
+ and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
+ fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1;
+
+ ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
+ and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
+ fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
+ sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1;
+ faddd %f52,K2,%f58 ! (2_0) res0 += K2;
+
+ ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1];
+	and	%l7,511,%l1		! (5_0) iexp1 &= 0x1ff;
+ add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
+ faddd %f50,K2,%f60 ! (3_0) res1 += K2;
+
+ ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1];
+ sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
+ sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
+ fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0);
+ sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55;
+ add %o3,stridey2,%l7 ! py += stridey2
+ fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0;
+ or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1;
+ st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0];
+ faddd %f48,K0,%f22 ! (0_0) res0 += K0;
+
+ fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1;
+ subcc counter,6,counter ! counter -= 6;
+ stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
+ faddd %f40,K0,%f26 ! (1_0) res1 += K0;
+
+ fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
+ st %f1,[stridey+%o3] ! (3_1) *(py + stridey) = ((float*)&dres0)[1];
+ bpos,pt %icc,.main_loop
+ fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0);
+
+ add counter,6,counter
+.tail:
+ sll stridex,1,stridex2
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %l7,%i2
+
+ fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0;
+ faddd %f62,K1,%f42 ! (2_1) res0 += K1;
+
+ fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1;
+
+ fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0;
+ fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0;
+
+ fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1;
+
+ ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1];
+
+ ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0);
+
+ st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0];
+ subcc counter,1,counter
+ bneg,a .begin
+ add %l7,stridey,%i2
+
+ faddd %f48,K0,%f62 ! (2_1) res0 += K0;
+ st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1];
+ subcc counter,1,counter
+ bneg,a .begin
+ add %l7,stridey2,%i2
+ fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0);
+
+ add %l7,stridey2,%i1 ! py += stridey2
+
+ fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0;
+
+ fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0;
+
+ ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0);
+ add %i1,stridey2,%o3 ! py += stridey2
+
+ st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0];
+ subcc counter,1,counter
+ bneg,a .begin
+ add %i1,stridey,%i2
+
+ st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1];
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %o3,%i2
+ fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
+
+ st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0];
+ ba .begin
+ add %o3,stridey,%i2
+
+ .align 16
+.spec0:
+ fdivs FONE,%f14,%f14 ! x0 = FONE / x0;
+ add %l7,stridex,%l7 ! px += stridex
+ st %f14,[%i2] ! *py = x0;
+ sub counter,1,counter
+ ba .begin1
+ add %i2,stridey,%i2 ! py += stridey
+
+ .align 16
+.spec1:
+ andcc %g1,%o0,%g0
+ bz,a 1f
+	fdivs	FONE,%f14,%f14		! x0 = FONE / x0;
+
+ cmp %g1,0
+ bl,a 1f
+ fsqrts %f14,%f14 ! x0 = sqrtf(x0);
+
+ fitod %f14,%f0
+ fdtos %f0,%f14
+ fmuls %f14,FTWO,%f14
+ st %f14,[%fp+tmp3]
+ ld [%fp+tmp3],%g1
+ sethi %hi(0x4b000000),%o0
+ sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
+ fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
+ ba .cont_spec
+ sub %g1,%o0,%g1
+1:
+ add %l7,stridex,%l7 ! px += stridex
+ sub counter,1,counter
+ st %f14,[%i2] ! *py = x0;
+ ba .begin1
+ add %i2,stridey,%i2 ! py += stridey
+
+ .align 16
+.update0:
+ cmp counter,1
+ ble .cont0
+ nop
+
+ sub %i1,stridex,%o1
+ stx %o1,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont0
+ mov 1,counter
+
+ .align 16
+.update1:
+ sethi %hi(0x7ffffc00),%o0
+ cmp counter,1
+ ble .cont1
+
+ add %o0,0x3ff,%o0
+
+ andcc %g5,%o0,%g0
+ bz,a 1f
+ nop
+
+ cmp %g5,0
+ bl,a 1f
+ nop
+
+ fitod %f15,%f0
+ fdtos %f0,%f15
+ fmuls %f15,FTWO,%f15
+ st %f15,[%fp+tmp3]
+ ld [%fp+tmp3],%g5
+ sethi %hi(0x4b000000),%o0
+ sub %g5,%o0,%g5
+
+ fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
+
+ sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
+
+ sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
+ and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
+
+ fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sub %l0,%l7,%l1 ! (5_0) iexp1 = 0x3f - iexp1;
+
+ sll %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
+ add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
+ st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0);
+ fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
+
+ ba .cont1
+ fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
+1:
+ sub %i1,stridex,%o1
+ stx %o1,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont1
+ mov 1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble .cont2
+ sub %o5,stridex,%o1
+
+ sub %o1,stridex,%o1
+ stx %o1,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont2
+ mov 2,counter
+
+ .align 16
+.update3:
+ sethi %hi(0x7ffffc00),%o1
+ cmp counter,2
+ ble .cont3
+
+ add %o1,0x3ff,%o1
+
+ andcc %g1,%o1,%g0
+ bz,a 1f
+ sub %o5,stridex,%o1
+
+ cmp %g1,0
+ bl,a 1f
+ sub %o5,stridex,%o1
+
+ fitod %f18,%f0
+ fdtos %f0,%f18
+ fmuls %f18,FTWO,%f18
+ st %f18,[%fp+tmp3]
+ ld [%fp+tmp3],%g1
+ sethi %hi(0x4b000000),%o1
+ sub %g1,%o1,%g1
+
+ fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
+ sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
+
+ and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
+
+ ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
+ sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
+ ba .cont3
+ fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
+1:
+ sub %o1,stridex,%o1
+ stx %o1,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont3
+ mov 2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ ble .cont4
+ sub %l7,stridex2,%o1
+
+ sub %o1,stridex,%o1
+ stx %o1,[%fp+tmp_px]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont4
+ mov 3,counter
+
+ .align 16
+.update5:
+ sethi %hi(0x7ffffc00),%o1
+ cmp counter,3
+ ble .cont5
+
+ add %o1,0x3ff,%o1
+
+ andcc %i4,%o1,%g0
+ bz,a 1f
+ sub %l7,stridex2,%o1
+
+ cmp %i4,0
+ bl,a 1f
+ sub %l7,stridex2,%o1
+
+ fitod %f19,%f0
+ fdtos %f0,%f19
+ fmuls %f19,FTWO,%f19
+ st %f19,[%fp+tmp3]
+ ld [%fp+tmp3],%i4
+ sethi %hi(0x4b000000),%o1
+ sub %i4,%o1,%i4
+
+ fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
+
+ sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
+
+ sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24;
+ and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
+ fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sub %l0,%i1,%i0 ! (1_0) iexp1 = 0x3f - iexp1;
+
+ sll %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
+ fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0);
+
+ add %o7,TBL,%o7 ! (1_0) addr1 = (char*)TBL + si1;
+ fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
+
+ ba .cont5
+ fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
+1:
+ sub %o1,stridex,%o1
+ stx %o1,[%fp+tmp_px]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont5
+ mov 3,counter
+
+ .align 16
+.update6:
+ cmp counter,4
+ ble .cont6
+ sub %l7,stridex,%o3
+
+ sub %o3,stridex,%o3
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont6
+ mov 4,counter
+
+ .align 16
+.update7:
+ sethi %hi(0x7ffffc00),%o3
+ cmp counter,4
+ ble .cont7
+
+ add %o3,0x3ff,%o3
+
+ andcc %g1,%o3,%g0
+ bz,a 1f
+ sub %l7,stridex,%o3
+
+ cmp %g1,0
+ bl,a 1f
+ sub %l7,stridex,%o3
+
+ fitod %f24,%f0
+ fdtos %f0,%f24
+ fmuls %f24,FTWO,%f24
+ st %f24,[%fp+tmp3]
+ ld [%fp+tmp3],%g1
+ sethi %hi(0x4b000000),%o3
+ sub %g1,%o3,%g1
+
+ fands %f24,DC0,%f0 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
+ sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
+
+ and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
+
+ ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ fpsub32s %f24,%f0,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
+
+ sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
+
+ sll %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 23;
+ add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
+ fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
+ ba .cont7
+ fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
+1:
+ sub %o3,stridex,%o3
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont7
+ mov 4,counter
+
+ .align 16
+.update8:
+ cmp counter,5
+ ble .cont8
+ nop
+
+ sub %l7,stridex,%o3
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont8
+ mov 5,counter
+
+ .align 16
+.update9:
+ sethi %hi(0x7ffffc00),%o3
+ cmp counter,5
+ ble .cont9
+ sub %l7,stridex,%i3
+
+ add %o3,0x3ff,%o3
+
+ andcc %o5,%o3,%g0
+ bz 1f
+ ld [%i3],%f0
+
+ cmp %o5,0
+ bl,a 1f
+ nop
+
+ fitod %f0,%f0
+ fdtos %f0,%f0
+ fmuls %f0,FTWO,%f0
+ st %f0,[%fp+tmp3]
+ ld [%fp+tmp3],%o5
+ sethi %hi(0x4b000000),%o3
+ sub %o5,%o3,%o5
+
+ fands %f0,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
+
+ sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
+
+ sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24;
+ and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
+ fpsub32s %f0,%f8,%f0 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sub %l0,%o3,%i3 ! (3_0) iexp1 = 0x3f - iexp1;
+
+ sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
+ fitod %f0,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
+ st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0);
+
+ fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
+
+ ba .cont9
+ fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
+1:
+ stx %i3,[%fp+tmp_px]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont9
+ mov 5,counter
+
+ .align 16
+.update10:
+ cmp counter,0
+ ble .cont10
+ sub %i1,stridex,%o3
+
+ sub %o3,stridex,%o3
+ stx %o3,[%fp+tmp_px]
+
+ st counter,[%fp+tmp_counter]
+
+ ba .cont10
+ mov 0,counter
+
+ .align 16
+.update11:
+ sethi %hi(0x7ffffc00),%i4
+ cmp counter,0
+ ble .cont11
+ sub %i1,stridex,%o3
+
+ sub %o3,stridex,%o3
+ add %i4,0x3ff,%i4
+ ld [%o3],%i3
+
+ andcc %i3,%i4,%g0
+ bz 1f
+
+ cmp %i3,0
+ bl,a 1f
+ nop
+
+ fitod %f14,%f0
+ fdtos %f0,%f14
+ fmuls %f14,FTWO,%f14
+ st %f14,[%fp+tmp3]
+ ld [%fp+tmp3],%i3
+ sethi %hi(0x4b000000),%o3
+ sub %i3,%o3,%i3
+
+ fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
+ sra %i3,13,%l5 ! (4_0) si0 = ax0 >> 13;
+
+ and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
+
+ ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ fpsub32s %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ sra %i3,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
+
+ sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
+ fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ sllx %o0,23,%o0 ! (4_0) lexp0 = iexp0 << 23;
+
+ st %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
+
+ ba .cont11
+ fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
+1:
+ stx %o3,[%fp+tmp_px]
+
+ st counter,[%fp+tmp_counter]
+
+ ba .cont11
+ mov 0,counter
+
+ .align 16
+.update12:
+ cmp counter,1
+ ble .cont12
+ nop
+
+ sub %i1,stridex,%i1
+ stx %i1,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont12
+ mov 1,counter
+
+ .align 16
+.update13:
+ sethi %hi(0x7ffffc00),%o3
+ cmp counter,1
+ ble .cont13
+
+ add %o3,0x3ff,%o3
+
+ andcc %g5,%o3,%g0
+ bz 1f
+
+ cmp %g5,0
+ bl,a 1f
+ nop
+
+ fitod %f15,%f0
+ fdtos %f0,%f15
+ fmuls %f15,FTWO,%f15
+ st %f15,[%fp+tmp3]
+ ld [%fp+tmp3],%g5
+ sethi %hi(0x4b000000),%o3
+ sub %g5,%o3,%g5
+
+ fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
+
+ sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
+ sra %g5,24,%o3 ! (5_0) iexp1 = ax1 >> 24;
+ and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
+ fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sub %l0,%o3,%l1 ! (5_0) iexp1 = 0x3f - iexp1;
+
+ add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
+
+ sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
+ st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0);
+
+ fitod %f17,%f0 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ fmuld %f0,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
+ ba .cont13
+ fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
+1:
+ sub %i1,stridex,%i1
+ stx %i1,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont13
+ mov 1,counter
+
+ .align 16
+.update14:
+ cmp counter,2
+ ble .cont14
+ sub %o5,stridex,%o3
+
+ sub %o3,stridex,%o3
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont14
+ mov 2,counter
+
+ .align 16
+.update15:
+ sethi %hi(0x7ffffc00),%i3
+ cmp counter,2
+ ble .cont15
+ sub %o5,stridex,%o3
+
+ add %i3,0x3ff,%i3
+
+ andcc %g1,%i3,%g0
+ bz 1f
+ sub %o3,stridex,%o3
+
+ cmp %g1,0
+ bl,a 1f
+ nop
+
+ fitod %f18,%f0
+ fdtos %f0,%f18
+ fmuls %f18,FTWO,%f18
+ st %f18,[%fp+tmp3]
+ ld [%fp+tmp3],%g1
+ sethi %hi(0x4b000000),%o3
+ sub %g1,%o3,%g1
+
+ fands %f18,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
+ sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
+ and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
+
+ ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ fpsub32s %f18,%f0,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
+
+ sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
+
+ ba .cont15
+ fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
+1:
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont15
+ mov 2,counter
+
+ .align 16
+.update16:
+ cmp counter,3
+ ble .cont16
+ sub %l7,stridex2,%o3
+
+ sub %o3,stridex,%o3
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont16
+ mov 3,counter
+
+ .align 16
+.update17:
+ sethi %hi(0x7ffffc00),%i3
+ cmp counter,3
+ ble .cont17
+ sub %l7,stridex2,%o3
+
+ add %i3,0x3ff,%i3
+
+ andcc %i4,%i3,%g0
+ bz 1f
+ sub %o3,stridex,%o3
+
+ cmp %i4,0
+ bl,a 1f
+ nop
+
+ fitod %f19,%f0
+ fdtos %f0,%f19
+ fmuls %f19,FTWO,%f19
+ st %f19,[%fp+tmp3]
+ ld [%fp+tmp3],%i4
+ sethi %hi(0x4b000000),%o3
+ sub %i4,%o3,%i4
+
+ fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
+
+ sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
+
+ sra %i4,24,%i0 ! (1_0) iexp1 = ax1 >> 24;
+ and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
+ fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sub %l0,%i0,%i0 ! (1_0) iexp1 = 0x3f - iexp1;
+
+ sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
+ fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0);
+
+ add %o7,TBL,%o7 ! (1_0) addr1 = (char*)TBL + si1;
+ fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
+
+ ba .cont17
+ fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
+1:
+ stx %o3,[%fp+tmp_px]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont17
+ mov 3,counter
+
+ .align 16
+.update18:
+ cmp counter,4
+ ble .cont18
+ fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
+
+ sub %l7,stridex2,%i3
+ stx %i3,[%fp+tmp_px]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont18
+ mov 4,counter
+
+ .align 16
+.update19:
+ sethi %hi(0x7ffffc00),%i3
+ cmp counter,4
+ ble,a .cont19
+ fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
+
+ add %i3,0x3ff,%i3
+
+ andcc %g1,%i3,%g0
+ bz 1f
+ nop
+
+ cmp %g1,0
+ bl,a 1f
+ nop
+
+ fitod %f24,%f24
+ fdtos %f24,%f24
+ fmuls %f24,FTWO,%f24
+ st %f24,[%fp+tmp3]
+ ld [%fp+tmp3],%g1
+ sethi %hi(0x4b000000),%i3
+ sub %g1,%i3,%g1
+
+ fands %f24,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
+ sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
+
+ and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
+
+ ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
+ fpsub32s %f24,%f8,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
+
+ sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
+
+ sllx %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 23;
+ add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
+ fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
+
+ st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
+ fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
+
+ ba .cont19
+ fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
+1:
+ sub %l7,stridex2,%i3
+ stx %i3,[%fp+tmp_px]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ mov 4,counter
+ ba .cont19
+ fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
+
+ .align 16
+.update20:
+ cmp counter,5
+ ble .cont20
+ nop
+
+ sub %l7,stridex,%i3
+ stx %i3,[%fp+tmp_px]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont20
+ mov 5,counter
+
+ .align 16
+.update21:
+ sethi %hi(0x7ffffc00),%i3
+ cmp counter,5
+ ble,a .cont21
+ nop
+
+ sub %l7,stridex,%i4
+ add %i3,0x3ff,%i3
+
+ andcc %o5,%i3,%g0
+ bz 1f
+ ld [%i4],%f8
+
+ cmp %o5,0
+ bl,a 1f
+ nop
+
+ fitod %f8,%f8
+ fdtos %f8,%f8
+ fmuls %f8,FTWO,%f8
+ st %f8,[%fp+tmp3]
+ ld [%fp+tmp3],%o5
+ sethi %hi(0x4b000000),%i3
+ sub %o5,%i3,%o5
+
+ fands %f8,DC0,%f24 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
+
+ sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
+
+ sra %o5,24,%i3 ! (3_0) iexp1 = ax1 >> 24;
+ and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
+ fpsub32s %f8,%f24,%f24 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
+
+ ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
+ sub %l0,%i3,%i3 ! (3_0) iexp1 = 0x3f - iexp1;
+
+ sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
+ fitod %f24,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
+
+ add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
+ st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0);
+
+ fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
+
+ ba .cont21
+ fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
+1:
+ sub %l7,stridex,%i3
+ stx %i3,[%fp+tmp_px]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont21
+ mov 5,counter
+
+ .align 16
+.exit:
+ ret
+ restore
+
+ SET_SIZE(__vrsqrtf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsin.S b/usr/src/libm/src/mvec/vis/__vsin.S
new file mode 100644
index 0000000..3f93d4c
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsin.S
@@ -0,0 +1,3002 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsin.S 1.9 06/01/23 SMI"
+
+ .file "__vsin.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+constants:
+ .word 0x3ec718e3,0xa6972785
+ .word 0x3ef9fd39,0x94293940
+ .word 0xbf2a019f,0x75ee4be1
+ .word 0xbf56c16b,0xba552569
+ .word 0x3f811111,0x1108c703
+ .word 0x3fa55555,0x554f5b35
+ .word 0xbfc55555,0x555554d0
+ .word 0xbfdfffff,0xffffff85
+ .word 0x3ff00000,0x00000000
+ .word 0xbfc55555,0x5551fc28
+ .word 0x3f811107,0x62eacc9d
+ .word 0xbfdfffff,0xffff6328
+ .word 0x3fa55551,0x5f7acf0c
+ .word 0x3fe45f30,0x6dc9c883
+ .word 0x43380000,0x00000000
+ .word 0x3ff921fb,0x54400000
+ .word 0x3dd0b461,0x1a600000
+ .word 0x3ba3198a,0x2e000000
+ .word 0x397b839a,0x252049c1
+ .word 0x80000000,0x00004000
+ .word 0xffff8000,0x00000000 ! N.B.: low-order words used
+ .word 0x3fc90000,0x80000000 ! for sign bit hacking; see
+ .word 0x3fc40000,0x00000000 ! references to "thresh" below
+
+#define p4 0x0
+#define q4 0x08
+#define p3 0x10
+#define q3 0x18
+#define p2 0x20
+#define q2 0x28
+#define p1 0x30
+#define q1 0x38
+#define one 0x40
+#define pp1 0x48
+#define pp2 0x50
+#define qq1 0x58
+#define qq2 0x60
+#define invpio2 0x68
+#define round 0x70
+#define pio2_1 0x78
+#define pio2_2 0x80
+#define pio2_3 0x88
+#define pio2_3t 0x90
+#define f30val 0x98
+#define mask 0xa0
+#define thresh 0xa8
+
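+! Rough C sketch of how the constants above appear to be used (a
+! reconstruction for readability only; the sin_hi/cos_hi/sin_lo
+! table-layout names below are assumptions, not taken from the sources):
+!
+!   z = x * x;
+!   if (|x| < 0x3fc90000)   /* small |x|: straight polynomial */
+!       sin(x) ~= x + x*z*(p1 + z*(p2 + z*(p3 + z*p4)));
+!   else {                  /* rest of primary range: table driven */
+!       c = table point nearest |x|;  r = |x| - c;  z = r*r;
+!       sinr   = r*(one + z*(pp1 + z*pp2));   /* ~ sin(r)     */
+!       cosrm1 = z*(qq1 + z*qq2);             /* ~ cos(r) - 1 */
+!       sin(x) ~= sin_hi(c)
+!               + (sinr*cos_hi(c) + cosrm1*sin_hi(c) + sin_lo(c));
+!   }
+! invpio2, round, pio2_1..pio2_3t serve the medium-range argument
+! reduction; see the sketch ahead of the medium-range code below.
+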
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define biguns STACK_BIAS-0x20
+#define n2 STACK_BIAS-0x24
+#define n1 STACK_BIAS-0x28
+#define n0 STACK_BIAS-0x2c
+#define x2_1 STACK_BIAS-0x40
+#define x1_1 STACK_BIAS-0x50
+#define x0_1 STACK_BIAS-0x60
+#define y2_0 STACK_BIAS-0x70
+#define y1_0 STACK_BIAS-0x80
+#define y0_0 STACK_BIAS-0x90
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x90
+
+!--------------------------------------------------------------
+! Some defines to keep code more readable
+#define LIM_l6 %l6
+! in the primary range, contains the |x| upper limit below which sin(x)=x.
+! in transferring to the medium range, denotes which loop was active.
+!--------------------------------------------------------------
+
+ ENTRY(__vsin)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(g5)
+ PIC_SET(g5,__vlibm_TBL_sincos_hi,l3)
+ PIC_SET(g5,__vlibm_TBL_sincos_lo,l4)
+ PIC_SET(g5,constants,l5)
+ mov %l5,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+
+! ========== primary range ==========
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! l0 hx0
+! l1 hx1
+! l2 hx2
+! l3 __vlibm_TBL_sincos_hi
+! l4 __vlibm_TBL_sincos_lo
+! l5 0x3fc90000
+! l6 0x3e400000
+! l7 0x3fe921fb
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 scratch
+! g5
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 oy0
+! o4 oy1
+! o5 oy2
+! o7 scratch
+
+! f0 x0
+! f2
+! f4
+! f6
+! f8 scratch for table base
+! f9 signbit0
+! f10 x1
+! f12
+! f14
+! f16
+! f18 scratch for table base
+! f19 signbit1
+! f20 x2
+! f22
+! f24
+! f26
+! f28 scratch for table base
+! f29 signbit2
+! f30 0x80000000
+! f31 0x4000
+! f32
+! f34
+! f36
+! f38
+! f40
+! f42
+! f44 0xffff800000000000
+! f46 p1
+! f48 p2
+! f50 p3
+! f52 p4
+! f54 one
+! f56 pp1
+! f58 pp2
+! f60 qq1
+! f62 qq2
+
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ysave]
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ysave]
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sysave]
+ sethi %hi(0x80000000),%i5 ! load/set up constants
+ sethi %hi(0x3fc90000),%l5
+ sethi %hi(0x3e400000),LIM_l6
+ sethi %hi(0x3fe921fb),%l7
+ or %l7,%lo(0x3fe921fb),%l7
+ ldd [%g1+f30val],%f30
+ ldd [%g1+mask],%f44
+ ldd [%g1+p1],%f46
+ ldd [%g1+p2],%f48
+ ldd [%g1+p3],%f50
+ ldd [%g1+p4],%f52
+ ldd [%g1+one],%f54
+ ldd [%g1+pp1],%f56
+ ldd [%g1+pp2],%f58
+ ldd [%g1+qq1],%f60
+ ldd [%g1+qq2],%f62
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,x0_1,%o3 ! precondition loop
+ add %fp,x0_1,%o4
+ add %fp,x0_1,%o5
+ ld [%i1],%l0 ! hx = *x
+ ld [%i1],%f0
+ ld [%i1+4],%f1
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+ add %i1,%i2,%i1 ! x += stridex
+
+ ba,pt %icc,.loop0
+! delay slot
+ nop
+
+ .align 32
+.loop0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,LIM_l6,%g1
+ sub %l7,%l0,%o7
+ fands %f0,%f30,%f9 ! save signbit
+
+ lda [%i1]%asi,%f10
+ orcc %o7,%g1,%g0
+ mov %i3,%o0 ! py0 = y
+ bl,pn %icc,.range0 ! if hx < 0x3e400000 or > 0x3fe921fb
+
+! delay slot
+ lda [%i1+4]%asi,%f11
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop1
+
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ fabsd %f0,%f0
+ fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only
+
+.loop1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,LIM_l6,%g1
+ sub %l7,%l1,%o7
+ fands %f10,%f30,%f19 ! save signbit
+
+ lda [%i1]%asi,%f20
+ orcc %o7,%g1,%g0
+ mov %i3,%o1 ! py1 = y
+ bl,pn %icc,.range1 ! if hx < 0x3e400000 or > 0x3fe921fb
+
+! delay slot
+ lda [%i1+4]%asi,%f21
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.endloop2
+
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ fabsd %f10,%f10
+ fmuld %f54,%f54,%f54 ! one*one; a nop for alignment only
+
+.loop2:
+ st %f6,[%o3]
+ sub %l2,LIM_l6,%g1
+ sub %l7,%l2,%o7
+ fands %f20,%f30,%f29 ! save signbit
+
+ st %f7,[%o3+4]
+ orcc %g1,%o7,%g0
+ mov %i3,%o2 ! py2 = y
+ bl,pn %icc,.range2 ! if hx < 0x3e400000 or > 0x3fe921fb
+
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+ cmp %l0,%l5
+ fabsd %f20,%f20
+ bl,pn %icc,.case4
+
+! delay slot
+ st %f16,[%o4]
+ cmp %l1,%l5
+ fpadd32s %f0,%f31,%f8
+ bl,pn %icc,.case2
+
+! delay slot
+ st %f17,[%o4+4]
+ cmp %l2,%l5
+ fpadd32s %f10,%f31,%f18
+ bl,pn %icc,.case1
+
+! delay slot
+ st %f26,[%o5]
+ mov %o0,%o3
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s %f20,%f31,%f28
+
+ st %f27,[%o5+4]
+ fand %f8,%f44,%f2
+ mov %o1,%o4
+
+ fand %f18,%f44,%f12
+ mov %o2,%o5
+ sub %l0,%o7,%l0
+
+ fand %f28,%f44,%f22
+ sub %l1,%o7,%l1
+ sub %l2,%o7,%l2
+
+ fsubd %f0,%f2,%f0
+ srl %l0,10,%l0
+ add %l3,8,%g1
+
+ fsubd %f10,%f12,%f10
+ srl %l1,10,%l1
+
+ fsubd %f20,%f22,%f20
+ srl %l2,10,%l2
+
+ fmuld %f0,%f0,%f2
+ andn %l0,0x1f,%l0
+
+ fmuld %f10,%f10,%f12
+ andn %l1,0x1f,%l1
+
+ fmuld %f20,%f20,%f22
+ andn %l2,0x1f,%l2
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f36
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f40
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+ ldd [%g1+%l0],%f34
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+ ldd [%g1+%l1],%f38
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+ ldd [%g1+%l2],%f42
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f2
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f12
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f22
+
+ fmuld %f4,%f32,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f14,%f36,%f14
+ lda [%i1]%asi,%f0
+
+ fmuld %f24,%f40,%f24
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f6,%f34,%f6
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld %f16,%f38,%f16
+
+ fmuld %f26,%f42,%f26
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f14,%f16
+
+ faddd %f26,%f24,%f26
+
+ faddd %f6,%f2,%f6
+
+ faddd %f16,%f12,%f16
+
+ faddd %f26,%f22,%f26
+
+ faddd %f6,%f32,%f6
+
+ faddd %f16,%f36,%f16
+
+ faddd %f26,%f40,%f26
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ fors %f6,%f9,%f6
+ addcc %i0,-1,%i0
+
+ fors %f16,%f19,%f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f26,%f29,%f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case1:
+ st %f27,[%o5+4]
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fand %f8,%f44,%f2
+
+ sub %l0,%o7,%l0
+ sub %l1,%o7,%l1
+ fand %f18,%f44,%f12
+ fmuld %f20,%f20,%f22
+
+ fsubd %f0,%f2,%f0
+ srl %l0,10,%l0
+ mov %o0,%o3
+
+ fsubd %f10,%f12,%f10
+ srl %l1,10,%l1
+ mov %o1,%o4
+
+ fmuld %f22,%f52,%f24
+ mov %o2,%o5
+
+ fmuld %f0,%f0,%f2
+ andn %l0,0x1f,%l0
+
+ fmuld %f10,%f10,%f12
+ andn %l1,0x1f,%l1
+
+ faddd %f24,%f50,%f24
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f36
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+ ldd [%g1+%l0],%f34
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+ ldd [%g1+%l1],%f38
+
+ faddd %f24,%f48,%f24
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+
+ faddd %f24,%f46,%f24
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f2
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f12
+
+ fmuld %f4,%f32,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f14,%f36,%f14
+ lda [%i1]%asi,%f0
+
+ fmuld %f6,%f34,%f6
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f16,%f38,%f16
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f14,%f16
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f6,%f2,%f6
+
+ faddd %f16,%f12,%f16
+
+ faddd %f20,%f24,%f26
+
+ faddd %f6,%f32,%f6
+
+ faddd %f16,%f36,%f16
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ fors %f26,%f29,%f26
+ addcc %i0,-1,%i0
+
+ fors %f6,%f9,%f6
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f16,%f19,%f16
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case2:
+ st %f26,[%o5]
+ cmp %l2,%l5
+ fpadd32s %f20,%f31,%f28
+ bl,pn %icc,.case3
+
+! delay slot
+ st %f27,[%o5+4]
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fand %f8,%f44,%f2
+
+ sub %l0,%o7,%l0
+ sub %l2,%o7,%l2
+ fand %f28,%f44,%f22
+ fmuld %f10,%f10,%f12
+
+ fsubd %f0,%f2,%f0
+ srl %l0,10,%l0
+ mov %o0,%o3
+
+ fsubd %f20,%f22,%f20
+ srl %l2,10,%l2
+ mov %o2,%o5
+
+ fmuld %f12,%f52,%f14
+ mov %o1,%o4
+
+ fmuld %f0,%f0,%f2
+ andn %l0,0x1f,%l0
+
+ fmuld %f20,%f20,%f22
+ andn %l2,0x1f,%l2
+
+ faddd %f14,%f50,%f14
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f40
+
+ fmuld %f12,%f14,%f14
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+ ldd [%g1+%l0],%f34
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+ ldd [%g1+%l2],%f42
+
+ faddd %f14,%f48,%f14
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f12,%f14,%f14
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+
+ faddd %f14,%f46,%f14
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f2
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f22
+
+ fmuld %f4,%f32,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f24,%f40,%f24
+ lda [%i1]%asi,%f0
+
+ fmuld %f6,%f34,%f6
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f26,%f42,%f26
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld %f12,%f14,%f14
+
+ faddd %f6,%f4,%f6
+
+ faddd %f26,%f24,%f26
+
+ fmuld %f10,%f14,%f14
+
+ faddd %f6,%f2,%f6
+
+ faddd %f26,%f22,%f26
+
+ faddd %f10,%f14,%f16
+
+ faddd %f6,%f32,%f6
+
+ faddd %f26,%f40,%f26
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ fors %f16,%f19,%f16
+ addcc %i0,-1,%i0
+
+ fors %f6,%f9,%f6
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f26,%f29,%f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case3:
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fand %f8,%f44,%f2
+ fmuld %f10,%f10,%f12
+
+ sub %l0,%o7,%l0
+ fmuld %f20,%f20,%f22
+
+ fsubd %f0,%f2,%f0
+ srl %l0,10,%l0
+ mov %o0,%o3
+
+ fmuld %f12,%f52,%f14
+ mov %o1,%o4
+
+ fmuld %f22,%f52,%f24
+ mov %o2,%o5
+
+ fmuld %f0,%f0,%f2
+ andn %l0,0x1f,%l0
+
+ faddd %f14,%f50,%f14
+
+ faddd %f24,%f50,%f24
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f12,%f14,%f14
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+ ldd [%g1+%l0],%f34
+
+ faddd %f14,%f48,%f14
+
+ faddd %f24,%f48,%f24
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f12,%f14,%f14
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+
+ faddd %f14,%f46,%f14
+
+ faddd %f24,%f46,%f24
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f2
+
+ fmuld %f4,%f32,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f12,%f14,%f14
+ lda [%i1]%asi,%f0
+
+ fmuld %f6,%f34,%f6
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f22,%f24,%f24
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld %f10,%f14,%f14
+
+ faddd %f6,%f4,%f6
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f10,%f14,%f16
+
+ faddd %f6,%f2,%f6
+
+ faddd %f20,%f24,%f26
+
+ fors %f16,%f19,%f16
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ faddd %f6,%f32,%f6
+ addcc %i0,-1,%i0
+
+ fors %f26,%f29,%f26
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f6,%f9,%f6
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case4:
+ st %f17,[%o4+4]
+ cmp %l1,%l5
+ fpadd32s %f10,%f31,%f18
+ bl,pn %icc,.case6
+
+! delay slot
+ st %f26,[%o5]
+ cmp %l2,%l5
+ fpadd32s %f20,%f31,%f28
+ bl,pn %icc,.case5
+
+! delay slot
+ st %f27,[%o5+4]
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fand %f18,%f44,%f12
+
+ sub %l1,%o7,%l1
+ sub %l2,%o7,%l2
+ fand %f28,%f44,%f22
+ fmuld %f0,%f0,%f2
+
+ fsubd %f10,%f12,%f10
+ srl %l1,10,%l1
+ mov %o1,%o4
+
+ fsubd %f20,%f22,%f20
+ srl %l2,10,%l2
+ mov %o2,%o5
+
+ fmovd %f0,%f6
+ fmuld %f2,%f52,%f4
+ mov %o0,%o3
+
+ fmuld %f10,%f10,%f12
+ andn %l1,0x1f,%l1
+
+ fmuld %f20,%f20,%f22
+ andn %l2,0x1f,%l2
+
+ faddd %f4,%f50,%f4
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f36
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f40
+
+ fmuld %f2,%f4,%f4
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+ ldd [%g1+%l1],%f38
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+ ldd [%g1+%l2],%f42
+
+ faddd %f4,%f48,%f4
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f2,%f4,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+
+ faddd %f4,%f46,%f4
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f12
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f22
+
+ fmuld %f14,%f36,%f14
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f24,%f40,%f24
+ lda [%i1]%asi,%f0
+
+ fmuld %f16,%f38,%f16
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f26,%f42,%f26
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld %f2,%f4,%f4
+
+ faddd %f16,%f14,%f16
+
+ faddd %f26,%f24,%f26
+
+ fmuld %f6,%f4,%f4
+
+ faddd %f16,%f12,%f16
+
+ faddd %f26,%f22,%f26
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f36,%f16
+
+ faddd %f26,%f40,%f26
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ fors %f6,%f9,%f6
+ addcc %i0,-1,%i0
+
+ fors %f16,%f19,%f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f26,%f29,%f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case5:
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fand %f18,%f44,%f12
+ fmuld %f0,%f0,%f2
+
+ sub %l1,%o7,%l1
+ fmuld %f20,%f20,%f22
+
+ fsubd %f10,%f12,%f10
+ srl %l1,10,%l1
+ mov %o1,%o4
+
+ fmovd %f0,%f6
+ fmuld %f2,%f52,%f4
+ mov %o0,%o3
+
+ fmuld %f22,%f52,%f24
+ mov %o2,%o5
+
+ fmuld %f10,%f10,%f12
+ andn %l1,0x1f,%l1
+
+ faddd %f4,%f50,%f4
+
+ faddd %f24,%f50,%f24
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f36
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+ ldd [%g1+%l1],%f38
+
+ faddd %f4,%f48,%f4
+
+ faddd %f24,%f48,%f24
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+
+ faddd %f4,%f46,%f4
+
+ faddd %f24,%f46,%f24
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f12
+
+ fmuld %f14,%f36,%f14
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f2,%f4,%f4
+ lda [%i1]%asi,%f0
+
+ fmuld %f16,%f38,%f16
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f22,%f24,%f24
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld %f6,%f4,%f4
+
+ faddd %f16,%f14,%f16
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f12,%f16
+
+ faddd %f20,%f24,%f26
+
+ fors %f6,%f9,%f6
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ faddd %f16,%f36,%f16
+ addcc %i0,-1,%i0
+
+ fors %f26,%f29,%f26
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f16,%f19,%f16
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case6:
+ st %f27,[%o5+4]
+ cmp %l2,%l5
+ fpadd32s %f20,%f31,%f28
+ bl,pn %icc,.case7
+
+! delay slot
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fand %f28,%f44,%f22
+ fmuld %f0,%f0,%f2
+
+ sub %l2,%o7,%l2
+ fmuld %f10,%f10,%f12
+
+ fsubd %f20,%f22,%f20
+ srl %l2,10,%l2
+ mov %o2,%o5
+
+ fmovd %f0,%f6
+ fmuld %f2,%f52,%f4
+ mov %o0,%o3
+
+ fmuld %f12,%f52,%f14
+ mov %o1,%o4
+
+ fmuld %f20,%f20,%f22
+ andn %l2,0x1f,%l2
+
+ faddd %f4,%f50,%f4
+
+ faddd %f14,%f50,%f14
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f40
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f12,%f14,%f14
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+ ldd [%g1+%l2],%f42
+
+ faddd %f4,%f48,%f4
+
+ faddd %f14,%f48,%f14
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f12,%f14,%f14
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+
+ faddd %f4,%f46,%f4
+
+ faddd %f14,%f46,%f14
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f22
+
+ fmuld %f24,%f40,%f24
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f2,%f4,%f4
+ lda [%i1]%asi,%f0
+
+ fmuld %f26,%f42,%f26
+ lda [%i1+4]%asi,%f1
+
+ fmuld %f12,%f14,%f14
+ add %i1,%i2,%i1 ! x += stridex
+
+ fmuld %f6,%f4,%f4
+
+ faddd %f26,%f24,%f26
+
+ fmuld %f10,%f14,%f14
+
+ faddd %f6,%f4,%f6
+
+ faddd %f26,%f22,%f26
+
+ faddd %f10,%f14,%f16
+
+ fors %f6,%f9,%f6
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ faddd %f26,%f40,%f26
+ addcc %i0,-1,%i0
+
+ fors %f16,%f19,%f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f26,%f29,%f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+ .align 32
+.case7:
+ fmuld %f0,%f0,%f2
+ fmovd %f0,%f6
+ mov %o0,%o3
+
+ fmuld %f10,%f10,%f12
+ mov %o1,%o4
+
+ fmuld %f20,%f20,%f22
+ mov %o2,%o5
+
+ fmuld %f2,%f52,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fmuld %f12,%f52,%f14
+ lda [%i1]%asi,%f0
+
+ fmuld %f22,%f52,%f24
+ lda [%i1+4]%asi,%f1
+
+ faddd %f4,%f50,%f4
+ add %i1,%i2,%i1 ! x += stridex
+
+ faddd %f14,%f50,%f14
+
+ faddd %f24,%f50,%f24
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f12,%f14,%f14
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f4,%f48,%f4
+
+ faddd %f14,%f48,%f14
+
+ faddd %f24,%f48,%f24
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f12,%f14,%f14
+
+ fmuld %f22,%f24,%f24
+
+ faddd %f4,%f46,%f4
+
+ faddd %f14,%f46,%f14
+
+ faddd %f24,%f46,%f24
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f12,%f14,%f14
+
+ fmuld %f22,%f24,%f24
+
+ fmuld %f6,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f6,%f4,%f6
+
+ faddd %f10,%f14,%f16
+
+ faddd %f20,%f24,%f26
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+
+ fors %f6,%f9,%f6
+ addcc %i0,-1,%i0
+
+ fors %f16,%f19,%f16
+ bg,pt %icc,.loop0
+
+! delay slot
+ fors %f26,%f29,%f26
+
+ ba,pt %icc,.endloop0
+! delay slot
+ nop
+
+
+ .align 32
+.endloop2:
+ cmp %l1,%l5
+ bl,pn %icc,1f
+! delay slot
+ fabsd %f10,%f10
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s %f10,%f31,%f18
+ add %l3,8,%g1
+ fand %f18,%f44,%f12
+ sub %l1,%o7,%l1
+ fsubd %f10,%f12,%f10
+ srl %l1,10,%l1
+ fmuld %f10,%f10,%f12
+ andn %l1,0x1f,%l1
+ fmuld %f12,%f58,%f20
+ ldd [%l3+%l1],%f36
+ faddd %f20,%f56,%f20
+ fmuld %f12,%f62,%f14
+ ldd [%g1+%l1],%f38
+ fmuld %f12,%f20,%f20
+ faddd %f14,%f60,%f14
+ faddd %f20,%f54,%f20
+ fmuld %f12,%f14,%f14
+ fmuld %f10,%f20,%f20
+ ldd [%l4+%l1],%f12
+ fmuld %f14,%f36,%f14
+ fmuld %f20,%f38,%f20
+ faddd %f20,%f14,%f20
+ faddd %f20,%f12,%f20
+ ba,pt %icc,2f
+! delay slot
+ faddd %f20,%f36,%f20
+1:
+ fmuld %f10,%f10,%f12
+ fmuld %f12,%f52,%f14
+ faddd %f14,%f50,%f14
+ fmuld %f12,%f14,%f14
+ faddd %f14,%f48,%f14
+ fmuld %f12,%f14,%f14
+ faddd %f14,%f46,%f14
+ fmuld %f12,%f14,%f14
+ fmuld %f10,%f14,%f14
+ faddd %f10,%f14,%f20
+2:
+ fors %f20,%f19,%f20
+ st %f20,[%o1]
+ st %f21,[%o1+4]
+
+.endloop1:
+ cmp %l0,%l5
+ bl,pn %icc,1f
+! delay slot
+ fabsd %f0,%f0
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s %f0,%f31,%f8
+ add %l3,8,%g1
+ fand %f8,%f44,%f2
+ sub %l0,%o7,%l0
+ fsubd %f0,%f2,%f0
+ srl %l0,10,%l0
+ fmuld %f0,%f0,%f2
+ andn %l0,0x1f,%l0
+ fmuld %f2,%f58,%f20
+ ldd [%l3+%l0],%f32
+ faddd %f20,%f56,%f20
+ fmuld %f2,%f62,%f4
+ ldd [%g1+%l0],%f34
+ fmuld %f2,%f20,%f20
+ faddd %f4,%f60,%f4
+ faddd %f20,%f54,%f20
+ fmuld %f2,%f4,%f4
+ fmuld %f0,%f20,%f20
+ ldd [%l4+%l0],%f2
+ fmuld %f4,%f32,%f4
+ fmuld %f20,%f34,%f20
+ faddd %f20,%f4,%f20
+ faddd %f20,%f2,%f20
+ ba,pt %icc,2f
+! delay slot
+ faddd %f20,%f32,%f20
+1:
+ fmuld %f0,%f0,%f2
+ fmuld %f2,%f52,%f4
+ faddd %f4,%f50,%f4
+ fmuld %f2,%f4,%f4
+ faddd %f4,%f48,%f4
+ fmuld %f2,%f4,%f4
+ faddd %f4,%f46,%f4
+ fmuld %f2,%f4,%f4
+ fmuld %f0,%f4,%f4
+ faddd %f0,%f4,%f20
+2:
+ fors %f20,%f9,%f20
+ st %f20,[%o0]
+ st %f21,[%o0+4]
+
+.endloop0:
+ st %f6,[%o3]
+ st %f7,[%o3+4]
+ st %f16,[%o4]
+ st %f17,[%o4+4]
+ st %f26,[%o5]
+ st %f27,[%o5+4]
+
+! return. finished off with only primary range arguments.
+
+ ret
+ restore
+
+
+ .align 32
+.range0:
+ cmp %l0,LIM_l6
+ bg,a,pt %icc,.MEDIUM ! branch if x is not tiny
+! delay slot, annulled if branch not taken
+ mov 0x1,LIM_l6 ! set "processing loop0"
+ st %f0,[%o0] ! *y = *x with inexact if x nonzero
+ st %f1,[%o0+4]
+ fdtoi %f0,%f2
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop0
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovd %f10,%f0
+ ba,pt %icc,.loop0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.range1:
+ cmp %l1,LIM_l6
+ bg,a,pt %icc,.MEDIUM ! branch if x is not tiny
+! delay slot, annulled if branch not taken
+ mov 0x2,LIM_l6 ! set "processing loop1"
+ st %f10,[%o1] ! *y = *x with inexact if x nonzero
+ st %f11,[%o1+4]
+ fdtoi %f10,%f12
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovd %f20,%f10
+ ba,pt %icc,.loop1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.range2:
+ cmp %l2,LIM_l6
+ bg,a,pt %icc,.MEDIUM ! branch if x is not tiny
+! delay slot, annulled if branch not taken
+ mov 0x3,LIM_l6 ! set "processing loop2"
+ st %f20,[%o2] ! *y = *x with inexact if x nonzero
+ st %f21,[%o2+4]
+ fdtoi %f20,%f22
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.endloop2
+! delay slot
+ nop
+ ld [%i1],%l2
+ ld [%i1],%f20
+ ld [%i1+4],%f21
+ andn %l2,%i5,%l2 ! hx &= ~0x80000000
+ ba,pt %icc,.loop2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.MEDIUM:
+
+! ========== medium range ==========
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! l0 hx0
+! l1 hx1
+! l2 hx2
+! l3 __vlibm_TBL_sincos_hi
+! l4 __vlibm_TBL_sincos_lo
+! l5 constants
+! l6 in transition from the primary range to here; used for biguns
+! l7 0x413921fb
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 scratch
+! g5
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 n0
+! o4 n1
+! o5 n2
+! o7 scratch
+
+! f0 x0
+! f2 n0,y0
+! f4
+! f6
+! f8 scratch for table base
+! f9 signbit0
+! f10 x1
+! f12 n1,y1
+! f14
+! f16
+! f18 scratch for table base
+! f19 signbit1
+! f20 x2
+! f22 n2,y2
+! f24
+! f26
+! f28 scratch for table base
+! f29 signbit2
+! f30 0x80000000
+! f31 0x4000
+! f32
+! f34
+! f36
+! f38
+! f40 invpio2
+! f42 round
+! f44 0xffff800000000000
+! f46 pio2_1
+! f48 pio2_2
+! f50 pio2_3
+! f52 pio2_3t
+! f54 one
+! f56 pp1
+! f58 pp2
+! f60 qq1
+! f62 qq2
+
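+! Rough C sketch of the reduction done in .LOOP0/.LOOP1/.LOOP2 below (a
+! hedged reconstruction from the code; variable names are invented):
+!
+!   n  = (x*invpio2 + round) - round;   /* nearest int via the 2^52+2^51 trick */
+!   hi = ((x - n*pio2_1) - n*pio2_2) - n*pio2_3;  /* pi/2 split over three terms */
+!   lo = accumulated rounding error of the subtractions above - n*pio2_3t;
+!   /* n & 1 then appears to select the sin-like vs. cos-like evaluation */
+!   /* of hi+lo, and n & 2 the final sign, via the "thresh" words; see   */
+!   /* .FIXSIGN below.                                                   */
+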
+ PIC_SET(g5,constants,l5)
+
+ ! %o3,%o4,%o5 need to be stored
+ st %f6,[%o3]
+ sethi %hi(0x413921fb),%l7
+ st %f7,[%o3+4]
+ or %l7,%lo(0x413921fb),%l7
+ st %f16,[%o4]
+ st %f17,[%o4+4]
+ st %f26,[%o5]
+ st %f27,[%o5+4]
+ ldd [%l5+invpio2],%f40
+ ldd [%l5+round],%f42
+ ldd [%l5+pio2_1],%f46
+ ldd [%l5+pio2_2],%f48
+ ldd [%l5+pio2_3],%f50
+ ldd [%l5+pio2_3t],%f52
+ std %f54,[%fp+x0_1+8] ! set up stack data
+ std %f54,[%fp+x1_1+8]
+ std %f54,[%fp+x2_1+8]
+ stx %g0,[%fp+y0_0+8]
+ stx %g0,[%fp+y1_0+8]
+ stx %g0,[%fp+y2_0+8]
+
+! branched here in the middle of the array. Need to adjust
+! for the members of the triple that were selected in the primary
+! loop.
+
+! no adjustment since all three selected here
+ subcc LIM_l6,0x1,%g0 ! continue in LOOP0?
+ bz,a %icc,.LOOP0
+ mov 0x0,LIM_l6 ! delay slot set biguns=0
+
+! adjust 1st triple since 2nd and 3rd are done here
+ subcc LIM_l6,0x2,%g0 ! continue in LOOP1?
+ fors %f0,%f9,%f0 ! restore sign bit
+ fmuld %f0,%f40,%f2 ! adj LOOP0
+ bz,a %icc,.LOOP1
+ mov 0x0,LIM_l6 ! delay slot set biguns=0
+
+! adjust 1st and 2nd triples since 3rd is done here
+ subcc LIM_l6,0x3,%g0 ! continue in LOOP2?
+ !done fmuld %f0,%f40,%f2 ! adj LOOP0
+ sub %i3,%i4,%i3 ! adjust to not double increment
+ fors %f10,%f19,%f10 ! restore sign bit
+ fmuld %f10,%f40,%f12 ! adj LOOP1
+ faddd %f2,%f42,%f2 ! adj LOOP1
+ bz,a %icc,.LOOP2
+ mov 0x0,LIM_l6 ! delay slot set biguns=0
+
+ .align 32
+.LOOP0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ mov %i3,%o0 ! py0 = y
+ lda [%i1]%asi,%f10
+ cmp %l0,%l7
+ add %i3,%i4,%i3 ! y += stridey
+ bg,pn %icc,.BIG0 ! if hx > 0x413921fb
+
+! delay slot
+ lda [%i1+4]%asi,%f11
+ addcc %i0,-1,%i0
+ add %i1,%i2,%i1 ! x += stridex
+ ble,pn %icc,.ENDLOOP1
+
+! delay slot
+ andn %l1,%i5,%l1
+ nop
+ fmuld %f0,%f40,%f2
+ fabsd %f54,%f54 ! a nop for alignment only
+
+.LOOP1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ mov %i3,%o1 ! py1 = y
+
+ lda [%i1]%asi,%f20
+ cmp %l1,%l7
+ add %i3,%i4,%i3 ! y += stridey
+ bg,pn %icc,.BIG1 ! if hx > 0x413921fb
+
+! delay slot
+ lda [%i1+4]%asi,%f21
+ addcc %i0,-1,%i0
+ add %i1,%i2,%i1 ! x += stridex
+ ble,pn %icc,.ENDLOOP2
+
+! delay slot
+ andn %l2,%i5,%l2
+ nop
+ fmuld %f10,%f40,%f12
+ faddd %f2,%f42,%f2
+
+.LOOP2:
+ st %f3,[%fp+n0]
+ mov %i3,%o2 ! py2 = y
+
+ cmp %l2,%l7
+ add %i3,%i4,%i3 ! y += stridey
+ fmuld %f20,%f40,%f22
+ bg,pn %icc,.BIG2 ! if hx > 0x413921fb
+
+! delay slot
+ add %l5,thresh+4,%o7
+ faddd %f12,%f42,%f12
+ st %f13,[%fp+n1]
+
+! -
+
+ add %l5,thresh,%g1
+ faddd %f22,%f42,%f22
+ st %f23,[%fp+n2]
+
+ fsubd %f2,%f42,%f2 ! n
+
+ fsubd %f12,%f42,%f12 ! n
+
+ fsubd %f22,%f42,%f22 ! n
+
+ fmuld %f2,%f46,%f4
+
+ fmuld %f12,%f46,%f14
+
+ fmuld %f22,%f46,%f24
+
+ fsubd %f0,%f4,%f4
+ fmuld %f2,%f48,%f6
+
+ fsubd %f10,%f14,%f14
+ fmuld %f12,%f48,%f16
+
+ fsubd %f20,%f24,%f24
+ fmuld %f22,%f48,%f26
+
+ fsubd %f4,%f6,%f0
+ ld [%fp+n0],%o3
+
+ fsubd %f14,%f16,%f10
+ ld [%fp+n1],%o4
+
+ fsubd %f24,%f26,%f20
+ ld [%fp+n2],%o5
+
+ fsubd %f4,%f0,%f32
+ and %o3,1,%o3
+
+ fsubd %f14,%f10,%f34
+ and %o4,1,%o4
+
+ fsubd %f24,%f20,%f36
+ and %o5,1,%o5
+
+ fsubd %f32,%f6,%f32
+ fmuld %f2,%f50,%f8
+ sll %o3,3,%o3
+
+ fsubd %f34,%f16,%f34
+ fmuld %f12,%f50,%f18
+ sll %o4,3,%o4
+
+ fsubd %f36,%f26,%f36
+ fmuld %f22,%f50,%f28
+ sll %o5,3,%o5
+
+ fsubd %f8,%f32,%f8
+ ld [%g1+%o3],%f6
+
+ fsubd %f18,%f34,%f18
+ ld [%g1+%o4],%f16
+
+ fsubd %f28,%f36,%f28
+ ld [%g1+%o5],%f26
+
+ fsubd %f0,%f8,%f4
+
+ fsubd %f10,%f18,%f14
+
+ fsubd %f20,%f28,%f24
+
+ fsubd %f0,%f4,%f32
+
+ fsubd %f10,%f14,%f34
+
+ fsubd %f20,%f24,%f36
+
+ fsubd %f32,%f8,%f32
+ fmuld %f2,%f52,%f2
+
+ fsubd %f34,%f18,%f34
+ fmuld %f12,%f52,%f12
+
+ fsubd %f36,%f28,%f36
+ fmuld %f22,%f52,%f22
+
+ fsubd %f2,%f32,%f2
+ ld [%o7+%o3],%f8
+
+ fsubd %f12,%f34,%f12
+ ld [%o7+%o4],%f18
+
+ fsubd %f22,%f36,%f22
+ ld [%o7+%o5],%f28
+
+ fsubd %f4,%f2,%f0 ! x
+
+ fsubd %f14,%f12,%f10 ! x
+
+ fsubd %f24,%f22,%f20 ! x
+
+ fsubd %f4,%f0,%f4
+
+ fsubd %f14,%f10,%f14
+
+ fsubd %f24,%f20,%f24
+
+ fands %f0,%f30,%f9 ! save signbit
+
+ fands %f10,%f30,%f19 ! save signbit
+
+ fands %f20,%f30,%f29 ! save signbit
+
+ fabsd %f0,%f0
+ std %f0,[%fp+x0_1]
+
+ fabsd %f10,%f10
+ std %f10,[%fp+x1_1]
+
+ fabsd %f20,%f20
+ std %f20,[%fp+x2_1]
+
+ fsubd %f4,%f2,%f2 ! y
+
+ fsubd %f14,%f12,%f12 ! y
+
+ fsubd %f24,%f22,%f22 ! y
+
+ fcmpgt32 %f6,%f0,%l0
+
+ fcmpgt32 %f16,%f10,%l1
+
+ fcmpgt32 %f26,%f20,%l2
+
+! -- 16 byte aligned
+ fxors %f2,%f9,%f2
+
+ fxors %f12,%f19,%f12
+
+ fxors %f22,%f29,%f22
+
+ fands %f9,%f8,%f9 ! if (n & 1) clear sign bit
+ andcc %l0,2,%g0
+ bne,pn %icc,.CASE4
+
+! delay slot
+ fands %f19,%f18,%f19 ! if (n & 1) clear sign bit
+ andcc %l1,2,%g0
+ bne,pn %icc,.CASE2
+
+! delay slot
+ fands %f29,%f28,%f29 ! if (n & 1) clear sign bit
+ andcc %l2,2,%g0
+ bne,pn %icc,.CASE1
+
+! delay slot
+ fpadd32s %f0,%f31,%f8
+ sethi %hi(0x3fc3c000),%o7
+ ld [%fp+x0_1],%l0
+
+ fpadd32s %f10,%f31,%f18
+ add %l3,8,%g1
+ ld [%fp+x1_1],%l1
+
+ fpadd32s %f20,%f31,%f28
+ ld [%fp+x2_1],%l2
+
+ fand %f8,%f44,%f4
+ sub %l0,%o7,%l0
+
+ fand %f18,%f44,%f14
+ sub %l1,%o7,%l1
+
+ fand %f28,%f44,%f24
+ sub %l2,%o7,%l2
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+
+ fmuld %f0,%f6,%f6
+ ldd [%g1+%l0],%f2
+
+ fmuld %f10,%f16,%f16
+ ldd [%g1+%l1],%f12
+
+ fmuld %f20,%f26,%f26
+ ldd [%g1+%l2],%f22
+
+ fmuld %f4,%f32,%f4
+ ldd [%l4+%l0],%f0
+
+ fmuld %f14,%f34,%f14
+ ldd [%l4+%l1],%f10
+
+ fmuld %f24,%f36,%f24
+ ldd [%l4+%l2],%f20
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f26,%f22,%f26
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f14,%f16
+
+ faddd %f26,%f24,%f26
+
+ faddd %f6,%f0,%f6
+
+ faddd %f16,%f10,%f16
+
+ faddd %f26,%f20,%f26
+
+ faddd %f6,%f32,%f6
+
+ faddd %f16,%f34,%f16
+
+ faddd %f26,%f36,%f26
+
+.FIXSIGN:
+ ld [%fp+n0],%o3
+ add %l5,thresh-4,%g1
+
+ ld [%fp+n1],%o4
+
+ ld [%fp+n2],%o5
+ and %o3,2,%o3
+
+ sll %o3,2,%o3
+ and %o4,2,%o4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ sll %o4,2,%o4
+ and %o5,2,%o5
+ ld [%g1+%o3],%f8
+
+ sll %o5,2,%o5
+ ld [%g1+%o4],%f18
+
+ ld [%g1+%o5],%f28
+ fxors %f9,%f8,%f9
+
+ lda [%i1]%asi,%f0
+ fxors %f29,%f28,%f29
+
+ lda [%i1+4]%asi,%f1
+ fxors %f19,%f18,%f19
+
+ fors %f6,%f9,%f6 ! tack on sign
+ add %i1,%i2,%i1 ! x += stridex
+ st %f6,[%o0]
+
+ fors %f26,%f29,%f26 ! tack on sign
+ st %f7,[%o0+4]
+
+ fors %f16,%f19,%f16 ! tack on sign
+ st %f26,[%o2]
+
+ st %f27,[%o2+4]
+ addcc %i0,-1,%i0
+
+ st %f16,[%o1]
+ andn %l0,%i5,%l0 ! hx &= ~0x80000000
+ bg,pt %icc,.LOOP0
+
+! delay slot
+ st %f17,[%o1+4]
+
+ ba,pt %icc,.ENDLOOP0
+! delay slot
+ nop
+
+ .align 32
+.CASE1:
+ fpadd32s %f10,%f31,%f18
+ sethi %hi(0x3fc3c000),%o7
+ ld [%fp+x0_1],%l0
+
+ fand %f8,%f44,%f4
+ add %l3,8,%g1
+ ld [%fp+x1_1],%l1
+
+ fand %f18,%f44,%f14
+ sub %l0,%o7,%l0
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+ sub %l1,%o7,%l1
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+
+ fmuld %f4,%f32,%f4
+ std %f22,[%fp+y2_0]
+
+ fmuld %f14,%f34,%f14
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f20,%f24,%f24
+
+ faddd %f6,%f4,%f6
+
+ faddd %f16,%f14,%f16
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f6,%f0,%f6
+
+ faddd %f16,%f10,%f16
+
+ faddd %f24,%f22,%f24
+
+ faddd %f6,%f32,%f6
+
+ faddd %f16,%f34,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f36,%f24,%f26
+
+ .align 32
+.CASE2:
+ fpadd32s %f0,%f31,%f8
+ ld [%fp+x0_1],%l0
+ andcc %l2,2,%g0
+ bne,pn %icc,.CASE3
+
+! delay slot
+ sethi %hi(0x3fc3c000),%o7
+ fpadd32s %f20,%f31,%f28
+ ld [%fp+x2_1],%l2
+
+ fand %f8,%f44,%f4
+ sub %l0,%o7,%l0
+ add %l3,8,%g1
+
+ fand %f28,%f44,%f24
+ sub %l2,%o7,%l2
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f10,%f14,%f14
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+ ldd [%g1+%l2],%f22
+
+ faddd %f14,%f16,%f14
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f20
+
+ fmuld %f4,%f32,%f4
+ std %f12,[%fp+y1_0]
+
+ fmuld %f24,%f36,%f24
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f26,%f22,%f26
+
+ fmuld %f10,%f14,%f14
+
+ faddd %f6,%f4,%f6
+
+ faddd %f26,%f24,%f26
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ faddd %f6,%f0,%f6
+
+ faddd %f26,%f20,%f26
+
+ faddd %f14,%f12,%f14
+
+ faddd %f6,%f32,%f6
+
+ faddd %f26,%f36,%f26
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f34,%f14,%f16
+
+ .align 32
+.CASE3:
+ fand %f8,%f44,%f4
+ add %l3,8,%g1
+ sub %l0,%o7,%l0
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ fsubd %f0,%f4,%f0
+ srl %l0,10,%l0
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+
+ fmuld %f20,%f24,%f24
+ std %f22,[%fp+y2_0]
+
+ faddd %f14,%f16,%f14
+
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+
+ fmuld %f4,%f32,%f4
+
+ fmuld %f20,%f24,%f24
+
+ fmuld %f6,%f2,%f6
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f24,%f22,%f24
+
+ faddd %f6,%f0,%f6
+
+ faddd %f34,%f14,%f16
+
+ faddd %f36,%f24,%f26
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f6,%f32,%f6
+
+ .align 32
+.CASE4:
+ fands %f29,%f28,%f29 ! if (n & 1) clear sign bit
+ sethi %hi(0x3fc3c000),%o7
+ andcc %l1,2,%g0
+ bne,pn %icc,.CASE6
+
+! delay slot
+ andcc %l2,2,%g0
+ fpadd32s %f10,%f31,%f18
+ ld [%fp+x1_1],%l1
+ bne,pn %icc,.CASE5
+
+! delay slot
+ add %l3,8,%g1
+ ld [%fp+x2_1],%l2
+ fpadd32s %f20,%f31,%f28
+
+ fand %f18,%f44,%f14
+ sub %l1,%o7,%l1
+
+ fand %f28,%f44,%f24
+ sub %l2,%o7,%l2
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f0,%f4,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+ ldd [%g1+%l2],%f22
+
+ faddd %f4,%f6,%f4
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f20
+
+ fmuld %f14,%f34,%f14
+ std %f2,[%fp+y0_0]
+
+ fmuld %f24,%f36,%f24
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f26,%f22,%f26
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ faddd %f16,%f14,%f16
+
+ faddd %f26,%f24,%f26
+
+ faddd %f4,%f2,%f4
+
+ faddd %f16,%f10,%f16
+
+ faddd %f26,%f20,%f26
+
+ faddd %f32,%f4,%f6
+
+ faddd %f16,%f34,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f26,%f36,%f26
+
+ .align 32
+.CASE5:
+ fand %f18,%f44,%f14
+ sub %l1,%o7,%l1
+
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ fsubd %f10,%f14,%f10
+ srl %l1,10,%l1
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+
+ fmuld %f20,%f24,%f24
+ std %f22,[%fp+y2_0]
+
+ faddd %f4,%f6,%f4
+
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+
+ fmuld %f14,%f34,%f14
+
+ fmuld %f20,%f24,%f24
+
+ fmuld %f16,%f12,%f16
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f16,%f14,%f16
+
+ faddd %f4,%f2,%f4
+
+ faddd %f24,%f22,%f24
+
+ faddd %f16,%f10,%f16
+
+ faddd %f32,%f4,%f6
+
+ faddd %f36,%f24,%f26
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f16,%f34,%f16
+
+ .align 32
+.CASE6:
+ ld [%fp+x2_1],%l2
+ add %l3,8,%g1
+ bne,pn %icc,.CASE7
+! delay slot
+ fpadd32s %f20,%f31,%f28
+
+ fand %f28,%f44,%f24
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ fmuld %f0,%f0,%f0
+ sub %l2,%o7,%l2
+
+ fsubd %f20,%f24,%f20
+ srl %l2,10,%l2
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ faddd %f20,%f22,%f20
+ andn %l2,0x1f,%l2
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ fmuld %f20,%f20,%f22
+ add %l2,%o5,%l2
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ fmuld %f22,%f58,%f26
+ ldd [%l3+%l2],%f36
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ faddd %f26,%f56,%f26
+ fmuld %f22,%f62,%f24
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+
+ fmuld %f22,%f26,%f26
+ faddd %f24,%f60,%f24
+
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+
+ faddd %f4,%f6,%f4
+
+ faddd %f26,%f54,%f26
+ fmuld %f22,%f24,%f24
+ ldd [%g1+%l2],%f22
+
+ faddd %f14,%f16,%f14
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f20,%f26,%f26
+ ldd [%l4+%l2],%f20
+
+ fmuld %f24,%f36,%f24
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f26,%f22,%f26
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ faddd %f26,%f24,%f26
+
+ faddd %f4,%f2,%f4
+
+ faddd %f14,%f12,%f14
+
+ faddd %f26,%f20,%f26
+
+ faddd %f32,%f4,%f6
+
+ faddd %f34,%f14,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f26,%f36,%f26
+
+ .align 32
+.CASE7:
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+
+ fmuld %f20,%f20,%f20
+ ldd [%l5+%o5],%f36
+ add %l5,%o5,%l2
+
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+
+ fmuld %f20,%f36,%f24
+ ldd [%l2+0x10],%f26
+ add %fp,%o5,%o5
+
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+
+ faddd %f24,%f26,%f24
+ ldd [%l2+0x20],%f36
+
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+
+ fmuld %f20,%f24,%f24
+ ldd [%l2+0x30],%f26
+
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+
+ faddd %f24,%f36,%f24
+ ldd [%o5+x2_1],%f36
+
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+
+ fmuld %f20,%f24,%f24
+ std %f22,[%fp+y2_0]
+
+ faddd %f4,%f6,%f4
+
+ faddd %f14,%f16,%f14
+
+ faddd %f24,%f26,%f24
+
+ fmuld %f0,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f20,%f24,%f24
+
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+
+ fmuld %f36,%f24,%f24
+ ldd [%o5+y2_0],%f22
+
+ faddd %f4,%f2,%f4
+
+ faddd %f14,%f12,%f14
+
+ faddd %f24,%f22,%f24
+
+ faddd %f32,%f4,%f6
+
+ faddd %f34,%f14,%f16
+ ba,pt %icc,.FIXSIGN
+
+! delay slot
+ faddd %f36,%f24,%f26
+
+
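+! Pipeline drain: .ENDLOOP2 and .ENDLOOP1 below finish the last one or
+! two elements with straight-line scalar code.  Each appears to repeat
+! the loop's reduction (multiply by 1/(pi/2), subtract the split pi/2 in
+! pieces to get the reduced x and its tail y), then evaluates either the
+! polynomial-only or the table-based form and reattaches the sign before
+! storing the result.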
+ .align 32
+.ENDLOOP2:
+ fmuld %f10,%f40,%f12
+ add %l5,thresh,%g1
+ faddd %f12,%f42,%f12
+ st %f13,[%fp+n1]
+ fsubd %f12,%f42,%f12 ! n
+ fmuld %f12,%f46,%f14
+ fsubd %f10,%f14,%f14
+ fmuld %f12,%f48,%f16
+ fsubd %f14,%f16,%f10
+ ld [%fp+n1],%o4
+ fsubd %f14,%f10,%f34
+ and %o4,1,%o4
+ fsubd %f34,%f16,%f34
+ fmuld %f12,%f50,%f18
+ sll %o4,3,%o4
+ fsubd %f18,%f34,%f18
+ ld [%g1+%o4],%f16
+ fsubd %f10,%f18,%f14
+ fsubd %f10,%f14,%f34
+ add %l5,thresh+4,%o7
+ fsubd %f34,%f18,%f34
+ fmuld %f12,%f52,%f12
+ fsubd %f12,%f34,%f12
+ ld [%o7+%o4],%f18
+ fsubd %f14,%f12,%f10 ! x
+ fsubd %f14,%f10,%f14
+ fands %f10,%f30,%f19 ! save signbit
+ fabsd %f10,%f10
+ std %f10,[%fp+x1_1]
+ fsubd %f14,%f12,%f12 ! y
+ fcmpgt32 %f16,%f10,%l1
+ fxors %f12,%f19,%f12
+ fands %f19,%f18,%f19 ! if (n & 1) clear sign bit
+ andcc %l1,2,%g0
+ bne,pn %icc,1f
+! delay slot
+ nop
+ fpadd32s %f10,%f31,%f18
+ ld [%fp+x1_1],%l1
+ fand %f18,%f44,%f14
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fsubd %f10,%f14,%f10
+ sub %l1,%o7,%l1
+ srl %l1,10,%l1
+ faddd %f10,%f12,%f10
+ andn %l1,0x1f,%l1
+ fmuld %f10,%f10,%f12
+ add %l1,%o4,%l1
+ fmuld %f12,%f58,%f16
+ ldd [%l3+%l1],%f34
+ faddd %f16,%f56,%f16
+ fmuld %f12,%f62,%f14
+ fmuld %f12,%f16,%f16
+ faddd %f14,%f60,%f14
+ faddd %f16,%f54,%f16
+ fmuld %f12,%f14,%f14
+ ldd [%g1+%l1],%f12
+ fmuld %f10,%f16,%f16
+ ldd [%l4+%l1],%f10
+ fmuld %f14,%f34,%f14
+ fmuld %f16,%f12,%f16
+ faddd %f16,%f14,%f16
+ faddd %f16,%f10,%f16
+ ba,pt %icc,2f
+ faddd %f16,%f34,%f16
+1:
+ fmuld %f10,%f10,%f10
+ ldd [%l5+%o4],%f34
+ add %l5,%o4,%l1
+ fmuld %f10,%f34,%f14
+ ldd [%l1+0x10],%f16
+ add %fp,%o4,%o4
+ faddd %f14,%f16,%f14
+ ldd [%l1+0x20],%f34
+ fmuld %f10,%f14,%f14
+ ldd [%l1+0x30],%f16
+ faddd %f14,%f34,%f14
+ ldd [%o4+x1_1],%f34
+ fmuld %f10,%f14,%f14
+ std %f12,[%fp+y1_0]
+ faddd %f14,%f16,%f14
+ fmuld %f10,%f14,%f14
+ fmuld %f34,%f14,%f14
+ ldd [%o4+y1_0],%f12
+ faddd %f14,%f12,%f14
+ faddd %f34,%f14,%f16
+2:
+ add %l5,thresh-4,%g1
+ ld [%fp+n1],%o4
+ and %o4,2,%o4
+ sll %o4,2,%o4
+ ld [%g1+%o4],%f18
+ fxors %f19,%f18,%f19
+ fors %f16,%f19,%f16 ! tack on sign
+ st %f16,[%o1]
+ st %f17,[%o1+4]
+
+.ENDLOOP1:
+ fmuld %f0,%f40,%f2
+ add %l5,thresh,%g1
+ faddd %f2,%f42,%f2
+ st %f3,[%fp+n0]
+ fsubd %f2,%f42,%f2 ! n
+ fmuld %f2,%f46,%f4
+ fsubd %f0,%f4,%f4
+ fmuld %f2,%f48,%f6
+ fsubd %f4,%f6,%f0
+ ld [%fp+n0],%o3
+ fsubd %f4,%f0,%f32
+ and %o3,1,%o3
+ fsubd %f32,%f6,%f32
+ fmuld %f2,%f50,%f8
+ sll %o3,3,%o3
+ fsubd %f8,%f32,%f8
+ ld [%g1+%o3],%f6
+ fsubd %f0,%f8,%f4
+ fsubd %f0,%f4,%f32
+ add %l5,thresh+4,%o7
+ fsubd %f32,%f8,%f32
+ fmuld %f2,%f52,%f2
+ fsubd %f2,%f32,%f2
+ ld [%o7+%o3],%f8
+ fsubd %f4,%f2,%f0 ! x
+ fsubd %f4,%f0,%f4
+ fands %f0,%f30,%f9 ! save signbit
+ fabsd %f0,%f0
+ std %f0,[%fp+x0_1]
+ fsubd %f4,%f2,%f2 ! y
+ fcmpgt32 %f6,%f0,%l0
+ fxors %f2,%f9,%f2
+ fands %f9,%f8,%f9 ! if (n & 1) clear sign bit
+ andcc %l0,2,%g0
+ bne,pn %icc,1f
+! delay slot
+ nop
+ fpadd32s %f0,%f31,%f8
+ ld [%fp+x0_1],%l0
+ fand %f8,%f44,%f4
+ sethi %hi(0x3fc3c000),%o7
+ add %l3,8,%g1
+ fsubd %f0,%f4,%f0
+ sub %l0,%o7,%l0
+ srl %l0,10,%l0
+ faddd %f0,%f2,%f0
+ andn %l0,0x1f,%l0
+ fmuld %f0,%f0,%f2
+ add %l0,%o3,%l0
+ fmuld %f2,%f58,%f6
+ ldd [%l3+%l0],%f32
+ faddd %f6,%f56,%f6
+ fmuld %f2,%f62,%f4
+ fmuld %f2,%f6,%f6
+ faddd %f4,%f60,%f4
+ faddd %f6,%f54,%f6
+ fmuld %f2,%f4,%f4
+ ldd [%g1+%l0],%f2
+ fmuld %f0,%f6,%f6
+ ldd [%l4+%l0],%f0
+ fmuld %f4,%f32,%f4
+ fmuld %f6,%f2,%f6
+ faddd %f6,%f4,%f6
+ faddd %f6,%f0,%f6
+ ba,pt %icc,2f
+ faddd %f6,%f32,%f6
+1:
+ fmuld %f0,%f0,%f0
+ ldd [%l5+%o3],%f32
+ add %l5,%o3,%l0
+ fmuld %f0,%f32,%f4
+ ldd [%l0+0x10],%f6
+ add %fp,%o3,%o3
+ faddd %f4,%f6,%f4
+ ldd [%l0+0x20],%f32
+ fmuld %f0,%f4,%f4
+ ldd [%l0+0x30],%f6
+ faddd %f4,%f32,%f4
+ ldd [%o3+x0_1],%f32
+ fmuld %f0,%f4,%f4
+ std %f2,[%fp+y0_0]
+ faddd %f4,%f6,%f4
+ fmuld %f0,%f4,%f4
+ fmuld %f32,%f4,%f4
+ ldd [%o3+y0_0],%f2
+ faddd %f4,%f2,%f4
+ faddd %f32,%f4,%f6
+2:
+ add %l5,thresh-4,%g1
+ ld [%fp+n0],%o3
+ and %o3,2,%o3
+ sll %o3,2,%o3
+ ld [%g1+%o3],%f8
+ fxors %f9,%f8,%f9
+ fors %f6,%f9,%f6 ! tack on sign
+ st %f6,[%o0]
+ st %f7,[%o0+4]
+
+.ENDLOOP0:
+
+! check for huge arguments remaining
+
+ tst LIM_l6
+ be,pt %icc,.exit
+! delay slot
+ nop
+
+! ========== huge range (use C code) ==========
+
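+! Reload the argument list saved in the prologue and hand the whole
+! vector back to the C helper __vlibm_vsin_big, which is expected to
+! process the elements whose arguments were too large for the reduction
+! used above (the ones that set the biguns flag in LIM_l6).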
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ysave],%o3
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ysave],%o3
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sysave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ sra %o4,0,%o4
+ call __vlibm_vsin_big
+ mov %l7,%o5 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
+ .align 32
+.SKIP0:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP0
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovs %f10,%f0
+ ld [%i1+4],%f1
+ ba,pt %icc,.LOOP0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.SKIP1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovs %f20,%f10
+ ld [%i1+4],%f11
+ ba,pt %icc,.LOOP1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.SKIP2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP2
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ ld [%i1],%l2
+ ld [%i1],%f20
+ ld [%i1+4],%f21
+ andn %l2,%i5,%l2 ! hx &= ~0x80000000
+ ba,pt %icc,.LOOP2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.BIG0:
+ sethi %hi(0x7ff00000),%o7
+ cmp %l0,%o7
+ bl,a,pt %icc,1f ! if hx < 0x7ff00000
+! delay slot, annulled if branch not taken
+ mov %l7,LIM_l6 ! set biguns flag or
+ fsubd %f0,%f0,%f0 ! y = x - x
+ st %f0,[%o0]
+ st %f1,[%o0+4]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP0
+! delay slot, harmless if branch taken
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovd %f10,%f0
+ ba,pt %icc,.LOOP0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.BIG1:
+ sethi %hi(0x7ff00000),%o7
+ cmp %l1,%o7
+ bl,a,pt %icc,1f ! if hx < 0x7ff00000
+! delay slot, annulled if branch not taken
+ mov %l7,LIM_l6 ! set biguns flag or
+ fsubd %f10,%f10,%f10 ! y = x - x
+ st %f10,[%o1]
+ st %f11,[%o1+4]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP1
+! delay slot, harmless if branch taken
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovd %f20,%f10
+ ba,pt %icc,.LOOP1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 32
+.BIG2:
+ sethi %hi(0x7ff00000),%o7
+ cmp %l2,%o7
+ bl,a,pt %icc,1f ! if hx < 0x7ff00000
+! delay slot, annulled if branch not taken
+ mov %l7,LIM_l6 ! set biguns flag or
+ fsubd %f20,%f20,%f20 ! y = x - x
+ st %f20,[%o2]
+ st %f21,[%o2+4]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.ENDLOOP2
+! delay slot
+ nop
+ ld [%i1],%l2
+ ld [%i1],%f20
+ ld [%i1+4],%f21
+ andn %l2,%i5,%l2 ! hx &= ~0x80000000
+ ba,pt %icc,.LOOP2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+ SET_SIZE(__vsin)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsin_ultra3.S b/usr/src/libm/src/mvec/vis/__vsin_ultra3.S
new file mode 100644
index 0000000..172b2ad
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsin_ultra3.S
@@ -0,0 +1,3431 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsin_ultra3.S 1.8 06/01/23 SMI"
+
+ .file "__vsin_ultra3.S"
+
+#include "libm.h"
+#if defined(LIBMVEC_SO_BUILD)
+ .weak __vsin
+ .type __vsin,#function
+ __vsin = __vsin_ultra3
+#endif
+
+ RO_DATA
+ .align 64
+constants:
+ .word 0x42c80000,0x00000000 ! 3 * 2^44
+ .word 0x43380000,0x00000000 ! 3 * 2^51
+ .word 0x3fe45f30,0x6dc9c883 ! invpio2
+ .word 0x3ff921fb,0x54442c00 ! pio2_1
+ .word 0x3d318469,0x898cc400 ! pio2_2
+ .word 0x3a71701b,0x839a2520 ! pio2_3
+ .word 0xbfc55555,0x55555533 ! pp1
+ .word 0x3f811111,0x10e7d53b ! pp2
+ .word 0xbf2a0167,0xe6b3cf9b ! pp3
+ .word 0xbfdfffff,0xffffff65 ! qq1
+ .word 0x3fa55555,0x54f88ed0 ! qq2
+ .word 0xbf56c12c,0xdd185f60 ! qq3
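+
+! Rough C sketch of the per-element computation these constants support
+! (illustrative only; the code below is 4-way unrolled, keeps the reduced
+! argument in two pieces for extra precision, and uses the table
+! __vlibm_TBL_sincos2, whose entry layout is not reproduced here):
+!
+!	n = (int) nearbyint(x * invpio2);	/* via the 3*2^51 trick */
+!	t = ((x - n*pio2_1) - n*pio2_2) - n*pio2_3;
+!	z = t * t;
+!	sin_t = t + t*z*(pp1 + z*(pp2 + z*pp3));
+!	cos_t = 1.0 + z*(qq1 + z*(qq2 + z*qq3));
+!	switch (n & 3) {
+!	case 0: y =  sin_t; break;
+!	case 1: y =  cos_t; break;
+!	case 2: y = -sin_t; break;
+!	case 3: y = -cos_t; break;
+!	}
+!
+! For |x| below ~pi/4 the reduction is skipped; the 3*2^44 constant is
+! used instead to extract a table index from the high bits of |x|.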
+
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define biguns STACK_BIAS-0x20
+#define nk3 STACK_BIAS-0x24
+#define nk2 STACK_BIAS-0x28
+#define nk1 STACK_BIAS-0x2c
+#define nk0 STACK_BIAS-0x30
+#define junk STACK_BIAS-0x38
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 0x80000000
+
+! l0 hx0
+! l1 hx1
+! l2 hx2
+! l3 hx3
+! l4 k0
+! l5 k1
+! l6 k2
+! l7 k3
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 __vlibm_TBL_sincos2
+! g5 scratch
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 py3
+! o4 0x3e400000
+! o5 0x3fe921fb,0x4099251e
+! o7 scratch
+
+! f0 hx0
+! f2
+! f4
+! f6
+! f8 hx1
+! f10
+! f12
+! f14
+! f16 hx2
+! f18
+! f20
+! f22
+! f24 hx3
+! f26
+! f28
+! f30
+! f32
+! f34
+! f36
+! f38
+
+#define c3two44 %f40
+#define c3two51 %f42
+#define invpio2 %f44
+#define pio2_1 %f46
+#define pio2_2 %f48
+#define pio2_3 %f50
+#define pp1 %f52
+#define pp2 %f54
+#define pp3 %f56
+#define qq1 %f58
+#define qq2 %f60
+#define qq3 %f62
+
+ ENTRY(__vsin_ultra3)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o0)
+ PIC_SET(l7,__vlibm_TBL_sincos2,o1)
+ mov %o1,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
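+! (ASI 0x82 is the primary no-fault ASI: the lda [%i1]%asi preloads of the
+! next argument may run past the end of the input without trapping)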
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ysave]
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ysave]
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sysave]
+ st %g0,[%fp+biguns] ! biguns = 0
+ ldd [%o0+0x00],c3two44 ! load/set up constants
+ ldd [%o0+0x08],c3two51
+ ldd [%o0+0x10],invpio2
+ ldd [%o0+0x18],pio2_1
+ ldd [%o0+0x20],pio2_2
+ ldd [%o0+0x28],pio2_3
+ ldd [%o0+0x30],pp1
+ ldd [%o0+0x38],pp2
+ ldd [%o0+0x40],pp3
+ ldd [%o0+0x48],qq1
+ ldd [%o0+0x50],qq2
+ ldd [%o0+0x58],qq3
+ sethi %hi(0x80000000),%i5
+ sethi %hi(0x3e400000),%o4
+ sethi %hi(0x3fe921fb),%o5
+ or %o5,%lo(0x3fe921fb),%o5
+ sllx %o5,32,%o5
+ sethi %hi(0x4099251e),%o7
+ or %o7,%lo(0x4099251e),%o7
+ or %o5,%o7,%o5
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ add %fp,junk,%o1 ! loop prologue
+ add %fp,junk,%o2
+ add %fp,junk,%o3
+ ld [%i1],%l0 ! *x
+ ld [%i1],%f0
+ ld [%i1+4],%f3
+ andn %l0,%i5,%l0 ! mask off sign
+ ba .loop0
+ add %i1,%i2,%i1 ! x += stridex
+
+! 16-byte aligned
+ .align 16
+.loop0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,%o4,%g5
+ sub %o5,%l0,%o7
+ fabss %f0,%f2
+
+ lda [%i1]%asi,%f8
+ orcc %o7,%g5,%g0
+ mov %i3,%o0 ! py0 = y
+ bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f11
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last1
+
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f2,c3two44,%f4
+ st %f15,[%o1+4]
+
+.loop1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,%o4,%g5
+ sub %o5,%l1,%o7
+ fabss %f8,%f10
+
+ lda [%i1]%asi,%f16
+ orcc %o7,%g5,%g0
+ mov %i3,%o1 ! py1 = y
+ bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f19
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last2
+
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f10,c3two44,%f12
+ st %f23,[%o2+4]
+
+.loop2:
+ lda [%i1]%asi,%l3 ! preload next argument
+ sub %l2,%o4,%g5
+ sub %o5,%l2,%o7
+ fabss %f16,%f18
+
+ lda [%i1]%asi,%f24
+ orcc %o7,%g5,%g0
+ mov %i3,%o2 ! py2 = y
+ bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f27
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! y += stridey
+ ble,pn %icc,.last3
+
+! delay slot
+ andn %l3,%i5,%l3
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f18,c3two44,%f20
+ st %f31,[%o3+4]
+
+.loop3:
+ sub %l3,%o4,%g5
+ sub %o5,%l3,%o7
+ fabss %f24,%f26
+ st %f5,[%fp+nk0]
+
+ orcc %o7,%g5,%g0
+ mov %i3,%o3 ! py3 = y
+	bl,pn	%icc,.range3		! hx < 0x3e400000 or hx > 0x4099251e
+! delay slot
+ st %f13,[%fp+nk1]
+
+!!! DONE?
+.cont:
+ srlx %o5,32,%o7
+ add %i3,%i4,%i3 ! y += stridey
+ fmovs %f3,%f1
+ st %f21,[%fp+nk2]
+
+ sub %o7,%l0,%l0
+ sub %o7,%l1,%l1
+ faddd %f26,c3two44,%f28
+ st %f29,[%fp+nk3]
+
+ sub %o7,%l2,%l2
+ sub %o7,%l3,%l3
+ fmovs %f11,%f9
+
+ or %l0,%l1,%l0
+ or %l2,%l3,%l2
+ fmovs %f19,%f17
+
+ fmovs %f27,%f25
+ fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range
+
+ fmuld %f8,invpio2,%f14
+ ld [%fp+nk0],%l4
+
+ fmuld %f16,invpio2,%f22
+ ld [%fp+nk1],%l5
+
+ orcc %l0,%l2,%g0
+ bl,pn %icc,.medium
+! delay slot
+ fmuld %f24,invpio2,%f30
+ ld [%fp+nk2],%l6
+
+ ld [%fp+nk3],%l7
+ sll %l4,5,%l4 ! k
+ fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0
+
+ sll %l5,5,%l5
+ ldd [%l4+%g1],%f4
+ fcmpd %fcc1,%f8,pio2_3
+
+ sll %l6,5,%l6
+ ldd [%l5+%g1],%f12
+ fcmpd %fcc2,%f16,pio2_3
+
+ sll %l7,5,%l7
+ ldd [%l6+%g1],%f20
+ fcmpd %fcc3,%f24,pio2_3
+
+ ldd [%l7+%g1],%f28
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f10,%f12,%f10
+
+ fsubd %f18,%f20,%f18
+
+ fsubd %f26,%f28,%f26
+
+ fmuld %f2,%f2,%f0 ! z = x * x
+
+ fmuld %f10,%f10,%f8
+
+ fmuld %f18,%f18,%f16
+
+ fmuld %f26,%f26,%f24
+
+ fmuld %f0,pp3,%f6
+
+ fmuld %f8,pp3,%f14
+
+ fmuld %f16,pp3,%f22
+
+ fmuld %f24,pp3,%f30
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f8,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f16,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f24,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f2,%f6,%f6
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f18,%f22,%f22
+
+ fmuld %f26,%f30,%f30
+
+ faddd %f6,%f2,%f6
+ fmuld %f0,%f4,%f4
+ ldd [%l4+16],%f2
+
+ faddd %f14,%f10,%f14
+ fmuld %f8,%f12,%f12
+ ldd [%l5+16],%f10
+
+ faddd %f22,%f18,%f22
+ fmuld %f16,%f20,%f20
+ ldd [%l6+16],%f18
+
+ faddd %f30,%f26,%f30
+ fmuld %f24,%f28,%f28
+ ldd [%l7+16],%f26
+
+ fmuld %f2,%f6,%f6
+
+ fmuld %f10,%f14,%f14
+
+ fmuld %f18,%f22,%f22
+
+ fmuld %f26,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ fmovdl %fcc0,%f4,%f6 ! (hx < -0)? -s : s
+ st %f6,[%o0]
+
+ fmovdl %fcc1,%f12,%f14
+ st %f14,[%o1]
+
+ fmovdl %fcc2,%f20,%f22
+ st %f22,[%o2]
+
+ fmovdl %fcc3,%f28,%f30
+ st %f30,[%o3]
+ addcc %i0,-1,%i0
+
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.medium:
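+! Medium range: form n = nearbyint(x * invpio2) with the 3*2^51 trick,
+! reduce x by n * (pio2_1 + pio2_2 + pio2_3) in three steps, apply the
+! "if (x < 0) n = -n ^ 2" adjustment, then feed the reduced argument to
+! the same table + polynomial evaluation, with n selecting the sin/cos
+! form and the sign of the stored result.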
+ faddd %f6,c3two51,%f4
+ st %f5,[%fp+nk0]
+
+ faddd %f14,c3two51,%f12
+ st %f13,[%fp+nk1]
+
+ faddd %f22,c3two51,%f20
+ st %f21,[%fp+nk2]
+
+ faddd %f30,c3two51,%f28
+ st %f29,[%fp+nk3]
+
+ fsubd %f4,c3two51,%f6
+
+ fsubd %f12,c3two51,%f14
+
+ fsubd %f20,c3two51,%f22
+
+ fsubd %f28,c3two51,%f30
+
+ fmuld %f6,pio2_1,%f2
+ ld [%fp+nk0],%l0 ! n
+
+ fmuld %f14,pio2_1,%f10
+ ld [%fp+nk1],%l1
+
+ fmuld %f22,pio2_1,%f18
+ ld [%fp+nk2],%l2
+
+ fmuld %f30,pio2_1,%f26
+ ld [%fp+nk3],%l3
+
+ fsubd %f0,%f2,%f0
+ fmuld %f6,pio2_2,%f4
+
+ fsubd %f8,%f10,%f8
+ fmuld %f14,pio2_2,%f12
+
+ fsubd %f16,%f18,%f16
+ fmuld %f22,pio2_2,%f20
+
+ fsubd %f24,%f26,%f24
+ fmuld %f30,pio2_2,%f28
+
+ fsubd %f0,%f4,%f32
+
+ fsubd %f8,%f12,%f34
+
+ fsubd %f16,%f20,%f36
+
+ fsubd %f24,%f28,%f38
+
+ fsubd %f0,%f32,%f0
+ fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0
+
+ fsubd %f8,%f34,%f8
+ fcmple32 %f34,pio2_3,%l5
+
+ fsubd %f16,%f36,%f16
+ fcmple32 %f36,pio2_3,%l6
+
+ fsubd %f24,%f38,%f24
+ fcmple32 %f38,pio2_3,%l7
+
+ fsubd %f0,%f4,%f0
+ fmuld %f6,pio2_3,%f6
+ sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2
+
+ fsubd %f8,%f12,%f8
+ fmuld %f14,pio2_3,%f14
+ sll %l5,30,%l5
+
+ fsubd %f16,%f20,%f16
+ fmuld %f22,pio2_3,%f22
+ sll %l6,30,%l6
+
+ fsubd %f24,%f28,%f24
+ fmuld %f30,pio2_3,%f30
+ sll %l7,30,%l7
+
+ fsubd %f6,%f0,%f6
+ sra %l4,31,%l4
+
+ fsubd %f14,%f8,%f14
+ sra %l5,31,%l5
+
+ fsubd %f22,%f16,%f22
+ sra %l6,31,%l6
+
+ fsubd %f30,%f24,%f30
+ sra %l7,31,%l7
+
+ fsubd %f32,%f6,%f0 ! reduced x
+ xor %l0,%l4,%l0
+
+ fsubd %f34,%f14,%f8
+ xor %l1,%l5,%l1
+
+ fsubd %f36,%f22,%f16
+ xor %l2,%l6,%l2
+
+ fsubd %f38,%f30,%f24
+ xor %l3,%l7,%l3
+
+ fabsd %f0,%f2
+ sub %l0,%l4,%l0
+
+ fabsd %f8,%f10
+ sub %l1,%l5,%l1
+
+ fabsd %f16,%f18
+ sub %l2,%l6,%l2
+
+ fabsd %f24,%f26
+ sub %l3,%l7,%l3
+
+ faddd %f2,c3two44,%f4
+ st %f5,[%fp+nk0]
+ and %l4,2,%l4
+
+ faddd %f10,c3two44,%f12
+ st %f13,[%fp+nk1]
+ and %l5,2,%l5
+
+ faddd %f18,c3two44,%f20
+ st %f21,[%fp+nk2]
+ and %l6,2,%l6
+
+ faddd %f26,c3two44,%f28
+ st %f29,[%fp+nk3]
+ and %l7,2,%l7
+
+ fsubd %f32,%f0,%f4
+ xor %l0,%l4,%l0
+
+ fsubd %f34,%f8,%f12
+ xor %l1,%l5,%l1
+
+ fsubd %f36,%f16,%f20
+ xor %l2,%l6,%l2
+
+ fsubd %f38,%f24,%f28
+ xor %l3,%l7,%l3
+
+ fzero %f38
+ ld [%fp+nk0],%l4
+
+ fsubd %f4,%f6,%f6 ! w
+ ld [%fp+nk1],%l5
+
+ fsubd %f12,%f14,%f14
+ ld [%fp+nk2],%l6
+
+ fnegd %f38,%f38
+ ld [%fp+nk3],%l7
+ sll %l4,5,%l4 ! k
+
+ fsubd %f20,%f22,%f22
+ sll %l5,5,%l5
+
+ fsubd %f28,%f30,%f30
+ sll %l6,5,%l6
+
+ fand %f0,%f38,%f32 ! sign bit of x
+ ldd [%l4+%g1],%f4
+ sll %l7,5,%l7
+
+ fand %f8,%f38,%f34
+ ldd [%l5+%g1],%f12
+
+ fand %f16,%f38,%f36
+ ldd [%l6+%g1],%f20
+
+ fand %f24,%f38,%f38
+ ldd [%l7+%g1],%f28
+
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f10,%f12,%f10
+
+ fsubd %f18,%f20,%f18
+ nop
+
+ fsubd %f26,%f28,%f26
+ nop
+
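+! The parities of n0..n3 select the sine-form or cosine-form polynomial
+! for each element and steer the code into one of the 16 unrolled cases
+! below (.case0 .. .case15); bit 1 of each n is applied at the stores
+! (fmovdnz) to negate the result when the quadrant requires it.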
+! 16-byte aligned
+ fmuld %f2,%f2,%f0 ! z = x * x
+ andcc %l0,1,%g0
+ bz,pn %icc,.case8
+! delay slot
+ fxor %f6,%f32,%f32
+
+ fmuld %f10,%f10,%f8
+ andcc %l1,1,%g0
+ bz,pn %icc,.case4
+! delay slot
+ fxor %f14,%f34,%f34
+
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case2
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case1
+! delay slot
+ fxor %f30,%f38,%f38
+
+!.case0:
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case1:
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case2:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case3
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case3:
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case4:
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case6
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case5
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case5:
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case6:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case7
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case7:
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ fmuld %f0,qq3,%f6 ! cos(x0)
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ faddd %f6,qq2,%f6
+ fmuld %f0,pp2,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,pp1,%f4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ faddd %f6,qq1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f4,%f4
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f32,%f4
+ ldd [%l4+16],%f0
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,%f2,%f4
+ ldd [%l4+8],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f4,%f4
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ fsubd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case8:
+ fmuld %f10,%f10,%f8
+ andcc %l1,1,%g0
+ bz,pn %icc,.case12
+! delay slot
+ fxor %f14,%f34,%f34
+
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case10
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case9
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case9:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case10:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case11
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case11:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ fmuld %f8,qq3,%f14 ! cos(x1)
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ faddd %f14,qq2,%f14
+ fmuld %f8,pp2,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,pp1,%f12
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ faddd %f14,qq1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f12,%f12
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f34,%f12
+ ldd [%l5+16],%f8
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,%f10,%f12
+ ldd [%l5+8],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f12,%f12
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ fsubd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case12:
+ fmuld %f18,%f18,%f16
+ andcc %l2,1,%g0
+ bz,pn %icc,.case14
+! delay slot
+ fxor %f22,%f36,%f36
+
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case13
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case13:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ fmuld %f16,qq3,%f22 ! cos(x2)
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ faddd %f22,qq2,%f22
+ fmuld %f16,pp2,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,pp1,%f20
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ faddd %f22,qq1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f20,%f20
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f36,%f20
+ ldd [%l6+16],%f16
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,%f18,%f20
+ ldd [%l6+8],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f20,%f20
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ fsubd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case14:
+ fmuld %f26,%f26,%f24
+ andcc %l3,1,%g0
+ bz,pn %icc,.case15
+! delay slot
+ fxor %f30,%f38,%f38
+
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ fmuld %f24,qq3,%f30 ! cos(x3)
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ faddd %f30,qq2,%f30
+ fmuld %f24,pp2,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,pp1,%f28
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ faddd %f30,qq1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f28,%f28
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f38,%f28
+ ldd [%l7+16],%f24
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,%f26,%f28
+ ldd [%l7+8],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f28,%f28
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ fsubd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case15:
+ fmuld %f0,pp3,%f6 ! sin(x0)
+
+ fmuld %f8,pp3,%f14 ! sin(x1)
+
+ fmuld %f16,pp3,%f22 ! sin(x2)
+
+ fmuld %f24,pp3,%f30 ! sin(x3)
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq2,%f4
+
+ faddd %f14,pp2,%f14
+ fmuld %f8,qq2,%f12
+
+ faddd %f22,pp2,%f22
+ fmuld %f16,qq2,%f20
+
+ faddd %f30,pp2,%f30
+ fmuld %f24,qq2,%f28
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f8,%f14,%f14
+ faddd %f12,qq1,%f12
+
+ fmuld %f16,%f22,%f22
+ faddd %f20,qq1,%f20
+
+ fmuld %f24,%f30,%f30
+ faddd %f28,qq1,%f28
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l4,%g1,%l4
+
+ faddd %f14,pp1,%f14
+ fmuld %f8,%f12,%f12
+ add %l5,%g1,%l5
+
+ faddd %f22,pp1,%f22
+ fmuld %f16,%f20,%f20
+ add %l6,%g1,%l6
+
+ faddd %f30,pp1,%f30
+ fmuld %f24,%f28,%f28
+ add %l7,%g1,%l7
+
+ fmuld %f0,%f6,%f6
+
+ fmuld %f8,%f14,%f14
+
+ fmuld %f16,%f22,%f22
+
+ fmuld %f24,%f30,%f30
+
+ fmuld %f2,%f6,%f6
+ ldd [%l4+8],%f0
+
+ fmuld %f10,%f14,%f14
+ ldd [%l5+8],%f8
+
+ fmuld %f18,%f22,%f22
+ ldd [%l6+8],%f16
+
+ fmuld %f26,%f30,%f30
+ ldd [%l7+8],%f24
+
+ fmuld %f0,%f4,%f4
+ faddd %f32,%f6,%f6
+
+ fmuld %f8,%f12,%f12
+ faddd %f34,%f14,%f14
+
+ fmuld %f16,%f20,%f20
+ faddd %f36,%f22,%f22
+
+ fmuld %f24,%f28,%f28
+ faddd %f38,%f30,%f30
+
+ faddd %f2,%f6,%f6
+ ldd [%l4+16],%f32
+
+ faddd %f10,%f14,%f14
+ ldd [%l5+16],%f34
+
+ faddd %f18,%f22,%f22
+ ldd [%l6+16],%f36
+
+ faddd %f26,%f30,%f30
+ ldd [%l7+16],%f38
+
+ fmuld %f32,%f6,%f6
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f22,%f22
+
+ fmuld %f38,%f30,%f30
+
+ faddd %f6,%f4,%f6
+
+ faddd %f14,%f12,%f14
+
+ faddd %f22,%f20,%f22
+
+ faddd %f30,%f28,%f30
+
+ faddd %f6,%f0,%f6
+
+ faddd %f14,%f8,%f14
+
+ faddd %f22,%f16,%f22
+
+ faddd %f30,%f24,%f30
+ mov %l0,%l4
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f14,%f12
+ lda [%i1]%asi,%f0
+
+ fnegd %f22,%f20
+ lda [%i1+4]%asi,%f3
+
+ fnegd %f30,%f28
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ andcc %l4,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f12,%f14
+ st %f14,[%o1]
+
+ andcc %l2,2,%g0
+ fmovdnz %icc,%f20,%f22
+ st %f22,[%o2]
+
+ andcc %l3,2,%g0
+ fmovdnz %icc,%f28,%f30
+ st %f30,[%o3]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.end:
+ st %f15,[%o1+4]
+ st %f23,[%o2+4]
+ st %f31,[%o3+4]
+ ld [%fp+biguns],%i5
+ tst %i5 ! check for huge arguments remaining
+ be,pt %icc,.exit
+! delay slot
+ nop
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ysave],%o3
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ysave],%o3
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sysave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ sra %o4,0,%o4
+ call __vlibm_vsin_big_ultra3
+ sra %o5,0,%o5 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
+ .align 16
+.last1:
+ faddd %f2,c3two44,%f4
+ st %f15,[%o1+4]
+.last1_from_range1:
+ mov 0,%l1
+ fzeros %f8
+ fzero %f10
+ add %fp,junk,%o1
+.last2:
+ faddd %f10,c3two44,%f12
+ st %f23,[%o2+4]
+.last2_from_range2:
+ mov 0,%l2
+ fzeros %f16
+ fzero %f18
+ add %fp,junk,%o2
+.last3:
+ faddd %f18,c3two44,%f20
+ st %f31,[%o3+4]
+ st %f5,[%fp+nk0]
+ st %f13,[%fp+nk1]
+.last3_from_range3:
+ mov 0,%l3
+ fzeros %f24
+ fzero %f26
+ ba,pt %icc,.cont
+! delay slot
+ add %fp,junk,%o3
+
+
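+! The .range0-.range3 blocks below handle arguments outside the main path.
+! A rough scalar model in C (an editor's sketch, not original source; the
+! hex bounds are those named in the comments):
+!
+!	/* hx = high word of x with the sign bit cleared */
+!	if (hx < 0x3e400000) {		/* |x| < ~2^-27: sin(x) ~ x */
+!		(void) rint(x);		/* raise inexact if x != 0 */
+!		*y = x;
+!	} else if (hx >= 0x7ff00000) {	/* inf or NaN */
+!		*y = x * 0.0;		/* NaN result, invalid for inf */
+!	} else {
+!		biguns = 1;		/* huge but finite: recomputed by */
+!	}				/* __vlibm_vsin_big_ultra3 at .end */
+!
+! Each block then counts the element as done and either refills the lane
+! with the next argument or, when the vector is exhausted, branches to the
+! wind-down code.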
+ .align 16
+.range0:
+ cmp %l0,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l0,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f0
+ fmuld %f2,%f0,%f2
+ st %f2,[%o0]
+ ba,pt %icc,2f
+! delay slot
+ st %f3,[%o0+4]
+1:
+ fdtoi %f2,%f4 ! raise inexact if not zero
+ st %f0,[%o0]
+ st %f3,[%o0+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.end
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovs %f8,%f0
+ fmovs %f11,%f3
+ ba,pt %icc,.loop0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range1:
+ cmp %l1,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l1,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f8
+ fmuld %f10,%f8,%f10
+ st %f10,[%o1]
+ ba,pt %icc,2f
+! delay slot
+ st %f11,[%o1+4]
+1:
+ fdtoi %f10,%f12 ! raise inexact if not zero
+ st %f8,[%o1]
+ st %f11,[%o1+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1_from_range1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovs %f16,%f8
+ fmovs %f19,%f11
+ ba,pt %icc,.loop1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range2:
+ cmp %l2,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l2,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f16
+ fmuld %f18,%f16,%f18
+ st %f18,[%o2]
+ ba,pt %icc,2f
+! delay slot
+ st %f19,[%o2+4]
+1:
+ fdtoi %f18,%f20 ! raise inexact if not zero
+ st %f16,[%o2]
+ st %f19,[%o2+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last2_from_range2
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ andn %l3,%i5,%l2 ! hx &= ~0x80000000
+ fmovs %f24,%f16
+ fmovs %f27,%f19
+ ba,pt %icc,.loop2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range3:
+ cmp %l3,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l3,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f24
+ fmuld %f26,%f24,%f26
+ st %f26,[%o3]
+ ba,pt %icc,2f
+! delay slot
+ st %f27,[%o3+4]
+1:
+ fdtoi %f26,%f28 ! raise inexact if not zero
+ st %f24,[%o3]
+ st %f27,[%o3+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last3_from_range3
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! y += stridey
+ ld [%i1],%l3
+ ld [%i1],%f24
+ ld [%i1+4],%f27
+ andn %l3,%i5,%l3 ! hx &= ~0x80000000
+ ba,pt %icc,.loop3
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+ SET_SIZE(__vsin_ultra3)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsincos.S b/usr/src/libm/src/mvec/vis/__vsincos.S
new file mode 100644
index 0000000..c01b394
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsincos.S
@@ -0,0 +1,958 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsincos.S 1.6 06/01/23 SMI"
+
+ .file "__vsincos.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+constants:
+ .word 0x42c80000,0x00000000 ! 3 * 2^44
+ .word 0x43380000,0x00000000 ! 3 * 2^51
+ .word 0x3fe45f30,0x6dc9c883 ! invpio2
+ .word 0x3ff921fb,0x54442c00 ! pio2_1
+ .word 0x3d318469,0x898cc400 ! pio2_2
+ .word 0x3a71701b,0x839a2520 ! pio2_3
+ .word 0xbfc55555,0x55555533 ! pp1
+ .word 0x3f811111,0x10e7d53b ! pp2
+ .word 0xbf2a0167,0xe6b3cf9b ! pp3
+ .word 0xbfdfffff,0xffffff65 ! qq1
+ .word 0x3fa55555,0x54f88ed0 ! qq2
+ .word 0xbf56c12c,0xdd185f60 ! qq3
+
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ssave STACK_BIAS-0x10
+#define csave STACK_BIAS-0x18
+#define nsave STACK_BIAS-0x1c
+#define sxsave STACK_BIAS-0x20
+#define sssave STACK_BIAS-0x24
+#define biguns STACK_BIAS-0x28
+#define junk STACK_BIAS-0x30
+#define nk2 STACK_BIAS-0x38
+#define nk1 STACK_BIAS-0x3c
+#define nk0 STACK_BIAS-0x40
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 s
+! i4 strides
+! i5 0x80000000,n0
+
+! l0 hx0,k0
+! l1 hx1,k1
+! l2 hx2,k2
+! l3 c
+! l4 pc0
+! l5 pc1
+! l6 pc2
+! l7 stridec
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1 __vlibm_TBL_sincos2
+! g5 scratch,n1
+
+! o0 ps0
+! o1 ps1
+! o2 ps2
+! o3 0x3fe921fb
+! o4 0x3e400000
+! o5 0x4099251e
+! o7 scratch,n2
+
+! f0 x0,z0
+! f2 abs(x0)
+! f4
+! f6
+! f8
+! f10 x1,z1
+! f12 abs(x1)
+! f14
+! f16
+! f18
+! f20 x2,z2
+! f22 abs(x2)
+! f24
+! f26
+! f28
+! f30
+! f32
+! f34
+! f36
+! f38
+
+#define c3two44 %f40
+#define c3two51 %f42
+#define invpio2 %f44
+#define pio2_1 %f46
+#define pio2_2 %f48
+#define pio2_3 %f50
+#define pp1 %f52
+#define pp2 %f54
+#define pp3 %f56
+#define qq1 %f58
+#define qq2 %f60
+#define qq3 %f62
+
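+! A rough C model of the per-element computation this routine vectorizes
+! (an editor's sketch, not original source; the 32-byte table entries of
+! __vlibm_TBL_sincos2 appear from the code to hold { xk, sin(xk), cos(xk) }
+! at offsets 0, 8 and 16, and table_index()/tbl[] are illustrative names):
+!
+!	void sincos_elem(double x, double *s, double *c)
+!	{
+!		int k = table_index(x);	/* from the leading bits of |x| */
+!		double xk = tbl[k].x, sk = tbl[k].s, ck = tbl[k].c;
+!		double r = fabs(x) - xk;
+!		double z = r * r;
+!		double spoly = r * (1.0 + z*(pp1 + z*(pp2 + z*pp3)));	/* ~ sin r     */
+!		double cpoly = z*(qq1 + z*(qq2 + z*qq3));		/* ~ cos r - 1 */
+!
+!		/* angle addition for xk + r */
+!		*s = sk + (ck*spoly + sk*cpoly);
+!		*c = ck + (ck*cpoly - sk*spoly);
+!		if (x < 0.0)
+!			*s = -*s;	/* sin is odd, cos is even */
+!	}
+!
+! Arguments too large for this primary range first go through the .medium
+! reduction by multiples of pi/2; the quadrant count n then swaps the sin
+! and cos destinations when (n & 1) is set and negates the stored results
+! when (n & 2) is set, as the comments in the code note.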
+ ENTRY(__vsincos)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o0)
+ PIC_SET(l7,__vlibm_TBL_sincos2,o1)
+ mov %o1,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ssave]
+ stx %i5,[%fp+csave]
+ ldx [%fp+STACK_BIAS+0xb0],%l7
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ssave]
+ st %i5,[%fp+csave]
+ ld [%fp+0x5c],%l7
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sssave]
+ mov %i5,%l3
+ st %g0,[%fp+biguns] ! biguns = 0
+ ldd [%o0+0x00],c3two44 ! load/set up constants
+ ldd [%o0+0x08],c3two51
+ ldd [%o0+0x10],invpio2
+ ldd [%o0+0x18],pio2_1
+ ldd [%o0+0x20],pio2_2
+ ldd [%o0+0x28],pio2_3
+ ldd [%o0+0x30],pp1
+ ldd [%o0+0x38],pp2
+ ldd [%o0+0x40],pp3
+ ldd [%o0+0x48],qq1
+ ldd [%o0+0x50],qq2
+ ldd [%o0+0x58],qq3
+ sethi %hi(0x80000000),%i5
+ sethi %hi(0x3e400000),%o4
+ sethi %hi(0x3fe921fb),%o3
+ or %o3,%lo(0x3fe921fb),%o3
+ sethi %hi(0x4099251e),%o5
+ or %o5,%lo(0x4099251e),%o5
+ sll %i2,3,%i2 ! scale strides
+ sll %i4,3,%i4
+ sll %l7,3,%l7
+ add %fp,junk,%o0 ! loop prologue
+ add %fp,junk,%o1
+ add %fp,junk,%o2
+ ld [%i1],%l0 ! *x
+ ld [%i1],%f0
+ ld [%i1+4],%f3
+ andn %l0,%i5,%l0 ! mask off sign
+ ba .loop0
+ add %i1,%i2,%i1 ! x += stridex
+
+! 16-byte aligned
+ .align 16
+.loop0:
+ lda [%i1]%asi,%l1 ! preload next argument
+ sub %l0,%o4,%g5
+ sub %o5,%l0,%o7
+ fabss %f0,%f2
+
+ lda [%i1]%asi,%f10
+ orcc %o7,%g5,%g0
+ mov %i3,%o0 ! ps0 = s
+ bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f13
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! s += strides
+
+ mov %l3,%l4 ! pc0 = c
+ add %l3,%l7,%l3 ! c += stridec
+ ble,pn %icc,.last1
+
+! delay slot
+ andn %l1,%i5,%l1
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f2,c3two44,%f4
+ st %f17,[%o1+4]
+
+.loop1:
+ lda [%i1]%asi,%l2 ! preload next argument
+ sub %l1,%o4,%g5
+ sub %o5,%l1,%o7
+ fabss %f10,%f12
+
+ lda [%i1]%asi,%f20
+ orcc %o7,%g5,%g0
+ mov %i3,%o1 ! ps1 = s
+ bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e
+
+! delay slot
+ lda [%i1+4]%asi,%f23
+ addcc %i0,-1,%i0
+ add %i3,%i4,%i3 ! s += strides
+
+ mov %l3,%l5 ! pc1 = c
+ add %l3,%l7,%l3 ! c += stridec
+ ble,pn %icc,.last2
+
+! delay slot
+ andn %l2,%i5,%l2
+ add %i1,%i2,%i1 ! x += stridex
+ faddd %f12,c3two44,%f14
+ st %f27,[%o2+4]
+
+.loop2:
+ sub %l2,%o4,%g5
+ sub %o5,%l2,%o7
+ fabss %f20,%f22
+ st %f5,[%fp+nk0]
+
+ orcc %o7,%g5,%g0
+ mov %i3,%o2 ! ps2 = s
+ bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e
+! delay slot
+ st %f15,[%fp+nk1]
+
+ mov %l3,%l6 ! pc2 = c
+
+.cont:
+ add %i3,%i4,%i3 ! s += strides
+ add %l3,%l7,%l3 ! c += stridec
+ faddd %f22,c3two44,%f24
+ st %f25,[%fp+nk2]
+
+ sub %o3,%l0,%l0
+ sub %o3,%l1,%l1
+ fmovs %f3,%f1
+
+ sub %o3,%l2,%l2
+ fmovs %f13,%f11
+
+ or %l0,%l1,%l0
+ orcc %l0,%l2,%g0
+ fmovs %f23,%f21
+
+ fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range
+
+ fmuld %f10,invpio2,%f16
+ ld [%fp+nk0],%l0
+
+ fmuld %f20,invpio2,%f26
+ ld [%fp+nk1],%l1
+
+ bl,pn %icc,.medium
+! delay slot
+ ld [%fp+nk2],%l2
+
+ sll %l0,5,%l0 ! k
+ fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0
+
+ sll %l1,5,%l1
+ ldd [%l0+%g1],%f4
+ fcmpd %fcc1,%f10,pio2_3
+
+ sll %l2,5,%l2
+ ldd [%l1+%g1],%f14
+ fcmpd %fcc2,%f20,pio2_3
+
+ ldd [%l2+%g1],%f24
+
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f12,%f14,%f12
+
+ fsubd %f22,%f24,%f22
+
+ fmuld %f2,%f2,%f0 ! z = x * x
+
+ fmuld %f12,%f12,%f10
+
+ fmuld %f22,%f22,%f20
+
+ fmuld %f0,pp3,%f6
+
+ fmuld %f10,pp3,%f16
+
+ fmuld %f20,pp3,%f26
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq3,%f4
+
+ faddd %f16,pp2,%f16
+ fmuld %f10,qq3,%f14
+
+ faddd %f26,pp2,%f26
+ fmuld %f20,qq3,%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq2,%f4
+
+ fmuld %f10,%f16,%f16
+ faddd %f14,qq2,%f14
+
+ fmuld %f20,%f26,%f26
+ faddd %f24,qq2,%f24
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l0,%g1,%l0
+
+ faddd %f16,pp1,%f16
+ fmuld %f10,%f14,%f14
+ add %l1,%g1,%l1
+
+ faddd %f26,pp1,%f26
+ fmuld %f20,%f24,%f24
+ add %l2,%g1,%l2
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f10,%f16,%f16
+ faddd %f14,qq1,%f14
+
+ fmuld %f20,%f26,%f26
+ faddd %f24,qq1,%f24
+
+ fmuld %f2,%f6,%f6
+ ldd [%l0+8],%f8
+
+ fmuld %f12,%f16,%f16
+ ldd [%l1+8],%f18
+
+ fmuld %f22,%f26,%f26
+ ldd [%l2+8],%f28
+
+ faddd %f6,%f2,%f6
+ fmuld %f0,%f4,%f4
+ ldd [%l0+16],%f30
+
+ faddd %f16,%f12,%f16
+ fmuld %f10,%f14,%f14
+ ldd [%l1+16],%f32
+
+ faddd %f26,%f22,%f26
+ fmuld %f20,%f24,%f24
+ ldd [%l2+16],%f34
+
+ fmuld %f8,%f6,%f0 ! s * spoly
+
+ fmuld %f18,%f16,%f10
+
+ fmuld %f28,%f26,%f20
+
+ fmuld %f30,%f4,%f2 ! c * cpoly
+
+ fmuld %f32,%f14,%f12
+
+ fmuld %f34,%f24,%f22
+
+ fmuld %f30,%f6,%f6 ! c * spoly
+ fsubd %f2,%f0,%f2
+
+ fmuld %f32,%f16,%f16
+ fsubd %f12,%f10,%f12
+
+ fmuld %f34,%f26,%f26
+ fsubd %f22,%f20,%f22
+
+ fmuld %f8,%f4,%f4 ! s * cpoly
+ faddd %f2,%f30,%f2
+ st %f2,[%l4]
+
+ fmuld %f18,%f14,%f14
+ faddd %f12,%f32,%f12
+ st %f3,[%l4+4]
+
+ fmuld %f28,%f24,%f24
+ faddd %f22,%f34,%f22
+ st %f12,[%l5]
+
+ faddd %f6,%f4,%f6
+ st %f13,[%l5+4]
+
+ faddd %f16,%f14,%f16
+ st %f22,[%l6]
+
+ faddd %f26,%f24,%f26
+ st %f23,[%l6+4]
+
+ faddd %f6,%f8,%f6
+
+ faddd %f16,%f18,%f16
+
+ faddd %f26,%f28,%f26
+
+ fnegd %f6,%f4
+ lda [%i1]%asi,%l0 ! preload next argument
+
+ fnegd %f16,%f14
+ lda [%i1]%asi,%f0
+
+ fnegd %f26,%f24
+ lda [%i1+4]%asi,%f3
+ andn %l0,%i5,%l0
+ add %i1,%i2,%i1
+
+ fmovdl %fcc0,%f4,%f6 ! (hx < -0)? -s : s
+ st %f6,[%o0]
+
+ fmovdl %fcc1,%f14,%f16
+ st %f16,[%o1]
+
+ fmovdl %fcc2,%f24,%f26
+ st %f26,[%o2]
+ addcc %i0,-1,%i0
+
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
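+! The medium-range path reduces x by multiples of pi/2 before the table and
+! polynomial evaluation.  A rough C sketch of the reduction (an editor's
+! note, not original source; c3two51 is the 3*2^51 rounding constant and
+! pio2_1/pio2_2/pio2_3 are the three-part split of pi/2 loaded above;
+! lower_32_bits() is an illustrative helper):
+!
+!	double t = x * invpio2 + c3two51;
+!	int    n = lower_32_bits(t);	/* quadrant count */
+!	double m = t - c3two51;		/* nearest integer to x*2/pi */
+!	double r = ((x - m*pio2_1) - m*pio2_2) - m*pio2_3;
+!
+! The code below carries the reduction terms with compensation so no
+! accuracy is lost, and it also folds the sign of the reduced argument into
+! the quadrant bits (the "n = (n >> 1) | ..." comment further down).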
+ .align 16
+.medium:
+ faddd %f6,c3two51,%f4
+ st %f5,[%fp+nk0]
+
+ faddd %f16,c3two51,%f14
+ st %f15,[%fp+nk1]
+
+ faddd %f26,c3two51,%f24
+ st %f25,[%fp+nk2]
+
+ fsubd %f4,c3two51,%f6
+
+ fsubd %f14,c3two51,%f16
+
+ fsubd %f24,c3two51,%f26
+
+ fmuld %f6,pio2_1,%f2
+ ld [%fp+nk0],%i5 ! n
+
+ fmuld %f16,pio2_1,%f12
+ ld [%fp+nk1],%g5
+
+ fmuld %f26,pio2_1,%f22
+ ld [%fp+nk2],%o7
+
+ fsubd %f0,%f2,%f0
+ fmuld %f6,pio2_2,%f4
+ mov %o0,%o4 ! if (n & 1) swap ps, pc
+ andcc %i5,1,%g0
+
+ fsubd %f10,%f12,%f10
+ fmuld %f16,pio2_2,%f14
+ movnz %icc,%l4,%o0
+ and %i5,3,%i5
+
+ fsubd %f20,%f22,%f20
+ fmuld %f26,pio2_2,%f24
+ movnz %icc,%o4,%l4
+
+ fsubd %f0,%f4,%f30
+ mov %o1,%o4
+ andcc %g5,1,%g0
+
+ fsubd %f10,%f14,%f32
+ movnz %icc,%l5,%o1
+ and %g5,3,%g5
+
+ fsubd %f20,%f24,%f34
+ movnz %icc,%o4,%l5
+
+ fsubd %f0,%f30,%f0
+ fcmple32 %f30,pio2_3,%l0 ! x <= pio2_3 iff x < 0
+ mov %o2,%o4
+ andcc %o7,1,%g0
+
+ fsubd %f10,%f32,%f10
+ fcmple32 %f32,pio2_3,%l1
+ movnz %icc,%l6,%o2
+ and %o7,3,%o7
+
+ fsubd %f20,%f34,%f20
+ fcmple32 %f34,pio2_3,%l2
+ movnz %icc,%o4,%l6
+
+ fsubd %f0,%f4,%f0
+ fmuld %f6,pio2_3,%f6
+ add %i5,1,%o4 ! n = (n >> 1) | (((n + 1) ^ l) & 2)
+ srl %i5,1,%i5
+
+ fsubd %f10,%f14,%f10
+ fmuld %f16,pio2_3,%f16
+ xor %o4,%l0,%o4
+
+ fsubd %f20,%f24,%f20
+ fmuld %f26,pio2_3,%f26
+ and %o4,2,%o4
+
+ fsubd %f6,%f0,%f6
+ or %i5,%o4,%i5
+
+ fsubd %f16,%f10,%f16
+ add %g5,1,%o4
+ srl %g5,1,%g5
+
+ fsubd %f26,%f20,%f26
+ xor %o4,%l1,%o4
+
+ fsubd %f30,%f6,%f0 ! reduced x
+ and %o4,2,%o4
+
+ fsubd %f32,%f16,%f10
+ or %g5,%o4,%g5
+
+ fsubd %f34,%f26,%f20
+ add %o7,1,%o4
+ srl %o7,1,%o7
+
+ fzero %f38
+ xor %o4,%l2,%o4
+
+ fabsd %f0,%f2
+ and %o4,2,%o4
+
+ fabsd %f10,%f12
+ or %o7,%o4,%o7
+
+ fabsd %f20,%f22
+ sethi %hi(0x3e400000),%o4
+
+ fnegd %f38,%f38
+
+ faddd %f2,c3two44,%f4
+ st %f5,[%fp+nk0]
+
+ faddd %f12,c3two44,%f14
+ st %f15,[%fp+nk1]
+
+ faddd %f22,c3two44,%f24
+ st %f25,[%fp+nk2]
+
+ fsubd %f30,%f0,%f4
+
+ fsubd %f32,%f10,%f14
+
+ fsubd %f34,%f20,%f24
+
+ fsubd %f4,%f6,%f6 ! w
+ ld [%fp+nk0],%l0
+
+ fsubd %f14,%f16,%f16
+ ld [%fp+nk1],%l1
+
+ fsubd %f24,%f26,%f26
+ ld [%fp+nk2],%l2
+ sll %l0,5,%l0 ! k
+
+ fand %f0,%f38,%f30 ! sign bit of x
+ ldd [%l0+%g1],%f4
+ sll %l1,5,%l1
+
+ fand %f10,%f38,%f32
+ ldd [%l1+%g1],%f14
+ sll %l2,5,%l2
+
+ fand %f20,%f38,%f34
+ ldd [%l2+%g1],%f24
+
+ fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
+
+ fsubd %f12,%f14,%f12
+
+ fsubd %f22,%f24,%f22
+
+ fmuld %f2,%f2,%f0 ! z = x * x
+ fxor %f6,%f30,%f30
+
+ fmuld %f12,%f12,%f10
+ fxor %f16,%f32,%f32
+
+ fmuld %f22,%f22,%f20
+ fxor %f26,%f34,%f34
+
+ fmuld %f0,pp3,%f6
+
+ fmuld %f10,pp3,%f16
+
+ fmuld %f20,pp3,%f26
+
+ faddd %f6,pp2,%f6
+ fmuld %f0,qq3,%f4
+
+ faddd %f16,pp2,%f16
+ fmuld %f10,qq3,%f14
+
+ faddd %f26,pp2,%f26
+ fmuld %f20,qq3,%f24
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq2,%f4
+
+ fmuld %f10,%f16,%f16
+ faddd %f14,qq2,%f14
+
+ fmuld %f20,%f26,%f26
+ faddd %f24,qq2,%f24
+
+ faddd %f6,pp1,%f6
+ fmuld %f0,%f4,%f4
+ add %l0,%g1,%l0
+
+ faddd %f16,pp1,%f16
+ fmuld %f10,%f14,%f14
+ add %l1,%g1,%l1
+
+ faddd %f26,pp1,%f26
+ fmuld %f20,%f24,%f24
+ add %l2,%g1,%l2
+
+ fmuld %f0,%f6,%f6
+ faddd %f4,qq1,%f4
+
+ fmuld %f10,%f16,%f16
+ faddd %f14,qq1,%f14
+
+ fmuld %f20,%f26,%f26
+ faddd %f24,qq1,%f24
+
+ fmuld %f2,%f6,%f6
+ ldd [%l0+16],%f8
+
+ fmuld %f12,%f16,%f16
+ ldd [%l1+16],%f18
+
+ fmuld %f22,%f26,%f26
+ ldd [%l2+16],%f28
+
+ faddd %f6,%f30,%f6
+ fmuld %f0,%f4,%f4
+ ldd [%l0+8],%f30
+
+ faddd %f16,%f32,%f16
+ fmuld %f10,%f14,%f14
+ ldd [%l1+8],%f32
+
+ faddd %f26,%f34,%f26
+ fmuld %f20,%f24,%f24
+ ldd [%l2+8],%f34
+
+ fmuld %f8,%f4,%f0 ! c * cpoly
+ faddd %f6,%f2,%f6
+
+ fmuld %f18,%f14,%f10
+ faddd %f16,%f12,%f16
+
+ fmuld %f28,%f24,%f20
+ faddd %f26,%f22,%f26
+
+ fmuld %f30,%f6,%f2 ! s * spoly
+
+ fmuld %f32,%f16,%f12
+
+ fmuld %f34,%f26,%f22
+
+ fmuld %f8,%f6,%f6 ! c * spoly
+ fsubd %f0,%f2,%f2
+
+ fmuld %f18,%f16,%f16
+ fsubd %f10,%f12,%f12
+
+ fmuld %f28,%f26,%f26
+ fsubd %f20,%f22,%f22
+
+ fmuld %f30,%f4,%f4 ! s * cpoly
+ faddd %f8,%f2,%f8
+
+ fmuld %f32,%f14,%f14
+ faddd %f18,%f12,%f18
+
+ fmuld %f34,%f24,%f24
+ faddd %f28,%f22,%f28
+
+ faddd %f4,%f6,%f6
+
+ faddd %f14,%f16,%f16
+
+ faddd %f24,%f26,%f26
+
+ faddd %f30,%f6,%f6 ! now %f6 = sin |x|, %f8 = cos |x|
+
+ faddd %f32,%f16,%f16
+
+ faddd %f34,%f26,%f26
+
+ fnegd %f8,%f4 ! if (n & 1) c = -c
+ lda [%i1]%asi,%l0 ! preload next argument
+ mov %i5,%l1
+
+ fnegd %f18,%f14
+ lda [%i1]%asi,%f0
+ sethi %hi(0x80000000),%i5
+
+ fnegd %f28,%f24
+ lda [%i1+4]%asi,%f3
+
+ andcc %l1,1,%g0
+ fmovdnz %icc,%f4,%f8
+ st %f8,[%l4]
+
+ andcc %g5,1,%g0
+ fmovdnz %icc,%f14,%f18
+ st %f9,[%l4+4]
+
+ andcc %o7,1,%g0
+ fmovdnz %icc,%f24,%f28
+ st %f18,[%l5]
+
+ fnegd %f6,%f4 ! if (n & 2) s = -s
+ st %f19,[%l5+4]
+ andn %l0,%i5,%l0
+
+ fnegd %f16,%f14
+ st %f28,[%l6]
+ add %i1,%i2,%i1
+
+ fnegd %f26,%f24
+ st %f29,[%l6+4]
+
+ andcc %l1,2,%g0
+ fmovdnz %icc,%f4,%f6
+ st %f6,[%o0]
+
+ andcc %g5,2,%g0
+ fmovdnz %icc,%f14,%f16
+ st %f16,[%o1]
+
+ andcc %o7,2,%g0
+ fmovdnz %icc,%f24,%f26
+ st %f26,[%o2]
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.loop0
+! delay slot
+ st %f7,[%o0+4]
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.end:
+ st %f17,[%o1+4]
+ st %f27,[%o2+4]
+ ld [%fp+biguns],%i5
+ tst %i5 ! check for huge arguments remaining
+ be,pt %icc,.exit
+! delay slot
+ nop
+#ifdef __sparcv9
+ stx %o5,[%sp+STACK_BIAS+0xb8]
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ssave],%o3
+ ldx [%fp+csave],%o5
+ ldx [%fp+STACK_BIAS+0xb0],%i5
+ stx %i5,[%sp+STACK_BIAS+0xb0]
+#else
+ st %o5,[%sp+0x60]
+ ld [%fp+xsave],%o1
+ ld [%fp+ssave],%o3
+ ld [%fp+csave],%o5
+ ld [%fp+0x5c],%i5
+ st %i5,[%sp+0x5c]
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sssave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ call __vlibm_vsincos_big
+ sra %o4,0,%o4 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
+ .align 16
+.last1:
+ faddd %f2,c3two44,%f4
+ st %f17,[%o1+4]
+.last1_from_range1:
+ mov 0,%l1
+ fzeros %f10
+ fzero %f12
+ add %fp,junk,%o1
+ add %fp,junk,%l5
+.last2:
+ faddd %f12,c3two44,%f14
+ st %f27,[%o2+4]
+ st %f5,[%fp+nk0]
+ st %f15,[%fp+nk1]
+.last2_from_range2:
+ mov 0,%l2
+ fzeros %f20
+ fzero %f22
+ add %fp,junk,%o2
+ ba,pt %icc,.cont
+! delay slot
+ add %fp,junk,%l6
+
+
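+! The .range0-.range2 blocks below handle one out-of-range lane each.
+! A rough scalar model (an editor's sketch, not original source):
+!
+!	if (hx < 0x3e400000) {		/* |x| < ~2^-27 */
+!		(void) rint(x);		/* raise inexact if x != 0 */
+!		*s = x;			/* sin(x) ~ x */
+!		*c = 1.0;		/* cos(x) ~ 1 */
+!	} else if (hx >= 0x7ff00000) {	/* inf or NaN */
+!		*s = *c = x * 0.0;
+!	} else {
+!		biguns = 1;		/* huge but finite: recomputed by */
+!	}				/* __vlibm_vsincos_big at .end    */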
+ .align 16
+.range0:
+ cmp %l0,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l0,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f0
+ fmuld %f2,%f0,%f2
+ st %f2,[%o0]
+ st %f3,[%o0+4]
+ st %f2,[%l3]
+ ba,pt %icc,2f
+! delay slot
+ st %f3,[%l3+4]
+1:
+ fdtoi %f2,%f4 ! raise inexact if not zero
+ st %f0,[%o0]
+ st %f3,[%o0+4]
+ sethi %hi(0x3ff00000),%g5
+ st %g5,[%l3]
+ st %g0,[%l3+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.end
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! s += strides
+ add %l3,%l7,%l3 ! c += stridec
+ andn %l1,%i5,%l0 ! hx &= ~0x80000000
+ fmovs %f10,%f0
+ fmovs %f13,%f3
+ ba,pt %icc,.loop0
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range1:
+ cmp %l1,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l1,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f10
+ fmuld %f12,%f10,%f12
+ st %f12,[%o1]
+ st %f13,[%o1+4]
+ st %f12,[%l3]
+ ba,pt %icc,2f
+! delay slot
+ st %f13,[%l3+4]
+1:
+ fdtoi %f12,%f14 ! raise inexact if not zero
+ st %f10,[%o1]
+ st %f13,[%o1+4]
+ sethi %hi(0x3ff00000),%g5
+ st %g5,[%l3]
+ st %g0,[%l3+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last1_from_range1
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! s += strides
+ add %l3,%l7,%l3 ! c += stridec
+ andn %l2,%i5,%l1 ! hx &= ~0x80000000
+ fmovs %f20,%f10
+ fmovs %f23,%f13
+ ba,pt %icc,.loop1
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+
+ .align 16
+.range2:
+ cmp %l2,%o4
+ bl,pt %icc,1f ! hx < 0x3e400000
+! delay slot, harmless if branch taken
+ sethi %hi(0x7ff00000),%o7
+ cmp %l2,%o7
+ bl,a,pt %icc,2f ! branch if finite
+! delay slot, squashed if branch not taken
+ st %o4,[%fp+biguns] ! set biguns
+ fzero %f20
+ fmuld %f22,%f20,%f22
+ st %f22,[%o2]
+ st %f23,[%o2+4]
+ st %f22,[%l3]
+ ba,pt %icc,2f
+! delay slot
+ st %f23,[%l3+4]
+1:
+ fdtoi %f22,%f24 ! raise inexact if not zero
+ st %f20,[%o2]
+ st %f23,[%o2+4]
+ sethi %hi(0x3ff00000),%g5
+ st %g5,[%l3]
+ st %g0,[%l3+4]
+2:
+ addcc %i0,-1,%i0
+ ble,pn %icc,.last2_from_range2
+! delay slot, harmless if branch taken
+ add %i3,%i4,%i3 ! s += strides
+ add %l3,%l7,%l3 ! c += stridec
+ ld [%i1],%l2
+ ld [%i1],%f20
+ ld [%i1+4],%f23
+ andn %l2,%i5,%l2 ! hx &= ~0x80000000
+ ba,pt %icc,.loop2
+! delay slot
+ add %i1,%i2,%i1 ! x += stridex
+
+ SET_SIZE(__vsincos)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsincosf.S b/usr/src/libm/src/mvec/vis/__vsincosf.S
new file mode 100644
index 0000000..c071d91
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsincosf.S
@@ -0,0 +1,905 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsincosf.S 1.8 06/01/23 SMI"
+
+ .file "__vsincosf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+constants:
+	.word	0xbfc55554,0x60000000	! S0 (sin poly coefficient)
+	.word	0x3f811077,0xe0000000	! S1
+	.word	0xbf29956b,0x60000000	! S2
+	.word	0x3ff00000,0x00000000	! one = 1.0
+	.word	0xbfe00000,0x00000000	! mhalf = -0.5
+	.word	0x3fa55554,0xa0000000	! C0 (cos poly coefficient)
+	.word	0xbf56c0c1,0xe0000000	! C1
+	.word	0x3ef99e24,0xe0000000	! C2
+	.word	0x3fe45f30,0x6dc9c883	! invpio2 = 2/pi
+	.word	0x43380000,0x00000000	! round = 1.5 * 2^52
+	.word	0x3ff921fb,0x54400000	! pio2_1 (high part of pi/2)
+	.word	0x3dd0b461,0x1a626331	! pio2_t (pi/2 - pio2_1)
+	.word	0x3f490fdb,0		! thresh1 = pi/4 (single)
+	.word	0x49c90fdb,0		! thresh2 = 2^19 pi (single)
+	.word	0x7f800000,0		! inf (single)
+	.word	0x80000000,0		! signbit (single)
+
+#define S0 0x0
+#define S1 0x08
+#define S2 0x10
+#define one 0x18
+#define mhalf 0x20
+#define C0 0x28
+#define C1 0x30
+#define C2 0x38
+#define invpio2 0x40
+#define round 0x48
+#define pio2_1 0x50
+#define pio2_t 0x58
+#define thresh1 0x60
+#define thresh2 0x68
+#define inf 0x70
+#define signbit 0x78
+
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ssave STACK_BIAS-0x10
+#define csave STACK_BIAS-0x18
+#define nsave STACK_BIAS-0x1c
+#define sxsave STACK_BIAS-0x20
+#define sssave STACK_BIAS-0x24
+#define junk STACK_BIAS-0x28
+#define n3 STACK_BIAS-0x38
+#define n2 STACK_BIAS-0x40
+#define n1 STACK_BIAS-0x48
+#define n0 STACK_BIAS-0x50
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x50
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 s
+! i4 strides
+! i5 biguns
+
+! l0 ps0
+! l1 ps1
+! l2 ps2
+! l3 ps3
+! l4 pc0
+! l5 pc1
+! l6 pc2
+! l7 pc3
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1
+! g5
+
+! o0 n0
+! o1 n1
+! o2 n2
+! o3 n3
+! o4 c
+! o5 stridec
+! o7
+
+! f0 x0
+! f2 x1
+! f4 x2
+! f6 x3
+! f8 thresh1 (pi/4)
+! f10 s0
+! f12 s1
+! f14 s2
+! f16 s3
+! f18 thresh2 (2^19 pi)
+! f20 c0
+! f22 c1
+! f24 c2
+! f26 c3
+! f28 signbit
+! f30
+! f32
+! f34
+! f36
+! f38 inf
+! f40 S0
+! f42 S1
+! f44 S2
+! f46 one
+! f48 mhalf
+! f50 C0
+! f52 C1
+! f54 C2
+! f56 invpio2
+! f58 round
+! f60 pio2_1
+! f62 pio2_t
+
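+! A rough C model of the per-element flow (an editor's sketch, not original
+! source; poly_sin/poly_cos stand for the Horner forms noted at
+! .checkprimary, and thresh1/thresh2, invpio2, pio2_1, pio2_t are the
+! constants loaded below):
+!
+!	void sincosf_elem(float x, float *s, float *c)
+!	{
+!		double dx = x;
+!		if (fabs(dx) <= thresh1) {		/* pi/4 */
+!			*s = (float) poly_sin(dx);
+!			*c = (float) poly_cos(dx);
+!		} else if (fabs(dx) <= thresh2) {	/* 2^19 * pi */
+!			int n = (int) nearbyint(dx * invpio2);
+!			double r = (dx - n*pio2_1) - n*pio2_t;
+!			double ss = poly_sin(r), cc = poly_cos(r);
+!			if (n & 1) { double t = ss; ss = cc; cc = -t; }
+!			if (n & 2) { ss = -ss; cc = -cc; }
+!			*s = (float) ss; *c = (float) cc;
+!		} else {
+!			/* huge finite: flag biguns, recomputed later by
+!			   __vlibm_vsincos_bigf; inf/NaN: store x*0 (NaN) */
+!		}
+!	}
+!
+! All intermediate arithmetic is done in double precision, so one short
+! polynomial per function is enough for single-precision results.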
+ ENTRY(__vsincosf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,o0)
+ mov %o0,%g1
+
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ssave]
+ stx %i5,[%fp+csave]
+ ldx [%fp+STACK_BIAS+0xb0],%o5
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ssave]
+ st %i5,[%fp+csave]
+ ld [%fp+0x5c],%o5
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sssave]
+ mov %i5,%o4
+ mov 0,%i5 ! biguns = 0
+ ldd [%g1+S0],%f40 ! load constants
+ ldd [%g1+S1],%f42
+ ldd [%g1+S2],%f44
+ ldd [%g1+one],%f46
+ ldd [%g1+mhalf],%f48
+ ldd [%g1+C0],%f50
+ ldd [%g1+C1],%f52
+ ldd [%g1+C2],%f54
+ ldd [%g1+invpio2],%f56
+ ldd [%g1+round],%f58
+ ldd [%g1+pio2_1],%f60
+ ldd [%g1+pio2_t],%f62
+ ldd [%g1+thresh1],%f8
+ ldd [%g1+thresh2],%f18
+ ldd [%g1+inf],%f38
+ ldd [%g1+signbit],%f28
+ sll %i2,2,%i2 ! scale strides
+ sll %i4,2,%i4
+ sll %o5,2,%o5
+ nop
+ fzero %f10 ! loop prologue
+ add %fp,junk,%l0
+ fzero %f20
+ add %fp,junk,%l4
+ fzero %f12
+ add %fp,junk,%l1
+ fzero %f22
+ add %fp,junk,%l5
+ fzero %f14
+ add %fp,junk,%l2
+ fzero %f24
+ add %fp,junk,%l6
+ fzero %f16
+ add %fp,junk,%l3
+ fzero %f26
+ ba .start
+ add %fp,junk,%l7
+
+! 16-byte aligned
+ .align 16
+.start:
+ ld [%i1],%f0 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f10,%f10
+
+ st %f10,[%l0]
+ mov %i3,%l0 ! ps0 = s
+ add %i3,%i4,%i3 ! s += strides
+ fdtos %f20,%f20
+
+ st %f20,[%l4]
+ mov %o4,%l4 ! pc0 = c
+ ble,pn %icc,.last1
+! delay slot
+ add %o4,%o5,%o4 ! c += stridec
+
+ ld [%i1],%f2 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f12,%f12
+
+ st %f12,[%l1]
+ mov %i3,%l1 ! ps1 = s
+ add %i3,%i4,%i3 ! s += strides
+ fdtos %f22,%f22
+
+ st %f22,[%l5]
+ mov %o4,%l5 ! pc1 = c
+ ble,pn %icc,.last2
+! delay slot
+ add %o4,%o5,%o4 ! c += stridec
+
+ ld [%i1],%f4 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f14,%f14
+
+ st %f14,[%l2]
+ mov %i3,%l2 ! ps2 = s
+ add %i3,%i4,%i3 ! s += strides
+ fdtos %f24,%f24
+
+ st %f24,[%l6]
+ mov %o4,%l6 ! pc2 = c
+ ble,pn %icc,.last3
+! delay slot
+ add %o4,%o5,%o4 ! c += stridec
+
+ ld [%i1],%f6 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ nop
+ fdtos %f16,%f16
+
+ st %f16,[%l3]
+ mov %i3,%l3 ! ps3 = s
+ add %i3,%i4,%i3 ! s += strides
+ fdtos %f26,%f26
+
+ st %f26,[%l7]
+ mov %o4,%l7 ! pc3 = c
+ add %o4,%o5,%o4 ! c += stridec
+.cont:
+ fabsd %f0,%f30
+
+ fabsd %f2,%f32
+
+ fabsd %f4,%f34
+
+ fabsd %f6,%f36
+ fcmple32 %f30,%f18,%o0
+
+ fcmple32 %f32,%f18,%o1
+
+ fcmple32 %f34,%f18,%o2
+
+ fcmple32 %f36,%f18,%o3
+ nop
+
+! 16-byte aligned
+ andcc %o0,2,%g0
+ bz,pn %icc,.range0 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f30,%f8,%o0
+
+.check1:
+ andcc %o1,2,%g0
+ bz,pn %icc,.range1 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f32,%f8,%o1
+
+.check2:
+ andcc %o2,2,%g0
+ bz,pn %icc,.range2 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f34,%f8,%o2
+
+.check3:
+ andcc %o3,2,%g0
+ bz,pn %icc,.range3 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f36,%f8,%o3
+
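+! Reading of the primary path below (editor's note): with z = x*x computed
+! in double precision,
+!
+!	sin(x) ~ x * (one + z*(S0 + z*(S1 + z*S2)))
+!	cos(x) ~ one + z*(mhalf + z*(C0 + z*(C1 + z*C2)))
+!
+! where S0-S2, C0-C2, one and mhalf are the constants loaded above.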
+.checkprimary:
+ fsmuld %f0,%f0,%f30
+ fstod %f0,%f0
+
+ fsmuld %f2,%f2,%f32
+ fstod %f2,%f2
+ and %o0,%o1,%o7
+
+ fsmuld %f4,%f4,%f34
+ fstod %f4,%f4
+ and %o2,%o7,%o7
+
+ fsmuld %f6,%f6,%f36
+ fstod %f6,%f6
+ and %o3,%o7,%o7
+
+ fmuld %f30,%f54,%f20
+ andcc %o7,2,%g0
+ bz,pn %icc,.medium ! branch if any argument is > pi/4
+! delay slot
+ nop
+
+ fmuld %f32,%f54,%f22
+
+ fmuld %f34,%f54,%f24
+
+ fmuld %f36,%f54,%f26
+
+ faddd %f20,%f52,%f20
+ fmuld %f30,%f44,%f10
+
+ faddd %f22,%f52,%f22
+ fmuld %f32,%f44,%f12
+
+ faddd %f24,%f52,%f24
+ fmuld %f34,%f44,%f14
+
+ faddd %f26,%f52,%f26
+ fmuld %f36,%f44,%f16
+
+ fmuld %f30,%f20,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f22,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f24,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f26,%f26
+ faddd %f16,%f42,%f16
+
+ faddd %f20,%f50,%f20
+ fmuld %f30,%f10,%f10
+
+ faddd %f22,%f50,%f22
+ fmuld %f32,%f12,%f12
+
+ faddd %f24,%f50,%f24
+ fmuld %f34,%f14,%f14
+
+ faddd %f26,%f50,%f26
+ fmuld %f36,%f16,%f16
+
+ fmuld %f30,%f20,%f20
+ faddd %f10,%f40,%f10
+
+ fmuld %f32,%f22,%f22
+ faddd %f12,%f40,%f12
+
+ fmuld %f34,%f24,%f24
+ faddd %f14,%f40,%f14
+
+ fmuld %f36,%f26,%f26
+ faddd %f16,%f40,%f16
+
+ faddd %f20,%f48,%f20
+ fmuld %f30,%f10,%f10
+
+ faddd %f22,%f48,%f22
+ fmuld %f32,%f12,%f12
+
+ faddd %f24,%f48,%f24
+ fmuld %f34,%f14,%f14
+
+ faddd %f26,%f48,%f26
+ fmuld %f36,%f16,%f16
+
+ fmuld %f30,%f20,%f20
+ faddd %f10,%f46,%f10
+
+ fmuld %f32,%f22,%f22
+ faddd %f12,%f46,%f12
+
+ fmuld %f34,%f24,%f24
+ faddd %f14,%f46,%f14
+
+ fmuld %f36,%f26,%f26
+ faddd %f16,%f46,%f16
+
+ faddd %f20,%f46,%f20
+ fmuld %f0,%f10,%f10
+
+ faddd %f22,%f46,%f22
+ fmuld %f2,%f12,%f12
+
+ faddd %f24,%f46,%f24
+ fmuld %f4,%f14,%f14
+ addcc %i0,-1,%i0
+
+ faddd %f26,%f46,%f26
+ bg,pt %icc,.start
+! delay slot
+ fmuld %f6,%f16,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
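+! The medium path reduces by multiples of pi/2, using the "round" constant
+! (1.5 * 2^52) to get a round-to-nearest integer without an explicit
+! conversion.  A rough C sketch for one lane (an editor's note, not
+! original source; lower_32_bits() is an illustrative helper):
+!
+!	double t = dx * invpio2 + round;
+!	int    n = lower_32_bits(t);	/* quadrant count */
+!	double m = t - round;
+!	double r = (dx - m*pio2_1) - m*pio2_t;
+!
+! Then, as the comments below note, (n & 1) exchanges the sin and cos
+! destination pointers and negates the reduced argument, while (n & 2) is
+! turned into a sign mask that is XORed into both results.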
+ .align 16
+.medium:
+ fmuld %f0,%f56,%f10
+
+ fmuld %f2,%f56,%f12
+
+ fmuld %f4,%f56,%f14
+
+ fmuld %f6,%f56,%f16
+
+ faddd %f10,%f58,%f10
+ st %f11,[%fp+n0]
+
+ faddd %f12,%f58,%f12
+ st %f13,[%fp+n1]
+
+ faddd %f14,%f58,%f14
+ st %f15,[%fp+n2]
+
+ faddd %f16,%f58,%f16
+ st %f17,[%fp+n3]
+
+ fsubd %f10,%f58,%f10
+
+ fsubd %f12,%f58,%f12
+
+ fsubd %f14,%f58,%f14
+
+ fsubd %f16,%f58,%f16
+
+ fmuld %f10,%f60,%f20
+ ld [%fp+n0],%o0
+
+ fmuld %f12,%f60,%f22
+ ld [%fp+n1],%o1
+
+ fmuld %f14,%f60,%f24
+ ld [%fp+n2],%o2
+
+ fmuld %f16,%f60,%f26
+ ld [%fp+n3],%o3
+
+ fsubd %f0,%f20,%f0
+ fmuld %f10,%f62,%f30
+ and %o0,1,%o0
+ mov %l0,%g1
+
+ fsubd %f2,%f22,%f2
+ fmuld %f12,%f62,%f32
+ and %o1,1,%o1
+ movrnz %o0,%l4,%l0 ! if (n & 1) exchange ps and pc
+
+ fsubd %f4,%f24,%f4
+ fmuld %f14,%f62,%f34
+ and %o2,1,%o2
+ movrnz %o0,%g1,%l4
+
+ fsubd %f6,%f26,%f6
+ fmuld %f16,%f62,%f36
+ and %o3,1,%o3
+ mov %l1,%g1
+
+ fsubd %f0,%f30,%f0
+ movrnz %o1,%l5,%l1
+
+ fsubd %f2,%f32,%f2
+ movrnz %o1,%g1,%l5
+
+ fsubd %f4,%f34,%f4
+ mov %l2,%g1
+
+ fsubd %f6,%f36,%f6
+ movrnz %o2,%l6,%l2
+
+ fmuld %f0,%f0,%f30
+ fnegd %f0,%f10
+ movrnz %o2,%g1,%l6
+
+ fmuld %f2,%f2,%f32
+ fnegd %f2,%f12
+ mov %l3,%g1
+
+ fmuld %f4,%f4,%f34
+ fnegd %f4,%f14
+ movrnz %o3,%l7,%l3
+
+ fmuld %f6,%f6,%f36
+ fnegd %f6,%f16
+ movrnz %o3,%g1,%l7
+
+ fmuld %f30,%f54,%f20
+ fmovrdnz %o0,%f10,%f0 ! if (n & 1) x = -x
+
+ fmuld %f32,%f54,%f22
+ fmovrdnz %o1,%f12,%f2
+
+ fmuld %f34,%f54,%f24
+ fmovrdnz %o2,%f14,%f4
+
+ fmuld %f36,%f54,%f26
+ fmovrdnz %o3,%f16,%f6
+
+ faddd %f20,%f52,%f20
+ fmuld %f30,%f44,%f10
+ ld [%fp+n0],%o0
+
+ faddd %f22,%f52,%f22
+ fmuld %f32,%f44,%f12
+ and %o0,2,%o0
+
+ faddd %f24,%f52,%f24
+ fmuld %f34,%f44,%f14
+ sllx %o0,62,%g1
+ stx %g1,[%fp+n0]
+
+ faddd %f26,%f52,%f26
+ fmuld %f36,%f44,%f16
+ ld [%fp+n1],%o1
+
+ fmuld %f30,%f20,%f20
+ faddd %f10,%f42,%f10
+ and %o1,2,%o1
+
+ fmuld %f32,%f22,%f22
+ faddd %f12,%f42,%f12
+ sllx %o1,62,%g1
+ stx %g1,[%fp+n1]
+
+ fmuld %f34,%f24,%f24
+ faddd %f14,%f42,%f14
+ ld [%fp+n2],%o2
+
+ fmuld %f36,%f26,%f26
+ faddd %f16,%f42,%f16
+ and %o2,2,%o2
+
+ faddd %f20,%f50,%f20
+ fmuld %f30,%f10,%f10
+ sllx %o2,62,%g1
+ stx %g1,[%fp+n2]
+
+ faddd %f22,%f50,%f22
+ fmuld %f32,%f12,%f12
+ ld [%fp+n3],%o3
+
+ faddd %f24,%f50,%f24
+ fmuld %f34,%f14,%f14
+ and %o3,2,%o3
+
+ faddd %f26,%f50,%f26
+ fmuld %f36,%f16,%f16
+ sllx %o3,62,%g1
+ stx %g1,[%fp+n3]
+
+ fmuld %f30,%f20,%f20
+ faddd %f10,%f40,%f10
+
+ fmuld %f32,%f22,%f22
+ faddd %f12,%f40,%f12
+
+ fmuld %f34,%f24,%f24
+ faddd %f14,%f40,%f14
+
+ fmuld %f36,%f26,%f26
+ faddd %f16,%f40,%f16
+
+ faddd %f20,%f48,%f20
+ fmuld %f30,%f10,%f10
+
+ faddd %f22,%f48,%f22
+ fmuld %f32,%f12,%f12
+
+ faddd %f24,%f48,%f24
+ fmuld %f34,%f14,%f14
+
+ faddd %f26,%f48,%f26
+ fmuld %f36,%f16,%f16
+
+ fmuld %f30,%f20,%f20
+ faddd %f10,%f46,%f10
+
+ fmuld %f32,%f22,%f22
+ faddd %f12,%f46,%f12
+
+ fmuld %f34,%f24,%f24
+ faddd %f14,%f46,%f14
+
+ fmuld %f36,%f26,%f26
+ faddd %f16,%f46,%f16
+
+ faddd %f20,%f46,%f20
+ fmuld %f0,%f10,%f10
+ ldd [%fp+n0],%f30
+
+ faddd %f22,%f46,%f22
+ fmuld %f2,%f12,%f12
+ ldd [%fp+n1],%f32
+
+ faddd %f24,%f46,%f24
+ fmuld %f4,%f14,%f14
+ ldd [%fp+n2],%f34
+
+ faddd %f26,%f46,%f26
+ fmuld %f6,%f16,%f16
+ ldd [%fp+n3],%f36
+
+ fxor %f10,%f30,%f10 ! if (n & 2) negate s, c
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ fxor %f16,%f36,%f16
+
+ fxor %f20,%f30,%f20
+
+ fxor %f22,%f32,%f22
+
+ fxor %f24,%f34,%f24
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f26,%f36,%f26
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 32
+.end:
+ fdtos %f10,%f10
+ st %f10,[%l0]
+ fdtos %f20,%f20
+ st %f20,[%l4]
+ fdtos %f12,%f12
+ st %f12,[%l1]
+ fdtos %f22,%f22
+ st %f22,[%l5]
+ fdtos %f14,%f14
+ st %f14,[%l2]
+ fdtos %f24,%f24
+ st %f24,[%l6]
+ fdtos %f16,%f16
+ st %f16,[%l3]
+ fdtos %f26,%f26
+ tst %i5 ! check for huge arguments remaining
+ be,pt %icc,.exit
+! delay slot
+ st %f26,[%l7]
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ssave],%o3
+ ldx [%fp+csave],%o5
+ ldx [%fp+STACK_BIAS+0xb0],%i5
+ stx %i5,[%sp+STACK_BIAS+0xb0]
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ssave],%o3
+ ld [%fp+csave],%o5
+ ld [%fp+0x5c],%i5
+ st %i5,[%sp+0x5c]
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sssave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ call __vlibm_vsincos_bigf
+ sra %o4,0,%o4 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
+ .align 32
+.last1:
+ fdtos %f12,%f12
+ st %f12,[%l1]
+ nop
+ fdtos %f22,%f22
+ st %f22,[%l5]
+ fzeros %f2
+ add %fp,junk,%l5
+ add %fp,junk,%l1
+.last2:
+ fdtos %f14,%f14
+ st %f14,[%l2]
+ nop
+ fdtos %f24,%f24
+ st %f24,[%l6]
+ fzeros %f4
+ add %fp,junk,%l2
+ add %fp,junk,%l6
+.last3:
+ fdtos %f16,%f16
+ st %f16,[%l3]
+ fdtos %f26,%f26
+ st %f26,[%l7]
+ fzeros %f6
+ add %fp,junk,%l3
+ ba,pt %icc,.cont
+! delay slot
+ add %fp,junk,%l7
+
+
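+! The .range0-.range3 blocks handle one out-of-range lane each: a finite
+! argument above 2^19*pi sets biguns (the lane is recomputed later by
+! __vlibm_vsincos_bigf), while an inf or NaN argument stores x*0 (NaN) to
+! both outputs.  The lane is then refilled with the next element, or with a
+! dummy zero argument once the vector is exhausted.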
+ .align 16
+.range0:
+ fcmpgt32 %f38,%f30,%o0
+ andcc %o0,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f1
+ fmuls %f0,%f1,%f0
+ st %f0,[%l0]
+ st %f0,[%l4]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f0
+ add %i1,%i2,%i1
+ mov %i3,%l0
+ add %i3,%i4,%i3
+ fabsd %f0,%f30
+ mov %o4,%l4
+ add %o4,%o5,%o4
+ fcmple32 %f30,%f18,%o0
+ andcc %o0,2,%g0
+ bz,pn %icc,.range0
+! delay slot
+ nop
+ ba,pt %icc,.check1
+! delay slot
+ fcmple32 %f30,%f8,%o0
+1:
+ fzero %f0 ! set up dummy argument
+ add %fp,junk,%l0
+ add %fp,junk,%l4
+ mov 2,%o0
+ ba,pt %icc,.check1
+! delay slot
+ fzero %f30
+
+
+ .align 16
+.range1:
+ fcmpgt32 %f38,%f32,%o1
+ andcc %o1,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f3
+ fmuls %f2,%f3,%f2
+ st %f2,[%l1]
+ st %f2,[%l5]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f2
+ add %i1,%i2,%i1
+ mov %i3,%l1
+ add %i3,%i4,%i3
+ fabsd %f2,%f32
+ mov %o4,%l5
+ add %o4,%o5,%o4
+ fcmple32 %f32,%f18,%o1
+ andcc %o1,2,%g0
+ bz,pn %icc,.range1
+! delay slot
+ nop
+ ba,pt %icc,.check2
+! delay slot
+ fcmple32 %f32,%f8,%o1
+1:
+ fzero %f2 ! set up dummy argument
+ add %fp,junk,%l1
+ add %fp,junk,%l5
+ mov 2,%o1
+ ba,pt %icc,.check2
+! delay slot
+ fzero %f32
+
+
+ .align 16
+.range2:
+ fcmpgt32 %f38,%f34,%o2
+ andcc %o2,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f5
+ fmuls %f4,%f5,%f4
+ st %f4,[%l2]
+ st %f4,[%l6]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f4
+ add %i1,%i2,%i1
+ mov %i3,%l2
+ add %i3,%i4,%i3
+ fabsd %f4,%f34
+ mov %o4,%l6
+ add %o4,%o5,%o4
+ fcmple32 %f34,%f18,%o2
+ andcc %o2,2,%g0
+ bz,pn %icc,.range2
+! delay slot
+ nop
+ ba,pt %icc,.check3
+! delay slot
+ fcmple32 %f34,%f8,%o2
+1:
+ fzero %f4 ! set up dummy argument
+ add %fp,junk,%l2
+ add %fp,junk,%l6
+ mov 2,%o2
+ ba,pt %icc,.check3
+! delay slot
+ fzero %f34
+
+
+ .align 16
+.range3:
+ fcmpgt32 %f38,%f36,%o3
+ andcc %o3,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f7
+ fmuls %f6,%f7,%f6
+ st %f6,[%l3]
+ st %f6,[%l7]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f6
+ add %i1,%i2,%i1
+ mov %i3,%l3
+ add %i3,%i4,%i3
+ fabsd %f6,%f36
+ mov %o4,%l7
+ add %o4,%o5,%o4
+ fcmple32 %f36,%f18,%o3
+ andcc %o3,2,%g0
+ bz,pn %icc,.range3
+! delay slot
+ nop
+ ba,pt %icc,.checkprimary
+! delay slot
+ fcmple32 %f36,%f8,%o3
+1:
+ fzero %f6 ! set up dummy argument
+ add %fp,junk,%l3
+ add %fp,junk,%l7
+ mov 2,%o3
+ ba,pt %icc,.checkprimary
+! delay slot
+ fzero %f36
+
+ SET_SIZE(__vsincosf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsinf.S b/usr/src/libm/src/mvec/vis/__vsinf.S
new file mode 100644
index 0000000..2e570b7
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsinf.S
@@ -0,0 +1,2093 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsinf.S 1.9 06/01/23 SMI"
+
+ .file "__vsinf.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+constants:
+	.word	0xbfc55554,0x60000000	! S0 (sin poly coefficient)
+	.word	0x3f811077,0xe0000000	! S1
+	.word	0xbf29956b,0x60000000	! S2
+	.word	0x3ff00000,0x00000000	! one = 1.0
+	.word	0xbfe00000,0x00000000	! mhalf = -0.5
+	.word	0x3fa55554,0xa0000000	! C0 (cos poly coefficient)
+	.word	0xbf56c0c1,0xe0000000	! C1
+	.word	0x3ef99e24,0xe0000000	! C2
+	.word	0x3fe45f30,0x6dc9c883	! invpio2 = 2/pi
+	.word	0x43380000,0x00000000	! round = 1.5 * 2^52
+	.word	0x3ff921fb,0x54400000	! pio2_1 (high part of pi/2)
+	.word	0x3dd0b461,0x1a626331	! pio2_t (pi/2 - pio2_1)
+	.word	0x3f490fdb,0		! thresh1 = pi/4 (single)
+	.word	0x49c90fdb,0		! thresh2 = 2^19 pi (single)
+	.word	0x7f800000,0		! inf (single)
+	.word	0x80000000,0		! signbit (single)
+
+#define S0 0x0
+#define S1 0x08
+#define S2 0x10
+#define one 0x18
+#define mhalf 0x20
+#define C0 0x28
+#define C1 0x30
+#define C2 0x38
+#define invpio2 0x40
+#define round 0x48
+#define pio2_1 0x50
+#define pio2_t 0x58
+#define thresh1 0x60
+#define thresh2 0x68
+#define inf 0x70
+#define signbit 0x78
+
+! local storage indices
+
+#define xsave STACK_BIAS-0x8
+#define ysave STACK_BIAS-0x10
+#define nsave STACK_BIAS-0x14
+#define sxsave STACK_BIAS-0x18
+#define sysave STACK_BIAS-0x1c
+#define junk STACK_BIAS-0x20
+#define n3 STACK_BIAS-0x24
+#define n2 STACK_BIAS-0x28
+#define n1 STACK_BIAS-0x2c
+#define n0 STACK_BIAS-0x30
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x30
+
+! register use
+
+! i0 n
+! i1 x
+! i2 stridex
+! i3 y
+! i4 stridey
+! i5 biguns
+
+! l0 n0
+! l1 n1
+! l2 n2
+! l3 n3
+! l4
+! l5
+! l6
+! l7
+
+! the following are 64-bit registers in both V8+ and V9
+
+! g1
+! g5
+
+! o0 py0
+! o1 py1
+! o2 py2
+! o3 py3
+! o4
+! o5
+! o7
+
+! f0 x0
+! f2 x1
+! f4 x2
+! f6 x3
+! f8 thresh1 (pi/4)
+! f10 y0
+! f12 y1
+! f14 y2
+! f16 y3
+! f18 thresh2 (2^19 pi)
+! f20
+! f22
+! f24
+! f26
+! f28 signbit
+! f30
+! f32
+! f34
+! f36
+! f38 inf
+! f40 S0
+! f42 S1
+! f44 S2
+! f46 one
+! f48 mhalf
+! f50 C0
+! f52 C1
+! f54 C2
+! f56 invpio2
+! f58 round
+! f60 pio2_1
+! f62 pio2_t
+
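+! A rough C model of one element (an editor's sketch, not original source;
+! thresh1, invpio2, pio2_1, pio2_t, one, mhalf and S0-S2/C0-C2 are the
+! constants loaded below):
+!
+!	float sinf_elem(float x)
+!	{
+!		double dx = x, z = dx * dx, r, y;
+!		int n;
+!
+!		if (fabs(dx) <= thresh1)	/* pi/4: no reduction */
+!			return (float)(dx * (one + z*(S0 + z*(S1 + z*S2))));
+!
+!		/* pi/4 < |x| <= 2^19*pi: reduce by pi/2 (the .medium path) */
+!		n = (int) nearbyint(dx * invpio2);
+!		r = (dx - n*pio2_1) - n*pio2_t;
+!		z = r * r;
+!		if (n & 1)	/* odd quadrant: cosine polynomial */
+!			y = one + z*(mhalf + z*(C0 + z*(C1 + z*C2)));
+!		else		/* even quadrant: sine polynomial */
+!			y = r * (one + z*(S0 + z*(S1 + z*S2)));
+!		return (float)((n & 2) ? -y : y);
+!	}
+!
+! Larger finite arguments and inf/NaN are picked off in the .range blocks.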
+ ENTRY(__vsinf)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,constants,l1)
+ mov %l1,%g1
+ wr %g0,0x82,%asi ! set %asi for non-faulting loads
+#ifdef __sparcv9
+ stx %i1,[%fp+xsave] ! save arguments
+ stx %i3,[%fp+ysave]
+#else
+ st %i1,[%fp+xsave] ! save arguments
+ st %i3,[%fp+ysave]
+#endif
+ st %i0,[%fp+nsave]
+ st %i2,[%fp+sxsave]
+ st %i4,[%fp+sysave]
+ mov 0,%i5 ! biguns = 0
+ ldd [%g1+S0],%f40 ! load constants
+ ldd [%g1+S1],%f42
+ ldd [%g1+S2],%f44
+ ldd [%g1+one],%f46
+ ldd [%g1+mhalf],%f48
+ ldd [%g1+C0],%f50
+ ldd [%g1+C1],%f52
+ ldd [%g1+C2],%f54
+ ldd [%g1+invpio2],%f56
+ ldd [%g1+round],%f58
+ ldd [%g1+pio2_1],%f60
+ ldd [%g1+pio2_t],%f62
+ ldd [%g1+thresh1],%f8
+ ldd [%g1+thresh2],%f18
+ ldd [%g1+inf],%f38
+ ldd [%g1+signbit],%f28
+ sll %i2,2,%i2 ! scale strides
+ sll %i4,2,%i4
+ fzero %f10 ! loop prologue
+ add %fp,junk,%o0
+ fzero %f12
+ add %fp,junk,%o1
+ fzero %f14
+ add %fp,junk,%o2
+ fzero %f16
+ ba .start
+ add %fp,junk,%o3
+
+! 16-byte aligned
+ .align 16
+.start:
+ ld [%i1],%f0 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f10,%f10
+
+ st %f10,[%o0]
+ mov %i3,%o0 ! py0 = y
+ ble,pn %icc,.last1
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+
+ ld [%i1],%f2 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f12,%f12
+
+ st %f12,[%o1]
+ mov %i3,%o1 ! py1 = y
+ ble,pn %icc,.last2
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+
+ ld [%i1],%f4 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ addcc %i0,-1,%i0
+ fdtos %f14,%f14
+
+ st %f14,[%o2]
+ mov %i3,%o2 ! py2 = y
+ ble,pn %icc,.last3
+! delay slot
+ add %i3,%i4,%i3 ! y += stridey
+
+ ld [%i1],%f6 ! *x
+ add %i1,%i2,%i1 ! x += stridex
+ nop
+ fdtos %f16,%f16
+
+ st %f16,[%o3]
+ mov %i3,%o3 ! py3 = y
+ add %i3,%i4,%i3 ! y += stridey
+.cont:
+ fabsd %f0,%f30
+
+ fabsd %f2,%f32
+
+ fabsd %f4,%f34
+
+ fabsd %f6,%f36
+ fcmple32 %f30,%f18,%l0
+
+ fcmple32 %f32,%f18,%l1
+
+ fcmple32 %f34,%f18,%l2
+
+ fcmple32 %f36,%f18,%l3
+ nop
+
+! 16-byte aligned
+ andcc %l0,2,%g0
+ bz,pn %icc,.range0 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f30,%f8,%l0
+
+.check1:
+ andcc %l1,2,%g0
+ bz,pn %icc,.range1 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f32,%f8,%l1
+
+.check2:
+ andcc %l2,2,%g0
+ bz,pn %icc,.range2 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f34,%f8,%l2
+
+.check3:
+ andcc %l3,2,%g0
+ bz,pn %icc,.range3 ! branch if > 2^19 pi
+! delay slot
+ fcmple32 %f36,%f8,%l3
+
+.checkprimary:
+ fsmuld %f0,%f0,%f30
+ fstod %f0,%f0
+
+ fsmuld %f2,%f2,%f32
+ fstod %f2,%f2
+ and %l0,%l1,%o4
+
+ fsmuld %f4,%f4,%f34
+ fstod %f4,%f4
+
+ fsmuld %f6,%f6,%f36
+ fstod %f6,%f6
+ and %l2,%l3,%o5
+
+ fmuld %f30,%f44,%f10
+ and %o4,%o5,%o5
+
+ fmuld %f32,%f44,%f12
+ andcc %o5,2,%g0
+ bz,pn %icc,.medium ! branch if any argument is > pi/4
+! delay slot
+ nop
+
+ fmuld %f34,%f44,%f14
+
+ fmuld %f36,%f44,%f16
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+
+ fmuld %f32,%f12,%f12
+
+ fmuld %f34,%f14,%f14
+
+ fmuld %f36,%f16,%f16
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fmuld %f0,%f10,%f10
+
+ fmuld %f2,%f12,%f12
+
+ fmuld %f4,%f14,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fmuld %f6,%f16,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
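+! After the reduction below, bit 0 of each lane's quadrant count selects the
+! sine or the cosine polynomial for that lane.  The sixteen .case0-.case15
+! blocks that follow enumerate the 2^4 combinations for the four lanes so
+! that every combination runs as straight-line code; within each case,
+! (n & 2) is turned into a sign flip (XOR with the signbit constant in %f28)
+! on the final result.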
+ .align 16
+.medium:
+ fmuld %f0,%f56,%f10
+
+ fmuld %f2,%f56,%f12
+
+ fmuld %f4,%f56,%f14
+
+ fmuld %f6,%f56,%f16
+
+ faddd %f10,%f58,%f10
+ st %f11,[%fp+n0]
+
+ faddd %f12,%f58,%f12
+ st %f13,[%fp+n1]
+
+ faddd %f14,%f58,%f14
+ st %f15,[%fp+n2]
+
+ faddd %f16,%f58,%f16
+ st %f17,[%fp+n3]
+
+ fsubd %f10,%f58,%f10
+
+ fsubd %f12,%f58,%f12
+
+ fsubd %f14,%f58,%f14
+
+ fsubd %f16,%f58,%f16
+
+ fmuld %f10,%f60,%f20
+ ld [%fp+n0],%l0
+
+ fmuld %f12,%f60,%f22
+ ld [%fp+n1],%l1
+
+ fmuld %f14,%f60,%f24
+ ld [%fp+n2],%l2
+
+ fmuld %f16,%f60,%f26
+ ld [%fp+n3],%l3
+
+ fsubd %f0,%f20,%f0
+ fmuld %f10,%f62,%f30
+
+ fsubd %f2,%f22,%f2
+ fmuld %f12,%f62,%f32
+
+ fsubd %f4,%f24,%f4
+ fmuld %f14,%f62,%f34
+
+ fsubd %f6,%f26,%f6
+ fmuld %f16,%f62,%f36
+
+ fsubd %f0,%f30,%f0
+
+ fsubd %f2,%f32,%f2
+
+ fsubd %f4,%f34,%f4
+
+ fsubd %f6,%f36,%f6
+ andcc %l0,1,%g0
+
+ fmuld %f0,%f0,%f30
+ bz,pn %icc,.case8
+! delay slot
+ andcc %l1,1,%g0
+
+ fmuld %f2,%f2,%f32
+ bz,pn %icc,.case4
+! delay slot
+ andcc %l2,1,%g0
+
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case2
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case1
+! delay slot
+ nop
+
+!.case0:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case1:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case2:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case3
+! delay slot
+ nop
+
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case3:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case4:
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case6
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case5
+! delay slot
+ nop
+
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case5:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case6:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case7
+! delay slot
+ nop
+
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case7:
+ fmuld %f30,%f54,%f10 ! cos(x0)
+ fzero %f0
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f48,%f20
+ faddd %f10,%f52,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f10,%f10
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f30,%f30
+ faddd %f10,%f50,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ fmuld %f30,%f10,%f10
+ fmovrdnz %g1,%f28,%f0
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f0,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 16
+.case8:
+ fmuld %f2,%f2,%f32
+ bz,pn %icc,.case12
+! delay slot
+ andcc %l2,1,%g0
+
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case10
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case9
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case9:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case10:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case11
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case11:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f54,%f12 ! cos(x1)
+ fzero %f2
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f48,%f22
+ faddd %f12,%f52,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f12,%f12
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f32,%f32
+ faddd %f12,%f50,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ fmuld %f32,%f12,%f12
+ fmovrdnz %g5,%f28,%f2
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ faddd %f12,%f22,%f12
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f2,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case12:
+ fmuld %f4,%f4,%f34
+ bz,pn %icc,.case14
+! delay slot
+ andcc %l3,1,%g0
+
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case13
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case13:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f54,%f14 ! cos(x2)
+ fzero %f4
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f48,%f24
+ faddd %f14,%f52,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f14,%f14
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f34,%f34
+ faddd %f14,%f50,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ fmuld %f34,%f14,%f14
+ fmovrdnz %o4,%f28,%f4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ faddd %f14,%f24,%f14
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f4,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case14:
+ fmuld %f6,%f6,%f36
+ bz,pn %icc,.case15
+! delay slot
+ nop
+
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f54,%f16 ! cos(x3)
+ fzero %f6
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f48,%f26
+ faddd %f16,%f52,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f16,%f16
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f36,%f36
+ faddd %f16,%f50,%f16
+ and %l3,2,%o5
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ fmuld %f36,%f16,%f16
+ fmovrdnz %o5,%f28,%f6
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ faddd %f16,%f26,%f16
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f6,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+ .align 16
+.case15:
+ fmuld %f30,%f44,%f10 ! sin(x0)
+
+ fmuld %f32,%f44,%f12 ! sin(x1)
+
+ fmuld %f34,%f44,%f14 ! sin(x2)
+
+ fmuld %f36,%f44,%f16 ! sin(x3)
+
+ fmuld %f30,%f40,%f20
+ faddd %f10,%f42,%f10
+
+ fmuld %f32,%f40,%f22
+ faddd %f12,%f42,%f12
+
+ fmuld %f34,%f40,%f24
+ faddd %f14,%f42,%f14
+
+ fmuld %f36,%f40,%f26
+ faddd %f16,%f42,%f16
+
+ fmuld %f30,%f30,%f30
+ faddd %f20,%f46,%f20
+
+ fmuld %f32,%f32,%f32
+ faddd %f22,%f46,%f22
+
+ fmuld %f34,%f34,%f34
+ faddd %f24,%f46,%f24
+
+ fmuld %f36,%f36,%f36
+ faddd %f26,%f46,%f26
+
+ fmuld %f30,%f10,%f10
+ fzero %f30
+
+ fmuld %f32,%f12,%f12
+ fzero %f32
+
+ fmuld %f34,%f14,%f14
+ fzero %f34
+
+ fmuld %f36,%f16,%f16
+ fzero %f36
+
+ faddd %f10,%f20,%f10
+ and %l0,2,%g1
+
+ faddd %f12,%f22,%f12
+ and %l1,2,%g5
+
+ faddd %f14,%f24,%f14
+ and %l2,2,%o4
+
+ faddd %f16,%f26,%f16
+ and %l3,2,%o5
+
+ fmuld %f0,%f10,%f10
+ fmovrdnz %g1,%f28,%f30
+
+ fmuld %f2,%f12,%f12
+ fmovrdnz %g5,%f28,%f32
+
+ fmuld %f4,%f14,%f14
+ fmovrdnz %o4,%f28,%f34
+
+ fmuld %f6,%f16,%f16
+ fmovrdnz %o5,%f28,%f36
+
+ fxor %f10,%f30,%f10
+
+ fxor %f12,%f32,%f12
+
+ fxor %f14,%f34,%f14
+
+ addcc %i0,-1,%i0
+ bg,pt %icc,.start
+! delay slot
+ fxor %f16,%f36,%f16
+
+ ba,pt %icc,.end
+! delay slot
+ nop
+
+
+ .align 32
+.end:
+ fdtos %f10,%f10
+ st %f10,[%o0]
+ fdtos %f12,%f12
+ st %f12,[%o1]
+ fdtos %f14,%f14
+ st %f14,[%o2]
+ fdtos %f16,%f16
+ tst %i5 ! check for huge arguments remaining
+ be,pt %icc,.exit
+! delay slot
+ st %f16,[%o3]
+#ifdef __sparcv9
+ ldx [%fp+xsave],%o1
+ ldx [%fp+ysave],%o3
+#else
+ ld [%fp+xsave],%o1
+ ld [%fp+ysave],%o3
+#endif
+ ld [%fp+nsave],%o0
+ ld [%fp+sxsave],%o2
+ ld [%fp+sysave],%o4
+ sra %o2,0,%o2 ! sign-extend for V9
+ call __vlibm_vsin_bigf
+ sra %o4,0,%o4 ! delay slot
+
+.exit:
+ ret
+ restore
+
+
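+! A short note (an inference from the code below, hedged): .last1-.last3
+! appear to handle the final group when fewer than four elements remain.
+! Each one stores the result that is already complete, then substitutes a
+! zero argument and a scratch store address (%fp+junk) for the missing
+! element before rejoining the loop at .cont.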
+ .align 32
+.last1:
+ fdtos %f12,%f12
+ st %f12,[%o1]
+ fzeros %f2
+ add %fp,junk,%o1
+.last2:
+ fdtos %f14,%f14
+ st %f14,[%o2]
+ fzeros %f4
+ add %fp,junk,%o2
+.last3:
+ fdtos %f16,%f16
+ st %f16,[%o3]
+ fzeros %f6
+ ba,pt %icc,.cont
+! delay slot
+ add %fp,junk,%o3
+
+
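+! A short note (an inference from the code below, hedged): .range0-.range3
+! appear to catch one out-of-range argument each.  A huge but finite
+! argument sets the biguns flag (%i5) so it can be handled later by
+! __vlibm_vsin_bigf, while an infinity or NaN is folded through x*0 and
+! stored immediately.  The next element is then pulled in (or a dummy zero
+! argument substituted if none remain) before rejoining the primary path
+! at the corresponding .check label.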
+ .align 16
+.range0:
+ fcmpgt32 %f38,%f30,%l0
+ andcc %l0,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f1
+ fmuls %f0,%f1,%f0
+ st %f0,[%o0]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f0
+ add %i1,%i2,%i1
+ mov %i3,%o0
+ add %i3,%i4,%i3
+ fabsd %f0,%f30
+ fcmple32 %f30,%f18,%l0
+ andcc %l0,2,%g0
+ bz,pn %icc,.range0
+! delay slot
+ nop
+ ba,pt %icc,.check1
+! delay slot
+ fcmple32 %f30,%f8,%l0
+1:
+ fzero %f0 ! set up dummy argument
+ add %fp,junk,%o0
+ mov 2,%l0
+ ba,pt %icc,.check1
+! delay slot
+ fzero %f30
+
+
+ .align 16
+.range1:
+ fcmpgt32 %f38,%f32,%l1
+ andcc %l1,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f3
+ fmuls %f2,%f3,%f2
+ st %f2,[%o1]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f2
+ add %i1,%i2,%i1
+ mov %i3,%o1
+ add %i3,%i4,%i3
+ fabsd %f2,%f32
+ fcmple32 %f32,%f18,%l1
+ andcc %l1,2,%g0
+ bz,pn %icc,.range1
+! delay slot
+ nop
+ ba,pt %icc,.check2
+! delay slot
+ fcmple32 %f32,%f8,%l1
+1:
+ fzero %f2 ! set up dummy argument
+ add %fp,junk,%o1
+ mov 2,%l1
+ ba,pt %icc,.check2
+! delay slot
+ fzero %f32
+
+
+ .align 16
+.range2:
+ fcmpgt32 %f38,%f34,%l2
+ andcc %l2,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f5
+ fmuls %f4,%f5,%f4
+ st %f4,[%o2]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f4
+ add %i1,%i2,%i1
+ mov %i3,%o2
+ add %i3,%i4,%i3
+ fabsd %f4,%f34
+ fcmple32 %f34,%f18,%l2
+ andcc %l2,2,%g0
+ bz,pn %icc,.range2
+! delay slot
+ nop
+ ba,pt %icc,.check3
+! delay slot
+ fcmple32 %f34,%f8,%l2
+1:
+ fzero %f4 ! set up dummy argument
+ add %fp,junk,%o2
+ mov 2,%l2
+ ba,pt %icc,.check3
+! delay slot
+ fzero %f34
+
+
+ .align 16
+.range3:
+ fcmpgt32 %f38,%f36,%l3
+ andcc %l3,2,%g0
+ bnz,a,pt %icc,1f ! branch if finite
+! delay slot, squashed if branch not taken
+ mov 1,%i5 ! set biguns
+ fzeros %f7
+ fmuls %f6,%f7,%f6
+ st %f6,[%o3]
+1:
+ addcc %i0,-1,%i0
+ ble,pn %icc,1f
+! delay slot
+ nop
+ ld [%i1],%f6
+ add %i1,%i2,%i1
+ mov %i3,%o3
+ add %i3,%i4,%i3
+ fabsd %f6,%f36
+ fcmple32 %f36,%f18,%l3
+ andcc %l3,2,%g0
+ bz,pn %icc,.range3
+! delay slot
+ nop
+ ba,pt %icc,.checkprimary
+! delay slot
+ fcmple32 %f36,%f8,%l3
+1:
+ fzero %f6 ! set up dummy argument
+ add %fp,junk,%o3
+ mov 2,%l3
+ ba,pt %icc,.checkprimary
+! delay slot
+ fzero %f36
+
+ SET_SIZE(__vsinf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsqrt.S b/usr/src/libm/src/mvec/vis/__vsqrt.S
new file mode 100644
index 0000000..2d536f7
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsqrt.S
@@ -0,0 +1,1843 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsqrt.S 1.5 06/01/23 SMI"
+
+ .file "__vsqrt.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x3fe00000, 0x00000000 ! A1 = 5.00000000000000001789e-01
+ .word 0xbfbfffff, 0xfffd0bfd ! A2 = -1.24999999997314110667e-01
+ .word 0x3fafffff, 0xfffb5bfb ! A3 = 6.24999999978896565817e-02
+ .word 0xbfa4000f, 0xc00b4fc8 ! A4 = -3.90629693917215481458e-02
+ .word 0x3f9c0018, 0xc012da4e ! A5 = 2.73441188080261677282e-02
+ .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff
+ .word 0x00001000, 0x00000000 ! DC2 = 0x0000100000000000
+ .word 0x7fffe000, 0x00000000 ! DC3 = 0x7fffe00000000000
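+!
+! A1..A5 above appear to be the coefficients of a degree-5 polynomial for
+! sqrt(1+x)-1 on a small interval (compare the Taylor coefficients
+! 1/2, -1/8, 1/16, -5/128, 7/256), while DC0 masks off the mantissa and
+! DC2/DC3 round and truncate a value to the table's breakpoint grid to
+! form res_c below.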
+
+! i = [0,128]
+! TBL[8*i+0] = 1.0 / (*(double*)&(0x3fe0000000000000LL + (i << 45)));
+! TBL[8*i+1] = (double)(2.0 * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))));
+! TBL[8*i+2] = (double)(2.0 * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))) - TBL[8*i+1]);
+! TBL[8*i+3] = 0
+! TBL[8*i+4] = 1.0 / (*(double*)&(0x3fe0000000000000LL + (i << 45)));
+! TBL[8*i+5] = (double)(2.0 * sqrtl(2.0) * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))));
+! TBL[8*i+6] = (double)(2.0 * sqrtl(2.0) * sqrtl(*(double*)&(0x3fe0000000000000LL + (i << 45))) - TBL[8*i+5]);
+! TBL[8*i+7] = 0
+
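+! A rough, hypothetical C sketch (not the original generator) that would
+! rebuild these entries from the formulas above; it assumes sqrtl carries
+! extra precision so the hi/lo splitting is meaningful:
+!
+! #include <stdio.h>
+! #include <string.h>
+! #include <stdint.h>
+! #include <math.h>
+!
+! int main(void) {
+!     for (int i = 0; i <= 128; i++) {
+!         int64_t bits = 0x3fe0000000000000LL + ((int64_t)i << 45);
+!         double m;                   /* breakpoints 0.5, 0.5 + 1/256, ..., 1.0 */
+!         memcpy(&m, &bits, sizeof m);
+!         double hi1 = (double)(2.0L * sqrtl((long double)m));
+!         double lo1 = (double)(2.0L * sqrtl((long double)m) - (long double)hi1);
+!         double hi2 = (double)(2.0L * sqrtl(2.0L) * sqrtl((long double)m));
+!         double lo2 = (double)(2.0L * sqrtl(2.0L) * sqrtl((long double)m) - (long double)hi2);
+!         printf("%a %a %a 0 %a %a %a 0\n", 1.0 / m, hi1, lo1, 1.0 / m, hi2, lo2);
+!     }
+!     return 0;
+! }
+!
+! The hi/lo pairs represent 2*sqrt(m) (and 2*sqrt(2)*sqrt(m)) to more than
+! double precision, so the dexp_hi/dexp_lo adds in the kernel lose almost
+! nothing to rounding.
+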
+ .word 0x40000000, 0x00000000, 0x3ff6a09e, 0x667f3bcd
+ .word 0xbc9bdd34, 0x13b26456, 0x00000000, 0x00000000
+ .word 0x40000000, 0x00000000, 0x40000000, 0x00000000
+ .word 0xb8f00000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3fffc07f, 0x01fc07f0, 0x3ff6b733, 0xbfd8c648
+ .word 0x3c53b629, 0x05629048, 0x00000000, 0x00000000
+ .word 0x3fffc07f, 0x01fc07f0, 0x40000ff8, 0x07f60deb
+ .word 0x3c90655c, 0x648a53f1, 0x00000000, 0x00000000
+ .word 0x3fff81f8, 0x1f81f820, 0x3ff6cdb2, 0xbbb212eb
+ .word 0x3c960332, 0xcdbaba2d, 0x00000000, 0x00000000
+ .word 0x3fff81f8, 0x1f81f820, 0x40001fe0, 0x3f61bad0
+ .word 0x3ca2c41a, 0x15cbfaf2, 0x00000000, 0x00000000
+ .word 0x3fff4465, 0x9e4a4271, 0x3ff6e41b, 0x9bfb3b75
+ .word 0xbc925d8c, 0xfd6d5c87, 0x00000000, 0x00000000
+ .word 0x3fff4465, 0x9e4a4271, 0x40002fb8, 0xd4e30f48
+ .word 0xbca64203, 0xab1ba910, 0x00000000, 0x00000000
+ .word 0x3fff07c1, 0xf07c1f08, 0x3ff6fa6e, 0xa162d0f0
+ .word 0x3c691a24, 0x3d6297e9, 0x00000000, 0x00000000
+ .word 0x3fff07c1, 0xf07c1f08, 0x40003f81, 0xf636b80c
+ .word 0xbca0efc8, 0xba812a8c, 0x00000000, 0x00000000
+ .word 0x3ffecc07, 0xb301ecc0, 0x3ff710ac, 0x0b5e5e32
+ .word 0xbc991218, 0xb8d2850d, 0x00000000, 0x00000000
+ .word 0x3ffecc07, 0xb301ecc0, 0x40004f3b, 0xd03c0a64
+ .word 0x3c9ee2cf, 0x2d8ae22b, 0x00000000, 0x00000000
+ .word 0x3ffe9131, 0xabf0b767, 0x3ff726d4, 0x1832a0be
+ .word 0xbc2d9b1a, 0xa8ecb058, 0x00000000, 0x00000000
+ .word 0x3ffe9131, 0xabf0b767, 0x40005ee6, 0x8efad48b
+ .word 0xbc9c35f4, 0x8f4b89f7, 0x00000000, 0x00000000
+ .word 0x3ffe573a, 0xc901e574, 0x3ff73ce7, 0x04fb7b23
+ .word 0x3c91470b, 0x816b17a6, 0x00000000, 0x00000000
+ .word 0x3ffe573a, 0xc901e574, 0x40006e82, 0x5da8fc2b
+ .word 0x3c9a315a, 0x8bd8a03b, 0x00000000, 0x00000000
+ .word 0x3ffe1e1e, 0x1e1e1e1e, 0x3ff752e5, 0x0db3a3a2
+ .word 0xbc939331, 0x3eea4381, 0x00000000, 0x00000000
+ .word 0x3ffe1e1e, 0x1e1e1e1e, 0x40007e0f, 0x66afed07
+ .word 0xbc74a6e1, 0xdcd59eaf, 0x00000000, 0x00000000
+ .word 0x3ffde5d6, 0xe3f8868a, 0x3ff768ce, 0x6d3c11e0
+ .word 0xbc9478b8, 0xab33074d, 0x00000000, 0x00000000
+ .word 0x3ffde5d6, 0xe3f8868a, 0x40008d8d, 0xd3b1d9aa
+ .word 0x3c81d533, 0x85fe2b96, 0x00000000, 0x00000000
+ .word 0x3ffdae60, 0x76b981db, 0x3ff77ea3, 0x5d632e43
+ .word 0x3c92f714, 0x9a22fa4f, 0x00000000, 0x00000000
+ .word 0x3ffdae60, 0x76b981db, 0x40009cfd, 0xcd8ed009
+ .word 0xbc4862a9, 0xbcf7f372, 0x00000000, 0x00000000
+ .word 0x3ffd77b6, 0x54b82c34, 0x3ff79464, 0x16ebc56c
+ .word 0x3c9a7cd5, 0x224c7375, 0x00000000, 0x00000000
+ .word 0x3ffd77b6, 0x54b82c34, 0x4000ac5f, 0x7c69a3c8
+ .word 0x3ca94dff, 0x7bfa2757, 0x00000000, 0x00000000
+ .word 0x3ffd41d4, 0x1d41d41d, 0x3ff7aa10, 0xd193c22d
+ .word 0xbc790ed9, 0x403afe85, 0x00000000, 0x00000000
+ .word 0x3ffd41d4, 0x1d41d41d, 0x4000bbb3, 0x07acafdb
+ .word 0xbc852a97, 0x686f9d2e, 0x00000000, 0x00000000
+ .word 0x3ffd0cb5, 0x8f6ec074, 0x3ff7bfa9, 0xc41ab040
+ .word 0x3c8d6bc3, 0x02ae758f, 0x00000000, 0x00000000
+ .word 0x3ffd0cb5, 0x8f6ec074, 0x4000caf8, 0x960e710d
+ .word 0x3c9caa6b, 0xe2366171, 0x00000000, 0x00000000
+ .word 0x3ffcd856, 0x89039b0b, 0x3ff7d52f, 0x244809e9
+ .word 0x3c9081f6, 0xf3b99d5f, 0x00000000, 0x00000000
+ .word 0x3ffcd856, 0x89039b0b, 0x4000da30, 0x4d95fb06
+ .word 0xbc9e1269, 0x76855586, 0x00000000, 0x00000000
+ .word 0x3ffca4b3, 0x055ee191, 0x3ff7eaa1, 0x26f15284
+ .word 0xbc846ce4, 0x68c1882b, 0x00000000, 0x00000000
+ .word 0x3ffca4b3, 0x055ee191, 0x4000e95a, 0x539f492c
+ .word 0xbc80c73f, 0xc38a2184, 0x00000000, 0x00000000
+ .word 0x3ffc71c7, 0x1c71c71c, 0x3ff80000, 0x00000000
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ffc71c7, 0x1c71c71c, 0x4000f876, 0xccdf6cd9
+ .word 0x3cab1a18, 0xf13a34c0, 0x00000000, 0x00000000
+ .word 0x3ffc3f8f, 0x01c3f8f0, 0x3ff8154b, 0xe2773526
+ .word 0xbc857147, 0xe067d0ee, 0x00000000, 0x00000000
+ .word 0x3ffc3f8f, 0x01c3f8f0, 0x40010785, 0xdd689a29
+ .word 0xbcaaabbe, 0x9e4d810a, 0x00000000, 0x00000000
+ .word 0x3ffc0e07, 0x0381c0e0, 0x3ff82a85, 0x00794e6c
+ .word 0xbc82edaa, 0x75e6ac5f, 0x00000000, 0x00000000
+ .word 0x3ffc0e07, 0x0381c0e0, 0x40011687, 0xa8ae14a3
+ .word 0x3cac9b43, 0xbcf06106, 0x00000000, 0x00000000
+ .word 0x3ffbdd2b, 0x899406f7, 0x3ff83fab, 0x8b4d4315
+ .word 0x3c829e06, 0x2d3e134d, 0x00000000, 0x00000000
+ .word 0x3ffbdd2b, 0x899406f7, 0x4001257c, 0x5187fd09
+ .word 0xbca4a750, 0xa83950a4, 0x00000000, 0x00000000
+ .word 0x3ffbacf9, 0x14c1bad0, 0x3ff854bf, 0xb363dc39
+ .word 0x3c99399f, 0xca38787e, 0x00000000, 0x00000000
+ .word 0x3ffbacf9, 0x14c1bad0, 0x40013463, 0xfa37014e
+ .word 0x3c7b295b, 0xaa698cd3, 0x00000000, 0x00000000
+ .word 0x3ffb7d6c, 0x3dda338b, 0x3ff869c1, 0xa85cc346
+ .word 0x3c9fcc99, 0xde11b1d1, 0x00000000, 0x00000000
+ .word 0x3ffb7d6c, 0x3dda338b, 0x4001433e, 0xc467effb
+ .word 0x3c92c031, 0x3b7278c8, 0x00000000, 0x00000000
+ .word 0x3ffb4e81, 0xb4e81b4f, 0x3ff87eb1, 0x990b697a
+ .word 0x3c7c43e9, 0xf593ea0f, 0x00000000, 0x00000000
+ .word 0x3ffb4e81, 0xb4e81b4f, 0x4001520c, 0xd1372feb
+ .word 0xbcadec22, 0x5d8e66d2, 0x00000000, 0x00000000
+ .word 0x3ffb2036, 0x406c80d9, 0x3ff8938f, 0xb37bc9c1
+ .word 0xbc7c115f, 0x9f5c8d6f, 0x00000000, 0x00000000
+ .word 0x3ffb2036, 0x406c80d9, 0x400160ce, 0x41341d74
+ .word 0x3c967036, 0x863a1bb2, 0x00000000, 0x00000000
+ .word 0x3ffaf286, 0xbca1af28, 0x3ff8a85c, 0x24f70659
+ .word 0x3c9f6e07, 0x6b588a50, 0x00000000, 0x00000000
+ .word 0x3ffaf286, 0xbca1af28, 0x40016f83, 0x34644df9
+ .word 0xbcae8679, 0x80a1c48e, 0x00000000, 0x00000000
+ .word 0x3ffac570, 0x1ac5701b, 0x3ff8bd17, 0x1a07e38a
+ .word 0x3c9c20b5, 0xa697f23f, 0x00000000, 0x00000000
+ .word 0x3ffac570, 0x1ac5701b, 0x40017e2b, 0xca46bab9
+ .word 0x3ca1519b, 0x10d04d5f, 0x00000000, 0x00000000
+ .word 0x3ffa98ef, 0x606a63be, 0x3ff8d1c0, 0xbe7f20ac
+ .word 0xbc8bdb8a, 0x6df021f3, 0x00000000, 0x00000000
+ .word 0x3ffa98ef, 0x606a63be, 0x40018cc8, 0x21d6d3e3
+ .word 0xbca30af1, 0xd725cc5b, 0x00000000, 0x00000000
+ .word 0x3ffa6d01, 0xa6d01a6d, 0x3ff8e659, 0x3d77b0b8
+ .word 0xbc7d99d7, 0x64769954, 0x00000000, 0x00000000
+ .word 0x3ffa6d01, 0xa6d01a6d, 0x40019b58, 0x598f7c9f
+ .word 0xbc72e0d8, 0x51c0e011, 0x00000000, 0x00000000
+ .word 0x3ffa41a4, 0x1a41a41a, 0x3ff8fae0, 0xc15ad38a
+ .word 0xbc7db7ad, 0xb6817f6d, 0x00000000, 0x00000000
+ .word 0x3ffa41a4, 0x1a41a41a, 0x4001a9dc, 0x8f6df104
+ .word 0xbcafc519, 0xc18dc1d5, 0x00000000, 0x00000000
+ .word 0x3ffa16d3, 0xf97a4b02, 0x3ff90f57, 0x73e410e4
+ .word 0x3c6fb605, 0xcee75482, 0x00000000, 0x00000000
+ .word 0x3ffa16d3, 0xf97a4b02, 0x4001b854, 0xe0f496a0
+ .word 0x3ca27006, 0x899b7c3a, 0x00000000, 0x00000000
+ .word 0x3ff9ec8e, 0x951033d9, 0x3ff923bd, 0x7e25164d
+ .word 0xbc9278d1, 0x901d3b40, 0x00000000, 0x00000000
+ .word 0x3ff9ec8e, 0x951033d9, 0x4001c6c1, 0x6b2db870
+ .word 0x3c887e1d, 0x8335fb28, 0x00000000, 0x00000000
+ .word 0x3ff9c2d1, 0x4ee4a102, 0x3ff93813, 0x088978c5
+ .word 0xbc54312c, 0x627e5c52, 0x00000000, 0x00000000
+ .word 0x3ff9c2d1, 0x4ee4a102, 0x4001d522, 0x4aae2ee1
+ .word 0x3ca91222, 0xf6aebdc9, 0x00000000, 0x00000000
+ .word 0x3ff99999, 0x9999999a, 0x3ff94c58, 0x3ada5b53
+ .word 0xbc9b7ed7, 0x50df3cca, 0x00000000, 0x00000000
+ .word 0x3ff99999, 0x9999999a, 0x4001e377, 0x9b97f4a8
+ .word 0xbc9f5063, 0x19fcfd19, 0x00000000, 0x00000000
+ .word 0x3ff970e4, 0xf80cb872, 0x3ff9608d, 0x3c41fb4b
+ .word 0x3c73df32, 0xeaa86b83, 0x00000000, 0x00000000
+ .word 0x3ff970e4, 0xf80cb872, 0x4001f1c1, 0x799ca8ff
+ .word 0xbca28b52, 0xeb725e0a, 0x00000000, 0x00000000
+ .word 0x3ff948b0, 0xfcd6e9e0, 0x3ff974b2, 0x334f2346
+ .word 0x3c814e4a, 0xd3ae9e3f, 0x00000000, 0x00000000
+ .word 0x3ff948b0, 0xfcd6e9e0, 0x40020000, 0x00000000
+ .word 0xb9000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ff920fb, 0x49d0e229, 0x3ff988c7, 0x45f88592
+ .word 0x3c95af70, 0x1a56047b, 0x00000000, 0x00000000
+ .word 0x3ff920fb, 0x49d0e229, 0x40020e33, 0x499a21a9
+ .word 0xbc924ba2, 0x74fea9a1, 0x00000000, 0x00000000
+ .word 0x3ff8f9c1, 0x8f9c18fa, 0x3ff99ccc, 0x999fff00
+ .word 0x3c866234, 0x063b88ee, 0x00000000, 0x00000000
+ .word 0x3ff8f9c1, 0x8f9c18fa, 0x40021c5b, 0x70d9f824
+ .word 0xbca844f9, 0x9eee6fc3, 0x00000000, 0x00000000
+ .word 0x3ff8d301, 0x8d3018d3, 0x3ff9b0c2, 0x5315c2ce
+ .word 0xbc87f64a, 0x65cc6887, 0x00000000, 0x00000000
+ .word 0x3ff8d301, 0x8d3018d3, 0x40022a78, 0x8fc76de5
+ .word 0x3c931e32, 0xd4e07a48, 0x00000000, 0x00000000
+ .word 0x3ff8acb9, 0x0f6bf3aa, 0x3ff9c4a8, 0x969b7077
+ .word 0xbc96ca9e, 0x5cd4517a, 0x00000000, 0x00000000
+ .word 0x3ff8acb9, 0x0f6bf3aa, 0x4002388a, 0xc0059c28
+ .word 0xbc96072f, 0xbe0e5da3, 0x00000000, 0x00000000
+ .word 0x3ff886e5, 0xf0abb04a, 0x3ff9d87f, 0x87e71422
+ .word 0xbc85fdd8, 0xb11b7b1d, 0x00000000, 0x00000000
+ .word 0x3ff886e5, 0xf0abb04a, 0x40024692, 0x1ad4ea49
+ .word 0xbcaa6d9b, 0x268ef62d, 0x00000000, 0x00000000
+ .word 0x3ff86186, 0x18618618, 0x3ff9ec47, 0x4a261264
+ .word 0xbc8540c4, 0x89ba5074, 0x00000000, 0x00000000
+ .word 0x3ff86186, 0x18618618, 0x4002548e, 0xb9151e85
+ .word 0x3c999820, 0x0a774879, 0x00000000, 0x00000000
+ .word 0x3ff83c97, 0x7ab2bedd, 0x3ffa0000, 0x00000000
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ff83c97, 0x7ab2bedd, 0x40026280, 0xb3476096
+ .word 0x3c9ab88b, 0x5ffe1cf5, 0x00000000, 0x00000000
+ .word 0x3ff81818, 0x18181818, 0x3ffa13a9, 0xcb996651
+ .word 0xbc9f9ab9, 0x0e4e85c3, 0x00000000, 0x00000000
+ .word 0x3ff81818, 0x18181818, 0x40027068, 0x21902e9a
+ .word 0x3c90ff4c, 0x20f541f6, 0x00000000, 0x00000000
+ .word 0x3ff7f405, 0xfd017f40, 0x3ffa2744, 0xce9674f5
+ .word 0xbc8b936c, 0x81e54daa, 0x00000000, 0x00000000
+ .word 0x3ff7f405, 0xfd017f40, 0x40027e45, 0x1bb944c3
+ .word 0x3c8e4a16, 0x42099ef0, 0x00000000, 0x00000000
+ .word 0x3ff7d05f, 0x417d05f4, 0x3ffa3ad1, 0x2a1da160
+ .word 0x3c951168, 0xf4be5984, 0x00000000, 0x00000000
+ .word 0x3ff7d05f, 0x417d05f4, 0x40028c17, 0xb9337834
+ .word 0xbc8af150, 0xa0e88972, 0x00000000, 0x00000000
+ .word 0x3ff7ad22, 0x08e0ecc3, 0x3ffa4e4e, 0xfeda34de
+ .word 0x3c6afbb4, 0xdbdadd0d, 0x00000000, 0x00000000
+ .word 0x3ff7ad22, 0x08e0ecc3, 0x400299e0, 0x11188575
+ .word 0xbc9a6169, 0x3fb250e5, 0x00000000, 0x00000000
+ .word 0x3ff78a4c, 0x8178a4c8, 0x3ffa61be, 0x6cfec997
+ .word 0xbc8c37ea, 0xb2bb5ca0, 0x00000000, 0x00000000
+ .word 0x3ff78a4c, 0x8178a4c8, 0x4002a79e, 0x3a2cd2e6
+ .word 0xbca5ddd4, 0x9cc9ad59, 0x00000000, 0x00000000
+ .word 0x3ff767dc, 0xe434a9b1, 0x3ffa751f, 0x9447b724
+ .word 0x3c82b909, 0x477e9ed1, 0x00000000, 0x00000000
+ .word 0x3ff767dc, 0xe434a9b1, 0x4002b552, 0x4ae1278e
+ .word 0xbca2f2a9, 0x8841b934, 0x00000000, 0x00000000
+ .word 0x3ff745d1, 0x745d1746, 0x3ffa8872, 0x93fd6f34
+ .word 0x3c768ef2, 0x4f198721, 0x00000000, 0x00000000
+ .word 0x3ff745d1, 0x745d1746, 0x4002c2fc, 0x595456a7
+ .word 0xbc996f60, 0xb0fc7e96, 0x00000000, 0x00000000
+ .word 0x3ff72428, 0x7f46debc, 0x3ffa9bb7, 0x8af6cabc
+ .word 0x3c8ba60d, 0xc999aba7, 0x00000000, 0x00000000
+ .word 0x3ff72428, 0x7f46debc, 0x4002d09c, 0x7b54e03e
+ .word 0x3c98c747, 0xfdeda6de, 0x00000000, 0x00000000
+ .word 0x3ff702e0, 0x5c0b8170, 0x3ffaaeee, 0x979b4838
+ .word 0xbc91f08a, 0xef9ef6c0, 0x00000000, 0x00000000
+ .word 0x3ff702e0, 0x5c0b8170, 0x4002de32, 0xc6628741
+ .word 0x3ca78746, 0xc499a4f7, 0x00000000, 0x00000000
+ .word 0x3ff6e1f7, 0x6b4337c7, 0x3ffac217, 0xd7e53b66
+ .word 0xbc64282a, 0xaa967e4f, 0x00000000, 0x00000000
+ .word 0x3ff6e1f7, 0x6b4337c7, 0x4002ebbf, 0x4fafdd4b
+ .word 0xbca78a73, 0xb72d5c41, 0x00000000, 0x00000000
+ .word 0x3ff6c16c, 0x16c16c17, 0x3ffad533, 0x6963eefc
+ .word 0xbc977c4a, 0x537dbdd2, 0x00000000, 0x00000000
+ .word 0x3ff6c16c, 0x16c16c17, 0x4002f942, 0x2c23c47e
+ .word 0xbc827c85, 0xf29db65d, 0x00000000, 0x00000000
+ .word 0x3ff6a13c, 0xd1537290, 0x3ffae841, 0x693db8b4
+ .word 0x3c90f773, 0xcd7a0713, 0x00000000, 0x00000000
+ .word 0x3ff6a13c, 0xd1537290, 0x400306bb, 0x705ae7c3
+ .word 0x3caf4933, 0x907af47a, 0x00000000, 0x00000000
+ .word 0x3ff68168, 0x16816817, 0x3ffafb41, 0xf432002e
+ .word 0xbc7ac94a, 0xfdfe8c5b, 0x00000000, 0x00000000
+ .word 0x3ff68168, 0x16816817, 0x4003142b, 0x30a929ab
+ .word 0x3c98dc01, 0x081a6c5c, 0x00000000, 0x00000000
+ .word 0x3ff661ec, 0x6a5122f9, 0x3ffb0e35, 0x269b38f5
+ .word 0xbc4f69a8, 0x05c3271a, 0x00000000, 0x00000000
+ .word 0x3ff661ec, 0x6a5122f9, 0x40032191, 0x811b0a41
+ .word 0xbc9ce3f0, 0xb38c0bf7, 0x00000000, 0x00000000
+ .word 0x3ff642c8, 0x590b2164, 0x3ffb211b, 0x1c70d023
+ .word 0x3c2e4c5e, 0x66eae2f0, 0x00000000, 0x00000000
+ .word 0x3ff642c8, 0x590b2164, 0x40032eee, 0x75770416
+ .word 0x3caed8e7, 0x730eaff2, 0x00000000, 0x00000000
+ .word 0x3ff623fa, 0x77016240, 0x3ffb33f3, 0xf1490def
+ .word 0xbc95894b, 0xcb02373b, 0x00000000, 0x00000000
+ .word 0x3ff623fa, 0x77016240, 0x40033c42, 0x213ee0c9
+ .word 0x3ca84c24, 0x4ba98124, 0x00000000, 0x00000000
+ .word 0x3ff60581, 0x60581606, 0x3ffb46bf, 0xc05aeb89
+ .word 0x3c9b1c7c, 0xc39adc9f, 0x00000000, 0x00000000
+ .word 0x3ff60581, 0x60581606, 0x4003498c, 0x97b10540
+ .word 0x3c734193, 0xbc8543b4, 0x00000000, 0x00000000
+ .word 0x3ff5e75b, 0xb8d015e7, 0x3ffb597e, 0xa47fdda3
+ .word 0xbc923cc8, 0x9d1e4635, 0x00000000, 0x00000000
+ .word 0x3ff5e75b, 0xb8d015e7, 0x400356cd, 0xebc9b5e2
+ .word 0x3c96dee1, 0x46bb1571, 0x00000000, 0x00000000
+ .word 0x3ff5c988, 0x2b931057, 0x3ffb6c30, 0xb83593e6
+ .word 0x3c8f4e3f, 0xd28d84bc, 0x00000000, 0x00000000
+ .word 0x3ff5c988, 0x2b931057, 0x40036406, 0x30445306
+ .word 0xbca78d86, 0x2327430a, 0x00000000, 0x00000000
+ .word 0x3ff5ac05, 0x6b015ac0, 0x3ffb7ed6, 0x159fadc8
+ .word 0xbc899bcf, 0xf04d134b, 0x00000000, 0x00000000
+ .word 0x3ff5ac05, 0x6b015ac0, 0x40037135, 0x779c8dcb
+ .word 0xbc8fe126, 0xce9778ae, 0x00000000, 0x00000000
+ .word 0x3ff58ed2, 0x308158ed, 0x3ffb916e, 0xd68964ec
+ .word 0x3c826a5d, 0x5dbaae29, 0x00000000, 0x00000000
+ .word 0x3ff58ed2, 0x308158ed, 0x40037e5b, 0xd40f95a1
+ .word 0x3cac6ff5, 0xeca5d122, 0x00000000, 0x00000000
+ .word 0x3ff571ed, 0x3c506b3a, 0x3ffba3fb, 0x14672d7c
+ .word 0xbc8117d3, 0x97dcefc9, 0x00000000, 0x00000000
+ .word 0x3ff571ed, 0x3c506b3a, 0x40038b79, 0x579d3eab
+ .word 0xbcac254f, 0xc0db598e, 0x00000000, 0x00000000
+ .word 0x3ff55555, 0x55555555, 0x3ffbb67a, 0xe8584caa
+ .word 0x3c9cec95, 0xd0b5c1e3, 0x00000000, 0x00000000
+ .word 0x3ff55555, 0x55555555, 0x4003988e, 0x1409212e
+ .word 0x3caf40c8, 0x6450c869, 0x00000000, 0x00000000
+ .word 0x3ff53909, 0x48f40feb, 0x3ffbc8ee, 0x6b2865b9
+ .word 0x3c9394eb, 0x90f645c8, 0x00000000, 0x00000000
+ .word 0x3ff53909, 0x48f40feb, 0x4003a59a, 0x1adbb257
+ .word 0x3ca6adce, 0x020a308d, 0x00000000, 0x00000000
+ .word 0x3ff51d07, 0xeae2f815, 0x3ffbdb55, 0xb550fdbc
+ .word 0x3c7365e9, 0x6aa5fae3, 0x00000000, 0x00000000
+ .word 0x3ff51d07, 0xeae2f815, 0x4003b29d, 0x7d635662
+ .word 0x3cac99b0, 0x5e282129, 0x00000000, 0x00000000
+ .word 0x3ff50150, 0x15015015, 0x3ffbedb0, 0xdefaf661
+ .word 0x3c91a627, 0xb279170d, 0x00000000, 0x00000000
+ .word 0x3ff50150, 0x15015015, 0x4003bf98, 0x4cb56c77
+ .word 0x3ca8f653, 0xbcc0c4a1, 0x00000000, 0x00000000
+ .word 0x3ff4e5e0, 0xa72f0539, 0x3ffc0000, 0x00000000
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ff4e5e0, 0xa72f0539, 0x4003cc8a, 0x99af5453
+ .word 0xbc486364, 0x4f05f2be, 0x00000000, 0x00000000
+ .word 0x3ff4cab8, 0x8725af6e, 0x3ffc1243, 0x2fec0329
+ .word 0x3c96e0d7, 0x8dd23a7d, 0x00000000, 0x00000000
+ .word 0x3ff4cab8, 0x8725af6e, 0x4003d974, 0x74f76df2
+ .word 0x3c82e3c9, 0xfdbbbdc2, 0x00000000, 0x00000000
+ .word 0x3ff4afd6, 0xa052bf5b, 0x3ffc247a, 0x85fe81fa
+ .word 0x3c89d8ee, 0xf6854220, 0x00000000, 0x00000000
+ .word 0x3ff4afd6, 0xa052bf5b, 0x4003e655, 0xeefe1367
+ .word 0x3c80eb35, 0xbb532559, 0x00000000, 0x00000000
+ .word 0x3ff49539, 0xe3b2d067, 0x3ffc36a6, 0x192bf168
+ .word 0xbc9083d8, 0x1a423b11, 0x00000000, 0x00000000
+ .word 0x3ff49539, 0xe3b2d067, 0x4003f32f, 0x17fe8d04
+ .word 0xbc905d6c, 0x1c437de0, 0x00000000, 0x00000000
+ .word 0x3ff47ae1, 0x47ae147b, 0x3ffc48c6, 0x001f0ac0
+ .word 0xbc92d481, 0x189efd6b, 0x00000000, 0x00000000
+ .word 0x3ff47ae1, 0x47ae147b, 0x40040000, 0x00000000
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ff460cb, 0xc7f5cf9a, 0x3ffc5ada, 0x513a1593
+ .word 0xbc7aaedd, 0x014f5f03, 0x00000000, 0x00000000
+ .word 0x3ff460cb, 0xc7f5cf9a, 0x40040cc8, 0xb6d657c2
+ .word 0xbc9c05ab, 0xf480ce19, 0x00000000, 0x00000000
+ .word 0x3ff446f8, 0x6562d9fb, 0x3ffc6ce3, 0x22982a3f
+ .word 0x3c891b2d, 0xf3e15f29, 0x00000000, 0x00000000
+ .word 0x3ff446f8, 0x6562d9fb, 0x40041989, 0x4c2329f0
+ .word 0x3c976037, 0x46da0ea6, 0x00000000, 0x00000000
+ .word 0x3ff42d66, 0x25d51f87, 0x3ffc7ee0, 0x8a0e6d4c
+ .word 0x3c991c54, 0xc53e75c8, 0x00000000, 0x00000000
+ .word 0x3ff42d66, 0x25d51f87, 0x40042641, 0xcf569572
+ .word 0xbcadf80b, 0x1442c029, 0x00000000, 0x00000000
+ .word 0x3ff41414, 0x14141414, 0x3ffc90d2, 0x9d2d43ce
+ .word 0xbc9edadb, 0x07f1137a, 0x00000000, 0x00000000
+ .word 0x3ff41414, 0x14141414, 0x400432f2, 0x4fb01c7a
+ .word 0x3ca38bfe, 0x0e012c1c, 0x00000000, 0x00000000
+ .word 0x3ff3fb01, 0x3fb013fb, 0x3ffca2b9, 0x714180f7
+ .word 0xbc81a63d, 0x6750c57c, 0x00000000, 0x00000000
+ .word 0x3ff3fb01, 0x3fb013fb, 0x40043f9a, 0xdc3f79ce
+ .word 0x3c66d2b1, 0x767ae30a, 0x00000000, 0x00000000
+ .word 0x3ff3e22c, 0xbce4a902, 0x3ffcb495, 0x1b558d17
+ .word 0x3c8fcbcb, 0x357f2308, 0x00000000, 0x00000000
+ .word 0x3ff3e22c, 0xbce4a902, 0x40044c3b, 0x83e57153
+ .word 0x3c98c853, 0xc6be5ee1, 0x00000000, 0x00000000
+ .word 0x3ff3c995, 0xa47babe7, 0x3ffcc665, 0xb0328622
+ .word 0xbc91baa4, 0xd369f814, 0x00000000, 0x00000000
+ .word 0x3ff3c995, 0xa47babe7, 0x400458d4, 0x55549c1a
+ .word 0x3ca02d72, 0x8d9a6054, 0x00000000, 0x00000000
+ .word 0x3ff3b13b, 0x13b13b14, 0x3ffcd82b, 0x446159f3
+ .word 0x3c983fb7, 0xb33cdfe8, 0x00000000, 0x00000000
+ .word 0x3ff3b13b, 0x13b13b14, 0x40046565, 0x5f122ff6
+ .word 0x3ca862c5, 0xd2f0ca4c, 0x00000000, 0x00000000
+ .word 0x3ff3991c, 0x2c187f63, 0x3ffce9e5, 0xec2bda80
+ .word 0xbc94ccf3, 0xd8e249ab, 0x00000000, 0x00000000
+ .word 0x3ff3991c, 0x2c187f63, 0x400471ee, 0xaf76c2c6
+ .word 0x3c975c62, 0xeff26e8e, 0x00000000, 0x00000000
+ .word 0x3ff38138, 0x13813814, 0x3ffcfb95, 0xbb9dcc0c
+ .word 0x3c92cea2, 0x0857ae03, 0x00000000, 0x00000000
+ .word 0x3ff38138, 0x13813814, 0x40047e70, 0x54af0989
+ .word 0x3c9d8c33, 0xc0054830, 0x00000000, 0x00000000
+ .word 0x3ff3698d, 0xf3de0748, 0x3ffd0d3a, 0xc685eda4
+ .word 0x3c94115a, 0x0ff4cf9e, 0x00000000, 0x00000000
+ .word 0x3ff3698d, 0xf3de0748, 0x40048aea, 0x5cbc935f
+ .word 0xbca8cb00, 0x12d14ff5, 0x00000000, 0x00000000
+ .word 0x3ff3521c, 0xfb2b78c1, 0x3ffd1ed5, 0x2076fbe9
+ .word 0x3c8f48a8, 0x6b72875f, 0x00000000, 0x00000000
+ .word 0x3ff3521c, 0xfb2b78c1, 0x4004975c, 0xd5768088
+ .word 0xbca1731e, 0xbc02f748, 0x00000000, 0x00000000
+ .word 0x3ff33ae4, 0x5b57bcb2, 0x3ffd3064, 0xdcc8ae67
+ .word 0x3c93480e, 0x805158ba, 0x00000000, 0x00000000
+ .word 0x3ff33ae4, 0x5b57bcb2, 0x4004a3c7, 0xcc8a358a
+ .word 0xbc9d8f7f, 0xd2726ffa, 0x00000000, 0x00000000
+ .word 0x3ff323e3, 0x4a2b10bf, 0x3ffd41ea, 0x0e98af91
+ .word 0x3c824640, 0x0309962f, 0x00000000, 0x00000000
+ .word 0x3ff323e3, 0x4a2b10bf, 0x4004b02b, 0x4f7c0a88
+ .word 0xbcaf71e1, 0xf6cafde2, 0x00000000, 0x00000000
+ .word 0x3ff30d19, 0x0130d190, 0x3ffd5364, 0xc8cb8f86
+ .word 0x3c8ad003, 0xc00630e1, 0x00000000, 0x00000000
+ .word 0x3ff30d19, 0x0130d190, 0x4004bc87, 0x6ba7f6ec
+ .word 0x3c9c1edb, 0x2be943b8, 0x00000000, 0x00000000
+ .word 0x3ff2f684, 0xbda12f68, 0x3ffd64d5, 0x1e0db1c6
+ .word 0xbc911ed3, 0x6986d362, 0x00000000, 0x00000000
+ .word 0x3ff2f684, 0xbda12f68, 0x4004c8dc, 0x2e423980
+ .word 0xbc949d1f, 0x46ef5d2c, 0x00000000, 0x00000000
+ .word 0x3ff2e025, 0xc04b8097, 0x3ffd763b, 0x20d435ef
+ .word 0x3c9d6780, 0xf76cb258, 0x00000000, 0x00000000
+ .word 0x3ff2e025, 0xc04b8097, 0x4004d529, 0xa457fcfc
+ .word 0xbca1404a, 0x46484e3d, 0x00000000, 0x00000000
+ .word 0x3ff2c9fb, 0x4d812ca0, 0x3ffd8796, 0xe35ddbb2
+ .word 0x3c83fdd9, 0x1aeb637a, 0x00000000, 0x00000000
+ .word 0x3ff2c9fb, 0x4d812ca0, 0x4004e16f, 0xdacff937
+ .word 0xbca1deb9, 0xd3815ad2, 0x00000000, 0x00000000
+ .word 0x3ff2b404, 0xad012b40, 0x3ffd98e8, 0x77b3e207
+ .word 0xbc48c301, 0xee02dee8, 0x00000000, 0x00000000
+ .word 0x3ff2b404, 0xad012b40, 0x4004edae, 0xde6b10fe
+ .word 0x3ca99709, 0x4a91a780, 0x00000000, 0x00000000
+ .word 0x3ff29e41, 0x29e4129e, 0x3ffdaa2f, 0xefaae1d8
+ .word 0xbc63fe0e, 0x03f44594, 0x00000000, 0x00000000
+ .word 0x3ff29e41, 0x29e4129e, 0x4004f9e6, 0xbbc4ecb3
+ .word 0x3c6ce5a6, 0x018493f1, 0x00000000, 0x00000000
+ .word 0x3ff288b0, 0x1288b013, 0x3ffdbb6d, 0x5ce3a42f
+ .word 0xbc922c27, 0xf71c8337, 0x00000000, 0x00000000
+ .word 0x3ff288b0, 0x1288b013, 0x40050617, 0x7f5491bb
+ .word 0xbc9e591e, 0x7b2a6d1a, 0x00000000, 0x00000000
+ .word 0x3ff27350, 0xb8812735, 0x3ffdcca0, 0xd0cbf408
+ .word 0x3c7a6d16, 0x2310db57, 0x00000000, 0x00000000
+ .word 0x3ff27350, 0xb8812735, 0x40051241, 0x356cf6e0
+ .word 0x3ca37dc2, 0x60e8bc2d, 0x00000000, 0x00000000
+ .word 0x3ff25e22, 0x708092f1, 0x3ffdddca, 0x5c9f6be8
+ .word 0x3c818520, 0xf0a3f809, 0x00000000, 0x00000000
+ .word 0x3ff25e22, 0x708092f1, 0x40051e63, 0xea3d95b0
+ .word 0x3caecf78, 0x2e88d5ce, 0x00000000, 0x00000000
+ .word 0x3ff24924, 0x92492492, 0x3ffdeeea, 0x11683f49
+ .word 0x3c802aae, 0x4bfa7c27, 0x00000000, 0x00000000
+ .word 0x3ff24924, 0x92492492, 0x40052a7f, 0xa9d2f8ea
+ .word 0xbca21c62, 0xb033c079, 0x00000000, 0x00000000
+ .word 0x3ff23456, 0x789abcdf, 0x3ffe0000, 0x00000000
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ff23456, 0x789abcdf, 0x40053694, 0x80174810
+ .word 0xbc9c3ec1, 0xa4ee7c21, 0x00000000, 0x00000000
+ .word 0x3ff21fb7, 0x8121fb78, 0x3ffe110c, 0x39105faf
+ .word 0x3c776161, 0x4c513964, 0x00000000, 0x00000000
+ .word 0x3ff21fb7, 0x8121fb78, 0x400542a2, 0x78d2d036
+ .word 0xbca495c2, 0x45254df4, 0x00000000, 0x00000000
+ .word 0x3ff20b47, 0x0c67c0d9, 0x3ffe220e, 0xcd13ed60
+ .word 0xbc729f01, 0xf18c9dc9, 0x00000000, 0x00000000
+ .word 0x3ff20b47, 0x0c67c0d9, 0x40054ea9, 0x9fac8a0f
+ .word 0x3c80cfbb, 0x19353b3d, 0x00000000, 0x00000000
+ .word 0x3ff1f704, 0x7dc11f70, 0x3ffe3307, 0xcc56cf5c
+ .word 0xbc81f04e, 0xc3189131, 0x00000000, 0x00000000
+ .word 0x3ff1f704, 0x7dc11f70, 0x40055aaa, 0x002a9d5a
+ .word 0xbc4bf504, 0x76241f94, 0x00000000, 0x00000000
+ .word 0x3ff1e2ef, 0x3b3fb874, 0x3ffe43f7, 0x46f7795b
+ .word 0xbc931e7f, 0x8af68f8c, 0x00000000, 0x00000000
+ .word 0x3ff1e2ef, 0x3b3fb874, 0x400566a3, 0xa5b2e1b1
+ .word 0x3caa1fd2, 0x8cc92e33, 0x00000000, 0x00000000
+ .word 0x3ff1cf06, 0xada2811d, 0x3ffe54dd, 0x4ce75f1e
+ .word 0xbc811b19, 0x5dfc62e5, 0x00000000, 0x00000000
+ .word 0x3ff1cf06, 0xada2811d, 0x40057296, 0x9b8b5cd8
+ .word 0x3ca30cbf, 0x1c53312e, 0x00000000, 0x00000000
+ .word 0x3ff1bb4a, 0x4046ed29, 0x3ffe65b9, 0xedeba38e
+ .word 0xbc7bb732, 0x51e8c364, 0x00000000, 0x00000000
+ .word 0x3ff1bb4a, 0x4046ed29, 0x40057e82, 0xecdabe8d
+ .word 0xbc7c2aed, 0xf3c4c4bd, 0x00000000, 0x00000000
+ .word 0x3ff1a7b9, 0x611a7b96, 0x3ffe768d, 0x399dc470
+ .word 0xbc9a8c81, 0x3405c01c, 0x00000000, 0x00000000
+ .word 0x3ff1a7b9, 0x611a7b96, 0x40058a68, 0xa4a8d9f3
+ .word 0x3ca50798, 0xe67012d9, 0x00000000, 0x00000000
+ .word 0x3ff19453, 0x808ca29c, 0x3ffe8757, 0x3f6c42c5
+ .word 0x3c9dbf9c, 0xf7bbcda3, 0x00000000, 0x00000000
+ .word 0x3ff19453, 0x808ca29c, 0x40059647, 0xcddf1ca5
+ .word 0x3ca14a95, 0xf35dea0b, 0x00000000, 0x00000000
+ .word 0x3ff18118, 0x11811812, 0x3ffe9818, 0x0e9b47f2
+ .word 0xbc9b6bd7, 0x4396d08e, 0x00000000, 0x00000000
+ .word 0x3ff18118, 0x11811812, 0x4005a220, 0x73490377
+ .word 0xbcadd036, 0x39925812, 0x00000000, 0x00000000
+ .word 0x3ff16e06, 0x89427379, 0x3ffea8cf, 0xb64547ab
+ .word 0x3c8721b2, 0x6374e19f, 0x00000000, 0x00000000
+ .word 0x3ff16e06, 0x89427379, 0x4005adf2, 0x9f948cfb
+ .word 0xbca42520, 0xf7716fa6, 0x00000000, 0x00000000
+ .word 0x3ff15b1e, 0x5f75270d, 0x3ffeb97e, 0x455b9edb
+ .word 0x3c999b45, 0x40857883, 0x00000000, 0x00000000
+ .word 0x3ff15b1e, 0x5f75270d, 0x4005b9be, 0x5d52a9da
+ .word 0x3c9098cd, 0x1b3af777, 0x00000000, 0x00000000
+ .word 0x3ff1485f, 0x0e0acd3b, 0x3ffeca23, 0xcaa72f73
+ .word 0x3c7e3ed5, 0x29679959, 0x00000000, 0x00000000
+ .word 0x3ff1485f, 0x0e0acd3b, 0x4005c583, 0xb6f7ab03
+ .word 0x3ca963bc, 0x9d795b51, 0x00000000, 0x00000000
+ .word 0x3ff135c8, 0x1135c811, 0x3ffedac0, 0x54c8f94c
+ .word 0x3c90b5c1, 0x15a56207, 0x00000000, 0x00000000
+ .word 0x3ff135c8, 0x1135c811, 0x4005d142, 0xb6dbadc5
+ .word 0x3ca6f1f5, 0x5323d116, 0x00000000, 0x00000000
+ .word 0x3ff12358, 0xe75d3033, 0x3ffeeb53, 0xf23ab028
+ .word 0xbc8617e4, 0xb5384f5d, 0x00000000, 0x00000000
+ .word 0x3ff12358, 0xe75d3033, 0x4005dcfb, 0x673b05df
+ .word 0xbca099df, 0xc321634f, 0x00000000, 0x00000000
+ .word 0x3ff11111, 0x11111111, 0x3ffefbde, 0xb14f4eda
+ .word 0xbc93a145, 0xfe1be078, 0x00000000, 0x00000000
+ .word 0x3ff11111, 0x11111111, 0x4005e8ad, 0xd236a58f
+ .word 0xbc7ef8c7, 0xc0d1fec6, 0x00000000, 0x00000000
+ .word 0x3ff0fef0, 0x10fef011, 0x3fff0c60, 0xa033a7b3
+ .word 0xbc91b0fc, 0x15cd89c6, 0x00000000, 0x00000000
+ .word 0x3ff0fef0, 0x10fef011, 0x4005f45a, 0x01d483b4
+ .word 0xbc94a237, 0xdc0fa105, 0x00000000, 0x00000000
+ .word 0x3ff0ecf5, 0x6be69c90, 0x3fff1cd9, 0xcceef239
+ .word 0x3c91afd8, 0x64eab60a, 0x00000000, 0x00000000
+ .word 0x3ff0ecf5, 0x6be69c90, 0x40060000, 0x00000000
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ff0db20, 0xa88f4696, 0x3fff2d4a, 0x45635640
+ .word 0xbc8eebae, 0xea670bc2, 0x00000000, 0x00000000
+ .word 0x3ff0db20, 0xa88f4696, 0x40060b9f, 0xd68a4554
+ .word 0x3ca328e1, 0x70dae176, 0x00000000, 0x00000000
+ .word 0x3ff0c971, 0x4fbcda3b, 0x3fff3db2, 0x174e7468
+ .word 0x3c9e1513, 0x2d6ac52a, 0x00000000, 0x00000000
+ .word 0x3ff0c971, 0x4fbcda3b, 0x40061739, 0x8f2aaa48
+ .word 0xbc9b672b, 0xba260735, 0x00000000, 0x00000000
+ .word 0x3ff0b7e6, 0xec259dc8, 0x3fff4e11, 0x5049ec26
+ .word 0xbc9b6656, 0xb6bd5d76, 0x00000000, 0x00000000
+ .word 0x3ff0b7e6, 0xec259dc8, 0x400622cd, 0x337f0fe8
+ .word 0x3c9fe207, 0x3279559f, 0x00000000, 0x00000000
+ .word 0x3ff0a681, 0x0a6810a7, 0x3fff5e67, 0xfdcbdf44
+ .word 0xbc98af06, 0x1849d6fc, 0x00000000, 0x00000000
+ .word 0x3ff0a681, 0x0a6810a7, 0x40062e5a, 0xcd0c3ebe
+ .word 0xbca2c50e, 0x2092203a, 0x00000000, 0x00000000
+ .word 0x3ff0953f, 0x39010954, 0x3fff6eb6, 0x2d27730d
+ .word 0xbc9401d9, 0x5ca1ce34, 0x00000000, 0x00000000
+ .word 0x3ff0953f, 0x39010954, 0x400639e2, 0x653e421b
+ .word 0xbc9f75e0, 0x5835e4b9, 0x00000000, 0x00000000
+ .word 0x3ff08421, 0x08421084, 0x3fff7efb, 0xeb8d4f12
+ .word 0xbc7e84e8, 0xa6ff3256, 0x00000000, 0x00000000
+ .word 0x3ff08421, 0x08421084, 0x40064564, 0x0568c1c3
+ .word 0x3cad1778, 0x7e4c8970, 0x00000000, 0x00000000
+ .word 0x3ff07326, 0x0a47f7c6, 0x3fff8f39, 0x460c19a8
+ .word 0x3c989b4e, 0x16ee9aaf, 0x00000000, 0x00000000
+ .word 0x3ff07326, 0x0a47f7c6, 0x400650df, 0xb6c759f4
+ .word 0x3c99063c, 0x91db4c77, 0x00000000, 0x00000000
+ .word 0x3ff0624d, 0xd2f1a9fc, 0x3fff9f6e, 0x4990f227
+ .word 0x3c8b42e5, 0xb5d1e808, 0x00000000, 0x00000000
+ .word 0x3ff0624d, 0xd2f1a9fc, 0x40065c55, 0x827df1d2
+ .word 0xbca3923d, 0xf03e1e2f, 0x00000000, 0x00000000
+ .word 0x3ff05197, 0xf7d73404, 0x3fffaf9b, 0x02e7e8f2
+ .word 0x3c897a76, 0x8f34e1c2, 0x00000000, 0x00000000
+ .word 0x3ff05197, 0xf7d73404, 0x400667c5, 0x7199104b
+ .word 0x3c875b89, 0x6f332e70, 0x00000000, 0x00000000
+ .word 0x3ff04104, 0x10410410, 0x3fffbfbf, 0x7ebc755f
+ .word 0xbc9b2a94, 0x084da0b6, 0x00000000, 0x00000000
+ .word 0x3ff04104, 0x10410410, 0x4006732f, 0x8d0e2f77
+ .word 0xbc93dffd, 0x470422e3, 0x00000000, 0x00000000
+ .word 0x3ff03091, 0xb51f5e1a, 0x3fffcfdb, 0xc999e97d
+ .word 0x3c82be17, 0xecdd3bbc, 0x00000000, 0x00000000
+ .word 0x3ff03091, 0xb51f5e1a, 0x40067e93, 0xddbc0e73
+ .word 0xbc86eb9f, 0x32ac1a5c, 0x00000000, 0x00000000
+ .word 0x3ff02040, 0x81020408, 0x3fffdfef, 0xefebe3d6
+ .word 0xbc909afc, 0xfc7c1f3b, 0x00000000, 0x00000000
+ .word 0x3ff02040, 0x81020408, 0x400689f2, 0x6c6b01d0
+ .word 0x3cae816f, 0x9d2a1032, 0x00000000, 0x00000000
+ .word 0x3ff01010, 0x10101010, 0x3fffeffb, 0xfdfebf1f
+ .word 0x3c95dee5, 0x1994f18b, 0x00000000, 0x00000000
+ .word 0x3ff01010, 0x10101010, 0x4006954b, 0x41cd4293
+ .word 0x3ca3d5bc, 0xcc443076, 0x00000000, 0x00000000
+ .word 0x3ff00000, 0x00000000, 0x40000000, 0x00000000
+ .word 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ .word 0x3ff00000, 0x00000000, 0x4006a09e, 0x667f3bcd
+ .word 0xbcabdd34, 0x13b26456, 0x00000000, 0x00000000
+
+#define A5 %f32
+#define A4 %f30
+#define A3 %f28
+#define A2 %f26
+#define A1 %f56
+
+#define DC0 %f8
+#define DC2 %f6
+#define DC3 %f4
+
+#define counter %l3
+#define TBL %l5
+#define stridex %l6
+#define stridey %l7
+
+#define _0x00001ff8 %i0
+#define _0x7ff00000 %o0
+#define _0x00100000 %o2
+
+#define tmp_counter STACK_BIAS-0x40
+#define tmp_px STACK_BIAS-0x38
+#define tmp0 STACK_BIAS-0x30
+#define tmp1 STACK_BIAS-0x28
+#define tmp2 STACK_BIAS-0x20
+#define tmp3 STACK_BIAS-0x18
+#define tmp4 STACK_BIAS-0x10
+#define tmp5 STACK_BIAS-0x08
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! ((float*)&res)[0] = ((float*)px)[0];
+! ((float*)&res)[1] = ((float*)px)[1];
+! hx = *(int*)px;
+! px += stridex;
+!
+! if ( hx >= 0x7ff00000 )
+! {
+! res = sqrt(res);
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! py += stridey;
+! goto next;
+! }
+! if ( hx < 0x00100000 )
+! {
+! res = sqrt(res);
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! py += stridey;
+! goto next;
+! }
+!
+! sqrt_exp = hx >> 21;
+! sqrt_exp -= 512;
+! sqrt_exp <<= 52;
+! dsqrt_exp = *(double*)&sqrt_exp;
+! bit = hx >> 15;
+! bit &= 32;
+! ind0 = hx >> 7;
+! ind0 &= 0x1ff8;
+! ind0 += 32;
+! ind0 &= -64;
+! ind1 = ind0;
+! ind1 += bit;
+!
+! res = vis_fand(res,DC0); /* DC0 = vis_to_double(0x000fffff, 0xffffffff); */
+! res = vis_for(res,A1); /* A1 = vis_to_double(0x3fe00000, 0x00000000); */
+! res_c = vis_fpadd32(res,DC2); /* DC2 = vis_to_double(0x00001000, 0x00000000); */
+! res_c = vis_fand(res_c,DC3); /* DC3 = vis_to_double(0x7fffe000, 0x00000000); */
+!
+! pind = (char*)TBL + ind1;
+! dexp_hi = ((double*)pind)[1];
+! dexp_lo = ((double*)pind)[2];
+!
+! dtmp0 = ((double*)pind)[0];
+! xx = (res - res_c);
+! xx *= dtmp0;
+!
+! res = A5 * xx;
+! res += A4;
+! res *= xx;
+! res += A3;
+! res *= xx;
+! res += A2;
+! res *= xx;
+! res += A1;
+! res *= xx;
+!
+! res = dexp_hi * res;
+! res += dexp_lo;
+! res += dexp_hi;
+!
+! dtmp0 = vis_fpadd32(dsqrt_exp,res);
+! ((float*)py)[0] = ((float*)&dtmp0)[0];
+! ((float*)py)[1] = ((float*)&dtmp0)[1];
+! py += stridey;
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
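+!
+! A rough reading of the exponent handling above: writing x = m * 2^(E-1022)
+! with biased exponent E and m forced into [0.5,1) by the fand/for pair,
+!     sqrt(x) = 2*sqrt(m)         * 2^(E/2 - 512)      when E is even,
+!     sqrt(x) = 2*sqrt(2)*sqrt(m) * 2^((E-1)/2 - 512)  when E is odd.
+! sqrt_exp = (hx >> 21) - 512 is floor(E/2) - 512, bit = (hx >> 15) & 32 is
+! E's parity scaled to the 32-byte offset that selects the 2*sqrt(m) or the
+! 2*sqrt(2)*sqrt(m) half of each 64-byte table entry, and the final
+! vis_fpadd32(dsqrt_exp,res) adds floor(E/2) - 512 straight into the
+! exponent field of the polynomial result, applying the power-of-two
+! scaling without an extra multiply.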
+
+ ENTRY(__vsqrt)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,l5)
+	wr	%g0,0x82,%asi		! use non-faulting (ASI_PNF) loads for the lda's below
+
+ ldd [TBL],A1
+ sll %i2,3,stridex
+ or %g0,%i3,%o4
+
+ ldd [TBL+8],A2
+ sll %i4,3,stridey
+ or %g0,0x7ff,%o0
+
+ ldd [TBL+16],A3
+ sll %o0,20,_0x7ff00000
+ or %g0,0x001,%o2
+
+ ldd [TBL+24],A4
+ sll %o2,20,_0x00100000
+
+ ldd [TBL+32],A5
+ ldd [TBL+40],DC0
+ ldd [TBL+48],DC2
+ ldd [TBL+56],DC3
+
+ add TBL,64,TBL
+ add %g0,1023,%o5
+ st %i0,[%fp+tmp_counter]
+
+ sll %o5,3,_0x00001ff8
+ stx %i1,[%fp+tmp_px]
+
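+! The (N_M) tags in the comments below appear to track the software
+! pipeline: N looks like the element slot within a group of six (the
+! prologue primes six slots and the main loop requires counter >= 6), and
+! M how many loop passes ago that slot's computation was issued.
+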
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%l2
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ lda [%l2]%asi,%o5 ! (5_1) hx = *(int*)px;
+
+ lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0];
+
+ lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1];
+
+ cmp %o5,_0x7ff00000 ! (5_1) hx ? 0x7ff00000
+ bge,pn %icc,.spec ! (5_1) if ( hx >= 0x7ff00000 )
+ nop
+
+ cmp %o5,_0x00100000 ! (5_1) hx ? 0x00100000
+ bl,pn %icc,.spec ! (5_1) if ( hx < 0x00100000 )
+ nop
+
+ add %l2,stridex,%l2 ! px += stridex
+ fand %f10,DC0,%f50 ! (5_1) res = vis_fand(res,DC0);
+
+ for %f50,A1,%f40 ! (5_1) res = vis_for(res,A1);
+ sra %o5,21,%l1 ! (5_1) sqrt_exp = hx >> 21;
+ sra %o5,15,%i1 ! (5_1) bit = hx >> 15;
+
+ sra %o5,7,%o1 ! (5_1) ind0 = hx >> 7;
+ sub %l1,512,%o3 ! (5_1) sqrt_exp -= 512;
+
+ and %o1,_0x00001ff8,%o1 ! (5_1) ind0 &= 0x1ff8;
+ lda [%l2]%asi,%f10 ! (0_0) ((float*)&res)[0] = ((float*)px)[0];
+
+ add %o1,32,%o1 ! (5_1) ind0 += 32;
+ lda [%l2+4]%asi,%f11 ! (0_0) ((float*)&res)[1] = ((float*)px)[1];
+
+ and %i1,32,%i4 ! (5_1) bit &= 32;
+	and	%o1,-64,%o1		! (5_1) ind0 &= -64;
+
+ sll %o1,0,%o7 ! (5_1) ind1 = ind0;
+
+ sllx %o3,52,%o3 ! (5_1) sqrt_exp <<= 52;
+ add %o7,%i4,%l0 ! (5_1) ind1 += bit;
+ lda [%l2]%asi,%o5 ! (0_0) hx = *(int*)px;
+
+ stx %o3,[%fp+tmp0] ! (5_1) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (0_0) res = vis_fand(res,DC0);
+
+ add %l2,stridex,%l2 ! px += stridex
+ fpadd32 %f40,DC2,%f54 ! (5_1) res_c = vis_fpadd32(res,DC2);
+
+ add %l0,TBL,%o1 ! (5_1) pind = (char*)TBL + ind1
+
+ cmp %o5,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
+ bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 )
+ for %f50,A1,%f42 ! (0_0) res = vis_for(res,A1);
+.cont0:
+ sra %o5,21,%l1 ! (0_0) sqrt_exp = hx >> 21;
+ sra %o5,15,%i2 ! (0_0) bit = hx >> 15;
+ ldd [%o1],%f50 ! (5_1) dtmp0 = ((double*)pind)[0];
+
+ sra %o5,7,%o1 ! (0_0) ind0 = hx >> 7;
+ sub %l1,512,%o3 ! (0_0) sqrt_exp -= 512;
+ fand %f54,DC3,%f54 ! (5_1) res_c = vis_fand(res_c,DC3);
+
+ and %o1,_0x00001ff8,%o1 ! (0_0) ind0 &= 0x1ff8;
+ lda [%l2]%asi,%f10 ! (1_0) ((float*)&res)[0] = ((float*)px)[0];
+
+ add %o1,32,%o1 ! (0_0) ind0 += 32;
+ lda [%l2+4]%asi,%f11 ! (1_0) ((float*)&res)[1] = ((float*)px)[1];
+
+ and %i2,32,%i4 ! (0_0) bit &= 32;
+	and	%o1,-64,%o1		! (0_0) ind0 &= -64;
+ fsubd %f40,%f54,%f40 ! (5_1) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (0_0) ind1 = ind0;
+
+ cmp %o5,_0x00100000 ! (0_0) hx ? 0x00100000
+ bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 )
+ lda [%l2]%asi,%o5 ! (1_0) hx = *(int*)px;
+.cont1:
+ sllx %o3,52,%o3 ! (0_0) sqrt_exp <<= 52;
+ add %o7,%i4,%i1 ! (0_0) ind1 += bit;
+
+ fmuld %f40,%f50,%f40 ! (5_1) xx *= dtmp0;
+ stx %o3,[%fp+tmp1] ! (0_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (1_0) res = vis_fand(res,DC0);
+
+ add %l2,stridex,%l2 ! px += stridex
+ fpadd32 %f42,DC2,%f54 ! (0_0) res_c = vis_fpadd32(res,DC2);
+
+ add %i1,TBL,%o1 ! (0_0) pind = (char*)TBL + ind1
+
+ cmp %o5,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
+ bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 )
+ for %f50,A1,%f14 ! (1_0) res = vis_for(res,A1);
+.cont2:
+ sra %o5,21,%l1 ! (1_0) sqrt_exp = hx >> 21;
+ sra %o5,15,%g5 ! (1_0) bit = hx >> 15;
+ ldd [%o1],%f50 ! (0_0) dtmp0 = ((double*)pind)[0];
+
+ fmuld A5,%f40,%f52 ! (5_1) res = A5 * xx;
+ sra %o5,7,%o1 ! (1_0) ind0 = hx >> 7;
+ sub %l1,512,%o3 ! (1_0) sqrt_exp -= 512;
+ fand %f54,DC3,%f54 ! (0_0) res_c = vis_fand(res_c,DC3);
+
+ and %o1,_0x00001ff8,%o1 ! (1_0) ind0 &= 0x1ff8;
+ lda [%l2]%asi,%f10 ! (2_0) ((float*)&res)[0] = ((float*)px)[0];
+
+ add %o1,32,%o1 ! (1_0) ind0 += 32;
+ lda [%l2+4]%asi,%f11 ! (2_0) ((float*)&res)[1] = ((float*)px)[1];
+
+ and %g5,32,%i4 ! (1_0) bit &= 32;
+	and	%o1,-64,%o1		! (1_0) ind0 &= -64;
+ fsubd %f42,%f54,%f42 ! (0_0) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (1_0) ind1 = ind0;
+ faddd %f52,A4,%f54 ! (5_1) res += A4;
+
+ cmp %o5,_0x00100000 ! (1_0) hx ? 0x00100000
+ bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 )
+ lda [%l2]%asi,%o5 ! (2_0) hx = *(int*)px;
+.cont3:
+ sllx %o3,52,%o3 ! (1_0) sqrt_exp <<= 52;
+ add %o7,%i4,%i2 ! (1_0) ind1 += bit;
+
+ fmuld %f42,%f50,%f42 ! (0_0) xx *= dtmp0;
+ stx %o3,[%fp+tmp2] ! (1_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (2_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f40,%f34 ! (5_1) res *= xx;
+ fpadd32 %f14,DC2,%f54 ! (1_0) res_c = vis_fpadd32(res,DC2);
+ add %l2,stridex,%l2 ! px += stridex
+
+ add %i2,TBL,%o1 ! (1_0) pind = (char*)TBL + ind1
+
+ cmp %o5,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
+ bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 )
+ for %f50,A1,%f18 ! (2_0) res = vis_for(res,A1);
+.cont4:
+ sra %o5,21,%l1 ! (2_0) sqrt_exp = hx >> 21;
+ sra %o5,15,%g1 ! (2_0) bit = hx >> 15;
+ ldd [%o1],%f50 ! (1_0) dtmp0 = ((double*)pind)[0];
+
+ fmuld A5,%f42,%f52 ! (0_0) res = A5 * xx;
+ sra %o5,7,%o1 ! (2_0) ind0 = hx >> 7;
+ sub %l1,512,%o3 ! (2_0) sqrt_exp -= 512;
+ fand %f54,DC3,%f54 ! (1_0) res_c = vis_fand(res_c,DC3);
+
+ and %o1,_0x00001ff8,%o1 ! (2_0) ind0 &= 0x1ff8;
+ lda [%l2]%asi,%f10 ! (3_0) ((float*)&res)[0] = ((float*)px)[0];
+ faddd %f34,A3,%f62 ! (5_1) res += A3;
+
+ add %o1,32,%o1 ! (2_0) ind0 += 32;
+ lda [%l2+4]%asi,%f11 ! (3_0) ((float*)&res)[1] = ((float*)px)[1];
+
+ and %g1,32,%i4 ! (2_0) bit &= 32;
+	and	%o1,-64,%o1		! (2_0) ind0 &= -64;
+ fsubd %f14,%f54,%f14 ! (1_0) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (2_0) ind1 = ind0;
+ faddd %f52,A4,%f54 ! (0_0) res += A4;
+
+ fmuld %f62,%f40,%f52 ! (5_1) res *= xx;
+ cmp %o5,_0x00100000 ! (2_0) hx ? 0x00100000
+ bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 )
+ lda [%l2]%asi,%o5 ! (3_0) hx = *(int*)px;
+.cont5:
+ sllx %o3,52,%o3 ! (2_0) sqrt_exp <<= 52;
+ add %o7,%i4,%g5 ! (2_0) ind1 += bit;
+
+ fmuld %f14,%f50,%f14 ! (1_0) xx *= dtmp0;
+ stx %o3,[%fp+tmp3] ! (2_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (3_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f42,%f34 ! (0_0) res *= xx;
+ fpadd32 %f18,DC2,%f54 ! (2_0) res_c = vis_fpadd32(res,DC2);
+ add %l2,stridex,%l2 ! px += stridex
+
+ add %g5,TBL,%o1 ! (2_0) pind = (char*)TBL + ind1
+ faddd %f52,A2,%f20 ! (5_1) res += A2;
+
+ cmp %o5,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
+ bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 )
+ for %f50,A1,%f44 ! (3_0) res = vis_for(res,A1);
+.cont6:
+ sra %o5,21,%l1 ! (3_0) sqrt_exp = hx >> 21;
+ sra %o5,15,%i3 ! (3_0) bit = hx >> 15;
+ ldd [%o1],%f50 ! (2_0) dtmp0 = ((double*)pind)[0];
+
+ fmuld A5,%f14,%f52 ! (1_0) res = A5 * xx;
+ sra %o5,7,%o1 ! (3_0) ind0 = hx >> 7;
+ sub %l1,512,%o3 ! (3_0) sqrt_exp -= 512;
+ fand %f54,DC3,%f54 ! (2_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f40,%f20 ! (5_1) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (3_0) ind0 &= 0x1ff8;
+ lda [%l2]%asi,%f10 ! (4_0) ((float*)&res)[0] = ((float*)px)[0];
+ faddd %f34,A3,%f62 ! (0_0) res += A3;
+
+ add %o1,32,%o1 ! (3_0) ind0 += 32;
+ lda [%l2+4]%asi,%f11 ! (4_0) ((float*)&res)[1] = ((float*)px)[1];
+
+ and %i3,32,%i4 ! (3_0) bit &= 32;
+	and	%o1,-64,%o1		! (3_0) ind0 &= -64;
+ fsubd %f18,%f54,%f18 ! (2_0) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (3_0) ind1 = ind0;
+ faddd %f52,A4,%f54 ! (1_0) res += A4;
+
+ fmuld %f62,%f42,%f52 ! (0_0) res *= xx;
+ cmp %o5,_0x00100000 ! (3_0) hx ? 0x00100000
+ bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 )
+ faddd %f20,A1,%f12 ! (5_1) res += A1;
+.cont7:
+ lda [%l2]%asi,%o5 ! (4_0) hx = *(int*)px;
+ sllx %o3,52,%o3 ! (3_0) sqrt_exp <<= 52;
+ add %o7,%i4,%g1 ! (3_0) ind1 += bit;
+
+ fmuld %f18,%f50,%f18 ! (2_0) xx *= dtmp0;
+ add %l0,TBL,%l0 ! (5_1) pind = (char*)TBL + ind1;
+ stx %o3,[%fp+tmp4] ! (3_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (4_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f14,%f34 ! (1_0) res *= xx;
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%l0+16],%f36 ! (5_1) dexp_lo = ((double*)pind)[2];
+ fpadd32 %f44,DC2,%f54 ! (3_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld %f12,%f40,%f12 ! (5_1) res *= xx;
+	add	%g1,TBL,%o1	! (3_0) pind = (char*)TBL + ind1
+ ldd [%l0+8],%f40 ! (5_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (0_0) res += A2;
+
+ cmp %o5,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
+ bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 )
+ for %f50,A1,%f24 ! (4_0) res = vis_for(res,A1);
+.cont8:
+ sra %o5,21,%l1 ! (4_0) sqrt_exp = hx >> 21;
+ sra %o5,15,%l0 ! (4_0) bit = hx >> 15;
+ ldd [%o1],%f22 ! (3_0) dtmp0 = ((double*)pind)[0];
+
+ fmuld A5,%f18,%f52 ! (2_0) res = A5 * xx;
+ sra %o5,7,%o1 ! (4_0) ind0 = hx >> 7;
+ sub %l1,512,%o3 ! (4_0) sqrt_exp -= 512;
+ fand %f54,DC3,%f54 ! (3_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f42,%f20 ! (0_0) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (4_0) ind0 &= 0x1ff8;
+ lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0];
+ faddd %f34,A3,%f62 ! (1_0) res += A3;
+
+ fmuld %f40,%f12,%f34 ! (5_1) res = dexp_hi * res;
+ add %o1,32,%o1 ! (4_0) ind0 += 32;
+ lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1];
+
+ and %l0,32,%i4 ! (4_0) bit &= 32;
+ cmp %o5,_0x00100000 ! (4_0) hx ? 0x00100000
+ bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 )
+ fsubd %f44,%f54,%f44 ! (3_0) xx = (res - res_c);
+.cont9:
+	and	%o1,-64,%o1		! (4_0) ind0 &= -64;
+ faddd %f52,A4,%f54 ! (2_0) res += A4;
+
+ cmp counter,6
+ bl,pn %icc,.tail
+ or %g0,%o4,%l0
+
+ ba .main_loop
+ nop
+
+ .align 16
+.main_loop:
+ fmuld %f62,%f14,%f52 ! (1_1) res *= xx;
+ sll %o1,0,%i3 ! (4_1) ind1 = ind0;
+ add %i1,TBL,%i1 ! (0_1) pind = (char*)TBL + ind1;
+ faddd %f20,A1,%f12 ! (0_1) res += A1;
+
+ lda [%l2]%asi,%o5 ! (5_1) hx = *(int*)px;
+ sllx %o3,52,%o3 ! (4_1) sqrt_exp <<= 52;
+ add %i3,%i4,%i3 ! (4_1) ind1 += bit;
+ faddd %f34,%f36,%f60 ! (5_2) res += dexp_lo;
+
+ fmuld %f44,%f22,%f44 ! (3_1) xx *= dtmp0;
+ add %l2,stridex,%l2 ! px += stridex
+ stx %o3,[%fp+tmp5] ! (4_1) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (5_1) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f18,%f34 ! (2_1) res *= xx;
+ nop
+ ldd [%i1+16],%f36 ! (0_1) dexp_lo = ((double*)pind)[2];
+ fpadd32 %f24,DC2,%f54 ! (4_1) res_c = vis_fpadd32(res,DC2);
+
+ fmuld %f12,%f42,%f16 ! (0_1) res *= xx;
+ sra %o5,21,%l1 ! (5_1) sqrt_exp = hx >> 21;
+ ldd [%i1+8],%f42 ! (0_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (1_1) res += A2;
+
+ ldd [%fp+tmp0],%f48 ! (5_2) dsqrt_exp = *(double*)&sqrt_exp;
+ cmp %o5,_0x7ff00000 ! (5_1) hx ? 0x7ff00000
+ bge,pn %icc,.update10 ! (5_1) if ( hx >= 0x7ff00000 )
+ faddd %f60,%f40,%f60 ! (5_2) res += dexp_hi;
+.cont10:
+ lda [%l2]%asi,%f10 ! (0_0) ((float*)&res)[0] = ((float*)px)[0];
+ sra %o5,15,%i1 ! (5_1) bit = hx >> 15;
+ add %i3,TBL,%o7 ! (4_1) pind = (char*)TBL + ind1
+ for %f50,A1,%f40 ! (5_1) res = vis_for(res,A1);
+
+ fmuld A5,%f44,%f52 ! (3_1) res = A5 * xx;
+ sra %o5,7,%o1 ! (5_1) ind0 = hx >> 7;
+ ldd [%o7],%f22 ! (4_1) dtmp0 = ((double*)pind)[0];
+ fand %f54,DC3,%f54 ! (4_1) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f14,%f20 ! (1_1) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (5_1) ind0 &= 0x1ff8;
+ sub %l1,512,%o3 ! (5_1) sqrt_exp -= 512;
+ faddd %f34,A3,%f62 ! (2_1) res += A3;
+
+ fpadd32 %f48,%f60,%f12 ! (5_2) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+ add %o1,32,%o1 ! (5_1) ind0 += 32;
+ st %f12,[%l0] ! (5_2) ((float*)py)[0] = ((float*)&dtmp0)[0];
+ fmuld %f42,%f16,%f34 ! (0_1) res = dexp_hi * res;
+
+ lda [%l2+4]%asi,%f11 ! (0_0) ((float*)&res)[1] = ((float*)px)[1];
+ and %i1,32,%i4 ! (5_1) bit &= 32;
+ and %o1,-64,%o1 ! (5_1) ind0 &= -8;
+ fsubd %f24,%f54,%f24 ! (4_1) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (5_1) ind1 = ind0;
+ add %l0,stridey,%i1 ! py += stridey
+ st %f13,[%l0+4] ! (5_2) ((float*)py)[1] = ((float*)&dtmp0)[1];
+ faddd %f52,A4,%f54 ! (3_1) res += A4;
+
+ fmuld %f62,%f18,%f52 ! (2_1) res *= xx;
+ cmp %o5,_0x00100000 ! (5_1) hx ? 0x00100000
+ bl,pn %icc,.update11 ! (5_1) if ( hx < 0x00100000 )
+ faddd %f20,A1,%f12 ! (1_1) res += A1;
+.cont11:
+ sllx %o3,52,%o3 ! (5_1) sqrt_exp <<= 52;
+ add %o7,%i4,%l0 ! (5_1) ind1 += bit;
+ lda [%l2]%asi,%o5 ! (0_0) hx = *(int*)px;
+ faddd %f34,%f36,%f60 ! (0_1) res += dexp_lo;
+
+ fmuld %f24,%f22,%f24 ! (4_1) xx *= dtmp0;
+ add %i2,TBL,%i2 ! (1_1) pind = (char*)TBL + ind1;
+ stx %o3,[%fp+tmp0] ! (5_1) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (0_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f44,%f34 ! (3_1) res *= xx;
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%i2+16],%f36 ! (1_1) dexp_lo = ((double*)pind)[2];
+ fpadd32 %f40,DC2,%f54 ! (5_1) res_c = vis_fpadd32(res,DC2);
+
+ fmuld %f12,%f14,%f16 ! (1_1) res *= xx;
+ sra %o5,21,%l1 ! (0_0) sqrt_exp = hx >> 21;
+ ldd [%i2+8],%f14 ! (1_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (2_1) res += A2;
+
+ ldd [%fp+tmp1],%f48 ! (0_1) dsqrt_exp = *(double*)&sqrt_exp;
+ cmp %o5,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
+ bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7ff00000 )
+ faddd %f60,%f42,%f60 ! (0_1) res += dexp_hi;
+.cont12:
+ lda [%l2]%asi,%f10 ! (1_0) ((float*)&res)[0] = ((float*)px)[0];
+ sra %o5,15,%i2 ! (0_0) bit = hx >> 15;
+ add %l0,TBL,%o7 ! (5_1) pind = (char*)TBL + ind1
+ for %f50,A1,%f42 ! (0_0) res = vis_for(res,A1);
+
+ fmuld A5,%f24,%f52 ! (4_1) res = A5 * xx;
+ sra %o5,7,%o1 ! (0_0) ind0 = hx >> 7;
+ ldd [%o7],%f22 ! (5_1) dtmp0 = ((double*)pind)[0];
+ fand %f54,DC3,%f54 ! (5_1) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f18,%f20 ! (2_1) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (0_0) ind0 &= 0x1ff8;
+ sub %l1,512,%o3 ! (0_0) sqrt_exp -= 512;
+ faddd %f34,A3,%f62 ! (3_1) res += A3;
+
+ fpadd32 %f48,%f60,%f12 ! (0_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+ add %o1,32,%o1 ! (0_0) ind0 += 32;
+ st %f12,[%i1] ! (0_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+ fmuld %f14,%f16,%f34 ! (1_1) res = dexp_hi * res;
+
+ lda [%l2+4]%asi,%f11 ! (1_0) ((float*)&res)[1] = ((float*)px)[1];
+ and %i2,32,%i4 ! (0_0) bit &= 32;
+ and %o1,-64,%o1 ! (0_0) ind0 &= -8;
+ fsubd %f40,%f54,%f40 ! (5_1) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (0_0) ind1 = ind0;
+ add %i1,stridey,%i2 ! py += stridey
+ st %f13,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+ faddd %f52,A4,%f54 ! (4_1) res += A4;
+
+ fmuld %f62,%f44,%f52 ! (3_1) res *= xx;
+ cmp %o5,_0x00100000 ! (0_0) hx ? 0x00100000
+ bl,pn %icc,.update13 ! (0_0) if ( hx < 0x00100000 )
+ faddd %f20,A1,%f12 ! (2_1) res += A1;
+.cont13:
+ lda [%l2]%asi,%o5 ! (1_0) hx = *(int*)px;
+ sllx %o3,52,%o3 ! (0_0) sqrt_exp <<= 52;
+ add %o7,%i4,%i1 ! (0_0) ind1 += bit;
+ faddd %f34,%f36,%f60 ! (1_1) res += dexp_lo;
+
+ fmuld %f40,%f22,%f40 ! (5_1) xx *= dtmp0;
+ add %g5,TBL,%g5 ! (2_1) pind = (char*)TBL + ind1;
+ stx %o3,[%fp+tmp1] ! (0_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (1_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f24,%f34 ! (4_1) res *= xx;
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%g5+16],%f36 ! (2_1) dexp_lo = ((double*)pind)[2];
+ fpadd32 %f42,DC2,%f54 ! (0_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld %f12,%f18,%f16 ! (2_1) res *= xx;
+ sra %o5,21,%l1 ! (1_0) sqrt_exp = hx >> 21;
+ ldd [%g5+8],%f18 ! (2_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (3_1) res += A2;
+
+ ldd [%fp+tmp2],%f48 ! (1_1) dsqrt_exp = *(double*)&sqrt_exp;
+ cmp %o5,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
+ bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7ff00000 )
+ faddd %f60,%f14,%f60 ! (1_1) res += dexp_hi;
+.cont14:
+ lda [%l2]%asi,%f10 ! (2_0) ((float*)&res)[0] = ((float*)px)[0];
+ sra %o5,15,%g5 ! (1_0) bit = hx >> 15;
+ add %i1,TBL,%o7 ! (0_0) pind = (char*)TBL + ind1
+ for %f50,A1,%f14 ! (1_0) res = vis_for(res,A1);
+
+ fmuld A5,%f40,%f52 ! (5_1) res = A5 * xx;
+ sra %o5,7,%o1 ! (1_0) ind0 = hx >> 7;
+ ldd [%o7],%f22 ! (0_0) dtmp0 = ((double*)pind)[0];
+ fand %f54,DC3,%f54 ! (0_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f44,%f20 ! (3_1) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (1_0) ind0 &= 0x1ff8;
+ sub %l1,512,%o3 ! (1_0) sqrt_exp -= 512;
+ faddd %f34,A3,%f62 ! (4_1) res += A3;
+
+ fpadd32 %f48,%f60,%f12 ! (1_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+ add %o1,32,%o1 ! (1_0) ind0 += 32;
+ st %f12,[%i2] ! (1_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+ fmuld %f18,%f16,%f34 ! (2_1) res = dexp_hi * res;
+
+ lda [%l2+4]%asi,%f11 ! (2_0) ((float*)&res)[1] = ((float*)px)[1];
+ and %g5,32,%i4 ! (1_0) bit &= 32;
+ and %o1,-64,%o1 ! (1_0) ind0 &= -8;
+ fsubd %f42,%f54,%f42 ! (0_0) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (1_0) ind1 = ind0;
+ add %i2,stridey,%g5 ! py += stridey
+ st %f13,[%i2+4] ! (1_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+ faddd %f52,A4,%f54 ! (5_1) res += A4;
+
+ fmuld %f62,%f24,%f52 ! (4_1) res *= xx;
+ cmp %o5,_0x00100000 ! (1_0) hx ? 0x00100000
+ bl,pn %icc,.update15 ! (1_0) if ( hx < 0x00100000 )
+ faddd %f20,A1,%f12 ! (3_1) res += A1;
+.cont15:
+ lda [%l2]%asi,%o5 ! (2_0) hx = *(int*)px;
+ sllx %o3,52,%o3 ! (1_0) sqrt_exp <<= 52;
+ add %o7,%i4,%i2 ! (1_0) ind1 += bit;
+ faddd %f34,%f36,%f60 ! (2_1) res += dexp_lo;
+
+ fmuld %f42,%f22,%f42 ! (0_0) xx *= dtmp0;
+ add %g1,TBL,%g1 ! (3_1) pind = (char*)TBL + ind1;
+ stx %o3,[%fp+tmp2] ! (1_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (2_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f40,%f34 ! (5_1) res *= xx;
+ fpadd32 %f14,DC2,%f54 ! (1_0) res_c = vis_fpadd32(res,DC2);
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%g1+16],%f36 ! (3_1) dexp_lo = ((double*)pind)[2];
+
+ fmuld %f12,%f44,%f16 ! (3_1) res *= xx;
+ sra %o5,21,%l1 ! (2_0) sqrt_exp = hx >> 21;
+ ldd [%g1+8],%f44 ! (3_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (4_1) res += A2;
+
+ ldd [%fp+tmp3],%f48 ! (2_1) dsqrt_exp = *(double*)&sqrt_exp;
+ cmp %o5,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
+ bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7ff00000 )
+ faddd %f60,%f18,%f60 ! (2_1) res += dexp_hi;
+.cont16:
+ lda [%l2]%asi,%f10 ! (3_0) ((float*)&res)[0] = ((float*)px)[0];
+ sra %o5,15,%g1 ! (2_0) bit = hx >> 15;
+ add %i2,TBL,%o7 ! (1_0) pind = (char*)TBL + ind1
+ for %f50,A1,%f18 ! (2_0) res = vis_for(res,A1);
+
+ fmuld A5,%f42,%f52 ! (0_0) res = A5 * xx;
+ sra %o5,7,%o1 ! (2_0) ind0 = hx >> 7;
+ ldd [%o7],%f22 ! (1_0) dtmp0 = ((double*)pind)[0];
+ fand %f54,DC3,%f54 ! (1_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f24,%f20 ! (4_1) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (2_0) ind0 &= 0x1ff8;
+ sub %l1,512,%o3 ! (2_0) sqrt_exp -= 512;
+ faddd %f34,A3,%f62 ! (5_1) res += A3;
+
+ fpadd32 %f48,%f60,%f12 ! (2_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+ add %o1,32,%o1 ! (2_0) ind0 += 32;
+ st %f12,[%g5] ! (2_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+ fmuld %f44,%f16,%f34 ! (3_1) res = dexp_hi * res;
+
+ lda [%l2+4]%asi,%f11 ! (3_0) ((float*)&res)[1] = ((float*)px)[1];
+ and %g1,32,%i4 ! (2_0) bit &= 32;
+ and %o1,-64,%o1 ! (2_0) ind0 &= -8;
+ fsubd %f14,%f54,%f14 ! (1_0) xx = (res - res_c);
+
+ sll %o1,0,%o7 ! (2_0) ind1 = ind0;
+ add %g5,stridey,%g1 ! py += stridey
+ st %f13,[%g5+4] ! (2_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+ faddd %f52,A4,%f54 ! (0_0) res += A4;
+
+ fmuld %f62,%f40,%f52 ! (5_1) res *= xx;
+ cmp %o5,_0x00100000 ! (2_0) hx ? 0x00100000
+ bl,pn %icc,.update17 ! (2_0) if ( hx < 0x00100000 )
+ faddd %f20,A1,%f12 ! (4_1) res += A1;
+.cont17:
+ lda [%l2]%asi,%o5 ! (3_0) hx = *(int*)px;
+ sllx %o3,52,%o3 ! (2_0) sqrt_exp <<= 52;
+ add %o7,%i4,%g5 ! (2_0) ind1 += bit;
+ faddd %f34,%f36,%f60 ! (3_1) res += dexp_lo;
+
+ fmuld %f14,%f22,%f14 ! (1_0) xx *= dtmp0;
+ add %i3,TBL,%i3 ! (4_1) pind = (char*)TBL + ind1;
+ stx %o3,[%fp+tmp3] ! (2_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (3_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f42,%f34 ! (0_0) res *= xx;
+ fpadd32 %f18,DC2,%f54 ! (2_0) res_c = vis_fpadd32(res,DC2);
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%i3+16],%f36 ! (4_1) dexp_lo = ((double*)pind)[2];
+
+ fmuld %f12,%f24,%f16 ! (4_1) res *= xx;
+ sra %o5,21,%l1 ! (3_0) sqrt_exp = hx >> 21;
+ ldd [%i3+8],%f24 ! (4_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (5_1) res += A2;
+
+ ldd [%fp+tmp4],%f48 ! (3_1) dsqrt_exp = *(double*)&sqrt_exp;
+ cmp %o5,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
+ bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7ff00000 )
+ faddd %f60,%f44,%f60 ! (3_1) res += dexp_hi;
+.cont18:
+ lda [%l2]%asi,%f10 ! (4_0) ((float*)&res)[0] = ((float*)px)[0];
+ sra %o5,15,%i3 ! (3_0) bit = hx >> 15;
+ add %g5,TBL,%o7 ! (2_0) pind = (char*)TBL + ind1
+ for %f50,A1,%f44 ! (3_0) res = vis_for(res,A1);
+
+ fmuld A5,%f14,%f52 ! (1_0) res = A5 * xx;
+ sra %o5,7,%o1 ! (3_0) ind0 = hx >> 7;
+ ldd [%o7],%f22 ! (2_0) dtmp0 = ((double*)pind)[0];
+ fand %f54,DC3,%f54 ! (2_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f40,%f20 ! (5_1) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (3_0) ind0 &= 0x1ff8;
+ sub %l1,512,%o3 ! (3_0) sqrt_exp -= 512;
+ faddd %f34,A3,%f62 ! (0_0) res += A3;
+
+ fpadd32 %f48,%f60,%f12 ! (3_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+ add %o1,32,%o1 ! (3_0) ind0 += 32;
+ st %f12,[%g1] ! (3_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+ fmuld %f24,%f16,%f34 ! (4_1) res = dexp_hi * res;
+
+ lda [%l2+4]%asi,%f11 ! (4_0) ((float*)&res)[1] = ((float*)px)[1];
+ and %i3,32,%i4 ! (3_0) bit &= 32;
+ and %o1,-64,%o1 ! (3_0) ind0 &= -8;
+ fsubd %f18,%f54,%f18 ! (2_0) xx = (res - res_c);
+
+ or %g0,%o1,%o7 ! (3_0) ind1 = ind0;
+ add %g1,stridey,%i3 ! py += stridey
+ st %f13,[%g1+4] ! (3_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+ faddd %f52,A4,%f54 ! (1_0) res += A4;
+
+ fmuld %f62,%f42,%f52 ! (0_0) res *= xx;
+ cmp %o5,_0x00100000 ! (3_0) hx ? 0x00100000
+ bl,pn %icc,.update19 ! (3_0) if ( hx < 0x00100000 )
+ faddd %f20,A1,%f12 ! (5_1) res += A1;
+.cont19:
+ lda [%l2]%asi,%o5 ! (4_0) hx = *(int*)px;
+ sllx %o3,52,%o3 ! (3_0) sqrt_exp <<= 52;
+ add %o7,%i4,%g1 ! (3_0) ind1 += bit;
+ faddd %f34,%f36,%f60 ! (4_1) res += dexp_lo;
+
+ fmuld %f18,%f22,%f18 ! (2_0) xx *= dtmp0;
+ add %l0,TBL,%l0 ! (5_1) pind = (char*)TBL + ind1;
+ stx %o3,[%fp+tmp4] ! (3_0) dsqrt_exp = *(double*)&sqrt_exp;
+ fand %f10,DC0,%f50 ! (4_0) res = vis_fand(res,DC0);
+
+ fmuld %f54,%f14,%f34 ! (1_0) res *= xx;
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%l0+16],%f36 ! (5_1) dexp_lo = ((double*)pind)[2];
+ fpadd32 %f44,DC2,%f54 ! (3_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld %f12,%f40,%f16 ! (5_1) res *= xx;
+ sra %o5,21,%l1 ! (4_0) sqrt_exp = hx >> 21;
+ ldd [%l0+8],%f40 ! (5_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (0_0) res += A2;
+
+ ldd [%fp+tmp5],%f48 ! (4_1) dsqrt_exp = *(double*)&sqrt_exp;
+ cmp %o5,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
+ bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7ff00000 )
+ faddd %f60,%f24,%f60 ! (4_1) res += dexp_hi;
+.cont20:
+ lda [%l2]%asi,%f10 ! (5_0) ((float*)&res)[0] = ((float*)px)[0];
+ sra %o5,15,%l0 ! (4_0) bit = hx >> 15;
+ add %g1,TBL,%o7 ! (3_0) pind = (char*)TBL + ind1
+ for %f50,A1,%f24 ! (4_0) res = vis_for(res,A1);
+
+ fmuld A5,%f18,%f52 ! (2_0) res = A5 * xx;
+ sra %o5,7,%o1 ! (4_0) ind0 = hx >> 7;
+ ldd [%o7],%f22 ! (3_0) dtmp0 = ((double*)pind)[0];
+ fand %f54,DC3,%f54 ! (3_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f20,%f42,%f20 ! (0_0) res *= xx;
+ and %o1,_0x00001ff8,%o1 ! (4_0) ind0 &= 0x1ff8;
+ sub %l1,512,%o3 ! (4_0) sqrt_exp -= 512;
+ faddd %f34,A3,%f62 ! (1_0) res += A3;
+
+ lda [%l2+4]%asi,%f11 ! (5_0) ((float*)&res)[1] = ((float*)px)[1];
+ add %o1,32,%o1 ! (4_0) ind0 += 32;
+ fpadd32 %f48,%f60,%f12 ! (4_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+ fmuld %f40,%f16,%f34 ! (5_1) res = dexp_hi * res;
+
+ and %l0,32,%i4 ! (4_0) bit &= 32;
+ cmp %o5,_0x00100000 ! (4_0) hx ? 0x00100000
+ bl,pn %icc,.update21 ! (4_0) if ( hx < 0x00100000 )
+ fsubd %f44,%f54,%f44 ! (3_0) xx = (res - res_c);
+.cont21:
+ and %o1,-64,%o1 ! (4_0) ind0 &= -8;
+ sub counter,6,counter ! counter
+ st %f12,[%i3] ! (4_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+ faddd %f52,A4,%f54 ! (2_0) res += A4;
+
+ st %f13,[%i3+4] ! (4_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+ cmp counter,6
+ bge,pt %icc,.main_loop
+ add %i3,stridey,%l0 ! py += stridey
+
+.tail:
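+! Fewer than six elements are left: drain the pipeline, finishing and
+! storing the results already in flight one at a time and leaving as soon
+! as counter is exhausted.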
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%l0,%o4
+
+ fmuld %f62,%f14,%f52 ! (1_1) res *= xx;
+ add %i1,TBL,%i1 ! (0_1) pind = (char*)TBL + ind1;
+ faddd %f20,A1,%f12 ! (0_1) res += A1;
+
+ faddd %f34,%f36,%f60 ! (5_2) res += dexp_lo;
+
+ fmuld %f44,%f22,%f44 ! (3_1) xx *= dtmp0;
+ add %l2,stridex,%l2 ! px += stridex
+
+ fmuld %f54,%f18,%f34 ! (2_1) res *= xx;
+ ldd [%i1+16],%f36 ! (0_1) dexp_lo = ((double*)pind)[2];
+
+ fmuld %f12,%f42,%f12 ! (0_1) res *= xx;
+ ldd [%i1+8],%f42 ! (0_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (1_1) res += A2;
+
+ ldd [%fp+tmp0],%f48 ! (5_2) dsqrt_exp = *(double*)&sqrt_exp;
+ faddd %f60,%f40,%f60 ! (5_2) res += dexp_hi;
+
+ fmuld A5,%f44,%f52 ! (3_1) res = A5 * xx;
+
+ fmuld %f20,%f14,%f20 ! (1_1) res *= xx;
+ faddd %f34,A3,%f62 ! (2_1) res += A3;
+
+ fmuld %f42,%f12,%f34 ! (0_1) res = dexp_hi * res;
+ fpadd32 %f48,%f60,%f12 ! (5_2) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+
+ st %f12,[%l0] ! (5_2) ((float*)py)[0] = ((float*)&dtmp0)[0];
+
+ add %l0,stridey,%i1 ! py += stridey
+ st %f13,[%l0+4] ! (5_2) ((float*)py)[1] = ((float*)&dtmp0)[1];
+ faddd %f52,A4,%f54 ! (3_1) res += A4;
+
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%i1,%o4
+
+ fmuld %f62,%f18,%f52 ! (2_1) res *= xx;
+ faddd %f20,A1,%f12 ! (1_1) res += A1;
+
+ faddd %f34,%f36,%f60 ! (0_1) res += dexp_lo;
+
+ add %i2,TBL,%i2 ! (1_1) pind = (char*)TBL + ind1;
+
+ fmuld %f54,%f44,%f34 ! (3_1) res *= xx;
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%i2+16],%f36 ! (1_1) dexp_lo = ((double*)pind)[2];
+
+ fmuld %f12,%f14,%f12 ! (1_1) res *= xx;
+ ldd [%i2+8],%f14 ! (1_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (2_1) res += A2;
+
+ ldd [%fp+tmp1],%f48 ! (0_1) dsqrt_exp = *(double*)&sqrt_exp;
+ faddd %f60,%f42,%f60 ! (0_1) res += dexp_hi;
+
+ fmuld %f20,%f18,%f20 ! (2_1) res *= xx;
+ faddd %f34,A3,%f62 ! (3_1) res += A3;
+
+ fmuld %f14,%f12,%f34 ! (1_1) res = dexp_hi * res;
+ fpadd32 %f48,%f60,%f12 ! (0_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+
+ st %f12,[%i1] ! (0_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+
+ add %i1,stridey,%i2 ! py += stridey
+ st %f13,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%i2,%o4
+
+ fmuld %f62,%f44,%f52 ! (3_1) res *= xx;
+ faddd %f20,A1,%f12 ! (2_1) res += A1;
+
+ faddd %f34,%f36,%f60 ! (1_1) res += dexp_lo;
+
+ add %g5,TBL,%g5 ! (2_1) pind = (char*)TBL + ind1;
+
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%g5+16],%f36 ! (2_1) dexp_lo = ((double*)pind)[2];
+
+ fmuld %f12,%f18,%f12 ! (2_1) res *= xx;
+ ldd [%g5+8],%f18 ! (2_1) dexp_hi = ((double*)pind)[1];
+ faddd %f52,A2,%f20 ! (3_1) res += A2;
+
+ ldd [%fp+tmp2],%f48 ! (1_1) dsqrt_exp = *(double*)&sqrt_exp;
+ faddd %f60,%f14,%f60 ! (1_1) res += dexp_hi;
+
+ fmuld %f20,%f44,%f20 ! (3_1) res *= xx;
+
+ fmuld %f18,%f12,%f34 ! (2_1) res = dexp_hi * res;
+ fpadd32 %f48,%f60,%f12 ! (1_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+
+ st %f12,[%i2] ! (1_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+
+ add %i2,stridey,%g5 ! py += stridey
+ st %f13,[%i2+4] ! (1_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%g5,%o4
+
+ faddd %f20,A1,%f12 ! (3_1) res += A1;
+
+ faddd %f34,%f36,%f60 ! (2_1) res += dexp_lo;
+
+ add %g1,TBL,%g1 ! (3_1) pind = (char*)TBL + ind1;
+
+ add %l2,stridex,%l2 ! px += stridex
+ ldd [%g1+16],%f36 ! (3_1) dexp_lo = ((double*)pind)[2];
+
+ fmuld %f12,%f44,%f12 ! (3_1) res *= xx;
+ ldd [%g1+8],%f44 ! (3_1) dexp_hi = ((double*)pind)[1];
+
+ ldd [%fp+tmp3],%f48 ! (2_1) dsqrt_exp = *(double*)&sqrt_exp;
+ faddd %f60,%f18,%f60 ! (2_1) res += dexp_hi;
+
+ fmuld %f44,%f12,%f34 ! (3_1) res = dexp_hi * res;
+ fpadd32 %f48,%f60,%f12 ! (2_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+
+ st %f12,[%g5] ! (2_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+
+ add %g5,stridey,%g1 ! py += stridey
+ st %f13,[%g5+4] ! (2_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+
+ subcc counter,1,counter
+ bneg .begin
+ or %g0,%g1,%o4
+
+ faddd %f34,%f36,%f60 ! (3_1) res += dexp_lo;
+
+ add %l2,stridex,%l2 ! px += stridex
+
+ ldd [%fp+tmp4],%f48 ! (3_1) dsqrt_exp = *(double*)&sqrt_exp;
+ faddd %f60,%f44,%f60 ! (3_1) res += dexp_hi;
+
+ fpadd32 %f48,%f60,%f12 ! (3_1) dtmp0 = vis_fpadd32(dsqrt_exp,res);
+
+ st %f12,[%g1] ! (3_1) ((float*)py)[0] = ((float*)&dtmp0)[0];
+
+ add %g1,stridey,%i3 ! py += stridey
+ st %f13,[%g1+4] ! (3_1) ((float*)py)[1] = ((float*)&dtmp0)[1];
+
+ ba .begin
+ or %g0,%i3,%o4
+
+ .align 16
+.spec:
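+! One-element-at-a-time fallback: compute this element with the hardware
+! fsqrtd and rejoin the loop at the next one.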
+ fsqrtd %f10,%f10
+ add %l2,stridex,%l2
+
+ st %f10,[%o4]
+ st %f11,[%o4+4]
+
+ add %o4,stridey,%o4
+ ba .begin1
+ sub counter,1,counter
+
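+! The .updateN handlers catch an argument (hx >= 0x7ff00000 or
+! hx < 0x00100000) just fetched for one of the pipeline slots.  If that
+! element is due to be stored in the current pass, its address and the
+! number of results still owed are parked in tmp_px/tmp_counter and
+! counter is trimmed so the pipeline drains just before it; .begin then
+! restarts from the saved position.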
+ .align 16
+.update0:
+ cmp counter,1
+ ble .cont0
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont0
+ or %g0,1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ ble .cont1
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont1
+ or %g0,1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble .cont2
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont2
+ or %g0,2,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble .cont3
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont3
+ or %g0,2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ ble .cont4
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont4
+ or %g0,3,counter
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble .cont5
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont5
+ or %g0,3,counter
+
+ .align 16
+.update6:
+ cmp counter,4
+ ble .cont6
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont6
+ or %g0,4,counter
+
+ .align 16
+.update7:
+ cmp counter,4
+ ble .cont7
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont7
+ or %g0,4,counter
+
+ .align 16
+.update8:
+ cmp counter,5
+ ble .cont8
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont8
+ or %g0,5,counter
+
+ .align 16
+.update9:
+ cmp counter,5
+ ble .cont9
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont9
+ or %g0,5,counter
+
+ .align 16
+.update10:
+ cmp counter,6
+ ble .cont10
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont10
+ or %g0,6,counter
+
+ .align 16
+.update11:
+ cmp counter,6
+ ble .cont11
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont11
+ or %g0,6,counter
+
+ .align 16
+.update12:
+ cmp counter,7
+ ble .cont12
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont12
+ or %g0,7,counter
+
+ .align 16
+.update13:
+ cmp counter,7
+ ble .cont13
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont13
+ or %g0,7,counter
+
+ .align 16
+.update14:
+ cmp counter,8
+ ble .cont14
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont14
+ or %g0,8,counter
+
+ .align 16
+.update15:
+ cmp counter,8
+ ble .cont15
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont15
+ or %g0,8,counter
+
+ .align 16
+.update16:
+ cmp counter,9
+ ble .cont16
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,9,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont16
+ or %g0,9,counter
+
+ .align 16
+.update17:
+ cmp counter,9
+ ble .cont17
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,9,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont17
+ or %g0,9,counter
+
+ .align 16
+.update18:
+ cmp counter,10
+ ble .cont18
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,10,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont18
+ or %g0,10,counter
+
+ .align 16
+.update19:
+ cmp counter,10
+ ble .cont19
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,10,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont19
+ or %g0,10,counter
+
+ .align 16
+.update20:
+ cmp counter,11
+ ble .cont20
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,11,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont20
+ or %g0,11,counter
+
+ .align 16
+.update21:
+ cmp counter,11
+ ble .cont21
+ nop
+
+ sub %l2,stridex,%i5
+ stx %i5,[%fp+tmp_px]
+
+ sub counter,11,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont21
+ or %g0,11,counter
+
+.exit:
+ ret
+ restore
+
+ SET_SIZE(__vsqrt)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsqrtf.S b/usr/src/libm/src/mvec/vis/__vsqrtf.S
new file mode 100644
index 0000000..0f321f7
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsqrtf.S
@@ -0,0 +1,58 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsqrtf.S 1.4 06/01/23 SMI"
+
+ .file "__vsqrtf.S"
+
+#include "libm.h"
+
+ .section ".text"
+ .file "__vsqrtf.S"
+
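+! Plain strided loop: one hardware fsqrts per element (%o0 = n, %o1 = x,
+! %o2 = stridex, %o3 = y, %o4 = stridey; strides are scaled to bytes
+! below), with the next element's load issued while the square root
+! completes.
+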
+ ENTRY(__vsqrtf)
+
+ lda [%o1]0x82,%f0
+ subcc %o0,1,%o0
+ bneg,pn %icc,.exit
+ sll %o2,2,%o2
+ ba .loop
+ sll %o4,2,%o4
+
+ .align 16
+.loop:
+ fsqrts %f0,%f2
+ lda [%o1+%o2]0x82,%f0
+ add %o1,%o2,%o1
+ subcc %o0,1,%o0
+ st %f2,[%o3]
+ bpos,pt %icc,.loop
+ add %o3,%o4,%o3
+.exit:
+ retl
+ nop
+
+ SET_SIZE(__vsqrtf)
+
diff --git a/usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S b/usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S
new file mode 100644
index 0000000..ca41db5
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vsqrtf_ultra3.S
@@ -0,0 +1,993 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vsqrtf_ultra3.S 1.6 06/01/23 SMI"
+
+ .file "__vsqrtf_ultra3.S"
+
+#include "libm.h"
+#if defined(LIBMVEC_SO_BUILD)
+ .weak __vsqrtf
+ .type __vsqrtf,#function
+ __vsqrtf = __vsqrtf_ultra3
+#endif
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01
+ .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01
+ .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff
+ .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000
+ .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000
+
+#define DC0 %f6
+#define DC1 %f4
+#define DC2 %f2
+#define K2 %f38
+#define K1 %f36
+#define TBL %l2
+#define stridex %l3
+#define stridey %l4
+#define _0x1ff0 %l5
+#define counter %l6
+#define _0x00800000 %l7
+#define _0x7f800000 %o0
+
+#define tmp_px STACK_BIAS-0x40
+#define tmp_counter STACK_BIAS-0x38
+#define tmp0 STACK_BIAS-0x30
+#define tmp1 STACK_BIAS-0x28
+#define tmp2 STACK_BIAS-0x20
+#define tmp3 STACK_BIAS-0x18
+#define tmp4 STACK_BIAS-0x10
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+!
+! x0 = *px;
+! ax = *(int*)px;
+! px += stridex;
+!
+! if( ax >= 0x7f800000 )
+! {
+! *py = sqrtf(x0);
+! py += stridey;
+! continue;
+! }
+! if( ax < 0x00800000 )
+! {
+! *py = sqrtf(x0);
+! py += stridey;
+! continue;
+! }
+!
+! db0 = (double)x0;
+! iexp0 = ax >> 24;
+! iexp0 += 0x3c0;
+! lexp0 = (long long)iexp0 << 52;
+!
+! db0 = vis_fand(db0,DC0);
+! db0 = vis_for(db0,DC1);
+! hi0 = vis_fand(db0,DC2);
+!
+! ax >>= 11;
+! si0 = ax & 0x1ff0;
+! dtmp0 = ((double*)((char*)TBL + si0))[0];
+! xx0 = (db0 - hi0);
+! xx0 *= dtmp0;
+! dtmp0 = ((double*)((char*)TBL + si0))[1];
+! res0 = K2 * xx0;
+! res0 += K1;
+! res0 *= xx0;
+! res0 += DC1;
+! res0 = dtmp0 * res0;
+! dtmp1 = *((double*)&lexp0);
+! res0 *= dtmp1;
+! fres0 = (float)res0;
+! *py = fres0;
+! py += stridey;
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
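+! Rationale for the table/polynomial step above (a sketch; it assumes each
+! 16-byte __vlibm_TBL_sqrtf entry holds the pair { 1/hi0, sqrt(hi0) },
+! with sqrt(2)-scaled entries selected by the low exponent bit that is
+! folded into si0).  With db0 the significand scaled into [1,2) and hi0
+! its top 8 mantissa bits:
+!
+!     u = (db0 - hi0) / hi0                         (this is xx0)
+!     sqrt(db0) = sqrt(hi0) * sqrt(1 + u)
+!              ~= sqrt(hi0) * (1 + u/2 - u*u/8)
+!
+! which is what DC1 = 1, K1 ~ 1/2 and K2 ~ -1/8 evaluate; the remaining
+! power of two is assembled directly in the exponent field (lexp0) and
+! applied by the final multiply with dtmp1.
+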
+ ENTRY(__vsqrtf_ultra3)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,o2)
+ PIC_SET(l7,__vlibm_TBL_sqrtf,l2)
+
+ st %i0,[%fp+tmp_counter]
+ sll %i2,2,stridex
+ or %g0,0xff8,%l5
+
+ stx %i1,[%fp+tmp_px]
+ sll %l5,1,_0x1ff0
+
+ ldd [%o2],K1
+ sll %i4,2,stridey
+
+ ldd [%o2+8],K2
+ or %g0,%i3,%g5
+
+ ldd [%o2+16],DC0
+ sethi %hi(0x7f800000),%o0
+
+ ldd [%o2+24],DC1
+ sethi %hi(0x00800000),%l7
+
+ ldd [%o2+32],DC2
+
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%i1
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+
+ lda [%i1]0x82,%o2 ! (2_0) ax = *(int*)px;
+
+ or %g0,%i1,%o7
+ lda [%i1]0x82,%f25 ! (2_0) x0 = *px;
+
+ cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ bge,pn %icc,.spec ! (2_0) if( ax >= 0x7f800000 )
+ nop
+
+ cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
+ bl,pn %icc,.spec ! (2_0) if( ax < 0x00800000 )
+ nop
+
+ fstod %f25,%f56 ! (2_0) db0 = (double)x0;
+
+ lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
+
+ sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
+
+ add %o7,stridex,%i1 ! px += stridex
+ add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
+ lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
+ fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
+
+ cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.update0 ! (3_0) if( ax >= 0x7f800000 )
+ nop
+.cont0:
+ sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
+
+ sra %o2,11,%i2 ! (2_0) ax >>= 11;
+ stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
+ for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
+
+ cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
+ bl,pn %icc,.update1 ! (3_0) if( ax < 0x00800000 )
+ nop
+.cont1:
+ fstod %f0,%f48 ! (3_0) db0 = (double)x0;
+
+ and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
+
+ add %i1,stridex,%i1 ! px += stridex
+ add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
+ fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
+
+ sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
+
+ lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
+ fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
+
+ add %o4,960,%i0 ! (3_0) iexp0 += 0x3c0;
+
+ cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000
+ bge,pn %icc,.update2 ! (4_1) if( ax >= 0x7f800000 )
+ nop
+.cont2:
+ fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0);
+ sllx %i0,52,%g1 ! (3_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ sra %o1,11,%l0 ! (3_1) ax >>= 11;
+ stx %g1,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0);
+ for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000
+ bl,pn %icc,.update3 ! (4_1) if( ax < 0x00800000 )
+ nop
+.cont3:
+ fstod %f13,%f50 ! (4_1) db0 = (double)x0;
+
+ fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0;
+ and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px;
+
+ add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0
+ fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2);
+
+ sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24;
+
+ add %i1,stridex,%o4 ! px += stridex
+ add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px;
+ fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0;
+ cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000
+ bge,pn %icc,.update4 ! (0_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0);
+.cont4:
+ sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ sra %o2,11,%i5 ! (4_1) ax >>= 11;
+ stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0);
+ for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1);
+
+ cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000
+ bl,pn %icc,.update5 ! (0_0) if( ax < 0x00800000 )
+ nop
+.cont5:
+ fstod %f17,%f56 ! (0_0) db0 = (double)x0;
+
+ fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0;
+ lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px;
+ faddd %f52,K1,%f52 ! (2_1) res0 += K1;
+
+ sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24;
+ and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0;
+ fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2);
+
+ add %o4,stridex,%i1 ! px += stridex
+
+ add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0;
+ add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0
+ lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px;
+ fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0;
+ cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000
+ bge,pn %icc,.update6 ! (1_0) if( ax >= 0x7f800000 )
+ fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0);
+.cont6:
+ fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0;
+ sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ sra %l1,11,%i4 ! (0_0) ax >>= 11;
+ stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0);
+ for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1);
+
+ cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000
+ bl,pn %icc,.update7 ! (1_0) if( ax < 0x00800000 )
+ nop
+.cont7:
+ fstod %f21,%f56 ! (1_0) db0 = (double)x0;
+
+ fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0;
+ and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px;
+ faddd %f50,K1,%f62 ! (3_1) res0 += K1;
+
+ add %g1,TBL,%i5 ! (0_0) (char*)TBL + si0
+ fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2);
+
+ sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24;
+ ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f58 ! (2_1) res0 += DC1;
+
+ add %i1,stridex,%o7 ! px += stridex
+ add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px;
+ fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0;
+ cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ bge,pn %icc,.update8 ! (2_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0);
+.cont8:
+ fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0;
+ sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0;
+ sra %i0,11,%g1 ! (1_0) ax >>= 11;
+ stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0);
+ for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
+ bl,pn %icc,.update9 ! (2_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0);
+ fstod %f25,%f56 ! (2_0) db0 = (double)x0;
+.cont9:
+ fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0;
+ and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0;
+ lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
+ faddd %f50,K1,%f34 ! (4_1) res0 += K1;
+
+ add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0
+ fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1;
+ sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
+ ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f54,DC1,%f58 ! (3_1) res0 += DC1;
+
+ add %o7,stridex,%i1 ! px += stridex
+ add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
+ lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
+ fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0;
+ cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.update10 ! (3_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0);
+.cont10:
+ fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0;
+ sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+
+ fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0;
+ sra %o2,11,%i2 ! (2_0) ax >>= 11;
+ stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
+ for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
+
+ cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
+ bl,pn %icc,.update11 ! (3_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0);
+ fstod %f0,%f48 ! (3_0) db0 = (double)x0;
+.cont11:
+ fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0;
+ and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
+ faddd %f50,K1,%f56 ! (0_0) res0 += K1;
+
+ add %i1,stridex,%i1 ! px += stridex
+ add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
+ fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1;
+ sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
+ ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f54 ! (4_1) res0 += DC1;
+
+ lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
+ fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
+
+ or %g0,%g5,%i3
+ cmp counter,5
+ bl,pn %icc,.tail
+ add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0;
+
+ ba .main_loop
+ sub counter,5,counter ! counter
+
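+! Software-pipelined main loop: five elements are in flight at once, one
+! pipeline stage apart; every pass loads five new arguments and stores
+! five results.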
+ .align 16
+.main_loop:
+ fmuld K2,%f30,%f60 ! (1_1) res0 = K2 * xx0;
+ cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000
+ bge,pn %icc,.update12 ! (4_1) if( ax >= 0x7f800000 )
+ fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0);
+.cont12:
+ fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0;
+ sllx %g5,52,%g5 ! (3_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f32,%f15 ! (2_2) fres0 = (float)res0;
+
+ fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0;
+ sra %o1,11,%l0 ! (3_1) ax >>= 11;
+ stx %g5,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0);
+ for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000
+ bl,pn %icc,.update13 ! (4_1) if( ax < 0x00800000 )
+ ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0);
+ fstod %f13,%f50 ! (4_1) db0 = (double)x0;
+.cont13:
+ fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0;
+ and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px;
+ faddd %f60,K1,%f32 ! (1_1) res0 += K1;
+
+ add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0
+ add %i3,stridey,%o3 ! py += stridey
+ st %f15,[%i3] ! (2_2) *py = fres0;
+ fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1;
+ sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24;
+ ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f34 ! (0_1) res0 += DC1;
+
+ add %i1,stridex,%o4 ! px += stridex
+ add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px;
+ fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0;
+ cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000
+ bge,pn %icc,.update14 ! (0_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0);
+.cont14:
+ fmuld %f32,%f30,%f48 ! (1_1) res0 *= xx0;
+ sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52;
+ ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f28,%f19 ! (3_2) fres0 = (float)res0;
+
+ fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0;
+ sra %o2,11,%i5 ! (4_1) ax >>= 11;
+ stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0);
+ for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1);
+
+ cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000
+ bl,pn %icc,.update15 ! (0_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0);
+ fstod %f17,%f56 ! (0_0) db0 = (double)x0;
+.cont15:
+ fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0;
+ add %o3,stridey,%g5 ! py += stridey
+ lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px;
+ faddd %f52,K1,%f52 ! (2_1) res0 += K1;
+
+ sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24;
+ and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0;
+ st %f19,[%o3] ! (3_2) *py = fres0;
+ fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1;
+ add %o4,stridex,%i1 ! px += stridex
+ ldd [%i4+8],%f60 ! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f48,DC1,%f58 ! (1_1) res0 += DC1;
+
+ add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0;
+ add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0
+ lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px;
+ fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0;
+ cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000
+ bge,pn %icc,.update16 ! (1_0) if( ax >= 0x7f800000 )
+ fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0);
+.cont16:
+ fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0;
+ sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f44,%f23 ! (4_2) fres0 = (float)res0;
+
+ fmuld %f60,%f58,%f44 ! (1_1) res0 = dtmp0 * res0;
+ sra %l1,11,%i4 ! (0_0) ax >>= 11;
+ stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0);
+ for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1);
+
+ cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000
+ bl,pn %icc,.update17 ! (1_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp4],%f34 ! (1_1) dtmp1 = *((double*)&lexp0);
+ fstod %f21,%f56 ! (1_0) db0 = (double)x0;
+.cont17:
+ fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0;
+ and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px;
+ faddd %f50,K1,%f62 ! (3_1) res0 += K1;
+
+ add %g1,TBL,%i5 ! (0_0) (char*)TBL + si0
+ add %g5,stridey,%g5 ! py += stridey
+ st %f23,[stridey+%o3] ! (4_2) *py = fres0;
+ fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f44,%f34,%f44 ! (1_1) res0 *= dtmp1;
+ sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24;
+ ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f58 ! (2_1) res0 += DC1;
+
+ add %i1,stridex,%o7 ! px += stridex
+ add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0;
+ lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px;
+ fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0;
+ cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
+ bge,pn %icc,.update18 ! (2_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0);
+.cont18:
+ fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0;
+ sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f40,%f27 ! (0_1) fres0 = (float)res0;
+
+ fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0;
+ sra %i0,11,%g1 ! (1_0) ax >>= 11;
+ stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0);
+ for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1);
+
+ cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
+ bl,pn %icc,.update19 ! (2_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0);
+ fstod %f25,%f56 ! (2_0) db0 = (double)x0;
+.cont19:
+ fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0;
+ and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0;
+ lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
+ faddd %f50,K1,%f34 ! (4_1) res0 += K1;
+
+ add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0
+ add %g5,stridey,%g1 ! py += stridey
+ st %f27,[%g5] ! (0_1) *py = fres0;
+ fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1;
+ sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
+ ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f54,DC1,%f58 ! (3_1) res0 += DC1;
+
+ add %o7,stridex,%i1 ! px += stridex
+ add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
+ lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
+ fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
+
+ fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0;
+ cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
+ bge,pn %icc,.update20 ! (3_0) if( ax >= 0x7f800000 )
+ fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0);
+.cont20:
+ fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0;
+ sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
+ ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
+ fdtos %f44,%f8 ! (1_1) fres0 = (float)res0;
+
+ fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0;
+ sra %o2,11,%i2 ! (2_0) ax >>= 11;
+ stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
+ for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
+
+ cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
+ bl,pn %icc,.update21 ! (3_0) if( ax < 0x00800000 )
+ ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0);
+ fstod %f0,%f48 ! (3_0) db0 = (double)x0;
+.cont21:
+ fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0;
+ and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
+ lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
+ faddd %f50,K1,%f56 ! (0_0) res0 += K1;
+
+ add %i1,stridex,%i1 ! px += stridex
+ add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
+ st %f8,[stridey+%g5] ! (1_1) *py = fres0;
+ fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
+
+ fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1;
+ sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
+ ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f54 ! (4_1) res0 += DC1;
+
+ add %g1,stridey,%i3 ! py += stridey
+ subcc counter,5,counter ! counter
+ lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
+ fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
+
+ bpos,pt %icc,.main_loop
+ add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0;
+
+ add counter,5,counter
+.tail:
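+! Fewer than five elements remain: finish and store the results already in
+! the pipeline one at a time, stopping as soon as counter runs out.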
+ subcc counter,1,counter
+ bneg,a .begin
+ or %g0,%i3,%g5
+
+ fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0;
+ fdtos %f32,%f15 ! (2_2) fres0 = (float)res0;
+
+ fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0;
+
+ ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0);
+
+ add %i3,stridey,%o3 ! py += stridey
+ st %f15,[%i3] ! (2_2) *py = fres0;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ or %g0,%o3,%g5
+
+ fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1;
+ ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
+ faddd %f52,DC1,%f34 ! (0_1) res0 += DC1;
+
+ fdtos %f28,%f19 ! (3_2) fres0 = (float)res0;
+
+ fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0;
+
+ ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0);
+
+ add %o3,stridey,%g5 ! py += stridey
+
+ st %f19,[%o3] ! (3_2) *py = fres0;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ nop
+
+ fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1;
+
+ fdtos %f44,%f23 ! (4_2) fres0 = (float)res0;
+
+ add %g5,stridey,%g5 ! py += stridey
+ st %f23,[stridey+%o3] ! (4_2) *py = fres0;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ nop
+
+ fdtos %f40,%f27 ! (0_1) fres0 = (float)res0;
+
+ st %f27,[%g5] ! (0_1) *py = fres0;
+
+ ba .begin
+ add %g5,stridey,%g5
+
+ .align 16
+.spec:
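+! ax >= 0x7f800000 or ax < 0x00800000 (Inf, NaN, negative, zero or
+! subnormal bit patterns): let the hardware fsqrts produce the result for
+! this element, then rejoin the loop.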
+ fsqrts %f25,%f25
+ sub counter,1,counter
+ add %i1,stridex,%i1
+ st %f25,[%g5]
+ ba .begin1
+ add %g5,stridey,%g5
+
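+! The .updateN handlers catch an argument (ax >= 0x7f800000 or
+! ax < 0x00800000) just fetched for one of the pipeline slots: a zero is
+! substituted for the loaded value and a benign constant for its integer
+! image so the stages already in flight stay harmless, the element's
+! address and the outstanding count are saved in tmp_px/tmp_counter,
+! counter is trimmed so the loop stops just before it, and .begin later
+! resumes there, sending the element through .spec.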
+ .align 16
+.update0:
+ cmp counter,1
+ ble .cont0
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o1
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont0
+ or %g0,1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ ble .cont1
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ clr %o1
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont1
+ or %g0,1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble .cont2
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont2
+ or %g0,2,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble .cont3
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont3
+ or %g0,2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ ble .cont4
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont4
+ or %g0,3,counter
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble .cont5
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ clr %l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont5
+ or %g0,3,counter
+
+ .align 16
+.update6:
+ cmp counter,4
+ ble .cont6
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont6
+ or %g0,4,counter
+
+ .align 16
+.update7:
+ cmp counter,4
+ ble .cont7
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ clr %i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont7
+ or %g0,4,counter
+
+ .align 16
+.update8:
+ cmp counter,5
+ ble .cont8
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont8
+ or %g0,5,counter
+
+ .align 16
+.update9:
+ cmp counter,5
+ ble .cont9
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont9
+ or %g0,5,counter
+
+ .align 16
+.update10:
+ cmp counter,6
+ ble .cont10
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont10
+ or %g0,6,counter
+
+ .align 16
+.update11:
+ cmp counter,6
+ ble .cont11
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ clr %o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont11
+ or %g0,6,counter
+
+ .align 16
+.update12:
+ cmp counter,2
+ ble .cont12
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont12
+ or %g0,2,counter
+
+ .align 16
+.update13:
+ cmp counter,2
+ ble .cont13
+ fzeros %f13
+
+ stx %i1,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont13
+ or %g0,2,counter
+
+ .align 16
+.update14:
+ cmp counter,3
+ ble .cont14
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont14
+ or %g0,3,counter
+
+ .align 16
+.update15:
+ cmp counter,3
+ ble .cont15
+ fzeros %f17
+
+ stx %o4,[%fp+tmp_px]
+ clr %l1
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont15
+ or %g0,3,counter
+
+ .align 16
+.update16:
+ cmp counter,4
+ ble .cont16
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont16
+ or %g0,4,counter
+
+ .align 16
+.update17:
+ cmp counter,4
+ ble .cont17
+ fzeros %f21
+
+ stx %i1,[%fp+tmp_px]
+ clr %i0
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont17
+ or %g0,4,counter
+
+ .align 16
+.update18:
+ cmp counter,5
+ ble .cont18
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont18
+ or %g0,5,counter
+
+ .align 16
+.update19:
+ cmp counter,5
+ ble .cont19
+ fzeros %f25
+
+ stx %o7,[%fp+tmp_px]
+ clr %o2
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont19
+ or %g0,5,counter
+
+ .align 16
+.update20:
+ cmp counter,6
+ ble .cont20
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ sethi %hi(0x7f800000),%o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont20
+ or %g0,6,counter
+
+ .align 16
+.update21:
+ cmp counter,6
+ ble .cont21
+ fzeros %f0
+
+ stx %i1,[%fp+tmp_px]
+ clr %o1
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ ba .cont21
+ or %g0,6,counter
+
+.exit:
+ ret
+ restore
+ SET_SIZE(__vsqrtf_ultra3)
+