Diffstat (limited to 'usr/src/libm/src/mvec/vis/__vrsqrt.S')
-rw-r--r--	usr/src/libm/src/mvec/vis/__vrsqrt.S	2156
1 files changed, 2156 insertions, 0 deletions
diff --git a/usr/src/libm/src/mvec/vis/__vrsqrt.S b/usr/src/libm/src/mvec/vis/__vrsqrt.S
new file mode 100644
index 0000000..08c9146
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vrsqrt.S
@@ -0,0 +1,2156 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vrsqrt.S 1.4 06/01/23 SMI"
+
+ .file "__vrsqrt.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0xbfe00000, 0x0000002f ! K1 =-5.00000000000005209867e-01;
+ .word 0x3fd80000, 0x00000058 ! K2 = 3.75000000000004884257e-01;
+ .word 0xbfd3ffff, 0xff444bc8 ! K3 =-3.12499999317136886551e-01;
+ .word 0x3fd17fff, 0xff5006fe ! K4 = 2.73437499359815081532e-01;
+ .word 0xbfcf80bb, 0xb33ef574 ! K5 =-2.46116125605037803130e-01;
+ .word 0x3fcce0af, 0xf8156949 ! K6 = 2.25606914648617522896e-01;
+
+ .word 0x001fffff, 0xffffffff ! DC0
+ .word 0x3fe00000, 0x00000000 ! DC1
+ .word 0x00002000, 0x00000000 ! DC2
+ .word 0x7fffc000, 0x00000000 ! DC3
+ .word 0x0007ffff, 0xffffffff ! DC4
+
+ .word 0x43200000, 0x00000000 ! D2ON51 = pow(2,51)
+ .word 0x3ff00000, 0x00000000 ! DONE = 1.0
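+
+! K1..K6 above are the coefficients of the polynomial correction evaluated
+! in the main loop (see the algorithm sketch below), and DC0..DC3 are bit
+! masks used in the argument reduction.  A rough reading of their roles
+! (an interpretation, not from the original comments):
+!	x & DC0			keep the mantissa plus the lowest exponent bit
+!	... | DC1		install the exponent of 0.5 -> res in [0.5,2)
+!	fpadd32(res,DC2)	add half of the table spacing (round to nearest)
+!	... & DC3		keep only the leading bits -> nearest table point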
+
+#define stridex %l5
+#define stridey %l7
+#define counter %l0
+#define TBL %l3
+#define _0x7ff00000 %o0
+#define _0x00100000 %o1
+
+#define DC0 %f56
+#define DC1 %f54
+#define DC2 %f48
+#define DC3 %f46
+#define K6 %f42
+#define K5 %f20
+#define K4 %f52
+#define K3 %f50
+#define K2 %f14
+#define K1 %f12
+#define DONE %f4
+
+#define tmp_counter %g5
+#define tmp_px %o5
+
+#define tmp0 STACK_BIAS-0x40
+#define tmp1 STACK_BIAS-0x38
+#define tmp2 STACK_BIAS-0x30
+#define tmp3 STACK_BIAS-0x28
+#define tmp4 STACK_BIAS-0x20
+#define tmp5 STACK_BIAS-0x18
+#define tmp6 STACK_BIAS-0x10
+#define tmp7 STACK_BIAS-0x08
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x40
+
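+! Calling convention (inferred from the register use below; the original
+! source does not state it): this appears to implement
+!
+!	void __vrsqrt(int n, double *x, int stridex, double *y, int stridey);
+!
+! computing y[i*stridey] = 1/sqrt(x[i*stridex]) for i = 0..n-1, with
+! %i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey; both strides
+! are scaled by 8 at entry (sll ...,3), so they count doubles, not bytes.
+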
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! ((float*)&res)[0] = ((float*)px)[0];
+! ((float*)&res)[1] = ((float*)px)[1];
+! hx = *(int*)px;
+! if ( hx >= 0x7ff00000 )
+! {
+! res = DONE / res;
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! px += stridex;
+! py += stridey;
+! continue;
+! }
+! if ( hx < 0x00100000 )
+! {
+! ax = hx & 0x7fffffff;
+! lx = ((int*)px)[1];
+!
+! if ( (ax | lx) == 0 )
+! {
+! res = DONE / res;
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! px += stridex;
+! py += stridey;
+! continue;
+! }
+! else if ( hx >= 0 )
+! {
+! if ( hx < 0x00080000 )
+! {
+! res = *(long long*)&res;
+! hx = *(int*)&res - (537 << 21);
+! }
+! else
+! {
+! res = vis_fand(res,DC4);
+! res = *(long long*)&res;
+! res += D2ON51;
+! hx = *(int*)&res - (537 << 21);
+! }
+! }
+! else
+! {
+! res = sqrt(res);
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+! px += stridex;
+! py += stridey;
+! continue;
+! }
+! }
+!
+! iexp = hx >> 21;
+! iexp = -iexp;
+! iexp += 0x5fe;
+! lexp = iexp << 52;
+! dlexp = *(double*)&lexp;
+! hx >>= 10;
+! hx &= 0x7f8;
+! hx += 8;
+! hx &= -16;
+!
+! res = vis_fand(res,DC0);
+! res = vis_for(res,DC1);
+! res_c = vis_fpadd32(res,DC2);
+! res_c = vis_fand(res_c,DC3);
+!
+! addr = (char*)arr + hx;
+! dexp_hi = ((double*)addr)[0];
+! dexp_lo = ((double*)addr)[1];
+! dtmp0 = dexp_hi * dexp_hi;
+! xx = res - res_c;
+! xx *= dtmp0;
+! res = K6 * xx;
+! res += K5;
+! res *= xx;
+! res += K4;
+! res *= xx;
+! res += K3;
+! res *= xx;
+! res += K2;
+! res *= xx;
+! res += K1;
+! res *= xx;
+! res = dexp_hi * res;
+! res += dexp_lo;
+! res += dexp_hi;
+!
+! res *= dlexp;
+!
+! ((float*)py)[0] = ((float*)&res)[0];
+! ((float*)py)[1] = ((float*)&res)[1];
+!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
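+!
+! The main path above restated as scalar C -- a readability sketch only,
+! covering positive, normal, finite x (special cases are handled separately).
+! It assumes __vlibm_TBL_rsqrt is laid out as (dexp_hi,dexp_lo) pairs of
+! doubles, 16 bytes per entry, indexed by the leading bits of x as below;
+! the function name, the memcpy-based bit reinterpretation and the 64-bit
+! masks (DC0..DC3 written out as literals) are illustrative assumptions.
+!
+!	#include <stdint.h>
+!	#include <string.h>
+!
+!	static const double K1 = -5.00000000000005209867e-01,
+!	    K2 =  3.75000000000004884257e-01, K3 = -3.12499999317136886551e-01,
+!	    K4 =  2.73437499359815081532e-01, K5 = -2.46116125605037803130e-01,
+!	    K6 =  2.25606914648617522896e-01;
+!
+!	static double rsqrt_one(double x, const double *tbl)
+!	{
+!		uint64_t ix;  memcpy(&ix, &x, 8);
+!		int hx = (int)(ix >> 32);
+!
+!		/* dlexp = 2^(511 - (e >> 1)), e = biased exponent of x */
+!		uint64_t lexp = (uint64_t)(0x5fe - (hx >> 21)) << 52;
+!		double dlexp;  memcpy(&dlexp, &lexp, 8);
+!
+!		/* table index: exponent parity + leading mantissa bits,
+!		   rounded to a 16-byte (two-double) entry */
+!		int i = ((((hx >> 10) & 0x7f8) + 8) & -16) >> 3;
+!
+!		/* reduce x to res in [0.5,2) and form the nearest table
+!		   sample res_c (the DC0|DC1 and DC2/DC3 steps above) */
+!		uint64_t m  = (ix & 0x001fffffffffffffULL) | 0x3fe0000000000000ULL;
+!		uint64_t mc = (m + 0x0000200000000000ULL) & 0x7fffc00000000000ULL;
+!		double res, res_c;
+!		memcpy(&res, &m, 8);  memcpy(&res_c, &mc, 8);
+!
+!		double dexp_hi = tbl[i], dexp_lo = tbl[i + 1];
+!		double xx = (res - res_c) * (dexp_hi * dexp_hi);
+!
+!		/* polynomial correction, then undo the exponent scaling */
+!		double p = ((((K6 * xx + K5) * xx + K4) * xx + K3) * xx
+!		    + K2) * xx + K1;
+!		return (dexp_hi * (p * xx) + dexp_lo + dexp_hi) * dlexp;
+!	}
+!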
+
+ ENTRY(__vrsqrt)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,o3)
+ PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
+ wr %g0,0x82,%asi
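+! 0x82 is ASI_PRIMARY_NOFAULT: the lda/%asi loads below read ahead of the
+! element currently being retired, presumably so that reads past the end
+! of the caller's vector cannot fault.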
+
+ ldd [%o3],K1
+ sethi %hi(0x7ff00000),%o0
+ mov %i3,%o4
+
+ ldd [%o3+0x08],K2
+ sethi %hi(0x00100000),%o1
+ mov %i1,tmp_px
+
+ ldd [%o3+0x10],K3
+ sll %i2,3,stridex
+ mov %i0,tmp_counter
+
+ ldd [%o3+0x18],K4
+ sll %i4,3,stridey
+
+ ldd [%o3+0x20],K5
+ ldd [%o3+0x28],K6
+ ldd [%o3+0x30],DC0
+ ldd [%o3+0x38],DC1
+ ldd [%o3+0x40],DC2
+ ldd [%o3+0x48],DC3
+
+.begin:
+ mov tmp_counter,counter
+ mov tmp_px,%i1
+ clr tmp_counter
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+ ldd [%o3+0x60],DONE
+
+ lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
+ sethi %hi(0x7ffffc00),%i0
+
+ lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
+ add %i0,1023,%i0
+
+ fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+
+ lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
+ sethi %hi(0x00080000),%i4
+
+ lda [%i1+4]%asi,%l4
+ add %i1,stridex,%l6 ! px += stridex
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ and %g1,%i0,%i2
+
+ cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
+ bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 )
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+
+ cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
+ bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+.cont_spec:
+ fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+
+ add %o2,8,%l4 ! (6_1) hx += 8;
+
+ add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
+
+ lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (6_1) iexp << 52;
+ and %l4,-16,%l4 ! (6_1) hx = -16;
+
+ add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
+
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+ lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
+
+ cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
+ bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 )
+ fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3);
+.cont0:
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
+
+ cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
+ bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 )
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+.cont1:
+ fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+
+ add %o2,8,%l2 ! (0_0) hx += 8;
+ fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c;
+
+ lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (0_0) iexp << 52;
+ and %l2,-16,%l2 ! (0_0) hx = -16;
+
+ add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
+ add %l6,stridex,%l6 ! px += stridex
+ stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
+
+ fmuld %f6,%f10,%f26 ! (6_1) xx *= dtmp0;
+ ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0];
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
+ bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 )
+ lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
+.cont2:
+ fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
+ bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+.cont3:
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+
+ add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
+ fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
+ add %o2,8,%i2 ! (1_0) hx += 8;
+ fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c;
+
+ lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (1_0) iexp << 52;
+ and %i2,-16,%i2 ! (1_0) hx = -16;
+
+ add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
+
+ fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0;
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (6_1) res += K5;
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
+ bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 )
+ lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
+.cont4:
+ fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
+ fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
+ bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+.cont5:
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+
+ add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
+ fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
+ add %o2,8,%i4 ! (2_0) hx += 8;
+ fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c;
+
+ faddd %f40,K4,%f40 ! (6_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (2_0) iexp << 52;
+ and %i4,-16,%i4 ! (2_0) hx = -16;
+
+ add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
+
+ fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0;
+ ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (0_0) res += K5;
+
+ fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
+ add %l6,stridex,%l6 ! px += stridex
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
+ bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 )
+ lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
+.cont6:
+ fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
+ cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
+ fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
+ bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ faddd %f34,K3,%f6 ! (6_1) res += K3;
+.cont7:
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
+
+ add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
+ fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
+ add %o2,8,%i5 ! (3_0) hx += 8;
+ fsubd %f28,%f22,%f28 ! (2_0) xx = res - res_c;
+
+ fmuld %f6,%f26,%f22 ! (6_1) res *= xx;
+ faddd %f60,K4,%f60 ! (0_0) res += K4;
+
+ lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (3_0) iexp << 52;
+ and %i5,-16,%i5 ! (3_0) hx = -16;
+
+ add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
+
+ fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
+ add %l6,stridex,%i0 ! px += stridex
+ ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (1_0) res += K5;
+
+ faddd %f22,K2,%f10 ! (6_1) res += K2;
+ fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
+ bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 )
+ lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
+.cont8:
+ fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3);
+ fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
+
+ fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
+ cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ faddd %f34,K3,%f60 ! (0_0) res += K3;
+
+ fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
+ bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+.cont9:
+ add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
+ fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx;
+ add %o2,8,%l1 ! (4_0) hx += 8;
+ fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c;
+
+ fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
+ faddd %f62,K4,%f6 ! (1_0) res += K4;
+
+ lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (4_0) iexp << 52;
+ and %l1,-16,%l1 ! (4_0) hx = -16;
+ faddd %f58,K1,%f58 ! (6_1) res += K1;
+
+ add %i0,stridex,%i1 ! px += stridex
+ add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
+
+ fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
+ ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
+ faddd %f10,K5,%f62 ! (2_0) res += K5;
+
+ fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ nop
+ faddd %f60,K2,%f60 ! (0_0) res += K2;
+
+ for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+ lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
+ fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
+
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
+ bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 )
+ lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
+.cont10:
+ fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
+ fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
+
+ fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
+ cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (1_0) res += K3;
+
+ fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
+ bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 )
+ nop
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+.cont11:
+ ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
+ fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
+ fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
+ add %o2,8,%i3 ! (5_0) hx += 8;
+ fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
+
+ fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
+ or %g0,%o4,%i0
+
+ cmp counter,7
+ bl,pn %icc,.tail
+ faddd %f62,K4,%f34 ! (2_0) res += K4;
+
+ ba .main_loop
+ sub counter,7,counter ! counter
+
+ .align 16
+.main_loop:
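+! Software-pipelined core: seven elements are in flight at once and one
+! pass through the loop retires seven results.  The (n_k) tags on the
+! comments appear to track which of the seven staged elements an
+! instruction belongs to and how many iterations ago it entered the pipe.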
+ add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
+ and %i3,-16,%i3 ! (5_1) hx = -16;
+ lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
+ faddd %f58,K1,%f58 ! (0_1) res += K1;
+
+ add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx;
+ sllx %o7,52,%o7 ! (5_1) iexp << 52;
+ stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp;
+ faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
+
+ faddd %f22,K5,%f62 ! (3_1) res += K5;
+ add %i1,stridex,%l6 ! px += stridex
+ ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0];
+ fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
+
+ faddd %f24,K2,%f26 ! (1_1) res += K2;
+ add %i0,stridey,%i1 ! px += stridey
+ ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
+ fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
+
+ fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi;
+ faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
+
+ fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
+ cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
+ ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+ bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 )
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (2_1) res += K3;
+.cont12:
+ fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
+ cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+
+ fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
+ bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 )
+ ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+.cont13:
+ fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
+ add %o2,8,%l4 ! (6_1) hx += 8;
+ st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c;
+
+ fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
+ add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
+ st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f32,K4,%f32 ! (3_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (6_1) iexp << 52;
+ and %l4,-16,%l4 ! (6_1) hx = -16;
+ faddd %f26,K1,%f26 ! (1_1) res += K1;
+
+ add %i1,stridey,%i0 ! px += stridey
+ add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
+ faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
+
+ fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0;
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (4_1) res += K5;
+
+ fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f32 ! (2_1) res += K2;
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+ lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
+
+ fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
+ cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
+ ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3);
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+ bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 )
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (3_1) res += K3;
+.cont14:
+ fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res;
+ cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+ fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+
+ fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
+ bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 )
+ ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+.cont15:
+ fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx;
+ add %o2,8,%l2 ! (0_0) hx += 8;
+ st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c;
+
+ fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
+ nop
+ st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f38,K4,%f38 ! (4_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (0_0) iexp << 52;
+ and %l2,-16,%l2 ! (0_0) hx = -16;
+ faddd %f32,K1,%f32 ! (2_1) res += K1;
+
+ add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
+ add %l6,stridex,%l6 ! px += stridex
+ stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
+ faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
+
+ fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0;
+ add %i0,stridey,%i1 ! px += stridey
+ ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (5_1) res += K5;
+
+ fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
+ faddd %f44,K2,%f38 ! (3_1) res += K2;
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
+ lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi;
+
+ fmuld %f62,%f58,%f36 ! (5_1) res *= xx;
+ bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
+.cont16:
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+ cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (4_1) res += K3;
+
+ fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
+ bl,pn %icc,.update17 ! (1_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+.cont17:
+ fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp;
+ add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
+ ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
+ add %o2,8,%i2 ! (1_0) hx += 8;
+ st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c;
+
+ fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
+ nop
+ st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f36,K4,%f36 ! (5_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (1_0) iexp << 52;
+ and %i2,-16,%i2 ! (1_0) hx = -16;
+ faddd %f38,K1,%f38 ! (3_1) res += K1;
+
+ add %i1,stridey,%i0 ! px += stridey
+ add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
+ faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo;
+
+ fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0;
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (6_1) res += K5;
+
+ fmuld %f36,%f58,%f34 ! (5_1) res *= xx;
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f36 ! (4_1) res += K2;
+
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+
+ fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
+ lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
+
+ fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
+ bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
+.cont18:
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+ cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ faddd %f34,K3,%f34 ! (5_1) res += K3;
+
+ fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res;
+ bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+.cont19:
+ fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
+ add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
+ ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
+ add %o2,8,%i4 ! (2_0) hx += 8;
+ st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c;
+
+ fmuld %f34,%f58,%f44 ! (5_1) res *= xx;
+ nop
+ st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f40,K4,%f40 ! (6_1) res += K4;
+
+ lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (2_0) iexp << 52;
+ and %i4,-16,%i4 ! (2_0) hx = -16;
+ faddd %f36,K1,%f36 ! (4_1) res += K1;
+
+ add %l6,stridex,%l6 ! px += stridex
+ add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
+ faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
+
+ fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0;
+ add %i0,stridey,%i1 ! px += stridey
+ ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (0_0) res += K5;
+
+ fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+ ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
+ faddd %f44,K2,%f40 ! (5_1) res += K2;
+
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+
+ fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
+ lda [%l6+4]%asi,%f1 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi;
+
+ fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
+ bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
+ fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
+.cont20:
+ fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
+ cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ faddd %f34,K3,%f10 ! (6_1) res += K3;
+
+ fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
+ bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
+.cont21:
+ fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
+ add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
+ ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
+ add %o2,8,%i5 ! (3_0) hx += 8;
+ st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c;
+
+ fmuld %f10,%f26,%f4 ! (6_1) res *= xx;
+ nop
+ st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f60,K4,%f60 ! (0_0) res += K4;
+
+ lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
+ sllx %o7,52,%o7 ! (3_0) iexp << 52;
+ and %i5,-16,%i5 ! (3_0) hx = -16;
+ faddd %f40,K1,%f40 ! (5_1) res += K1;
+
+ add %l6,stridex,%i0 ! px += stridex
+ add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
+ stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
+ faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
+
+ fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
+ add %i1,stridey,%l6 ! px += stridey
+ ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
+ faddd %f62,K5,%f62 ! (1_0) res += K5;
+
+ faddd %f4,K2,%f10 ! (6_1) res += K2;
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ nop
+ fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
+
+ fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ lda [%i0]%asi,%f6 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
+ for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+
+ fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
+ lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
+
+ fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3);
+ bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
+ fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
+.cont22:
+ fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
+ cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ faddd %f34,K3,%f60 ! (0_0) res += K3;
+
+ fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res;
+ bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 )
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+.cont23:
+ fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
+ add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
+ ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx;
+ add %o2,8,%l1 ! (4_0) hx += 8;
+ st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c;
+
+ fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
+ sllx %o7,52,%o7 ! (4_0) iexp << 52;
+ st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f62,K4,%f6 ! (1_0) res += K4;
+
+ lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
+ add %i0,stridex,%i1 ! px += stridex
+ and %l1,-16,%l1 ! (4_0) hx = -16;
+ faddd %f58,K1,%f58 ! (6_1) res += K1;
+
+ add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
+ add %l6,stridey,%i0 ! px += stridey
+ stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
+ faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo;
+
+ fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
+ nop
+ ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
+ faddd %f30,K5,%f62 ! (2_0) res += K5;
+
+ fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0];
+ faddd %f60,K2,%f60 ! (0_0) res += K2;
+
+ for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+ lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
+ fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
+
+ fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
+ cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
+ lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
+ faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi;
+
+ fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
+ bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 )
+ ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp;
+ fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
+.cont24:
+ fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
+ faddd %f34,K3,%f34 ! (1_0) res += K3;
+
+ fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
+ bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 )
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+.cont25:
+ fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp;
+ subcc counter,7,counter ! counter -= 7;
+ ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
+ fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
+
+ fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
+ add %o2,8,%i3 ! (5_0) hx += 8;
+ st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0];
+ fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
+
+ fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
+ st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1];
+ bpos,pt %icc,.main_loop
+ faddd %f62,K4,%f34 ! (2_0) res += K4;
+
+ add counter,7,counter
+.tail:
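+! Pipeline drain: when fewer than seven inputs remain, the elements still
+! in flight are finished here one at a time, each block below completing
+! one partially processed element and storing its result.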
+ add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i0,%o4
+
+ faddd %f58,K1,%f58 ! (0_1) res += K1;
+
+ faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
+
+ faddd %f22,K5,%f62 ! (3_1) res += K5;
+ fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
+
+ faddd %f24,K2,%f26 ! (1_1) res += K2;
+ add %i1,stridex,%l6 ! px += stridex
+ ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
+ fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
+
+ fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
+
+ add %i0,stridey,%i1 ! px += stridey
+ faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
+
+ fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
+ ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+ faddd %f34,K3,%f34 ! (2_1) res += K3;
+
+ fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
+
+ fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
+ ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
+
+ fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
+ st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
+
+ fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
+ st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f32,K4,%f32 ! (3_1) res += K4;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i1,%o4
+
+ faddd %f26,K1,%f26 ! (1_1) res += K1;
+
+ faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
+
+ add %l6,stridex,%l6 ! px += stridex
+ faddd %f62,K5,%f62 ! (4_1) res += K5;
+
+ fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
+ add %i1,stridey,%i0 ! px += stridey
+ ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f32 ! (2_1) res += K2;
+
+ fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
+
+ faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
+
+ fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
+ ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+ faddd %f34,K3,%f34 ! (3_1) res += K3;
+
+ fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res;
+
+ fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
+ ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
+
+ st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
+
+ fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
+ st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
+ faddd %f38,K4,%f38 ! (4_1) res += K4;
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i0,%o4
+
+ faddd %f32,K1,%f32 ! (2_1) res += K1;
+
+ add %l6,stridex,%l6 ! px += stridex
+ faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
+
+ add %i0,stridey,%i1 ! px += stridey
+
+ fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
+ ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
+ faddd %f44,K2,%f38 ! (3_1) res += K2;
+
+ fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
+
+ faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi;
+
+ ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
+
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+ faddd %f34,K3,%f34 ! (4_1) res += K3;
+
+ fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
+
+ fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp;
+ ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1];
+
+ st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
+
+ fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
+ st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i1,%o4
+
+ faddd %f38,K1,%f38 ! (3_1) res += K1;
+
+ faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo;
+
+ add %l6,stridex,%l6 ! px += stridex
+
+ add %i1,stridey,%i0 ! px += stridey
+ ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0];
+ faddd %f28,K2,%f36 ! (4_1) res += K2;
+
+ fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
+
+ faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
+
+ ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
+
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+
+ fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res;
+
+ fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
+ ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
+
+ st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
+
+ st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i0,%o4
+
+ faddd %f36,K1,%f36 ! (4_1) res += K1;
+
+ faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
+
+ add %i0,stridey,%i1 ! px += stridey
+
+ add %l6,stridex,%l6 ! px += stridex
+ ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
+
+ fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
+
+ faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi;
+
+ ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
+
+ fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
+
+ fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
+ ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
+
+ st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
+
+ st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
+
+ subcc counter,1,counter
+ bneg,a .begin
+ mov %i1,%o4
+
+ faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
+
+ add %l6,stridex,%i0 ! px += stridex
+
+ add %i1,stridey,%l6 ! px += stridey
+
+ faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
+
+ ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
+
+ fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
+
+ st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
+
+ st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
+
+ ba .begin
+ add %i1,stridey,%o4
+
+ .align 16
+.spec0:
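+! Special case: hx >= 0x7ff00000, i.e. x is +Inf or a NaN with the sign
+! bit clear.  DONE/x already gives the right answer (+0 for +Inf, NaN for
+! NaN), so just divide, store, and move to the next element.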
+ fdivd DONE,%f0,%f0 ! res = DONE / res;
+ add %i1,stridex,%i1 ! px += stridex
+ st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
+ st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
+ add %o4,stridey,%o4 ! py += stridey
+ ba .begin1
+ sub counter,1,counter
+
+ .align 16
+.spec1:
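+! Special case: hx < 0x00100000 (zero, subnormal, or negative x).
+!	x == +-0	-> DONE/x   (returns +-Inf, raises division-by-zero)
+!	x <  0		-> sqrt(x)  (returns NaN, raises invalid)
+!	subnormal x	-> rescale via fxtod (plus D2ON51 for the larger
+!			   ones), then rejoin the main path at .cont_spec.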
+ orcc %i2,%l4,%g0
+ bz,a 2f
+ fdivd DONE,%f0,%f0 ! res = DONE / res;
+
+ cmp %g1,0
+ bl,a 2f
+ fsqrtd %f0,%f0 ! res = sqrt(res);
+
+ cmp %g1,%i4
+ bge,a 1f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp0]
+
+ fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp0],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ ba .cont_spec
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+
+1:
+ fand %f0,%f18,%f0 ! res = vis_fand(res,DC4);
+
+ ldd [%o3+0x58],%f28
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+
+ faddd %f0,%f28,%f0 ! res += D2ON51;
+ st %f0,[%fp+tmp0]
+
+ fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp0],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ ba .cont_spec
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+
+2:
+ add %i1,stridex,%i1 ! px += stridex
+ st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
+ st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
+ add %o4,stridey,%o4 ! py += stridey
+ ba .begin1
+ sub counter,1,counter
+
+ .align 16
+.update0:
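+! .updateN handlers: a special operand was detected while later elements
+! were already in flight.  The even-numbered ones (hx >= 0x7ff00000) and
+! the zero/negative cases of the odd-numbered ones (hx < 0x00100000) clip
+! the current pass just before the offending element, stashing the rest of
+! the vector in tmp_px/tmp_counter so the next .begin pass redoes it via
+! .spec0/.spec1; subnormal inputs are instead fixed up inline (fxtod) and
+! the pipeline continues at the matching .contN.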
+ cmp counter,1
+ ble .cont0
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont0
+ mov 1,counter
+
+ .align 16
+.update1:
+ cmp counter,1
+ ble .cont1
+ sub %l6,stridex,%i1
+
+ ld [%i1+4],%i2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ ba .cont1
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+2:
+ fand %f8,%f18,%f8
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f8,%f18,%f8
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ ba .cont1
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont1
+ mov 1,counter
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble .cont2
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont2
+ mov 2,counter
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble .cont3
+ sub %l6,stridex,%i1
+
+ ld [%i1+4],%i2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ sub %o7,537,%o7
+ ba .cont3
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+2:
+ fand %f0,%f18,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f0,%f18,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+ sub %o7,537,%o7
+ ba .cont3
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont3
+ mov 2,counter
+
+ .align 16
+.update4:
+ cmp counter,3
+ ble .cont4
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont4
+ mov 3,counter
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble .cont5
+ sub %l6,stridex,%i1
+
+ ld [%i1+4],%i3
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i4
+
+ cmp %g1,%i4
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ ba .cont5
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+2:
+ fand %f6,%f18,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f6,%f18,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ ba .cont5
+ for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont5
+ mov 3,counter
+
+ .align 16
+.update6:
+ cmp counter,4
+ ble .cont6
+ nop
+
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont6
+ mov 4,counter
+
+ .align 16
+.update7:
+ sub %l6,stridex,%i1
+ cmp counter,4
+ ble .cont7
+ faddd %f34,K3,%f6 ! (6_1) res += K3;
+
+ ld [%i1+4],%i3
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i5
+
+ cmp %g1,%i5
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ ba .cont7
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f18,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f0,%f18,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+ ba .cont7
+ for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont7
+ mov 4,counter
+
+ .align 16
+.update8:
+ cmp counter,5
+ ble .cont8
+ nop
+
+ mov %l6,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont8
+ mov 5,counter
+
+ .align 16
+.update9:
+ ld [%l6+4],%i3
+ cmp counter,5
+ ble .cont9
+ fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i1
+
+ cmp %g1,%i1
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont9
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+2:
+ fand %f8,%f18,%f8
+ fxtod %f8,%f8 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f8,%f18,%f8
+ st %f8,[%fp+tmp7]
+
+ fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont9
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+1:
+ mov %l6,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont9
+ mov 5,counter
+
+ .align 16
+.update10:
+ cmp counter,6
+ ble .cont10
+ nop
+
+ mov %i0,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont10
+ mov 6,counter
+
+ .align 16
+.update11:
+ ld [%i0+4],%i3
+ cmp counter,6
+ ble .cont11
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f18
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ ba .cont11
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f18,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f18
+ faddd %f0,%f18,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ ba .cont11
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+1:
+ mov %i0,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont11
+ mov 6,counter
+
+ .align 16
+.update12:
+ cmp counter,0
+ ble .cont12
+ faddd %f34,K3,%f34 ! (2_1) res += K3;
+
+ sub %l6,stridex,tmp_px
+ sub counter,0,tmp_counter
+
+ ba .cont12
+ mov 0,counter
+
+ .align 16
+.update13:
+ sub %l6,stridex,%l4
+ cmp counter,0
+ ble .cont13
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+
+ ld [%l4+4],%l4
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%l4,%g0
+ bz 1f
+ sethi %hi(0x00080000),%l4
+
+ cmp %g1,%l4
+ bge,a 2f
+ ldd [%o3+0x50],%f62
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+ ba .cont13
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+2:
+ fand %f6,%f62,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f62
+ faddd %f6,%f62,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (6_1) hx >>= 10;
+ for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
+ ba .cont13
+ fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,0,tmp_counter
+
+ ba .cont13
+ mov 0,counter
+
+ .align 16
+.update14:
+ cmp counter,1
+ ble .cont14
+ faddd %f34,K3,%f34 ! (3_1) res += K3;
+
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont14
+ mov 1,counter
+
+ .align 16
+.update15:
+ sub %l6,stridex,%l2
+ cmp counter,1
+ ble .cont15
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+
+ ld [%l2+4],%l2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%l2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%l2
+
+ cmp %g1,%l2
+ bge,a 2f
+ ldd [%o3+0x50],%f62
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+ ba .cont15
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+2:
+ fand %f0,%f62,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f62
+ faddd %f0,%f62,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (0_0) hx >>= 10;
+ for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
+
+ sub %o7,537,%o7
+
+ sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
+
+ and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
+ add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
+ ba .cont15
+ fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,1,tmp_counter
+
+ ba .cont15
+ mov 1,counter
+
+ .align 16
+.update16:
+ cmp counter,2
+ ble .cont16
+ fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
+
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont16
+ mov 2,counter
+
+ .align 16
+.update17:
+ sub %l6,stridex,%i2
+ cmp counter,2
+ ble .cont17
+ fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
+
+ ld [%i2+4],%i2
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i2,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i2
+
+ cmp %g1,%i2
+ bge,a 2f
+ ldd [%o3+0x50],%f2
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ ba .cont17
+ for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+2:
+ fand %f6,%f2,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f2
+ faddd %f6,%f2,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (1_0) hx >>= 10;
+
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
+ ba .cont17
+ for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,2,tmp_counter
+
+ ba .cont17
+ mov 2,counter
+
+ .align 16
+.update18:
+ cmp counter,3
+ ble .cont18
+ fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
+
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont18
+ mov 3,counter
+
+ .align 16
+.update19:
+ sub %l6,stridex,%i4
+ cmp counter,3
+ ble .cont19
+ fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
+
+ ld [%i4+4],%i4
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i4,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i4
+
+ cmp %g1,%i4
+ bge,a 2f
+ ldd [%o3+0x50],%f2
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ ba .cont19
+ for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f2,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f2
+ faddd %f0,%f2,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (2_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
+ ba .cont19
+ for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,3,tmp_counter
+
+ ba .cont19
+ mov 3,counter
+
+ .align 16
+.update20:
+ cmp counter,4
+ ble .cont20
+ fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
+
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont20
+ mov 4,counter
+
+ .align 16
+.update21:
+ sub %l6,stridex,%i5
+ cmp counter,4
+ ble .cont21
+ fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
+
+ ld [%i5+4],%i5
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i5,%g0
+ bz 1f
+ sethi %hi(0x00080000),%i5
+
+ cmp %g1,%i5
+ bge,a 2f
+ ldd [%o3+0x50],%f34
+
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ ba .cont21
+ for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+2:
+ fand %f6,%f34,%f6
+ fxtod %f6,%f6 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f34
+ faddd %f6,%f34,%f6
+ st %f6,[%fp+tmp7]
+
+ fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
+ sra %g1,10,%o2 ! (3_0) hx >>= 10;
+
+ sub %o7,537,%o7
+ and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
+
+ sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
+ ba .cont21
+ for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
+1:
+ sub %l6,stridex,tmp_px
+ sub counter,4,tmp_counter
+
+ ba .cont21
+ mov 4,counter
+
+ .align 16
+.update22:
+ cmp counter,5
+ ble .cont22
+ fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
+
+ sub %i0,stridex,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont22
+ mov 5,counter
+
+ .align 16
+.update23:
+ sub %i0,stridex,%l1
+ cmp counter,5
+ ble .cont23
+ fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
+
+ ld [%l1+4],%l1
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%l1,%g0
+ bz 1f
+ sethi %hi(0x00080000),%l1
+
+ cmp %g1,%l1
+ bge,a 2f
+ ldd [%o3+0x50],%f34
+
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont23
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+2:
+ fand %f0,%f34,%f0
+ fxtod %f0,%f0 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f34
+ faddd %f0,%f34,%f0
+ st %f0,[%fp+tmp7]
+
+ fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (4_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
+ ba .cont23
+ for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
+1:
+ sub %i0,stridex,tmp_px
+ sub counter,5,tmp_counter
+
+ ba .cont23
+ mov 5,counter
+
+ .align 16
+.update24:
+ cmp counter,6
+ ble .cont24
+ fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
+
+ sub %i1,stridex,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont24
+ mov 6,counter
+
+ .align 16
+.update25:
+ sub %i1,stridex,%i3
+ cmp counter,6
+ ble .cont25
+ fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
+
+ ld [%i3+4],%i3
+ cmp %g1,0
+ bl 1f
+
+ orcc %g1,%i3,%g0
+ bz 1f
+ nop
+
+ sub %i1,stridex,%i3
+ ld [%i3],%f10
+ ld [%i3+4],%f11
+
+ sethi %hi(0x00080000),%i3
+
+ cmp %g1,%i3
+ bge,a 2f
+ ldd [%o3+0x50],%f60
+
+ fxtod %f10,%f10 ! res = *(long long*)&res;
+ st %f10,[%fp+tmp7]
+
+ fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ ba .cont25
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+2:
+ fand %f10,%f60,%f10
+ fxtod %f10,%f10 ! res = *(long long*)&res;
+ ldd [%o3+0x58],%f60
+ faddd %f10,%f60,%f10
+ st %f10,[%fp+tmp7]
+
+ fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
+ ld [%fp+tmp7],%g1
+
+ sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
+
+ sra %g1,10,%o2 ! (5_0) hx >>= 10;
+ sub %o7,537,%o7
+
+ and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
+ sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
+
+ ba .cont25
+ for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
+1:
+ sub %i1,stridex,tmp_px
+ sub counter,6,tmp_counter
+
+ ba .cont25
+ mov 6,counter
+
+.exit:
+ ret
+ restore
+ SET_SIZE(__vrsqrt)
+