 usr/src/libm/src/mvec/vis/__vrhypot.S | 3878 ++++++++++++++++++++++++++++++++
 1 file changed, 3878 insertions(+), 0 deletions(-)
diff --git a/usr/src/libm/src/mvec/vis/__vrhypot.S b/usr/src/libm/src/mvec/vis/__vrhypot.S
new file mode 100644
index 0000000..07954d6
--- /dev/null
+++ b/usr/src/libm/src/mvec/vis/__vrhypot.S
@@ -0,0 +1,3878 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+ .ident "@(#)__vrhypot.S 1.7 06/01/23 SMI"
+
+ .file "__vrhypot.S"
+
+#include "libm.h"
+
+ RO_DATA
+ .align 64
+
+.CONST_TBL:
+ .word 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
+ .word 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
+ .word 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
+ .word 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
+ .word 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
+ .word 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
+ .word 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
+ .word 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
+ .word 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
+ .word 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
+ .word 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
+ .word 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
+ .word 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
+ .word 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
+ .word 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
+ .word 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
+ .word 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
+ .word 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
+ .word 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
+ .word 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
+ .word 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
+ .word 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
+ .word 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
+ .word 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
+ .word 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
+ .word 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
+ .word 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
+ .word 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
+ .word 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
+ .word 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
+ .word 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
+ .word 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
+
+ .word 0x42300000, 0 ! D2ON36 = 2**36
+ .word 0xffffff00, 0 ! DA0
+ .word 0xfff00000, 0 ! DA1
+ .word 0x3ff00000, 0 ! DONE = 1.0
+ .word 0x40000000, 0 ! DTWO = 2.0
+ .word 0x7fd00000, 0 ! D2ON1022
+ .word 0x3cb00000, 0 ! D2ONM52
+ .word 0x43200000, 0 ! D2ON51
+ .word 0x0007ffff, 0xffffffff ! 0x0007ffffffffffff
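+
+! The 128 entries above are the high words of the doubles 2**1023/(1+i/128),
+! i = 0..127: a 7-bit reciprocal seed whose exponent is biased so that the
+! fpsub32 against the argument's exponent field yields a correctly scaled
+! first guess at 1/dres.  A sketch that regenerates them (illustrative C,
+! not part of the build):
+!
+!	#include <stdio.h>
+!	#include <math.h>
+!	int main() {
+!		union { double d; unsigned long long ll; } u;
+!		int i;
+!		for (i = 0; i < 128; i++) {
+!			u.d = ldexp(1.0, 1023) / (1.0 + i / 128.0);	/* 2**1023/(1+i/128) */
+!			printf("0x%08x\n", (unsigned)(u.ll >> 32));	/* keep high word */
+!		}
+!		return 0;
+!	}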
+
+#define stridex %l2
+#define stridey %l3
+#define stridez %l5
+
+#define TBL_SHIFT 512
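+! (512 = 128 table entries * 4 bytes: the constants D2ON36 .. D2ON51 above
+! are addressed as [TBL+TBL_SHIFT+k], just past the reciprocal table)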
+
+#define TBL %l1
+#define counter %l4
+
+#define _0x7ff00000 %l0
+#define _0x00100000 %o5
+#define _0x7fffffff %l6
+
+#define D2ON36 %f4
+#define DTWO %f6
+#define DONE %f8
+#define DA0 %f58
+#define DA1 %f56
+
+#define dtmp0 STACK_BIAS-0x80
+#define dtmp1 STACK_BIAS-0x78
+#define dtmp2 STACK_BIAS-0x70
+#define dtmp3 STACK_BIAS-0x68
+#define dtmp4 STACK_BIAS-0x60
+#define dtmp5 STACK_BIAS-0x58
+#define dtmp6 STACK_BIAS-0x50
+#define dtmp7 STACK_BIAS-0x48
+#define dtmp8 STACK_BIAS-0x40
+#define dtmp9 STACK_BIAS-0x38
+#define dtmp10 STACK_BIAS-0x30
+#define dtmp11 STACK_BIAS-0x28
+#define dtmp12 STACK_BIAS-0x20
+#define dtmp13 STACK_BIAS-0x18
+#define dtmp14 STACK_BIAS-0x10
+#define dtmp15 STACK_BIAS-0x08
+
+#define ftmp0 STACK_BIAS-0x100
+#define tmp_px STACK_BIAS-0x98
+#define tmp_py STACK_BIAS-0x90
+#define tmp_counter STACK_BIAS-0x88
+
+! sizeof temp storage - must be a multiple of 16 for V9
+#define tmps 0x100
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+! !!!!! algorithm !!!!!
+! hx0 = *(int*)px;
+! hy0 = *(int*)py;
+!
+! ((float*)&x0)[0] = ((float*)px)[0];
+! ((float*)&x0)[1] = ((float*)px)[1];
+! ((float*)&y0)[0] = ((float*)py)[0];
+! ((float*)&y0)[1] = ((float*)py)[1];
+!
+! hx0 &= 0x7fffffff;
+! hy0 &= 0x7fffffff;
+!
+! diff0 = hy0 - hx0;
+! j0 = diff0 >> 31;
+! j0 &= diff0;
+! j0 = hy0 - j0;
+! j0 &= 0x7ff00000;
+!
+! j0 = 0x7ff00000 - j0;
+! ll = (long long)j0 << 32;
+! *(long long*)&scl0 = ll;
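+!
+! (j0 here is the exponent field of max(hx0,hy0), so scl0 = 2**(1-E) with
+! E the unbiased exponent of the larger argument; after scaling,
+! max(|x0|,|y0|) lies in [2,4) and x0*x0 + y0*y0 can neither overflow nor
+! underflow.  E.g. x = 1e300 has E = 996, giving scl0 = 2**-995.)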
+!
+! if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
+! {
+! lx = ((int*)px)[1];
+! ly = ((int*)py)[1];
+!
+! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
+! else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
+! else res0 = fabs(x0) * fabs(y0);
+!
+! ((float*)pz)[0] = ((float*)&res0)[0];
+! ((float*)pz)[1] = ((float*)&res0)[1];
+!
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+! continue;
+! }
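+!
+! (rhypot(+-Inf,y) = rhypot(x,+-Inf) = +0; any other operand with the
+! 0x7ff exponent is a NaN, which fabs(x0)*fabs(y0) propagates.)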
+! if ( hx0 < 0x00100000 && hy0 < 0x00100000 )
+! {
+! lx = ((int*)px)[1];
+! ly = ((int*)py)[1];
+! ii = hx0 | hy0;
+! ii |= lx;
+! ii |= ly;
+! if ( ii == 0 )
+! {
+! res0 = 1.0 / 0.0;
+! ((float*)pz)[0] = ((float*)&res0)[0];
+! ((float*)pz)[1] = ((float*)&res0)[1];
+!
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+! continue;
+! }
+! x0 = fabs(x0);
+! y0 = fabs(y0);
+! if ( hx0 < 0x00080000 )
+! {
+! x0 = *(long long*)&x0;
+! }
+! else
+! {
+! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
+! x0 = vis_fand(x0, dtmp0);
+! x0 = *(long long*)&x0;
+! x0 += D2ON51;
+! }
+! x0 *= D2ONM52;
+! if ( hy0 < 0x00080000 )
+! {
+! y0 = *(long long*)&y0;
+! }
+! else
+! {
+! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
+! y0 = vis_fand(y0, dtmp0);
+! y0 = *(long long*)&y0;
+! y0 += D2ON51;
+! }
+! y0 *= D2ONM52;
+! *(long long*)&scl0 = 0x7fd0000000000000ULL;
+! }
+! else
+! {
+! x0 *= scl0;
+! y0 *= scl0;
+! }
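+!
+! (Subnormal branch above: a subnormal's bit pattern read as an integer
+! equals |x0| * 2**1074; converting it back to double and multiplying by
+! D2ONM52 = 2**-52 leaves x0 * 2**1022 exactly.  scl0 = 2**1022 (high word
+! 0x7fd00000) then restores the result, since 1/hypot(x,y) =
+! 2**1022 * (1/hypot(x*2**1022, y*2**1022)).  When bit 51 of the pattern
+! is set, the low 51 bits are converted separately and D2ON51 = 2**51 is
+! added back to rebuild the value.)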
+!
+! x_hi0 = x0 + D2ON36;
+! y_hi0 = y0 + D2ON36;
+! x_hi0 -= D2ON36;
+! y_hi0 -= D2ON36;
+! x_lo0 = x0 - x_hi0;
+! y_lo0 = y0 - y_hi0;
+! res0_hi = x_hi0 * x_hi0;
+! dtmp0 = y_hi0 * y_hi0;
+! res0_hi += dtmp0;
+! res0_lo = x0 + x_hi0;
+! res0_lo *= x_lo0;
+! dtmp1 = y0 + y_hi0;
+! dtmp1 *= y_lo0;
+! res0_lo += dtmp1;
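+!
+! (Splitting: with max(|x0|,|y0|) in [2,4), adding and subtracting
+! D2ON36 = 2**36 rounds x0 to x_hi0, a multiple of 2**-16 with at most
+! 18 significant bits, so x_hi0*x_hi0 and y_hi0*y_hi0 are exact and
+! x0*x0 = x_hi0*x_hi0 + (x0 + x_hi0)*x_lo0 exactly; res0_hi + res0_lo
+! thus carries x0*x0 + y0*y0 to well beyond double precision.)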
+!
+! dres = res0_hi + res0_lo;
+! dexp0 = vis_fand(dres,DA1);
+! iarr = ((int*)&dres)[0];
+!
+! iarr >>= 11;
+! iarr &= 0x1fc;
+! dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+! dd = vis_fpsub32(dtmp0, dexp0);
+!
+! dtmp0 = dd * dres;
+! dtmp0 = DTWO - dtmp0;
+! dd *= dtmp0;
+! dtmp1 = dd * dres;
+! dtmp1 = DTWO - dtmp1;
+! dd *= dtmp1;
+! dtmp2 = dd * dres;
+! dtmp2 = DTWO - dtmp2;
+! dres = dd * dtmp2;
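+!
+! (dd starts as a ~7-bit reciprocal of dres -- the table seed plus the
+! exponent recovered by fpsub32; each Newton step dd *= (2 - dd*dres)
+! roughly doubles the number of correct bits, so three steps reach full
+! double precision.)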
+!
+! res0 = vis_fand(dres,DA0);
+!
+! dtmp0 = res0_hi * res0;
+! dtmp0 = DONE - dtmp0;
+! dtmp1 = res0_lo * res0;
+! dtmp0 -= dtmp1;
+! dtmp0 *= dres;
+! res0 += dtmp0;
+!
+! res0 = sqrt ( res0 );
+!
+! res0 = scl0 * res0;
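+!
+! (res0 is the reciprocal with its low mantissa bits cleared by DA0, then
+! refined by one correction step against the exact sum res0_hi + res0_lo;
+! its square root is 1/hypot of the scaled arguments, and the multiply by
+! scl0 undoes the scaling.)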
+!
+! ((float*)pz)[0] = ((float*)&res0)[0];
+! ((float*)pz)[1] = ((float*)&res0)[1];
+!
+! px += stridex;
+! py += stridey;
+! pz += stridez;
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
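+! Interface, inferred from the register usage below (%i0 = n, %i1 = px,
+! %i2 = stridex, %i3 = py, %i4 = stridey, %i5 = pz, stridez on the stack)
+! and the usual mvec convention:
+!
+!	void __vrhypot(int n, double *x, int stridex,
+!		double *y, int stridey, double *z, int stridez);
+!
+! computing z[i*stridez] = 1/sqrt(x[i*stridex]**2 + y[i*stridey]**2) for
+! i = 0..n-1.  The (i_j) tags in the comments appear to track the
+! eight-way software pipeline: the element's slot within the current
+! group of eight, and how many loop iterations ago it entered the pipe.
+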
+ ENTRY(__vrhypot)
+ save %sp,-SA(MINFRAME)-tmps,%sp
+ PIC_SETUP(l7)
+ PIC_SET(l7,.CONST_TBL,l1)
+ wr %g0,0x82,%asi
+
+#ifdef __sparcv9
+ ldx [%fp+STACK_BIAS+176],stridez
+#else
+ ld [%fp+STACK_BIAS+92],stridez
+#endif
+
+ sll %i2,3,stridex
+ sethi %hi(0x7ff00000),_0x7ff00000
+ st %i0,[%fp+tmp_counter]
+
+ sll %i4,3,stridey
+ sethi %hi(0x00100000),_0x00100000
+ stx %i1,[%fp+tmp_px]
+
+ sll stridez,3,stridez
+ sethi %hi(0x7ffffc00),_0x7fffffff
+ stx %i3,[%fp+tmp_py]
+
+ ldd [TBL+TBL_SHIFT],D2ON36
+ add _0x7fffffff,1023,_0x7fffffff
+
+ ldd [TBL+TBL_SHIFT+8],DA0
+
+ ldd [TBL+TBL_SHIFT+16],DA1
+
+ ldd [TBL+TBL_SHIFT+24],DONE
+
+ ldd [TBL+TBL_SHIFT+32],DTWO
+
+.begin:
+ ld [%fp+tmp_counter],counter
+ ldx [%fp+tmp_px],%i4
+ ldx [%fp+tmp_py],%i3
+ st %g0,[%fp+tmp_counter]
+.begin1:
+ cmp counter,0
+ ble,pn %icc,.exit
+
+ lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
+ add %i4,stridex,%i1
+
+ lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+
+ and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
+ bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 )
+ and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
+
+ cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 )
+ sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
+
+ sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
+ cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
+ bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 )
+
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+.cont_spec0:
+ sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
+
+ and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
+
+ sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+.cont_spec1:
+ lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px;
+ mov %i1,%i2
+
+ lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py;
+
+ and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff;
+ mov %i0,%o0
+
+ cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000
+ bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 )
+ and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff;
+
+ cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0;
+ bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 )
+ sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31;
+
+ cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000
+
+ and %o1,%o3,%o1 ! (0_0) j0 &= diff0;
+ bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 )
+ sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0;
+.cont0:
+ and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000;
+
+ sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0;
+.cont1:
+ sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll;
+
+ ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll;
+
+ lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
+
+ lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
+
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0;
+ add %i4,stridex,%i1 ! px += stridex
+
+ fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0;
+
+ lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px;
+
+ add %i0,stridey,%i3 ! py += stridey
+ faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+
+ and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff;
+ bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36;
+
+ cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0;
+ bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (1_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
+.cont4:
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0;
+ ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0;
+
+ lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
+
+ lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0;
+ faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1;
+
+ lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px;
+ mov %i1,%i2
+
+ faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py;
+ mov %i0,%o0
+ faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000
+ bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 )
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+
+ and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff;
+ st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (2_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+.cont7:
+ sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0;
+
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+.cont8:
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0;
+ lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
+
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0];
+ add %i4,stridex,%i1 ! px += stridex
+ fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (7_1) iarr >>= 11;
+ faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc;
+
+ add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px;
+
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff;
+
+ cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 )
+ fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff;
+ st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (3_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update11 ! (3_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+.cont11:
+ sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+.cont12:
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0;
+
+ lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0;
+
+ lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
+
+ lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (0_0) iarr >>= 11;
+ faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc;
+
+ add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr
+ mov %i1,%i2
+ lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1;
+
+ ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py;
+ mov %i0,%o0
+ faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff;
+ faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff;
+ bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 )
+ st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (4_0) j0 &= diff0;
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+.cont15:
+ sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+.cont16:
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0;
+
+ lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0;
+ lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2;
+
+ lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
+ add %i1,stridex,%i4 ! px += stridex
+
+ fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0];
+ add %i4,stridex,%i1 ! px += stridex
+ fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0;
+
+ fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (1_0) iarr >>= 11;
+ faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc;
+ fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2;
+
+ add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1;
+
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff;
+ faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff;
+ st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0];
+ bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31;
+
+ and %o1,%o3,%o1 ! (5_0) j0 &= diff0;
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+.cont19a:
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+.cont19b:
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+.cont20:
+ fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0;
+ lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0;
+
+ lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
+
+ fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2;
+
+ lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0];
+ fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0;
+ fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (2_0) iarr >>= 11;
+ faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc;
+ fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2;
+
+ add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr
+ mov %i1,%i2
+ lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres;
+ ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py;
+ mov %i0,%o0
+ faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff;
+ faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff;
+ st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0];
+ bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31;
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (6_0) j0 &= diff0;
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+.cont23a:
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+.cont23b:
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+.cont24:
+ fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0;
+
+ lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
+ fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0;
+ lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2;
+
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0;
+ fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (3_0) iarr >>= 11;
+ faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc;
+ fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2;
+
+ fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36;
+
+ lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
+ add %i3,stridey,%i0 ! py += stridey
+ faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36;
+
+ and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
+ faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo;
+
+ fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
+ st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0];
+ bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 )
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+.cont27a:
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+.cont27b:
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+.cont28:
+ fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0;
+ ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0;
+
+ fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0;
+ lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
+
+ lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
+ fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0;
+ lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2;
+
+ lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
+
+ fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0];
+ fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0;
+ ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (4_0) iarr >>= 11;
+ faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc;
+ fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2;
+
+ cmp counter,8
+ bl,pn %icc,.tail
+ nop
+
+ ba .main_loop
+ sub counter,8,counter
+
+ .align 16
+.main_loop:
+ fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr
+ lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36;
+
+ faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff;
+ st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0];
+ fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0;
+
+ fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff;
+ st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
+ bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36;
+
+ cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0;
+ bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36;
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31;
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (0_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000
+ bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 )
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+.cont31:
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0;
+ nop
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000;
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+.cont32:
+ fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0;
+ sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0;
+ nop
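+! (bn = branch never: never taken; like the nops, it only pads the
+! instruction groups for scheduling)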
+ bn,pn %icc,.exit
+ fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0;
+
+ fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f0,%f46,%f30 ! (6_1) res0_hi += dtmp0;
+
+ nop
+ nop
+ lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
+ fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0;
+
+ fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2;
+
+ add %i1,stridex,%i4 ! px += stridex
+ nop
+ lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0];
+ fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0;
+ sra %o2,11,%i3 ! (5_1) iarr >>= 11;
+ nop
+ faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 );
+ lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px;
+ add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr
+ fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36;
+
+ faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff;
+ st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0];
+ fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff;
+ nop
+ bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36;
+
+ cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000
+ sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0;
+ st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31;
+ bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 )
+ faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (1_0) j0 &= diff0;
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0;
+ cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+.cont35a:
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ nop
+ sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+.cont35b:
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+.cont36:
+ fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0];
+ fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0;
+ sra %o2,11,%g1 ! (6_1) iarr >>= 11;
+ nop
+ faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1;
+
+ nop
+ and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc;
+ bn,pn %icc,.exit
+ fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 );
+ lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px;
+ add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr
+ fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff;
+ st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
+ fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff;
+ nop
+ bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000
+ st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31;
+ bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 )
+ faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (2_0) j0 &= diff0;
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+.cont39a:
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+.cont39b:
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+.cont40:
+ fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2;
+
+ add %i1,stridex,%i4 ! px += stridex
+ nop
+ lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0];
+ fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (7_1) iarr >>= 11;
+ nop
+ faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px;
+ fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36;
+
+ faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo;
+ and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff;
+ st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
+ fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000
+ st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff;
+ nop
+ bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000
+ st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31;
+ bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 )
+ faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (3_0) j0 &= diff0;
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000
+ sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000;
+ bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 )
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+.cont43a:
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ nop
+ sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+.cont43b:
+ fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0;
+.cont44:
+ fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (0_0) iarr >>= 11;
+ nop
+ faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr
+ lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0;
+ nop
+ and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff;
+ faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000
+ st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff;
+ st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31;
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (4_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000
+ bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 )
+ fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
+.cont47a:
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ nop
+ sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+.cont47b:
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+.cont48:
+ fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2;
+
+ lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
+ add %i1,stridex,%i4 ! px += stridex
+ nop
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0];
+ fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (1_0) iarr >>= 11;
+ nop
+ faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0;
+ and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff;
+ nop
+ faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000
+ st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff;
+ st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31;
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (5_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000
+ bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 )
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+.cont51a:
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+.cont51b:
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+.cont52:
+ fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll;
+ faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0;
+
+ fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
+ fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0;
+
+ nop
+ nop
+ lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0];
+ fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (2_0) iarr >>= 11;
+ nop
+ faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr
+ lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres;
+ mov %i1,%i2
+ ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ mov %i0,%o0
+ lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py;
+ faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0;
+ and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff;
+ nop
+ faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo;
+
+ fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000
+ st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
+
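+ ! Initial reciprocal estimate: iarr is the top seven mantissa bits
+ ! of dres scaled to a word offset, the 0x7fdXXXXX table entries
+ ! approximate a reciprocal with a 0x7fd bias in the exponent, and
+ ! vis_fpsub32 subtracts the sign/exponent field dexp0 in the upper
+ ! 32 bits, leaving dd ~ 1/dres good to roughly 8 bits.  Three
+ ! Newton steps plus a final correction against the exact sum
+ ! res0_hi + res0_lo then restore full working precision.  (Hedged
+ ! reading of .CONST_TBL and the surrounding code.)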
+ and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff;
+ st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update53 ! (6_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36;
+
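+ ! Hi/lo squaring: x_hi0 = (x0 + D2ON36) - D2ON36 rounds the scaled
+ ! x0 to a short head whose square is exact, and the full square is
+ ! rebuilt as x0*x0 = x_hi0*x_hi0 + (x0 - x_hi0)*(x0 + x_hi0).  The
+ ! head parts accumulate in res0_hi and the tails in res0_lo, so
+ ! dres = res0_hi + res0_lo carries x0*x0 + y0*y0 to a little more
+ ! than double precision.  (Sketch of the splitting trick; D2ON36
+ ! is the 2**36 constant from the table.)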
+ sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31;
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (6_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000
+ bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 )
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+.cont55a:
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+.cont55b:
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+.cont56:
+ fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0;
+
+ lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
+ nop
+ nop
+ fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0;
+
+ lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
+ nop
+ nop
+ fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0;
+ nop
+ lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2;
+
+ nop
+ add %i1,stridex,%i4 ! px += stridex
+ lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres;
+ add %i4,stridex,%i1 ! px += stridex
+ ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0];
+ fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll;
+ fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0;
+ sra %o2,11,%i3 ! (3_0) iarr >>= 11;
+ nop
+ faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1;
+
+ and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc;
+ nop
+ bn,pn %icc,.exit
+ fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2;
+
+ fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr
+ lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
+ fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres;
+ add %i0,stridey,%i3 ! py += stridey
+ ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+ faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36;
+
+ nop
+ add %i3,stridey,%i0 ! py += stridey
+ lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
+ faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36;
+
+ fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0;
+ and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
+ nop
+ faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo;
+
+ fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1;
+ cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
+ st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0];
+ fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
+
+ and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
+ st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 )
+ fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36;
+
+ sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
+ cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
+ bge,pn %icc,.update58 ! (7_0) if ( hy0 >= 0x7ff00000 )
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
+ st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+ cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
+ bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 )
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+.cont59a:
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
+ nop
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+.cont59b:
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ nop
+ nop
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+.cont60:
+ fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0;
+ nop
+ ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll;
+ faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0;
+
+ fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0;
+ nop
+ lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
+ bn,pn %icc,.exit
+
+ lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
+ nop
+ nop
+ fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0;
+
+ fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0;
+ nop
+ lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
+ fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2;
+
+ nop
+ nop
+ lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
+ bn,pn %icc,.exit
+
+ fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres;
+ nop
+ ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0];
+ fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1);
+
+ fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0;
+ nop
+ ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1;
+
+ fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0;
+ sra %o2,11,%o4 ! (4_0) iarr >>= 11;
+ nop
+ faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1;
+
+ and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc;
+ subcc counter,8,counter ! counter -= 8;
+ bpos,pt %icc,.main_loop
+ fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2;
+
+ add counter,8,counter
+
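+ ! Tail processing: fewer than eight elements remain, so the
+ ! software pipeline is drained one element per block below,
+ ! completing the in-flight stages (final correction, square root,
+ ! rescale, store) and decrementing counter until it goes negative,
+ ! at which point control returns to .begin.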
+.tail:
+ subcc counter,1,counter
+ bneg .begin
+ nop
+
+ fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 );
+ add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr
+ fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres;
+ ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+
+ fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0;
+ st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
+ faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo;
+
+ subcc counter,1,counter
+ st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1;
+ st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0];
+ fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0;
+
+ fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0;
+
+ fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0;
+
+ fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0;
+
+ fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2;
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres;
+ ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0];
+ fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1);
+
+ ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1;
+
+ sra %o2,11,%i3 ! (5_1) iarr >>= 11;
+
+ and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc;
+ fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 );
+ add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr
+ fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres;
+ ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
+
+ fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1;
+ fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
+
+ st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
+
+ fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+
+ fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0;
+ fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0;
+
+ fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2;
+
+ fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres;
+
+ ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1;
+
+ fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 );
+ fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1;
+
+ fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres;
+
+ fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0;
+
+ fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1;
+
+ st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
+
+ subcc counter,1,counter
+ st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+
+ fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0;
+
+ fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2;
+
+ ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1;
+
+ fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2;
+
+ fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 );
+
+ fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres;
+
+ fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0;
+
+ st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
+
+ st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+
+ fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0;
+
+ ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll;
+ fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1;
+
+ fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 );
+
+ fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres;
+
+ fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0;
+ st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll;
+
+ fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 );
+
+ fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0;
+ st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+
+ subcc counter,1,counter
+ bneg .begin
+ add %i5,stridez,%i5 ! pz += stridez
+
+ ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll;
+
+ fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0;
+ st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+
+ ba .begin
+ add %i5,stridez,%i5
+
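+ ! .spec0: an operand with maximal exponent was seen.  Roughly:
+ !	if ((hx0 == 0x7ff00000 && lx == 0) ||
+ !	    (hy0 == 0x7ff00000 && ly == 0))
+ !		res0 = 0.0;			(an infinite input)
+ !	else
+ !		res0 = fabs(x0) * fabs(y0);	(propagates NaN)
+ ! A sketch assembled from the instruction comments below; the
+ ! pointers are stepped past the element on either path.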
+ .align 16
+.spec0:
+ cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000
+ bne 1f ! if ( hx0 != 0x7ff00000 )
+ ld [%i4+4],%i2 ! lx = ((int*)px)[1];
+
+ cmp %i2,0 ! lx ? 0
+ be 3f ! if ( lx == 0 )
+ nop
+1:
+ cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000
+ bne 2f ! if ( hy0 != 0x7ff00000 )
+ ld [%i3+4],%o2 ! ly = ((int*)py)[1];
+
+ cmp %o2,0 ! ly ? 0
+ be 3f ! if ( ly == 0 )
+2:
+ ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0];
+ ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1];
+
+ ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0];
+ add %i4,stridex,%i4 ! px += stridex
+ ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1];
+
+ fabsd %f0,%f0
+
+ fabsd %f2,%f2
+
+ fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0);
+ add %i3,stridey,%i3 ! py += stridey;
+ st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0];
+
+ st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1];
+ add %i5,stridez,%i5 ! pz += stridez
+ ba .begin1
+ sub counter,1,counter
+3:
+ add %i4,stridex,%i4 ! px += stridex
+ add %i3,stridey,%i3 ! py += stridey
+ st %g0,[%i5] ! ((int*)pz)[0] = 0;
+
+ st %g0,[%i5+4] ! ((int*)pz)[1] = 0;
+ add %i5,stridez,%i5 ! pz += stridez;
+ ba .begin1
+ sub counter,1,counter
+
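+ ! .spec1: both operands are below 0x00100000 in the high word.  If
+ ! x and y are both exactly zero, res0 = 1.0/0.0 raises division by
+ ! zero and stores +Inf.  Otherwise the subnormal operands are
+ ! rebuilt with fxtod (an exact integer-to-double conversion of the
+ ! raw significand, with a D2ON51 offset for the larger ones), and
+ ! the scale factors D2ONM52 and D2ON1022 are parked in dtmp15 and
+ ! dtmp0 before re-entering at .cont_spec1.  (Hedged summary; see
+ ! the instruction comments.)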
+ .align 16
+.spec1:
+ and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
+
+ cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 >= 0x00100000 )
+
+ ld [%i4+4],%i2 ! lx = ((int*)px)[1];
+ or %o7,%l7,%g5 ! ii = hx0 | hy0;
+ fzero %f0
+
+ ld [%i3+4],%o2 ! ly = ((int*)py)[1];
+ or %i2,%g5,%g5 ! ii |= lx;
+
+ orcc %o2,%g5,%g5 ! ii |= ly;
+ bnz,a,pn %icc,1f ! if ( ii != 0 )
+ sethi %hi(0x00080000),%i2
+
+ fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0;
+
+ st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0];
+
+ add %i4,stridex,%i4 ! px += stridex;
+ add %i3,stridey,%i3 ! py += stridey;
+ st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1];
+
+ add %i5,stridez,%i5 ! pz += stridez;
+ ba .begin1
+ sub counter,1,counter
+1:
+ ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0];
+
+ ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1];
+
+ ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0];
+
+ fabsd %f0,%f0 ! x0 = fabs(x0);
+ ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1];
+
+ ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
+ add %fp,dtmp2,%i4
+ add %fp,dtmp3,%i3
+
+ fabsd %f2,%f2 ! y0 = fabs(y0);
+ ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51
+
+ ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52
+ cmp %o7,%i2 ! hx0 ? 0x00080000
+ bl,a 1f ! if ( hx0 < 0x00080000 )
+ fxtod %f0,%f0 ! x0 = *(long long*)&x0;
+
+ fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0);
+ fxtod %f0,%f0 ! x0 = *(long long*)&x0;
+ faddd %f0,%f10,%f0 ! x0 += D2ON51;
+1:
+ std %f0,[%i4]
+
+ ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022
+ cmp %l7,%i2 ! hy0 ? 0x00080000
+ bl,a 1f ! if ( hy0 < 0x00080000 )
+ fxtod %f2,%f2 ! y0 = *(long long*)&y0;
+
+ fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0);
+ fxtod %f2,%f2 ! y0 = *(long long*)&y0;
+ faddd %f2,%f10,%f2 ! y0 += D2ON51;
+1:
+ std %f2,[%i3]
+
+ stx %g5,[%fp+dtmp15] ! D2ONM52
+
+ ba .cont_spec1
+ stx %g1,[%fp+dtmp0] ! D2ON1022
+
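+ ! .updateNN blocks: a lane hit a special operand mid-pipeline.
+ ! Each block spills the not-yet-started element count and the
+ ! offending px/py into tmp_counter/tmp_px/tmp_py so the element is
+ ! redone later, caps counter at the number of elements already in
+ ! flight, substitutes the 1.0 at TBL+TBL_SHIFT+24 for the bad
+ ! operands, replays any instructions skipped by the early branch,
+ ! and rejoins the pipeline at the matching .contNN label.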
+ .align 16
+.update0:
+ cmp counter,1
+ ble 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 1,counter
+1:
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont1
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update1:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont0 ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,1
+ ble,a 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 1,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont1
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update2:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont4
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update3:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont4
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update4:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 >= 0x00100000 )
+ sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
+
+ cmp counter,2
+ ble,a 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 2,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont4
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update5:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
+
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ sllx %g1,32,%g1
+ ba .cont8
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update6:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ sllx %g1,32,%g1
+ ba .cont8
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update7:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont7 ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,3
+ ble,a 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 3,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ sllx %g1,32,%g1
+ ba .cont8
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update9:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
+ fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
+
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont12
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update10:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ ba .cont12
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update11:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont11 ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,4
+ ble,a 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 4,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+ ba .cont12
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update13:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
+
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont16
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update14:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ ba .cont16
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update15:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont15 ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,5
+ ble,a 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 5,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+ ba .cont16
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update17:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont20
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update18:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont20
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update19:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont19a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,6
+ ble,a 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 6,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont19b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update21:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont24
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update22:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont24
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update23:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont23a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,7
+ ble,a 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 7,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont23b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update25:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont28
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update26:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont28
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update27:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont27a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,8
+ ble,a 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 8,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont27b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update29:
+ cmp counter,1
+ ble 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 1,counter
+1:
+ fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36;
+
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont32
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update30:
+ cmp counter,1
+ ble 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 1,counter
+1:
+ fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
+ stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
+ faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont32
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update31:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont31 ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,1
+ ble,a 1f
+ nop
+
+ sub counter,1,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 1,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont32
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update33:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
+ faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
+
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ ba .cont36
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update34:
+ cmp counter,2
+ ble 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 2,counter
+1:
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
+
+ sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
+ stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
+ ba .cont36
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update35:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont35a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,2
+ ble,a 1f
+ nop
+
+ sub counter,2,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 2,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%o4
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont35b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update37:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres;
+ faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
+
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ ba .cont40
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update38:
+ cmp counter,3
+ ble 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 3,counter
+1:
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
+ fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
+ ba .cont40
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update39:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont39a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,3
+ ble,a 1f
+ nop
+
+ sub counter,3,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 3,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont39b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update41:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
+ faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
+
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ ba .cont44
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update42:
+ cmp counter,4
+ ble 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 4,counter
+1:
+ add %i5,stridez,%i5 ! pz += stridez
+ stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
+ fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
+ fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
+ faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
+ ba .cont44
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update43:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont43a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,4
+ ble,a 1f
+ nop
+
+ sub counter,4,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 4,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
+
+ ba .cont43b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update45:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
+
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ ba .cont48
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update46:
+ cmp counter,5
+ ble 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 5,counter
+1:
+ fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
+ ba .cont48
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update47:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont47a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,5
+ ble,a 1f
+ nop
+
+ sub counter,5,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 5,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont47b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update49:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
+
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont52
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update50:
+ cmp counter,6
+ ble 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 6,counter
+1:
+ fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
+
+ fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
+ faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
+ ba .cont52
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update51:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont51a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,6
+ ble,a 1f
+ nop
+
+ sub counter,6,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 6,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont51b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update53:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
+
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont56
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update54:
+ cmp counter,7
+ ble 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ stx %o0,[%fp+tmp_py]
+
+ mov 7,counter
+1:
+ fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
+ st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
+
+ fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
+ faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
+
+ sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
+ ba .cont56
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update55:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont55a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,7
+ ble,a 1f
+ nop
+
+ sub counter,7,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i2,[%fp+tmp_px]
+
+ mov 7,counter
+ stx %o0,[%fp+tmp_py]
+1:
+ fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
+ fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i2
+ fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont55b
+ add TBL,TBL_SHIFT+24,%o0
+
+ .align 16
+.update57:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
+
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont60
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update58:
+ cmp counter,8
+ ble 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ stx %i3,[%fp+tmp_py]
+
+ mov 8,counter
+1:
+ fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
+ st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
+ faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
+
+ fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
+
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
+ faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
+
+ fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
+ fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
+
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+
+ sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
+ stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
+ ba .cont60
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.update59:
+ cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
+ bge,pn %icc,.cont59a ! (0_0) if ( hy0 >= 0x00100000 )
+
+ cmp counter,8
+ ble,a 1f
+ nop
+
+ sub counter,8,counter
+ st counter,[%fp+tmp_counter]
+
+ stx %i4,[%fp+tmp_px]
+
+ mov 8,counter
+ stx %i3,[%fp+tmp_py]
+1:
+ fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
+ stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
+ fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
+
+ fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
+ add %i5,stridez,%i5 ! pz += stridez
+ faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
+
+ fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
+ sethi %hi(0x3ff00000),%g1
+ add TBL,TBL_SHIFT+24,%i4
+ fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
+
+ ba .cont59b
+ add TBL,TBL_SHIFT+24,%i3
+
+ .align 16
+.exit:
+ ret
+ restore
+ SET_SIZE(__vrhypot)
+