diff options
Diffstat (limited to 'usr/src/libm/src/mvec/vis/__vrhypot.S')
-rw-r--r-- | usr/src/libm/src/mvec/vis/__vrhypot.S | 3878 |
1 file changed, 3878 insertions, 0 deletions
diff --git a/usr/src/libm/src/mvec/vis/__vrhypot.S b/usr/src/libm/src/mvec/vis/__vrhypot.S new file mode 100644 index 0000000..07954d6 --- /dev/null +++ b/usr/src/libm/src/mvec/vis/__vrhypot.S @@ -0,0 +1,3878 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + + .ident "@(#)__vrhypot.S 1.7 06/01/23 SMI" + + .file "__vrhypot.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465, + .word 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a, + .word 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6, + .word 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3, + .word 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b, + .word 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036, + .word 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01, + .word 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1, + .word 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb, + .word 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5, + .word 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405, + .word 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc, + .word 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7, + .word 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec, + .word 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b, + .word 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed, + .word 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150, + .word 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539, + .word 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66, + .word 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995, + .word 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d, + .word 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19, + .word 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404, + .word 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22, + .word 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47, + .word 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a, + .word 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06, + .word 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358, + .word 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20, + .word 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f, + .word 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197, + .word 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010, + + .word 0x42300000, 0 ! D2ON36 = 2**36 + .word 0xffffff00, 0 ! 
DA0 + .word 0xfff00000, 0 ! DA1 + .word 0x3ff00000, 0 ! DONE = 1.0 + .word 0x40000000, 0 ! DTWO = 2.0 + .word 0x7fd00000, 0 ! D2ON1022 + .word 0x3cb00000, 0 ! D2ONM52 + .word 0x43200000, 0 ! D2ON51 + .word 0x0007ffff, 0xffffffff ! 0x0007ffffffffffff + +#define stridex %l2 +#define stridey %l3 +#define stridez %l5 + +#define TBL_SHIFT 512 + +#define TBL %l1 +#define counter %l4 + +#define _0x7ff00000 %l0 +#define _0x00100000 %o5 +#define _0x7fffffff %l6 + +#define D2ON36 %f4 +#define DTWO %f6 +#define DONE %f8 +#define DA0 %f58 +#define DA1 %f56 + +#define dtmp0 STACK_BIAS-0x80 +#define dtmp1 STACK_BIAS-0x78 +#define dtmp2 STACK_BIAS-0x70 +#define dtmp3 STACK_BIAS-0x68 +#define dtmp4 STACK_BIAS-0x60 +#define dtmp5 STACK_BIAS-0x58 +#define dtmp6 STACK_BIAS-0x50 +#define dtmp7 STACK_BIAS-0x48 +#define dtmp8 STACK_BIAS-0x40 +#define dtmp9 STACK_BIAS-0x38 +#define dtmp10 STACK_BIAS-0x30 +#define dtmp11 STACK_BIAS-0x28 +#define dtmp12 STACK_BIAS-0x20 +#define dtmp13 STACK_BIAS-0x18 +#define dtmp14 STACK_BIAS-0x10 +#define dtmp15 STACK_BIAS-0x08 + +#define ftmp0 STACK_BIAS-0x100 +#define tmp_px STACK_BIAS-0x98 +#define tmp_py STACK_BIAS-0x90 +#define tmp_counter STACK_BIAS-0x88 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x100 + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! !!!!! algorithm !!!!! +! hx0 = *(int*)px; +! hy0 = *(int*)py; +! +! ((float*)&x0)[0] = ((float*)px)[0]; +! ((float*)&x0)[1] = ((float*)px)[1]; +! ((float*)&y0)[0] = ((float*)py)[0]; +! ((float*)&y0)[1] = ((float*)py)[1]; +! +! hx0 &= 0x7fffffff; +! hy0 &= 0x7fffffff; +! +! diff0 = hy0 - hx0; +! j0 = diff0 >> 31; +! j0 &= diff0; +! j0 = hy0 - j0; +! j0 &= 0x7ff00000; +! +! j0 = 0x7ff00000 - j0; +! ll = (long long)j0 << 32; +! *(long long*)&scl0 = ll; +! +! if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! +! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0; +! 
else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0; +! else res0 = fabs(x0) * fabs(y0); +! +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! if ( hx0 < 0x00100000 && hy0 < 0x00100000 ) +! { +! lx = ((int*)px)[1]; +! ly = ((int*)py)[1]; +! ii = hx0 | hy0; +! ii |= lx; +! ii |= ly; +! if ( ii == 0 ) +! { +! res0 = 1.0 / 0.0; +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +! continue; +! } +! x0 = fabs(x0); +! y0 = fabs(y0); +! if ( hx0 < 0x00080000 ) +! { +! x0 = *(long long*)&x0; +! } +! else +! { +! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; +! x0 = vis_fand(x0, dtmp0); +! x0 = *(long long*)&x0; +! x0 += D2ON51; +! } +! x0 *= D2ONM52; +! if ( hy0 < 0x00080000 ) +! { +! y0 = *(long long*)&y0; +! } +! else +! { +! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; +! y0 = vis_fand(y0, dtmp0); +! y0 = *(long long*)&y0; +! y0 += D2ON51; +! } +! y0 *= D2ONM52; +! *(long long*)&scl0 = 0x7fd0000000000000ULL; +! } +! else +! { +! x0 *= scl0; +! y0 *= scl0; +! } +! +! x_hi0 = x0 + D2ON36; +! y_hi0 = y0 + D2ON36; +! x_hi0 -= D2ON36; +! y_hi0 -= D2ON36; +! x_lo0 = x0 - x_hi0; +! y_lo0 = y0 - y_hi0; +! res0_hi = x_hi0 * x_hi0; +! dtmp0 = y_hi0 * y_hi0; +! res0_hi += dtmp0; +! res0_lo = x0 + x_hi0; +! res0_lo *= x_lo0; +! dtmp1 = y0 + y_hi0; +! dtmp1 *= y_lo0; +! res0_lo += dtmp1; +! +! dres = res0_hi + res0_lo; +! dexp0 = vis_fand(dres,DA1); +! iarr = ((int*)&dres)[0]; +! +! iarr >>= 11; +! iarr &= 0x1fc; +! dtmp0 = ((double*)((char*)dll1 + iarr))[0]; +! dd = vis_fpsub32(dtmp0, dexp0); +! +! dtmp0 = dd * dres; +! dtmp0 = DTWO - dtmp0; +! dd *= dtmp0; +! dtmp1 = dd * dres; +! dtmp1 = DTWO - dtmp1; +! dd *= dtmp1; +! dtmp2 = dd * dres; +! dtmp2 = DTWO - dtmp2; +! dres = dd * dtmp2; +! +! res0 = vis_fand(dres,DA0); +! +! dtmp0 = res0_hi * res0; +! dtmp0 = DONE - dtmp0; +! 
dtmp1 = res0_lo * res0; +! dtmp0 -= dtmp1; +! dtmp0 *= dres; +! res0 += dtmp0; +! +! res0 = sqrt ( res0 ); +! +! res0 = scl0 * res0; +! +! ((float*)pz)[0] = ((float*)&res0)[0]; +! ((float*)pz)[1] = ((float*)&res0)[1]; +! +! px += stridex; +! py += stridey; +! pz += stridez; +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ENTRY(__vrhypot) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l1) + wr %g0,0x82,%asi + +#ifdef __sparcv9 + ldx [%fp+STACK_BIAS+176],stridez +#else + ld [%fp+STACK_BIAS+92],stridez +#endif + + sll %i2,3,stridex + sethi %hi(0x7ff00000),_0x7ff00000 + st %i0,[%fp+tmp_counter] + + sll %i4,3,stridey + sethi %hi(0x00100000),_0x00100000 + stx %i1,[%fp+tmp_px] + + sll stridez,3,stridez + sethi %hi(0x7ffffc00),_0x7fffffff + stx %i3,[%fp+tmp_py] + + ldd [TBL+TBL_SHIFT],D2ON36 + add _0x7fffffff,1023,_0x7fffffff + + ldd [TBL+TBL_SHIFT+8],DA0 + + ldd [TBL+TBL_SHIFT+16],DA1 + + ldd [TBL+TBL_SHIFT+24],DONE + + ldd [TBL+TBL_SHIFT+32],DTWO + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i4 + ldx [%fp+tmp_py],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + add %i4,stridex,%i1 + + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + + and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 ) + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 ) + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 ) + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; +.cont_spec0: + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + + sub %l0,%o4,%g1 ! 
(7_0) j0 = 0x7ff00000 - j0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; +.cont_spec1: + lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; + mov %i1,%i2 + + lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; + + and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; + mov %i0,%o0 + + cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 + bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 ) + and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; + + cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; + bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 ) + sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; + + cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 + + and %o1,%o3,%o1 ! (0_0) j0 &= diff0; + bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 ) + sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; +.cont0: + and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; + + sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; +.cont1: + sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; + + ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; + + lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; + + lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; + add %i4,stridex,%i1 ! px += stridex + + fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; + + lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; + + add %i0,stridey,%i3 ! py += stridey + faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! 
(1_0) hx0 ? 0x7ff00000 + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + + and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; + bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; + bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (1_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; + bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; +.cont4: + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; + ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; + + lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; + + lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; + faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; + + lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; + mov %i1,%i2 + + faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%g1 ! 
(2_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; + + faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 + bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 ) + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + + and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (2_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; + bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; +.cont7: + sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; +.cont8: + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; + ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! 
(1_0) ((float*)&y0)[1] = ((float*)py)[1]; + + ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; + add %i4,stridex,%i1 ! px += stridex + fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; + sra %o2,11,%i3 ! (7_1) iarr >>= 11; + faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; + + add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; + + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; + + faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; + + cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 ) + fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (3_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; + bl,pn %icc,.update11 ! 
(3_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; +.cont11: + sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; +.cont12: + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; + ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; + + lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; + + lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; + + lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; + sra %o2,11,%o4 ! (0_0) iarr >>= 11; + faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; + + add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr + mov %i1,%i2 + lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; + + ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; + faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fpsub32 %f28,%f2,%f28 ! 
(0_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; + bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 ) + st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (4_0) j0 &= diff0; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; + bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; +.cont15: + sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; +.cont16: + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; + ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; + + lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; + add %i1,stridex,%i4 ! 
px += stridex + + fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; + add %i4,stridex,%i1 ! px += stridex + fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; + + fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; + sra %o2,11,%i3 ! (1_0) iarr >>= 11; + faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; + fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; + + add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; + + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; + faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; + + and %o1,%o3,%o1 ! (5_0) j0 &= diff0; + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (5_0) hx0 ? 
0x00100000 + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; + bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; +.cont19a: + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; +.cont19b: + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; +.cont20: + fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; + ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; + lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; + + lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; + + fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; + + lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; + fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; + sra %o2,11,%o4 ! (2_0) iarr >>= 11; + faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; + fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; + + add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr + mov %i1,%i2 + lda [%i1]0x82,%o1 ! 
(6_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; + ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; + + lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; + mov %i0,%o0 + faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; + faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (6_0) j0 &= diff0; + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; + bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; +.cont23a: + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; +.cont23b: + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! 
(6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; +.cont24: + fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; + ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; + faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; + + fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; + lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; + fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; + sra %o2,11,%i3 ! (3_0) iarr >>= 11; + faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; + fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; + + fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; + + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + add %i3,stridey,%i0 ! py += stridey + faddd %f52,D2ON36,%f12 ! 
(5_0) y_hi0 = y0 + D2ON36; + + and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; + faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; + + fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; + bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 ) + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; +.cont27a: + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; +.cont27b: + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; +.cont28: + fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; + ldd [%fp+dtmp13],%f62 ! 
(6_0) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; + + fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; + lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; + + lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; + lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; + + lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; + + fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; + fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; + ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; + sra %o2,11,%o4 ! (4_0) iarr >>= 11; + faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; + fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; + + cmp counter,8 + bl,pn %icc,.tail + nop + + ba .main_loop + sub counter,8,counter + + .align 16 +.main_loop: + fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36; + + faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; + st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; + fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; + + fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 
0x7ff00000 + st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; + st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; + bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; + bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (0_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 + bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); +.cont31: + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; + nop + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; +.cont32: + fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; + sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; + faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; + nop + bn,pn %icc,.exit + fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0; + + fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f30 ! 
(6_1) res0_hi += dtmp0; + + nop + nop + lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; + fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + nop + lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; + fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; + nop + ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; + sra %o2,11,%i3 ! (5_1) iarr >>= 11; + nop + faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; + + fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); + lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; + add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; + + faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; + st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0]; + fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f14 ! 
(4_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 + st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; + nop + bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; + + cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 + sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; + bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (1_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; + cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; + bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; +.cont35a: + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; +.cont35b: + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; +.cont36: + fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp1],%f62 ! 
(0_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0]; + fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; + nop + ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; + sra %o2,11,%g1 ! (6_1) iarr >>= 11; + nop + faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; + + nop + and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc; + bn,pn %icc,.exit + fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); + lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; + add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr + fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; + + faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 
0x7ff00000 + st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff; + nop + bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; + bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (2_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; + bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; +.cont39a: + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; +.cont39b: + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; +.cont40: + fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f32 ! 
(0_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; + nop + lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; + + add %i1,stridex,%i4 ! px += stridex + nop + lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; + nop + ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; + fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; + sra %o2,11,%i3 ! (7_1) iarr >>= 11; + nop + faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; + fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; + + faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; + and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fmuld %f50,%f24,%f0 ! 
(2_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 + st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; + fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; + nop + bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; + bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 ) + faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (3_0) j0 &= diff0; + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 + sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; + bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 ) + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; +.cont43a: + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; +.cont43b: + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 +.cont44: + fmuld %f62,%f0,%f0 ! 
(1_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; + nop + ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; + sra %o2,11,%o4 ! (0_0) iarr >>= 11; + nop + faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f52 ! (5_1) dtmp0 -= dtmp1; + mov %i1,%i2 + ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; + + fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; + nop + and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; + faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! 
(4_0) hx0 ? 0x7ff00000 + st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; + fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (4_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 + bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); +.cont47a: + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + nop + sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; +.cont47b: + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; +.cont48: + fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp7],%f62 ! 
(3_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0; + nop + lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; + + nop + nop + lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; + + lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; + add %i1,stridex,%i4 ! px += stridex + nop + bn,pn %icc,.exit + + fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; + fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; + nop + ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; + sra %o2,11,%i3 ! (1_0) iarr >>= 11; + nop + faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; + + fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; + nop + faddd %f30,%f40,%f14 ! 
(2_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 + st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; + fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (5_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 + bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 ) + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); +.cont51a: + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; +.cont51b: + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f12 ! 
(3_0) y_lo0 = y0 - y_hi0; +.cont52: + fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; + faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; + + fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; + nop + lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; + fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; + + nop + nop + lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; + bn,pn %icc,.exit + + fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; + fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; + nop + ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; + sra %o2,11,%o4 ! (2_0) iarr >>= 11; + nop + faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr + lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; + + fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; + mov %i1,%i2 + ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; + + nop + mov %i0,%o0 + lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; + faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; + + fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; + nop + faddd %f44,%f38,%f22 ! 
(3_0) dres = res0_hi + res0_lo; + + fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 + st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; + st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update53 ! (6_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + and %o1,%o3,%o1 ! (6_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 + bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 ) + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); +.cont55a: + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; +.cont55b: + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; +.cont56: + fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; + faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; + + lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; + nop + nop + fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; + + lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; + nop + nop + fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; + + fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; + nop + lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; + + nop + add %i1,stridex,%i4 ! px += stridex + lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; + add %i4,stridex,%i1 ! px += stridex + ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; + fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; + nop + ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll; + fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; + sra %o2,11,%i3 ! (3_0) iarr >>= 11; + nop + faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; + + and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; + nop + bn,pn %icc,.exit + fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; + + fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); + add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr + lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; + fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; + add %i0,stridey,%i3 ! py += stridey + ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; + + nop + add %i3,stridey,%i0 ! py += stridey + lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; + faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; + + fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0; + and %o1,_0x7fffffff,%o7 ! 
(7_0) hx0 &= 0x7fffffff; + nop + faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; + + fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; + cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 + st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); + + and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; + st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0]; + bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 ) + fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; + + sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; + cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 + bge,pn %icc,.update58 ! (7_0) if ( hy0 >= 0x7ff00000 ) + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 + bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 ) + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); +.cont59a: + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; + nop + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; +.cont59b: + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! 
(1_0) dtmp1 = res0_lo * res0; + nop + nop + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; +.cont60: + fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; + nop + ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; + faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; + + fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; + nop + lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; + bn,pn %icc,.exit + + lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; + nop + nop + fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; + + fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; + nop + lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; + fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; + + nop + nop + lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; + bn,pn %icc,.exit + + fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; + nop + ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; + fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); + + fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; + nop + ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; + + fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; + sra %o2,11,%o4 ! (4_0) iarr >>= 11; + nop + faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; + + and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; + subcc counter,8,counter ! counter -= 8; + bpos,pt %icc,.main_loop + fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; + + add counter,8,counter + +.tail: + subcc counter,1,counter + bneg .begin + nop + + fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); + add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; + ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + + fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0; + st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; + faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; + + subcc counter,1,counter + st %f1,[%i5+4] ! 
(7_2) ((float*)pz)[1] = ((float*)&res0)[1]; + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; + st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; + fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; + + fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0; + + fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; + + fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; + + fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; + ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; + fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); + + ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; + fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; + + sra %o2,11,%i3 ! (5_1) iarr >>= 11; + + and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; + fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; + + fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); + add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr + fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; + ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; + + fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; + fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); + + st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fand %f28,DA0,%f48 ! 
(3_1) res0 = vis_fand(dres,DA0); + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; + fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; + + fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; + + fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; + + ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; + fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; + + fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; + + fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 ); + fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; + + fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; + + fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; + + fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; + + st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + subcc counter,1,counter + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; + + fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; + + ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; + fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1; + + fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; + + fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); + + fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; + + fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; + + st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; + + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! 
(4_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + + fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; + + ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; + fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; + + fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); + + fmuld %f46,%f28,%f52 ! (5_1) dtmp0 -= dtmp1; + + fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; + st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; + + fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); + + fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0 + st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + + subcc counter,1,counter + bneg .begin + add %i5,stridez,%i5 ! pz += stridez + + ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; + + fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; + st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + + ba .begin + add %i5,stridez,%i5 + + .align 16 +.spec0: + cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000 + bne 1f ! if ( hx0 != 0x7ff00000 ) + ld [%i4+4],%i2 ! lx = ((int*)px)[1]; + + cmp %i2,0 ! lx ? 0 + be 3f ! if ( lx == 0 ) + nop +1: + cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000 + bne 2f ! if ( hy0 != 0x7ff00000 ) + ld [%i3+4],%o2 ! ly = ((int*)py)[1]; + + cmp %o2,0 ! ly ? 0 + be 3f ! if ( ly == 0 ) +2: + ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; + ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; + + ld [%i3],%f2 ! 
((float*)&y0)[0] = ((float*)py)[0]; + add %i4,stridex,%i4 ! px += stridex + ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; + + fabsd %f0,%f0 + + fabsd %f2,%f2 + + fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0); + add %i3,stridey,%i3 ! py += stridey; + st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; + + st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; + add %i5,stridez,%i5 ! pz += stridez + ba .begin1 + sub counter,1,counter +3: + add %i4,stridex,%i4 ! px += stridex + add %i3,stridey,%i3 ! py += stridey + st %g0,[%i5] ! ((int*)pz)[0] = 0; + + add %i5,stridez,%i5 ! pz += stridez; + st %g0,[%i5+4] ! ((int*)pz)[1] = 0; + ba .begin1 + sub counter,1,counter + + .align 16 +.spec1: + and %o1,%o3,%o1 ! (7_0) j0 &= diff0; + + cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000 + bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 < 0x00100000 ) + + ld [%i4+4],%i2 ! lx = ((int*)px)[1]; + or %o7,%l7,%g5 ! ii = hx0 | hy0; + fzero %f0 + + ld [%i3+4],%o2 ! ly = ((int*)py)[1]; + or %i2,%g5,%g5 ! ii |= lx; + + orcc %o2,%g5,%g5 ! ii |= ly; + bnz,a,pn %icc,1f ! if ( ii != 0 ) + sethi %hi(0x00080000),%i2 + + fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0; + + st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0]; + + add %i4,stridex,%i4 ! px += stridex; + add %i3,stridey,%i3 ! py += stridey; + st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1]; + + add %i5,stridez,%i5 ! pz += stridez; + ba .begin1 + sub counter,1,counter +1: + ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0]; + + ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1]; + + ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0]; + + fabsd %f0,%f0 ! x0 = fabs(x0); + ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1]; + + ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; + add %fp,dtmp2,%i4 + add %fp,dtmp3,%i3 + + fabsd %f2,%f2 ! y0 = fabs(y0); + ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51 + + ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52 + cmp %o7,%i2 ! hx0 ? 0x00080000 + bl,a 1f ! if ( hx0 < 0x00080000 ) + fxtod %f0,%f0 ! 
x0 = *(long long*)&x0; + + fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0); + fxtod %f0,%f0 ! x0 = *(long long*)&x0; + faddd %f0,%f10,%f0 ! x0 += D2ON51; +1: + std %f0,[%i4] + + ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022 + cmp %l7,%i2 ! hy0 ? 0x00080000 + bl,a 1f ! if ( hy0 < 0x00080000 ) + fxtod %f2,%f2 ! y0 = *(long long*)&y0; + + fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0); + fxtod %f2,%f2 ! y0 = *(long long*)&y0; + faddd %f2,%f10,%f2 ! y0 += D2ON51; +1: + std %f2,[%i3] + + stx %g5,[%fp+dtmp15] ! D2ONM52 + + ba .cont_spec1 + stx %g1,[%fp+dtmp0] ! D2ON1022 + + .align 16 +.update0: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + ba .cont1 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update1: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont0 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,1 + ble,a 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 1,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + ba .cont1 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update2: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update3: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + fmuld %f20,%f20,%f2 ! 
(7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update4: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 < 0x00100000 ) + sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; + + cmp counter,2 + ble,a 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 2,counter + stx %i3,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + ba .cont4 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update5: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; + + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update6: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update7: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 
0x00100000 + bge,pn %icc,.cont7 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,3 + ble,a 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 3,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + sllx %g1,32,%g1 + ba .cont8 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update9: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; + fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; + + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update10: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update11: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont11 ! 
(0_0) if ( hy0 < 0x00100000 ) + + cmp counter,4 + ble,a 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 4,counter + stx %i3,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + ba .cont12 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update13: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; + + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update14: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update15: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont15 ! 
(0_0) if ( hy0 < 0x00100000 ) + + cmp counter,5 + ble,a 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 5,counter + stx %o0,[%fp+tmp_py] +1: + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + ba .cont16 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update17: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont20 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update18: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! 
(7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont20 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update19: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont19a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,6 + ble,a 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 6,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + ba .cont19b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update21: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont24 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update22: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont24 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update23: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont23a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,7 + ble,a 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 7,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + ba .cont23b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update25: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fsubd %f12,D2ON36,%f54 ! 
(5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont28 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update26: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! 
(7_0) *(long long*)&scl0 = ll; + ba .cont28 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update27: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont27a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,8 + ble,a 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 8,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + ba .cont27b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update29: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; + + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update30: + cmp counter,1 + ble 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 1,counter +1: + fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; + stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; + faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! 
(6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update31: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont31 ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,1 + ble,a 1f + nop + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 1,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; + + fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; + + fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; + + ba .cont32 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update33: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; + + fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! 
(4_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + ba .cont36 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update34: + cmp counter,2 + ble 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 2,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; + + fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; + + sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; + stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; + ba .cont36 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update35: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont35a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,2 + ble,a 1f + nop + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 2,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%o4 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(5_1) dtmp0 = DTWO - dtmp0; + + ba .cont35b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update37: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; + + fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + ba .cont40 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update38: + cmp counter,3 + ble 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 3,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; + fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; + + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! 
(6_1) dtmp0 = DTWO - dtmp0; + + fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; + ba .cont40 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update39: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont39a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,3 + ble,a 1f + nop + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 3,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; + + ba .cont39b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update41: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; + fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; + + fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; + faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0; + + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! 
(5_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + ba .cont44 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update42: + cmp counter,4 + ble 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 4,counter +1: + add %i5,stridez,%i5 ! pz += stridez + stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; + fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; + fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; + faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; + + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 + + sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; + ba .cont44 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update43: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont43a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,4 + ble,a 1f + nop + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 4,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! 
(7_1) dtmp0 = DTWO - dtmp0; + + ba .cont43b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update45: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; + + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + ba .cont48 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update46: + cmp counter,5 + ble 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 5,counter +1: + fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; + + fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! 
(2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; + + fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; + ba .cont48 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update47: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont47a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,5 + ble,a 1f + nop + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 5,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; + + fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; + + ba .cont47b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update49: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; + + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! 
(4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; + ba .cont52 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update50: + cmp counter,6 + ble 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 6,counter +1: + fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; + + fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; + faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; + + fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp11] ! 
(5_0) *(long long*)&scl0 = ll; + ba .cont52 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update51: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont51a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,6 + ble,a 1f + nop + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 6,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; + fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; + + fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; + + ba .cont51b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update53: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; + + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! 
(4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont56 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update54: + cmp counter,7 + ble 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + stx %o0,[%fp+tmp_py] + + mov 7,counter +1: + fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; + st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; + + fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); + + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; + faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; + + fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; + + sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; + ba .cont56 + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update55: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont55a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,7 + ble,a 1f + nop + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + + mov 7,counter + stx %o0,[%fp+tmp_py] +1: + fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; + fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f10,%f46,%f62 ! 
(4_0) res0_lo = x0 + x_hi0; + + fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i2 + fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; + + ba .cont55b + add TBL,TBL_SHIFT+24,%o0 + + .align 16 +.update57: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; + + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont60 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update58: + cmp counter,8 + ble 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + stx %i3,[%fp+tmp_py] + + mov 8,counter +1: + fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; + st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; + faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; + + fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); + + fmuld %f20,%f20,%f0 ! 
(5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; + faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; + + fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; + fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; + + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + + sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; + stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; + ba .cont60 + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.update59: + cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000 + bge,pn %icc,.cont59a ! (0_0) if ( hy0 < 0x00100000 ) + + cmp counter,8 + ble,a 1f + nop + + sub counter,8,counter + st counter,[%fp+tmp_counter] + + stx %i4,[%fp+tmp_px] + + mov 8,counter + stx %i3,[%fp+tmp_py] +1: + fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; + stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; + fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; + + fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; + add %i5,stridez,%i5 ! pz += stridez + faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; + + fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; + sethi %hi(0x3ff00000),%g1 + add TBL,TBL_SHIFT+24,%i4 + fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; + + ba .cont59b + add TBL,TBL_SHIFT+24,%i3 + + .align 16 +.exit: + ret + restore + SET_SIZE(__vrhypot) + |