Diffstat (limited to 'usr/src/lib/libmvec/common/vis/__vatanf.S')
-rw-r--r-- | usr/src/lib/libmvec/common/vis/__vatanf.S | 1892
1 file changed, 1892 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/vis/__vatanf.S b/usr/src/lib/libmvec/common/vis/__vatanf.S new file mode 100644 index 0000000000..8bd44bc1ba --- /dev/null +++ b/usr/src/lib/libmvec/common/vis/__vatanf.S @@ -0,0 +1,1892 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + + .file "__vatanf.S" + +#include "libm.h" + + RO_DATA + .align 64 + +.CONST_TBL: + .word 0x3fefffff, 0xfffccbbc ! K0 = 9.99999999976686608841e-01 + .word 0xbfd55554, 0x51c6b90f ! K1 = -3.33333091601972730504e-01 + .word 0x3fc98d6d, 0x926596cc ! K2 = 1.99628540499523379702e-01 + .word 0x00020000, 0x00000000 ! DC1 + .word 0xfffc0000, 0x00000000 ! DC2 + .word 0x7ff00000, 0x00000000 ! DC3 + .word 0x3ff00000, 0x00000000 ! DONE = 1.0 + .word 0x40000000, 0x00000000 ! DTWO = 2.0 + +! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127] + + .word 0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6 + .word 0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91 + .word 0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac + .word 0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26 + .word 0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd + .word 0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b + .word 0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741 + .word 0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24 + .word 0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f + .word 0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427 + .word 0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225 + .word 0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca + .word 0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6 + .word 0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f + .word 0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867 + .word 0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397 + .word 0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f + .word 0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805 + .word 0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5 + .word 0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60 + .word 0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce + .word 0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8 + .word 0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c + .word 0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d + .word 0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120 + .word 0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c + .word 0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d + .word 0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30 + .word 0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244 + .word 0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab + .word 0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949 + 
.word 0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804 + + .word 0x3ff00000, 0x00000000 ! 1.0 + .word 0xbff00000, 0x00000000 ! -1.0 + +! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155] + + .word 0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f + .word 0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf + .word 0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2 + .word 0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3 + .word 0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19 + .word 0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30 + .word 0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195 + .word 0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302 + .word 0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a + .word 0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1 + .word 0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c + .word 0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c + .word 0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700 + .word 0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712 + .word 0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9 + .word 0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444 + .word 0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d + .word 0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4 + .word 0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c + .word 0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2 + .word 0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc + .word 0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd + .word 0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4 + .word 0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634 + .word 0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e + .word 0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f + .word 0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8 + .word 0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5 + .word 0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857 + .word 0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd + .word 0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054 + .word 0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0 + .word 0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f + .word 0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc + .word 0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45 + .word 0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f + .word 0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665 + .word 0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0 + .word 0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5 + .word 0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27 + .word 0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38 + .word 0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2 + .word 0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849 + .word 0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff + .word 0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619 + .word 0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa + .word 0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105 + .word 0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7 + .word 0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc + .word 0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb + .word 0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28 + .word 0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1 + .word 0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94 + .word 0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6 + .word 0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395 + .word 0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7 + .word 0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e + .word 0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5 + .word 0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2 + .word 0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886 + .word 0x3ff921f7, 0x54442d18, 0x3ff921f8, 
0x2110f9e5 + .word 0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf + .word 0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f + .word 0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4 + .word 0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b + .word 0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886 + .word 0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2 + .word 0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf + .word 0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5 + .word 0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4 + .word 0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f + .word 0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886 + .word 0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b + .word 0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf + .word 0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2 + .word 0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4 + .word 0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5 + .word 0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886 + +#define DC2 %f2 +#define DTWO %f6 +#define DONE %f52 +#define K0 %f54 +#define K1 %f56 +#define K2 %f58 +#define DC1 %f60 +#define DC3 %f62 + +#define stridex %o2 +#define stridey %o3 +#define MASK_0x7fffffff %i1 +#define MASK_0x100000 %i5 + +#define tmp_px STACK_BIAS-32 +#define tmp_counter STACK_BIAS-24 +#define tmp0 STACK_BIAS-16 +#define tmp1 STACK_BIAS-8 + +#define counter %l1 + +! sizeof temp storage - must be a multiple of 16 for V9 +#define tmps 0x20 + +!-------------------------------------------------------------------- +! !!!!! vatanf algorithm !!!!! +! ux = ((int*)px)[0]; +! ax = ux & 0x7fffffff; +! +! if ( ax < 0x39b89c55 ) +! { +! *(int*)py = ux; +! goto next; +! } +! +! if ( ax > 0x4c700518 ) +! { +! if ( ax > 0x7f800000 ) +! { +! float fpx = fabsf(*px); +! fpx *= fpx; +! *py = fpx; +! goto next; +! } +! +! sign = ux & 0x80000000; +! sign |= pi_2; +! *(int*)py = sign; +! goto next; +! } +! +! ftmp0 = *px; +! x = (double)ftmp0; +! px += stridex; +! y = vis_fpadd32(x,DC1); +! y = vis_fand(y,DC2); +! div = x * y; +! xx = x - y; +! div += DONE; +! i = ((unsigned long long*)&div)[0]; +! y0 = vis_fand(div,DC3); +! i >>= 43; +! i &= 508; +! *(float*)&dtmp0 = *(float*)((char*)parr0 + i); +! y0 = vis_fpsub32(dtmp0, y0); +! dtmp0 = div0 * y0; +! dtmp0 = DTWO - dtmp0; +! y0 *= dtmp0; +! dtmp1 = div0 * y0; +! dtmp1 = DTWO - dtmp1; +! y0 *= dtmp1; +! ax = ux & 0x7fffffff; +! ax += 0x00100000; +! ax >>= 18; +! ax &= -8; +! res = *(double*)((char*)parr1 + ax); +! ux >>= 28; +! ux &= -8; +! dtmp0 = *(double*)((char*)sign_arr + ux); +! res *= dtmp0; +! xx *= y0; +! x2 = xx * xx; +! dtmp0 = K2 * x2; +! dtmp0 += K1; +! dtmp0 *= x2; +! dtmp0 += K0; +! dtmp0 *= xx; +! res += dtmp0; +! ftmp0 = (float)res; +! py[0] = ftmp0; +! 
py += stridey; +!-------------------------------------------------------------------- + + ENTRY(__vatanf) + save %sp,-SA(MINFRAME)-tmps,%sp + PIC_SETUP(l7) + PIC_SET(l7,.CONST_TBL,l2) + + st %i0,[%fp+tmp_counter] + + sllx %i2,2,stridex + sllx %i4,2,stridey + + or %g0,%i3,%o1 + stx %i1,[%fp+tmp_px] + + ldd [%l2],K0 + ldd [%l2+8],K1 + ldd [%l2+16],K2 + ldd [%l2+24],DC1 + ldd [%l2+32],DC2 + ldd [%l2+40],DC3 + ldd [%l2+48],DONE + ldd [%l2+56],DTWO + + add %l2,64,%i4 + add %l2,64+512,%l0 + add %l2,64+512+16-0x1cc*8,%l7 + + sethi %hi(0x100000),MASK_0x100000 + sethi %hi(0x7ffffc00),MASK_0x7fffffff + add MASK_0x7fffffff,1023,MASK_0x7fffffff + + sethi %hi(0x39b89c00),%o4 + add %o4,0x55,%o4 + sethi %hi(0x4c700400),%o5 + add %o5,0x118,%o5 + +.begin: + ld [%fp+tmp_counter],counter + ldx [%fp+tmp_px],%i3 + st %g0,[%fp+tmp_counter] +.begin1: + cmp counter,0 + ble,pn %icc,.exit + nop + + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + + and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff; + lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; + + cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55 + bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 ) + nop + + cmp %l5,%o5 ! (0_0) ax ? 0x4c700518 + bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 ) + nop + + add %i3,stridex,%l5 ! px += stridex; + fstod %f0,%f22 ! (0_0) ftmp0 = *px; + mov %l6,%i3 + + lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; + + and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; + lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; + add %l5,stridex,%l4 ! px += stridex; + fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 + bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 ) + nop +.cont0: + cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 + bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 ) + nop +.cont1: + fstod %f0,%f20 ! (1_0) x = (double)ftmp0; + mov %l6,%l5 + + fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); + + fmuld %f22,%f26,%f32 ! (0_0) div = x * y; + + lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; + fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; + + and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; + lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; + add %l4,stridex,%l3 ! px += stridex; + fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 + bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (0_0) div += done; +.cont2: + cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 + bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 ) + nop +.cont3: + std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%l4 + fstod %f0,%f18 ! (2_0) x = (double)ftmp0; + + fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); + + fmuld %f20,%f26,%f30 ! (1_0) div = x * y; + + lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; + fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; + + and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; + lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; + add %l3,stridex,%i0 ! px += stridex; + fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 + bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (1_0) div += done; +.cont4: + cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 + bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 ) + nop +.cont5: + std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%l3 + fstod %f0,%f16 ! (3_0) x = (double)ftmp0; + + ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); + + fand %f32,DC3,%f24 ! 
(0_0) y0 = vis_fand(div,dconst3); + + srlx %o0,43,%o0 ! (0_0) i >>= 43; + + and %o0,508,%l6 ! (0_0) i &= 508; + + ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f18,%f26,%f28 ! (2_0) div = x * y; + + lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; + fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; + + fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); + + and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; + lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; + add %i0,stridex,%i2 ! px += stridex; + fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 + bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (2_0) div += done; +.cont6: + fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 + bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 ) + nop +.cont7: + std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%i0 + fstod %f0,%f14 ! (4_0) x = (double)ftmp0; + + ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); + + fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); + + fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; + srlx %g1,43,%g1 ! (1_0) i >>= 43; + + and %g1,508,%l6 ! (1_0) i &= 508; + + ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f16,%f26,%f34 ! (3_0) div = x * y; + + lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; + fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; + + fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); + add %i2,stridex,%l2 ! px += stridex; + + fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; + lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; + fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 + bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (3_0) div += done; +.cont8: + fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 + bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 ) + nop +.cont9: + std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; + mov %l6,%i2 + fstod %f0,%f36 ! (5_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; + ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); + + fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); + + fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; + srlx %o0,43,%o0 ! (2_0) i >>= 43; + + and %o0,508,%l6 ! (2_0) i &= 508; + fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; + + ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f14,%f26,%f32 ! (4_0) div = x * y; + + lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; + fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; + add %l2,stridex,%g5 ! px += stridex; + fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; + lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; + fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 + bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (4_0) div += done; +.cont10: + fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 + bg,pn %icc,.update11 ! 
(6_0) if ( ax > 0x4c700518 ) + nop +.cont11: + fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; + mov %l6,%l2 + std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f10 ! (6_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; + ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); + + fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); + + fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; + srlx %g1,43,%g1 ! (3_0) i >>= 43; + fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (3_0) i &= 508; + mov %i3,%o7 + fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; + + ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f36,%f26,%f30 ! (5_0) div = x * y; + srl %o7,28,%g1 ! (0_0) ux >>= 28; + add %g5,stridex,%i3 ! px += stridex; + + fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; + fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; + and %g1,-8,%g1 ! (0_0) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; + fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); + + cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 + bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (5_0) div += done; +.cont12: + fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 + bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; +.cont13: + fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; + srl %o0,18,%o7 ! (0_0) ax >>= 18; + std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f8 ! (7_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (0_0) ux &= -8; + ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); + + add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax; + mov %l6,%g5 + ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; + srlx %o0,43,%o0 ! (4_0) i >>= 43; + ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); + fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); + + fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; + and %o0,508,%l6 ! (4_0) i &= 508; + mov %l5,%o7 + fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; + + fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; + srl %o7,28,%l5 ! (1_0) ux >>= 28; + ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + + fmuld %f10,%f26,%f28 ! (6_0) div = x * y; + faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; + + subcc counter,8,counter + bneg,pn %icc,.tail + or %g0,%o1,%o0 + + add %fp,tmp0,%g1 + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + + ba .main_loop + add %i3,stridex,%l5 ! px += stridex; + + .align 16 +.main_loop: + fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; + and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (7_1) py[0] = ftmp0; + fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; + + fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; + srl %o7,28,%o7 ! (1_0) ux >>= 28; + add %o1,MASK_0x100000,%g1 ! 
(1_1) ax += 0x00100000; + fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff; + lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px; + fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; + cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55 + bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (6_1) div += done; +.cont14: + fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; + cmp %o1,%o5 ! (0_0) ax ? 0x4c700518 + bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; +.cont15: + fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; + srl %g1,18,%o1 ! (1_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f22 ! (0_0) ftmp0 = *px; + + fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; + and %o1,-8,%o1 ! (1_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2); + + ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); + and %o7,-8,%o7 ! (1_1) ux &= -8; + mov %l6,%i3 + faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; + + fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; + nop + ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); + fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); + + fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (5_1) i >>= 43; + mov %l4,%o7 + fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (5_1) i &= 508; + nop + bn,pn %icc,.exit + fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; + + fmuld %f8,%f26,%f34 ! (7_1) div = x * y; + srl %o7,28,%o1 ! (2_1) ux >>= 28; + lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (0_1) py[0] = ftmp0; + fsubd %f8,%f26,%f8 ! (7_1) xx = x - y; + + fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; + add %l5,stridex,%l4 ! px += stridex; + add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; + fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff; + lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px; + fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; + cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55 + bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (7_1) div += done; +.cont16: + fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (1_0) ax ? 0x4c700518 + bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; +.cont17: + fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; + srl %o0,18,%o7 ! (2_1) ax >>= 18; + std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f20 ! (1_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0; + ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; + and %o1,-8,%o1 ! (2_1) ux &= -8; + fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; + and %o7,-8,%o7 ! (2_1) ax &= -8; + ldd [%l0+%o1],%f48 ! 
(2_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); + mov %l6,%l5 + fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); + + fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (6_1) i >>= 43; + mov %l3,%o7 + fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (6_1) i &= 508; + add %l4,stridex,%l3 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; + + fmuld %f22,%f26,%f32 ! (0_0) div = x * y; + srl %o7,28,%o1 ! (3_1) ux >>= 28; + lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (1_1) py[0] = ftmp0; + fsubd %f22,%f26,%f22 ! (0_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; + and %o1,-8,%o1 ! (3_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff; + lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px; + fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; + cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55 + bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (0_0) div += done; +.cont18: + fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (2_0) ax ? 0x4c700518 + bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; +.cont19: + fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; + srl %g1,18,%o7 ! (3_1) ax >>= 18; + std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f18 ! (2_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (3_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (7_1) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; + mov %l6,%l4 + ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; + ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) + nop + fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3); + + fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (7_1) i >>= 43; + mov %i0,%o7 + fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (7_1) i &= 508; + add %l3,stridex,%i0 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; + + fmuld %f20,%f26,%f30 ! (1_0) div = x * y; + srl %o7,28,%o1 ! (4_1) ux >>= 28; + lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (2_1) py[0] = ftmp0; + fsubd %f20,%f26,%f20 ! (1_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! 
(4_1) ax += 0x00100000; + and %o1,-8,%o1 ! (4_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff; + lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px; + fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; + cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55 + bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (1_0) div += done; +.cont20: + fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (3_0) ax ? 0x4c700518 + bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; +.cont21: + fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; + srl %o0,18,%o7 ! (4_1) ax >>= 18; + std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f16 ! (3_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (4_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; + nop + ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); + mov %l6,%l3 + fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; + fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3); + + fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (0_0) i >>= 43; + mov %i2,%o7 + fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (0_0) i &= 508; + add %i0,stridex,%i2 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res; + + fmuld %f18,%f26,%f28 ! (2_0) div = x * y; + srl %o7,28,%o1 ! (5_1) ux >>= 28; + lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (3_1) py[0] = ftmp0; + fsubd %f18,%f26,%f18 ! (2_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; + and %o1,-8,%o1 ! (5_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff; + lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px; + fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; + cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55 + bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f28,%f28 ! (2_0) div += done; +.cont22: + fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (4_0) ax ? 0x4c700518 + bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; +.cont23: + fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; + srl %g1,18,%o7 ! (5_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f14 ! (4_0) x = (double)ftmp0; + + fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (5_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; + mov %l6,%i0 + ldd [%l0+%o1],%f48 ! 
(5_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; + fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3); + + fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; + srlx %g1,43,%g1 ! (1_0) i >>= 43; + mov %l2,%o7 + fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (1_0) i &= 508; + add %i2,stridex,%l2 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; + + fmuld %f16,%f26,%f34 ! (3_0) div = x * y; + srl %o7,28,%o1 ! (6_1) ux >>= 28; + lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (4_1) py[0] = ftmp0; + fsubd %f16,%f26,%f16 ! (3_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; + and %o1,-8,%o1 ! (6_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff; + lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px; + fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; + cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55 + bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f34,%f34 ! (3_0) div += done; +.cont24: + fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (5_0) ax ? 0x4c700518 + bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; +.cont25: + fmuld %f8,%f26,%f8 ! (7_1) xx *= y0; + srl %o0,18,%o7 ! (6_1) ax >>= 18; + std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f36 ! (5_0) x = (double)ftmp0; + + fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (6_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; + mov %l6,%i2 + ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3); + + fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx; + srlx %o0,43,%o0 ! (2_0) i >>= 43; + mov %g5,%o7 + fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0; + + and %o0,508,%l6 ! (2_0) i &= 508; + add %l2,stridex,%g5 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; + + fmuld %f14,%f26,%f32 ! (4_0) div = x * y; + srl %o7,28,%o1 ! (7_1) ux >>= 28; + lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff; + st %f12,[%g1] ! (5_1) py[0] = ftmp0; + fsubd %f14,%f26,%f14 ! (4_0) xx = x - y; + + fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1; + add %o7,MASK_0x100000,%g1 ! 
(7_1) ax += 0x00100000; + and %o1,-8,%o1 ! (7_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff; + lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px; + fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; + cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55 + bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f32,%f32 ! (4_0) div += done; +.cont26: + fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (6_0) ax ? 0x4c700518 + bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1; +.cont27: + fmuld %f22,%f26,%f22 ! (0_0) xx *= y0; + srl %g1,18,%o7 ! (7_1) ax >>= 18; + std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f10 ! (6_0) x = (double)ftmp0; + + fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (7_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; + mov %l6,%l2 + ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax); + nop + fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2; + fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3); + + fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx; + srlx %g1,43,%g1 ! (3_0) i >>= 43; + mov %i3,%o7 + fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0; + + and %g1,508,%l6 ! (3_0) i &= 508; + add %g5,stridex,%i3 ! px += stridex; + bn,pn %icc,.exit + fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; + + fmuld %f36,%f26,%f30 ! (5_0) div = x * y; + srl %o7,28,%o1 ! (0_0) ux >>= 28; + lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0]; + faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0; + + fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff; + st %f12,[%o0] ! (6_1) py[0] = ftmp0; + fsubd %f36,%f26,%f36 ! (5_0) xx = x - y; + + fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1; + add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000; + and %o1,-8,%o1 ! (0_0) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0; + and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff; + lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px; + fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1); + + fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx; + cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55 + bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 ) + faddd DONE,%f30,%f30 ! (5_0) div += done; +.cont28: + fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0; + cmp %o7,%o5 ! (7_0) ax ? 0x4c700518 + bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 ) + faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1; +.cont29: + fmuld %f20,%f26,%f20 ! (1_0) xx *= y0; + srl %o0,18,%o7 ! (0_0) ax >>= 18; + std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0]; + fstod %f0,%f8 ! (7_0) x = (double)ftmp0; + + fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (0_0) ux &= -8; + ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0]; + fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2); + + faddd %f48,%f44,%f12 ! 
(7_1) res += dtmp0; + subcc counter,8,counter + ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux); + bn,pn %icc,.exit + + fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2; + mov %l6,%g5 + ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax); + fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3); + + fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx; + srlx %o0,43,%l6 ! (4_0) i >>= 43; + mov %l5,%o7 + fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0; + + add %g1,stridey,%o0 ! py += stridey; + and %l6,508,%l6 ! (4_0) i &= 508; + bn,pn %icc,.exit + fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0; + ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + add %i3,stridex,%l5 ! px += stridex; + fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res; + + lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0]; + fmuld %f10,%f26,%f28 ! (6_0) div = x * y; + bpos,pt %icc,.main_loop + faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0; + + srl %o7,28,%l5 ! (1_0) ux >>= 28; + st %f12,[%g1] ! (7_1) py[0] = ftmp0; + +.tail: + addcc counter,7,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fsubd %f10,%f26,%f10 ! (6_1) xx = x - y; + and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff; + fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2; + + fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000; + and %l5,-8,%l5 ! (1_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0; + + fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx; + faddd DONE,%f28,%f28 ! (6_1) div += done; + + fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1; + + fmuld %f18,%f26,%f18 ! (2_1) xx *= y0; + srl %g1,18,%o7 ! (1_1) ax >>= 18; + std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0]; + + fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (1_1) ax &= -8; + ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0]; + + faddd %f48,%f44,%f12 ! (0_1) res += dtmp0; + add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax; + ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2; + fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3); + ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0; + srlx %g1,43,%g1 ! (5_1) i >>= 43; + + and %g1,508,%l6 ! (5_1) i &= 508; + mov %l4,%o7 + fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res; + + srl %o7,28,%l4 ! (2_1) ux >>= 28; + st %f12,[%o0] ! (0_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff; + + fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000; + and %l4,-8,%l4 ! (2_1) ux &= -8; + fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0; + + fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx; + + fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1; + + fmuld %f16,%f26,%f16 ! (3_1) xx *= y0; + srl %o0,18,%o7 ! (2_1) ax >>= 18; + + fmuld %f32,%f40,%f32 ! 
(4_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (2_1) ax &= -8; + ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0]; + + faddd %f48,%f44,%f12 ! (1_1) res += dtmp0; + add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax; + ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2; + fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3); + ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0; + srlx %o0,43,%o0 ! (6_1) i >>= 43; + + and %o0,508,%l6 ! (6_1) i &= 508; + mov %l3,%o7 + fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i); + fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res; + + srl %o7,28,%l3 ! (3_1) ux >>= 28; + st %f12,[%g1] ! (1_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%g1 ! (3_1) ax = ux & 0x7fffffff; + + fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000; + and %l3,-8,%l3 ! (3_1) ux &= -8; + fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0); + + fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0; + + fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx; + + fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0; + faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1; + + fmuld %f14,%f26,%f14 ! (4_1) xx *= y0; + srl %g1,18,%o7 ! (3_1) ax >>= 18; + + fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (3_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (2_1) res += dtmp0; + add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax; + ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax) + + fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx; + fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0; + + mov %i0,%o7 + fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res; + + srl %o7,28,%i0 ! (4_1) ux >>= 28; + st %f12,[%o0] ! (2_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff; + + fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1; + add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000; + and %i0,-8,%i0 ! (4_1) ux &= -8; + + fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0; + + fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1; + + fmuld %f36,%f26,%f36 ! (5_1) xx *= y0; + srl %o0,18,%o7 ! (4_1) ax >>= 18; + + fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0; + and %o7,-8,%o7 ! (4_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (3_1) res += dtmp0; + add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax; + ldd [%l0+%i0],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx; + + mov %i2,%o7 + fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1; + + fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + fdtos %f12,%f12 ! 
(3_1) ftmp0 = (float)res; + + srl %o7,28,%i2 ! (5_1) ux >>= 28; + st %f12,[%g1] ! (3_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff; + + fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1; + add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000; + and %i2,-8,%i2 ! (5_1) ux &= -8; + + fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1; + + fmuld %f10,%f26,%f10 ! (6_1) xx *= y0; + srl %g1,18,%o7 ! (5_1) ax >>= 18; + + and %o7,-8,%o7 ! (5_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (4_1) res += dtmp0; + add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax; + ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx; + + mov %l2,%o7 + + fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0; + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res; + + srl %o7,28,%l2 ! (6_1) ux >>= 28; + st %f12,[%o0] ! (4_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%g1,%o1 + + fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2; + and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff; + + add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000; + and %l2,-8,%l2 ! (6_1) ux &= -8; + + fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx; + + faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1; + + srl %o0,18,%o7 ! (6_1) ax >>= 18; + + and %o7,-8,%o7 ! (6_1) ax &= -8; + + faddd %f48,%f44,%f12 ! (5_1) res += dtmp0; + add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax; + ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux); + + fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2; + ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax); + + fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0; + add %g1,stridey,%o0 ! py += stridey; + fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res; + + st %f12,[%g1] ! (5_1) py[0] = ftmp0; + faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0; + + subcc counter,1,counter + bneg,pn %icc,.begin + or %g0,%o0,%o1 + + fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx; + + faddd %f48,%f44,%f12 ! (6_1) res += dtmp0; + + add %o0,stridey,%g1 ! py += stridey; + fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res; + + st %f12,[%o0] ! (6_1) py[0] = ftmp0; + + ba .begin + or %g0,%g1,%o1 ! py += stridey; + +.exit: + ret + restore %g0,%g0,%g0 + + .align 16 +.spec0: + add %i3,stridex,%i3 ! px += stridex; + sub counter,1,counter + st %l6,[%o1] ! *(int*)py = ux; + + ba .begin1 + add %o1,stridey,%o1 ! py += stridey; + + .align 16 +.spec1: + sethi %hi(0x7f800000),%l3 + sethi %hi(0x3fc90c00),%l4 ! pi_2 + + sethi %hi(0x80000000),%o0 + add %l4,0x3db,%l4 ! pi_2 + + cmp %l5,%l3 ! if ( ax > 0x7f800000 ) + bg,a,pn %icc,1f + fabss %f0,%f0 ! fpx = fabsf(*px); + + and %l6,%o0,%l6 ! sign = ux & 0x80000000; + + or %l6,%l4,%l6 ! sign |= pi_2; + + add %i3,stridex,%i3 ! px += stridex; + sub counter,1,counter + st %l6,[%o1] ! *(int*)py = sign; + + ba .begin1 + add %o1,stridey,%o1 ! py += stridey; + +1: + fmuls %f0,%f0,%f0 ! fpx *= fpx; + + add %i3,stridex,%i3 ! px += stridex + sub counter,1,counter + st %f0,[%o1] ! *py = fpx; + + ba .begin1 + add %o1,stridey,%o1 ! 
py += stridey; + + .align 16 +.update0: + cmp counter,1 + fzeros %f0 + ble,a .cont0 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont0 + or %g0,1,counter + + .align 16 +.update1: + cmp counter,1 + fzeros %f0 + ble,a .cont1 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont1 + or %g0,1,counter + + .align 16 +.update2: + cmp counter,2 + fzeros %f0 + ble,a .cont2 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont2 + or %g0,2,counter + + .align 16 +.update3: + cmp counter,2 + fzeros %f0 + ble,a .cont3 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont3 + or %g0,2,counter + + .align 16 +.update4: + cmp counter,3 + fzeros %f0 + ble,a .cont4 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont4 + or %g0,3,counter + + .align 16 +.update5: + cmp counter,3 + fzeros %f0 + ble,a .cont5 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont5 + or %g0,3,counter + + .align 16 +.update6: + cmp counter,4 + fzeros %f0 + ble,a .cont6 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont6 + or %g0,4,counter + + .align 16 +.update7: + cmp counter,4 + fzeros %f0 + ble,a .cont7 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont7 + or %g0,4,counter + + .align 16 +.update8: + cmp counter,5 + fzeros %f0 + ble,a .cont8 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont8 + or %g0,5,counter + + .align 16 +.update9: + cmp counter,5 + fzeros %f0 + ble,a .cont9 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont9 + or %g0,5,counter + + .align 16 +.update10: + cmp counter,6 + fzeros %f0 + ble,a .cont10 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont10 + or %g0,6,counter + + .align 16 +.update11: + cmp counter,6 + fzeros %f0 + ble,a .cont11 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont11 + or %g0,6,counter + + .align 16 +.update12: + cmp counter,7 + fzeros %f0 + ble,a .cont12 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont12 + or %g0,7,counter + + .align 16 +.update13: + cmp counter,7 + fzeros %f0 + ble,a .cont13 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont13 + or %g0,7,counter + + .align 16 +.update14: + cmp counter,0 + fzeros %f0 + ble,a .cont14 + sethi %hi(0x3fffffff),%l6 + + sub counter,0,counter + st counter,[%fp+tmp_counter] + + stx %i3,[%fp+tmp_px] + sethi 
%hi(0x3fffffff),%l6 + ba .cont14 + or %g0,0,counter + + .align 16 +.update15: + cmp counter,0 + fzeros %f0 + ble,a .cont15 + sethi %hi(0x3fffffff),%l6 + + sub counter,0,counter + st counter,[%fp+tmp_counter] + + stx %i3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont15 + or %g0,0,counter + + .align 16 +.update16: + cmp counter,1 + fzeros %f0 + ble,a .cont16 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont16 + or %g0,1,counter + + .align 16 +.update17: + cmp counter,1 + fzeros %f0 + ble,a .cont17 + sethi %hi(0x3fffffff),%l6 + + sub counter,1,counter + st counter,[%fp+tmp_counter] + + stx %l5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont17 + or %g0,1,counter + + .align 16 +.update18: + cmp counter,2 + fzeros %f0 + ble,a .cont18 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont18 + or %g0,2,counter + + .align 16 +.update19: + cmp counter,2 + fzeros %f0 + ble,a .cont19 + sethi %hi(0x3fffffff),%l6 + + sub counter,2,counter + st counter,[%fp+tmp_counter] + + stx %l4,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont19 + or %g0,2,counter + + .align 16 +.update20: + cmp counter,3 + fzeros %f0 + ble,a .cont20 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont20 + or %g0,3,counter + + .align 16 +.update21: + cmp counter,3 + fzeros %f0 + ble,a .cont21 + sethi %hi(0x3fffffff),%l6 + + sub counter,3,counter + st counter,[%fp+tmp_counter] + + stx %l3,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont21 + or %g0,3,counter + + .align 16 +.update22: + cmp counter,4 + fzeros %f0 + ble,a .cont22 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont22 + or %g0,4,counter + + .align 16 +.update23: + cmp counter,4 + fzeros %f0 + ble,a .cont23 + sethi %hi(0x3fffffff),%l6 + + sub counter,4,counter + st counter,[%fp+tmp_counter] + + stx %i0,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont23 + or %g0,4,counter + + .align 16 +.update24: + cmp counter,5 + fzeros %f0 + ble,a .cont24 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont24 + or %g0,5,counter + + .align 16 +.update25: + cmp counter,5 + fzeros %f0 + ble,a .cont25 + sethi %hi(0x3fffffff),%l6 + + sub counter,5,counter + st counter,[%fp+tmp_counter] + + stx %i2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont25 + or %g0,5,counter + + .align 16 +.update26: + cmp counter,6 + fzeros %f0 + ble,a .cont26 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont26 + or %g0,6,counter + + .align 16 +.update27: + cmp counter,6 + fzeros %f0 + ble,a .cont27 + sethi %hi(0x3fffffff),%l6 + + sub counter,6,counter + st counter,[%fp+tmp_counter] + + stx %l2,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont27 + or %g0,6,counter + + .align 16 +.update28: + cmp counter,7 + fzeros %f0 + ble,a .cont28 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont28 + or %g0,7,counter + + .align 16 +.update29: + cmp counter,7 + fzeros %f0 + ble,a .cont29 + sethi %hi(0x3fffffff),%l6 + + sub counter,7,counter + 
st counter,[%fp+tmp_counter] + + stx %g5,[%fp+tmp_px] + sethi %hi(0x3fffffff),%l6 + ba .cont29 + or %g0,7,counter + + SET_SIZE(__vatanf) + |
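
For readers following the pseudocode comment embedded above, here is a minimal, portable C sketch of the scalar recipe each vector element goes through. It is an illustration only, not part of the commit: the names vatanf_scalar_sketch, gen_parr1 and PI_2_F are invented; the break point b is computed with ordinary integer masking instead of vis_fpadd32/vis_fand; the correction term uses a plain divide where the assembly seeds a reciprocal from the 128-entry parr0 table and refines it with two Newton-Raphson steps (y0 *= 2 - div*y0); and atan(b) comes from libm where the assembly looks it up in the parr1 table. The unrolled main loop additionally keeps eight such computations in flight per pass.

#include <math.h>
#include <stdint.h>
#include <string.h>

/* polynomial coefficients, copied from the constant table at the top of the file */
static const double K0 =  9.99999999976686608841e-01;
static const double K1 = -3.33333091601972730504e-01;
static const double K2 =  1.99628540499523379702e-01;

static const float PI_2_F = 1.57079632679489661923f;	/* 0x3fc90fdb, as in .spec1 */

/*
 * Reproducing the parr1 break-point table from the formula quoted in the
 * source comment: parr1[i] = atan((double)f), where the float f has the
 * bit pattern (i + 460) << 21, i = 0..155.
 */
static void
gen_parr1(double parr1[156])
{
	int i;

	for (i = 0; i < 156; i++) {
		uint32_t bits = (uint32_t)(i + 460) << 21;
		float f;

		memcpy(&f, &bits, sizeof (f));
		parr1[i] = atan((double)f);
	}
}

/*
 * Scalar sketch of the per-element computation; names and structure are
 * illustrative, not the VIS implementation.
 */
static float
vatanf_scalar_sketch(float fx)
{
	uint32_t ux, ax;
	uint64_t lx;
	double x, b, t, t2, res;

	memcpy(&ux, &fx, sizeof (ux));
	ax = ux & 0x7fffffff;

	if (ax < 0x39b89c55)			/* |x| tiny: atanf(x) == x */
		return (fx);

	if (ax > 0x4c700518) {			/* |x| huge, Inf or NaN */
		if (ax > 0x7f800000)		/* NaN: x*x raises invalid, returns NaN */
			return (fabsf(fx) * fabsf(fx));
		return (copysignf(PI_2_F, fx));	/* result rounds to +-pi/2 */
	}

	x = (double)fx;

	/*
	 * Break point b: x with its mantissa rounded to the two leading bits.
	 * The assembly does this with vis_fpadd32(x, DC1) / vis_fand(., DC2).
	 */
	memcpy(&lx, &x, sizeof (lx));
	lx += 0x0002000000000000ULL;		/* DC1: round the kept field */
	lx &= 0xfffc000000000000ULL;		/* DC2: keep sign, exponent, 2 bits */
	memcpy(&b, &lx, sizeof (b));

	/*
	 * atan(x) = atan(b) + atan((x - b) / (1 + x*b)).
	 * The assembly avoids this divide: it seeds 1/(1 + x*b) from parr0
	 * and refines it with two Newton-Raphson steps, y0 *= (2 - div*y0).
	 */
	t = (x - b) / (1.0 + x * b);

	res = atan(b);				/* assembly: parr1 lookup, sign from sign_arr */
	t2 = t * t;
	res += ((K2 * t2 + K1) * t2 + K0) * t;	/* atan(t) by a 3-term polynomial */

	return ((float)res);
}

gen_parr1() only re-derives the break-point table from the formula given in the source comment; the parr0 table is likewise documented there as the high word of 1/m for the 128 interval midpoints m in [1, 2), which is what makes the two Newton steps sufficient for single-precision accuracy.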