diff options
Diffstat (limited to 'usr/src/lib/libmvec/common/__vatan2.c')
-rw-r--r-- | usr/src/lib/libmvec/common/__vatan2.c | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/__vatan2.c b/usr/src/lib/libmvec/common/__vatan2.c new file mode 100644 index 0000000000..49500b8f91 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vatan2.c @@ -0,0 +1,453 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +extern const double __vlibm_TBL_atan2[]; + +static const double +zero = 0.0, +twom3 = 0.125, +one = 1.0, +two110 = 1.2980742146337069071e+33, +pio4 = 7.8539816339744827900e-01, +pio2 = 1.5707963267948965580e+00, +pio2_lo = 6.1232339957367658860e-17, +pi = 3.1415926535897931160e+00, +pi_lo = 1.2246467991473531772e-16, +p1 = -3.33333333333327571893331786354179101074860633009e-0001, +p2 = 1.99999999942671624230086497610394721817438631379e-0001, +p3 = -1.42856965565428636896183013324727205980484158356e-0001, +p4 = 1.10894981496317081405107718475040168084164825641e-0001; + +/* Don't __ the following; acomp will handle it */ +extern double fabs(double); + +void +__vatan2(int n, double * restrict y, int stridey, double * restrict x, + int stridex, double * restrict z, int stridez) +{ + double x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2; + double ah0, ah1, ah2, al0, al1, al2, t0, t1, t2; + double z0, z1, z2, sign0, sign1, sign2, xh; + int i, k, hx, hy, sx, sy; + + do + { +loop0: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign0 = (sy)? -one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy > hx || (hy == hx && LO(y) > LO(x))) + { + i = hx; + hx = hy; + hy = i; + x0 = fabs(*y); + y0 = fabs(*x); + if (sx) + { + ah0 = pio2; + al0 = pio2_lo; + } + else + { + ah0 = -pio2; + al0 = -pio2_lo; + sign0 = -sign0; + } + } + else + { + x0 = fabs(*x); + y0 = fabs(*y); + if (sx) + { + ah0 = -pi; + al0 = -pi_lo; + sign0 = -sign0; + } + else + ah0 = al0 = zero; + } + + if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) + { + if (hx >= 0x7ff00000) + { + if ((hx ^ 0x7ff00000) | LO(&x0)) /* nan */ + ah0 = x0 + y0; + else if (hy >= 0x7ff00000) + ah0 += pio4; + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + if (hx - hy >= 0x03600000) + { + if ((int) ah0 == 0) + ah0 = y0 / x0; + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + y0 *= twom3; + x0 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if (hy < 0x00100000) + { + if ((hy | LO(&y0)) == 0) + { + *z = sign0 * ah0; + x += stridex; + y += stridey; + z += stridez; + i = 0; + if (--n <= 0) + break; + goto loop0; + } + y0 *= two110; + x0 *= two110; + hy = HI(&y0); + hx = HI(&x0); + } + + k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; + if (k > 644) + k = 644; + ah0 += __vlibm_TBL_atan2[k]; + al0 += __vlibm_TBL_atan2[k+1]; + t0 = __vlibm_TBL_atan2[k+2]; + + xh = x0; + LO(&xh) = 0; + z0 = ((y0 - t0 * xh) - t0 * (x0 - xh)) / (x0 + y0 * t0); + pz0 = z; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + +loop1: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign1 = (sy)? -one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy > hx || (hy == hx && LO(y) > LO(x))) + { + i = hx; + hx = hy; + hy = i; + x1 = fabs(*y); + y1 = fabs(*x); + if (sx) + { + ah1 = pio2; + al1 = pio2_lo; + } + else + { + ah1 = -pio2; + al1 = -pio2_lo; + sign1 = -sign1; + } + } + else + { + x1 = fabs(*x); + y1 = fabs(*y); + if (sx) + { + ah1 = -pi; + al1 = -pi_lo; + sign1 = -sign1; + } + else + ah1 = al1 = zero; + } + + if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) + { + if (hx >= 0x7ff00000) + { + if ((hx ^ 0x7ff00000) | LO(&x1)) /* nan */ + ah1 = x1 + y1; + else if (hy >= 0x7ff00000) + ah1 += pio4; + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + if (hx - hy >= 0x03600000) + { + if ((int) ah1 == 0) + ah1 = y1 / x1; + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + y1 *= twom3; + x1 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if (hy < 0x00100000) + { + if ((hy | LO(&y1)) == 0) + { + *z = sign1 * ah1; + x += stridex; + y += stridey; + z += stridez; + i = 1; + if (--n <= 0) + break; + goto loop1; + } + y1 *= two110; + x1 *= two110; + hy = HI(&y1); + hx = HI(&x1); + } + + k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; + if (k > 644) + k = 644; + ah1 += __vlibm_TBL_atan2[k]; + al1 += __vlibm_TBL_atan2[k+1]; + t1 = __vlibm_TBL_atan2[k+2]; + + xh = x1; + LO(&xh) = 0; + z1 = ((y1 - t1 * xh) - t1 * (x1 - xh)) / (x1 + y1 * t1); + pz1 = z; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + +loop2: + hy = HI(y); + sy = hy & 0x80000000; + hy &= ~0x80000000; + sign2 = (sy)? -one : one; + + hx = HI(x); + sx = hx & 0x80000000; + hx &= ~0x80000000; + + if (hy > hx || (hy == hx && LO(y) > LO(x))) + { + i = hx; + hx = hy; + hy = i; + x2 = fabs(*y); + y2 = fabs(*x); + if (sx) + { + ah2 = pio2; + al2 = pio2_lo; + } + else + { + ah2 = -pio2; + al2 = -pio2_lo; + sign2 = -sign2; + } + } + else + { + x2 = fabs(*x); + y2 = fabs(*y); + if (sx) + { + ah2 = -pi; + al2 = -pi_lo; + sign2 = -sign2; + } + else + ah2 = al2 = zero; + } + + if (hx >= 0x7fe00000 || hx - hy >= 0x03600000) + { + if (hx >= 0x7ff00000) + { + if ((hx ^ 0x7ff00000) | LO(&x2)) /* nan */ + ah2 = x2 + y2; + else if (hy >= 0x7ff00000) + ah2 += pio4; + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + if (hx - hy >= 0x03600000) + { + if ((int) ah2 == 0) + ah2 = y2 / x2; + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + y2 *= twom3; + x2 *= twom3; + hy -= 0x00300000; + hx -= 0x00300000; + } + else if (hy < 0x00100000) + { + if ((hy | LO(&y2)) == 0) + { + *z = sign2 * ah2; + x += stridex; + y += stridey; + z += stridez; + i = 2; + if (--n <= 0) + break; + goto loop2; + } + y2 *= two110; + x2 *= two110; + hy = HI(&y2); + hx = HI(&x2); + } + + k = (((hx - hy) + 0x00004000) >> 13) & ~0x3; + if (k > 644) + k = 644; + ah2 += __vlibm_TBL_atan2[k]; + al2 += __vlibm_TBL_atan2[k+1]; + t2 = __vlibm_TBL_atan2[k+2]; + + xh = x2; + LO(&xh) = 0; + z2 = ((y2 - t2 * xh) - t2 * (x2 - xh)) / (x2 + y2 * t2); + pz2 = z; + + x0 = z0 * z0; + x1 = z1 * z1; + x2 = z2 * z2; + + t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 * + (p2 + x0 * (p3 + x0 * p4))))); + t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 * + (p2 + x1 * (p3 + x1 * p4))))); + t2 = ah2 + (z2 + (al2 + (z2 * x2) * (p1 + x2 * + (p2 + x2 * (p3 + x2 * p4))))); + + *pz0 = sign0 * t0; + *pz1 = sign1 * t1; + *pz2 = sign2 * t2; + + x += stridex; + y += stridey; + z += stridez; + i = 0; + } while (--n > 0); + + if (i > 0) + { + if (i > 1) + { + x1 = z1 * z1; + t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 * + (p2 + x1 * (p3 + x1 * p4))))); + *pz1 = sign1 * t1; + } + + x0 = z0 * z0; + t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 * + (p2 + x0 * (p3 + x0 * p4))))); + *pz0 = sign0 * t0; + } +} |