summaryrefslogtreecommitdiff
path: root/usr/src/lib/libmvec/common/__vatan2.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libmvec/common/__vatan2.c')
-rw-r--r--usr/src/lib/libmvec/common/__vatan2.c453
1 files changed, 453 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/__vatan2.c b/usr/src/lib/libmvec/common/__vatan2.c
new file mode 100644
index 0000000000..49500b8f91
--- /dev/null
+++ b/usr/src/lib/libmvec/common/__vatan2.c
@@ -0,0 +1,453 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/isa_defs.h>
+#include "libm_inlines.h"
+
+#ifdef _LITTLE_ENDIAN
+#define HI(x) *(1+(int*)x)
+#define LO(x) *(unsigned*)x
+#else
+#define HI(x) *(int*)x
+#define LO(x) *(1+(unsigned*)x)
+#endif
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+extern const double __vlibm_TBL_atan2[];
+
+static const double
+zero = 0.0,
+twom3 = 0.125,
+one = 1.0,
+two110 = 1.2980742146337069071e+33,
+pio4 = 7.8539816339744827900e-01,
+pio2 = 1.5707963267948965580e+00,
+pio2_lo = 6.1232339957367658860e-17,
+pi = 3.1415926535897931160e+00,
+pi_lo = 1.2246467991473531772e-16,
+p1 = -3.33333333333327571893331786354179101074860633009e-0001,
+p2 = 1.99999999942671624230086497610394721817438631379e-0001,
+p3 = -1.42856965565428636896183013324727205980484158356e-0001,
+p4 = 1.10894981496317081405107718475040168084164825641e-0001;
+
+/* Don't __ the following; acomp will handle it */
+extern double fabs(double);
+
+void
+__vatan2(int n, double * restrict y, int stridey, double * restrict x,
+ int stridex, double * restrict z, int stridez)
+{
+ double x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2;
+ double ah0, ah1, ah2, al0, al1, al2, t0, t1, t2;
+ double z0, z1, z2, sign0, sign1, sign2, xh;
+ int i, k, hx, hy, sx, sy;
+
+ do
+ {
+loop0:
+ hy = HI(y);
+ sy = hy & 0x80000000;
+ hy &= ~0x80000000;
+ sign0 = (sy)? -one : one;
+
+ hx = HI(x);
+ sx = hx & 0x80000000;
+ hx &= ~0x80000000;
+
+ if (hy > hx || (hy == hx && LO(y) > LO(x)))
+ {
+ i = hx;
+ hx = hy;
+ hy = i;
+ x0 = fabs(*y);
+ y0 = fabs(*x);
+ if (sx)
+ {
+ ah0 = pio2;
+ al0 = pio2_lo;
+ }
+ else
+ {
+ ah0 = -pio2;
+ al0 = -pio2_lo;
+ sign0 = -sign0;
+ }
+ }
+ else
+ {
+ x0 = fabs(*x);
+ y0 = fabs(*y);
+ if (sx)
+ {
+ ah0 = -pi;
+ al0 = -pi_lo;
+ sign0 = -sign0;
+ }
+ else
+ ah0 = al0 = zero;
+ }
+
+ if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
+ {
+ if (hx >= 0x7ff00000)
+ {
+ if ((hx ^ 0x7ff00000) | LO(&x0)) /* nan */
+ ah0 = x0 + y0;
+ else if (hy >= 0x7ff00000)
+ ah0 += pio4;
+ *z = sign0 * ah0;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 0;
+ if (--n <= 0)
+ break;
+ goto loop0;
+ }
+ if (hx - hy >= 0x03600000)
+ {
+ if ((int) ah0 == 0)
+ ah0 = y0 / x0;
+ *z = sign0 * ah0;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 0;
+ if (--n <= 0)
+ break;
+ goto loop0;
+ }
+ y0 *= twom3;
+ x0 *= twom3;
+ hy -= 0x00300000;
+ hx -= 0x00300000;
+ }
+ else if (hy < 0x00100000)
+ {
+ if ((hy | LO(&y0)) == 0)
+ {
+ *z = sign0 * ah0;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 0;
+ if (--n <= 0)
+ break;
+ goto loop0;
+ }
+ y0 *= two110;
+ x0 *= two110;
+ hy = HI(&y0);
+ hx = HI(&x0);
+ }
+
+ k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
+ if (k > 644)
+ k = 644;
+ ah0 += __vlibm_TBL_atan2[k];
+ al0 += __vlibm_TBL_atan2[k+1];
+ t0 = __vlibm_TBL_atan2[k+2];
+
+ xh = x0;
+ LO(&xh) = 0;
+ z0 = ((y0 - t0 * xh) - t0 * (x0 - xh)) / (x0 + y0 * t0);
+ pz0 = z;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 1;
+ if (--n <= 0)
+ break;
+
+loop1:
+ hy = HI(y);
+ sy = hy & 0x80000000;
+ hy &= ~0x80000000;
+ sign1 = (sy)? -one : one;
+
+ hx = HI(x);
+ sx = hx & 0x80000000;
+ hx &= ~0x80000000;
+
+ if (hy > hx || (hy == hx && LO(y) > LO(x)))
+ {
+ i = hx;
+ hx = hy;
+ hy = i;
+ x1 = fabs(*y);
+ y1 = fabs(*x);
+ if (sx)
+ {
+ ah1 = pio2;
+ al1 = pio2_lo;
+ }
+ else
+ {
+ ah1 = -pio2;
+ al1 = -pio2_lo;
+ sign1 = -sign1;
+ }
+ }
+ else
+ {
+ x1 = fabs(*x);
+ y1 = fabs(*y);
+ if (sx)
+ {
+ ah1 = -pi;
+ al1 = -pi_lo;
+ sign1 = -sign1;
+ }
+ else
+ ah1 = al1 = zero;
+ }
+
+ if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
+ {
+ if (hx >= 0x7ff00000)
+ {
+ if ((hx ^ 0x7ff00000) | LO(&x1)) /* nan */
+ ah1 = x1 + y1;
+ else if (hy >= 0x7ff00000)
+ ah1 += pio4;
+ *z = sign1 * ah1;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 1;
+ if (--n <= 0)
+ break;
+ goto loop1;
+ }
+ if (hx - hy >= 0x03600000)
+ {
+ if ((int) ah1 == 0)
+ ah1 = y1 / x1;
+ *z = sign1 * ah1;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 1;
+ if (--n <= 0)
+ break;
+ goto loop1;
+ }
+ y1 *= twom3;
+ x1 *= twom3;
+ hy -= 0x00300000;
+ hx -= 0x00300000;
+ }
+ else if (hy < 0x00100000)
+ {
+ if ((hy | LO(&y1)) == 0)
+ {
+ *z = sign1 * ah1;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 1;
+ if (--n <= 0)
+ break;
+ goto loop1;
+ }
+ y1 *= two110;
+ x1 *= two110;
+ hy = HI(&y1);
+ hx = HI(&x1);
+ }
+
+ k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
+ if (k > 644)
+ k = 644;
+ ah1 += __vlibm_TBL_atan2[k];
+ al1 += __vlibm_TBL_atan2[k+1];
+ t1 = __vlibm_TBL_atan2[k+2];
+
+ xh = x1;
+ LO(&xh) = 0;
+ z1 = ((y1 - t1 * xh) - t1 * (x1 - xh)) / (x1 + y1 * t1);
+ pz1 = z;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 2;
+ if (--n <= 0)
+ break;
+
+loop2:
+ hy = HI(y);
+ sy = hy & 0x80000000;
+ hy &= ~0x80000000;
+ sign2 = (sy)? -one : one;
+
+ hx = HI(x);
+ sx = hx & 0x80000000;
+ hx &= ~0x80000000;
+
+ if (hy > hx || (hy == hx && LO(y) > LO(x)))
+ {
+ i = hx;
+ hx = hy;
+ hy = i;
+ x2 = fabs(*y);
+ y2 = fabs(*x);
+ if (sx)
+ {
+ ah2 = pio2;
+ al2 = pio2_lo;
+ }
+ else
+ {
+ ah2 = -pio2;
+ al2 = -pio2_lo;
+ sign2 = -sign2;
+ }
+ }
+ else
+ {
+ x2 = fabs(*x);
+ y2 = fabs(*y);
+ if (sx)
+ {
+ ah2 = -pi;
+ al2 = -pi_lo;
+ sign2 = -sign2;
+ }
+ else
+ ah2 = al2 = zero;
+ }
+
+ if (hx >= 0x7fe00000 || hx - hy >= 0x03600000)
+ {
+ if (hx >= 0x7ff00000)
+ {
+ if ((hx ^ 0x7ff00000) | LO(&x2)) /* nan */
+ ah2 = x2 + y2;
+ else if (hy >= 0x7ff00000)
+ ah2 += pio4;
+ *z = sign2 * ah2;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 2;
+ if (--n <= 0)
+ break;
+ goto loop2;
+ }
+ if (hx - hy >= 0x03600000)
+ {
+ if ((int) ah2 == 0)
+ ah2 = y2 / x2;
+ *z = sign2 * ah2;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 2;
+ if (--n <= 0)
+ break;
+ goto loop2;
+ }
+ y2 *= twom3;
+ x2 *= twom3;
+ hy -= 0x00300000;
+ hx -= 0x00300000;
+ }
+ else if (hy < 0x00100000)
+ {
+ if ((hy | LO(&y2)) == 0)
+ {
+ *z = sign2 * ah2;
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 2;
+ if (--n <= 0)
+ break;
+ goto loop2;
+ }
+ y2 *= two110;
+ x2 *= two110;
+ hy = HI(&y2);
+ hx = HI(&x2);
+ }
+
+ k = (((hx - hy) + 0x00004000) >> 13) & ~0x3;
+ if (k > 644)
+ k = 644;
+ ah2 += __vlibm_TBL_atan2[k];
+ al2 += __vlibm_TBL_atan2[k+1];
+ t2 = __vlibm_TBL_atan2[k+2];
+
+ xh = x2;
+ LO(&xh) = 0;
+ z2 = ((y2 - t2 * xh) - t2 * (x2 - xh)) / (x2 + y2 * t2);
+ pz2 = z;
+
+ x0 = z0 * z0;
+ x1 = z1 * z1;
+ x2 = z2 * z2;
+
+ t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
+ (p2 + x0 * (p3 + x0 * p4)))));
+ t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
+ (p2 + x1 * (p3 + x1 * p4)))));
+ t2 = ah2 + (z2 + (al2 + (z2 * x2) * (p1 + x2 *
+ (p2 + x2 * (p3 + x2 * p4)))));
+
+ *pz0 = sign0 * t0;
+ *pz1 = sign1 * t1;
+ *pz2 = sign2 * t2;
+
+ x += stridex;
+ y += stridey;
+ z += stridez;
+ i = 0;
+ } while (--n > 0);
+
+ if (i > 0)
+ {
+ if (i > 1)
+ {
+ x1 = z1 * z1;
+ t1 = ah1 + (z1 + (al1 + (z1 * x1) * (p1 + x1 *
+ (p2 + x1 * (p3 + x1 * p4)))));
+ *pz1 = sign1 * t1;
+ }
+
+ x0 = z0 * z0;
+ t0 = ah0 + (z0 + (al0 + (z0 * x0) * (p1 + x0 *
+ (p2 + x0 * (p3 + x0 * p4)))));
+ *pz0 = sign0 * t0;
+ }
+}