summaryrefslogtreecommitdiff
path: root/usr/src/lib/libmvec/common/__vatanf.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/lib/libmvec/common/__vatanf.c')
-rw-r--r--usr/src/lib/libmvec/common/__vatanf.c411
1 files changed, 411 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/__vatanf.c b/usr/src/lib/libmvec/common/__vatanf.c
new file mode 100644
index 0000000000..bf14dd9ffb
--- /dev/null
+++ b/usr/src/lib/libmvec/common/__vatanf.c
@@ -0,0 +1,411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+void
+__vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey)
+{
+ extern const double __vlibm_TBL_atan1[];
+ double conup0, conup1, conup2;
+ float dummy, ansf = 0.0;
+ float f0, f1, f2;
+ float ans0, ans1, ans2;
+ float poly0, poly1, poly2;
+ float sign0, sign1, sign2;
+ int intf, intz, argcount;
+ int index0, index1, index2;
+ float z,*yaddr0,*yaddr1,*yaddr2;
+ int *pz = (int *) &z;
+#ifdef UNROLL4
+ double conup3;
+ int index3;
+ float f3, ans3, poly3, sign3, *yaddr3;
+#endif
+
+/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
+ * Error = -3.08254E-18 On the interval |x| < 1/64 */
+
+ static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
+ static const float pone = 1.0f;
+
+ if (n <= 0) return; /* if no. of elements is 0 or neg, do nothing */
+ do
+ {
+ LOOP0:
+
+ intf = *(int *) x; /* upper half of x, as integer */
+ f0 = *x;
+ sign0 = pone;
+ if (intf < 0) {
+ intf = intf & ~0x80000000; /* abs(upper argument) */
+ f0 = -f0;
+ sign0 = -sign0;
+ }
+
+ if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+ {
+ if (intf > 0x7f800000)
+ {
+ ansf = f0- f0; /* return NaN if x=NaN*/
+ }
+ else if (intf < 0x31800000) /* avoid underflow for small arg */
+ {
+ dummy = 1.0e37 + f0;
+ dummy = dummy;
+ ansf = f0;
+ }
+ else if (intf > 0x5B000000) /* avoid underflow for big arg */
+ {
+ index0= 2;
+ ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */
+ }
+ *y = sign0*ansf; /* store answer, with sign bit */
+ x += stridex;
+ y += stridey;
+ argcount = 0; /* initialize argcount */
+ if (--n <=0) break; /* we are done */
+ goto LOOP0; /* otherwise, examine next arg */
+ }
+
+ if (intf > 0x42800000) /* if (|x| > 64 */
+ {
+ f0 = -pone/f0;
+ index0 = 2; /* point to pi/2 upper, lower */
+ }
+ else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
+ {
+ intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
+ pz[0] = intz; /* store as a float (z) */
+ f0 = (f0 - z)/(pone + f0*z);
+ index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
+ index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */
+ }
+ else /* |x| < 1/64 */
+ {
+ index0 = 0; /* points to 0,0 in table */
+ }
+ yaddr0 = y; /* address to store this answer */
+ x += stridex; /* point to next arg */
+ y += stridey; /* point to next result */
+ argcount = 1; /* we now have 1 good argument */
+ if (--n <=0)
+ {
+ goto UNROLL; /* finish up with 1 good arg */
+ }
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+ LOOP1:
+
+ intf = *(int *) x; /* upper half of x, as integer */
+ f1 = *x;
+ sign1 = pone;
+ if (intf < 0) {
+ intf = intf & ~0x80000000; /* abs(upper argument) */
+ f1 = -f1;
+ sign1 = -sign1;
+ }
+
+ if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+ {
+ if (intf > 0x7f800000)
+ {
+ ansf = f1 - f1; /* return NaN if x=NaN*/
+ }
+ else if (intf < 0x31800000) /* avoid underflow for small arg */
+ {
+ dummy = 1.0e37 + f1;
+ dummy = dummy;
+ ansf = f1;
+ }
+ else if (intf > 0x5B000000) /* avoid underflow for big arg */
+ {
+ index1 = 2;
+ ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
+ }
+ *y = sign1 * ansf; /* store answer, with sign bit */
+ x += stridex;
+ y += stridey;
+ argcount = 1; /* we still have 1 good arg */
+ if (--n <=0)
+ {
+ goto UNROLL; /* finish up with 1 good arg */
+ }
+ goto LOOP1; /* otherwise, examine next arg */
+ }
+
+ if (intf > 0x42800000) /* if (|x| > 64 */
+ {
+ f1 = -pone/f1;
+ index1 = 2; /* point to pi/2 upper, lower */
+ }
+ else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
+ {
+ intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
+ pz[0] = intz; /* store as a float (z) */
+ f1 = (f1 - z)/(pone + f1*z);
+ index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
+ index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */
+ }
+ else
+ {
+ index1 = 0; /* points to 0,0 in table */
+ }
+
+ yaddr1 = y; /* address to store this answer */
+ x += stridex; /* point to next arg */
+ y += stridey; /* point to next result */
+ argcount = 2; /* we now have 2 good arguments */
+ if (--n <=0)
+ {
+ goto UNROLL; /* finish up with 2 good args */
+ }
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+ LOOP2:
+
+ intf = *(int *) x; /* upper half of x, as integer */
+ f2 = *x;
+ sign2 = pone;
+ if (intf < 0) {
+ intf = intf & ~0x80000000; /* abs(upper argument) */
+ f2 = -f2;
+ sign2 = -sign2;
+ }
+
+ if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+ {
+ if (intf > 0x7f800000)
+ {
+ ansf = f2 - f2; /* return NaN if x=NaN*/
+ }
+ else if (intf < 0x31800000) /* avoid underflow for small arg */
+ {
+ dummy = 1.0e37 + f2;
+ dummy = dummy;
+ ansf = f2;
+ }
+ else if (intf > 0x5B000000) /* avoid underflow for big arg */
+ {
+ index2 = 2;
+ ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
+ }
+ *y = sign2 * ansf; /* store answer, with sign bit */
+ x += stridex;
+ y += stridey;
+ argcount = 2; /* we still have 2 good args */
+ if (--n <=0)
+ {
+ goto UNROLL; /* finish up with 2 good args */
+ }
+ goto LOOP2; /* otherwise, examine next arg */
+ }
+
+ if (intf > 0x42800000) /* if (|x| > 64 */
+ {
+ f2 = -pone/f2;
+ index2 = 2; /* point to pi/2 upper, lower */
+ }
+ else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
+ {
+ intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
+ pz[0] = intz; /* store as a float (z) */
+ f2 = (f2 - z)/(pone + f2*z);
+ index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
+ index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */
+ }
+ else
+ {
+ index2 = 0; /* points to 0,0 in table */
+ }
+ yaddr2 = y; /* address to store this answer */
+ x += stridex; /* point to next arg */
+ y += stridey; /* point to next result */
+ argcount = 3; /* we now have 3 good arguments */
+ if (--n <=0)
+ {
+ goto UNROLL; /* finish up with 2 good args */
+ }
+
+
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+ /*--------------------------------------------------------------------------*/
+
+#ifdef UNROLL4
+ LOOP3:
+
+ intf = *(int *) x; /* upper half of x, as integer */
+ f3 = *x;
+ sign3 = pone;
+ if (intf < 0) {
+ intf = intf & ~0x80000000; /* abs(upper argument) */
+ f3 = -f3;
+ sign3 = -sign3;
+ }
+
+ if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+ {
+ if (intf > 0x7f800000)
+ {
+ ansf = f3 - f3; /* return NaN if x=NaN*/
+ }
+ else if (intf < 0x31800000) /* avoid underflow for small arg */
+ {
+ dummy = 1.0e37 + f3;
+ dummy = dummy;
+ ansf = f3;
+ }
+ else if (intf > 0x5B000000) /* avoid underflow for big arg */
+ {
+ index3 = 2;
+ ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
+ }
+ *y = sign3 * ansf; /* store answer, with sign bit */
+ x += stridex;
+ y += stridey;
+ argcount = 3; /* we still have 3 good args */
+ if (--n <=0)
+ {
+ goto UNROLL; /* finish up with 3 good args */
+ }
+ goto LOOP3; /* otherwise, examine next arg */
+ }
+
+ if (intf > 0x42800000) /* if (|x| > 64 */
+ {
+ n3 = -pone;
+ d3 = f3;
+ f3 = n3/d3;
+ index3 = 2; /* point to pi/2 upper, lower */
+ }
+ else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */
+ {
+ intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
+ pz[0] = intz; /* store as a float (z) */
+ n3 = (f3 - z);
+ d3 = (pone + f3*z); /* get reduced argument */
+ f3 = n3/d3;
+ index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
+ index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */
+ }
+ else
+ {
+ n3 = f3;
+ d3 = pone;
+ index3 = 0; /* points to 0,0 in table */
+ }
+ yaddr3 = y; /* address to store this answer */
+ x += stridex; /* point to next arg */
+ y += stridey; /* point to next result */
+ argcount = 4; /* we now have 4 good arguments */
+ if (--n <=0)
+ {
+ goto UNROLL; /* finish up with 3 good args */
+ }
+#endif /* UNROLL4 */
+
+/* here is the n-way unrolled section,
+ but we may actually have less than n
+ arguments at this point
+*/
+
+UNROLL:
+
+#ifdef UNROLL4
+ if (argcount == 4)
+ {
+ conup0 = __vlibm_TBL_atan1[index0];
+ conup1 = __vlibm_TBL_atan1[index1];
+ conup2 = __vlibm_TBL_atan1[index2];
+ conup3 = __vlibm_TBL_atan1[index3];
+ poly0 = p1*f0*f0*f0 + f0;
+ ans0 = sign0 * (float)(conup0 + poly0);
+ poly1 = p1*f1*f1*f1 + f1;
+ ans1 = sign1 * (float)(conup1 + poly1);
+ poly2 = p1*f2*f2*f2 + f2;
+ ans2 = sign2 * (float)(conup2 + poly2);
+ poly3 = p1*f3*f3*f3 + f3;
+ ans3 = sign3 * (float)(conup3 + poly3);
+ *yaddr0 = ans0;
+ *yaddr1 = ans1;
+ *yaddr2 = ans2;
+ *yaddr3 = ans3;
+ }
+ else
+#endif
+ if (argcount == 3)
+ {
+ conup0 = __vlibm_TBL_atan1[index0];
+ conup1 = __vlibm_TBL_atan1[index1];
+ conup2 = __vlibm_TBL_atan1[index2];
+ poly0 = p1*f0*f0*f0 + f0;
+ poly1 = p1*f1*f1*f1 + f1;
+ poly2 = p1*f2*f2*f2 + f2;
+ ans0 = sign0 * (float)(conup0 + poly0);
+ ans1 = sign1 * (float)(conup1 + poly1);
+ ans2 = sign2 * (float)(conup2 + poly2);
+ *yaddr0 = ans0;
+ *yaddr1 = ans1;
+ *yaddr2 = ans2;
+ }
+ else
+ if (argcount == 2)
+ {
+ conup0 = __vlibm_TBL_atan1[index0];
+ conup1 = __vlibm_TBL_atan1[index1];
+ poly0 = p1*f0*f0*f0 + f0;
+ poly1 = p1*f1*f1*f1 + f1;
+ ans0 = sign0 * (float)(conup0 + poly0);
+ ans1 = sign1 * (float)(conup1 + poly1);
+ *yaddr0 = ans0;
+ *yaddr1 = ans1;
+ }
+ else
+ if (argcount == 1)
+ {
+ conup0 = __vlibm_TBL_atan1[index0];
+ poly0 = p1*f0*f0*f0 + f0;
+ ans0 = sign0 * (float)(conup0 + poly0);
+ *yaddr0 = ans0;
+ }
+
+ } while (n > 0);
+
+}