1 files changed, 411 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/__vatanf.c b/usr/src/lib/libmvec/common/__vatanf.c
new file mode 100644
index 0000000000..bf14dd9ffb
--- /dev/null
+++ b/usr/src/lib/libmvec/common/__vatanf.c
@@ -0,0 +1,411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifdef __RESTRICT
+#define restrict _Restrict
+#else
+#define restrict
+#endif
+
+void
+__vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey)
+{
+  extern const double __vlibm_TBL_atan1[];
+  double  conup0, conup1, conup2;
+  float dummy, ansf = 0.0;
+  float f0, f1, f2;
+  float ans0, ans1, ans2;
+  float poly0, poly1, poly2;
+  float sign0, sign1, sign2;
+  int intf, intz, argcount;
+  int index0, index1, index2; 
+  float z,*yaddr0,*yaddr1,*yaddr2;
+  int *pz = (int *) &z;
+#ifdef UNROLL4
+  double conup3;
+  int index3;
+  float f3, ans3, poly3, sign3, *yaddr3;
+#endif
+
+/*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
+ *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
+
+  static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
+  static const float pone = 1.0f;
+
+  if (n <= 0) return;		/* if no. of elements is 0 or neg, do nothing */
+  do
+  {
+  LOOP0:
+
+	intf     = *(int *) x;		/* upper half of x, as integer */
+	f0 = *x;
+	sign0 = pone;
+    	if (intf < 0) {
+    		intf = intf & ~0x80000000; /* abs(upper argument) */
+		f0 = -f0;
+		sign0 = -sign0;
+	}
+  
+    if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+    {
+      if (intf > 0x7f800000) 
+      {  
+	ansf  = f0- f0; 				/* return NaN if x=NaN*/
+      }
+      else if (intf < 0x31800000) 		/* avoid underflow for small arg */
+      {
+        dummy = 1.0e37 + f0;
+        dummy = dummy;
+	ansf  = f0;
+      }
+      else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
+      {
+        index0= 2;
+        ansf  = __vlibm_TBL_atan1[index0];/* pi/2 up */
+      }
+      *y      = sign0*ansf;		/* store answer, with sign bit 	*/
+      x      += stridex;
+      y      += stridey;
+      argcount = 0;				/* initialize argcount		*/
+      if (--n <=0) break;			/* we are done 			*/
+      goto LOOP0;				/* otherwise, examine next arg  */
+    }
+  
+    if (intf > 0x42800000)			/* if (|x| > 64               	*/
+    { 
+    f0 = -pone/f0;
+	index0 = 2; 				/* point to pi/2 upper, lower	*/
+    }
+    else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
+    {
+      intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
+      pz[0]  = intz;				/* store as a float (z)		*/
+    f0 = (f0 - z)/(pone + f0*z);
+	index0 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
+	index0 = index0+ 4;			/* skip over 0,0,pi/2,pi/2	*/
+    } 
+    else					/* |x| < 1/64 */
+    {
+	index0   = 0;				/* points to 0,0 in table	*/
+    }
+    yaddr0   = y;				/* address to store this answer */ 
+    x       += stridex;				/* point to next arg		*/
+    y       += stridey;				/* point to next result		*/
+    argcount = 1;				/* we now have 1 good argument  */
+    if (--n <=0) 
+    {
+      goto UNROLL;				/* finish up with 1 good arg 	*/
+    }
+
+    /*--------------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------------*/
+
+  LOOP1:
+
+	intf     = *(int *) x;		/* upper half of x, as integer */
+	f1 = *x;
+	sign1 = pone;
+    	if (intf < 0) {
+    		intf = intf & ~0x80000000; /* abs(upper argument) */
+		f1 = -f1;
+		sign1 = -sign1;
+	}
+  
+    if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+    {
+      if (intf > 0x7f800000) 
+      {  
+	ansf   = f1 - f1;			/* return NaN if x=NaN*/
+      }
+      else if (intf < 0x31800000) 		/* avoid underflow for small arg */
+      {
+        dummy = 1.0e37 + f1;
+        dummy = dummy;
+	ansf   = f1;
+      }
+      else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
+      {
+        index1 = 2;
+        ansf   = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
+      }
+      *y      = sign1 * ansf;		/* store answer, with sign bit 	*/
+      x      += stridex;
+      y      += stridey;
+      argcount = 1;				/* we still have 1 good arg 	*/
+      if (--n <=0) 
+      {
+        goto UNROLL;				/* finish up with 1 good arg 	*/
+      }
+      goto LOOP1;				/* otherwise, examine next arg  */
+    }
+  
+    if (intf > 0x42800000)			/* if (|x| > 64               	*/
+    { 
+    f1 = -pone/f1;
+      index1 = 2; 				/* point to pi/2 upper, lower	*/
+    }
+    else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
+    {
+      intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
+      pz[0]  = intz;				/* store as a float (z)		*/
+    f1 = (f1 - z)/(pone + f1*z); 
+      index1 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
+      index1 = index1 + 4;			/* skip over 0,0,pi/2,pi/2	*/
+    }
+    else
+    {
+	index1   = 0;				/* points to 0,0 in table	*/
+    }
+
+    yaddr1   = y;				/* address to store this answer */ 
+    x       += stridex;				/* point to next arg		*/
+    y       += stridey;				/* point to next result		*/
+    argcount = 2;				/* we now have 2 good arguments */
+    if (--n <=0) 
+    {
+      goto UNROLL;				/* finish up with 2 good args 	*/
+    }
+
+    /*--------------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------------*/
+
+  LOOP2:
+
+	intf     = *(int *) x;		/* upper half of x, as integer */
+	f2 = *x;
+	sign2 = pone;
+    	if (intf < 0) {
+    		intf = intf & ~0x80000000; /* abs(upper argument) */
+		f2 = -f2;
+		sign2 = -sign2;
+	}
+  
+    if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+    {
+      if (intf > 0x7f800000) 
+      {  
+	ansf   = f2 - f2;			/* return NaN if x=NaN*/
+      }
+      else if (intf < 0x31800000) 		/* avoid underflow for small arg */
+      {
+        dummy = 1.0e37 + f2;
+        dummy = dummy;
+	ansf   = f2;
+      }
+      else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
+      {
+        index2 = 2;
+        ansf   = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
+      }
+      *y      = sign2 * ansf;		/* store answer, with sign bit 	*/
+      x      += stridex;
+      y      += stridey;
+      argcount = 2;				/* we still have 2 good args 	*/
+      if (--n <=0) 
+      {
+        goto UNROLL;				/* finish up with 2 good args 	*/
+      }
+      goto LOOP2;				/* otherwise, examine next arg  */
+    }
+  
+    if (intf > 0x42800000)			/* if (|x| > 64               	*/
+    { 
+    f2 = -pone/f2;
+      index2 = 2; 				/* point to pi/2 upper, lower	*/
+    }
+    else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
+    {
+      intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
+      pz[0]  = intz;				/* store as a float (z)		*/
+    f2 = (f2 - z)/(pone + f2*z);
+      index2 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
+      index2 = index2 + 4;			/* skip over 0,0,pi/2,pi/2	*/
+    }
+    else
+    {
+	index2   = 0;				/* points to 0,0 in table	*/
+    }
+    yaddr2   = y;				/* address to store this answer */ 
+    x       += stridex;				/* point to next arg		*/
+    y       += stridey;				/* point to next result		*/
+    argcount = 3;				/* we now have 3 good arguments */
+    if (--n <=0) 
+    {
+      goto UNROLL;				/* finish up with 2 good args 	*/
+    }
+
+
+    /*--------------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------------*/
+    /*--------------------------------------------------------------------------*/
+
+#ifdef UNROLL4
+  LOOP3:
+
+	intf     = *(int *) x;		/* upper half of x, as integer */
+	f3 = *x;
+	sign3 = pone;
+    	if (intf < 0) {
+    		intf = intf & ~0x80000000; /* abs(upper argument) */
+		f3 = -f3;
+		sign3 = -sign3;
+	}
+  
+    if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */
+    {
+      if (intf > 0x7f800000) 
+      {  
+	ansf   = f3 - f3;			/* return NaN if x=NaN*/
+      }
+      else if (intf < 0x31800000) 		/* avoid underflow for small arg */
+      {
+        dummy = 1.0e37 + f3;
+        dummy = dummy;
+	ansf   = f3;
+      }
+      else if (intf > 0x5B000000)		/* avoid underflow for big arg  */
+      {
+        index3 = 2;
+        ansf   = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
+      }
+      *y      = sign3 * ansf;		/* store answer, with sign bit 	*/
+      x      += stridex;
+      y      += stridey;
+      argcount = 3;				/* we still have 3 good args 	*/
+      if (--n <=0) 
+      {
+        goto UNROLL;				/* finish up with 3 good args 	*/
+      }
+      goto LOOP3;				/* otherwise, examine next arg  */
+    }
+  
+    if (intf > 0x42800000)			/* if (|x| > 64               	*/
+    { 
+	n3 = -pone;
+        d3 = f3;
+    f3 = n3/d3;
+      index3 = 2; 				/* point to pi/2 upper, lower	*/
+    }
+    else if (intf >= 0x3C800000)		/* if |x| >= (1/64)... 		*/
+    {
+      intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper	*/
+      pz[0]  = intz;				/* store as a float (z)		*/
+	n3     = (f3 - z);
+	d3     = (pone + f3*z); 		/* get reduced argument		*/
+    f3 = n3/d3;
+      index3 = (intz - 0x3C800000) >> 18;	/* (index >> 19) << 1)		*/
+      index3 = index3 + 4;			/* skip over 0,0,pi/2,pi/2	*/
+    }
+    else
+    {
+	n3 = f3;
+	d3 = pone;
+	index3   = 0;				/* points to 0,0 in table	*/
+    }
+    yaddr3   = y;				/* address to store this answer */ 
+    x       += stridex;				/* point to next arg		*/
+    y       += stridey;				/* point to next result		*/
+    argcount = 4;				/* we now have 4 good arguments */
+    if (--n <=0) 
+    {
+      goto UNROLL;				/* finish up with 3 good args 	*/
+    }
+#endif /* UNROLL4 */
+
+/* here is the n-way unrolled section, 
+   but we may actually have less than n 
+   arguments at this point
+*/
+
+UNROLL:
+
+#ifdef UNROLL4
+    if (argcount == 4)
+    {
+    conup0   = __vlibm_TBL_atan1[index0];
+    conup1   = __vlibm_TBL_atan1[index1];
+    conup2   = __vlibm_TBL_atan1[index2];
+    conup3   = __vlibm_TBL_atan1[index3];
+    poly0    = p1*f0*f0*f0 + f0;
+    ans0     = sign0 * (float)(conup0 + poly0);
+    poly1    = p1*f1*f1*f1 + f1;
+    ans1     = sign1 * (float)(conup1 + poly1);
+    poly2    = p1*f2*f2*f2 + f2;
+    ans2     = sign2 * (float)(conup2 + poly2);
+    poly3    = p1*f3*f3*f3 + f3;
+    ans3     = sign3 * (float)(conup3 + poly3);
+    *yaddr0  = ans0;
+    *yaddr1  = ans1;
+    *yaddr2  = ans2;
+    *yaddr3  = ans3;
+    }
+    else 
+#endif
+    if (argcount == 3)
+    {
+    conup0   = __vlibm_TBL_atan1[index0];
+    conup1   = __vlibm_TBL_atan1[index1];
+    conup2   = __vlibm_TBL_atan1[index2];
+    poly0    = p1*f0*f0*f0 + f0;
+    poly1    = p1*f1*f1*f1 + f1;
+    poly2    = p1*f2*f2*f2 + f2;
+    ans0     = sign0 * (float)(conup0 + poly0);
+    ans1     = sign1 * (float)(conup1 + poly1);
+    ans2     = sign2 * (float)(conup2 + poly2);
+    *yaddr0  = ans0;
+    *yaddr1  = ans1;
+    *yaddr2  = ans2;
+    }
+    else 
+    if (argcount == 2)
+    {
+    conup0   = __vlibm_TBL_atan1[index0];
+    conup1   = __vlibm_TBL_atan1[index1];
+    poly0    = p1*f0*f0*f0 + f0;
+    poly1    = p1*f1*f1*f1 + f1;
+    ans0     = sign0 * (float)(conup0 + poly0);
+    ans1     = sign1 * (float)(conup1 + poly1);
+    *yaddr0  = ans0;
+    *yaddr1  = ans1;
+    }
+    else 
+    if (argcount == 1)
+    {
+    conup0   = __vlibm_TBL_atan1[index0];
+    poly0    = p1*f0*f0*f0 + f0;
+    ans0     = sign0 * (float)(conup0 + poly0);
+    *yaddr0  = ans0;
+     }
+
+  }  while (n > 0);
+
+}