diff options
Diffstat (limited to 'usr/src/lib/libmvec/common/__vatanf.c')
-rw-r--r-- | usr/src/lib/libmvec/common/__vatanf.c | 411 |
1 files changed, 411 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/__vatanf.c b/usr/src/lib/libmvec/common/__vatanf.c new file mode 100644 index 0000000000..bf14dd9ffb --- /dev/null +++ b/usr/src/lib/libmvec/common/__vatanf.c @@ -0,0 +1,411 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +void +__vatanf(int n, float * restrict x, int stridex, float * restrict y, int stridey) +{ + extern const double __vlibm_TBL_atan1[]; + double conup0, conup1, conup2; + float dummy, ansf = 0.0; + float f0, f1, f2; + float ans0, ans1, ans2; + float poly0, poly1, poly2; + float sign0, sign1, sign2; + int intf, intz, argcount; + int index0, index1, index2; + float z,*yaddr0,*yaddr1,*yaddr2; + int *pz = (int *) &z; +#ifdef UNROLL4 + double conup3; + int index3; + float f3, ans3, poly3, sign3, *yaddr3; +#endif + +/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 + * Error = -3.08254E-18 On the interval |x| < 1/64 */ + + static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ; + static const float pone = 1.0f; + + if (n <= 0) return; /* if no. of elements is 0 or neg, do nothing */ + do + { + LOOP0: + + intf = *(int *) x; /* upper half of x, as integer */ + f0 = *x; + sign0 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f0 = -f0; + sign0 = -sign0; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f0- f0; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f0; + dummy = dummy; + ansf = f0; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index0= 2; + ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */ + } + *y = sign0*ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 0; /* initialize argcount */ + if (--n <=0) break; /* we are done */ + goto LOOP0; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + f0 = -pone/f0; + index0 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f0 = (f0 - z)/(pone + f0*z); + index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */ + } + else /* |x| < 1/64 */ + { + index0 = 0; /* points to 0,0 in table */ + } + yaddr0 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 1; /* we now have 1 good argument */ + if (--n <=0) + { + goto UNROLL; /* finish up with 1 good arg */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP1: + + intf = *(int *) x; /* upper half of x, as integer */ + f1 = *x; + sign1 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f1 = -f1; + sign1 = -sign1; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f1 - f1; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f1; + dummy = dummy; + ansf = f1; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index1 = 2; + ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */ + } + *y = sign1 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 1; /* we still have 1 good arg */ + if (--n <=0) + { + goto UNROLL; /* finish up with 1 good arg */ + } + goto LOOP1; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + f1 = -pone/f1; + index1 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f1 = (f1 - z)/(pone + f1*z); + index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + index1 = 0; /* points to 0,0 in table */ + } + + yaddr1 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 2; /* we now have 2 good arguments */ + if (--n <=0) + { + goto UNROLL; /* finish up with 2 good args */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP2: + + intf = *(int *) x; /* upper half of x, as integer */ + f2 = *x; + sign2 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f2 = -f2; + sign2 = -sign2; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f2 - f2; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f2; + dummy = dummy; + ansf = f2; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index2 = 2; + ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */ + } + *y = sign2 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 2; /* we still have 2 good args */ + if (--n <=0) + { + goto UNROLL; /* finish up with 2 good args */ + } + goto LOOP2; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + f2 = -pone/f2; + index2 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + f2 = (f2 - z)/(pone + f2*z); + index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + index2 = 0; /* points to 0,0 in table */ + } + yaddr2 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 3; /* we now have 3 good arguments */ + if (--n <=0) + { + goto UNROLL; /* finish up with 2 good args */ + } + + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + +#ifdef UNROLL4 + LOOP3: + + intf = *(int *) x; /* upper half of x, as integer */ + f3 = *x; + sign3 = pone; + if (intf < 0) { + intf = intf & ~0x80000000; /* abs(upper argument) */ + f3 = -f3; + sign3 = -sign3; + } + + if ((intf > 0x5B000000) || (intf < 0x31800000)) /* filter out special cases */ + { + if (intf > 0x7f800000) + { + ansf = f3 - f3; /* return NaN if x=NaN*/ + } + else if (intf < 0x31800000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f3; + dummy = dummy; + ansf = f3; + } + else if (intf > 0x5B000000) /* avoid underflow for big arg */ + { + index3 = 2; + ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */ + } + *y = sign3 * ansf; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 3; /* we still have 3 good args */ + if (--n <=0) + { + goto UNROLL; /* finish up with 3 good args */ + } + goto LOOP3; /* otherwise, examine next arg */ + } + + if (intf > 0x42800000) /* if (|x| > 64 */ + { + n3 = -pone; + d3 = f3; + f3 = n3/d3; + index3 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3C800000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */ + pz[0] = intz; /* store as a float (z) */ + n3 = (f3 - z); + d3 = (pone + f3*z); /* get reduced argument */ + f3 = n3/d3; + index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */ + index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + else + { + n3 = f3; + d3 = pone; + index3 = 0; /* points to 0,0 in table */ + } + yaddr3 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 4; /* we now have 4 good arguments */ + if (--n <=0) + { + goto UNROLL; /* finish up with 3 good args */ + } +#endif /* UNROLL4 */ + +/* here is the n-way unrolled section, + but we may actually have less than n + arguments at this point +*/ + +UNROLL: + +#ifdef UNROLL4 + if (argcount == 4) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + conup2 = __vlibm_TBL_atan1[index2]; + conup3 = __vlibm_TBL_atan1[index3]; + poly0 = p1*f0*f0*f0 + f0; + ans0 = sign0 * (float)(conup0 + poly0); + poly1 = p1*f1*f1*f1 + f1; + ans1 = sign1 * (float)(conup1 + poly1); + poly2 = p1*f2*f2*f2 + f2; + ans2 = sign2 * (float)(conup2 + poly2); + poly3 = p1*f3*f3*f3 + f3; + ans3 = sign3 * (float)(conup3 + poly3); + *yaddr0 = ans0; + *yaddr1 = ans1; + *yaddr2 = ans2; + *yaddr3 = ans3; + } + else +#endif + if (argcount == 3) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + conup2 = __vlibm_TBL_atan1[index2]; + poly0 = p1*f0*f0*f0 + f0; + poly1 = p1*f1*f1*f1 + f1; + poly2 = p1*f2*f2*f2 + f2; + ans0 = sign0 * (float)(conup0 + poly0); + ans1 = sign1 * (float)(conup1 + poly1); + ans2 = sign2 * (float)(conup2 + poly2); + *yaddr0 = ans0; + *yaddr1 = ans1; + *yaddr2 = ans2; + } + else + if (argcount == 2) + { + conup0 = __vlibm_TBL_atan1[index0]; + conup1 = __vlibm_TBL_atan1[index1]; + poly0 = p1*f0*f0*f0 + f0; + poly1 = p1*f1*f1*f1 + f1; + ans0 = sign0 * (float)(conup0 + poly0); + ans1 = sign1 * (float)(conup1 + poly1); + *yaddr0 = ans0; + *yaddr1 = ans1; + } + else + if (argcount == 1) + { + conup0 = __vlibm_TBL_atan1[index0]; + poly0 = p1*f0*f0*f0 + f0; + ans0 = sign0 * (float)(conup0 + poly0); + *yaddr0 = ans0; + } + + } while (n > 0); + +} |