diff options
Diffstat (limited to 'usr/src/lib/libmvec/common/__vatan.c')
-rw-r--r-- | usr/src/lib/libmvec/common/__vatan.c | 317 |
1 files changed, 317 insertions, 0 deletions
diff --git a/usr/src/lib/libmvec/common/__vatan.c b/usr/src/lib/libmvec/common/__vatan.c new file mode 100644 index 0000000000..f2a7ae1190 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vatan.c @@ -0,0 +1,317 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include "libm_inlines.h" + +#ifdef _LITTLE_ENDIAN +#define HI(x) *(1+(int*)x) +#define LO(x) *(unsigned*)x +#else +#define HI(x) *(int*)x +#define LO(x) *(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +void +__vatan(int n, double * restrict x, int stridex, double * restrict y, int stridey) +{ + double f, z, ans = 0.0L, ansu, ansl, tmp, poly, conup, conlo, dummy; + double f1, ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1; + double f2, ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2; + int index, sign, intf, intflo, intz, argcount; + int index1, sign1 = 0; + int index2, sign2 = 0; + double *yaddr,*yaddr1 = 0,*yaddr2 = 0; + extern const double __vlibm_TBL_atan1[]; + extern double fabs(double); + +/* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7 + * Error = -3.08254E-18 On the interval |x| < 1/64 */ + +/* define dummy names for readability. Use parray to help compiler optimize loads */ +#define p3 parray[0] +#define p2 parray[1] +#define p1 parray[2] + + static const double parray[] = { + -1.428029046844299722E-01, /* p[3] */ + 1.999999917247000615E-01, /* p[2] */ + -3.333333333329292858E-01, /* p[1] */ + 1.0, /* not used for p[0], though */ + -1.0, /* used to flip sign of answer */ + }; + + if (n <= 0) return; /* if no. of elements is 0 or neg, do nothing */ + do + { + LOOP0: + + f = fabs(*x); /* fetch argument */ + intf = HI(x); /* upper half of x, as integer */ + intflo = LO(x); /* lower half of x, as integer */ + sign = intf & 0x80000000; /* sign of argument */ + intf = intf & ~0x80000000; /* abs(upper argument) */ + + if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */ + { + if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0))) + { + ans = f - f; /* return NaN if x=NaN*/ + } + else if (intf < 0x3e300000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f; + dummy = dummy; + ans = f; + } + else if (intf > 0x43600000) /* avoid underflow for big arg */ + { + index = 2; + ans = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */ + } + *y = (sign) ? -ans: ans; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 0; /* initialize argcount */ + if (--n <=0) break; /* we are done */ + goto LOOP0; /* otherwise, examine next arg */ + } + + index = 0; /* points to 0,0 in table */ + if (intf > 0x40500000) /* if (|x| > 64 */ + { f = -1.0/f; + index = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3f900000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ + HI(&z) = intz; /* store as a double (z) */ + LO(&z) = 0; /* ...lower */ + f = (f - z)/(1.0 + f*z); /* get reduced argument */ + index = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ + index = index + 4; /* skip over 0,0,pi/2,pi/2 */ + } + yaddr = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 1; /* we now have 1 good argument */ + if (--n <=0) + { + f1 = 0.0; /* put dummy values in args 1,2 */ + f2 = 0.0; + index1 = 0; + index2 = 0; + goto UNROLL3; /* finish up with 1 good arg */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP1: + + f1 = fabs(*x); /* fetch argument */ + intf = HI(x); /* upper half of x, as integer */ + intflo = LO(x); /* lower half of x, as integer */ + sign1 = intf & 0x80000000; /* sign of argument */ + intf = intf & ~0x80000000; /* abs(upper argument) */ + + if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */ + { + if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0))) + { + ans = f1 - f1; /* return NaN if x=NaN*/ + } + else if (intf < 0x3e300000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f1; + dummy = dummy; + ans = f1; + } + else if (intf > 0x43600000) /* avoid underflow for big arg */ + { + index1 = 2; + ans = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low */ + } + *y = (sign1) ? -ans: ans; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 1; /* we still have 1 good arg */ + if (--n <=0) + { + f1 = 0.0; /* put dummy values in args 1,2 */ + f2 = 0.0; + index1 = 0; + index2 = 0; + goto UNROLL3; /* finish up with 1 good arg */ + } + goto LOOP1; /* otherwise, examine next arg */ + } + + index1 = 0; /* points to 0,0 in table */ + if (intf > 0x40500000) /* if (|x| > 64 */ + { f1 = -1.0/f1; + index1 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3f900000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ + HI(&z) = intz; /* store as a double (z) */ + LO(&z) = 0; /* ...lower */ + f1 = (f1 - z)/(1.0 + f1*z); /* get reduced argument */ + index1 = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ + index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + yaddr1 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 2; /* we now have 2 good arguments */ + if (--n <=0) + { + f2 = 0.0; /* put dummy value in arg 2 */ + index2 = 0; + goto UNROLL3; /* finish up with 2 good args */ + } + + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + /*--------------------------------------------------------------------------*/ + + LOOP2: + + f2 = fabs(*x); /* fetch argument */ + intf = HI(x); /* upper half of x, as integer */ + intflo = LO(x); /* lower half of x, as integer */ + sign2 = intf & 0x80000000; /* sign of argument */ + intf = intf & ~0x80000000; /* abs(upper argument) */ + + if ((intf > 0x43600000) || (intf < 0x3e300000)) /* filter out special cases */ + { + if ( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0))) + { + ans = f2 - f2; /* return NaN if x=NaN*/ + } + else if (intf < 0x3e300000) /* avoid underflow for small arg */ + { + dummy = 1.0e37 + f2; + dummy = dummy; + ans = f2; + } + else if (intf > 0x43600000) /* avoid underflow for big arg */ + { + index2 = 2; + ans = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low */ + } + *y = (sign2) ? -ans: ans; /* store answer, with sign bit */ + x += stridex; + y += stridey; + argcount = 2; /* we still have 2 good args */ + if (--n <=0) + { + f2 = 0.0; /* put dummy value in arg 2 */ + index2 = 0; + goto UNROLL3; /* finish up with 2 good args */ + } + goto LOOP2; /* otherwise, examine next arg */ + } + + index2 = 0; /* points to 0,0 in table */ + if (intf > 0x40500000) /* if (|x| > 64 */ + { f2 = -1.0/f2; + index2 = 2; /* point to pi/2 upper, lower */ + } + else if (intf >= 0x3f900000) /* if |x| >= (1/64)... */ + { + intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */ + HI(&z) = intz; /* store as a double (z) */ + LO(&z) = 0; /* ...lower */ + f2 = (f2 - z)/(1.0 + f2*z); /* get reduced argument */ + index2 = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */ + index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */ + } + yaddr2 = y; /* address to store this answer */ + x += stridex; /* point to next arg */ + y += stridey; /* point to next result */ + argcount = 3; /* we now have 3 good arguments */ + + +/* here is the 3 way unrolled section, + note, we may actually only have + 1,2, or 3 'real' arguments at this point +*/ + +UNROLL3: + + conup = __vlibm_TBL_atan1[index ]; /* upper table */ + conup1 = __vlibm_TBL_atan1[index1]; /* upper table */ + conup2 = __vlibm_TBL_atan1[index2]; /* upper table */ + + conlo = __vlibm_TBL_atan1[index +1]; /* lower table */ + conlo1 = __vlibm_TBL_atan1[index1+1]; /* lower table */ + conlo2 = __vlibm_TBL_atan1[index2+1]; /* lower table */ + + tmp = f *f ; + tmp1 = f1*f1; + tmp2 = f2*f2; + + poly = f *((p3*tmp + p2)*tmp + p1)*tmp ; + poly1 = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1; + poly2 = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2; + + ansu = conup + f ; /* compute atan(f) upper */ + ansu1 = conup1 + f1; /* compute atan(f) upper */ + ansu2 = conup2 + f2; /* compute atan(f) upper */ + + ansl = (((conup - ansu) + f) + poly) + conlo ; + ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1; + ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2; + + ans = ansu + ansl ; + ans1 = ansu1 + ansl1; + ans2 = ansu2 + ansl2; + +/* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */ + + *yaddr = sign ? -ans: ans; /* this one is always good */ + if (argcount < 3) break; /* end loop and finish up */ + *yaddr1 = sign1 ? -ans1: ans1; + *yaddr2 = sign2 ? -ans2: ans2; + + } while (--n > 0); + + if (argcount == 2) + { *yaddr1 = sign1 ? -ans1: ans1; + } +} |