diff options
Diffstat (limited to 'usr/src/lib/libmvec/common/__vcos.c')
| -rw-r--r-- | usr/src/lib/libmvec/common/__vcos.c | 1100 | 
1 files changed, 1100 insertions, 0 deletions
| diff --git a/usr/src/lib/libmvec/common/__vcos.c b/usr/src/lib/libmvec/common/__vcos.c new file mode 100644 index 0000000000..28f40c50d5 --- /dev/null +++ b/usr/src/lib/libmvec/common/__vcos.c @@ -0,0 +1,1100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2011 Nexenta Systems, Inc.  All rights reserved. + */ +/* + * Copyright 2006 Sun Microsystems, Inc.  All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/isa_defs.h> +#include <sys/ccompile.h> + +#ifdef _LITTLE_ENDIAN +#define HI(x)	*(1+(int*)x) +#define LO(x)	*(unsigned*)x +#else +#define HI(x)	*(int*)x +#define LO(x)	*(1+(unsigned*)x) +#endif + +#ifdef __RESTRICT +#define restrict _Restrict +#else +#define restrict +#endif + +/* + * vcos.1.c + * + * Vector cosine function.  Just slight modifications to vsin.8.c, mainly + * in the primary range part. + * + * Modification to primary range processing.  If an argument that does not + * fall in the primary range is encountered, then processing is continued + * in the medium range. + * + */ + +extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; + +static const double +	half[2]	= { 0.5, -0.5 }, +	one		= 1.0, +	invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */ +	pio2_1	= 1.570796326734125614166,	/* first 33 bits of pi/2 */ +	pio2_2	= 6.077100506303965976596e-11, /* second 33 bits of pi/2 */ +	pio2_3	= 2.022266248711166455796e-21, /* third 33 bits of pi/2 */ +	pio2_3t	= 8.478427660368899643959e-32, /* pi/2 - pio2_3 */ +	pp1		= -1.666666666605760465276263943134982554676e-0001, +	pp2		=  8.333261209690963126718376566146180944442e-0003, +	qq1		= -4.999999999977710986407023955908711557870e-0001, +	qq2		=  4.166654863857219350645055881018842089580e-0002, +	poly1[2]= { -1.666666666666629669805215138920301589656e-0001, +				-4.999999999999931701464060878888294524481e-0001 }, +	poly2[2]= {  8.333333332390951295683993455280336376663e-0003, +				 4.166666666394861917535640593963708222319e-0002 }, +	poly3[2]= { -1.984126237997976692791551778230098403960e-0004, +				-1.388888552656142867832756687736851681462e-0003 }, +	poly4[2]= {  2.753403624854277237649987622848330351110e-0006, +				 2.478519423681460796618128289454530524759e-0005 }; + +static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; + +/* Don't __ the following; acomp will handle it */ +extern double fabs(double); +extern void __vlibm_vcos_big(int, double *, int, double *, int, int); + +/* + * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n. + * + * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06. + * Argument reduction is done here for elts pi/4 < arg < 1.647e+06. + * + * elts < 2^-27 use the approximation 1.0 ~ cos(x). + */ +void +__vcos(int n, double * restrict x, int stridex, double * restrict y, +	int stridey) +{ +	double		x0_or_one[4], x1_or_one[4], x2_or_one[4]; +	double		y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; +	double		x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave; +	unsigned	hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2; +	int		i, biguns, nsave, sxsave, sysave; +	volatile int	v __GNU_UNUSED; +	nsave = n; +	xsave = x; +	sxsave = stridex; +	ysave = y; +	sysave = stridey; +	biguns = 0; + +	do /* MAIN LOOP */ +	{ +		/* Gotos here so _break_ exits MAIN LOOP. */ +LOOP0:  /* Find first arg in right range. */ +		xsb0 = HI(x); /* get most significant word */ +		hx0 = xsb0 & ~0x80000000; /* mask off sign bit */ +		if (hx0 > 0x3fe921fb) { +			/* Too big: arg reduction needed, so leave for second part */ +			biguns = 1; +			goto MEDIUM; +		} +		if (hx0 < 0x3e400000) { +			/* Too small.  cos x ~ 1. */ +			v = *x; +			*y = 1.0; +			x += stridex; +			y += stridey; +			i = 0; +			if (--n <= 0) +				break; +			goto LOOP0; +		} +		x0 = *x; +		py0 = y; +		x += stridex; +		y += stridey; +		i = 1; +		if (--n <= 0) +			break; + +LOOP1: /* Get second arg, same as above. */ +		xsb1 = HI(x); +		hx1 = xsb1 & ~0x80000000; +		if (hx1 > 0x3fe921fb) +		{ +			biguns = 2; +			goto MEDIUM; +		} +		if (hx1 < 0x3e400000) +		{ +			v = *x; +			*y = 1.0; +			x += stridex; +			y += stridey; +			i = 1; +			if (--n <= 0) +				break; +			goto LOOP1; +		} +		x1 = *x; +		py1 = y; +		x += stridex; +		y += stridey; +		i = 2; +		if (--n <= 0) +			break; + +LOOP2: /* Get third arg, same as above. */ +		xsb2 = HI(x); +		hx2 = xsb2 & ~0x80000000; +		if (hx2 > 0x3fe921fb) +		{ +			biguns = 3; +			goto MEDIUM; +		} +		if (hx2 < 0x3e400000) +		{ +			v = *x; +			*y = 1.0; +			x += stridex; +			y += stridey; +			i = 2; +			if (--n <= 0) +				break; +			goto LOOP2; +		} +		x2 = *x; +		py2 = y; + +		/* +		 * 0x3fc40000 = 5/32 ~ 0.15625 +		 * Get msb after subtraction.  Will be 1 only if +		 * hx0 - 5/32 is negative. +		 */ +		i = (hx0 - 0x3fc40000) >> 31; +		i |= ((hx1 - 0x3fc40000) >> 30) & 2; +		i |= ((hx2 - 0x3fc40000) >> 29) & 4; +		switch (i) +		{ +			double		a0, a1, a2, w0, w1, w2; +			double		t0, t1, t2, z0, z1, z2; +			unsigned	j0, j1, j2; + +		case 0: /* All are > 5/32 */ +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			HI(&t1) = j1; +			HI(&t2) = j2; +			LO(&t0) = 0; +			LO(&t1) = 0; +			LO(&t2) = 0; +			x0 -= t0; +			x1 -= t1; +			x2 -= t2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (qq1 + z2 * qq2); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			xsb1 = (xsb1 >> 30) & 2; +			xsb2 = (xsb2 >> 30) & 2; +			a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */ +			a1 = __vlibm_TBL_sincos_hi[j1+1]; +			a2 = __vlibm_TBL_sincos_hi[j2+1]; +			   /*   cos_lo(t)			 sin_hi(t) */ +			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); +			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); +			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); + +			*py0 = a0 + t0; +			*py1 = a1 + t1; +			*py2 = a2 + t2; +			break; + +		case 1: +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t1) = j1; +			HI(&t2) = j2; +			LO(&t1) = 0; +			LO(&t2) = 0; +			x1 -= t1; +			x2 -= t2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[1] + z0 * poly4[1]); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (qq1 + z2 * qq2); +			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb1 = (xsb1 >> 30) & 2; +			xsb2 = (xsb2 >> 30) & 2; +			a1 = __vlibm_TBL_sincos_hi[j1+1]; +			a2 = __vlibm_TBL_sincos_hi[j2+1]; +			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); +			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); +			*py0 = one + t0; +			*py1 = a1 + t1; +			*py2 = a2 + t2; +			break; + +		case 2: +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			HI(&t2) = j2; +			LO(&t0) = 0; +			LO(&t2) = 0; +			x0 -= t0; +			x2 -= t2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (poly3[1] + z1 * poly4[1]); +			t2 = z2 * (qq1 + z2 * qq2); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			xsb2 = (xsb2 >> 30) & 2; +			a0 = __vlibm_TBL_sincos_hi[j0+1]; +			a2 = __vlibm_TBL_sincos_hi[j2+1]; +			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); +			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); +			*py0 = a0 + t0; +			*py1 = one + t1; +			*py2 = a2 + t2; +			break; + +		case 3: +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t2) = j2; +			LO(&t2) = 0; +			x2 -= t2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[1] + z0 * poly4[1]); +			t1 = z1 * (poly3[1] + z1 * poly4[1]); +			t2 = z2 * (qq1 + z2 * qq2); +			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); +			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb2 = (xsb2 >> 30) & 2; +			a2 = __vlibm_TBL_sincos_hi[j2+1]; +			t2 = __vlibm_TBL_sincos_lo[j2+1] - (__vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2); +			*py0 = one + t0; +			*py1 = one + t1; +			*py2 = a2 + t2; +			break; + +		case 4: +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			HI(&t1) = j1; +			LO(&t0) = 0; +			LO(&t1) = 0; +			x0 -= t0; +			x1 -= t1; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (poly3[1] + z2 * poly4[1]); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			xsb1 = (xsb1 >> 30) & 2; +			a0 = __vlibm_TBL_sincos_hi[j0+1]; +			a1 = __vlibm_TBL_sincos_hi[j1+1]; +			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); +			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); +			*py0 = a0 + t0; +			*py1 = a1 + t1; +			*py2 = one + t2; +			break; + +		case 5: +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			HI(&t1) = j1; +			LO(&t1) = 0; +			x1 -= t1; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[1] + z0 * poly4[1]); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (poly3[1] + z2 * poly4[1]); +			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb1 = (xsb1 >> 30) & 2; +			a1 = __vlibm_TBL_sincos_hi[j1+1]; +			t1 = __vlibm_TBL_sincos_lo[j1+1] - (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); +			*py0 = one + t0; +			*py1 = a1 + t1; +			*py2 = one + t2; +			break; + +		case 6: +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			LO(&t0) = 0; +			x0 -= t0; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (poly3[1] + z1 * poly4[1]); +			t2 = z2 * (poly3[1] + z2 * poly4[1]); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); +			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			a0 = __vlibm_TBL_sincos_hi[j0+1]; +			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); +			*py0 = a0 + t0; +			*py1 = one + t1; +			*py2 = one + t2; +			break; + +		case 7: /* All are < 5/32 */ +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[1] + z0 * poly4[1]); +			t1 = z1 * (poly3[1] + z1 * poly4[1]); +			t2 = z2 * (poly3[1] + z2 * poly4[1]); +			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); +			t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); +			t2 = z2 * (poly1[1] + z2 * (poly2[1] + t2)); +			*py0 = one + t0; +			*py1 = one + t1; +			*py2 = one + t2; +			break; +		} + +		x += stridex; +		y += stridey; +		i = 0; +	} while (--n > 0); /* END MAIN LOOP */ + +	/* +	 * CLEAN UP last 0, 1, or 2 elts. +	 */ +	if (i > 0) /* Clean up elts at tail.  i < 3. */ +	{ +		double		a0, a1, w0, w1; +		double		t0, t1, z0, z1; +		unsigned	j0, j1; + +		if (i > 1) +		{ +			if (hx1 < 0x3fc40000) +			{ +				z1 = x1 * x1; +				t1 = z1 * (poly3[1] + z1 * poly4[1]); +				t1 = z1 * (poly1[1] + z1 * (poly2[1] + t1)); +				t1 = one + t1; +				*py1 = t1; +			} +			else +			{ +				j1 = (xsb1 + 0x4000) & 0xffff8000; +				HI(&t1) = j1; +				LO(&t1) = 0; +				x1 -= t1; +				z1 = x1 * x1; +				t1 = z1 * (qq1 + z1 * qq2); +				w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +				xsb1 = (xsb1 >> 30) & 2; +				a1 = __vlibm_TBL_sincos_hi[j1+1]; +				t1 = __vlibm_TBL_sincos_lo[j1+1] +					- (__vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1); +				*py1 = a1 + t1; +			} +		} +		if (hx0 < 0x3fc40000) +		{ +			z0 = x0 * x0; +			t0 = z0 * (poly3[1] + z0 * poly4[1]); +			t0 = z0 * (poly1[1] + z0 * (poly2[1] + t0)); +			t0 = one + t0; +			*py0 = t0; +		} +		else +		{ +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			LO(&t0) = 0; +			x0 -= t0; +			z0 = x0 * x0; +			t0 = z0 * (qq1 + z0 * qq2); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			a0 = __vlibm_TBL_sincos_hi[j0+1]; +			t0 = __vlibm_TBL_sincos_lo[j0+1] - (__vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0); +			*py0 = a0 + t0; +		} +	} /* END CLEAN UP */ + +	return; + +	/* +	 * Take care of BIGUNS. +	 * +	 * We have jumped here in the middle of processing after having +	 * encountered a medium range argument.  Therefore things are in a +	 * bit of a tizzy. +	 */ + +MEDIUM: + +	x0_or_one[1] = 1.0; +	x1_or_one[1] = 1.0; +	x2_or_one[1] = 1.0; +	x0_or_one[3] = -1.0; +	x1_or_one[3] = -1.0; +	x2_or_one[3] = -1.0; +	y0_or_zero[1] = 0.0; +	y1_or_zero[1] = 0.0; +	y2_or_zero[1] = 0.0; +	y0_or_zero[3] = 0.0; +	y1_or_zero[3] = 0.0; +	y2_or_zero[3] = 0.0; + +	if (biguns == 3) +	{ +		biguns = 0; +		xsb0 = xsb0 >> 31; +		xsb1 = xsb1 >> 31; +		goto loop2; +	} +	else if (biguns == 2) +	{ +		xsb0 = xsb0 >> 31; +		biguns = 0; +		goto loop1; +	} +	biguns = 0; + +	do +	{ +		double		fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; +		unsigned	hx; +		int			n0, n1, n2; + +		/* +		 * Find 3 more to work on: Not already done, not too big. +		 */ + +loop0: +		hx = HI(x); +		xsb0 = hx >> 31; +		hx &= ~0x80000000; +		if (hx > 0x413921fb) /* (1.6471e+06) Too big: leave it. */ +		{ +			if (hx >= 0x7ff00000) /* Inf or NaN */ +			{ +				x0 = *x; +				*y = x0 - x0; +			} +			else +				biguns = 1; +			x += stridex; +			y += stridey; +			i = 0; +			if (--n <= 0) +				break; +			goto loop0; +		} +		x0 = *x; +		py0 = y; +		x += stridex; +		y += stridey; +		i = 1; +		if (--n <= 0) +			break; + +loop1: +		hx = HI(x); +		xsb1 = hx >> 31; +		hx &= ~0x80000000; +		if (hx > 0x413921fb) +		{ +			if (hx >= 0x7ff00000) +			{ +				x1 = *x; +				*y = x1 - x1; +			} +			else +				biguns = 1; +			x += stridex; +			y += stridey; +			i = 1; +			if (--n <= 0) +				break; +			goto loop1; +		} +		x1 = *x; +		py1 = y; +		x += stridex; +		y += stridey; +		i = 2; +		if (--n <= 0) +			break; + +loop2: +		hx = HI(x); +		xsb2 = hx >> 31; +		hx &= ~0x80000000; +		if (hx > 0x413921fb) +		{ +			if (hx >= 0x7ff00000) +			{ +				x2 = *x; +				*y = x2 - x2; +			} +			else +				biguns = 1; +			x += stridex; +			y += stridey; +			i = 2; +			if (--n <= 0) +				break; +			goto loop2; +		} +		x2 = *x; +		py2 = y; + +		n0 = (int) (x0 * invpio2 + half[xsb0]); +		n1 = (int) (x1 * invpio2 + half[xsb1]); +		n2 = (int) (x2 * invpio2 + half[xsb2]); +		fn0 = (double) n0; +		fn1 = (double) n1; +		fn2 = (double) n2; +		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ +		n1 = (n1 + 1) & 3; +		n2 = (n2 + 1) & 3; +		a0 = x0 - fn0 * pio2_1; +		a1 = x1 - fn1 * pio2_1; +		a2 = x2 - fn2 * pio2_1; +		w0 = fn0 * pio2_2; +		w1 = fn1 * pio2_2; +		w2 = fn2 * pio2_2; +		x0 = a0 - w0; +		x1 = a1 - w1; +		x2 = a2 - w2; +		y0 = (a0 - x0) - w0; +		y1 = (a1 - x1) - w1; +		y2 = (a2 - x2) - w2; +		a0 = x0; +		a1 = x1; +		a2 = x2; +		w0 = fn0 * pio2_3 - y0; +		w1 = fn1 * pio2_3 - y1; +		w2 = fn2 * pio2_3 - y2; +		x0 = a0 - w0; +		x1 = a1 - w1; +		x2 = a2 - w2; +		y0 = (a0 - x0) - w0; +		y1 = (a1 - x1) - w1; +		y2 = (a2 - x2) - w2; +		a0 = x0; +		a1 = x1; +		a2 = x2; +		w0 = fn0 * pio2_3t - y0; +		w1 = fn1 * pio2_3t - y1; +		w2 = fn2 * pio2_3t - y2; +		x0 = a0 - w0; +		x1 = a1 - w1; +		x2 = a2 - w2; +		y0 = (a0 - x0) - w0; +		y1 = (a1 - x1) - w1; +		y2 = (a2 - x2) - w2; +		xsb0 = HI(&x0); +		i = ((xsb0 & ~0x80000000) - thresh[n0&1]) >> 31; +		xsb1 = HI(&x1); +		i |= (((xsb1 & ~0x80000000) - thresh[n1&1]) >> 30) & 2; +		xsb2 = HI(&x2); +		i |= (((xsb2 & ~0x80000000) - thresh[n2&1]) >> 29) & 4; +		switch (i) +		{ +			double		t0, t1, t2, z0, z1, z2; +			unsigned	j0, j1, j2; + +		case 0: +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			HI(&t1) = j1; +			HI(&t2) = j2; +			LO(&t0) = 0; +			LO(&t1) = 0; +			LO(&t2) = 0; +			x0 = (x0 - t0) + y0; +			x1 = (x1 - t1) + y1; +			x2 = (x2 - t2) + y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (qq1 + z2 * qq2); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			xsb1 = (xsb1 >> 30) & 2; +			xsb2 = (xsb2 >> 30) & 2; +			n0 ^= (xsb0 & ~(n0 << 1)); +			n1 ^= (xsb1 & ~(n1 << 1)); +			n2 ^= (xsb2 & ~(n2 << 1)); +			xsb0 |= 1; +			xsb1 |= 1; +			xsb2 |= 1; +			a0 = __vlibm_TBL_sincos_hi[j0+n0]; +			a1 = __vlibm_TBL_sincos_hi[j1+n1]; +			a2 = __vlibm_TBL_sincos_hi[j2+n2]; +			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; +			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; +			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; +			*py0 = ( a0 + t0 ); +			*py1 = ( a1 + t1 ); +			*py2 = ( a2 + t2 ); +			break; + +		case 1: +			j0 = n0 & 1; +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t1) = j1; +			HI(&t2) = j2; +			LO(&t1) = 0; +			LO(&t2) = 0; +			x0_or_one[0] = x0; +			x0_or_one[2] = -x0; +			y0_or_zero[0] = y0; +			y0_or_zero[2] = -y0; +			x1 = (x1 - t1) + y1; +			x2 = (x2 - t2) + y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[j0] + z0 * poly4[j0]); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (qq1 + z2 * qq2); +			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb1 = (xsb1 >> 30) & 2; +			xsb2 = (xsb2 >> 30) & 2; +			n1 ^= (xsb1 & ~(n1 << 1)); +			n2 ^= (xsb2 & ~(n2 << 1)); +			xsb1 |= 1; +			xsb2 |= 1; +			a1 = __vlibm_TBL_sincos_hi[j1+n1]; +			a2 = __vlibm_TBL_sincos_hi[j2+n2]; +			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); +			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; +			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; +			*py0 = t0; +			*py1 = ( a1 + t1 ); +			*py2 = ( a2 + t2 ); +			break; + +		case 2: +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			j1 = n1 & 1; +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			HI(&t2) = j2; +			LO(&t0) = 0; +			LO(&t2) = 0; +			x1_or_one[0] = x1; +			x1_or_one[2] = -x1; +			x0 = (x0 - t0) + y0; +			y1_or_zero[0] = y1; +			y1_or_zero[2] = -y1; +			x2 = (x2 - t2) + y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (poly3[j1] + z1 * poly4[j1]); +			t2 = z2 * (qq1 + z2 * qq2); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			xsb2 = (xsb2 >> 30) & 2; +			n0 ^= (xsb0 & ~(n0 << 1)); +			n2 ^= (xsb2 & ~(n2 << 1)); +			xsb0 |= 1; +			xsb2 |= 1; +			a0 = __vlibm_TBL_sincos_hi[j0+n0]; +			a2 = __vlibm_TBL_sincos_hi[j2+n2]; +			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; +			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); +			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; +			*py0 = ( a0 + t0 ); +			*py1 = t1; +			*py2 = ( a2 + t2 ); +			break; + +		case 3: +			j0 = n0 & 1; +			j1 = n1 & 1; +			j2 = (xsb2 + 0x4000) & 0xffff8000; +			HI(&t2) = j2; +			LO(&t2) = 0; +			x0_or_one[0] = x0; +			x0_or_one[2] = -x0; +			x1_or_one[0] = x1; +			x1_or_one[2] = -x1; +			y0_or_zero[0] = y0; +			y0_or_zero[2] = -y0; +			y1_or_zero[0] = y1; +			y1_or_zero[2] = -y1; +			x2 = (x2 - t2) + y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[j0] + z0 * poly4[j0]); +			t1 = z1 * (poly3[j1] + z1 * poly4[j1]); +			t2 = z2 * (qq1 + z2 * qq2); +			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); +			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); +			w2 = x2 * (one + z2 * (pp1 + z2 * pp2)); +			j2 = (((j2 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb2 = (xsb2 >> 30) & 2; +			n2 ^= (xsb2 & ~(n2 << 1)); +			xsb2 |= 1; +			a2 = __vlibm_TBL_sincos_hi[j2+n2]; +			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); +			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); +			t2 = (__vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2) + __vlibm_TBL_sincos_lo[j2+n2]; +			*py0 = t0; +			*py1 = t1; +			*py2 = ( a2 + t2 ); +			break; + +		case 4: +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			j2 = n2 & 1; +			HI(&t0) = j0; +			HI(&t1) = j1; +			LO(&t0) = 0; +			LO(&t1) = 0; +			x2_or_one[0] = x2; +			x2_or_one[2] = -x2; +			x0 = (x0 - t0) + y0; +			x1 = (x1 - t1) + y1; +			y2_or_zero[0] = y2; +			y2_or_zero[2] = -y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (poly3[j2] + z2 * poly4[j2]); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			xsb1 = (xsb1 >> 30) & 2; +			n0 ^= (xsb0 & ~(n0 << 1)); +			n1 ^= (xsb1 & ~(n1 << 1)); +			xsb0 |= 1; +			xsb1 |= 1; +			a0 = __vlibm_TBL_sincos_hi[j0+n0]; +			a1 = __vlibm_TBL_sincos_hi[j1+n1]; +			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; +			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; +			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); +			*py0 = ( a0 + t0 ); +			*py1 = ( a1 + t1 ); +			*py2 = t2; +			break; + +		case 5: +			j0 = n0 & 1; +			j1 = (xsb1 + 0x4000) & 0xffff8000; +			j2 = n2 & 1; +			HI(&t1) = j1; +			LO(&t1) = 0; +			x0_or_one[0] = x0; +			x0_or_one[2] = -x0; +			x2_or_one[0] = x2; +			x2_or_one[2] = -x2; +			y0_or_zero[0] = y0; +			y0_or_zero[2] = -y0; +			x1 = (x1 - t1) + y1; +			y2_or_zero[0] = y2; +			y2_or_zero[2] = -y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[j0] + z0 * poly4[j0]); +			t1 = z1 * (qq1 + z1 * qq2); +			t2 = z2 * (poly3[j2] + z2 * poly4[j2]); +			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); +			w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); +			j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb1 = (xsb1 >> 30) & 2; +			n1 ^= (xsb1 & ~(n1 << 1)); +			xsb1 |= 1; +			a1 = __vlibm_TBL_sincos_hi[j1+n1]; +			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); +			t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; +			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); +			*py0 = t0; +			*py1 = ( a1 + t1 ); +			*py2 = t2; +			break; + +		case 6: +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			j1 = n1 & 1; +			j2 = n2 & 1; +			HI(&t0) = j0; +			LO(&t0) = 0; +			x1_or_one[0] = x1; +			x1_or_one[2] = -x1; +			x2_or_one[0] = x2; +			x2_or_one[2] = -x2; +			x0 = (x0 - t0) + y0; +			y1_or_zero[0] = y1; +			y1_or_zero[2] = -y1; +			y2_or_zero[0] = y2; +			y2_or_zero[2] = -y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (qq1 + z0 * qq2); +			t1 = z1 * (poly3[j1] + z1 * poly4[j1]); +			t2 = z2 * (poly3[j2] + z2 * poly4[j2]); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); +			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			n0 ^= (xsb0 & ~(n0 << 1)); +			xsb0 |= 1; +			a0 = __vlibm_TBL_sincos_hi[j0+n0]; +			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; +			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); +			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); +			*py0 = ( a0 + t0 ); +			*py1 = t1; +			*py2 = t2; +			break; + +		case 7: +			j0 = n0 & 1; +			j1 = n1 & 1; +			j2 = n2 & 1; +			x0_or_one[0] = x0; +			x0_or_one[2] = -x0; +			x1_or_one[0] = x1; +			x1_or_one[2] = -x1; +			x2_or_one[0] = x2; +			x2_or_one[2] = -x2; +			y0_or_zero[0] = y0; +			y0_or_zero[2] = -y0; +			y1_or_zero[0] = y1; +			y1_or_zero[2] = -y1; +			y2_or_zero[0] = y2; +			y2_or_zero[2] = -y2; +			z0 = x0 * x0; +			z1 = x1 * x1; +			z2 = x2 * x2; +			t0 = z0 * (poly3[j0] + z0 * poly4[j0]); +			t1 = z1 * (poly3[j1] + z1 * poly4[j1]); +			t2 = z2 * (poly3[j2] + z2 * poly4[j2]); +			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); +			t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); +			t2 = z2 * (poly1[j2] + z2 * (poly2[j2] + t2)); +			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); +			t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); +			t2 = x2_or_one[n2] + (y2_or_zero[n2] + x2_or_one[n2] * t2); +			*py0 = t0; +			*py1 = t1; +			*py2 = t2; +			break; +		} + +		x += stridex; +		y += stridey; +		i = 0; +	} while (--n > 0); + +	if (i > 0) +	{ +		double		fn0, fn1, a0, a1, w0, w1, y0, y1; +		double		t0, t1, z0, z1; +		unsigned	j0, j1; +		int			n0, n1; + +		if (i > 1) +		{ +			n1 = (int) (x1 * invpio2 + half[xsb1]); +			fn1 = (double) n1; +			n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ +			a1 = x1 - fn1 * pio2_1; +			w1 = fn1 * pio2_2; +			x1 = a1 - w1; +			y1 = (a1 - x1) - w1; +			a1 = x1; +			w1 = fn1 * pio2_3 - y1; +			x1 = a1 - w1; +			y1 = (a1 - x1) - w1; +			a1 = x1; +			w1 = fn1 * pio2_3t - y1; +			x1 = a1 - w1; +			y1 = (a1 - x1) - w1; +			xsb1 = HI(&x1); +			if ((xsb1 & ~0x80000000) < thresh[n1&1]) +			{ +				j1 = n1 & 1; +				x1_or_one[0] = x1; +				x1_or_one[2] = -x1; +				y1_or_zero[0] = y1; +				y1_or_zero[2] = -y1; +				z1 = x1 * x1; +				t1 = z1 * (poly3[j1] + z1 * poly4[j1]); +				t1 = z1 * (poly1[j1] + z1 * (poly2[j1] + t1)); +				t1 = x1_or_one[n1] + (y1_or_zero[n1] + x1_or_one[n1] * t1); +				*py1 = t1; +			} +			else +			{ +				j1 = (xsb1 + 0x4000) & 0xffff8000; +				HI(&t1) = j1; +				LO(&t1) = 0; +				x1 = (x1 - t1) + y1; +				z1 = x1 * x1; +				t1 = z1 * (qq1 + z1 * qq2); +				w1 = x1 * (one + z1 * (pp1 + z1 * pp2)); +				j1 = (((j1 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +				xsb1 = (xsb1 >> 30) & 2; +				n1 ^= (xsb1 & ~(n1 << 1)); +				xsb1 |= 1; +				a1 = __vlibm_TBL_sincos_hi[j1+n1]; +				t1 = (__vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1) + __vlibm_TBL_sincos_lo[j1+n1]; +				*py1 = ( a1 + t1 ); +			} +		} +		n0 = (int) (x0 * invpio2 + half[xsb0]); +		fn0 = (double) n0; +		n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ +		a0 = x0 - fn0 * pio2_1; +		w0 = fn0 * pio2_2; +		x0 = a0 - w0; +		y0 = (a0 - x0) - w0; +		a0 = x0; +		w0 = fn0 * pio2_3 - y0; +		x0 = a0 - w0; +		y0 = (a0 - x0) - w0; +		a0 = x0; +		w0 = fn0 * pio2_3t - y0; +		x0 = a0 - w0; +		y0 = (a0 - x0) - w0; +		xsb0 = HI(&x0); +		if ((xsb0 & ~0x80000000) < thresh[n0&1]) +		{ +			j0 = n0 & 1; +			x0_or_one[0] = x0; +			x0_or_one[2] = -x0; +			y0_or_zero[0] = y0; +			y0_or_zero[2] = -y0; +			z0 = x0 * x0; +			t0 = z0 * (poly3[j0] + z0 * poly4[j0]); +			t0 = z0 * (poly1[j0] + z0 * (poly2[j0] + t0)); +			t0 = x0_or_one[n0] + (y0_or_zero[n0] + x0_or_one[n0] * t0); +			*py0 = t0; +		} +		else +		{ +			j0 = (xsb0 + 0x4000) & 0xffff8000; +			HI(&t0) = j0; +			LO(&t0) = 0; +			x0 = (x0 - t0) + y0; +			z0 = x0 * x0; +			t0 = z0 * (qq1 + z0 * qq2); +			w0 = x0 * (one + z0 * (pp1 + z0 * pp2)); +			j0 = (((j0 & ~0x80000000) - 0x3fc40000) >> 13) & ~0x3; +			xsb0 = (xsb0 >> 30) & 2; +			n0 ^= (xsb0 & ~(n0 << 1)); +			xsb0 |= 1; +			a0 = __vlibm_TBL_sincos_hi[j0+n0]; +			t0 = (__vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0) + __vlibm_TBL_sincos_lo[j0+n0]; +			*py0 = ( a0 + t0 ); +		} +	} + +	if (biguns) +		__vlibm_vcos_big(nsave, xsave, sxsave, ysave, sysave, 0x413921fb); +} | 
