diff options
Diffstat (limited to 'usr/src/libm/src/m9x')
75 files changed, 18689 insertions, 0 deletions
diff --git a/usr/src/libm/src/m9x/__fenv_amd64.il b/usr/src/libm/src/m9x/__fenv_amd64.il new file mode 100644 index 0000000..bae2414 --- /dev/null +++ b/usr/src/libm/src/m9x/__fenv_amd64.il @@ -0,0 +1,349 @@ +/ +/ CDDL HEADER START +/ +/ The contents of this file are subject to the terms of the +/ Common Development and Distribution License (the "License"). +/ You may not use this file except in compliance with the License. +/ +/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +/ or http://www.opensolaris.org/os/licensing. +/ See the License for the specific language governing permissions +/ and limitations under the License. +/ +/ When distributing Covered Code, include this CDDL HEADER in each +/ file and include the License file at usr/src/OPENSOLARIS.LICENSE. +/ If applicable, add the following below this CDDL HEADER, with the +/ fields enclosed by brackets "[]" replaced with your own identifying +/ information: Portions Copyright [yyyy] [name of copyright owner] +/ +/ CDDL HEADER END +/ +/ Copyright 2006 Sun Microsystems, Inc. All rights reserved. +/ Use is subject to license terms. 
+/
+/ @(#)__fenv_amd64.il 1.5 06/01/31 SMI
+/
+
+/
+/ Inline templates for amd64.  Pointer arguments are taken from %rdi,
+/ %rsi and %rdx (in that order); the x87 wrappers take their operands
+/ from the stack.
+/
+
+/
+/ x87 control/status word access.  %rdi points to a 32-bit word with the
+/ status word in bytes 0-1 and the control word in bytes 2-3.
+/ __fenv_getcwsw stores the current values there.  __fenv_setcwsw
+/ installs them: it dumps the x87 environment into a 32-byte scratch
+/ area carved from the stack, replaces the control word (offset 0) and
+/ the status word (offset 4) in that image, and reloads it with fldenv.
+/
+	.inline __fenv_getcwsw,1
+	fstsw (%rdi)
+	fstcw 2(%rdi)
+	.end
+
+	.inline __fenv_setcwsw,1
+	movw (%rdi),%dx
+	movw 2(%rdi),%cx
+	subq $32,%rsp
+	fstenv (%rsp)
+	movw %cx,(%rsp)
+	movw %dx,4(%rsp)
+	fldenv (%rsp)
+	fwait
+	addq $32,%rsp
+	.end
+
+/
+/ MXCSR access: store to / load from the word at (%rdi).
+/
+	.inline __fenv_getmxcsr,1
+	stmxcsr (%rdi)
+	.end
+
+	.inline __fenv_setmxcsr,1
+	ldmxcsr (%rdi)
+	.end
+
+/
+/ x87 instruction wrappers.  Each template loads its operand(s) from the
+/ stack in 80-bit format (the first at (%rsp), the second, if any, at
+/ 16(%rsp)), executes the instruction it is named after and leaves the
+/ result on the x87 register stack.  fptan additionally pops the top of
+/ the x87 stack into (%rsp); fprem1, fprem and fscale finish with
+/ fstp %st(1), which overwrites %st(1) with %st(0) and pops.
+/
+	.inline f2xm1,1
+	fldt (%rsp)
+	f2xm1
+	.end
+
+	.inline fyl2x,2
+	fldt (%rsp)
+	fldt 16(%rsp)
+	fyl2x
+	.end
+
+	.inline fptan,1
+	fldt (%rsp)
+	fptan
+	fstpt (%rsp)
+	.end
+
+	.inline fpatan,2
+	fldt (%rsp)
+	fldt 16(%rsp)
+	fpatan
+	.end
+
+	.inline fxtract,1
+	fldt (%rsp)
+	fxtract
+	.end
+
+	.inline fprem1,2
+	fldt (%rsp)
+	fldt 16(%rsp)
+	fprem1
+	fstp %st(1)
+	.end
+
+	.inline fprem,2
+	fldt (%rsp)
+	fldt 16(%rsp)
+	fprem
+	fstp %st(1)
+	.end
+
+	.inline fyl2xp1,2
+	fldt (%rsp)
+	fldt 16(%rsp)
+	fyl2xp1
+	.end
+
+	.inline fsqrt,1
+	fldt (%rsp)
+	fsqrt
+	.end
+
+	.inline fsincos,1
+	fldt (%rsp)
+	fsincos
+	.end
+
+	.inline frndint,1
+	fldt (%rsp)
+	frndint
+	.end
+
+	.inline fscale,2
+	fldt (%rsp)
+	fldt 16(%rsp)
+	fscale
+	fstp %st(1)
+	.end
+
+	.inline fsin,1
+	fldt (%rsp)
+	fsin
+	.end
+
+	.inline fcos,1
+	fldt (%rsp)
+	fcos
+	.end
+
+/
+/ Scalar single precision SSE wrappers.  The three-pointer forms load
+/ the first operand from (%rdi) into %xmm0, apply the instruction with
+/ (%rsi) as its source operand, and store the low 32 bits of %xmm0 to
+/ (%rdx).
+/
+	.inline sse_cmpeqss,3
+	movss (%rdi),%xmm0
+	cmpeqss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_cmpltss,3
+	movss (%rdi),%xmm0
+	cmpltss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_cmpless,3
+	movss (%rdi),%xmm0
+	cmpless (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_cmpunordss,3
+	movss (%rdi),%xmm0
+	cmpunordss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_minss,3
+	movss (%rdi),%xmm0
+	minss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_maxss,3
+	movss (%rdi),%xmm0
+	maxss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_addss,3
+	movss (%rdi),%xmm0
+	addss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_subss,3
+	movss (%rdi),%xmm0
+	subss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_mulss,3
+	movss (%rdi),%xmm0
+	mulss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+	.inline sse_divss,3
+	movss (%rdi),%xmm0
+	divss (%rsi),%xmm0
+	movss %xmm0,(%rdx)
+	.end
+
+/
+/ Two-pointer forms: the operand is read from (%rdi) and the result is
+/ written to (%rsi).  sse_ucomiss and sse_comiss only execute the
+/ comparison of (%rdi) with (%rsi); they store nothing.
+/
+	.inline sse_sqrtss,2
+	sqrtss (%rdi),%xmm0
+	movss %xmm0,(%rsi)
+	.end
+
+	.inline sse_ucomiss,2
+	movss (%rdi),%xmm0
+	ucomiss (%rsi),%xmm0
+	.end
+
+	.inline sse_comiss,2
+	movss (%rdi),%xmm0
+	comiss (%rsi),%xmm0
+	.end
+
+	.inline sse_cvtss2sd,2
+	cvtss2sd (%rdi),%xmm0
+	movsd %xmm0,(%rsi)
+	.end
+
+	.inline sse_cvtsi2ss,2
+	cvtsi2ss (%rdi),%xmm0
+	movss %xmm0,(%rsi)
+	.end
+
+/
+/ The 32-bit convert-to-integer forms produce their result in %ecx, so
+/ it is stored with a 32-bit movl.  (A movw would pair a 16-bit operand
+/ size with a 32-bit register.)
+/
+	.inline sse_cvttss2si,2
+	cvttss2si (%rdi),%ecx
+	movl %ecx,(%rsi)
+	.end
+
+	.inline sse_cvtss2si,2
+	cvtss2si (%rdi),%ecx
+	movl %ecx,(%rsi)
+	.end
+
+/
+/ 64-bit integer variants: the integer side goes through %rcx / movq.
+/
+	.inline sse_cvtsi2ssq,2
+	cvtsi2ssq (%rdi),%xmm0
+	movss %xmm0,(%rsi)
+	.end
+
+	.inline sse_cvttss2siq,2
+	cvttss2siq (%rdi),%rcx
+	movq %rcx,(%rsi)
+	.end
+
+	.inline sse_cvtss2siq,2
+	cvtss2siq (%rdi),%rcx
+	movq %rcx,(%rsi)
+	.end
+
+/
+/ Scalar double precision SSE wrappers; same shapes as the single
+/ precision ones above, using movsd for the 64-bit loads and stores.
+/
+	.inline sse_cmpeqsd,3
+	movsd (%rdi),%xmm0
+	cmpeqsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_cmpltsd,3
+	movsd (%rdi),%xmm0
+	cmpltsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_cmplesd,3
+	movsd (%rdi),%xmm0
+	cmplesd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_cmpunordsd,3
+	movsd (%rdi),%xmm0
+	cmpunordsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_minsd,3
+	movsd (%rdi),%xmm0
+	minsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_maxsd,3
+	movsd (%rdi),%xmm0
+	maxsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_addsd,3
+	movsd (%rdi),%xmm0
+	addsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_subsd,3
+	movsd (%rdi),%xmm0
+	subsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_mulsd,3
+	movsd (%rdi),%xmm0
+	mulsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_divsd,3
+	movsd (%rdi),%xmm0
+	divsd (%rsi),%xmm0
+	movsd %xmm0,(%rdx)
+	.end
+
+	.inline sse_sqrtsd,2
+	sqrtsd (%rdi),%xmm0
+	movsd %xmm0,(%rsi)
+	.end
+
+	.inline sse_ucomisd,2
+	movsd (%rdi),%xmm0
+	ucomisd (%rsi),%xmm0
+	.end
+
+	.inline sse_comisd,2
+	movsd (%rdi),%xmm0
+	comisd (%rsi),%xmm0
+	.end
+
+	.inline sse_cvtsd2ss,2
+	cvtsd2ss (%rdi),%xmm0
+	movss %xmm0,(%rsi)
+	.end
+
+	.inline sse_cvtsi2sd,2
+	cvtsi2sd (%rdi),%xmm0
+	movsd %xmm0,(%rsi)
+	.end
+
+/
+/ As for the single precision conversions: the 32-bit result in %ecx is
+/ stored with movl.
+/
+	.inline sse_cvttsd2si,2
+	cvttsd2si (%rdi),%ecx
+	movl %ecx,(%rsi)
+	.end
+
+	.inline sse_cvtsd2si,2
+	cvtsd2si (%rdi),%ecx
+	movl %ecx,(%rsi)
+	.end
+
+	.inline sse_cvtsi2sdq,2
+	cvtsi2sdq (%rdi),%xmm0
+	movsd %xmm0,(%rsi)
+	.end
+
+	.inline sse_cvttsd2siq,2
+	cvttsd2siq (%rdi),%rcx
+	movq %rcx,(%rsi)
+	.end
+
+	.inline sse_cvtsd2siq,2
+	cvtsd2siq (%rdi),%rcx
+	movq %rcx,(%rsi)
+	.end
diff --git a/usr/src/libm/src/m9x/__fenv_i386.il b/usr/src/libm/src/m9x/__fenv_i386.il
new file mode 100644
index 0000000..253d4db
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_i386.il
@@ -0,0 +1,411 @@
+/
+/ CDDL HEADER START
+/
+/ The contents of this file are subject to the terms of the
+/ Common Development and Distribution License (the "License").
+/ You may not use this file except in compliance with the License.
+/
+/ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+/ or http://www.opensolaris.org/os/licensing.
+/ See the License for the specific language governing permissions
+/ and limitations under the License.
+/
+/ When distributing Covered Code, include this CDDL HEADER in each
+/ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+/ If applicable, add the following below this CDDL HEADER, with the
+/ fields enclosed by brackets "[]" replaced with your own identifying
+/ information: Portions Copyright [yyyy] [name of copyright owner]
+/
+/ CDDL HEADER END
+/
+/ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+/ Use is subject to license terms.
+/
+/ @(#)__fenv_i386.il 1.8 06/01/31 SMI
+/
+
+/
+/ Inline templates for 32-bit x86.  Pointer arguments are fetched from
+/ the stack at (%esp), 4(%esp) and 8(%esp); the x87 wrappers load their
+/ operands directly from the stack.
+/
+
+/
+/ x87 control/status word access.  The pointer argument addresses a
+/ 32-bit word with the status word in bytes 0-1 and the control word in
+/ bytes 2-3.  __fenv_getcwsw stores the current values there.
+/ __fenv_setcwsw installs them: it dumps the x87 environment into a
+/ 28-byte scratch area carved from the stack, replaces the control word
+/ (offset 0) and the status word (offset 4) in that image, and reloads
+/ it with fldenv.
+/
+	.inline __fenv_getcwsw,1
+	movl (%esp),%eax
+	fstsw (%eax)
+	fstcw 2(%eax)
+	.end
+
+	.inline __fenv_setcwsw,1
+	movl (%esp),%eax
+	movw (%eax),%dx
+	movw 2(%eax),%cx
+	subl $28,%esp
+	fstenv (%esp)
+	movw %cx,(%esp)
+	movw %dx,4(%esp)
+	fldenv (%esp)
+	fwait
+	addl $28,%esp
+	.end
+
+/
+/ MXCSR access: store to / load from the word the argument points to.
+/
+	.inline __fenv_getmxcsr,1
+	movl (%esp),%eax
+	stmxcsr (%eax)
+	.end
+
+	.inline __fenv_setmxcsr,1
+	movl (%esp),%eax
+	ldmxcsr (%eax)
+	.end
+
+/
+/ x87 instruction wrappers.  Each template loads its operand(s) from the
+/ stack in 80-bit format (the first at (%esp), the second, if any, at
+/ 12(%esp)), executes the instruction it is named after and leaves the
+/ result on the x87 register stack.  fptan additionally pops the top of
+/ the x87 stack into (%esp); fprem1, fprem and fscale finish with
+/ fstp %st(1), which overwrites %st(1) with %st(0) and pops.
+/
+	.inline f2xm1,1
+	fldt (%esp)
+	f2xm1
+	.end
+
+	.inline fyl2x,2
+	fldt (%esp)
+	fldt 12(%esp)
+	fyl2x
+	.end
+
+	.inline fptan,1
+	fldt (%esp)
+	fptan
+	fstpt (%esp)
+	.end
+
+	.inline fpatan,2
+	fldt (%esp)
+	fldt 12(%esp)
+	fpatan
+	.end
+
+	.inline fxtract,1
+	fldt (%esp)
+	fxtract
+	.end
+
+	.inline fprem1,2
+	fldt (%esp)
+	fldt 12(%esp)
+	fprem1
+	fstp %st(1)
+	.end
+
+	.inline fprem,2
+	fldt (%esp)
+	fldt 12(%esp)
+	fprem
+	fstp %st(1)
+	.end
+
+	.inline fyl2xp1,2
+	fldt (%esp)
+	fldt 12(%esp)
+	fyl2xp1
+	.end
+
+	.inline fsqrt,1
+	fldt (%esp)
+	fsqrt
+	.end
+
+	.inline fsincos,1
+	fldt (%esp)
+	fsincos
+	.end
+
+	.inline frndint,1
+	fldt (%esp)
+	frndint
+	.end
+
+	.inline fscale,2
+	fldt (%esp)
+	fldt 12(%esp)
+	fscale
+	fstp %st(1)
+	.end
+
+	.inline fsin,1
+	fldt (%esp)
+	fsin
+	.end
+
+	.inline fcos,1
+	fldt (%esp)
+	fcos
+	.end
+
+/
+/ Scalar single precision SSE wrappers.  The three-pointer forms fetch
+/ the pointers into %eax, %edx and %ecx, load the first operand from
+/ (%eax) into %xmm0, apply the instruction with (%edx) as its source
+/ operand, and store the low 32 bits of %xmm0 to (%ecx).
+/
+	.inline sse_cmpeqss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	cmpeqss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_cmpltss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	cmpltss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_cmpless,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	cmpless (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_cmpunordss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	cmpunordss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_minss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	minss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_maxss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	maxss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_addss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	addss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_subss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	subss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_mulss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	mulss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+	.inline sse_divss,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movss (%eax),%xmm0
+	divss (%edx),%xmm0
+	movss %xmm0,(%ecx)
+	.end
+
+/
+/ Two-pointer forms: the operand is read from (%eax) and the result is
+/ written to (%edx).  sse_ucomiss and sse_comiss only execute the
+/ comparison of (%eax) with (%edx); they store nothing.
+/
+	.inline sse_sqrtss,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	sqrtss (%eax),%xmm0
+	movss %xmm0,(%edx)
+	.end
+
+	.inline sse_ucomiss,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movss (%eax),%xmm0
+	ucomiss (%edx),%xmm0
+	.end
+
+	.inline sse_comiss,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movss (%eax),%xmm0
+	comiss (%edx),%xmm0
+	.end
+
+	.inline sse_cvtss2sd,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvtss2sd (%eax),%xmm0
+	movsd %xmm0,(%edx)
+	.end
+
+	.inline sse_cvtsi2ss,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvtsi2ss (%eax),%xmm0
+	movss %xmm0,(%edx)
+	.end
+
+/
+/ The convert-to-integer forms produce a 32-bit result in %ecx, so it
+/ is stored with a 32-bit movl.  (A movw would pair a 16-bit operand
+/ size with a 32-bit register.)
+/
+	.inline sse_cvttss2si,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvttss2si (%eax),%ecx
+	movl %ecx,(%edx)
+	.end
+
+	.inline sse_cvtss2si,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvtss2si (%eax),%ecx
+	movl %ecx,(%edx)
+	.end
+
+/
+/ Scalar double precision SSE wrappers; same shapes as the single
+/ precision ones above, using movsd for the 64-bit loads and stores.
+/
+	.inline sse_cmpeqsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	cmpeqsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_cmpltsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	cmpltsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_cmplesd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	cmplesd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_cmpunordsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	cmpunordsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_minsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	minsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_maxsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	maxsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_addsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	addsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_subsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	subsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_mulsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	mulsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_divsd,3
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movl 8(%esp),%ecx
+	movsd (%eax),%xmm0
+	divsd (%edx),%xmm0
+	movsd %xmm0,(%ecx)
+	.end
+
+	.inline sse_sqrtsd,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	sqrtsd (%eax),%xmm0
+	movsd %xmm0,(%edx)
+	.end
+
+	.inline sse_ucomisd,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movsd (%eax),%xmm0
+	ucomisd (%edx),%xmm0
+	.end
+
+	.inline sse_comisd,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	movsd (%eax),%xmm0
+	comisd (%edx),%xmm0
+	.end
+
+	.inline sse_cvtsd2ss,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvtsd2ss (%eax),%xmm0
+	movss %xmm0,(%edx)
+	.end
+
+	.inline sse_cvtsi2sd,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvtsi2sd (%eax),%xmm0
+	movsd %xmm0,(%edx)
+	.end
+
+/
+/ As for the single precision conversions: the 32-bit result in %ecx is
+/ stored with movl.
+/
+	.inline sse_cvttsd2si,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvttsd2si (%eax),%ecx
+	movl %ecx,(%edx)
+	.end
+
+	.inline sse_cvtsd2si,2
+	movl (%esp),%eax
+	movl 4(%esp),%edx
+	cvtsd2si (%eax),%ecx
+	movl %ecx,(%edx)
+	.end
diff --git a/usr/src/libm/src/m9x/__fenv_sparc.il b/usr/src/libm/src/m9x/__fenv_sparc.il
new file mode 100644
index 0000000..d942a33
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fenv_sparc.il
@@ -0,0 +1,40 @@
+!
+! CDDL HEADER START
+!
+! The contents of this file are subject to the terms of the
+! Common Development and Distribution License (the "License").
+! You may not use this file except in compliance with the License.
+!
+! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+! or http://www.opensolaris.org/os/licensing.
+! See the License for the specific language governing permissions
+! and limitations under the License.
+!
+! When distributing Covered Code, include this CDDL HEADER in each
+! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+! If applicable, add the following below this CDDL HEADER, with the
+! fields enclosed by brackets "[]" replaced with your own identifying
+! information: Portions Copyright [yyyy] [name of copyright owner]
+!
+! CDDL HEADER END
+!
+! Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+! Use is subject to license terms.
+!
+! @(#)__fenv_sparc.il 1.5 06/01/31 SMI
+!
+! %fsr access through the pointer in %o0: __fenv_getfsr/__fenv_setfsr
+! use st/ld, __fenv_getfsrx/__fenv_setfsrx use stx/ldx.
+!
+	.inline __fenv_getfsr,1
+	st %fsr,[%o0]
+	.end
+
+	.inline __fenv_setfsr,1
+	ld [%o0],%fsr
+	.end
+
+	.inline __fenv_getfsrx,1
+	stx %fsr,[%o0]
+	.end
+
+	.inline __fenv_setfsrx,1
+	ldx [%o0],%fsr
+	.end
diff --git a/usr/src/libm/src/m9x/__fex_hdlr.c b/usr/src/libm/src/m9x/__fex_hdlr.c
new file mode 100644
index 0000000..4b87e3f
--- /dev/null
+++ b/usr/src/libm/src/m9x/__fex_hdlr.c
@@ -0,0 +1,850 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "@(#)__fex_hdlr.c 1.12 06/01/31 SMI"
+
+#include "fenv_synonyms.h"
+#undef lint
+#include <signal.h>
+#include <siginfo.h>
+#if defined(__i386) && !defined(__amd64)
+/* for now, pick up local copy of Solaris 10 sys/regset.h; we can get rid
+   of this once we no longer need to build on Solaris 8 */
+#include "regset.h"
+#endif
+#include <ucontext.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <thread.h>
+#include <math.h>
+#include <sunmath.h>
+#include <fenv.h>
+#include "fex_handler.h"
+
+#if defined(__sparc) && !defined(__sparcv9)
+#include <sys/procfs.h>
+#endif
+
+/* 2.x signal.h doesn't declare sigemptyset or sigismember
+   if they're #defined (see sys/signal.h) */
+extern int sigemptyset(sigset_t *);
+extern int sigismember(const sigset_t *, int);
+
+/* external globals */
+void (*__mt_fex_sync)() = NULL; /* for synchronization with libmtsk */
+#pragma weak __mt_fex_sync
+
+#ifdef LIBM_MT_FEX_SYNC
+void (*__libm_mt_fex_sync)() = NULL; /* new, improved version of above */
+#pragma weak __libm_mt_fex_sync
+#endif
+
+/* private variables */
+/* handler table returned by __fex_get_thr_handlers() when thr_main() is true */
+static fex_handler_t main_handlers;
+/* nonzero once main_handlers has been filled in */
+static int handlers_initialized = 0;
+/* thread-specific-data key under which other threads keep their table */
+static thread_key_t handlers_key;
+static mutex_t handlers_key_lock = DEFAULTMUTEX;
+
+/* saved signal action that __fex_hdlr falls back to ("not_ieee" path) */
+static struct sigaction oact = { 0, SIG_DFL };
+/* held while __fex_hdlr copies oact */
+static mutex_t hdlr_lock = DEFAULTMUTEX;
+static int hdlr_installed = 0;
+
+/* private const data */
+/*
+ * Trap-enable bit for each handler-table index: entries 0-3 are the
+ * inexact, division, underflow and overflow traps; entries 4-11 all
+ * map to the invalid trap.
+ */
+static const int te_bit[FEX_NUM_EXC] = {
+	1 << fp_trap_inexact,
+	1 << fp_trap_division,
+	1 << fp_trap_underflow,
+	1 << fp_trap_overflow,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid,
+	1 << fp_trap_invalid
+};
+
+/*
+ * Return the traps to be enabled given the current handling modes
+ * and flags
+ *
+ * thr_handlers: per-exception handling data, indexed
+ *	0..FEX_NUM_EXC-1; only the __mode field is read, and the pointer
+ *	is dereferenced unconditionally, so it must not be NULL.
+ * fsr: word holding the exception flags, read through __fenv_get_ex().
+ *
+ * Returns a mask of (1 << fp_trap_*) bits: one for every exception
+ * whose mode is not FEX_NONSTOP and, when fex_get_log() returns
+ * nonzero, one for each of the five exception classes whose flag is
+ * not currently set in fsr.
+ */
+static int
+__fex_te_needed(struct fex_handler_data *thr_handlers, unsigned long fsr)
+{
+	int i, ex, te;
+
+	/* set traps for handling modes */
+	te = 0;
+	for (i = 0; i < FEX_NUM_EXC; i++)
+		if (thr_handlers[i].__mode != FEX_NONSTOP)
+			te |= te_bit[i];
+
+	/* add traps for retrospective diagnostics */
+	if (fex_get_log()) {
+		/* trap on any exception whose flag has not been raised yet */
+		ex = (int)__fenv_get_ex(fsr);
+		if (!(ex & FE_INEXACT))
+			te |= (1 << fp_trap_inexact);
+		if (!(ex & FE_UNDERFLOW))
+			te |= (1 << fp_trap_underflow);
+		if (!(ex & FE_OVERFLOW))
+			te |= (1 << fp_trap_overflow);
+		if (!(ex & FE_DIVBYZERO))
+			te |= (1 << fp_trap_division);
+		if (!(ex & FE_INVALID))
+			te |= (1 << fp_trap_invalid);
+	}
+
+	return te;
+}
+
+/*
+ * The following function synchronizes with libmtsk (SPARC only, for now)
+ *
+ * begin != 0: under env_lock, a master caller saves the current
+ *	floating point environment in a function-static copy and marks
+ *	it valid; any other caller installs that copy if one has been
+ *	saved.
+ * begin == 0: a master caller calls __fex_update_te() when
+ *	fex_get_log() returns nonzero; other callers do nothing.
+ */
+static void
+__fex_sync_with_libmtsk(int begin, int master)
+{
+	static fenv_t master_env;
+	static int env_initialized = 0;
+	static mutex_t env_lock = DEFAULTMUTEX;
+
+	if (begin) {
+		mutex_lock(&env_lock);
+		if (master) {
+			(void) fegetenv(&master_env);
+			env_initialized = 1;
+		}
+		else if (env_initialized)
+			(void) fesetenv(&master_env);
+		mutex_unlock(&env_lock);
+	}
+	else if (master && fex_get_log())
+		__fex_update_te();
+}
+
+#ifdef LIBM_MT_FEX_SYNC
+/*
+ * The following function may be used for synchronization with any
+ * internal project that manages multiple threads
+ */
+/* actions accepted by __fex_sync_with_threads() */
+enum __libm_mt_fex_sync_actions {
+	__libm_mt_fex_start_master = 0,
+	__libm_mt_fex_start_slave,
+	__libm_mt_fex_finish_master,
+	__libm_mt_fex_finish_slave
+};
+
+/* caller-provided state shared between the master and slave calls */
+struct __libm_mt_fex_sync_data {
+	fenv_t master_env;	/* environment saved by start_master */
+	int initialized;	/* set to 1 once master_env has been saved */
+	mutex_t lock;		/* held around accesses to the two fields above */
+};
+
+/*
+ * Perform one synchronization step on thr_env:
+ *
+ * start_master: save the current environment in thr_env->master_env
+ *	and mark it initialized (under thr_env->lock).
+ * start_slave: install thr_env->master_env if it has been initialized
+ *	(under thr_env->lock).
+ * finish_master: call __fex_update_te(); on non-x86 builds only when
+ *	fex_get_log() returns nonzero.
+ * finish_slave: on x86 builds, clear the trap enables; a no-op
+ *	otherwise.
+ */
+static void
+__fex_sync_with_threads(enum __libm_mt_fex_sync_actions action,
+    struct __libm_mt_fex_sync_data *thr_env)
+{
+	switch (action) {
+	case __libm_mt_fex_start_master:
+		mutex_lock(&thr_env->lock);
+		(void) fegetenv(&thr_env->master_env);
+		thr_env->initialized = 1;
+		mutex_unlock(&thr_env->lock);
+		break;
+
+	case __libm_mt_fex_start_slave:
+		mutex_lock(&thr_env->lock);
+		if (thr_env->initialized)
+			(void) fesetenv(&thr_env->master_env);
+		mutex_unlock(&thr_env->lock);
+		break;
+
+	case __libm_mt_fex_finish_master:
+#ifdef __i386
+		__fex_update_te();
+#else
+		if (fex_get_log())
+			__fex_update_te();
+#endif
+		break;
+
+	case __libm_mt_fex_finish_slave:
+#ifdef __i386
+		/* clear traps, making all accrued flags visible in status word */
+		{
+			unsigned long fsr;
+			__fenv_getfsr(&fsr);
+			__fenv_set_te(fsr, 0);
+			__fenv_setfsr(&fsr);
+		}
+#endif
+		break;
+	}
+}
+#endif
+
+#if defined(__sparc)
+
+/*
+* Code for setting or clearing interval mode on US-III and above.
+* This is embedded as data so we don't have to mark the library
+* as a v8plusb/v9b object. (I could have just used one entry and
+* modified the second word to set the bits I want, but that would
+* have required another mutex.)
+*/ +static const unsigned int siam[][2] = { + { 0x81c3e008, 0x81b01020 }, /* retl, siam 0 */ + { 0x81c3e008, 0x81b01024 }, /* retl, siam 4 */ + { 0x81c3e008, 0x81b01025 }, /* retl, siam 5 */ + { 0x81c3e008, 0x81b01026 }, /* retl, siam 6 */ + { 0x81c3e008, 0x81b01027 } /* retl, siam 7 */ +}; + +/* +* If a handling mode is in effect, apply it; otherwise invoke the +* saved handler +*/ +static void +__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap) +{ + struct fex_handler_data *thr_handlers; + struct sigaction act; + void (*handler)(), (*siamp)(); + int mode, i; + enum fex_exception e; + fex_info_t info; + unsigned long fsr, tmpfsr, addr; + unsigned int gsr; + + /* determine which exception occurred */ + switch (sip->si_code) { + case FPE_FLTDIV: + e = fex_division; + break; + case FPE_FLTOVF: + e = fex_overflow; + break; + case FPE_FLTUND: + e = fex_underflow; + break; + case FPE_FLTRES: + e = fex_inexact; + break; + case FPE_FLTINV: + if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0) + goto not_ieee; + break; + default: + /* not an IEEE exception */ + goto not_ieee; + } + + /* get the handling mode */ + mode = FEX_NOHANDLER; + handler = oact.sa_handler; /* for log; just looking, no need to lock */ + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + /* make an entry in the log of retro. diag. 
if need be */ + i = ((int)uap->uc_mcontext.fpregs.fpu_fsr >> 5) & 0x1f; + __fex_mklog(uap, (char *)sip->si_addr, i, e, mode, (void *)handler); + + /* handle the exception based on the mode */ + if (mode == FEX_NOHANDLER) + goto not_ieee; + else if (mode == FEX_ABORT) + abort(); + else if (mode == FEX_SIGNAL) { + handler(sig, sip, uap); + return; + } + + /* custom or nonstop mode; disable traps and clear flags */ + __fenv_getfsr(&fsr); + __fenv_set_te(fsr, 0); + __fenv_set_ex(fsr, 0); + + /* if interval mode was set, clear it, then substitute the + interval rounding direction and clear ns mode in the fsr */ +#ifdef __sparcv9 + gsr = uap->uc_mcontext.asrs[3]; +#else + gsr = 0; + if (uap->uc_mcontext.xrs.xrs_id == XRS_ID) + gsr = (*(unsigned long long*)((prxregset_t*)uap->uc_mcontext. + xrs.xrs_ptr)->pr_un.pr_v8p.pr_filler); +#endif + gsr = (gsr >> 25) & 7; + if (gsr & 4) { + siamp = (void (*)()) siam[0]; + siamp(); + tmpfsr = fsr; + fsr = (fsr & ~0xc0400000ul) | ((gsr & 3) << 30); + } + __fenv_setfsr(&fsr); + + /* decode the operation */ + __fex_get_op(sip, uap, &info); + + /* if a custom mode handler is installed, invoke it */ + if (mode == FEX_CUSTOM) { + /* if we got here from feraiseexcept, pass dummy info */ + addr = (unsigned long)sip->si_addr; + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept ) { + info.op = fex_other; + info.op1.type = info.op2.type = info.res.type = + fex_nodata; + } + + /* restore interval mode if it was set, and put the original + rounding direction and ns mode back in the fsr */ + if (gsr & 4) { + __fenv_setfsr(&tmpfsr); + siamp = (void (*)()) siam[1 + (gsr & 3)]; + siamp(); + } + + handler(1 << (int)e, &info); + + /* restore modes in case the user's handler changed them */ + if (gsr & 4) { + siamp = (void (*)()) siam[0]; + siamp(); + } + __fenv_setfsr(&fsr); + } + + /* stuff the result */ + __fex_st_result(sip, uap, &info); + + /* "or" in any exception flags and update traps */ + fsr = 
uap->uc_mcontext.fpregs.fpu_fsr; + fsr |= ((info.flags & 0x1f) << 5); + i = __fex_te_needed(thr_handlers, fsr); + __fenv_set_te(fsr, i); + uap->uc_mcontext.fpregs.fpu_fsr = fsr; + return; + +not_ieee: + /* revert to the saved handler (if any) */ + mutex_lock(&hdlr_lock); + act = oact; + mutex_unlock(&hdlr_lock); + switch ((unsigned long)act.sa_handler) { + case (unsigned long)SIG_DFL: + /* simulate trap with no handler installed */ + sigaction(SIGFPE, &act, NULL); + kill(getpid(), SIGFPE); + break; +#if !defined(__lint) + case (unsigned long)SIG_IGN: + break; +#endif + default: + act.sa_handler(sig, sip, uap); + } +} + +#elif defined(__i386) + +#if defined(__amd64) +#define test_sse_hw 1 +#else +extern int _sse_hw; +#define test_sse_hw &_sse_hw && _sse_hw +#endif + +#if !defined(REG_PC) +#define REG_PC EIP +#endif + +/* +* If a handling mode is in effect, apply it; otherwise invoke the +* saved handler +*/ +static void +__fex_hdlr(int sig, siginfo_t *sip, ucontext_t *uap) +{ + struct fex_handler_data *thr_handlers; + struct sigaction act; + void (*handler)(), (*simd_handler[4])(); + int mode, simd_mode[4], i, len, accrued, *ap; + unsigned int cwsw, oldcwsw, mxcsr, oldmxcsr; + enum fex_exception e, simd_e[4]; + fex_info_t info, simd_info[4]; + unsigned long addr; + siginfo_t osip = *sip; + sseinst_t inst; + + /* check for an exception caused by an SSE instruction */ + if (!(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & 0x80)) { + len = __fex_parse_sse(uap, &inst); + if (len == 0) + goto not_ieee; + + /* disable all traps and clear flags */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & ~0x3f) | 0x003f0000; + __fenv_setcwsw(&cwsw); + __fenv_getmxcsr(&oldmxcsr); + mxcsr = (oldmxcsr & ~0x3f) | 0x1f80; + __fenv_setmxcsr(&mxcsr); + + if ((int)inst.op & SIMD) { + __fex_get_simd_op(uap, &inst, simd_e, simd_info); + + thr_handlers = __fex_get_thr_handlers(); + addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC]; + accrued = uap->uc_mcontext.fpregs.fp_reg_set. 
+ fpchip_state.mxcsr; + + e = (enum fex_exception)-1; + mode = FEX_NONSTOP; + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + e = simd_e[i]; + simd_mode[i] = FEX_NOHANDLER; + simd_handler[i] = oact.sa_handler; + if (thr_handlers && + thr_handlers[(int)e].__mode != + FEX_NOHANDLER) { + simd_mode[i] = + thr_handlers[(int)e].__mode; + simd_handler[i] = + thr_handlers[(int)e].__handler; + } + accrued &= ~te_bit[(int)e]; + switch (simd_mode[i]) { + case FEX_ABORT: + mode = FEX_ABORT; + break; + case FEX_SIGNAL: + if (mode != FEX_ABORT) + mode = FEX_SIGNAL; + handler = simd_handler[i]; + break; + case FEX_NOHANDLER: + if (mode != FEX_ABORT && mode != + FEX_SIGNAL) + mode = FEX_NOHANDLER; + break; + } + } + if (e == (enum fex_exception)-1) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } + accrued |= uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.status; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + __fex_mklog(uap, (char *)addr, accrued, + simd_e[i], simd_mode[i], + (void *)simd_handler[i]); + } + + if (mode == FEX_NOHANDLER) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } else if (mode == FEX_ABORT) { + abort(); + } else if (mode == FEX_SIGNAL) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + handler(sig, &osip, uap); + return; + } + + *ap = 0; + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + if (simd_mode[i] == FEX_CUSTOM) { + handler(1 << (int)simd_e[i], + &simd_info[i]); + __fenv_setcwsw(&cwsw); + __fenv_setmxcsr(&mxcsr); + } + } + + __fex_st_simd_result(uap, &inst, simd_e, simd_info); + for (i = 0; i < 4; i++) { + if ((int)simd_e[i] < 0) + continue; + + accrued |= simd_info[i].flags; + } + + if ((int)inst.op & INTREG) { + /* set MMX mode */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set. 
+ fpchip_state.sw &= ~0x3800; + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.fctw = 0; +#else + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[1] &= ~0x3800; + uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[2] = 0; +#endif + } + } else { + e = __fex_get_sse_op(uap, &inst, &info); + if ((int)e < 0) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } + + mode = FEX_NOHANDLER; + handler = oact.sa_handler; + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != + FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + addr = (unsigned long)uap->uc_mcontext.gregs[REG_PC]; + accrued = uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.mxcsr & ~te_bit[(int)e]; + accrued |= uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.status; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + __fex_mklog(uap, (char *)addr, accrued, e, mode, + (void *)handler); + + if (mode == FEX_NOHANDLER) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + goto not_ieee; + } else if (mode == FEX_ABORT) { + abort(); + } else if (mode == FEX_SIGNAL) { + __fenv_setcwsw(&oldcwsw); + __fenv_setmxcsr(&oldmxcsr); + handler(sig, &osip, uap); + return; + } else if (mode == FEX_CUSTOM) { + *ap = 0; + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept ) { + info.op = fex_other; + info.op1.type = info.op2.type = + info.res.type = fex_nodata; + } + handler(1 << (int)e, &info); + __fenv_setcwsw(&cwsw); + __fenv_setmxcsr(&mxcsr); + } + + __fex_st_sse_result(uap, &inst, e, &info); + accrued |= info.flags; + +#ifdef __amd64 + /* + * In 64-bit mode, the 32-bit convert-to-integer + * instructions zero the upper 32 bits of the + * destination. (We do this here and not in + * __fex_st_sse_result because __fex_st_sse_result + * can be called from __fex_st_simd_result, too.) 
+ */ + if (inst.op == cvtss2si || inst.op == cvttss2si || + inst.op == cvtsd2si || inst.op == cvttsd2si) + inst.op1->i[1] = 0; +#endif + } + + /* advance the pc past the SSE instruction */ + uap->uc_mcontext.gregs[REG_PC] += len; + goto update_state; + } + + /* determine which exception occurred */ + __fex_get_x86_exc(sip, uap); + switch (sip->si_code) { + case FPE_FLTDIV: + e = fex_division; + break; + case FPE_FLTOVF: + e = fex_overflow; + break; + case FPE_FLTUND: + e = fex_underflow; + break; + case FPE_FLTRES: + e = fex_inexact; + break; + case FPE_FLTINV: + if ((int)(e = __fex_get_invalid_type(sip, uap)) < 0) + goto not_ieee; + break; + default: + /* not an IEEE exception */ + goto not_ieee; + } + + /* get the handling mode */ + mode = FEX_NOHANDLER; + handler = oact.sa_handler; /* for log; just looking, no need to lock */ + thr_handlers = __fex_get_thr_handlers(); + if (thr_handlers && thr_handlers[(int)e].__mode != FEX_NOHANDLER) { + mode = thr_handlers[(int)e].__mode; + handler = thr_handlers[(int)e].__handler; + } + + /* make an entry in the log of retro. diag. if need be */ +#if defined(__amd64) + addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.rip; +#else + addr = (unsigned long)uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[3]; +#endif + accrued = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status & + ~te_bit[(int)e]; + if (test_sse_hw) + accrued |= uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state. 
+ mxcsr; + ap = __fex_accrued(); + accrued |= *ap; + accrued &= 0x3d; + __fex_mklog(uap, (char *)addr, accrued, e, mode, (void *)handler); + + /* handle the exception based on the mode */ + if (mode == FEX_NOHANDLER) + goto not_ieee; + else if (mode == FEX_ABORT) + abort(); + else if (mode == FEX_SIGNAL) { + handler(sig, &osip, uap); + return; + } + + /* disable all traps and clear flags */ + __fenv_getcwsw(&cwsw); + cwsw = (cwsw & ~0x3f) | 0x003f0000; + __fenv_setcwsw(&cwsw); + if (test_sse_hw) { + __fenv_getmxcsr(&mxcsr); + mxcsr = (mxcsr & ~0x3f) | 0x1f80; + __fenv_setmxcsr(&mxcsr); + } + *ap = 0; + + /* decode the operation */ + __fex_get_op(sip, uap, &info); + + /* if a custom mode handler is installed, invoke it */ + if (mode == FEX_CUSTOM) { + /* if we got here from feraiseexcept, pass dummy info */ + if (addr >= (unsigned long)feraiseexcept && + addr < (unsigned long)fetestexcept ) { + info.op = fex_other; + info.op1.type = info.op2.type = info.res.type = + fex_nodata; + } + + handler(1 << (int)e, &info); + + /* restore modes in case the user's handler changed them */ + __fenv_setcwsw(&cwsw); + if (test_sse_hw) + __fenv_setmxcsr(&mxcsr); + } + + /* stuff the result */ + __fex_st_result(sip, uap, &info); + accrued |= info.flags; + +update_state: + accrued &= 0x3d; + i = __fex_te_needed(thr_handlers, accrued); + *ap = accrued & i; +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw |= 0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw &= ~i; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[1] |= + (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] |= 0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[0] &= ~i; +#endif + if (test_sse_hw) { + 
uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &= ~0x3d; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr |= + 0x1e80 | (accrued & ~i); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr &= + ~(i << 7); + } + return; + +not_ieee: + /* revert to the saved handler (if any) */ + mutex_lock(&hdlr_lock); + act = oact; + mutex_unlock(&hdlr_lock); + switch ((unsigned long)act.sa_handler) { + case (unsigned long)SIG_DFL: + /* simulate trap with no handler installed */ + sigaction(SIGFPE, &act, NULL); + kill(getpid(), SIGFPE); + break; +#if !defined(__lint) + case (unsigned long)SIG_IGN: + break; +#endif + default: + act.sa_handler(sig, &osip, uap); + } +} + +#else +#error Unknown architecture +#endif + +/* +* Return a pointer to the thread-specific handler data, and +* initialize it if necessary +*/ +struct fex_handler_data * +__fex_get_thr_handlers() +{ + struct fex_handler_data *ptr; + unsigned long fsr; + int i, te; + + if (thr_main()) { + if (!handlers_initialized) { + /* initialize to FEX_NOHANDLER if trap is enabled, + FEX_NONSTOP if trap is disabled */ + __fenv_getfsr(&fsr); + te = (int)__fenv_get_te(fsr); + for (i = 0; i < FEX_NUM_EXC; i++) + main_handlers[i].__mode = + ((te & te_bit[i])? FEX_NOHANDLER : FEX_NONSTOP); + handlers_initialized = 1; + } + return main_handlers; + } + else { + ptr = NULL; + mutex_lock(&handlers_key_lock); + if (thr_getspecific(handlers_key, (void **)&ptr) != 0 && + thr_keycreate(&handlers_key, free) != 0) { + mutex_unlock(&handlers_key_lock); + return NULL; + } + mutex_unlock(&handlers_key_lock); + if (!ptr) { + if ((ptr = (struct fex_handler_data *) + malloc(sizeof(fex_handler_t))) == NULL) { + return NULL; + } + if (thr_setspecific(handlers_key, (void *)ptr) != 0) { + (void)free(ptr); + return NULL; + } + /* initialize to FEX_NOHANDLER if trap is enabled, + FEX_NONSTOP if trap is disabled */ + __fenv_getfsr(&fsr); + te = (int)__fenv_get_te(fsr); + for (i = 0; i < FEX_NUM_EXC; i++) + ptr[i].__mode = ((te & te_bit[i])? 
FEX_NOHANDLER : FEX_NONSTOP); + } + return ptr; + } +} + +/* +* Update the trap enable bits according to the selected modes +*/ +void +__fex_update_te() +{ + struct fex_handler_data *thr_handlers; + struct sigaction act, tmpact; + sigset_t blocked; + unsigned long fsr; + int te; + + /* determine which traps are needed */ + thr_handlers = __fex_get_thr_handlers(); + __fenv_getfsr(&fsr); + te = __fex_te_needed(thr_handlers, fsr); + + /* install __fex_hdlr as necessary */ + if (!hdlr_installed && te) { + act.sa_handler = __fex_hdlr; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + sigaction(SIGFPE, &act, &tmpact); + if (tmpact.sa_handler != __fex_hdlr) + { + mutex_lock(&hdlr_lock); + oact = tmpact; + mutex_unlock(&hdlr_lock); + } + hdlr_installed = 1; + } + + /* set the new trap enable bits (only if SIGFPE is not blocked) */ + if (sigprocmask(0, NULL, &blocked) == 0 && + !sigismember(&blocked, SIGFPE)) { + __fenv_set_te(fsr, te); + __fenv_setfsr(&fsr); + } + + /* synchronize with libmtsk */ + __mt_fex_sync = __fex_sync_with_libmtsk; + +#ifdef LIBM_MT_FEX_SYNC + /* synchronize with other projects */ + __libm_mt_fex_sync = __fex_sync_with_threads; +#endif +} diff --git a/usr/src/libm/src/m9x/__fex_i386.c b/usr/src/libm/src/m9x/__fex_i386.c new file mode 100644 index 0000000..62ec4ba --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_i386.c @@ -0,0 +1,1671 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_i386.c 1.15 06/01/31 SMI" + +#include "fenv_synonyms.h" +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <siginfo.h> +#include <ucontext.h> +#include <thread.h> +#include <math.h> +#include <sunmath.h> +#include <fenv.h> +#include "fex_handler.h" + +#if defined(__amd64) +#define test_sse_hw 1 +#else +/* + * The following variable lives in libc on Solaris 10, where it + * gets set to a nonzero value at startup time on systems with SSE. + */ +int _sse_hw = 0; +#pragma weak _sse_hw +#define test_sse_hw &_sse_hw && _sse_hw +#endif + +static int accrued = 0; +static thread_key_t accrued_key; +static mutex_t accrued_key_lock = DEFAULTMUTEX; + +int * +__fex_accrued() +{ + int *p; + + if (thr_main()) + return &accrued; + else { + p = NULL; + mutex_lock(&accrued_key_lock); + if (thr_getspecific(accrued_key, (void **)&p) != 0 && + thr_keycreate(&accrued_key, free) != 0) { + mutex_unlock(&accrued_key_lock); + return NULL; + } + mutex_unlock(&accrued_key_lock); + if (!p) { + if ((p = (int*) malloc(sizeof(int))) == NULL) + return NULL; + if (thr_setspecific(accrued_key, (void *)p) != 0) { + (void)free(p); + return NULL; + } + *p = 0; + } + return p; + } +} + +void +__fenv_getfsr(unsigned long *fsr) +{ + unsigned int cwsw, mxcsr; + + __fenv_getcwsw(&cwsw); + /* clear reserved bits for no particularly good reason */ + cwsw &= ~0xe0c00000u; + if (test_sse_hw) { + /* pick up exception flags (excluding denormal operand + flag) from mxcsr */ + __fenv_getmxcsr(&mxcsr); + cwsw |= (mxcsr & 0x3d); + } + cwsw |= *__fex_accrued(); + *fsr = cwsw ^ 
0x003f0000u; +} + +void +__fenv_setfsr(const unsigned long *fsr) +{ + unsigned int cwsw, mxcsr; + int te; + + /* save accrued exception flags corresponding to enabled exceptions */ + cwsw = (unsigned int)*fsr; + te = __fenv_get_te(cwsw); + *__fex_accrued() = cwsw & te; + cwsw = (cwsw & ~te) ^ 0x003f0000; + if (test_sse_hw) { + /* propagate rounding direction, masks, and exception flags + (excluding denormal operand mask and flag) to mxcsr */ + __fenv_getmxcsr(&mxcsr); + mxcsr = (mxcsr & ~0x7ebd) | ((cwsw >> 13) & 0x6000) | + ((cwsw >> 9) & 0x1e80) | (cwsw & 0x3d); + __fenv_setmxcsr(&mxcsr); + } + __fenv_setcwsw(&cwsw); +} + +/* Offsets into the fp environment save area (assumes 32-bit protected mode) */ +#define CW 0 /* control word */ +#define SW 1 /* status word */ +#define TW 2 /* tag word */ +#define IP 3 /* instruction pointer */ +#define OP 4 /* opcode */ +#define EA 5 /* operand address */ + +/* macro for accessing fp registers in the save area */ +#if defined(__amd64) +#define fpreg(u,x) *(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.st) +#else +#define fpreg(u,x) *(long double *)(10*(x)+(char*)&(u)->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[7]) +#endif + +/* +* Fix sip->si_code; the Solaris x86 kernel can get it wrong +*/ +void +__fex_get_x86_exc(siginfo_t *sip, ucontext_t *uap) +{ + unsigned sw, cw; + + sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status; +#if defined(__amd64) + cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.cw; +#else + cw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[CW]; +#endif + if ((sw & FE_INVALID) && !(cw & (1 << fp_trap_invalid))) + /* store 0 for stack fault, FPE_FLTINV for IEEE invalid op */ + sip->si_code = ((sw & 0x40)? 
0 : FPE_FLTINV); + else if ((sw & FE_DIVBYZERO) && !(cw & (1 << fp_trap_division))) + sip->si_code = FPE_FLTDIV; + else if ((sw & FE_OVERFLOW) && !(cw & (1 << fp_trap_overflow))) + sip->si_code = FPE_FLTOVF; + else if ((sw & FE_UNDERFLOW) && !(cw & (1 << fp_trap_underflow))) + sip->si_code = FPE_FLTUND; + else if ((sw & FE_INEXACT) && !(cw & (1 << fp_trap_inexact))) + sip->si_code = FPE_FLTRES; + else + sip->si_code = 0; +} + +static enum fp_class_type +my_fp_classf(float *x) +{ + int i = *(int*)x & ~0x80000000; + + if (i < 0x7f800000) { + if (i < 0x00800000) + return ((i == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7f800000) + return fp_infinity; + else if (i & 0x400000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_class(double *x) +{ + int i = *(1+(int*)x) & ~0x80000000; + + if (i < 0x7ff00000) { + if (i < 0x00100000) + return (((i | *(int*)x) == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7ff00000 && *(int*)x == 0) + return fp_infinity; + else if (i & 0x80000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_classl(long double *x) +{ + int i = *(2+(int*)x) & 0x7fff; + + if (i < 0x7fff) { + if (i < 1) { + if (*(1+(int*)x) < 0) return fp_normal; /* pseudo-denormal */ + return (((*(1+(int*)x) | *(int*)x) == 0)? + fp_zero : fp_subnormal); + } + return ((*(1+(int*)x) < 0)? 
fp_normal : + (enum fp_class_type) -1); /* unsupported format */ + } + else if (*(1+(int*)x) == 0x80000000 && *(int*)x == 0) + return fp_infinity; + else if (*(1+(unsigned*)x) >= 0xc0000000) + return fp_quiet; + else if (*(1+(int*)x) < 0) + return fp_signaling; + else + return (enum fp_class_type) -1; /* unsupported format */ +} + +/* +* Determine which type of invalid operation exception occurred +*/ +enum fex_exception +__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap) +{ + unsigned op; + unsigned long ea; + enum fp_class_type t1, t2; + + /* get the opcode and data address */ +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* if the instruction is fld, the source must be snan (it can't be + an unsupported format, since fldt doesn't raise any exceptions) */ + switch (op & 0x7f8) { + case 0x100: + case 0x140: + case 0x180: + case 0x500: + case 0x540: + case 0x580: + return fex_inv_snan; + } + + /* otherwise st is one of the operands; see if it's snan */ + t1 = my_fp_classl(&fpreg(uap, 0)); + if (t1 == fp_signaling) + return fex_inv_snan; + else if (t1 == (enum fp_class_type) -1) + return (enum fex_exception) -1; + + /* determine the class of the second operand if there is one */ + t2 = fp_normal; + switch (op & 0x7e0) { + case 0x600: + case 0x620: + case 0x640: + case 0x660: + case 0x680: + case 0x6a0: + /* short memory operand */ + if (!ea) + return (enum fex_exception) -1; + if (*(short *)ea == 0) + t2 = fp_zero; + break; + + case 0x200: + case 0x220: + case 0x240: + case 0x260: + case 0x280: + case 0x2a0: + /* int memory operand */ + if (!ea) + return (enum fex_exception) -1; + if (*(int *)ea == 0) + t2 = fp_zero; + break; + + case 0x000: + case 0x020: + case 0x040: + case 0x060: + case 0x080: + case 
0x0a0: + /* single precision memory operand */ + if (!ea) + return (enum fex_exception) -1; + t2 = my_fp_classf((float *)ea); + break; + + case 0x400: + case 0x420: + case 0x440: + case 0x460: + case 0x480: + case 0x4a0: + /* double precision memory operand */ + if (!ea) + return (enum fex_exception) -1; + t2 = my_fp_class((double *)ea); + break; + + case 0x0c0: + case 0x0e0: + case 0x3e0: + case 0x4c0: + case 0x4e0: + case 0x5e0: + case 0x6c0: + case 0x6e0: + case 0x7e0: + /* register operand determined by opcode */ + switch (op & 0x7f8) { + case 0x3e0: + case 0x3f8: + case 0x5f0: + case 0x5f8: + case 0x7e0: + case 0x7f8: + /* weed out nonexistent opcodes */ + break; + + default: + t2 = my_fp_classl(&fpreg(uap, op & 7)); + } + break; + + case 0x1e0: + case 0x2e0: + /* special forms */ + switch (op) { + case 0x1f1: /* fyl2x */ + case 0x1f3: /* fpatan */ + case 0x1f5: /* fprem1 */ + case 0x1f8: /* fprem */ + case 0x1f9: /* fyl2xp1 */ + case 0x1fd: /* fscale */ + case 0x2e9: /* fucompp */ + t2 = my_fp_classl(&fpreg(uap, 1)); + break; + } + break; + } + + /* see if the second op is snan */ + if (t2 == fp_signaling) + return fex_inv_snan; + else if (t2 == (enum fp_class_type) -1) + return (enum fex_exception) -1; + + /* determine the type of operation */ + switch (op & 0x7f8) { + case 0x000: + case 0x020: + case 0x028: + case 0x040: + case 0x060: + case 0x068: + case 0x080: + case 0x0a0: + case 0x0a8: + case 0x0c0: + case 0x0e0: + case 0x0e8: + case 0x400: + case 0x420: + case 0x428: + case 0x440: + case 0x460: + case 0x468: + case 0x480: + case 0x4a0: + case 0x4a8: + case 0x4c0: + case 0x4e0: + case 0x4e8: + case 0x6c0: + case 0x6e0: + case 0x6e8: + /* fadd, fsub, fsubr */ + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_isi; + break; + + case 0x008: + case 0x048: + case 0x088: + case 0x0c8: + case 0x208: + case 0x248: + case 0x288: + case 0x408: + case 0x448: + case 0x488: + case 0x4c8: + case 0x608: + case 0x648: + case 0x688: + case 0x6c8: + /* fmul 
*/ + if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero && + t1 == fp_infinity)) + return fex_inv_zmi; + break; + + case 0x030: + case 0x038: + case 0x070: + case 0x078: + case 0x0b0: + case 0x0b8: + case 0x0f0: + case 0x0f8: + case 0x230: + case 0x238: + case 0x270: + case 0x278: + case 0x2b0: + case 0x2b8: + case 0x430: + case 0x438: + case 0x470: + case 0x478: + case 0x4b0: + case 0x4b8: + case 0x4f0: + case 0x4f8: + case 0x630: + case 0x638: + case 0x670: + case 0x678: + case 0x6b0: + case 0x6b8: + case 0x6f0: + case 0x6f8: + /* fdiv */ + if (t1 == fp_zero && t2 == fp_zero) + return fex_inv_zdz; + else if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_idi; + break; + + case 0x1f0: + case 0x1f8: + /* fsqrt, other special ops */ + return fex_inv_sqrt; + + case 0x010: + case 0x018: + case 0x050: + case 0x058: + case 0x090: + case 0x098: + case 0x0d0: + case 0x0d8: + case 0x210: + case 0x218: + case 0x250: + case 0x258: + case 0x290: + case 0x298: + case 0x2e8: + case 0x3f0: + case 0x410: + case 0x418: + case 0x450: + case 0x458: + case 0x490: + case 0x498: + case 0x4d0: + case 0x4d8: + case 0x5e0: + case 0x5e8: + case 0x610: + case 0x618: + case 0x650: + case 0x658: + case 0x690: + case 0x698: + case 0x6d0: + case 0x6d8: + case 0x7f0: + /* fcom */ + if (t1 == fp_quiet || t2 == fp_quiet) + return fex_inv_cmp; + break; + + case 0x1e0: + /* ftst */ + if (op == 0x1e4 && t1 == fp_quiet) + return fex_inv_cmp; + break; + + case 0x310: + case 0x318: + case 0x350: + case 0x358: + case 0x390: + case 0x398: + case 0x710: + case 0x718: + case 0x730: + case 0x738: + case 0x750: + case 0x758: + case 0x770: + case 0x778: + case 0x790: + case 0x798: + case 0x7b0: + case 0x7b8: + /* fist, fbst */ + return fex_inv_int; + } + + return (enum fex_exception) -1; +} + +/* scale factors for exponent unwrapping */ +static const long double + two12288 = 1.139165225263043370845938579315932009e+3699l, /* 2^12288 */ + twom12288 = 
8.778357852076208839765066529179033145e-3700l, /* 2^-12288 */ + twom12288mulp = 8.778357852076208839289190796475222545e-3700l; + /* (")*(1-2^-64) */ + +/* inline templates */ +extern long double f2xm1(long double); +extern long double fyl2x(long double, long double); +extern long double fptan(long double); +extern long double fpatan(long double, long double); +extern long double fxtract(long double); +extern long double fprem1(long double, long double); +extern long double fprem(long double, long double); +extern long double fyl2xp1(long double, long double); +extern long double fsqrt(long double); +extern long double fsincos(long double); +extern long double frndint(long double); +extern long double fscale(long double, long double); +extern long double fsin(long double); +extern long double fcos(long double); + +/* +* Get the operands, generate the default untrapped result with +* exceptions, and set a code indicating the type of operation +*/ +void +__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + fex_numeric_t t; + long double op2v, x; + unsigned int cwsw, ex, sw, op; + unsigned long ea; + volatile int c; + + /* get the exception type, status word, opcode, and data address */ + ex = sip->si_code; + sw = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.status; +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* initialize res to the default untrapped result and ex to the + corresponding flags (assume trapping is disabled and flags + are clear) */ + + /* single operand instructions */ + info->op = fex_cnvt; + info->op2.type = fex_nodata; + switch (op & 0x7f8) { + /* load instructions */ + case 0x100: + case 0x140: + case 0x180: + if (!ea) { + info->op = fex_other; + info->op1.type = 
info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_float; + info->op1.val.f = *(float *)ea; + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.f; + goto done; + + case 0x500: + case 0x540: + case 0x580: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_double; + info->op1.val.d = *(double *)ea; + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.d; + goto done; + + /* store instructions */ + case 0x110: + case 0x118: + case 0x150: + case 0x158: + case 0x190: + case 0x198: + info->res.type = fex_float; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.f = *(float *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.f = (float) info->op1.val.q; + goto done; + + case 0x310: + case 0x318: + case 0x350: + case 0x358: + case 0x390: + case 0x398: + info->res.type = fex_int; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.i = *(int *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.i = (int) info->op1.val.q; + goto done; + + case 0x510: + case 0x518: + case 0x550: + case 0x558: + case 0x590: + case 0x598: + info->res.type = fex_double; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = 
fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.d = *(double *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.d = (double) info->op1.val.q; + goto done; + + case 0x710: + case 0x718: + case 0x750: + case 0x758: + case 0x790: + case 0x798: + info->res.type = fex_int; + if (ex == FPE_FLTRES && (op & 8) != 0) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.i = *(short *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.i = (short) info->op1.val.q; + goto done; + + case 0x730: + case 0x770: + case 0x7b0: + /* fbstp; don't bother */ + info->op = fex_other; + info->op1.type = info->res.type = fex_nodata; + info->flags = 0; + return; + + case 0x738: + case 0x778: + case 0x7b8: + info->res.type = fex_llong; + if (ex == FPE_FLTRES) { + /* inexact, stack popped */ + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op1.type = fex_nodata; + info->res.val.l = *(long long *)ea; + info->flags = FE_INEXACT; + return; + } + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + info->res.val.l = (long long) info->op1.val.q; + goto done; + } + + /* all other ops (except compares) have destinations on the stack + so overflow, underflow, and inexact will stomp their operands */ + if (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == FPE_FLTRES) { + /* find the trapped result */ + info->op1.type = info->op2.type = fex_nodata; + info->res.type = fex_ldouble; + switch (op & 0x7f8) { + case 0x1f0: + /* fptan pushes 1.0 afterward, so result is in st(1) */ + info->res.val.q = ((op == 0x1f2)? 
fpreg(uap, 1) : + fpreg(uap, 0)); + break; + + case 0x4c0: + case 0x4c8: + case 0x4e0: + case 0x4e8: + case 0x4f0: + case 0x4f8: + info->res.val.q = fpreg(uap, op & 7); + break; + + case 0x6c0: + case 0x6c8: + case 0x6e0: + case 0x6e8: + case 0x6f0: + case 0x6f8: + /* stack was popped afterward */ + info->res.val.q = fpreg(uap, (op - 1) & 7); + break; + + default: + info->res.val.q = fpreg(uap, 0); + } + + /* reconstruct default untrapped result */ + if (ex == FPE_FLTOVF) { + /* generate an overflow with the sign of the result */ + x = two12288; + *(4+(short*)&x) |= (*(4+(short*)&info->res.val.q) & 0x8000); + info->res.val.q = x * two12288; + info->flags = FE_OVERFLOW | FE_INEXACT; + __fenv_getcwsw(&cwsw); + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); + } + else if (ex == FPE_FLTUND) { + /* undo the scaling; we can't distinguish a chopped result + from an exact one without futzing around to trap all in- + exact exceptions so as to keep the flag clear, so we just + punt */ + if (sw & 0x200) /* result was rounded up */ + info->res.val.q = (info->res.val.q * twom12288) * twom12288mulp; + else + info->res.val.q = (info->res.val.q * twom12288) * twom12288; + __fenv_getcwsw(&cwsw); + info->flags = (cwsw & FE_INEXACT) | FE_UNDERFLOW; + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); + } + else + info->flags = FE_INEXACT; + + /* determine the operation code */ + switch (op) { + case 0x1f0: /* f2xm1 */ + case 0x1f1: /* fyl2x */ + case 0x1f2: /* fptan */ + case 0x1f3: /* fpatan */ + case 0x1f5: /* fprem1 */ + case 0x1f8: /* fprem */ + case 0x1f9: /* fyl2xp1 */ + case 0x1fb: /* fsincos */ + case 0x1fc: /* frndint */ + case 0x1fd: /* fscale */ + case 0x1fe: /* fsin */ + case 0x1ff: /* fcos */ + info->op = fex_other; + return; + + case 0x1fa: /* fsqrt */ + info->op = fex_sqrt; + return; + } + + info->op = fex_other; + switch (op & 0x7c0) { + case 0x000: + case 0x040: + case 0x080: + case 0x0c0: + case 0x200: + case 0x240: + case 0x280: + case 0x400: + case 0x440: + case 
0x480: + case 0x4c0: + case 0x600: + case 0x640: + case 0x680: + case 0x6c0: + switch (op & 0x38) { + case 0x00: + info->op = fex_add; + break; + + case 0x08: + info->op = fex_mul; + break; + + case 0x20: + case 0x28: + info->op = fex_sub; + break; + + case 0x30: + case 0x38: + info->op = fex_div; + break; + } + } + return; + } + + /* for other exceptions, the operands are preserved, so we can + just emulate the operation with traps disabled */ + + /* one operand is always in st */ + info->op1.type = fex_ldouble; + info->op1.val.q = fpreg(uap, 0); + + /* oddball instructions */ + info->op = fex_other; + switch (op) { + case 0x1e4: /* ftst */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = 0.0l; + info->res.type = fex_nodata; + c = (info->op1.val.q < info->op2.val.q); + goto done; + + case 0x1f0: /* f2xm1 */ + info->res.type = fex_ldouble; + info->res.val.q = f2xm1(info->op1.val.q); + goto done; + + case 0x1f1: /* fyl2x */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fyl2x(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f2: /* fptan */ + info->res.type = fex_ldouble; + info->res.val.q = fptan(info->op1.val.q); + goto done; + + case 0x1f3: /* fpatan */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fpatan(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f4: /* fxtract */ + info->res.type = fex_ldouble; + info->res.val.q = fxtract(info->op1.val.q); + goto done; + + case 0x1f5: /* fprem1 */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fprem1(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f8: /* fprem */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fprem(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1f9: /* 
fyl2xp1 */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fyl2xp1(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1fa: /* fsqrt */ + info->op = fex_sqrt; + info->res.type = fex_ldouble; + info->res.val.q = fsqrt(info->op1.val.q); + goto done; + + case 0x1fb: /* fsincos */ + info->res.type = fex_ldouble; + info->res.val.q = fsincos(info->op1.val.q); + goto done; + + case 0x1fc: /* frndint */ + info->res.type = fex_ldouble; + info->res.val.q = frndint(info->op1.val.q); + goto done; + + case 0x1fd: /* fscale */ + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_ldouble; + info->res.val.q = fscale(info->op1.val.q, info->op2.val.q); + goto done; + + case 0x1fe: /* fsin */ + info->res.type = fex_ldouble; + info->res.val.q = fsin(info->op1.val.q); + goto done; + + case 0x1ff: /* fcos */ + info->res.type = fex_ldouble; + info->res.val.q = fcos(info->op1.val.q); + goto done; + + case 0x2e9: /* fucompp */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, 1); + info->res.type = fex_nodata; + c = (info->op1.val.q == info->op2.val.q); + goto done; + } + + /* fucom[p], fcomi[p], fucomi[p] */ + switch (op & 0x7f8) { + case 0x3e8: + case 0x5e0: + case 0x5e8: + case 0x7e8: /* unordered compares */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + info->res.type = fex_nodata; + c = (info->op1.val.q == info->op2.val.q); + goto done; + + case 0x3f0: + case 0x7f0: /* ordered compares */ + info->op = fex_cmp; + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + info->res.type = fex_nodata; + c = (info->op1.val.q < info->op2.val.q); + goto done; + } + + /* all other instructions come in groups of the form + fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv, fdivr */ + + /* get the second operand */ + switch (op & 0x7c0) { + case 0x000: + case 0x040: + case 0x080: + if (!ea) 
{ + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_float; + info->op2.val.f = *(float *)ea; + op2v = (long double) info->op2.val.f; + break; + + case 0x0c0: + info->op2.type = fex_ldouble; + op2v = info->op2.val.q = fpreg(uap, op & 7); + break; + + case 0x200: + case 0x240: + case 0x280: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_int; + info->op2.val.i = *(int *)ea; + op2v = (long double) info->op2.val.i; + break; + + case 0x400: + case 0x440: + case 0x480: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_double; + info->op2.val.d = *(double *)ea; + op2v = (long double) info->op2.val.d; + break; + + case 0x4c0: + case 0x6c0: + info->op2.type = fex_ldouble; + info->op2.val.q = fpreg(uap, op & 7); + t = info->op1; + info->op1 = info->op2; + info->op2 = t; + op2v = info->op2.val.q; + break; + + case 0x600: + case 0x640: + case 0x680: + if (!ea) { + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + info->op2.type = fex_int; + info->op2.val.i = *(short *)ea; + op2v = (long double) info->op2.val.i; + break; + + default: + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + + /* distinguish different operations in the group */ + info->res.type = fex_ldouble; + switch (op & 0x38) { + case 0x00: + info->op = fex_add; + info->res.val.q = info->op1.val.q + op2v; + break; + + case 0x08: + info->op = fex_mul; + info->res.val.q = info->op1.val.q * op2v; + break; + + case 0x10: + case 0x18: + info->op = fex_cmp; + info->res.type = fex_nodata; + c = (info->op1.val.q < op2v); + break; + + case 0x20: + info->op = fex_sub; + 
info->res.val.q = info->op1.val.q - op2v; + break; + + case 0x28: + info->op = fex_sub; + info->res.val.q = op2v - info->op1.val.q; + t = info->op1; + info->op1 = info->op2; + info->op2 = t; + break; + + case 0x30: + info->op = fex_div; + info->res.val.q = info->op1.val.q / op2v; + break; + + case 0x38: + info->op = fex_div; + info->res.val.q = op2v / info->op1.val.q; + t = info->op1; + info->op1 = info->op2; + info->op2 = t; + break; + + default: + info->op = fex_other; + info->op1.type = info->op2.type = info->res.type = fex_nodata; + info->flags = 0; + return; + } + +done: + __fenv_getcwsw(&cwsw); + info->flags = cwsw & FE_ALL_EXCEPT; + cwsw &= ~FE_ALL_EXCEPT; + __fenv_setcwsw(&cwsw); +} + +/* pop the saved stack */ +static void pop(ucontext_t *uap) +{ + unsigned top; + + fpreg(uap, 0) = fpreg(uap, 1); + fpreg(uap, 1) = fpreg(uap, 2); + fpreg(uap, 2) = fpreg(uap, 3); + fpreg(uap, 3) = fpreg(uap, 4); + fpreg(uap, 4) = fpreg(uap, 5); + fpreg(uap, 5) = fpreg(uap, 6); + fpreg(uap, 6) = fpreg(uap, 7); +#if defined(__amd64) + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10) + & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw |= (3 << top); + top = (top + 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800) + | (top << 10); +#else + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10) + & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] |= (3 << top); + top = (top + 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800) + | (top << 10); +#endif +} + +/* push x onto the saved stack */ +static void push(long double x, ucontext_t *uap) +{ + unsigned top; + + fpreg(uap, 7) = fpreg(uap, 6); + fpreg(uap, 6) = fpreg(uap, 5); + fpreg(uap, 5) = fpreg(uap, 4); + fpreg(uap, 4) = fpreg(uap, 3); + fpreg(uap, 3) = fpreg(uap, 2); + fpreg(uap, 2) = fpreg(uap, 
1); + fpreg(uap, 1) = fpreg(uap, 0); + fpreg(uap, 0) = x; +#if defined(__amd64) + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw >> 10) + & 0xe; + top = (top - 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fctw &= ~(3 << top); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw & ~0x3800) + | (top << 10); +#else + top = (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] >> 10) + & 0xe; + top = (top - 2) & 0xe; + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[TW] &= ~(3 << top); + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] = + (uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] & ~0x3800) + | (top << 10); +#endif +} + +/* scale factors for exponent wrapping */ +static const float + fun = 7.922816251e+28f, /* 2^96 */ + fov = 1.262177448e-29f; /* 2^-96 */ +static const double + dun = 1.552518092300708935e+231, /* 2^768 */ + dov = 6.441148769597133308e-232; /* 2^-768 */ + +/* +* Store the specified result; if no result is given but the exception +* is underflow or overflow, use the default trapped result +*/ +void +__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + fex_numeric_t r; + unsigned ex, op, ea, stack; + + /* get the exception type, opcode, and data address */ + ex = sip->si_code; +#if defined(__amd64) + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.fop >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.rdp; /*???*/ +#else + op = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[OP] >> 16; + ea = uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[EA]; +#endif + + /* if the instruction is a compare, set the condition codes + to unordered and update the stack */ + switch (op & 0x7f8) { + case 0x010: + case 0x050: + case 0x090: + case 0x0d0: + case 0x210: + case 0x250: + case 0x290: + case 0x410: + case 0x450: + case 0x490: + case 0x4d0: + case 0x5e0: + case 0x610: + case 0x650: + case 0x690: + /* 
f[u]com */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + return; + + case 0x018: + case 0x058: + case 0x098: + case 0x0d8: + case 0x218: + case 0x258: + case 0x298: + case 0x418: + case 0x458: + case 0x498: + case 0x4d8: + case 0x5e8: + case 0x618: + case 0x658: + case 0x698: + case 0x6d0: + /* f[u]comp */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + pop(uap); + return; + + case 0x2e8: + case 0x6d8: + /* f[u]compp */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + pop(uap); + pop(uap); + return; + + case 0x1e0: + if (op == 0x1e4) { /* ftst */ +#if defined(__amd64) + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.sw |= 0x4500; +#else + uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.state[SW] |= 0x4500; +#endif + return; + } + break; + + case 0x3e8: + case 0x3f0: + /* f[u]comi */ +#if defined(__amd64) + uap->uc_mcontext.gregs[REG_PS] |= 0x45; +#else + uap->uc_mcontext.gregs[EFL] |= 0x45; +#endif + return; + + case 0x7e8: + case 0x7f0: + /* f[u]comip */ +#if defined(__amd64) + uap->uc_mcontext.gregs[REG_PS] |= 0x45; +#else + uap->uc_mcontext.gregs[EFL] |= 0x45; +#endif + pop(uap); + return; + } + + /* if there is no result available and the exception is overflow + or underflow, use the wrapped result */ + r = info->res; + if (r.type == fex_nodata) { + if (ex == FPE_FLTOVF || ex == FPE_FLTUND) { + /* for store instructions, do the scaling and store */ + switch (op & 0x7f8) { + case 0x110: + case 0x118: + case 0x150: + case 0x158: + case 0x190: + case 0x198: + if (!ea) + return; + if (ex == FPE_FLTOVF) + *(float *)ea = (fpreg(uap, 0) * fov) * fov; + else + *(float *)ea = (fpreg(uap, 0) * fun) 
* fun; + if ((op & 8) != 0) + pop(uap); + break; + + case 0x510: + case 0x518: + case 0x550: + case 0x558: + case 0x590: + case 0x598: + if (!ea) + return; + if (ex == FPE_FLTOVF) + *(double *)ea = (fpreg(uap, 0) * dov) * dov; + else + *(double *)ea = (fpreg(uap, 0) * dun) * dun; + if ((op & 8) != 0) + pop(uap); + break; + } + } +#ifdef DEBUG + else if (ex != FPE_FLTRES) + printf( "No result supplied, stack may be hosed\n" ); +#endif + return; + } + + /* otherwise convert the supplied result to the correct type, + put it in the destination, and update the stack as need be */ + + /* store instructions */ + switch (op & 0x7f8) { + case 0x110: + case 0x118: + case 0x150: + case 0x158: + case 0x190: + case 0x198: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(float *)ea = (float) r.val.i; + break; + + case fex_llong: + *(float *)ea = (float) r.val.l; + break; + + case fex_float: + *(float *)ea = r.val.f; + break; + + case fex_double: + *(float *)ea = (float) r.val.d; + break; + + case fex_ldouble: + *(float *)ea = (float) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x310: + case 0x318: + case 0x350: + case 0x358: + case 0x390: + case 0x398: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(int *)ea = r.val.i; + break; + + case fex_llong: + *(int *)ea = (int) r.val.l; + break; + + case fex_float: + *(int *)ea = (int) r.val.f; + break; + + case fex_double: + *(int *)ea = (int) r.val.d; + break; + + case fex_ldouble: + *(int *)ea = (int) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x510: + case 0x518: + case 0x550: + case 0x558: + case 0x590: + case 0x598: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(double *)ea = (double) r.val.i; + break; + + case fex_llong: + *(double *)ea = (double) r.val.l; + break; + + case fex_float: + *(double *)ea = (double) r.val.f; + break; + + case fex_double: + *(double *)ea = r.val.d; + break; + + case 
fex_ldouble: + *(double *)ea = (double) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x710: + case 0x718: + case 0x750: + case 0x758: + case 0x790: + case 0x798: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(short *)ea = (short) r.val.i; + break; + + case fex_llong: + *(short *)ea = (short) r.val.l; + break; + + case fex_float: + *(short *)ea = (short) r.val.f; + break; + + case fex_double: + *(short *)ea = (short) r.val.d; + break; + + case fex_ldouble: + *(short *)ea = (short) r.val.q; + break; + } + if (ex != FPE_FLTRES && (op & 8) != 0) + pop(uap); + return; + + case 0x730: + case 0x770: + case 0x7b0: + /* fbstp; don't bother */ + if (ea && ex != FPE_FLTRES) + pop(uap); + return; + + case 0x738: + case 0x778: + case 0x7b8: + if (!ea) + return; + switch (r.type) { + case fex_int: + *(long long *)ea = (long long) r.val.i; + break; + + case fex_llong: + *(long long *)ea = r.val.l; + break; + + case fex_float: + *(long long *)ea = (long long) r.val.f; + break; + + case fex_double: + *(long long *)ea = (long long) r.val.d; + break; + + case fex_ldouble: + *(long long *)ea = (long long) r.val.q; + break; + } + if (ex != FPE_FLTRES) + pop(uap); + return; + } + + /* for all other instructions, the result goes into a register */ + switch (r.type) { + case fex_int: + r.val.q = (long double) r.val.i; + break; + + case fex_llong: + r.val.q = (long double) r.val.l; + break; + + case fex_float: + r.val.q = (long double) r.val.f; + break; + + case fex_double: + r.val.q = (long double) r.val.d; + break; + } + + /* for load instructions, push the result onto the stack */ + switch (op & 0x7f8) { + case 0x100: + case 0x140: + case 0x180: + case 0x500: + case 0x540: + case 0x580: + if (ea) + push(r.val.q, uap); + return; + } + + /* for all other instructions, if the exception is overflow, + underflow, or inexact, the stack has already been updated */ + stack = (ex == FPE_FLTOVF || ex == FPE_FLTUND || ex == 
FPE_FLTRES); + switch (op & 0x7f8) { + case 0x1f0: /* oddballs */ + switch (op) { + case 0x1f1: /* fyl2x */ + case 0x1f3: /* fpatan */ + case 0x1f9: /* fyl2xp1 */ + /* pop the stack, leaving the result in st */ + if (!stack) + pop(uap); + fpreg(uap, 0) = r.val.q; + return; + + case 0x1f2: /* fptan */ + /* fptan pushes 1.0 afterward */ + if (stack) + fpreg(uap, 1) = r.val.q; + else { + fpreg(uap, 0) = r.val.q; + push(1.0L, uap); + } + return; + + case 0x1f4: /* fxtract */ + case 0x1fb: /* fsincos */ + /* leave the supplied result in st */ + if (stack) + fpreg(uap, 0) = r.val.q; + else { + fpreg(uap, 0) = 0.0; /* punt */ + push(r.val.q, uap); + } + return; + } + + /* all others leave the stack alone and the result in st */ + fpreg(uap, 0) = r.val.q; + return; + + case 0x4c0: + case 0x4c8: + case 0x4e0: + case 0x4e8: + case 0x4f0: + case 0x4f8: + fpreg(uap, op & 7) = r.val.q; + return; + + case 0x6c0: + case 0x6c8: + case 0x6e0: + case 0x6e8: + case 0x6f0: + case 0x6f8: + /* stack is popped afterward */ + if (stack) + fpreg(uap, (op - 1) & 7) = r.val.q; + else { + fpreg(uap, op & 7) = r.val.q; + pop(uap); + } + return; + + default: + fpreg(uap, 0) = r.val.q; + return; + } +} diff --git a/usr/src/libm/src/m9x/__fex_sparc.c b/usr/src/libm/src/m9x/__fex_sparc.c new file mode 100644 index 0000000..7682afa --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_sparc.c @@ -0,0 +1,864 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_sparc.c 1.7 06/01/31 SMI" + +#if defined(__sparc) +#include "fenv_synonyms.h" +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <signal.h> +#include <siginfo.h> +#include <thread.h> +#include <ucontext.h> +#include <math.h> +#include <sunmath.h> +#include <fenv.h> + +#ifdef __sparcv9 + +#define FPreg(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X] + +#define FPREG(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_dregs[(X>>1)| \ + ((X&1)<<4)] + +#else + +#include <sys/procfs.h> + +#define FPxreg(X) &((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfr.pr_regs[X] + +#define FPreg(X) &uap->uc_mcontext.fpregs.fpu_fr.fpu_regs[X] + +#define FPREG(X) ((X & 1)? 
FPxreg(X - 1) : FPreg(X)) + +#endif /* __sparcv9 */ + +#include "fex_handler.h" + +/* avoid dependence on libsunmath */ +static enum fp_class_type +my_fp_classl(long double *a) +{ + int msw = *(int*)a & ~0x80000000; + + if (msw >= 0x7fff0000) { + if (((msw & 0xffff) | *(1+(int*)a) | *(2+(int*)a) | *(3+(int*)a)) == 0) + return fp_infinity; + else if (msw & 0x8000) + return fp_quiet; + else + return fp_signaling; + } else if (msw < 0x10000) { + if ((msw | *(1+(int*)a) | *(2+(int*)a) | *(3+(int*)a)) == 0) + return fp_zero; + else + return fp_subnormal; + } else + return fp_normal; +} + +/* +* Determine which type of invalid operation exception occurred +*/ +enum fex_exception +__fex_get_invalid_type(siginfo_t *sip, ucontext_t *uap) +{ + unsigned instr, opf, rs1, rs2; + enum fp_class_type t1, t2; + + /* parse the instruction which caused the exception */ + instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; + opf = (instr >> 5) & 0x1ff; + rs1 = (instr >> 14) & 0x1f; + rs2 = instr & 0x1f; + + /* determine the classes of the operands */ + switch (opf & 3) { + case 1: /* single */ + t1 = fp_classf(*(float*)FPreg(rs1)); + t2 = fp_classf(*(float*)FPreg(rs2)); + break; + + case 2: /* double */ + t1 = fp_class(*(double*)FPREG(rs1)); + t2 = fp_class(*(double*)FPREG(rs2)); + break; + + case 3: /* quad */ + t1 = my_fp_classl((long double*)FPREG(rs1)); + t2 = my_fp_classl((long double*)FPREG(rs2)); + break; + + default: /* integer operands never cause an invalid operation */ + return (enum fex_exception) -1; + } + + /* if rs2 is snan, return immediately */ + if (t2 == fp_signaling) + return fex_inv_snan; + + /* determine the type of operation */ + switch ((instr >> 19) & 0x183f) { + case 0x1034: /* add, subtract, multiply, divide, square root, convert */ + switch (opf & 0x1fc) { + case 0x40: + case 0x44: /* add or subtract */ + if (t1 == fp_signaling) + return fex_inv_snan; + else + return fex_inv_isi; + + case 0x48: + case 0x68: + case 0x6c: /* multiply */ + if (t1 == 
fp_signaling) + return fex_inv_snan; + else + return fex_inv_zmi; + + case 0x4c: /* divide */ + if (t1 == fp_signaling) + return fex_inv_snan; + else if (t1 == fp_zero) + return fex_inv_zdz; + else + return fex_inv_idi; + + case 0x28: /* square root */ + return fex_inv_sqrt; + + case 0x80: + case 0xd0: /* convert to integer */ + return fex_inv_int; + } + break; + + case 0x1035: /* compare */ + if (t1 == fp_signaling) + return fex_inv_snan; + else + return fex_inv_cmp; + } + + return (enum fex_exception) -1; +} + +#ifdef __sparcv9 +extern void _Qp_sqrt(long double *, const long double *); +#else +extern long double _Q_sqrt(long double); +#endif + +/* +* Get the operands, generate the default untrapped result with +* exceptions, and set a code indicating the type of operation +*/ +void +__fex_get_op(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + unsigned long fsr; + unsigned instr, opf, rs1, rs2; + volatile int c; + + /* parse the instruction which caused the exception */ + instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; + opf = (instr >> 5) & 0x1ff; + rs1 = (instr >> 14) & 0x1f; + rs2 = instr & 0x1f; + + /* get the operands */ + switch (opf & 3) { + case 0: /* integer */ + info->op1.type = fex_nodata; + if (opf & 0x40) { + info->op2.type = fex_int; + info->op2.val.i = *(int*)FPreg(rs2); + } + else { + info->op2.type = fex_llong; + info->op2.val.l = *(long long*)FPREG(rs2); + } + break; + + case 1: /* single */ + info->op1.type = info->op2.type = fex_float; + info->op1.val.f = *(float*)FPreg(rs1); + info->op2.val.f = *(float*)FPreg(rs2); + break; + + case 2: /* double */ + info->op1.type = info->op2.type = fex_double; + info->op1.val.d = *(double*)FPREG(rs1); + info->op2.val.d = *(double*)FPREG(rs2); + break; + + case 3: /* quad */ + info->op1.type = info->op2.type = fex_ldouble; + info->op1.val.q = *(long double*)FPREG(rs1); + info->op2.val.q = *(long double*)FPREG(rs2); + break; + } + + /* initialize res to the default untrapped result and ex to 
the + corresponding flags (assume trapping is disabled and flags + are clear) */ + info->op = fex_other; + info->res.type = fex_nodata; + switch ((instr >> 19) & 0x183f) { + case 0x1035: /* compare */ + info->op = fex_cmp; + switch (opf) { + case 0x51: /* compare single */ + c = (info->op1.val.f == info->op2.val.f); + break; + + case 0x52: /* compare double */ + c = (info->op1.val.d == info->op2.val.d); + break; + + case 0x53: /* compare quad */ + c = (info->op1.val.q == info->op2.val.q); + break; + + case 0x55: /* compare single with exception */ + c = (info->op1.val.f < info->op2.val.f); + break; + + case 0x56: /* compare double with exception */ + c = (info->op1.val.d < info->op2.val.d); + break; + + case 0x57: /* compare quad with exception */ + c = (info->op1.val.q < info->op2.val.q); + break; + } + break; + + case 0x1034: /* add, subtract, multiply, divide, square root, convert */ + switch (opf) { + case 0x41: /* add single */ + info->op = fex_add; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f + info->op2.val.f; + break; + + case 0x42: /* add double */ + info->op = fex_add; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d + info->op2.val.d; + break; + + case 0x43: /* add quad */ + info->op = fex_add; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q + info->op2.val.q; + break; + + case 0x45: /* subtract single */ + info->op = fex_sub; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f - info->op2.val.f; + break; + + case 0x46: /* subtract double */ + info->op = fex_sub; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d - info->op2.val.d; + break; + + case 0x47: /* subtract quad */ + info->op = fex_sub; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q - info->op2.val.q; + break; + + case 0x49: /* multiply single */ + info->op = fex_mul; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f * info->op2.val.f; + break; + + case 0x4a: /* multiply 
double */ + info->op = fex_mul; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d * info->op2.val.d; + break; + + case 0x4b: /* multiply quad */ + info->op = fex_mul; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q * info->op2.val.q; + break; + + case 0x69: /* fsmuld */ + info->op = fex_mul; + info->res.type = fex_double; + info->res.val.d = (double)info->op1.val.f * (double)info->op2.val.f; + break; + + case 0x6e: /* fdmulq */ + info->op = fex_mul; + info->res.type = fex_ldouble; + info->res.val.q = (long double)info->op1.val.d * + (long double)info->op2.val.d; + break; + + case 0x4d: /* divide single */ + info->op = fex_div; + info->res.type = fex_float; + info->res.val.f = info->op1.val.f / info->op2.val.f; + break; + + case 0x4e: /* divide double */ + info->op = fex_div; + info->res.type = fex_double; + info->res.val.d = info->op1.val.d / info->op2.val.d; + break; + + case 0x4f: /* divide quad */ + info->op = fex_div; + info->res.type = fex_ldouble; + info->res.val.q = info->op1.val.q / info->op2.val.q; + break; + + case 0x29: /* square root single */ + info->op = fex_sqrt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + info->res.type = fex_float; + info->res.val.f = sqrtf(info->op1.val.f); + break; + + case 0x2a: /* square root double */ + info->op = fex_sqrt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + info->res.type = fex_double; + info->res.val.d = sqrt(info->op1.val.d); + break; + + case 0x2b: /* square root quad */ + info->op = fex_sqrt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + info->res.type = fex_ldouble; +#ifdef __sparcv9 + _Qp_sqrt(&info->res.val.q, &info->op1.val.q); +#else + info->res.val.q = _Q_sqrt(info->op1.val.q); +#endif + break; + + default: /* conversions */ + info->op = fex_cnvt; + info->op1 = info->op2; + info->op2.type = fex_nodata; + switch (opf) { + case 0xd1: /* convert single to int */ + info->res.type = fex_int; + info->res.val.i = (int) info->op1.val.f; + 
break; + + case 0xd2: /* convert double to int */ + info->res.type = fex_int; + info->res.val.i = (int) info->op1.val.d; + break; + + case 0xd3: /* convert quad to int */ + info->res.type = fex_int; + info->res.val.i = (int) info->op1.val.q; + break; + + case 0x81: /* convert single to long long */ + info->res.type = fex_llong; + info->res.val.l = (long long) info->op1.val.f; + break; + + case 0x82: /* convert double to long long */ + info->res.type = fex_llong; + info->res.val.l = (long long) info->op1.val.d; + break; + + case 0x83: /* convert quad to long long */ + info->res.type = fex_llong; + info->res.val.l = (long long) info->op1.val.q; + break; + + case 0xc4: /* convert int to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.i; + break; + + case 0x84: /* convert long long to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.l; + break; + + case 0x88: /* convert long long to double */ + info->res.type = fex_double; + info->res.val.d = (double) info->op1.val.l; + break; + + case 0xc6: /* convert double to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.d; + break; + + case 0xc7: /* convert quad to single */ + info->res.type = fex_float; + info->res.val.f = (float) info->op1.val.q; + break; + + case 0xc9: /* convert single to double */ + info->res.type = fex_double; + info->res.val.d = (double) info->op1.val.f; + break; + + case 0xcb: /* convert quad to double */ + info->res.type = fex_double; + info->res.val.d = (double) info->op1.val.q; + break; + + case 0xcd: /* convert single to quad */ + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.f; + break; + + case 0xce: /* convert double to quad */ + info->res.type = fex_ldouble; + info->res.val.q = (long double) info->op1.val.d; + break; + } + } + break; + } + __fenv_getfsr(&fsr); + info->flags = (int)__fenv_get_ex(fsr); + __fenv_set_ex(fsr, 0); + __fenv_setfsr(&fsr); +} + +/* +* Store 
the specified result; if no result is given but the exception +* is underflow or overflow, supply the default trapped result +*/ +void +__fex_st_result(siginfo_t *sip, ucontext_t *uap, fex_info_t *info) +{ + unsigned instr, opf, rs1, rs2, rd; + long double qscl; + double dscl; + float fscl; + + /* parse the instruction which caused the exception */ + instr = uap->uc_mcontext.fpregs.fpu_q->FQu.fpq.fpq_instr; + opf = (instr >> 5) & 0x1ff; + rs1 = (instr >> 14) & 0x1f; + rs2 = instr & 0x1f; + rd = (instr >> 25) & 0x1f; + + /* if the instruction is a compare, just set fcc to unordered */ + if (((instr >> 19) & 0x183f) == 0x1035) { + if (rd == 0) + uap->uc_mcontext.fpregs.fpu_fsr |= 0xc00; + else { +#ifdef __sparcv9 + uap->uc_mcontext.fpregs.fpu_fsr |= (3l << ((rd << 1) + 30)); +#else + ((prxregset_t*)uap->uc_mcontext.xrs.xrs_ptr)->pr_un.pr_v8p.pr_xfsr |= (3 << ((rd - 1) << 1)); +#endif + } + return; + } + + /* if there is no result available, try to generate the untrapped + default */ + if (info->res.type == fex_nodata) { + /* set scale factors for exponent wrapping */ + switch (sip->si_code) { + case FPE_FLTOVF: + fscl = 1.262177448e-29f; /* 2^-96 */ + dscl = 6.441148769597133308e-232; /* 2^-768 */ + qscl = 8.778357852076208839765066529179033145e-3700l;/* 2^-12288 */ + break; + + case FPE_FLTUND: + fscl = 7.922816251e+28f; /* 2^96 */ + dscl = 1.552518092300708935e+231; /* 2^768 */ + qscl = 1.139165225263043370845938579315932009e+3699l;/* 2^12288 */ + break; + + default: + /* user may have blown away the default result by mistake, + so try to regenerate it */ + (void) __fex_get_op(sip, uap, info); + if (info->res.type != fex_nodata) + goto stuff; + /* couldn't do it */ + return; + } + + /* get the operands */ + switch (opf & 3) { + case 1: /* single */ + info->op1.val.f = *(float*)FPreg(rs1); + info->op2.val.f = *(float*)FPreg(rs2); + break; + + case 2: /* double */ + info->op1.val.d = *(double*)FPREG(rs1); + info->op2.val.d = *(double*)FPREG(rs2); + break; + + case 3: 
/* quad */ + info->op1.val.q = *(long double*)FPREG(rs1); + info->op2.val.q = *(long double*)FPREG(rs2); + break; + } + + /* generate the wrapped result */ + switch (opf) { + case 0x41: /* add single */ + info->res.type = fex_float; + info->res.val.f = fscl * ( fscl * info->op1.val.f + + fscl * info->op2.val.f ); + break; + + case 0x42: /* add double */ + info->res.type = fex_double; + info->res.val.d = dscl * ( dscl * info->op1.val.d + + dscl * info->op2.val.d ); + break; + + case 0x43: /* add quad */ + info->res.type = fex_ldouble; + info->res.val.q = qscl * ( qscl * info->op1.val.q + + qscl * info->op2.val.q ); + break; + + case 0x45: /* subtract single */ + info->res.type = fex_float; + info->res.val.f = fscl * ( fscl * info->op1.val.f - + fscl * info->op2.val.f ); + break; + + case 0x46: /* subtract double */ + info->res.type = fex_double; + info->res.val.d = dscl * ( dscl * info->op1.val.d - + dscl * info->op2.val.d ); + break; + + case 0x47: /* subtract quad */ + info->res.type = fex_ldouble; + info->res.val.q = qscl * ( qscl * info->op1.val.q - + qscl * info->op2.val.q ); + break; + + case 0x49: /* multiply single */ + info->res.type = fex_float; + info->res.val.f = ( fscl * info->op1.val.f ) * + ( fscl * info->op2.val.f ); + break; + + case 0x4a: /* multiply double */ + info->res.type = fex_double; + info->res.val.d = ( dscl * info->op1.val.d ) * + ( dscl * info->op2.val.d ); + break; + + case 0x4b: /* multiply quad */ + info->res.type = fex_ldouble; + info->res.val.q = ( qscl * info->op1.val.q ) * + ( qscl * info->op2.val.q ); + break; + + case 0x4d: /* divide single */ + info->res.type = fex_float; + info->res.val.f = ( fscl * info->op1.val.f ) / + ( info->op2.val.f / fscl ); + break; + + case 0x4e: /* divide double */ + info->res.type = fex_double; + info->res.val.d = ( dscl * info->op1.val.d ) / + ( info->op2.val.d / dscl ); + break; + + case 0x4f: /* divide quad */ + info->res.type = fex_ldouble; + info->res.val.q = ( qscl * info->op1.val.q ) / + ( 
info->op2.val.q / qscl ); + break; + + /* conversions are unary: the source operand was loaded from rs2 into op2 above, op1 (rs1) is not the operand */ + case 0xc6: /* convert double to single */ + info->res.type = fex_float; + info->res.val.f = (float) ( fscl * ( fscl * info->op2.val.d ) ); + break; + + case 0xc7: /* convert quad to single */ + info->res.type = fex_float; + info->res.val.f = (float) ( fscl * ( fscl * info->op2.val.q ) ); + break; + + case 0xcb: /* convert quad to double */ + info->res.type = fex_double; + info->res.val.d = (double) ( dscl * ( dscl * info->op2.val.q ) ); + break; + } + + if (info->res.type == fex_nodata) + /* couldn't do it */ + return; + } + +stuff: + /* stick the result in the destination */ + if (opf & 0x80) { /* conversion */ + if (opf & 0x10) { /* result is an int */ + switch (info->res.type) { + case fex_llong: + info->res.val.i = (int) info->res.val.l; + break; + + case fex_float: + info->res.val.i = (int) info->res.val.f; + break; + + case fex_double: + info->res.val.i = (int) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.i = (int) info->res.val.q; + break; + } + *(int*)FPreg(rd) = info->res.val.i; + return; + } + + switch (opf & 0xc) { + case 0: /* result is long long */ + switch (info->res.type) { + case fex_int: + info->res.val.l = (long long) info->res.val.i; + break; + + case fex_float: + info->res.val.l = (long long) info->res.val.f; + break; + + case fex_double: + info->res.val.l = (long long) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.l = (long long) info->res.val.q; + break; + } + *(long long*)FPREG(rd) = info->res.val.l; + break; + + case 0x4: /* result is float */ + switch (info->res.type) { + case fex_int: + info->res.val.f = (float) info->res.val.i; + break; + + case fex_llong: + info->res.val.f = (float) info->res.val.l; + break; + + case fex_double: + info->res.val.f = (float) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.f = (float) info->res.val.q; + break; + } + *(float*)FPreg(rd) = info->res.val.f; + break; + + case 0x8: /* result is double */ + switch
(info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 0xc: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } + return; + } + + if ((opf & 0xf0) == 0x60) { /* fsmuld, fdmulq */ + switch (opf & 0xc) { /* bits 2-3 select the result size: 0x8 for fsmuld (0x69), 0xc for fdmulq (0x6e) */ + case 0x8: /* result is double */ + switch (info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 0xc: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } + return; + } + + switch (opf & 3) { /* other arithmetic op */ + case 1: /* result is float */ + switch (info->res.type) { + case fex_int: + info->res.val.f = (float) info->res.val.i; + break; + + case fex_llong: +
info->res.val.f = (float) info->res.val.l; + break; + + case fex_double: + info->res.val.f = (float) info->res.val.d; + break; + + case fex_ldouble: + info->res.val.f = (float) info->res.val.q; + break; + } + *(float*)FPreg(rd) = info->res.val.f; + break; + + case 2: /* result is double */ + switch (info->res.type) { + case fex_int: + info->res.val.d = (double) info->res.val.i; + break; + + case fex_llong: + info->res.val.d = (double) info->res.val.l; + break; + + case fex_float: + info->res.val.d = (double) info->res.val.f; + break; + + case fex_ldouble: + info->res.val.d = (double) info->res.val.q; + break; + } + *(double*)FPREG(rd) = info->res.val.d; + break; + + case 3: /* result is long double */ + switch (info->res.type) { + case fex_int: + info->res.val.q = (long double) info->res.val.i; + break; + + case fex_llong: + info->res.val.q = (long double) info->res.val.l; + break; + + case fex_float: + info->res.val.q = (long double) info->res.val.f; + break; + + case fex_double: + info->res.val.q = (long double) info->res.val.d; + break; + } + *(long double*)FPREG(rd) = info->res.val.q; + break; + } +} +#endif /* defined(__sparc) */ diff --git a/usr/src/libm/src/m9x/__fex_sse.c b/usr/src/libm/src/m9x/__fex_sse.c new file mode 100644 index 0000000..e1743a9 --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_sse.c @@ -0,0 +1,1581 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_sse.c 1.3 06/01/31 SMI" + +#include "fenv_synonyms.h" +#if defined(__i386) && !defined(__amd64) +/* for now, pick up local copy of Solaris 10 sys/regset.h; we can get rid + of this once we no longer need to build on Solaris 8 */ +#include "regset.h" +#endif +#include <ucontext.h> +#include <fenv.h> +#include <sunmath.h> +#include "fex_handler.h" + +#if !defined(REG_PC) +#define REG_PC EIP +#endif + +#if !defined(REG_PS) +#define REG_PS EFL +#endif + +#ifdef __amd64 +#define regno(X) ((X < 4)? REG_RAX - X : \ + ((X > 4)? REG_RAX + 1 - X : REG_RSP)) +#else +#define regno(X) (EAX - X) +#endif + +/* + * Support for SSE instructions + */ + +/* + * Decode an SSE instruction. Fill in *inst and return the length of the + * instruction in bytes. Return 0 if the instruction is not recognized. 
+ */ +int +__fex_parse_sse(ucontext_t *uap, sseinst_t *inst) +{ + unsigned char *ip; + char *addr; + int i, dbl, simd, rex, modrm, sib, r; + + i = 0; + ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; + + /* look for pseudo-prefixes */ + dbl = 0; + simd = SIMD; + if (ip[i] == 0xF3) { + simd = 0; + i++; + } else if (ip[i] == 0x66) { + dbl = DOUBLE; + i++; + } else if (ip[i] == 0xF2) { + dbl = DOUBLE; + simd = 0; + i++; + } + + /* look for AMD64 REX prefix */ + rex = 0; + if (ip[i] >= 0x40 && ip[i] <= 0x4F) { + rex = ip[i]; + i++; + } + + /* parse opcode */ + if (ip[i++] != 0x0F) + return 0; + switch (ip[i++]) { + case 0x2A: + inst->op = (int)cvtsi2ss + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2C: + inst->op = (int)cvttss2si + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2D: + inst->op = (int)cvtss2si + simd + dbl; + if (!simd) + inst->op = (int)inst->op + (rex & 8); + break; + + case 0x2E: + /* oddball: scalar instruction in a SIMD opcode group */ + if (!simd) + return 0; + inst->op = (int)ucomiss + dbl; + break; + + case 0x2F: + /* oddball: scalar instruction in a SIMD opcode group */ + if (!simd) + return 0; + inst->op = (int)comiss + dbl; + break; + + case 0x51: + inst->op = (int)sqrtss + simd + dbl; + break; + + case 0x58: + inst->op = (int)addss + simd + dbl; + break; + + case 0x59: + inst->op = (int)mulss + simd + dbl; + break; + + case 0x5A: + inst->op = (int)cvtss2sd + simd + dbl; + break; + + case 0x5B: + if (dbl) { + if (simd) + inst->op = cvtps2dq; + else + return 0; + } else { + inst->op = (simd)? 
cvtdq2ps : cvttps2dq; + } + break; + + case 0x5C: + inst->op = (int)subss + simd + dbl; + break; + + case 0x5D: + inst->op = (int)minss + simd + dbl; + break; + + case 0x5E: + inst->op = (int)divss + simd + dbl; + break; + + case 0x5F: + inst->op = (int)maxss + simd + dbl; + break; + + case 0xC2: + inst->op = (int)cmpss + simd + dbl; + break; + + case 0xE6: + if (simd) { + if (dbl) + inst->op = cvttpd2dq; + else + return 0; + } else { + inst->op = (dbl)? cvtpd2dq : cvtdq2pd; + } + break; + + default: + return 0; + } + + /* locate operands */ + modrm = ip[i++]; + + if (inst->op == cvtss2si || inst->op == cvttss2si || + inst->op == cvtsd2si || inst->op == cvttsd2si || + inst->op == cvtss2siq || inst->op == cvttss2siq || + inst->op == cvtsd2siq || inst->op == cvttsd2siq) { + /* op1 is a gp register */ + r = ((rex & 4) << 1) | ((modrm >> 3) & 7); + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; + } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || + inst->op == cvtpd2pi || inst->op == cvttpd2pi) { + /* op1 is a mmx register */ +#ifdef __amd64 + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.st[(modrm >> 3) & 7]; +#else + inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + + (char *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[7]); +#endif + } else { + /* op1 is a xmm register */ + r = ((rex & 4) << 1) | ((modrm >> 3) & 7); + inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs. + fp_reg_set.fpchip_state.xmm[r]; + } + + if ((modrm >> 6) == 3) { + if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || + inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) { + /* op2 is a gp register */ + r = ((rex & 1) << 3) | (modrm & 7); + inst->op2 = (sseoperand_t *)&uap->uc_mcontext. + gregs[regno(r)]; + } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { + /* op2 is a mmx register */ +#ifdef __amd64 + inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 
+ fp_reg_set.fpchip_state.st[modrm & 7]; +#else + inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + + (char *)&uap->uc_mcontext.fpregs.fp_reg_set. + fpchip_state.state[7]); +#endif + } else { + /* op2 is a xmm register */ + r = ((rex & 1) << 3) | (modrm & 7); + inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. + fp_reg_set.fpchip_state.xmm[r]; + } + } else if ((modrm & 0xc7) == 0x05) { +#ifdef __amd64 + /* address of next instruction + offset */ + r = i + 4; + if (inst->op == cmpss || inst->op == cmpps || + inst->op == cmpsd || inst->op == cmppd) + r++; + inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); +#else + /* absolute address */ + inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); +#endif + i += 4; + } else { + /* complex address */ + if ((modrm & 7) == 4) { + /* parse sib byte */ + sib = ip[i++]; + if ((sib & 7) == 5 && (modrm >> 6) == 0) { + /* start with absolute address */ + addr = (char *)(*(int *)(ip + i)); + i += 4; + } else { + /* start with base */ + r = ((rex & 1) << 3) | (sib & 7); + addr = (char *)uap->uc_mcontext.gregs[regno(r)]; + } + r = ((rex & 2) << 2) | ((sib >> 3) & 7); + if (r != 4) { + /* add scaled index */ + addr += uap->uc_mcontext.gregs[regno(r)] + << (sib >> 6); + } + } else { + r = ((rex & 1) << 3) | (modrm & 7); + addr = (char *)uap->uc_mcontext.gregs[regno(r)]; + } + + /* add displacement, if any */ + if ((modrm >> 6) == 1) { + addr += (char)ip[i++]; + } else if ((modrm >> 6) == 2) { + addr += *(int *)(ip + i); + i += 4; + } + inst->op2 = (sseoperand_t *)addr; + } + + if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || + inst->op == cmppd) { + /* get the immediate operand */ + inst->imm = ip[i++]; + } + + return i; +} + +static enum fp_class_type +my_fp_classf(float *x) +{ + int i = *(int *)x & ~0x80000000; + + if (i < 0x7f800000) { + if (i < 0x00800000) + return ((i == 0)? 
fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7f800000) + return fp_infinity; + else if (i & 0x400000) + return fp_quiet; + else + return fp_signaling; +} + +static enum fp_class_type +my_fp_class(double *x) +{ + int i = *(1+(int *)x) & ~0x80000000; + + if (i < 0x7ff00000) { + if (i < 0x00100000) + return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal); + return fp_normal; + } + else if (i == 0x7ff00000 && *(int *)x == 0) + return fp_infinity; + else if (i & 0x80000) + return fp_quiet; + else + return fp_signaling; +} + +/* + * Inspect a scalar SSE instruction that incurred an invalid operation + * exception to determine which type of exception it was. + */ +static enum fex_exception +__fex_get_sse_invalid_type(sseinst_t *inst) +{ + enum fp_class_type t1, t2; + + /* check op2 for signaling nan */ + t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) : + my_fp_classf(&inst->op2->f[0]); + if (t2 == fp_signaling) + return fex_inv_snan; + + /* eliminate all single-operand instructions */ + switch (inst->op) { + case cvtsd2ss: + case cvtss2sd: + /* hmm, this shouldn't have happened */ + return (enum fex_exception) -1; + + case sqrtss: + case sqrtsd: + return fex_inv_sqrt; + + case cvtss2si: + case cvtsd2si: + case cvttss2si: + case cvttsd2si: + case cvtss2siq: + case cvtsd2siq: + case cvttss2siq: + case cvttsd2siq: + return fex_inv_int; + } + + /* check op1 for signaling nan */ + t1 = ((int)inst->op & DOUBLE)? 
my_fp_class(&inst->op1->d[0]) : + my_fp_classf(&inst->op1->f[0]); + if (t1 == fp_signaling) + return fex_inv_snan; + + /* check two-operand instructions for other cases */ + switch (inst->op) { + case cmpss: + case cmpsd: + case minss: + case minsd: + case maxss: + case maxsd: + case comiss: + case comisd: + return fex_inv_cmp; + + case addss: + case addsd: + case subss: + case subsd: + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_isi; + break; + + case mulss: + case mulsd: + if ((t1 == fp_zero && t2 == fp_infinity) || + (t2 == fp_zero && t1 == fp_infinity)) + return fex_inv_zmi; + break; + + case divss: + case divsd: + if (t1 == fp_zero && t2 == fp_zero) + return fex_inv_zdz; + if (t1 == fp_infinity && t2 == fp_infinity) + return fex_inv_idi; + } + + return (enum fex_exception)-1; +} + +/* inline templates */ +extern void sse_cmpeqss(float *, float *, int *); +extern void sse_cmpltss(float *, float *, int *); +extern void sse_cmpless(float *, float *, int *); +extern void sse_cmpunordss(float *, float *, int *); +extern void sse_minss(float *, float *, float *); +extern void sse_maxss(float *, float *, float *); +extern void sse_addss(float *, float *, float *); +extern void sse_subss(float *, float *, float *); +extern void sse_mulss(float *, float *, float *); +extern void sse_divss(float *, float *, float *); +extern void sse_sqrtss(float *, float *); +extern void sse_ucomiss(float *, float *); +extern void sse_comiss(float *, float *); +extern void sse_cvtss2sd(float *, double *); +extern void sse_cvtsi2ss(int *, float *); +extern void sse_cvttss2si(float *, int *); +extern void sse_cvtss2si(float *, int *); +#ifdef __amd64 +extern void sse_cvtsi2ssq(long long *, float *); +extern void sse_cvttss2siq(float *, long long *); +extern void sse_cvtss2siq(float *, long long *); +#endif +extern void sse_cmpeqsd(double *, double *, long long *); +extern void sse_cmpltsd(double *, double *, long long *); +extern void sse_cmplesd(double *, double *, long 
long *); +extern void sse_cmpunordsd(double *, double *, long long *); +extern void sse_minsd(double *, double *, double *); +extern void sse_maxsd(double *, double *, double *); +extern void sse_addsd(double *, double *, double *); +extern void sse_subsd(double *, double *, double *); +extern void sse_mulsd(double *, double *, double *); +extern void sse_divsd(double *, double *, double *); +extern void sse_sqrtsd(double *, double *); +extern void sse_ucomisd(double *, double *); +extern void sse_comisd(double *, double *); +extern void sse_cvtsd2ss(double *, float *); +extern void sse_cvtsi2sd(int *, double *); +extern void sse_cvttsd2si(double *, int *); +extern void sse_cvtsd2si(double *, int *); +#ifdef __amd64 +extern void sse_cvtsi2sdq(long long *, double *); +extern void sse_cvttsd2siq(double *, long long *); +extern void sse_cvtsd2siq(double *, long long *); +#endif + +/* + * Fill in *info with the operands, default untrapped result, and + * flags produced by a scalar SSE instruction, and return the type + * of trapped exception (if any). On entry, the mxcsr must have + * all exceptions masked and all flags clear. The same conditions + * will hold on exit. + * + * This routine does not work if the instruction specified by *inst + * is not a scalar instruction. + */ +enum fex_exception +__fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info) +{ + unsigned int e, te, mxcsr, oldmxcsr, subnorm; + + /* + * Perform the operation with traps disabled and check the + * exception flags. If the underflow trap was enabled, also + * check for an exact subnormal result. 
+ */ + __fenv_getmxcsr(&oldmxcsr); + subnorm = 0; + if ((int)inst->op & DOUBLE) { + if (inst->op == cvtsi2sd) { + info->op1.type = fex_int; + info->op1.val.i = inst->op2->i[0]; + info->op2.type = fex_nodata; + } else if (inst->op == cvtsi2sdq) { + info->op1.type = fex_llong; + info->op1.val.l = inst->op2->l[0]; + info->op2.type = fex_nodata; + } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || + inst->op == cvttsd2si || inst->op == cvtsd2si || + inst->op == cvttsd2siq || inst->op == cvtsd2siq) { + info->op1.type = fex_double; + info->op1.val.d = inst->op2->d[0]; + info->op2.type = fex_nodata; + } else { + info->op1.type = fex_double; + info->op1.val.d = inst->op1->d[0]; + info->op2.type = fex_double; + info->op2.val.d = inst->op2->d[0]; + } + info->res.type = fex_double; + switch (inst->op) { + case cmpsd: + info->op = fex_cmp; + info->res.type = fex_llong; + switch (inst->imm & 3) { + case 0: + sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 1: + sse_cmpltsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 2: + sse_cmplesd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.l); + break; + + case 3: + sse_cmpunordsd(&info->op1.val.d, + &info->op2.val.d, &info->res.val.l); + } + if (inst->imm & 4) + info->res.val.l ^= 0xffffffffffffffffull; + break; + + case minsd: + info->op = fex_other; + sse_minsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + break; + + case maxsd: + info->op = fex_other; + sse_maxsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + break; + + case addsd: + info->op = fex_add; + sse_addsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case subsd: + info->op = fex_sub; + sse_subsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case mulsd: + info->op = fex_mul; + 
sse_mulsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case divsd: + info->op = fex_div; + sse_divsd(&info->op1.val.d, &info->op2.val.d, + &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) + subnorm = 1; + break; + + case sqrtsd: + info->op = fex_sqrt; + sse_sqrtsd(&info->op1.val.d, &info->res.val.d); + break; + + case cvtsd2ss: + info->op = fex_cnvt; + info->res.type = fex_float; + sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case cvtsi2sd: + info->op = fex_cnvt; + sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d); + break; + + case cvttsd2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvttsd2si(&info->op1.val.d, &info->res.val.i); + break; + + case cvtsd2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvtsd2si(&info->op1.val.d, &info->res.val.i); + break; + +#ifdef __amd64 + case cvtsi2sdq: + info->op = fex_cnvt; + sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d); + break; + + case cvttsd2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l); + break; + + case cvtsd2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); + break; +#endif + + case ucomisd: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_ucomisd(&info->op1.val.d, &info->op2.val.d); + break; + + case comisd: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_comisd(&info->op1.val.d, &info->op2.val.d); + break; + } + } else { + if (inst->op == cvtsi2ss) { + info->op1.type = fex_int; + info->op1.val.i = inst->op2->i[0]; + info->op2.type = fex_nodata; + } else if (inst->op == cvtsi2ssq) { + info->op1.type = fex_llong; + info->op1.val.l = inst->op2->l[0]; + info->op2.type = fex_nodata; + } else if (inst->op == sqrtss || inst->op == cvtss2sd || + 
inst->op == cvttss2si || inst->op == cvtss2si || + inst->op == cvttss2siq || inst->op == cvtss2siq) { + info->op1.type = fex_float; + info->op1.val.f = inst->op2->f[0]; + info->op2.type = fex_nodata; + } else { + info->op1.type = fex_float; + info->op1.val.f = inst->op1->f[0]; + info->op2.type = fex_float; + info->op2.val.f = inst->op2->f[0]; + } + info->res.type = fex_float; + switch (inst->op) { + case cmpss: + info->op = fex_cmp; + info->res.type = fex_int; + switch (inst->imm & 3) { + case 0: + sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 1: + sse_cmpltss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 2: + sse_cmpless(&info->op1.val.f, &info->op2.val.f, + &info->res.val.i); + break; + + case 3: + sse_cmpunordss(&info->op1.val.f, + &info->op2.val.f, &info->res.val.i); + } + if (inst->imm & 4) + info->res.val.i ^= 0xffffffffu; + break; + + case minss: + info->op = fex_other; + sse_minss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + break; + + case maxss: + info->op = fex_other; + sse_maxss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + break; + + case addss: + info->op = fex_add; + sse_addss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case subss: + info->op = fex_sub; + sse_subss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case mulss: + info->op = fex_mul; + sse_mulss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case divss: + info->op = fex_div; + sse_divss(&info->op1.val.f, &info->op2.val.f, + &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) + subnorm = 1; + break; + + case sqrtss: + info->op = fex_sqrt; + sse_sqrtss(&info->op1.val.f, &info->res.val.f); + break; + + 
case cvtss2sd: + info->op = fex_cnvt; + info->res.type = fex_double; + sse_cvtss2sd(&info->op1.val.f, &info->res.val.d); + break; + + case cvtsi2ss: + info->op = fex_cnvt; + sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f); + break; + + case cvttss2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvttss2si(&info->op1.val.f, &info->res.val.i); + break; + + case cvtss2si: + info->op = fex_cnvt; + info->res.type = fex_int; + sse_cvtss2si(&info->op1.val.f, &info->res.val.i); + break; + +#ifdef __amd64 + case cvtsi2ssq: + info->op = fex_cnvt; + sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f); + break; + + case cvttss2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvttss2siq(&info->op1.val.f, &info->res.val.l); + break; + + case cvtss2siq: + info->op = fex_cnvt; + info->res.type = fex_llong; + sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); + break; +#endif + + case ucomiss: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_ucomiss(&info->op1.val.f, &info->op2.val.f); + break; + + case comiss: + info->op = fex_cmp; + info->res.type = fex_nodata; + sse_comiss(&info->op1.val.f, &info->op2.val.f); + break; + } + } + __fenv_getmxcsr(&mxcsr); + info->flags = mxcsr & 0x3d; + __fenv_setmxcsr(&oldmxcsr); + + /* determine which exception would have been trapped */ + te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr + >> 7) & 0x3d; + e = mxcsr & te; + if (e & FE_INVALID) + return __fex_get_sse_invalid_type(inst); + if (e & FE_DIVBYZERO) + return fex_division; + if (e & FE_OVERFLOW) + return fex_overflow; + if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) + return fex_underflow; + if (e & FE_INEXACT) + return fex_inexact; + return (enum fex_exception)-1; +} + +/* + * Emulate a SIMD SSE instruction to determine which exceptions occur + * in each part. 
For i = 0, 1, 2, and 3, set e[i] to indicate the + * trapped exception that would occur if the i-th part of the SIMD + * instruction were executed in isolation; set e[i] to -1 if no + * trapped exception would occur in this part. Also fill in info[i] + * with the corresponding operands, default untrapped result, and + * flags. + * + * This routine does not work if the instruction specified by *inst + * is not a SIMD instruction. + */ +void +__fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, + fex_info_t *info) +{ + sseinst_t dummy; + int i; + + e[0] = e[1] = e[2] = e[3] = -1; + + /* perform each part of the SIMD operation */ + switch (inst->op) { + case cmpps: + dummy.op = cmpss; + dummy.imm = inst->imm; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case minps: + dummy.op = minss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case maxps: + dummy.op = maxss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case addps: + dummy.op = addss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case subps: + dummy.op = subss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case mulps: + dummy.op = mulss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = 
__fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case divps: + dummy.op = divss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case sqrtps: + dummy.op = sqrtss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtdq2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttps2dq: + dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2dq: + dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpi2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttps2pi: + dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2pi: + dummy.op = cvtss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cmppd: + dummy.op = cmpsd; + dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t 
*)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case minpd: + dummy.op = minsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case maxpd: + dummy.op = maxsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case addpd: + dummy.op = addsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case subpd: + dummy.op = subsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case mulpd: + dummy.op = mulsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case divpd: + dummy.op = divsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case sqrtpd: + dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpi2pd: + case cvtdq2pd: + dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvttpd2pi: + case cvttpd2dq: + dummy.op = 
cvttsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpd2pi: + case cvtpd2dq: + dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtps2pd: + dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + break; + + case cvtpd2ps: + dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); + } + } +} + +/* + * Store the result value from *info in the destination of the scalar + * SSE instruction specified by *inst. If no result is given but the + * exception is underflow or overflow, supply the default trapped result. + * + * This routine does not work if the instruction specified by *inst + * is not a scalar instruction. 
+ */ +void +__fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, + fex_info_t *info) +{ + int i; + long long l; + float f, fscl; + double d, dscl; + + /* for compares that write eflags, just set the flags + to indicate "unordered" */ + if (inst->op == ucomiss || inst->op == comiss || + inst->op == ucomisd || inst->op == comisd) { + uap->uc_mcontext.gregs[REG_PS] |= 0x45; + return; + } + + /* if info doesn't specify a result value, try to generate + the default trapped result */ + if (info->res.type == fex_nodata) { + /* set scale factors for exponent wrapping */ + switch (e) { + case fex_overflow: + fscl = 1.262177448e-29f; /* 2^-96 */ + dscl = 6.441148769597133308e-232; /* 2^-768 */ + break; + + case fex_underflow: + fscl = 7.922816251e+28f; /* 2^96 */ + dscl = 1.552518092300708935e+231; /* 2^768 */ + break; + + default: + (void) __fex_get_sse_op(uap, inst, info); + if (info->res.type == fex_nodata) + return; + goto stuff; + } + + /* generate the wrapped result */ + if (inst->op == cvtsd2ss) { + info->op1.type = fex_double; + info->op1.val.d = inst->op2->d[0]; + info->op2.type = fex_nodata; + info->res.type = fex_float; + info->res.val.f = (float)(fscl * (fscl * + info->op1.val.d)); + } else if ((int)inst->op & DOUBLE) { + info->op1.type = fex_double; + info->op1.val.d = inst->op1->d[0]; + info->op2.type = fex_double; + info->op2.val.d = inst->op2->d[0]; + info->res.type = fex_double; + switch (inst->op) { + case addsd: + info->res.val.d = dscl * (dscl * + info->op1.val.d + dscl * info->op2.val.d); + break; + + case subsd: + info->res.val.d = dscl * (dscl * + info->op1.val.d - dscl * info->op2.val.d); + break; + + case mulsd: + info->res.val.d = (dscl * info->op1.val.d) * + (dscl * info->op2.val.d); + break; + + case divsd: + info->res.val.d = (dscl * info->op1.val.d) / + (info->op2.val.d / dscl); + break; + + default: + return; + } + } else { + info->op1.type = fex_float; + info->op1.val.f = inst->op1->f[0]; + info->op2.type = fex_float; + 
info->op2.val.f = inst->op2->f[0]; + info->res.type = fex_float; + switch (inst->op) { + case addss: + info->res.val.f = fscl * (fscl * + info->op1.val.f + fscl * info->op2.val.f); + break; + + case subss: + info->res.val.f = fscl * (fscl * + info->op1.val.f - fscl * info->op2.val.f); + break; + + case mulss: + info->res.val.f = (fscl * info->op1.val.f) * + (fscl * info->op2.val.f); + break; + + case divss: + info->res.val.f = (fscl * info->op1.val.f) / + (info->op2.val.f / fscl); + break; + + default: + return; + } + } + } + + /* put the result in the destination */ +stuff: + if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si + || inst->op == cvttsd2si || inst->op == cvtsd2si) { + switch (info->res.type) { + case fex_int: + i = info->res.val.i; + break; + + case fex_llong: + i = info->res.val.l; + break; + + case fex_float: + i = info->res.val.f; + break; + + case fex_double: + i = info->res.val.d; + break; + + case fex_ldouble: + i = info->res.val.q; + break; + } + inst->op1->i[0] = i; + } else if (inst->op == cmpsd || inst->op == cvttss2siq || + inst->op == cvtss2siq || inst->op == cvttsd2siq || + inst->op == cvtsd2siq) { + switch (info->res.type) { + case fex_int: + l = info->res.val.i; + break; + + case fex_llong: + l = info->res.val.l; + break; + + case fex_float: + l = info->res.val.f; + break; + + case fex_double: + l = info->res.val.d; + break; + + case fex_ldouble: + l = info->res.val.q; + break; + } + inst->op1->l[0] = l; + } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || + inst->op == cvtss2sd) { + switch (info->res.type) { + case fex_int: + d = info->res.val.i; + break; + + case fex_llong: + d = info->res.val.l; + break; + + case fex_float: + d = info->res.val.f; + break; + + case fex_double: + d = info->res.val.d; + break; + + case fex_ldouble: + d = info->res.val.q; + break; + } + inst->op1->d[0] = d; + } else { + switch (info->res.type) { + case fex_int: + f = info->res.val.i; + break; + + case fex_llong: + f = 
info->res.val.l; + break; + + case fex_float: + f = info->res.val.f; + break; + + case fex_double: + f = info->res.val.d; + break; + + case fex_ldouble: + f = info->res.val.q; + break; + } + inst->op1->f[0] = f; + } +} + +/* + * Store the results from a SIMD instruction. For each i, store + * the result value from info[i] in the i-th part of the destination + * of the SIMD SSE instruction specified by *inst. If no result + * is given but the exception indicated by e[i] is underflow or + * overflow, supply the default trapped result. + * + * This routine does not work if the instruction specified by *inst + * is not a SIMD instruction. + */ +void +__fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, + fex_info_t *info) +{ + sseinst_t dummy; + int i; + + /* store each part */ + switch (inst->op) { + case cmpps: + dummy.op = cmpss; + dummy.imm = inst->imm; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case minps: + dummy.op = minss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case maxps: + dummy.op = maxss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case addps: + dummy.op = addss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case subps: + dummy.op = subss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case mulps: + 
dummy.op = mulss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case divps: + dummy.op = divss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case sqrtps: + dummy.op = sqrtss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtdq2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttps2dq: + dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtps2dq: + dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpi2ps: + dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttps2pi: + dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtps2pi: + dummy.op = cvtss2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + 
__fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cmppd: + dummy.op = cmpsd; + dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case minpd: + dummy.op = minsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case maxpd: + dummy.op = maxsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case addpd: + dummy.op = addsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case subpd: + dummy.op = subsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case mulpd: + dummy.op = mulsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case divpd: + dummy.op = divsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case sqrtpd: + dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpi2pd: + case cvtdq2pd: + dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = 
(sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvttpd2pi: + case cvttpd2dq: + dummy.op = cvttsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* for cvttpd2dq, zero the high 64 bits of the destination */ + if (inst->op == cvttpd2dq) + inst->op1->l[1] = 0ll; + break; + + case cvtpd2pi: + case cvtpd2dq: + dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* for cvtpd2dq, zero the high 64 bits of the destination */ + if (inst->op == cvtpd2dq) + inst->op1->l[1] = 0ll; + break; + + case cvtps2pd: + dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + break; + + case cvtpd2ps: + dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { + dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; + dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; + __fex_st_sse_result(uap, &dummy, e[i], &info[i]); + } + /* zero the high 64 bits of the destination */ + inst->op1->l[1] = 0ll; + } +} diff --git a/usr/src/libm/src/m9x/__fex_sym.c b/usr/src/libm/src/m9x/__fex_sym.c new file mode 100644 index 0000000..7942493 --- /dev/null +++ b/usr/src/libm/src/m9x/__fex_sym.c @@ -0,0 +1,306 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)__fex_sym.c 1.7 06/01/31 SMI" + +#include "fenv_synonyms.h" +#include <elf.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <procfs.h> +#include <string.h> +#include <sys/stat.h> + +#if defined(__sparcv9) || defined(__amd64) + +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define ELF_ST_BIND ELF64_ST_BIND +#define ELF_ST_TYPE ELF64_ST_TYPE + +#else + +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Phdr Elf32_Phdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define ELF_ST_BIND ELF32_ST_BIND +#define ELF_ST_TYPE ELF32_ST_TYPE + +#endif /* __sparcv9 */ + +/* semi-permanent data established by __fex_sym_init */ +static prmap_t *pm = NULL; /* prmap_t array */ +static int npm = 0; /* number of entries in pm */ + +/* transient data modified by __fex_sym */ +static prmap_t *lpm = NULL; /* prmap_t found in last call */ +static Elf_Phdr *ph = NULL; /* program header array */ +static int phsize = 0; /* size of ph */ +static int nph; /* number of entries in ph */ +static char *stbuf = NULL; /* symbol and string table buffer */ +static int stbufsize = 0; /* size of stbuf */ +static int stoffset; /* offset of string table in stbuf */ +static int nsyms; /* number of symbols in stbuf */ + +/* get a current prmap_t list (must call this before each 
stack trace) */ +void +__fex_sym_init() +{ + struct stat statbuf; + long n; + int i; + + /* clear out the previous prmap_t list */ + if (pm != NULL) + free(pm); + pm = lpm = NULL; + npm = 0; + + /* get the current prmap_t list */ + if (stat("/proc/self/map", &statbuf) < 0 || statbuf.st_size <= 0 || + (pm = (prmap_t*)malloc(statbuf.st_size)) == NULL) + return; + if ((i = open("/proc/self/map", O_RDONLY)) < 0) + { + free(pm); + pm = NULL; + return; + } + n = read(i, pm, statbuf.st_size); + close(i); + if (n != statbuf.st_size) + { + free(pm); + pm = NULL; + } + else + npm = (int) (n / sizeof(prmap_t)); +} + +/* read ELF program headers and symbols; return -1 on error, 0 otherwise */ +static int +__fex_read_syms(int fd) +{ + Elf_Ehdr h; + Elf_Shdr *sh; + int i, size; + + /* read the ELF header */ + if (read(fd, &h, sizeof(h)) != sizeof(h)) + return -1; + if (h.e_ident[EI_MAG0] != ELFMAG0 || + h.e_ident[EI_MAG1] != ELFMAG1 || + h.e_ident[EI_MAG2] != ELFMAG2 || + h.e_ident[EI_MAG3] != ELFMAG3 || + h.e_phentsize != sizeof(Elf_Phdr) || + h.e_shentsize != sizeof(Elf_Shdr)) + return -1; + + /* get space for the program headers */ + size = h.e_phnum * h.e_phentsize; + if (size > phsize) + { + if (ph) + free(ph); + phsize = nph = 0; + if ((ph = (Elf_Phdr*)malloc(size)) == NULL) + return -1; + phsize = size; + } + + /* read the program headers */ + if (lseek(fd, h.e_phoff, SEEK_SET) != h.e_phoff || + read(fd, ph, size) != (ssize_t)size) + { + nph = 0; + return -1; + } + nph = h.e_phnum; + + /* read the section headers */ + size = h.e_shnum * h.e_shentsize; + if ((sh = (Elf_Shdr*)malloc(size)) == NULL) + return -1; + if (lseek(fd, h.e_shoff, SEEK_SET) != h.e_shoff || + read(fd, sh, size) != (ssize_t)size) + { + free(sh); + return -1; + } + + /* find the symtab section header */ + for (i = 0; i < h.e_shnum; i++) + { + if (sh[i].sh_type == SHT_SYMTAB) + break; /* assume there is only one */ + } + if (i == h.e_shnum || sh[i].sh_size == 0 || + sh[i].sh_entsize != sizeof(Elf_Sym) || 
+ sh[i].sh_link < 1 || sh[i].sh_link >= h.e_shnum || + sh[sh[i].sh_link].sh_type != SHT_STRTAB || + sh[sh[i].sh_link].sh_size == 0) + { + free(sh); + return -1; + } + + /* get space for the symbol and string tables */ + size = (int) (sh[i].sh_size + sh[sh[i].sh_link].sh_size); + if (size > stbufsize) + { + if (stbuf) + free(stbuf); + stbufsize = nsyms = 0; + if ((stbuf = (char*)malloc(size)) == NULL) + { + free(sh); + return -1; + } + stbufsize = size; + } + + /* read the symbol and string tables */ + if (lseek(fd, sh[i].sh_offset, SEEK_SET) != sh[i].sh_offset || + read(fd, stbuf, sh[i].sh_size) != sh[i].sh_size || + lseek(fd, sh[sh[i].sh_link].sh_offset, SEEK_SET) != + sh[sh[i].sh_link].sh_offset || + read(fd, stbuf + sh[i].sh_size, sh[sh[i].sh_link].sh_size) != + sh[sh[i].sh_link].sh_size) + { + free(sh); + return -1; + } + nsyms = (int) (sh[i].sh_size / sh[i].sh_entsize); + stoffset = (int) sh[i].sh_size; + + free(sh); + return 0; +} + +/* find the symbol corresponding to the given text address; + return NULL on error, symbol address otherwise */ +char * +__fex_sym(char *a, char **name) +{ + Elf_Sym *s; + unsigned long fo, va, value; + int fd, i, j, nm; + char fname[PRMAPSZ+20]; + + /* see if the last prmap_t found contains the indicated address */ + if (lpm) + { + if (a >= (char*)lpm->pr_vaddr && a < (char*)lpm->pr_vaddr + + lpm->pr_size) + goto cont; + } + + /* look for a prmap_t that contains the indicated address */ + for (i = 0; i < npm; i++) + { + if (a >= (char*)pm[i].pr_vaddr && a < (char*)pm[i].pr_vaddr + + pm[i].pr_size) + break; + } + if (i == npm) + return NULL; + + /* get an open file descriptor for the mapped object */ + if (pm[i].pr_mapname[0] == '\0') + return NULL; + strcpy(fname, "/proc/self/object/"); + strncat(fname, pm[i].pr_mapname, PRMAPSZ); + fd = open(fname, O_RDONLY); + if (fd < 0) + return NULL; + + /* read the program headers and symbols */ + lpm = NULL; + j = __fex_read_syms(fd); + close(fd); + if (j < 0) + return NULL; + lpm = 
&pm[i]; + +cont: + /* compute the file offset corresponding to the mapped address */ + fo = (a - (char*)lpm->pr_vaddr) + lpm->pr_offset; + + /* find the program header containing the file offset */ + for (i = 0; i < nph; i++) + { + if (ph[i].p_type == PT_LOAD && fo >= ph[i].p_offset && + fo < ph[i].p_offset + ph[i].p_filesz) + break; + } + if (i == nph) + return NULL; + + /* compute the virtual address corresponding to the file offset */ + va = (fo - ph[i].p_offset) + ph[i].p_vaddr; + + /* find the symbol in this segment with the highest value + less than or equal to the virtual address */ + s = (Elf_Sym*)stbuf; + value = nm = 0; + for (j = 0; j < nsyms; j++) + { + if (s[j].st_name == 0 || s[j].st_shndx == SHN_UNDEF || + (ELF_ST_BIND(s[j].st_info) != STB_LOCAL && + ELF_ST_BIND(s[j].st_info) != STB_GLOBAL && + ELF_ST_BIND(s[j].st_info) != STB_WEAK) || + (ELF_ST_TYPE(s[j].st_info) != STT_NOTYPE && + ELF_ST_TYPE(s[j].st_info) != STT_OBJECT && + ELF_ST_TYPE(s[j].st_info) != STT_FUNC)) + { + continue; + } + + if (s[j].st_value < ph[i].p_vaddr || s[j].st_value >= ph[i].p_vaddr + + ph[i].p_memsz) + { + continue; + } + + if (s[j].st_value < value || s[j].st_value > va) + continue; + + value = s[j].st_value; + nm = s[j].st_name; + } + if (nm == 0) + return NULL; + + /* pass back the name and return the mapped address of the symbol */ + *name = stbuf + stoffset + nm; + fo = (value - ph[i].p_vaddr) + ph[i].p_offset; + return (char*)lpm->pr_vaddr + (fo - lpm->pr_offset); +} diff --git a/usr/src/libm/src/m9x/fdim.c b/usr/src/libm/src/m9x/fdim.c new file mode 100644 index 0000000..5f888e1 --- /dev/null +++ b/usr/src/libm/src/m9x/fdim.c @@ -0,0 +1,55 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fdim.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fdim = __fdim +#endif + +/* + * fdim(x,y) returns x - y if x > y, +0 if x <= y, and NaN if x and + * y are unordered. + * + * fdim(x,y) raises overflow or inexact if x > y and x - y overflows + * or is inexact. It raises invalid if either operand is a signaling + * NaN. Otherwise, it raises no exceptions. + */ + +#include "libm.h" /* for islessequal macro */ + +double +__fdim(double x, double y) { +#if defined(COMPARISON_MACRO_BUG) + if (x == x && y == y && x <= y) { /* } */ +#else + if (islessequal(x, y)) { +#endif + x = 0.0; + y = -x; + } + return (x - y); +} diff --git a/usr/src/libm/src/m9x/fdimf.c b/usr/src/libm/src/m9x/fdimf.c new file mode 100644 index 0000000..84f56e5 --- /dev/null +++ b/usr/src/libm/src/m9x/fdimf.c @@ -0,0 +1,58 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fdimf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fdimf = __fdimf +#endif + +#include "libm.h" /* for islessequal macro */ + +float +__fdimf(float x, float y) { + /* + * On SPARC v8plus/v9, this could be implemented as follows + * (assuming %f0 = x, %f1 = y, return value left in %f0): + * + * fcmps %fcc0,%f0,%f1 + * st %g0,[scratch] ! use fzero instead of st/ld + * ld [scratch],%f2 ! if VIS is available + * fnegs %f2,%f3 + * fmovsle %fcc0,%f2,%f0 + * fmovsle %fcc0,%f3,%f1 + * fsubs %f0,%f1,%f0 + */ +#if defined(COMPARISON_MACRO_BUG) + if (x == x && y == y && x <= y) { /* } */ +#else + if (islessequal(x, y)) { +#endif + x = 0.0f; + y = -x; + } + return (x - y); +} diff --git a/usr/src/libm/src/m9x/fdiml.c b/usr/src/libm/src/m9x/fdiml.c new file mode 100644 index 0000000..3fffdc4 --- /dev/null +++ b/usr/src/libm/src/m9x/fdiml.c @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fdiml.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fdiml = __fdiml +#endif + +#include "libm.h" /* for islessequal macro */ + +long double +__fdiml(long double x, long double y) { +#if defined(COMPARISON_MACRO_BUG) + if (x == x && y == y && x <= y) { +#else + if (islessequal(x, y)) { +#endif + x = 0.0l; + y = -x; + } + return (x - y); +} diff --git a/usr/src/libm/src/m9x/feexcept.c b/usr/src/libm/src/m9x/feexcept.c new file mode 100644 index 0000000..c4979f1 --- /dev/null +++ b/usr/src/libm/src/m9x/feexcept.c @@ -0,0 +1,135 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
+ * Use is subject to license terms. + */ + +#pragma ident "@(#)feexcept.c 1.8 06/01/31 SMI" + +#pragma weak feclearexcept = __feclearexcept +#pragma weak feraiseexcept = __feraiseexcept +#pragma weak fetestexcept = __fetestexcept +#pragma weak fegetexceptflag = __fegetexceptflag +#pragma weak fesetexceptflag = __fesetexceptflag + +#pragma weak feclearexcept96 = __feclearexcept +#pragma weak feraiseexcept96 = __feraiseexcept +#pragma weak fetestexcept96 = __fetestexcept +#pragma weak fegetexceptflag96 = __fegetexceptflag +#pragma weak fesetexceptflag96 = __fesetexceptflag + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <sys/ieeefp.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" + +int feclearexcept(int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, __fenv_get_ex(fsr) & ~e); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); + return 0; +} + +/* +* note - __fex_hdlr depends on fetestexcept following feraiseexcept +*/ +int feraiseexcept(int e) +{ + volatile double t; + unsigned long fsr; + + if (e & FE_INVALID) { + t = 0.0; + t /= 0.0; + } + if (e & FE_DIVBYZERO) { + t = 1.0e300; + t /= 0.0; + } + if (e & FE_OVERFLOW) { + /* if overflow is not trapped, avoid raising inexact */ + __fenv_getfsr(&fsr); + if (!(__fenv_get_te(fsr) & (1 << fp_trap_overflow))) { + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_OVERFLOW); + __fenv_setfsr(&fsr); + } + else { + t = 1.0e300; + t *= 1.0e300; + } + } + if (e & FE_UNDERFLOW) { + /* if underflow is not trapped, avoid raising inexact */ + __fenv_getfsr(&fsr); + if (!(__fenv_get_te(fsr) & (1 << fp_trap_underflow))) { + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | FE_UNDERFLOW); + __fenv_setfsr(&fsr); + } + else { + t = 1.0e-307; + t -= 1.001e-307; + } + } + if (e & FE_INEXACT) { + t = 1.0e300; + t += 1.0e-307; + } + return 0; +} + +int fetestexcept(int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return (int)__fenv_get_ex(fsr) & e; +} + +int 
fegetexceptflag(fexcept_t *p, int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + *p = (int)__fenv_get_ex(fsr) & e; + return 0; +} + +int fesetexceptflag(const fexcept_t *p, int e) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, (((int)__fenv_get_ex(fsr) & ~e) | (*p & e)) & + FE_ALL_EXCEPT); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); + return 0; +} diff --git a/usr/src/libm/src/m9x/fenv.c b/usr/src/libm/src/m9x/fenv.c new file mode 100644 index 0000000..0054871 --- /dev/null +++ b/usr/src/libm/src/m9x/fenv.c @@ -0,0 +1,116 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fenv.c 1.9 06/01/31 SMI" + +#pragma weak fex_merge_flags = __fex_merge_flags + +#pragma weak feholdexcept = __feholdexcept +#pragma weak feupdateenv = __feupdateenv +#pragma weak fegetenv = __fegetenv +#pragma weak fesetenv = __fesetenv + +#pragma weak feholdexcept96 = __feholdexcept96 +#pragma weak feupdateenv96 = __feupdateenv +#pragma weak fegetenv96 = __fegetenv +#pragma weak fesetenv96 = __fesetenv + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" + +const fenv_t __fenv_dfl_env = { + { + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + { FEX_NONSTOP, (void(*)())0 }, + }, +#ifdef __i386 + 0x13000000 +#else + 0 +#endif +}; + +int feholdexcept(fenv_t *p) +{ + (void) fegetenv(p); + (void) feclearexcept(FE_ALL_EXCEPT); + return !fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL); +} + +int feholdexcept96(fenv_t *p) +{ + (void) fegetenv(p); + (void) feclearexcept(FE_ALL_EXCEPT); + return fex_set_handling(FEX_ALL, FEX_NONSTOP, NULL); +} + +int feupdateenv(const fenv_t *p) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + (void) fesetenv(p); + (void) feraiseexcept((int)__fenv_get_ex(fsr)); + return 0; +} + +int fegetenv(fenv_t *p) +{ + fex_getexcepthandler(&p->__handlers, FEX_ALL); + __fenv_getfsr(&p->__fsr); + return 0; +} + +int fesetenv(const fenv_t *p) +{ + __fenv_setfsr(&p->__fsr); + fex_setexcepthandler(&p->__handlers, FEX_ALL); + return 0; +} + +void fex_merge_flags(const fenv_t *p) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + __fenv_set_ex(fsr, __fenv_get_ex(fsr) | __fenv_get_ex(p->__fsr)); + __fenv_setfsr(&fsr); + if (fex_get_log()) + __fex_update_te(); +} 
diff --git a/usr/src/libm/src/m9x/fenv_synonyms.h b/usr/src/libm/src/m9x/fenv_synonyms.h new file mode 100644 index 0000000..14b32fe --- /dev/null +++ b/usr/src/libm/src/m9x/fenv_synonyms.h @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fenv_synonyms.h 1.7 06/01/31 SMI" + +/* feexcept.c */ +#define feclearexcept __feclearexcept +#define feraiseexcept __feraiseexcept +#define fetestexcept __fetestexcept +#define fegetexceptflag __fegetexceptflag +#define fesetexceptflag __fesetexceptflag + +/* fenv.c */ +#define feholdexcept __feholdexcept +#define feholdexcept96 __feholdexcept96 +#define feupdateenv __feupdateenv +#define fegetenv __fegetenv +#define fesetenv __fesetenv +#define fex_merge_flags __fex_merge_flags + +#ifdef __i386 +/* feprec.c */ +#define fegetprec __fegetprec +#define fesetprec __fesetprec +#endif + +/* feround.c */ +#define fegetround __fegetround +#define fesetround __fesetround +#define fesetround96 __fesetround96 + +/* fex_handler.c */ +#define fex_get_handling __fex_get_handling +#define fex_set_handling __fex_set_handling +#define fex_getexcepthandler __fex_getexcepthandler +#define fex_setexcepthandler __fex_setexcepthandler + +/* fex_log.c */ +#define fex_get_log __fex_get_log +#define fex_set_log __fex_set_log +#define fex_get_log_depth __fex_get_log_depth +#define fex_set_log_depth __fex_set_log_depth +#define fex_log_entry __fex_log_entry + +/* libc, libthread */ +#define close _close +#define getcontext _getcontext +#define getpid _getpid +#define kill _kill +#define lseek _lseek +#define mutex_lock _mutex_lock +#define mutex_unlock _mutex_unlock +#define open _open +#define read _read +#define sigaction _sigaction +#define sigemptyset _sigemptyset +#define sigismember _sigismember +#define sigprocmask _sigprocmask +#define stat _stat +#define thr_getspecific _thr_getspecific +#define thr_keycreate _thr_keycreate +#define thr_main _thr_main +#define thr_setspecific _thr_setspecific +#define write _write + +/* ??? 
see V9 /usr/include/stdio.h */ +#ifdef __sparcv9 +#define fileno _fileno +#endif + +#ifdef __sparc +/* libm, libsunmath */ +#define fp_class __fp_class +#define fp_classf __fp_classf +#define sqrt __sqrt +#define sqrtf __sqrtf +#endif diff --git a/usr/src/libm/src/m9x/feprec.c b/usr/src/libm/src/m9x/feprec.c new file mode 100644 index 0000000..56a64e2 --- /dev/null +++ b/usr/src/libm/src/m9x/feprec.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)feprec.c 1.5 06/01/31 SMI" + +#pragma weak fegetprec = __fegetprec +#pragma weak fesetprec = __fesetprec + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" + +int fegetprec(void) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return __fenv_get_rp(fsr); +} + +int fesetprec(int r) +{ + unsigned long fsr; + + if (r != FE_FLTPREC && r != FE_DBLPREC && r != FE_LDBLPREC) + return 0; + __fenv_getfsr(&fsr); + __fenv_set_rp(fsr, r); + __fenv_setfsr(&fsr); + return 1; +} diff --git a/usr/src/libm/src/m9x/feround.c b/usr/src/libm/src/m9x/feround.c new file mode 100644 index 0000000..2f0bc99 --- /dev/null +++ b/usr/src/libm/src/m9x/feround.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)feround.c 1.9 06/01/31 SMI" + +#pragma weak fegetround = __fegetround +#pragma weak fesetround = __fesetround + +#pragma weak fegetround96 = __fegetround +#pragma weak fesetround96 = __fesetround96 + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" + +#if defined(__i386) && !defined(__amd64) +#include <float.h> +#endif + +int fegetround(void) +{ + unsigned long fsr; + + __fenv_getfsr(&fsr); + return (int)__fenv_get_rd(fsr); +} + +int fesetround(int r) +{ + unsigned long fsr; + + if (r & ~3) + return -1; + __fenv_getfsr(&fsr); + __fenv_set_rd(fsr, r); + __fenv_setfsr(&fsr); +#if defined(__i386) && !defined(__amd64) + FLT_ROUNDS = (0x2D >> (r << 1)) & 3; /* 0->1, 1->3, 2->2, 3->0 */ +#endif + return 0; +} + +int fesetround96(int r) +{ + unsigned long fsr; + + if (r & ~3) + return 0; + __fenv_getfsr(&fsr); + __fenv_set_rd(fsr, r); + __fenv_setfsr(&fsr); +#if defined(__i386) && !defined(__amd64) + FLT_ROUNDS = (0x2D >> (r << 1)) & 3; /* 0->1, 1->3, 2->2, 3->0 */ +#endif + return 1; +} diff --git a/usr/src/libm/src/m9x/fex_handler.c b/usr/src/libm/src/m9x/fex_handler.c new file mode 100644 index 0000000..3491e1c --- /dev/null +++ b/usr/src/libm/src/m9x/fex_handler.c @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fex_handler.c 1.5 06/01/31 SMI" + +#pragma weak fex_get_handling = __fex_get_handling +#pragma weak fex_set_handling = __fex_set_handling +#pragma weak fex_getexcepthandler = __fex_getexcepthandler +#pragma weak fex_setexcepthandler = __fex_setexcepthandler + +#include "fenv_synonyms.h" +#include <fenv.h> +#include <ucontext.h> +#include <thread.h> +#include "fex_handler.h" + +int fex_get_handling(int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + return thr_handlers[i].__mode; + return FEX_NOHANDLER; +} + +int fex_set_handling(int e, int mode, void (*handler)()) +{ + struct fex_handler_data *thr_handlers; + int i; + + if (e & ~((1 << FEX_NUM_EXC) - 1)) + return 0; + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) { + if (e & (1 << i)) { + thr_handlers[i].__mode = mode; + thr_handlers[i].__handler = handler; + } + } + __fex_update_te(); + return 1; +} + +void fex_getexcepthandler(fex_handler_t *buf, int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + (*buf)[i] = thr_handlers[i]; +} + +void fex_setexcepthandler(const fex_handler_t *buf, int e) +{ + struct fex_handler_data *thr_handlers; + int i; + + thr_handlers = __fex_get_thr_handlers(); + for (i = 0; i < FEX_NUM_EXC; i++) + if (e & (1 << i)) + thr_handlers[i] = (*buf)[i]; + __fex_update_te(); +} diff --git a/usr/src/libm/src/m9x/fex_handler.h b/usr/src/libm/src/m9x/fex_handler.h new file mode 100644 
index 0000000..9f8c259 --- /dev/null +++ b/usr/src/libm/src/m9x/fex_handler.h @@ -0,0 +1,215 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fex_handler.h 1.8 06/01/31 SMI" + +/* the following enums must match the bit positions in fenv.h */ +enum fex_exception { + fex_inexact = 0, + fex_division = 1, + fex_underflow = 2, + fex_overflow = 3, + fex_inv_zdz = 4, + fex_inv_idi = 5, + fex_inv_isi = 6, + fex_inv_zmi = 7, + fex_inv_sqrt = 8, + fex_inv_snan = 9, + fex_inv_int = 10, + fex_inv_cmp = 11 +}; + + +/* auxiliary functions in __fex_hdlr.c */ +extern struct fex_handler_data *__fex_get_thr_handlers(void); +extern void __fex_update_te(void); + +/* auxiliary functions in __fex_sym.c */ +extern void __fex_sym_init(void); +extern char *__fex_sym(char *, char **); + +/* auxiliary functions in fex_log.c */ +extern void __fex_mklog(ucontext_t *, char *, int, enum fex_exception, + int, void *); + +/* system-dependent auxiliary functions */ +extern enum fex_exception __fex_get_invalid_type(siginfo_t *, ucontext_t *); +extern void __fex_get_op(siginfo_t *, ucontext_t *, fex_info_t *); +extern void __fex_st_result(siginfo_t *, ucontext_t *, fex_info_t *); + +/* inline templates and macros for accessing fp state */ +#ifdef __sparcv9 +#define __fenv_getfsr __fenv_getfsrx +#define __fenv_setfsr __fenv_setfsrx +#endif +extern void __fenv_getfsr(unsigned long *); +extern void __fenv_setfsr(const unsigned long *); + +#if defined(__sparc) + +#define __fenv_get_rd(X) ((X>>30)&0x3) +#define __fenv_set_rd(X,Y) X=(X&~0xc0000000ul)|((Y)<<30) + +#define __fenv_get_te(X) ((X>>23)&0x1f) +#define __fenv_set_te(X,Y) X=(X&~0x0f800000ul)|((Y)<<23) + +#define __fenv_get_ex(X) ((X>>5)&0x1f) +#define __fenv_set_ex(X,Y) X=(X&~0x000003e0ul)|((Y)<<5) + +#elif defined(__i386) + +extern void __fenv_getcwsw(unsigned int *); +extern void __fenv_setcwsw(const unsigned int *); + +extern void __fenv_getmxcsr(unsigned int *); +extern void __fenv_setmxcsr(const unsigned int *); + +#define __fenv_get_rd(X) ((X>>26)&3) +#define __fenv_set_rd(X,Y) X=(X&~0x0c000000)|((Y)<<26) + +#define __fenv_get_rp(X) ((X>>24)&3) +#define 
__fenv_set_rp(X,Y) X=(X&~0x03000000)|((Y)<<24) + +#define __fenv_get_te(X) ((X>>16)&0x3d) +#define __fenv_set_te(X,Y) X=(X&~0x003d0000)|((Y)<<16) + +#define __fenv_get_ex(X) (X&0x3d) +#define __fenv_set_ex(X,Y) X=(X&~0x0000003d)|(Y) + +/* + * These macros define some useful distinctions between various + * SSE instructions. In some cases, distinctions are made for + * the purpose of simplifying the decoding of instructions, while + * in other cases, they are made for the purpose of simplying the + * emulation. Note that these values serve as bit flags within + * the enum values in sseinst_t. + */ +#define DOUBLE 0x100 +#define SIMD 0x080 +#define INTREG 0x040 + +typedef union { + double d[2]; + long long l[2]; + float f[4]; + int i[4]; +} sseoperand_t; + +/* structure to hold a decoded SSE instruction */ +typedef struct { + enum { + /* single precision scalar instructions */ + cmpss = 0, + minss = 1, + maxss = 2, + addss = 3, + subss = 4, + mulss = 5, + divss = 6, + sqrtss = 7, + ucomiss = 16, + comiss = 17, + cvtss2sd = 32, + cvtsi2ss = INTREG + 0, + cvttss2si = INTREG + 1, + cvtss2si = INTREG + 2, + cvtsi2ssq = INTREG + 8, + cvttss2siq = INTREG + 9, + cvtss2siq = INTREG + 10, + + /* single precision SIMD instructions */ + cmpps = SIMD + 0, + minps = SIMD + 1, + maxps = SIMD + 2, + addps = SIMD + 3, + subps = SIMD + 4, + mulps = SIMD + 5, + divps = SIMD + 6, + sqrtps = SIMD + 7, + cvtps2pd = SIMD + 32, + cvtdq2ps = SIMD + 34, + cvttps2dq = SIMD + 35, + cvtps2dq = SIMD + 36, + cvtpi2ps = SIMD + INTREG + 0, + cvttps2pi = SIMD + INTREG + 1, + cvtps2pi = SIMD + INTREG + 2, + + /* double precision scalar instructions */ + cmpsd = DOUBLE + 0, + minsd = DOUBLE + 1, + maxsd = DOUBLE + 2, + addsd = DOUBLE + 3, + subsd = DOUBLE + 4, + mulsd = DOUBLE + 5, + divsd = DOUBLE + 6, + sqrtsd = DOUBLE + 7, + ucomisd = DOUBLE + 16, + comisd = DOUBLE + 17, + cvtsd2ss = DOUBLE + 32, + cvtsi2sd = DOUBLE + INTREG + 0, + cvttsd2si = DOUBLE + INTREG + 1, + cvtsd2si = DOUBLE + INTREG + 2, 
+ cvtsi2sdq = DOUBLE + INTREG + 8, + cvttsd2siq = DOUBLE + INTREG + 9, + cvtsd2siq = DOUBLE + INTREG + 10, + + /* double precision SIMD instructions */ + cmppd = DOUBLE + SIMD + 0, + minpd = DOUBLE + SIMD + 1, + maxpd = DOUBLE + SIMD + 2, + addpd = DOUBLE + SIMD + 3, + subpd = DOUBLE + SIMD + 4, + mulpd = DOUBLE + SIMD + 5, + divpd = DOUBLE + SIMD + 6, + sqrtpd = DOUBLE + SIMD + 7, + cvtpd2ps = DOUBLE + SIMD + 32, + cvtdq2pd = DOUBLE + SIMD + 34, + cvttpd2dq = DOUBLE + SIMD + 35, + cvtpd2dq = DOUBLE + SIMD + 36, + cvtpi2pd = DOUBLE + SIMD + INTREG + 0, + cvttpd2pi = DOUBLE + SIMD + INTREG + 1, + cvtpd2pi = DOUBLE + SIMD + INTREG + 2, + } op; + int imm; + sseoperand_t *op1, *op2; +} sseinst_t; + +/* x86-specific auxiliary functions */ +extern int *__fex_accrued(void); +extern void __fex_get_x86_exc(siginfo_t *, ucontext_t *); +extern int __fex_parse_sse(ucontext_t *, sseinst_t *); +extern enum fex_exception __fex_get_sse_op(ucontext_t *, sseinst_t *, + fex_info_t *); +extern void __fex_get_simd_op(ucontext_t *, sseinst_t *, + enum fex_exception *, fex_info_t *); +extern void __fex_st_sse_result(ucontext_t *, sseinst_t *, + enum fex_exception, fex_info_t *); +extern void __fex_st_simd_result(ucontext_t *, sseinst_t *, + enum fex_exception *, fex_info_t *); + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/fex_log.c b/usr/src/libm/src/m9x/fex_log.c new file mode 100644 index 0000000..62a0939 --- /dev/null +++ b/usr/src/libm/src/m9x/fex_log.c @@ -0,0 +1,398 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fex_log.c 1.13 06/01/31 SMI" + +#pragma weak fex_get_log = __fex_get_log +#pragma weak fex_set_log = __fex_set_log +#pragma weak fex_get_log_depth = __fex_get_log_depth +#pragma weak fex_set_log_depth = __fex_set_log_depth +#pragma weak fex_log_entry = __fex_log_entry + +#include "fenv_synonyms.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <signal.h> +#include <ucontext.h> +#include <sys/frame.h> +#include <fenv.h> +#include <sys/ieeefp.h> +#include <thread.h> +#include "fex_handler.h" + +#if !defined(PC) +#if defined(REG_PC) +#define PC REG_PC +#else +#error Neither PC nor REG_PC is defined! 
+#endif +#endif + +static FILE *log_fp = NULL; +static mutex_t log_lock = DEFAULTMUTEX; +static int log_depth = 100; + +FILE *fex_get_log(void) +{ + FILE *fp; + + mutex_lock(&log_lock); + fp = log_fp; + mutex_unlock(&log_lock); + return fp; +} + +int fex_set_log(FILE *fp) +{ + mutex_lock(&log_lock); + log_fp = fp; + mutex_unlock(&log_lock); + __fex_update_te(); + return 1; +} + +int fex_get_log_depth(void) +{ + int d; + + mutex_lock(&log_lock); + d = log_depth; + mutex_unlock(&log_lock); + return d; +} + +int fex_set_log_depth(int d) +{ + if (d < 0) + return 0; + mutex_lock(&log_lock); + log_depth = d; + mutex_unlock(&log_lock); + return 1; +} + +static struct exc_list { + struct exc_list *next; + char *addr; + unsigned long code; + int nstack; + char *stack[1]; /* actual length is max(1,nstack) */ +} *list = NULL; + +#ifdef __sparcv9 +#define FRAMEP(X) (struct frame *)((char*)(X)+(((long)(X)&1)?2047:0)) +#else +#define FRAMEP(X) (struct frame *)(X) +#endif + +#ifdef _LP64 +#define PDIG "16" +#else +#define PDIG "8" +#endif + +/* look for a matching exc_list; return 1 if one is found, + otherwise add this one to the list and return 0 */ +static int check_exc_list(char *addr, unsigned long code, char *stk, + struct frame *fp) +{ + struct exc_list *l, *ll; + struct frame *f; + int i, n; + + if (list) { + for (l = list; l; ll = l, l = l->next) { + if (l->addr != addr || l->code != code) + continue; + if (log_depth < 1 || l->nstack < 1) + return 1; + if (l->stack[0] != stk) + continue; + n = 1; + for (i = 1, f = fp; i < log_depth && i < l->nstack && + f && f->fr_savpc; i++, f = FRAMEP(f->fr_savfp)) + if (l->stack[i] != (char *)f->fr_savpc) { + n = 0; + break; + } + if (n) + return 1; + } + } + + /* create a new exc_list structure and tack it on the list */ + for (n = 1, f = fp; n < log_depth && f && f->fr_savpc; + n++, f = FRAMEP(f->fr_savfp)) ; + if ((l = (struct exc_list *)malloc(sizeof(struct exc_list) + + (n - 1) * sizeof(char *))) != NULL) { + l->next = NULL; + 
l->addr = addr; + l->code = code; + l->nstack = ((log_depth < 1)? 0 : n); + l->stack[0] = stk; + for (i = 1; i < n; i++) { + l->stack[i] = (char *)fp->fr_savpc; + fp = FRAMEP(fp->fr_savfp); + } + if (list) + ll->next = l; + else + list = l; + } + return 0; +} + +/* +* Warning: cleverness ahead +* +* In the following code, the use of sprintf+write rather than fprintf +* to send output to the log file is intentional. The reason is that +* fprintf is not async-signal-safe. "But," you protest, "SIGFPE is +* not an asynchronous signal! It's always handled by the same thread +* that executed the fpop that provoked it." That's true, but a prob- +* lem arises because (i) base conversion in fprintf can cause a fp +* exception and (ii) my signal handler acquires a mutex lock before +* sending output to the log file (so that outputs for entries from +* different threads aren't interspersed). Therefore, if the code +* were to use fprintf, a deadlock could occur as follows: +* +* Thread A Thread B +* +* Incurs a fp exception, Calls fprintf, +* acquires log_lock acquires file rmutex lock +* +* Calls fprintf, Incurs a fp exception, +* waits for file rmutex lock waits for log_lock +* +* (I could just verify that fprintf doesn't hold the rmutex lock while +* it's doing the base conversion, but since efficiency is of little +* concern here, I opted for the safe and dumb route.) 
+*/ + +static void print_stack(int fd, char *addr, struct frame *fp) +{ + int i; + char *name, buf[30]; + + for (i = 0; i < log_depth && addr != NULL; i++) { + if (__fex_sym(addr, &name) != NULL) { + write(fd, buf, sprintf(buf, " 0x%0" PDIG "lx ", + (long)addr)); + write(fd, name, strlen(name)); + write(fd, "\n", 1); + if (!strcmp(name, "main")) + break; + } else { + write(fd, buf, sprintf(buf, " 0x%0" PDIG "lx\n", + (long)addr)); + } + if (fp == NULL) + break; + addr = (char *)fp->fr_savpc; + fp = FRAMEP(fp->fr_savfp); + } +} + +void fex_log_entry(const char *msg) +{ + ucontext_t uc; + struct frame *fp; + char *stk; + int fd; + + /* if logging is disabled, just return */ + mutex_lock(&log_lock); + if (log_fp == NULL) { + mutex_unlock(&log_lock); + return; + } + + /* get the frame pointer from the current context and + pop our own frame */ + getcontext(&uc); +#if defined(__sparc) || defined(__amd64) + fp = FRAMEP(uc.uc_mcontext.gregs[REG_SP]); +#elif defined(__i386) /* !defined(__amd64) */ + fp = FRAMEP(uc.uc_mcontext.gregs[EBP]); +#else +#error Unknown architecture +#endif + if (fp == NULL) { + mutex_unlock(&log_lock); + return; + } + stk = (char *)fp->fr_savpc; + fp = FRAMEP(fp->fr_savfp); + + /* if we've already logged this message here, don't make an entry */ + if (check_exc_list(stk, (unsigned long)msg, stk, fp)) { + mutex_unlock(&log_lock); + return; + } + + /* make an entry */ + fd = fileno(log_fp); + write(fd, "fex_log_entry: ", 15); + write(fd, msg, strlen(msg)); + write(fd, "\n", 1); + __fex_sym_init(); + print_stack(fd, stk, fp); + mutex_unlock(&log_lock); +} + +static const char *exception[FEX_NUM_EXC] = { + "inexact result", + "division by zero", + "underflow", + "overflow", + "invalid operation (0/0)", + "invalid operation (inf/inf)", + "invalid operation (inf-inf)", + "invalid operation (0*inf)", + "invalid operation (sqrt)", + "invalid operation (snan)", + "invalid operation (int)", + "invalid operation (cmp)" +}; + +void +__fex_mklog(ucontext_t 
*uap, char *addr, int f, enum fex_exception e, + int m, void *p) +{ + struct frame *fp; + char *stk, *name, buf[30]; + int fd; + + /* if logging is disabled, just return */ + mutex_lock(&log_lock); + if (log_fp == NULL) { + mutex_unlock(&log_lock); + return; + } + + /* get stack info */ +#if defined(__sparc) + stk = (char*)uap->uc_mcontext.gregs[REG_PC]; + fp = FRAMEP(uap->uc_mcontext.gregs[REG_SP]); +#elif defined(__amd64) + stk = (char*)uap->uc_mcontext.gregs[REG_PC]; + fp = FRAMEP(uap->uc_mcontext.gregs[REG_RBP]); +#elif defined(__i386) /* !defined(__amd64) */ + stk = (char*)uap->uc_mcontext.gregs[PC]; + fp = FRAMEP(uap->uc_mcontext.gregs[EBP]); +#else +#error Unknown architecture +#endif + + /* if the handling mode is the default and this exception's + flag is already raised, don't make an entry */ + if (m == FEX_NONSTOP) { + switch (e) { + case fex_inexact: + if (f & FE_INEXACT) { + mutex_unlock(&log_lock); + return; + } + break; + case fex_underflow: + if (f & FE_UNDERFLOW) { + mutex_unlock(&log_lock); + return; + } + break; + case fex_overflow: + if (f & FE_OVERFLOW) { + mutex_unlock(&log_lock); + return; + } + break; + case fex_division: + if (f & FE_DIVBYZERO) { + mutex_unlock(&log_lock); + return; + } + break; + default: + if (f & FE_INVALID) { + mutex_unlock(&log_lock); + return; + } + break; + } + } + + /* if we've already logged this exception at this address, + don't make an entry */ + if (check_exc_list(addr, (unsigned long)e, stk, fp)) { + mutex_unlock(&log_lock); + return; + } + + /* make an entry */ + fd = fileno(log_fp); + write(fd, "Floating point ", 15); + write(fd, exception[e], strlen(exception[e])); + write(fd, buf, sprintf(buf, " at 0x%0" PDIG "lx", (long)addr)); + __fex_sym_init(); + if (__fex_sym(addr, &name) != NULL) { + write(fd, " ", 1); + write(fd, name, strlen(name)); + } + switch (m) { + case FEX_NONSTOP: + write(fd, ", nonstop mode\n", 15); + break; + + case FEX_ABORT: + write(fd, ", abort\n", 8); + break; + + case FEX_NOHANDLER: + 
if (p == (void *)SIG_DFL) { + write(fd, ", handler: SIG_DFL\n", 19); + break; + } + else if (p == (void *)SIG_IGN) { + write(fd, ", handler: SIG_IGN\n", 19); + break; + } + /* fall through*/ + default: + write(fd, ", handler: ", 11); + if (__fex_sym((char *)p, &name) != NULL) { + write(fd, name, strlen(name)); + write(fd, "\n", 1); + } else { + write(fd, buf, sprintf(buf, "0x%0" PDIG "lx\n", + (long)p)); + } + break; + } + print_stack(fd, stk, fp); + mutex_unlock(&log_lock); +} diff --git a/usr/src/libm/src/m9x/fma.c b/usr/src/libm/src/m9x/fma.c new file mode 100644 index 0000000..ff13ee3 --- /dev/null +++ b/usr/src/libm/src/m9x/fma.c @@ -0,0 +1,608 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */

#pragma ident "@(#)fma.c 1.5 06/01/31 SMI"

#if defined(ELFOBJ)
#pragma weak fma = __fma
#endif

#include "libm.h"
#include "fma.h"

#if defined(__sparc)

/*
 * Double precision constants given as { high word, low word } bit
 * patterns (big-endian word order).
 */
static const union {
    unsigned i[2];
    double d;
} C[] = {
    { 0x3fe00000u, 0 },     /* 2^-1 */
    { 0x40000000u, 0 },     /* 2^1 */
    { 0x43300000u, 0 },     /* 2^52 */
    { 0x41a00000u, 0 },     /* 2^27 */
    { 0x3e500000u, 0 },     /* 2^-26 */
    { 0x3df00000u, 0 },     /* 2^-32 */
    { 0x3bf00000u, 0 },     /* 2^-64 */
    { 0x7fe00000u, 0 },     /* 2^1023 */
    { 0x00100000u, 0 },     /* 2^-1022, the smallest normal number */
    { 0x00100001u, 0 }      /* slightly larger than 2^-1022 */
};

#define half C[0].d
#define two C[1].d
#define two52 C[2].d
#define two27 C[3].d
#define twom26 C[4].d
#define twom32 C[5].d
#define twom64 C[6].d
#define huge C[7].d
#define tiny C[8].d
#define tiny2 C[9].d

/* fsr image with rd = 3 (round toward negative infinity), all else clear */
static const unsigned fsr_rm = 0xc0000000u;

/*
 * fma for SPARC: 64-bit double precision, big-endian
 *
 * Computes x * y + z.  Operands that are inf, nan or zero are handed to
 * ordinary arithmetic.  Otherwise the product is formed to 106 bits, the
 * sum is carried out on four 32-bit significand words with a sticky bit,
 * and the result is rounded once using the rounding direction found in
 * the caller's fsr.  Exceptions are reported by setting the accrued bits
 * or, when the corresponding trap is enabled, by executing an operation
 * that raises the same exception.
 */
double
__fma(double x, double y, double z) {
    union {
        unsigned i[2];
        double d;
    } xx, yy, zz;
    double xhi, yhi, xlo, ylo, t;
    unsigned xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky;
    int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
    volatile double dummy;

    /* extract the high order words of the arguments */
    xx.d = x;
    yy.d = y;
    zz.d = z;
    hx = xx.i[0] & ~0x80000000;
    hy = yy.i[0] & ~0x80000000;
    hz = zz.i[0] & ~0x80000000;

    /* dispense with inf, nan, and zero cases */
    if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 ||
        (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */
        return (x * y + z);

    if (hz >= 0x7ff00000) /* z is inf or nan */
        return (x + z); /* avoid spurious under/overflow in x * y */

    if ((hz | zz.i[1]) == 0) /* z is zero */
        /*
         * x * y isn't zero but could underflow to zero,
         * so don't add z, lest we perturb the sign
         */
        return (x * y);

    /*
     * now x, y, and z are all finite and nonzero; save the fsr and
     * set round-to-negative-infinity mode (and clear nonstandard
     * mode before we try to scale subnormal operands)
     */
    __fenv_getfsr(&fsr);
    __fenv_setfsr(&fsr_rm);

    /* extract signs and exponents, and normalize subnormals */
    sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
    sz = zz.i[0] & 0x80000000;
    ex = hx >> 20;
    if (!ex) {
        xx.d = x * two52;
        ex = ((xx.i[0] & ~0x80000000) >> 20) - 52;
    }
    ey = hy >> 20;
    if (!ey) {
        yy.d = y * two52;
        ey = ((yy.i[0] & ~0x80000000) >> 20) - 52;
    }
    ez = hz >> 20;
    if (!ez) {
        zz.d = z * two52;
        ez = ((zz.i[0] & ~0x80000000) >> 20) - 52;
    }

    /* multiply x*y to 106 bits */
    exy = ex + ey - 0x3ff;
    /* force both exponent fields to 0x3ff so x and y lie in [1,2) */
    xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000;
    yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000;
    x = xx.d;
    y = yy.d;
    /* split x and y into high and low parts */
    xhi = ((x + twom26) + two27) - two27;
    yhi = ((y + twom26) + two27) - two27;
    xlo = x - xhi;
    ylo = y - yhi;
    /* x receives the rounded product, y the remainder */
    x *= y;
    y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
    if (x >= two) {
        x *= half;
        y *= half;
        exy++;
    }

    /* extract the significands */
    xx.d = x;
    xy0 = (xx.i[0] & 0xfffff) | 0x100000;
    xy1 = xx.i[1];
    yy.d = t = y + twom32;
    xy2 = yy.i[1];
    yy.d = (y - (t - twom32)) + twom64;
    xy3 = yy.i[1];
    z0 = (zz.i[0] & 0xfffff) | 0x100000;
    z1 = zz.i[1];
    z2 = z3 = 0;

    /*
     * now x*y is represented by sxy, exy, and xy[0-3], and z is
     * represented likewise; swap if need be so |xy| <= |z|
     */
    if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
        (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
        e = sxy; sxy = sz; sz = e;
        e = exy; exy = ez; ez = e;
        e = xy0; xy0 = z0; z0 = e;
        e = xy1; xy1 = z1; z1 = e;
        z2 = xy2; xy2 = 0;
        z3 = xy3; xy3 = 0;
    }

    /* shift the significand of xy keeping a sticky bit */
    e = ez - exy;
    if (e > 116) {
        xy0 = xy1 = xy2 = 0;
        xy3 = 1;
    } else if (e >= 96) {
        sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e));
        xy3 = xy0 >> (e - 96);
        if (sticky)
            xy3 |= 1;
        xy0 = xy1 = xy2 = 0;
    } else if (e >= 64) {
        sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e));
        xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
        if (sticky)
            xy3 |= 1;
        xy2 = xy0 >> (e - 64);
        xy0 = xy1 = 0;
    } else if (e >= 32) {
        sticky = xy3 | ((xy2 << 1) << (63 - e));
        xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
        if (sticky)
            xy3 |= 1;
        xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
        xy1 = xy0 >> (e - 32);
        xy0 = 0;
    } else if (e) {
        sticky = (xy3 << 1) << (31 - e);
        xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
        if (sticky)
            xy3 |= 1;
        xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
        xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
        xy0 >>= e;
    }

    /* if this is a magnitude subtract, negate the significand of xy */
    if (sxy ^ sz) {
        xy0 = ~xy0;
        xy1 = ~xy1;
        xy2 = ~xy2;
        xy3 = -xy3;
        if (xy3 == 0)
            if (++xy2 == 0)
                if (++xy1 == 0)
                    xy0++;
    }

    /* add, propagating carries */
    z3 += xy3;
    e = (z3 < xy3);
    z2 += xy2;
    if (e) {
        z2++;
        e = (z2 <= xy2);
    } else
        e = (z2 < xy2);
    z1 += xy1;
    if (e) {
        z1++;
        e = (z1 <= xy1);
    } else
        e = (z1 < xy1);
    z0 += xy0;
    if (e)
        z0++;

    /* postnormalize and collect rounding information into z2 */
    if (ez < 1) {
        /* result is tiny; shift right until exponent is within range */
        e = 1 - ez;
        if (e > 56) {
            z2 = 1; /* result can't be exactly zero */
            z0 = z1 = 0;
        } else if (e >= 32) {
            sticky = z3 | z2 | ((z1 << 1) << (63 - e));
            z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
            if (sticky)
                z2 |= 1;
            z1 = z0 >> (e - 32);
            z0 = 0;
        } else {
            sticky = z3 | (z2 << 1) << (31 - e);
            z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
            if (sticky)
                z2 |= 1;
            z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
            z0 >>= e;
        }
        ez = 1;
    } else if (z0 >= 0x200000) {
        /* carry out; shift right by one */
        sticky = (z2 & 1) | z3;
        z2 = (z2 >> 1) | (z1 << 31);
        if (sticky)
            z2 |= 1;
        z1 = (z1 >> 1) | (z0 << 31);
        z0 >>= 1;
        ez++;
    } else {
        if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) {
            /*
             * borrow/cancellation; shift left as much as
             * exponent allows
             */
            /* first by whole 32-bit words ... */
            while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) {
                z0 = z1;
                z1 = z2;
                z2 = z3;
                z3 = 0;
                ez -= 32;
            }
            /* ... then one bit at a time */
            while (z0 < 0x100000 && ez > 1) {
                z0 = (z0 << 1) | (z1 >> 31);
                z1 = (z1 << 1) | (z2 >> 31);
                z2 = (z2 << 1) | (z3 >> 31);
                z3 <<= 1;
                ez--;
            }
        }
        if (z3)
            z2 |= 1;
    }

    /* get the rounding mode and clear current exceptions */
    rm = fsr >> 30;
    fsr &= ~FSR_CEXC;

    /* strip off the integer bit, if there is one */
    ibit = z0 & 0x100000;
    if (ibit)
        z0 -= 0x100000;
    else {
        ez = 0;
        if (!(z0 | z1 | z2)) { /* exact zero */
            /* the zero is negative only when rounding toward -inf */
            zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
            zz.i[1] = 0;
            __fenv_setfsr(&fsr);
            return (zz.d);
        }
    }

    /*
     * flip the sense of directed roundings if the result is negative;
     * the logic below applies to a positive result
     * (the xor swaps FSR_RP (2) and FSR_RM (3) and leaves FSR_RN (0)
     * and FSR_RZ (1) unchanged)
     */
    if (sz)
        rm ^= rm >> 1;

    /* round and raise exceptions */
    if (z2) {
        fsr |= FSR_NXC;

        /* decide whether to round the fraction up */
        if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u ||
            (z2 == 0x80000000u && (z1 & 1))))) {
            /* round up and renormalize if necessary */
            if (++z1 == 0) {
                if (++z0 == 0x100000) {
                    z0 = 0;
                    ez++;
                }
            }
        }
    }

    /* check for under/overflow */
    if (ez >= 0x7ff) {
        /* deliver infinity or the largest finite number, per rm */
        if (rm == FSR_RN || rm == FSR_RP) {
            zz.i[0] = sz | 0x7ff00000;
            zz.i[1] = 0;
        } else {
            zz.i[0] = sz | 0x7fefffff;
            zz.i[1] = 0xffffffff;
        }
        fsr |= FSR_OFC | FSR_NXC;
    } else {
        zz.i[0] = sz | (ez << 20) | z0;
        zz.i[1] = z1;

        /*
         * !ibit => exact result was tiny before rounding,
         * z2 nonzero => result delivered is inexact
         */
        if (!ibit) {
            if (z2)
                fsr |= FSR_UFC | FSR_NXC;
            else if (fsr & FSR_UFM)
                fsr |= FSR_UFC;
        }
    }

    /* restore the fsr and emulate exceptions as needed */
    if ((fsr & FSR_CEXC) & (fsr >> 23)) {
        /*
         * a trap is enabled for one of the exceptions: restore the
         * fsr, then perform an operation that raises that exception
         */
        __fenv_setfsr(&fsr);
        if (fsr & FSR_OFC) {
            dummy = huge;
            dummy *= huge;
        } else if (fsr & FSR_UFC) {
            dummy = tiny;
            if (fsr & FSR_NXC)
                dummy *= tiny;
            else
                dummy -= tiny2;
        } else {
            dummy = huge;
            dummy += tiny;
        }
    } else {
        /* no enabled trap: copy the current exceptions to accrued */
        fsr |= (fsr & 0x1f) << 5;
        __fenv_setfsr(&fsr);
    }
    return (zz.d);
}

#elif defined(__i386)

/* number of 32-bit words overlaying a long double */
#if defined(__amd64)
#define NI 4
#else
#define NI 3
#endif

/*
 * fma for x86: 64-bit double precision, little-endian
 *
 * Computes x * y + z in double extended arithmetic with exceptions
 * masked, then converts to double after the caller's control and status
 * words have been restored.  In the union views below, i[2] holds the
 * sign and exponent and i[1], i[0] hold the significand.
 */
double
__fma(double x, double y, double z) {
    union {
        unsigned i[NI];
        long double e;
    } xx, yy, zz;
    long double xe, ye, xhi, xlo, yhi, ylo;
    int ex, ey, ez;
    unsigned cwsw, oldcwsw, rm;

    /* convert the operands to double extended */
    xx.e = (long double) x;
    yy.e = (long double) y;
    zz.e = (long double) z;

    /* extract the exponents of the arguments */
    ex = xx.i[2] & 0x7fff;
    ey = yy.i[2] & 0x7fff;
    ez = zz.i[2] & 0x7fff;

    /* dispense with inf, nan, and zero cases */
    if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
        /* x or y is inf, nan, or zero */
        return ((double) (xx.e * yy.e + zz.e));

    if (ez >= 0x7fff) /* z is inf or nan */
        return ((double) (xx.e + zz.e));
        /* avoid spurious inexact in x * y */

    /*
     * save the control and status words, mask all exceptions, and
     * set rounding to 64-bit precision and to-nearest
     * (0xf0c0ffff clears the rounding direction, rounding precision
     * and mask fields; 0x033f0000 is FCW_P64 | FCW_ALLM)
     */
    __fenv_getcwsw(&oldcwsw);
    cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000;
    __fenv_setcwsw(&cwsw);

    /* multiply x*y to 106 bits */
    xe = xx.e;
    xx.i[0] = 0;
    xhi = xx.e; /* hi 32 bits */
    xlo = xe - xhi; /* lo 21 bits */
    ye = yy.e;
    yy.i[0] = 0;
    yhi = yy.e;
    ylo = ye - yhi;
    xe = xe * ye;
    ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo;

    /* distill the sum of xe, ye, and z */
    xhi = ye + zz.e;
    yhi = xhi - ye;
    xlo = (zz.e - yhi) + (ye - (xhi - yhi));
    /* now (xhi,xlo) = ye + z */

    yhi = xe + xhi;
    ye = yhi - xe;
    ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */

    xhi = xlo + ylo;
    xe = xhi - xlo;
    xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */

    yy.e = yhi + xhi;
    ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */

    if (yy.i[1] != 0) { /* yy.e is nonzero */
        /* perturb yy.e if its least significant 10 bits are zero */
        if (!(yy.i[0] & 0x3ff)) {
            xx.e = ylo + xlo;
            if (xx.i[1] != 0) {
                /*
                 * replace the residual by a power of two with
                 * the residual's sign and an exponent 63 below
                 * that of yy.e
                 */
                xx.i[2] = (xx.i[2] & 0x8000) |
                    ((yy.i[2] & 0x7fff) - 63);
                xx.i[1] = 0x80000000;
                xx.i[0] = 0;
                yy.e += xx.e;
            }
        }
    } else {
        /* set sign of zero result according to rounding direction */
        rm = oldcwsw & 0x0c000000;
        yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0);
    }

    /*
     * restore the control and status words and convert the result
     * to double
     */
    __fenv_setcwsw(&oldcwsw);
    return ((double) yy.e);
}

#if 0
/*
 * another fma for x86: assumes return value will be left in
 * long double (80-bit double extended) precision
 */
long double
__fma(double x, double y, double z) {
    union {
        unsigned i[3];
        long double e;
    } xx, yy, zz, tt;
    long double xe, ye, xhi, xlo, yhi, ylo, zhi, zlo;
    int ex, ey, ez;
    unsigned cwsw, oldcwsw, s;

    /* convert the operands to double extended */
    xx.e = (long double) x;
    yy.e = (long double) y;
    zz.e = (long double) z;

    /* extract the exponents of the arguments */
    ex = xx.i[2] & 0x7fff;
    ey = yy.i[2] & 0x7fff;
    ez = zz.i[2] & 0x7fff;

    /* dispense with inf, nan, and zero cases */
    if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
        /* x or y is inf, nan, or zero */
        return (xx.e * yy.e + zz.e);

    if (ez >= 0x7fff) /* z is inf or nan */
        return (xx.e + zz.e); /* avoid spurious inexact in x * y */

    if (ez == 0) /* z is zero */
        return (xx.e * yy.e); /* x * y isn't zero; no need to add z */

    /*
     * save the control and status words, mask all exceptions, and
     * set rounding to 64-bit precision and to-nearest
     */
    __fenv_getcwsw(&oldcwsw);
    cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000;
    __fenv_setcwsw(&cwsw);

    /* multiply x*y to 106 bits */
    xe = xx.e;
    xx.i[0] = 0;
    xhi = xx.e; /* hi 32 bits */
    xlo = xe - xhi; /* lo 21 bits */
    ye = yy.e;
    yy.i[0] = 0;
    yhi = yy.e;
    ylo = ye - yhi;
    xx.e = xe * ye;
    xx.i[0] &= ~0x7ff; /* 53 bits of x*y */
    yy.e = ((xhi * yhi - xx.e) + xhi * ylo + xlo * yhi) + xlo * ylo;

    /* reduce to a sum of two terms */
    if (yy.e != 0.0) {
        ex = xx.i[2] & 0x7fff;
        if (ez - ex > 10) {
            /* collapse y into a single bit and add to x */
            yy.i[0] = 0;
            yy.i[1] = 0x80000000;
            yy.i[2] = (yy.i[2] & 0x8000) | (ex - 60);
            xx.e += yy.e;
        } else if (ex - ez <= 10) {
            xx.e += zz.e; /* exact */
            zz.e = yy.e;
        } else if (ex - ez <= 42) {
            /* split z into two pieces */
            tt.i[0] = 0;
            tt.i[1] = 0x80000000;
            tt.i[2] = ex + 11;
            zhi = (zz.e + tt.e) - tt.e;
            zlo = zz.e - zhi;
            xx.e += zhi;
            zz.e = yy.e + zlo;
        } else if (ex - ez <= 63) {
            zz.e += yy.e; /* exact */
        } else if (ex - ez <= 106) {
            /*
             * collapse the tail of z into a sticky bit and add z
             * to y without error
             */
            if (ex - ez <= 81) {
                s = 1 << (ex - ez - 50);
                if (zz.i[0] & (s - 1))
                    zz.i[0] |= s;
                zz.i[0] &= ~(s - 1);
            } else {
                s = 1 << (ex - ez - 82);
                if ((zz.i[1] & (s - 1)) | zz.i[0])
                    zz.i[1] |= s;
                zz.i[1] &= ~(s - 1);
                zz.i[0] = 0;
            }
            zz.e += yy.e;
        } else {
            /* collapse z into a single bit and add to y */
            zz.i[0] = 0;
            zz.i[1] = 0x80000000;
            zz.i[2] = (zz.i[2] & 0x8000) | (ex - 113);
            zz.e += yy.e;
        }
    }

    /* restore the control and status words, and sum */
    __fenv_setcwsw(&oldcwsw);
    return (xx.e + zz.e);
}
#endif

#else
#error Unknown architecture
#endif
diff --git a/usr/src/libm/src/m9x/fma.h b/usr/src/libm/src/m9x/fma.h new file mode 100644 index 0000000..9e2b718 --- /dev/null +++ b/usr/src/libm/src/m9x/fma.h @@ -0,0 +1,125 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */

#ifndef _FMA_H
#define _FMA_H

#pragma ident "@(#)fma.h 1.3 06/01/31 SMI"

#ifdef __cplusplus
extern "C" {
#endif

#ifdef __sparc

/*
 * Common definitions for fma routines (SPARC)
 */

/* fsr fields */

/* current exception bits */
#define FSR_NXC 0x1     /* inexact */
#define FSR_DZC 0x2     /* division by zero */
#define FSR_UFC 0x4     /* underflow */
#define FSR_OFC 0x8     /* overflow */
#define FSR_NVC 0x10    /* invalid operation */
#define FSR_CEXC 0x1f /* mask for all cexc bits */

/* accrued exception bits (the current exception bits shifted left by 5) */
#define FSR_NXA 0x20
#define FSR_DZA 0x40
#define FSR_UFA 0x80
#define FSR_OFA 0x100
#define FSR_NVA 0x200

/* trap enable bits (the current exception bits shifted left by 23) */
#define FSR_NXM 0x00800000
#define FSR_DZM 0x01000000
#define FSR_UFM 0x02000000
#define FSR_OFM 0x04000000
#define FSR_NVM 0x08000000

/* rounding directions (right-adjusted) */
#define FSR_RN 0    /* to nearest */
#define FSR_RZ 1    /* toward zero */
#define FSR_RP 2    /* toward positive infinity */
#define FSR_RM 3    /* toward negative infinity */

/* inline templates */
extern void __fenv_getfsr(unsigned int *);
extern void __fenv_setfsr(const unsigned int *);

#endif /* __sparc */


#ifdef __i386

/*
 * Common definitions for fma routines (x86)
 */

/*
 * control and status word fields
 *
 * These are bit positions within the single unsigned int handled by
 * __fenv_getcwsw/__fenv_setcwsw: the exception flags occupy the low
 * half, the masks and rounding controls the high half.
 */

/* exception flags */
#define FSW_NV 0x1      /* invalid operation */
#define FSW_DN 0x2      /* denormal operand */
#define FSW_DZ 0x4      /* division by zero */
#define FSW_OF 0x8      /* overflow */
#define FSW_UF 0x10     /* underflow */
#define FSW_NX 0x20     /* inexact */

/* exception masks (the exception flags shifted left by 16) */
#define FCW_NVM 0x00010000
#define FCW_DNM 0x00020000
#define FCW_DZM 0x00040000
#define FCW_OFM 0x00080000
#define FCW_UFM 0x00100000
#define FCW_NXM 0x00200000
#define FCW_ALLM 0x003f0000     /* all six masks */

/* rounding directions */
#define FCW_RN 0x00000000   /* to nearest */
#define FCW_RM 0x04000000   /* toward negative infinity */
#define FCW_RP 0x08000000   /* toward positive infinity */
#define FCW_RZ 0x0c000000   /* toward zero */

/* rounding precisions */
#define FCW_P24 0x00000000  /* 24 bits */
#define FCW_P53 0x02000000  /* 53 bits */
#define FCW_P64 0x03000000  /* 64 bits */

/* inline templates */
extern void __fenv_getcwsw(unsigned int *);
extern void __fenv_setcwsw(const unsigned int *);

#endif /* __i386 */

#ifdef __cplusplus
}
#endif

#endif /* _FMA_H */
diff --git a/usr/src/libm/src/m9x/fmaf.c b/usr/src/libm/src/m9x/fmaf.c new file mode 100644 index 0000000..f0799b7 --- /dev/null +++ b/usr/src/libm/src/m9x/fmaf.c @@ -0,0 +1,241 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fmaf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmaf = __fmaf +#endif + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +/* + * fmaf for SPARC: 32-bit single precision, big-endian + */ +float +__fmaf(float x, float y, float z) { + union { + unsigned i[2]; + double d; + } xy, zz; + unsigned u, s; + int exy, ez; + + /* + * the following operations can only raise the invalid exception, + * and then only if either x*y is of the form Inf*0 or one of x, + * y, or z is a signaling NaN + */ + xy.d = (double) x * y; + zz.d = (double) z; + + /* + * if the sum xy + z will be exact, just compute it and cast the + * result to float + */ + exy = (xy.i[0] >> 20) & 0x7ff; + ez = (zz.i[0] >> 20) & 0x7ff; + if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 || + ez == 0x7ff || ez == 0) { + return ((float) (xy.d + zz.d)); + } + + /* + * collapse the tail of the smaller summand into a "sticky bit" + * so that the sum can be computed without error + */ + if (ez > exy) { + if (ez - exy < 31) { + u = xy.i[1]; + s = 2 << (ez - exy); + if (u & (s - 1)) + u |= s; + xy.i[1] = u & ~(s - 1); + } else if (ez - exy < 51) { + u = xy.i[0]; + s = 1 << (ez - exy - 31); + if ((u & (s - 1)) | xy.i[1]) + u |= s; + xy.i[0] = u & ~(s - 1); + xy.i[1] = 0; + } else { + /* collapse all of xy into a single bit */ + xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20); + xy.i[1] = 0; + } + } else { + if (exy - ez < 31) { + u = zz.i[1]; + s = 2 << (exy - ez); + if (u & (s - 1)) + u |= s; + zz.i[1] = u & ~(s - 1); + } else if (exy - ez < 51) { + u = zz.i[0]; + s = 1 << (exy - ez - 31); + if ((u & (s - 1)) | zz.i[1]) + u |= s; + zz.i[0] = u & ~(s - 1); + zz.i[1] = 0; + } else { + /* collapse all of zz into a single bit */ + zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20); + zz.i[1] = 0; + } + } + + return ((float) (xy.d + zz.d)); +} + +#elif defined(__i386) + +#if defined(__amd64) +#define NI 4 +#else +#define NI 3 +#endif + +/* + * 
fmaf for x86: 32-bit single precision, little-endian + */ +float +__fmaf(float x, float y, float z) { + union { + unsigned i[NI]; + long double e; + } xy, zz; + unsigned u, s, cwsw, oldcwsw; + int exy, ez; + + /* set rounding precision to 64 bits */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & 0xfcffffff) | 0x03000000; + __fenv_setcwsw(&cwsw); + + /* + * the following operations can only raise the invalid exception, + * and then only if either x*y is of the form Inf*0 or one of x, + * y, or z is a signaling NaN + */ + xy.e = (long double) x * y; + zz.e = (long double) z; + + /* + * if the sum xy + z will be exact, just compute it and cast the + * result to float + */ + exy = xy.i[2] & 0x7fff; + ez = zz.i[2] & 0x7fff; + if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 || + ez == 0x7fff || ez == 0) { + goto cont; + } + + /* + * collapse the tail of the smaller summand into a "sticky bit" + * so that the sum can be computed without error + */ + if (ez > exy) { + if (ez - exy < 31) { + u = xy.i[0]; + s = 2 << (ez - exy); + if (u & (s - 1)) + u |= s; + xy.i[0] = u & ~(s - 1); + } else if (ez - exy < 62) { + u = xy.i[1]; + s = 1 << (ez - exy - 31); + if ((u & (s - 1)) | xy.i[0]) + u |= s; + xy.i[1] = u & ~(s - 1); + xy.i[0] = 0; + } else { + /* collapse all of xy into a single bit */ + xy.i[0] = 0; + xy.i[1] = 0x80000000; + xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62); + } + } else { + if (exy - ez < 62) { + u = zz.i[1]; + s = 1 << (exy - ez - 31); + if ((u & (s - 1)) | zz.i[0]) + u |= s; + zz.i[1] = u & ~(s - 1); + zz.i[0] = 0; + } else { + /* collapse all of zz into a single bit */ + zz.i[0] = 0; + zz.i[1] = 0x80000000; + zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62); + } + } + +cont: + xy.e += zz.e; + + /* restore the rounding precision */ + __fenv_getcwsw(&cwsw); + cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000); + __fenv_setcwsw(&cwsw); + + return ((float) xy.e); +} + +#if 0 +/* + * another fmaf for x86: assumes return value will be left in + * 
long double (80-bit double extended) precision + * + * Evaluates (long double) x * y + z and returns the long double result + * without narrowing it to float. This variant is compiled out: it sits + * between #if 0 and the #endif that follows the function. + */ +long double +__fmaf(float x, float y, float z) { + /* + * Note: This implementation assumes the rounding precision mode + * is set to the default, rounding to 64 bit precision. If this + * routine must work in non-default rounding precision modes, do + * the following instead: + * + * long double t; + * + * <set rp mode to round to 64 bit precision> + * t = x * y; + * <restore rp mode> + * return t + z; + * + * Note that the code to change rounding precision must not alter + * the exception masks or flags, since the product x * y may raise + * an invalid operation exception. + */ + return ((long double) x * y + z); +} +#endif + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/fmal.c b/usr/src/libm/src/m9x/fmal.c new file mode 100644 index 0000000..7fb9a62 --- /dev/null +++ b/usr/src/libm/src/m9x/fmal.c @@ -0,0 +1,1224 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)fmal.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmal = __fmal +#endif + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +static const union { + unsigned i[2]; + double d; +} C[] = { + { 0x3fe00000u, 0 }, + { 0x40000000u, 0 }, + { 0x3ef00000u, 0 }, + { 0x3e700000u, 0 }, + { 0x41300000u, 0 }, + { 0x3e300000u, 0 }, + { 0x3b300000u, 0 }, + { 0x38300000u, 0 }, + { 0x42300000u, 0 }, + { 0x3df00000u, 0 }, + { 0x7fe00000u, 0 }, + { 0x00100000u, 0 }, + { 0x00100001u, 0 }, + { 0, 0 }, + { 0x7ff00000u, 0 }, + { 0x7ff00001u, 0 } +}; + +#define half C[0].d +#define two C[1].d +#define twom16 C[2].d +#define twom24 C[3].d +#define two20 C[4].d +#define twom28 C[5].d +#define twom76 C[6].d +#define twom124 C[7].d +#define two36 C[8].d +#define twom32 C[9].d +#define huge C[10].d +#define tiny C[11].d +#define tiny2 C[12].d +#define zero C[13].d +#define inf C[14].d +#define snan C[15].d + +static const unsigned fsr_rm = 0xc0000000u; + +/* + * fmal for SPARC: 128-bit quad precision, big-endian + * + * Outline: classify x, y, and z (zero, finite nonzero, inf, quiet nan) + * and dispose of the special cases; form the significand of x*y as + * eight 32-bit words (xy0-xy7) from products of pieces of at most 24 + * bits held in doubles; align the smaller of x*y and z keeping a sticky + * bit, add or subtract the significands, postnormalize, then round + * according to the rounding mode taken from the saved fsr and restore + * the fsr, emulating any exceptions that are needed. + */ +long double +__fmal(long double x, long double y, long double z) { + union { + unsigned i[4]; + long double q; + } xx, yy, zz; + union { + unsigned i[2]; + double d; + } u; + double dx[5], dy[5], dxy[9], c, s; + unsigned xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7; + unsigned z0, z1, z2, z3, z4, z5, z6, z7; + unsigned fsr, rm, sticky; + int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; + int cx, cy, cz; + volatile double dummy; + + /* extract the high order words of the arguments */ + xx.q = x; + yy.q = y; + zz.q = z; + hx = xx.i[0] & ~0x80000000; + hy = yy.i[0] & ~0x80000000; + hz = zz.i[0] & ~0x80000000; + + /* + * distinguish zero, finite nonzero, infinite, and quiet nan + * arguments; raise invalid and return for signaling nans + */ + if (hx >= 0x7fff0000) { + if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) { + if (!(hx & 0x8000)) { + /* signaling nan, raise invalid */ + dummy = snan; + dummy += snan; + xx.i[0] |= 0x8000; + 
return (xx.q); + } + cx = 3; /* quiet nan */ + } else + cx = 2; /* inf */ + } else if (hx == 0) { + cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0; + /* subnormal or zero */ + } else + cx = 1; /* finite nonzero */ + + if (hy >= 0x7fff0000) { + if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) { + if (!(hy & 0x8000)) { + dummy = snan; + dummy += snan; + yy.i[0] |= 0x8000; + return (yy.q); + } + cy = 3; + } else + cy = 2; + } else if (hy == 0) { + cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0; + } else + cy = 1; + + if (hz >= 0x7fff0000) { + if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) { + if (!(hz & 0x8000)) { + dummy = snan; + dummy += snan; + zz.i[0] |= 0x8000; + return (zz.q); + } + cz = 3; + } else + cz = 2; + } else if (hz == 0) { + cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0; + } else + cz = 1; + + /* get the fsr and clear current exceptions */ + __fenv_getfsr(&fsr); + fsr &= ~FSR_CEXC; + + /* handle all other zero, inf, and nan cases */ + if (cx != 1 || cy != 1 || cz != 1) { + /* if x or y is a quiet nan, return it */ + if (cx == 3) { + __fenv_setfsr(&fsr); + return (x); + } + if (cy == 3) { + __fenv_setfsr(&fsr); + return (y); + } + + /* if x*y is 0*inf, raise invalid and return the default nan */ + if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) { + dummy = zero; + dummy *= inf; + zz.i[0] = 0x7fffffff; + zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; + return (zz.q); + } + + /* if z is a quiet nan, return it */ + if (cz == 3) { + __fenv_setfsr(&fsr); + return (z); + } + + /* + * now none of x, y, or z is nan; handle cases where x or y + * is inf + */ + if (cx == 2 || cy == 2) { + /* + * if z is also inf, either we have inf-inf or + * the result is the same as z depending on signs + */ + if (cz == 2) { + if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) { + dummy = inf; + dummy -= inf; + zz.i[0] = 0x7fffffff; + zz.i[1] = zz.i[2] = zz.i[3] = + 0xffffffff; + return (zz.q); + } + __fenv_setfsr(&fsr); + return (z); + } + + /* otherwise the result is inf with appropriate 
sign */ + zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) | + 0x7fff0000; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + __fenv_setfsr(&fsr); + return (zz.q); + } + + /* if z is inf, return it */ + if (cz == 2) { + __fenv_setfsr(&fsr); + return (z); + } + + /* + * now x, y, and z are all finite; handle cases where x or y + * is zero + */ + if (cx == 0 || cy == 0) { + /* either we have 0-0 or the result is the same as z */ + if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < + 0) { + zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 : + 0; + __fenv_setfsr(&fsr); + return (zz.q); + } + __fenv_setfsr(&fsr); + return (z); + } + + /* if we get here, x and y are nonzero finite, z must be zero */ + return (x * y); + } + + /* + * now x, y, and z are all finite and nonzero; set round-to- + * negative-infinity mode + */ + __fenv_setfsr(&fsr_rm); + + /* + * get the signs and exponents and normalize the significands + * of x and y + */ + sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; + ex = hx >> 16; + hx &= 0xffff; + if (!ex) { + if (hx | (xx.i[1] & 0xfffe0000)) { + ex = 1; + } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) { + hx = xx.i[1]; + xx.i[1] = xx.i[2]; + xx.i[2] = xx.i[3]; + xx.i[3] = 0; + ex = -31; + } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) { + hx = xx.i[2]; + xx.i[1] = xx.i[3]; + xx.i[2] = xx.i[3] = 0; + ex = -63; + } else { + hx = xx.i[3]; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + ex = -95; + } + while ((hx & 0x10000) == 0) { + hx = (hx << 1) | (xx.i[1] >> 31); + xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31); + xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31); + xx.i[3] <<= 1; + ex--; + } + } else + hx |= 0x10000; + ey = hy >> 16; + hy &= 0xffff; + if (!ey) { + if (hy | (yy.i[1] & 0xfffe0000)) { + ey = 1; + } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) { + hy = yy.i[1]; + yy.i[1] = yy.i[2]; + yy.i[2] = yy.i[3]; + yy.i[3] = 0; + ey = -31; + } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) { + hy = yy.i[2]; + yy.i[1] = yy.i[3]; + yy.i[2] = yy.i[3] = 0; + ey = -63; + } else { + hy = yy.i[3]; + yy.i[1] = 
yy.i[2] = yy.i[3] = 0; + ey = -95; + } + while ((hy & 0x10000) == 0) { + hy = (hy << 1) | (yy.i[1] >> 31); + yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31); + yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31); + yy.i[3] <<= 1; + ey--; + } + } else + hy |= 0x10000; + exy = ex + ey - 0x3fff; + + /* convert the significands of x and y to doubles */ + c = twom16; + dx[0] = (double) ((int) hx) * c; + dy[0] = (double) ((int) hy) * c; + + c *= twom24; + dx[1] = (double) ((int) (xx.i[1] >> 8)) * c; + dy[1] = (double) ((int) (yy.i[1] >> 8)) * c; + + c *= twom24; + dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) & + 0xffffff)) * c; + dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) & + 0xffffff)) * c; + + c *= twom24; + dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) & + 0xffffff)) * c; + dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) & + 0xffffff)) * c; + + c *= twom24; + dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c; + dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c; + + /* form the "digits" of the product */ + dxy[0] = dx[0] * dy[0]; + dxy[1] = dx[0] * dy[1] + dx[1] * dy[0]; + dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0]; + dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + + dx[3] * dy[0]; + dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + + dx[3] * dy[1] + dx[4] * dy[0]; + dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + + dx[4] * dy[1]; + dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2]; + dxy[7] = dx[3] * dy[4] + dx[4] * dy[3]; + dxy[8] = dx[4] * dy[4]; + + /* split odd-numbered terms and combine into even-numbered terms */ + c = (dxy[1] + two20) - two20; + dxy[0] += c; + dxy[1] -= c; + c = (dxy[3] + twom28) - twom28; + dxy[2] += c + dxy[1]; + dxy[3] -= c; + c = (dxy[5] + twom76) - twom76; + dxy[4] += c + dxy[3]; + dxy[5] -= c; + c = (dxy[7] + twom124) - twom124; + dxy[6] += c + dxy[5]; + dxy[8] += (dxy[7] - c); + + /* propagate carries, adjusting the exponent if need be */ + dxy[7] = 
dxy[6] + dxy[8]; + dxy[5] = dxy[4] + dxy[7]; + dxy[3] = dxy[2] + dxy[5]; + dxy[1] = dxy[0] + dxy[3]; + if (dxy[1] >= two) { + dxy[0] *= half; + dxy[1] *= half; + dxy[2] *= half; + dxy[3] *= half; + dxy[4] *= half; + dxy[5] *= half; + dxy[6] *= half; + dxy[7] *= half; + dxy[8] *= half; + exy++; + } + + /* extract the significand of x*y */ + s = two36; + u.d = c = dxy[1] + s; + xy0 = u.i[1]; + c -= s; + dxy[1] -= c; + dxy[0] -= c; + + s *= twom32; + u.d = c = dxy[1] + s; + xy1 = u.i[1]; + c -= s; + dxy[2] += (dxy[0] - c); + dxy[3] = dxy[2] + dxy[5]; + + s *= twom32; + u.d = c = dxy[3] + s; + xy2 = u.i[1]; + c -= s; + dxy[4] += (dxy[2] - c); + dxy[5] = dxy[4] + dxy[7]; + + s *= twom32; + u.d = c = dxy[5] + s; + xy3 = u.i[1]; + c -= s; + dxy[4] -= c; + dxy[5] = dxy[4] + dxy[7]; + + s *= twom32; + u.d = c = dxy[5] + s; + xy4 = u.i[1]; + c -= s; + dxy[6] += (dxy[4] - c); + dxy[7] = dxy[6] + dxy[8]; + + s *= twom32; + u.d = c = dxy[7] + s; + xy5 = u.i[1]; + c -= s; + dxy[8] += (dxy[6] - c); + + s *= twom32; + u.d = c = dxy[8] + s; + xy6 = u.i[1]; + c -= s; + dxy[8] -= c; + + s *= twom32; + u.d = c = dxy[8] + s; + xy7 = u.i[1]; + + /* extract the sign, exponent, and significand of z */ + sz = zz.i[0] & 0x80000000; + ez = hz >> 16; + z0 = hz & 0xffff; + if (!ez) { + if (z0 | (zz.i[1] & 0xfffe0000)) { + z1 = zz.i[1]; + z2 = zz.i[2]; + z3 = zz.i[3]; + ez = 1; + } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) { + z0 = zz.i[1]; + z1 = zz.i[2]; + z2 = zz.i[3]; + z3 = 0; + ez = -31; + } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) { + z0 = zz.i[2]; + z1 = zz.i[3]; + z2 = z3 = 0; + ez = -63; + } else { + z0 = zz.i[3]; + z1 = z2 = z3 = 0; + ez = -95; + } + while ((z0 & 0x10000) == 0) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 <<= 1; + ez--; + } + } else { + z0 |= 0x10000; + z1 = zz.i[1]; + z2 = zz.i[2]; + z3 = zz.i[3]; + } + z4 = z5 = z6 = z7 = 0; + + /* + * now x*y is represented by sxy, exy, and xy[0-7], and z is + * represented 
likewise; swap if need be so |xy| <= |z| + */ + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 || + (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || + (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) { + e = sxy; sxy = sz; sz = e; + e = exy; exy = ez; ez = e; + e = xy0; xy0 = z0; z0 = e; + e = xy1; xy1 = z1; z1 = e; + e = xy2; xy2 = z2; z2 = e; + e = xy3; xy3 = z3; z3 = e; + z4 = xy4; xy4 = 0; + z5 = xy5; xy5 = 0; + z6 = xy6; xy6 = 0; + z7 = xy7; xy7 = 0; + } + + /* shift the significand of xy keeping a sticky bit */ + e = ez - exy; + if (e > 236) { + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; + xy7 = 1; + } else if (e >= 224) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | + ((xy0 << 1) << (255 - e)); + xy7 = xy0 >> (e - 224); + if (sticky) + xy7 |= 1; + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; + } else if (e >= 192) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | + ((xy1 << 1) << (223 - e)); + xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e)); + if (sticky) + xy7 |= 1; + xy6 = xy0 >> (e - 192); + xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0; + } else if (e >= 160) { + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | + ((xy2 << 1) << (191 - e)); + xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e)); + xy5 = xy0 >> (e - 160); + xy0 = xy1 = xy2 = xy3 = xy4 = 0; + } else if (e >= 128) { + sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e)); + xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e)); + xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e)); + xy4 = xy0 >> (e - 128); + xy0 = xy1 = xy2 = xy3 = 0; + } else if (e >= 96) { + sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e)); + xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e)); + xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e)); 
+ xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e)); + xy3 = xy0 >> (e - 96); + xy0 = xy1 = xy2 = 0; + } else if (e >= 64) { + sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e)); + xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e)); + xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e)); + xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e)); + xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); + xy2 = xy0 >> (e - 64); + xy0 = xy1 = 0; + } else if (e >= 32) { + sticky = xy7 | ((xy6 << 1) << (63 - e)); + xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e)); + xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e)); + xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e)); + xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); + xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); + xy1 = xy0 >> (e - 32); + xy0 = 0; + } else if (e) { + sticky = (xy7 << 1) << (31 - e); + xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e)); + if (sticky) + xy7 |= 1; + xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e)); + xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e)); + xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e)); + xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); + xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); + xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); + xy0 >>= e; + } + + /* if this is a magnitude subtract, negate the significand of xy */ + if (sxy ^ sz) { + xy0 = ~xy0; + xy1 = ~xy1; + xy2 = ~xy2; + xy3 = ~xy3; + xy4 = ~xy4; + xy5 = ~xy5; + xy6 = ~xy6; + xy7 = -xy7; + if (xy7 == 0) + if (++xy6 == 0) + if (++xy5 == 0) + if (++xy4 == 0) + if (++xy3 == 0) + if (++xy2 == 0) + if (++xy1 == 0) + xy0++; + } + + /* add, propagating carries */ + z7 += xy7; + e = (z7 < xy7); + z6 += xy6; + if (e) { + z6++; + e = (z6 <= xy6); + } else + e = (z6 < xy6); + z5 += xy5; + if (e) { + z5++; + e = (z5 <= xy5); + } else + e = (z5 < xy5); + z4 += xy4; + if (e) { + z4++; + e = (z4 <= 
xy4); + } else + e = (z4 < xy4); + z3 += xy3; + if (e) { + z3++; + e = (z3 <= xy3); + } else + e = (z3 < xy3); + z2 += xy2; + if (e) { + z2++; + e = (z2 <= xy2); + } else + e = (z2 < xy2); + z1 += xy1; + if (e) { + z1++; + e = (z1 <= xy1); + } else + e = (z1 < xy1); + z0 += xy0; + if (e) + z0++; + + /* postnormalize and collect rounding information into z4 */ + if (ez < 1) { + /* result is tiny; shift right until exponent is within range */ + e = 1 - ez; + if (e > 116) { + z4 = 1; /* result can't be exactly zero */ + z0 = z1 = z2 = z3 = 0; + } else if (e >= 96) { + sticky = z7 | z6 | z5 | z4 | z3 | z2 | + ((z1 << 1) << (127 - e)); + z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e)); + if (sticky) + z4 |= 1; + z3 = z0 >> (e - 96); + z0 = z1 = z2 = 0; + } else if (e >= 64) { + sticky = z7 | z6 | z5 | z4 | z3 | + ((z2 << 1) << (95 - e)); + z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e)); + if (sticky) + z4 |= 1; + z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e)); + z2 = z0 >> (e - 64); + z0 = z1 = 0; + } else if (e >= 32) { + sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e)); + z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e)); + if (sticky) + z4 |= 1; + z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e)); + z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); + z1 = z0 >> (e - 32); + z0 = 0; + } else { + sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e); + z4 = (z4 >> e) | ((z3 << 1) << (31 - e)); + if (sticky) + z4 |= 1; + z3 = (z3 >> e) | ((z2 << 1) << (31 - e)); + z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); + z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); + z0 >>= e; + } + ez = 1; + } else if (z0 >= 0x20000) { + /* carry out; shift right by one */ + sticky = (z4 & 1) | z5 | z6 | z7; + z4 = (z4 >> 1) | (z3 << 31); + if (sticky) + z4 |= 1; + z3 = (z3 >> 1) | (z2 << 31); + z2 = (z2 >> 1) | (z1 << 31); + z1 = (z1 >> 1) | (z0 << 31); + z0 >>= 1; + ez++; + } else { + if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) + != 0) { + /* + * borrow/cancellation; shift left as much as + 
* exponent allows + */ + while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) { + z0 = z1; + z1 = z2; + z2 = z3; + z3 = z4; + z4 = z5; + z5 = z6; + z6 = z7; + z7 = 0; + ez -= 32; + } + while (z0 < 0x10000 && ez > 1) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 = (z3 << 1) | (z4 >> 31); + z4 = (z4 << 1) | (z5 >> 31); + z5 = (z5 << 1) | (z6 >> 31); + z6 = (z6 << 1) | (z7 >> 31); + z7 <<= 1; + ez--; + } + } + if (z5 | z6 | z7) + z4 |= 1; + } + + /* get the rounding mode */ + rm = fsr >> 30; + + /* strip off the integer bit, if there is one */ + ibit = z0 & 0x10000; + if (ibit) + z0 -= 0x10000; + else { + ez = 0; + if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */ + zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + __fenv_setfsr(&fsr); + return (zz.q); + } + } + + /* + * flip the sense of directed roundings if the result is negative; + * the logic below applies to a positive result + */ + if (sz) + rm ^= rm >> 1; + + /* round and raise exceptions */ + if (z4) { + fsr |= FSR_NXC; + + /* decide whether to round the fraction up */ + if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || + (z4 == 0x80000000u && (z3 & 1))))) { + /* round up and renormalize if necessary */ + if (++z3 == 0) + if (++z2 == 0) + if (++z1 == 0) + if (++z0 == 0x10000) { + z0 = 0; + ez++; + } + } + } + + /* check for under/overflow */ + if (ez >= 0x7fff) { + if (rm == FSR_RN || rm == FSR_RP) { + zz.i[0] = sz | 0x7fff0000; + zz.i[1] = zz.i[2] = zz.i[3] = 0; + } else { + zz.i[0] = sz | 0x7ffeffff; + zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; + } + fsr |= FSR_OFC | FSR_NXC; + } else { + zz.i[0] = sz | (ez << 16) | z0; + zz.i[1] = z1; + zz.i[2] = z2; + zz.i[3] = z3; + + /* + * !ibit => exact result was tiny before rounding, + * z4 nonzero => result delivered is inexact + */ + if (!ibit) { + if (z4) + fsr |= FSR_UFC | FSR_NXC; + else if (fsr & FSR_UFM) + fsr |= FSR_UFC; + } + } + + /* restore the fsr and emulate exceptions as 
needed */ + if ((fsr & FSR_CEXC) & (fsr >> 23)) { + __fenv_setfsr(&fsr); + if (fsr & FSR_OFC) { + dummy = huge; + dummy *= huge; + } else if (fsr & FSR_UFC) { + dummy = tiny; + if (fsr & FSR_NXC) + dummy *= tiny; + else + dummy -= tiny2; + } else { + dummy = huge; + dummy += tiny; + } + } else { + fsr |= (fsr & 0x1f) << 5; + __fenv_setfsr(&fsr); + } + return (zz.q); +} + +#elif defined(__i386) + +static const union { + unsigned i[2]; + double d; +} C[] = { + { 0, 0x3fe00000u }, + { 0, 0x40000000u }, + { 0, 0x3df00000u }, + { 0, 0x3bf00000u }, + { 0, 0x41f00000u }, + { 0, 0x43e00000u }, + { 0, 0x7fe00000u }, + { 0, 0x00100000u }, + { 0, 0x00100001u } +}; + +#define half C[0].d +#define two C[1].d +#define twom32 C[2].d +#define twom64 C[3].d +#define two32 C[4].d +#define two63 C[5].d +#define huge C[6].d +#define tiny C[7].d +#define tiny2 C[8].d + +#if defined(__amd64) +#define NI 4 +#else +#define NI 3 +#endif + +/* + * fmal for x86: 80-bit extended double precision, little-endian + * + * Outline: operands that are inf, nan, or zero are handled with + * ordinary floating point expressions; otherwise exceptions are masked + * and rounding is set to 64-bit precision and toward-zero, x*y is + * computed to 128 bits by splitting each operand into high and low + * parts, the product and z are aligned and combined as 32-bit words + * keeping a sticky bit, the result is rounded using the rounding mode + * from the saved control word, and finally the control and status + * words are restored and overflow, underflow, or inexact is raised. + */ +long double +__fmal(long double x, long double y, long double z) { + union { + unsigned i[NI]; + long double e; + } xx, yy, zz; + long double xhi, yhi, xlo, ylo, t; + unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4; + unsigned oldcwsw, cwsw, rm, sticky, carry; + int ex, ey, ez, exy, sxy, sz, e, tinyafter; + volatile double dummy; + + /* extract the exponents of the arguments */ + xx.e = x; + yy.e = y; + zz.e = z; + ex = xx.i[2] & 0x7fff; + ey = yy.i[2] & 0x7fff; + ez = zz.i[2] & 0x7fff; + + /* dispense with inf, nan, and zero cases */ + if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 || + (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */ + return (x * y + z); + + if (ez == 0x7fff) /* z is inf or nan */ + return (x + z); /* avoid spurious under/overflow in x * y */ + + if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */ + /* + * x * y isn't zero but could underflow to zero, + * so don't add z, lest we perturb the sign + */ + return (x * y); + + /* 
+ * now x, y, and z are all finite and nonzero; extract signs and + * normalize the significands (this will raise the denormal operand + * exception if need be) + */ + sxy = (xx.i[2] ^ yy.i[2]) & 0x8000; + sz = zz.i[2] & 0x8000; + if (!ex) { + xx.e = x * two63; + ex = (xx.i[2] & 0x7fff) - 63; + } + if (!ey) { + yy.e = y * two63; + ey = (yy.i[2] & 0x7fff) - 63; + } + if (!ez) { + zz.e = z * two63; + ez = (zz.i[2] & 0x7fff) - 63; + } + + /* + * save the control and status words, mask all exceptions, and + * set rounding to 64-bit precision and toward-zero + */ + __fenv_getcwsw(&oldcwsw); + cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000; + __fenv_setcwsw(&cwsw); + + /* multiply x*y to 128 bits */ + exy = ex + ey - 0x3fff; + xx.i[2] = 0x3fff; + yy.i[2] = 0x3fff; + x = xx.e; + y = yy.e; + xhi = ((x + twom32) + two32) - two32; + yhi = ((y + twom32) + two32) - two32; + xlo = x - xhi; + ylo = y - yhi; + x *= y; + y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo; + if (x >= two) { + x *= half; + y *= half; + exy++; + } + + /* extract the significands */ + xx.e = x; + xy0 = xx.i[1]; + xy1 = xx.i[0]; + yy.e = t = y + twom32; + xy2 = yy.i[0]; + yy.e = (y - (t - twom32)) + twom64; + xy3 = yy.i[0]; + xy4 = 0; + z0 = zz.i[1]; + z1 = zz.i[0]; + z2 = z3 = z4 = 0; + + /* + * now x*y is represented by sxy, exy, and xy[0-4], and z is + * represented likewise; swap if need be so |xy| <= |z| + */ + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && + (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) { + e = sxy; sxy = sz; sz = e; + e = exy; exy = ez; ez = e; + e = xy0; xy0 = z0; z0 = e; + e = xy1; xy1 = z1; z1 = e; + z2 = xy2; xy2 = 0; + z3 = xy3; xy3 = 0; + } + + /* shift the significand of xy keeping a sticky bit */ + e = ez - exy; + if (e > 130) { + xy0 = xy1 = xy2 = xy3 = 0; + xy4 = 1; + } else if (e >= 128) { + sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e)); + xy4 = xy0 >> (e - 128); + if (sticky) + xy4 |= 1; + xy0 = xy1 = xy2 = xy3 = 0; + } else if (e >= 96) { + 
sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e)); + xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e)); + if (sticky) + xy4 |= 1; + xy3 = xy0 >> (e - 96); + xy0 = xy1 = xy2 = 0; + } else if (e >= 64) { + sticky = xy3 | ((xy2 << 1) << (95 - e)); + xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e)); + if (sticky) + xy4 |= 1; + xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); + xy2 = xy0 >> (e - 64); + xy0 = xy1 = 0; + } else if (e >= 32) { + sticky = (xy3 << 1) << (63 - e); + xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e)); + if (sticky) + xy4 |= 1; + xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); + xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); + xy1 = xy0 >> (e - 32); + xy0 = 0; + } else if (e) { + xy4 = (xy3 << 1) << (31 - e); + xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); + xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); + xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); + xy0 >>= e; + } + + /* if this is a magnitude subtract, negate the significand of xy */ + if (sxy ^ sz) { + xy0 = ~xy0; + xy1 = ~xy1; + xy2 = ~xy2; + xy3 = ~xy3; + xy4 = -xy4; + if (xy4 == 0) + if (++xy3 == 0) + if (++xy2 == 0) + if (++xy1 == 0) + xy0++; + } + + /* add, propagating carries */ + z4 += xy4; + carry = (z4 < xy4); + z3 += xy3; + if (carry) { + z3++; + carry = (z3 <= xy3); + } else + carry = (z3 < xy3); + z2 += xy2; + if (carry) { + z2++; + carry = (z2 <= xy2); + } else + carry = (z2 < xy2); + z1 += xy1; + if (carry) { + z1++; + carry = (z1 <= xy1); + } else + carry = (z1 < xy1); + z0 += xy0; + if (carry) { + z0++; + carry = (z0 <= xy0); + } else + carry = (z0 < xy0); + + /* for a magnitude subtract, ignore the last carry out */ + if (sxy ^ sz) + carry = 0; + + /* postnormalize and collect rounding information into z2 */ + if (ez < 1) { + /* result is tiny; shift right until exponent is within range */ + e = 1 - ez; + if (e > 67) { + z2 = 1; /* result can't be exactly zero */ + z0 = z1 = 0; + } else if (e >= 64) { + sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e)); + z2 = 
(z0 >> (e - 64)) | ((carry << 1) << (95 - e)); + if (sticky) + z2 |= 1; + z1 = carry >> (e - 64); + z0 = 0; + } else if (e >= 32) { + sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e)); + z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); + if (sticky) + z2 |= 1; + z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e)); + z0 = carry >> (e - 32); + } else { + sticky = z4 | z3 | (z2 << 1) << (31 - e); + z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); + if (sticky) + z2 |= 1; + z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); + z0 = (z0 >> e) | ((carry << 1) << (31 - e)); + } + ez = 1; + } else if (carry) { + /* carry out; shift right by one */ + sticky = (z2 & 1) | z3 | z4; + z2 = (z2 >> 1) | (z1 << 31); + if (sticky) + z2 |= 1; + z1 = (z1 >> 1) | (z0 << 31); + z0 = (z0 >> 1) | 0x80000000; + ez++; + } else { + if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) { + /* + * borrow/cancellation; shift left as much as + * exponent allows + */ + while (!z0 && ez >= 33) { + z0 = z1; + z1 = z2; + z2 = z3; + z3 = z4; + z4 = 0; + ez -= 32; + } + while (z0 < 0x80000000u && ez > 1) { + z0 = (z0 << 1) | (z1 >> 31); + z1 = (z1 << 1) | (z2 >> 31); + z2 = (z2 << 1) | (z3 >> 31); + z3 = (z3 << 1) | (z4 >> 31); + z4 <<= 1; + ez--; + } + } + if (z3 | z4) + z2 |= 1; + } + + /* get the rounding mode */ + rm = oldcwsw & 0x0c000000; + + /* adjust exponent if result is subnormal */ + tinyafter = 0; + if (!(z0 & 0x80000000)) { + ez = 0; + tinyafter = 1; + if (!(z0 | z1 | z2)) { /* exact zero */ + zz.i[2] = rm == FCW_RM ? 
0x8000 : 0; + zz.i[1] = zz.i[0] = 0; + __fenv_setcwsw(&oldcwsw); + return (zz.e); + } + } + + /* + * flip the sense of directed roundings if the result is negative; + * the logic below applies to a positive result + */ + if (sz && (rm == FCW_RM || rm == FCW_RP)) + rm = (FCW_RM + FCW_RP) - rm; + + /* round */ + if (z2) { + if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u || + (z2 == 0x80000000u && (z1 & 1))))) { + /* round up and renormalize if necessary */ + if (++z1 == 0) { + if (++z0 == 0) { + z0 = 0x80000000; + ez++; + } else if (z0 == 0x80000000) { + /* rounded up to smallest normal */ + ez = 1; + if ((rm == FCW_RP && z2 > + 0x80000000u) || (rm == FCW_RN && + z2 >= 0xc0000000u)) + /* + * would have rounded up to + * smallest normal even with + * unbounded range + */ + tinyafter = 0; + } + } + } + } + + /* restore the control and status words, check for over/underflow */ + __fenv_setcwsw(&oldcwsw); + if (ez >= 0x7fff) { + if (rm == FCW_RN || rm == FCW_RP) { + zz.i[2] = sz | 0x7fff; + zz.i[1] = 0x80000000; + zz.i[0] = 0; + } else { + zz.i[2] = sz | 0x7ffe; + zz.i[1] = 0xffffffff; + zz.i[0] = 0xffffffff; + } + dummy = huge; + dummy *= huge; + } else { + zz.i[2] = sz | ez; + zz.i[1] = z0; + zz.i[0] = z1; + + /* + * tinyafter => result rounded w/ unbounded range would be tiny, + * z2 nonzero => result delivered is inexact + */ + if (tinyafter) { + dummy = tiny; + if (z2) + dummy *= tiny; + else + dummy -= tiny2; + } else if (z2) { + dummy = huge; + dummy += tiny; + } + } + + return (zz.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/fmax.c b/usr/src/libm/src/m9x/fmax.c new file mode 100644 index 0000000..454a38b --- /dev/null +++ b/usr/src/libm/src/m9x/fmax.c @@ -0,0 +1,79 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fmax.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmax = __fmax +#endif + +/* + * fmax(x,y) returns the larger of x and y. If just one of the + * arguments is NaN, fmax returns the other argument. If both + * arguments are NaN, fmax returns NaN. + * + * See fmaxf.c for a discussion of implementation trade-offs. 
/*
 * __fmax(x, y) returns the larger of x and y.  If exactly one argument
 * is NaN the other argument is returned; if both are NaN the result is
 * NaN.  The sign of the result is cleared whenever either operand has
 * its sign clear, so __fmax(-0, +0) = __fmax(+0, -0) = +0.
 *
 * The sign bit is taken from the most significant 32-bit word of the
 * double: word 0 on SPARC, word 1 on x86.  amd64 uses the same layout
 * as i386 and is accepted explicitly so that a 64-bit x86 build does
 * not fall through to the "Unknown architecture" error.
 */
double
__fmax(double x, double y)
{
	union {
		unsigned i[2];
		double d;
	} xx, yy;
	unsigned s;

	/* if y is nan, replace it by x */
	if (y != y)
		y = x;

	/* if x is less than y or x and y are unordered, replace x by y */
#if defined(COMPARISON_MACRO_BUG)
	if (x != x || x < y)
#else
	if (!isgreaterequal(x, y))
#endif
		x = y;

	/*
	 * now x and y are either both NaN or both numeric; clear the
	 * sign of the result if either x or y has its sign clear
	 */
	xx.d = x;
	yy.d = y;
#if defined(__sparc)
	s = ~(xx.i[0] & yy.i[0]) & 0x80000000;
	xx.i[0] &= ~s;
#elif defined(__i386) || defined(__amd64)
	s = ~(xx.i[1] & yy.i[1]) & 0x80000000;
	xx.i[1] &= ~s;
#else
#error Unknown architecture
#endif
	return (xx.d);
}
+ */ + +#pragma ident "@(#)fmaxf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmaxf = __fmaxf +#endif + +/* + * fmax(x,y) returns the larger of x and y. If just one of the + * arguments is NaN, fmax returns the other argument. If both + * arguments are NaN, fmax returns NaN (ideally, one of the + * argument NaNs). + * + * C99 does not require that fmax(-0,+0) = fmax(+0,-0) = +0, but + * ideally fmax should satisfy this. + * + * C99 makes no mention of exceptions for fmax. I suppose ideally + * either fmax never raises any exceptions or else it raises the + * invalid operation exception if and only if some argument is a + * signaling NaN. In the former case, fmax should always return + * one of its arguments. In the latter, fmax shouldn't return a + * signaling NaN, although when both arguments are signaling NaNs, + * this ideal is at odds with the stipulation that fmax should + * always return one of its arguments. + * + * Commutativity of fmax follows from the properties listed above + * except when both arguments are NaN. In that case, fmax may be + * declared commutative by fiat because there is no portable way + * to tell different NaNs apart. Ideally fmax would be truly com- + * mutative for all arguments. + * + * On SPARC V8, fmax must involve tests and branches. Ideally, + * an implementation on SPARC V9 should avoid branching, using + * conditional moves instead where necessary, and be as efficient + * as possible in its use of other resources. + * + * It appears to be impossible to attain all of the aforementioned + * ideals simultaneously. The implementation below satisfies the + * following (on SPARC): + * + * 1. fmax(x,y) returns the larger of x and y if neither x nor y + * is NaN and the non-NaN argument if just one of x or y is NaN. + * If both x and y are NaN, fmax(x,y) returns x unchanged. + * 2. fmax(-0,+0) = fmax(+0,-0) = +0. + * 3. If either argument is a signaling NaN, fmax raises the invalid + * operation exception. 
/*
 * __fmaxf(x, y) returns the larger of x and y.
 *
 *  1. If neither argument is NaN the larger one is returned; if just
 *     one argument is NaN the other one is returned; if both are NaN,
 *     x is returned unchanged.
 *  2. __fmaxf(-0, +0) = __fmaxf(+0, -0) = +0.
 *  3. Per the design notes for this file (written for SPARC), the
 *     invalid operation exception is raised if either argument is a
 *     signaling NaN; otherwise no exceptions are raised.
 *
 * On SPARC v8plus/v9, this could be implemented as follows
 * (assuming %f0 = x, %f1 = y, return value left in %f0):
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsul	%fcc0,%f1,%f0
 *	st	%f0,[x]
 *	st	%f1,[y]
 *	ld	[x],%l0
 *	ld	[y],%l1
 *	and	%l0,%l1,%l2
 *	sethi	%hi(0x80000000),%l3
 *	andn	%l3,%l2,%l2
 *	andn	%l0,%l2,%l0
 *	st	%l0,[x]
 *	ld	[x],%f0
 *
 * If VIS instructions are available, use this code instead:
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsul	%fcc0,%f1,%f0
 *	fands	%f0,%f1,%f2
 *	fzeros	%f3
 *	fnegs	%f3,%f3
 *	fandnot2s %f3,%f2,%f2
 *	fandnot2s %f0,%f2,%f0
 *
 * If VIS 3.0 instructions are available, use this:
 *
 *	flcmps	%fcc0,%f0,%f1
 *	fmovslg	%fcc0,%f1,%f0	! move if %fcc0 is 1 or 2
 */
float
__fmaxf(float x, float y)
{
	union {
		unsigned i;
		float f;
	} pick, other;

	/* a NaN in y is discarded in favor of x */
	if (y != y)
		y = x;

	/* take y when x is smaller than y or the two are unordered */
#if defined(COMPARISON_MACRO_BUG)
	if (x != x || x < y)
		x = y;
#else
	if (!isgreaterequal(x, y))
		x = y;
#endif

	/*
	 * Here x and y are both NaN or both numeric.  The result keeps
	 * its sign bit only when the other operand has it set as well;
	 * every other bit of x is passed through untouched.
	 */
	pick.f = x;
	other.f = y;
	pick.i &= other.i | 0x7fffffff;

	return (pick.f);
}
/*
 * __fmaxl(x, y) returns the larger of x and y.  If exactly one argument
 * is NaN the other argument is returned; if both are NaN the result is
 * NaN.  The sign of the result is cleared whenever either operand has
 * its sign clear, so __fmaxl(-0, +0) = __fmaxl(+0, -0) = +0.
 *
 * On SPARC the sign is bit 31 of word 0 of the long double.  On x86 it
 * is bit 15 of word 2.  amd64 is accepted alongside i386 so that a
 * 64-bit x86 build does not hit the "Unknown architecture" error.
 */
long double
__fmaxl(long double x, long double y)
{
	union {
#if defined(__sparc)
		unsigned i[4];
#elif defined(__i386) || defined(__amd64)
		unsigned i[3];
#else
#error Unknown architecture
#endif
		long double ld;
	} xx, yy;
	unsigned s;

	/* if y is nan, replace it by x */
	if (y != y)
		y = x;

	/* if x is less than y or x and y are unordered, replace x by y */
#if defined(COMPARISON_MACRO_BUG)
	if (x != x || x < y)
#else
	if (!isgreaterequal(x, y))
#endif
		x = y;

	/*
	 * now x and y are either both NaN or both numeric; clear the
	 * sign of the result if either x or y has its sign clear
	 */
	xx.ld = x;
	yy.ld = y;
#if defined(__sparc)
	s = ~(xx.i[0] & yy.i[0]) & 0x80000000;
	xx.i[0] &= ~s;
#elif defined(__i386) || defined(__amd64)
	s = ~(xx.i[2] & yy.i[2]) & 0x8000;
	xx.i[2] &= ~s;
#else
#error Unknown architecture
#endif

	return (xx.ld);
}
to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)fmin.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak fmin = __fmin +#endif + +/* + * fmin(x,y) returns the smaller of x and y. If just one of the + * arguments is NaN, fmin returns the other argument. If both + * arguments are NaN, fmin returns NaN. + * + * See fmaxf.c for a discussion of implementation trade-offs. 
/*
 * __fmin(x, y) returns the smaller of x and y.  If exactly one argument
 * is NaN the other argument is returned; if both are NaN the result is
 * NaN.  The sign of the result is set whenever either operand has its
 * sign set, so __fmin(-0, +0) = __fmin(+0, -0) = -0.
 *
 * The sign bit is taken from the most significant 32-bit word of the
 * double: word 0 on SPARC, word 1 on x86.  amd64 uses the same layout
 * as i386 and is accepted explicitly so that a 64-bit x86 build does
 * not fall through to the "Unknown architecture" error.
 */
double
__fmin(double x, double y)
{
	union {
		unsigned i[2];
		double d;
	} xx, yy;
	unsigned s;

	/* if y is nan, replace it by x */
	if (y != y)
		y = x;

	/* if x is greater than y or x and y are unordered, replace x by y */
#if defined(COMPARISON_MACRO_BUG)
	if (x != x || x > y)
#else
	if (!islessequal(x, y))
#endif
		x = y;

	/*
	 * now x and y are either both NaN or both numeric; set the
	 * sign of the result if either x or y has its sign set
	 */
	xx.d = x;
	yy.d = y;
#if defined(__sparc)
	s = (xx.i[0] | yy.i[0]) & 0x80000000;
	xx.i[0] |= s;
#elif defined(__i386) || defined(__amd64)
	s = (xx.i[1] | yy.i[1]) & 0x80000000;
	xx.i[1] |= s;
#else
#error Unknown architecture
#endif

	return (xx.d);
}
/*
 * __fminf(x, y) returns the smaller of x and y.  If just one argument
 * is NaN the other one is returned; if both are NaN, x is returned.
 * The sign of the result is set whenever either operand has its sign
 * set, so __fminf(-0, +0) = __fminf(+0, -0) = -0.
 *
 * On SPARC v8plus/v9, this could be implemented as follows
 * (assuming %f0 = x, %f1 = y, return value left in %f0):
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsug	%fcc0,%f1,%f0
 *	st	%f0,[x]
 *	st	%f1,[y]
 *	ld	[x],%l0
 *	ld	[y],%l1
 *	or	%l0,%l1,%l2
 *	sethi	%hi(0x80000000),%l3
 *	and	%l3,%l2,%l2
 *	or	%l0,%l2,%l0
 *	st	%l0,[x]
 *	ld	[x],%f0
 *
 * If VIS instructions are available, use this code instead:
 *
 *	fcmps	%fcc0,%f1,%f1
 *	fmovsu	%fcc0,%f0,%f1
 *	fcmps	%fcc0,%f0,%f1
 *	fmovsug	%fcc0,%f1,%f0
 *	fors	%f0,%f1,%f2
 *	fzeros	%f3
 *	fnegs	%f3,%f3
 *	fands	%f3,%f2,%f2
 *	fors	%f0,%f2,%f0
 *
 * If VIS 3.0 instructions are available, use this:
 *
 *	flcmps	%fcc0,%f0,%f1
 *	fmovsge	%fcc0,%f1,%f0	! move if %fcc0 is 0 or 2
 */
float
__fminf(float x, float y)
{
	union {
		unsigned i;
		float f;
	} pick, other;

	/* a NaN in y is discarded in favor of x */
	if (y != y)
		y = x;

	/* take y when x is larger than y or the two are unordered */
#if defined(COMPARISON_MACRO_BUG)
	if (x != x || x > y)
		x = y;
#else
	if (!islessequal(x, y))
		x = y;
#endif

	/*
	 * Here x and y are both NaN or both numeric.  The result picks
	 * up the sign bit of the other operand in addition to its own;
	 * every other bit of x is passed through untouched.
	 */
	pick.f = x;
	other.f = y;
	pick.i |= other.i & 0x80000000;

	return (pick.f);
}
/*
 * __fminl(x, y) returns the smaller of x and y.  If exactly one argument
 * is NaN the other argument is returned; if both are NaN the result is
 * NaN.  The sign of the result is set whenever either operand has its
 * sign set, so __fminl(-0, +0) = __fminl(+0, -0) = -0.
 *
 * On SPARC the sign is bit 31 of word 0 of the long double.  On x86 it
 * is bit 15 of word 2.  amd64 is accepted alongside i386 so that a
 * 64-bit x86 build does not hit the "Unknown architecture" error.
 */
long double
__fminl(long double x, long double y)
{
	union {
#if defined(__sparc)
		unsigned i[4];
#elif defined(__i386) || defined(__amd64)
		unsigned i[3];
#else
#error Unknown architecture
#endif
		long double ld;
	} xx, yy;
	unsigned s;

	/* if y is nan, replace it by x */
	if (y != y)
		y = x;

	/* if x is greater than y or x and y are unordered, replace x by y */
#if defined(COMPARISON_MACRO_BUG)
	if (x != x || x > y)
#else
	if (!islessequal(x, y))
#endif
		x = y;

	/*
	 * now x and y are either both NaN or both numeric; set the
	 * sign of the result if either x or y has its sign set
	 */
	xx.ld = x;
	yy.ld = y;
#if defined(__sparc)
	s = (xx.i[0] | yy.i[0]) & 0x80000000;
	xx.i[0] |= s;
#elif defined(__i386) || defined(__amd64)
	s = (xx.i[2] | yy.i[2]) & 0x8000;
	xx.i[2] |= s;
#else
#error Unknown architecture
#endif

	return (xx.ld);
}
terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)frexp.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak frexp = __frexp +#endif + +/* + * frexp(x, exp) returns the normalized significand of x and sets + * *exp so that x = r*2^(*exp) where r is the return value. If x + * is finite and nonzero, 1/2 <= |r| < 1. + * + * If x is zero, infinite or NaN, frexp returns x and sets *exp = 0. + * (The relevant standards do not specify *exp when x is infinite or + * NaN, but this code sets it anyway.) + * + * If x is a signaling NaN, this code returns x without attempting + * to raise the invalid operation exception. If x is subnormal, + * this code treats it as nonzero regardless of nonstandard mode. 
+ */ + +#include "libm.h" + +double +__frexp(double x, int *exp) { + union { + unsigned i[2]; + double d; + } xx, yy; + double t; + unsigned hx; + int e; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx >= 0x7ff00000) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x00100000) { /* x is subnormal or zero */ + if ((hx | xx.i[LOWORD]) == 0) { + *exp = 0; + return (x); + } + + /* + * normalize x by regarding it as an integer + * + * Here we use 32-bit integer arithmetic to avoid trapping + * or emulating 64-bit arithmetic. If 64-bit arithmetic is + * available (e.g., in SPARC V9), do this instead: + * + * long lx = ((long) hx << 32) | xx.i[LOWORD]; + * xx.d = (xx.i[HIWORD] < 0)? -lx : lx; + * + * If subnormal arithmetic doesn't trap, just multiply x by + * a power of two. + */ + yy.i[HIWORD] = 0x43300000 | hx; + yy.i[LOWORD] = xx.i[LOWORD]; + t = yy.d; + yy.i[HIWORD] = 0x43300000; + yy.i[LOWORD] = 0; + t -= yy.d; /* t = |x| scaled */ + xx.d = ((int)xx.i[HIWORD] < 0)? -t : t; + hx = xx.i[HIWORD] & ~0x80000000; + e = -1074; + } + + /* now xx.d is normal */ + xx.i[HIWORD] = (xx.i[HIWORD] & ~0x7ff00000) | 0x3fe00000; + *exp = e + (hx >> 20) - 0x3fe; + return (xx.d); +} diff --git a/usr/src/libm/src/m9x/frexpf.c b/usr/src/libm/src/m9x/frexpf.c new file mode 100644 index 0000000..f137adc --- /dev/null +++ b/usr/src/libm/src/m9x/frexpf.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
/*
 * __frexpf(x, exp) splits x into a significand r and a power of two so
 * that x = r * 2^(*exp); r is the return value.  For finite nonzero x,
 * 1/2 <= |r| < 1.  For zero, infinite or NaN x, x itself is returned
 * and *exp is set to 0.
 */
float
__frexpf(float x, int *exp)
{
	union {
		unsigned i;
		float f;
	} val;
	unsigned mag;		/* bits of x with the sign removed */
	int adjust = 0;		/* exponent correction for subnormals */

	val.f = x;
	mag = val.i & ~0x80000000;

	/* infinities, NaNs and zeros come back unchanged with *exp = 0 */
	if (mag >= 0x7f800000 || mag == 0) {
		*exp = 0;
		return (x);
	}

	if (mag < 0x00800000) {		/* x is subnormal */
		/* normalize x by regarding it as an integer */
		int count = (int)mag;

		val.f = ((int)val.i < 0) ? -count : count;
		mag = val.i & ~0x80000000;
		adjust = -149;
	}

	/* val.f is normal here: force its exponent field to that of 1/2 */
	val.i = (val.i & ~0x7f800000) | 0x3f000000;
	*exp = adjust + (mag >> 23) - 0x7e;
	return (val.f);
}
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)frexpl.c 1.6 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak frexpl = __frexpl +#endif + +#include "libm.h" + +#if defined(__sparc) + +long double +__frexpl(long double x, int *exp) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx; + int e, s; + + xx.q = x; + hx = xx.i[0] & ~0x80000000; + + if (hx >= 0x7fff0000) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x00010000) { /* x is subnormal or zero */ + if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) { + *exp = 0; + return (x); + } + + /* normalize x */ + s = xx.i[0] & 0x80000000; + while ((hx | (xx.i[1] & 0xffff0000)) == 0) { + hx = xx.i[1]; + xx.i[1] = xx.i[2]; + xx.i[2] = xx.i[3]; + xx.i[3] = 0; + e -= 32; + } + while (hx < 0x10000) { + hx = (hx << 1) | (xx.i[1] >> 31); + xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31); + xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31); + xx.i[3] <<= 1; + e--; + } + xx.i[0] = s | hx; + } + + /* now xx.q is normal */ + xx.i[0] = (xx.i[0] & ~0x7fff0000) | 0x3ffe0000; + *exp = e + (hx >> 16) - 0x3ffe; + return (xx.q); +} + +#elif defined(__i386) + +long double +__frexpl(long double x, int *exp) { + union { + unsigned i[3]; + long double e; + } xx; + unsigned hx; + int e; + + xx.e = x; + hx = xx.i[2] & 0x7fff; + + if (hx >= 0x7fff) { /* x is infinite or NaN */ + *exp = 0; + return (x); + } + + e = 0; + if (hx < 0x0001) { /* x is subnormal or zero */ + if ((xx.i[0] | xx.i[1]) == 0) { + *exp = 0; + return (x); + } + + /* normalize x */ + 
xx.e *= 18446744073709551616.0L; /* 2^64 */ + hx = xx.i[2] & 0x7fff; + e = -64; + } + + /* now xx.e is normal */ + xx.i[2] = (xx.i[2] & 0x8000) | 0x3ffe; + *exp = e + hx - 0x3ffe; + return (xx.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/ldexp.c b/usr/src/libm/src/m9x/ldexp.c new file mode 100644 index 0000000..60fb5b6 --- /dev/null +++ b/usr/src/libm/src/m9x/ldexp.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)ldexp.c 1.13 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak ldexp = __ldexp +#endif + +#include "libm.h" +#include <errno.h> + +double +ldexp(double x, int n) { + int *px = (int *) &x, ix = px[HIWORD] & ~0x80000000; + + if (ix >= 0x7ff00000 || (px[LOWORD] | ix) == 0) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix >= 0x7ff80000 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif + x = scalbn(x, n); + ix = px[HIWORD] & ~0x80000000; + /* + * SVID3 requires both overflow and underflow cases to set errno + * XPG3/XPG4/XPG4.2/SUSv2 requires overflow to set errno + */ + if (ix >= 0x7ff00000 || (px[LOWORD] | ix) == 0) + errno = ERANGE; + return (x); +} diff --git a/usr/src/libm/src/m9x/ldexpf.c b/usr/src/libm/src/m9x/ldexpf.c new file mode 100644 index 0000000..881874c --- /dev/null +++ b/usr/src/libm/src/m9x/ldexpf.c @@ -0,0 +1,38 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * ldexpf(x, n) returns x * 2^n.  It simply forwards to scalbnf.
 */
float
ldexpf(float x, int n)
{
	return scalbnf(x, n);
}

/*
 * ldexpl(x, n) returns x * 2^n.  It simply forwards to scalbnl.
 */
long double
ldexpl(long double x, int n)
{
	return scalbnl(x, n);
}
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)llrint.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak llrint = __llrint +#if defined(__sparcv9) || defined(__amd64) +#pragma weak lrint = __llrint +#pragma weak __lrint = __llrint +#endif +#endif + +/* + * llrint(x) rounds its argument to the nearest integer according + * to the current rounding direction and converts the result to a + * 64 bit signed integer. + * + * If x is NaN, infinite, or so large that the nearest integer would + * exceed 64 bits, the invalid operation exception is raised. If x + * is not an integer, the inexact exception is raised. + */ + +#include "libm.h" + +long long +llrint(double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpll instruction + * instead. 
+ */ + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + + if (hx < 0x43300000) { /* |x| < 2^52 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) || defined(__amd64) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43300000; +#elif defined(__i386) /* !defined(__amd64) */ + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43e00000; +#else +#error Unknown architecture +#endif + yy.i[LOWORD] = 0; + x = (x + yy.d) - yy.d; + } + + /* now x is nan, inf, or integral */ + return ((long long) x); +} diff --git a/usr/src/libm/src/m9x/llrintf.c b/usr/src/libm/src/m9x/llrintf.c new file mode 100644 index 0000000..70287f3 --- /dev/null +++ b/usr/src/libm/src/m9x/llrintf.c @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
/*
 * llrintf(x) rounds x to an integer by adding and then subtracting a
 * power of two, and converts the result to a 64 bit signed integer.
 *
 * Note: The following code works on x86 (in the default rounding
 * precision mode), but one should just use the fistpll instruction
 * instead.
 */
long long
llrintf(float x)
{
	union {
		unsigned i;
		float f;
	} arg, magic;

	arg.f = x;

	/* |x| >= 2^23, infinite or NaN: convert x as it stands */
	if ((arg.i & ~0x80000000) >= 0x4b000000)
		return ((long long) x);

	/*
	 * |x| < 2^23: add and subtract a power of two carrying the sign
	 * of x to round x to an integer
	 */
#if defined(__sparc) || defined(__amd64)
	magic.i = (arg.i & 0x80000000) | 0x4b000000;
#elif defined(__i386)
	/* assume 64-bit precision */
	magic.i = (arg.i & 0x80000000) | 0x5f000000;
#else
#error Unknown architecture
#endif

	/* keep this as one expression so the sum is not stored in between */
	x = (x + magic.f) - magic.f;

	/*
	 * on LP32 architectures, we can just convert x to a 32-bit
	 * integer and sign-extend it
	 */
	return ((long) x);
}
#if defined(__sparc)

#include "fma.h"

/*
 * llrintl(x), SPARC version (128-bit long double, most significant
 * word first).  The significand is split by hand into a 64-bit integer
 * part and a 32-bit "fraction" word whose lowest bit is sticky, and the
 * rounding mode read from the %fsr decides whether to bump the integer.
 */
long long
llrintl(long double x)
{
	union {
		unsigned w[4];
		long double q;
	} arg;
	union {
		unsigned w[2];
		long long ll;
	} res;
	union {
		unsigned bits;
		float val;
	} big;
	unsigned int mag, sign, frac, fsr;
	int rm, shift, round_up;
	volatile float dummy;

	arg.q = x;
	sign = arg.w[0] & 0x80000000;
	mag = arg.w[0] & ~0x80000000;

	/* |x| > 2^63 + ... or x is nan: convert an out-of-range float */
	if (mag > 0x403e0000) {
		big.bits = sign | 0x7f000000;
		return ((long long) big.val);
	}

	/* x is zero */
	if ((mag | arg.w[1] | arg.w[2] | arg.w[3]) == 0)
		return (0LL);

	/* get the rounding mode */
	__fenv_getfsr(&fsr);
	rm = fsr >> 30;

	/* flip the sense of directed roundings if x is negative */
	if (sign)
		rm ^= rm >> 1;

	/* handle |x| < 1 */
	if (mag < 0x3fff0000) {
		dummy = 1.0e30f;	/* x is nonzero, so raise inexact */
		dummy += 1.0e-30f;
		if (rm == FSR_RP)
			return (sign ? -1LL : 1LL);
		/* to nearest: round away from zero only when |x| > 1/2 */
		if (rm == FSR_RN && mag >= 0x3ffe0000 &&
		    ((mag & 0xffff) | arg.w[1] | arg.w[2] | arg.w[3]) != 0)
			return (sign ? -1LL : 1LL);
		return (0LL);
	}

	/*
	 * extract the integer and fractional parts of x; shift is the
	 * number of significand bits that lie below the binary point
	 */
	shift = 0x406f - (mag >> 16);
	arg.w[0] = 0x10000 | (arg.w[0] & 0xffff);	/* make the 1 explicit */
	if (shift >= 96) {
		res.w[0] = 0;
		res.w[1] = arg.w[0] >> (shift - 96);
		frac = ((arg.w[0] << 1) << (127 - shift)) |
		    (arg.w[1] >> (shift - 96));
		if (((arg.w[1] << 1) << (127 - shift)) | arg.w[2] | arg.w[3])
			frac |= 1;
	} else if (shift >= 64) {
		res.w[0] = arg.w[0] >> (shift - 64);
		res.w[1] = ((arg.w[0] << 1) << (95 - shift)) |
		    (arg.w[1] >> (shift - 64));
		frac = ((arg.w[1] << 1) << (95 - shift)) |
		    (arg.w[2] >> (shift - 64));
		if (((arg.w[2] << 1) << (95 - shift)) | arg.w[3])
			frac |= 1;
	} else {
		res.w[0] = ((arg.w[0] << 1) << (63 - shift)) |
		    (arg.w[1] >> (shift - 32));
		res.w[1] = ((arg.w[1] << 1) << (63 - shift)) |
		    (arg.w[2] >> (shift - 32));
		frac = ((arg.w[2] << 1) << (63 - shift)) |
		    (arg.w[3] >> (shift - 32));
		if ((arg.w[3] << 1) << (63 - shift))
			frac |= 1;
	}

	/* round */
	round_up = 0;
	if (frac != 0) {
		if (rm == FSR_RP)
			round_up = 1;
		else if (rm == FSR_RN)
			round_up = (frac > 0x80000000u ||
			    (frac == 0x80000000 && (res.w[1] & 1)));
	}
	if (round_up) {
		if (++res.w[1] == 0)
			res.w[0]++;
	}

	/* check for result out of range (note that res is |x| at this point) */
	if (res.w[0] > 0x80000000u || (res.w[0] == 0x80000000 && (res.w[1] ||
	    !sign))) {
		big.bits = sign | 0x7f000000;
		return ((long long) big.val);
	}

	/* raise inexact if need be */
	if (frac) {
		dummy = 1.0e30F;
		dummy += 1.0e-30F;
	}

	/* negate result if need be (two's complement across both words) */
	if (sign) {
		res.w[0] = ~res.w[0];
		res.w[1] = -res.w[1];
		if (res.w[1] == 0)
			res.w[0]++;
	}
	return (res.ll);
}
#elif defined(__i386) || defined(__amd64)
/*
 * llrintl(x), x86 version (80-bit extended long double).
 *
 * Note: this works on x86 (in the default rounding precision mode), but
 * one ought to just use the fistpll instruction instead.
 */
long long
llrintl(long double x)
{
	union {
		unsigned w[3];
		long double e;
	} arg, bias;
	int expo;

	arg.e = x;
	expo = arg.w[2] & 0x7fff;

	/* |x| >= 2^63, inf, or nan: nothing to round */
	if (expo >= 0x403e)
		return ((long long) x);

	/*
	 * add and subtract 2^63 (with the sign of x) to round x to an
	 * integer
	 */
	bias.w[0] = 0;
	bias.w[1] = 0x80000000;
	bias.w[2] = (arg.w[2] & 0x8000) | 0x403e;
	x = (x + bias.e) - bias.e;

	/* now x is integral */
	return ((long long) x);
}
#else
#error Unknown architecture
#endif
/*
 * llround(x) rounds its argument to the nearest integer, rounding
 * ties away from zero, and converts the result to a 64 bit signed
 * integer.
 *
 * If x is NaN, infinite, or so large that the nearest integer
 * would exceed 64 bits, the invalid operation exception is raised.
 *
 * The rounding is done on the bit pattern of x: one half (in units of
 * the integer bit) is added to the significand and the fraction bits
 * are then cleared.  A carry out of the significand runs into the
 * exponent field, which produces the next power of two.
 */
long long
llround(double x) {
	union {
		unsigned i[2];
		double d;
	} xx;
	unsigned hx, sx, i;

	xx.d = x;
	hx = xx.i[HIWORD] & ~0x80000000;	/* high word of |x| */
	sx = xx.i[HIWORD] & 0x80000000;		/* sign bit of x */

	if (hx < 0x43300000) { /* |x| < 2^52 */
		/* handle |x| < 1 */
		if (hx < 0x3ff00000) {
			if (hx >= 0x3fe00000)	/* 1/2 <= |x| < 1 */
				return (sx ? -1LL : 1LL);
			return (0LL);
		}

		/* round x at the integer bit */
		if (hx < 0x41300000) {
			/* 1 <= |x| < 2^20: the 1/2 bit is in the high word */
			i = 1U << (0x412 - (hx >> 20));
			xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1));
			xx.i[LOWORD] = 0;
		} else {
			/*
			 * 2^20 <= |x| < 2^52: the 1/2 bit is in the low word.
			 * The shift count is 31 when |x| < 2^21, so the
			 * constant being shifted must be unsigned: 1 << 31
			 * overflows a 32-bit int, which is undefined.
			 */
			i = 1U << (0x432 - (hx >> 20));
			xx.i[LOWORD] += i;
			if (xx.i[LOWORD] < i)	/* carry into the high word */
				xx.i[HIWORD]++;
			xx.i[LOWORD] &= ~(i | (i - 1));
		}
	}

	/* now x is nan, inf, or integral */
	return ((long long) xx.d);
}
/*
 * llroundf(x) rounds x to the nearest integer, with ties going away
 * from zero, and returns the result as a long long.
 */
long long
llroundf(float x)
{
	union {
		unsigned bits;
		float val;
	} arg;
	unsigned mag, half;

	arg.val = x;
	mag = arg.bits & ~0x80000000;

	/* |x| >= 2^23, inf, or nan: x is nan, inf, or integral */
	if (mag >= 0x4b000000)
		return ((long long) x);

	/* |x| < 1/2 */
	if (mag < 0x3f000000)
		return (0LL);

	/* 1/2 <= |x| < 1 */
	if (mag < 0x3f800000)
		return ((arg.bits & 0x80000000) ? -1LL : 1LL);

	/*
	 * round x at the integer bit: add one half, then clear the half
	 * bit and everything below it
	 */
	half = 1 << (0x95 - (mag >> 23));
	arg.bits = (arg.bits + half) & ~((half << 1) - 1);

	/*
	 * on LP32 architectures, we can just convert x to a 32-bit
	 * integer and sign-extend it
	 */
	return ((long) arg.val);
}
#if defined(__sparc)
/*
 * llroundl(x), SPARC version (128-bit long double, most significant
 * word first): round x to the nearest integer, ties away from zero,
 * and return it as a long long.
 */
long long
llroundl(long double x) {
	union {
		unsigned i[4];
		long double q;
	} xx;
	union {
		unsigned i[2];
		long long l;
	} zz;
	union {
		unsigned i;
		float f;
	} tt;
	unsigned hx, sx, frac;
	int j;

	xx.q = x;
	sx = xx.i[0] & 0x80000000;
	hx = xx.i[0] & ~0x80000000;

	/* handle trivial cases */
	if (hx > 0x403e0000) { /* |x| > 2^63 + ... or x is nan */
		/* convert an out-of-range float */
		tt.i = sx | 0x7f000000;
		return ((long long) tt.f);
	}

	/* handle |x| < 1 */
	if (hx < 0x3fff0000) {
		if (hx >= 0x3ffe0000)	/* 1/2 <= |x| < 1 */
			return (sx ? -1LL : 1LL);
		return (0LL);
	}

	/*
	 * extract the integer and fractional parts of x; j is the number
	 * of significand bits below the binary point (49 <= j <= 112).
	 * frac gets the top 32 fraction bits, with bit 0 made sticky.
	 */
	j = 0x406f - (hx >> 16);
	xx.i[0] = 0x10000 | (xx.i[0] & 0xffff);	/* make the leading 1 explicit */
	if (j >= 96) {
		zz.i[0] = 0;
		zz.i[1] = xx.i[0] >> (j - 96);
		frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96));
		if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3])
			frac |= 1;
	} else if (j >= 64) {
		zz.i[0] = xx.i[0] >> (j - 64);
		zz.i[1] = ((xx.i[0] << 1) << (95 - j)) | (xx.i[1] >> (j - 64));
		frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64));
		if (((xx.i[2] << 1) << (95 - j)) | xx.i[3])
			frac |= 1;
	} else {
		zz.i[0] = ((xx.i[0] << 1) << (63 - j)) | (xx.i[1] >> (j - 32));
		zz.i[1] = ((xx.i[1] << 1) << (63 - j)) | (xx.i[2] >> (j - 32));
		frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32));
		if ((xx.i[3] << 1) << (63 - j))
			frac |= 1;
	}

	/* round: fraction >= 1/2 goes away from zero */
	if (frac >= 0x80000000u) {
		if (++zz.i[1] == 0)
			zz.i[0]++;
	}

	/* check for result out of range (note that z is |x| at this point) */
	if (zz.i[0] > 0x80000000u || (zz.i[0] == 0x80000000 && (zz.i[1] ||
	    !sx))) {
		tt.i = sx | 0x7f000000;
		return ((long long) tt.f);
	}

	/* negate result if need be */
	if (sx) {
		zz.i[0] = ~zz.i[0];
		zz.i[1] = -zz.i[1];
		if (zz.i[1] == 0)
			zz.i[0]++;
	}

	return (zz.l);
}
#elif defined(__i386) || defined(__amd64)
/*
 * llroundl(x), x86 version (80-bit extended long double): round x to
 * the nearest integer, ties away from zero, and return it as a long
 * long.  The rounding is done directly on the 64-bit significand held
 * in xx.i[1] (high) and xx.i[0] (low).
 */
long long
llroundl(long double x) {
	union {
		unsigned i[3];
		long double e;
	} xx;
	int ex, sx;
	unsigned i;	/* unsigned: it holds 1 << 31 and is decremented */

	xx.e = x;
	ex = xx.i[2] & 0x7fff;
	sx = xx.i[2] & 0x8000;

	if (ex < 0x403e) { /* |x| < 2^63 */
		/* handle |x| < 1 */
		if (ex < 0x3fff) {
			if (ex >= 0x3ffe)	/* 1/2 <= |x| < 1 */
				return (sx ? -1LL : 1LL);
			return (0LL);
		}

		/* round x at the integer bit */
		if (ex < 0x401e) {
			/* 1 <= |x| < 2^31: the 1/2 bit is in the high word */
			i = 1U << (0x401d - ex);
			xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1));
			xx.i[0] = 0;
		} else {
			/*
			 * 2^31 <= |x| < 2^63: the 1/2 bit is in the low word.
			 * The shift count is 31 when |x| < 2^32; with a
			 * signed i both 1 << 31 and the following i - 1
			 * would overflow, which is undefined.
			 */
			i = 1U << (0x403d - ex);
			xx.i[0] += i;
			if (xx.i[0] < i)	/* carry into the high word */
				xx.i[1]++;
			xx.i[0] &= ~(i | (i - 1));
		}

		/*
		 * the explicit integer bit carried out of the significand:
		 * the result is the next power of two
		 */
		if (xx.i[1] == 0) {
			xx.i[2] = sx | ++ex;
			xx.i[1] = 0x80000000U;
		}
	}

	/* now x is nan, inf, or integral */
	return ((long long) xx.e);
}
#else
#error Unknown architecture
#endif
+ * + * If x is NaN, infinite, or so large that the nearest integer + * would exceed 32 bits, the invalid operation exception is raised. + * If x is not an integer, the inexact exception is raised. + */ + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lrint(double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpl instruction + * instead. + */ + union { + unsigned i[2]; + double d; + } xx, yy; + unsigned hx; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43300000; +#elif defined(__i386) + yy.i[HIWORD] = (xx.i[HIWORD] & 0x80000000) | 0x43e00000; +#else +#error Unknown architecture +#endif + yy.i[LOWORD] = 0; + x = (x + yy.d) - yy.d; + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/lrintf.c b/usr/src/libm/src/m9x/lrintf.c new file mode 100644 index 0000000..92bb664 --- /dev/null +++ b/usr/src/libm/src/m9x/lrintf.c @@ -0,0 +1,71 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lrintf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak lrintf = __lrintf +#endif + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +long +lrintf(float x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one should just use the fistpl instruction + * instead. + */ + union { + unsigned i; + float f; + } xx, yy; + unsigned hx; + + xx.f = x; + hx = xx.i & ~0x80000000; + if (hx < 0x4b000000) { /* |x| < 2^23 */ + /* add and subtract a power of two to round x to an integer */ +#if defined(__sparc) + yy.i = (xx.i & 0x80000000) | 0x4b000000; +#elif defined(__i386) + /* assume 64-bit precision */ + yy.i = (xx.i & 0x80000000) | 0x5f000000; +#else +#error Unknown architecture +#endif + x = (x + yy.f) - yy.f; + return ((long) x); + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/lrintl.c b/usr/src/libm/src/m9x/lrintl.c new file mode 100644 index 0000000..d827195 --- /dev/null +++ b/usr/src/libm/src/m9x/lrintl.c @@ -0,0 +1,154 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lrintl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak lrintl = __lrintl +#endif + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +#if defined(__sparc) + +#include "fma.h" + +long +lrintl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + union { + unsigned i; + float f; + } tt; + unsigned hx, sx, frac, fsr, l; + int rm, j; + volatile float dummy; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */ + /* convert an out-of-range float */ + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */ + return (0L); + + /* get the rounding mode */ + __fenv_getfsr(&fsr); + rm = fsr >> 30; + + /* flip the sense of directed roundings if x is negative */ + if (sx) + rm ^= rm >> 1; + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + dummy = 1.0e30F; /* x is nonzero, so raise inexact */ + dummy += 1.0e-30F; + if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3ffe0000 && + ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3])))) + return (sx ? 
-1L : 1L); + return (0L); + } + + /* extract the integer and fractional parts of x */ + j = 0x406f - (hx >> 16); /* 91 <= j <= 112 */ + xx.i[0] = 0x10000 | (xx.i[0] & 0xffff); + if (j >= 96) { /* 96 <= j <= 112 */ + l = xx.i[0] >> (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3]) + frac |= 1; + } else { /* 91 <= j <= 95 */ + l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64)); + frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64)); + if ((xx.i[2] << (96 - j)) | xx.i[3]) + frac |= 1; + } + + /* round */ + if (frac && (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000U || + (frac == 0x80000000 && (l & 1)))))) + l++; + + /* check for result out of range (note that z is |x| at this point) */ + if (l > 0x80000000U || (l == 0x80000000U && !sx)) { + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } + + /* raise inexact if need be */ + if (frac) { + dummy = 1.0e30F; + dummy += 1.0e-30F; + } + + /* negate result if need be */ + if (sx) + l = -l; + return ((long) l); +} +#elif defined(__i386) +long +lrintl(long double x) { + /* + * Note: The following code works on x86 (in the default rounding + * precision mode), but one ought to just use the fistpl instruction + * instead. 
+ */ + union { + unsigned i[3]; + long double e; + } xx, yy; + int ex; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + if (ex < 0x403e) { /* |x| < 2^63 */ + /* add and subtract a power of two to round x to an integer */ + yy.i[2] = (xx.i[2] & 0x8000) | 0x403e; + yy.i[1] = 0x80000000; + yy.i[0] = 0; + x = (x + yy.e) - yy.e; + } + + /* now x is nan, inf, or integral */ + return ((long) x); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__i386) */ +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/lround.c b/usr/src/libm/src/m9x/lround.c new file mode 100644 index 0000000..c009156 --- /dev/null +++ b/usr/src/libm/src/m9x/lround.c @@ -0,0 +1,82 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lround.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak lround = __lround +#endif + +/* + * lround(x) rounds its argument to the nearest integer, rounding ties + * away from zero, and converts the result to a 32 bit signed integer. 
#if defined(_ILP32)
/*
 * lround(x) rounds its argument to the nearest integer, rounding ties
 * away from zero, and converts the result to a 32 bit signed integer.
 *
 * If x is NaN, infinite, or so large that the nearest integer
 * would exceed 32 bits, the invalid operation exception is raised.
 *
 * The rounding is done on the bit pattern of x: one half (in units of
 * the integer bit) is added to the significand and the fraction bits
 * are then cleared.  A carry out of the significand runs into the
 * exponent field, which produces the next power of two.
 */
long
lround(double x) {
	union {
		unsigned i[2];
		double d;
	} xx;
	unsigned hx, sx, i;

	xx.d = x;
	hx = xx.i[HIWORD] & ~0x80000000;	/* high word of |x| */
	sx = xx.i[HIWORD] & 0x80000000;		/* sign bit of x */
	if (hx < 0x43300000) {		/* |x| < 2^52 */
		if (hx < 0x3ff00000) {	/* |x| < 1 */
			if (hx >= 0x3fe00000)	/* 1/2 <= |x| < 1 */
				return (sx ? -1L : 1L);
			return (0L);
		}

		/* round x at the integer bit */
		if (hx < 0x41300000) {
			/* 1 <= |x| < 2^20: the 1/2 bit is in the high word */
			i = 1U << (0x412 - (hx >> 20));
			xx.i[HIWORD] = (xx.i[HIWORD] + i) & ~(i | (i - 1));
			xx.i[LOWORD] = 0;
		} else {
			/*
			 * 2^20 <= |x| < 2^52: the 1/2 bit is in the low word.
			 * The shift count is 31 when |x| < 2^21, so the
			 * constant being shifted must be unsigned: 1 << 31
			 * overflows a 32-bit int, which is undefined.
			 */
			i = 1U << (0x432 - (hx >> 20));
			xx.i[LOWORD] += i;
			if (xx.i[LOWORD] < i)	/* carry into the high word */
				xx.i[HIWORD]++;
			xx.i[LOWORD] &= ~(i | (i - 1));
		}
	}

	/* now x is nan, inf, or integral */
	return ((long) xx.d);
}
#else
#error Unsupported architecture
#endif	/* defined(_ILP32) */
#if defined(_ILP32)
/*
 * lroundf(x) rounds x to the nearest integer, with ties going away
 * from zero, and returns the result as a long.
 */
long
lroundf(float x)
{
	union {
		unsigned bits;
		float val;
	} arg;
	unsigned mag, half;

	arg.val = x;
	mag = arg.bits & ~0x80000000;

	/* |x| >= 2^23, inf, or nan: x is nan, inf, or integral */
	if (mag >= 0x4b000000)
		return ((long) x);

	/* |x| < 1/2 */
	if (mag < 0x3f000000)
		return (0L);

	/* 1/2 <= |x| < 1 */
	if (mag < 0x3f800000)
		return ((arg.bits & 0x80000000) ? -1L : 1L);

	/*
	 * round x at the integer bit: add one half, then clear the half
	 * bit and everything below it
	 */
	half = 1 << (0x95 - (mag >> 23));
	arg.bits = (arg.bits + half) & ~((half << 1) - 1);
	return ((long) arg.val);
}
#else
#error Unsupported architecture
#endif	/* defined(_ILP32) */
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)lroundl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak lroundl = __lroundl +#endif + +#include <sys/isa_defs.h> /* _ILP32 */ +#include "libm.h" + +#if defined(_ILP32) +#if defined(__sparc) +long +lroundl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + union { + unsigned i; + float f; + } tt; + unsigned hx, sx, frac, l; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx > 0x401e0000) { /* |x| > 2^31 + ... or x is nan */ + /* convert an out-of-range float */ + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (hx >= 0x3ffe0000) + return (sx ? 
-1L : 1L); + return (0L); + } + + /* extract the integer and fractional parts of x */ + j = 0x406f - (hx >> 16); /* 91 <= j <= 112 */ + xx.i[0] = 0x10000 | (xx.i[0] & 0xffff); + if (j >= 96) { /* 96 <= j <= 112 */ + l = xx.i[0] >> (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if (((xx.i[1] << 1) << (127 - j)) | xx.i[2] | xx.i[3]) + frac |= 1; + } else { /* 91 <= j <= 95 */ + l = (xx.i[0] << (96 - j)) | (xx.i[1] >> (j - 64)); + frac = (xx.i[1] << (96 - j)) | (xx.i[2] >> (j - 64)); + if ((xx.i[2] << (96 - j)) | xx.i[3]) + frac |= 1; + } + + /* round */ + if (frac >= 0x80000000U) + l++; + + /* check for result out of range (note that z is |x| at this point) */ + if (l > 0x80000000U || (l == 0x80000000U && !sx)) { + tt.i = sx | 0x7f000000; + return ((long) tt.f); + } + + /* negate result if need be */ + if (sx) + l = -l; + return ((long) l); +} +#elif defined(__i386) +long +lroundl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + if (ex < 0x3fff) { /* |x| < 1 */ + if (ex >= 0x3ffe) + return (sx ? 
-1L : 1L); + return (0L); + } + + /* round x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] += i; + if (xx.i[0] < i) + xx.i[1]++; + xx.i[0] &= ~(i | (i - 1)); + } + if (xx.i[1] == 0) { + xx.i[2] = sx | ++ex; + xx.i[1] = 0x80000000U; + } + } + + /* now x is nan, inf, or integral */ + return ((long) xx.e); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__i386) */ +#else +#error Unsupported architecture +#endif /* defined(_ILP32) */ diff --git a/usr/src/libm/src/m9x/modf.c b/usr/src/libm/src/m9x/modf.c new file mode 100644 index 0000000..66ce2bd --- /dev/null +++ b/usr/src/libm/src/m9x/modf.c @@ -0,0 +1,92 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)modf.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak modf = __modf +#pragma weak _modf = __modf +#endif + +/* + * modf(x, iptr) decomposes x into an integral part and a fractional + * part both having the same sign as x. 
/*
 * modf(x, iptr) decomposes x into an integral part and a fractional
 * part both having the same sign as x.  It stores the integral part
 * in *iptr and returns the fractional part.
 *
 * If x is infinite, modf sets *iptr to x and returns copysign(0.0,x).
 * If x is NaN, modf sets *iptr to x and returns x.
 *
 * If x is a signaling NaN, this code does not attempt to raise the
 * invalid operation exception.
 */
double
__modf(double x, double *iptr) {
	union {
		unsigned i[2];
		double d;
	} xx, yy;
	unsigned hx, s;

	xx.d = x;
	hx = xx.i[HIWORD] & ~0x80000000;	/* high word of |x| */

	if (hx >= 0x43300000) {	/* x is NaN, infinite, or integral */
		*iptr = x;
		/* finite or infinite (not NaN): the fraction is a signed zero */
		if (hx < 0x7ff00000 || (hx == 0x7ff00000 &&
		    xx.i[LOWORD] == 0)) {
			xx.i[HIWORD] &= 0x80000000;
			xx.i[LOWORD] = 0;
		}
		return (xx.d);
	}

	if (hx < 0x3ff00000) {	/* |x| < 1 */
		/* the integral part is a zero with the sign of x */
		xx.i[HIWORD] &= 0x80000000;
		xx.i[LOWORD] = 0;
		*iptr = xx.d;
		return (x);
	}

	/* split x at the binary point by masking off the fraction bits */
	s = xx.i[HIWORD] & 0x80000000;
	if (hx < 0x41400000) {
		/* 1 <= |x| < 2^21: the low word is all fraction */
		yy.i[HIWORD] = xx.i[HIWORD] & ~((1U << (0x413 - (hx >> 20))) -
		    1);
		yy.i[LOWORD] = 0;
	} else {
		/*
		 * 2^21 <= |x| < 2^52: the binary point is in the low word.
		 * The shift count is 31 when |x| < 2^22, so the constant
		 * must be unsigned: with a signed 1 both the shift and the
		 * following subtraction would overflow, which is undefined.
		 */
		yy.i[HIWORD] = xx.i[HIWORD];
		yy.i[LOWORD] = xx.i[LOWORD] & ~((1U << (0x433 - (hx >> 20))) -
		    1);
	}
	*iptr = yy.d;
	xx.d -= yy.d;
	xx.i[HIWORD] = (xx.i[HIWORD] & ~0x80000000) | s;
						/* keep sign of x */
	return (xx.d);
}
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)modff.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak modff = __modff +#pragma weak _modff = __modff +#endif + +#include "libm.h"

/*
 * __modff(x, iptr): single-precision modf.  Stores the integral part of x
 * in *iptr and returns the fractional part; both carry the sign of x.
 * A NaN argument is stored in *iptr and returned as is.
 */
float
__modff(float x, float *iptr) {
	union {
		unsigned i;
		float f;
	} val, whole;
	unsigned sign, mag, fracbits;

	val.f = x;
	sign = val.i & 0x80000000;
	mag = val.i & ~0x80000000;

	if (mag < 0x3f800000) {
		/* |x| < 1: the integral part is a zero with x's sign */
		whole.i = sign;
		*iptr = whole.f;
		return (x);
	}

	if (mag >= 0x4b000000) {
		/* x is NaN, infinite, or already integral */
		*iptr = x;
		if (mag <= 0x7f800000)
			val.i = sign;	/* not a NaN: fraction is signed zero */
		return (val.f);
	}

	/* clear the bits below the binary point to get the integral part */
	fracbits = 0x96 - (mag >> 23);		/* 1..23 here */
	whole.i = val.i & ~((1 << fracbits) - 1);
	*iptr = whole.f;

	val.f -= whole.f;
	/* restore sign in case difference is 0 */
	val.i = (val.i & ~0x80000000) | sign;
	return (val.f);
}

diff --git a/usr/src/libm/src/m9x/modfl.c b/usr/src/libm/src/m9x/modfl.c new file mode 100644 index 0000000..134dba8 --- /dev/null +++ b/usr/src/libm/src/m9x/modfl.c @@ -0,0 +1,149 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License.
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)modfl.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak modfl = __modfl +#endif + +#include "libm.h" + +#if defined(__sparc) + +long double +__modfl(long double x, long double *iptr) { + union { + unsigned i[4]; + long double q; + } xx, yy; + unsigned hx, s; + + xx.q = x; + hx = xx.i[0] & ~0x80000000; + + if (hx >= 0x406f0000) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7fff0000 || (hx == 0x7fff0000 && + (xx.i[1] | xx.i[2] | xx.i[3]) == 0)) { + xx.i[0] &= 0x80000000; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } + return (xx.q); + } + + if (hx < 0x3fff0000) { /* |x| < 1 */ + xx.i[0] &= 0x80000000; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + *iptr = xx.q; + return (x); + } + + /* split x at the binary point */ + s = xx.i[0] & 0x80000000; + if (hx < 0x40100000) { + yy.i[0] = xx.i[0] & ~((1 << (0x400f - (hx >> 16))) - 1); + yy.i[1] = yy.i[2] = yy.i[3] = 0; + } else if (hx < 0x40300000) { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1] & ~((1 << (0x402f - (hx >> 16))) - 1); + yy.i[2] = yy.i[3] = 0; + } else if (hx < 0x40500000) { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1]; + yy.i[2] = xx.i[2] & ~((1 << (0x404f - (hx >> 16))) - 1); + yy.i[3] = 0; + } else { + yy.i[0] = xx.i[0]; + yy.i[1] = xx.i[1]; + yy.i[2] = xx.i[2]; + yy.i[3] = xx.i[3] & ~((1 << (0x406f - (hx >> 16))) - 1); + } + *iptr = yy.q; + + /* + * we could implement the following more efficiently than by using + * software emulation of fsubq, but we'll do it this way for now + * (and hope hardware 
support becomes commonplace) + */ + xx.q -= yy.q; + xx.i[0] = (xx.i[0] & ~0x80000000) | s; /* keep sign of x */ + return (xx.q); +} + +#elif defined(__i386) + +long double +__modfl(long double x, long double *iptr) { + union { + unsigned i[3]; + long double e; + } xx, yy; + unsigned hx, s; + + /* + * It might be faster to use one of the x86 fpops instead of + * the following. + */ + xx.e = x; + hx = xx.i[2] & 0x7fff; + + if (hx >= 0x403e) { /* x is NaN, infinite, or integral */ + *iptr = x; + if (hx < 0x7fff || (hx == 0x7fff && + ((xx.i[1] << 1) | xx.i[0]) == 0)) { + xx.i[2] &= 0x8000; + xx.i[1] = xx.i[0] = 0; + } + return (xx.e); + } + + if (hx < 0x3fff) { /* |x| < 1 */ + xx.i[2] &= 0x8000; + xx.i[1] = xx.i[0] = 0; + *iptr = xx.e; + return (x); + } + + /* split x at the binary point */ + s = xx.i[2] & 0x8000; + yy.i[2] = xx.i[2]; + if (hx < 0x401f) { + yy.i[1] = xx.i[1] & ~((1 << (0x401e - hx)) - 1); + yy.i[0] = 0; + } else { + yy.i[1] = xx.i[1]; + yy.i[0] = xx.i[0] & ~((1 << (0x403e - hx)) - 1); + } + *iptr = yy.e; + xx.e -= yy.e; + xx.i[2] = (xx.i[2] & ~0x8000) | s; /* keep sign of x */ + return (xx.e); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nan.c b/usr/src/libm/src/m9x/nan.c new file mode 100644 index 0000000..6a994ca --- /dev/null +++ b/usr/src/libm/src/m9x/nan.c @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nan.c 1.5 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nan = __nan +#endif + +/* + * nan(c) returns a NaN. This implementation ignores c. + */ + +#include "libm.h" + +#if defined(__sparc) + +static const union { + unsigned i[2]; + double d; +} __nan_union = { 0x7fffffff, 0xffffffff }; + +#elif defined(__i386) + +static const union { + unsigned i[2]; + double d; +} __nan_union = { 0xffffffff, 0x7fffffff }; + +#else +#error Unknown architecture +#endif + +/* ARGSUSED0 */ +double +__nan(const char *c) { + return (__nan_union.d); +} diff --git a/usr/src/libm/src/m9x/nanf.c b/usr/src/libm/src/m9x/nanf.c new file mode 100644 index 0000000..08a029e --- /dev/null +++ b/usr/src/libm/src/m9x/nanf.c @@ -0,0 +1,44 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. 
All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nanf.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nanf = __nanf +#endif + +#include "libm.h" + +static const union { + unsigned i; + float f; +} __nanf_union = { 0x7fffffff }; + +/* ARGSUSED0 */ +float +__nanf(const char *c) { + return (__nanf_union.f); +} diff --git a/usr/src/libm/src/m9x/nanl.c b/usr/src/libm/src/m9x/nanl.c new file mode 100644 index 0000000..cf00010 --- /dev/null +++ b/usr/src/libm/src/m9x/nanl.c @@ -0,0 +1,57 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)nanl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nanl = __nanl +#endif + +#include "libm.h" + +#if defined(__sparc) + +static const union { + unsigned i[4]; + long double ld; +} __nanl_union = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; + +#elif defined(__i386) + +static const union { + unsigned i[3]; + long double ld; +} __nanl_union = { 0xffffffff, 0xffffffff, 0x7fff }; + +#else +#error Unknown architecture +#endif + +/* ARGSUSED0 */ +long double +__nanl(const char *c) { + return (__nanl_union.ld); +} diff --git a/usr/src/libm/src/m9x/nearbyint.c b/usr/src/libm/src/m9x/nearbyint.c new file mode 100644 index 0000000..d977522 --- /dev/null +++ b/usr/src/libm/src/m9x/nearbyint.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nearbyint.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nearbyint = __nearbyint +#endif + +/* + * nearbyint(x) returns the nearest fp integer to x in the direction + * corresponding to the current rounding direction without raising + * the inexact exception. 
+ * + * nearbyint(x) is x unchanged if x is +/-0 or +/-inf. If x is NaN, + * nearbyint(x) is also NaN. + */ + +#include "libm.h" +#include "fenv_synonyms.h" +#include <fenv.h>

/*
 * __nearbyint(x): round x to an integral value according to the rounding
 * direction reported by fegetround(), using integer operations on the two
 * 32-bit words of x (HIWORD/LOWORD come from libm.h).  The only floating
 * point operation performed is x + x, and only for infinite or NaN
 * arguments.
 *
 * The unit-in-the-integer-place value `i' is built with an unsigned shift:
 * the shift count reaches 31, which would move a 1 into the sign bit of a
 * signed int.
 */
double
__nearbyint(double x) {
	union {
		unsigned i[2];
		double d;
	} xx;
	unsigned hx, sx, i, frac;
	int rm, j;

	xx.d = x;
	sx = xx.i[HIWORD] & 0x80000000;
	hx = xx.i[HIWORD] & ~0x80000000;

	/* handle trivial cases */
	if (hx >= 0x43300000) {	/* x is nan, inf, or already integral */
		if (hx >= 0x7ff00000)	/* x is inf or nan */
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
			return (hx >= 0x7ff80000 ? x : x + x);
			/* assumes sparc-like QNaN */
#else
			return (x + x);
#endif
		return (x);
	} else if ((hx | xx.i[LOWORD]) == 0)	/* x is zero */
		return (x);

	/* get the rounding mode */
	rm = fegetround();

	/* flip the sense of directed roundings if x is negative */
	if (sx && (rm == FE_UPWARD || rm == FE_DOWNWARD))
		rm = (FE_UPWARD + FE_DOWNWARD) - rm;

	/* handle |x| < 1 */
	if (hx < 0x3ff00000) {
		/* result is +/-1 when rounding away, or to nearest with |x| > 1/2 */
		if (rm == FE_UPWARD || (rm == FE_TONEAREST &&
		    (hx >= 0x3fe00000 && ((hx & 0xfffff) | xx.i[LOWORD]))))
			xx.i[HIWORD] = sx | 0x3ff00000;
		else
			xx.i[HIWORD] = sx;
		xx.i[LOWORD] = 0;
		return (xx.d);
	}

	/* round x at the integer bit; j is the number of fraction bits */
	j = 0x433 - (hx >> 20);
	if (j >= 32) {
		/* integer bit lies in the high word */
		i = 1U << (j - 32);
		/*
		 * frac holds the leading fraction bits left-justified;
		 * any bits that do not fit are folded into its low bit
		 */
		frac = ((xx.i[HIWORD] << 1) << (63 - j)) |
		    (xx.i[LOWORD] >> (j - 32));
		if (xx.i[LOWORD] & (i - 1))
			frac |= 1;
		if (!frac)
			return (x);
		xx.i[LOWORD] = 0;
		xx.i[HIWORD] &= ~(i - 1);
		/* round up, or to nearest with ties going to even */
		if (rm == FE_UPWARD || (rm == FE_TONEAREST &&
		    (frac > 0x80000000u || ((frac == 0x80000000) &&
		    (xx.i[HIWORD] & i)))))
			xx.i[HIWORD] += i;
	} else {
		/* integer bit lies in the low word; j is 1..31 here */
		i = 1U << j;
		frac = (xx.i[LOWORD] << 1) << (31 - j);
		if (!frac)
			return (x);
		xx.i[LOWORD] &= ~(i - 1);
		if (rm == FE_UPWARD || (rm == FE_TONEAREST &&
		    (frac > 0x80000000u || ((frac == 0x80000000) &&
		    (xx.i[LOWORD] & i))))) {
			xx.i[LOWORD] += i;
			if (xx.i[LOWORD] == 0)	/* carry into the high word */
				xx.i[HIWORD]++;
		}
	}
	return (xx.d);
}
+ +#if 0 + +/* +* Alternate implementations for SPARC, x86, using fp ops. These may +* be faster depending on how expensive saving and restoring the fp +* modes and status flags is. +*/ + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +double +__nearbyint(double x) { + union { + unsigned i[2]; + double d; + } xx, yy; + double z; + unsigned hx, sx, fsr, oldfsr; + int rm; + + xx.d = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x43300000) /* x is nan, inf, or already integral */ + return (x + 0.0); + else if ((hx | xx.i[1]) == 0) /* x is zero */ + return (x); + + /* save the fsr */ + __fenv_getfsr(&oldfsr); + + /* handle |x| < 1 */ + if (hx < 0x3ff00000) { + /* flip the sense of directed roundings if x is negative */ + rm = oldfsr >> 30; + if (sx) + rm ^= rm >> 1; + if (rm == FSR_RP || (rm == FSR_RN && (hx >= 0x3fe00000 && + ((hx & 0xfffff) | xx.i[1])))) + xx.i[0] = sx | 0x3ff00000; + else + xx.i[0] = sx; + xx.i[1] = 0; + return (xx.d); + } + + /* clear the inexact trap */ + fsr = oldfsr & ~FSR_NXM; + __fenv_setfsr(&fsr); + + /* round x at the integer bit */ + yy.i[0] = sx | 0x43300000; + yy.i[1] = 0; + z = (x + yy.d) - yy.d; + + /* restore the old fsr */ + __fenv_setfsr(&oldfsr); + + return (z); +} + +#elif defined(__i386) + +/* inline template */ +extern long double frndint(long double); + +double +__nearbyint(double x) { + long double z; + unsigned oldcwsw, cwsw; + + /* save the control and status words, mask the inexact exception */ + __fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint((long double) x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + /* note: the value of z is representable in double precision */ + return (z); +} + +#else +#error Unknown architecture +#endif + +#endif diff --git 
a/usr/src/libm/src/m9x/nearbyintf.c b/usr/src/libm/src/m9x/nearbyintf.c new file mode 100644 index 0000000..2251c89 --- /dev/null +++ b/usr/src/libm/src/m9x/nearbyintf.c @@ -0,0 +1,185 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)nearbyintf.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nearbyintf = __nearbyintf +#endif + +#include "libm.h" +#include "fenv_synonyms.h" +#include <fenv.h> + +float +__nearbyintf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i, frac; + int rm; + + xx.f = x; + sx = xx.i & 0x80000000; + hx = xx.i & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x4b000000) { /* x is nan, inf, or already integral */ + if (hx > 0x7f800000) /* x is nan */ + return (x * x); /* + -> * for Cheetah */ + return (x); + } else if (hx == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + rm = fegetround(); + + /* flip the sense of directed roundings if x is negative */ + if (sx && (rm == FE_UPWARD || rm == FE_DOWNWARD)) + rm = (FE_UPWARD + FE_DOWNWARD) - rm; + + /* handle |x| < 1 */ + if (hx < 0x3f800000) { + if (rm == FE_UPWARD || (rm == FE_TONEAREST && hx > 0x3f000000)) + xx.i = sx | 0x3f800000; + else + xx.i = sx; + return (xx.f); + } + + /* round x at the integer bit */ + i = 1 << (0x96 - (hx >> 23)); + frac = hx & (i - 1); + if (!frac) + return (x); + + hx &= ~(i - 1); + if (rm == FE_UPWARD || (rm == FE_TONEAREST && (frac > (i >> 1) || + (frac == (i >> 1)) && (hx & i)))) + xx.i = sx | (hx + i); + else + xx.i = sx | hx; + return (xx.f); +} + +#if 0 + +/* + * Alternate implementations for SPARC, x86, using fp ops. These may + * be faster depending on how expensive saving and restoring the fp + * modes and status flags is. 
+ */ + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +float +__nearbyintf(float x) { + union { + unsigned i; + float f; + } xx, yy; + float z; + unsigned hx, sx, fsr, oldfsr; + int rm; + + xx.f = x; + sx = xx.i & 0x80000000; + hx = xx.i & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x4b000000) /* x is nan, inf, or already integral */ + return (x + 0.0f); + else if (hx == 0) /* x is zero */ + return (x); + + /* save the fsr */ + __fenv_getfsr(&oldfsr); + + /* handle |x| < 1 */ + if (hx < 0x3f800000) { + /* flip the sense of directed roundings if x is negative */ + rm = oldfsr >> 30; + if (sx) + rm ^= rm >> 1; + if (rm == FSR_RP || (rm == FSR_RN && hx > 0x3f000000)) + xx.i = sx | 0x3f800000; + else + xx.i = sx; + return (xx.f); + } + + /* clear the inexact trap */ + fsr = oldfsr & ~FSR_NXM; + __fenv_setfsr(&fsr); + + /* round x at the integer bit */ + yy.i = sx | 0x4b000000; + z = (x + yy.f) - yy.f; + + /* restore the old fsr */ + __fenv_setfsr(&oldfsr); + + return (z); +} + +#elif defined(__i386) + +/* inline template */ +extern long double frndint(long double); + +float +__nearbyintf(float x) { + long double z; + unsigned oldcwsw, cwsw; + + /* save the control and status words, mask the inexact exception */ + __fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint((long double) x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + /* note: the value of z is representable in single precision */ + return (z); +} + +#else +#error Unknown architecture +#endif + +#endif diff --git a/usr/src/libm/src/m9x/nearbyintl.c b/usr/src/libm/src/m9x/nearbyintl.c new file mode 100644 index 0000000..98def46 --- /dev/null +++ b/usr/src/libm/src/m9x/nearbyintl.c @@ -0,0 +1,183 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common 
Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nearbyintl.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nearbyintl = __nearbyintl +#endif + +#include "libm.h" +#include "fma.h" + +#if defined(__sparc) + +static union { + unsigned i; + float f; +} snan = { 0x7f800001 }; + +long double +__nearbyintl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx, i, frac, fsr; + int rm, j; + volatile float dummy; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) { /* x is nan, inf, or already integral */ + /* check for signaling nan */ + if ((hx > 0x7fff0000 || (hx == 0x7fff0000 && + (xx.i[1] | xx.i[2] | xx.i[3]))) && !(hx & 0x8000)) { + dummy = snan.f; + dummy += snan.f; + xx.i[0] = sx | hx | 0x8000; + } + return (xx.q); + } else if ((hx | xx.i[1] | xx.i[2] | xx.i[3]) == 0) /* x is zero */ + return (x); + + /* get the rounding mode */ + __fenv_getfsr(&fsr); + rm = fsr >> 30; + + /* flip the sense of directed roundings if x is negative */ + if (sx) + rm ^= rm >> 1; + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) { + if (rm == FSR_RP || (rm == FSR_RN 
&& (hx >= 0x3ffe0000 && + ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3])))) + xx.i[0] = sx | 0x3fff0000; + else + xx.i[0] = sx; + xx.i[1] = xx.i[2] = xx.i[3] = 0; + return (xx.q); + } + + /* round x at the integer bit */ + j = 0x406f - (hx >> 16); + if (j >= 96) { + i = 1 << (j - 96); + frac = ((xx.i[0] << 1) << (127 - j)) | (xx.i[1] >> (j - 96)); + if ((xx.i[1] & (i - 1)) | xx.i[2] | xx.i[3]) + frac |= 1; + if (!frac) + return (x); + xx.i[1] = xx.i[2] = xx.i[3] = 0; + xx.i[0] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[0] & i))))) + xx.i[0] += i; + } else if (j >= 64) { + i = 1 << (j - 64); + frac = ((xx.i[1] << 1) << (95 - j)) | (xx.i[2] >> (j - 64)); + if ((xx.i[2] & (i - 1)) | xx.i[3]) + frac |= 1; + if (!frac) + return (x); + xx.i[2] = xx.i[3] = 0; + xx.i[1] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[1] & i))))) { + xx.i[1] += i; + if (xx.i[1] == 0) + xx.i[0]++; + } + } else if (j >= 32) { + i = 1 << (j - 32); + frac = ((xx.i[2] << 1) << (63 - j)) | (xx.i[3] >> (j - 32)); + if (xx.i[3] & (i - 1)) + frac |= 1; + if (!frac) + return (x); + xx.i[3] = 0; + xx.i[2] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[2] & i))))) { + xx.i[2] += i; + if (xx.i[2] == 0) + if (++xx.i[1] == 0) + xx.i[0]++; + } + } else { + i = 1 << j; + frac = (xx.i[3] << 1) << (31 - j); + if (!frac) + return (x); + xx.i[3] &= ~(i - 1); + if (rm == FSR_RP || (rm == FSR_RN && (frac > 0x80000000u || + (frac == 0x80000000 && (xx.i[3] & i))))) { + xx.i[3] += i; + if (xx.i[3] == 0) + if (++xx.i[2] == 0) + if (++xx.i[1] == 0) + xx.i[0]++; + } + } + + return (xx.q); +} + +#elif defined(__i386) + +/* inline template */ +extern long double frndint(long double); + +long double +__nearbyintl(long double x) { + long double z; + unsigned oldcwsw, cwsw; + + /* save the control and status words, mask the inexact exception */ + 
__fenv_getcwsw(&oldcwsw); + cwsw = oldcwsw | 0x00200000; + __fenv_setcwsw(&cwsw); + + z = frndint(x); + + /* + * restore the control and status words, preserving all but the + * inexact flag + */ + __fenv_getcwsw(&cwsw); + oldcwsw |= (cwsw & 0x1f); + __fenv_setcwsw(&oldcwsw); + + return (z); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nexttoward.c b/usr/src/libm/src/m9x/nexttoward.c new file mode 100644 index 0000000..d9bbb55 --- /dev/null +++ b/usr/src/libm/src/m9x/nexttoward.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nexttoward.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nexttoward = __nexttoward +#endif + +/* + * nexttoward(x, y) delivers the next representable number after x + * in the direction of y. If x and y are both zero, the result is + * zero with the same sign as y. If either x or y is NaN, the result + * is NaN. + * + * If x != y and the result is infinite, overflow is raised; if + * x != y and the result is subnormal or zero, underflow is raised. 
+ * (This is wrong, but it's what C99 apparently wants.) + */ + +#include "libm.h" + +#if defined(__sparc) + +static union { + unsigned i[2]; + double d; +} C[] = { + 0x00100000, 0, + 0x7fe00000, 0, + 0x7fffffff, 0xffffffff +}; + +#define tiny C[0].d +#define huge C[1].d +#define qnan C[2].d + +enum fcc_type { + fcc_equal = 0, + fcc_less = 1, + fcc_greater = 2, + fcc_unordered = 3 +}; + +#ifdef __sparcv9 +#define _Q_cmp _Qp_cmp +#endif + +extern enum fcc_type _Q_cmp(const long double *, const long double *); + +double +__nexttoward(double x, long double y) { + union { + unsigned i[2]; + double d; + } xx; + union { + unsigned i[4]; + long double q; + } yy; + long double lx; + unsigned hx; + volatile double dummy; + enum fcc_type rel; + + /* + * It would be somewhat more efficient to check for NaN and + * zero operands before converting x to long double and then + * to code the comparison in line rather than calling _Q_cmp. + * However, since this code probably won't get used much, + * I'm opting in favor of simplicity instead. 
+ */ + lx = xx.d = x; + hx = (xx.i[0] & ~0x80000000) | xx.i[1]; + + /* check for each of four possible orderings */ + rel = _Q_cmp(&lx, &y); + if (rel == fcc_unordered) + return (qnan); + + if (rel == fcc_equal) { + if (hx == 0) { /* x is zero; return zero with y's sign */ + yy.q = y; + xx.i[0] = yy.i[0]; + return (xx.d); + } + return (x); + } + + if (rel == fcc_less) { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0; + xx.i[1] = 0x00000001; + } else if ((int)xx.i[0] >= 0) { /* x is positive */ + if (++xx.i[1] == 0) + xx.i[0]++; + } else { + if (xx.i[1]-- == 0) + xx.i[0]--; + } + } else { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x80000000; + xx.i[1] = 0x00000001; + } else if ((int)xx.i[0] >= 0) { /* x is positive */ + if (xx.i[1]-- == 0) + xx.i[0]--; + } else { + if (++xx.i[1] == 0) + xx.i[0]++; + } + } + + /* raise exceptions as needed */ + hx = xx.i[0] & ~0x80000000; + if (hx == 0x7ff00000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00100000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.d); +} + +#elif defined(__i386) + +static union { + unsigned i[2]; + double d; +} C[] = { + 0, 0x00100000, + 0, 0x7fe00000, +}; + +#define tiny C[0].d +#define huge C[1].d + +double +__nexttoward(double x, long double y) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx; + long double lx; + volatile double dummy; + + lx = xx.d = x; + hx = (xx.i[1] & ~0x80000000) | xx.i[0]; + + /* check for each of four possible orderings */ + if (isunordered(lx, y)) + return ((double) (lx + y)); + + if (lx == y) + return ((double) y); + + if (lx < y) { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x00000001; + xx.i[1] = 0; + } else if ((int)xx.i[1] >= 0) { /* x is positive */ + if (++xx.i[0] == 0) + xx.i[1]++; + } else { + if (xx.i[0]-- == 0) + xx.i[1]--; + } + } else { + if (hx == 0) { /* x is zero */ + xx.i[0] = 0x00000001; + xx.i[1] = 0x80000000; + } else if ((int)xx.i[1] >= 0) { /* x is positive */ + if (xx.i[0]-- == 0) + xx.i[1]--; + } else { + if 
(++xx.i[0] == 0) + xx.i[1]++; + } + } + + /* raise exceptions as needed */ + hx = xx.i[1] & ~0x80000000; + if (hx == 0x7ff00000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00100000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.d); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nexttowardf.c b/usr/src/libm/src/m9x/nexttowardf.c new file mode 100644 index 0000000..0bf8a05 --- /dev/null +++ b/usr/src/libm/src/m9x/nexttowardf.c @@ -0,0 +1,184 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "@(#)nexttowardf.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nexttowardf = __nexttowardf +#endif + +#include "libm.h" + +static union { + unsigned i; + float f; +} C[] = { + 0x00800000, + 0x7f000000, + 0x7fffffff +}; + +#define tiny C[0].f +#define huge C[1].f +#define qnan C[2].f + +#if defined(__sparc) + +enum fcc_type { + fcc_equal = 0, + fcc_less = 1, + fcc_greater = 2, + fcc_unordered = 3 +}; + +#ifdef __sparcv9 +#define _Q_cmp _Qp_cmp +#endif + +extern enum fcc_type _Q_cmp(const long double *, const long double *); + +float +__nexttowardf(float x, long double y) { + union { + unsigned i; + float f; + } xx; + union { + unsigned i[4]; + long double q; + } yy; + long double lx; + unsigned hx; + volatile float dummy; + enum fcc_type rel; + + /* + * It would be somewhat more efficient to check for NaN and + * zero operands before converting x to long double and then + * to code the comparison in line rather than calling _Q_cmp. + * However, since this code probably won't get used much, + * I'm opting in favor of simplicity instead. 
+ */ + lx = xx.f = x; + hx = xx.i & ~0x80000000; + + /* check for each of four possible orderings */ + rel = _Q_cmp(&lx, &y); + if (rel == fcc_unordered) + return (qnan); + + if (rel == fcc_equal) { + if (hx == 0) { /* x is zero; return zero with y's sign */ + yy.q = y; + xx.i = yy.i[0]; + return (xx.f); + } + return (x); + } + + if (rel == fcc_less) { + if (hx == 0) /* x is zero */ + xx.i = 0x00000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i++; + else + xx.i--; + } else { + if (hx == 0) /* x is zero */ + xx.i = 0x80000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i--; + else + xx.i++; + } + + /* raise exceptions as needed */ + hx = xx.i & ~0x80000000; + if (hx == 0x7f800000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00800000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.f); +} + +#elif defined(__i386) + +float +__nexttowardf(float x, long double y) { + union { + unsigned i; + float f; + } xx; + unsigned hx; + long double lx; + volatile float dummy; + + lx = xx.f = x; + hx = xx.i & ~0x80000000; + + /* check for each of four possible orderings */ + if (isunordered(lx, y)) + return ((float) (lx + y)); + + if (lx == y) + return ((float) y); + + if (lx < y) { + if (hx == 0) /* x is zero */ + xx.i = 0x00000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i++; + else + xx.i--; + } else { + if (hx == 0) /* x is zero */ + xx.i = 0x80000001; + else if ((int) xx.i >= 0) /* x is positive */ + xx.i--; + else + xx.i++; + } + + /* raise exceptions as needed */ + hx = xx.i & ~0x80000000; + if (hx == 0x7f800000) { + dummy = huge; + dummy *= huge; + } else if (hx < 0x00800000) { + dummy = tiny; + dummy *= tiny; + } + + return (xx.f); +} + +#else +#error Unknown architecture +#endif diff --git a/usr/src/libm/src/m9x/nexttowardl.c b/usr/src/libm/src/m9x/nexttowardl.c new file mode 100644 index 0000000..4578738 --- /dev/null +++ b/usr/src/libm/src/m9x/nexttowardl.c @@ -0,0 +1,118 @@ +/* + * CDDL HEADER START + * + * The 
contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)nexttowardl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak nexttowardl = __nexttowardl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define n0 0 +#define n1 1 +#define n2 2 +#define n3 3 +#define X86PDNRM1(x) +#define INC(px) { \ + if (++px[n3] == 0) \ + if (++px[n2] == 0) \ + if (++px[n1] == 0) \ + ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n3] == 0xffffffff) \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0xffffffff) \ + --px[n0]; \ + } +#elif defined(__i386) +#define n0 2 +#define n1 1 +#define n2 0 +#define n3 0 +/* + * if pseudo-denormal, replace by the equivalent normal + */ +#define X86PDNRM1(x) if (XBIASED_EXP(x) == 0 && (((int *) &x)[1] & \ + 0x80000000) != 0) \ + ((int *) &x)[2] |= 1 +#define INC(px) { \ + if (++px[n2] == 0) \ + if ((++px[n1] & ~0x80000000) == 0) \ + px[n1] = 0x80000000, ++px[n0]; \ + } +#define DEC(px) { \ + if (--px[n2] == 0xffffffff) \ + if (--px[n1] == 0x7fffffff) \ + if ((--px[n0] & 0x7fff) != 0) \ + px[n1] |= 0x80000000; \ + } +#endif + +long 
double +nexttowardl(long double x, long double y) { + int *px = (int *) &x; + int *py = (int *) &y; + + if (x == y) + return (y); /* C99 requirement */ + if (x != x || y != y) + return (x * y); + + if (ISZEROL(x)) { /* x == 0.0 */ + px[n0] = py[n0] & XSGNMSK; + px[n1] = px[n2] = 0; + px[n3] = 1; + } else { + X86PDNRM1(x); + if ((px[n0] & XSGNMSK) == 0) { /* x > 0.0 */ + if (x > y) /* x > y */ + DEC(px) + else + INC(px) + } else { + if (x < y) /* x < y */ + DEC(px) + else + INC(px) + } + } +#ifndef lint + { + volatile long double dummy; + int k = XBIASED_EXP(x); + + if (k == 0) + dummy = LDBL_MIN * copysignl(LDBL_MIN, x); + else if (k == 0x7fff) + dummy = LDBL_MAX * copysignl(LDBL_MAX, x); + } +#endif + return (x); +} diff --git a/usr/src/libm/src/m9x/regset.h b/usr/src/libm/src/m9x/regset.h new file mode 100644 index 0000000..54c9306 --- /dev/null +++ b/usr/src/libm/src/m9x/regset.h @@ -0,0 +1,128 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * Parts of Solaris 10 x86 /usr/include/sys/regset.h + */ + +#ifndef _SYS_REGSET_H +#define _SYS_REGSET_H + +#pragma ident "@(#)regset.h 1.3 06/01/31 SMI" + +#include <sys/types.h> + +typedef union { + long double _q; + uint32_t _l[4]; +} myupad128_t; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The names and offsets defined here are specified by i386 ABI suppl. + */ + +#define SS 18 /* only stored on a privilege transition */ +#define UESP 17 /* only stored on a privilege transition */ +#define EFL 16 +#define CS 15 +#define EIP 14 +#define ERR 13 +#define TRAPNO 12 +#define EAX 11 +#define ECX 10 +#define EDX 9 +#define EBX 8 +#define ESP 7 +#define EBP 6 +#define ESI 5 +#define EDI 4 +#define DS 3 +#define ES 2 +#define FS 1 +#define GS 0 + +/* aliases for portability */ + +#define REG_PC EIP +#define REG_FP EBP +#define REG_SP UESP +#define REG_PS EFL +#define REG_R0 EAX +#define REG_R1 EDX + +/* + * A gregset_t is defined as an array type for compatibility with the reference + * source. This is important due to differences in the way the C language + * treats arrays and structures as parameters. + */ +#define _NGREG 19 + +typedef int greg_t; +typedef greg_t gregset_t[_NGREG]; + +/* + * This definition of the floating point structure is binary + * compatible with the Intel386 psABI definition, and source + * compatible with that specification for x87-style floating point. + * It also allows SSE/SSE2 state to be accessed on machines that + * possess such hardware capabilities. 
+ */ +typedef struct fpu { + union { + struct fpchip_state { + uint32_t state[27]; /* 287/387 saved state */ + uint32_t status; /* saved at exception */ + uint32_t mxcsr; /* SSE control and status */ + uint32_t xstatus; /* SSE mxcsr at exception */ + uint32_t __pad[2]; /* align to 128-bits */ + myupad128_t xmm[8]; /* %xmm0-%xmm7 */ + } fpchip_state; + struct fp_emul_space { /* for emulator(s) */ + uint8_t fp_emul[246]; + uint8_t fp_epad[2]; + } fp_emul_space; + uint32_t f_fpregs[95]; /* union of the above */ + } fp_reg_set; +} fpregset_t; + +/* + * Structure mcontext defines the complete hardware machine state. + * (This structure is specified in the i386 ABI suppl.) + */ +typedef struct { + gregset_t gregs; /* general register set */ + fpregset_t fpregs; /* floating point register set */ +} mcontext_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_REGSET_H */ diff --git a/usr/src/libm/src/m9x/remquo.c b/usr/src/libm/src/m9x/remquo.c new file mode 100644 index 0000000..25d501e --- /dev/null +++ b/usr/src/libm/src/m9x/remquo.c @@ -0,0 +1,267 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 
/*
 * double remquo(double x, double y, int *quo) returns remainder(x, y) and
 * stores through quo an integer carrying the sign of x/y whose magnitude is
 * |N| mod 2**31, where N is the exact integral part of x/y rounded to
 * nearest even.
 *
 * remquo calls the internal helper fmodquo.
 */

/*
 * An IEEE double viewed as one 64-bit integer.  The high and low 32-bit
 * words are obtained by shifting, so neither per-architecture word-order
 * macros nor pointer casts are needed.
 */
typedef union {
	double d;
	unsigned long long u;
} remquo_bits_t;

#define	HIWORD_OF(b)	((int) ((b).u >> 32))
#define	LOWORD_OF(b)	((unsigned) (b).u)
#define	MKBITS(hi, lo)	(((unsigned long long) (unsigned) (hi) << 32) | (lo))

static const double one = 1.0, Zero[] = {0.0, -0.0};

/*
 * fmod-style helper: returns the remainder of x/y computed by fixed point
 * long division (the result carries the sign of x), and stores the low 31
 * bits of the truncated quotient, signed like x/y, through quo.
 * If y is zero or NaN, or x is not finite, *quo is 0 and (x*y)/(x*y) is
 * returned.
 */
static double
fmodquo(double x, double y, int *quo) {
	remquo_bits_t xx, yy;
	int n, hx, hy, hz, ix, iy, sx, sq, i;
	unsigned lx, ly, lz;
	unsigned m;	/* quotient bits; unsigned so that doubling wraps */

	xx.d = x;
	yy.d = y;
	hx = HIWORD_OF(xx);		/* high word of x */
	lx = LOWORD_OF(xx);		/* low word of x */
	hy = HIWORD_OF(yy);		/* high word of y */
	ly = LOWORD_OF(yy);		/* low word of y */
	sx = hx & 0x80000000;		/* sign of x */
	sq = (hx ^ hy) & 0x80000000;	/* sign of x/y */
	hx ^= sx;			/* |x| */
	hy &= 0x7fffffff;		/* |y| */

	/* purge off exception values */
	*quo = 0;
	if ((hy | ly) == 0 || hx >= 0x7ff00000 ||  /* y=0, or x !finite */
	    (hy | ((ly | -ly) >> 31)) > 0x7ff00000) /* or y is NaN */
		return ((x * y) / (x * y));
	if (hx <= hy) {
		if (hx < hy || lx < ly)
			return (x);	/* |x|<|y| return x */
		if (lx == ly) {
			*quo = (sq != 0) ? -1 : 1;
			/* |x|=|y| return x*0 */
			return (Zero[(unsigned) sx >> 31]);
		}
	}

	/* determine ix = ilogb(x) */
	if (hx < 0x00100000) {	/* subnormal x */
		if (hx == 0) {
			for (ix = -1043, i = lx; i > 0; i <<= 1)
				ix -= 1;
		} else {
			for (ix = -1022, i = (hx << 11); i > 0; i <<= 1)
				ix -= 1;
		}
	} else
		ix = (hx >> 20) - 1023;

	/* determine iy = ilogb(y) */
	if (hy < 0x00100000) {	/* subnormal y */
		if (hy == 0) {
			for (iy = -1043, i = ly; i > 0; i <<= 1)
				iy -= 1;
		} else {
			for (iy = -1022, i = (hy << 11); i > 0; i <<= 1)
				iy -= 1;
		}
	} else
		iy = (hy >> 20) - 1023;

	/* set up {hx,lx}, {hy,ly} and align y to x */
	if (ix >= -1022)
		hx = 0x00100000 | (0x000fffff & hx);
	else {			/* subnormal x, shift x to normal */
		n = -1022 - ix;
		if (n <= 31) {
			hx = (hx << n) | (lx >> (32 - n));
			lx <<= n;
		} else {
			hx = lx << (n - 32);
			lx = 0;
		}
	}
	if (iy >= -1022)
		hy = 0x00100000 | (0x000fffff & hy);
	else {			/* subnormal y, shift y to normal */
		n = -1022 - iy;
		if (n <= 31) {
			hy = (hy << n) | (ly >> (32 - n));
			ly <<= n;
		} else {
			hy = ly << (n - 32);
			ly = 0;
		}
	}

	/* fix point fmod */
	n = ix - iy;
	m = 0;
	while (n--) {
		hz = hx - hy;
		lz = lx - ly;
		if (lx < ly)
			hz -= 1;
		if (hz < 0) {
			hx = hx + hx + (lx >> 31);
			lx = lx + lx;
		} else {
			m += 1;
			if ((hz | lz) == 0) {	/* return sign(x)*0 */
				/* the n + 1 remaining quotient bits are 0 */
				if (n < 31)
					m <<= 1 + n;
				else
					m = 0;
				m &= 0x7fffffff;
				*quo = (sq == 0) ? (int) m : -(int) m;
				return (Zero[(unsigned) sx >> 31]);
			}
			hx = hz + hz + (lz >> 31);
			lx = lz + lz;
		}
		m += m;
	}
	hz = hx - hy;
	lz = lx - ly;
	if (lx < ly)
		hz -= 1;
	if (hz >= 0) {
		hx = hz;
		lx = lz;
		m += 1;
	}
	m &= 0x7fffffff;
	*quo = (sq == 0) ? (int) m : -(int) m;

	/* convert back to floating value and restore the sign */
	if ((hx | lx) == 0) {	/* return sign(x)*0 */
		return (Zero[(unsigned) sx >> 31]);
	}
	while (hx < 0x00100000) {	/* normalize x */
		hx = hx + hx + (lx >> 31);
		lx = lx + lx;
		iy -= 1;
	}
	if (iy >= -1022) {	/* normalize output */
		hx = (hx - 0x00100000) | ((iy + 1023) << 20);
		xx.u = MKBITS(hx | sx, lx);
	} else {		/* subnormal output */
		n = -1022 - iy;
		if (n <= 20) {
			lx = (lx >> n) | ((unsigned) hx << (32 - n));
			hx >>= n;
		} else if (n <= 31) {
			lx = (hx << (32 - n)) | (lx >> n);
			hx = sx;
		} else {
			lx = hx >> (n - 32);
			hx = sx;
		}
		xx.u = MKBITS(hx | sx, lx);
		xx.d *= one;	/* create necessary signal */
	}
	return (xx.d);		/* exact output */
}

#define	zero	Zero[0]

/*
 * remquo(x, y, quo): IEEE remainder of x with respect to y, plus the low
 * bits of the rounded quotient.
 *
 * If y is zero or NaN, or x is infinite or NaN, *quo is set to 0 and
 * (x*y)/(x*y) is returned.  Otherwise |x| is first reduced modulo 2|y|
 * by fmodquo (skipped when 2|y| would overflow) and the result is then
 * brought into [-|y|/2, |y|/2] with at most two subtractions of |y|.
 */
double
remquo(double x, double y, int *quo) {
	remquo_bits_t xx, yy;
	int hx, hy, sx, sq;
	double v;
	unsigned ly;
	unsigned q = 0;	/* |quotient|; unsigned so it wraps modulo 2**31 */

	xx.d = x;
	yy.d = y;
	hx = HIWORD_OF(xx);		/* high word of x */
	hy = HIWORD_OF(yy);		/* high word of y */
	ly = LOWORD_OF(yy);		/* low word of y */
	sx = hx & 0x80000000;		/* sign of x */
	sq = (hx ^ hy) & 0x80000000;	/* sign of x/y */
	hx ^= sx;			/* |x| */
	hy &= 0x7fffffff;		/* |y| */

	/* purge off exception values */
	*quo = 0;
	if ((hy | ly) == 0 || hx >= 0x7ff00000 ||  /* y=0, or x !finite */
	    (hy | ((ly | -ly) >> 31)) > 0x7ff00000) /* or y is NaN */
		return ((x * y) / (x * y));

	y = fabs(y);
	x = fabs(x);
	if (hy <= 0x7fdfffff) {		/* y + y does not overflow */
		x = fmodquo(x, y + y, quo);
		q = ((unsigned) *quo & 0x3fffffff) << 1;
	}
	if (hy < 0x00200000) {		/* 0.5 * y could lose bits */
		if (x + x > y) {
			q += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x + x >= y) {
				x -= y;
				q += 1;
			}
		}
	} else {
		v = 0.5 * y;
		if (x > v) {
			q += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x >= v) {
				x -= y;
				q += 1;
			}
		}
	}
	q &= 0x7fffffff;	/* 0x7ffffffe + 2 must wrap to 0 */
	*quo = (sq != 0) ? -(int) q : (int) q;
	return (sx == 0 ? x : -x);
}
/*
 * float remquof(float x, float y, int *quo) returns remainderf(x, y) and
 * stores through quo an integer carrying the sign of x/y whose magnitude is
 * |N| mod 2**31, where N is the exact integral part of x/y rounded to
 * nearest even.
 *
 * remquof calls the internal helper fmodquof.
 */

extern float fabsf(float);

static const int
	is = (int) 0x80000000,	/* sign bit */
	im = 0x007fffff,	/* fraction mask */
	ii = 0x7f800000,	/* bits of +Inf */
	iu = 0x00800000;	/* implicit leading bit */

static const float zero = 0.0F, half = 0.5F;

/* the bit pattern of a float, as an int */
static int
float_bits(float f) {
	union {
		float f;
		int i;
	} u;

	u.f = f;
	return (u.i);
}

/* the float whose bit pattern is i */
static float
bits_float(int i) {
	union {
		float f;
		int i;
	} u;

	u.i = i;
	return (u.f);
}

/* one step of the fixed point division of ix by iy; quotient bit into m */
#define	FMODQUOF_STEP() \
	do { \
		iz = ix - iy; \
		if (iz >= 0) { \
			m += 1; \
			ix = iz + iz; \
		} else \
			ix += ix; \
		m += m; \
	} while (0)

/*
 * fmod-style helper: returns the remainder of x/y (carrying the sign of x)
 * and stores the low 31 bits of the truncated quotient, signed like x/y,
 * through quo.  If x is not finite, or y is NaN or zero, *quo is 0 and
 * (x*y)/(x*y) is returned.
 */
static float
fmodquof(float x, float y, int *quo) {
	float w;
	int hx, ix, iy, iz, k, ny, nd, sq;
	unsigned m;	/* quotient bits; unsigned so that doubling wraps */

	hx = float_bits(x);
	ix = hx & 0x7fffffff;
	iy = float_bits(y);
	sq = (iy ^ hx) & is;	/* sign of x/y */
	iy &= 0x7fffffff;

	/* purge off exception values */
	*quo = 0;
	if (ix >= ii || iy > ii || iy == 0) {
		w = x * y;
		return (w / w);
	}
	if (ix < iy)
		return (x);	/* return x if |x|<|y| */
	if (ix == iy) {
		*quo = (sq != 0) ? -1 : 1;
		return (zero * x);	/* return sign(x)*0.0 */
	}

	/*
	 * scale x,y to "normal" with
	 *	ny = exponent of y
	 *	nd = exponent of x minus exponent of y
	 */
	ny = iy >> 23;
	k = ix >> 23;

	/* special case for subnormal y or x */
	if (ny == 0) {
		ny = 1;
		while (iy < iu) {
			ny -= 1;
			iy += iy;
		}
		nd = k - ny;
		if (k == 0) {
			nd += 1;
			while (ix < iu) {
				nd -= 1;
				ix += ix;
			}
		} else
			ix = iu | (ix & im);
	} else {
		nd = k - ny;
		ix = iu | (ix & im);
		iy = iu | (iy & im);
	}

	/* fix point fmod for normalized ix and iy, four steps per pass */
	m = 0;
	k = nd >> 2;
	nd -= (k << 2);
	while (k--) {
		FMODQUOF_STEP();
		FMODQUOF_STEP();
		FMODQUOF_STEP();
		FMODQUOF_STEP();
		if (iz == 0) {
			/* exact: the remaining quotient bits are all 0 */
			iz = (k << 2) + nd;
			if (iz < 32)
				m <<= iz;
			else
				m = 0;
			m &= 0x7fffffff;
			*quo = (sq == 0) ? (int) m : -(int) m;
			return (bits_float(is & hx));
		}
	}
	while (nd--)
		FMODQUOF_STEP();

	iz = ix - iy;
	if (iz >= 0) {
		m += 1;
		ix = iz;
	}
	m &= 0x7fffffff;
	*quo = (sq == 0) ? (int) m : -(int) m;

	/* convert back to floating value and restore the sign */
	if (ix == 0)
		return (bits_float(is & hx));
	while (ix < iu) {
		ix += ix;
		ny -= 1;
	}
	while (ix > (iu + iu)) {
		ny += 1;
		ix >>= 1;
	}
	if (ny > 0)
		w = bits_float((is & hx) | (ix & im) | (ny << 23));
	else {		/* subnormal output */
		k = -ny + 1;
		ix >>= k;
		w = bits_float((is & hx) | ix);
	}
	return (w);
}

/*
 * remquof(x, y, quo): IEEE remainder of x with respect to y, plus the low
 * bits of the rounded quotient.
 *
 * If y is zero or NaN, or x is infinite or NaN, *quo is set to 0 and
 * (x*y)/(x*y) is returned.  Otherwise |x| is first reduced modulo 2|y|
 * by fmodquof and the result is then brought into [-|y|/2, |y|/2] with at
 * most two subtractions of |y|.
 */
float
remquof(float x, float y, int *quo) {
	int hx, hy, sx, sq;
	float v;
	unsigned q = 0;	/* |quotient|; unsigned so it wraps modulo 2**31 */

	hx = float_bits(x);
	hy = float_bits(y);
	sx = hx & is;		/* sign of x */
	sq = (hx ^ hy) & is;	/* sign of x/y */
	hx ^= sx;		/* |x| */
	hy &= 0x7fffffff;	/* |y| */

	/* purge off exception values: y is 0 or NaN, x is Inf or NaN */
	*quo = 0;
	if (hx >= ii || hy > ii || hy == 0) {
		v = x * y;
		return (v / v);
	}

	y = fabsf(y);
	x = fabsf(x);
	/*
	 * Reduce modulo 2|y| only while y + y is finite, i.e. |y| < 2**127.
	 * For larger |y| the sum would overflow to infinity and raise a
	 * spurious overflow exception; the steps below already handle x.
	 */
	if (hy <= 0x7effffff) {
		x = fmodquof(x, y + y, quo);
		q = ((unsigned) *quo & 0x3fffffff) << 1;
	}
	if (hy < 0x01000000) {		/* half * y could lose bits */
		if (x + x > y) {
			q += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x + x >= y) {
				x -= y;
				q += 1;
			}
		}
	} else {
		v = half * y;
		if (x > v) {
			q += 1;
			if (x == y)
				x = zero;
			else
				x -= y;
			if (x >= v) {
				x -= y;
				q += 1;
			}
		}
	}
	q &= 0x7fffffff;
	*quo = (sq != 0) ? -(int) q : (int) q;
	return (sx == 0 ? x : -x);
}
+ */ + +#pragma ident "@(#)remquol.c 1.8 06/01/31 SMI" + +#pragma weak remquol = __remquol + +#include "libm.h" +#include "libm_synonyms.h" +#include <sunmath.h> /* fabsl */ +/* INDENT OFF */ +static const int + is = -0x7fffffff - 1, + im = 0x0000ffff, + iu = 0x00010000; + +static const long double zero = 0.0L, one = 1.0L; +/* INDENT ON */ + +#if defined(__sparc) +#define __H0(x) ((int *) &x)[0] +#define __H1(x) ((int *) &x)[1] +#define __H2(x) ((int *) &x)[2] +#define __H3(x) ((int *) &x)[3] +#else +#error Unsupported architecture +#endif + +/* + * On entrance: *quo is initialized to 0, x finite and y non-zero & ordered + */ +static long double +fmodquol(long double x, long double y, int *quo) { + long double a, b; + int n, ix, iy, k, sx, sq, m; + int hx; + int x0, y0, z0, carry; + unsigned x1, x2, x3, y1, y2, y3, z1, z2, z3; + + hx = __H0(x); + x1 = __H1(x); + x2 = __H2(x); + x3 = __H3(x); + y0 = __H0(y); + y1 = __H1(y); + y2 = __H2(y); + y3 = __H3(y); + + sx = hx & is; + sq = (hx ^ y0) & is; + x0 = hx ^ sx; + y0 &= ~0x80000000; + + a = fabsl(x); + b = fabsl(y); + if (a <= b) { + if (a < b) + return (x); + else { + *quo = 1 + (sq >> 30); + return (zero * x); + } + } + /* determine ix = ilogbl(x) */ + if (x0 < iu) { /* subnormal x */ + ix = 0; + ix = -16382; + while (x0 == 0) { + ix -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } + while (x0 < iu) { + ix -= 1; + x0 = (x0 << 1) | (x1 >> 31); + x1 = (x1 << 1) | (x2 >> 31); + x2 = (x2 << 1) | (x3 >> 31); + x3 <<= 1; + } + } else { + ix = (x0 >> 16) - 16383; + x0 = iu | (x0 & im); + } + + /* determine iy = ilogbl(y) */ + if (y0 < iu) { /* subnormal y */ + iy = -16382; + while (y0 == 0) { + iy -= 16; + y0 = y1 >> 16; + y1 = (y1 << 16) | (y2 >> 16); + y2 = (y2 << 16) | (y3 >> 16); + y3 = (y3 << 16); + } + while (y0 < iu) { + iy -= 1; + y0 = (y0 << 1) | (y1 >> 31); + y1 = (y1 << 1) | (y2 >> 31); + y2 = (y2 << 1) | (y3 >> 31); + y3 <<= 1; + } + } else { + iy = 
(y0 >> 16) - 16383; + y0 = iu | (y0 & im); + } + + + /* fix point fmod */ + n = ix - iy; + m = 0; + while (n--) { + while (x0 == 0 && n >= 16) { + m <<= 16; + n -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } + while (x0 < iu && n >= 1) { + m += m; + n -= 1; + x0 = (x0 << 1) | (x1 >> 31); + x1 = (x1 << 1) | (x2 >> 31); + x2 = (x2 << 1) | (x3 >> 31); + x3 = (x3 << 1); + } + carry = 0; + z3 = x3 - y3; + carry = z3 > x3; + if (carry == 0) { + z2 = x2 - y2; + carry = z2 > x2; + } else { + z2 = x2 - y2 - 1; + carry = z2 >= x2; + } + if (carry == 0) { + z1 = x1 - y1; + carry = z1 > x1; + } else { + z1 = x1 - y1 - 1; + carry = z1 >= x1; + } + z0 = x0 - y0 - carry; + if (z0 < 0) { /* double x */ + x0 = x0 + x0 + ((x1 & is) != 0); + x1 = x1 + x1 + ((x2 & is) != 0); + x2 = x2 + x2 + ((x3 & is) != 0); + x3 = x3 + x3; + m += m; + } else { + m += 1; + if (z0 == 0) { + if ((z1 | z2 | z3) == 0) { + /* 0: we are done */ + if (n < 31) + m <<= (1 + n); + else + m = 0; + m &= ~0x80000000; + *quo = sq >= 0 ? m : -m; + __H0(a) = hx & is; + __H1(a) = __H2(a) = __H3(a) = 0; + return (a); + } + } + /* x = z << 1 */ + z0 = z0 + z0 + ((z1 & is) != 0); + z1 = z1 + z1 + ((z2 & is) != 0); + z2 = z2 + z2 + ((z3 & is) != 0); + z3 = z3 + z3; + x0 = z0; + x1 = z1; + x2 = z2; + x3 = z3; + m += m; + } + } + carry = 0; + z3 = x3 - y3; + carry = z3 > x3; + if (carry == 0) { + z2 = x2 - y2; + carry = z2 > x2; + } else { + z2 = x2 - y2 - 1; + carry = z2 >= x2; + } + if (carry == 0) { + z1 = x1 - y1; + carry = z1 > x1; + } else { + z1 = x1 - y1 - 1; + carry = z1 >= x1; + } + z0 = x0 - y0 - carry; + if (z0 >= 0) { + x0 = z0; + x1 = z1; + x2 = z2; + x3 = z3; + m += 1; + } + m &= ~0x80000000; + *quo = sq >= 0 ? 
m : -m; + + /* convert back to floating value and restore the sign */ + if ((x0 | x1 | x2 | x3) == 0) { + __H0(a) = hx & is; + __H1(a) = __H2(a) = __H3(a) = 0; + return (a); + } + while (x0 < iu) { + if (x0 == 0) { + iy -= 16; + x0 = x1 >> 16; + x1 = (x1 << 16) | (x2 >> 16); + x2 = (x2 << 16) | (x3 >> 16); + x3 = (x3 << 16); + } else { + x0 = x0 + x0 + ((x1 & is) != 0); + x1 = x1 + x1 + ((x2 & is) != 0); + x2 = x2 + x2 + ((x3 & is) != 0); + x3 = x3 + x3; + iy -= 1; + } + } + + /* normalize output */ + if (iy >= -16382) { + __H0(a) = sx | (x0 - iu) | ((iy + 16383) << 16); + __H1(a) = x1; + __H2(a) = x2; + __H3(a) = x3; + } else { /* subnormal output */ + n = -16382 - iy; + k = n & 31; + if (k <= 16) { + x3 = (x2 << (32 - k)) | (x3 >> k); + x2 = (x1 << (32 - k)) | (x2 >> k); + x1 = (x0 << (32 - k)) | (x1 >> k); + x0 >>= k; + } else { + x3 = (x2 << (32 - k)) | (x3 >> k); + x2 = (x1 << (32 - k)) | (x2 >> k); + x1 = (x0 << (32 - k)) | (x1 >> k); + x0 = 0; + } + while (n >= 32) { + n -= 32; + x3 = x2; + x2 = x1; + x1 = x0; + x0 = 0; + } + __H0(a) = x0 | sx; + __H1(a) = x1; + __H2(a) = x2; + __H3(a) = x3; + a *= one; + } + return (a); +} + +long double +remquol(long double x, long double y, int *quo) { + int hx, hy, sx, sq; + long double v; + + hx = __H0(x); /* high word of x */ + hy = __H0(y); /* high word of y */ + sx = hx & is; /* sign of x */ + sq = (hx ^ hy) & is; /* sign of x/y */ + hx ^= sx; /* |x| */ + hy &= ~0x80000000; + + /* purge off exception values */ + *quo = 0; + /* y=0, y is NaN, x is NaN or inf */ + if (y == 0.0L || y != y || hx >= 0x7fff0000) + return ((x * y) / (x * y)); + + y = fabsl(y); + x = fabsl(x); + if (hy <= 0x7ffdffff) { + x = fmodquol(x, y + y, quo); + *quo = ((*quo) & 0x3fffffff) << 1; + } + if (hy < 0x00020000) { + if (x + x > y) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x + x >= y) { + x -= y; + *quo += 1; + } + } + } else { + v = 0.5L * y; + if (x > v) { + *quo += 1; + if (x == y) + x = zero; + else + x -= y; + if (x 
>= v) { + x -= y; + *quo += 1; + } + } + } + if (sq != 0) + *quo = -(*quo); + return (sx == 0 ? x : -x); +} diff --git a/usr/src/libm/src/m9x/round.c b/usr/src/libm/src/m9x/round.c new file mode 100644 index 0000000..f635830 --- /dev/null +++ b/usr/src/libm/src/m9x/round.c @@ -0,0 +1,75 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)round.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak round = __round +#endif + +#include "libm.h" + +double +round(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) { /* |x| < 1 */ + if (hx >= 0x3fe00000) + return (sx ? -1.0 : 1.0); + return (sx ? 
/*
 * round(x) returns x rounded to the nearest integral value, with halfway
 * cases rounded away from zero.
 *
 * - |x| < 0.5 returns a zero with the sign of x; 0.5 <= |x| < 1 returns
 *   +-1.
 * - |x| >= 2^52 (already integral) is returned unchanged.
 * - Inf and NaN return x + x (but see FPADD_TRAPS_INCOMPLETE_ON_NAN).
 *
 * The value is handled as one 64-bit integer, so no word-order macros are
 * needed and the rounding bit is always formed in an unsigned type; the
 * expression 1 << 31 on an int is undefined.
 */
double
round(double x) {
	union {
		unsigned long long u;
		double d;
	} xx;
	unsigned long long halfbit;
	unsigned ex;
	int neg;

	xx.d = x;
	neg = (xx.u >> 63) != 0;
	ex = (unsigned) (xx.u >> 52) & 0x7ff;	/* biased exponent */
	if (ex < 0x433) {		/* |x| < 2^52 */
		if (ex < 0x3ff) {	/* |x| < 1 */
			if (ex >= 0x3fe)
				return (neg ? -1.0 : 1.0);
			return (neg ? -0.0 : 0.0);
		}

		/*
		 * round x at the integer bit: add one half, then clear the
		 * half bit and everything below it; a carry out of the
		 * fraction correctly increments the exponent
		 */
		halfbit = 1ULL << (0x432 - ex);
		xx.u = (xx.u + halfbit) & ~(halfbit | (halfbit - 1));
		return (xx.d);
	} else if (ex < 0x7ff)
		return (x);
	else
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
		return ((xx.u & 0x7fffffffffffffffULL) >=
		    0x7ff8000000000000ULL ? x : x + x);
		/* assumes sparc-like QNaN */
#else
		return (x + x);
#endif
}
/*
 * roundf(x): x rounded to the nearest integral value, halfway cases going
 * away from zero.  Values with |x| >= 2^23 are already integral and come
 * back unchanged; Inf and NaN come back as x + x.
 */
float
roundf(float x) {
	union {
		float f;
		unsigned i;
	} bits;
	unsigned mag, neg, halfbit;

	bits.f = x;
	neg = bits.i & 0x80000000;	/* sign bit */
	mag = bits.i & ~0x80000000;	/* |x| as an integer */

	/* inf or nan */
	if (mag >= 0x7f800000) {
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
		return (mag > 0x7f800000 ? x * x : x + x);
#else
		return (x + x);
#endif
	}

	/* |x| >= 2^23: already integral */
	if (mag >= 0x4b000000)
		return (x);

	/* |x| < 1: the answer is a signed zero or a signed one */
	if (mag < 0x3f000000)
		return (neg ? -0.0F : 0.0F);
	if (mag < 0x3f800000)
		return (neg ? -1.0F : 1.0F);

	/* add one half, then drop the half bit and everything below it */
	halfbit = 1 << (0x95 - (mag >> 23));
	bits.i = (bits.i + halfbit) & ~((halfbit << 1) - 1);
	return (bits.f);
}
/*
 * roundl(x) returns x rounded to the nearest integral value, with halfway
 * cases rounded away from zero.  Two implementations follow: one for the
 * four-word long double handled in the __sparc branch, one for the x87
 * extended format used on 32- and 64-bit x86.
 */
#if defined(__sparc)
long double
roundl(long double x) {
	union {
		unsigned i[4];
		long double q;
	} xx;
	unsigned hx, sx, v;
	int j;

	xx.q = x;
	sx = xx.i[0] & 0x80000000;
	hx = xx.i[0] & ~0x80000000;

	/* handle trivial cases */
	if (hx >= 0x406f0000)	/* |x| >= 2^112 + ... or x is nan */
		return (hx >= 0x7fff0000 ? x + x : x);

	/* handle |x| < 1 */
	if (hx < 0x3fff0000) {
		if (hx >= 0x3ffe0000)
			return (sx ? -1.0L : 1.0L);
		return (sx ? -0.0L : 0.0L);
	}

	/*
	 * j is the number of fraction bits below the integer bit; v is the
	 * "one half" bit within the word that holds it (0 when that bit is
	 * the top bit of the next lower word).
	 */
	xx.i[0] = hx;
	j = 0x406f - (hx >> 16);	/* 1 <= j <= 112 */
	if (j >= 96) {			/* 96 <= j <= 112 */
		v = (1U << (j - 96)) >> 1;
		if (v) {
			if (xx.i[0] & v)
				xx.i[0] += v;
			xx.i[0] &= ~(v - 1);
		} else if (xx.i[1] & 0x80000000)
			++xx.i[0];
		xx.i[1] = xx.i[2] = xx.i[3] = 0;
	} else if (j >= 64) {		/* 64 <= j <= 95 */
		v = (1U << (j - 64)) >> 1;
		if (v) {
			if (xx.i[1] & v) {
				xx.i[1] += v;
				if (xx.i[1] < v)
					++xx.i[0];
			}
			xx.i[1] &= ~(v - 1);
		} else if (xx.i[2] & 0x80000000) {
			if (++xx.i[1] == 0)
				++xx.i[0];
		}
		xx.i[2] = xx.i[3] = 0;
	} else if (j >= 32) {		/* 32 <= j <= 63 */
		v = (1U << (j - 32)) >> 1;
		if (v) {
			if (xx.i[2] & v) {
				xx.i[2] += v;
				if (xx.i[2] < v) {
					if (++xx.i[1] == 0)
						++xx.i[0];
				}
			}
			xx.i[2] &= ~(v - 1);
		} else if (xx.i[3] & 0x80000000) {
			if (++xx.i[2] == 0) {
				if (++xx.i[1] == 0)
					++xx.i[0];
			}
		}
		xx.i[3] = 0;
	} else {			/* 1 <= j <= 31 */
		v = 1U << (j - 1);
		if (xx.i[3] & v) {
			xx.i[3] += v;
			if (xx.i[3] < v) {
				if (++xx.i[2] == 0) {
					if (++xx.i[1] == 0)
						++xx.i[0];
				}
			}
		}
		xx.i[3] &= ~(v - 1);
	}

	/* negate result if need be */
	if (sx)
		xx.i[0] |= 0x80000000;
	return (xx.q);
}
#elif defined(__i386) || defined(__amd64) || defined(__x86_64__)
long double
roundl(long double x) {
	union {
		unsigned i[3];
		long double e;
	} xx;
	/*
	 * All bit arithmetic is done in unsigned: the rounding bit can be
	 * 1 << 31, which is undefined for an int, as is subtracting one
	 * from the resulting INT_MIN.
	 */
	unsigned ex, sx, i;

	xx.e = x;
	ex = xx.i[2] & 0x7fff;		/* biased exponent */
	sx = xx.i[2] & 0x8000;		/* sign */
	if (ex < 0x403e) {		/* |x| < 2^63 */
		if (ex < 0x3fff) {	/* |x| < 1 */
			if (ex >= 0x3ffe)
				return (sx ? -1.0L : 1.0L);
			return (sx ? -0.0L : 0.0L);
		}

		/* round x at the integer bit */
		if (ex < 0x401e) {	/* one half lies in the high word */
			i = 1U << (0x401d - ex);
			xx.i[1] = (xx.i[1] + i) & ~(i | (i - 1));
			xx.i[0] = 0;
		} else {		/* one half lies in the low word */
			i = 1U << (0x403d - ex);
			xx.i[0] += i;
			if (xx.i[0] < i)
				xx.i[1]++;
			xx.i[0] &= ~(i | (i - 1));
		}
		/*
		 * the explicit leading bit was carried out: the result is
		 * the next power of two
		 */
		if (xx.i[1] == 0) {
			xx.i[2] = sx | ++ex;
			xx.i[1] = 0x80000000U;
		}
		return (xx.e);
	} else if (ex < 0x7fff)	/* x is integral */
		return (x);
	else			/* inf or nan */
		return (x + x);
}
#else
#error Unknown architecture
#endif	/* defined(__sparc) || defined(__i386) */
static const double twom54 = 5.5511151231257827021181583404541015625e-17;
#if defined(USE_FPSCALE) || defined(__i386)
static const double two52 = 4503599627370496.0;
#else
/*
 * Normalize the non-zero subnormal whose bits are *pu (shift its fraction
 * left until the leading bit reaches bit 52) and return the corresponding
 * biased exponent, which lies in [-51,0].  The sign bit is preserved.
 */
static int
ilogb_biased(unsigned long long *pu) {
	unsigned long long ax = *pu & ~0x8000000000000000ULL;
	unsigned t;
	int s = 52;

	/* s = 52 - (index of the highest set bit), by binary search */
	if (ax >> 32) {
		s -= 32;
		t = (unsigned) (ax >> 32);
	} else
		t = (unsigned) ax;
	if (t & 0xffff0000)
		s -= 16, t >>= 16;
	if (t & 0xff00)
		s -= 8, t >>= 8;
	if (t & 0xf0)
		s -= 4, t >>= 4;
	t <<= 1;
	s -= (0xffffaa50 >> t) & 0x3;	/* table of log2 for 0..15 */
	*pu = (*pu & 0x8000000000000000ULL) | (ax << s);
	return (1 - s);
}
#endif	/* defined(USE_FPSCALE) */

/*
 * scalbln(x, n) returns x * 2**n computed by exponent manipulation.
 *
 * - Inf and NaN return x + x (but see FPADD_TRAPS_INCOMPLETE_ON_NAN).
 * - Zero x or zero n returns x unchanged.
 * - Results too large return DBL_MAX * copysign(DBL_MAX, x); results too
 *   small return DBL_MIN * copysign(DBL_MIN, x), so that the overflow
 *   or underflow is produced by a real floating point operation.
 * - Subnormal results are produced by a final multiplication by 2**-54.
 *
 * The range of n is checked before n is narrowed to int and added to the
 * exponent.  Doing the addition first can overflow int for n near
 * INT_MIN, and where long is wider than int it can make a hugely negative
 * n look like a small positive one.
 */
double
scalbln(double x, long n) {
	union {
		unsigned long long u;
		double d;
	} xx;
	unsigned long long ax;
	int k;

	xx.d = x;
	ax = xx.u & ~0x8000000000000000ULL;	/* |x| as an integer */
	k = (int) (ax >> 52);			/* biased exponent */
	if (k == 0x7ff)
#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
		return ((xx.u & 0x0008000000000000ULL) != 0 ? x : x + x);
		/* assumes sparc-like QNaN */
#else
		return (x + x);
#endif
	if (ax == 0 || n == 0)
		return (x);
	if (n > 5000)
		return (DBL_MAX * copysign(DBL_MAX, x));
	if (n < -5000)
		return (DBL_MIN * copysign(DBL_MIN, x));
	if (k == 0) {		/* subnormal x: normalize it first */
#if defined(USE_FPSCALE) || defined(__i386)
		xx.d *= two52;
		k = (int) ((xx.u & ~0x8000000000000000ULL) >> 52) - 52;
#else
		k = ilogb_biased(&xx.u);
#endif
	}
	k += (int) n;		/* |n| <= 5000 here: cannot overflow */
	if (k > 0x7fe)
		return (DBL_MAX * copysign(DBL_MAX, x));
	if (k <= -54)
		return (DBL_MIN * copysign(DBL_MIN, x));
	if (k > 0) {
		xx.u = (xx.u & ~0x7ff0000000000000ULL) |
		    ((unsigned long long) k << 52);
		return (xx.d);
	}
	/* subnormal result: build x * 2**(n + 54), then scale down */
	k += 54;
	xx.u = (xx.u & ~0x7ff0000000000000ULL) |
	    ((unsigned long long) k << 52);
	return (xx.d * twom54);
}
+ */ + +#pragma ident "@(#)scalblnf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak scalblnf = __scalblnf +#endif + +#include "libm.h" +#include <float.h> /* FLT_MAX, FLT_MIN */ + +static const float twom25f = 2.98023223876953125e-8F; +#if defined(USE_FPSCALE) || defined(__i386) +static const float two23f = 8388608.0F; +#else +/* + * v: a non-zero subnormal |x|; returns [-22, 0] + */ +static int +ilogbf_biased(unsigned v) { + int r = -22; + + if (v & 0xffff0000) + r += 16, v >>= 16; + if (v & 0xff00) + r += 8, v >>= 8; + if (v & 0xf0) + r += 4, v >>= 4; + v <<= 1; + return (r + ((0xffffaa50 >> v) & 0x3)); +} +#endif /* defined(USE_FPSCALE) */ + +float +scalblnf(float x, long n) { + int *px = (int *) &x, ix, k; + + ix = *px & ~0x80000000; + k = ix >> 23; + if (k == 0xff) +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (ix > 0x7f800000 ? x * x : x); +#else + return (x + x); +#endif + if (ix == 0 || n == 0) + return (x); + if (k == 0) { +#if defined(USE_FPSCALE) || defined(__i386) + x *= two23f; + k = ((*px & ~0x80000000) >> 23) - 23; +#else + k = ilogbf_biased(ix); + *px = (*px & 0x80000000) | (ix << (-k + 1)); +#endif + } + k += (int) n; + if (n > 5000 || k > 0xfe) + return (FLT_MAX * copysignf(FLT_MAX, x)); + if (n < -5000 || k <= -25) + return (FLT_MIN * copysignf(FLT_MIN, x)); + if (k > 0) { + *px = (*px & ~0x7f800000) | (k << 23); + return (x); + } + k += 25; + *px = (*px & ~0x7f800000) | (k << 23); + return (x * twom25f); +} diff --git a/usr/src/libm/src/m9x/scalblnl.c b/usr/src/libm/src/m9x/scalblnl.c new file mode 100644 index 0000000..f017495 --- /dev/null +++ b/usr/src/libm/src/m9x/scalblnl.c @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)scalblnl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak scalblnl = __scalblnl +#endif + +#include "libm.h" +#include <float.h> /* LDBL_MAX, LDBL_MIN */ + +#if defined(__sparc) +#define XSET_EXP(k, x) ((int *) &x)[0] = (((int *) &x)[0] & ~0x7fff0000) | \ + (k << 16) +#define ISINFNANL(k, x) (k == 0x7fff) +#define XTWOT_OFFSET 113 +static const long double xtwot = 10384593717069655257060992658440192.0L, + /* 2^113 */ + twomtm1 = 4.814824860968089632639944856462318296E-35L; /* 2^-114 */ +#elif defined(__i386) +#define XSET_EXP(k, x) ((int *) &x)[2] = (((int *) &x)[2] & ~0x7fff) | k +#if defined(HANDLE_UNSUPPORTED) +#define ISINFNANL(k, x) (k == 0x7fff || k != 0 && \ + (((int *) &x)[1] & 0x80000000) == 0) +#else +#define ISINFNANL(k, x) (k == 0x7fff) +#endif +#define XTWOT_OFFSET 64 +static const long double xtwot = 18446744073709551616.0L, /* 2^64 */ + twomtm1 = 2.7105054312137610850186E-20L; /* 2^-65 */ +#endif + +long double +scalblnl(long double x, long n) { + int k = XBIASED_EXP(x); + + if (ISINFNANL(k, x)) + return (x + x); + if (ISZEROL(x) || n == 0) + return (x); + if (k == 0) { + x *= xtwot; + k = XBIASED_EXP(x) - XTWOT_OFFSET; + } + k += (int) n; + if (n > 50000 || k > 0x7ffe) + return (LDBL_MAX * copysignl(LDBL_MAX, 
x)); + if (n < -50000 || k <= -XTWOT_OFFSET - 1) + return (LDBL_MIN * copysignl(LDBL_MIN, x)); + if (k > 0) { + XSET_EXP(k, x); + return (x); + } + k += XTWOT_OFFSET + 1; + XSET_EXP(k, x); + return (x * twomtm1); +} diff --git a/usr/src/libm/src/m9x/tgamma.c b/usr/src/libm/src/m9x/tgamma.c new file mode 100644 index 0000000..4e5253f --- /dev/null +++ b/usr/src/libm/src/m9x/tgamma.c @@ -0,0 +1,1703 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tgamma.c 1.13 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak tgamma = __tgamma +#endif + +/* INDENT OFF */ +/* + * True gamma function + * double tgamma(double x) + * + * Error: + * ------ + * Less that one ulp for both positive and negative arguments. 
+ * + * Algorithm: + * --------- + * A: For negative argument + * (1) gamma(-n or -inf) is NaN + * (2) Underflow Threshold + * (3) Reduction to gamma(1+x) + * B: For x between 1 and 2 + * C: For x between 0 and 1 + * D: For x between 2 and 8 + * E: Overflow threshold {see over.c} + * F: For overflow_threshold >= x >= 8 + * + * Implementation details + * ----------------------- + * -pi + * (A) For negative argument, use gamma(-x) = ------------------------. + * (sin(pi*x)*gamma(1+x)) + * + * (1) gamma(-n or -inf) is NaN with invalid signal by SUSv3 spec. + * (Ideally, gamma(-n) = 1/sinpi(n) = (-1)**(n+1) * inf.) + * + * (2) Underflow Threshold. For each precision, there is a value T + * such that when x>T and when x is not an integer, gamma(-x) will + * always underflow. A table of the underflow threshold value is given + * below. For proof, see file "under.c". + * + * Precision underflow threshold T = + * ---------------------------------------------------------------------- + * single 41.000041962 = 41 + 11 ULP + * (machine format) 4224000B + * double 183.000000000000312639 = 183 + 11 ULP + * (machine format) 4066E000 0000000B + * quad 1774.0000000000000000000000000000017749370 = 1774 + 9 ULP + * (machine format) 4009BB80000000000000000000000009 + * ---------------------------------------------------------------------- + * + * (3) Reduction to gamma(1+x). + * Because of (1) and (2), we need only consider non-integral x + * such that 0<x<T. Let k = [x] and z = x-[x]. Define + * sin(x*pi) cos(x*pi) + * kpsin(x) = --------- and kpcos(x) = --------- . Then + * pi pi + * 1 + * gamma(-x) = --------------------. + * -kpsin(x)*gamma(1+x) + * Since x = k+z, + * k+1 + * -sin(x*pi) = -sin(k*pi+z*pi) = (-1) *sin(z*pi), + * k+1 + * we have -kpsin(x) = (-1) * kpsin(z). We can further + * reduce z to t by + * (I) t = z when 0.00000 <= z < 0.31830... + * (II) t = 0.5-z when 0.31830... <= z < 0.681690... + * (III) t = 1-z when 0.681690... 
<= z < 1.00000 + * and correspondingly + * (I) kpsin(z) = kpsin(t) ... 0<= z < 0.3184 + * (II) kpsin(z) = kpcos(t) ... |t| < 0.182 + * (III) kpsin(z) = kpsin(t) ... 0<= t < 0.3184 + * + * Using a special Remez algorithm, we obtain the following polynomial + * approximation for kpsin(t) for 0<=t<0.3184: + * + * Computation note: in simulating higher precision arithmetic, kcpsin + * return head = t and tail = ks[0]*t^3 + (...) to maintain extra bits. + * + * Quad precision, remez error <= 2**(-129.74) + * 3 5 27 + * kpsin(t) = t + ks[0] * t + ks[1] * t + ... + ks[12] * t + * + * ks[ 0] = -1.64493406684822643647241516664602518705158902870e+0000 + * ks[ 1] = 8.11742425283353643637002772405874238094995726160e-0001 + * ks[ 2] = -1.90751824122084213696472111835337366232282723933e-0001 + * ks[ 3] = 2.61478478176548005046532613563241288115395517084e-0002 + * ks[ 4] = -2.34608103545582363750893072647117829448016479971e-0003 + * ks[ 5] = 1.48428793031071003684606647212534027556262040158e-0004 + * ks[ 6] = -6.97587366165638046518462722252768122615952898698e-0006 + * ks[ 7] = 2.53121740413702536928659271747187500934840057929e-0007 + * ks[ 8] = -7.30471182221385990397683641695766121301933621956e-0009 + * ks[ 9] = 1.71653847451163495739958249695549313987973589884e-0010 + * ks[10] = -3.34813314714560776122245796929054813458341420565e-0012 + * ks[11] = 5.50724992262622033449487808306969135431411753047e-0014 + * ks[12] = -7.67678132753577998601234393215802221104236979928e-0016 + * + * Double precision, Remez error <= 2**(-62.9) + * 3 5 15 + * kpsin(t) = t + ks[0] * t + ks[1] * t + ... 
+ ks[6] * t + * + * ks[0] = -1.644934066848226406065691 (0x3ffa51a6 625307d3) + * ks[1] = 8.11742425283341655883668741874008920850698590621e-0001 + * ks[2] = -1.90751824120862873825597279118304943994042258291e-0001 + * ks[3] = 2.61478477632554278317289628332654539353521911570e-0002 + * ks[4] = -2.34607978510202710377617190278735525354347705866e-0003 + * ks[5] = 1.48413292290051695897242899977121846763824221705e-0004 + * ks[6] = -6.87730769637543488108688726777687262485357072242e-0006 + * + * Single precision, Remez error <= 2**(-34.09) + * 3 5 9 + * kpsin(t) = t + ks[0] * t + ks[1] * t + ... + ks[3] * t + * + * ks[0] = -1.64493404985645811354476665052005342839447790544e+0000 + * ks[1] = 8.11740794458351064092797249069438269367389272270e-0001 + * ks[2] = -1.90703144603551216933075809162889536878854055202e-0001 + * ks[3] = 2.55742333994264563281155312271481108635575331201e-0002 + * + * Computation note: in simulating higher precision arithmetic, kcpsin + * return head = t and tail = kc[0]*t^3 + (...) to maintain extra bits + * precision. + * + * And for kpcos(t) for |t|< 0.183: + * + * Quad precision, remez <= 2**(-122.48) + * 2 4 22 + * kpcos(t) = 1/pi + pi/2 * t + kc[2] * t + ... 
+ kc[11] * t + * + * kc[2] = 1.29192819501249250731151312779548918765320728489e+0000 + * kc[3] = -4.25027339979557573976029596929319207009444090366e-0001 + * kc[4] = 7.49080661650990096109672954618317623888421628613e-0002 + * kc[5] = -8.21458866111282287985539464173976555436050215120e-0003 + * kc[6] = 6.14202578809529228503205255165761204750211603402e-0004 + * kc[7] = -3.33073432691149607007217330302595267179545908740e-0005 + * kc[8] = 1.36970959047832085796809745461530865597993680204e-0006 + * kc[9] = -4.41780774262583514450246512727201806217271097336e-0008 + * kc[10]= 1.14741409212381858820016567664488123478660705759e-0009 + * kc[11]= -2.44261236114707374558437500654381006300502749632e-0011 + * + * Double precision, remez < 2**(-61.91) + * 2 4 12 + * kpcos(t) = 1/pi + pi/2 *t + kc[2] * t + ... + kc[6] * t + * + * kc[2] = 1.29192819501230224953283586722575766189551966008e+0000 + * kc[3] = -4.25027339940149518500158850753393173519732149213e-0001 + * kc[4] = 7.49080625187015312373925142219429422375556727752e-0002 + * kc[5] = -8.21442040906099210866977352284054849051348692715e-0003 + * kc[6] = 6.10411356829515414575566564733632532333904115968e-0004 + * + * Single precision, remez < 2**(-30.13) + * 2 6 + * kpcos(t) = kc[0] + kc[1] * t + ... + kc[3] * t + * + * kc[0] = 3.18309886183790671537767526745028724068919291480e-0001 + * kc[1] = -1.57079581447762568199467875065854538626594937791e+0000 + * kc[2] = 1.29183528092558692844073004029568674027807393862e+0000 + * kc[3] = -4.20232949771307685981015914425195471602739075537e-0001 + * + * Computation note: in simulating higher precision arithmetic, kcpcos + * return head = 1/pi chopped, and tail = pi/2 *t^2 + (tail part of 1/pi + * + ...) to maintain extra bits precision. In particular, pi/2 * t^2 + * is calculated with great care. + * + * Thus, the computation of gamma(-x), x>0, is: + * Let k = int(x), z = x-k. 
+ * For z in (I) + * k+1 + * (-1) + * gamma(-x) = ------------------- ; + * kpsin(z)*gamma(1+x) + * + * otherwise, for z in (II), + * k+1 + * (-1) + * gamma(-x) = ----------------------- ; + * kpcos(0.5-z)*gamma(1+x) + * + * otherwise, for z in (III), + * k+1 + * (-1) + * gamma(-x) = --------------------- . + * kpsin(1-z)*gamma(1+x) + * + * Thus, the computation of gamma(-x) reduced to the computation of + * gamma(1+x) and kpsin(), kpcos(). + * + * (B) For x between 1 and 2. We break [1,2] into three parts: + * GT1 = [1.0000, 1.2845] + * GT2 = [1.2844, 1.6374] + * GT3 = [1.6373, 2.0000] + * + * For x in GTi, i=1,2,3, let + * z1 = 1.134861805732790769689793935774652917006 + * gz1 = gamma(z1) = 0.9382046279096824494097535615803269576988 + * tz1 = gamma'(z1) = -0.3517214357852935791015625000000000000000 + * + * z2 = 1.461632144968362341262659542325721328468e+0000 + * gz2 = gamma(z2) = 0.8856031944108887002788159005825887332080 + * tz2 = gamma'(z2) = 0.00 + * + * z3 = 1.819773101100500601787868704921606996312e+0000 + * gz3 = gamma(z3) = 0.9367814114636523216188468970808378497426 + * tz3 = gamma'(z3) = 0.2805306315422058105468750000000000000000 + * + * and + * y = x-zi ... for extra precision, write y = y.h + y.l + * Then + * gamma(x) = gzi + tzi*(y.h+y.l) + y*y*Ri(y), + * = gzi.h + (tzi*y.h + ((tzi*y.l+gzi.l) + y*y*Ri(y))) + * = gy.h + gy.l + * where + * (I) For double precision + * + * Ri(y) = Pi(y)/Qi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... + p1[4]*y^4 + * Q1(y) = q1[0] + q1[1]*y + ... + q1[5]*y^5 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[3]*y^3 + * Q2(y) = q2[0] + q2[1]*y + ... + q2[6]*y^6 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4 + * Q3(y) = q3[0] + q3[1]*y + ... + q3[5]*y^5 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-62.3 ... for i = 1 + * <= 2**-59.4 ... for i = 2 + * <= 2**-62.1 ... for i = 3 + * + * (II) For quad precision + * + * Ri(y) = Pi(y)/Qi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... 
+ p1[9]*y^9 + * Q1(y) = q1[0] + q1[1]*y + ... + q1[8]*y^8 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[9]*y^9 + * Q2(y) = q2[0] + q2[1]*y + ... + q2[9]*y^9 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[9]*y^9 + * Q3(y) = q3[0] + q3[1]*y + ... + q3[9]*y^9 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-118.2 ... for i = 1 + * <= 2**-126.8 ... for i = 2 + * <= 2**-119.5 ... for i = 3 + * + * (III) For single precision + * + * Ri(y) = Pi(y), i=1,2,3; + * + * P1(y) = p1[0] + p1[1]*y + ... + p1[5]*y^5 + * + * P2(y) = p2[0] + p2[1]*y + ... + p2[5]*y^5 + * + * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4 + * + * Remez precision of Ri(y): + * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-30.8 ... for i = 1 + * <= 2**-31.6 ... for i = 2 + * <= 2**-29.5 ... for i = 3 + * + * Notes. (1) GTi and zi are chosen to balance the interval width and + * minimize the distance between gamma(x) and the tangent line at + * zi. In particular, we have + * |gamma(x)-(gzi+tzi*(x-zi))| <= 0.01436... for x in [1,z2] + * <= 0.01265... for x in [z2,2] + * + * (2) zi are slightly adjusted so that tzi=gamma'(zi) is very + * close to a single precision value. 
+ * + * Coefficents: Single precision + * i= 1: + * P1[0] = 7.09087253435088360271451613398019280077561279443e-0001 + * P1[1] = -5.17229560788652108545141978238701790105241761089e-0001 + * P1[2] = 5.23403394528150789405825222323770647162337764327e-0001 + * P1[3] = -4.54586308717075010784041566069480411732634814899e-0001 + * P1[4] = 4.20596490915239085459964590559256913498190955233e-0001 + * P1[5] = -3.57307589712377520978332185838241458642142185789e-0001 + * + * i = 2: + * p2[0] = 4.28486983980295198166056119223984284434264344578e-0001 + * p2[1] = -1.30704539487709138528680121627899735386650103914e-0001 + * p2[2] = 1.60856285038051955072861219352655851542955430871e-0001 + * p2[3] = -9.22285161346010583774458802067371182158937943507e-0002 + * p2[4] = 7.19240511767225260740890292605070595560626179357e-0002 + * p2[5] = -4.88158265593355093703112238534484636193260459574e-0002 + * + * i = 3 + * p3[0] = 3.82409531118807759081121479786092134814808872880e-0001 + * p3[1] = 2.65309888180188647956400403013495759365167853426e-0002 + * p3[2] = 8.06815109775079171923561169415370309376296739835e-0002 + * p3[3] = -1.54821591666137613928840890835174351674007764799e-0002 + * p3[4] = 1.76308239242717268530498313416899188157165183405e-0002 + * + * Coefficents: Double precision + * i = 1: + * p1[0] = 0.70908683619977797008004927192814648151397705078125000 + * p1[1] = 1.71987061393048558089579513384356441668351720061e-0001 + * p1[2] = -3.19273345791990970293320316122813960527705450671e-0002 + * p1[3] = 8.36172645419110036267169600390549973563534476989e-0003 + * p1[4] = 1.13745336648572838333152213474277971244629758101e-0003 + * q1[0] = 1.0 + * q1[1] = 9.71980217826032937526460731778472389791321968082e-0001 + * q1[2] = -7.43576743326756176594084137256042653497087666030e-0002 + * q1[3] = -1.19345944932265559769719470515102012246995255372e-0001 + * q1[4] = 1.59913445751425002620935120470781382215050284762e-0002 + * q1[5] = 1.12601136853374984566572691306402321911547550783e-0003 + * i = 2: 
+ * p2[0] = 0.42848681585558601181418225678498856723308563232421875 + * p2[1] = 6.53596762668970816023718845105667418483122103629e-0002 + * p2[2] = -6.97280829631212931321050770925128264272768936731e-0003 + * p2[3] = 6.46342359021981718947208605674813260166116632899e-0003 + * q2[0] = 1.0 + * q2[1] = 4.57572620560506047062553957454062012327519313936e-0001 + * q2[2] = -2.52182594886075452859655003407796103083422572036e-0001 + * q2[3] = -1.82970945407778594681348166040103197178711552827e-0002 + * q2[4] = 2.43574726993169566475227642128830141304953840502e-0002 + * q2[5] = -5.20390406466942525358645957564897411258667085501e-0003 + * q2[6] = 4.79520251383279837635552431988023256031951133885e-0004 + * i = 3: + * p3[0] = 0.382409479734567459008331979930517263710498809814453125 + * p3[1] = 1.42876048697668161599069814043449301572928034140e-0001 + * p3[2] = 3.42157571052250536817923866013561760785748899071e-0003 + * p3[3] = -5.01542621710067521405087887856991700987709272937e-0004 + * p3[4] = 8.89285814866740910123834688163838287618332122670e-0004 + * q3[0] = 1.0 + * q3[1] = 3.04253086629444201002215640948957897906299633168e-0001 + * q3[2] = -2.23162407379999477282555672834881213873185520006e-0001 + * q3[3] = -1.05060867741952065921809811933670131427552903636e-0002 + * q3[4] = 1.70511763916186982473301861980856352005926669320e-0002 + * q3[5] = -2.12950201683609187927899416700094630764182477464e-0003 + * + * Note that all pi0 are exact in double, which is obtained by a + * special Remez Algorithm. 
+ * + * Coefficents: Quad precision + * i = 1: + * p1[0] = 0.709086836199777919037185741507610124611513720557 + * p1[1] = 4.45754781206489035827915969367354835667391606951e-0001 + * p1[2] = 3.21049298735832382311662273882632210062918153852e-0002 + * p1[3] = -5.71296796342106617651765245858289197369688864350e-0003 + * p1[4] = 6.04666892891998977081619174969855831606965352773e-0003 + * p1[5] = 8.99106186996888711939627812174765258822658645168e-0004 + * p1[6] = -6.96496846144407741431207008527018441810175568949e-0005 + * p1[7] = 1.52597046118984020814225409300131445070213882429e-0005 + * p1[8] = 5.68521076168495673844711465407432189190681541547e-0007 + * p1[9] = 3.30749673519634895220582062520286565610418952979e-0008 + * q1[0] = 1.0+0000 + * q1[1] = 1.35806511721671070408570853537257079579490650668e+0000 + * q1[2] = 2.97567810153429553405327140096063086994072952961e-0001 + * q1[3] = -1.52956835982588571502954372821681851681118097870e-0001 + * q1[4] = -2.88248519561420109768781615289082053597954521218e-0002 + * q1[5] = 1.03475311719937405219789948456313936302378395955e-0002 + * q1[6] = 4.12310203243891222368965360124391297374822742313e-0004 + * q1[7] = -3.12653708152290867248931925120380729518332507388e-0004 + * q1[8] = 2.36672170850409745237358105667757760527014332458e-0005 + * + * i = 2: + * p2[0] = 0.428486815855585429730209907810650616737756697477 + * p2[1] = 2.63622124067885222919192651151581541943362617352e-0001 + * p2[2] = 3.85520683670028865731877276741390421744971446855e-0002 + * p2[3] = 3.05065978278128549958897133190295325258023525862e-0003 + * p2[4] = 2.48232934951723128892080415054084339152450445081e-0003 + * p2[5] = 3.67092777065632360693313762221411547741550105407e-0004 + * p2[6] = 3.81228045616085789674530902563145250532194518946e-0006 + * p2[7] = 4.61677225867087554059531455133839175822537617677e-0006 + * p2[8] = 2.18209052385703200438239200991201916609364872993e-0007 + * p2[9] = 1.00490538985245846460006244065624754421022542454e-0008 + * q2[0] = 1.0 + 
* q2[1] = 9.20276350207639290567783725273128544224570775056e-0001 + * q2[2] = -4.79533683654165107448020515733883781138947771495e-0003 + * q2[3] = -1.24538337585899300494444600248687901947684291683e-0001 + * q2[4] = 4.49866050763472358547524708431719114204535491412e-0003 + * q2[5] = 7.20715455697920560621638325356292640604078591907e-0003 + * q2[6] = -8.68513169029126780280798337091982780598228096116e-0004 + * q2[7] = -1.25104431629401181525027098222745544809974229874e-0004 + * q2[8] = 3.10558344839000038489191304550998047521253437464e-0005 + * q2[9] = -1.76829227852852176018537139573609433652506765712e-0006 + * + * i = 3 + * p3[0] = 0.3824094797345675048502747661075355640070439388902 + * p3[1] = 3.42198093076618495415854906335908427159833377774e-0001 + * p3[2] = 9.63828189500585568303961406863153237440702754858e-0002 + * p3[3] = 8.76069421042696384852462044188520252156846768667e-0003 + * p3[4] = 1.86477890389161491224872014149309015261897537488e-0003 + * p3[5] = 8.16871354540309895879974742853701311541286944191e-0004 + * p3[6] = 6.83783483674600322518695090864659381650125625216e-0005 + * p3[7] = -1.10168269719261574708565935172719209272190828456e-0006 + * p3[8] = 9.66243228508380420159234853278906717065629721016e-0007 + * p3[9] = 2.31858885579177250541163820671121664974334728142e-0008 + * q3[0] = 1.0 + * q3[1] = 8.25479821168813634632437430090376252512793067339e-0001 + * q3[2] = -1.62251363073937769739639623669295110346015576320e-0002 + * q3[3] = -1.10621286905916732758745130629426559691187579852e-0001 + * q3[4] = 3.48309693970985612644446415789230015515365291459e-0003 + * q3[5] = 6.73553737487488333032431261131289672347043401328e-0003 + * q3[6] = -7.63222008393372630162743587811004613050245128051e-0004 + * q3[7] = -1.35792670669190631476784768961953711773073251336e-0004 + * q3[8] = 3.19610150954223587006220730065608156460205690618e-0005 + * q3[9] = -1.82096553862822346610109522015129585693354348322e-0006 + * + * (C) For x between 0 and 1. 
+ * Let P stand for the number of significant bits in the working precision. + * -P 1 + * (1)For 0 <= x <= 2 , gamma(x) is computed by --- rounded to nearest. + * x + * The error is bound by 0.739 ulp(gamma(x)) in IEEE double precision. + * Proof. + * 1 2 + * Since -------- ~ x + 0.577...*x - ..., we have, for small x, + * gamma(x) + * 1 1 + * ----------- < gamma(x) < --- and + * x(1+0.578x) x + * 1 1 1 + * 0 < --- - gamma(x) <= --- - ----------- < 0.578 + * x x x(1+0.578x) + * 1 1 -P + * The error is thus bounded by --- ulp(---) + 0.578. Since x <= 2 , + * 2 x + * 1 P 1 P 1 + * --- >= 2 , ulp(---) >= ulp(2 ) >= 2. Thus 0.578=0.289*2<=0.289ulp(-) + * x x x + * Thus + * 1 1 + * | gamma(x) - [---] rounded | <= (0.5+0.289)*ulp(---). + * x x + * -P 1 + * Note that for x<= 2 , it is easy to see that ulp(---)=ulp(gamma(x)) + * x + * n 1 + * except only when x = 2 , (n<= -53). In such cases, --- is exact + * x + * and therefore the error is bounded by + * 1 + * 0.289*ulp(---) = 0.289*2*ulp(gamma(x)) = 0.578ulp(gamma(x)). + * x + * Thus we conclude that the error in gamma is less than 0.739 ulp. + * + * (2)Otherwise, for x in GTi-1 (see B), let y = x-(zi-1). From (B) we obtain + * gamma(1+x) + * gamma(1+x) = gy.h + gy.l, then compute gamma(x) by -----------. + * x + * gy.h + * Implementation note. Write x = x.h+x.l, and Let th = ----- chopped to + * x + * 20 bits, then + * gy.h+gy.l + * gamma(x) = th + (---------- - th ) + * x + * 1 + * = th + ---*(gy.h-th*x.h+gy.l-th*x.l) + * x + * + * (D) For x between 2 and 8. Let n = 1+x chopped to an integer. Then + * + * gamma(x)=(x-1)*(x-2)*...*(x-n)*gamma(x-n) + * + * Since x-n is between 1 and 2, we can apply (B) to compute gamma(x). + * + * Implementation detail. The computation of (x-1)(x-2)...(x-n) in simulated + * higher precision arithmetic can be somewhat optimized. For example, in + * computing (x-1)*(x-2)*(x-3)*(x-4), if we compute (x-1)*(x-4) = z.h+z.l, + * then (x-2)(x-3) = z.h+2+z.l readily. 
In below, we list the expression + * of the formula to compute gamma(x). + * + * Assume x-n is in GTi (i=1,2, or 3, see B for detail). Let y = x - n - zi. + * By (B) we have gamma(x-n) = gy.h+gy.l. If x = x.h+x.l, then we have + * n=1 (x in [2,3]): + * gamma(x) = (x-1)*gamma(x-1) = (x-1)*(gy.h+gy.l) + * = [(x.h-1)+x.l]*(gy.h+gy.l) + * n=2 (x in [3,4]): + * gamma(x) = (x-1)(x-2)*gamma(x-2) = (x-1)*(x-2)*(gy.h+gy.l) + * = ((x.h-2)+x.l)*((x.h-1)+x.l)*(gy.h+gy.l) + * = [x.h*(x.h-3)+2+x.l*(x+(x.h-3))]*(gy.h+gy.l) + * n=3 (x in [4,5]) + * gamma(x) = (x-1)(x-2)(x-3)*(gy.h+gy.l) + * = (x.h*(x.h-3)+2+x.l*(x+(x.h-3)))*[((x.h-3)+x.l)(gy.h+gy.l)] + * n=4 (x in [5,6]) + * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*(gy.h+gy.l) + * = [(x.h*(x.h-5)+4+x.l(x+(x.h-5)))]*[(x-2)*(x-3)]*(gy.h+gy.l) + * = (y.h+y.l)*(y.h+1+y.l)*(gy.h+gy.l) + * n=5 (x in [6,7]) + * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*[(x-5)*(gy.h+gy.l)] + * n=6 (x in [7,8]) + * gamma(x) = [(x-1)(x-6)]*[(x-2)(x-5)]*[(x-3)(x-4)]*(gy.h+gy.l)] + * = [(y.h+y.l)(y.h+4+y.l)][(y.h+6+y.l)(gy.h+gy.l)] + * + * (E)Overflow Threshold. For x > Overflow threshold of gamma, + * return huge*huge (overflow). + * + * By checking whether lgamma(x) >= 2**{128,1024,16384}, one can + * determine the overflow threshold for x in single, double, and + * quad precision. See over.c for details. + * + * The overflow threshold of gamma(x) are + * + * single: x = 3.5040096283e+01 + * = 0x420C290F (IEEE single) + * double: x = 1.71624376956302711505e+02 + * = 0x406573FAE561F647 (IEEE double) + * quad: x = 1.7555483429044629170038892160702032034177e+03 + * = 0x4009B6E3180CD66A5C4206F128BA77F4 (quad) + * + * (F)For overflow_threshold >= x >= 8, we use asymptotic approximation. 
+ * (1) Stirling's formula + * + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + * = L1 + L2 + L3, + * where + * L1(x) = (x-.5)*(log(x)-1), + * L2 = .5(log(2pi)-1) = 0.41893853...., + * L3(x) = (1/x)P(1/(x*x)), + * + * The range of L1,L2, and L3 are as follows: + * + * ------------------------------------------------------------------ + * Range(L1) = (single) [8.09..,88.30..] =[2** 3.01..,2** 6.46..] + * (double) [8.09..,709.3..] =[2** 3.01..,2** 9.47..] + * (quad) [8.09..,11356.10..]=[2** 3.01..,2** 13.47..] + * Range(L2) = 0.41893853..... + * Range(L3) = [0.0104...., 0.00048....] =[2**-6.58..,2**-11.02..] + * ------------------------------------------------------------------ + * + * Gamma(x) is then computed by exp(L1+L2+L3). + * + * (2) Error analysis of (F): + * -------------------------- + * The error in Gamma(x) depends on the error inherited in the computation + * of L= L1+L2+L3. Let L' be the computed value of L. The absolute error + * in L' is t = L-L'. Since exp(L') = exp(L-t) = exp(L)*exp(t) ~ + * (1+t)*exp(L), the relative error in exp(L') is approximately t. + * + * To guarantee the relative accuracy in exp(L'), we would like + * |t| < 2**(-P-5) where P denotes the number of significant bits + * of the working precision. Consequently, each of the L1,L2, and L3 + * must be computed with absolute error bounded by 2**(-P-5) in absolute + * value. + * + * Since L2 is a constant, it can be pre-computed to the desired accuracy. + * Also |L3| < 2**-6; therefore, it suffices to compute L3 with the + * working precision. That is, + * L3(x) approximates log(G(x))-(x-.5)(log(x)-1)-.5(log(2pi)-1) + * to a precision bounded by 2**(-P-5). 
+ * + * 2**(-6) + * _________V___________________ + * L1(x): |_________|___________________| + * __ ________________________ + * L2: |__|________________________| + * __________________________ + * + L3(x): |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + * + * For L1(x)=(x-0.5)*(log(x)-1), we need ilogb(L1(x))+5 extra bits for + * both multiplicands to guarantee L1(x)'s absolute error is bounded by + * 2**(-P-5) in absolute value. Here ilogb(y) is defined to be the unbiased + * binary exponent of y in IEEE format. We can get x-0.5 to the desired + * accuracy easily. It remains to compute log(x)-1 with ilogb(L1(x))+5 + * extra bits accuracy. Note that the range of L1 is 88.30.., 709.3.., and + * 11356.10... for single, double, and quadruple precision, we have + * + * single double quadruple + * ------------------------------------ + * ilogb(L1(x))+5 <= 11 14 18 + * ------------------------------------ + * + * (3) Table Driven Method for log(x)-1: + * -------------------------------------- + * Let x = 2**n * y, where 1 <= y < 2. Let Z={z(i),i=1,...,m} + * be a set of predetermined evenly distributed floating point numbers + * in [1, 2]. Let z(j) be the closest one to y, then + * log(x)-1 = n*log(2)-1 + log(y) + * = n*log(2)-1 + log(z(j)*y/z(j)) + * = n*log(2)-1 + log(z(j)) + log(y/z(j)) + * = T1(n) + T2(j) + T3, + * + * where T1(n) = n*log(2)-1 and T2(j) = log(z(j)). Both T1 and T2 can be + * pre-calculated and be looked-up in a table. Note that 8 <= x < 1756 + * implies 3<=n<=10 implies 1.079.. < T1(n) < 6.931. + * + * + * y-z(i) y 1+s + * For T3, let s = --------; then ----- = ----- and + * y+z(i) z(i) 1-s + * 1+s 2 3 2 5 + * T3 = log(-----) = 2s + --- s + --- s + .... + * 1-s 3 5 + * + * Suppose the first term 2s is compute in extra precision. The + * dominating error in T3 would then be the rounding error of the + * second term 2/3*s**3. 
To force the rounding bounded by + * the required accuracy, we have + * single: |2/3*s**3| < 2**-11 ==> |s|<0.09014... + * double: |2/3*s**3| < 2**-14 ==> |s|<0.04507... + * quad : |2/3*s**3| < 2**-18 ==> |s|<0.01788... = 2**(-5.80..) + * + * Based on this analysis, we choose Z = {z(i)|z(i)=1+i/64+1/128, 0<=i<=63}. + * For any y in [1,2), let j = [64*y] chopped to integer, then z(j) is + * the closest to y, and it is not difficult to see that |s| < 2**(-8). + * Please note that the polynomial approximation of T3 must be accurate + * -24-11 -35 -53-14 -67 -113-18 -131 + * to 2 =2 , 2 = 2 , and 2 =2 + * for single, double, and quadruple precision respectively. + * + * Implementation notes. + * (1) Table look-up entries for T1(n) and T2(j), as well as the calculation + * of the leading term 2s in T3, are broken up into leading and trailing + * part such that (leading part)* 2**24 will always be an integer. That + * will guarantee the addition of the leading parts will be exact. + * + * 2**(-24) + * _________V___________________ + * T1(n): |_________|___________________| + * _______ ______________________ + * T2(j): |_______|______________________| + * ____ _______________________ + * 2s: |____|_______________________| + * __________________________ + * + T3(s)-2s: |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + * + * (2) How to compute 2s accurately. + * (A) Compute v = 2s to the working precision. If |v| < 2**(-18), + * stop. + * (B) chopped v to 2**(-24): v = ((int)(v*2**24))/2**24 + * (C) 2s = v + (2s - v), where + * 1 + * 2s - v = --- * (2(y-z) - v*(y+z) ) + * y+z + * 1 + * = --- * ( [2(y-z) - v*(y+z)_h ] - v*(y+z)_l ) + * y+z + * where (y+z)_h = (y+z) rounded to 24 bits by (double)(float), + * and (y+z)_l = ((z+z)-(y+z)_h)+(y-z). Note that the quantity + * in [] is exact. 
+ * (3) Remez approximation for (T3(s)-2s)/s = T3[0]*s^2 + T3[1]*s^4 + ...: + * Single precision: 1 term (computed in double precision arithmetic) + * T3(s) = 2s + S1*s^3, S1 = 0.6666717231848518054693623697539230 + * Remez error: |T3(s)/s - (2s+S1*s^3)| < 2**(-35.87) + * Double precision: 3 terms, Remez error is bounded by 2**(-72.40), + * see "tgamma_log" + * Quad precision: 7 terms, Remez error is bounded by 2**(-136.54), + * see "tgammal_log" + * + * The computation of 0.5*(ln(2pi)-1): + * 0.5*(ln(2pi)-1) = 0.4189385332046727417803297364056176398614... + * split 0.5*(ln(2pi)-1) to hln2pi_h + hln2pi_l, where hln2pi_h is the + * leading 21 bits of the constant. + * hln2pi_h= 0.4189383983612060546875 + * hln2pi_l= 1.348434666870928297364056176398612173648e-07 + * (The c[] table in the code uses a different split of the same + * constant: hln2pi_h = 4.18937683105468750000e-01 and + * hln2pi_l = 8.50099203991780279640e-07.) + * + * The computation of 1/x*P(1/x^2) = log(G(x))-(x-.5)(ln(x)-1)-.5(ln(2pi)-1): + * Let s = 1/x <= 1/8 = 0.125. We have + * quad precision + * |GP(s) - s*P(s^2)| <= 2**(-120.6), where + * 3 5 39 + * GP(s) = GP0*s+GP1*s +GP2*s +...
+GP19*s , + * GP0 = 0.083333333333333333333333333333333172839171301 + * hex 0x3ffe5555 55555555 55555555 55555548 + * GP1 = -2.77777777777777777777777777492501211999399424104e-0003 + * GP2 = 7.93650793650793650793635650541638236350020883243e-0004 + * GP3 = -5.95238095238095238057299772679324503339241961704e-0004 + * GP4 = 8.41750841750841696138422987977683524926142600321e-0004 + * GP5 = -1.91752691752686682825032547823699662178842123308e-0003 + * GP6 = 6.41025641022403480921891559356473451161279359322e-0003 + * GP7 = -2.95506535798414019189819587455577003732808185071e-0002 + * GP8 = 1.79644367229970031486079180060923073476568732136e-0001 + * GP9 = -1.39243086487274662174562872567057200255649290646e+0000 + * GP10 = 1.34025874044417962188677816477842265259608269775e+0001 + * GP11 = -1.56803713480127469414495545399982508700748274318e+0002 + * GP12 = 2.18739841656201561694927630335099313968924493891e+0003 + * GP13 = -3.55249848644100338419187038090925410976237921269e+0004 + * GP14 = 6.43464880437835286216768959439484376449179576452e+0005 + * GP15 = -1.20459154385577014992600342782821389605893904624e+0007 + * GP16 = 2.09263249637351298563934942349749718491071093210e+0008 + * GP17 = -2.96247483183169219343745316433899599834685703457e+0009 + * GP18 = 2.88984933605896033154727626086506756972327292981e+0010 + * GP19 = -1.40960434146030007732838382416230610302678063984e+0011 + * + * double precision + * |GP(s) - s*P(s^2)| <= 2**(-63.5), where + * 3 5 7 9 11 13 15 + * GP(s) = GP0*s+GP1*s +GP2*s +GP3*s +GP4*s +GP5*s +GP6*s +GP7*s , + * + * GP0= 0.0833333333333333287074040640618477 (3FB55555 55555555) + * GP1= -2.77777777776649355200565611114627670089130772843e-0003 + * GP2= 7.93650787486083724805476194170211775784158551509e-0004 + * GP3= -5.95236628558314928757811419580281294593903582971e-0004 + * GP4= 8.41566473999853451983137162780427812781178932540e-0004 + * GP5= -1.90424776670441373564512942038926168175921303212e-0003 + * GP6= 
5.84933161530949666312333949534482303007354299178e-0003 + * GP7= -1.59453228931082030262124832506144392496561694550e-0002 + * single precision + * |GP(s) - s*P(s^2)| <= 2**(-37.78), where + * GP(s) = GP0*s+GP1*s^3+GP2*s^5 + * GP0 = 8.33333330959694065245736888749042811909994573178e-0002 + * GP1 = -2.77765545601667179767706600890361535225507762168e-0003 + * GP2 = 7.77830853479775281781085278324621033523037489883e-0004 + * + * + * Implementation note: + * z = (1/x), z2 = z*z, z4 = z2*z2; + * p = z*(GP0+z2*(GP1+....+z2*GP7)) + * = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7))))) + * + * Adding everything up: + * t = rr.h*ww.h+hln2pi_h ... exact + * w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p + * + * Computing exp(t+w): + * s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then + * exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where + * expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and + * 2**(j/32) is obtained by table look-up S[j]+S_trail[j]. + * Remez error bound: + * |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63).
+ */ + +#include "libm.h" + +#define __HI(x) ((int *) &x)[HIWORD] +#define __LO(x) ((unsigned *) &x)[LOWORD] + +struct Double { + double h; + double l; +}; + +/* Hex value of GP0 shoule be 3FB55555 55555555 */ +static const double c[] = { + +1.0, + +2.0, + +0.5, + +1.0e-300, + +6.66666666666666740682e-01, /* A1=T3[0] */ + +3.99999999955626478023093908674902212920e-01, /* A2=T3[1] */ + +2.85720221533145659809237398709372330980e-01, /* A3=T3[2] */ + +0.0833333333333333287074040640618477, /* GP[0] */ + -2.77777777776649355200565611114627670089130772843e-03, + +7.93650787486083724805476194170211775784158551509e-04, + -5.95236628558314928757811419580281294593903582971e-04, + +8.41566473999853451983137162780427812781178932540e-04, + -1.90424776670441373564512942038926168175921303212e-03, + +5.84933161530949666312333949534482303007354299178e-03, + -1.59453228931082030262124832506144392496561694550e-02, + +4.18937683105468750000e-01, /* hln2pi_h */ + +8.50099203991780279640e-07, /* hln2pi_l */ + +4.18938533204672741744150788368695779923320328369e-01, /* hln2pi */ + +2.16608493865351192653e-02, /* ln2_32hi */ + +5.96317165397058656257e-12, /* ln2_32lo */ + +4.61662413084468283841e+01, /* invln2_32 */ + +5.0000000000000000000e-1, /* Et1 */ + +1.66666666665223585560605991943703896196054020060e-01, /* Et2 */ + +4.16666666665895103520154073534275286743788421687e-02, /* Et3 */ + +8.33336844093536520775865096538773197505523826029e-03, /* Et4 */ + +1.38889201930843436040204096950052984793587640227e-03, /* Et5 */ +}; + +#define one c[0] +#define two c[1] +#define half c[2] +#define tiny c[3] +#define A1 c[4] +#define A2 c[5] +#define A3 c[6] +#define GP0 c[7] +#define GP1 c[8] +#define GP2 c[9] +#define GP3 c[10] +#define GP4 c[11] +#define GP5 c[12] +#define GP6 c[13] +#define GP7 c[14] +#define hln2pi_h c[15] +#define hln2pi_l c[16] +#define hln2pi c[17] +#define ln2_32hi c[18] +#define ln2_32lo c[19] +#define invln2_32 c[20] +#define Et1 c[21] +#define Et2 c[22] +#define Et3 
c[23] +#define Et4 c[24] +#define Et5 c[25] + +/* + * double precision coefficients for computing log(x)-1 in tgamma. + * See "algorithm" for details + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + * = 2s + A1*s^3 + A2*s^5 + A3*s^7 (see const A1,A2,A3) + * Note + * (1) the leading entries are truncated to 24 binary point. + * See Remezpak/sun/tgamma_log_64.c + * (2) Remez error for T3(s) is bounded by 2**(-72.4) + * See mpremez/work/Log/tgamma_log_4_outr2 + */ + +static const double T1[] = { + -1.00000000000000000000e+00, /* 0xBFF00000 0x00000000 */ + +0.00000000000000000000e+00, /* 0x00000000 0x00000000 */ + -3.06852817535400390625e-01, /* 0xBFD3A37A 0x00000000 */ + -1.90465429995776763166e-09, /* 0xBE205C61 0x0CA86C38 */ + +3.86294305324554443359e-01, /* 0x3FD8B90B 0xC0000000 */ + +5.57953361754750897367e-08, /* 0x3E6DF473 0xDE6AF279 */ + +1.07944148778915405273e+00, /* 0x3FF14564 0x70000000 */ + +5.38906818755173187963e-08, /* 0x3E6CEEAD 0xCDA06BB5 */ + +1.77258867025375366211e+00, /* 0x3FFC5C85 0xF0000000 */ + +5.19860275755595544734e-08, /* 0x3E6BE8E7 0xBCD5E4F2 */ + +2.46573585271835327148e+00, /* 0x4003B9D3 0xB8000000 */ + +5.00813732756017835330e-08, /* 0x3E6AE321 0xAC0B5E2E */ + +3.15888303518295288086e+00, /* 0x40094564 0x78000000 */ + +4.81767189756440192100e-08, /* 0x3E69DD5B 0x9B40D76B */ + +3.85203021764755249023e+00, /* 0x400ED0F5 0x38000000 */ + +4.62720646756862482697e-08, /* 0x3E68D795 0x8A7650A7 */ + +4.54517740011215209961e+00, /* 0x40122E42 0xFC000000 */ + +4.43674103757284839467e-08, /* 0x3E67D1CF 0x79ABC9E4 */ + +5.23832458257675170898e+00, /* 0x4014F40B 0x5C000000 */ + +4.24627560757707130063e-08, /* 0x3E66CC09 0x68E14320 */ + +5.93147176504135131836e+00, /* 0x4017B9D3 0xBC000000 */ + +4.05581017758129486834e-08, /* 0x3E65C643 
0x5816BC5D */ +}; + +static const double T2[] = { + +7.78210163116455078125e-03, /* 0x3F7FE020 0x00000000 */ + +3.88108903981662140884e-08, /* 0x3E64D620 0xCF11F86F */ + +2.31670141220092773438e-02, /* 0x3F97B918 0x00000000 */ + +4.51595251008850513740e-08, /* 0x3E683EAD 0x88D54940 */ + +3.83188128471374511719e-02, /* 0x3FA39E86 0x00000000 */ + +5.14549991480218823411e-08, /* 0x3E6B9FEB 0xD5FA9016 */ + +5.32444715499877929688e-02, /* 0x3FAB42DC 0x00000000 */ + +4.29688244898971182165e-08, /* 0x3E671197 0x1BEC28D1 */ + +6.79506063461303710938e-02, /* 0x3FB16536 0x00000000 */ + +5.55623773783008185114e-08, /* 0x3E6DD46F 0x5C1D0C4C */ + +8.24436545372009277344e-02, /* 0x3FB51B07 0x00000000 */ + +1.46738736635337847313e-08, /* 0x3E4F830C 0x1FB493C7 */ + +9.67295765876770019531e-02, /* 0x3FB8C345 0x00000000 */ + +4.98708741103424492282e-08, /* 0x3E6AC633 0x641EB597 */ + +1.10814332962036132812e-01, /* 0x3FBC5E54 0x00000000 */ + +3.33782539813823062226e-08, /* 0x3E61EB78 0xE862BAC3 */ + +1.24703466892242431641e-01, /* 0x3FBFEC91 0x00000000 */ + +1.16087148042227818450e-08, /* 0x3E48EDF5 0x5D551729 */ + +1.38402283191680908203e-01, /* 0x3FC1B72A 0x80000000 */ + +3.96674382274822001957e-08, /* 0x3E654BD9 0xE80A4181 */ + +1.51916027069091796875e-01, /* 0x3FC371FC 0x00000000 */ + +1.49567501781968021494e-08, /* 0x3E500F47 0xBA1DE6CB */ + +1.65249526500701904297e-01, /* 0x3FC526E5 0x80000000 */ + +4.63946052585787334062e-08, /* 0x3E68E86D 0x0DE8B900 */ + +1.78407609462738037109e-01, /* 0x3FC6D60F 0x80000000 */ + +4.80100802600100279538e-08, /* 0x3E69C674 0x8723551E */ + +1.91394805908203125000e-01, /* 0x3FC87FA0 0x00000000 */ + +4.70914263296092971436e-08, /* 0x3E694832 0x44240802 */ + +2.04215526580810546875e-01, /* 0x3FCA23BC 0x00000000 */ + +1.48478803446288209001e-08, /* 0x3E4FE2B5 0x63193712 */ + +2.16873884201049804688e-01, /* 0x3FCBC286 0x00000000 */ + +5.40995645549315919488e-08, /* 0x3E6D0B63 0x358A7E74 */ + +2.29374051094055175781e-01, /* 0x3FCD5C21 0x00000000 */ + 
+4.99707906542102284117e-08, /* 0x3E6AD3EE 0xE456E443 */ + +2.41719901561737060547e-01, /* 0x3FCEF0AD 0x80000000 */ + +3.53254081075974352804e-08, /* 0x3E62F716 0x4D948638 */ + +2.53915190696716308594e-01, /* 0x3FD04025 0x80000000 */ + +1.92842471355435739091e-08, /* 0x3E54B4D0 0x40DAE27C */ + +2.65963494777679443359e-01, /* 0x3FD1058B 0xC0000000 */ + +5.37194584979797487125e-08, /* 0x3E6CD725 0x6A8C4FD0 */ + +2.77868449687957763672e-01, /* 0x3FD1C898 0xC0000000 */ + +1.31549854251447496506e-09, /* 0x3E16999F 0xAFBC68E7 */ + +2.89633274078369140625e-01, /* 0x3FD2895A 0x00000000 */ + +1.85046735362538929911e-08, /* 0x3E53DE86 0xA35EB493 */ + +3.01261305809020996094e-01, /* 0x3FD347DD 0x80000000 */ + +2.47691407849191245052e-08, /* 0x3E5A987D 0x54D64567 */ + +3.12755703926086425781e-01, /* 0x3FD40430 0x80000000 */ + +6.07781046260499658610e-09, /* 0x3E3A1A9F 0x8EF4304A */ + +3.24119448661804199219e-01, /* 0x3FD4BE5F 0x80000000 */ + +1.99924077768719198045e-08, /* 0x3E557778 0xA0DB4C99 */ + +3.35355520248413085938e-01, /* 0x3FD57677 0x00000000 */ + +2.16727247443196802771e-08, /* 0x3E57455A 0x6C549AB7 */ + +3.46466720104217529297e-01, /* 0x3FD62C82 0xC0000000 */ + +4.72419910516215900493e-08, /* 0x3E695CE3 0xCA97B7B0 */ + +3.57455849647521972656e-01, /* 0x3FD6E08E 0x80000000 */ + +3.92742818015697624778e-08, /* 0x3E6515D0 0xF1C609CA */ + +3.68325531482696533203e-01, /* 0x3FD792A5 0x40000000 */ + +2.96760111198451042238e-08, /* 0x3E5FDD47 0xA27C15DA */ + +3.79078328609466552734e-01, /* 0x3FD842D1 0xC0000000 */ + +2.43255029056564770289e-08, /* 0x3E5A1E8B 0x17493B14 */ + +3.89716744422912597656e-01, /* 0x3FD8F11E 0x80000000 */ + +6.71711261571421332726e-09, /* 0x3E3CD98B 0x1DF85DA7 */ + +4.00243163108825683594e-01, /* 0x3FD99D95 0x80000000 */ + +1.01818702333557515008e-09, /* 0x3E117E08 0xACBA92EF */ + +4.10659909248352050781e-01, /* 0x3FDA4840 0x80000000 */ + +1.57369163351530571459e-08, /* 0x3E50E5BB 0x0A2BFCA7 */ + +4.20969247817993164062e-01, /* 0x3FDAF129 
0x00000000 */ + +4.68261364720663662040e-08, /* 0x3E6923BC 0x358899C2 */ + +4.31173443794250488281e-01, /* 0x3FDB9858 0x80000000 */ + +2.10241208525779214510e-08, /* 0x3E569310 0xFB598FB1 */ + +4.41274523735046386719e-01, /* 0x3FDC3DD7 0x80000000 */ + +3.70698288427707487748e-08, /* 0x3E63E6D6 0xA6B9D9E1 */ + +4.51274633407592773438e-01, /* 0x3FDCE1AF 0x00000000 */ + +1.07318658117071930723e-08, /* 0x3E470BE7 0xD6F6FA58 */ + +4.61175680160522460938e-01, /* 0x3FDD83E7 0x00000000 */ + +3.49616477054305011286e-08, /* 0x3E62C517 0x9F2828AE */ + +4.70979690551757812500e-01, /* 0x3FDE2488 0x00000000 */ + +2.46670332000468969567e-08, /* 0x3E5A7C6C 0x261CBD8F */ + +4.80688512325286865234e-01, /* 0x3FDEC399 0xC0000000 */ + +1.70204650424422423704e-08, /* 0x3E52468C 0xC0175CEE */ + +4.90303933620452880859e-01, /* 0x3FDF6123 0xC0000000 */ + +5.44247409572909703749e-08, /* 0x3E6D3814 0x5630A2B6 */ + +4.99827861785888671875e-01, /* 0x3FDFFD2E 0x00000000 */ + +7.77056065794633071345e-09, /* 0x3E40AFE9 0x30AB2FA0 */ + +5.09261846542358398438e-01, /* 0x3FE04BDF 0x80000000 */ + +5.52474495483665749052e-08, /* 0x3E6DA926 0xD265FCC1 */ + +5.18607735633850097656e-01, /* 0x3FE0986F 0x40000000 */ + +2.85741955344967264536e-08, /* 0x3E5EAE6A 0x41723FB5 */ + +5.27867078781127929688e-01, /* 0x3FE0E449 0x80000000 */ + +1.08397144554263914271e-08, /* 0x3E474732 0x2FDBAB97 */ + +5.37041425704956054688e-01, /* 0x3FE12F71 0x80000000 */ + +4.01919275998792285777e-08, /* 0x3E6593EF 0xBC530123 */ + +5.46132385730743408203e-01, /* 0x3FE179EA 0xA0000000 */ + +5.18673922421792693237e-08, /* 0x3E6BD899 0xA0BFC60E */ + +5.55141448974609375000e-01, /* 0x3FE1C3B8 0x00000000 */ + +5.85658922177154808539e-08, /* 0x3E6F713C 0x24BC94F9 */ + +5.64070105552673339844e-01, /* 0x3FE20CDC 0xC0000000 */ + +3.27321296262276338905e-08, /* 0x3E6192AB 0x6D93503D */ + +5.72919726371765136719e-01, /* 0x3FE2555B 0xC0000000 */ + +2.71900203723740076878e-08, /* 0x3E5D31EF 0x96780876 */ + +5.81691682338714599609e-01, /* 
0x3FE29D37 0xE0000000 */ + +5.72959078829112371070e-08, /* 0x3E6EC2B0 0x8AC85CD7 */ + +5.90387403964996337891e-01, /* 0x3FE2E474 0x20000000 */ + +4.26371800367512948470e-08, /* 0x3E66E402 0x68405422 */ + +5.99008142948150634766e-01, /* 0x3FE32B13 0x20000000 */ + +4.66979327646159769249e-08, /* 0x3E69121D 0x71320557 */ + +6.07555210590362548828e-01, /* 0x3FE37117 0xA0000000 */ + +3.96341792466729582847e-08, /* 0x3E654747 0xB5C5DD02 */ + +6.16029858589172363281e-01, /* 0x3FE3B684 0x40000000 */ + +1.86263416563663175432e-08, /* 0x3E53FFF8 0x455F1DBE */ + +6.24433279037475585938e-01, /* 0x3FE3FB5B 0x80000000 */ + +8.97441791510503832111e-09, /* 0x3E4345BD 0x096D3A75 */ + +6.32766664028167724609e-01, /* 0x3FE43F9F 0xE0000000 */ + +5.54287010493641158796e-09, /* 0x3E37CE73 0x3BD393DD */ + +6.41031146049499511719e-01, /* 0x3FE48353 0xC0000000 */ + +3.33714317793368531132e-08, /* 0x3E61EA88 0xDF73D5E9 */ + +6.49227917194366455078e-01, /* 0x3FE4C679 0xA0000000 */ + +2.94307433638127158696e-08, /* 0x3E5F99DC 0x7362D1DA */ + +6.57358050346374511719e-01, /* 0x3FE50913 0xC0000000 */ + +2.23619855184231409785e-08, /* 0x3E5802D0 0xD6979675 */ + +6.65422618389129638672e-01, /* 0x3FE54B24 0x60000000 */ + +1.41559608102782173188e-08, /* 0x3E4E6652 0x5EA4550A */ + +6.73422634601593017578e-01, /* 0x3FE58CAD 0xA0000000 */ + +4.06105737027198329700e-08, /* 0x3E65CD79 0x893092F2 */ + +6.81359171867370605469e-01, /* 0x3FE5CDB1 0xC0000000 */ + +5.29405324634793230630e-08, /* 0x3E6C6C17 0x648CF6E4 */ + +6.89233243465423583984e-01, /* 0x3FE60E32 0xE0000000 */ + +3.77733853963405370102e-08, /* 0x3E644788 0xD8CA7C89 */ +}; + +/* S[j],S_trail[j] = 2**(j/32.) 
for the final computation of exp(t+w) */ +static const double S[] = { + +1.00000000000000000000e+00, /* 3FF0000000000000 */ + +1.02189714865411662714e+00, /* 3FF059B0D3158574 */ + +1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ + +1.06714040067682369717e+00, /* 3FF11301D0125B51 */ + +1.09050773266525768967e+00, /* 3FF172B83C7D517B */ + +1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ + +1.13878863475669156458e+00, /* 3FF2387A6E756238 */ + +1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ + +1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ + +1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ + +1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ + +1.26905095719173321989e+00, /* 3FF44E086061892D */ + +1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ + +1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ + +1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ + +1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ + +1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ + +1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */ + +1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ + +1.50916442759342284141e+00, /* 3FF82589994CCE13 */ + +1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ + +1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ + +1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ + +1.64575547815396494578e+00, /* 3FFA5503B23E255D */ + +1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ + +1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ + +1.75625216037329945351e+00, /* 3FFC199BDD85529C */ + +1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ + +1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ + +1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ + +1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ + +1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ +}; + +static const double S_trail[] = { + +0.00000000000000000000e+00, + +5.10922502897344389359e-17, /* 3C8D73E2A475B465 */ + +8.55188970553796365958e-17, /* 3C98A62E4ADC610A */ + 
-7.89985396684158212226e-17, /* BC96C51039449B3A */ + -3.04678207981247114697e-17, /* BC819041B9D78A76 */ + +1.04102784568455709549e-16, /* 3C9E016E00A2643C */ + +8.91281267602540777782e-17, /* 3C99B07EB6C70573 */ + +3.82920483692409349872e-17, /* 3C8612E8AFAD1255 */ + +3.98201523146564611098e-17, /* 3C86F46AD23182E4 */ + -7.71263069268148813091e-17, /* BC963AEABF42EAE2 */ + +4.65802759183693679123e-17, /* 3C8ADA0911F09EBC */ + +2.66793213134218609523e-18, /* 3C489B7A04EF80D0 */ + +2.53825027948883149593e-17, /* 3C7D4397AFEC42E2 */ + -2.85873121003886075697e-17, /* BC807ABE1DB13CAC */ + +7.70094837980298946162e-17, /* 3C96324C054647AD */ + -6.77051165879478628716e-17, /* BC9383C17E40B497 */ + -9.66729331345291345105e-17, /* BC9BDD3413B26456 */ + -3.02375813499398731940e-17, /* BC816E4786887A99 */ + -3.48399455689279579579e-17, /* BC841577EE04992F */ + -1.01645532775429503911e-16, /* BC9D4C1DD41532D8 */ + +7.94983480969762085616e-17, /* 3C96E9F156864B27 */ + -1.01369164712783039808e-17, /* BC675FC781B57EBC */ + +2.47071925697978878522e-17, /* 3C7C7C46B071F2BE */ + -1.01256799136747726038e-16, /* BC9D2F6EDB8D41E1 */ + +8.19901002058149652013e-17, /* 3C97A1CD345DCC81 */ + -1.85138041826311098821e-17, /* BC75584F7E54AC3B */ + +2.96014069544887330703e-17, /* 3C811065895048DD */ + +1.82274584279120867698e-17, /* 3C7503CBD1E949DB */ + +3.28310722424562658722e-17, /* 3C82ED02D75B3706 */ + -6.12276341300414256164e-17, /* BC91A5CD4F184B5C */ + -1.06199460561959626376e-16, /* BC9E9C23179C2893 */ + +8.96076779103666776760e-17, /* 3C99D3E12DD8A18B */ +}; + +/* Primary interval GTi() */ +static const double cr[] = { +/* p1, q1 */ + +0.70908683619977797008004927192814648151397705078125000, + +1.71987061393048558089579513384356441668351720061e-0001, + -3.19273345791990970293320316122813960527705450671e-0002, + +8.36172645419110036267169600390549973563534476989e-0003, + +1.13745336648572838333152213474277971244629758101e-0003, + +1.0, + 
+9.71980217826032937526460731778472389791321968082e-0001, + -7.43576743326756176594084137256042653497087666030e-0002, + -1.19345944932265559769719470515102012246995255372e-0001, + +1.59913445751425002620935120470781382215050284762e-0002, + +1.12601136853374984566572691306402321911547550783e-0003, +/* p2, q2 */ + +0.42848681585558601181418225678498856723308563232421875, + +6.53596762668970816023718845105667418483122103629e-0002, + -6.97280829631212931321050770925128264272768936731e-0003, + +6.46342359021981718947208605674813260166116632899e-0003, + +1.0, + +4.57572620560506047062553957454062012327519313936e-0001, + -2.52182594886075452859655003407796103083422572036e-0001, + -1.82970945407778594681348166040103197178711552827e-0002, + +2.43574726993169566475227642128830141304953840502e-0002, + -5.20390406466942525358645957564897411258667085501e-0003, + +4.79520251383279837635552431988023256031951133885e-0004, +/* p3, q3 */ + +0.382409479734567459008331979930517263710498809814453125, + +1.42876048697668161599069814043449301572928034140e-0001, + +3.42157571052250536817923866013561760785748899071e-0003, + -5.01542621710067521405087887856991700987709272937e-0004, + +8.89285814866740910123834688163838287618332122670e-0004, + +1.0, + +3.04253086629444201002215640948957897906299633168e-0001, + -2.23162407379999477282555672834881213873185520006e-0001, + -1.05060867741952065921809811933670131427552903636e-0002, + +1.70511763916186982473301861980856352005926669320e-0002, + -2.12950201683609187927899416700094630764182477464e-0003, +}; + +#define P10 cr[0] +#define P11 cr[1] +#define P12 cr[2] +#define P13 cr[3] +#define P14 cr[4] +#define Q10 cr[5] +#define Q11 cr[6] +#define Q12 cr[7] +#define Q13 cr[8] +#define Q14 cr[9] +#define Q15 cr[10] +#define P20 cr[11] +#define P21 cr[12] +#define P22 cr[13] +#define P23 cr[14] +#define Q20 cr[15] +#define Q21 cr[16] +#define Q22 cr[17] +#define Q23 cr[18] +#define Q24 cr[19] +#define Q25 cr[20] +#define Q26 cr[21] +#define P30 cr[22] 
+#define P31 cr[23] +#define P32 cr[24] +#define P33 cr[25] +#define P34 cr[26] +#define Q30 cr[27] +#define Q31 cr[28] +#define Q32 cr[29] +#define Q33 cr[30] +#define Q34 cr[31] +#define Q35 cr[32] + +static const double + GZ1_h = +0.938204627909682398190, + GZ1_l = +5.121952600248205157935e-17, + GZ2_h = +0.885603194410888749921, + GZ2_l = -4.964236872556339810692e-17, + GZ3_h = +0.936781411463652347038, + GZ3_l = -2.541923110834479415023e-17, + TZ1 = -0.3517214357852935791015625, + TZ3 = +0.280530631542205810546875; +/* INDENT ON */ + +/* compute gamma on GT1 = [1.0000, 1.2845] from the offset y = yh+yl; gam_n() passes yh+yl = x-(t0z1+t0z1_l) to obtain gamma(x+1) for x <= 0.2845. The value is formed as (GZ1_h+GZ1_l) + TZ1*y + y*y*P1(y)/Q1(y), with P1 = P10..P14 and Q1 = Q10..Q15, and is returned as r.h + r.l where r.h is rounded to float. */ +/* assume yh has 20 significant bits (gam_n() passes yh rounded to float) */ +static struct Double +GT1(double yh, double yl) { + double t3, t4, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) / + (Q10 + y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15))); + t3 += (TZ1 * yl + GZ1_l); + t4 = TZ1 * yh; + r.h = (double) ((float) (t4 + GZ1_h + t3)); + t3 += (t4 - (r.h - GZ1_h)); + r.l = t3; + return (r); +} + +/* compute gamma on GT2 = [1.2844, 1.6374] from the offset y = yh+yl; gam_n() passes yh+yl = x-(t0z2+t0z2_l) to obtain gamma(x+1) for 0.2845 < x <= 0.6374. The value is formed as (GZ2_h+GZ2_l) + y*y*P2(y)/Q2(y), with P2 = P20..P23 and Q2 = Q20..Q26 (this interval uses no linear term), and is returned as r.h + r.l where r.h is rounded to float. */ +/* assume yh has 20 significant bits (gam_n() passes yh rounded to float) */ +static struct Double +GT2(double yh, double yl) { + double t3, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) / + (Q20 + (y * ((Q21 + Q22 * y) + z * Q23) + + (z * z) * ((Q24 + Q25 * y) + z * Q26))) + GZ2_l; + r.h = (double) ((float) (GZ2_h + t3)); + r.l = t3 - (r.h - GZ2_h); + return (r); +} + +/* compute gamma on GT3 = [1.6373, 2.0000] from the offset y = yh+yl; gam_n() passes yh+yl = x-(t0z3+t0z3_l) to obtain gamma(x+1) for x > 0.6374. The value is formed as (GZ3_h+GZ3_l) + TZ3*y + y*y*P3(y)/Q3(y), with P3 = P30..P34 and Q3 = Q30..Q35, and is returned as r.h + r.l where r.h is rounded to float. */ +/* assume yh has 20 significant bits (gam_n() passes yh rounded to float) */ +static struct Double +GT3(double yh, double yl) { + double t3, t4, y, z; + struct Double r; + + y = yh + yl; + z = y * y; + t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) / + (Q30 + y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35))); + t3 += (TZ3 * yl + GZ3_l); + t4 = TZ3 * yh; + r.h = (double) ((float) (t4 + GZ3_h + t3)); + t3 += (t4 - (r.h - GZ3_h)); +
r.l = t3; + return (r); +} + +/* INDENT OFF */ +/* + * return tgamma(x) scaled by 2**-m for 8<=x<=171.62... using Stirling's formula + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + * = L1 + L2 + L3, + * The result is returned as the pair zz.h + zz.l; *m receives the binary + * scale (k >> 5 in the body), and the visible caller in tgamma() restores + * it by adding m << 20 to the high word of zz.h + zz.l. + * __HI()/__LO() read and write the high and low 32-bit words of a double. + */ +/* INDENT ON */ +static struct Double +large_gam(double x, int *m) { + double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0, + p24 = 1.0 / 16777216.0; + int n2, j2, k, ix, j; + unsigned lx; + struct Double zz; + double u2, ss_h, ss_l, r_h, w_h, w_l, t4; + +/* INDENT OFF */ +/* + * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details) + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + A1*s^3 + A2*s^5 + A3*s^7 + * Note + * (1) the leading entries are truncated to 24 binary point. + * (2) Remez error for T3(s) is bounded by 2**(-72.4) + * 2**(-24) + * _________V___________________ + * T1(n): |_________|___________________| + * _______ ______________________ + * T2(j): |_______|______________________| + * ____ _______________________ + * 2s: |____|_______________________| + * __________________________ + * + T3(s)-2s: |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + */ +/* INDENT ON */ + ix = __HI(x); + lx = __LO(x); + n2 = (ix >> 20) - 0x3ff; /* exponent of x, range:3-7 */ + n2 += n2; /* 2n, index of the T1 leading entry */ + ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */ + __HI(y) = ix; + __LO(y) = lx; + __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */ + __LO(z) = 0; + j2 = (ix >> 13) & 0x7e; /* 2j, index of the T2 leading entry */ + t1 = y + z; + t2 = y - z; + r = one / t1; + t1 = (double) ((float) t1); + u = r * t2; /* u = (y-z)/(y+z) */ + t4 = T2[j2 + 1] + T1[n2 + 1]; + z2 = u * u; + k = __HI(u) & 0x7fffffff; + t3 = T2[j2] + T1[n2]; + if ((k >> 20) < 0x3ec) { /* |u|<2**-19: no split of 2u needed */ + t2 = t4 + u * ((two + z2 * A1) + (z2
* z2) * (A2 + z2 * A3)); + } else { + t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3)); + u2 = u + u; + v = (double) ((int) (u2 * t24)) * p24; + t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z))); + t3 += v; + } + ss_h = (double) ((float) (t2 + t3)); + ss_l = t2 - (ss_h - t3); + + /* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)) + * where ss = log(x) - 1 is already in extra precision + */ + z = one / x; + r = x - half; + r_h = (double) ((float) r); + w_h = r_h * ss_h + hln2pi_h; + z2 = z * z; + w = (r - r_h) * ss_h + r * ss_l; + z4 = z2 * z2; + t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7))); + t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6)); + t1 += t2; + w += hln2pi_l; + w_l = z * (GP0 + t1) + w; + k = (int) ((w_h + w_l) * invln2_32 + half); + + /* compute the exponential of w_h+w_l: j = k mod 32 selects S[j], k >> 5 is the scale returned in *m */ + j = k & 0x1f; + *m = (k >> 5); + t3 = (double) k; + + /* perform w - k*ln2_32 (represent as w_h - w_l); NOTE(review): the value stored to w by the third statement below is never read afterwards */ + t1 = w_h - t3 * ln2_32hi; + t2 = t3 * ln2_32lo; + w = w_l - t2; + w_h = t1 + w_l; + w_l = t2 - (w_l - (w_h - t1)); + + /* compute exp(w_h-w_l), the reduced argument being held as w_h - w_l */ + z = w_h - w_l; + z2 = z * z; + t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5)); + t2 = z2 * (Et2 + z2 * Et4); + t3 = w_h - (w_l - (t1 + z * t2)); + zz.l = S_trail[j] * (one + t3) + S[j] * t3; + zz.h = S[j]; + return (zz); +} + +/* INDENT OFF */ +/* + * kpsin(x)= sin(pi*x)/pi + * = x+ks[0]*x^3+ks[1]*x^5+ks[2]*x^7+ks[3]*x^9+ks[4]*x^11+ks[5]*x^13+ks[6]*x^15 + */ +static const double ks[] = { + -1.64493406684822640606569, + +8.11742425283341655883668741874008920850698590621e-0001, + -1.90751824120862873825597279118304943994042258291e-0001, + +2.61478477632554278317289628332654539353521911570e-0002, + -2.34607978510202710377617190278735525354347705866e-0003, + +1.48413292290051695897242899977121846763824221705e-0004, + -6.87730769637543488108688726777687262485357072242e-0006, +}; +/* INDENT ON */ + +/* returns sin(pi*x)/pi as xx.h + xx.l, with xx.h = x and xx.l the sum of the ks[] terms; assume x is not tiny and positive */ +static struct Double +kpsin(double x) { + double z, t1, t2,
t3, t4; + struct Double xx; + + z = x * x; + xx.h = x; + t1 = z * x; + t2 = z * z; + t4 = t1 * ks[0]; + t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) * + (ks[4] + z * ks[5] + t2 * ks[6])); + xx.l = t4 + t3; + return (xx); +} + +/* INDENT OFF */ +/* + * kpcos(x)= cos(pi*x)/pi + * = 1/pi+kc[0]*x^2+kc[1]*x^4+kc[2]*x^6+kc[3]*x^8+kc[4]*x^10+kc[5]*x^12 + * 1/pi is held as one_pi_h + one_pi_l. kpcos() does not read kc[0]; + * the x^2 coefficient is applied as the split npi_2_h + npi_2_l. + */ + +static const double one_pi_h = 0.318309886183790635705292970, + one_pi_l = 3.583247455607534006714276420e-17; +static const double npi_2_h = -1.5625, + npi_2_l = -0.00829632679489661923132169163975055099555883223; +static const double kc[] = { + -1.57079632679489661923132169163975055099555883223e+0000, + +1.29192819501230224953283586722575766189551966008e+0000, + -4.25027339940149518500158850753393173519732149213e-0001, + +7.49080625187015312373925142219429422375556727752e-0002, + -8.21442040906099210866977352284054849051348692715e-0003, + +6.10411356829515414575566564733632532333904115968e-0004, +}; +/* INDENT ON */ + +/* returns cos(pi*x)/pi as xx.h + xx.l, with xx.h = one_pi_h and xx.l holding every other term; assume x is not tiny and positive */ +static struct Double +kpcos(double x) { + double z, t1, t2, t3, t4, x4, x8; + struct Double xx; + + z = x * x; + xx.h = one_pi_h; + t1 = (double) ((float) x); + x4 = z * z; + t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1); + t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z * + kc[4] + x4 * kc[5])); + t4 = t1 * t1; /* 48 bits mantissa */ + x8 = t2 + t3; + t4 *= npi_2_h; /* npi_2_h is 5 bits const.
The product is exact */ + xx.l = x8 + t4; /* that will minimize the rounding error in xx.l */ + return (xx); +} + +/* INDENT OFF */ +static const double + /* 0.134861805732790769689793935774652917006 */ + t0z1 = 0.1348618057327907737708, + t0z1_l = -4.0810077708578299022531e-18, + /* 0.461632144968362341262659542325721328468 */ + t0z2 = 0.4616321449683623567850, + t0z2_l = -1.5522348162858676890521e-17, + /* 0.819773101100500601787868704921606996312 */ + t0z3 = 0.8197731011005006118708, + t0z3_l = -1.0082945122487103498325e-17; + /* 1.134861805732790769689793935774652917006 */ +/* INDENT ON */ + +/* gamma(x+i) for 0 <= x < 1, returned as the pair rr.h + rr.l. Only i = 0..7 are handled by the switch in the body; it has no default case, so rr is returned unset for any other i. The visible caller in tgamma() passes i = (int) x for an argument between 0 and 8. */ +static struct Double +gam_n(int i, double x) { + struct Double rr, yy; + double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl; + + /* compute yy = gamma(x+1): pick GT1/GT2/GT3 by x and pass x minus that interval's t0z constant as a float-rounded head r2 plus a tail */ + if (x > 0.2845) { + if (x > 0.6374) { + r1 = x - t0z3; + r2 = (double) ((float) (r1 - t0z3_l)); + t2 = r1 - r2; + yy = GT3(r2, t2 - t0z3_l); + } else { + r1 = x - t0z2; + r2 = (double) ((float) (r1 - t0z2_l)); + t2 = r1 - r2; + yy = GT2(r2, t2 - t0z2_l); + } + } else { + r1 = x - t0z1; + r2 = (double) ((float) (r1 - t0z1_l)); + t2 = r1 - r2; + yy = GT1(r2, t2 - t0z1_l); + } + + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8; i = 0 divides by x instead */ + switch (i) { + case 0: /* yy/x */ + r1 = one / x; + xh = (double) ((float) x); /* x is not tiny */ + rr.h = (double) ((float) ((yy.h + yy.l) * r1)); + rr.l = r1 * (yy.h - rr.h * xh) - + ((r1 * rr.h) * (x - xh) - r1 * yy.l); + break; + case 1: /* yy */ + rr.h = yy.h; + rr.l = yy.l; + break; + case 2: /* (x+1)*yy */ + z = x + one; /* may not be exact */ + zh = (double) ((float) z); + rr.h = zh * yy.h; + rr.l = z * yy.l + (x - (zh - one)) * yy.h; + break; + case 3: /* (x+2)*(x+1)*yy */ + z1 = x + one; + z2 = x + 2.0; + z = z1 * z2; + xh = (double) ((float) z); + zh = (double) ((float) z1); + xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + + case 4: /*
(x+1)*(x+3)*(x+2)*yy */ + z1 = x + 2.0; + z2 = (x + one) * (x + 3.0); + zh = z1; + __LO(zh) = 0; + __HI(zh) &= 0xfffffff8; /* zh 18 bits mantissa */ + zl = x - (zh - 2.0); + z = z1 * z2; + xh = (double) ((float) z); + xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy, using (x+1)*(x+4) = (x+2)*(x+3) - 2 */ + z1 = x + 2.0; + z2 = x + 3.0; + z = z1 * z2; + zh = (double) ((float) z1); + yh = (double) ((float) z); + yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0; + z *= z2; + xh = (double) ((float) z); + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */ + z1 = x + 2.0; + z2 = x + 3.0; + z = z1 * z2; + zh = (double) ((float) z1); + yh = (double) ((float) z); + z1 = x - (zh - 2.0); + yl = z1 * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0; + x5 = x + 5.0; + z *= z2; + xh = (double) ((float) z); + zh += 3.0; + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0)); + /* xh+xl=(x+1)*...*(x+4) */ + /* wh+wl=(x+5)*yy */ + wh = (double) ((float) (x5 * (yy.h + yy.l))); + wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + break; + case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy; NOTE(review): the z1 = x + 6.0 store in this case is not read afterwards */ + z1 = x + 3.0; + z2 = x + 4.0; + z = z2 * z1; + zh = (double) ((float) z1); + yh = (double) ((float) z); /* yh+yl = (x+3)(x+4) */ + yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one))); + z1 = x + 6.0; + z2 = z - 2.0; /* z2 = (x+2)*(x+5) */ + z *= z2; + xh = (double) ((float) z); + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0)); + /* xh+xl=(x+2)*...*(x+5) */ + /* wh+wl=(x+1)(x+6)*yy */ + z2 -= 4.0; /* z2 = (x+1)(x+6) */ + wh = (double) ((float) (z2 * (yy.h + yy.l))); + wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + } + return (rr); +} + +double +tgamma(double x) { + struct Double ss,
ww; + double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5; + int i, j, k, m, ix, hx, xk; + unsigned lx; + + hx = __HI(x); + lx = __LO(x); + ix = hx & 0x7fffffff; + y = x; + + if (ix < 0x3ca00000) + return (one / x); /* |x| < 2**-53 */ + if (ix >= 0x7ff00000) + /* +Inf -> +Inf, -Inf or NaN -> NaN */ + return (x * ((hx < 0)? 0.0 : x)); + if (hx > 0x406573fa || /* x > 171.62... overflow to +inf */ + (hx == 0x406573fa && lx > 0xE561F647)) { + z = x / tiny; + return (z * z); + } + if (hx >= 0x40200000) { /* x >= 8 */ + ww = large_gam(x, &m); + w = ww.h + ww.l; + __HI(w) += m << 20; + return (w); + } + if (hx > 0) { /* x from 0 to 8 */ + i = (int) x; + ww = gam_n(i, x - (double) i); + return (ww.h + ww.l); + } + + /* negative x */ + /* INDENT OFF */ + /* + * compute: xk = + * -2 ... x is an even int (-inf is even) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; + if (ix >= 0x43300000) { + if (ix >= 0x43400000) + xk = -2; + else + xk = -2 + (lx & 1); + } else if (ix >= 0x3ff00000) { + k = (ix >> 20) - 0x3ff; + if (k > 20) { + j = lx >> (52 - k); + if ((j << (52 - k)) == lx) + xk = -2 + (j & 1); + else + xk = j & 1; + } else { + j = ix >> (20 - k); + if ((j << (20 - k)) == ix && lx == 0) + xk = -2 + (j & 1); + else + xk = j & 1; + } + } + if (xk < 0) + /* ideally gamma(-n)= (-1)**(n+1) * inf, but c99 expect NaN */ + return ((x - x) / (x - x)); /* 0/0 = NaN */ + + + /* negative underflow thresold */ + if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) { + /* x < -183.0 - 11ulp */ + z = tiny / x; + if (xk == 1) + z = -z; + return (z * tiny); + } + + /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */ + + /* + * First compute ss = -sin(pi*y)/pi , so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + y = -x; + j = (int) y; + z = y - (double) j; + if (z > 0.3183098861837906715377675) + if (z > 0.6816901138162093284622325) + ss = kpsin(one - z); 
+ else + ss = kpcos(0.5 - z); + else + ss = kpsin(z); + if (xk == 0) { + ss.h = -ss.h; + ss.l = -ss.l; + } + + /* Then compute ww = gamma(1+y), note that result scale to 2**m */ + m = 0; + if (j < 7) { + ww = gam_n(j + 1, z); + } else { + w = y + one; + if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */ + ww = large_gam(w, &m); + } else { + t = w - one; + if (t == y) { /* y+one exact */ + ww = large_gam(w, &m); + } else { /* use y*gamma(y) */ + if (j == 7) + ww = gam_n(j, z); + else + ww = large_gam(y, &m); + t4 = ww.h + ww.l; + t1 = (double) ((float) y); + t2 = (double) ((float) t4); + /* t4 will not be too large */ + ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2; + ww.h = t1 * t2; + } + } + } + + /* compute 1/(ss*ww) */ + t3 = ss.h + ss.l; + t4 = ww.h + ww.l; + t1 = (double) ((float) t3); + t2 = (double) ((float) t4); + z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */ + z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */ + t3 = t3 * t4; /* t3 = ss*ww */ + z3 = one / t3; /* z3 = 1/(ss*ww) */ + t5 = t1 * t2; + z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */ + t1 = (double) ((float) t3); /* (t1,z1) = ss*ww */ + z1 = z5 - (t1 - t5); + t2 = (double) ((float) z3); /* leading 1/(ss*ww) */ + z2 = z3 * (t2 * z1 - (one - t2 * t1)); + z = t2 - z2; + + /* check whether z*2**-m underflow */ + if (m != 0) { + hx = __HI(z); + i = hx & 0x80000000; + ix = hx ^ i; + j = ix >> 20; + if (j > m) { + ix -= m << 20; + __HI(z) = ix ^ i; + } else if ((m - j) > 52) { + /* underflow */ + if (xk == 0) + z = -tiny * tiny; + else + z = tiny * tiny; + } else { + /* subnormal */ + m -= 60; + t = one; + __HI(t) -= 60 << 20; + ix -= m << 20; + __HI(z) = ix ^ i; + z *= t; + } + } + return (z); +} diff --git a/usr/src/libm/src/m9x/tgammaf.c b/usr/src/libm/src/m9x/tgammaf.c new file mode 100644 index 0000000..538cf89 --- /dev/null +++ b/usr/src/libm/src/m9x/tgammaf.c @@ -0,0 +1,545 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tgammaf.c 1.10 06/01/31 SMI" + +#pragma weak tgammaf = __tgammaf + +/* + * True gamma function + * + * float tgammaf(float x) + * + * Algorithm: see tgamma.c + * + * Maximum error observed: 0.87ulp (both positive and negative arguments) + */ + +#include "libm.h" +#include "libm_synonyms.h" +#include <math.h> +#include <sunmath.h> + +#if defined(__sparc) +#define HIWORD 0 +#define LOWORD 1 +#elif defined(__i386) +#define HIWORD 1 +#define LOWORD 0 +#else +#error Unknown architecture +#endif +#define __HI(x) ((int *) &x)[HIWORD] +#define __LO(x) ((unsigned *) &x)[LOWORD] + +/* Coefficients for primary intervals GTi() */ +static const double cr[] = { + /* p1 */ + +7.09087253435088360271451613398019280077561279443e-0001, + -5.17229560788652108545141978238701790105241761089e-0001, + +5.23403394528150789405825222323770647162337764327e-0001, + -4.54586308717075010784041566069480411732634814899e-0001, + +4.20596490915239085459964590559256913498190955233e-0001, + -3.57307589712377520978332185838241458642142185789e-0001, + + /* p2 */ + +4.28486983980295198166056119223984284434264344578e-0001, + 
-1.30704539487709138528680121627899735386650103914e-0001, + +1.60856285038051955072861219352655851542955430871e-0001, + -9.22285161346010583774458802067371182158937943507e-0002, + +7.19240511767225260740890292605070595560626179357e-0002, + -4.88158265593355093703112238534484636193260459574e-0002, + + /* p3 */ + +3.82409531118807759081121479786092134814808872880e-0001, + +2.65309888180188647956400403013495759365167853426e-0002, + +8.06815109775079171923561169415370309376296739835e-0002, + -1.54821591666137613928840890835174351674007764799e-0002, + +1.76308239242717268530498313416899188157165183405e-0002, + + /* GZi and TZi */ + +0.9382046279096824494097535615803269576988, /* GZ1 */ + +0.8856031944108887002788159005825887332080, /* GZ2 */ + +0.9367814114636523216188468970808378497426, /* GZ3 */ + -0.3517214357852935791015625, /* TZ1 */ + +0.280530631542205810546875, /* TZ3 */ +}; + /* Names for the 22 entries of cr[]: Pij is coefficient j of the polynomial used by GTi; GZi and TZi are the constant and linear terms added by GTi (GT2 has no linear term). */ +#define P10 cr[0] +#define P11 cr[1] +#define P12 cr[2] +#define P13 cr[3] +#define P14 cr[4] +#define P15 cr[5] +#define P20 cr[6] +#define P21 cr[7] +#define P22 cr[8] +#define P23 cr[9] +#define P24 cr[10] +#define P25 cr[11] +#define P30 cr[12] +#define P31 cr[13] +#define P32 cr[14] +#define P33 cr[15] +#define P34 cr[16] +#define GZ1 cr[17] +#define GZ2 cr[18] +#define GZ3 cr[19] +#define TZ1 cr[20] +#define TZ3 cr[21] + +/* + * GT1(y): kernel for the gamma interval GT1 = [1.0000, 1.2845]. + * Returns GZ1 + TZ1*y + y^2*(P10 + P11*y + P12*y^2 + ... + P15*y^5). + * gam_n() in tgammaf.c calls GT1(x - t0z1) to obtain gamma(x+1), so y is + * an offset inside the interval, not the gamma argument itself. + * The grouping of the terms fixes the rounding; do not re-associate. + */ +static double +GT1(double y) { + double z, r; + + z = y * y; + r = TZ1 * y + z * ((P10 + y * P11 + z * P12) + (z * y) * (P13 + y * + P14 + z * P15)); + return (GZ1 + r); +} + +/* + * GT2(y): kernel for the gamma interval GT2 = [1.2844, 1.6374]. + * Returns GZ2 + y^2*(P20 + P21*y + ... + P25*y^5); there is no linear term. + * gam_n() in tgammaf.c calls GT2(x - t0z2) to obtain gamma(x+1). + */ +static double +GT2(double y) { + double z; + + z = y * y; + return (GZ2 + z * ((P20 + y * P21 + z * P22) + (z * y) * (P23 + y * + P24 + z * P25))); +} + +/* + * GT3(y): kernel for the gamma interval GT3 = [1.6373, 2.0000]. + * Returns GZ3 + TZ3*y + y^2*(P30 + P31*y + P32*y^2 + P33*y^3 + P34*y^4). + * gam_n() in tgammaf.c calls GT3(x - t0z3) to obtain gamma(x+1). + */ +static double +GT3(double y) { +double z, r; + + z = y * y; + r = TZ3 * y + z * ((P30 + y * P31 + z * P32) + (z * y) * (P33 + y * + P34)); + return (GZ3 + r); +} + +/* INDENT 
OFF */ +static const double c[] = { ++1.0, ++2.0, ++0.5, ++1.0e-300, ++6.666717231848518054693623697539230e-0001, /* A1=T3[0] */ ++8.33333330959694065245736888749042811909994573178e-0002, /* GP[0] */ +-2.77765545601667179767706600890361535225507762168e-0003, /* GP[1] */ ++7.77830853479775281781085278324621033523037489883e-0004, /* GP[2] */ ++4.18938533204672741744150788368695779923320328369e-0001, /* hln2pi */ ++2.16608493924982901946e-02, /* ln2_32 */ ++4.61662413084468283841e+01, /* invln2_32 */ ++5.00004103388988968841156421415669985414073453720e-0001, /* Et1 */ ++1.66667656752800761782778277828110208108687545908e-0001, /* Et2 */ +}; + +#define one c[0] +#define two c[1] +#define half c[2] +#define tiny c[3] +#define A1 c[4] +#define GP0 c[5] +#define GP1 c[6] +#define GP2 c[7] +#define hln2pi c[8] +#define ln2_32 c[9] +#define invln2_32 c[10] +#define Et1 c[11] +#define Et2 c[12] + +/* S[j] = 2**(j/32.) for the final computation of exp(w) */ +static const double S[] = { ++1.00000000000000000000e+00, /* 3FF0000000000000 */ ++1.02189714865411662714e+00, /* 3FF059B0D3158574 */ ++1.04427378242741375480e+00, /* 3FF0B5586CF9890F */ ++1.06714040067682369717e+00, /* 3FF11301D0125B51 */ ++1.09050773266525768967e+00, /* 3FF172B83C7D517B */ ++1.11438674259589243221e+00, /* 3FF1D4873168B9AA */ ++1.13878863475669156458e+00, /* 3FF2387A6E756238 */ ++1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */ ++1.18920711500272102690e+00, /* 3FF306FE0A31B715 */ ++1.21524735998046895524e+00, /* 3FF371A7373AA9CB */ ++1.24185781207348400201e+00, /* 3FF3DEA64C123422 */ ++1.26905095719173321989e+00, /* 3FF44E086061892D */ ++1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */ ++1.32523664315974132322e+00, /* 3FF5342B569D4F82 */ ++1.35425554693689265129e+00, /* 3FF5AB07DD485429 */ ++1.38390988196383202258e+00, /* 3FF6247EB03A5585 */ ++1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */ ++1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */ ++1.47682614593949934623e+00, /* 3FF7A11473EB0187 */ 
++1.50916442759342284141e+00, /* 3FF82589994CCE13 */ ++1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */ ++1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */ ++1.61049033194925428347e+00, /* 3FF9C49182A3F090 */ ++1.64575547815396494578e+00, /* 3FFA5503B23E255D */ ++1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */ ++1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */ ++1.75625216037329945351e+00, /* 3FFC199BDD85529C */ ++1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */ ++1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */ ++1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */ ++1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */ ++1.95714412417540017941e+00, /* 3FFF50765B6E4540 */ +}; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * return tgammaf(x) in double for 8<x<=35.040096283... using Stirling's formula + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + */ +/* + * compute ss = log(x)-1 + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n-3) = n*log(2)-1, n=3,4,5 + * T2(j) = log(z[j]), + * T3(s) = 2s + A1*s^3 + * Note + * (1) Remez error for T3(s) is bounded by 2**(-35.8) + * (see mpremez/work/Log/tgamma_log_2_outr1) + */ + +static const double T1[] = { /* T1[j]=(j+3)*log(2)-1 */ ++1.079441541679835928251696364375e+00, ++1.772588722239781237668928485833e+00, ++2.465735902799726547086160607291e+00, +}; + +static const double T2[] = { /* T2[j]=log(1+j/64+1/128) */ ++7.782140442054948947462900061137e-03, ++2.316705928153437822879916096229e-02, ++3.831886430213659919375532512380e-02, ++5.324451451881228286587019378653e-02, ++6.795066190850774939456527777263e-02, ++8.244366921107459126816006866831e-02, ++9.672962645855111229557105648746e-02, ++1.108143663402901141948061693232e-01, ++1.247034785009572358634065153809e-01, ++1.384023228591191356853258736016e-01, ++1.519160420258419750718034248969e-01, ++1.652495728953071628756114492772e-01, 
++1.784076574728182971194002415109e-01, ++1.913948529996294546092988075613e-01, ++2.042155414286908915038203861962e-01, ++2.168739383006143596190895257443e-01, ++2.293741010648458299914807250461e-01, ++2.417199368871451681443075159135e-01, ++2.539152099809634441373232979066e-01, ++2.659635484971379413391259265375e-01, ++2.778684510034563061863500329234e-01, ++2.896332925830426768788930555257e-01, ++3.012613305781617810128755382338e-01, ++3.127557100038968883862465596883e-01, ++3.241194686542119760906707604350e-01, ++3.353555419211378302571795798142e-01, ++3.464667673462085809184621884258e-01, ++3.574558889218037742260094901409e-01, ++3.683255611587076530482301540504e-01, ++3.790783529349694583908533456310e-01, ++3.897167511400252133704636040035e-01, ++4.002431641270127069293251019951e-01, ++4.106599249852683859343062031758e-01, ++4.209692946441296361288671615068e-01, ++4.311734648183713408591724789556e-01, ++4.412745608048752294894964416613e-01, ++4.512746441394585851446923830790e-01, ++4.611757151221701663679999255979e-01, ++4.709797152187910125468978560564e-01, ++4.806885293457519076766184554480e-01, ++4.903039880451938381503461596457e-01, ++4.998278695564493298213314152470e-01, ++5.092619017898079468040749192283e-01, ++5.186077642080456321529769963648e-01, ++5.278670896208423851138922177783e-01, ++5.370414658968836545667292441538e-01, ++5.461324375981356503823972092312e-01, ++5.551415075405015927154803595159e-01, ++5.640701382848029660713842900902e-01, ++5.729197535617855090927567266263e-01, ++5.816917396346224825206107537254e-01, ++5.903874466021763746419167081236e-01, ++5.990081896460833993816000244617e-01, ++6.075552502245417955010851527911e-01, ++6.160298772155140196475659281967e-01, ++6.244332880118935010425387440547e-01, ++6.327666695710378295457864685036e-01, ++6.410311794209312910556013344054e-01, ++6.492279466251098188908399699053e-01, ++6.573580727083600301418900232459e-01, ++6.654226325450904489500926100067e-01, ++6.734226752121667202979603888010e-01, 
++6.813592248079030689480715595681e-01, ++6.892332812388089803249143378146e-01, +}; +/* INDENT ON */ + /* large_gam(x): gamma(x) in double by the Stirling-type formula described in the block comment above T1[], which states the range 8 < x <= 35.04...; tgammaf() also calls it with y + one on its negative-x path. Steps: split x into 2**m * y with 1 <= y < 2; form ss = log(x)-1 from T1[m-3] + T2[j] plus a short series in u = (y-z)/(y+z); build w; return exp(w) as S[k & 31] times a short polynomial in the remainder, with k >> 5 added to the exponent field. T1[] has three entries, so m must be 3, 4 or 5 (8 <= x < 64). */ +static double +large_gam(double x) { + double ss, zz, z, t1, t2, w, y, u; + unsigned lx; + int k, ix, j, m; + + ix = __HI(x); + lx = __LO(x); + m = (ix >> 20) - 0x3ff; /* exponent of x, range:3-5 */ + ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */ + __HI(y) = ix; + __LO(y) = lx; + __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */ + __LO(z) = 0; + j = (ix >> 14) & 0x3f; /* top six mantissa bits of y index T2[] */ + t1 = y + z; + t2 = y - z; + u = t2 / t1; + ss = T1[m - 3] + T2[j] + u * (two + A1 * (u * u)); + /* ss = log(x)-1 */ + /* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2))) + * where ss = log(x) - 1 + */ + z = one / x; + zz = z * z; + w = ((x - half) * ss + hln2pi) + z * (GP0 + zz * GP1 + (zz * zz) * GP2); + k = (int) (w * invln2_32 + half); /* w in units of ln2/32, rounded */ + + /* compute the exponential of w: exp(w) = 2**m * S[j] * exp(z), k = 32*m + j */ + j = k & 0x1f; + m = k >> 5; + z = w - (double) k *ln2_32; + zz = S[j] * (one + z + (z * z) * (Et1 + z * Et2)); + __HI(zz) += m << 20; /* multiply by 2**m through the exponent field */ + return (zz); +} +/* INDENT OFF */ +/* + * kpsin(x)= sin(pi*x)/pi + * = x + ks[0]*x^3 + ks[1]*x^5 + ks[2]*x^7 + ks[3]*x^9 + */ +static const double ks[] = { +-1.64493404985645811354476665052005342839447790544e+0000, ++8.11740794458351064092797249069438269367389272270e-0001, +-1.90703144603551216933075809162889536878854055202e-0001, ++2.55742333994264563281155312271481108635575331201e-0002, +}; +/* INDENT ON */ + /* kpsin(x): evaluates the odd polynomial documented above ks[]; tgammaf() calls it with z or 1-z, where z is the fractional part of -x. */ +static double +kpsin(double x) { + double z; + + z = x * x; + return (x + (x * z) * ((ks[0] + z * ks[1]) + (z * z) * (ks[2] + z * + ks[3]))); +} + +/* INDENT OFF */ +/* + * kpcos(x)= cos(pi*x)/pi + * = kc[0] + kc[1]*x^2 + kc[2]*x^4 + kc[3]*x^6 + */ +static const double kc[] = { ++3.18309886183790671537767526745028724068919291480e-0001, +-1.57079581447762568199467875065854538626594937791e+0000, ++1.29183528092558692844073004029568674027807393862e+0000, +-4.20232949771307685981015914425195471602739075537e-0001, +}; +/* INDENT ON */ + 
+static double +kpcos(double x) { /* returns kc[0] + kc[1]*x^2 + kc[2]*x^4 + kc[3]*x^6; tgammaf() calls it with 0.5 - z */ + double z; + + z = x * x; + return (kc[0] + z * (kc[1] + z * kc[2] + (z * z) * kc[3])); +} + /* Offsets subtracted from x in gam_n() before it calls GT1, GT2 or GT3. */ +/* INDENT OFF */ +static const double +t0z1 = 0.134861805732790769689793935774652917006, +t0z2 = 0.461632144968362341262659542325721328468, +t0z3 = 0.819773101100500601787868704921606996312; + /* 1.134861805732790769689793935774652917006 */ +/* INDENT ON */ + +/* + * gam_n(i, x): gamma(x+i) for 0 <= x < 1, computed in double. + * + * yy = gamma(x+1) comes from GT1, GT2 or GT3, selected by comparing x with + * 0.2845 and 0.6374; the switch then forms yy/x for i = 0, yy for i = 1 and + * (x+1)...(x+i-1) times yy for i = 2..7. + * + * There is no default case: for any other i, rr is returned unassigned. + * tgammaf() only passes 0..7. + */ +static double +gam_n(int i, double x) { + double rr, yy; + double z1, z2; + + /* compute yy = gamma(x+1) */ + if (x > 0.2845) { + if (x > 0.6374) + yy = GT3(x - t0z3); + else + yy = GT2(x - t0z2); + } else + yy = GT1(x - t0z1); + + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */ + switch (i) { + case 0: /* yy/x */ + rr = yy / x; + break; + case 1: /* yy */ + rr = yy; + break; + case 2: /* (x+1)*yy */ + rr = (x + one) * yy; + break; + case 3: /* (x+2)*(x+1)*yy */ + rr = (x + one) * (x + two) * yy; + break; + + case 4: /* (x+1)*(x+3)*(x+2)*yy */ + rr = (x + one) * (x + two) * ((x + 3.0) * yy); + break; + case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */ + z1 = (x + two) * (x + 3.0) * yy; + z2 = (x + one) * (x + 4.0); + rr = z1 * z2; + break; + case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */ + z1 = (x + two) * (x + 3.0); + z2 = (x + 5.0) * yy; + rr = z1 * (z1 - two) * z2; /* z1 - 2 = (x+1)*(x+4) */ + break; + case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */ + z1 = (x + two) * (x + 3.0); + z2 = (x + 5.0) * (x + 6.0) * yy; + rr = z1 * (z1 - two) * z2; /* z1 - 2 = (x+1)*(x+4) */ + break; + } + return (rr); +} + /* tgammaf(xf): single-precision gamma function; the main paths are evaluated in double and converted to float on return. The branches are tested in this order: |x| below 2**-24 returns 1/xf; Inf and NaN; x above the overflow threshold returns (float)(x/tiny); x >= 8 uses large_gam(); 0 < x < 8 uses gam_n(); negative integers return NaN from 0/0; x below -(41+11ulp) returns (float)tiny with the sign chosen by xk; every other negative x goes through gamma(x) = 1/(ss*gamma(1+y)) with y = -x. */ +float +tgammaf(float xf) { + float zf; + double ss, ww; + double x, y, z; + int i, j, k, ix, hx, xk; + + hx = *(int *) &xf; /* raw bits of xf */ + ix = hx & 0x7fffffff; /* sign bit cleared */ + + x = (double) xf; + if (ix < 0x33800000) + return (1.0F / xf); /* |x| < 2**-24 */ + + if (ix >= 0x7f800000) + return (xf * ((hx < 0)? 0.0F : xf)); /* +-Inf or NaN */ + + if (hx > 0x420C290F) /* x > 35.040096283... 
overflow */ + return (float)(x / tiny); + + if (hx >= 0x41000000) /* x >= 8 */ + return ((float) large_gam(x)); + + if (hx > 0) { /* x from 0 to 8 */ + i = (int) xf; + return ((float) gam_n(i, x - (double) i)); + } + + /* negative x */ + /* INDENT OFF */ + /* + * compute xk = + * -2 ... x is an even int (-inf is considered even) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; + if (ix >= 0x4b000000) { /* |x| >= 2**23: x is an integer */ + if (ix >= 0x4b800000) /* |x| >= 2**24: every float is even */ + xk = -2; + else /* 2**23 <= |x| < 2**24: lowest mantissa bit is the units bit */ + xk = -2 + (ix & 1); + } else if (ix >= 0x3f800000) { + k = (ix >> 23) - 0x7f; /* unbiased exponent, 0..22 here */ + j = ix >> (23 - k); + if ((j << (23 - k)) == ix) + xk = -2 + (j & 1); + else + xk = j & 1; + } + if (xk < 0) { + /* 0/0 invalid NaN, ideally gamma(-n)= (-1)**(n+1) * inf */ + zf = xf - xf; + return (zf / zf); + } + + /* negative underflow threshold */ + if (ix > 0x4224000B) { /* x < -(41+11ulp) */ + if (xk == 0) + z = -tiny; + else + z = tiny; + return ((float)z); + } + + /* INDENT OFF */ + /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */ + /* + * First compute ss = -sin(pi*y)/pi , so that + * gamma(x) = 1/(ss*gamma(1+y)) + * + * j is the integer part of y and z its fractional part; the kernel is + * picked by z: kpsin(z) for small z, kpsin(1-z) for z near 1 and + * kpcos(0.5-z) in between. + */ + /* INDENT ON */ + y = -x; + j = (int) y; + z = y - (double) j; + if (z > 0.3183098861837906715377675) + if (z > 0.6816901138162093284622325) + ss = kpsin(one - z); + else + ss = kpcos(0.5 - z); + else + ss = kpsin(z); + if (xk == 0) /* chopped integer part is even: flip the sign */ + ss = -ss; + + /* Then compute ww = gamma(1+y) */ + if (j < 7) + ww = gam_n(j + 1, z); + else + ww = large_gam(y + one); + + /* return 1/(ss*ww) */ + return ((float) (one / (ww * ss))); +} diff --git a/usr/src/libm/src/m9x/tgammal.c b/usr/src/libm/src/m9x/tgammal.c new file mode 100644 index 0000000..b0297de --- /dev/null +++ b/usr/src/libm/src/m9x/tgammal.c @@ -0,0 +1,1166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)tgammal.c 1.9 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak tgammal = __tgammal +#endif + +#include "libm.h" + +#if defined(__sparc) +#define H0_WORD(x) ((unsigned *) &x)[0] +#define H3_WORD(x) ((unsigned *) &x)[3] +#define CHOPPED(x) (long double) ((double) (x)) +#elif defined(__i386) +#define H0_WORD(x) ((((int *) &x)[2] << 16) | \ + (0x0000ffff & (((unsigned *) &x)[1] >> 15))) +#define H3_WORD(x) ((unsigned *) &x)[0] +#define CHOPPED(x) (long double) ((float) (x)) +#else +#error Unknown architecture +#endif + +struct LDouble { + long double h, l; +}; + +/* INDENT OFF */ +/* Primary interval GTi() */ +static const long double P1[] = { + +0.709086836199777919037185741507610124611513720557L, + +4.45754781206489035827915969367354835667391606951e-0001L, + +3.21049298735832382311662273882632210062918153852e-0002L, + -5.71296796342106617651765245858289197369688864350e-0003L, + +6.04666892891998977081619174969855831606965352773e-0003L, + +8.99106186996888711939627812174765258822658645168e-0004L, + -6.96496846144407741431207008527018441810175568949e-0005L, + +1.52597046118984020814225409300131445070213882429e-0005L, + 
+5.68521076168495673844711465407432189190681541547e-0007L, + +3.30749673519634895220582062520286565610418952979e-0008L, +}; +static const long double Q1[] = { + +1.0+0000L, + +1.35806511721671070408570853537257079579490650668e+0000L, + +2.97567810153429553405327140096063086994072952961e-0001L, + -1.52956835982588571502954372821681851681118097870e-0001L, + -2.88248519561420109768781615289082053597954521218e-0002L, + +1.03475311719937405219789948456313936302378395955e-0002L, + +4.12310203243891222368965360124391297374822742313e-0004L, + -3.12653708152290867248931925120380729518332507388e-0004L, + +2.36672170850409745237358105667757760527014332458e-0005L, +}; +static const long double P2[] = { + +0.428486815855585429730209907810650135255270600668084114L, + +2.62768479103809762805691743305424077975230551176e-0001L, + +3.81187532685392297608310837995193946591425896150e-0002L, + +3.00063075891811043820666846129131255948527925381e-0003L, + +2.47315407812279164228398470797498649142513408654e-0003L, + +3.62838199917848372586173483147214880464782938664e-0004L, + +3.43991105975492623982725644046473030098172692423e-0006L, + +4.56902151569603272237014240794257659159045432895e-0006L, + +2.13734755837595695602045100675540011352948958453e-0007L, + +9.74123440547918230781670266967882492234877125358e-0009L, +}; +static const long double Q2[] = { + +1.0L, + +9.18284118632506842664645516830761489700556179701e-0001L, + -6.41430858837830766045202076965923776189154874947e-0003L, + -1.24400885809771073213345747437964149775410921376e-0001L, + +4.69803798146251757538856567522481979624746875964e-0003L, + +7.18309447069495315914284705109868696262662082731e-0003L, + -8.75812626987894695112722600697653425786166399105e-0004L, + -1.23539972377769277995959339188431498626674835169e-0004L, + +3.10019017590151598732360097849672925448587547746e-0005L, + -1.77260223349332617658921874288026777465782364070e-0006L, +}; +static const long double P3[] = { + 
+0.3824094797345675048502747661075355640070439388902L, + +3.42198093076618495415854906335908427159833377774e-0001L, + +9.63828189500585568303961406863153237440702754858e-0002L, + +8.76069421042696384852462044188520252156846768667e-0003L, + +1.86477890389161491224872014149309015261897537488e-0003L, + +8.16871354540309895879974742853701311541286944191e-0004L, + +6.83783483674600322518695090864659381650125625216e-0005L, + -1.10168269719261574708565935172719209272190828456e-0006L, + +9.66243228508380420159234853278906717065629721016e-0007L, + +2.31858885579177250541163820671121664974334728142e-0008L, +}; +static const long double Q3[] = { + +1.0L, + +8.25479821168813634632437430090376252512793067339e-0001L, + -1.62251363073937769739639623669295110346015576320e-0002L, + -1.10621286905916732758745130629426559691187579852e-0001L, + +3.48309693970985612644446415789230015515365291459e-0003L, + +6.73553737487488333032431261131289672347043401328e-0003L, + -7.63222008393372630162743587811004613050245128051e-0004L, + -1.35792670669190631476784768961953711773073251336e-0004L, + +3.19610150954223587006220730065608156460205690618e-0005L, + -1.82096553862822346610109522015129585693354348322e-0006L, +}; + +static const long double +#if defined(__i386) +GZ1_h = 0.938204627909682449364570100414084663498215377L, +GZ1_l = 4.518346116624229420055327632718530617227944106e-20L, +GZ2_h = 0.885603194410888700264725126309883762587560340L, +GZ2_l = 1.409077427270497062039119290776508217077297169e-20L, +GZ3_h = 0.936781411463652321613537060640553022494714241L, +GZ3_l = 5.309836440284827247897772963887219035221996813e-21L, +#else +GZ1_h = 0.938204627909682449409753561580326910854647031L, +GZ1_l = 4.684412162199460089642452580902345976446297037e-35L, +GZ2_h = 0.885603194410888700278815900582588658192658794L, +GZ2_l = 7.501529273890253789219935569758713534641074860e-35L, +GZ3_h = 0.936781411463652321618846897080837818855399840L, +GZ3_l = 3.088721217404784363585591914529361687403776917e-35L, 
+#endif +TZ1 = -0.3517214357852935791015625L, +TZ3 = 0.280530631542205810546875L; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845] + * ...assume yh got 53 or 24(i386) significant bits + * + * The result r.h + r.l is GZ1 + TZ1*y + y*y*P1(y)/Q1(y), where P1 (degree 9) + * and Q1 (degree 8) are evaluated together by Horner's rule. r.h is rounded + * with CHOPPED() and r.l collects what that rounding left out. + */ +/* INDENT ON */ +static struct LDouble +GT1(long double yh, long double yl) { + long double t3, t4, y; + int i; + struct LDouble r; + + y = yh + yl; + /* P1 has one more coefficient than Q1, so its first Horner step is folded into the initialiser */ for (t4 = Q1[8], t3 = P1[8] + y * P1[9], i = 7; i >= 0; i--) { + t4 = t4 * y + Q1[i]; + t3 = t3 * y + P1[i]; + } + t3 = (y * y) * t3 / t4; + t3 += (TZ1 * yl + GZ1_l); /* gather the low-order terms */ + t4 = TZ1 * yh; + r.h = CHOPPED((t4 + GZ1_h + t3)); + t3 += (t4 - (r.h - GZ1_h)); + r.l = t3; + return (r); +} + +/* INDENT OFF */ +/* + * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374] + * ...assume yh got 53 significant bits + * + * The result r.h + r.l is GZ2 + y*y*P2(y)/Q2(y), with P2 and Q2 both of + * degree 9 and evaluated by Horner's rule; there is no linear term. + */ +/* INDENT ON */ +static struct LDouble +GT2(long double yh, long double yl) { + long double t3, t4, y; + int i; + struct LDouble r; + + y = yh + yl; + for (t4 = Q2[9], t3 = P2[9], i = 8; i >= 0; i--) { + t4 = t4 * y + Q2[i]; + t3 = t3 * y + P2[i]; + } + t3 = GZ2_l + (y * y) * t3 / t4; + r.h = CHOPPED((GZ2_h + t3)); + r.l = t3 - (r.h - GZ2_h); + return (r); +} + +/* INDENT OFF */ +/* + * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000] + * ...assume yh got 53 significant bits + * + * The result r.h + r.l is GZ3 + TZ3*y + y*y*P3(y)/Q3(y), with P3 and Q3 both + * of degree 9; the head/tail assembly is the same as in GT1. + */ +/* INDENT ON */ +static struct LDouble +GT3(long double yh, long double yl) { + long double t3, t4, y; + int i; + struct LDouble r; + + y = yh + yl; + for (t4 = Q3[9], t3 = P3[9], i = 8; i >= 0; i--) { + t4 = t4 * y + Q3[i]; + t3 = t3 * y + P3[i]; + } + t3 = (y * y) * t3 / t4; + t3 += (TZ3 * yl + GZ3_l); /* gather the low-order terms */ + t4 = TZ3 * yh; + r.h = CHOPPED((t4 + GZ3_h + t3)); + t3 += (t4 - (r.h - GZ3_h)); + r.l = t3; + return (r); +} + +/* INDENT OFF */ +/* Hex value of GP[0] should be 3FB55555 55555555 */ +static const long double GP[] = { + +0.083333333333333333333333333333333172839171301L, + 
-5.95238095238095238057299772679324503339241961704e-0004L, + +8.41750841750841696138422987977683524926142600321e-0004L, + -1.91752691752686682825032547823699662178842123308e-0003L, + +6.41025641022403480921891559356473451161279359322e-0003L, + -2.95506535798414019189819587455577003732808185071e-0002L, + +1.79644367229970031486079180060923073476568732136e-0001L, + -1.39243086487274662174562872567057200255649290646e+0000L, + +1.34025874044417962188677816477842265259608269775e+0001L, + -1.56803713480127469414495545399982508700748274318e+0002L, + +2.18739841656201561694927630335099313968924493891e+0003L, + -3.55249848644100338419187038090925410976237921269e+0004L, + +6.43464880437835286216768959439484376449179576452e+0005L, + -1.20459154385577014992600342782821389605893904624e+0007L, + +2.09263249637351298563934942349749718491071093210e+0008L, + -2.96247483183169219343745316433899599834685703457e+0009L, + +2.88984933605896033154727626086506756972327292981e+0010L, + -1.40960434146030007732838382416230610302678063984e+0011L, /* 19 */ +}; + +static const long double T3[] = { + +0.666666666666666666666666666666666634567834260213L, /* T3[0] */ + +0.400000000000000000000000000040853636176634934140L, /* T3[1] */ + +0.285714285714285714285696975252753987869020263448L, /* T3[2] */ + +0.222222222222222225593221101192317258554772129875L, /* T3[3] */ + +0.181818181817850192105847183461778186703779262916L, /* T3[4] */ + +0.153846169861348633757101285952333369222567014596L, /* T3[5] */ + +0.133033462889260193922261296772841229985047571265L, /* T3[6] */ +}; + +static const long double c[] = { +0.0L, +1.0L, +2.0L, +0.5L, +1.0e-4930L, /* tiny */ +4.18937683105468750000e-01L, /* hln2pim1_h */ +8.50099203991780329736405617639861397473637783412817152e-07L, /* hln2pim1_l */ +0.418938533204672741780329736405617639861397473637783412817152L, /* hln2pim1 */ +2.16608493865351192653179168701171875e-02L, /* ln2_32hi */ +5.96317165397058692545083025235937919875797669127130e-12L, /* ln2_32lo */ 
+46.16624130844682903551758979206054839765267053289554989233L, /* invln2_32 */ +#if defined(__i386) +1.7555483429044629170023839037639845628291e+03L, /* overflow */ +#else +1.7555483429044629170038892160702032034177e+03L, /* overflow */ +#endif +}; + +#define zero c[0] +#define one c[1] +#define two c[2] +#define half c[3] +#define tiny c[4] +#define hln2pim1_h c[5] +#define hln2pim1_l c[6] +#define hln2pim1 c[7] +#define ln2_32hi c[8] +#define ln2_32lo c[9] +#define invln2_32 c[10] +#define overflow c[11] + +/* + * |exp(r) - (1+r+Et0*r^2+...+Et10*r^12)| <= 2^(-128.88) for |r|<=ln2/64 + */ +static const long double Et[] = { + +5.0000000000000000000e-1L, + +1.66666666666666666666666666666828835166292152466e-0001L, + +4.16666666666666666666666666666693398646592712189e-0002L, + +8.33333333333333333333331748774512601775591115951e-0003L, + +1.38888888888888888888888845356011511394764753997e-0003L, + +1.98412698412698413237140350092993252684198882102e-0004L, + +2.48015873015873016080222025357442659895814371694e-0005L, + +2.75573192239028921114572986441972140933432317798e-0006L, + +2.75573192239448470555548102895526369739856219317e-0007L, + +2.50521677867683935940853997995937600214167232477e-0008L, + +2.08767928899010367374984448513685566514152147362e-0009L, +}; + +/* + * long double precision coefficients for computing log(x)-1 in tgamma. + * See "algorithm" for details + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + ... + T3[6]s^15 + * Note + * (1) the leading entries are truncated to 24 binary point. 
+ * (2) Remez error for T3(s) is bounded by 2**(-136.54) + */ +static const long double T1[] = { +-1.000000000000000000000000000000000000000000e+00L, + +0.000000000000000000000000000000000000000000e+00L, +-3.068528175354003906250000000000000000000000e-01L, +-1.904654299957767878541823431924500011926579e-09L, + +3.862943053245544433593750000000000000000000e-01L, + +5.579533617547508924291635313615100141107647e-08L, + +1.079441487789154052734375000000000000000000e+00L, + +5.389068187551732136437452970422650211661470e-08L, + +1.772588670253753662109375000000000000000000e+00L, + +5.198602757555955348583270627230200282215294e-08L, + +2.465735852718353271484375000000000000000000e+00L, + +5.008137327560178560729088284037750352769117e-08L, + +3.158883035182952880859375000000000000000000e+00L, + +4.817671897564401772874905940845299849351090e-08L, + +3.852030217647552490234375000000000000000000e+00L, + +4.627206467568624985020723597652849919904913e-08L, + +4.545177400112152099609375000000000000000000e+00L, + +4.436741037572848197166541254460399990458737e-08L, + +5.238324582576751708984375000000000000000000e+00L, + +4.246275607577071409312358911267950061012560e-08L, + +5.931471765041351318359375000000000000000000e+00L, + +4.055810177581294621458176568075500131566384e-08L, +}; + +/* + * T2[2i,2i+1] = log(1+i/64+1/128) + */ +static const long double T2[] = { + +7.7821016311645507812500000000000000000000e-03L, + +3.8810890398166212900061136763678127453570e-08L, + +2.3167014122009277343750000000000000000000e-02L, + +4.5159525100885049160962289916579411752759e-08L, + +3.8318812847137451171875000000000000000000e-02L, + +5.1454999148021880325123797290345960518164e-08L, + +5.3244471549987792968750000000000000000000e-02L, + +4.2968824489897120193786528776939573415076e-08L, + +6.7950606346130371093750000000000000000000e-02L, + +5.5562377378300815277772629414034632394030e-08L, + +8.2443654537200927734375000000000000000000e-02L, + +1.4673873663533785068668307805914095366600e-08L, + 
+9.6729576587677001953125000000000000000000e-02L, + +4.9870874110342446056487463437015041543346e-08L, + +1.1081433296203613281250000000000000000000e-01L, + +3.3378253981382306169323211928098474801099e-08L, + +1.2470346689224243164062500000000000000000e-01L, + +1.1608714804222781515380863268491613205318e-08L, + +1.3840228319168090820312500000000000000000e-01L, + +3.9667438227482200873601649187393160823607e-08L, + +1.5191602706909179687500000000000000000000e-01L, + +1.4956750178196803424896884511327584958252e-08L, + +1.6524952650070190429687500000000000000000e-01L, + +4.6394605258578736449277240313729237989366e-08L, + +1.7840760946273803710937500000000000000000e-01L, + +4.8010080260010025241510941968354682199540e-08L, + +1.9139480590820312500000000000000000000000e-01L, + +4.7091426329609298807561308873447039132856e-08L, + +2.0421552658081054687500000000000000000000e-01L, + +1.4847880344628820386196239272213742113867e-08L, + +2.1687388420104980468750000000000000000000e-01L, + +5.4099564554931589525744347498478964801484e-08L, + +2.2937405109405517578125000000000000000000e-01L, + +4.9970790654210230725046139871550961365282e-08L, + +2.4171990156173706054687500000000000000000e-01L, + +3.5325408107597432515913513900103385655073e-08L, + +2.5391519069671630859375000000000000000000e-01L, + +1.9284247135543573297906606667466299224747e-08L, + +2.6596349477767944335937500000000000000000e-01L, + +5.3719458497979750926537543389268821141517e-08L, + +2.7786844968795776367187500000000000000000e-01L, + +1.3154985425144750329234012330820349974537e-09L, + +2.8963327407836914062500000000000000000000e-01L, + +1.8504673536253893055525668970003860369760e-08L, + +3.0126130580902099609375000000000000000000e-01L, + +2.4769140784919125538233755492657352680723e-08L, + +3.1275570392608642578125000000000000000000e-01L, + +6.0778104626049965596883190321597861455475e-09L, + +3.2411944866180419921875000000000000000000e-01L, + +1.9992407776871920760434987352182336158873e-08L, + 
+3.3535552024841308593750000000000000000000e-01L, + +2.1672724744319679579814166199074433006807e-08L, + +3.4646672010421752929687500000000000000000e-01L, + +4.7241991051621587188425772950711830538414e-08L, + +3.5745584964752197265625000000000000000000e-01L, + +3.9274281801569759490140904474434669956562e-08L, + +3.6832553148269653320312500000000000000000e-01L, + +2.9676011119845105154050398826897178765758e-08L, + +3.7907832860946655273437500000000000000000e-01L, + +2.4325502905656478345631019858881408009210e-08L, + +3.8971674442291259765625000000000000000000e-01L, + +6.7171126157142136040035208670510556529487e-09L, + +4.0024316310882568359375000000000000000000e-01L, + +1.0181870233355751019951311700799406124957e-09L, + +4.1065990924835205078125000000000000000000e-01L, + +1.5736916335153056203175822787661567534220e-08L, + +4.2096924781799316406250000000000000000000e-01L, + +4.6826136472066367161506795972449857268707e-08L, + +4.3117344379425048828125000000000000000000e-01L, + +2.1024120852577922478955594998480144051225e-08L, + +4.4127452373504638671875000000000000000000e-01L, + +3.7069828842770746441661301225362605528786e-08L, + +4.5127463340759277343750000000000000000000e-01L, + +1.0731865811707192383079012478685922879010e-08L, + +4.6117568016052246093750000000000000000000e-01L, + +3.4961647705430499925597855358603099030515e-08L, + +4.7097969055175781250000000000000000000000e-01L, + +2.4667033200046897856056359251373510964634e-08L, + +4.8068851232528686523437500000000000000000e-01L, + +1.7020465042442243455448011551208861216878e-08L, + +4.9030393362045288085937500000000000000000e-01L, + +5.4424740957290971159645746860530583309571e-08L, + +4.9982786178588867187500000000000000000000e-01L, + +7.7705606579463314152470441415126573566105e-09L, + +5.0926184654235839843750000000000000000000e-01L, + +5.5247449548366574919228323824878565745713e-08L, + +5.1860773563385009765625000000000000000000e-01L, + +2.8574195534496726996364798698556235730848e-08L, + 
+5.2786707878112792968750000000000000000000e-01L, + +1.0839714455426392217778300963558522088193e-08L, + +5.3704142570495605468750000000000000000000e-01L, + +4.0191927599879229244153832299023744345999e-08L, + +5.4613238573074340820312500000000000000000e-01L, + +5.1867392242179272209231209163864971792889e-08L, + +5.5514144897460937500000000000000000000000e-01L, + +5.8565892217715480359515904050170125743178e-08L, + +5.6407010555267333984375000000000000000000e-01L, + +3.2732129626227634290090190711817681692354e-08L, + +5.7291972637176513671875000000000000000000e-01L, + +2.7190020372374006726626261068626400393936e-08L, + +5.8169168233871459960937500000000000000000e-01L, + +5.7295907882911235753725372340709967597394e-08L, + +5.9038740396499633789062500000000000000000e-01L, + +4.2637180036751291708123598757577783615014e-08L, + +5.9900814294815063476562500000000000000000e-01L, + +4.6697932764615975024461651502060474048774e-08L, + +6.0755521059036254882812500000000000000000e-01L, + +3.9634179246672960152791125371893149820625e-08L, + +6.1602985858917236328125000000000000000000e-01L, + +1.8626341656366315928196700650292529688219e-08L, + +6.2443327903747558593750000000000000000000e-01L, + +8.9744179151050387440546731199093039879228e-09L, + +6.3276666402816772460937500000000000000000e-01L, + +5.5428701049364114685035797584887586099726e-09L, + +6.4103114604949951171875000000000000000000e-01L, + +3.3371431779336851334405392546708949047361e-08L, + +6.4922791719436645507812500000000000000000e-01L, + +2.9430743363812714969905311122271269100885e-08L, + +6.5735805034637451171875000000000000000000e-01L, + +2.2361985518423140023245936165514147093250e-08L, + +6.6542261838912963867187500000000000000000e-01L, + +1.4155960810278217610006660181148303091649e-08L, + +6.7342263460159301757812500000000000000000e-01L, + +4.0610573702719835388801017264750843477878e-08L, + +6.8135917186737060546875000000000000000000e-01L, + +5.2940532463479321559568089441735584156689e-08L, + 
+6.8923324346542358398437500000000000000000e-01L, + +3.7773385396340539337814603903232796216537e-08L, +}; + +/* + * S[j],S_trail[j] = 2**(j/32.) for the final computation of exp(t+w) + */ +static const long double S[] = { +#if defined(__i386) + +1.0000000000000000000000000e+00L, + +1.0218971486541166782081522e+00L, + +1.0442737824274138402382006e+00L, + +1.0671404006768236181297224e+00L, + +1.0905077326652576591003302e+00L, + +1.1143867425958925362894369e+00L, + +1.1387886347566916536971221e+00L, + +1.1637248587775775137938619e+00L, + +1.1892071150027210666875674e+00L, + +1.2152473599804688780476325e+00L, + +1.2418578120734840485256747e+00L, + +1.2690509571917332224885722e+00L, + +1.2968395546510096659215822e+00L, + +1.3252366431597412945939118e+00L, + +1.3542555469368927282668852e+00L, + +1.3839098819638319548151403e+00L, + +1.4142135623730950487637881e+00L, + +1.4451808069770466200253470e+00L, + +1.4768261459394993113155431e+00L, + +1.5091644275934227397133885e+00L, + +1.5422108254079408235859630e+00L, + +1.5759808451078864864006862e+00L, + +1.6104903319492543080837174e+00L, + +1.6457554781539648445110730e+00L, + +1.6817928305074290860378350e+00L, + +1.7186192981224779156032914e+00L, + +1.7562521603732994831094730e+00L, + +1.7947090750031071864148413e+00L, + +1.8340080864093424633989166e+00L, + +1.8741676341102999013002103e+00L, + +1.9152065613971472938202589e+00L, + +1.9571441241754002689657438e+00L, +#else + +1.00000000000000000000000000000000000e+00L, + +1.02189714865411667823448013478329942e+00L, + +1.04427378242741384032196647873992910e+00L, + +1.06714040067682361816952112099280918e+00L, + +1.09050773266525765920701065576070789e+00L, + +1.11438674259589253630881295691960313e+00L, + +1.13878863475669165370383028384151134e+00L, + +1.16372485877757751381357359909218536e+00L, + +1.18920711500272106671749997056047593e+00L, + +1.21524735998046887811652025133879836e+00L, + +1.24185781207348404859367746872659561e+00L, + +1.26905095719173322255441908103233805e+00L, + 
+1.29683955465100966593375411779245118e+00L, + +1.32523664315974129462953709549872168e+00L, + +1.35425554693689272829801474014070273e+00L, + +1.38390988196383195487265952726519287e+00L, + +1.41421356237309504880168872420969798e+00L, + +1.44518080697704662003700624147167095e+00L, + +1.47682614593949931138690748037404985e+00L, + +1.50916442759342273976601955103319352e+00L, + +1.54221082540794082361229186209073479e+00L, + +1.57598084510788648645527016018190504e+00L, + +1.61049033194925430817952066735740067e+00L, + +1.64575547815396484451875672472582254e+00L, + +1.68179283050742908606225095246642969e+00L, + +1.71861929812247791562934437645631244e+00L, + +1.75625216037329948311216061937531314e+00L, + +1.79470907500310718642770324212778174e+00L, + +1.83400808640934246348708318958828892e+00L, + +1.87416763411029990132999894995444645e+00L, + +1.91520656139714729387261127029583086e+00L, + +1.95714412417540026901832225162687149e+00L, +#endif +}; +static const long double S_trail[] = { +#if defined(__i386) + +0.0000000000000000000000000e+00L, + +2.6327965667180882569382524e-20L, + +8.3765863521895191129661899e-20L, + +3.9798705777454504249209575e-20L, + +1.0668046596651558640993042e-19L, + +1.9376009847285360448117114e-20L, + +6.7081819456112953751277576e-21L, + +1.9711680502629186462729727e-20L, + +2.9932584438449523689104569e-20L, + +6.8887754153039109411061914e-20L, + +6.8002718741225378942847820e-20L, + +6.5846917376975403439742349e-20L, + +1.2171958727511372194876001e-20L, + +3.5625253228704087115438260e-20L, + +3.1129551559077560956309179e-20L, + +5.7519192396164779846216492e-20L, + +3.7900651177865141593101239e-20L, + +1.1659262405698741798080115e-20L, + +7.1364385105284695967172478e-20L, + +5.2631003710812203588788949e-20L, + +2.6328853788732632868460580e-20L, + +5.4583950085438242788190141e-20L, + +9.5803254376938269960718656e-20L, + +7.6837733983874245823512279e-21L, + +2.4415965910835093824202087e-20L, + +2.6052966871016580981769728e-20L, + 
+2.6876456344632553875309579e-21L, + +1.2861930155613700201703279e-20L, + +8.8166633394037485606572294e-20L, + +2.9788615389580190940837037e-20L, + +5.2352341619805098677422139e-20L, + +5.2578463064010463732242363e-20L, +#else + +0.00000000000000000000000000000000000e+00L, + +1.80506787420330954745573333054573786e-35L, +-9.37452029228042742195756741973083214e-35L, +-1.59696844729275877071290963023149997e-35L, + +9.11249341012502297851168610167248666e-35L, +-6.50422820697854828723037477525938871e-35L, +-8.14846884452585113732569176748815532e-35L, +-5.06621457672180031337233074514290335e-35L, +-1.35983097468881697374987563824591912e-35L, + +9.49742763556319647030771056643324660e-35L, +-3.28317052317699860161506596533391526e-36L, +-5.01723570938719041029018653045842895e-35L, +-2.39147479768910917162283430160264014e-35L, +-8.35057135763390881529889073794408385e-36L, + +7.03675688907326504242173719067187644e-35L, +-5.18248485306464645753689301856695619e-35L, + +9.42224254862183206569211673639406488e-35L, +-3.96750082539886230916730613021641828e-35L, + +7.14352899156330061452327361509276724e-35L, + +1.15987125286798512424651783410044433e-35L, + +4.69693347835811549530973921320187447e-35L, +-3.38651317599500471079924198499981917e-35L, +-8.58731877429824706886865593510387445e-35L, +-9.60595154874935050318549936224606909e-35L, + +9.60973393212801278450755869714178581e-35L, + +6.37839792144002843924476144978084855e-35L, + +7.79243078569586424945646112516927770e-35L, + +7.36133776758845652413193083663393220e-35L, +-6.47299514791334723003521457561217053e-35L, + +8.58747441795369869427879806229522962e-35L, + +2.37181542282517483569165122830269098e-35L, +-3.02689168209611877300459737342190031e-37L, +#endif +}; +/* INDENT ON */ + +/* INDENT OFF */ +/* + * return tgamma(x) scaled by 2**-m for 8<x<=171.62... 
using Stirling's formula + * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x)) + * = L1 + L2 + L3, + */ +/* INDENT ON */ +static struct LDouble +large_gam(long double x, int *m) { + long double z, t1, t2, t3, z2, t5, w, y, u, r, v; + long double t24 = 16777216.0L, p24 = 1.0L / 16777216.0L; + int n2, j2, k, ix, j, i; + struct LDouble zz; + long double u2, ss_h, ss_l, r_h, w_h, w_l, t4; + +/* INDENT OFF */ +/* + * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details) + * + * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2, + * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and + * T1(n) = T1[2n,2n+1] = n*log(2)-1, + * T2(j) = T2[2j,2j+1] = log(z[j]), + * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + ... + T3[6]s^15 + * Note + * (1) the leading entries are truncated to 24 binary point. + * (2) Remez error for T3(s) is bounded by 2**(-72.4) + * 2**(-24) + * _________V___________________ + * T1(n): |_________|___________________| + * _______ ______________________ + * T2(j): |_______|______________________| + * ____ _______________________ + * 2s: |____|_______________________| + * __________________________ + * + T3(s)-2s: |__________________________| + * ------------------------------------------- + * [leading] + [Trailing] + */ + /* INDENT ON */ + ix = H0_WORD(x); + n2 = (ix >> 16) - 0x3fff; /* exponent of x, range:3-10 */ + y = scalbnl(x, -n2); /* y = scale x to [1,2] */ + n2 += n2; /* 2n */ + j = (ix >> 10) & 0x3f; /* j */ + z = 1.0078125L + (long double) j * 0.015625L; /* z[j]=1+j/64+1/128 */ + j2 = j + j; + t1 = y + z; + t2 = y - z; + r = one / t1; + u = r * t2; /* u = (y-z)/(y+z) */ + t1 = CHOPPED(t1); + t4 = T2[j2 + 1] + T1[n2 + 1]; + z2 = u * u; + k = H0_WORD(u) & 0x7fffffff; + t3 = T2[j2] + T1[n2]; + for (t5 = T3[6], i = 5; i >= 0; i--) + t5 = z2 * t5 + T3[i]; + if ((k >> 16) < 0x3fec) { /* |u|<2**-19 */ + t2 = t4 + u * (two + z2 * t5); + } else { + t5 = t4 + (u * z2) * t5; + u2 = u + u; + v = (long double) ((int) (u2 * 
t24)) * p24; + t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z))); + t3 += v; + } + ss_h = CHOPPED((t2 + t3)); + ss_l = t2 - (ss_h - t3); +/* INDENT OFF */ +/* + * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2))) + * where ss = log(x) - 1 in already in extra precision + */ + /* INDENT ON */ + z = one / x; + r = x - half; + r_h = CHOPPED((r)); + w_h = r_h * ss_h + hln2pim1_h; + z2 = z * z; + w = (r - r_h) * ss_h + r * ss_l; + t1 = GP[19]; + for (i = 18; i > 0; i--) + t1 = z2 * t1 + GP[i]; + w += hln2pim1_l; + w_l = z * (GP[0] + z2 * t1) + w; + k = (int) ((w_h + w_l) * invln2_32 + half); + + /* compute the exponential of w_h+w_l */ + + j = k & 0x1f; + *m = k >> 5; + t3 = (long double) k; + + /* perform w - k*ln2_32 (represent as w_h - w_l) */ + t1 = w_h - t3 * ln2_32hi; + t2 = t3 * ln2_32lo; + w = t2 - w_l; + w_h = t1 - w; + w_l = w - (t1 - w_h); + + /* compute exp(w_h-w_l) */ + z = w_h - w_l; + for (t1 = Et[10], i = 9; i >= 0; i--) + t1 = z * t1 + Et[i]; + t3 = w_h - (w_l - (z * z) * t1); /* t3 = expm1(z) */ + zz.l = S_trail[j] * (one + t3) + S[j] * t3; + zz.h = S[j]; + return (zz); +} + +/* INDENT OFF */ +/* + * kpsin(x)= sin(pi*x)/pi + * 3 5 7 9 11 27 + * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x + ... 
+ ks[12]*x + */ +static const long double ks[] = { + -1.64493406684822643647241516664602518705158902870e+0000L, + +8.11742425283353643637002772405874238094995726160e-0001L, + -1.90751824122084213696472111835337366232282723933e-0001L, + +2.61478478176548005046532613563241288115395517084e-0002L, + -2.34608103545582363750893072647117829448016479971e-0003L, + +1.48428793031071003684606647212534027556262040158e-0004L, + -6.97587366165638046518462722252768122615952898698e-0006L, + +2.53121740413702536928659271747187500934840057929e-0007L, + -7.30471182221385990397683641695766121301933621956e-0009L, + +1.71653847451163495739958249695549313987973589884e-0010L, + -3.34813314714560776122245796929054813458341420565e-0012L, + +5.50724992262622033449487808306969135431411753047e-0014L, + -7.67678132753577998601234393215802221104236979928e-0016L, +}; +/* INDENT ON */ + +/* + * assume x is not tiny and positive + */ +static struct LDouble +kpsin(long double x) { + long double z, t1, t2; + struct LDouble xx; + int i; + + z = x * x; + xx.h = x; + for (t2 = ks[12], i = 11; i > 0; i--) + t2 = z * t2 + ks[i]; + t1 = z * x; + t2 *= z * t1; + xx.l = t1 * ks[0] + t2; + return (xx); +} + +/* INDENT OFF */ +/* + * kpcos(x)= cos(pi*x)/pi + * 2 4 6 8 10 12 + * = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x + * + * 2 4 6 8 10 22 + * = 1/pi - pi/2*x +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +...+kc[9]*x + * + * -pi/2*x*x = (npi_2_h + npi_2_l) * (x_f+x_l)*(x_f+x_l) + * = npi_2_h*(x_f+x_l)*(x_f+x_l) + npi_2_l*x*x + * = npi_2_h*x_f*x_f + npi_2_h*(x*x-x_f*x_f) + npi_2_l*x*x + * = npi_2_h*x_f*x_f + npi_2_h*(x+x_f)*(x-x_f) + npi_2_l*x*x + * Here x_f = (long double) (float)x + * Note that pi/2(in hex) = + * 1.921FB54442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 + * npi_2_h = -pi/2 chopped to 25 bits = -1.921FB50000000000000000000000000 = + * -1.570796310901641845703125000000000 and + * npi_2_l = + * -0.0000004442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 = + * 
-.0000000158932547735281966916397514420985846996875529104874722961539 = + * -1.5893254773528196691639751442098584699687552910487472296153e-8 + * 1/pi(in hex) = + * .517CC1B727220A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B + * will be splitted into: + * one_pi_h = 1/pi chopped to 48 bits = .517CC1B727220000000000... and + * one_pi_l = .0000000000000A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B + */ + +static const long double +#if defined(__i386) +one_pi_h = 0.3183098861481994390487670898437500L, /* 31 bits */ +one_pi_l = 3.559123248900043690127872406891929148e-11L, +#else +one_pi_h = 0.31830988618379052468299050815403461456298828125L, +one_pi_l = 1.46854777018590994109505931010230912897495334688117e-16L, +#endif +npi_2_h = -1.570796310901641845703125000000000L, +npi_2_l = -1.5893254773528196691639751442098584699687552910e-8L; + +static const long double kc[] = { + +1.29192819501249250731151312779548918765320728489e+0000L, + -4.25027339979557573976029596929319207009444090366e-0001L, + +7.49080661650990096109672954618317623888421628613e-0002L, + -8.21458866111282287985539464173976555436050215120e-0003L, + +6.14202578809529228503205255165761204750211603402e-0004L, + -3.33073432691149607007217330302595267179545908740e-0005L, + +1.36970959047832085796809745461530865597993680204e-0006L, + -4.41780774262583514450246512727201806217271097336e-0008L, + +1.14741409212381858820016567664488123478660705759e-0009L, + -2.44261236114707374558437500654381006300502749632e-0011L, +}; +/* INDENT ON */ + +/* + * assume x is not tiny and positive + */ +static struct LDouble +kpcos(long double x) { + long double z, t1, t2, t3, t4, x4, x8; + int i; + struct LDouble xx; + + z = x * x; + xx.h = one_pi_h; + t1 = (long double) ((float) x); + x4 = z * z; + t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1); + for (i = 8, t3 = kc[9]; i >= 0; i--) + t3 = z * t3 + kc[i]; + t3 = one_pi_l + x4 * t3; + t4 = t1 * t1 * npi_2_h; + x8 = t2 + t3; + xx.l = x8 + t4; + return (xx); +} + +/* INDENT 
OFF */ +static const long double + /* 0.13486180573279076968979393577465291700642511139552429398233 */ +#if defined(__i386) +t0z1 = 0.1348618057327907696779385054997035808810L, +t0z1_l = 1.1855430274949336125392717150257379614654e-20L, +#else +t0z1 = 0.1348618057327907696897939357746529168654L, +t0z1_l = 1.4102088588676879418739164486159514674310e-37L, +#endif + /* 0.46163214496836234126265954232572132846819620400644635129599 */ +#if defined(__i386) +t0z2 = 0.4616321449683623412538115843295472018326L, +t0z2_l = 8.84795799617412663558532305039261747030640e-21L, +#else +t0z2 = 0.46163214496836234126265954232572132343318L, +t0z2_l = 5.03501162329616380465302666480916271611101e-36L, +#endif + /* 0.81977310110050060178786870492160699631174407846245179119586 */ +#if defined(__i386) +t0z3 = 0.81977310110050060178773362329351925836817L, +t0z3_l = 1.350816280877379435658077052534574556256230e-22L +#else +t0z3 = 0.8197731011005006017878687049216069516957449L, +t0z3_l = 4.461599916947014419045492615933551648857380e-35L +#endif +; +/* INDENT ON */ + +/* + * gamma(x+i) for 0 <= x < 1 + */ +static struct LDouble +gam_n(int i, long double x) { + struct LDouble rr, yy; + long double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl; + + /* compute yy = gamma(x+1) */ + if (x > 0.2845L) { + if (x > 0.6374L) { + r1 = x - t0z3; + r2 = CHOPPED((r1 - t0z3_l)); + t2 = r1 - r2; + yy = GT3(r2, t2 - t0z3_l); + } else { + r1 = x - t0z2; + r2 = CHOPPED((r1 - t0z2_l)); + t2 = r1 - r2; + yy = GT2(r2, t2 - t0z2_l); + } + } else { + r1 = x - t0z1; + r2 = CHOPPED((r1 - t0z1_l)); + t2 = r1 - r2; + yy = GT1(r2, t2 - t0z1_l); + } + /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */ + switch (i) { + case 0: /* yy/x */ + r1 = one / x; + xh = CHOPPED((x)); /* x is not tiny */ + rr.h = CHOPPED(((yy.h + yy.l) * r1)); + rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) - + r1 * yy.l); + break; + case 1: /* yy */ + rr.h = yy.h; + rr.l = yy.l; + break; + case 2: /* (x+1)*yy */ + z = x + 
one; /* may not be exact */ + zh = CHOPPED((z)); + rr.h = zh * yy.h; + rr.l = z * yy.l + (x - (zh - one)) * yy.h; + break; + case 3: /* (x+2)*(x+1)*yy */ + z1 = x + one; + z2 = x + 2.0L; + z = z1 * z2; + xh = CHOPPED((z)); + zh = CHOPPED((z1)); + xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one)); + + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + + case 4: /* (x+1)*(x+3)*(x+2)*yy */ + z1 = x + 2.0L; + z2 = (x + one) * (x + 3.0L); + zh = CHOPPED(z1); + zl = x - (zh - 2.0L); + xh = CHOPPED(z2); + xl = zl * (zh + z1) - (xh - (zh * zh - one)); + + /* wh+wl=(x+2)*yy */ + wh = CHOPPED((z1 * (yy.h + yy.l))); + wl = (zl * yy.h + z1 * yy.l) - (wh - zh * yy.h); + + rr.h = xh * wh; + rr.l = z2 * wl + xl * wh; + + break; + case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */ + z1 = x + 2.0L; + z2 = x + 3.0L; + z = z1 * z2; + zh = CHOPPED((z1)); + yh = CHOPPED((z)); + yl = (x - (zh - 2.0L)) * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0L; + z *= z2; + xh = CHOPPED((z)); + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L)); + rr.h = xh * yy.h; + rr.l = z * yy.l + xl * yy.h; + break; + case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */ + z1 = x + 2.0L; + z2 = x + 3.0L; + z = z1 * z2; + zh = CHOPPED((z1)); + yh = CHOPPED((z)); + z1 = x - (zh - 2.0L); + yl = z1 * (z2 + zh) - (yh - zh * (zh + one)); + z2 = z - 2.0L; + x5 = x + 5.0L; + z *= z2; + xh = CHOPPED(z); + zh += 3.0; + xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L)); + /* xh+xl=(x+1)*...*(x+4) */ + /* wh+wl=(x+5)*yy */ + wh = CHOPPED((x5 * (yy.h + yy.l))); + wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + break; + case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */ + z1 = x + 3.0L; + z2 = x + 4.0L; + z = z2 * z1; + zh = CHOPPED((z1)); + yh = CHOPPED((z)); /* yh+yl = (x+3)(x+4) */ + yl = (x - (zh - 3.0L)) * (z2 + zh) - (yh - (zh * (zh + one))); + z1 = x + 6.0L; + z2 = z - 2.0L; /* z2 = (x+2)*(x+5) */ + z *= z2; + xh = CHOPPED((z)); + xl = yl * (z2 + yh) - (xh - yh * 
(yh - 2.0L)); + /* xh+xl=(x+2)*...*(x+5) */ + /* wh+wl=(x+1)(x+6)*yy */ + z2 -= 4.0L; /* z2 = (x+1)(x+6) */ + wh = CHOPPED((z2 * (yy.h + yy.l))); + wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0L) * yy.h); + rr.h = wh * xh; + rr.l = z * wl + xl * wh; + } + return (rr); +} + +long double +tgammal(long double x) { + struct LDouble ss, ww; + long double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5; + int i, j, m, ix, hx, xk; + unsigned lx; + + hx = H0_WORD(x); + lx = H3_WORD(x); + ix = hx & 0x7fffffff; + y = x; + if (ix < 0x3f8e0000) { /* x < 2**-113 */ + return (one / x); + } + if (ix >= 0x7fff0000) + return (x * ((hx < 0)? zero : x)); /* Inf or NaN */ + if (x > overflow) /* overflow threshold */ + return (x * 1.0e4932L); + if (hx >= 0x40020000) { /* x >= 8 */ + ww = large_gam(x, &m); + w = ww.h + ww.l; + return (scalbnl(w, m)); + } + + if (hx > 0) { /* x from 0 to 8 */ + i = (int) x; + ww = gam_n(i, x - (long double) i); + return (ww.h + ww.l); + } + /* INDENT OFF */ + /* negative x */ + /* + * compute xk = + * -2 ... x is an even int (-inf is considered an even #) + * -1 ... x is an odd int + * +0 ... x is not an int but chopped to an even int + * +1 ... x is not an int but chopped to an odd int + */ + /* INDENT ON */ + xk = 0; +#if defined(__i386) + if (ix >= 0x403e0000) { /* x >= 2**63 } */ + if (ix >= 0x403f0000) + xk = -2; + else + xk = -2 + (lx & 1); +#else + if (ix >= 0x406f0000) { /* x >= 2**112 */ + if (ix >= 0x40700000) + xk = -2; + else + xk = -2 + (lx & 1); +#endif + } else if (ix >= 0x3fff0000) { + w = -x; + t1 = floorl(w); + t2 = t1 * half; + t3 = floorl(t2); + if (t1 == w) { + if (t2 == t3) + xk = -2; + else + xk = -1; + } else { + if (t2 == t3) + xk = 0; + else + xk = 1; + } + } + + if (xk < 0) { + /* return NaN. 
Ideally gamma(-n)= (-1)**(n+1) * inf */ + return (x - x) / (x - x); + } + + /* + * negative underflow thresold -(1774+9ulp) + */ + if (x < -1774.0000000000000000000000000000017749370L) { + z = tiny / x; + if (xk == 1) + z = -z; + return (z * tiny); + } + + /* INDENT OFF */ + /* + * now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x + */ + /* + * First compute ss = -sin(pi*y)/pi so that + * gamma(x) = 1/(ss*gamma(1+y)) + */ + /* INDENT ON */ + y = -x; + j = (int) y; + z = y - (long double) j; + if (z > 0.3183098861837906715377675L) + if (z > 0.6816901138162093284622325L) + ss = kpsin(one - z); + else + ss = kpcos(0.5L - z); + else + ss = kpsin(z); + if (xk == 0) { + ss.h = -ss.h; + ss.l = -ss.l; + } + + /* Then compute ww = gamma(1+y), note that result scale to 2**m */ + m = 0; + if (j < 7) { + ww = gam_n(j + 1, z); + } else { + w = y + one; + if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */ + ww = large_gam(w, &m); + } else { + t = w - one; + if (t == y) { /* y+one exact */ + ww = large_gam(w, &m); + } else { /* use y*gamma(y) */ + if (j == 7) + ww = gam_n(j, z); + else + ww = large_gam(y, &m); + t4 = ww.h + ww.l; + t1 = CHOPPED((y)); + t2 = CHOPPED((t4)); + /* t4 will not be too large */ + ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2; + ww.h = t1 * t2; + } + } + } + + /* compute 1/(ss*ww) */ + t3 = ss.h + ss.l; + t4 = ww.h + ww.l; + t1 = CHOPPED((t3)); + t2 = CHOPPED((t4)); + z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */ + z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */ + t3 = t3 * t4; /* t3 = ss*ww */ + z3 = one / t3; /* z3 = 1/(ss*ww) */ + t5 = t1 * t2; + z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */ + t1 = CHOPPED((t3)); /* (t1,z1) = ss*ww */ + z1 = z5 - (t1 - t5); + t2 = CHOPPED((z3)); /* leading 1/(ss*ww) */ + z2 = z3 * (t2 * z1 - (one - t2 * t1)); + z = t2 - z2; + + return (scalbnl(z, -m)); +} diff --git a/usr/src/libm/src/m9x/trunc.c b/usr/src/libm/src/m9x/trunc.c new file mode 100644 index 0000000..3797730 --- /dev/null +++ 
b/usr/src/libm/src/m9x/trunc.c @@ -0,0 +1,69 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)trunc.c 1.4 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak trunc = __trunc +#endif + +#include "libm.h" + +double +trunc(double x) { + union { + unsigned i[2]; + double d; + } xx; + unsigned hx, sx, i; + + xx.d = x; + hx = xx.i[HIWORD] & ~0x80000000; + sx = xx.i[HIWORD] & 0x80000000; + if (hx < 0x43300000) { /* |x| < 2^52 */ + if (hx < 0x3ff00000) /* |x| < 1 */ + return (sx ? -0.0 : 0.0); + + /* chop x at the integer bit */ + if (hx < 0x41300000) { + i = 1 << (0x412 - (hx >> 20)); + xx.i[HIWORD] &= ~(i | (i - 1)); + xx.i[LOWORD] = 0; + } else { + i = 1 << (0x432 - (hx >> 20)); + xx.i[LOWORD] &= ~(i | (i - 1)); + } + return (xx.d); + } else if (hx < 0x7ff00000) + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx >= 0x7ff80000 ? 
x : x + x); + /* assumes sparc-like QNaN */ +#else + return (x + x); +#endif +} diff --git a/usr/src/libm/src/m9x/truncf.c b/usr/src/libm/src/m9x/truncf.c new file mode 100644 index 0000000..e2cc454 --- /dev/null +++ b/usr/src/libm/src/m9x/truncf.c @@ -0,0 +1,62 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)truncf.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak truncf = __truncf +#endif + +#include "libm.h" + +float +truncf(float x) { + union { + unsigned i; + float f; + } xx; + unsigned hx, sx, i; + + xx.f = x; + hx = xx.i & ~0x80000000; + sx = xx.i & 0x80000000; + if (hx < 0x4b000000) { /* |x| < 2^23 */ + if (hx < 0x3f800000) /* |x| < 1 */ + return (sx ? -0.0F : 0.0F); + + /* chop x at the integer bit */ + i = 1 << (0x95 - (hx >> 23)); + xx.i &= ~((i << 1) - 1); + return (xx.f); + } else if (hx < 0x7f800000) /* |x| is integral */ + return (x); + else +#if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN) + return (hx > 0x7f800000 ? 
x * x : x + x); +#else + return (x + x); +#endif +} diff --git a/usr/src/libm/src/m9x/truncl.c b/usr/src/libm/src/m9x/truncl.c new file mode 100644 index 0000000..ba0724a --- /dev/null +++ b/usr/src/libm/src/m9x/truncl.c @@ -0,0 +1,109 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "@(#)truncl.c 1.3 06/01/31 SMI" + +#if defined(ELFOBJ) +#pragma weak truncl = __truncl +#endif + +#include "libm.h" + +#if defined(__sparc) +long double +truncl(long double x) { + union { + unsigned i[4]; + long double q; + } xx; + unsigned hx, sx; + int j; + + xx.q = x; + sx = xx.i[0] & 0x80000000; + hx = xx.i[0] & ~0x80000000; + + /* handle trivial cases */ + if (hx >= 0x406f0000) /* |x| >= 2^112 + ... or x is nan */ + return (hx >= 0x7fff0000 ? x + x : x); + + /* handle |x| < 1 */ + if (hx < 0x3fff0000) + return (sx ? 
-0.0L : 0.0L); + + j = 0x406f - (hx >> 16); /* 1 <= j <= 112 */ + xx.i[0] = hx; + if (j >= 96) { /* 96 <= j <= 112 */ + xx.i[0] &= ~((1 << (j - 96)) - 1); + xx.i[1] = xx.i[2] = xx.i[3] = 0; + } else if (j >= 64) { /* 64 <= j <= 95 */ + xx.i[1] &= ~((1 << (j - 64)) - 1); + xx.i[2] = xx.i[3] = 0; + } else if (j >= 32) { /* 32 <= j <= 63 */ + xx.i[2] &= ~((1 << (j - 32)) - 1); + xx.i[3] = 0; + } else /* 1 <= j <= 31 */ + xx.i[3] &= ~((1 << j) - 1); + + /* negate result if need be */ + if (sx) + xx.i[0] |= 0x80000000; + return (xx.q); +} +#elif defined(__i386) +long double +truncl(long double x) { + union { + unsigned i[3]; + long double e; + } xx; + int ex, sx, i; + + xx.e = x; + ex = xx.i[2] & 0x7fff; + sx = xx.i[2] & 0x8000; + if (ex < 0x403e) { /* |x| < 2^63 */ + if (ex < 0x3fff) /* |x| < 1 */ + return (sx ? -0.0L : 0.0L); + + /* chop x at the integer bit */ + if (ex < 0x401e) { + i = 1 << (0x401d - ex); + xx.i[1] &= ~(i | (i - 1)); + xx.i[0] = 0; + } else { + i = 1 << (0x403d - ex); + xx.i[0] &= ~(i | (i - 1)); + } + return (xx.e); + } else if (ex < 0x7fff) /* x is integral */ + return (x); + else /* inf or nan */ + return (x + x); +} +#else +#error Unknown architecture +#endif /* defined(__sparc) || defined(__i386) */ |